Add an SSE code path for the Quaternion<float> * Quaternion<float> product.

PacketMath.h
  ei_vec4f_swizzle1(v,p,q,r,s) reinterprets the packet v as four 32-bit
  integers, shuffles them with _mm_shuffle_epi32 using the immediate
  (s<<6 | r<<4 | q<<2 | p), and reinterprets the result as floats again.
  The result is the packet (v[p], v[q], v[r], v[s]).

Constants.h
  EiArch_Generic, EiArch_SSE and EiArch_AltiVec are integer architecture
  tags. EiArch is set to the tag matching whichever EIGEN_VECTORIZE_* macro
  is defined, and to EiArch_Generic when none is.

Quaternion.h
  Quaternion::operator* now forwards to ei_quaternion_product<EiArch>.
  The generic template carries the scalar formula that used to be written
  inline in operator*. The <EiArch_SSE,float> specialization evaluates the
  same four sums on whole packets. Reading the lanes as (x, y, z, w):

    a * b.wwww             = ( ax*bw, ay*bw, az*bw, aw*bw )
    a.zxyx * b.yzxx        = ( az*by, ax*bz, ay*bx, ax*bx )   subtracted
    flip1 = a.yzxz * b.zxyz = ( ay*bz, az*bx, ax*by, az*bz )   lane 3 negated
    flip2 = a.wwwy * b.xyzy = ( aw*bx, aw*by, aw*bz, ay*by )   lane 3 negated

  mask has only the sign bit of lane 3 set, so the xor negates lane 3 of
  flip1 and flip2 and leaves lanes 0..2 untouched. Summing the four rows
  gives the x, y, z and w expressions of the generic version.

  NOTE(review): the (x, y, z, w) lane order and the alignment needed by the
  packet load and by ei_pstore(&res.x(), ...) are assumed from how the
  result is stored; confirm against the Quaternion coefficient storage.

NOTE(review): the copy of this patch under review had lost its line breaks
and every template argument list. Both were reconstructed from the hunk
line counts and the surrounding code. The indentation of context lines is a
best guess; use "git apply --ignore-whitespace" if a hunk is rejected.

diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 3a6bbba1b..9ca65b9be 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -315,4 +315,7 @@ struct ei_palign_impl
 };
 #endif
 
+#define ei_vec4f_swizzle1(v,p,q,r,s) \
+  (_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), ((s)<<6|(r)<<4|(q)<<2|(p)))))
+
 #endif // EIGEN_PACKET_MATH_SSE_H
diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h
index 1cbf501f5..296c3caa5 100644
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@@ -239,4 +239,16 @@ enum {
   HasDirectAccess = DirectAccessBit
 };
 
+const int EiArch_Generic = 0x0;
+const int EiArch_SSE = 0x1;
+const int EiArch_AltiVec = 0x2;
+
+#if defined EIGEN_VECTORIZE_SSE
+  const int EiArch = EiArch_SSE;
+#elif defined EIGEN_VECTORIZE_ALTIVEC
+  const int EiArch = EiArch_AltiVec;
+#else
+  const int EiArch = EiArch_Generic;
+#endif
+
 #endif // EIGEN_CONSTANTS_H
diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h
index 9593c7b9d..3fcbff4e7 100644
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -224,17 +224,45 @@ typedef Quaternion<float> Quaternionf;
   * double precision quaternion type */
 typedef Quaternion<double> Quaterniond;
 
+// Generic Quaternion * Quaternion product
+template<int Arch,typename Scalar> inline Quaternion<Scalar>
+ei_quaternion_product(const Quaternion<Scalar>& a, const Quaternion<Scalar>& b)
+{
+  return Quaternion<Scalar>
+  (
+    a.w() * b.w() - a.x() * b.x() - a.y() * b.y() - a.z() * b.z(),
+    a.w() * b.x() + a.x() * b.w() + a.y() * b.z() - a.z() * b.y(),
+    a.w() * b.y() + a.y() * b.w() + a.z() * b.x() - a.x() * b.z(),
+    a.w() * b.z() + a.z() * b.w() + a.x() * b.y() - a.y() * b.x()
+  );
+}
+
+#ifdef EIGEN_VECTORIZE_SSE
+template<> inline Quaternion<float>
+ei_quaternion_product<EiArch_SSE,float>(const Quaternion<float>& _a, const Quaternion<float>& _b)
+{
+  const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0,0,0,0x80000000));
+  Quaternion<float> res;
+  __m128 a = _a.coeffs().packet<Aligned>(0);
+  __m128 b = _b.coeffs().packet<Aligned>(0);
+  __m128 flip1 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,1,2,0,2),
+                                       ei_vec4f_swizzle1(b,2,0,1,2)),mask);
+  __m128 flip2 = _mm_xor_ps(_mm_mul_ps(ei_vec4f_swizzle1(a,3,3,3,1),
+                                       ei_vec4f_swizzle1(b,0,1,2,1)),mask);
+  ei_pstore(&res.x(),
+            _mm_add_ps(_mm_sub_ps(_mm_mul_ps(a,ei_vec4f_swizzle1(b,3,3,3,3)),
+                                  _mm_mul_ps(ei_vec4f_swizzle1(a,2,0,1,0),
+                                             ei_vec4f_swizzle1(b,1,2,0,0))),
+                       _mm_add_ps(flip1,flip2)));
+  return res;
+}
+#endif
+
 /** \returns the concatenation of two rotations as a quaternion-quaternion product */
 template <typename Scalar>
 inline Quaternion<Scalar> Quaternion<Scalar>::operator* (const Quaternion& other) const
 {
-  return Quaternion<Scalar>
-  (
-    this->w() * other.w() - this->x() * other.x() - this->y() * other.y() - this->z() * other.z(),
-    this->w() * other.x() + this->x() * other.w() + this->y() * other.z() - this->z() * other.y(),
-    this->w() * other.y() + this->y() * other.w() + this->z() * other.x() - this->x() * other.z(),
-    this->w() * other.z() + this->z() * other.w() + this->x() * other.y() - this->y() * other.x()
-  );
+  return ei_quaternion_product<EiArch>(*this,other);
 }
 
 /** \sa operator*(Quaternion) */