diff --git a/Eigen/Geometry b/Eigen/Geometry index 04aa316cb..16b4bd6e1 100644 --- a/Eigen/Geometry +++ b/Eigen/Geometry @@ -49,9 +49,8 @@ #include "src/Geometry/AlignedBox.h" #include "src/Geometry/Umeyama.h" -// Use the SSE optimized version whenever possible. At the moment the -// SSE version doesn't compile when AVX is enabled -#if defined EIGEN_VECTORIZE_SSE && !defined EIGEN_VECTORIZE_AVX +// Use the SSE optimized version whenever possible. +#if defined EIGEN_VECTORIZE_SSE #include "src/Geometry/arch/Geometry_SSE.h" #endif diff --git a/Eigen/src/Geometry/arch/Geometry_SSE.h b/Eigen/src/Geometry/arch/Geometry_SSE.h index d4346aa1c..108cc9f8e 100644 --- a/Eigen/src/Geometry/arch/Geometry_SSE.h +++ b/Eigen/src/Geometry/arch/Geometry_SSE.h @@ -25,10 +25,12 @@ struct quat_product }; static inline Quaternion run(const QuaternionBase& _a, const QuaternionBase& _b) { + evaluator ae(_a.coeffs()); + evaluator be(_b.coeffs()); Quaternion res; const Packet4f mask = _mm_setr_ps(0.f,0.f,0.f,-0.f); - Packet4f a = _a.coeffs().template packet(0); - Packet4f b = _b.coeffs().template packet(0); + Packet4f a = ae.template packet(0); + Packet4f b = be.template packet(0); Packet4f s1 = pmul(vec4f_swizzle1(a,1,2,0,2),vec4f_swizzle1(b,2,0,1,2)); Packet4f s2 = pmul(vec4f_swizzle1(a,3,3,3,1),vec4f_swizzle1(b,0,1,2,1)); pstoret( @@ -50,9 +52,10 @@ struct quat_conj }; static inline Quaternion run(const QuaternionBase& q) { + evaluator qe(q.coeffs()); Quaternion res; - const __m128 mask = _mm_setr_ps(-0.f,-0.f,-0.f,0.f); - pstoret(&res.x(), _mm_xor_ps(mask, q.coeffs().template packet::Alignment>(0))); + const Packet4f mask = _mm_setr_ps(-0.f,-0.f,-0.f,0.f); + pstoret(&res.x(), pxor(mask, qe.template packet::Alignment,Packet4f>(0))); return res; } }; @@ -67,12 +70,14 @@ struct cross3_impl static inline typename plain_matrix_type::type run(const VectorLhs& lhs, const VectorRhs& rhs) { - __m128 a = lhs.template packet::Alignment>(0); - __m128 b = rhs.template packet::Alignment>(0); - __m128 mul1=_mm_mul_ps(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3)); - __m128 mul2=_mm_mul_ps(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3)); + evaluator lhs_eval(lhs); + evaluator rhs_eval(rhs); + Packet4f a = lhs_eval.template packet::Alignment,Packet4f>(0); + Packet4f b = rhs_eval.template packet::Alignment,Packet4f>(0); + Packet4f mul1 = pmul(vec4f_swizzle1(a,1,2,0,3),vec4f_swizzle1(b,2,0,1,3)); + Packet4f mul2 = pmul(vec4f_swizzle1(a,2,0,1,3),vec4f_swizzle1(b,1,2,0,3)); typename plain_matrix_type::type res; - pstoret(&res.x(),_mm_sub_ps(mul1,mul2)); + pstoret(&res.x(),psub(mul1,mul2)); return res; } }; @@ -94,9 +99,12 @@ struct quat_product Quaternion res; + evaluator ae(_a.coeffs()); + evaluator be(_b.coeffs()); + const double* a = _a.coeffs().data(); - Packet2d b_xy = _b.coeffs().template packet(0); - Packet2d b_zw = _b.coeffs().template packet(2); + Packet2d b_xy = be.template packet(0); + Packet2d b_zw = be.template packet(2); Packet2d a_xx = pset1(a[0]); Packet2d a_yy = pset1(a[1]); Packet2d a_zz = pset1(a[2]); @@ -145,11 +153,12 @@ struct quat_conj }; static inline Quaternion run(const QuaternionBase& q) { + evaluator qe(q.coeffs()); Quaternion res; - const __m128d mask0 = _mm_setr_pd(-0.,-0.); - const __m128d mask2 = _mm_setr_pd(-0.,0.); - pstoret(&res.x(), _mm_xor_pd(mask0, q.coeffs().template packet::Alignment>(0))); - pstoret(&res.z(), _mm_xor_pd(mask2, q.coeffs().template packet::Alignment>(2))); + const Packet2d mask0 = _mm_setr_pd(-0.,-0.); + const Packet2d mask2 = _mm_setr_pd(-0.,0.); + pstoret(&res.x(), pxor(mask0, qe.template packet::Alignment,Packet2d>(0))); + pstoret(&res.z(), pxor(mask2, qe.template packet::Alignment,Packet2d>(2))); return res; } };