From 0eff51e2ed386e213d5dd78f0ddec9d8fc9ebf9b Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 22 Nov 2016 21:53:14 +0100 Subject: [PATCH] Disable usage of SSE3 _mm_hadd_ps that is extremely slow. (grafted from 178c084856003f1cfd3020615ab98230d9520a80 ) --- Eigen/src/Core/arch/SSE/PacketMath.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 3646abdb1..80cf8af09 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -510,20 +510,7 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp(const Packet2d* vecs) return _mm_hadd_pd(vecs[0], vecs[1]); } -template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) -{ - Packet4f tmp0 = _mm_hadd_ps(a,a); - return pfirst(_mm_hadd_ps(tmp0, tmp0)); -} - #else -// SSE2 versions -template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) -{ - Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a)); - return pfirst(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1))); -} - template<> EIGEN_STRONG_INLINE Packet4f preduxp(const Packet4f* vecs) { Packet4f tmp0, tmp1, tmp2; @@ -544,6 +531,19 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp(const Packet2d* vecs) } #endif // SSE3 +template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) +{ + // Disable SSE3 _mm_hadd_ps that is extremely slow on all existing Intel's architectures + // (from Nehalem to Haswell) +// #ifdef EIGEN_VECTORIZE_SSE3 +// Packet4f tmp = _mm_add_ps(a, vec4f_swizzle1(a,2,3,2,3)); +// return pfirst(_mm_hadd_ps(tmp, tmp)); +// #else + Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a)); + return pfirst(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1))); +// #endif +} + template<> EIGEN_STRONG_INLINE double predux(const Packet2d& a) { // Disable SSE3 _mm_hadd_pd that is extremely slow on all existing Intel's architectures