From f047ecc36a4e940417c1980d16e1b029539f8f10 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 7 Oct 2015 15:48:35 +0200 Subject: [PATCH] _mm_hadd_epi32 is for SSSE3 only (and not SSE3) --- Eigen/src/Core/arch/SSE/PacketMath.h | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index e7b676f4c..2e0a807bf 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -532,10 +532,6 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp(const Packet2d* vecs) { return _mm_hadd_pd(vecs[0], vecs[1]); } -template<> EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs) -{ - return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3])); -} template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) { @@ -544,12 +540,6 @@ template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) } template<> EIGEN_STRONG_INLINE double predux(const Packet2d& a) { return pfirst(_mm_hadd_pd(a, a)); } - -template<> EIGEN_STRONG_INLINE int predux(const Packet4i& a) -{ - Packet4i tmp0 = _mm_hadd_epi32(a,a); - return pfirst(_mm_hadd_epi32(tmp0,tmp0)); -} #else // SSE2 versions template<> EIGEN_STRONG_INLINE float predux(const Packet4f& a) @@ -580,7 +570,20 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp(const Packet2d* vecs) { return _mm_add_pd(_mm_unpacklo_pd(vecs[0], vecs[1]), _mm_unpackhi_pd(vecs[0], vecs[1])); } +#endif // SSE3 + +#ifdef EIGEN_VECTORIZE_SSSE3 +template<> EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs) +{ + return _mm_hadd_epi32(_mm_hadd_epi32(vecs[0], vecs[1]),_mm_hadd_epi32(vecs[2], vecs[3])); +} +template<> EIGEN_STRONG_INLINE int predux(const Packet4i& a) +{ + Packet4i tmp0 = _mm_hadd_epi32(a,a); + return pfirst(_mm_hadd_epi32(tmp0,tmp0)); +} +#else template<> EIGEN_STRONG_INLINE int predux(const Packet4i& a) { Packet4i tmp = _mm_add_epi32(a, _mm_unpackhi_epi64(a,a)); @@ -600,8 +603,7 @@ template<> EIGEN_STRONG_INLINE Packet4i preduxp(const Packet4i* vecs) tmp0 = _mm_unpackhi_epi64(tmp0, tmp1); return _mm_add_epi32(tmp0, tmp2); } -#endif // SSE3 - +#endif // Other reduction functions: // mul