From eef03525b84474607701b0adde8608a3e2d6ce36 Mon Sep 17 00:00:00 2001
From: Benoit Jacob
Date: Mon, 28 Feb 2011 00:04:05 -0500
Subject: [PATCH] fix bug #203: revert to using _mm_set1_p[sd]

---
 Eigen/src/Core/arch/SSE/PacketMath.h | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index dea744396..43ad28d42 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -110,22 +110,8 @@ template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4}
 template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2}; };
 template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4}; };
 
-#ifdef __GNUC__
-// Sometimes GCC implements _mm_set1_p* using multiple moves,
-// that is inefficient :( (e.g., see gemm_pack_rhs)
-template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
-  Packet4f res = _mm_set_ss(from);
-  return vec4f_swizzle1(res,0,0,0,0);
-}
-template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
-  // NOTE the SSE3 intrinsic _mm_loaddup_pd is never faster but sometimes much slower
-  Packet2d res = _mm_set_sd(from);
-  return vec2d_swizzle1(res, 0, 0);
-}
-#else
 template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set1_ps(from); }
 template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
-#endif
 template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
 
 template<> EIGEN_STRONG_INLINE Packet4f plset<float>(const float& a) { return _mm_add_ps(pset1<Packet4f>(a), _mm_set_ps(3,2,1,0)); }