fix #53: performance regression, hopefully I did not resurrect another

perf. issue...
This commit is contained in:
Gael Guennebaud 2009-09-17 23:18:21 +02:00
parent e4f94b8c58
commit 9395326e44

View File

@ -77,15 +77,16 @@ template<> struct ei_unpacket_traits<Packet4i> { typedef int type; enum {size
#ifdef __GNUC__ #ifdef __GNUC__
// Sometimes GCC implements _mm_set1_p* using multiple moves, // Sometimes GCC implements _mm_set1_p* using multiple moves,
// that is inefficient :( // that is inefficient :(
// TODO make sure the new solution using the shuffle/unpacklo is ok
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) { template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) {
Packet4f res = _mm_set_ss(from); Packet4f res = _mm_set_ss(from);
asm("shufps $0, %[x], %[x]" : [x] "+x" (res) : ); return _mm_shuffle_ps(res,res,0);
return res; //asm("shufps $0, %[x], %[x]" : [x] "+x" (res) : );
} }
template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<double>(const double& from) { template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<double>(const double& from) {
Packet2d res = _mm_set_sd(from); Packet2d res = _mm_set_sd(from);
asm("unpcklpd %[x], %[x]" : [x] "+x" (res) : ); return _mm_unpacklo_pd(res,res);
return res; // asm("unpcklpd %[x], %[x]" : [x] "+x" (res) : );
} }
#else #else
template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) { return _mm_set1_ps(from); } template<> EIGEN_STRONG_INLINE Packet4f ei_pset1<float>(const float& from) { return _mm_set1_ps(from); }