mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-07-10 23:21:47 +08:00
Disable usage of SSE3 _mm_hadd_ps that is extremely slow.
(grafted from 178c084856003f1cfd3020615ab98230d9520a80)
This commit is contained in:
parent
1b7dd46d94
commit
0eff51e2ed
@ -510,20 +510,7 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
|
|||||||
return _mm_hadd_pd(vecs[0], vecs[1]);
|
return _mm_hadd_pd(vecs[0], vecs[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
|
|
||||||
{
|
|
||||||
Packet4f tmp0 = _mm_hadd_ps(a,a);
|
|
||||||
return pfirst<Packet4f>(_mm_hadd_ps(tmp0, tmp0));
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
// SSE2 versions
|
|
||||||
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
|
|
||||||
{
|
|
||||||
Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
|
|
||||||
return pfirst<Packet4f>(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
|
template<> EIGEN_STRONG_INLINE Packet4f preduxp<Packet4f>(const Packet4f* vecs)
|
||||||
{
|
{
|
||||||
Packet4f tmp0, tmp1, tmp2;
|
Packet4f tmp0, tmp1, tmp2;
|
||||||
@ -544,6 +531,19 @@ template<> EIGEN_STRONG_INLINE Packet2d preduxp<Packet2d>(const Packet2d* vecs)
|
|||||||
}
|
}
|
||||||
#endif // SSE3
|
#endif // SSE3
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
|
||||||
|
{
|
||||||
|
// Disable SSE3 _mm_hadd_ps that is extremely slow on all existing Intel's architectures
|
||||||
|
// (from Nehalem to Haswell)
|
||||||
|
// #ifdef EIGEN_VECTORIZE_SSE3
|
||||||
|
// Packet4f tmp = _mm_add_ps(a, vec4f_swizzle1(a,2,3,2,3));
|
||||||
|
// return pfirst<Packet4f>(_mm_hadd_ps(tmp, tmp));
|
||||||
|
// #else
|
||||||
|
Packet4f tmp = _mm_add_ps(a, _mm_movehl_ps(a,a));
|
||||||
|
return pfirst<Packet4f>(_mm_add_ss(tmp, _mm_shuffle_ps(tmp,tmp, 1)));
|
||||||
|
// #endif
|
||||||
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
|
template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
|
||||||
{
|
{
|
||||||
// Disable SSE3 _mm_hadd_pd that is extremely slow on all existing Intel's architectures
|
// Disable SSE3 _mm_hadd_pd that is extremely slow on all existing Intel's architectures
|
||||||
|
Loading…
x
Reference in New Issue
Block a user