Remove useless register keyword, and optimize predux_min/max for SSE4

This commit is contained in:
Gael Guennebaud 2014-01-25 16:54:13 +01:00
parent 6cf938df53
commit a7621809fe
2 changed files with 17 additions and 7 deletions

View File

@ -504,13 +504,18 @@ template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
} }
template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a) template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
{ {
#ifdef EIGEN_VECTORIZE_SSE4_1
Packet4i tmp = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
return pfirst(_mm_min_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
#else
// after some experiments, it seems this is the fastest way to implement it // after some experiments, it seems this is the fastest way to implement it
// for GCC (e.g., it does not like using std::min after the pstore !!) // for GCC (e.g., it does not like using std::min after the pstore !!)
EIGEN_ALIGN16 int aux[4]; EIGEN_ALIGN16 int aux[4];
pstore(aux, a); pstore(aux, a);
register int aux0 = aux[0]<aux[1] ? aux[0] : aux[1]; int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
register int aux2 = aux[2]<aux[3] ? aux[2] : aux[3]; int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
return aux0<aux2 ? aux0 : aux2; return aux0<aux2 ? aux0 : aux2;
#endif // EIGEN_VECTORIZE_SSE4_1
} }
// max // max
@ -525,13 +530,18 @@ template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
} }
template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a) template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
{ {
#ifdef EIGEN_VECTORIZE_SSE4_1
Packet4i tmp = _mm_max_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
return pfirst(_mm_max_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
#else
// after some experiments, it seems this is the fastest way to implement it // after some experiments, it seems this is the fastest way to implement it
// for GCC (e.g., it does not like using std::min after the pstore !!) // for GCC (e.g., it does not like using std::min after the pstore !!)
EIGEN_ALIGN16 int aux[4]; EIGEN_ALIGN16 int aux[4];
pstore(aux, a); pstore(aux, a);
register int aux0 = aux[0]>aux[1] ? aux[0] : aux[1]; int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
register int aux2 = aux[2]>aux[3] ? aux[2] : aux[3]; int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
return aux0>aux2 ? aux0 : aux2; return aux0>aux2 ? aux0 : aux2;
#endif // EIGEN_VECTORIZE_SSE4_1
} }
#if (defined __GNUC__) #if (defined __GNUC__)

View File

@ -79,8 +79,8 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
for (Index j=FirstTriangular ? bound : 0; for (Index j=FirstTriangular ? bound : 0;
j<(FirstTriangular ? size : bound);j+=2) j<(FirstTriangular ? size : bound);j+=2)
{ {
register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride; const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
register const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride; const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
Scalar t0 = cjAlpha * rhs[j]; Scalar t0 = cjAlpha * rhs[j];
Packet ptmp0 = pset1<Packet>(t0); Packet ptmp0 = pset1<Packet>(t0);
@ -147,7 +147,7 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
} }
for (Index j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++) for (Index j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++)
{ {
register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride; const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
Scalar t1 = cjAlpha * rhs[j]; Scalar t1 = cjAlpha * rhs[j];
Scalar t2(0); Scalar t2(0);