Add SSE4 min/max for integers

This commit is contained in:
Gael Guennebaud 2013-03-20 18:28:40 +01:00
parent da6219b19d
commit d63712163c
2 changed files with 23 additions and 0 deletions

View File

@ -173,18 +173,26 @@ template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
{
#ifdef EIGEN_VECTORIZE_SSE4_1
return _mm_min_epi32(a,b);
#else
// after some bench, this version *is* faster than a scalar implementation
Packet4i mask = _mm_cmplt_epi32(a,b);
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
#endif
}
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
{
#ifdef EIGEN_VECTORIZE_SSE4_1
return _mm_max_epi32(a,b);
#else
// after some bench, this version *is* faster than a scalar implementation
Packet4i mask = _mm_cmpgt_epi32(a,b);
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
#endif
}
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }

View File

@ -250,6 +250,17 @@ template<typename Scalar> void packetmath_real()
data1[internal::random<int>(0, PacketSize)] = 0;
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasLog, std::log, internal::plog);
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasSqrt, std::sqrt, internal::psqrt);
}
template<typename Scalar> void packetmath_notcomplex()
{
using std::abs;
typedef typename internal::packet_traits<Scalar>::type Packet;
const int PacketSize = internal::packet_traits<Scalar>::size;
EIGEN_ALIGN16 Scalar data1[internal::packet_traits<Scalar>::size*4];
EIGEN_ALIGN16 Scalar data2[internal::packet_traits<Scalar>::size*4];
EIGEN_ALIGN16 Scalar ref[internal::packet_traits<Scalar>::size*4];
ref[0] = data1[0];
for (int i=0; i<PacketSize; ++i)
@ -340,6 +351,10 @@ void test_packetmath()
CALL_SUBTEST_1( packetmath<std::complex<float> >() );
CALL_SUBTEST_2( packetmath<std::complex<double> >() );
CALL_SUBTEST_1( packetmath_notcomplex<float>() );
CALL_SUBTEST_2( packetmath_notcomplex<double>() );
CALL_SUBTEST_3( packetmath_notcomplex<int>() );
CALL_SUBTEST_1( packetmath_real<float>() );
CALL_SUBTEST_2( packetmath_real<double>() );