mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-11 11:19:02 +08:00
Add SSE4 min/max for integers
This commit is contained in:
parent
da6219b19d
commit
d63712163c
@ -173,18 +173,26 @@ template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const
|
||||
// Packet2d specialization of pmin: forwards both operands to the SSE2
// packed-double minimum intrinsic and returns its result unchanged.
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b)
{
  return _mm_min_pd(a,b);
}
|
||||
// Packet4i specialization of pmin (per-lane minimum of signed 32-bit integers).
// Uses the dedicated instruction when SSE4.1 is enabled, otherwise falls back
// to a compare-and-select sequence built from SSE2 operations.
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
{
#ifdef EIGEN_VECTORIZE_SSE4_1
  return _mm_min_epi32(a,b);
#else
  // Benchmarks showed this select sequence *is* faster than a scalar implementation.
  const Packet4i a_is_less = _mm_cmplt_epi32(a,b);      // lane mask: set where a < b
  const Packet4i taken_from_a = _mm_and_si128(a_is_less,a);
  const Packet4i taken_from_b = _mm_andnot_si128(a_is_less,b);
  return _mm_or_si128(taken_from_a,taken_from_b);
#endif
}
|
||||
|
||||
// Packet4f specialization of pmax: forwards both operands to the SSE
// packed-float maximum intrinsic and returns its result unchanged.
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)
{
  return _mm_max_ps(a,b);
}
|
||||
// Packet2d specialization of pmax: forwards both operands to the SSE2
// packed-double maximum intrinsic and returns its result unchanged.
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b)
{
  return _mm_max_pd(a,b);
}
|
||||
// Packet4i specialization of pmax (per-lane maximum of signed 32-bit integers).
// Uses the dedicated instruction when SSE4.1 is enabled, otherwise falls back
// to a compare-and-select sequence built from SSE2 operations.
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
{
#ifdef EIGEN_VECTORIZE_SSE4_1
  return _mm_max_epi32(a,b);
#else
  // Benchmarks showed this select sequence *is* faster than a scalar implementation.
  const Packet4i a_is_greater = _mm_cmpgt_epi32(a,b);   // lane mask: set where a > b
  const Packet4i taken_from_a = _mm_and_si128(a_is_greater,a);
  const Packet4i taken_from_b = _mm_andnot_si128(a_is_greater,b);
  return _mm_or_si128(taken_from_a,taken_from_b);
#endif
}
|
||||
|
||||
// Packet4f specialization of pand: forwards both operands to the SSE
// bitwise-AND intrinsic for packed floats and returns its result unchanged.
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b)
{
  return _mm_and_ps(a,b);
}
|
||||
|
@ -250,6 +250,17 @@ template<typename Scalar> void packetmath_real()
|
||||
data1[internal::random<int>(0, PacketSize)] = 0;
|
||||
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasLog, std::log, internal::plog);
|
||||
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasSqrt, std::sqrt, internal::psqrt);
|
||||
}
|
||||
|
||||
template<typename Scalar> void packetmath_notcomplex()
|
||||
{
|
||||
using std::abs;
|
||||
typedef typename internal::packet_traits<Scalar>::type Packet;
|
||||
const int PacketSize = internal::packet_traits<Scalar>::size;
|
||||
|
||||
EIGEN_ALIGN16 Scalar data1[internal::packet_traits<Scalar>::size*4];
|
||||
EIGEN_ALIGN16 Scalar data2[internal::packet_traits<Scalar>::size*4];
|
||||
EIGEN_ALIGN16 Scalar ref[internal::packet_traits<Scalar>::size*4];
|
||||
|
||||
ref[0] = data1[0];
|
||||
for (int i=0; i<PacketSize; ++i)
|
||||
@ -340,6 +351,10 @@ void test_packetmath()
|
||||
CALL_SUBTEST_1( packetmath<std::complex<float> >() );
|
||||
CALL_SUBTEST_2( packetmath<std::complex<double> >() );
|
||||
|
||||
CALL_SUBTEST_1( packetmath_notcomplex<float>() );
|
||||
CALL_SUBTEST_2( packetmath_notcomplex<double>() );
|
||||
CALL_SUBTEST_3( packetmath_notcomplex<int>() );
|
||||
|
||||
CALL_SUBTEST_1( packetmath_real<float>() );
|
||||
CALL_SUBTEST_2( packetmath_real<double>() );
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user