mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 19:59:05 +08:00
Add SSE4 min/max for integers
This commit is contained in:
parent
da6219b19d
commit
d63712163c
@ -173,18 +173,26 @@ template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const
|
|||||||
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
|
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
|
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
|
||||||
{
|
{
|
||||||
|
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||||
|
return _mm_min_epi32(a,b);
|
||||||
|
#else
|
||||||
// after some bench, this version *is* faster than a scalar implementation
|
// after some bench, this version *is* faster than a scalar implementation
|
||||||
Packet4i mask = _mm_cmplt_epi32(a,b);
|
Packet4i mask = _mm_cmplt_epi32(a,b);
|
||||||
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
|
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
|
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
|
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
|
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
|
||||||
{
|
{
|
||||||
|
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||||
|
return _mm_max_epi32(a,b);
|
||||||
|
#else
|
||||||
// after some bench, this version *is* faster than a scalar implementation
|
// after some bench, this version *is* faster than a scalar implementation
|
||||||
Packet4i mask = _mm_cmpgt_epi32(a,b);
|
Packet4i mask = _mm_cmpgt_epi32(a,b);
|
||||||
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
|
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
|
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
|
||||||
|
@ -250,6 +250,17 @@ template<typename Scalar> void packetmath_real()
|
|||||||
data1[internal::random<int>(0, PacketSize)] = 0;
|
data1[internal::random<int>(0, PacketSize)] = 0;
|
||||||
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasLog, std::log, internal::plog);
|
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasLog, std::log, internal::plog);
|
||||||
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasSqrt, std::sqrt, internal::psqrt);
|
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasSqrt, std::sqrt, internal::psqrt);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Scalar> void packetmath_notcomplex()
|
||||||
|
{
|
||||||
|
using std::abs;
|
||||||
|
typedef typename internal::packet_traits<Scalar>::type Packet;
|
||||||
|
const int PacketSize = internal::packet_traits<Scalar>::size;
|
||||||
|
|
||||||
|
EIGEN_ALIGN16 Scalar data1[internal::packet_traits<Scalar>::size*4];
|
||||||
|
EIGEN_ALIGN16 Scalar data2[internal::packet_traits<Scalar>::size*4];
|
||||||
|
EIGEN_ALIGN16 Scalar ref[internal::packet_traits<Scalar>::size*4];
|
||||||
|
|
||||||
ref[0] = data1[0];
|
ref[0] = data1[0];
|
||||||
for (int i=0; i<PacketSize; ++i)
|
for (int i=0; i<PacketSize; ++i)
|
||||||
@ -340,6 +351,10 @@ void test_packetmath()
|
|||||||
CALL_SUBTEST_1( packetmath<std::complex<float> >() );
|
CALL_SUBTEST_1( packetmath<std::complex<float> >() );
|
||||||
CALL_SUBTEST_2( packetmath<std::complex<double> >() );
|
CALL_SUBTEST_2( packetmath<std::complex<double> >() );
|
||||||
|
|
||||||
|
CALL_SUBTEST_1( packetmath_notcomplex<float>() );
|
||||||
|
CALL_SUBTEST_2( packetmath_notcomplex<double>() );
|
||||||
|
CALL_SUBTEST_3( packetmath_notcomplex<int>() );
|
||||||
|
|
||||||
CALL_SUBTEST_1( packetmath_real<float>() );
|
CALL_SUBTEST_1( packetmath_real<float>() );
|
||||||
CALL_SUBTEST_2( packetmath_real<double>() );
|
CALL_SUBTEST_2( packetmath_real<double>() );
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user