diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index f84e5b3ec..addb2fc0e 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -173,18 +173,26 @@ template<> EIGEN_STRONG_INLINE Packet4f pmin(const Packet4f& a, const template<> EIGEN_STRONG_INLINE Packet2d pmin(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4i pmin(const Packet4i& a, const Packet4i& b) { +#ifdef EIGEN_VECTORIZE_SSE4_1 + return _mm_min_epi32(a,b); +#else // after some bench, this version *is* faster than a scalar implementation Packet4i mask = _mm_cmplt_epi32(a,b); return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b)); +#endif } template<> EIGEN_STRONG_INLINE Packet4f pmax(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet2d pmax(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4i pmax(const Packet4i& a, const Packet4i& b) { +#ifdef EIGEN_VECTORIZE_SSE4_1 + return _mm_max_epi32(a,b); +#else // after some bench, this version *is* faster than a scalar implementation Packet4i mask = _mm_cmpgt_epi32(a,b); return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b)); +#endif } template<> EIGEN_STRONG_INLINE Packet4f pand(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); } diff --git a/test/packetmath.cpp b/test/packetmath.cpp index cdc945813..804ae9063 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -250,6 +250,17 @@ template void packetmath_real() data1[internal::random(0, PacketSize)] = 0; CHECK_CWISE1_IF(internal::packet_traits::HasLog, std::log, internal::plog); CHECK_CWISE1_IF(internal::packet_traits::HasSqrt, std::sqrt, internal::psqrt); +} + +template void packetmath_notcomplex() +{ + using std::abs; + typedef typename internal::packet_traits::type Packet; + const int PacketSize = internal::packet_traits::size; + + EIGEN_ALIGN16 Scalar data1[internal::packet_traits::size*4]; + EIGEN_ALIGN16 Scalar data2[internal::packet_traits::size*4]; + EIGEN_ALIGN16 Scalar ref[internal::packet_traits::size*4]; ref[0] = data1[0]; for (int i=0; i >() ); CALL_SUBTEST_2( packetmath >() ); + CALL_SUBTEST_1( packetmath_notcomplex() ); + CALL_SUBTEST_2( packetmath_notcomplex() ); + CALL_SUBTEST_3( packetmath_notcomplex() ); + CALL_SUBTEST_1( packetmath_real() ); CALL_SUBTEST_2( packetmath_real() );