diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index dfdb71abd..b313fb09a 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -66,7 +66,10 @@ template<> struct packet_traits<float> : default_packet_traits
     HasExp = 1,
     HasSqrt = 1,
     HasRsqrt = 1,
-    HasBlend = 1
+    HasBlend = 1,
+    HasRound = 1,
+    HasFloor = 1,
+    HasCeil = 1
   };
 };
 template<> struct packet_traits<double> : default_packet_traits
@@ -83,7 +86,10 @@ template<> struct packet_traits<double> : default_packet_traits
     HasExp = 1,
     HasSqrt = 1,
     HasRsqrt = 1,
-    HasBlend = 1
+    HasBlend = 1,
+    HasRound = 1,
+    HasFloor = 1,
+    HasCeil = 1
   };
 };
 
@@ -176,6 +182,28 @@ template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const
 template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_max_ps(a,b); }
 template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_max_pd(a,b); }
 
+// pround rounds halfway cases away from zero, matching std::round. None of the
+// _mm256_round_* modes does that, so the largest value below 0.5 (carrying the
+// sign of a) is added before truncating toward zero.
+template<> EIGEN_STRONG_INLINE Packet8f pround<Packet8f>(const Packet8f& a)
+{
+  const Packet8f sign = _mm256_and_ps(a, _mm256_set1_ps(-0.0f));
+  const Packet8f bias = _mm256_or_ps(sign, _mm256_set1_ps(0.49999997f));
+  return _mm256_round_ps(_mm256_add_ps(a, bias), _MM_FROUND_TO_ZERO);
+}
+template<> EIGEN_STRONG_INLINE Packet4d pround<Packet4d>(const Packet4d& a)
+{
+  const Packet4d sign = _mm256_and_pd(a, _mm256_set1_pd(-0.0));
+  const Packet4d bias = _mm256_or_pd(sign, _mm256_set1_pd(0.49999999999999994));
+  return _mm256_round_pd(_mm256_add_pd(a, bias), _MM_FROUND_TO_ZERO);
+}
+
+template<> EIGEN_STRONG_INLINE Packet8f pceil<Packet8f>(const Packet8f& a) { return _mm256_ceil_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet4d pceil<Packet4d>(const Packet4d& a) { return _mm256_ceil_pd(a); }
+
+template<> EIGEN_STRONG_INLINE Packet8f pfloor<Packet8f>(const Packet8f& a) { return _mm256_floor_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet4d pfloor<Packet4d>(const Packet4d& a) { return _mm256_floor_pd(a); }
+
 template<> EIGEN_STRONG_INLINE Packet8f pand<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_and_ps(a,b); }
 template<> EIGEN_STRONG_INLINE Packet4d pand<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_and_pd(a,b); }
 
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 3c30b2cf0..3fcb1c138 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -109,7 +109,16 @@ template<> struct packet_traits<float> : default_packet_traits
     HasExp = 1,
     HasSqrt = 1,
     HasRsqrt = 1,
     HasBlend = 1
+
+#ifdef EIGEN_VECTORIZE_SSE4_1
+    // The separating comma stays inside the guard so that the enumerator
+    // list never ends with a trailing comma when SSE4.1 is unavailable.
+    ,
+    HasRound = 1,
+    HasFloor = 1,
+    HasCeil = 1
+#endif
   };
 };
 template<> struct packet_traits<double> : default_packet_traits
@@ -126,7 +135,16 @@ template<> struct packet_traits<double> : default_packet_traits
     HasExp = 1,
     HasSqrt = 1,
     HasRsqrt = 1,
     HasBlend = 1
+
+#ifdef EIGEN_VECTORIZE_SSE4_1
+    // The separating comma stays inside the guard so that the enumerator
+    // list never ends with a trailing comma when SSE4.1 is unavailable.
+    ,
+    HasRound = 1,
+    HasFloor = 1,
+    HasCeil = 1
+#endif
   };
 };
 #endif
@@ -256,6 +274,30 @@ template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const
 #endif
 }
 
+#ifdef EIGEN_VECTORIZE_SSE4_1
+// pround rounds halfway cases away from zero, matching std::round. None of the
+// _mm_round_* modes does that, so the largest value below 0.5 (carrying the
+// sign of a) is added before truncating toward zero.
+template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a)
+{
+  const Packet4f sign = _mm_and_ps(a, _mm_set1_ps(-0.0f));
+  const Packet4f bias = _mm_or_ps(sign, _mm_set1_ps(0.49999997f));
+  return _mm_round_ps(_mm_add_ps(a, bias), _MM_FROUND_TO_ZERO);
+}
+template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a)
+{
+  const Packet2d sign = _mm_and_pd(a, _mm_set1_pd(-0.0));
+  const Packet2d bias = _mm_or_pd(sign, _mm_set1_pd(0.49999999999999994));
+  return _mm_round_pd(_mm_add_pd(a, bias), _MM_FROUND_TO_ZERO);
+}
+
+template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return _mm_ceil_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return _mm_ceil_pd(a); }
+
+template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return _mm_floor_ps(a); }
+template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return _mm_floor_pd(a); }
+#endif
+
 template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
 template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_and_pd(a,b); }
 template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_and_si128(a,b); }
diff --git a/test/packetmath.cpp b/test/packetmath.cpp
index 568058f1a..dea648002 100644
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@@ -296,6 +296,13 @@ template<typename Scalar> void packetmath_real()
   EIGEN_ALIGN_MAX Scalar data2[PacketTraits::size*4];
   EIGEN_ALIGN_MAX Scalar ref[PacketTraits::size*4];
 
+  // NOTE(review): these checks sit before the loop below that fills data1;
+  // if CHECK_CWISE1_IF reads data1 (its definition is not visible here),
+  // they should move after that initialization.
+  CHECK_CWISE1_IF(PacketTraits::HasRound, std::round, internal::pround);
+  CHECK_CWISE1_IF(PacketTraits::HasCeil, std::ceil, internal::pceil);
+  CHECK_CWISE1_IF(PacketTraits::HasFloor, std::floor, internal::pfloor);
+
   for (int i=0; i<size; ++i)
   {
     data1[i] = internal::random<Scalar>(-1,1) * std::pow(Scalar(10), internal::random<Scalar>(-3,3));