Patch notes (text before the first "diff --git" header is ignored by patch tools):
  * PacketMath.h: adds the Packet2l wrapper around __m128i, declares it as the
    integer_packet of Packet2d, adds 64-bit logical shifts for it, and makes
    pldexp<Packet2d> delegate to pldexp_double.
  * TypeCasting.h: adds pcast and preinterpret between Packet2d and Packet2l.
  * Reviewer changes relative to the original patch text: line structure and template
    argument lists were restored, and pcast<Packet2d, Packet2l> reads its lanes through
    SSE2 intrinsics instead of a[0]/a[1]. The TypeCasting.h hunk headers were adjusted
    for the added comment line; the "index" lines still name the original blobs.
  * NOTE(review): _mm_cvtsi128_si64 in pcast<Packet2l, Packet2d> is x86-64 only; confirm
    whether 32-bit SSE builds need a fallback.
  * NOTE(review): alignment whitespace inside context lines could not be recovered;
    apply with whitespace tolerance if a hunk is rejected.

diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 1d640cf76..25705e7b2 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -45,6 +45,7 @@ typedef __m128d Packet2d;
 
 typedef eigen_packet_wrapper<__m128i, 0> Packet4i;
 typedef eigen_packet_wrapper<__m128i, 1> Packet16b;
+typedef eigen_packet_wrapper<__m128i, 2> Packet2l;
 
 template<> struct is_arithmetic<__m128> { enum { value = true }; };
 template<> struct is_arithmetic<__m128i> { enum { value = true }; };
@@ -192,6 +193,7 @@ template<> struct unpacket_traits<Packet4f> {
 template<> struct unpacket_traits<Packet2d> {
   typedef double type;
   typedef Packet2d half;
+  typedef Packet2l integer_packet;
   enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false};
 };
 template<> struct unpacket_traits<Packet4i> {
@@ -483,6 +485,9 @@ template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, con
 template<int N> EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(Packet4i a) { return _mm_srai_epi32(a,N); }
 template<int N> EIGEN_STRONG_INLINE Packet4i plogical_shift_right(Packet4i a) { return _mm_srli_epi32(a,N); }
 template<int N> EIGEN_STRONG_INLINE Packet4i plogical_shift_left(Packet4i a) { return _mm_slli_epi32(a,N); }
+template<int N> EIGEN_STRONG_INLINE Packet2l plogical_shift_right(Packet2l a) { return _mm_srli_epi64(a,N); }
+template<int N> EIGEN_STRONG_INLINE Packet2l plogical_shift_left(Packet2l a) { return _mm_slli_epi64(a,N); }
+
 
 #ifdef EIGEN_VECTORIZE_SSE4_1
 template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a)
@@ -753,12 +758,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pldexp<Packet4f>(const Packet4f& a, cons
 }
 
 template<> EIGEN_STRONG_INLINE Packet2d pldexp<Packet2d>(const Packet2d& a, const Packet2d& exponent) {
-  const Packet4i cst_1023_0 = _mm_setr_epi32(1023, 1023, 0, 0);
-  Packet4i emm0 = _mm_cvttpd_epi32(exponent);
-  emm0 = padd(emm0, cst_1023_0);
-  emm0 = _mm_slli_epi32(emm0, 20);
-  emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(1,2,0,3));
-  return pmul(a, Packet2d(_mm_castsi128_pd(emm0)));
+  return pldexp_double(a,exponent);
 }
 
 // with AVX, the default implementations based on pload1 are faster
diff --git a/Eigen/src/Core/arch/SSE/TypeCasting.h b/Eigen/src/Core/arch/SSE/TypeCasting.h
index 1b8e9a550..09a69660f 100644
--- a/Eigen/src/Core/arch/SSE/TypeCasting.h
+++ b/Eigen/src/Core/arch/SSE/TypeCasting.h
@@ -69,6 +69,15 @@ template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f
   return _mm_cvtps_pd(a);
 }
 
+template<> EIGEN_STRONG_INLINE Packet2l pcast<Packet2d, Packet2l>(const Packet2d& a) {
+  // Read the lanes via intrinsics: subscripting a __m128d (a[0]/a[1]) is a GCC/Clang vector extension.
+  return _mm_set_epi64x(int64_t(_mm_cvtsd_f64(_mm_unpackhi_pd(a, a))), int64_t(_mm_cvtsd_f64(a)));
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet2l, Packet2d>(const Packet2l& a) {
+  return _mm_set_pd(double(_mm_cvtsi128_si64(_mm_unpackhi_epi64(a, a))), double(_mm_cvtsi128_si64(a)));
+}
+
 template<> EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i,Packet4f>(const Packet4f& a) {
   return _mm_castps_si128(a);
 }
@@ -77,6 +86,13 @@ template<> EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f,Packet4i>(const Pa
   return _mm_castsi128_ps(a);
 }
 
+template<> EIGEN_STRONG_INLINE Packet2l preinterpret<Packet2l,Packet2d>(const Packet2d& a) {
+  return _mm_castpd_si128(a);
+}
+
+template<> EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d,Packet2l>(const Packet2l& a) {
+  return _mm_castsi128_pd(a);
+}
 
 // Disable the following code since it's broken on too many platforms / compilers.
 //#elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC)