From f32ad79b419c206551284eab1df0f07de1a14f0d Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 8 Feb 2016 14:29:12 +0100 Subject: [PATCH] Remove custom unaligned loads for SSE. They were only useful for core2 CPU. --- Eigen/src/Core/arch/SSE/PacketMath.h | 44 +++------------------------- 1 file changed, 4 insertions(+), 40 deletions(-) diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index fc8ae50fe..bef898b1f 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -235,63 +235,27 @@ template<> EIGEN_STRONG_INLINE Packet4i pload(const int* from) { E return _mm_loadu_ps(from); #endif } - template<> EIGEN_STRONG_INLINE Packet2d ploadu(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_pd(from); } - template<> EIGEN_STRONG_INLINE Packet4i ploadu(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD return _mm_loadu_si128(reinterpret_cast(from)); } #else -// Fast unaligned loads. Note that here we cannot directly use intrinsics: this would -// require pointer casting to incompatible pointer types and leads to invalid code -// because of the strict aliasing rule. The "dummy" stuff are required to enforce -// a correct instruction dependency. -// TODO: do the same for MSVC (ICC is compatible) // NOTE: with the code below, MSVC's compiler crashes! -#if defined(__GNUC__) && defined(__i386__) - // bug 195: gcc/i386 emits weird x87 fldl/fstpl instructions for _mm_load_sd - #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1 -#elif defined(__clang__) - // bug 201: Segfaults in __mm_loadh_pd with clang 2.8 - #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 1 -#else - #define EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS 0 -#endif - template<> EIGEN_STRONG_INLINE Packet4f ploadu(const float* from) { EIGEN_DEBUG_UNALIGNED_LOAD -#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS return _mm_loadu_ps(from); -#else - __m128d res; - res = _mm_load_sd((const double*)(from)) ; - res = _mm_loadh_pd(res, (const double*)(from+2)) ; - return _mm_castpd_ps(res); -#endif } +#endif + template<> EIGEN_STRONG_INLINE Packet2d ploadu(const double* from) { EIGEN_DEBUG_UNALIGNED_LOAD -#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS return _mm_loadu_pd(from); -#else - __m128d res; - res = _mm_load_sd(from) ; - res = _mm_loadh_pd(res,from+1); - return res; -#endif } template<> EIGEN_STRONG_INLINE Packet4i ploadu(const int* from) { EIGEN_DEBUG_UNALIGNED_LOAD -#if EIGEN_AVOID_CUSTOM_UNALIGNED_LOADS - return _mm_loadu_si128(reinterpret_cast(from)); -#else - __m128d res; - res = _mm_load_sd((const double*)(from)) ; - res = _mm_loadh_pd(res, (const double*)(from+2)) ; - return _mm_castpd_si128(res); -#endif + return _mm_loadu_si128(reinterpret_cast(from)); } -#endif + template<> EIGEN_STRONG_INLINE Packet4f ploaddup(const float* from) {