From aa17b5b5143306546f5f3e15a8fe0c9d39b0285a Mon Sep 17 00:00:00 2001
From: Gael Guennebaud
Date: Tue, 23 Jun 2009 23:28:34 +0200
Subject: [PATCH] use the slower unaligned load intrinsics in ei_ploadu because GCC mess up with my tricks

---
Notes (ignored by git-am):
  * The three SSE specializations of ei_ploadu (float, double, int) stop
    building the packet from two half loads (_mm_load_sd for the low half,
    _mm_loadh_pi for the high half, with the double/int variants forwarding
    to the float one through casts) and instead call the dedicated unaligned
    load intrinsics _mm_loadu_ps, _mm_loadu_pd and _mm_loadu_si128 directly.
  * The aligned ei_pload and ei_pstore specializations appear only as
    context and are not modified.
  * NOTE(review): this copy of the patch was recovered from a flattened,
    markup-stripped rendering. Line breaks were rebuilt from the hunk header,
    and the reinterpret_cast target type (const __m128i*) was restored from
    the intrinsic signatures. Other template arguments or alignment spaces
    may still be missing - confirm against the upstream commit before
    applying.

 Eigen/src/Core/arch/SSE/PacketMath.h | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 356a7e64f..7af63bbc9 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -163,14 +163,9 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pload(const float* from) {
 template<> EIGEN_STRONG_INLINE Packet2d ei_pload(const double* from) { return _mm_load_pd(from); }
 template<> EIGEN_STRONG_INLINE Packet4i ei_pload(const int* from) { return _mm_load_si128(reinterpret_cast<const __m128i*>(from)); }
 
-template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) {
-  Packet4f r;
-  r = _mm_castpd_ps(_mm_load_sd((double*)(from)));
-  r = _mm_loadh_pi(r, (const __m64*)(from+2));
-  return r;
-}
-template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu(const double* from) { return _mm_castps_pd(ei_ploadu((const float*)(from))); }
-template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from) { return _mm_castpd_si128(ei_ploadu((const double*)(from))); }
+template<> EIGEN_STRONG_INLINE Packet4f ei_ploadu(const float* from) { return _mm_loadu_ps(from); }
+template<> EIGEN_STRONG_INLINE Packet2d ei_ploadu(const double* from) { return _mm_loadu_pd(from); }
+template<> EIGEN_STRONG_INLINE Packet4i ei_ploadu(const int* from) { return _mm_loadu_si128(reinterpret_cast<const __m128i*>(from)); }
 
 template<> EIGEN_STRONG_INLINE void ei_pstore(float* to, const Packet4f& from) { _mm_store_ps(to, from); }
 template<> EIGEN_STRONG_INLINE void ei_pstore(double* to, const Packet2d& from) { _mm_store_pd(to, from); }