diff --git a/Eigen/src/Core/arch/SSE/Complex.h b/Eigen/src/Core/arch/SSE/Complex.h index 6b73a42f9..d61e82d00 100644 --- a/Eigen/src/Core/arch/SSE/Complex.h +++ b/Eigen/src/Core/arch/SSE/Complex.h @@ -56,13 +56,6 @@ template<> struct ei_packet_traits > : ei_default_packet_tr template<> struct ei_unpacket_traits { typedef std::complex type; enum {size=2}; }; -template<> EIGEN_STRONG_INLINE Packet2cf ei_pset1 >(const std::complex& from) -{ - Packet2cf res; - res.v = _mm_loadl_pi(res.v, (const __m64*)&from); - return Packet2cf(_mm_movelh_ps(res.v,res.v)); -} - template<> EIGEN_STRONG_INLINE Packet2cf ei_padd(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf ei_psub(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf ei_pnegate(const Packet2cf& a) @@ -96,19 +89,29 @@ template<> EIGEN_STRONG_INLINE Packet2cf ei_por (const Packet2cf& template<> EIGEN_STRONG_INLINE Packet2cf ei_pxor (const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_xor_ps(a.v,b.v)); } template<> EIGEN_STRONG_INLINE Packet2cf ei_pandnot(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_andnot_ps(a.v,b.v)); } -template<> EIGEN_STRONG_INLINE Packet2cf ei_pload >(const std::complex* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(_mm_load_ps((const float*)from)); } -template<> EIGEN_STRONG_INLINE Packet2cf ei_ploadu >(const std::complex* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ei_ploadu((const float*)from)); } +template<> EIGEN_STRONG_INLINE Packet2cf ei_pload >(const std::complex* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(ei_pload(&ei_real_ref(*from))); } +template<> EIGEN_STRONG_INLINE Packet2cf ei_ploadu >(const std::complex* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ei_ploadu(&ei_real_ref(*from))); } -template<> EIGEN_STRONG_INLINE void ei_pstore >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE _mm_store_ps((float*)to, from.v); } -template<> EIGEN_STRONG_INLINE void ei_pstoreu >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu((float*)to, from.v); } +template<> EIGEN_STRONG_INLINE void ei_pstore >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE ei_pstore(&ei_real_ref(*to), from.v); } +template<> EIGEN_STRONG_INLINE void ei_pstoreu >(std::complex * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE ei_pstoreu(&ei_real_ref(*to), from.v); } template<> EIGEN_STRONG_INLINE void ei_prefetch >(const std::complex * addr) { _mm_prefetch((const char*)(addr), _MM_HINT_T0); } +template<> EIGEN_STRONG_INLINE Packet2cf ei_pset1 >(const std::complex& from) +{ + Packet2cf res; + res.v = _mm_loadl_pi(res.v, (const __m64*)&from); + return Packet2cf(_mm_movelh_ps(res.v,res.v)); +} + template<> EIGEN_STRONG_INLINE std::complex ei_pfirst(const Packet2cf& a) { - std::complex res; - _mm_storel_pi((__m64*)&res, a.v); - return res; + union { + float res[2]; + double asDouble; + }; + _mm_store_sd(&asDouble,_mm_castps_pd(a.v)); + return *(std::complex*)res; } template<> EIGEN_STRONG_INLINE Packet2cf ei_preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(ei_preverse(_mm_castps_pd(a.v)))); } diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index c480ce14d..f2f4ae506 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -88,9 +88,9 @@ static void run(Index rows, Index cols, Index depth, // if (ConjugateRhs) // alpha = ei_conj(alpha); -// ei_gemm_pack_lhs pack_lhs; -// ei_gemm_pack_rhs pack_rhs; -// ei_gebp_kernel gebp; +// ei_gemm_pack_lhs pack_lhs; +// ei_gemm_pack_rhs pack_rhs; +// ei_gebp_kernel gebp; #ifdef EIGEN_HAS_OPENMP if(info)