diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 8027cb597..482064e17 100644 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -1514,16 +1514,13 @@ EIGEN_STRONG_INLINE Packet4f print(const Packet4f& a) { template EIGEN_STRONG_INLINE Packet ploadu_common(const __UNPACK_TYPE__(Packet) * from) { - EIGEN_DEBUG_ALIGNED_LOAD -#if defined(EIGEN_VECTORIZE_VSX) || !defined(_BIG_ENDIAN) EIGEN_DEBUG_UNALIGNED_LOAD +#if defined(EIGEN_VECTORIZE_VSX) return vec_xl(0, const_cast<__UNPACK_TYPE__(Packet)*>(from)); #else - Packet16uc MSQ, LSQ; - Packet16uc mask; - MSQ = vec_ld(0, (unsigned char*)from); // most significant quadword - LSQ = vec_ld(15, (unsigned char*)from); // least significant quadword - mask = vec_lvsl(0, from); // create the permute mask + Packet16uc MSQ = vec_ld(0, (unsigned char*)from); // most significant quadword + Packet16uc LSQ = vec_ld(15, (unsigned char*)from); // least significant quadword + Packet16uc mask = vec_lvsl(0, from); // create the permute mask // TODO: Add static_cast here return (Packet)vec_perm(MSQ, LSQ, mask); // align the data #endif @@ -1733,7 +1730,7 @@ EIGEN_STRONG_INLINE Packet16uc ploadquad(const unsigned char* from) template EIGEN_STRONG_INLINE void pstoreu_common(__UNPACK_TYPE__(Packet) * to, const Packet& from) { EIGEN_DEBUG_UNALIGNED_STORE -#if defined(EIGEN_VECTORIZE_VSX) || !defined(_BIG_ENDIAN) +#if defined(EIGEN_VECTORIZE_VSX) vec_xst(from, 0, to); #else // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html @@ -2069,7 +2066,7 @@ EIGEN_STRONG_INLINE Packet8bf F32ToBf16(Packet4f p4f) { input = padd(input, rounding_bias); const EIGEN_DECLARE_CONST_FAST_Packet4ui(nan, 0x7FC00000); -#ifdef _ARCH_PWR9 +#if defined(_ARCH_PWR9) && defined(EIGEN_VECTORIZE_VSX) Packet4bi nan_selector = vec_test_data_class(p4f, __VEC_CLASS_FP_NAN); input = vec_sel(input, p4ui_nan, nan_selector); @@ -2178,7 +2175,7 @@ EIGEN_ALWAYS_INLINE Packet8bf F32ToBf16Two(Packet4f lo, Packet4f hi) { Packet8bi rounding_bias = vec_cmplt(lsb, p4f2); Packet8us input = psub(p4f, reinterpret_cast(rounding_bias)); -#ifdef _ARCH_PWR9 +#if defined(_ARCH_PWR9) && defined(EIGEN_VECTORIZE_VSX) Packet4bi nan_selector_lo = vec_test_data_class(lo, __VEC_CLASS_FP_NAN); Packet4bi nan_selector_hi = vec_test_data_class(hi, __VEC_CLASS_FP_NAN); Packet8us nan_selector = @@ -3400,9 +3397,17 @@ EIGEN_STRONG_INLINE Packet2d pcmp_eq(const Packet2d& a, const Packet2d& b) { return reinterpret_cast(vec_cmpeq(a, b)); } template <> +#ifdef __POWER8_VECTOR__ EIGEN_STRONG_INLINE Packet2l pcmp_eq(const Packet2l& a, const Packet2l& b) { return reinterpret_cast(vec_cmpeq(a, b)); } +#else +EIGEN_STRONG_INLINE Packet2l pcmp_eq(const Packet2l& a, const Packet2l& b) { + Packet4i halves = reinterpret_cast(vec_cmpeq(reinterpret_cast(a), reinterpret_cast(b))); + Packet4i flipped = vec_perm(halves, halves, p16uc_COMPLEX32_REV); + return reinterpret_cast(pand(halves, flipped)); +} +#endif template <> EIGEN_STRONG_INLINE Packet2d pcmp_lt_or_nan(const Packet2d& a, const Packet2d& b) { Packet2d c = reinterpret_cast(vec_cmpge(a, b));