diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h
index 46812f97b..60f28ff70 100644
--- a/Eigen/src/Core/arch/AltiVec/Complex.h
+++ b/Eigen/src/Core/arch/AltiVec/Complex.h
@@ -18,7 +18,7 @@ namespace Eigen {
 namespace internal {
 
 static Packet4ui p4ui_CONJ_XOR = vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO);//{ 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 #if defined(_BIG_ENDIAN)
 static Packet2ul p2ul_CONJ_XOR1 = (Packet2ul) vec_sld((Packet4ui) p2d_MZERO, (Packet4ui) p2l_ZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
 static Packet2ul p2ul_CONJ_XOR2 = (Packet2ul) vec_sld((Packet4ui) p2l_ZERO, (Packet4ui) p2d_MZERO, 8);//{ 0x8000000000000000, 0x0000000000000000 };
@@ -103,7 +103,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
     HasMin    = 0,
     HasMax    = 0,
     HasSqrt   = 1,
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
     HasBlend  = 1,
 #endif
     HasSetLinear = 0
@@ -115,7 +115,7 @@ template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type;
 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
 {
   Packet2cf res;
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
   // Load a single std::complex<float> from memory and duplicate
   //
   // Using pload would read past the end of the reference in this case
@@ -151,7 +151,7 @@ template<> EIGEN_ALWAYS_INLINE void pstoreu_partial<std::complex<float> >(std::c
 EIGEN_STRONG_INLINE Packet2cf pload2(const std::complex<float>& from0, const std::complex<float>& from1)
 {
   Packet4f res0, res1;
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
   // Load two std::complex<float> from memory and combine
   __asm__ ("lxsdx %x0,%y1" : "=wa" (res0) : "Z" (from0));
   __asm__ ("lxsdx %x0,%y1" : "=wa" (res1) : "Z" (from1));
@@ -269,7 +269,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& x
 
 EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet2cf,2>& kernel)
 {
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
   Packet4f tmp = reinterpret_cast<Packet4f>(vec_mergeh(reinterpret_cast<Packet2d>(kernel.packet[0].v), reinterpret_cast<Packet2d>(kernel.packet[1].v)));
   kernel.packet[1].v = reinterpret_cast<Packet4f>(vec_mergel(reinterpret_cast<Packet2d>(kernel.packet[0].v), reinterpret_cast<Packet2d>(kernel.packet[1].v)));
 #else
@@ -284,7 +284,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packe
   return Packet2cf(vec_and(eq, vec_perm(eq, eq, p16uc_COMPLEX32_REV)));
 }
 
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
   Packet2cf result;
   result.v = reinterpret_cast<Packet4f>(pblend<Packet2d>(ifPacket, reinterpret_cast<Packet2d>(thenPacket.v), reinterpret_cast<Packet2d>(elsePacket.v)));
@@ -298,7 +298,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(const Packet2cf& a)
 }
 
 //---------- double ----------
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 struct Packet1cd
 {
   EIGEN_STRONG_INLINE Packet1cd() {}
diff --git a/Eigen/src/Core/arch/AltiVec/MathFunctions.h b/Eigen/src/Core/arch/AltiVec/MathFunctions.h
index 6f48d98fb..bda3722da 100644
--- a/Eigen/src/Core/arch/AltiVec/MathFunctions.h
+++ b/Eigen/src/Core/arch/AltiVec/MathFunctions.h
@@ -60,7 +60,7 @@ Packet4f patan<Packet4f>(const Packet4f& _x)
   return patan_float(_x);
 }
 
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 #ifndef EIGEN_COMP_CLANG
 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet4f prsqrt<Packet4f>(const Packet4f& x)
diff --git a/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.h b/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.h
index 9d00b93e4..bb84ac977 100644
--- a/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.h
+++ b/Eigen/src/Core/arch/AltiVec/MatrixVectorProduct.h
@@ -599,7 +599,7 @@ EIGEN_ALWAYS_INLINE Packet2cf pcplxflip2(Packet2cf a)
 
 EIGEN_ALWAYS_INLINE Packet1cd pcplxflip2(Packet1cd a)
 {
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
   return Packet1cd(__builtin_vsx_xxpermdi(a.v, a.v, 2));
 #else
   return Packet1cd(Packet2d(vec_perm(Packet16uc(a.v), Packet16uc(a.v), p16uc_COMPLEX64_XORFLIP)));
@@ -610,7 +610,7 @@ EIGEN_ALWAYS_INLINE Packet1cd pcplxflip2(Packet1cd a)
 EIGEN_ALWAYS_INLINE Packet4f pload_complex_half(std::complex<float>* src)
 {
   Packet4f t;
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
   // Load float64/two float32 (doubleword alignment)
   __asm__("lxsdx %x0,%y1" : "=wa" (t) : "Z" (*src));
 #else
@@ -636,7 +636,7 @@ EIGEN_ALWAYS_INLINE void pload_realimag(RhsScalar* src, Packet4f& r, Packet4f& i
 template<typename RhsScalar>
 EIGEN_ALWAYS_INLINE void pload_realimag(RhsScalar* src, Packet2d& r, Packet2d& i)
 {
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
   __asm__("lxvdsx %x0,%y1" : "=wa" (r) : "Z" (*(reinterpret_cast<double*>(src) + 0)));
   __asm__("lxvdsx %x0,%y1" : "=wa" (i) : "Z" (*(reinterpret_cast<double*>(src) + 1)));
 #else
@@ -675,7 +675,7 @@ EIGEN_ALWAYS_INLINE void pload_realimag_row(RhsScalar* src, Packet2d& r, Packet2
 /** \internal load and splat a complex value into a vector - column-wise */
 EIGEN_ALWAYS_INLINE Packet4f pload_realimag_combine(std::complex<float>* src)
 {
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
   Packet4f ret;
   __asm__("lxvdsx %x0,%y1" : "=wa" (ret) : "Z" (*(reinterpret_cast<double*>(src) + 0)));
   return ret;
diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h
index b0f85294e..86532a087 100644
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -173,7 +173,7 @@ struct packet_traits<float> : default_packet_traits {
     HasATan = 1,
     HasLog = 1,
     HasExp = 1,
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
     HasSqrt = 1,
 #if !EIGEN_COMP_CLANG
     HasRsqrt = 1,
@@ -218,7 +218,7 @@ struct packet_traits<bfloat16> : default_packet_traits {
     HasCos = EIGEN_FAST_MATH,
     HasLog = 1,
     HasExp = 1,
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
     HasSqrt = 1,
 #if !EIGEN_COMP_CLANG
     HasRsqrt = 1,
@@ -446,7 +446,7 @@ EIGEN_STRONG_INLINE Packet pload_common(const __UNPACK_TYPE__(Packet)* from)
   // ignoring these warnings for now.
   EIGEN_UNUSED_VARIABLE(from);
   EIGEN_DEBUG_ALIGNED_LOAD
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
   return vec_xl(0, const_cast<__UNPACK_TYPE__(Packet)*>(from));
 #else
   return vec_ld(0, from);
@@ -501,7 +501,7 @@ EIGEN_ALWAYS_INLINE Packet pload_ignore(const __UNPACK_TYPE__(Packet)* from)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
 #endif
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
   return vec_xl(0, const_cast<__UNPACK_TYPE__(Packet)*>(from));
 #else
   return vec_ld(0, from);
@@ -608,7 +608,7 @@ EIGEN_STRONG_INLINE void pstore_common(__UNPACK_TYPE__(Packet)* to, const Packet
   // ignoring these warnings for now.
   EIGEN_UNUSED_VARIABLE(to);
   EIGEN_DEBUG_ALIGNED_STORE
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
   vec_xst(from, 0, to);
 #else
   vec_st(from, 0, to);
@@ -1054,7 +1054,7 @@ template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i&
 template<> EIGEN_STRONG_INLINE Packet8s pmadd(const Packet8s& a, const Packet8s& b, const Packet8s& c) { return vec_madd(a,b,c); }
 template<> EIGEN_STRONG_INLINE Packet8us pmadd(const Packet8us& a, const Packet8us& b, const Packet8us& c) { return vec_madd(a,b,c); }
 
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 template<> EIGEN_STRONG_INLINE Packet4f pmsub(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_msub(a,b,c); }
 template<> EIGEN_STRONG_INLINE Packet4f pnmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_nmsub(a,b,c); }
 template<> EIGEN_STRONG_INLINE Packet4f pnmsub(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_nmadd(a,b,c); }
@@ -1062,7 +1062,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pnmsub(const Packet4f
 
 template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)
 {
-  #ifdef __VSX__
+  #ifdef EIGEN_VECTORIZE_VSX
   // NOTE: about 10% slower than vec_min, but consistent with std::min and SSE regarding NaN
   Packet4f ret;
   __asm__ ("xvcmpgesp %x0,%x1,%x2\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
@@ -1080,7 +1080,7 @@ template<> EIGEN_STRONG_INLINE Packet16uc pmin<Packet16uc>(const Packet16uc& a,
 
 template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)
 {
-  #ifdef __VSX__
+  #ifdef EIGEN_VECTORIZE_VSX
   // NOTE: about 10% slower than vec_max, but consistent with std::max and SSE regarding NaN
   Packet4f ret;
   __asm__ ("xvcmpgtsp %x0,%x2,%x1\n\txxsel %x0,%x1,%x2,%x0" : "=&wa" (ret) : "wa" (a), "wa" (b));
@@ -1103,27 +1103,27 @@ template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const
   return vec_nor(c,c);
 }
 
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 template<> EIGEN_STRONG_INLINE Packet4i pcmp_le(const Packet4i& a, const Packet4i& b) { return reinterpret_cast<Packet4i>(vec_cmple(a,b)); }
 #endif
 template<> EIGEN_STRONG_INLINE Packet4i pcmp_lt(const Packet4i& a, const Packet4i& b) { return reinterpret_cast<Packet4i>(vec_cmplt(a,b)); }
 template<> EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4i& b) { return reinterpret_cast<Packet4i>(vec_cmpeq(a,b)); }
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 template<> EIGEN_STRONG_INLINE Packet8s pcmp_le(const Packet8s& a, const Packet8s& b) { return reinterpret_cast<Packet8s>(vec_cmple(a,b)); }
 #endif
 template<> EIGEN_STRONG_INLINE Packet8s pcmp_lt(const Packet8s& a, const Packet8s& b) { return reinterpret_cast<Packet8s>(vec_cmplt(a,b)); }
 template<> EIGEN_STRONG_INLINE Packet8s pcmp_eq(const Packet8s& a, const Packet8s& b) { return reinterpret_cast<Packet8s>(vec_cmpeq(a,b)); }
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 template<> EIGEN_STRONG_INLINE Packet8us pcmp_le(const Packet8us& a, const Packet8us& b) { return reinterpret_cast<Packet8us>(vec_cmple(a,b)); }
 #endif
 template<> EIGEN_STRONG_INLINE Packet8us pcmp_lt(const Packet8us& a, const Packet8us& b) { return reinterpret_cast<Packet8us>(vec_cmplt(a,b)); }
 template<> EIGEN_STRONG_INLINE Packet8us pcmp_eq(const Packet8us& a, const Packet8us& b) { return reinterpret_cast<Packet8us>(vec_cmpeq(a,b)); }
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 template<> EIGEN_STRONG_INLINE Packet16c pcmp_le(const Packet16c& a, const Packet16c& b) { return reinterpret_cast<Packet16c>(vec_cmple(a,b)); }
 #endif
 template<> EIGEN_STRONG_INLINE Packet16c pcmp_lt(const Packet16c& a, const Packet16c& b)
 { return reinterpret_cast<Packet16c>(vec_cmplt(a,b)); }
 template<> EIGEN_STRONG_INLINE Packet16c pcmp_eq(const Packet16c& a, const Packet16c& b) { return reinterpret_cast<Packet16c>(vec_cmpeq(a,b)); }
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 template<> EIGEN_STRONG_INLINE Packet16uc pcmp_le(const Packet16uc& a, const Packet16uc& b) { return reinterpret_cast<Packet16uc>(vec_cmple(a,b)); }
 #endif
 template<> EIGEN_STRONG_INLINE Packet16uc pcmp_lt(const Packet16uc& a, const Packet16uc& b) { return reinterpret_cast<Packet16uc>(vec_cmplt(a,b)); }
@@ -1164,7 +1164,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a)
   Packet4f t = vec_add(reinterpret_cast<Packet4f>(vec_or(vec_and(reinterpret_cast<Packet4ui>(a), p4ui_SIGN), p4ui_PREV0DOT5)), a);
   Packet4f res;
 
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
   __asm__("xvrspiz %x0, %x1\n\t"
     : "=&wa" (res)
     : "wa" (t));
@@ -1178,7 +1178,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a)
 }
 template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return vec_ceil(a); }
 template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return vec_floor(a); }
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 template<> EIGEN_STRONG_INLINE Packet4f print<Packet4f>(const Packet4f& a)
 {
   Packet4f res;
@@ -1194,7 +1194,7 @@ template<> EIGEN_STRONG_INLINE Packet4f print<Packet4f>(const Packet4f& a)
 template<typename Packet> EIGEN_STRONG_INLINE Packet ploadu_common(const __UNPACK_TYPE__(Packet)* from)
 {
   EIGEN_DEBUG_ALIGNED_LOAD
-#if defined(__VSX__) || !defined(_BIG_ENDIAN)
+#if defined(EIGEN_VECTORIZE_VSX) || !defined(_BIG_ENDIAN)
   EIGEN_DEBUG_UNALIGNED_LOAD
   return vec_xl(0, const_cast<__UNPACK_TYPE__(Packet)*>(from));
 #else
@@ -1377,7 +1377,7 @@ template<> EIGEN_STRONG_INLINE Packet16uc ploaddup<Packet16uc>(const unsigned ch
 template<typename Packet> EIGEN_STRONG_INLINE void pstoreu_common(__UNPACK_TYPE__(Packet)* to, const Packet& from)
 {
   EIGEN_DEBUG_UNALIGNED_STORE
-#if defined(__VSX__) || !defined(_BIG_ENDIAN)
+#if defined(EIGEN_VECTORIZE_VSX) || !defined(_BIG_ENDIAN)
   vec_xst(from, 0, to);
 #else
   // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
@@ -1773,7 +1773,7 @@ template<> EIGEN_STRONG_INLINE Packet8bf pceil<Packet8bf> (const Packet8bf& a){
 template<> EIGEN_STRONG_INLINE Packet8bf pround<Packet8bf> (const Packet8bf& a){
   BF16_TO_F32_UNARY_OP_WRAPPER(pround, a);
 }
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 template<> EIGEN_STRONG_INLINE Packet8bf print<Packet8bf> (const Packet8bf& a){
   BF16_TO_F32_UNARY_OP_WRAPPER(print, a);
 }
@@ -2657,7 +2657,7 @@ template<> EIGEN_STRONG_INLINE Packet4f preinterpret(const Pa
 
 //---------- double ----------
-#ifdef __VSX__
+#ifdef EIGEN_VECTORIZE_VSX
 typedef __vector double Packet2d;
 typedef __vector unsigned long long Packet2ul;
 typedef __vector long long Packet2l;
diff --git a/Eigen/src/Core/util/ConfigureVectorization.h b/Eigen/src/Core/util/ConfigureVectorization.h
index 7c1a08bdc..b49fc01c3 100644
--- a/Eigen/src/Core/util/ConfigureVectorization.h
+++ b/Eigen/src/Core/util/ConfigureVectorization.h
@@ -352,10 +352,10 @@
 #endif
   } // end extern "C"
 
-  #elif defined __VSX__
+  #elif defined(__VSX__) && !defined(__APPLE__)
 
     #define EIGEN_VECTORIZE
-    #define EIGEN_VECTORIZE_VSX
+    #define EIGEN_VECTORIZE_VSX 1
    #include <altivec.h>
    // We need to #undef all these ugly tokens defined in <altivec.h>
    // => use __vector instead of vector
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 344815943..c3804b376 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -371,7 +371,7 @@
 #endif
 
 /// \internal EIGEN_ARCH_PPC set to 1 if the architecture is PowerPC
-#if defined(__powerpc__) || defined(__ppc__) || defined(_M_PPC)
+#if defined(__powerpc__) || defined(__ppc__) || defined(_M_PPC) || defined(__POWERPC__)
   #define EIGEN_ARCH_PPC 1
 #else
   #define EIGEN_ARCH_PPC 0
 #endif
@@ -976,11 +976,16 @@ namespace Eigen {
     // directly for std::complex, Eigen::half, Eigen::bfloat16. For these,
     // you will need to apply to the underlying POD type.
     #if EIGEN_ARCH_PPC && EIGEN_COMP_GNUC_STRICT
-      // This seems to be broken on clang.  Packet4f is loaded into a single
-      // register rather than a vector, zeroing out some entries.  Integer
+      // This seems to be broken on clang. Packet4f is loaded into a single
+      // register rather than a vector, zeroing out some entries. Integer
       // types also generate a compile error.
-      // General, Altivec, VSX.
-      #define EIGEN_OPTIMIZATION_BARRIER(X) __asm__ ("" : "+r,v,wa" (X));
+      #if EIGEN_OS_MAC
+        // General, Altivec for Apple (VSX was added in ISA v2.06):
+        #define EIGEN_OPTIMIZATION_BARRIER(X) __asm__ ("" : "+r,v" (X));
+      #else
+        // General, Altivec, VSX otherwise:
+        #define EIGEN_OPTIMIZATION_BARRIER(X) __asm__ ("" : "+r,v,wa" (X));
+      #endif
     #elif EIGEN_ARCH_ARM_OR_ARM64
       #ifdef __ARM_FP
         // General, VFP or NEON.
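
Note (not part of the patch): a minimal sketch of how the renamed guard is intended to be consumed. After this change, VSX-specific paths should key off Eigen's own EIGEN_VECTORIZE_VSX macro rather than the compiler's __VSX__, because the ConfigureVectorization.h hunk above leaves EIGEN_VECTORIZE_VSX undefined on __APPLE__ builds even if the compiler defines __VSX__. The program below only assumes an installed Eigen and reports which code path a given build would take.

#include <Eigen/Core>
#include <iostream>

int main() {
#ifdef EIGEN_VECTORIZE_VSX
  // Eigen enabled VSX: vec_xl/vec_xst unaligned loads/stores, Packet2d,
  // pblend for Packet2cf, etc. (see the hunks above).
  std::cout << "EIGEN_VECTORIZE_VSX is defined\n";
#else
  // Plain AltiVec or scalar paths only, e.g. vec_ld/vec_st with aligned data.
  std::cout << "EIGEN_VECTORIZE_VSX is not defined\n";
#endif
#ifdef __VSX__
  // The compiler may still advertise VSX even when Eigen does not use it
  // (the Apple case excluded in ConfigureVectorization.h).
  std::cout << "__VSX__ is defined by the compiler\n";
#endif
  return 0;
}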