From 88cd53774e9be9dae0fb2801f32ee94ec754137c Mon Sep 17 00:00:00 2001 From: "C. Antonio Sanchez" Date: Sat, 15 Feb 2025 13:18:14 -0800 Subject: [PATCH] Fix altivec and vsx builds --- Eigen/src/Core/arch/AltiVec/Complex.h | 2 +- Eigen/src/Core/arch/AltiVec/MathFunctions.h | 26 +--------------- Eigen/src/Core/arch/AltiVec/PacketMath.h | 33 +++++++-------------- 3 files changed, 12 insertions(+), 49 deletions(-) diff --git a/Eigen/src/Core/arch/AltiVec/Complex.h b/Eigen/src/Core/arch/AltiVec/Complex.h index 34f890949..dca2ab79f 100644 --- a/Eigen/src/Core/arch/AltiVec/Complex.h +++ b/Eigen/src/Core/arch/AltiVec/Complex.h @@ -237,12 +237,12 @@ template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, con result.v = reinterpret_cast(pblend(ifPacket, reinterpret_cast(thenPacket.v), reinterpret_cast(elsePacket.v))); return result; } -#endif template<> EIGEN_STRONG_INLINE Packet2cf psqrt(const Packet2cf& a) { return psqrt_complex(a); } +#endif //---------- double ---------- #ifdef EIGEN_VECTORIZE_VSX diff --git a/Eigen/src/Core/arch/AltiVec/MathFunctions.h b/Eigen/src/Core/arch/AltiVec/MathFunctions.h index f99f5e627..e3d761628 100644 --- a/Eigen/src/Core/arch/AltiVec/MathFunctions.h +++ b/Eigen/src/Core/arch/AltiVec/MathFunctions.h @@ -40,7 +40,7 @@ Packet4f pcos(const Packet4f& _x) return pcos_float(_x); } -#ifdef __VSX__ +#ifdef EIGEN_VECTORIZE_VSX #ifndef EIGEN_COMP_CLANG template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f prsqrt(const Packet4f& x) @@ -48,8 +48,6 @@ Packet4f prsqrt(const Packet4f& x) return vec_rsqrt(x); } -#ifdef EIGEN_VECTORIZE_VSX -#ifndef EIGEN_COMP_CLANG template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d prsqrt(const Packet2d& x) { @@ -85,30 +83,8 @@ Packet2d prsqrt(const Packet2d& x) // vec_rsqrt returns different results from the generic version // return vec_rsqrt(x); } - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet2d patan(const Packet2d& _x) -{ - return patan_double(_x); -} #endif -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet4f prsqrt(const Packet4f& x) -{ - return pset1(1.0f) / psqrt(x); -// vec_rsqrt returns different results from the generic version -// return vec_rsqrt(x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet2d prsqrt(const Packet2d& x) -{ - return pset1(1.0) / psqrt(x); -// vec_rsqrt returns different results from the generic version -// return vec_rsqrt(x); -} - template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d pexp(const Packet2d& _x) { diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 0f6fed4d7..5c1abe70f 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -846,12 +846,6 @@ template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& template<> EIGEN_STRONG_INLINE Packet8s pmadd(const Packet8s& a, const Packet8s& b, const Packet8s& c) { return vec_madd(a,b,c); } template<> EIGEN_STRONG_INLINE Packet8us pmadd(const Packet8us& a, const Packet8us& b, const Packet8us& c) { return vec_madd(a,b,c); } -#ifdef EIGEN_VECTORIZE_VSX -template<> EIGEN_STRONG_INLINE Packet4f pmsub(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_msub(a,b,c); } -template<> EIGEN_STRONG_INLINE Packet4f pnmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_nmsub(a,b,c); } -template<> EIGEN_STRONG_INLINE Packet4f pnmsub(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_nmadd(a,b,c); } -#endif - template<> EIGEN_STRONG_INLINE Packet4f pmin(const Packet4f& a, const Packet4f& b) { #ifdef EIGEN_VECTORIZE_VSX @@ -988,18 +982,15 @@ template<> EIGEN_STRONG_INLINE Packet4f print(const Packet4f& a) template EIGEN_STRONG_INLINE Packet ploadu_common(const __UNPACK_TYPE__(Packet)* from) { - EIGEN_DEBUG_ALIGNED_LOAD -#ifdef _BIG_ENDIAN - Packet16uc MSQ, LSQ; - Packet16uc mask; - MSQ = vec_ld(0, (unsigned char *)from); // most significant quadword - LSQ = vec_ld(15, (unsigned char *)from); // least significant quadword - mask = vec_lvsl(0, from); // create the permute mask - //TODO: Add static_cast here - return (Packet) vec_perm(MSQ, LSQ, mask); // align the data -#else EIGEN_DEBUG_UNALIGNED_LOAD +#ifdef EIGEN_VECTORIZE_VSX return vec_xl(0, const_cast<__UNPACK_TYPE__(Packet)*>(from)); +#else + Packet16uc mask = vec_lvsl(0, from); // create the permute mask + Packet16uc MSQ = vec_ld(0, (unsigned char *)from); // most significant quadword + Packet16uc LSQ = vec_ld(15, (unsigned char *)from); // least significant quadword + //TODO: Add static_cast here + return (Packet) vec_perm(MSQ, LSQ, mask); // align the data #endif } @@ -1104,7 +1095,9 @@ template<> EIGEN_STRONG_INLINE Packet16uc ploaddup(const unsigned ch template EIGEN_STRONG_INLINE void pstoreu_common(__UNPACK_TYPE__(Packet)* to, const Packet& from) { EIGEN_DEBUG_UNALIGNED_STORE -#ifdef _BIG_ENDIAN +#ifdef EIGEN_VECTORIZE_VSX + vec_xst(from, 0, to); +#else // Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html // Warning: not thread safe! Packet16uc MSQ, LSQ, edges; @@ -1119,8 +1112,6 @@ template EIGEN_STRONG_INLINE void pstoreu_common(__UNPACK_TYPE_ LSQ = vec_perm((Packet16uc)from,edges,align); // misalign the data (LSQ) vec_st( LSQ, 15, (unsigned char *)to ); // Store the LSQ part first vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part second -#else - vec_xst(from, 0, to); #endif } template<> EIGEN_STRONG_INLINE void pstoreu(float* to, const Packet4f& from) @@ -1379,10 +1370,6 @@ template<> EIGEN_STRONG_INLINE Packet8bf psub(const Packet8bf& a, con BF16_TO_F32_BINARY_OP_WRAPPER(psub, a, b); } -template<> EIGEN_STRONG_INLINE Packet8bf pexp (const Packet8bf& a){ - BF16_TO_F32_UNARY_OP_WRAPPER(pexp_float, a); -} - template<> EIGEN_STRONG_INLINE Packet4f pldexp(const Packet4f& a, const Packet4f& exponent) { return pldexp_generic(a,exponent); }