From 4a58f30aa083d2f5753465a36730ca7734b483be Mon Sep 17 00:00:00 2001 From: Chip Kerchner Date: Tue, 31 Jan 2023 19:40:24 +0000 Subject: [PATCH] Fix pre-POWER8_VECTOR bugs in pcmp_lt and pnegate and reactivate psqrt. --- Eigen/src/Core/arch/AltiVec/MathFunctions.h | 44 +++++++++++---------- Eigen/src/Core/arch/AltiVec/PacketMath.h | 21 +++++++++- 2 files changed, 43 insertions(+), 22 deletions(-) diff --git a/Eigen/src/Core/arch/AltiVec/MathFunctions.h b/Eigen/src/Core/arch/AltiVec/MathFunctions.h index ae75787c2..45761e217 100644 --- a/Eigen/src/Core/arch/AltiVec/MathFunctions.h +++ b/Eigen/src/Core/arch/AltiVec/MathFunctions.h @@ -61,26 +61,6 @@ Packet4f patan(const Packet4f& _x) } #ifdef EIGEN_VECTORIZE_VSX -#if !EIGEN_COMP_CLANG -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet4f prsqrt(const Packet4f& x) -{ - return vec_rsqrt(x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet2d prsqrt(const Packet2d& x) -{ - return vec_rsqrt(x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet2d patan(const Packet2d& _x) -{ - return patan_double(_x); -} -#endif - template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f psqrt(const Packet4f& x) { @@ -93,6 +73,30 @@ Packet2d psqrt(const Packet2d& x) return vec_sqrt(x); } +#if !EIGEN_COMP_CLANG +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet4f prsqrt(const Packet4f& x) +{ + return pset1(1.0f) / psqrt(x); +// vec_rsqrt returns different results from the generic version +// return vec_rsqrt(x); +} + +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet2d prsqrt(const Packet2d& x) +{ + return pset1(1.0) / psqrt(x); +// vec_rsqrt returns different results from the generic version +// return vec_rsqrt(x); +} + +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet2d patan(const Packet2d& _x) +{ + return patan_double(_x); +} +#endif + template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d pexp(const Packet2d& _x) { diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 96ca9d655..fa12892fe 100644 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -175,6 +175,11 @@ struct packet_traits : default_packet_traits { HasExp = 1, #ifdef EIGEN_VECTORIZE_VSX HasSqrt = 1, +#if !EIGEN_COMP_CLANG + HasRsqrt = 1, +#else + HasRsqrt = 0, +#endif HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, HasRint = 1, @@ -215,6 +220,11 @@ struct packet_traits : default_packet_traits { HasExp = 1, #ifdef EIGEN_VECTORIZE_VSX HasSqrt = 1, +#if !EIGEN_COMP_CLANG + HasRsqrt = 1, +#else + HasRsqrt = 0, +#endif HasRint = 1, #else HasSqrt = 0, @@ -992,7 +1002,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) #ifdef __POWER8_VECTOR__ return vec_neg(a); #else - return p4f_ZERO - a; + return vec_xor(a, p4f_MZERO); #endif } template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) @@ -1086,7 +1096,10 @@ template<> EIGEN_STRONG_INLINE Packet16c pmax(const Packet16c& a, con template<> EIGEN_STRONG_INLINE Packet16uc pmax(const Packet16uc& a, const Packet16uc& b) { return vec_max(a, b); } template<> EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f& a, const Packet4f& b) { return reinterpret_cast(vec_cmple(a,b)); } +// To fix bug with vec_cmplt on older versions +#if defined(__POWER8_VECTOR__) || EIGEN_COMP_LLVM template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f& a, const Packet4f& b) { return reinterpret_cast(vec_cmplt(a,b)); } +#endif template<> EIGEN_STRONG_INLINE Packet4f pcmp_eq(const Packet4f& a, const Packet4f& b) { return reinterpret_cast(vec_cmpeq(a,b)); } template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) { Packet4f c = reinterpret_cast(vec_cmpge(a,b)); @@ -2700,7 +2713,11 @@ template<> struct packet_traits : default_packet_traits HasLog = 0, HasExp = 1, HasSqrt = 1, +#if !EIGEN_COMP_CLANG + HasRsqrt = 1, +#else HasRsqrt = 0, +#endif HasRound = 1, HasFloor = 1, HasCeil = 1, @@ -2806,7 +2823,7 @@ template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) #ifdef __POWER8_VECTOR__ return vec_neg(a); #else - return p2d_ZERO - a; + return vec_xor(a, p2d_MZERO); #endif }