diff --git a/Eigen/src/Core/arch/AltiVec/MathFunctions.h b/Eigen/src/Core/arch/AltiVec/MathFunctions.h index 3a7a32936..2b7c204e3 100644 --- a/Eigen/src/Core/arch/AltiVec/MathFunctions.h +++ b/Eigen/src/Core/arch/AltiVec/MathFunctions.h @@ -40,24 +40,9 @@ Packet4f pcos(const Packet4f& _x) return pcos_float(_x); } -#ifndef EIGEN_COMP_CLANG -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED -Packet4f prsqrt(const Packet4f& x) -{ - return vec_rsqrt(x); -} -#endif - #ifdef __VSX__ -#ifndef EIGEN_COMP_CLANG -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED -Packet2d prsqrt(const Packet2d& x) -{ - return vec_rsqrt(x); -} -#endif -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f psqrt(const Packet4f& x) { return vec_sqrt(x); @@ -69,12 +54,41 @@ Packet2d psqrt(const Packet2d& x) return vec_sqrt(x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet4f prsqrt(const Packet4f& x) +{ + return pset1(1.0f) / psqrt(x); +// vec_rsqrt returns different results from the generic version +// return vec_rsqrt(x); +} + +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet2d prsqrt(const Packet2d& x) +{ + return pset1(1.0) / psqrt(x); +// vec_rsqrt returns different results from the generic version +// return vec_rsqrt(x); +} + +template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d pexp(const Packet2d& _x) { return pexp_double(_x); } -#endif + +template<> EIGEN_STRONG_INLINE Packet8bf psqrt (const Packet8bf& a){ + BF16_TO_F32_UNARY_OP_WRAPPER(vec_sqrt, a); +} + +template<> EIGEN_STRONG_INLINE Packet8bf prsqrt (const Packet8bf& a){ + BF16_TO_F32_UNARY_OP_WRAPPER(prsqrt, a); +} + +template<> EIGEN_STRONG_INLINE Packet8bf pexp (const Packet8bf& a){ + BF16_TO_F32_UNARY_OP_WRAPPER(pexp_float, a); +} + +#endif // __VSX__ // Hyperbolic Tangent function. template <> diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 2a440545b..528f995d3 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -786,8 +786,22 @@ template<> EIGEN_STRONG_INLINE Packet8us psub (const Packet8us& a, template<> EIGEN_STRONG_INLINE Packet16c psub (const Packet16c& a, const Packet16c& b) { return a - b; } template<> EIGEN_STRONG_INLINE Packet16uc psub(const Packet16uc& a, const Packet16uc& b) { return a - b; } -template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return p4f_ZERO - a; } -template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return p4i_ZERO - a; } +template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) +{ +#ifdef __POWER8_VECTOR__ + return vec_neg(a); +#else + return vec_xor(a, p4f_MZERO); +#endif +} +template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) +{ +#ifdef __POWER8_VECTOR__ + return vec_neg(a); +#else + return p4i_ZERO - a; +#endif +} template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; } template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; } @@ -865,7 +879,10 @@ template<> EIGEN_STRONG_INLINE Packet16c pmax(const Packet16c& a, con template<> EIGEN_STRONG_INLINE Packet16uc pmax(const Packet16uc& a, const Packet16uc& b) { return vec_max(a, b); } template<> EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f& a, const Packet4f& b) { return reinterpret_cast(vec_cmple(a,b)); } +// To fix bug with vec_cmplt on older versions +#if defined(__POWER8_VECTOR__) || EIGEN_COMP_LLVM template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f& a, const Packet4f& b) { return reinterpret_cast(vec_cmplt(a,b)); } +#endif template<> EIGEN_STRONG_INLINE Packet4f pcmp_eq(const Packet4f& a, const Packet4f& b) { return reinterpret_cast(vec_cmpeq(a,b)); } template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) { Packet4f c = reinterpret_cast(vec_cmpge(a,b)); @@ -1341,16 +1358,6 @@ template<> EIGEN_STRONG_INLINE Packet8bf psub(const Packet8bf& a, con BF16_TO_F32_BINARY_OP_WRAPPER(psub, a, b); } -template<> EIGEN_STRONG_INLINE Packet8bf psqrt (const Packet8bf& a){ - BF16_TO_F32_UNARY_OP_WRAPPER(vec_sqrt, a); -} -template<> EIGEN_STRONG_INLINE Packet8bf prsqrt (const Packet8bf& a){ - BF16_TO_F32_UNARY_OP_WRAPPER(prsqrt, a); -} -template<> EIGEN_STRONG_INLINE Packet8bf pexp (const Packet8bf& a){ - BF16_TO_F32_UNARY_OP_WRAPPER(pexp_float, a); -} - template<> EIGEN_STRONG_INLINE Packet4f pldexp(const Packet4f& a, const Packet4f& exponent) { return pldexp_generic(a,exponent); } @@ -2304,7 +2311,11 @@ template<> struct packet_traits : default_packet_traits HasLog = 0, HasExp = 1, HasSqrt = 1, +#if !EIGEN_COMP_CLANG HasRsqrt = 1, +#else + HasRsqrt = 0, +#endif HasRound = 1, HasFloor = 1, HasCeil = 1, @@ -2393,7 +2404,14 @@ template<> EIGEN_STRONG_INLINE Packet2d padd(const Packet2d& a, const template<> EIGEN_STRONG_INLINE Packet2d psub(const Packet2d& a, const Packet2d& b) { return a - b; } -template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return p2d_ZERO - a; } +template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) +{ +#ifdef __POWER8_VECTOR__ + return vec_neg(a); +#else + return vec_xor(a, p2d_MZERO); +#endif +} template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }