Fix pre-POWER8_VECTOR bugs in pcmp_lt and pnegate and reactivate psqrt.

(cherry picked from commit 4a58f30aa083d2f5753465a36730ca7734b483be)
This commit is contained in:
Chip Kerchner 2023-01-31 19:40:24 +00:00 committed by Antonio Sanchez
parent 1217390db4
commit e734787bb7
2 changed files with 63 additions and 31 deletions

View File

@ -40,24 +40,9 @@ Packet4f pcos<Packet4f>(const Packet4f& _x)
return pcos_float(_x);
}
#ifndef EIGEN_COMP_CLANG
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f prsqrt<Packet4f>(const Packet4f& x)
{
return vec_rsqrt(x);
}
#endif
#ifdef __VSX__
#ifndef EIGEN_COMP_CLANG
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet2d prsqrt<Packet2d>(const Packet2d& x)
{
return vec_rsqrt(x);
}
#endif
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f psqrt<Packet4f>(const Packet4f& x)
{
return vec_sqrt(x);
@ -69,12 +54,41 @@ Packet2d psqrt<Packet2d>(const Packet2d& x)
return vec_sqrt(x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f prsqrt<Packet4f>(const Packet4f& x)
{
return pset1<Packet4f>(1.0f) / psqrt<Packet4f>(x);
// vec_rsqrt returns different results from the generic version
// return vec_rsqrt(x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet2d prsqrt<Packet2d>(const Packet2d& x)
{
return pset1<Packet2d>(1.0) / psqrt<Packet2d>(x);
// vec_rsqrt returns different results from the generic version
// return vec_rsqrt(x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet2d pexp<Packet2d>(const Packet2d& _x)
{
return pexp_double(_x);
}
#endif
template<> EIGEN_STRONG_INLINE Packet8bf psqrt<Packet8bf> (const Packet8bf& a){
BF16_TO_F32_UNARY_OP_WRAPPER(vec_sqrt, a);
}
template<> EIGEN_STRONG_INLINE Packet8bf prsqrt<Packet8bf> (const Packet8bf& a){
BF16_TO_F32_UNARY_OP_WRAPPER(prsqrt<Packet4f>, a);
}
template<> EIGEN_STRONG_INLINE Packet8bf pexp<Packet8bf> (const Packet8bf& a){
BF16_TO_F32_UNARY_OP_WRAPPER(pexp_float, a);
}
#endif // __VSX__
// Hyperbolic Tangent function.
template <>

View File

@ -786,8 +786,22 @@ template<> EIGEN_STRONG_INLINE Packet8us psub<Packet8us> (const Packet8us& a,
template<> EIGEN_STRONG_INLINE Packet16c psub<Packet16c> (const Packet16c& a, const Packet16c& b) { return a - b; }
template<> EIGEN_STRONG_INLINE Packet16uc psub<Packet16uc>(const Packet16uc& a, const Packet16uc& b) { return a - b; }
template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return p4f_ZERO - a; }
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return p4i_ZERO - a; }
template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
{
#ifdef __POWER8_VECTOR__
return vec_neg(a);
#else
return vec_xor(a, p4f_MZERO);
#endif
}
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)
{
#ifdef __POWER8_VECTOR__
return vec_neg(a);
#else
return p4i_ZERO - a;
#endif
}
template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
@ -865,7 +879,10 @@ template<> EIGEN_STRONG_INLINE Packet16c pmax<Packet16c>(const Packet16c& a, con
template<> EIGEN_STRONG_INLINE Packet16uc pmax<Packet16uc>(const Packet16uc& a, const Packet16uc& b) { return vec_max(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmple(a,b)); }
// To fix bug with vec_cmplt on older versions
#if defined(__POWER8_VECTOR__) || EIGEN_COMP_LLVM
template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmplt(a,b)); }
#endif
template<> EIGEN_STRONG_INLINE Packet4f pcmp_eq(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmpeq(a,b)); }
template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) {
Packet4f c = reinterpret_cast<Packet4f>(vec_cmpge(a,b));
@ -1341,16 +1358,6 @@ template<> EIGEN_STRONG_INLINE Packet8bf psub<Packet8bf>(const Packet8bf& a, con
BF16_TO_F32_BINARY_OP_WRAPPER(psub<Packet4f>, a, b);
}
template<> EIGEN_STRONG_INLINE Packet8bf psqrt<Packet8bf> (const Packet8bf& a){
BF16_TO_F32_UNARY_OP_WRAPPER(vec_sqrt, a);
}
template<> EIGEN_STRONG_INLINE Packet8bf prsqrt<Packet8bf> (const Packet8bf& a){
BF16_TO_F32_UNARY_OP_WRAPPER(prsqrt<Packet4f>, a);
}
template<> EIGEN_STRONG_INLINE Packet8bf pexp<Packet8bf> (const Packet8bf& a){
BF16_TO_F32_UNARY_OP_WRAPPER(pexp_float, a);
}
template<> EIGEN_STRONG_INLINE Packet4f pldexp<Packet4f>(const Packet4f& a, const Packet4f& exponent) {
return pldexp_generic(a,exponent);
}
@ -2304,7 +2311,11 @@ template<> struct packet_traits<double> : default_packet_traits
HasLog = 0,
HasExp = 1,
HasSqrt = 1,
#if !EIGEN_COMP_CLANG
HasRsqrt = 1,
#else
HasRsqrt = 0,
#endif
HasRound = 1,
HasFloor = 1,
HasCeil = 1,
@ -2393,7 +2404,14 @@ template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const
template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return a - b; }
template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return p2d_ZERO - a; }
template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a)
{
#ifdef __POWER8_VECTOR__
return vec_neg(a);
#else
return vec_xor(a, p2d_MZERO);
#endif
}
template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }