Fix pre-POWER8_VECTOR bugs in pcmp_lt and pnegate and reactivate prsqrt.

This commit is contained in:
Chip Kerchner 2023-01-31 19:40:24 +00:00 committed by Rasmus Munk Larsen
parent 12ad99ce60
commit 4a58f30aa0
2 changed files with 43 additions and 22 deletions

View File

@ -61,26 +61,6 @@ Packet4f patan<Packet4f>(const Packet4f& _x)
} }
#ifdef EIGEN_VECTORIZE_VSX #ifdef EIGEN_VECTORIZE_VSX
#if !EIGEN_COMP_CLANG
// Packet4f specialization of prsqrt (removed side of this hunk).
// Returns vec_rsqrt(x) directly, with no further processing of the result.
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f prsqrt<Packet4f>(const Packet4f& x)
{
return vec_rsqrt(x);
}
// Packet2d specialization of prsqrt (removed side of this hunk).
// Returns vec_rsqrt(x) directly, with no further processing of the result.
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet2d prsqrt<Packet2d>(const Packet2d& x)
{
return vec_rsqrt(x);
}
// Packet2d specialization of patan (removed side of this hunk).
// Forwards its argument unchanged to patan_double and returns that result.
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet2d patan<Packet2d>(const Packet2d& _x)
{
return patan_double(_x);
}
#endif
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f psqrt<Packet4f>(const Packet4f& x) Packet4f psqrt<Packet4f>(const Packet4f& x)
{ {
@ -93,6 +73,30 @@ Packet2d psqrt<Packet2d>(const Packet2d& x)
return vec_sqrt(x); return vec_sqrt(x);
} }
#if !EIGEN_COMP_CLANG
// Packet4f specialization of prsqrt.
// Computes the result as pset1<Packet4f>(1.0f) divided by psqrt<Packet4f>(x)
// rather than calling vec_rsqrt.
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f prsqrt<Packet4f>(const Packet4f& x)
{
return pset1<Packet4f>(1.0f) / psqrt<Packet4f>(x);
// Why vec_rsqrt is not used: it returns different results from the generic
// version. The direct call is kept below, commented out, for reference.
// return vec_rsqrt(x);
}
// Packet2d specialization of prsqrt.
// Computes the result as pset1<Packet2d>(1.0) divided by psqrt<Packet2d>(x)
// rather than calling vec_rsqrt.
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet2d prsqrt<Packet2d>(const Packet2d& x)
{
return pset1<Packet2d>(1.0) / psqrt<Packet2d>(x);
// Why vec_rsqrt is not used: it returns different results from the generic
// version. The direct call is kept below, commented out, for reference.
// return vec_rsqrt(x);
}
// Packet2d specialization of patan.
// Forwards its argument unchanged to patan_double and returns that result.
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet2d patan<Packet2d>(const Packet2d& _x)
{
return patan_double(_x);
}
#endif
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet2d pexp<Packet2d>(const Packet2d& _x) Packet2d pexp<Packet2d>(const Packet2d& _x)
{ {

View File

@ -175,6 +175,11 @@ struct packet_traits<float> : default_packet_traits {
HasExp = 1, HasExp = 1,
#ifdef EIGEN_VECTORIZE_VSX #ifdef EIGEN_VECTORIZE_VSX
HasSqrt = 1, HasSqrt = 1,
#if !EIGEN_COMP_CLANG
HasRsqrt = 1,
#else
HasRsqrt = 0,
#endif
HasTanh = EIGEN_FAST_MATH, HasTanh = EIGEN_FAST_MATH,
HasErf = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH,
HasRint = 1, HasRint = 1,
@ -215,6 +220,11 @@ struct packet_traits<bfloat16> : default_packet_traits {
HasExp = 1, HasExp = 1,
#ifdef EIGEN_VECTORIZE_VSX #ifdef EIGEN_VECTORIZE_VSX
HasSqrt = 1, HasSqrt = 1,
#if !EIGEN_COMP_CLANG
HasRsqrt = 1,
#else
HasRsqrt = 0,
#endif
HasRint = 1, HasRint = 1,
#else #else
HasSqrt = 0, HasSqrt = 0,
@ -992,7 +1002,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
#ifdef __POWER8_VECTOR__ #ifdef __POWER8_VECTOR__
return vec_neg(a); return vec_neg(a);
#else #else
return p4f_ZERO - a; return vec_xor(a, p4f_MZERO);
#endif #endif
} }
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)
@ -1086,7 +1096,10 @@ template<> EIGEN_STRONG_INLINE Packet16c pmax<Packet16c>(const Packet16c& a, con
template<> EIGEN_STRONG_INLINE Packet16uc pmax<Packet16uc>(const Packet16uc& a, const Packet16uc& b) { return vec_max(a, b); } template<> EIGEN_STRONG_INLINE Packet16uc pmax<Packet16uc>(const Packet16uc& a, const Packet16uc& b) { return vec_max(a, b); }
template<> EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmple(a,b)); } template<> EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmple(a,b)); }
// To fix bug with vec_cmplt on older versions
#if defined(__POWER8_VECTOR__) || EIGEN_COMP_LLVM
template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmplt(a,b)); } template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmplt(a,b)); }
#endif
template<> EIGEN_STRONG_INLINE Packet4f pcmp_eq(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmpeq(a,b)); } template<> EIGEN_STRONG_INLINE Packet4f pcmp_eq(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmpeq(a,b)); }
template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) { template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) {
Packet4f c = reinterpret_cast<Packet4f>(vec_cmpge(a,b)); Packet4f c = reinterpret_cast<Packet4f>(vec_cmpge(a,b));
@ -2700,7 +2713,11 @@ template<> struct packet_traits<double> : default_packet_traits
HasLog = 0, HasLog = 0,
HasExp = 1, HasExp = 1,
HasSqrt = 1, HasSqrt = 1,
#if !EIGEN_COMP_CLANG
HasRsqrt = 1,
#else
HasRsqrt = 0, HasRsqrt = 0,
#endif
HasRound = 1, HasRound = 1,
HasFloor = 1, HasFloor = 1,
HasCeil = 1, HasCeil = 1,
@ -2806,7 +2823,7 @@ template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a)
#ifdef __POWER8_VECTOR__ #ifdef __POWER8_VECTOR__
return vec_neg(a); return vec_neg(a);
#else #else
return p2d_ZERO - a; return vec_xor(a, p2d_MZERO);
#endif #endif
} }