mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-24 02:29:33 +08:00
Fix pre-POWER8_VECTOR bugs in pcmp_lt and pnegate and reactivate psqrt.
This commit is contained in:
parent
12ad99ce60
commit
4a58f30aa0
@ -61,26 +61,6 @@ Packet4f patan<Packet4f>(const Packet4f& _x)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifdef EIGEN_VECTORIZE_VSX
|
#ifdef EIGEN_VECTORIZE_VSX
|
||||||
#if !EIGEN_COMP_CLANG
|
|
||||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
||||||
Packet4f prsqrt<Packet4f>(const Packet4f& x)
|
|
||||||
{
|
|
||||||
return vec_rsqrt(x);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
||||||
Packet2d prsqrt<Packet2d>(const Packet2d& x)
|
|
||||||
{
|
|
||||||
return vec_rsqrt(x);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
||||||
Packet2d patan<Packet2d>(const Packet2d& _x)
|
|
||||||
{
|
|
||||||
return patan_double(_x);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||||
Packet4f psqrt<Packet4f>(const Packet4f& x)
|
Packet4f psqrt<Packet4f>(const Packet4f& x)
|
||||||
{
|
{
|
||||||
@ -93,6 +73,30 @@ Packet2d psqrt<Packet2d>(const Packet2d& x)
|
|||||||
return vec_sqrt(x);
|
return vec_sqrt(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if !EIGEN_COMP_CLANG
|
||||||
|
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||||
|
Packet4f prsqrt<Packet4f>(const Packet4f& x)
|
||||||
|
{
|
||||||
|
return pset1<Packet4f>(1.0f) / psqrt<Packet4f>(x);
|
||||||
|
// vec_rsqrt returns different results from the generic version
|
||||||
|
// return vec_rsqrt(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||||
|
Packet2d prsqrt<Packet2d>(const Packet2d& x)
|
||||||
|
{
|
||||||
|
return pset1<Packet2d>(1.0) / psqrt<Packet2d>(x);
|
||||||
|
// vec_rsqrt returns different results from the generic version
|
||||||
|
// return vec_rsqrt(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||||
|
Packet2d patan<Packet2d>(const Packet2d& _x)
|
||||||
|
{
|
||||||
|
return patan_double(_x);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||||
Packet2d pexp<Packet2d>(const Packet2d& _x)
|
Packet2d pexp<Packet2d>(const Packet2d& _x)
|
||||||
{
|
{
|
||||||
|
@ -175,6 +175,11 @@ struct packet_traits<float> : default_packet_traits {
|
|||||||
HasExp = 1,
|
HasExp = 1,
|
||||||
#ifdef EIGEN_VECTORIZE_VSX
|
#ifdef EIGEN_VECTORIZE_VSX
|
||||||
HasSqrt = 1,
|
HasSqrt = 1,
|
||||||
|
#if !EIGEN_COMP_CLANG
|
||||||
|
HasRsqrt = 1,
|
||||||
|
#else
|
||||||
|
HasRsqrt = 0,
|
||||||
|
#endif
|
||||||
HasTanh = EIGEN_FAST_MATH,
|
HasTanh = EIGEN_FAST_MATH,
|
||||||
HasErf = EIGEN_FAST_MATH,
|
HasErf = EIGEN_FAST_MATH,
|
||||||
HasRint = 1,
|
HasRint = 1,
|
||||||
@ -215,6 +220,11 @@ struct packet_traits<bfloat16> : default_packet_traits {
|
|||||||
HasExp = 1,
|
HasExp = 1,
|
||||||
#ifdef EIGEN_VECTORIZE_VSX
|
#ifdef EIGEN_VECTORIZE_VSX
|
||||||
HasSqrt = 1,
|
HasSqrt = 1,
|
||||||
|
#if !EIGEN_COMP_CLANG
|
||||||
|
HasRsqrt = 1,
|
||||||
|
#else
|
||||||
|
HasRsqrt = 0,
|
||||||
|
#endif
|
||||||
HasRint = 1,
|
HasRint = 1,
|
||||||
#else
|
#else
|
||||||
HasSqrt = 0,
|
HasSqrt = 0,
|
||||||
@ -992,7 +1002,7 @@ template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
|
|||||||
#ifdef __POWER8_VECTOR__
|
#ifdef __POWER8_VECTOR__
|
||||||
return vec_neg(a);
|
return vec_neg(a);
|
||||||
#else
|
#else
|
||||||
return p4f_ZERO - a;
|
return vec_xor(a, p4f_MZERO);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)
|
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)
|
||||||
@ -1086,7 +1096,10 @@ template<> EIGEN_STRONG_INLINE Packet16c pmax<Packet16c>(const Packet16c& a, con
|
|||||||
template<> EIGEN_STRONG_INLINE Packet16uc pmax<Packet16uc>(const Packet16uc& a, const Packet16uc& b) { return vec_max(a, b); }
|
template<> EIGEN_STRONG_INLINE Packet16uc pmax<Packet16uc>(const Packet16uc& a, const Packet16uc& b) { return vec_max(a, b); }
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmple(a,b)); }
|
template<> EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmple(a,b)); }
|
||||||
|
// To fix bug with vec_cmplt on older versions
|
||||||
|
#if defined(__POWER8_VECTOR__) || EIGEN_COMP_LLVM
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmplt(a,b)); }
|
template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmplt(a,b)); }
|
||||||
|
#endif
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f pcmp_eq(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmpeq(a,b)); }
|
template<> EIGEN_STRONG_INLINE Packet4f pcmp_eq(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmpeq(a,b)); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) {
|
template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) {
|
||||||
Packet4f c = reinterpret_cast<Packet4f>(vec_cmpge(a,b));
|
Packet4f c = reinterpret_cast<Packet4f>(vec_cmpge(a,b));
|
||||||
@ -2700,7 +2713,11 @@ template<> struct packet_traits<double> : default_packet_traits
|
|||||||
HasLog = 0,
|
HasLog = 0,
|
||||||
HasExp = 1,
|
HasExp = 1,
|
||||||
HasSqrt = 1,
|
HasSqrt = 1,
|
||||||
|
#if !EIGEN_COMP_CLANG
|
||||||
|
HasRsqrt = 1,
|
||||||
|
#else
|
||||||
HasRsqrt = 0,
|
HasRsqrt = 0,
|
||||||
|
#endif
|
||||||
HasRound = 1,
|
HasRound = 1,
|
||||||
HasFloor = 1,
|
HasFloor = 1,
|
||||||
HasCeil = 1,
|
HasCeil = 1,
|
||||||
@ -2806,7 +2823,7 @@ template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a)
|
|||||||
#ifdef __POWER8_VECTOR__
|
#ifdef __POWER8_VECTOR__
|
||||||
return vec_neg(a);
|
return vec_neg(a);
|
||||||
#else
|
#else
|
||||||
return p2d_ZERO - a;
|
return vec_xor(a, p2d_MZERO);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user