mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-07-15 17:41:48 +08:00
Fix pre-POWER8_VECTOR bugs in pcmp_lt and pnegate and reactivate psqrt.
(cherry picked from commit 4a58f30aa083d2f5753465a36730ca7734b483be)
This commit is contained in:
parent
1217390db4
commit
e734787bb7
@ -40,24 +40,9 @@ Packet4f pcos<Packet4f>(const Packet4f& _x)
|
||||
return pcos_float(_x);
|
||||
}
|
||||
|
||||
#ifndef EIGEN_COMP_CLANG
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f prsqrt<Packet4f>(const Packet4f& x)
|
||||
{
|
||||
return vec_rsqrt(x);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __VSX__
|
||||
#ifndef EIGEN_COMP_CLANG
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet2d prsqrt<Packet2d>(const Packet2d& x)
|
||||
{
|
||||
return vec_rsqrt(x);
|
||||
}
|
||||
#endif
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f psqrt<Packet4f>(const Packet4f& x)
|
||||
{
|
||||
return vec_sqrt(x);
|
||||
@ -69,12 +54,41 @@ Packet2d psqrt<Packet2d>(const Packet2d& x)
|
||||
return vec_sqrt(x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f prsqrt<Packet4f>(const Packet4f& x)
|
||||
{
|
||||
return pset1<Packet4f>(1.0f) / psqrt<Packet4f>(x);
|
||||
// vec_rsqrt returns different results from the generic version
|
||||
// return vec_rsqrt(x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet2d prsqrt<Packet2d>(const Packet2d& x)
|
||||
{
|
||||
return pset1<Packet2d>(1.0) / psqrt<Packet2d>(x);
|
||||
// vec_rsqrt returns different results from the generic version
|
||||
// return vec_rsqrt(x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet2d pexp<Packet2d>(const Packet2d& _x)
|
||||
{
|
||||
return pexp_double(_x);
|
||||
}
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8bf psqrt<Packet8bf> (const Packet8bf& a){
|
||||
BF16_TO_F32_UNARY_OP_WRAPPER(vec_sqrt, a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8bf prsqrt<Packet8bf> (const Packet8bf& a){
|
||||
BF16_TO_F32_UNARY_OP_WRAPPER(prsqrt<Packet4f>, a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8bf pexp<Packet8bf> (const Packet8bf& a){
|
||||
BF16_TO_F32_UNARY_OP_WRAPPER(pexp_float, a);
|
||||
}
|
||||
|
||||
#endif // __VSX__
|
||||
|
||||
// Hyperbolic Tangent function.
|
||||
template <>
|
||||
|
@ -786,8 +786,22 @@ template<> EIGEN_STRONG_INLINE Packet8us psub<Packet8us> (const Packet8us& a,
|
||||
template<> EIGEN_STRONG_INLINE Packet16c psub<Packet16c> (const Packet16c& a, const Packet16c& b) { return a - b; }
|
||||
template<> EIGEN_STRONG_INLINE Packet16uc psub<Packet16uc>(const Packet16uc& a, const Packet16uc& b) { return a - b; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) { return p4f_ZERO - a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return p4i_ZERO - a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
|
||||
{
|
||||
#ifdef __POWER8_VECTOR__
|
||||
return vec_neg(a);
|
||||
#else
|
||||
return vec_xor(a, p4f_MZERO);
|
||||
#endif
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a)
|
||||
{
|
||||
#ifdef __POWER8_VECTOR__
|
||||
return vec_neg(a);
|
||||
#else
|
||||
return p4i_ZERO - a;
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) { return a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
|
||||
@ -865,7 +879,10 @@ template<> EIGEN_STRONG_INLINE Packet16c pmax<Packet16c>(const Packet16c& a, con
|
||||
template<> EIGEN_STRONG_INLINE Packet16uc pmax<Packet16uc>(const Packet16uc& a, const Packet16uc& b) { return vec_max(a, b); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmple(a,b)); }
|
||||
// To fix bug with vec_cmplt on older versions
|
||||
#if defined(__POWER8_VECTOR__) || EIGEN_COMP_LLVM
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmplt(a,b)); }
|
||||
#endif
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pcmp_eq(const Packet4f& a, const Packet4f& b) { return reinterpret_cast<Packet4f>(vec_cmpeq(a,b)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) {
|
||||
Packet4f c = reinterpret_cast<Packet4f>(vec_cmpge(a,b));
|
||||
@ -1341,16 +1358,6 @@ template<> EIGEN_STRONG_INLINE Packet8bf psub<Packet8bf>(const Packet8bf& a, con
|
||||
BF16_TO_F32_BINARY_OP_WRAPPER(psub<Packet4f>, a, b);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8bf psqrt<Packet8bf> (const Packet8bf& a){
|
||||
BF16_TO_F32_UNARY_OP_WRAPPER(vec_sqrt, a);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet8bf prsqrt<Packet8bf> (const Packet8bf& a){
|
||||
BF16_TO_F32_UNARY_OP_WRAPPER(prsqrt<Packet4f>, a);
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet8bf pexp<Packet8bf> (const Packet8bf& a){
|
||||
BF16_TO_F32_UNARY_OP_WRAPPER(pexp_float, a);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pldexp<Packet4f>(const Packet4f& a, const Packet4f& exponent) {
|
||||
return pldexp_generic(a,exponent);
|
||||
}
|
||||
@ -2304,7 +2311,11 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
HasLog = 0,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
#if !EIGEN_COMP_CLANG
|
||||
HasRsqrt = 1,
|
||||
#else
|
||||
HasRsqrt = 0,
|
||||
#endif
|
||||
HasRound = 1,
|
||||
HasFloor = 1,
|
||||
HasCeil = 1,
|
||||
@ -2393,7 +2404,14 @@ template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return a - b; }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return p2d_ZERO - a; }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a)
|
||||
{
|
||||
#ifdef __POWER8_VECTOR__
|
||||
return vec_neg(a);
|
||||
#else
|
||||
return vec_xor(a, p2d_MZERO);
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user