Fix altivec and vsx builds

This commit is contained in:
C. Antonio Sanchez 2025-02-15 13:18:14 -08:00
parent c0378fedd8
commit 88cd53774e
3 changed files with 12 additions and 49 deletions

View File

@ -237,12 +237,12 @@ template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, con
result.v = reinterpret_cast<Packet4f>(pblend<Packet2d>(ifPacket, reinterpret_cast<Packet2d>(thenPacket.v), reinterpret_cast<Packet2d>(elsePacket.v)));
return result;
}
#endif
// psqrt specialization for Packet2cf: passes the argument unchanged to the
// psqrt_complex<Packet2cf> helper and returns its result.
template<> EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(const Packet2cf& a)
{
return psqrt_complex<Packet2cf>(a);
}
#endif
//---------- double ----------
#ifdef EIGEN_VECTORIZE_VSX

View File

@ -40,7 +40,7 @@ Packet4f pcos<Packet4f>(const Packet4f& _x)
return pcos_float(_x);
}
#ifdef __VSX__
#ifdef EIGEN_VECTORIZE_VSX
#ifndef EIGEN_COMP_CLANG
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet4f prsqrt<Packet4f>(const Packet4f& x)
@ -48,8 +48,6 @@ Packet4f prsqrt<Packet4f>(const Packet4f& x)
return vec_rsqrt(x);
}
#ifdef EIGEN_VECTORIZE_VSX
#ifndef EIGEN_COMP_CLANG
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet2d prsqrt<Packet2d>(const Packet2d& x)
{
@ -85,30 +83,8 @@ Packet2d prsqrt<Packet2d>(const Packet2d& x)
// vec_rsqrt returns different results from the generic version
// return vec_rsqrt(x);
}
// patan specialization for Packet2d: delegates directly to patan_double and
// returns its result.
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet2d patan<Packet2d>(const Packet2d& _x)
{
return patan_double(_x);
}
#endif
// prsqrt specialization for Packet4f: returns pset1<Packet4f>(1.0f) divided by
// psqrt<Packet4f>(x). The vec_rsqrt intrinsic is deliberately not used here;
// the call is kept below, commented out, for the reason stated next to it.
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f prsqrt<Packet4f>(const Packet4f& x)
{
return pset1<Packet4f>(1.0f) / psqrt<Packet4f>(x);
// vec_rsqrt returns different results from the generic version
// return vec_rsqrt(x);
}
// prsqrt specialization for Packet2d: returns pset1<Packet2d>(1.0) divided by
// psqrt<Packet2d>(x). The vec_rsqrt intrinsic is deliberately not used here;
// the call is kept below, commented out, for the reason stated next to it.
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet2d prsqrt<Packet2d>(const Packet2d& x)
{
return pset1<Packet2d>(1.0) / psqrt<Packet2d>(x);
// vec_rsqrt returns different results from the generic version
// return vec_rsqrt(x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet2d pexp<Packet2d>(const Packet2d& _x)
{

View File

@ -846,12 +846,6 @@ template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i&
// pmadd overloads for the Packet8s and Packet8us packet types: each forwards
// (a, b, c) unchanged to the vec_madd intrinsic and returns its result.
template<> EIGEN_STRONG_INLINE Packet8s pmadd(const Packet8s& a, const Packet8s& b, const Packet8s& c) { return vec_madd(a,b,c); }
template<> EIGEN_STRONG_INLINE Packet8us pmadd(const Packet8us& a, const Packet8us& b, const Packet8us& c) { return vec_madd(a,b,c); }
#ifdef EIGEN_VECTORIZE_VSX
// Packet4f overloads of pmsub / pnmadd / pnmsub; each is a direct call to a
// single vector intrinsic with the arguments passed through in order.
// NOTE(review): pnmadd maps to vec_nmsub and pnmsub maps to vec_nmadd. The
// crossed names look intentional — presumably Eigen's pnmadd is -(a*b)+c,
// which matches vec_nmsub's -(a*b-c), and pnmsub is -(a*b)-c, which matches
// vec_nmadd's -(a*b+c). Confirm against the generic definitions before
// "fixing" the apparent mismatch.
template<> EIGEN_STRONG_INLINE Packet4f pmsub(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_msub(a,b,c); }
template<> EIGEN_STRONG_INLINE Packet4f pnmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_nmsub(a,b,c); }
template<> EIGEN_STRONG_INLINE Packet4f pnmsub(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_nmadd(a,b,c); }
#endif
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)
{
#ifdef EIGEN_VECTORIZE_VSX
@ -988,18 +982,15 @@ template<> EIGEN_STRONG_INLINE Packet4f print<Packet4f>(const Packet4f& a)
template<typename Packet> EIGEN_STRONG_INLINE Packet ploadu_common(const __UNPACK_TYPE__(Packet)* from)
{
EIGEN_DEBUG_ALIGNED_LOAD
#ifdef _BIG_ENDIAN
Packet16uc MSQ, LSQ;
Packet16uc mask;
MSQ = vec_ld(0, (unsigned char *)from); // most significant quadword
LSQ = vec_ld(15, (unsigned char *)from); // least significant quadword
mask = vec_lvsl(0, from); // create the permute mask
//TODO: Add static_cast here
return (Packet) vec_perm(MSQ, LSQ, mask); // align the data
#else
EIGEN_DEBUG_UNALIGNED_LOAD
#ifdef EIGEN_VECTORIZE_VSX
return vec_xl(0, const_cast<__UNPACK_TYPE__(Packet)*>(from));
#else
Packet16uc mask = vec_lvsl(0, from); // create the permute mask
Packet16uc MSQ = vec_ld(0, (unsigned char *)from); // most significant quadword
Packet16uc LSQ = vec_ld(15, (unsigned char *)from); // least significant quadword
//TODO: Add static_cast here
return (Packet) vec_perm(MSQ, LSQ, mask); // align the data
#endif
}
@ -1104,7 +1095,9 @@ template<> EIGEN_STRONG_INLINE Packet16uc ploaddup<Packet16uc>(const unsigned ch
template<typename Packet> EIGEN_STRONG_INLINE void pstoreu_common(__UNPACK_TYPE__(Packet)* to, const Packet& from)
{
EIGEN_DEBUG_UNALIGNED_STORE
#ifdef _BIG_ENDIAN
#ifdef EIGEN_VECTORIZE_VSX
vec_xst(from, 0, to);
#else
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
// Warning: not thread safe!
Packet16uc MSQ, LSQ, edges;
@ -1119,8 +1112,6 @@ template<typename Packet> EIGEN_STRONG_INLINE void pstoreu_common(__UNPACK_TYPE_
LSQ = vec_perm((Packet16uc)from,edges,align); // misalign the data (LSQ)
vec_st( LSQ, 15, (unsigned char *)to ); // Store the LSQ part first
vec_st( MSQ, 0, (unsigned char *)to ); // Store the MSQ part second
#else
vec_xst(from, 0, to);
#endif
}
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from)
@ -1379,10 +1370,6 @@ template<> EIGEN_STRONG_INLINE Packet8bf psub<Packet8bf>(const Packet8bf& a, con
BF16_TO_F32_BINARY_OP_WRAPPER(psub<Packet4f>, a, b);
}
// pexp specialization for Packet8bf: the whole body is the expansion of
// BF16_TO_F32_UNARY_OP_WRAPPER applied to pexp_float and `a`.
// NOTE(review): presumably the macro widens the bf16 lanes to float, applies
// pexp_float, and narrows the result back to Packet8bf — confirm against the
// macro definition, which is not visible here.
template<> EIGEN_STRONG_INLINE Packet8bf pexp<Packet8bf> (const Packet8bf& a){
BF16_TO_F32_UNARY_OP_WRAPPER(pexp_float, a);
}
// pldexp specialization for Packet4f: delegates to pldexp_generic with the
// same (a, exponent) arguments and returns its result.
template<> EIGEN_STRONG_INLINE Packet4f pldexp<Packet4f>(const Packet4f& a, const Packet4f& exponent) {
return pldexp_generic(a,exponent);
}