Fix power builds for no VSX and no POWER8.

This commit is contained in:
C. Antonio Sanchez 2025-02-15 13:55:14 -08:00
parent eb3f9f443d
commit 1d8b82b074

View File

@ -1514,16 +1514,13 @@ EIGEN_STRONG_INLINE Packet4f print<Packet4f>(const Packet4f& a) {
template <typename Packet>
EIGEN_STRONG_INLINE Packet ploadu_common(const __UNPACK_TYPE__(Packet) * from) {
EIGEN_DEBUG_ALIGNED_LOAD
#if defined(EIGEN_VECTORIZE_VSX) || !defined(_BIG_ENDIAN)
EIGEN_DEBUG_UNALIGNED_LOAD
#if defined(EIGEN_VECTORIZE_VSX)
return vec_xl(0, const_cast<__UNPACK_TYPE__(Packet)*>(from));
#else
Packet16uc MSQ, LSQ;
Packet16uc mask;
MSQ = vec_ld(0, (unsigned char*)from); // most significant quadword
LSQ = vec_ld(15, (unsigned char*)from); // least significant quadword
mask = vec_lvsl(0, from); // create the permute mask
Packet16uc MSQ = vec_ld(0, (unsigned char*)from); // most significant quadword
Packet16uc LSQ = vec_ld(15, (unsigned char*)from); // least significant quadword
Packet16uc mask = vec_lvsl(0, from); // create the permute mask
// TODO: Add static_cast here
return (Packet)vec_perm(MSQ, LSQ, mask); // align the data
#endif
@ -1733,7 +1730,7 @@ EIGEN_STRONG_INLINE Packet16uc ploadquad<Packet16uc>(const unsigned char* from)
template <typename Packet>
EIGEN_STRONG_INLINE void pstoreu_common(__UNPACK_TYPE__(Packet) * to, const Packet& from) {
EIGEN_DEBUG_UNALIGNED_STORE
#if defined(EIGEN_VECTORIZE_VSX) || !defined(_BIG_ENDIAN)
#if defined(EIGEN_VECTORIZE_VSX)
vec_xst(from, 0, to);
#else
// Taken from http://developer.apple.com/hardwaredrivers/ve/alignment.html
@ -2069,7 +2066,7 @@ EIGEN_STRONG_INLINE Packet8bf F32ToBf16(Packet4f p4f) {
input = padd<Packet4ui>(input, rounding_bias);
const EIGEN_DECLARE_CONST_FAST_Packet4ui(nan, 0x7FC00000);
#ifdef _ARCH_PWR9
#if defined(_ARCH_PWR9) && defined(EIGEN_VECTORIZE_VSX)
Packet4bi nan_selector = vec_test_data_class(p4f, __VEC_CLASS_FP_NAN);
input = vec_sel(input, p4ui_nan, nan_selector);
@ -2178,7 +2175,7 @@ EIGEN_ALWAYS_INLINE Packet8bf F32ToBf16Two(Packet4f lo, Packet4f hi) {
Packet8bi rounding_bias = vec_cmplt(lsb, p4f2);
Packet8us input = psub<Packet8us>(p4f, reinterpret_cast<Packet8us>(rounding_bias));
#ifdef _ARCH_PWR9
#if defined(_ARCH_PWR9) && defined(EIGEN_VECTORIZE_VSX)
Packet4bi nan_selector_lo = vec_test_data_class(lo, __VEC_CLASS_FP_NAN);
Packet4bi nan_selector_hi = vec_test_data_class(hi, __VEC_CLASS_FP_NAN);
Packet8us nan_selector =
@ -3400,9 +3397,17 @@ EIGEN_STRONG_INLINE Packet2d pcmp_eq(const Packet2d& a, const Packet2d& b) {
return reinterpret_cast<Packet2d>(vec_cmpeq(a, b));
}
// Lane-wise equality for Packet2l: returns the comparison mask reinterpreted
// as a Packet2l. Two implementations are selected at compile time depending on
// whether __POWER8_VECTOR__ is defined.
template <>
EIGEN_STRONG_INLINE Packet2l pcmp_eq(const Packet2l& a, const Packet2l& b) {
#ifdef __POWER8_VECTOR__
  // Compare the two Packet2l operands directly with vec_cmpeq.
  return reinterpret_cast<Packet2l>(vec_cmpeq(a, b));
#else
  // Fallback: compare the operands viewed as Packet4i (32-bit words), then
  // AND each word's result with a permuted copy of the result.
  // NOTE(review): p16uc_COMPLEX32_REV is defined elsewhere; it presumably
  // swaps the two 32-bit words inside each 64-bit lane, so a lane ends up
  // all-ones only when both of its words compared equal -- confirm.
  const Packet4i lhs_words = reinterpret_cast<Packet4i>(a);
  const Packet4i rhs_words = reinterpret_cast<Packet4i>(b);
  Packet4i word_eq = reinterpret_cast<Packet4i>(vec_cmpeq(lhs_words, rhs_words));
  Packet4i word_eq_swapped = vec_perm(word_eq, word_eq, p16uc_COMPLEX32_REV);
  return reinterpret_cast<Packet2l>(pand(word_eq, word_eq_swapped));
#endif
}
template <>
EIGEN_STRONG_INLINE Packet2d pcmp_lt_or_nan(const Packet2d& a, const Packet2d& b) {
Packet2d c = reinterpret_cast<Packet2d>(vec_cmpge(a, b));