Clean up most of testsuite on s390x

This commit is contained in:
Aleksei Nikiforov 2025-08-15 20:04:25 +00:00 committed by Rasmus Munk Larsen
parent 4033cfcc1d
commit c487a4fe9e
4 changed files with 199 additions and 43 deletions

View File

@ -131,8 +131,15 @@ struct random_longdouble_impl {
uint64_t randomBits[2];
long double result = 2.0L;
memcpy(&randomBits, &result, Size);
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
randomBits[0] |= getRandomBits<uint64_t>(numLowBits);
randomBits[1] |= getRandomBits<uint64_t>(numHighBits);
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
randomBits[0] |= getRandomBits<uint64_t>(numHighBits);
randomBits[1] |= getRandomBits<uint64_t>(numLowBits);
#else
#error Unexpected or undefined __BYTE_ORDER__
#endif
memcpy(&result, &randomBits, Size);
result -= 3.0L;
return result;

View File

@ -20,7 +20,7 @@ namespace internal {
#if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)
inline Packet4ui p4ui_CONJ_XOR() {
return {0x00000000, 0x80000000, 0x00000000, 0x80000000}; // vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO);
return Packet4ui {0x00000000, 0x80000000, 0x00000000, 0x80000000}; // vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO);
}
#endif
@ -178,7 +178,7 @@ EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) {
}
template <>
EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) {
return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2));
return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2()));
}
template <>
EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
@ -257,8 +257,27 @@ EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1c
}
template <>
EIGEN_STRONG_INLINE Packet1cd plog<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
return plog_complex(a, b);
EIGEN_STRONG_INLINE Packet1cd psqrt<Packet1cd>(const Packet1cd& a) {
return psqrt_complex<Packet1cd>(a);
}
// psqrt specialization for Packet2cf: forwards to the shared complex
// square-root routine psqrt_complex.
template <>
EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(const Packet2cf& a) {
  const Packet2cf root = psqrt_complex<Packet2cf>(a);
  return root;
}
// plog specialization for Packet1cd: forwards to the shared complex
// logarithm routine plog_complex.
template <>
EIGEN_STRONG_INLINE Packet1cd plog<Packet1cd>(const Packet1cd& a) {
  const Packet1cd logarithm = plog_complex<Packet1cd>(a);
  return logarithm;
}
// plog specialization for Packet2cf: forwards to the shared complex
// logarithm routine plog_complex.
template <>
EIGEN_STRONG_INLINE Packet2cf plog<Packet2cf>(const Packet2cf& a) {
  const Packet2cf logarithm = plog_complex<Packet2cf>(a);
  return logarithm;
}
// pexp specialization for Packet2cf: forwards to pexp_complex, letting the
// packet type be deduced from the argument.
template <>
EIGEN_STRONG_INLINE Packet2cf pexp<Packet2cf>(const Packet2cf& a) {
  const Packet2cf exponential = pexp_complex(a);
  return exponential;
}
EIGEN_STRONG_INLINE Packet1cd pcplxflip /*<Packet1cd>*/ (const Packet1cd& x) {
@ -437,16 +456,6 @@ EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2c
return pdiv_complex(a, b);
}
template <>
EIGEN_STRONG_INLINE Packet2cf plog<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
return plog_complex(a, b);
}
template <>
EIGEN_STRONG_INLINE Packet2cf pexp<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
return pexp_complex(a, b);
}
EIGEN_STRONG_INLINE Packet2cf pcplxflip /*<Packet2cf>*/ (const Packet2cf& x) {
Packet2cf res;
res.cd[0] = pcplxflip(x.cd[0]);

View File

@ -23,6 +23,20 @@ namespace Eigen {
namespace internal {
// Math functions enabled for the ZVector packet types Packet2d and Packet4f
// through Eigen's helper macros.
// NOTE(review): each macro presumably emits the p<name> specialization for the
// given packet type, choosing the double-, float- or generic-based
// implementation according to the macro name -- confirm against the macro
// definitions, which are not visible here.
EIGEN_DOUBLE_PACKET_FUNCTION(atanh, Packet2d)
EIGEN_DOUBLE_PACKET_FUNCTION(log, Packet2d)
EIGEN_DOUBLE_PACKET_FUNCTION(log2, Packet2d)
EIGEN_DOUBLE_PACKET_FUNCTION(tanh, Packet2d)
EIGEN_FLOAT_PACKET_FUNCTION(atanh, Packet4f)
EIGEN_FLOAT_PACKET_FUNCTION(log, Packet4f)
EIGEN_FLOAT_PACKET_FUNCTION(log2, Packet4f)
EIGEN_GENERIC_PACKET_FUNCTION(atan, Packet2d)
EIGEN_GENERIC_PACKET_FUNCTION(atan, Packet4f)
EIGEN_GENERIC_PACKET_FUNCTION(exp2, Packet2d)
EIGEN_GENERIC_PACKET_FUNCTION(exp2, Packet4f)
#if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)
static EIGEN_DECLARE_CONST_Packet4f(1, 1.0f);
static EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
@ -170,7 +184,7 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pexp<Packet4f>(cons
y = padd(y, p4f_1);
// build 2^n
emm0 = (Packet4i){(int)fx[0], (int)fx[1], (int)fx[2], (int)fx[3]};
emm0 = Packet4i{(int)fx[0], (int)fx[1], (int)fx[2], (int)fx[3]};
emm0 = emm0 + p4i_0x7f;
emm0 = emm0 << reinterpret_cast<Packet4i>(p4i_23);

View File

@ -251,6 +251,7 @@ struct unpacket_traits<Packet4f> {
masked_store_available = false
};
typedef Packet4f half;
typedef Packet4i integer_packet;
};
template <>
struct unpacket_traits<Packet2d> {
@ -263,6 +264,7 @@ struct unpacket_traits<Packet2d> {
masked_store_available = false
};
typedef Packet2d half;
typedef Packet2l integer_packet;
};
/* Forward declaration */
@ -314,38 +316,36 @@ inline std::ostream& operator<<(std::ostream& s, const Packet4f& v) {
template <>
EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) {
// FIXME: No intrinsic yet
EIGEN_DEBUG_ALIGNED_LOAD
Packet* vfrom;
vfrom = (Packet*)from;
return vfrom->v4i;
return vec_xl(0, from);
}
template <>
EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) {
// FIXME: No intrinsic yet
EIGEN_DEBUG_ALIGNED_LOAD
Packet* vfrom;
vfrom = (Packet*)from;
return vfrom->v2d;
return vec_xl(0, from);
}
template <>
EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) {
// FIXME: No intrinsic yet
EIGEN_DEBUG_ALIGNED_STORE
Packet* vto;
vto = (Packet*)to;
vto->v4i = from;
vec_xst(from, 0, to);
}
template <>
EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) {
// FIXME: No intrinsic yet
EIGEN_DEBUG_ALIGNED_STORE
Packet* vto;
vto = (Packet*)to;
vto->v2d = from;
vec_xst(from, 0, to);
}
// pfrexp specialization for Packet4f: delegates to pfrexp_generic, which
// returns the result packet and fills `exponent` as an output argument.
template <>
EIGEN_STRONG_INLINE Packet4f pfrexp<Packet4f>(const Packet4f& a, Packet4f& exponent) {
  const Packet4f mantissa = pfrexp_generic(a, exponent);
  return mantissa;
}
// pfrexp specialization for Packet2d: delegates to pfrexp_generic, which
// returns the result packet and fills `exponent` as an output argument.
template <>
EIGEN_STRONG_INLINE Packet2d pfrexp<Packet2d>(const Packet2d& a, Packet2d& exponent) {
  const Packet2d mantissa = pfrexp_generic(a, exponent);
  return mantissa;
}
template <>
@ -541,7 +541,8 @@ EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d
template <>
EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) {
return vec_round(a);
/* Uses non-default rounding for vec_round */
return __builtin_s390_vfidb(a, 0, 1);
}
template <>
EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) {
@ -591,6 +592,45 @@ EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) {
EIGEN_ZVECTOR_PREFETCH(addr);
}
// Arithmetic right shift by N for Packet2l: applies the scalar
// parithmetic_shift_right<N> to each of the two lanes and repacks the results.
template <int N>
EIGEN_STRONG_INLINE Packet2l parithmetic_shift_right(const Packet2l& a) {
  const auto lane0 = parithmetic_shift_right<N>(a[0]);
  const auto lane1 = parithmetic_shift_right<N>(a[1]);
  return Packet2l{lane0, lane1};
}
// Arithmetic right shift by N for Packet4i: applies the scalar
// parithmetic_shift_right<N> to each of the four lanes and repacks the results.
template <int N>
EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(const Packet4i& a) {
  const auto lane0 = parithmetic_shift_right<N>(a[0]);
  const auto lane1 = parithmetic_shift_right<N>(a[1]);
  const auto lane2 = parithmetic_shift_right<N>(a[2]);
  const auto lane3 = parithmetic_shift_right<N>(a[3]);
  return Packet4i{lane0, lane1, lane2, lane3};
}
// Logical right shift by N for Packet2l: applies the scalar
// plogical_shift_right<N> to each of the two lanes and repacks the results.
template <int N>
EIGEN_STRONG_INLINE Packet2l plogical_shift_right(const Packet2l& a) {
  const auto lane0 = plogical_shift_right<N>(a[0]);
  const auto lane1 = plogical_shift_right<N>(a[1]);
  return Packet2l{lane0, lane1};
}
// Logical right shift by N for Packet4i: applies the scalar
// plogical_shift_right<N> to each of the four lanes and repacks the results.
template <int N>
EIGEN_STRONG_INLINE Packet4i plogical_shift_right(const Packet4i& a) {
  const auto lane0 = plogical_shift_right<N>(a[0]);
  const auto lane1 = plogical_shift_right<N>(a[1]);
  const auto lane2 = plogical_shift_right<N>(a[2]);
  const auto lane3 = plogical_shift_right<N>(a[3]);
  return Packet4i{lane0, lane1, lane2, lane3};
}
// Logical left shift by N for Packet2l: applies the scalar
// plogical_shift_left<N> to each of the two lanes and repacks the results.
template <int N>
EIGEN_STRONG_INLINE Packet2l plogical_shift_left(const Packet2l& a) {
  const auto lane0 = plogical_shift_left<N>(a[0]);
  const auto lane1 = plogical_shift_left<N>(a[1]);
  return Packet2l{lane0, lane1};
}
// Logical left shift by N for Packet4i: applies the scalar
// plogical_shift_left<N> to each of the four lanes and repacks the results.
template <int N>
EIGEN_STRONG_INLINE Packet4i plogical_shift_left(const Packet4i& a) {
  const auto lane0 = plogical_shift_left<N>(a[0]);
  const auto lane1 = plogical_shift_left<N>(a[1]);
  const auto lane2 = plogical_shift_left<N>(a[2]);
  const auto lane3 = plogical_shift_left<N>(a[3]);
  return Packet4i{lane0, lane1, lane2, lane3};
}
template <>
EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) {
EIGEN_ALIGN16 int x[4];
@ -907,8 +947,8 @@ EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f
template <>
EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) {
Packet4f res;
res.v4f[0] = vec_round(a.v4f[0]);
res.v4f[1] = vec_round(a.v4f[1]);
res.v4f[0] = generic_round(a.v4f[0]);
res.v4f[1] = generic_round(a.v4f[1]);
return res;
}
@ -1068,20 +1108,14 @@ Packet4f EIGEN_STRONG_INLINE pcmp_eq<Packet4f>(const Packet4f& a, const Packet4f
#else
template <>
EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) {
// FIXME: No intrinsic yet
EIGEN_DEBUG_ALIGNED_LOAD
Packet* vfrom;
vfrom = (Packet*)from;
return vfrom->v4f;
return vec_xl(0, from);
}
template <>
EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) {
// FIXME: No intrinsic yet
EIGEN_DEBUG_ALIGNED_STORE
Packet* vto;
vto = (Packet*)to;
vto->v4f = from;
vec_xst(from, 0, to);
}
template <>
@ -1172,7 +1206,8 @@ EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f
}
template <>
EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) {
return vec_round(a);
/* Uses non-default rounding for vec_round */
return __builtin_s390_vfisb(a, 0, 1);
}
template <>
EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) {
@ -1263,6 +1298,28 @@ EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f&
#endif
// pldexp specialization for Packet4f: delegates to pldexp_generic with the
// value packet and the exponent packet.
template <>
EIGEN_STRONG_INLINE Packet4f pldexp<Packet4f>(const Packet4f& a, const Packet4f& exponent) {
  const Packet4f scaled = pldexp_generic(a, exponent);
  return scaled;
}
// pldexp specialization for Packet2d: returns a * 2^exponent per lane.
// The exponent is clamped to [-2099, 2099], converted to integers, and 2^e is
// applied as four separate power-of-two factors (three times 2^b, then
// 2^(e - 3b)), each built by adding the bias 1023 to an integer and shifting
// it left by 52 bits before reinterpreting the lanes as doubles.
template <>
EIGEN_STRONG_INLINE Packet2d pldexp<Packet2d>(const Packet2d& a, const Packet2d& exponent) {
// Clamp exponent to [-2099, 2099]
const Packet2d max_exponent = pset1<Packet2d>(2099.0);
const Packet2l e = pcast<Packet2d, Packet2l>(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent));
// Split 2^e into four factors and multiply:
const Packet2l bias = {1023, 1023};
// NOTE(review): this is a logical shift, so for negative e the full lane value
// is not floor(e/4). The result presumably still works because the later
// shift left by 52 keeps only the low 12 bits of (b + bias), which match the
// arithmetic-shift result -- confirm, assuming 64-bit lanes.
Packet2l b = plogical_shift_right<2>(e); // floor(e/4)
Packet2d c = reinterpret_cast<Packet2d>(plogical_shift_left<52>(b + bias));
Packet2d out = pmul(pmul(pmul(a, c), c), c); // a * 2^(3b)
b = psub(psub(psub(e, b), b), b); // e - 3b
c = reinterpret_cast<Packet2d>(plogical_shift_left<52>(b + bias)); // 2^(e - 3b)
out = pmul(out, c); // a * 2^e
return out;
}
template <>
EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) {
EIGEN_ZVECTOR_PREFETCH(addr);
@ -1280,6 +1337,75 @@ EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) {
return padd<Packet4f>(pset1<Packet4f>(a), p4f_COUNTDOWN);
}
// Packet conversions between integer and floating-point packets
// (Packet4i <-> Packet4f, Packet2l <-> Packet2d).
// When vec_float is not defined as a macro, or __ARCH__ is undefined or below
// 13, each lane is converted individually; otherwise the vector intrinsics are
// used.
#if !defined(vec_float) || !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ < 13)
#pragma GCC warning \
"float->int and int->float conversion is simulated. compile for z15 for improved performance"
// Lane-by-lane int -> float conversion.
template <>
struct cast_impl<Packet4i, Packet4f> {
  EIGEN_DEVICE_FUNC static inline Packet4f run(const Packet4i& a) {
    return Packet4f{static_cast<float>(a[0]), static_cast<float>(a[1]), static_cast<float>(a[2]),
                    static_cast<float>(a[3])};
  }
};
// Lane-by-lane float -> int conversion.
template <>
struct cast_impl<Packet4f, Packet4i> {
  EIGEN_DEVICE_FUNC static inline Packet4i run(const Packet4f& a) {
    return Packet4i{static_cast<int>(a[0]), static_cast<int>(a[1]), static_cast<int>(a[2]),
                    static_cast<int>(a[3])};
  }
};
// Lane-by-lane 64-bit integer -> double conversion.
template <>
struct cast_impl<Packet2l, Packet2d> {
  EIGEN_DEVICE_FUNC static inline Packet2d run(const Packet2l& a) {
    return Packet2d{static_cast<double>(a[0]), static_cast<double>(a[1])};
  }
};
// Lane-by-lane double -> long long conversion.
template <>
struct cast_impl<Packet2d, Packet2l> {
  EIGEN_DEVICE_FUNC static inline Packet2l run(const Packet2d& a) {
    return Packet2l{static_cast<long long>(a[0]), static_cast<long long>(a[1])};
  }
};
#else
// int -> float conversion through vec_float.
template <>
struct cast_impl<Packet4i, Packet4f> {
  EIGEN_DEVICE_FUNC static inline Packet4f run(const Packet4i& a) {
    const Packet4f converted = vec_float(a);
    return converted;
  }
};
// float -> int conversion through vec_signed.
template <>
struct cast_impl<Packet4f, Packet4i> {
  EIGEN_DEVICE_FUNC static inline Packet4i run(const Packet4f& a) {
    const Packet4i converted = vec_signed(a);
    return converted;
  }
};
// 64-bit integer -> double conversion through vec_double.
template <>
struct cast_impl<Packet2l, Packet2d> {
  EIGEN_DEVICE_FUNC static inline Packet2d run(const Packet2l& a) {
    const Packet2d converted = vec_double(a);
    return converted;
  }
};
// double -> 64-bit integer conversion through vec_signed.
template <>
struct cast_impl<Packet2d, Packet2l> {
  EIGEN_DEVICE_FUNC static inline Packet2l run(const Packet2d& a) {
    const Packet2l converted = vec_signed(a);
    return converted;
  }
};
#endif
// Broadcasts a 32-bit pattern to every lane of a Packet4f: the bits are
// reinterpreted as a float via numext::bit_cast and then passed to pset1.
template <>
EIGEN_STRONG_INLINE Packet4f pset1frombits<Packet4f>(uint32_t from) {
  const float value = Eigen::numext::bit_cast<float>(from);
  return pset1<Packet4f>(value);
}
// Broadcasts a 64-bit pattern to every lane of a Packet2d: the bits are
// reinterpreted as a double via numext::bit_cast and then passed to pset1.
template <>
EIGEN_STRONG_INLINE Packet2d pset1frombits<Packet2d>(uint64_t from) {
  const double value = Eigen::numext::bit_cast<double>(from);
  return pset1<Packet2d>(value);
}
} // end namespace internal
} // end namespace Eigen