From c487a4fe9eab5836330246b992fe7c17c06d6726 Mon Sep 17 00:00:00 2001 From: Aleksei Nikiforov Date: Fri, 15 Aug 2025 20:04:25 +0000 Subject: [PATCH] Clean up most of testsuite on s390x --- Eigen/src/Core/RandomImpl.h | 7 + Eigen/src/Core/arch/ZVector/Complex.h | 37 ++-- Eigen/src/Core/arch/ZVector/MathFunctions.h | 16 +- Eigen/src/Core/arch/ZVector/PacketMath.h | 182 +++++++++++++++++--- 4 files changed, 199 insertions(+), 43 deletions(-) diff --git a/Eigen/src/Core/RandomImpl.h b/Eigen/src/Core/RandomImpl.h index efba33680..1a82e6253 100644 --- a/Eigen/src/Core/RandomImpl.h +++ b/Eigen/src/Core/RandomImpl.h @@ -131,8 +131,15 @@ struct random_longdouble_impl { uint64_t randomBits[2]; long double result = 2.0L; memcpy(&randomBits, &result, Size); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ randomBits[0] |= getRandomBits(numLowBits); randomBits[1] |= getRandomBits(numHighBits); +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + randomBits[0] |= getRandomBits(numHighBits); + randomBits[1] |= getRandomBits(numLowBits); +#else +#error Unexpected or undefined __BYTE_ORDER__ +#endif memcpy(&result, &randomBits, Size); result -= 3.0L; return result; diff --git a/Eigen/src/Core/arch/ZVector/Complex.h b/Eigen/src/Core/arch/ZVector/Complex.h index a750b26b8..692f90f3e 100644 --- a/Eigen/src/Core/arch/ZVector/Complex.h +++ b/Eigen/src/Core/arch/ZVector/Complex.h @@ -20,7 +20,7 @@ namespace internal { #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12) inline Packet4ui p4ui_CONJ_XOR() { - return {0x00000000, 0x80000000, 0x00000000, 0x80000000}; // vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO); + return Packet4ui {0x00000000, 0x80000000, 0x00000000, 0x80000000}; // vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO); } #endif @@ -178,7 +178,7 @@ EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { } template <> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) { - return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2)); + 
return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2())); } template <> EIGEN_STRONG_INLINE Packet1cd pmul(const Packet1cd& a, const Packet1cd& b) { @@ -257,8 +257,27 @@ EIGEN_STRONG_INLINE Packet1cd pdiv(const Packet1cd& a, const Packet1c } template <> -EIGEN_STRONG_INLINE Packet1cd plog(const Packet1cd& a, const Packet1cd& b) { - return plog_complex(a, b); +EIGEN_STRONG_INLINE Packet1cd psqrt(const Packet1cd& a) { + return psqrt_complex(a); +} + +template <> +EIGEN_STRONG_INLINE Packet2cf psqrt(const Packet2cf& a) { + return psqrt_complex(a); +} + +template <> +EIGEN_STRONG_INLINE Packet1cd plog(const Packet1cd& a) { + return plog_complex(a); +} +template <> +EIGEN_STRONG_INLINE Packet2cf plog(const Packet2cf& a) { + return plog_complex(a); +} + +template <> +EIGEN_STRONG_INLINE Packet2cf pexp(const Packet2cf& a) { + return pexp_complex(a); } EIGEN_STRONG_INLINE Packet1cd pcplxflip /**/ (const Packet1cd& x) { @@ -437,16 +456,6 @@ EIGEN_STRONG_INLINE Packet2cf pdiv(const Packet2cf& a, const Packet2c return pdiv_complex(a, b); } -template <> -EIGEN_STRONG_INLINE Packet2cf plog(const Packet2cf& a, const Packet2cf& b) { - return plog_complex(a, b); -} - -template <> -EIGEN_STRONG_INLINE Packet2cf pexp(const Packet2cf& a, const Packet2cf& b) { - return pexp_complex(a, b); -} - EIGEN_STRONG_INLINE Packet2cf pcplxflip /**/ (const Packet2cf& x) { Packet2cf res; res.cd[0] = pcplxflip(x.cd[0]); diff --git a/Eigen/src/Core/arch/ZVector/MathFunctions.h b/Eigen/src/Core/arch/ZVector/MathFunctions.h index 32e042554..348d643ac 100644 --- a/Eigen/src/Core/arch/ZVector/MathFunctions.h +++ b/Eigen/src/Core/arch/ZVector/MathFunctions.h @@ -23,6 +23,20 @@ namespace Eigen { namespace internal { +EIGEN_DOUBLE_PACKET_FUNCTION(atanh, Packet2d) +EIGEN_DOUBLE_PACKET_FUNCTION(log, Packet2d) +EIGEN_DOUBLE_PACKET_FUNCTION(log2, Packet2d) +EIGEN_DOUBLE_PACKET_FUNCTION(tanh, Packet2d) + +EIGEN_FLOAT_PACKET_FUNCTION(atanh, Packet4f) +EIGEN_FLOAT_PACKET_FUNCTION(log, 
Packet4f) +EIGEN_FLOAT_PACKET_FUNCTION(log2, Packet4f) + +EIGEN_GENERIC_PACKET_FUNCTION(atan, Packet2d) +EIGEN_GENERIC_PACKET_FUNCTION(atan, Packet4f) +EIGEN_GENERIC_PACKET_FUNCTION(exp2, Packet2d) +EIGEN_GENERIC_PACKET_FUNCTION(exp2, Packet4f) + #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12) static EIGEN_DECLARE_CONST_Packet4f(1, 1.0f); static EIGEN_DECLARE_CONST_Packet4f(half, 0.5f); @@ -170,7 +184,7 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pexp(cons y = padd(y, p4f_1); // build 2^n - emm0 = (Packet4i){(int)fx[0], (int)fx[1], (int)fx[2], (int)fx[3]}; + emm0 = Packet4i{(int)fx[0], (int)fx[1], (int)fx[2], (int)fx[3]}; emm0 = emm0 + p4i_0x7f; emm0 = emm0 << reinterpret_cast(p4i_23); diff --git a/Eigen/src/Core/arch/ZVector/PacketMath.h b/Eigen/src/Core/arch/ZVector/PacketMath.h index 4d18af0c0..39073ed81 100644 --- a/Eigen/src/Core/arch/ZVector/PacketMath.h +++ b/Eigen/src/Core/arch/ZVector/PacketMath.h @@ -251,6 +251,7 @@ struct unpacket_traits { masked_store_available = false }; typedef Packet4f half; + typedef Packet4i integer_packet; }; template <> struct unpacket_traits { @@ -263,6 +264,7 @@ struct unpacket_traits { masked_store_available = false }; typedef Packet2d half; + typedef Packet2l integer_packet; }; /* Forward declaration */ @@ -314,38 +316,36 @@ inline std::ostream& operator<<(std::ostream& s, const Packet4f& v) { template <> EIGEN_STRONG_INLINE Packet4i pload(const int* from) { - // FIXME: No intrinsic yet EIGEN_DEBUG_ALIGNED_LOAD - Packet* vfrom; - vfrom = (Packet*)from; - return vfrom->v4i; + return vec_xl(0, from); } template <> EIGEN_STRONG_INLINE Packet2d pload(const double* from) { - // FIXME: No intrinsic yet EIGEN_DEBUG_ALIGNED_LOAD - Packet* vfrom; - vfrom = (Packet*)from; - return vfrom->v2d; + return vec_xl(0, from); } template <> EIGEN_STRONG_INLINE void pstore(int* to, const Packet4i& from) { - // FIXME: No intrinsic yet EIGEN_DEBUG_ALIGNED_STORE - Packet* vto; - vto = (Packet*)to; - vto->v4i = 
from; + vec_xst(from, 0, to); } template <> EIGEN_STRONG_INLINE void pstore(double* to, const Packet2d& from) { - // FIXME: No intrinsic yet EIGEN_DEBUG_ALIGNED_STORE - Packet* vto; - vto = (Packet*)to; - vto->v2d = from; + vec_xst(from, 0, to); +} + +template <> +EIGEN_STRONG_INLINE Packet4f pfrexp(const Packet4f& a, Packet4f& exponent) { + return pfrexp_generic(a, exponent); +} + +template <> +EIGEN_STRONG_INLINE Packet2d pfrexp(const Packet2d& a, Packet2d& exponent) { + return pfrexp_generic(a, exponent); } template <> @@ -541,7 +541,8 @@ EIGEN_STRONG_INLINE Packet2d pandnot(const Packet2d& a, const Packet2d template <> EIGEN_STRONG_INLINE Packet2d pround(const Packet2d& a) { - return vec_round(a); + /* vec_round does not give the rounding pround needs, so VFIDB is issued directly with an explicit rounding-mode operand of 1 (presumably round-to-nearest, ties away from zero; confirm against the z/Architecture VFI definition) */ + return __builtin_s390_vfidb(a, 0, 1); } template <> EIGEN_STRONG_INLINE Packet2d pceil(const Packet2d& a) { @@ -591,6 +592,45 @@ EIGEN_STRONG_INLINE void prefetch(const double* addr) { EIGEN_ZVECTOR_PREFETCH(addr); } +template +EIGEN_STRONG_INLINE Packet2l parithmetic_shift_right(const Packet2l& a) { + return Packet2l { parithmetic_shift_right(a[0]), parithmetic_shift_right(a[1]) }; +} +template +EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(const Packet4i& a) { + return Packet4i { + parithmetic_shift_right(a[0]), + parithmetic_shift_right(a[1]), + parithmetic_shift_right(a[2]), + parithmetic_shift_right(a[3]) }; +} + +template +EIGEN_STRONG_INLINE Packet2l plogical_shift_right(const Packet2l& a) { + return Packet2l { plogical_shift_right(a[0]), plogical_shift_right(a[1]) }; +} +template +EIGEN_STRONG_INLINE Packet4i plogical_shift_right(const Packet4i& a) { + return Packet4i { + plogical_shift_right(a[0]), + plogical_shift_right(a[1]), + plogical_shift_right(a[2]), + plogical_shift_right(a[3]) }; +} + +template 
+EIGEN_STRONG_INLINE Packet2l plogical_shift_left(const Packet2l& a) { + return Packet2l { plogical_shift_left(a[0]), plogical_shift_left(a[1]) }; +} +template +EIGEN_STRONG_INLINE Packet4i 
plogical_shift_left(const Packet4i& a) { + return Packet4i { + plogical_shift_left(a[0]), + plogical_shift_left(a[1]), + plogical_shift_left(a[2]), + plogical_shift_left(a[3]) }; +} + template <> EIGEN_STRONG_INLINE int pfirst(const Packet4i& a) { EIGEN_ALIGN16 int x[4]; @@ -907,8 +947,8 @@ EIGEN_STRONG_INLINE Packet4f pandnot(const Packet4f& a, const Packet4f template <> EIGEN_STRONG_INLINE Packet4f pround(const Packet4f& a) { Packet4f res; - res.v4f[0] = vec_round(a.v4f[0]); - res.v4f[1] = vec_round(a.v4f[1]); + res.v4f[0] = generic_round(a.v4f[0]); + res.v4f[1] = generic_round(a.v4f[1]); return res; } @@ -1068,20 +1108,14 @@ Packet4f EIGEN_STRONG_INLINE pcmp_eq(const Packet4f& a, const Packet4f #else template <> EIGEN_STRONG_INLINE Packet4f pload(const float* from) { - // FIXME: No intrinsic yet EIGEN_DEBUG_ALIGNED_LOAD - Packet* vfrom; - vfrom = (Packet*)from; - return vfrom->v4f; + return vec_xl(0, from); } template <> EIGEN_STRONG_INLINE void pstore(float* to, const Packet4f& from) { - // FIXME: No intrinsic yet EIGEN_DEBUG_ALIGNED_STORE - Packet* vto; - vto = (Packet*)to; - vto->v4f = from; + vec_xst(from, 0, to); } template <> @@ -1172,7 +1206,8 @@ EIGEN_STRONG_INLINE Packet4f pandnot(const Packet4f& a, const Packet4f } template <> EIGEN_STRONG_INLINE Packet4f pround(const Packet4f& a) { - return vec_round(a); + /* vec_round does not give the rounding pround needs, so VFISB is issued directly with an explicit rounding-mode operand of 1 (presumably round-to-nearest, ties away from zero; confirm against the z/Architecture VFI definition) */ + return __builtin_s390_vfisb(a, 0, 1); } template <> EIGEN_STRONG_INLINE Packet4f pceil(const Packet4f& a) { @@ -1263,6 +1298,28 @@ EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& #endif +template <> +EIGEN_STRONG_INLINE Packet4f pldexp(const Packet4f& a, const Packet4f& exponent) { + return pldexp_generic(a, exponent); +} + +template <> +EIGEN_STRONG_INLINE Packet2d pldexp(const Packet2d& 
a, const Packet2d& exponent) { + // Clamp exponent to [-2099, 2099] + const Packet2d max_exponent = pset1(2099.0); + const Packet2l e = pcast(pmin(pmax(exponent, 
pnegate(max_exponent)), max_exponent)); + + // Split 2^e into four factors and multiply: + const Packet2l bias = {1023, 1023}; + Packet2l b = plogical_shift_right<2>(e); // e >> 2 (logical); agrees with floor(e/4) in the low bits, which are the only ones kept by the << 52 below + Packet2d c = reinterpret_cast(plogical_shift_left<52>(b + bias)); + Packet2d out = pmul(pmul(pmul(a, c), c), c); // a * 2^(3b) + b = psub(psub(psub(e, b), b), b); // e - 3b + c = reinterpret_cast(plogical_shift_left<52>(b + bias)); // 2^(e - 3b) + out = pmul(out, c); // a * 2^e + return out; +} + template <> EIGEN_STRONG_INLINE void prefetch(const float* addr) { EIGEN_ZVECTOR_PREFETCH(addr); @@ -1280,6 +1337,75 @@ EIGEN_STRONG_INLINE Packet4f plset(const float& a) { return padd(pset1(a), p4f_COUNTDOWN); } +#if !defined(vec_float) || !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ < 13) +#pragma GCC warning \ + "float->int and int->float conversion is simulated. compile for z15 for improved performance" +template <> +struct cast_impl { + EIGEN_DEVICE_FUNC static inline Packet4f run(const Packet4i& a) { + return Packet4f{float(a[0]), float(a[1]), float(a[2]), float(a[3]) }; + } +}; + +template <> +struct cast_impl { + EIGEN_DEVICE_FUNC static inline Packet4i run(const Packet4f& a) { + return Packet4i{int(a[0]), int(a[1]), int(a[2]), int(a[3]) }; + } +}; + +template <> +struct cast_impl { + EIGEN_DEVICE_FUNC static inline Packet2d run(const Packet2l& a) { + return Packet2d{double(a[0]), double(a[1]) }; + } +}; + +template <> +struct cast_impl { + EIGEN_DEVICE_FUNC static inline Packet2l run(const Packet2d& a) { + return Packet2l{(long long)(a[0]), (long long)(a[1]) }; + } +}; +#else +template <> +struct cast_impl { + EIGEN_DEVICE_FUNC static inline Packet4f run(const Packet4i& a) { + return vec_float(a); + } +}; + +template <> +struct cast_impl { + EIGEN_DEVICE_FUNC static inline Packet4i run(const Packet4f& a) { + return vec_signed(a); + } +}; + +template <> +struct cast_impl { + 
EIGEN_DEVICE_FUNC static inline Packet2d run(const Packet2l& a) { + return vec_double(a); + } +}; + 
+template <> +struct cast_impl { + EIGEN_DEVICE_FUNC static inline Packet2l run(const Packet2d& a) { + return vec_signed(a); + } +}; +#endif + +template <> +EIGEN_STRONG_INLINE Packet4f pset1frombits(uint32_t from) { + return pset1(Eigen::numext::bit_cast(from)); +} +template <> +EIGEN_STRONG_INLINE Packet2d pset1frombits(uint64_t from) { + return pset1(Eigen::numext::bit_cast(from)); +} + } // end namespace internal } // end namespace Eigen