mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-18 06:35:55 +08:00
Clean up most of testsuite on s390x
This commit is contained in:
parent
4033cfcc1d
commit
c487a4fe9e
@ -131,8 +131,15 @@ struct random_longdouble_impl {
|
||||
uint64_t randomBits[2];
|
||||
long double result = 2.0L;
|
||||
memcpy(&randomBits, &result, Size);
|
||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
randomBits[0] |= getRandomBits<uint64_t>(numLowBits);
|
||||
randomBits[1] |= getRandomBits<uint64_t>(numHighBits);
|
||||
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
randomBits[0] |= getRandomBits<uint64_t>(numHighBits);
|
||||
randomBits[1] |= getRandomBits<uint64_t>(numLowBits);
|
||||
#else
|
||||
#error Unexpected or undefined __BYTE_ORDER__
|
||||
#endif
|
||||
memcpy(&result, &randomBits, Size);
|
||||
result -= 3.0L;
|
||||
return result;
|
||||
|
@ -20,7 +20,7 @@ namespace internal {
|
||||
|
||||
#if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)
|
||||
// Returns the XOR mask with the top bit (0x80000000) set in 32-bit lanes 1 and 3
// and zero in lanes 0 and 2.
// NOTE(review): presumably lanes 1 and 3 hold the imaginary parts of packed
// complex floats, so XOR-ing flips their sign — confirm against Packet2cf.
inline Packet4ui p4ui_CONJ_XOR() {
  // The vector type is named explicitly instead of returning a bare braced list.
  // Same value as vec_mergeh((Packet4ui)p4i_ZERO, (Packet4ui)p4f_MZERO).
  return Packet4ui{0x00000000, 0x80000000, 0x00000000, 0x80000000};
}
|
||||
#endif
|
||||
|
||||
@ -178,7 +178,7 @@ EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) {
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a) {
|
||||
return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2));
|
||||
return Packet1cd((Packet2d)vec_xor((Packet2d)a.v, (Packet2d)p2ul_CONJ_XOR2()));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
||||
@ -257,8 +257,27 @@ EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1c
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd plog<Packet1cd>(const Packet1cd& a, const Packet1cd& b) {
|
||||
return plog_complex(a, b);
|
||||
EIGEN_STRONG_INLINE Packet1cd psqrt<Packet1cd>(const Packet1cd& a) {
|
||||
return psqrt_complex<Packet1cd>(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(const Packet2cf& a) {
|
||||
return psqrt_complex<Packet2cf>(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet1cd plog<Packet1cd>(const Packet1cd& a) {
|
||||
return plog_complex<Packet1cd>(a);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf plog<Packet2cf>(const Packet2cf& a) {
|
||||
return plog_complex<Packet2cf>(a);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pexp<Packet2cf>(const Packet2cf& a) {
|
||||
return pexp_complex(a);
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet1cd pcplxflip /*<Packet1cd>*/ (const Packet1cd& x) {
|
||||
@ -437,16 +456,6 @@ EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2c
|
||||
return pdiv_complex(a, b);
|
||||
}
|
||||
|
||||
// Two-argument Packet2cf specialization of plog; passes both packets to plog_complex.
// NOTE(review): a one-argument plog<Packet2cf> that calls plog_complex<Packet2cf>(a)
// also appears in this listing. This two-argument form looks like the pre-change
// side of the diff — confirm it is meant to be removed rather than kept.
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf plog<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
||||
return plog_complex(a, b);
|
||||
}
|
||||
|
||||
// Two-argument Packet2cf specialization of pexp; passes both packets to pexp_complex.
// NOTE(review): a one-argument pexp<Packet2cf> that calls pexp_complex(a) also
// appears in this listing. This two-argument form looks like the pre-change side
// of the diff — confirm it is meant to be removed rather than kept.
template <>
|
||||
EIGEN_STRONG_INLINE Packet2cf pexp<Packet2cf>(const Packet2cf& a, const Packet2cf& b) {
|
||||
return pexp_complex(a, b);
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pcplxflip /*<Packet2cf>*/ (const Packet2cf& x) {
|
||||
Packet2cf res;
|
||||
res.cd[0] = pcplxflip(x.cd[0]);
|
||||
|
@ -23,6 +23,20 @@ namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Each line below invokes a helper macro with a math-function name and a packet type.
// NOTE(review): presumably each expands to the p<name> specialization for that
// packet, forwarding to a shared implementation — verify against the macro definitions.

// Double-precision functions for Packet2d.
EIGEN_DOUBLE_PACKET_FUNCTION(atanh, Packet2d)
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(log, Packet2d)
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(log2, Packet2d)
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(tanh, Packet2d)
|
||||
|
||||
// Single-precision functions for Packet4f.
EIGEN_FLOAT_PACKET_FUNCTION(atanh, Packet4f)
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(log, Packet4f)
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(log2, Packet4f)
|
||||
|
||||
// Functions instantiated through the generic macro for both packet types.
EIGEN_GENERIC_PACKET_FUNCTION(atan, Packet2d)
|
||||
EIGEN_GENERIC_PACKET_FUNCTION(atan, Packet4f)
|
||||
EIGEN_GENERIC_PACKET_FUNCTION(exp2, Packet2d)
|
||||
EIGEN_GENERIC_PACKET_FUNCTION(exp2, Packet4f)
|
||||
|
||||
#if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)
|
||||
static EIGEN_DECLARE_CONST_Packet4f(1, 1.0f);
|
||||
static EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
|
||||
@ -170,7 +184,7 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pexp<Packet4f>(cons
|
||||
y = padd(y, p4f_1);
|
||||
|
||||
// build 2^n
|
||||
emm0 = (Packet4i){(int)fx[0], (int)fx[1], (int)fx[2], (int)fx[3]};
|
||||
emm0 = Packet4i{(int)fx[0], (int)fx[1], (int)fx[2], (int)fx[3]};
|
||||
emm0 = emm0 + p4i_0x7f;
|
||||
emm0 = emm0 << reinterpret_cast<Packet4i>(p4i_23);
|
||||
|
||||
|
@ -251,6 +251,7 @@ struct unpacket_traits<Packet4f> {
|
||||
masked_store_available = false
|
||||
};
|
||||
typedef Packet4f half;
|
||||
typedef Packet4i integer_packet;
|
||||
};
|
||||
template <>
|
||||
struct unpacket_traits<Packet2d> {
|
||||
@ -263,6 +264,7 @@ struct unpacket_traits<Packet2d> {
|
||||
masked_store_available = false
|
||||
};
|
||||
typedef Packet2d half;
|
||||
typedef Packet2l integer_packet;
|
||||
};
|
||||
|
||||
/* Forward declaration */
|
||||
@ -314,38 +316,36 @@ inline std::ostream& operator<<(std::ostream& s, const Packet4f& v) {
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from) {
|
||||
// FIXME: No intrinsic yet
|
||||
EIGEN_DEBUG_ALIGNED_LOAD
|
||||
Packet* vfrom;
|
||||
vfrom = (Packet*)from;
|
||||
return vfrom->v4i;
|
||||
return vec_xl(0, from);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) {
|
||||
// FIXME: No intrinsic yet
|
||||
EIGEN_DEBUG_ALIGNED_LOAD
|
||||
Packet* vfrom;
|
||||
vfrom = (Packet*)from;
|
||||
return vfrom->v2d;
|
||||
return vec_xl(0, from);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from) {
|
||||
// FIXME: No intrinsic yet
|
||||
EIGEN_DEBUG_ALIGNED_STORE
|
||||
Packet* vto;
|
||||
vto = (Packet*)to;
|
||||
vto->v4i = from;
|
||||
vec_xst(from, 0, to);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) {
|
||||
// FIXME: No intrinsic yet
|
||||
EIGEN_DEBUG_ALIGNED_STORE
|
||||
Packet* vto;
|
||||
vto = (Packet*)to;
|
||||
vto->v2d = from;
|
||||
vec_xst(from, 0, to);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4f pfrexp<Packet4f>(const Packet4f& a, Packet4f& exponent) {
|
||||
return pfrexp_generic(a, exponent);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d pfrexp<Packet2d>(const Packet2d& a, Packet2d& exponent) {
|
||||
return pfrexp_generic(a, exponent);
|
||||
}
|
||||
|
||||
template <>
|
||||
@ -541,7 +541,8 @@ EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) {
|
||||
return vec_round(a);
|
||||
/* Uses non-default rounding for vec_round */
|
||||
return __builtin_s390_vfidb(a, 0, 1);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) {
|
||||
@ -591,6 +592,45 @@ EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) {
|
||||
EIGEN_ZVECTOR_PREFETCH(addr);
|
||||
}
|
||||
|
||||
template <int N>
|
||||
EIGEN_STRONG_INLINE Packet2l parithmetic_shift_right(const Packet2l& a) {
|
||||
return Packet2l { parithmetic_shift_right<N>(a[0]), parithmetic_shift_right<N>(a[1]) };
|
||||
}
|
||||
template <int N>
|
||||
EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(const Packet4i& a) {
|
||||
return Packet4i {
|
||||
parithmetic_shift_right<N>(a[0]),
|
||||
parithmetic_shift_right<N>(a[1]),
|
||||
parithmetic_shift_right<N>(a[2]),
|
||||
parithmetic_shift_right<N>(a[3]) };
|
||||
}
|
||||
|
||||
template <int N>
|
||||
EIGEN_STRONG_INLINE Packet2l plogical_shift_right(const Packet2l& a) {
|
||||
return Packet2l { plogical_shift_right<N>(a[0]), plogical_shift_right<N>(a[1]) };
|
||||
}
|
||||
template <int N>
|
||||
EIGEN_STRONG_INLINE Packet4i plogical_shift_right(const Packet4i& a) {
|
||||
return Packet4i {
|
||||
plogical_shift_right<N>(a[0]),
|
||||
plogical_shift_right<N>(a[1]),
|
||||
plogical_shift_right<N>(a[2]),
|
||||
plogical_shift_right<N>(a[3]) };
|
||||
}
|
||||
|
||||
template <int N>
|
||||
EIGEN_STRONG_INLINE Packet2l plogical_shift_left(const Packet2l& a) {
|
||||
return Packet2l { plogical_shift_left<N>(a[0]), plogical_shift_left<N>(a[1]) };
|
||||
}
|
||||
template <int N>
|
||||
EIGEN_STRONG_INLINE Packet4i plogical_shift_left(const Packet4i& a) {
|
||||
return Packet4i {
|
||||
plogical_shift_left<N>(a[0]),
|
||||
plogical_shift_left<N>(a[1]),
|
||||
plogical_shift_left<N>(a[2]),
|
||||
plogical_shift_left<N>(a[3]) };
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) {
|
||||
EIGEN_ALIGN16 int x[4];
|
||||
@ -907,8 +947,8 @@ EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) {
|
||||
Packet4f res;
|
||||
res.v4f[0] = vec_round(a.v4f[0]);
|
||||
res.v4f[1] = vec_round(a.v4f[1]);
|
||||
res.v4f[0] = generic_round(a.v4f[0]);
|
||||
res.v4f[1] = generic_round(a.v4f[1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -1068,20 +1108,14 @@ Packet4f EIGEN_STRONG_INLINE pcmp_eq<Packet4f>(const Packet4f& a, const Packet4f
|
||||
#else
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) {
|
||||
// FIXME: No intrinsic yet
|
||||
EIGEN_DEBUG_ALIGNED_LOAD
|
||||
Packet* vfrom;
|
||||
vfrom = (Packet*)from;
|
||||
return vfrom->v4f;
|
||||
return vec_xl(0, from);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) {
|
||||
// FIXME: No intrinsic yet
|
||||
EIGEN_DEBUG_ALIGNED_STORE
|
||||
Packet* vto;
|
||||
vto = (Packet*)to;
|
||||
vto->v4f = from;
|
||||
vec_xst(from, 0, to);
|
||||
}
|
||||
|
||||
template <>
|
||||
@ -1172,7 +1206,8 @@ EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) {
|
||||
return vec_round(a);
|
||||
/* Uses non-default rounding for vec_round */
|
||||
return __builtin_s390_vfisb(a, 0, 1);
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) {
|
||||
@ -1263,6 +1298,28 @@ EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f&
|
||||
|
||||
#endif
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4f pldexp<Packet4f>(const Packet4f& a, const Packet4f& exponent) {
|
||||
return pldexp_generic(a, exponent);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d pldexp<Packet2d>(const Packet2d& a, const Packet2d& exponent) {
|
||||
// Clamp exponent to [-2099, 2099]
|
||||
const Packet2d max_exponent = pset1<Packet2d>(2099.0);
|
||||
const Packet2l e = pcast<Packet2d, Packet2l>(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent));
|
||||
|
||||
// Split 2^e into four factors and multiply:
|
||||
const Packet2l bias = {1023, 1023};
|
||||
Packet2l b = plogical_shift_right<2>(e); // floor(e/4)
|
||||
Packet2d c = reinterpret_cast<Packet2d>(plogical_shift_left<52>(b + bias));
|
||||
Packet2d out = pmul(pmul(pmul(a, c), c), c); // a * 2^(3b)
|
||||
b = psub(psub(psub(e, b), b), b); // e - 3b
|
||||
c = reinterpret_cast<Packet2d>(plogical_shift_left<52>(b + bias)); // 2^(e - 3b)
|
||||
out = pmul(out, c); // a * 2^e
|
||||
return out;
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) {
|
||||
EIGEN_ZVECTOR_PREFETCH(addr);
|
||||
@ -1280,6 +1337,75 @@ EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) {
|
||||
return padd<Packet4f>(pset1<Packet4f>(a), p4f_COUNTDOWN);
|
||||
}
|
||||
|
||||
#if !defined(vec_float) || !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ < 13)
|
||||
#pragma GCC warning \
|
||||
"float->int and int->float conversion is simulated. compile for z15 for improved performance"
|
||||
template <>
|
||||
struct cast_impl<Packet4i, Packet4f> {
|
||||
EIGEN_DEVICE_FUNC static inline Packet4f run(const Packet4i& a) {
|
||||
return Packet4f{float(a[0]), float(a[1]), float(a[2]), float(a[3]) };
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct cast_impl<Packet4f, Packet4i> {
|
||||
EIGEN_DEVICE_FUNC static inline Packet4i run(const Packet4f& a) {
|
||||
return Packet4i{int(a[0]), int(a[1]), int(a[2]), int(a[3]) };
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct cast_impl<Packet2l, Packet2d> {
|
||||
EIGEN_DEVICE_FUNC static inline Packet2d run(const Packet2l& a) {
|
||||
return Packet2d{double(a[0]), double(a[1]) };
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct cast_impl<Packet2d, Packet2l> {
|
||||
EIGEN_DEVICE_FUNC static inline Packet2l run(const Packet2d& a) {
|
||||
return Packet2l{(long long)(a[0]), (long long)(a[1]) };
|
||||
}
|
||||
};
|
||||
#else
|
||||
template <>
|
||||
struct cast_impl<Packet4i, Packet4f> {
|
||||
EIGEN_DEVICE_FUNC static inline Packet4f run(const Packet4i& a) {
|
||||
return vec_float(a);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct cast_impl<Packet4f, Packet4i> {
|
||||
EIGEN_DEVICE_FUNC static inline Packet4i run(const Packet4f& a) {
|
||||
return vec_signed(a);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct cast_impl<Packet2l, Packet2d> {
|
||||
EIGEN_DEVICE_FUNC static inline Packet2d run(const Packet2l& a) {
|
||||
return vec_double(a);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct cast_impl<Packet2d, Packet2l> {
|
||||
EIGEN_DEVICE_FUNC static inline Packet2l run(const Packet2d& a) {
|
||||
return vec_signed(a);
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4f pset1frombits<Packet4f>(uint32_t from) {
|
||||
return pset1<Packet4f>(Eigen::numext::bit_cast<float>(from));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d pset1frombits<Packet2d>(uint64_t from) {
|
||||
return pset1<Packet2d>(Eigen::numext::bit_cast<double>(from));
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
Loading…
x
Reference in New Issue
Block a user