mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 19:59:05 +08:00
Get rid of EIGEN_HAS_AVX512_MATH workaround.
This commit is contained in:
parent
00844e3865
commit
1c0a6cf228
@ -39,7 +39,7 @@ template<> struct packet_traits<std::complex<float> > : default_packet_traits
|
|||||||
HasMul = 1,
|
HasMul = 1,
|
||||||
HasDiv = 1,
|
HasDiv = 1,
|
||||||
HasNegate = 1,
|
HasNegate = 1,
|
||||||
HasSqrt = EIGEN_HAS_AVX512_MATH,
|
HasSqrt = 1,
|
||||||
HasAbs = 0,
|
HasAbs = 0,
|
||||||
HasAbs2 = 0,
|
HasAbs2 = 0,
|
||||||
HasMin = 0,
|
HasMin = 0,
|
||||||
@ -192,7 +192,7 @@ template<> struct packet_traits<std::complex<double> > : default_packet_traits
|
|||||||
HasMul = 1,
|
HasMul = 1,
|
||||||
HasDiv = 1,
|
HasDiv = 1,
|
||||||
HasNegate = 1,
|
HasNegate = 1,
|
||||||
HasSqrt = EIGEN_HAS_AVX512_MATH,
|
HasSqrt = 1,
|
||||||
HasAbs = 0,
|
HasAbs = 0,
|
||||||
HasAbs2 = 0,
|
HasAbs2 = 0,
|
||||||
HasMin = 0,
|
HasMin = 0,
|
||||||
@ -368,8 +368,6 @@ ptranspose(PacketBlock<Packet4cd,4>& kernel) {
|
|||||||
kernel.packet[0] = Packet4cd(_mm512_shuffle_f64x2(T0, T2, (shuffle_mask<0,2,0,2>::mask))); // [a0 b0 c0 d0]
|
kernel.packet[0] = Packet4cd(_mm512_shuffle_f64x2(T0, T2, (shuffle_mask<0,2,0,2>::mask))); // [a0 b0 c0 d0]
|
||||||
}
|
}
|
||||||
|
|
||||||
#if EIGEN_HAS_AVX512_MATH
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4cd psqrt<Packet4cd>(const Packet4cd& a) {
|
template<> EIGEN_STRONG_INLINE Packet4cd psqrt<Packet4cd>(const Packet4cd& a) {
|
||||||
return psqrt_complex<Packet4cd>(a);
|
return psqrt_complex<Packet4cd>(a);
|
||||||
}
|
}
|
||||||
@ -378,8 +376,6 @@ template<> EIGEN_STRONG_INLINE Packet8cf psqrt<Packet8cf>(const Packet8cf& a) {
|
|||||||
return psqrt_complex<Packet8cf>(a);
|
return psqrt_complex<Packet8cf>(a);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
|
||||||
|
@ -16,8 +16,6 @@ namespace Eigen {
|
|||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
#if EIGEN_HAS_AVX512_MATH
|
|
||||||
|
|
||||||
#define EIGEN_DECLARE_CONST_Packet16f(NAME, X) \
|
#define EIGEN_DECLARE_CONST_Packet16f(NAME, X) \
|
||||||
const Packet16f p16f_##NAME = pset1<Packet16f>(X)
|
const Packet16f p16f_##NAME = pset1<Packet16f>(X)
|
||||||
|
|
||||||
@ -165,8 +163,11 @@ psqrt<Packet16f>(const Packet16f& _x) {
|
|||||||
template <>
|
template <>
|
||||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d
|
||||||
psqrt<Packet8d>(const Packet8d& _x) {
|
psqrt<Packet8d>(const Packet8d& _x) {
|
||||||
// Double requires 2 Newton-Raphson steps for convergence.
|
#ifdef EIGEN_VECTORIZE_AVX512ER
|
||||||
|
return generic_sqrt_newton_step<Packet8d, /*Steps=*/1>::run(_x, _mm512_rsqrt28_pd(_x));
|
||||||
|
#else
|
||||||
return generic_sqrt_newton_step<Packet8d, /*Steps=*/2>::run(_x, _mm512_rsqrt14_pd(_x));
|
return generic_sqrt_newton_step<Packet8d, /*Steps=*/2>::run(_x, _mm512_rsqrt14_pd(_x));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
template <>
|
template <>
|
||||||
@ -185,7 +186,6 @@ BF16_PACKET_FUNCTION(Packet16f, Packet16bf, psqrt)
|
|||||||
|
|
||||||
// prsqrt for float.
|
// prsqrt for float.
|
||||||
#if defined(EIGEN_VECTORIZE_AVX512ER)
|
#if defined(EIGEN_VECTORIZE_AVX512ER)
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
|
EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
|
||||||
return _mm512_rsqrt28_ps(x);
|
return _mm512_rsqrt28_ps(x);
|
||||||
@ -221,10 +221,10 @@ template<> EIGEN_STRONG_INLINE Packet16f preciprocal<Packet16f>(const Packet16f&
|
|||||||
return generic_reciprocal_newton_step<Packet16f, /*Steps=*/1>::run(a, _mm512_rcp14_ps(a));
|
return generic_reciprocal_newton_step<Packet16f, /*Steps=*/1>::run(a, _mm512_rcp14_ps(a));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, preciprocal)
|
F16_PACKET_FUNCTION(Packet16f, Packet16h, preciprocal)
|
||||||
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, preciprocal)
|
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, preciprocal)
|
||||||
#endif
|
|
||||||
|
|
||||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||||
Packet16f plog1p<Packet16f>(const Packet16f& _x) {
|
Packet16f plog1p<Packet16f>(const Packet16f& _x) {
|
||||||
@ -242,9 +242,6 @@ Packet16f pexpm1<Packet16f>(const Packet16f& _x) {
|
|||||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, pexpm1)
|
F16_PACKET_FUNCTION(Packet16f, Packet16h, pexpm1)
|
||||||
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexpm1)
|
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexpm1)
|
||||||
|
|
||||||
#endif // EIGEN_HAS_AVX512_MATH
|
|
||||||
|
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
|
||||||
psin<Packet16f>(const Packet16f& _x) {
|
psin<Packet16f>(const Packet16f& _x) {
|
||||||
|
@ -30,13 +30,6 @@ namespace internal {
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Disable the code for older versions of gcc that don't support many of the required avx512 math intrinsics.
|
|
||||||
#if EIGEN_GNUC_STRICT_AT_LEAST(5,3,0) || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC >= 1923 || EIGEN_COMP_ICC >= 1900
|
|
||||||
#define EIGEN_HAS_AVX512_MATH 1
|
|
||||||
#else
|
|
||||||
#define EIGEN_HAS_AVX512_MATH 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
typedef __m512 Packet16f;
|
typedef __m512 Packet16f;
|
||||||
typedef __m512i Packet16i;
|
typedef __m512i Packet16i;
|
||||||
typedef __m512d Packet8d;
|
typedef __m512d Packet8d;
|
||||||
@ -84,14 +77,14 @@ struct packet_traits<half> : default_packet_traits {
|
|||||||
HasMax = 1,
|
HasMax = 1,
|
||||||
HasConj = 1,
|
HasConj = 1,
|
||||||
HasSetLinear = 0,
|
HasSetLinear = 0,
|
||||||
HasLog = EIGEN_HAS_AVX512_MATH,
|
HasSqrt = 1,
|
||||||
HasLog1p = EIGEN_HAS_AVX512_MATH,
|
HasRsqrt = 1,
|
||||||
HasExp = EIGEN_HAS_AVX512_MATH,
|
HasLog = 1,
|
||||||
HasExpm1 = EIGEN_HAS_AVX512_MATH,
|
HasLog1p = 1,
|
||||||
HasSqrt = EIGEN_HAS_AVX512_MATH,
|
HasExp = 1,
|
||||||
HasRsqrt = EIGEN_HAS_AVX512_MATH,
|
HasExpm1 = 1,
|
||||||
HasBessel = EIGEN_HAS_AVX512_MATH,
|
HasBessel = 1,
|
||||||
HasNdtri = EIGEN_HAS_AVX512_MATH,
|
HasNdtri = 1,
|
||||||
HasSin = EIGEN_FAST_MATH,
|
HasSin = EIGEN_FAST_MATH,
|
||||||
HasCos = EIGEN_FAST_MATH,
|
HasCos = EIGEN_FAST_MATH,
|
||||||
HasTanh = EIGEN_FAST_MATH,
|
HasTanh = EIGEN_FAST_MATH,
|
||||||
@ -126,19 +119,17 @@ template<> struct packet_traits<float> : default_packet_traits
|
|||||||
HasASin = 1,
|
HasASin = 1,
|
||||||
HasATan = 1,
|
HasATan = 1,
|
||||||
HasATanh = 1,
|
HasATanh = 1,
|
||||||
#if EIGEN_HAS_AVX512_MATH
|
HasSqrt = 1,
|
||||||
|
HasRsqrt = 1,
|
||||||
HasLog = 1,
|
HasLog = 1,
|
||||||
HasLog1p = 1,
|
HasLog1p = 1,
|
||||||
HasExpm1 = 1,
|
HasExpm1 = 1,
|
||||||
HasNdtri = 1,
|
HasNdtri = 1,
|
||||||
HasBessel = 1,
|
HasBessel = 1,
|
||||||
HasExp = 1,
|
HasExp = 1,
|
||||||
HasSqrt = EIGEN_FAST_MATH,
|
|
||||||
HasRsqrt = EIGEN_FAST_MATH,
|
|
||||||
HasReciprocal = EIGEN_FAST_MATH,
|
HasReciprocal = EIGEN_FAST_MATH,
|
||||||
HasTanh = EIGEN_FAST_MATH,
|
HasTanh = EIGEN_FAST_MATH,
|
||||||
HasErf = EIGEN_FAST_MATH,
|
HasErf = EIGEN_FAST_MATH,
|
||||||
#endif
|
|
||||||
HasCmp = 1,
|
HasCmp = 1,
|
||||||
HasDiv = 1,
|
HasDiv = 1,
|
||||||
HasRound = 1,
|
HasRound = 1,
|
||||||
@ -156,12 +147,10 @@ template<> struct packet_traits<double> : default_packet_traits
|
|||||||
AlignedOnScalar = 1,
|
AlignedOnScalar = 1,
|
||||||
size = 8,
|
size = 8,
|
||||||
HasHalfPacket = 1,
|
HasHalfPacket = 1,
|
||||||
#if EIGEN_HAS_AVX512_MATH
|
HasSqrt = 1,
|
||||||
|
HasRsqrt = 1,
|
||||||
HasLog = 1,
|
HasLog = 1,
|
||||||
HasExp = 1,
|
HasExp = 1,
|
||||||
HasSqrt = EIGEN_FAST_MATH,
|
|
||||||
HasRsqrt = EIGEN_FAST_MATH,
|
|
||||||
#endif
|
|
||||||
HasATan = 1,
|
HasATan = 1,
|
||||||
HasCmp = 1,
|
HasCmp = 1,
|
||||||
HasDiv = 1,
|
HasDiv = 1,
|
||||||
@ -2294,20 +2283,18 @@ struct packet_traits<bfloat16> : default_packet_traits {
|
|||||||
HasInsert = 1,
|
HasInsert = 1,
|
||||||
HasSin = EIGEN_FAST_MATH,
|
HasSin = EIGEN_FAST_MATH,
|
||||||
HasCos = EIGEN_FAST_MATH,
|
HasCos = EIGEN_FAST_MATH,
|
||||||
#if EIGEN_HAS_AVX512_MATH
|
HasSqrt = 1,
|
||||||
|
HasRsqrt = 1,
|
||||||
#ifdef EIGEN_VECTORIZE_AVX512DQ
|
#ifdef EIGEN_VECTORIZE_AVX512DQ
|
||||||
HasLog = 1, // Currently fails test with bad accuracy.
|
HasLog = 1, // Currently fails test with bad accuracy.
|
||||||
HasLog1p = 1,
|
HasLog1p = 1,
|
||||||
HasExpm1 = 1,
|
HasExpm1 = 1,
|
||||||
HasNdtri = 1,
|
HasNdtri = 1,
|
||||||
HasBessel = 1,
|
HasBessel = 1,
|
||||||
#endif
|
#endif
|
||||||
HasExp = 1,
|
HasExp = 1,
|
||||||
HasSqrt = EIGEN_FAST_MATH,
|
|
||||||
HasRsqrt = EIGEN_FAST_MATH,
|
|
||||||
HasTanh = EIGEN_FAST_MATH,
|
HasTanh = EIGEN_FAST_MATH,
|
||||||
HasErf = EIGEN_FAST_MATH,
|
HasErf = EIGEN_FAST_MATH,
|
||||||
#endif
|
|
||||||
HasCmp = 1,
|
HasCmp = 1,
|
||||||
HasDiv = 1
|
HasDiv = 1
|
||||||
};
|
};
|
||||||
|
@ -16,13 +16,6 @@ namespace Eigen {
|
|||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
// Disable the code for older versions of gcc that don't support many of the required avx512 math intrinsics.
|
|
||||||
#if EIGEN_GNUC_STRICT_AT_LEAST(5,3,0) || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC >= 1923 || EIGEN_COMP_ICC >= 1900
|
|
||||||
#define EIGEN_HAS_AVX512_MATH 1
|
|
||||||
#else
|
|
||||||
#define EIGEN_HAS_AVX512_MATH 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
typedef __m512h Packet32h;
|
typedef __m512h Packet32h;
|
||||||
typedef eigen_packet_wrapper<__m256i, 1> Packet16h;
|
typedef eigen_packet_wrapper<__m256i, 1> Packet16h;
|
||||||
typedef eigen_packet_wrapper<__m128i, 2> Packet8h;
|
typedef eigen_packet_wrapper<__m128i, 2> Packet8h;
|
||||||
@ -54,15 +47,15 @@ struct packet_traits<half> : default_packet_traits {
|
|||||||
HasMax = 1,
|
HasMax = 1,
|
||||||
HasConj = 1,
|
HasConj = 1,
|
||||||
HasSetLinear = 0,
|
HasSetLinear = 0,
|
||||||
|
HasLog = 1,
|
||||||
|
HasLog1p = 1,
|
||||||
|
HasExp = 1,
|
||||||
|
HasExpm1 = 1,
|
||||||
|
HasSqrt = 1,
|
||||||
|
HasRsqrt = 1,
|
||||||
// These ones should be implemented in future
|
// These ones should be implemented in future
|
||||||
HasLog = EIGEN_HAS_AVX512_MATH,
|
HasBessel = 0,
|
||||||
HasLog1p = EIGEN_HAS_AVX512_MATH,
|
HasNdtri = 0,
|
||||||
HasExp = EIGEN_HAS_AVX512_MATH,
|
|
||||||
HasExpm1 = EIGEN_HAS_AVX512_MATH,
|
|
||||||
HasSqrt = EIGEN_HAS_AVX512_MATH,
|
|
||||||
HasRsqrt = EIGEN_HAS_AVX512_MATH,
|
|
||||||
HasBessel = 0, // EIGEN_HAS_AVX512_MATH,
|
|
||||||
HasNdtri = 0, // EIGEN_HAS_AVX512_MATH,
|
|
||||||
HasSin = EIGEN_FAST_MATH,
|
HasSin = EIGEN_FAST_MATH,
|
||||||
HasCos = EIGEN_FAST_MATH,
|
HasCos = EIGEN_FAST_MATH,
|
||||||
HasTanh = EIGEN_FAST_MATH,
|
HasTanh = EIGEN_FAST_MATH,
|
||||||
|
@ -4,9 +4,6 @@
|
|||||||
namespace Eigen {
|
namespace Eigen {
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
// Bessel functions only available for some compilers.
|
|
||||||
#if EIGEN_HAS_AVX512_MATH
|
|
||||||
|
|
||||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_i0)
|
F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_i0)
|
||||||
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_i0)
|
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_i0)
|
||||||
|
|
||||||
@ -43,8 +40,6 @@ BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_y0)
|
|||||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_y1)
|
F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_y1)
|
||||||
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_y1)
|
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_y1)
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
} // namespace internal
|
} // namespace internal
|
||||||
} // namespace Eigen
|
} // namespace Eigen
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user