From 1c0a6cf228bddf5a93024adbe224796cf4977ee7 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Thu, 23 Feb 2023 23:16:41 +0000 Subject: [PATCH] Get rid of EIGEN_HAS_AVX512_MATH workaround. --- Eigen/src/Core/arch/AVX512/Complex.h | 8 +--- Eigen/src/Core/arch/AVX512/MathFunctions.h | 13 +++--- Eigen/src/Core/arch/AVX512/PacketMath.h | 43 +++++++------------ Eigen/src/Core/arch/AVX512/PacketMathFP16.h | 25 ++++------- .../arch/AVX512/BesselFunctions.h | 5 --- 5 files changed, 31 insertions(+), 63 deletions(-) diff --git a/Eigen/src/Core/arch/AVX512/Complex.h b/Eigen/src/Core/arch/AVX512/Complex.h index 1b08f417c..6d8ee2bcd 100644 --- a/Eigen/src/Core/arch/AVX512/Complex.h +++ b/Eigen/src/Core/arch/AVX512/Complex.h @@ -39,7 +39,7 @@ template<> struct packet_traits > : default_packet_traits HasMul = 1, HasDiv = 1, HasNegate = 1, - HasSqrt = EIGEN_HAS_AVX512_MATH, + HasSqrt = 1, HasAbs = 0, HasAbs2 = 0, HasMin = 0, @@ -192,7 +192,7 @@ template<> struct packet_traits > : default_packet_traits HasMul = 1, HasDiv = 1, HasNegate = 1, - HasSqrt = EIGEN_HAS_AVX512_MATH, + HasSqrt = 1, HasAbs = 0, HasAbs2 = 0, HasMin = 0, @@ -368,8 +368,6 @@ ptranspose(PacketBlock& kernel) { kernel.packet[0] = Packet4cd(_mm512_shuffle_f64x2(T0, T2, (shuffle_mask<0,2,0,2>::mask))); // [a0 b0 c0 d0] } -#if EIGEN_HAS_AVX512_MATH - template<> EIGEN_STRONG_INLINE Packet4cd psqrt(const Packet4cd& a) { return psqrt_complex(a); } @@ -378,8 +376,6 @@ template<> EIGEN_STRONG_INLINE Packet8cf psqrt(const Packet8cf& a) { return psqrt_complex(a); } -#endif - } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/AVX512/MathFunctions.h b/Eigen/src/Core/arch/AVX512/MathFunctions.h index be0e4dd94..33e3fc69e 100644 --- a/Eigen/src/Core/arch/AVX512/MathFunctions.h +++ b/Eigen/src/Core/arch/AVX512/MathFunctions.h @@ -16,8 +16,6 @@ namespace Eigen { namespace internal { -#if EIGEN_HAS_AVX512_MATH - #define EIGEN_DECLARE_CONST_Packet16f(NAME, X) \ const Packet16f p16f_##NAME = pset1(X) @@ -165,8 +163,11 @@ psqrt(const Packet16f& _x) { template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d psqrt(const Packet8d& _x) { - // Double requires 2 Newton-Raphson steps for convergence. +#ifdef EIGEN_VECTORIZE_AVX512ER + return generic_sqrt_newton_step::run(_x, _mm512_rsqrt28_pd(_x)); +#else return generic_sqrt_newton_step::run(_x, _mm512_rsqrt14_pd(_x)); +#endif } #else template <> @@ -185,7 +186,6 @@ BF16_PACKET_FUNCTION(Packet16f, Packet16bf, psqrt) // prsqrt for float. #if defined(EIGEN_VECTORIZE_AVX512ER) - template <> EIGEN_STRONG_INLINE Packet16f prsqrt(const Packet16f& x) { return _mm512_rsqrt28_ps(x); @@ -221,10 +221,10 @@ template<> EIGEN_STRONG_INLINE Packet16f preciprocal(const Packet16f& return generic_reciprocal_newton_step::run(a, _mm512_rcp14_ps(a)); #endif } +#endif F16_PACKET_FUNCTION(Packet16f, Packet16h, preciprocal) BF16_PACKET_FUNCTION(Packet16f, Packet16bf, preciprocal) -#endif template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f plog1p(const Packet16f& _x) { @@ -242,9 +242,6 @@ Packet16f pexpm1(const Packet16f& _x) { F16_PACKET_FUNCTION(Packet16f, Packet16h, pexpm1) BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexpm1) -#endif // EIGEN_HAS_AVX512_MATH - - template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f psin(const Packet16f& _x) { diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index 4628b21f5..262211705 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -30,13 +30,6 @@ namespace internal { #endif #endif -// Disable the code for older versions of gcc that don't support many of the required avx512 math instrinsics. -#if EIGEN_GNUC_STRICT_AT_LEAST(5,3,0) || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC >= 1923 || EIGEN_COMP_ICC >= 1900 -#define EIGEN_HAS_AVX512_MATH 1 -#else -#define EIGEN_HAS_AVX512_MATH 0 -#endif - typedef __m512 Packet16f; typedef __m512i Packet16i; typedef __m512d Packet8d; @@ -84,14 +77,14 @@ struct packet_traits : default_packet_traits { HasMax = 1, HasConj = 1, HasSetLinear = 0, - HasLog = EIGEN_HAS_AVX512_MATH, - HasLog1p = EIGEN_HAS_AVX512_MATH, - HasExp = EIGEN_HAS_AVX512_MATH, - HasExpm1 = EIGEN_HAS_AVX512_MATH, - HasSqrt = EIGEN_HAS_AVX512_MATH, - HasRsqrt = EIGEN_HAS_AVX512_MATH, - HasBessel = EIGEN_HAS_AVX512_MATH, - HasNdtri = EIGEN_HAS_AVX512_MATH, + HasSqrt = 1, + HasRsqrt = 1, + HasLog = 1, + HasLog1p = 1, + HasExp = 1, + HasExpm1 = 1, + HasBessel = 1, + HasNdtri = 1, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, HasTanh = EIGEN_FAST_MATH, @@ -126,19 +119,17 @@ template<> struct packet_traits : default_packet_traits HasASin = 1, HasATan = 1, HasATanh = 1, -#if EIGEN_HAS_AVX512_MATH + HasSqrt = 1, + HasRsqrt = 1, HasLog = 1, HasLog1p = 1, HasExpm1 = 1, HasNdtri = 1, HasBessel = 1, HasExp = 1, - HasSqrt = EIGEN_FAST_MATH, - HasRsqrt = EIGEN_FAST_MATH, HasReciprocal = EIGEN_FAST_MATH, HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, -#endif HasCmp = 1, HasDiv = 1, HasRound = 1, @@ -156,12 +147,10 @@ template<> struct packet_traits : default_packet_traits AlignedOnScalar = 1, size = 8, HasHalfPacket = 1, -#if EIGEN_HAS_AVX512_MATH + HasSqrt = 1, + HasRsqrt = 1, HasLog = 1, HasExp = 1, - HasSqrt = EIGEN_FAST_MATH, - HasRsqrt = EIGEN_FAST_MATH, -#endif HasATan = 1, HasCmp = 1, HasDiv = 1, @@ -2294,20 +2283,18 @@ struct packet_traits : default_packet_traits { HasInsert = 1, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, -#if EIGEN_HAS_AVX512_MATH + HasSqrt = 1, + HasRsqrt = 1, #ifdef EIGEN_VECTORIZE_AVX512DQ HasLog = 1, // Currently fails test with bad accuracy. HasLog1p = 1, HasExpm1 = 1, HasNdtri = 1, - HasBessel = 1, + HasBessel = 1, #endif HasExp = 1, - HasSqrt = EIGEN_FAST_MATH, - HasRsqrt = EIGEN_FAST_MATH, HasTanh = EIGEN_FAST_MATH, HasErf = EIGEN_FAST_MATH, -#endif HasCmp = 1, HasDiv = 1 }; diff --git a/Eigen/src/Core/arch/AVX512/PacketMathFP16.h b/Eigen/src/Core/arch/AVX512/PacketMathFP16.h index 0122aeea9..33442da99 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMathFP16.h +++ b/Eigen/src/Core/arch/AVX512/PacketMathFP16.h @@ -16,13 +16,6 @@ namespace Eigen { namespace internal { -// Disable the code for older versions of gcc that don't support many of the required avx512 math instrinsics. -#if EIGEN_GNUC_STRICT_AT_LEAST(5,3,0) || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC >= 1923 || EIGEN_COMP_ICC >= 1900 -#define EIGEN_HAS_AVX512_MATH 1 -#else -#define EIGEN_HAS_AVX512_MATH 0 -#endif - typedef __m512h Packet32h; typedef eigen_packet_wrapper<__m256i, 1> Packet16h; typedef eigen_packet_wrapper<__m128i, 2> Packet8h; @@ -54,15 +47,15 @@ struct packet_traits : default_packet_traits { HasMax = 1, HasConj = 1, HasSetLinear = 0, + HasLog = 1, + HasLog1p = 1, + HasExp = 1, + HasExpm1 = 1, + HasSqrt = 1, + HasRsqrt = 1, // These ones should be implemented in future - HasLog = EIGEN_HAS_AVX512_MATH, - HasLog1p = EIGEN_HAS_AVX512_MATH, - HasExp = EIGEN_HAS_AVX512_MATH, - HasExpm1 = EIGEN_HAS_AVX512_MATH, - HasSqrt = EIGEN_HAS_AVX512_MATH, - HasRsqrt = EIGEN_HAS_AVX512_MATH, - HasBessel = 0, // EIGEN_HAS_AVX512_MATH, - HasNdtri = 0, // EIGEN_HAS_AVX512_MATH, + HasBessel = 0, + HasNdtri = 0, HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, HasTanh = EIGEN_FAST_MATH, @@ -874,4 +867,4 @@ EIGEN_STRONG_INLINE Packet32h pldexp(const Packet32h& a, const Packet } // end namespace internal } // end namespace Eigen -#endif // EIGEN_PACKET_MATH_FP16_AVX512_H \ No newline at end of file +#endif // EIGEN_PACKET_MATH_FP16_AVX512_H diff --git a/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h b/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h index 909b08e16..7dd3c3e5b 100644 --- a/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h +++ b/unsupported/Eigen/src/SpecialFunctions/arch/AVX512/BesselFunctions.h @@ -4,9 +4,6 @@ namespace Eigen { namespace internal { -// Bessel functions only available for some compilers. -#if EIGEN_HAS_AVX512_MATH - F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_i0) BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_i0) @@ -43,8 +40,6 @@ BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_y0) F16_PACKET_FUNCTION(Packet16f, Packet16h, pbessel_y1) BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pbessel_y1) -#endif - } // namespace internal } // namespace Eigen