diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h index 8ce181e30..a3320b852 100644 --- a/Eigen/src/Core/arch/AVX/MathFunctions.h +++ b/Eigen/src/Core/arch/AVX/MathFunctions.h @@ -20,105 +20,8 @@ namespace Eigen { namespace internal { -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f -psin(const Packet8f& _x) { - return psin_float(_x); -} - -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f -pcos(const Packet8f& _x) { - return pcos_float(_x); -} - -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f -pasin(const Packet8f& _x) { - return pasin_float(_x); -} - -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f -pacos(const Packet8f& _x) { - return pacos_float(_x); -} - -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f -patan(const Packet8f& _x) { - return patan_float(_x); -} - -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d -patan(const Packet4d& _x) { - return patan_double(_x); -} - -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f -patanh(const Packet8f& _x) { - return patanh_float(_x); -} - -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f -plog(const Packet8f& _x) { - return plog_float(_x); -} - -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d -plog(const Packet4d& _x) { - return plog_double(_x); -} - -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f -plog2(const Packet8f& _x) { - return plog2_float(_x); -} - -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d -plog2(const Packet4d& _x) { - return plog2_double(_x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet8f plog1p(const Packet8f& _x) { - return generic_plog1p(_x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet8f pexpm1(const Packet8f& _x) 
{ - return generic_expm1(_x); -} - -// Exponential function. Works by writing "x = m*log(2) + r" where -// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then -// "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1). -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f -pexp(const Packet8f& _x) { - return pexp_float(_x); -} - -// Hyperbolic Tangent function. -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f -ptanh(const Packet8f& _x) { - return internal::generic_fast_tanh_float(_x); -} - -// Exponential function for doubles. -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d -pexp(const Packet4d& _x) { - return pexp_double(_x); -} - +EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet8f) +EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet4d) // Notice that for newer processors, it is counterproductive to use Newton // iteration for square root. In particular, Skylake and Zen2 processors @@ -153,19 +56,6 @@ template<> EIGEN_STRONG_INLINE Packet8f preciprocal(const Packet8f& a) #endif - -F16_PACKET_FUNCTION(Packet8f, Packet8h, psin) -F16_PACKET_FUNCTION(Packet8f, Packet8h, pcos) -F16_PACKET_FUNCTION(Packet8f, Packet8h, plog) -F16_PACKET_FUNCTION(Packet8f, Packet8h, plog2) -F16_PACKET_FUNCTION(Packet8f, Packet8h, plog1p) -F16_PACKET_FUNCTION(Packet8f, Packet8h, pexpm1) -F16_PACKET_FUNCTION(Packet8f, Packet8h, pexp) -F16_PACKET_FUNCTION(Packet8f, Packet8h, ptanh) -F16_PACKET_FUNCTION(Packet8f, Packet8h, psqrt) -F16_PACKET_FUNCTION(Packet8f, Packet8h, prsqrt) -F16_PACKET_FUNCTION(Packet8f, Packet8h, preciprocal) - template <> EIGEN_STRONG_INLINE Packet8h pfrexp(const Packet8h& a, Packet8h& exponent) { Packet8f fexponent; @@ -179,18 +69,6 @@ EIGEN_STRONG_INLINE Packet8h pldexp(const Packet8h& a, const Packet8h& exponent) return float2half(pldexp(half2float(a), half2float(exponent))); } -BF16_PACKET_FUNCTION(Packet8f, Packet8bf, psin) -BF16_PACKET_FUNCTION(Packet8f, Packet8bf, 
pcos) -BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog) -BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog2) -BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog1p) -BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexpm1) -BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexp) -BF16_PACKET_FUNCTION(Packet8f, Packet8bf, ptanh) -BF16_PACKET_FUNCTION(Packet8f, Packet8bf, psqrt) -BF16_PACKET_FUNCTION(Packet8f, Packet8bf, prsqrt) -BF16_PACKET_FUNCTION(Packet8f, Packet8bf, preciprocal) - template <> EIGEN_STRONG_INLINE Packet8bf pfrexp(const Packet8bf& a, Packet8bf& exponent) { Packet8f fexponent; @@ -204,6 +82,30 @@ EIGEN_STRONG_INLINE Packet8bf pldexp(const Packet8bf& a, const Packet8bf& expone return F32ToBf16(pldexp(Bf16ToF32(a), Bf16ToF32(exponent))); } +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pcos) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexp) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexpm1) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog1p) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog2) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, preciprocal) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, prsqrt) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, psin) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, psqrt) +BF16_PACKET_FUNCTION(Packet8f, Packet8bf, ptanh) +F16_PACKET_FUNCTION(Packet8f, Packet8h, pcos) +F16_PACKET_FUNCTION(Packet8f, Packet8h, pexp) +F16_PACKET_FUNCTION(Packet8f, Packet8h, pexpm1) +F16_PACKET_FUNCTION(Packet8f, Packet8h, plog) +F16_PACKET_FUNCTION(Packet8f, Packet8h, plog1p) +F16_PACKET_FUNCTION(Packet8f, Packet8h, plog2) +F16_PACKET_FUNCTION(Packet8f, Packet8h, preciprocal) +F16_PACKET_FUNCTION(Packet8f, Packet8h, prsqrt) +F16_PACKET_FUNCTION(Packet8f, Packet8h, psin) +F16_PACKET_FUNCTION(Packet8f, Packet8h, psqrt) +F16_PACKET_FUNCTION(Packet8f, Packet8h, ptanh) + + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/AVX512/MathFunctions.h b/Eigen/src/Core/arch/AVX512/MathFunctions.h index 
33e3fc69e..b327988bc 100644 --- a/Eigen/src/Core/arch/AVX512/MathFunctions.h +++ b/Eigen/src/Core/arch/AVX512/MathFunctions.h @@ -15,117 +15,8 @@ namespace Eigen { namespace internal { - -#define EIGEN_DECLARE_CONST_Packet16f(NAME, X) \ - const Packet16f p16f_##NAME = pset1(X) - -#define EIGEN_DECLARE_CONST_Packet16f_FROM_INT(NAME, X) \ - const Packet16f p16f_##NAME = preinterpret(pset1(X)) - -#define EIGEN_DECLARE_CONST_Packet8d(NAME, X) \ - const Packet8d p8d_##NAME = pset1(X) - -#define EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \ - const Packet8d p8d_##NAME = _mm512_castsi512_pd(_mm512_set1_epi64(X)) - -#define EIGEN_DECLARE_CONST_Packet16bf(NAME, X) \ - const Packet16bf p16bf_##NAME = pset1(X) - -#define EIGEN_DECLARE_CONST_Packet16bf_FROM_INT(NAME, X) \ - const Packet16bf p16bf_##NAME = preinterpret(pset1(X)) - -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f -plog(const Packet16f& _x) { - return plog_float(_x); -} - -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d -plog(const Packet8d& _x) { - return plog_double(_x); -} - -F16_PACKET_FUNCTION(Packet16f, Packet16h, plog) -BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog) - -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f -plog2(const Packet16f& _x) { - return plog2_float(_x); -} - -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d -plog2(const Packet8d& _x) { - return plog2_double(_x); -} - -F16_PACKET_FUNCTION(Packet16f, Packet16h, plog2) -BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog2) - -// Exponential function. Works by writing "x = m*log(2) + r" where -// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then -// "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1). 
-template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f -pexp(const Packet16f& _x) { - EIGEN_DECLARE_CONST_Packet16f(1, 1.0f); - EIGEN_DECLARE_CONST_Packet16f(half, 0.5f); - EIGEN_DECLARE_CONST_Packet16f(127, 127.0f); - - EIGEN_DECLARE_CONST_Packet16f(exp_hi, 88.3762626647950f); - EIGEN_DECLARE_CONST_Packet16f(exp_lo, -88.3762626647949f); - - EIGEN_DECLARE_CONST_Packet16f(cephes_LOG2EF, 1.44269504088896341f); - - EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p0, 1.9875691500E-4f); - EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p1, 1.3981999507E-3f); - EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p2, 8.3334519073E-3f); - EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p3, 4.1665795894E-2f); - EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p4, 1.6666665459E-1f); - EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p5, 5.0000001201E-1f); - - // Clamp x. - Packet16f x = pmax(pmin(_x, p16f_exp_hi), p16f_exp_lo); - - // Express exp(x) as exp(m*ln(2) + r), start by extracting - // m = floor(x/ln(2) + 0.5). - Packet16f m = _mm512_floor_ps(pmadd(x, p16f_cephes_LOG2EF, p16f_half)); - - // Get r = x - m*ln(2). Note that we can do this without losing more than one - // ulp precision due to the FMA instruction. - EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f); - Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x); - Packet16f r2 = pmul(r, r); - Packet16f r3 = pmul(r2, r); - - // Evaluate the polynomial approximant,improved by instruction-level parallelism. - Packet16f y, y1, y2; - y = pmadd(p16f_cephes_exp_p0, r, p16f_cephes_exp_p1); - y1 = pmadd(p16f_cephes_exp_p3, r, p16f_cephes_exp_p4); - y2 = padd(r, p16f_1); - y = pmadd(y, r, p16f_cephes_exp_p2); - y1 = pmadd(y1, r, p16f_cephes_exp_p5); - y = pmadd(y, r3, y1); - y = pmadd(y, r2, y2); - - // Build emm0 = 2^m. - Packet16i emm0 = _mm512_cvttps_epi32(padd(m, p16f_127)); - emm0 = _mm512_slli_epi32(emm0, 23); - - // Return 2^m * exp(r). 
- return pmax(pmul(y, _mm512_castsi512_ps(emm0)), _x); -} - -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d -pexp(const Packet8d& _x) { - return pexp_double(_x); -} - -F16_PACKET_FUNCTION(Packet16f, Packet16h, pexp) -BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexp) +EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet16f) +EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet8d) template <> EIGEN_STRONG_INLINE Packet16h pfrexp(const Packet16h& a, Packet16h& exponent) { @@ -181,9 +72,6 @@ EIGEN_STRONG_INLINE Packet8d psqrt(const Packet8d& x) { } #endif -F16_PACKET_FUNCTION(Packet16f, Packet16h, psqrt) -BF16_PACKET_FUNCTION(Packet16f, Packet16bf, psqrt) - // prsqrt for float. #if defined(EIGEN_VECTORIZE_AVX512ER) template <> @@ -199,8 +87,6 @@ prsqrt(const Packet16f& _x) { } #endif -F16_PACKET_FUNCTION(Packet16f, Packet16h, prsqrt) -BF16_PACKET_FUNCTION(Packet16f, Packet16bf, prsqrt) // prsqrt for double. #if EIGEN_FAST_MATH @@ -223,80 +109,28 @@ template<> EIGEN_STRONG_INLINE Packet16f preciprocal(const Packet16f& } #endif -F16_PACKET_FUNCTION(Packet16f, Packet16h, preciprocal) -BF16_PACKET_FUNCTION(Packet16f, Packet16bf, preciprocal) - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet16f plog1p(const Packet16f& _x) { - return generic_plog1p(_x); -} - -F16_PACKET_FUNCTION(Packet16f, Packet16h, plog1p) -BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog1p) - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet16f pexpm1(const Packet16f& _x) { - return generic_expm1(_x); -} - -F16_PACKET_FUNCTION(Packet16f, Packet16h, pexpm1) -BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexpm1) - -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f -psin(const Packet16f& _x) { - return psin_float(_x); -} - -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f -pcos(const Packet16f& _x) { - return pcos_float(_x); -} - -template <> 
-EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f -pacos(const Packet16f& _x) { - return pacos_float(_x); -} - -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f -pasin(const Packet16f& _x) { - return pasin_float(_x); -} - -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f -patan(const Packet16f& _x) { - return patan_float(_x); -} - -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f -patanh(const Packet16f& _x) { - return patanh_float(_x); -} - -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d -patan(const Packet8d& _x) { - return patan_double(_x); -} - -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f -ptanh(const Packet16f& _x) { - return internal::generic_fast_tanh_float(_x); -} - -F16_PACKET_FUNCTION(Packet16f, Packet16h, psin) -F16_PACKET_FUNCTION(Packet16f, Packet16h, pcos) -F16_PACKET_FUNCTION(Packet16f, Packet16h, ptanh) - -BF16_PACKET_FUNCTION(Packet16f, Packet16bf, psin) BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pcos) +BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexp) +BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexpm1) +BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog) +BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog1p) +BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog2) +BF16_PACKET_FUNCTION(Packet16f, Packet16bf, preciprocal) +BF16_PACKET_FUNCTION(Packet16f, Packet16bf, prsqrt) +BF16_PACKET_FUNCTION(Packet16f, Packet16bf, psin) +BF16_PACKET_FUNCTION(Packet16f, Packet16bf, psqrt) BF16_PACKET_FUNCTION(Packet16f, Packet16bf, ptanh) +F16_PACKET_FUNCTION(Packet16f, Packet16h, pcos) +F16_PACKET_FUNCTION(Packet16f, Packet16h, pexp) +F16_PACKET_FUNCTION(Packet16f, Packet16h, pexpm1) +F16_PACKET_FUNCTION(Packet16f, Packet16h, plog) +F16_PACKET_FUNCTION(Packet16f, Packet16h, plog1p) +F16_PACKET_FUNCTION(Packet16f, Packet16h, plog2) +F16_PACKET_FUNCTION(Packet16f, Packet16h, preciprocal) +F16_PACKET_FUNCTION(Packet16f, 
Packet16h, prsqrt) +F16_PACKET_FUNCTION(Packet16f, Packet16h, psin) +F16_PACKET_FUNCTION(Packet16f, Packet16h, psqrt) +F16_PACKET_FUNCTION(Packet16f, Packet16h, ptanh) } // end namespace internal diff --git a/Eigen/src/Core/arch/AltiVec/MathFunctions.h b/Eigen/src/Core/arch/AltiVec/MathFunctions.h index 7648b404e..d64d1335f 100644 --- a/Eigen/src/Core/arch/AltiVec/MathFunctions.h +++ b/Eigen/src/Core/arch/AltiVec/MathFunctions.h @@ -18,53 +18,10 @@ namespace Eigen { namespace internal { -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet4f plog(const Packet4f& _x) -{ - return plog_float(_x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet4f pexp(const Packet4f& _x) -{ - return pexp_float(_x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet4f psin(const Packet4f& _x) -{ - return psin_float(_x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet4f pcos(const Packet4f& _x) -{ - return pcos_float(_x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet4f pacos(const Packet4f& _x) -{ - return pacos_float(_x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet4f pasin(const Packet4f& _x) -{ - return pasin_float(_x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet4f patan(const Packet4f& _x) -{ - return patan_float(_x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet4f patanh(const Packet4f& _x) -{ - return patanh_float(_x); -} +EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet4f) +#ifdef EIGEN_VECTORIZE_VSX +EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet2d) +#endif #ifdef EIGEN_VECTORIZE_VSX template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS @@ -96,19 +53,8 @@ Packet2d prsqrt(const Packet2d& x) // return vec_rsqrt(x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet2d patan(const Packet2d& _x) -{ - return patan_double(_x); 
-} #endif -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet2d pexp(const Packet2d& _x) -{ - return pexp_double(_x); -} - template<> EIGEN_STRONG_INLINE Packet8bf psqrt (const Packet8bf& a){ BF16_TO_F32_UNARY_OP_WRAPPER(psqrt, a); } @@ -130,13 +76,6 @@ Packet4f psqrt(const Packet4f& x) } #endif -// Hyperbolic Tangent function. -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f -ptanh(const Packet4f& x) { - return internal::generic_fast_tanh_float(x); -} - } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h index d7335797d..dc08efaa7 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h @@ -109,7 +109,7 @@ template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_double(const Packet& x); -/** \internal \returns atan(x) for single precision float */ +/** \internal \returns atanh(x) for single precision float */ template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh_float(const Packet& x); @@ -126,6 +126,44 @@ Packet pdiv_complex(const Packet& x, const Packet& y); template struct ppolevl; +// Macros for instantiating these generic functions for different backends. 
+#define EIGEN_PACKET_FUNCTION(METHOD, SCALAR, PACKET) \ + template <> \ + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET p##METHOD(const PACKET& _x) { \ + return p##METHOD##_##SCALAR(_x); \ + } + +#define EIGEN_FLOAT_PACKET_FUNCTION(METHOD, PACKET) EIGEN_PACKET_FUNCTION(METHOD, float, PACKET) +#define EIGEN_DOUBLE_PACKET_FUNCTION(METHOD, PACKET) EIGEN_PACKET_FUNCTION(METHOD, double, PACKET) + +#define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(PACKET) \ + EIGEN_FLOAT_PACKET_FUNCTION(sin, PACKET) \ + EIGEN_FLOAT_PACKET_FUNCTION(cos, PACKET) \ + EIGEN_FLOAT_PACKET_FUNCTION(asin, PACKET) \ + EIGEN_FLOAT_PACKET_FUNCTION(acos, PACKET) \ + EIGEN_FLOAT_PACKET_FUNCTION(atan, PACKET) \ + EIGEN_FLOAT_PACKET_FUNCTION(atanh, PACKET) \ + EIGEN_FLOAT_PACKET_FUNCTION(log, PACKET) \ + EIGEN_FLOAT_PACKET_FUNCTION(log2, PACKET) \ + EIGEN_FLOAT_PACKET_FUNCTION(exp, PACKET) \ + template <> \ + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET pexpm1(const PACKET& _x) { \ + return internal::generic_expm1(_x); \ + } \ + template <> \ + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET plog1p(const PACKET& _x) { \ + return internal::generic_plog1p(_x); \ + } \ + template <> \ + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET ptanh(const PACKET& _x) { \ + return internal::generic_fast_tanh_float(_x); \ + } + +#define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(PACKET) \ + EIGEN_DOUBLE_PACKET_FUNCTION(atan, PACKET) \ + EIGEN_DOUBLE_PACKET_FUNCTION(log, PACKET) \ + EIGEN_DOUBLE_PACKET_FUNCTION(log2, PACKET) \ + EIGEN_DOUBLE_PACKET_FUNCTION(exp, PACKET) } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/NEON/MathFunctions.h b/Eigen/src/Core/arch/NEON/MathFunctions.h index f4ea6f91a..7eda4cdf9 100644 --- a/Eigen/src/Core/arch/NEON/MathFunctions.h +++ b/Eigen/src/Core/arch/NEON/MathFunctions.h @@ -14,52 +14,9 @@ namespace Eigen { namespace internal { -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f 
pexp(const Packet2f& x) -{ return pexp_float(x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pexp(const Packet4f& x) -{ return pexp_float(x); } - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f plog(const Packet2f& x) -{ return plog_float(x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f plog(const Packet4f& x) -{ return plog_float(x); } - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f psin(const Packet2f& x) -{ return psin_float(x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f psin(const Packet4f& x) -{ return psin_float(x); } - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f pcos(const Packet2f& x) -{ return pcos_float(x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pcos(const Packet4f& x) -{ return pcos_float(x); } - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f pacos(const Packet2f& x) -{ return pacos_float(x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pacos(const Packet4f& x) -{ return pacos_float(x); } - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f pasin(const Packet2f& x) -{ return pasin_float(x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pasin(const Packet4f& x) -{ return pasin_float(x); } - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f patan(const Packet2f& x) -{ return patan_float(x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f patan(const Packet4f& x) -{ return patan_float(x); } - -// Hyperbolic Tangent function. 
-template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f ptanh(const Packet2f& x) -{ return internal::generic_fast_tanh_float(x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f ptanh(const Packet4f& x) -{ return internal::generic_fast_tanh_float(x); } - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f patanh(const Packet2f& x) -{ return patanh_float(x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f patanh(const Packet4f& x) -{ return patanh_float(x); } - +EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet2f) +EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet4f) + #if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC template <> @@ -79,7 +36,6 @@ Packet8hf ptanh(const Packet8hf& x) { } #endif // EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC - BF16_PACKET_FUNCTION(Packet4f, Packet4bf, psin) BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pcos) BF16_PACKET_FUNCTION(Packet4f, Packet4bf, plog) @@ -102,14 +58,8 @@ EIGEN_STRONG_INLINE Packet4bf pldexp(const Packet4bf& a, const Packet4bf& expone //---------- double ---------- #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d pexp(const Packet2d& x) -{ return pexp_double(x); } -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d plog(const Packet2d& x) -{ return plog_double(x); } - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d patan(const Packet2d& x) -{ return patan_double(x); } +EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet2d) #endif diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h index b0cd13aef..f0ddbe6cf 100644 --- a/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -21,88 +21,8 @@ namespace Eigen { namespace internal { -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet4f plog(const Packet4f& _x) { - return 
plog_float(_x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet2d plog(const Packet2d& _x) { - return plog_double(_x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet4f plog2(const Packet4f& _x) { - return plog2_float(_x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet2d plog2(const Packet2d& _x) { - return plog2_double(_x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet4f plog1p(const Packet4f& _x) { - return generic_plog1p(_x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet4f pexpm1(const Packet4f& _x) { - return generic_expm1(_x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet4f pexp(const Packet4f& _x) -{ - return pexp_float(_x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet2d pexp(const Packet2d& x) -{ - return pexp_double(x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet4f psin(const Packet4f& _x) -{ - return psin_float(_x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet4f pcos(const Packet4f& _x) -{ - return pcos_float(_x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet4f pacos(const Packet4f& _x) -{ - return pacos_float(_x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet4f pasin(const Packet4f& _x) -{ - return pasin_float(_x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet4f patan(const Packet4f& _x) -{ - return patan_float(_x); -} - -template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet2d patan(const Packet2d& _x) { - return patan_double(_x); -} - -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet4f patanh(const Packet4f& _x) -{ - return patanh_float(_x); -} +EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet4f) +EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet2d) 
// Notice that for newer processors, it is counterproductive to use Newton // iteration for square root. In particular, Skylake and Zen2 processors @@ -133,15 +53,6 @@ template<> EIGEN_STRONG_INLINE Packet4f preciprocal(const Packet4f& x) #endif - - -// Hyperbolic Tangent function. -template <> -EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f -ptanh(const Packet4f& x) { - return internal::generic_fast_tanh_float(x); -} - } // end namespace internal namespace numext {