From 4d419e22094cf6ca423e32dcf14cb18859d0a808 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Fri, 16 Feb 2024 21:27:22 +0000 Subject: [PATCH] Rename generic_fast_tanh_float to ptanh_float and move it to... --- Eigen/src/Core/MathFunctions.h | 4 +- Eigen/src/Core/MathFunctionsImpl.h | 59 ------------------- .../arch/Default/GenericPacketMathFunctions.h | 59 +++++++++++++++++++ .../Default/GenericPacketMathFunctionsFwd.h | 9 +-- Eigen/src/Core/arch/SVE/MathFunctions.h | 3 +- Eigen/src/Core/arch/ZVector/MathFunctions.h | 2 +- 6 files changed, 69 insertions(+), 67 deletions(-) diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index c92572f69..aba1d5009 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -980,7 +980,7 @@ EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex& x); template EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex& x); template -T generic_fast_tanh_float(const T& a_x); +T ptanh_float(const T& a_x); /**************************************************************************** * Implementation of sign * @@ -1798,7 +1798,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T tanh(const T& x) { } #if (!defined(EIGEN_GPUCC)) && EIGEN_FAST_MATH && !defined(SYCL_DEVICE_ONLY) -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tanh(float x) { return internal::generic_fast_tanh_float(x); } +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tanh(float x) { return internal::ptanh_float(x); } #endif #if defined(SYCL_DEVICE_ONLY) diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h index ed44089ea..689c6d827 100644 --- a/Eigen/src/Core/MathFunctionsImpl.h +++ b/Eigen/src/Core/MathFunctionsImpl.h @@ -146,65 +146,6 @@ struct generic_sqrt_newton_step { } }; -/** \internal \returns the hyperbolic tan of \a a (coeff-wise) - Doesn't do anything fancy, just a 13/6-degree rational interpolant which - is accurate up to a couple of ulps in the (approximate) range [-8, 8], - outside of which 
tanh(x) = +/-1 in single precision. The input is clamped - to the range [-c, c]. The value c is chosen as the smallest value where - the approximation evaluates to exactly 1. In the reange [-0.0004, 0.0004] - the approximation tanh(x) ~= x is used for better accuracy as x tends to zero. - - This implementation works on both scalars and packets. -*/ -template -T generic_fast_tanh_float(const T& a_x) { - // Clamp the inputs to the range [-c, c] -#ifdef EIGEN_VECTORIZE_FMA - const T plus_clamp = pset1(7.99881172180175781f); - const T minus_clamp = pset1(-7.99881172180175781f); -#else - const T plus_clamp = pset1(7.90531110763549805f); - const T minus_clamp = pset1(-7.90531110763549805f); -#endif - const T tiny = pset1(0.0004f); - const T x = pmax(pmin(a_x, plus_clamp), minus_clamp); - const T tiny_mask = pcmp_lt(pabs(a_x), tiny); - // The monomial coefficients of the numerator polynomial (odd). - const T alpha_1 = pset1(4.89352455891786e-03f); - const T alpha_3 = pset1(6.37261928875436e-04f); - const T alpha_5 = pset1(1.48572235717979e-05f); - const T alpha_7 = pset1(5.12229709037114e-08f); - const T alpha_9 = pset1(-8.60467152213735e-11f); - const T alpha_11 = pset1(2.00018790482477e-13f); - const T alpha_13 = pset1(-2.76076847742355e-16f); - - // The monomial coefficients of the denominator polynomial (even). - const T beta_0 = pset1(4.89352518554385e-03f); - const T beta_2 = pset1(2.26843463243900e-03f); - const T beta_4 = pset1(1.18534705686654e-04f); - const T beta_6 = pset1(1.19825839466702e-06f); - - // Since the polynomials are odd/even, we need x^2. - const T x2 = pmul(x, x); - - // Evaluate the numerator polynomial p. - T p = pmadd(x2, alpha_13, alpha_11); - p = pmadd(x2, p, alpha_9); - p = pmadd(x2, p, alpha_7); - p = pmadd(x2, p, alpha_5); - p = pmadd(x2, p, alpha_3); - p = pmadd(x2, p, alpha_1); - p = pmul(x, p); - - // Evaluate the denominator polynomial q. 
- T q = pmadd(x2, beta_6, beta_4); - q = pmadd(x2, q, beta_2); - q = pmadd(x2, q, beta_0); - - // Divide the numerator by the denominator. - return pselect(tiny_mask, x, pdiv(p, q)); -} - template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y) { // IEEE IEC 6059 special cases. diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index 118426fbe..839df3781 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -917,6 +917,65 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_double(const Pa return pxor(p, x_signmask); } +/** \internal \returns the hyperbolic tan of \a a_x (coeff-wise) + Doesn't do anything fancy, just a 13/6-degree rational interpolant which + is accurate up to a couple of ulps in the (approximate) range [-8, 8], + outside of which tanh(x) = +/-1 in single precision. The input is clamped + to the range [-c, c]. The value c is chosen as the smallest value where + the approximation evaluates to exactly 1. In the range [-0.0004, 0.0004] + the approximation tanh(x) ~= x is used for better accuracy as x tends to zero. + + This implementation works on both scalars and packets. +*/ +template +T ptanh_float(const T& a_x) { + // Clamp the inputs to the range [-c, c] +#ifdef EIGEN_VECTORIZE_FMA + const T plus_clamp = pset1(7.99881172180175781f); + const T minus_clamp = pset1(-7.99881172180175781f); +#else + const T plus_clamp = pset1(7.90531110763549805f); + const T minus_clamp = pset1(-7.90531110763549805f); +#endif + const T tiny = pset1(0.0004f); + const T x = pmax(pmin(a_x, plus_clamp), minus_clamp); + const T tiny_mask = pcmp_lt(pabs(a_x), tiny); + // The monomial coefficients of the numerator polynomial (odd).
+ const T alpha_1 = pset1(4.89352455891786e-03f); + const T alpha_3 = pset1(6.37261928875436e-04f); + const T alpha_5 = pset1(1.48572235717979e-05f); + const T alpha_7 = pset1(5.12229709037114e-08f); + const T alpha_9 = pset1(-8.60467152213735e-11f); + const T alpha_11 = pset1(2.00018790482477e-13f); + const T alpha_13 = pset1(-2.76076847742355e-16f); + + // The monomial coefficients of the denominator polynomial (even). + const T beta_0 = pset1(4.89352518554385e-03f); + const T beta_2 = pset1(2.26843463243900e-03f); + const T beta_4 = pset1(1.18534705686654e-04f); + const T beta_6 = pset1(1.19825839466702e-06f); + + // Since the polynomials are odd/even, we need x^2. + const T x2 = pmul(x, x); + + // Evaluate the numerator polynomial p. + T p = pmadd(x2, alpha_13, alpha_11); + p = pmadd(x2, p, alpha_9); + p = pmadd(x2, p, alpha_7); + p = pmadd(x2, p, alpha_5); + p = pmadd(x2, p, alpha_3); + p = pmadd(x2, p, alpha_1); + p = pmul(x, p); + + // Evaluate the denominator polynomial q. + T q = pmadd(x2, beta_6, beta_4); + q = pmadd(x2, q, beta_2); + q = pmadd(x2, q, beta_0); + + // Divide the numerator by the denominator. 
+ return pselect(tiny_mask, x, pdiv(p, q)); +} + template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh_float(const Packet& x) { typedef typename unpacket_traits::type Scalar; diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h index 960bb67fb..dd1698830 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h @@ -98,6 +98,10 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_float(const Pac template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_double(const Packet& x); +/** \internal \returns tanh(x) for single precision float */ +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh_float(const Packet& x); + /** \internal \returns atanh(x) for single precision float */ template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh_float(const Packet& x); @@ -133,6 +137,7 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog_complex(const Pa EIGEN_FLOAT_PACKET_FUNCTION(asin, PACKET) \ EIGEN_FLOAT_PACKET_FUNCTION(acos, PACKET) \ EIGEN_FLOAT_PACKET_FUNCTION(atan, PACKET) \ + EIGEN_FLOAT_PACKET_FUNCTION(tanh, PACKET) \ EIGEN_FLOAT_PACKET_FUNCTION(atanh, PACKET) \ EIGEN_FLOAT_PACKET_FUNCTION(log, PACKET) \ EIGEN_FLOAT_PACKET_FUNCTION(log2, PACKET) \ @@ -144,10 +149,6 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog_complex(const Pa template <> \ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET plog1p(const PACKET& _x) { \ return internal::generic_plog1p(_x); \ - } \ - template <> \ - EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET ptanh(const PACKET& _x) { \ - return internal::generic_fast_tanh_float(_x); \ } #define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(PACKET) \ diff --git a/Eigen/src/Core/arch/SVE/MathFunctions.h b/Eigen/src/Core/arch/SVE/MathFunctions.h index 
b09527545..8c8ed84cf 100644 --- a/Eigen/src/Core/arch/SVE/MathFunctions.h +++ b/Eigen/src/Core/arch/SVE/MathFunctions.h @@ -39,8 +39,9 @@ EIGEN_STRONG_INLINE PacketXf pcos(const PacketXf& x) { // Hyperbolic Tangent function. template <> EIGEN_STRONG_INLINE PacketXf ptanh(const PacketXf& x) { - return internal::generic_fast_tanh_float(x); + return ptanh_float(x); } + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/ZVector/MathFunctions.h b/Eigen/src/Core/arch/ZVector/MathFunctions.h index 5c5535075..32e042554 100644 --- a/Eigen/src/Core/arch/ZVector/MathFunctions.h +++ b/Eigen/src/Core/arch/ZVector/MathFunctions.h @@ -220,7 +220,7 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f prsqrt(co // Hyperbolic Tangent function. template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f ptanh(const Packet4f& x) { - return internal::generic_fast_tanh_float(x); + return ptanh_float(x); } } // end namespace internal