From 74dcfbbd0f74922c954aa55cca673037a099e8df Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Mon, 7 Oct 2024 13:27:28 -0700 Subject: [PATCH] Use ppolevl for polynomial evaluation in more places. --- .../arch/Default/GenericPacketMathFunctions.h | 464 +++++++----------- .../SpecialFunctions/SpecialFunctionsImpl.h | 94 ++-- 2 files changed, 223 insertions(+), 335 deletions(-) diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index 5bce19464..c3270e132 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -42,6 +42,134 @@ struct make_integer { typedef numext::int16_t type; }; +/* polevl (modified for Eigen) + * + * Evaluate polynomial + * + * + * + * SYNOPSIS: + * + * int N; + * Scalar x, y, coef[N+1]; + * + * y = polevl( x, coef); + * + * + * + * DESCRIPTION: + * + * Evaluates polynomial of degree N: + * + * 2 N + * y = C + C x + C x +...+ C x + * 0 1 2 N + * + * Coefficients are stored in reverse order: + * + * coef[0] = C , ..., coef[N] = C . + * N 0 + * + * The function p1evl() assumes that coef[N] = 1.0 and is + * omitted from the array. Its calling arguments are + * otherwise the same as polevl(). + * + * + * The Eigen implementation is templatized. For best speed, store + * coef as a const array (constexpr), e.g. + * + * const double coef[] = {1.0, 2.0, 3.0, ...}; + * + */ +template +struct ppolevl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x, + const typename unpacket_traits::type coeff[]) { + EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); + return pmadd(ppolevl::run(x, coeff), x, pset1(coeff[N])); + } +}; + +template +struct ppolevl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x, + const typename unpacket_traits::type coeff[]) { + EIGEN_UNUSED_VARIABLE(x); + return pset1(coeff[0]); + } +}; + +/* chbevl (modified for Eigen) + * + * Evaluate Chebyshev series + * + * + * + * SYNOPSIS: + * + * int N; + * Scalar x, y, coef[N], chebevl(); + * + * y = chbevl( x, coef, N ); + * + * + * + * DESCRIPTION: + * + * Evaluates the series + * + * N-1 + * - ' + * y = > coef[i] T (x/2) + * - i + * i=0 + * + * of Chebyshev polynomials Ti at argument x/2. + * + * Coefficients are stored in reverse order, i.e. the zero + * order term is last in the array. Note N is the number of + * coefficients, not the order. + * + * If coefficients are for the interval a to b, x must + * have been transformed to x -> 2(2x - b - a)/(b-a) before + * entering the routine. This maps x from (a, b) to (-1, 1), + * over which the Chebyshev polynomials are defined. + * + * If the coefficients are for the inverted interval, in + * which (a, b) is mapped to (1/b, 1/a), the transformation + * required is x -> 2(2ab/x - b - a)/(b-a). If b is infinity, + * this becomes x -> 4a/x - 1. + * + * + * + * SPEED: + * + * Taking advantage of the recurrence properties of the + * Chebyshev polynomials, the routine requires one more + * addition per loop than evaluating a nested polynomial of + * the same degree. + * + */ + +template +struct pchebevl { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(Packet x, + const typename unpacket_traits::type coef[]) { + typedef typename unpacket_traits::type Scalar; + Packet b0 = pset1(coef[0]); + Packet b1 = pset1(static_cast(0.f)); + Packet b2; + + for (int i = 1; i < N; i++) { + b2 = b1; + b1 = b0; + b0 = psub(pmadd(x, b1, pset1(coef[i])), b2); + } + + return pmul(pset1(static_cast(0.5f)), psub(b0, b2)); + } +}; + template EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet pfrexp_generic_get_biased_exponent(const Packet& a) { typedef typename unpacket_traits::type Scalar; @@ -193,21 +321,14 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog_impl_float(const e = psub(e, pand(cst_1, mask)); x = padd(x, tmp); - // Polynomial coefficients for rational (3,3) r(x) = p(x)/q(x) + // Polynomial coefficients for rational r(x) = p(x)/q(x) // approximating log(1+x) on [sqrt(0.5)-1;sqrt(2)-1]. - const Packet cst_p1 = pset1(1.0000000190281136f); - const Packet cst_p2 = pset1(1.0000000190281063f); - const Packet cst_p3 = pset1(0.18256296349849254f); - const Packet cst_q1 = pset1(1.4999999999999927f); - const Packet cst_q2 = pset1(0.59923249590823520f); - const Packet cst_q3 = pset1(0.049616247954120038f); + constexpr float alpha[] = {0.18256296349849254f, 1.0000000190281063f, 1.0000000190281136f}; + constexpr float beta[] = {0.049616247954120038f, 0.59923249590823520f, 1.4999999999999927f, 1.0f}; - Packet p = pmadd(x, cst_p3, cst_p2); - p = pmadd(x, p, cst_p1); + Packet p = ppolevl::run(x, alpha); p = pmul(x, p); - Packet q = pmadd(x, cst_q3, cst_q2); - q = pmadd(x, q, cst_q1); - q = pmadd(x, q, cst_1); + Packet q = ppolevl::run(x, beta); x = pdiv(p, q); // Add the logarithm of the exponent back to the result of the interpolation. @@ -919,13 +1040,6 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin_float(const Pac const Packet cst_one = pset1(1.0f); const Packet cst_two = pset1(2.0f); const Packet cst_pi_over_two = pset1(kPiOverTwo); - // For |x| < 0.5 approximate asin(x)/x by an 8th order polynomial with - // even terms only. - const Packet p9 = pset1(5.08838854730129241943359375e-2f); - const Packet p7 = pset1(3.95139865577220916748046875e-2f); - const Packet p5 = pset1(7.550220191478729248046875e-2f); - const Packet p3 = pset1(0.16664917767047882080078125f); - const Packet p1 = pset1(1.00000011920928955078125f); const Packet abs_x = pabs(x_in); const Packet sign_mask = pandnot(x_in, abs_x); @@ -941,13 +1055,11 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin_float(const Pac const Packet x = pselect(large_mask, x_large, abs_x); const Packet x2 = pmul(x, x); - // Compute polynomial. - // x * (p1 + x^2*(p3 + x^2*(p5 + x^2*(p7 + x^2*p9)))) - - Packet p = pmadd(p9, x2, p7); - p = pmadd(p, x2, p5); - p = pmadd(p, x2, p3); - p = pmadd(p, x2, p1); + // For |x| < 0.5 approximate asin(x)/x by an 8th order polynomial with + // even terms only. + constexpr float alpha[] = {5.08838854730129241943359375e-2f, 3.95139865577220916748046875e-2f, + 7.550220191478729248046875e-2f, 0.16664917767047882080078125f, 1.00000011920928955078125f}; + Packet p = ppolevl::run(x2, alpha); p = pmul(p, x); const Packet p_large = pnmadd(cst_two, p, cst_pi_over_two); @@ -967,34 +1079,17 @@ struct patan_reduced { template <> template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_reduced::run(const Packet& x) { - const Packet p1 = pset1(3.3004361289279920e-01); - const Packet p3 = pset1(8.2704055405494614e-01); - const Packet p5 = pset1(7.5365702534987022e-01); - const Packet p7 = pset1(3.0409318473444424e-01); - const Packet p9 = pset1(5.2574296781008604e-02); - const Packet p11 = pset1(3.0917513112462781e-03); - const Packet p13 = pset1(2.6667153866462208e-05); - const Packet q0 = p1; - const Packet q2 = pset1(9.3705509168587852e-01); - const Packet q4 = pset1(1.0); - const Packet q6 = pset1(4.9716458728465573e-01); - const Packet q8 = pset1(1.1548932646420353e-01); - const Packet q10 = pset1(1.0899150928962708e-02); - const Packet q12 = pset1(2.7311202462436667e-04); + constexpr double alpha[] = {2.6667153866462208e-05, 3.0917513112462781e-03, 5.2574296781008604e-02, + 3.0409318473444424e-01, 7.5365702534987022e-01, 8.2704055405494614e-01, + 3.3004361289279920e-01}; + + constexpr double beta[] = { + 2.7311202462436667e-04, 1.0899150928962708e-02, 1.1548932646420353e-01, 4.9716458728465573e-01, 1.0, + 9.3705509168587852e-01, 3.3004361289279920e-01}; Packet x2 = pmul(x, x); - Packet p = pmadd(p13, x2, p11); - Packet q = pmadd(q12, x2, q10); - p = pmadd(p, x2, p9); - q = pmadd(q, x2, q8); - p = pmadd(p, x2, p7); - q = pmadd(q, x2, q6); - p = pmadd(p, x2, p5); - q = pmadd(q, x2, q4); - p = pmadd(p, x2, p3); - q = pmadd(q, x2, q2); - p = pmadd(p, x2, p1); - q = pmadd(q, x2, q0); + Packet p = ppolevl::run(x2, alpha); + Packet q = ppolevl::run(x2, beta); return pmul(x, pdiv(p, q)); } @@ -1002,20 +1097,14 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_reduced template <> template EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_reduced::run(const Packet& x) { - const Packet p1 = pset1(8.109951019287109375e-01f); - const Packet p3 = pset1(7.296695709228515625e-01f); - const Packet p5 = pset1(1.12026982009410858154296875e-01f); - const Packet q0 = p1; - const Packet q2 = pset1(1.0f); - const Packet q4 = pset1(2.8318560123443603515625e-01f); - const Packet q6 = pset1(1.00917108356952667236328125e-02f); + constexpr float alpha[] = {1.12026982009410858154296875e-01f, 7.296695709228515625e-01f, 8.109951019287109375e-01f}; + + constexpr float beta[] = {1.00917108356952667236328125e-02f, 2.8318560123443603515625e-01f, 1.0f, + 8.109951019287109375e-01f}; Packet x2 = pmul(x, x); - Packet p = pmadd(p5, x2, p3); - Packet q = pmadd(q6, x2, q4); - p = pmadd(p, x2, p1); - q = pmadd(q, x2, q2); - q = pmadd(q, x2, q0); + Packet p = ppolevl::run(x2, alpha); + Packet q = ppolevl::run(x2, beta); return pmul(x, pdiv(p, q)); } @@ -1076,34 +1165,20 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS T ptanh_float(const T& a_x) // --output=tanhf.sollya --dispCoeff="dec" // The monomial coefficients of the numerator polynomial (odd). - const T alpha_3 = pset1(1.340216100e-1f); - const T alpha_5 = pset1(3.520756727e-3f); - const T alpha_7 = pset1(2.102733560e-5f); - const T alpha_9 = pset1(1.394553628e-8f); + constexpr float alpha[] = {1.394553628e-8f, 2.102733560e-5f, 3.520756727e-3f, 1.340216100e-1f}; // The monomial coefficients of the denominator polynomial (even). - const T beta_0 = pset1(1.0f); - const T beta_2 = pset1(4.673548340e-1f); - const T beta_4 = pset1(2.597254514e-2f); - const T beta_6 = pset1(3.326951409e-4f); - const T beta_8 = pset1(8.015776984e-7f); + constexpr float beta[] = {8.015776984e-7f, 3.326951409e-4f, 2.597254514e-2f, 4.673548340e-1f, 1.0f}; // Since the polynomials are odd/even, we need x^2. const T x2 = pmul(x, x); const T x3 = pmul(x2, x); - // Interleave the evaluation of the numerator polynomial p and - // denominator polynomial q. - T p = pmadd(x2, alpha_9, alpha_7); - T q = pmadd(x2, beta_8, beta_6); - p = pmadd(x2, p, alpha_5); - q = pmadd(x2, q, beta_4); - p = pmadd(x2, p, alpha_3); - q = pmadd(x2, q, beta_2); - // Take advantage of the fact that alpha_1 = 1 to compute - // x*(x^2*p + alpha_1) = x^3 * p + x. + T p = ppolevl::run(x2, alpha); + T q = ppolevl::run(x2, beta); + // Take advantage of the fact that the constant term in p is 1 to compute + // x*(x^2*p + 1) = x^3 * p + x. p = pmadd(x3, p, x); - q = pmadd(x2, q, beta_0); // Divide the numerator by the denominator. return pdiv(p, q); @@ -1141,27 +1216,16 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS T ptanh_double(const T& a_x) // --denF="[D]" --log --output=tanh.sollya --dispCoeff="dec" // The monomial coefficients of the numerator polynomial (odd). - const T alpha_3 = pset1(1.5184719640284322e-01); - const T alpha_5 = pset1(5.9809711724441161e-03); - const T alpha_7 = pset1(9.3839087674268880e-05); - const T alpha_9 = pset1(6.8644367682497074e-07); - const T alpha_11 = pset1(2.4618379131293676e-09); - const T alpha_13 = pset1(4.2303918148209176e-12); - const T alpha_15 = pset1(3.1309488231386680e-15); - const T alpha_17 = pset1(7.6534862268749319e-19); - const T alpha_19 = pset1(2.6158007860482230e-23); + constexpr double alpha[] = {2.6158007860482230e-23, 7.6534862268749319e-19, 3.1309488231386680e-15, + 4.2303918148209176e-12, 2.4618379131293676e-09, 6.8644367682497074e-07, + 9.3839087674268880e-05, 5.9809711724441161e-03, 1.5184719640284322e-01}; // The monomial coefficients of the denominator polynomial (even). - const T beta_0 = pset1(1.0); - const T beta_2 = pset1(4.851805297361760360e-01); - const T beta_4 = pset1(3.437448108450402717e-02); - const T beta_6 = pset1(8.295161192716231542e-04); - const T beta_8 = pset1(8.785185266237658698e-06); - const T beta_10 = pset1(4.492975677839633985e-08); - const T beta_12 = pset1(1.123643448069621992e-10); - const T beta_14 = pset1(1.293019623712687916e-13); - const T beta_16 = pset1(5.782506856739003571e-17); - const T beta_18 = pset1(6.463747022670968018e-21); + constexpr double beta[] = {6.463747022670968018e-21, 5.782506856739003571e-17, + 1.293019623712687916e-13, 1.123643448069621992e-10, + 4.492975677839633985e-08, 8.785185266237658698e-06, + 8.295161192716231542e-04, 3.437448108450402717e-02, + 4.851805297361760360e-01, 1.0}; // Since the polynomials are odd/even, we need x^2. const T x2 = pmul(x, x); @@ -1169,26 +1233,11 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS T ptanh_double(const T& a_x) // Interleave the evaluation of the numerator polynomial p and // denominator polynomial q. - T p = pmadd(x2, alpha_19, alpha_17); - T q = pmadd(x2, beta_18, beta_16); - p = pmadd(x2, p, alpha_15); - q = pmadd(x2, q, beta_14); - p = pmadd(x2, p, alpha_13); - q = pmadd(x2, q, beta_12); - p = pmadd(x2, p, alpha_11); - q = pmadd(x2, q, beta_10); - p = pmadd(x2, p, alpha_9); - q = pmadd(x2, q, beta_8); - p = pmadd(x2, p, alpha_7); - q = pmadd(x2, q, beta_6); - p = pmadd(x2, p, alpha_5); - q = pmadd(x2, q, beta_4); - p = pmadd(x2, p, alpha_3); - q = pmadd(x2, q, beta_2); - // Take advantage of the fact that alpha_1 = 1 to compute - // x*(x^2*p + alpha_1) = x^3 * p + x. + T p = ppolevl::run(x2, alpha); + T q = ppolevl::run(x2, beta); + // Take advantage of the fact that the constant term in p is 1 to compute + // x*(x^2*p + 1) = x^3 * p + x. p = pmadd(x3, p, x); - q = pmadd(x2, q, beta_0); // Divide the numerator by the denominator. return pdiv(p, q); @@ -1200,18 +1249,13 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh_float(const Pa static_assert(std::is_same::value, "Scalar type must be float"); // For |x| in [0:0.5] we use a polynomial approximation of the form - // P(x) = x + x^3*(c3 + x^2 * (c5 + x^2 * (... x^2 * c11) ... )). - const Packet C3 = pset1(0.3333373963832855224609375f); - const Packet C5 = pset1(0.1997792422771453857421875f); - const Packet C7 = pset1(0.14672131836414337158203125f); - const Packet C9 = pset1(8.2311116158962249755859375e-2f); - const Packet C11 = pset1(0.1819281280040740966796875f); + // P(x) = x + x^3*(alpha[4] + x^2 * (alpha[3] + x^2 * (... x^2 * alpha[0]) ... )). + constexpr float alpha[] = {0.1819281280040740966796875f, 8.2311116158962249755859375e-2f, + 0.14672131836414337158203125f, 0.1997792422771453857421875f, 0.3333373963832855224609375f}; const Packet x2 = pmul(x, x); - Packet p = pmadd(C11, x2, C9); - p = pmadd(x2, p, C7); - p = pmadd(x2, p, C5); - p = pmadd(x2, p, C3); - p = pmadd(pmul(x, x2), p, x); + const Packet x3 = pmul(x, x2); + Packet p = ppolevl::run(x2, alpha); + p = pmadd(x3, p, x); // For |x| in ]0.5:1.0] we use atanh = 0.5*ln((1+x)/(1-x)); const Packet half = pset1(0.5f); @@ -1234,30 +1278,16 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh_double(const P static_assert(std::is_same::value, "Scalar type must be double"); // For x in [-0.5:0.5] we use a rational approximation of the form // R(x) = x + x^3*P(x^2)/Q(x^2), where P is or order 4 and Q is of order 5. - const Packet p0 = pset1(1.2306328729812676e-01); - const Packet p2 = pset1(-2.5949536095445679e-01); - const Packet p4 = pset1(1.8185306179826699e-01); - const Packet p6 = pset1(-4.7129526768798737e-02); - const Packet p8 = pset1(3.3071338469301391e-03); + constexpr double alpha[] = {3.3071338469301391e-03, -4.7129526768798737e-02, 1.8185306179826699e-01, + -2.5949536095445679e-01, 1.2306328729812676e-01}; - const Packet q0 = pset1(3.6918986189438030e-01); - const Packet q2 = pset1(-1.0000000000000000e+00); - const Packet q4 = pset1(9.8733495886883648e-01); - const Packet q6 = pset1(-4.2828141436397615e-01); - const Packet q8 = pset1(7.6391885763341910e-02); - const Packet q10 = pset1(-3.8679974580640881e-03); + constexpr double beta[] = {-3.8679974580640881e-03, 7.6391885763341910e-02, -4.2828141436397615e-01, + 9.8733495886883648e-01, -1.0000000000000000e+00, 3.6918986189438030e-01}; const Packet x2 = pmul(x, x); const Packet x3 = pmul(x, x2); - Packet q = pmadd(q10, x2, q8); - Packet p = pmadd(p8, x2, p6); - q = pmadd(x2, q, q6); - p = pmadd(x2, p, p4); - q = pmadd(x2, q, q4); - p = pmadd(x2, p, p2); - q = pmadd(x2, q, q2); - p = pmadd(x2, p, p0); - q = pmadd(x2, q, q0); + Packet p = ppolevl::run(x2, alpha); + Packet q = ppolevl::run(x2, beta); Packet y_small = pmadd(x3, pdiv(p, q), x); // For |x| in ]0.5:1.0] we use atanh = 0.5*ln((1+x)/(1-x)); @@ -2200,134 +2230,6 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_pow(const Pac pselect(negate_pow_abs, pnegate(pow_abs), pow_abs))))))); } -/* polevl (modified for Eigen) - * - * Evaluate polynomial - * - * - * - * SYNOPSIS: - * - * int N; - * Scalar x, y, coef[N+1]; - * - * y = polevl( x, coef); - * - * - * - * DESCRIPTION: - * - * Evaluates polynomial of degree N: - * - * 2 N - * y = C + C x + C x +...+ C x - * 0 1 2 N - * - * Coefficients are stored in reverse order: - * - * coef[0] = C , ..., coef[N] = C . - * N 0 - * - * The function p1evl() assumes that coef[N] = 1.0 and is - * omitted from the array. Its calling arguments are - * otherwise the same as polevl(). - * - * - * The Eigen implementation is templatized. For best speed, store - * coef as a const array (constexpr), e.g. - * - * const double coef[] = {1.0, 2.0, 3.0, ...}; - * - */ -template -struct ppolevl { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x, - const typename unpacket_traits::type coeff[]) { - EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE); - return pmadd(ppolevl::run(x, coeff), x, pset1(coeff[N])); - } -}; - -template -struct ppolevl { - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x, - const typename unpacket_traits::type coeff[]) { - EIGEN_UNUSED_VARIABLE(x); - return pset1(coeff[0]); - } -}; - -/* chbevl (modified for Eigen) - * - * Evaluate Chebyshev series - * - * - * - * SYNOPSIS: - * - * int N; - * Scalar x, y, coef[N], chebevl(); - * - * y = chbevl( x, coef, N ); - * - * - * - * DESCRIPTION: - * - * Evaluates the series - * - * N-1 - * - ' - * y = > coef[i] T (x/2) - * - i - * i=0 - * - * of Chebyshev polynomials Ti at argument x/2. - * - * Coefficients are stored in reverse order, i.e. the zero - * order term is last in the array. Note N is the number of - * coefficients, not the order. - * - * If coefficients are for the interval a to b, x must - * have been transformed to x -> 2(2x - b - a)/(b-a) before - * entering the routine. This maps x from (a, b) to (-1, 1), - * over which the Chebyshev polynomials are defined. - * - * If the coefficients are for the inverted interval, in - * which (a, b) is mapped to (1/b, 1/a), the transformation - * required is x -> 2(2ab/x - b - a)/(b-a). If b is infinity, - * this becomes x -> 4a/x - 1. - * - * - * - * SPEED: - * - * Taking advantage of the recurrence properties of the - * Chebyshev polynomials, the routine requires one more - * addition per loop than evaluating a nested polynomial of - * the same degree. - * - */ - -template -struct pchebevl { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(Packet x, - const typename unpacket_traits::type coef[]) { - typedef typename unpacket_traits::type Scalar; - Packet b0 = pset1(coef[0]); - Packet b1 = pset1(static_cast(0.f)); - Packet b2; - - for (int i = 1; i < N; i++) { - b2 = b1; - b1 = b0; - b0 = psub(pmadd(x, b1, pset1(coef[i])), b2); - } - - return pmul(pset1(static_cast(0.5f)), psub(b0, b2)); - } -}; - namespace unary_pow { template ::IsInteger> diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h index 56615d367..9f12a3413 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h @@ -131,8 +131,8 @@ struct digamma_impl_maybe_poly { template <> struct digamma_impl_maybe_poly { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE float run(const float s) { - const float A[] = {-4.16666666666666666667E-3f, 3.96825396825396825397E-3f, -8.33333333333333333333E-3f, - 8.33333333333333333333E-2f}; + constexpr float A[] = {-4.16666666666666666667E-3f, 3.96825396825396825397E-3f, -8.33333333333333333333E-3f, + 8.33333333333333333333E-2f}; float z; if (s < 1.0e8f) { @@ -146,9 +146,9 @@ struct digamma_impl_maybe_poly { template <> struct digamma_impl_maybe_poly { EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE double run(const double s) { - const double A[] = {8.33333333333333333333E-2, -2.10927960927960927961E-2, 7.57575757575757575758E-3, - -4.16666666666666666667E-3, 3.96825396825396825397E-3, -8.33333333333333333333E-3, - 8.33333333333333333333E-2}; + constexpr double A[] = {8.33333333333333333333E-2, -2.10927960927960927961E-2, 7.57575757575757575758E-3, + -4.16666666666666666667E-3, 3.96825396825396825397E-3, -8.33333333333333333333E-3, + 8.33333333333333333333E-2}; double z; if (s < 1.0e17) { @@ -282,20 +282,14 @@ struct digamma_impl { template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_fast_erf_float(const T& x) { // The monomial coefficients of the numerator polynomial (odd). - const T alpha_1 = pset1(1.128379106521606445312500000000000000e+00f); - const T alpha_3 = pset1(1.874160766601562500000000000000000000e-01f); - const T alpha_5 = pset1(5.243302136659622192382812500000000000e-02f); - const T alpha_7 = pset1(3.658048342913389205932617187500000000e-03f); - const T alpha_9 = pset1(2.861979592125862836837768554687500000e-04f); - const T alpha_11 = pset1(2.123732201653183437883853912353515625e-06f); + constexpr float alpha[] = {2.123732201653183437883853912353515625e-06f, 2.861979592125862836837768554687500000e-04f, + 3.658048342913389205932617187500000000e-03f, 5.243302136659622192382812500000000000e-02f, + 1.874160766601562500000000000000000000e-01f, 1.128379106521606445312500000000000000e+00f}; // The monomial coefficients of the denominator polynomial (even). - const T beta_0 = pset1(1.0f); - const T beta_2 = pset1(4.99425798654556274414062500000e-01f); - const T beta_4 = pset1(1.12945675849914550781250000000e-01f); - const T beta_6 = pset1(1.47520881146192550659179687500e-02f); - const T beta_8 = pset1(1.14329601638019084930419921875e-03f); - const T beta_10 = pset1(3.89185734093189239501953125000e-05f); + constexpr float beta[] = {3.89185734093189239501953125000e-05f, 1.14329601638019084930419921875e-03f, + 1.47520881146192550659179687500e-02f, 1.12945675849914550781250000000e-01f, + 4.99425798654556274414062500000e-01f, 1.0f}; // Since the polynomials are odd/even, we need x^2. // Since erf(4) == 1 in float, we clamp x^2 to 16 to avoid @@ -303,19 +297,11 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_fast_erf_float(const T& x) { const T x2 = pmin(pset1(16.0f), pmul(x, x)); // Evaluate the numerator polynomial p. - T p = pmadd(x2, alpha_11, alpha_9); - p = pmadd(x2, p, alpha_7); - p = pmadd(x2, p, alpha_5); - p = pmadd(x2, p, alpha_3); - p = pmadd(x2, p, alpha_1); + T p = ppolevl::run(x2, alpha); p = pmul(x, p); // Evaluate the denominator polynomial p. - T q = pmadd(x2, beta_10, beta_8); - q = pmadd(x2, q, beta_6); - q = pmadd(x2, q, beta_4); - q = pmadd(x2, q, beta_2); - q = pmadd(x2, q, beta_0); + T q = ppolevl::run(x2, beta); const T r = pdiv(p, q); // Clamp to [-1:1]. @@ -475,18 +461,18 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float flipsign(const float& should_ template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_ndtri_gt_exp_neg_two(const T& b) { - const ScalarType p0[] = {ScalarType(-5.99633501014107895267e1), ScalarType(9.80010754185999661536e1), - ScalarType(-5.66762857469070293439e1), ScalarType(1.39312609387279679503e1), - ScalarType(-1.23916583867381258016e0)}; - const ScalarType q0[] = {ScalarType(1.0), - ScalarType(1.95448858338141759834e0), - ScalarType(4.67627912898881538453e0), - ScalarType(8.63602421390890590575e1), - ScalarType(-2.25462687854119370527e2), - ScalarType(2.00260212380060660359e2), - ScalarType(-8.20372256168333339912e1), - ScalarType(1.59056225126211695515e1), - ScalarType(-1.18331621121330003142e0)}; + constexpr ScalarType p0[] = {ScalarType(-5.99633501014107895267e1), ScalarType(9.80010754185999661536e1), + ScalarType(-5.66762857469070293439e1), ScalarType(1.39312609387279679503e1), + ScalarType(-1.23916583867381258016e0)}; + constexpr ScalarType q0[] = {ScalarType(1.0), + ScalarType(1.95448858338141759834e0), + ScalarType(4.67627912898881538453e0), + ScalarType(8.63602421390890590575e1), + ScalarType(-2.25462687854119370527e2), + ScalarType(2.00260212380060660359e2), + ScalarType(-8.20372256168333339912e1), + ScalarType(1.59056225126211695515e1), + ScalarType(-1.18331621121330003142e0)}; const T sqrt2pi = pset1(ScalarType(2.50662827463100050242e0)); const T half = pset1(ScalarType(0.5)); T c, c2, ndtri_gt_exp_neg_two; @@ -503,20 +489,20 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_ndtri_lt_exp_neg_two(const T& b, /* Approximation for interval z = sqrt(-2 log a ) between 2 and 8 * i.e., a between exp(-2) = .135 and exp(-32) = 1.27e-14. */ - const ScalarType p1[] = {ScalarType(4.05544892305962419923e0), ScalarType(3.15251094599893866154e1), - ScalarType(5.71628192246421288162e1), ScalarType(4.40805073893200834700e1), - ScalarType(1.46849561928858024014e1), ScalarType(2.18663306850790267539e0), - ScalarType(-1.40256079171354495875e-1), ScalarType(-3.50424626827848203418e-2), - ScalarType(-8.57456785154685413611e-4)}; - const ScalarType q1[] = {ScalarType(1.0), - ScalarType(1.57799883256466749731e1), - ScalarType(4.53907635128879210584e1), - ScalarType(4.13172038254672030440e1), - ScalarType(1.50425385692907503408e1), - ScalarType(2.50464946208309415979e0), - ScalarType(-1.42182922854787788574e-1), - ScalarType(-3.80806407691578277194e-2), - ScalarType(-9.33259480895457427372e-4)}; + constexpr ScalarType p1[] = {ScalarType(4.05544892305962419923e0), ScalarType(3.15251094599893866154e1), + ScalarType(5.71628192246421288162e1), ScalarType(4.40805073893200834700e1), + ScalarType(1.46849561928858024014e1), ScalarType(2.18663306850790267539e0), + ScalarType(-1.40256079171354495875e-1), ScalarType(-3.50424626827848203418e-2), + ScalarType(-8.57456785154685413611e-4)}; + constexpr ScalarType q1[] = {ScalarType(1.0), + ScalarType(1.57799883256466749731e1), + ScalarType(4.53907635128879210584e1), + ScalarType(4.13172038254672030440e1), + ScalarType(1.50425385692907503408e1), + ScalarType(2.50464946208309415979e0), + ScalarType(-1.42182922854787788574e-1), + ScalarType(-3.80806407691578277194e-2), + ScalarType(-9.33259480895457427372e-4)}; /* Approximation for interval z = sqrt(-2 log a ) between 8 and 64 * i.e., a between exp(-32) = 1.27e-14 and exp(-2048) = 3.67e-890. */ @@ -1267,7 +1253,7 @@ struct zeta_impl { int i; Scalar p, r, a, b, k, s, t, w; - const Scalar A[] = { + constexpr Scalar A[] = { Scalar(12.0), Scalar(-720.0), Scalar(30240.0),