From a097f728fe8a6b87dddd33f205839eb617796238 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Fri, 4 Oct 2024 12:15:23 -0700 Subject: [PATCH] Avoid producing erf(x) = NaN for large |x|. --- .../Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h index 837c32d0f..56615d367 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h @@ -298,7 +298,9 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_fast_erf_float(const T& x) { const T beta_10 = pset1(3.89185734093189239501953125000e-05f); // Since the polynomials are odd/even, we need x^2. - const T x2 = pmul(x, x); + // Since erf(4) == 1 in float, we clamp x^2 to 16 to avoid + // computing Inf/Inf below. + const T x2 = pmin(pset1(16.0f), pmul(x, x)); // Evaluate the numerator polynomial p. T p = pmadd(x2, alpha_11, alpha_9); @@ -307,13 +309,13 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_fast_erf_float(const T& x) { p = pmadd(x2, p, alpha_3); p = pmadd(x2, p, alpha_1); p = pmul(x, p); + // Evaluate the denominator polynomial p. T q = pmadd(x2, beta_10, beta_8); q = pmadd(x2, q, beta_6); q = pmadd(x2, q, beta_4); q = pmadd(x2, q, beta_2); q = pmadd(x2, q, beta_0); - const T r = pdiv(p, q); // Clamp to [-1:1].