Modified sqrt/rsqrt for denormal handling.

2025-10-09 22:56:31 +08:00 · 2022-03-02 17:20:47 +00:00 · 2022-03-02 17:20:47 +00:00 · 9c07e201ff
commit 9c07e201ff
parent 1c2690ed24
4 changed files with 58 additions and 86 deletions
--- a/Eigen/src/Core/MathFunctionsImpl.h
+++ b/Eigen/src/Core/MathFunctionsImpl.h
@ -83,20 +83,20 @@ struct generic_rsqrt_newton_step {
    using Scalar = typename unpacket_traits<Packet>::type;
    const Packet one_point_five = pset1<Packet>(Scalar(1.5));
    const Packet minus_half = pset1<Packet>(Scalar(-0.5));
-    const Packet minus_half_a = pmul(minus_half, a);
-    const Scalar norm_min = (std::numeric_limits<Scalar>::min)();
-    const Packet denorm_mask = pcmp_lt(a, pset1<Packet>(norm_min));
-    Packet x =
-         generic_rsqrt_newton_step<Packet,Steps - 1>::run(a, approx_rsqrt);
-     const Packet tmp = pmul(minus_half_a, x);
-     // If tmp is NaN, it means that a is either 0 or Inf.
-     // In this case return the approximation directly. Do the same for
-     // positive subnormals. Otherwise return the Newton iterate.
-     const Packet return_x_newton = pandnot(pcmp_eq(tmp, tmp), denorm_mask);
+    
    // Refine the approximation using one Newton-Raphson step:
-    //   x_{n+1} = x_n * (1.5 - x_n * ((0.5 * a) * x_n)).
-     const Packet x_newton  = pmul(x, pmadd(tmp, x, one_point_five));
-     return pselect(return_x_newton, x_newton, x);
+    //   x_{n+1} = x_n * (1.5 + (-0.5 * x_n) * (a * x_n)).
+    // The approximation is expressed this way to avoid over/under-flows.  
+    Packet x_newton  = pmul(approx_rsqrt, pmadd(pmul(minus_half, approx_rsqrt), pmul(a, approx_rsqrt), one_point_five));
+    for (int step = 1; step < Steps; ++step) {
+      x_newton  = pmul(x_newton, pmadd(pmul(minus_half, x_newton), pmul(a, x_newton), one_point_five));
+    }
+    
+    // If approx_rsqrt is 0 or +/-inf, we should return it as is.  Note:
+    // on intel, approx_rsqrt can be inf for small denormal values.
+    const Packet return_approx = por(pcmp_eq(approx_rsqrt, pzero(a)),
+                                     pcmp_eq(pabs(approx_rsqrt), pset1<Packet>(NumTraits<Scalar>::infinity())));
+    return pselect(return_approx, approx_rsqrt, x_newton);
  }
 };

@ -132,27 +132,21 @@ struct generic_sqrt_newton_step {
  run(const Packet& a, const Packet& approx_rsqrt) {
    using Scalar = typename unpacket_traits<Packet>::type;
    const Packet one_point_five = pset1<Packet>(Scalar(1.5));
-    const Packet negative_mask = pcmp_lt(a, pzero(a));
-    const Scalar norm_min = (std::numeric_limits<Scalar>::min)();
-    const Packet denorm_mask = pcmp_lt(a, pset1<Packet>(norm_min));
-    // Set negative arguments to NaN and positive subnormals to zero.
-    const Packet a_poisoned = por(pandnot(a, denorm_mask), negative_mask);
-    const Packet minus_half_a = pmul(a_poisoned, pset1<Packet>(Scalar(-0.5)));
-
+    const Packet minus_half = pset1<Packet>(Scalar(-0.5));
+    // If a is inf or zero, return a directly.
+    const Packet inf_mask = pcmp_eq(a, pset1<Packet>(NumTraits<Scalar>::infinity()));
+    const Packet return_a = por(pcmp_eq(a, pzero(a)), inf_mask);
    // Do a single step of Newton's iteration for reciprocal square root:
-    //   x_{n+1} = x_n * (1.5 - x_n * ((0.5 * a) * x_n)).
-    const Packet tmp = pmul(approx_rsqrt, minus_half_a);
-    // If tmp is NaN, it means that the argument was either 0 or +inf,
-    // and we should return the argument itself as the result.
-    const Packet return_rsqrt = pcmp_eq(tmp, tmp);
-    Packet rsqrt = pmul(approx_rsqrt, pmadd(tmp, approx_rsqrt, one_point_five));
+    //   x_{n+1} = x_n * (1.5 + (-0.5 * x_n) * (a * x_n))).
+    // The Newton's step is computed this way to avoid over/under-flows.
+    Packet rsqrt = pmul(approx_rsqrt, pmadd(pmul(minus_half, approx_rsqrt), pmul(a, approx_rsqrt), one_point_five));
    for (int step = 1; step < Steps; ++step) {
-      rsqrt = pmul(rsqrt, pmadd(pmul(rsqrt, minus_half_a), rsqrt, one_point_five));
+      rsqrt = pmul(rsqrt, pmadd(pmul(minus_half, rsqrt), pmul(a, rsqrt), one_point_five));
    }

    // Return sqrt(x) = x * rsqrt(x) for non-zero finite positive arguments.
    // Return a itself for 0 or +inf, NaN for negative arguments.
-    return pselect(return_rsqrt, pmul(a_poisoned, rsqrt), a_poisoned);
+    return pselect(return_a, a, pmul(a, rsqrt));
  }
 };

--- a/Eigen/src/Core/arch/AVX/MathFunctions.h
+++ b/Eigen/src/Core/arch/AVX/MathFunctions.h
@ -108,6 +108,12 @@ Packet4d psqrt<Packet4d>(const Packet4d& _x) {
 #if EIGEN_FAST_MATH
 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
 Packet8f prsqrt<Packet8f>(const Packet8f& a) {
+  // _mm256_rsqrt_ps returns -inf for negative denormals.
+  // _mm512_rsqrt**_ps returns -NaN for negative denormals.  We may want
+  // consistency here.
+  // const Packet8f rsqrt = pselect(pcmp_lt(a, pzero(a)),
+  //                                pset1<Packet8f>(-NumTraits<float>::quiet_NaN()),
+  //                                _mm256_rsqrt_ps(a));
  return generic_rsqrt_newton_step<Packet8f, /*Steps=*/1>::run(a, _mm256_rsqrt_ps(a));
 }

--- a/Eigen/src/Core/arch/AVX512/MathFunctions.h
+++ b/Eigen/src/Core/arch/AVX512/MathFunctions.h
@ -207,44 +207,16 @@ BF16_PACKET_FUNCTION(Packet16f, Packet16bf, prsqrt)
 template <>
 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8d
 prsqrt<Packet8d>(const Packet8d& _x) {
-  EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
-  EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
-  EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(inf, 0x7ff0000000000000LL);
-
-  Packet8d neg_half = pmul(_x, p8d_minus_half);
-
-  // Identity infinite, negative and denormal arguments.
-  __mmask8 inf_mask = _mm512_cmp_pd_mask(_x, p8d_inf, _CMP_EQ_OQ);
-  __mmask8 not_pos_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LE_OQ);
-  __mmask8 not_finite_pos_mask = not_pos_mask | inf_mask;
-
-  // Compute an approximate result using the rsqrt intrinsic, forcing +inf
-  // for denormals for consistency with AVX and SSE implementations.
-#if defined(EIGEN_VECTORIZE_AVX512ER)
-  Packet8d y_approx = _mm512_rsqrt28_pd(_x);
-#else
-  Packet8d y_approx = _mm512_rsqrt14_pd(_x);
-#endif
-  // Do one or two steps of Newton-Raphson's to improve the approximation, depending on the
-  // starting accuracy (either 2^-14 or 2^-28, depending on whether AVX512ER is available).
-  // The Newton-Raphson algorithm has quadratic convergence and roughly doubles the number
-  // of correct digits for each step.
-  // This uses the formula y_{n+1} = y_n * (1.5 - y_n * (0.5 * x) * y_n).
-  // It is essential to evaluate the inner term like this because forming
-  // y_n^2 may over- or underflow.
-  Packet8d y_newton = pmul(y_approx, pmadd(neg_half, pmul(y_approx, y_approx), p8d_one_point_five));
-#if !defined(EIGEN_VECTORIZE_AVX512ER)
-  y_newton = pmul(y_newton, pmadd(y_newton, pmul(neg_half, y_newton), p8d_one_point_five));
-#endif
-  // Select the result of the Newton-Raphson step for positive finite arguments.
-  // For other arguments, choose the output of the intrinsic. This will
-  // return rsqrt(+inf) = 0, rsqrt(x) = NaN if x < 0, and rsqrt(0) = +inf.
-  return _mm512_mask_blend_pd(not_finite_pos_mask, y_newton, y_approx);
+  #ifdef EIGEN_VECTORIZE_AVX512ER
+    return generic_rsqrt_newton_step<Packet8d, /*Steps=*/1>::run(_x, _mm512_rsqrt28_pd(_x));
+  #else
+    return generic_rsqrt_newton_step<Packet8d, /*Steps=*/2>::run(_x, _mm512_rsqrt14_pd(_x));
+  #endif
 }

 template<> EIGEN_STRONG_INLINE Packet16f preciprocal<Packet16f>(const Packet16f& a) {
 #ifdef EIGEN_VECTORIZE_AVX512ER
-  return _mm512_rcp28_ps(a));
+  return _mm512_rcp28_ps(a);
 #else
  return generic_reciprocal_newton_step<Packet16f, /*Steps=*/1>::run(a, _mm512_rcp14_ps(a));
 #endif
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@ -682,6 +682,7 @@ void packetmath_test_IEEE_corner_cases(const RefFunctorT& ref_fun,
                                       const FunctorT& fun) {
  const int PacketSize = internal::unpacket_traits<Packet>::size;
  const Scalar norm_min = (std::numeric_limits<Scalar>::min)();
+  const Scalar norm_max = (std::numeric_limits<Scalar>::max)();

  constexpr int size = PacketSize * 2;
  EIGEN_ALIGN_MAX Scalar data1[size];
@ -694,37 +695,36 @@ void packetmath_test_IEEE_corner_cases(const RefFunctorT& ref_fun,
  // Test for subnormals.
  if (std::numeric_limits<Scalar>::has_denorm == std::denorm_present) {

-    // When EIGEN_FAST_MATH is 1 we relax the conditions slightly, and allow the function
-    // to return the same value for subnormals as the reference would return for zero with
-    // the same sign as the input.
-    // TODO(rmlarsen): Currently we ignore the error that occurs if the input is equal to
-    // denorm_min. Specifically, the term 0.5*x in the Newton iteration for reciprocal sqrt
-    // underflows to zero and the result ends up a factor of 2 too large.
-#if EIGEN_FAST_MATH
-    // TODO(rmlarsen): Remove factor of 2 here if we can fix the underflow in reciprocal sqrt.
-    data1[0] = Scalar(2) * std::numeric_limits<Scalar>::denorm_min();
-    data1[1] = -data1[0];
-    test::packet_helper<Cond, Packet> h;
-    h.store(data2, fun(h.load(data1)));
-    for (int i=0; i < 2; ++i) {
-      const Scalar ref_zero = ref_fun(data1[i] < 0 ? -Scalar(0) : Scalar(0));
-      const Scalar ref_val = ref_fun(data1[i]);
-      // TODO(rmlarsen): Remove debug cruft.
-      //      std::cerr << "x = " << data1[i] << "y = " << data2[i] << ", ref_val = " << ref_val << ", ref_zero " << ref_zero << std::endl;
-      VERIFY(((std::isnan)(data2[i]) && (std::isnan)(ref_val)) || data2[i] == ref_zero ||
-             verifyIsApprox(data2[i], ref_val));
+    for (int scale = 1; scale < 5; ++scale) {
+      // When EIGEN_FAST_MATH is 1 we relax the conditions slightly, and allow the function
+      // to return the same value for subnormals as the reference would return for zero with
+      // the same sign as the input.
+      #if EIGEN_FAST_MATH
+        data1[0] = Scalar(scale) * std::numeric_limits<Scalar>::denorm_min();
+        data1[1] = -data1[0];
+        test::packet_helper<Cond, Packet> h;
+        h.store(data2, fun(h.load(data1)));
+        for (int i=0; i < PacketSize; ++i) {
+          const Scalar ref_zero = ref_fun(data1[i] < 0 ? -Scalar(0) : Scalar(0));
+          const Scalar ref_val = ref_fun(data1[i]);
+          VERIFY(((std::isnan)(data2[i]) && (std::isnan)(ref_val)) || data2[i] == ref_zero ||
+                verifyIsApprox(data2[i], ref_val));
+        }
+      #else
+        CHECK_CWISE1_IF(Cond, ref_fun, fun);
+      #endif
    }
-#else
-    data1[0] = std::numeric_limits<Scalar>::denorm_min();
-    data1[1] = -data1[0];
-    CHECK_CWISE1_IF(Cond, ref_fun, fun);
-#endif
  }

  // Test for smallest normalized floats.
  data1[0] = norm_min;
  data1[1] = -data1[0];
  CHECK_CWISE1_IF(Cond, ref_fun, fun);
+  
+  // Test for largest floats.
+  data1[0] = norm_max;
+  data1[1] = -data1[0];
+  CHECK_CWISE1_IF(Cond, ref_fun, fun);

  // Test for zeros.
  data1[0] = Scalar(0.0);