Implement a faster fix for sin/cos of large entries that also correctly handle INF input.

2025-04-22 01:29:35 +08:00 · 2018-12-23 17:26:21 +01:00 · 2018-12-23 17:26:21 +01:00 · 0f6f75bd8a
commit 0f6f75bd8a
parent 38d704def8
2 changed files with 23 additions and 6 deletions
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@ -289,15 +289,9 @@ Packet psincos_float(const Packet& _x)
  const Packet cst_coscof_p1        = pset1<Packet>(-1.388731625493765E-003f);
  const Packet cst_coscof_p2        = pset1<Packet>( 4.166664568298827E-002f);
  const Packet cst_cephes_FOPI      = pset1<Packet>( 1.27323954473516f); // 4 / M_PI
  const Packet cst_sincos_max_arg   = pset1<Packet>( 13176795.0f);  // Approx. (2**24) / (4/Pi).
  Packet x = pabs(_x);
  // Prevent sin/cos from generating values larger than 1.0 in magnitude
  // for very large arguments by setting x to 0.0.
  Packet small_or_nan_mask = pcmp_lt_or_nan(x, cst_sincos_max_arg);
  x = pand(x, small_or_nan_mask);
  // Scale x by 4/Pi to find x's octant.
  Packet y = pmul(x, cst_cephes_FOPI);
@ -348,6 +342,13 @@ Packet psincos_float(const Packet& _x)
  y = ComputeSine ? pselect(poly_mask,y2,y1)
                  : pselect(poly_mask,y1,y2);
  // For very large arguments the the reduction to the [-Pi/4,+Pi/4] range
  // does not work thus leading to sine/cosine out of the [-1:1] range.
  // Since computing the sine/cosine for very large entry entries makes little
  // sense in term of accuracy, we simply clamp to [-1,1]:
  y = pmin(y,pset1<Packet>( 1.f));
  y = pmax(y,pset1<Packet>(-1.f));
  // Update the sign
  return pxor(y, sign_bit);
 }
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@ -579,6 +579,22 @@ template<typename Scalar,typename Packet> void packetmath_real()
        VERIFY(data2[0]<=Scalar(1.) && data2[0]>=Scalar(-1.));
        VERIFY(data2[1]<=Scalar(1.) && data2[1]>=Scalar(-1.));
      }
      data1[0] =  std::numeric_limits<Scalar>::infinity();
      data1[1] = -std::numeric_limits<Scalar>::infinity();
      h.store(data2, internal::psin(h.load(data1)));
      VERIFY((numext::isnan)(data2[0]));
      VERIFY((numext::isnan)(data2[1]));
      h.store(data2, internal::pcos(h.load(data1)));
      VERIFY((numext::isnan)(data2[0]));
      VERIFY((numext::isnan)(data2[1]));
      data1[0] =  std::numeric_limits<Scalar>::quiet_NaN();
      h.store(data2, internal::psin(h.load(data1)));
      VERIFY((numext::isnan)(data2[0]));
      h.store(data2, internal::pcos(h.load(data1)));
      VERIFY((numext::isnan)(data2[0]));
    }
  }
 }