Implement a faster fix for sin/cos of large entries that also correctly handle INF input.

This commit is contained in:
Gael Guennebaud 2018-12-23 17:26:21 +01:00
parent 38d704def8
commit 0f6f75bd8a
2 changed files with 23 additions and 6 deletions

View File

@ -289,15 +289,9 @@ Packet psincos_float(const Packet& _x)
const Packet cst_coscof_p1 = pset1<Packet>(-1.388731625493765E-003f); const Packet cst_coscof_p1 = pset1<Packet>(-1.388731625493765E-003f);
const Packet cst_coscof_p2 = pset1<Packet>( 4.166664568298827E-002f); const Packet cst_coscof_p2 = pset1<Packet>( 4.166664568298827E-002f);
const Packet cst_cephes_FOPI = pset1<Packet>( 1.27323954473516f); // 4 / M_PI const Packet cst_cephes_FOPI = pset1<Packet>( 1.27323954473516f); // 4 / M_PI
const Packet cst_sincos_max_arg = pset1<Packet>( 13176795.0f); // Approx. (2**24) / (4/Pi).
Packet x = pabs(_x); Packet x = pabs(_x);
// Prevent sin/cos from generating values larger than 1.0 in magnitude
// for very large arguments by setting x to 0.0.
Packet small_or_nan_mask = pcmp_lt_or_nan(x, cst_sincos_max_arg);
x = pand(x, small_or_nan_mask);
// Scale x by 4/Pi to find x's octant. // Scale x by 4/Pi to find x's octant.
Packet y = pmul(x, cst_cephes_FOPI); Packet y = pmul(x, cst_cephes_FOPI);
@ -348,6 +342,13 @@ Packet psincos_float(const Packet& _x)
y = ComputeSine ? pselect(poly_mask,y2,y1) y = ComputeSine ? pselect(poly_mask,y2,y1)
: pselect(poly_mask,y1,y2); : pselect(poly_mask,y1,y2);
// For very large arguments the the reduction to the [-Pi/4,+Pi/4] range
// does not work thus leading to sine/cosine out of the [-1:1] range.
// Since computing the sine/cosine for very large entry entries makes little
// sense in term of accuracy, we simply clamp to [-1,1]:
y = pmin(y,pset1<Packet>( 1.f));
y = pmax(y,pset1<Packet>(-1.f));
// Update the sign // Update the sign
return pxor(y, sign_bit); return pxor(y, sign_bit);
} }

View File

@ -579,6 +579,22 @@ template<typename Scalar,typename Packet> void packetmath_real()
VERIFY(data2[0]<=Scalar(1.) && data2[0]>=Scalar(-1.)); VERIFY(data2[0]<=Scalar(1.) && data2[0]>=Scalar(-1.));
VERIFY(data2[1]<=Scalar(1.) && data2[1]>=Scalar(-1.)); VERIFY(data2[1]<=Scalar(1.) && data2[1]>=Scalar(-1.));
} }
data1[0] = std::numeric_limits<Scalar>::infinity();
data1[1] = -std::numeric_limits<Scalar>::infinity();
h.store(data2, internal::psin(h.load(data1)));
VERIFY((numext::isnan)(data2[0]));
VERIFY((numext::isnan)(data2[1]));
h.store(data2, internal::pcos(h.load(data1)));
VERIFY((numext::isnan)(data2[0]));
VERIFY((numext::isnan)(data2[1]));
data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
h.store(data2, internal::psin(h.load(data1)));
VERIFY((numext::isnan)(data2[0]));
h.store(data2, internal::pcos(h.load(data1)));
VERIFY((numext::isnan)(data2[0]));
} }
} }
} }