mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-09-13 01:43:13 +08:00
Clamp tanh approximation outside [-c, c] where c is the smallest value where the approximation is exactly +/-1. Without FMA, c = 7.90531110763549805, with FMA c = 7.99881172180175781.
This commit is contained in:
parent
88062b7fed
commit
73a8d572f5
@ -17,19 +17,25 @@ namespace internal {
|
||||
|
||||
/** \internal \returns the hyperbolic tan of \a a (coeff-wise)
|
||||
Doesn't do anything fancy, just a 13/6-degree rational interpolant which
|
||||
is accurate up to a couple of ulp in the range [-9, 9], outside of which
|
||||
the tanh(x) = +/-1.
|
||||
is accurate up to a couple of ulps in the (approximate) range [-8, 8],
|
||||
outside of which tanh(x) = +/-1 in single precision. This is done by
|
||||
clamping the inputs to the range [-c, c]. The value c is chosen as the smallest
|
||||
value where the approximation evaluates to exactly 1.
|
||||
|
||||
This implementation works on both scalars and packets.
|
||||
*/
|
||||
template<typename T>
|
||||
T generic_fast_tanh_float(const T& a_x)
|
||||
{
|
||||
// Clamp the inputs to the range [-9, 9] since anything outside
|
||||
// this range is +/-1.0f in single-precision.
|
||||
const T plus_9 = pset1<T>(9.f);
|
||||
const T minus_9 = pset1<T>(-9.f);
|
||||
const T x = pmax(pmin(a_x, plus_9), minus_9);
|
||||
// Clamp the inputs to the range [-c, c]
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
const T plus_clamp = pset1<T>(7.99881172180175781);
|
||||
const T minus_clamp = pset1<T>(-7.99881172180175781);
|
||||
#else
|
||||
const T plus_clamp = pset1<T>(7.90531110763549805);
|
||||
const T minus_clamp = pset1<T>(-7.90531110763549805);
|
||||
#endif
|
||||
const T x = pmax(pmin(a_x, plus_clamp), minus_clamp);
|
||||
// The monomial coefficients of the numerator polynomial (odd).
|
||||
const T alpha_1 = pset1<T>(4.89352455891786e-03f);
|
||||
const T alpha_3 = pset1<T>(6.37261928875436e-04f);
|
||||
|
Loading…
x
Reference in New Issue
Block a user