Clamp the input of the tanh approximation to the range [-c, c], where c is the smallest value at which the approximation is exactly +/-1. Without FMA, c = 7.90531110763549805; with FMA, c = 7.99881172180175781.

This commit is contained in:
Rasmus Munk Larsen 2019-12-12 19:34:25 +00:00
parent 88062b7fed
commit 73a8d572f5

View File

@@ -17,19 +17,25 @@ namespace internal {
/** \internal \returns the hyperbolic tan of \a a (coeff-wise) /** \internal \returns the hyperbolic tan of \a a (coeff-wise)
Doesn't do anything fancy, just a 13/6-degree rational interpolant which Doesn't do anything fancy, just a 13/6-degree rational interpolant which
is accurate up to a couple of ulp in the range [-9, 9], outside of which is accurate up to a couple of ulps in the (approximate) range [-8, 8],
the tanh(x) = +/-1. outside of which tanh(x) = +/-1 in single precision. This is done by
clamping the inputs to the range [-c, c]. The value c is chosen as the smallest
value where the approximation evaluates to exactly 1.
This implementation works on both scalars and packets. This implementation works on both scalars and packets.
*/ */
template<typename T> template<typename T>
T generic_fast_tanh_float(const T& a_x) T generic_fast_tanh_float(const T& a_x)
{ {
// Clamp the inputs to the range [-9, 9] since anything outside // Clamp the inputs to the range [-c, c]
// this range is +/-1.0f in single-precision. #ifdef EIGEN_VECTORIZE_FMA
const T plus_9 = pset1<T>(9.f); const T plus_clamp = pset1<T>(7.99881172180175781);
const T minus_9 = pset1<T>(-9.f); const T minus_clamp = pset1<T>(-7.99881172180175781);
const T x = pmax(pmin(a_x, plus_9), minus_9); #else
const T plus_clamp = pset1<T>(7.90531110763549805);
const T minus_clamp = pset1<T>(-7.90531110763549805);
#endif
const T x = pmax(pmin(a_x, plus_clamp), minus_clamp);
// The monomial coefficients of the numerator polynomial (odd). // The monomial coefficients of the numerator polynomial (odd).
const T alpha_1 = pset1<T>(4.89352455891786e-03f); const T alpha_1 = pset1<T>(4.89352455891786e-03f);
const T alpha_3 = pset1<T>(6.37261928875436e-04f); const T alpha_3 = pset1<T>(6.37261928875436e-04f);