mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-19 08:09:36 +08:00
Improve exp<float>(): Don't flush denormal results +4% speedup.
1. Speed up exp(x) by reducing the polynomial approximant from degree 7 to degree 6. With exactly representable coefficients computed by the Sollya tool, this still gives a maximum relative error of 1 ulp, i.e. faithfully rounded, for arguments where exp(x) is a normalized float. This change results in a speedup of about 4% for AVX2. 2. Extend the range where exp(x) returns a non-zero result to from ~[-88;88] to ~[-104;88] i.e. return denormalized values for large negative arguments instead of zero. Compared to exp<double>(x) the denormalized results gradually decrease in accuracy down to 0.033 relative error for arguments around x = -104 where exp(x) is ~std::numeric<float>::denorm_min(). This is expected and acceptable.
This commit is contained in:
parent
6e95c0cd9a
commit
8eab7b6886
@ -435,24 +435,24 @@ Packet generic_expm1(const Packet& x)
|
||||
// Exponential function. Works by writing "x = m*log(2) + r" where
|
||||
// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
|
||||
// "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1).
|
||||
// exp(r) is computed using a 6th order minimax polynomial approximation.
|
||||
template <typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
EIGEN_UNUSED
|
||||
Packet pexp_float(const Packet _x)
|
||||
{
|
||||
const Packet cst_zero = pset1<Packet>(0.0f);
|
||||
const Packet cst_1 = pset1<Packet>(1.0f);
|
||||
const Packet cst_one = pset1<Packet>(1.0f);
|
||||
const Packet cst_half = pset1<Packet>(0.5f);
|
||||
const Packet cst_exp_hi = pset1<Packet>( 88.723f);
|
||||
const Packet cst_exp_lo = pset1<Packet>(-88.723f);
|
||||
const Packet cst_exp_hi = pset1<Packet>(88.723f);
|
||||
const Packet cst_exp_lo = pset1<Packet>(-104.f);
|
||||
|
||||
const Packet cst_cephes_LOG2EF = pset1<Packet>(1.44269504088896341f);
|
||||
const Packet cst_cephes_exp_p0 = pset1<Packet>(1.9875691500E-4f);
|
||||
const Packet cst_cephes_exp_p1 = pset1<Packet>(1.3981999507E-3f);
|
||||
const Packet cst_cephes_exp_p2 = pset1<Packet>(8.3334519073E-3f);
|
||||
const Packet cst_cephes_exp_p3 = pset1<Packet>(4.1665795894E-2f);
|
||||
const Packet cst_cephes_exp_p4 = pset1<Packet>(1.6666665459E-1f);
|
||||
const Packet cst_cephes_exp_p5 = pset1<Packet>(5.0000001201E-1f);
|
||||
const Packet cst_p2 = pset1<Packet>(0.49999988079071044921875f);
|
||||
const Packet cst_p3 = pset1<Packet>(0.16666518151760101318359375f);
|
||||
const Packet cst_p4 = pset1<Packet>(4.166965186595916748046875e-2f);
|
||||
const Packet cst_p5 = pset1<Packet>(8.36894474923610687255859375e-3f);
|
||||
const Packet cst_p6 = pset1<Packet>(1.37449637986719608306884765625e-3f);
|
||||
|
||||
// Clamp x.
|
||||
Packet zero_mask = pcmp_lt(_x, cst_exp_lo);
|
||||
@ -470,18 +470,15 @@ Packet pexp_float(const Packet _x)
|
||||
Packet r = pmadd(m, cst_cephes_exp_C1, x);
|
||||
r = pmadd(m, cst_cephes_exp_C2, r);
|
||||
|
||||
Packet r2 = pmul(r, r);
|
||||
Packet r3 = pmul(r2, r);
|
||||
|
||||
// Evaluate the polynomial approximant,improved by instruction-level parallelism.
|
||||
Packet y, y1, y2;
|
||||
y = pmadd(cst_cephes_exp_p0, r, cst_cephes_exp_p1);
|
||||
y1 = pmadd(cst_cephes_exp_p3, r, cst_cephes_exp_p4);
|
||||
y2 = padd(r, cst_1);
|
||||
y = pmadd(y, r, cst_cephes_exp_p2);
|
||||
y1 = pmadd(y1, r, cst_cephes_exp_p5);
|
||||
y = pmadd(y, r3, y1);
|
||||
y = pmadd(y, r2, y2);
|
||||
// Evaluate the 6th order polynomial approximation to exp(r)
|
||||
// with r in the interval [-ln(2)/2;ln(2)/2].
|
||||
const Packet r2 = pmul(r, r);
|
||||
Packet p_even = pmadd(r2, cst_p6, cst_p4);
|
||||
const Packet p_odd = pmadd(r2, cst_p5, cst_p3);
|
||||
p_even = pmadd(r2, p_even, cst_p2);
|
||||
const Packet p_low = padd(r, cst_one);
|
||||
Packet y = pmadd(r, p_odd, p_even);
|
||||
y = pmadd(r2, y, p_low);
|
||||
|
||||
// Return 2^m * exp(r).
|
||||
// TODO: replace pldexp with faster implementation since y in [-1, 1).
|
||||
|
Loading…
x
Reference in New Issue
Block a user