mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-07-04 12:15:11 +08:00
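Avoid underflow in prsqrt: the Newton-Raphson step now computes (x * inv_sqrt) * inv_sqrt instead of x * (inv_sqrt * inv_sqrt), so the squared estimate inv_sqrt * inv_sqrt, which can underflow when x is very large, is never formed on its own.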
This commit is contained in:
parent
b7151ffaab
commit
7d7576f326
@ -87,11 +87,11 @@ struct generic_rsqrt_newton_step {
|
|||||||
Packet inv_sqrt = approx_rsqrt;
|
Packet inv_sqrt = approx_rsqrt;
|
||||||
for (int step = 0; step < Steps; ++step) {
|
for (int step = 0; step < Steps; ++step) {
|
||||||
// Refine the approximation using one Newton-Raphson step:
|
// Refine the approximation using one Newton-Raphson step:
|
||||||
// h_n = x * (inv_sqrt * inv_sqrt) - 1 (so that h_n is nearly 0).
|
// h_n = (x * inv_sqrt) * inv_sqrt - 1 (so that h_n is nearly 0).
|
||||||
// inv_sqrt = inv_sqrt - 0.5 * inv_sqrt * h_n
|
// inv_sqrt = inv_sqrt - 0.5 * inv_sqrt * h_n
|
||||||
Packet r2 = pmul(inv_sqrt, inv_sqrt);
|
Packet r2 = pmul(a, inv_sqrt);
|
||||||
Packet half_r = pmul(inv_sqrt, cst_minus_half);
|
Packet half_r = pmul(inv_sqrt, cst_minus_half);
|
||||||
Packet h_n = pmadd(a, r2, cst_minus_one);
|
Packet h_n = pmadd(r2, inv_sqrt, cst_minus_one);
|
||||||
inv_sqrt = pmadd(half_r, h_n, inv_sqrt);
|
inv_sqrt = pmadd(half_r, h_n, inv_sqrt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user