Make preciprocal IEEE compliant w.r.t. 1/0 and 1/inf.

This commit is contained in:
Rasmus Munk Larsen 2022-01-26 20:38:05 +00:00
parent d271a7d545
commit 8f2c6f0aa6
3 changed files with 44 additions and 9 deletions

View File

@ -18,9 +18,18 @@ namespace Eigen {
namespace internal {
/** \internal Fast reciprocal using Newton-Raphson's method.
We assume that the starting guess provided in approx_a_recip has at least
half the leading mantissa bits in the correct result, such that a single
Newton-Raphson step is sufficient to get within 1-2 ulps of the currect result.
Preconditions:
1. The starting guess provided in approx_a_recip must have at least half
the leading mantissa bits in the correct result, such that a single
Newton-Raphson step is sufficient to get within 1-2 ulps of the currect
result.
2. If a is zero, approx_a_recip must be infinite with the same sign as a.
3. If a is infinite, approx_a_recip must be zero with the same sign as a.
If the preconditions are satisfied, which they are for for the _*_rcp_ps
instructions on x86, the result has a maximum relative error of 2 ulps,
and correctly handles reciprocals of zero and infinity.
*/
template <typename Packet, int Steps>
struct generic_reciprocal_newton_step {
@ -29,12 +38,15 @@ struct generic_reciprocal_newton_step {
run(const Packet& a, const Packet& approx_a_recip) {
using Scalar = typename unpacket_traits<Packet>::type;
const Packet two = pset1<Packet>(Scalar(2));
const Packet neg_a = pnegate(a);
// Refine the approximation using one Newton-Raphson step:
// x_{i} = x_{i-1} * (2 - a * x_{i-1})
const Packet x =
generic_reciprocal_newton_step<Packet,Steps - 1>::run(a, approx_a_recip);
return pmul(x, pmadd(neg_a, x, two));
const Packet x =
generic_reciprocal_newton_step<Packet,Steps - 1>::run(a, approx_a_recip);
const Packet tmp = pnmadd(a, x, two);
// If tmp is NaN, it means that a is either +/-0 or +/-Inf.
// In this case return the approximation directly.
const Packet is_not_nan = pcmp_eq(tmp, tmp);
return pselect(is_not_nan, pmul(x, tmp), x);
}
};

View File

@ -148,11 +148,17 @@ Packet4f prsqrt<Packet4f>(const Packet4f& _x) {
return pselect<Packet4f>(not_normal_finite_mask, y_approx, y_newton);
}
#endif
#ifdef EIGEN_VECTORIZE_FMA
// Trying to speed up reciprocal using Newton-Raphson is counterproductive
// unless FMA is available. Without FMA pdiv(pset1<Packet>(Scalar(1),a) is
// 30% faster.
template<> EIGEN_STRONG_INLINE Packet4f preciprocal<Packet4f>(const Packet4f& a) {
return generic_reciprocal_newton_step<Packet4f, /*Steps=*/1>::run(a, _mm_rcp_ps(a));
}
#endif
#endif
// Hyperbolic Tangent function.

View File

@ -1007,6 +1007,23 @@ void packetmath_real() {
VERIFY_IS_EQUAL(data2[0], Scalar(1));
}
}
if (PacketTraits::HasReciprocal && PacketSize >= 2) {
test::packet_helper<PacketTraits::HasReciprocal, Packet> h;
const Scalar inf = NumTraits<Scalar>::infinity();
const Scalar zero = Scalar(0);
data1[0] = zero;
data1[1] = -zero;
h.store(data2, internal::preciprocal(h.load(data1)));
VERIFY_IS_EQUAL(data2[0], inf);
VERIFY_IS_EQUAL(data2[1], -inf);
data1[0] = inf;
data1[1] = -inf;
h.store(data2, internal::preciprocal(h.load(data1)));
VERIFY_IS_EQUAL(data2[0], zero);
VERIFY_IS_EQUAL(data2[1], -zero);
}
}
#define CAST_CHECK_CWISE1_IF(COND, REFOP, POP, SCALAR, REFTYPE) if(COND) { \