mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-19 08:09:36 +08:00
Make preciprocal IEEE compliant w.r.t. 1/0 and 1/inf.
This commit is contained in:
parent
d271a7d545
commit
8f2c6f0aa6
@ -18,9 +18,18 @@ namespace Eigen {
|
||||
namespace internal {
|
||||
|
||||
/** \internal Fast reciprocal using Newton-Raphson's method.
|
||||
We assume that the starting guess provided in approx_a_recip has at least
|
||||
half the leading mantissa bits in the correct result, such that a single
|
||||
Newton-Raphson step is sufficient to get within 1-2 ulps of the currect result.
|
||||
|
||||
Preconditions:
|
||||
1. The starting guess provided in approx_a_recip must have at least half
|
||||
the leading mantissa bits in the correct result, such that a single
|
||||
Newton-Raphson step is sufficient to get within 1-2 ulps of the currect
|
||||
result.
|
||||
2. If a is zero, approx_a_recip must be infinite with the same sign as a.
|
||||
3. If a is infinite, approx_a_recip must be zero with the same sign as a.
|
||||
|
||||
If the preconditions are satisfied, which they are for for the _*_rcp_ps
|
||||
instructions on x86, the result has a maximum relative error of 2 ulps,
|
||||
and correctly handles reciprocals of zero and infinity.
|
||||
*/
|
||||
template <typename Packet, int Steps>
|
||||
struct generic_reciprocal_newton_step {
|
||||
@ -29,12 +38,15 @@ struct generic_reciprocal_newton_step {
|
||||
run(const Packet& a, const Packet& approx_a_recip) {
|
||||
using Scalar = typename unpacket_traits<Packet>::type;
|
||||
const Packet two = pset1<Packet>(Scalar(2));
|
||||
const Packet neg_a = pnegate(a);
|
||||
// Refine the approximation using one Newton-Raphson step:
|
||||
// x_{i} = x_{i-1} * (2 - a * x_{i-1})
|
||||
const Packet x =
|
||||
generic_reciprocal_newton_step<Packet,Steps - 1>::run(a, approx_a_recip);
|
||||
return pmul(x, pmadd(neg_a, x, two));
|
||||
const Packet x =
|
||||
generic_reciprocal_newton_step<Packet,Steps - 1>::run(a, approx_a_recip);
|
||||
const Packet tmp = pnmadd(a, x, two);
|
||||
// If tmp is NaN, it means that a is either +/-0 or +/-Inf.
|
||||
// In this case return the approximation directly.
|
||||
const Packet is_not_nan = pcmp_eq(tmp, tmp);
|
||||
return pselect(is_not_nan, pmul(x, tmp), x);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -148,11 +148,17 @@ Packet4f prsqrt<Packet4f>(const Packet4f& _x) {
|
||||
return pselect<Packet4f>(not_normal_finite_mask, y_approx, y_newton);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
// Trying to speed up reciprocal using Newton-Raphson is counterproductive
|
||||
// unless FMA is available. Without FMA pdiv(pset1<Packet>(Scalar(1),a) is
|
||||
// 30% faster.
|
||||
template<> EIGEN_STRONG_INLINE Packet4f preciprocal<Packet4f>(const Packet4f& a) {
|
||||
return generic_reciprocal_newton_step<Packet4f, /*Steps=*/1>::run(a, _mm_rcp_ps(a));
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// Hyperbolic Tangent function.
|
||||
|
@ -1007,6 +1007,23 @@ void packetmath_real() {
|
||||
VERIFY_IS_EQUAL(data2[0], Scalar(1));
|
||||
}
|
||||
}
|
||||
if (PacketTraits::HasReciprocal && PacketSize >= 2) {
|
||||
test::packet_helper<PacketTraits::HasReciprocal, Packet> h;
|
||||
const Scalar inf = NumTraits<Scalar>::infinity();
|
||||
const Scalar zero = Scalar(0);
|
||||
data1[0] = zero;
|
||||
data1[1] = -zero;
|
||||
h.store(data2, internal::preciprocal(h.load(data1)));
|
||||
VERIFY_IS_EQUAL(data2[0], inf);
|
||||
VERIFY_IS_EQUAL(data2[1], -inf);
|
||||
|
||||
data1[0] = inf;
|
||||
data1[1] = -inf;
|
||||
h.store(data2, internal::preciprocal(h.load(data1)));
|
||||
VERIFY_IS_EQUAL(data2[0], zero);
|
||||
VERIFY_IS_EQUAL(data2[1], -zero);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#define CAST_CHECK_CWISE1_IF(COND, REFOP, POP, SCALAR, REFTYPE) if(COND) { \
|
||||
|
Loading…
x
Reference in New Issue
Block a user