Fix packetmath_1 float tests for arm/aarch64.

Added missing `pmadd<Packet2f>` for NEON. This significantly improves
precision over the previous `pmul+padd`, which was causing the `pcos`
tests to fail. Also added an approx test against `std::sin`/`std::cos`,
since otherwise any returned pair satisfying `a^2+b^2=1` would
pass.

Modified `log(denorm)` tests.  Denorms are not supported by all
systems (in which case `denorm_min()` returns `::min`), are always flushed
to zero on 32-bit arm, and configurably flush to zero on sse/avx/aarch64.
This leads to inconsistent results across different systems (e.g. `-inf`
vs `nan`). Added a check for denorm support and excluded ARM.

Removed logistic exactness test, since scalar and vectorized versions
follow different code-paths due to differences in `pexp` and `pmadd`,
which result in slightly different values. For example, exactness always
fails on arm, aarch64, and altivec.
This commit is contained in:
Antonio Sanchez 2020-06-23 08:58:34 -07:00
parent 14f84978e8
commit 7222f0b6b5
2 changed files with 22 additions and 8 deletions

View File

@ -1023,6 +1023,8 @@ template<> EIGEN_STRONG_INLINE Packet2ul pdiv<Packet2ul>(const Packet2ul& /*a*/,
// MLA: 10 GFlop/s ; FMA: 12 GFlops/s. // MLA: 10 GFlop/s ; FMA: 12 GFlops/s.
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
{ return vfmaq_f32(c,a,b); } { return vfmaq_f32(c,a,b); }
// Packet2f overload of pmadd: evaluates a*b + c with the NEON vfma_f32
// fused multiply-add intrinsic, which takes the accumulator (c) as its first argument.
template<> EIGEN_STRONG_INLINE Packet2f pmadd(const Packet2f& a, const Packet2f& b, const Packet2f& c)
{ return vfma_f32(c,a,b); }
#else #else
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
{ {
@ -1046,6 +1048,10 @@ template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f&
return vmlaq_f32(c,a,b); return vmlaq_f32(c,a,b);
#endif #endif
} }
// Packet2f overload of pmadd for the #else branch (the guarding #if condition is
// not visible in this hunk): evaluates a*b + c with the NEON vmla_f32
// multiply-accumulate intrinsic, which takes the accumulator (c) as its first argument.
template<> EIGEN_STRONG_INLINE Packet2f pmadd(const Packet2f& a, const Packet2f& b, const Packet2f& c)
{
return vmla_f32(c,a,b);
}
#endif #endif
// No FMA instruction for int, so use MLA unconditionally. // No FMA instruction for int, so use MLA unconditionally.

View File

@ -8,6 +8,7 @@
// Public License v. 2.0. If a copy of the MPL was not distributed // Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include <limits>
#include "packetmath_test_shared.h" #include "packetmath_test_shared.h"
template <typename T> template <typename T>
@ -666,9 +667,6 @@ void packetmath_real() {
h.store(data2, logistic.packetOp(h.load(data1))); h.store(data2, logistic.packetOp(h.load(data1)));
for (int i = 0; i < PacketSize; ++i) { for (int i = 0; i < PacketSize; ++i) {
VERIFY_IS_APPROX(data2[i], logistic(data1[i])); VERIFY_IS_APPROX(data2[i], logistic(data1[i]));
#ifdef EIGEN_VECTORIZE // don't check for exactness when using the i387 FPU
VERIFY_IS_EQUAL(data2[i], logistic(data1[i]));
#endif
} }
} }
@ -702,11 +700,16 @@ void packetmath_real() {
VERIFY_IS_EQUAL(std::log((std::numeric_limits<Scalar>::min)()), data2[0]); VERIFY_IS_EQUAL(std::log((std::numeric_limits<Scalar>::min)()), data2[0]);
VERIFY((numext::isnan)(data2[1])); VERIFY((numext::isnan)(data2[1]));
data1[0] = std::numeric_limits<Scalar>::denorm_min(); // Note: 32-bit arm always flushes denorms to zero.
data1[1] = -std::numeric_limits<Scalar>::denorm_min(); #if !EIGEN_ARCH_ARM
h.store(data2, internal::plog(h.load(data1))); if (std::numeric_limits<Scalar>::has_denorm == std::float_denorm_style::denorm_present) {
// VERIFY_IS_EQUAL(std::log(std::numeric_limits<Scalar>::denorm_min()), data2[0]); data1[0] = std::numeric_limits<Scalar>::denorm_min();
VERIFY((numext::isnan)(data2[1])); data1[1] = -std::numeric_limits<Scalar>::denorm_min();
h.store(data2, internal::plog(h.load(data1)));
// VERIFY_IS_EQUAL(std::log(std::numeric_limits<Scalar>::denorm_min()), data2[0]);
VERIFY((numext::isnan)(data2[1]));
}
#endif
data1[0] = Scalar(-1.0f); data1[0] = Scalar(-1.0f);
h.store(data2, internal::plog(h.load(data1))); h.store(data2, internal::plog(h.load(data1)));
@ -745,6 +748,11 @@ void packetmath_real() {
VERIFY(data2[PacketSize + 0] <= Scalar(1.) && data2[PacketSize + 0] >= Scalar(-1.)); VERIFY(data2[PacketSize + 0] <= Scalar(1.) && data2[PacketSize + 0] >= Scalar(-1.));
VERIFY(data2[PacketSize + 1] <= Scalar(1.) && data2[PacketSize + 1] >= Scalar(-1.)); VERIFY(data2[PacketSize + 1] <= Scalar(1.) && data2[PacketSize + 1] >= Scalar(-1.));
VERIFY_IS_APPROX(data2[0], std::cos(data1[0]));
VERIFY_IS_APPROX(data2[1], std::cos(data1[1]));
VERIFY_IS_APPROX(data2[PacketSize + 0], std::sin(data1[0]));
VERIFY_IS_APPROX(data2[PacketSize + 1], std::sin(data1[1]));
VERIFY_IS_APPROX(numext::abs2(data2[0]) + numext::abs2(data2[PacketSize + 0]), Scalar(1)); VERIFY_IS_APPROX(numext::abs2(data2[0]) + numext::abs2(data2[PacketSize + 0]), Scalar(1));
VERIFY_IS_APPROX(numext::abs2(data2[1]) + numext::abs2(data2[PacketSize + 1]), Scalar(1)); VERIFY_IS_APPROX(numext::abs2(data2[1]) + numext::abs2(data2[PacketSize + 1]), Scalar(1));
} }