mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 03:39:01 +08:00
Make NaN propagation consistent between pmax/pmin and std::max/std::min. This makes NaN propagation consistent between the scalar and vectorized code paths of Eigen's scalar_max_op and scalar_min_op.
See #1373 for details.
This commit is contained in:
parent
156e6234f1
commit
5e144bbaa4
@ -29,12 +29,7 @@ T generic_fast_tanh_float(const T& a_x)
|
||||
// this range is +/-1.0f in single-precision.
|
||||
const T plus_9 = pset1<T>(9.f);
|
||||
const T minus_9 = pset1<T>(-9.f);
|
||||
// NOTE GCC prior to 6.3 might improperly optimize this max/min
|
||||
// step such that if a_x is nan, x will be either 9 or -9,
|
||||
// and tanh will return 1 or -1 instead of nan.
|
||||
// This is supposed to be fixed in gcc6.3,
|
||||
// see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867
|
||||
const T x = pmax(minus_9,pmin(plus_9,a_x));
|
||||
const T x = pmax(pmin(a_x, plus_9), minus_9);
|
||||
// The monomial coefficients of the numerator polynomial (odd).
|
||||
const T alpha_1 = pset1<T>(4.89352455891786e-03f);
|
||||
const T alpha_3 = pset1<T>(6.37261928875436e-04f);
|
||||
|
@ -183,12 +183,22 @@ template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d&
|
||||
}
|
||||
#endif
|
||||
|
||||
// One-line form of pmin<Packet8f>: returns _mm256_min_ps(a, b) with the operands in call order.
// NOTE(review): a multi-line definition of this same specialization also appears in this text;
// presumably this line is the pre-change side of the diff -- confirm.
template<> EIGEN_STRONG_INLINE Packet8f pmin<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_min_ps(a,b); }
|
||||
// One-line form of pmin<Packet4d>: returns _mm256_min_pd(a, b) with the operands in call order.
// NOTE(review): a multi-line definition of this same specialization also appears in this text;
// presumably this line is the pre-change side of the diff -- confirm.
template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_min_pd(a,b); }
|
||||
|
||||
// One-line form of pmax<Packet8f>: returns _mm256_max_ps(a, b) with the operands in call order.
// NOTE(review): a multi-line definition of this same specialization also appears in this text;
// presumably this line is the pre-change side of the diff -- confirm.
template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_max_ps(a,b); }
|
||||
// One-line form of pmax<Packet4d>: returns _mm256_max_pd(a, b) with the operands in call order.
// NOTE(review): a multi-line definition of this same specialization also appears in this text;
// presumably this line is the pre-change side of the diff -- confirm.
template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_max_pd(a,b); }
|
||||
|
||||
// Lane-wise minimum of two packets of 8 floats.
//
// The operands are handed to the intrinsic in reverse order, (b, a). When
// either lane holds a NaN the x86 min instruction yields its second operand,
// so with this order pmin(a, b) yields a -- the same result std::min(a, b)
// gives, and the same convention the pmax specializations use.
template<> EIGEN_STRONG_INLINE Packet8f pmin<Packet8f>(const Packet8f& a, const Packet8f& b) {
  // Arguments are swapped to match NaN propagation behavior of std::min.
  return _mm256_min_ps(b,a);
}
|
||||
// Lane-wise minimum of two packets of 4 doubles.
//
// The operands are handed to the intrinsic in reverse order, (b, a). When
// either lane holds a NaN the x86 min instruction yields its second operand,
// so with this order pmin(a, b) yields a -- the same result std::min(a, b)
// gives, and the same convention the pmax specializations use.
template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const Packet4d& b) {
  // Arguments are swapped to match NaN propagation behavior of std::min.
  return _mm256_min_pd(b,a);
}
|
||||
// Lane-wise maximum of two packets of 8 floats, computed with _mm256_max_ps.
template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) {
  // The intrinsic receives (b, a) rather than (a, b) so that NaN propagation
  // follows std::max.
  const Packet8f larger = _mm256_max_ps(b, a);
  return larger;
}
|
||||
// Lane-wise maximum of two packets of 4 doubles, computed with _mm256_max_pd.
template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const Packet4d& b) {
  // The intrinsic receives (b, a) rather than (a, b) so that NaN propagation
  // follows std::max.
  const Packet4d larger = _mm256_max_pd(b, a);
  return larger;
}
|
||||
// Rounds a packet of 8 floats by calling _mm256_round_ps with _MM_FROUND_CUR_DIRECTION
// (presumably "use the currently configured rounding mode" -- confirm against the intrinsic docs).
template<> EIGEN_STRONG_INLINE Packet8f pround<Packet8f>(const Packet8f& a) { return _mm256_round_ps(a, _MM_FROUND_CUR_DIRECTION); }
|
||||
// Rounds a packet of 4 doubles by calling _mm256_round_pd with _MM_FROUND_CUR_DIRECTION
// (presumably "use the currently configured rounding mode" -- confirm against the intrinsic docs).
template<> EIGEN_STRONG_INLINE Packet4d pround<Packet4d>(const Packet4d& a) { return _mm256_round_pd(a, _MM_FROUND_CUR_DIRECTION); }
|
||||
|
||||
|
@ -250,8 +250,34 @@ template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f&
|
||||
// pmadd for a packet of 2 doubles: forwards (a, b, c) unchanged to the _mm_fmadd_pd intrinsic.
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); }
|
||||
#endif
|
||||
|
||||
// One-line form of pmin<Packet4f>: returns _mm_min_ps(a, b) with the operands in call order.
// NOTE(review): a multi-line definition of this same specialization also appears in this text;
// presumably this line is the pre-change side of the diff -- confirm.
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); }
|
||||
// One-line form of pmin<Packet2d>: returns _mm_min_pd(a, b) with the operands in call order.
// NOTE(review): a multi-line definition of this same specialization also appears in this text;
// presumably this line is the pre-change side of the diff -- confirm.
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
|
||||
// pmin for a packet of 4 floats, with the operand order chosen so that NaN
// handling follows std::min.
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) {
#if EIGEN_COMP_GNUC
  // GCC's optimizer has been seen to reorder the operands of _mm_min_ps,
  // which changes the result for NaN inputs, so the instruction is emitted
  // directly through inline assembly instead. Reportedly fixed in gcc 6.3;
  // see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867
  Packet4f result = b;
  asm("minps %[a], %[res]" : [res] "+x" (result) : [a] "x" (a));
  return result;
#else
  // Operands are passed as (b, a) so NaN propagation matches std::min.
  return _mm_min_ps(b, a);
#endif
}
|
||||
// pmin for a packet of 2 doubles, with the operand order chosen so that NaN
// handling follows std::min.
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) {
#if EIGEN_COMP_GNUC
  // GCC's optimizer has been seen to reorder the operands of _mm_min_pd,
  // which changes the result for NaN inputs, so the instruction is emitted
  // directly through inline assembly instead. Reportedly fixed in gcc 6.3;
  // see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867
  Packet2d result = b;
  asm("minpd %[a], %[res]" : [res] "+x" (result) : [a] "x" (a));
  return result;
#else
  // Operands are passed as (b, a) so NaN propagation matches std::min.
  return _mm_min_pd(b, a);
#endif
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
@ -263,8 +289,34 @@ template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const
|
||||
#endif
|
||||
}
|
||||
|
||||
// One-line form of pmax<Packet4f>: returns _mm_max_ps(a, b) with the operands in call order.
// NOTE(review): a multi-line definition of this same specialization also appears in this text;
// presumably this line is the pre-change side of the diff -- confirm.
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
|
||||
// One-line form of pmax<Packet2d>: returns _mm_max_pd(a, b) with the operands in call order.
// NOTE(review): a multi-line definition of this same specialization also appears in this text;
// presumably this line is the pre-change side of the diff -- confirm.
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
|
||||
// pmax for a packet of 4 floats, with the operand order chosen so that NaN
// handling follows std::max.
template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) {
#if EIGEN_COMP_GNUC
  // GCC's optimizer has been seen to reorder the operands of _mm_max_ps,
  // which changes the result for NaN inputs, so the instruction is emitted
  // directly through inline assembly instead. Reportedly fixed in gcc 6.3;
  // see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867
  Packet4f result = b;
  asm("maxps %[a], %[res]" : [res] "+x" (result) : [a] "x" (a));
  return result;
#else
  // Operands are passed as (b, a) so NaN propagation matches std::max.
  return _mm_max_ps(b, a);
#endif
}
|
||||
// pmax for a packet of 2 doubles, with the operand order chosen so that NaN
// handling follows std::max.
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) {
#if EIGEN_COMP_GNUC
  // GCC's optimizer has been seen to reorder the operands of _mm_max_pd,
  // which changes the result for NaN inputs, so the instruction is emitted
  // directly through inline assembly instead. Reportedly fixed in gcc 6.3;
  // see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867
  Packet2d result = b;
  asm("maxpd %[a], %[res]" : [res] "+x" (result) : [a] "x" (a));
  return result;
#else
  // Operands are passed as (b, a) so NaN propagation matches std::max.
  return _mm_max_pd(b, a);
#endif
}
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
|
@ -300,6 +300,51 @@ static void test_select()
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
void test_minmax_nan_propagation_templ() {
|
||||
for (int size = 1; size < 17; ++size) {
|
||||
const Scalar kNan = std::numeric_limits<Scalar>::quiet_NaN();
|
||||
Tensor<Scalar, 1> vec_nan(size);
|
||||
Tensor<Scalar, 1> vec_zero(size);
|
||||
Tensor<Scalar, 1> vec_res(size);
|
||||
vec_nan.setConstant(kNan);
|
||||
vec_zero.setZero();
|
||||
vec_res.setZero();
|
||||
|
||||
// Test that we propagate NaNs in the tensor when applying the
|
||||
// cwiseMax(scalar) operator, which is used for the Relu operator.
|
||||
vec_res = vec_nan.cwiseMax(Scalar(0));
|
||||
for (int i = 0; i < size; ++i) {
|
||||
VERIFY((numext::isnan)(vec_res(i)));
|
||||
}
|
||||
|
||||
// Test that NaNs do not propagate if we reverse the arguments.
|
||||
vec_res = vec_zero.cwiseMax(kNan);
|
||||
for (int i = 0; i < size; ++i) {
|
||||
VERIFY_IS_EQUAL(vec_res(i), Scalar(0));
|
||||
}
|
||||
|
||||
// Test that we propagate NaNs in the tensor when applying the
|
||||
// cwiseMin(scalar) operator.
|
||||
vec_res.setZero();
|
||||
vec_res = vec_nan.cwiseMin(Scalar(0));
|
||||
for (int i = 0; i < size; ++i) {
|
||||
VERIFY((numext::isnan)(vec_res(i)));
|
||||
}
|
||||
|
||||
// Test that NaNs do not propagate if we reverse the arguments.
|
||||
vec_res = vec_zero.cwiseMin(kNan);
|
||||
for (int i = 0; i < size; ++i) {
|
||||
VERIFY_IS_EQUAL(vec_res(i), Scalar(0));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void test_minmax_nan_propagation()
|
||||
{
|
||||
test_minmax_nan_propagation_templ<float>();
|
||||
test_minmax_nan_propagation_templ<double>();
|
||||
}
|
||||
|
||||
void test_cxx11_tensor_expr()
|
||||
{
|
||||
@ -311,4 +356,5 @@ void test_cxx11_tensor_expr()
|
||||
CALL_SUBTEST(test_functors());
|
||||
CALL_SUBTEST(test_type_casting());
|
||||
CALL_SUBTEST(test_select());
|
||||
CALL_SUBTEST(test_minmax_nan_propagation());
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user