diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h index 17b9d0bb8..30243ca65 100644 --- a/Eigen/src/Core/arch/AVX/MathFunctions.h +++ b/Eigen/src/Core/arch/AVX/MathFunctions.h @@ -101,17 +101,22 @@ pexp(const Packet4d& _x) { template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f psqrt(const Packet8f& _x) { - Packet8f minus_half_x = pmul(_x, pset1(-0.5f)); - Packet8f denormal_mask = pandnot( - pcmp_lt(_x, pset1((std::numeric_limits::min)())), - pcmp_lt(_x, pzero(_x))); + const Packet8f minus_half_x = pmul(_x, pset1(-0.5f)); + const Packet8f negative_mask = pcmp_lt(_x, pzero(_x)); + const Packet8f denormal_mask = + pandnot(pcmp_lt(_x, pset1((std::numeric_limits::min)())), + negative_mask); // Compute approximate reciprocal sqrt. - Packet8f x = _mm256_rsqrt_ps(_x); + Packet8f rs = _mm256_rsqrt_ps(_x); + // Flush negative arguments to zero. This is a workaround which ensures + // that sqrt of a negative denormal returns -NaN, despite _mm256_rsqrt_ps + // returning -Inf for such values. + const Packet8f x_flushed = pandnot(_x, negative_mask); // Do a single step of Newton's iteration. - x = pmul(x, pmadd(minus_half_x, pmul(x,x), pset1(1.5f))); + rs = pmul(rs, pmadd(minus_half_x, pmul(rs,rs), pset1(1.5f))); // Flush results for denormals to zero. - return pandnot(pmul(_x,x), denormal_mask); + return pandnot(pmul(x_flushed, rs), denormal_mask); } #else diff --git a/Eigen/src/Core/arch/Default/BFloat16.h b/Eigen/src/Core/arch/Default/BFloat16.h index 36df320de..fb99d8680 100644 --- a/Eigen/src/Core/arch/Default/BFloat16.h +++ b/Eigen/src/Core/arch/Default/BFloat16.h @@ -137,21 +137,25 @@ struct numeric_limits { static EIGEN_CONSTEXPR const bool has_infinity = true; static EIGEN_CONSTEXPR const bool has_quiet_NaN = true; static EIGEN_CONSTEXPR const bool has_signaling_NaN = true; - static EIGEN_CONSTEXPR const float_denorm_style has_denorm = std::denorm_absent; + static EIGEN_CONSTEXPR const float_denorm_style has_denorm = std::denorm_present; static EIGEN_CONSTEXPR const bool has_denorm_loss = false; static EIGEN_CONSTEXPR const std::float_round_style round_style = numeric_limits::round_style; - static EIGEN_CONSTEXPR const bool is_iec559 = false; + static EIGEN_CONSTEXPR const bool is_iec559 = true; + // The C++ standard defines this as "true if the set of values representable + // by the type is finite." BFloat16 has finite precision. static EIGEN_CONSTEXPR const bool is_bounded = true; static EIGEN_CONSTEXPR const bool is_modulo = false; static EIGEN_CONSTEXPR const int digits = 8; static EIGEN_CONSTEXPR const int digits10 = 2; static EIGEN_CONSTEXPR const int max_digits10 = 4; - static EIGEN_CONSTEXPR const int radix = 2; + static EIGEN_CONSTEXPR const int radix = numeric_limits::radix; static EIGEN_CONSTEXPR const int min_exponent = numeric_limits::min_exponent; static EIGEN_CONSTEXPR const int min_exponent10 = numeric_limits::min_exponent10; static EIGEN_CONSTEXPR const int max_exponent = numeric_limits::max_exponent; static EIGEN_CONSTEXPR const int max_exponent10 = numeric_limits::max_exponent10; static EIGEN_CONSTEXPR const bool traps = numeric_limits::traps; + // IEEE754: "The implementer shall choose how tininess is detected, but shall + // detect tininess in the same way for all operations in radix two" static EIGEN_CONSTEXPR const bool tinyness_before = numeric_limits::tinyness_before; static EIGEN_CONSTEXPR Eigen::bfloat16 (min)() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0080); } @@ -161,7 +165,7 @@ struct numeric_limits { static EIGEN_CONSTEXPR Eigen::bfloat16 round_error() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x3f00); } static EIGEN_CONSTEXPR Eigen::bfloat16 infinity() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f80); } static EIGEN_CONSTEXPR Eigen::bfloat16 quiet_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7fc0); } - static EIGEN_CONSTEXPR Eigen::bfloat16 signaling_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7f81); } + static EIGEN_CONSTEXPR Eigen::bfloat16 signaling_NaN() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x7fa0); } static EIGEN_CONSTEXPR Eigen::bfloat16 denorm_min() { return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(0x0001); } }; diff --git a/Eigen/src/Core/arch/Default/Half.h b/Eigen/src/Core/arch/Default/Half.h index 415533524..876045e58 100644 --- a/Eigen/src/Core/arch/Default/Half.h +++ b/Eigen/src/Core/arch/Default/Half.h @@ -218,29 +218,33 @@ struct numeric_limits { static EIGEN_CONSTEXPR const float_denorm_style has_denorm = denorm_present; static EIGEN_CONSTEXPR const bool has_denorm_loss = false; static EIGEN_CONSTEXPR const std::float_round_style round_style = std::round_to_nearest; - static EIGEN_CONSTEXPR const bool is_iec559 = false; - static EIGEN_CONSTEXPR const bool is_bounded = false; + static EIGEN_CONSTEXPR const bool is_iec559 = true; + // The C++ standard defines this as "true if the set of values representable + // by the type is finite." Half has finite precision. + static EIGEN_CONSTEXPR const bool is_bounded = true; static EIGEN_CONSTEXPR const bool is_modulo = false; static EIGEN_CONSTEXPR const int digits = 11; static EIGEN_CONSTEXPR const int digits10 = 3; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html static EIGEN_CONSTEXPR const int max_digits10 = 5; // according to http://half.sourceforge.net/structstd_1_1numeric__limits_3_01half__float_1_1half_01_4.html - static EIGEN_CONSTEXPR const int radix = 2; + static EIGEN_CONSTEXPR const int radix = numeric_limits::radix; static EIGEN_CONSTEXPR const int min_exponent = -13; static EIGEN_CONSTEXPR const int min_exponent10 = -4; static EIGEN_CONSTEXPR const int max_exponent = 16; static EIGEN_CONSTEXPR const int max_exponent10 = 4; - static EIGEN_CONSTEXPR const bool traps = true; - static EIGEN_CONSTEXPR const bool tinyness_before = false; + static EIGEN_CONSTEXPR const bool traps = numeric_limits::traps; + // IEEE754: "The implementer shall choose how tininess is detected, but shall + // detect tininess in the same way for all operations in radix two" + static EIGEN_CONSTEXPR const bool tinyness_before = std::numeric_limits::tinyness_before; - static EIGEN_CONSTEXPR Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x400); } + static EIGEN_CONSTEXPR Eigen::half (min)() { return Eigen::half_impl::raw_uint16_to_half(0x0400); } static EIGEN_CONSTEXPR Eigen::half lowest() { return Eigen::half_impl::raw_uint16_to_half(0xfbff); } static EIGEN_CONSTEXPR Eigen::half (max)() { return Eigen::half_impl::raw_uint16_to_half(0x7bff); } - static EIGEN_CONSTEXPR Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x0800); } + static EIGEN_CONSTEXPR Eigen::half epsilon() { return Eigen::half_impl::raw_uint16_to_half(0x1400); } static EIGEN_CONSTEXPR Eigen::half round_error() { return Eigen::half_impl::raw_uint16_to_half(0x3800); } static EIGEN_CONSTEXPR Eigen::half infinity() { return Eigen::half_impl::raw_uint16_to_half(0x7c00); } static EIGEN_CONSTEXPR Eigen::half quiet_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7e00); } static EIGEN_CONSTEXPR Eigen::half signaling_NaN() { return Eigen::half_impl::raw_uint16_to_half(0x7d00); } - static EIGEN_CONSTEXPR Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x1); } + static EIGEN_CONSTEXPR Eigen::half denorm_min() { return Eigen::half_impl::raw_uint16_to_half(0x0001); } }; // If std::numeric_limits is specialized, should also specialize diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h index 4bdb9af3c..f5c72a766 100644 --- a/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -75,8 +75,6 @@ Packet4f pcos(const Packet4f& _x) return pcos_float(_x); } -#if EIGEN_FAST_MATH - // Functions for sqrt. // The EIGEN_FAST_MATH version uses the _mm_rsqrt_ps approximation and one step // of Newton's method, at a cost of 1-2 bits of precision as opposed to the @@ -85,11 +83,13 @@ Packet4f pcos(const Packet4f& _x) // it can be inlined and pipelined with other computations, further reducing its // effective latency. This is similar to Quake3's fast inverse square root. // For detail see here: http://www.beyond3d.com/content/articles/8/ -template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED +#if EIGEN_FAST_MATH +template<> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f psqrt(const Packet4f& _x) { - Packet4f minus_half_x = pmul(_x, pset1(-0.5f)); - Packet4f denormal_mask = pandnot( + const Packet4f minus_half_x = pmul(_x, pset1(-0.5f)); + const Packet4f denormal_mask = pandnot( pcmp_lt(_x, pset1((std::numeric_limits::min)())), pcmp_lt(_x, pzero(_x))); diff --git a/test/array_cwise.cpp b/test/array_cwise.cpp index 748514001..298351eef 100644 --- a/test/array_cwise.cpp +++ b/test/array_cwise.cpp @@ -451,7 +451,7 @@ template void array_real(const ArrayType& m) const RealScalar tiny = sqrt(std::numeric_limits::epsilon()); s1 += Scalar(tiny); m1 += ArrayType::Constant(rows,cols,Scalar(tiny)); - VERIFY_IS_APPROX(s1/m1, s1 * m1.inverse()); + VERIFY_IS_CWISE_APPROX(s1/m1, s1 * m1.inverse()); // check inplace transpose m3 = m1; diff --git a/test/packetmath.cpp b/test/packetmath.cpp index db1c9adcb..fe0a9f67c 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -890,7 +890,10 @@ void packetmath_real() { data1[0] = std::numeric_limits::denorm_min(); data1[1] = -std::numeric_limits::denorm_min(); h.store(data2, internal::plog(h.load(data1))); - VERIFY_IS_APPROX(std::log(std::numeric_limits::denorm_min()), data2[0]); + // TODO(rmlarsen): Re-enable for bfloat16. + if (!internal::is_same::value) { + VERIFY_IS_APPROX(std::log(std::numeric_limits::denorm_min()), data2[0]); + } VERIFY((numext::isnan)(data2[1])); } #endif @@ -917,7 +920,7 @@ void packetmath_real() { if (std::numeric_limits::has_denorm == std::denorm_present) { data1[1] = -std::numeric_limits::denorm_min(); } else { - data1[1] = -NumTraits::epsilon(); + data1[1] = -((std::numeric_limits::min)()); } h.store(data2, internal::psqrt(h.load(data1))); VERIFY((numext::isnan)(data2[0]));