From 0e1a33a4617e605fbe93caf1ee51d1ef0f7cc29d Mon Sep 17 00:00:00 2001 From: Niels Dekker Date: Tue, 14 Jul 2020 23:22:34 +0200 Subject: [PATCH] Faster conversion from integer types to bfloat16 Specialized `bfloat16_impl::float_to_bfloat16_rtne(float)` for normal floating point numbers, infinity and zero, in order to improve the performance of `bfloat16::bfloat16(const T&)` for integer argument types. A reduction of more than 20% of the runtime duration of conversion from int to bfloat16 was observed, using Visual C++ 2019 on Windows 10. --- Eigen/src/Core/arch/Default/BFloat16.h | 44 ++++++++++++++++++++------ test/bfloat16_float.cpp | 4 +-- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/Eigen/src/Core/arch/Default/BFloat16.h b/Eigen/src/Core/arch/Default/BFloat16.h index d96105923..96accd7ed 100644 --- a/Eigen/src/Core/arch/Default/BFloat16.h +++ b/Eigen/src/Core/arch/Default/BFloat16.h @@ -58,7 +58,14 @@ struct __bfloat16_raw { }; EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw raw_uint16_to_bfloat16(unsigned short value); +template <bool AssumeArgumentIsNormalOrInfinityOrZero> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne(float ff); +// Forward declarations of template specializations, to avoid Visual C++ 2019 errors, saying: +// > error C2908: explicit specialization; 'float_to_bfloat16_rtne' has already been instantiated +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<false>(float ff); +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<true>(float ff); EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float bfloat16_to_float(__bfloat16_raw h); struct bfloat16_base : public __bfloat16_raw { @@ -81,14 +88,14 @@ struct bfloat16 : public bfloat16_impl::bfloat16_base { : bfloat16_impl::bfloat16_base(bfloat16_impl::raw_uint16_to_bfloat16(b ? 
0x3f80 : 0)) {} template<class T> explicit EIGEN_DEVICE_FUNC bfloat16(const T& val) - : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne(static_cast<float>(val))) {} + : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<internal::is_integral<T>::value>(static_cast<float>(val))) {} explicit EIGEN_DEVICE_FUNC bfloat16(float f) - : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne(f)) {} + : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<false>(f)) {} // Following the convention of numpy, converting between complex and // float will lead to loss of imag value. template<typename RealScalar> explicit EIGEN_DEVICE_FUNC bfloat16(const std::complex<RealScalar>& val) - : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne(static_cast<float>(val.real()))) {} + : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<false>(static_cast<float>(val.real()))) {} EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(bool) const { // +0.0 and -0.0 become false, everything else becomes true. @@ -326,11 +333,13 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw raw_uint16_to_bfloat16(unsi return h; } -EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne(float ff) { +// float_to_bfloat16_rtne template specialization that does not make any +// assumption about the value of its function argument (ff). +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<false>(float ff) { #if (defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_HIP_BF16)) // Nothing to do here #else - unsigned int input = numext::as_uint(ff); __bfloat16_raw output; if (Eigen::numext::isnan EIGEN_NOT_A_MACRO(ff)) { @@ -491,15 +500,32 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne(floa // Sign | Exp (8 bit) | Frac (first 7 bit) // S E E E E E E E E F F F F F F L // 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 - // - // + + // At this point, ff must be either a normal float, or +/-infinity. 
+ output = float_to_bfloat16_rtne<true>(ff); + } + return output; +#endif +} + +// float_to_bfloat16_rtne template specialization that assumes that its function +// argument (ff) is either a normal floating point number, or +/-infinity, or +// zero. Used to improve the runtime performance of conversion from an integer +// type to bfloat16. +template <> +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw float_to_bfloat16_rtne<true>(float ff) { +#if (defined(EIGEN_HAS_CUDA_BF16) && defined(EIGEN_HAS_HIP_BF16)) + // Nothing to do here +#else + unsigned int input = numext::as_uint(ff); + __bfloat16_raw output; + // Least significant bit of resulting bfloat. unsigned int lsb = (input >> 16) & 1; unsigned int rounding_bias = 0x7fff + lsb; input += rounding_bias; output.value = static_cast<numext::uint16_t>(input >> 16); - } - return output; + return output; #endif } diff --git a/test/bfloat16_float.cpp b/test/bfloat16_float.cpp index 478aef3a3..96341929a 100644 --- a/test/bfloat16_float.cpp +++ b/test/bfloat16_float.cpp @@ -31,7 +31,7 @@ float BinaryToFloat(uint32_t sign, uint32_t exponent, uint32_t high_mantissa, void test_truncate(float input, float expected_truncation, float expected_rounding){ bfloat16 truncated = Eigen::bfloat16_impl::truncate_to_bfloat16(input); - bfloat16 rounded = Eigen::bfloat16_impl::float_to_bfloat16_rtne(input); + bfloat16 rounded = Eigen::bfloat16_impl::float_to_bfloat16_rtne<false>(input); if ((numext::isnan)(input)){ VERIFY((numext::isnan)(static_cast<float>(truncated)) || (numext::isinf)(static_cast<float>(truncated))); VERIFY((numext::isnan)(static_cast<float>(rounded)) || (numext::isinf)(static_cast<float>(rounded))); @@ -93,7 +93,7 @@ void test_conversion() } else { VERIFY_IS_EQUAL(bf_trunc.value, 0x0000); } - bfloat16 bf_round = Eigen::bfloat16_impl::float_to_bfloat16_rtne(denorm); + bfloat16 bf_round = Eigen::bfloat16_impl::float_to_bfloat16_rtne<false>(denorm); VERIFY_IS_EQUAL(static_cast<float>(bf_round), 0.0f); if (std::signbit(denorm)) { VERIFY_IS_EQUAL(bf_round.value, 0x8000);