Allow implicit conversion from bfloat16 to float and double

Conversion from `bfloat16` to `float` and `double` is lossless. It seems natural to allow the conversion to be implicit, as the C++ language also supports implicit conversion from a smaller to a larger floating-point type.

Intel's oneDNN bfloat16 implementation also has an implicit `operator float()`: https://github.com/oneapi-src/oneDNN/blob/v1.5/src/common/bfloat16.hpp
This commit is contained in:
Niels Dekker 2020-07-11 12:50:46 +02:00
parent dcf7655b3d
commit 4ab32e2de2
2 changed files with 5 additions and 5 deletions

View File

@ -117,10 +117,10 @@ struct bfloat16 : public bfloat16_impl::bfloat16_base {
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long long) const { EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long long) const {
return static_cast<unsigned long long>(bfloat16_to_float(*this)); return static_cast<unsigned long long>(bfloat16_to_float(*this));
} }
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const { EIGEN_DEVICE_FUNC operator float() const {
return bfloat16_impl::bfloat16_to_float(*this); return bfloat16_impl::bfloat16_to_float(*this);
} }
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(double) const { EIGEN_DEVICE_FUNC operator double() const {
return static_cast<double>(bfloat16_impl::bfloat16_to_float(*this)); return static_cast<double>(bfloat16_impl::bfloat16_to_float(*this));
} }
template<typename RealScalar> template<typename RealScalar>

View File

@ -53,9 +53,9 @@ void test_conversion()
VERIFY_IS_EQUAL(bfloat16(3.40e38f).value, 0x7f80); // Becomes infinity. VERIFY_IS_EQUAL(bfloat16(3.40e38f).value, 0x7f80); // Becomes infinity.
// Verify round-to-nearest-even behavior. // Verify round-to-nearest-even behavior.
float val1 = static_cast<float>(bfloat16(__bfloat16_raw(0x3c00))); float val1 = bfloat16(__bfloat16_raw(0x3c00));
float val2 = static_cast<float>(bfloat16(__bfloat16_raw(0x3c01))); float val2 = bfloat16(__bfloat16_raw(0x3c01));
float val3 = static_cast<float>(bfloat16(__bfloat16_raw(0x3c02))); float val3 = bfloat16(__bfloat16_raw(0x3c02));
VERIFY_IS_EQUAL(bfloat16(0.5f * (val1 + val2)).value, 0x3c00); VERIFY_IS_EQUAL(bfloat16(0.5f * (val1 + val2)).value, 0x3c00);
VERIFY_IS_EQUAL(bfloat16(0.5f * (val2 + val3)).value, 0x3c02); VERIFY_IS_EQUAL(bfloat16(0.5f * (val2 + val3)).value, 0x3c02);