mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-22 01:29:35 +08:00
Allow implicit conversion from bfloat16 to float and double
Conversion from `bfloat16` to `float` and `double` is lossless. It seems natural to allow the conversion to be implicit, as the C++ language also supports implicit conversion from a smaller to a larger floating-point type. Intel's oneDNN bfloat16 implementation also has an implicit `operator float()`: https://github.com/oneapi-src/oneDNN/blob/v1.5/src/common/bfloat16.hpp
This commit is contained in:
parent
dcf7655b3d
commit
4ab32e2de2
@ -117,10 +117,10 @@ struct bfloat16 : public bfloat16_impl::bfloat16_base {
|
|||||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long long) const {
|
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long long) const {
|
||||||
return static_cast<unsigned long long>(bfloat16_to_float(*this));
|
return static_cast<unsigned long long>(bfloat16_to_float(*this));
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const {
|
EIGEN_DEVICE_FUNC operator float() const {
|
||||||
return bfloat16_impl::bfloat16_to_float(*this);
|
return bfloat16_impl::bfloat16_to_float(*this);
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(double) const {
|
EIGEN_DEVICE_FUNC operator double() const {
|
||||||
return static_cast<double>(bfloat16_impl::bfloat16_to_float(*this));
|
return static_cast<double>(bfloat16_impl::bfloat16_to_float(*this));
|
||||||
}
|
}
|
||||||
template<typename RealScalar>
|
template<typename RealScalar>
|
||||||
|
@ -53,9 +53,9 @@ void test_conversion()
|
|||||||
VERIFY_IS_EQUAL(bfloat16(3.40e38f).value, 0x7f80); // Becomes infinity.
|
VERIFY_IS_EQUAL(bfloat16(3.40e38f).value, 0x7f80); // Becomes infinity.
|
||||||
|
|
||||||
// Verify round-to-nearest-even behavior.
|
// Verify round-to-nearest-even behavior.
|
||||||
float val1 = static_cast<float>(bfloat16(__bfloat16_raw(0x3c00)));
|
float val1 = bfloat16(__bfloat16_raw(0x3c00));
|
||||||
float val2 = static_cast<float>(bfloat16(__bfloat16_raw(0x3c01)));
|
float val2 = bfloat16(__bfloat16_raw(0x3c01));
|
||||||
float val3 = static_cast<float>(bfloat16(__bfloat16_raw(0x3c02)));
|
float val3 = bfloat16(__bfloat16_raw(0x3c02));
|
||||||
VERIFY_IS_EQUAL(bfloat16(0.5f * (val1 + val2)).value, 0x3c00);
|
VERIFY_IS_EQUAL(bfloat16(0.5f * (val1 + val2)).value, 0x3c00);
|
||||||
VERIFY_IS_EQUAL(bfloat16(0.5f * (val2 + val3)).value, 0x3c02);
|
VERIFY_IS_EQUAL(bfloat16(0.5f * (val2 + val3)).value, 0x3c02);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user