Mirror of https://gitlab.com/libeigen/eigen.git (synced 2025-05-22 12:37:35 +08:00)
Improve accuracy of full tensor reduction for half and bfloat16 by reducing leaf size in tree reduction.
Add more unit tests for summation accuracy.
parent 95bb645e92
commit 360290fc42
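To make the idea concrete, here is a standalone sketch, not Eigen code: it emulates bfloat16 by truncating a float's mantissa to its top 7 explicit bits (truncation rather than round-to-nearest, which is enough for the illustration). Sequential accumulation in a type with p significand bits stops making progress once the running sum is roughly 2^p times larger than the inputs, while a tree reduction with a small enough leaf keeps every partial sum short.

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

// Emulate bfloat16 by truncating a float's mantissa to its top 7 explicit bits.
static float to_bf16(float x) {
  std::uint32_t bits;
  std::memcpy(&bits, &x, sizeof(bits));
  bits &= 0xFFFF0000u;
  std::memcpy(&x, &bits, sizeof(x));
  return x;
}

// Left-to-right accumulation, rounding the running sum after every addition.
static float naive_sum(const std::vector<float>& v) {
  float acc = 0.0f;
  for (float x : v) acc = to_bf16(acc + x);
  return acc;
}

// Tree reduction: split the range in half until it is at most `leaf` elements.
static float tree_sum(const std::vector<float>& v, std::size_t lo, std::size_t hi,
                      std::size_t leaf) {
  if (hi - lo <= leaf) {
    float acc = 0.0f;
    for (std::size_t i = lo; i < hi; ++i) acc = to_bf16(acc + v[i]);
    return acc;
  }
  const std::size_t mid = lo + (hi - lo) / 2;
  return to_bf16(tree_sum(v, lo, mid, leaf) + tree_sum(v, mid, hi, leaf));
}

int main() {
  const std::size_t n = 1 << 20;
  std::vector<float> v(n, 1.0f);  // exact sum: 1048576
  std::printf("naive:           %.0f\n", naive_sum(v));            // stalls near 256
  std::printf("tree, leaf 1024: %.0f\n", tree_sum(v, 0, n, 1024)); // better, still well short
  std::printf("tree, leaf 128:  %.0f\n", tree_sum(v, 0, n, 128));  // essentially exact here
}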
@@ -227,13 +227,19 @@ struct InnerMostDimReducer<Self, Op, true, false> {
 // The following implements tree-based reduction, which improves the accuracy
 // of sum and mean reductions, since each of the n inputs only participates in
 // O(log n) additions.
-static const int kLeafSize = 1024;
+template <typename T>
+EIGEN_DEVICE_FUNC inline Index LeafSize() { return 1024; }
+template <>
+EIGEN_DEVICE_FUNC inline Index LeafSize<half>() { return 200; }
+template <>
+EIGEN_DEVICE_FUNC inline Index LeafSize<bfloat16>() { return 128; }
 
 template <typename Self, typename Op>
 struct InnerMostDimReducer<Self, Op, false, true> {
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType
   reduce(const Self& self, typename Self::Index firstIndex,
          typename Self::Index numValuesToReduce, Op& reducer) {
+    const Index kLeafSize = LeafSize<typename Self::CoeffReturnType>();
     typename Self::CoeffReturnType accum = reducer.initialize();
     if (numValuesToReduce > kLeafSize) {
       const typename Self::Index half = numValuesToReduce / 2;
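From the user's side the API is unchanged: full reductions such as sum() simply accumulate low-precision scalars in smaller leaves (200 elements for half and 128 for bfloat16, per the specializations above, versus the previous fixed 1024). A minimal usage sketch, assuming the unsupported Tensor module is available:

#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>

int main() {
  Eigen::Tensor<Eigen::bfloat16, 1> t(1 << 20);
  t.setConstant(Eigen::bfloat16(1.0f));

  // Full reduction to a rank-0 tensor; the tree reduction bounds how many
  // bfloat16 additions each input participates in.
  Eigen::Tensor<Eigen::bfloat16, 0> s;
  s = t.sum();

  // A single sequential pass in bfloat16 would saturate near 256; with small
  // leaves the result should stay close to the true count of 1048576.
  std::cout << static_cast<float>(s()) << "\n";
}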
@@ -254,6 +260,7 @@ struct InnerMostDimReducer<Self, Op, true, true> {
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType
   reduce(const Self& self, typename Self::Index firstIndex,
          typename Self::Index numValuesToReduce, Op& reducer) {
+    const Index kLeafSize = LeafSize<typename Self::CoeffReturnType>();
     const typename Self::Index packetSize =
         internal::unpacket_traits<typename Self::PacketReturnType>::size;
     typename Self::CoeffReturnType accum = reducer.initialize();
@@ -486,22 +486,25 @@ static void test_reduce_middle_dims() {
   }
 }
 
-static void test_sum_accuracy() {
-  Tensor<float, 3> tensor(101, 101, 101);
-  for (float prescribed_mean : {1.0f, 10.0f, 100.0f, 1000.0f, 10000.0f}) {
-    tensor.setRandom();
-    tensor += tensor.constant(prescribed_mean);
-    Tensor<float, 0> sum = tensor.sum();
+template <typename ScalarType, int num_elements, int max_mean>
+void test_sum_accuracy() {
+  Tensor<double, 1> double_tensor(num_elements);
+  Tensor<ScalarType, 1> tensor(num_elements);
+  for (double prescribed_mean = 0; prescribed_mean <= max_mean; prescribed_mean = numext::maxi(1.0, prescribed_mean*3.99)) {
+    // FIXME: NormalRandomGenerator doesn't work in bfloat and half.
+    double_tensor.setRandom<Eigen::internal::NormalRandomGenerator<double>>();
+    double_tensor += double_tensor.constant(prescribed_mean);
+    tensor = double_tensor.cast<ScalarType>();
+
+    Tensor<ScalarType, 0> sum;
+    sum = tensor.sum();
+
     // Compute the reference value in double precision.
     double expected_sum = 0.0;
-    for (int i = 0; i < 101; ++i) {
-      for (int j = 0; j < 101; ++j) {
-        for (int k = 0; k < 101; ++k) {
-          expected_sum += static_cast<double>(tensor(i, j, k));
-        }
-      }
+    for (int i = 0; i < num_elements; ++i) {
+      expected_sum += static_cast<double>(tensor(i));
     }
-    VERIFY_IS_APPROX(sum(), static_cast<float>(expected_sum));
+    VERIFY_IS_APPROX(sum(), static_cast<ScalarType>(expected_sum));
   }
 }
 
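To run the same kind of accuracy check outside the unit-test harness, a sketch along these lines works (assuming the unsupported Tensor module; sum_relative_error is an illustrative helper, not part of Eigen). It follows the test's approach: draw the data in double precision, shift it to the prescribed mean, cast down, and compare the low-precision sum against a double-precision reference over the already-rounded inputs.

#include <unsupported/Eigen/CXX11/Tensor>
#include <cmath>
#include <iostream>

template <typename Scalar>
double sum_relative_error(int n, double mean) {
  Eigen::Tensor<double, 1> d(n);
  d.setRandom<Eigen::internal::NormalRandomGenerator<double>>();
  d += d.constant(mean);

  Eigen::Tensor<Scalar, 1> t(n);
  t = d.cast<Scalar>();

  Eigen::Tensor<Scalar, 0> s;
  s = t.sum();

  // Reference sum of the rounded inputs, accumulated in double precision.
  double reference = 0.0;
  for (int i = 0; i < n; ++i) reference += static_cast<double>(t(i));
  return std::abs(static_cast<double>(s()) - reference) / std::abs(reference);
}

int main() {
  std::cout << "bfloat16: " << sum_relative_error<Eigen::bfloat16>(1 << 20, 8.0) << "\n";
  std::cout << "half:     " << sum_relative_error<Eigen::half>(4 * 1024, 8.0) << "\n";
}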
@@ -528,5 +531,11 @@ EIGEN_DECLARE_TEST(cxx11_tensor_reduction) {
   CALL_SUBTEST(test_innermost_first_dims<RowMajor>());
   CALL_SUBTEST(test_reduce_middle_dims<ColMajor>());
   CALL_SUBTEST(test_reduce_middle_dims<RowMajor>());
-  CALL_SUBTEST(test_sum_accuracy());
+  CALL_SUBTEST((test_sum_accuracy<float,10*1024*1024,8*1024>()));
+  CALL_SUBTEST((test_sum_accuracy<Eigen::bfloat16,10*1024*1024,8*1024>()));
+  // The range of half is limited to 65519 when using round-to-even,
+  // so we are severely limited in the size and mean of the tensors
+  // we can reduce without overflow.
+  CALL_SUBTEST((test_sum_accuracy<Eigen::half,4*1024,16>()));
+  CALL_SUBTEST((test_sum_accuracy<Eigen::half,10*1024*1024,0>()));
 }