Added a benchmark to measure the performance of full reductions of 16-bit floats

2025-09-10 08:13:16 +08:00 · 2016-05-05 14:15:11 -07:00 · 2016-05-05 14:15:11 -07:00 · f81e413180
commit f81e413180
parent 28d5572658
2 changed files with 2 additions and 1 deletions
--- a/bench/tensors/tensor_benchmarks.h
+++ b/bench/tensors/tensor_benchmarks.h
@@ -368,7 +368,7 @@ template <typename Device, typename T> class BenchmarkSuite {
    const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(
        b_, input_size);
    Eigen::array<TensorIndex, 0> output_size;
-    TensorMap<Tensor<float, 0, 0, TensorIndex>, Eigen::Aligned> C(
+    TensorMap<Tensor<T, 0, 0, TensorIndex>, Eigen::Aligned> C(
        c_, output_size);
    StartBenchmarkTiming();
--- a/bench/tensors/tensor_benchmarks_fp16_gpu.cu
+++ b/bench/tensors/tensor_benchmarks_fp16_gpu.cu
@@ -33,6 +33,7 @@ BM_FuncGPU(algebraicFunc);
 BM_FuncGPU(transcendentalFunc);
 BM_FuncGPU(rowReduction);
 BM_FuncGPU(colReduction);
+BM_FuncGPU(fullReduction);
 // Contractions