Added benchmarks for type casting of float16

2025-09-17 20:03:17 +08:00 · 2016-02-26 12:24:58 -08:00 · 2016-02-26 12:24:58 -08:00 · 93485d86bc
commit 93485d86bc
parent 002824e32d
2 changed files with 4 additions and 4 deletions
--- a/bench/tensors/tensor_benchmarks.h
+++ b/bench/tensors/tensor_benchmarks.h
@@ -48,12 +48,12 @@ template <typename Device, typename T> class BenchmarkSuite {
    Eigen::array<TensorIndex, 2> sizes;
    sizes[0] = m_;
    sizes[1] = k_;
-    const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> A(a_, sizes);
-    TensorMap<Tensor<int, 2, 0, TensorIndex>, Eigen::Aligned> B((int*)b_, sizes);
+    const TensorMap<Tensor<int, 2, 0, TensorIndex>, Eigen::Aligned> A((int*)a_, sizes);
+    TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(b_, sizes);

    StartBenchmarkTiming();
    for (int iter = 0; iter < num_iters; ++iter) {
-      B.device(device_) = A.template cast<int>();
+      B.device(device_) = A.template cast<T>();
    }
    // Record the number of values copied per second
    finalizeBenchmark(static_cast<int64_t>(m_) * k_ * num_iters);
--- a/bench/tensors/tensor_benchmarks_fp16_gpu.cu
+++ b/bench/tensors/tensor_benchmarks_fp16_gpu.cu
@@ -19,7 +19,7 @@
  BENCHMARK_RANGE(BM_##FUNC, 10, 5000);

 BM_FuncGPU(memcpy);
-//BM_FuncGPU(typeCasting);
+BM_FuncGPU(typeCasting);
 //BM_FuncGPU(random);
 BM_FuncGPU(slicing);
 BM_FuncGPU(rowChip);