From fffe63045cb48da3fd3ff0e5e8b7ab4180db8f64 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 29 Jun 2015 14:10:32 -0700 Subject: [PATCH] Added a test for full reductions on GPU --- unsupported/test/CMakeLists.txt | 1 + .../test/cxx11_tensor_reduction_cuda.cpp | 55 +++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 unsupported/test/cxx11_tensor_reduction_cuda.cpp diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index f438d4107..64e26683e 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -141,5 +141,6 @@ if(EIGEN_TEST_CXX11) # ei_add_test(cxx11_tensor_device "-std=c++0x") # ei_add_test(cxx11_tensor_cuda "-std=c++0x") # ei_add_test(cxx11_tensor_contract_cuda "-std=c++0x") +# ei_add_test(cxx11_tensor_reduction_cuda "-std=c++0x") endif() diff --git a/unsupported/test/cxx11_tensor_reduction_cuda.cpp b/unsupported/test/cxx11_tensor_reduction_cuda.cpp new file mode 100644 index 000000000..a7eb7ac75 --- /dev/null +++ b/unsupported/test/cxx11_tensor_reduction_cuda.cpp @@ -0,0 +1,55 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_reduction_cuda +#define EIGEN_USE_GPU + +#include "main.h" +#include + + +template +static void test_full_reductions() { + + Eigen::GpuDevice gpu_device; + + const int num_rows = internal::random(1024, 5*1024); + const int num_cols = internal::random(1024, 5*1024); + + Tensor in(num_rows, num_cols); + in.setRandom(); + + Tensor full_redux(1); + full_redux = in.sum(); + + std::size_t in_bytes = in.size() * sizeof(float); + std::size_t out_bytes = full_redux.size() * sizeof(float); + float* gpu_in_ptr = static_cast(gpu_device.allocate(in_bytes)); + float* gpu_out_ptr = static_cast(gpu_device.allocate(out_bytes)); + gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes); + + TensorMap > in_gpu(gpu_in_ptr, num_rows, num_cols); + TensorMap > out_gpu(gpu_out_ptr, 1); + + out_gpu.device(gpu_device) = in_gpu.sum(); + + Tensor full_redux_gpu(1); + gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes); + gpu_device.synchronize(); + + // Check that the CPU and GPU reductions return the same result. + VERIFY_IS_APPROX(full_redux(0), full_redux_gpu(0)); +} + +void test_cxx11_tensor_reduction_cuda() { + CALL_SUBTEST(test_full_reductions()); + CALL_SUBTEST(test_full_reductions()); +}