From b50d8f8c4a2cdc3aaa436ea183324eca45a3aa97 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 3 Aug 2016 16:50:13 -0700 Subject: [PATCH] Extended a regression test to validate that basic fp16 support works with cuda 7.0 --- .../test/cxx11_tensor_of_float16_cuda.cu | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu index b6df5a4d2..9a1de6c34 100644 --- a/unsupported/test/cxx11_tensor_of_float16_cuda.cu +++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu @@ -19,6 +19,44 @@ using Eigen::Tensor; +template +void test_cuda_numext() { + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + int num_elem = 101; + + float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float)); + bool* d_res_half = (bool*)gpu_device.allocate(num_elem * sizeof(bool)); + bool* d_res_float = (bool*)gpu_device.allocate(num_elem * sizeof(bool)); + + Eigen::TensorMap, Eigen::Aligned> gpu_float( + d_float, num_elem); + Eigen::TensorMap, Eigen::Aligned> gpu_res_half( + d_res_half, num_elem); + Eigen::TensorMap, Eigen::Aligned> gpu_res_float( + d_res_float, num_elem); + + gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f); + gpu_res_float.device(gpu_device) = gpu_float.unaryExpr(Eigen::internal::scalar_isnan_op()); + gpu_res_half.device(gpu_device) = gpu_float.cast().unaryExpr(Eigen::internal::scalar_isnan_op()); + + Tensor half_prec(num_elem); + Tensor full_prec(num_elem); + gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(bool)); + gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(bool)); + gpu_device.synchronize(); + + for (int i = 0; i < num_elem; ++i) { + std::cout << "Checking unary " << i << std::endl; + VERIFY_IS_EQUAL(full_prec(i), half_prec(i)); + } + + gpu_device.deallocate(d_float); + gpu_device.deallocate(d_res_half); + 
gpu_device.deallocate(d_res_float); +} + + #ifdef EIGEN_HAS_CUDA_FP16 template @@ -415,6 +453,8 @@ void test_cuda_forced_evals() { void test_cxx11_tensor_of_float16_cuda() { + CALL_SUBTEST_1(test_cuda_numext()); + #ifdef EIGEN_HAS_CUDA_FP16 CALL_SUBTEST_1(test_cuda_conversion()); CALL_SUBTEST_1(test_cuda_unary());