diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
index 138881996..15b784a95 100644
--- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
@@ -239,8 +239,9 @@ template<> EIGEN_DEVICE_FUNC inline half predux(const half2& a) {
 
 template<> EIGEN_DEVICE_FUNC inline half predux_max(const half2& a) {
 #if __CUDA_ARCH__ >= 530
-  half first = __low2half(a);
-  half second = __high2half(a);
+  // The CUDA intrinsics used here (__low2half, __high2half, __hgt) work on the native __half type.
+  __half first = __low2half(a);
+  __half second = __high2half(a);
   return __hgt(first, second) ? first : second;
 #else
   float a1 = __low2float(a);
@@ -251,8 +252,9 @@ template<> EIGEN_DEVICE_FUNC inline half predux_max(const half2& a) {
 
 template<> EIGEN_DEVICE_FUNC inline half predux_min(const half2& a) {
 #if __CUDA_ARCH__ >= 530
-  half first = __low2half(a);
-  half second = __high2half(a);
+  // The CUDA intrinsics used here (__low2half, __high2half, __hlt) work on the native __half type.
+  __half first = __low2half(a);
+  __half second = __high2half(a);
   return __hlt(first, second) ? first : second;
 #else
   float a1 = __low2float(a);
diff --git a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu
index 992bd7bd0..8223285ee 100644
--- a/unsupported/test/cxx11_tensor_of_float16_cuda.cu
+++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu
@@ -392,7 +392,8 @@ void test_cuda_forced_evals() {
   no_bcast[0] = 1;
 
   gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
-  gpu_res_float.device(gpu_device) = gpu_float.log();
+  // Full-precision reference: abs() mirrors the operation applied in the half-precision expressions below.
+  gpu_res_float.device(gpu_device) = gpu_float.abs();
   gpu_res_half1.device(gpu_device) = gpu_float.cast().abs().eval().cast();
   gpu_res_half2.device(gpu_device) = gpu_float.cast().abs().broadcast(no_bcast).eval().cast();
 
@@ -405,7 +406,8 @@ void test_cuda_forced_evals() {
   gpu_device.synchronize();
 
   for (int i = 0; i < num_elem; ++i) {
-    std::cout << "Checking forced eval " << i << std::endl;
+    // Print the index and the three compared values; the ": " keeps the index from fusing with the first value.
+    std::cout << "Checking forced eval " << i << ": " << full_prec(i) << " vs " << half_prec1(i) << " vs " << half_prec2(i) << std::endl;
     VERIFY_IS_APPROX(full_prec(i), half_prec1(i));
     VERIFY_IS_APPROX(full_prec(i), half_prec2(i));
   }