Fixed a couple of bugs related to the Pascal family of GPUs

HG: Enter commit message.  Lines beginning with 'HG:' are removed.
This commit is contained in:
Benoit Steiner 2016-05-11 23:02:26 -07:00
parent 886445ce4d
commit fae0493f98
2 changed files with 6 additions and 6 deletions

View File

@ -239,8 +239,8 @@ template<> EIGEN_DEVICE_FUNC inline half predux<half2>(const half2& a) {
template<> EIGEN_DEVICE_FUNC inline half predux_max<half2>(const half2& a) { template<> EIGEN_DEVICE_FUNC inline half predux_max<half2>(const half2& a) {
#if __CUDA_ARCH__ >= 530 #if __CUDA_ARCH__ >= 530
half first = __low2half(a); __half first = __low2half(a);
half second = __high2half(a); __half second = __high2half(a);
return __hgt(first, second) ? first : second; return __hgt(first, second) ? first : second;
#else #else
float a1 = __low2float(a); float a1 = __low2float(a);
@ -251,8 +251,8 @@ template<> EIGEN_DEVICE_FUNC inline half predux_max<half2>(const half2& a) {
template<> EIGEN_DEVICE_FUNC inline half predux_min<half2>(const half2& a) { template<> EIGEN_DEVICE_FUNC inline half predux_min<half2>(const half2& a) {
#if __CUDA_ARCH__ >= 530 #if __CUDA_ARCH__ >= 530
half first = __low2half(a); __half first = __low2half(a);
half second = __high2half(a); __half second = __high2half(a);
return __hlt(first, second) ? first : second; return __hlt(first, second) ? first : second;
#else #else
float a1 = __low2float(a); float a1 = __low2float(a);

View File

@ -392,7 +392,7 @@ void test_cuda_forced_evals() {
no_bcast[0] = 1; no_bcast[0] = 1;
gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f); gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
gpu_res_float.device(gpu_device) = gpu_float.log(); gpu_res_float.device(gpu_device) = gpu_float.abs();
gpu_res_half1.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().eval().cast<float>(); gpu_res_half1.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().eval().cast<float>();
gpu_res_half2.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().broadcast(no_bcast).eval().cast<float>(); gpu_res_half2.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().broadcast(no_bcast).eval().cast<float>();
@ -405,7 +405,7 @@ void test_cuda_forced_evals() {
gpu_device.synchronize(); gpu_device.synchronize();
for (int i = 0; i < num_elem; ++i) { for (int i = 0; i < num_elem; ++i) {
std::cout << "Checking forced eval " << i << std::endl; std::cout << "Checking forced eval " << i << full_prec(i) << " vs " << half_prec1(i) << " vs " << half_prec2(i) << std::endl;
VERIFY_IS_APPROX(full_prec(i), half_prec1(i)); VERIFY_IS_APPROX(full_prec(i), half_prec1(i));
VERIFY_IS_APPROX(full_prec(i), half_prec2(i)); VERIFY_IS_APPROX(full_prec(i), half_prec2(i));
} }