mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 11:49:02 +08:00
Fixed a couple of bugs related to the Pascal family of GPUs
HG: Enter commit message. Lines beginning with 'HG:' are removed.
This commit is contained in:
parent
886445ce4d
commit
fae0493f98
@ -239,8 +239,8 @@ template<> EIGEN_DEVICE_FUNC inline half predux<half2>(const half2& a) {
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline half predux_max<half2>(const half2& a) {
|
||||
#if __CUDA_ARCH__ >= 530
|
||||
half first = __low2half(a);
|
||||
half second = __high2half(a);
|
||||
__half first = __low2half(a);
|
||||
__half second = __high2half(a);
|
||||
return __hgt(first, second) ? first : second;
|
||||
#else
|
||||
float a1 = __low2float(a);
|
||||
@ -251,8 +251,8 @@ template<> EIGEN_DEVICE_FUNC inline half predux_max<half2>(const half2& a) {
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline half predux_min<half2>(const half2& a) {
|
||||
#if __CUDA_ARCH__ >= 530
|
||||
half first = __low2half(a);
|
||||
half second = __high2half(a);
|
||||
__half first = __low2half(a);
|
||||
__half second = __high2half(a);
|
||||
return __hlt(first, second) ? first : second;
|
||||
#else
|
||||
float a1 = __low2float(a);
|
||||
|
@ -392,7 +392,7 @@ void test_cuda_forced_evals() {
|
||||
no_bcast[0] = 1;
|
||||
|
||||
gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
|
||||
gpu_res_float.device(gpu_device) = gpu_float.log();
|
||||
gpu_res_float.device(gpu_device) = gpu_float.abs();
|
||||
gpu_res_half1.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().eval().cast<float>();
|
||||
gpu_res_half2.device(gpu_device) = gpu_float.cast<Eigen::half>().abs().broadcast(no_bcast).eval().cast<float>();
|
||||
|
||||
@ -405,7 +405,7 @@ void test_cuda_forced_evals() {
|
||||
gpu_device.synchronize();
|
||||
|
||||
for (int i = 0; i < num_elem; ++i) {
|
||||
std::cout << "Checking forced eval " << i << std::endl;
|
||||
std::cout << "Checking forced eval " << i << full_prec(i) << " vs " << half_prec1(i) << " vs " << half_prec2(i) << std::endl;
|
||||
VERIFY_IS_APPROX(full_prec(i), half_prec1(i));
|
||||
VERIFY_IS_APPROX(full_prec(i), half_prec2(i));
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user