From a25f02d73e33ac95829f958a832c715c612994dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20S=C3=A1nchez?= Date: Mon, 6 Nov 2023 17:10:16 +0000 Subject: [PATCH] Fix int overflow causing cxx11_tensor_gpu_1 to fail. --- unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index 4eebbe70f..e8fb85d83 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -655,13 +655,11 @@ EIGEN_STRONG_INLINE void TensorExecutor(device.getNumGpuMultiProcessors() * - device.maxGpuThreadsPerMultiProcessor(), - NumTraits::highest()) / - block_size; + static_cast(numext::mini(device.getNumGpuMultiProcessors() * device.maxGpuThreadsPerMultiProcessor(), + NumTraits::highest()) / block_size); const StorageIndex size = array_prod(evaluator.dimensions()); // Create a least one block to ensure we won't crash when tensorflow calls with tensors of size 0. - const int num_blocks = numext::maxi(numext::mini(max_blocks, numext::div_ceil(size, block_size)), 1); + const int num_blocks = numext::maxi(numext::mini(max_blocks, static_cast(numext::div_ceil(size, block_size))), 1); LAUNCH_GPU_KERNEL( (EigenMetaKernel, StorageIndex>),