Fix int overflow causing cxx11_tensor_gpu_1 to fail.

This commit is contained in:
Antonio Sánchez 2023-11-06 17:10:16 +00:00 committed by Rasmus Munk Larsen
parent 6f9ad7da61
commit a25f02d73e

View File

@ -655,13 +655,11 @@ EIGEN_STRONG_INLINE void TensorExecutor<Expression, GpuDevice, Vectorizable, Til
 const int block_size = device.maxGpuThreadsPerBlock();
 const int max_blocks =
-    numext::mini<int64_t>(device.getNumGpuMultiProcessors() *
-                              device.maxGpuThreadsPerMultiProcessor(),
-                          NumTraits<StorageIndex>::highest()) /
-    block_size;
+    static_cast<int>(numext::mini<int64_t>(device.getNumGpuMultiProcessors() * device.maxGpuThreadsPerMultiProcessor(),
+                                           NumTraits<StorageIndex>::highest()) / block_size);
 const StorageIndex size = array_prod(evaluator.dimensions());
 // Create a least one block to ensure we won't crash when tensorflow calls with tensors of size 0.
-const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, numext::div_ceil<int>(size, block_size)), 1);
+const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, static_cast<int>(numext::div_ceil<StorageIndex>(size, block_size))), 1);
 LAUNCH_GPU_KERNEL(
     (EigenMetaKernel<TensorEvaluator<Expression, GpuDevice>, StorageIndex>),