Fix int overflow causing cxx11_tensor_gpu_1 to fail.

This commit is contained in:
Antonio Sánchez 2023-11-06 17:10:16 +00:00 committed by Rasmus Munk Larsen
parent 6f9ad7da61
commit a25f02d73e

View File

@ -655,13 +655,11 @@ EIGEN_STRONG_INLINE void TensorExecutor<Expression, GpuDevice, Vectorizable, Til
 const int block_size = device.maxGpuThreadsPerBlock();
 const int max_blocks =
-    numext::mini<int64_t>(device.getNumGpuMultiProcessors() *
-                              device.maxGpuThreadsPerMultiProcessor(),
-                          NumTraits<StorageIndex>::highest()) /
-    block_size;
+    static_cast<int>(numext::mini<int64_t>(device.getNumGpuMultiProcessors() * device.maxGpuThreadsPerMultiProcessor(),
+                                           NumTraits<StorageIndex>::highest()) / block_size);
 const StorageIndex size = array_prod(evaluator.dimensions());
 // Create a least one block to ensure we won't crash when tensorflow calls with tensors of size 0.
-const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, numext::div_ceil<int>(size, block_size)), 1);
+const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, static_cast<int>(numext::div_ceil<StorageIndex>(size, block_size))), 1);
 LAUNCH_GPU_KERNEL(
     (EigenMetaKernel<TensorEvaluator<Expression, GpuDevice>, StorageIndex>),