mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-09-23 23:03:15 +08:00
Fix int overflow causing cxx11_tensor_gpu_1 to fail.
This commit is contained in:
parent
6f9ad7da61
commit
a25f02d73e
@ -655,13 +655,11 @@ EIGEN_STRONG_INLINE void TensorExecutor<Expression, GpuDevice, Vectorizable, Til
|
||||
|
||||
const int block_size = device.maxGpuThreadsPerBlock();
|
||||
const int max_blocks =
|
||||
numext::mini<int64_t>(device.getNumGpuMultiProcessors() *
|
||||
device.maxGpuThreadsPerMultiProcessor(),
|
||||
NumTraits<StorageIndex>::highest()) /
|
||||
block_size;
|
||||
static_cast<int>(numext::mini<int64_t>(device.getNumGpuMultiProcessors() * device.maxGpuThreadsPerMultiProcessor(),
|
||||
NumTraits<StorageIndex>::highest()) / block_size);
|
||||
const StorageIndex size = array_prod(evaluator.dimensions());
|
||||
// Create at least one block to ensure we won't crash when tensorflow calls with tensors of size 0.
|
||||
const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, numext::div_ceil<int>(size, block_size)), 1);
|
||||
const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, static_cast<int>(numext::div_ceil<StorageIndex>(size, block_size))), 1);
|
||||
|
||||
LAUNCH_GPU_KERNEL(
|
||||
(EigenMetaKernel<TensorEvaluator<Expression, GpuDevice>, StorageIndex>),
|
||||
|
Loading…
x
Reference in New Issue
Block a user