mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-09-23 23:03:15 +08:00
Fix int overflow causing cxx11_tensor_gpu_1 to fail.
This commit is contained in:
parent
6f9ad7da61
commit
a25f02d73e
@ -655,13 +655,11 @@ EIGEN_STRONG_INLINE void TensorExecutor<Expression, GpuDevice, Vectorizable, Til
|
|||||||
|
|
||||||
const int block_size = device.maxGpuThreadsPerBlock();
|
const int block_size = device.maxGpuThreadsPerBlock();
|
||||||
const int max_blocks =
|
const int max_blocks =
|
||||||
numext::mini<int64_t>(device.getNumGpuMultiProcessors() *
|
static_cast<int>(numext::mini<int64_t>(device.getNumGpuMultiProcessors() * device.maxGpuThreadsPerMultiProcessor(),
|
||||||
device.maxGpuThreadsPerMultiProcessor(),
|
NumTraits<StorageIndex>::highest()) / block_size);
|
||||||
NumTraits<StorageIndex>::highest()) /
|
|
||||||
block_size;
|
|
||||||
const StorageIndex size = array_prod(evaluator.dimensions());
|
const StorageIndex size = array_prod(evaluator.dimensions());
|
||||||
// Create at least one block to ensure we won't crash when tensorflow calls with tensors of size 0.
|
// Create at least one block to ensure we won't crash when tensorflow calls with tensors of size 0.
|
||||||
const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, numext::div_ceil<int>(size, block_size)), 1);
|
const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, static_cast<int>(numext::div_ceil<StorageIndex>(size, block_size))), 1);
|
||||||
|
|
||||||
LAUNCH_GPU_KERNEL(
|
LAUNCH_GPU_KERNEL(
|
||||||
(EigenMetaKernel<TensorEvaluator<Expression, GpuDevice>, StorageIndex>),
|
(EigenMetaKernel<TensorEvaluator<Expression, GpuDevice>, StorageIndex>),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user