diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index d57203ad9..7b5842571 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -333,9 +333,9 @@ class TensorExecutor(block_size * sizeof(Scalar), EIGEN_MAX_ALIGN_BYTES); + align * divup(block_size * sizeof(Scalar), align); void* buf = device.allocate((num_threads + 1) * aligned_blocksize); device.parallelFor( block_mapper.total_block_count(), cost * block_size,