From 13092b5d04122d2b85885c368a4dcd6e6a6837af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20S=C3=A1nchez?= Date: Fri, 29 Mar 2024 15:51:15 +0000 Subject: [PATCH] Fix usages of Eigen::array to be compatible with std::array. --- .../CXX11/src/Tensor/TensorConvolution.h | 14 ++--- .../Eigen/CXX11/src/Tensor/TensorReduction.h | 9 +-- unsupported/test/cxx11_tensor_argmax_gpu.cu | 8 +-- unsupported/test/cxx11_tensor_contract_gpu.cu | 10 ++-- unsupported/test/cxx11_tensor_device.cu | 10 ++-- unsupported/test/cxx11_tensor_gpu.cu | 56 +++++++++---------- .../test/cxx11_tensor_of_float16_gpu.cu | 2 +- unsupported/test/cxx11_tensor_scan_gpu.cu | 4 +- 8 files changed, 57 insertions(+), 56 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h index 0493fe9a4..26984b690 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -898,8 +898,8 @@ struct TensorEvaluator indices(m_indices[0]); - const array kernel_dims(m_kernelImpl.dimensions()[0]); + const array indices{m_indices[0]}; + const array kernel_dims{m_kernelImpl.dimensions()[0]}; internal::IndexMapper indexMapper(m_inputImpl.dimensions(), kernel_dims, indices); switch (kernel_size) { case 4: { @@ -965,8 +965,8 @@ struct TensorEvaluator indices(m_indices[idxX], m_indices[idxY]); - const array kernel_dims(m_kernelImpl.dimensions()[idxX], m_kernelImpl.dimensions()[idxY]); + const array indices{m_indices[idxX], m_indices[idxY]}; + const array kernel_dims{m_kernelImpl.dimensions()[idxX], m_kernelImpl.dimensions()[idxY]}; internal::IndexMapper indexMapper(m_inputImpl.dimensions(), kernel_dims, indices); switch (kernel_size_x) { case 4: { @@ -1059,9 +1059,9 @@ struct TensorEvaluator indices(m_indices[idxX], m_indices[idxY], m_indices[idxZ]); - const array kernel_dims(m_kernelImpl.dimensions()[idxX], m_kernelImpl.dimensions()[idxY], - m_kernelImpl.dimensions()[idxZ]); + const array indices{m_indices[idxX], m_indices[idxY], m_indices[idxZ]}; + const array kernel_dims{m_kernelImpl.dimensions()[idxX], m_kernelImpl.dimensions()[idxY], + m_kernelImpl.dimensions()[idxZ]}; internal::IndexMapper indexMapper(m_inputImpl.dimensions(), kernel_dims, indices); LAUNCH_GPU_KERNEL((EigenConvolutionKernel3D, Index, InputDims>), diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 86c6bf346..2ecbb7c20 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -977,11 +977,12 @@ struct TensorReductionEvaluatorBase m_outputStrides; - array, NumOutputDims> m_fastOutputStrides; - array m_preservedStrides; + // Avoid zero-sized arrays, since element access fails to compile on GPU. + array m_outputStrides; + array, (std::max)(NumOutputDims, 1)> m_fastOutputStrides; + array m_preservedStrides; // Map from output to input dimension index. - array m_output_to_input_dim_map; + array m_output_to_input_dim_map; // How many values go into each reduction Index m_numValuesToReduce; diff --git a/unsupported/test/cxx11_tensor_argmax_gpu.cu b/unsupported/test/cxx11_tensor_argmax_gpu.cu index 83058befe..629e5c02b 100644 --- a/unsupported/test/cxx11_tensor_argmax_gpu.cu +++ b/unsupported/test/cxx11_tensor_argmax_gpu.cu @@ -20,7 +20,7 @@ using Eigen::Tensor; template void test_gpu_simple_argmax() { - Tensor in(Eigen::array(72, 53, 97)); + Tensor in(Eigen::array{72, 53, 97}); Tensor out_max; Tensor out_min; in.setRandom(); @@ -43,7 +43,7 @@ void test_gpu_simple_argmax() { Eigen::GpuStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); - Eigen::TensorMap, Aligned> gpu_in(d_in, Eigen::array(72, 53, 97)); + Eigen::TensorMap, Aligned> gpu_in(d_in, Eigen::array{72, 53, 97}); Eigen::TensorMap, Aligned> gpu_out_max(d_out_max); Eigen::TensorMap, Aligned> gpu_out_min(d_out_min); @@ -113,7 +113,7 @@ void test_gpu_argmax_dim() { Eigen::GpuDevice gpu_device(&stream); Eigen::TensorMap, Aligned> gpu_in(d_in, - Eigen::array(2, 3, 5, 7)); + Eigen::array{2, 3, 5, 7}); Eigen::TensorMap, Aligned> gpu_out(d_out, out_shape); gpu_out.device(gpu_device) = gpu_in.argmax(dim); @@ -212,7 +212,7 @@ void test_gpu_argmin_dim() { Eigen::GpuDevice gpu_device(&stream); Eigen::TensorMap, Aligned> gpu_in(d_in, - Eigen::array(2, 3, 5, 7)); + Eigen::array{2, 3, 5, 7}); Eigen::TensorMap, Aligned> gpu_out(d_out, out_shape); gpu_out.device(gpu_device) = gpu_in.argmin(dim); diff --git a/unsupported/test/cxx11_tensor_contract_gpu.cu b/unsupported/test/cxx11_tensor_contract_gpu.cu index 7088d6fb3..c9eebfca5 100644 --- a/unsupported/test/cxx11_tensor_contract_gpu.cu +++ b/unsupported/test/cxx11_tensor_contract_gpu.cu @@ -28,7 +28,7 @@ void test_gpu_contraction(int m_size, int k_size, int n_size) { Tensor t_right(k_size, n_size); Tensor t_result(m_size, n_size); Tensor t_result_gpu(m_size, n_size); - Eigen::array dims(DimPair(1, 0)); + Eigen::array dims{DimPair(1, 0)}; t_left.setRandom(); t_right.setRandom(); @@ -51,9 +51,9 @@ void test_gpu_contraction(int m_size, int k_size, int n_size) { Eigen::GpuStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); - Eigen::TensorMap > gpu_t_left(d_t_left, Eigen::array(m_size, k_size)); - Eigen::TensorMap > gpu_t_right(d_t_right, Eigen::array(k_size, n_size)); - Eigen::TensorMap > gpu_t_result(d_t_result, Eigen::array(m_size, n_size)); + Eigen::TensorMap > gpu_t_left(d_t_left, Eigen::array{m_size, k_size}); + Eigen::TensorMap > gpu_t_right(d_t_right, Eigen::array{k_size, n_size}); + Eigen::TensorMap > gpu_t_result(d_t_result, Eigen::array{m_size, n_size}); gpu_t_result.device(gpu_device) = gpu_t_left.contract(gpu_t_right, dims); t_result = t_left.contract(t_right, dims); @@ -85,7 +85,7 @@ void test_scalar(int m_size, int k_size, int n_size) { Tensor t_right(k_size, n_size); Tensor t_result; Tensor t_result_gpu; - Eigen::array dims(DimPair(0, 0), DimPair(1, 1)); + Eigen::array dims{DimPair(0, 0), DimPair(1, 1)}; t_left.setRandom(); t_right.setRandom(); diff --git a/unsupported/test/cxx11_tensor_device.cu b/unsupported/test/cxx11_tensor_device.cu index 0f91b10b9..0a26ab93b 100644 --- a/unsupported/test/cxx11_tensor_device.cu +++ b/unsupported/test/cxx11_tensor_device.cu @@ -140,7 +140,7 @@ void test_contraction(Context* context) { dims[0] = std::make_pair(1, 1); dims[1] = std::make_pair(2, 2); - Eigen::array shape(40, 50 * 70); + Eigen::array shape{40, 50 * 70}; Eigen::DSizes indices(0, 0); Eigen::DSizes sizes(40, 40); @@ -154,7 +154,7 @@ void test_1d_convolution(Context* context) { Eigen::DSizes indices(0, 0, 0); Eigen::DSizes sizes(40, 49, 70); - Eigen::array dims(1); + Eigen::array dims{1}; context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel1d(), dims); } @@ -163,7 +163,7 @@ void test_2d_convolution(Context* context) { Eigen::DSizes indices(0, 0, 0); Eigen::DSizes sizes(40, 49, 69); - Eigen::array dims(1, 2); + Eigen::array dims{1, 2}; context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel2d(), dims); } @@ -172,7 +172,7 @@ void test_3d_convolution(Context* context) { Eigen::DSizes indices(0, 0, 0); Eigen::DSizes sizes(39, 49, 69); - Eigen::array dims(0, 1, 2); + Eigen::array dims{0, 1, 2}; context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel3d(), dims); } @@ -188,7 +188,7 @@ void synchronize(Eigen::GpuDevice& device) { template void test_device_memory(const TensorDevice& device) { int count = 100; - Eigen::array tensorRange = {{count}}; + Eigen::array tensorRange{count}; Eigen::Tensor host(tensorRange); Eigen::Tensor expected(tensorRange); DataType* device_data = static_cast(device.allocate(count * sizeof(DataType))); diff --git a/unsupported/test/cxx11_tensor_gpu.cu b/unsupported/test/cxx11_tensor_gpu.cu index d717c6ebc..c5046c029 100644 --- a/unsupported/test/cxx11_tensor_gpu.cu +++ b/unsupported/test/cxx11_tensor_gpu.cu @@ -99,9 +99,9 @@ void test_gpu_nullary_max_size() { } void test_gpu_elementwise_small() { - Tensor in1(Eigen::array(2)); - Tensor in2(Eigen::array(2)); - Tensor out(Eigen::array(2)); + Tensor in1(Eigen::array{2}); + Tensor in2(Eigen::array{2}); + Tensor out(Eigen::array{2}); in1.setRandom(); in2.setRandom(); @@ -122,9 +122,9 @@ void test_gpu_elementwise_small() { Eigen::GpuStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); - Eigen::TensorMap, Eigen::Aligned> gpu_in1(d_in1, Eigen::array(2)); - Eigen::TensorMap, Eigen::Aligned> gpu_in2(d_in2, Eigen::array(2)); - Eigen::TensorMap, Eigen::Aligned> gpu_out(d_out, Eigen::array(2)); + Eigen::TensorMap, Eigen::Aligned> gpu_in1(d_in1, Eigen::array{2}); + Eigen::TensorMap, Eigen::Aligned> gpu_in2(d_in2, Eigen::array{2}); + Eigen::TensorMap, Eigen::Aligned> gpu_out(d_out, Eigen::array{2}); gpu_out.device(gpu_device) = gpu_in1 + gpu_in2; @@ -132,8 +132,8 @@ void test_gpu_elementwise_small() { assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess); for (int i = 0; i < 2; ++i) { - VERIFY_IS_APPROX(out(Eigen::array(i)), - in1(Eigen::array(i)) + in2(Eigen::array(i))); + VERIFY_IS_APPROX(out(Eigen::array{i}), + in1(Eigen::array{i}) + in2(Eigen::array{i})); } gpuFree(d_in1); @@ -142,10 +142,10 @@ void test_gpu_elementwise_small() { } void test_gpu_elementwise() { - Tensor in1(Eigen::array(72, 53, 97)); - Tensor in2(Eigen::array(72, 53, 97)); - Tensor in3(Eigen::array(72, 53, 97)); - Tensor out(Eigen::array(72, 53, 97)); + Tensor in1(Eigen::array{72, 53, 97}); + Tensor in2(Eigen::array{72, 53, 97}); + Tensor in3(Eigen::array{72, 53, 97}); + Tensor out(Eigen::array{72, 53, 97}); in1.setRandom(); in2.setRandom(); in3.setRandom(); @@ -171,10 +171,10 @@ void test_gpu_elementwise() { Eigen::GpuStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); - Eigen::TensorMap > gpu_in1(d_in1, Eigen::array(72, 53, 97)); - Eigen::TensorMap > gpu_in2(d_in2, Eigen::array(72, 53, 97)); - Eigen::TensorMap > gpu_in3(d_in3, Eigen::array(72, 53, 97)); - Eigen::TensorMap > gpu_out(d_out, Eigen::array(72, 53, 97)); + Eigen::TensorMap > gpu_in1(d_in1, Eigen::array{72, 53, 97}); + Eigen::TensorMap > gpu_in2(d_in2, Eigen::array{72, 53, 97}); + Eigen::TensorMap > gpu_in3(d_in3, Eigen::array{72, 53, 97}); + Eigen::TensorMap > gpu_out(d_out, Eigen::array{72, 53, 97}); gpu_out.device(gpu_device) = gpu_in1 + gpu_in2 * gpu_in3; @@ -185,9 +185,9 @@ void test_gpu_elementwise() { for (int j = 0; j < 53; ++j) { for (int k = 0; k < 97; ++k) { VERIFY_IS_APPROX( - out(Eigen::array(i, j, k)), - in1(Eigen::array(i, j, k)) + - in2(Eigen::array(i, j, k)) * in3(Eigen::array(i, j, k))); + out(Eigen::array{i, j, k}), + in1(Eigen::array{i, j, k}) + + in2(Eigen::array{i, j, k}) * in3(Eigen::array{i, j, k})); } } } @@ -284,8 +284,8 @@ void test_gpu_contraction() { // more than 30 * 1024, which is the number of threads in blocks on // a 15 SM GK110 GPU Tensor t_left(6, 50, 3, 31); - Tensor t_right(Eigen::array(3, 31, 7, 20, 1)); - Tensor t_result(Eigen::array(6, 50, 7, 20, 1)); + Tensor t_right(Eigen::array{3, 31, 7, 20, 1}); + Tensor t_result(Eigen::array{6, 50, 7, 20, 1}); t_left.setRandom(); t_right.setRandom(); @@ -369,7 +369,7 @@ void test_gpu_convolution_1d() { Eigen::TensorMap > gpu_kernel(d_kernel, 4); Eigen::TensorMap > gpu_out(d_out, 74, 34, 11, 137); - Eigen::array dims(1); + Eigen::array dims{1}; gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); assert(gpuMemcpyAsync(out.data(), d_out, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess); @@ -421,7 +421,7 @@ void test_gpu_convolution_inner_dim_col_major_1d() { Eigen::TensorMap > gpu_kernel(d_kernel, 4); Eigen::TensorMap > gpu_out(d_out, 71, 9, 11, 7); - Eigen::array dims(0); + Eigen::array dims{0}; gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); assert(gpuMemcpyAsync(out.data(), d_out, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess); @@ -473,7 +473,7 @@ void test_gpu_convolution_inner_dim_row_major_1d() { Eigen::TensorMap > gpu_kernel(d_kernel, 4); Eigen::TensorMap > gpu_out(d_out, 7, 9, 11, 71); - Eigen::array dims(3); + Eigen::array dims{3}; gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); assert(gpuMemcpyAsync(out.data(), d_out, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess); @@ -526,7 +526,7 @@ void test_gpu_convolution_2d() { Eigen::TensorMap > gpu_kernel(d_kernel, 3, 4); Eigen::TensorMap > gpu_out(d_out, 74, 35, 8, 137); - Eigen::array dims(1, 2); + Eigen::array dims{1, 2}; gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); assert(gpuMemcpyAsync(out.data(), d_out, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess); @@ -556,9 +556,9 @@ void test_gpu_convolution_2d() { template void test_gpu_convolution_3d() { - Tensor input(Eigen::array(74, 37, 11, 137, 17)); + Tensor input(Eigen::array{74, 37, 11, 137, 17}); Tensor kernel(3, 4, 2); - Tensor out(Eigen::array(74, 35, 8, 136, 17)); + Tensor out(Eigen::array{74, 35, 8, 136, 17}); input = input.constant(10.0f) + input.random(); kernel = kernel.constant(7.0f) + kernel.random(); @@ -583,7 +583,7 @@ void test_gpu_convolution_3d() { Eigen::TensorMap > gpu_kernel(d_kernel, 3, 4, 2); Eigen::TensorMap > gpu_out(d_out, 74, 35, 8, 136, 17); - Eigen::array dims(1, 2, 3); + Eigen::array dims{1, 2, 3}; gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); assert(gpuMemcpyAsync(out.data(), d_out, out_bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess); diff --git a/unsupported/test/cxx11_tensor_of_float16_gpu.cu b/unsupported/test/cxx11_tensor_of_float16_gpu.cu index a9a82676d..88482deb3 100644 --- a/unsupported/test/cxx11_tensor_of_float16_gpu.cu +++ b/unsupported/test/cxx11_tensor_of_float16_gpu.cu @@ -281,7 +281,7 @@ void test_gpu_contractions() { gpu_float2.device(gpu_device) = gpu_float2.random() - gpu_float2.constant(0.5f); typedef Tensor::DimensionPair DimPair; - Eigen::array dims(DimPair(1, 0)); + Eigen::array dims{DimPair(1, 0)}; gpu_res_float.device(gpu_device) = gpu_float1.contract(gpu_float2, dims).cast(); gpu_res_half.device(gpu_device) = gpu_float1.cast().contract(gpu_float2.cast(), dims); diff --git a/unsupported/test/cxx11_tensor_scan_gpu.cu b/unsupported/test/cxx11_tensor_scan_gpu.cu index c316f1e08..c9997adb4 100644 --- a/unsupported/test/cxx11_tensor_scan_gpu.cu +++ b/unsupported/test/cxx11_tensor_scan_gpu.cu @@ -45,9 +45,9 @@ void test_gpu_cumsum(int m_size, int k_size, int n_size) { Eigen::GpuDevice gpu_device(&stream); Eigen::TensorMap > gpu_t_input(d_t_input, - Eigen::array(m_size, k_size, n_size)); + Eigen::array{m_size, k_size, n_size}); Eigen::TensorMap > gpu_t_result(d_t_result, - Eigen::array(m_size, k_size, n_size)); + Eigen::array{m_size, k_size, n_size}); gpu_t_result.device(gpu_device) = gpu_t_input.cumsum(1); t_result = t_input.cumsum(1);