From 24ebb37f38287d65c0e0b60c714e39faffeb5b94 Mon Sep 17 00:00:00 2001 From: Antonio Sanchez Date: Fri, 8 Oct 2021 11:38:13 -0700 Subject: [PATCH] Disable Tree reduction for GPU. For moderately sized inputs, running the Tree reduction quickly fills/overflows the GPU thread stack space, leading to memory errors. This was happening in the `cxx11_tensor_complex_gpu` test, for example. Disabling tree reduction on GPU fixes this. --- .../Eigen/CXX11/src/Tensor/TensorReduction.h | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 87186e1b8..825d33512 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -168,7 +168,11 @@ struct GenericDimReducer<-1, Self, Op> { template + !Self::ReducerTraits::IsExactlyAssociative && + // GPU threads can quickly run out of stack space + // for moderately sized inputs. + !Self::RunningOnGPU + )> struct InnerMostDimReducer { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) { typename Self::CoeffReturnType accum = reducer.initialize(); @@ -567,6 +571,18 @@ struct TensorReductionEvaluatorBase::size; + + // For full reductions +#if defined(EIGEN_USE_GPU) && (defined(EIGEN_GPUCC)) + static constexpr bool RunningOnGPU = internal::is_same::value; + static constexpr bool RunningOnSycl = false; +#elif defined(EIGEN_USE_SYCL) +static const bool RunningOnSycl = internal::is_same::type, Eigen::SyclDevice>::value; +static const bool RunningOnGPU = false; +#else + static constexpr bool RunningOnGPU = false; + static constexpr bool RunningOnSycl = false; +#endif enum { IsAligned = false, @@ -989,17 +1005,6 @@ struct TensorReductionEvaluatorBase::value; - static const bool RunningOnSycl = false; -#elif defined(EIGEN_USE_SYCL) -static const bool RunningOnSycl = internal::is_same::type, Eigen::SyclDevice>::value; -static const bool RunningOnGPU = false; -#else - static const bool RunningOnGPU = false; - static const bool RunningOnSycl = false; -#endif EvaluatorPointerType m_result; const Device EIGEN_DEVICE_REF m_device;