diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 87186e1b8..825d33512 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -168,7 +168,11 @@ struct GenericDimReducer<-1, Self, Op> { template + !Self::ReducerTraits::IsExactlyAssociative && + // GPU threads can quickly run out of stack space + // for moderately sized inputs. + !Self::RunningOnGPU + )> struct InnerMostDimReducer { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) { typename Self::CoeffReturnType accum = reducer.initialize(); @@ -567,6 +571,18 @@ struct TensorReductionEvaluatorBase::size; + + // For full reductions +#if defined(EIGEN_USE_GPU) && (defined(EIGEN_GPUCC)) + static constexpr bool RunningOnGPU = internal::is_same::value; + static constexpr bool RunningOnSycl = false; +#elif defined(EIGEN_USE_SYCL) +static const bool RunningOnSycl = internal::is_same::type, Eigen::SyclDevice>::value; +static const bool RunningOnGPU = false; +#else + static constexpr bool RunningOnGPU = false; + static constexpr bool RunningOnSycl = false; +#endif enum { IsAligned = false, @@ -989,17 +1005,6 @@ struct TensorReductionEvaluatorBase::value; - static const bool RunningOnSycl = false; -#elif defined(EIGEN_USE_SYCL) -static const bool RunningOnSycl = internal::is_same::type, Eigen::SyclDevice>::value; -static const bool RunningOnGPU = false; -#else - static const bool RunningOnGPU = false; - static const bool RunningOnSycl = false; -#endif EvaluatorPointerType m_result; const Device EIGEN_DEVICE_REF m_device;