mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-07-16 01:51:51 +08:00
Disable tree reduction for GPU.
For moderately sized inputs, running the tree reduction quickly fills up and overflows the GPU thread stack space, leading to memory errors. This was happening in the `cxx11_tensor_complex_gpu` test, for example. Disabling tree reduction on the GPU fixes this. (cherry picked from commit 24ebb37f38287d65c0e0b60c714e39faffeb5b94)
This commit is contained in:
parent
89a71f3126
commit
554982beef
@ -166,8 +166,12 @@ struct GenericDimReducer<-1, Self, Op> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <typename Self, typename Op, bool Vectorizable = (Self::InputPacketAccess && Self::ReducerTraits::PacketAccess),
|
template <typename Self, typename Op, bool Vectorizable = (Self::InputPacketAccess && Self::ReducerTraits::PacketAccess),
|
||||||
bool UseTreeReduction = (!Self::ReducerTraits::IsStateful &&
|
bool UseTreeReduction = (!Self::ReducerTraits::IsStateful &&
|
||||||
!Self::ReducerTraits::IsExactlyAssociative)>
|
!Self::ReducerTraits::IsExactlyAssociative &&
|
||||||
|
// GPU threads can quickly run out of stack space
|
||||||
|
// for moderately sized inputs.
|
||||||
|
!Self::RunningOnGPU
|
||||||
|
)>
|
||||||
struct InnerMostDimReducer {
|
struct InnerMostDimReducer {
|
||||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) {
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType reduce(const Self& self, typename Self::Index firstIndex, typename Self::Index numValuesToReduce, Op& reducer) {
|
||||||
typename Self::CoeffReturnType accum = reducer.initialize();
|
typename Self::CoeffReturnType accum = reducer.initialize();
|
||||||
@ -528,6 +532,18 @@ struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, M
|
|||||||
// Subset of strides of the input tensor for the non-reduced dimensions.
|
// Subset of strides of the input tensor for the non-reduced dimensions.
|
||||||
// Indexed by output dimensions.
|
// Indexed by output dimensions.
|
||||||
static const int NumPreservedStrides = max_n_1<NumOutputDims>::size;
|
static const int NumPreservedStrides = max_n_1<NumOutputDims>::size;
|
||||||
|
|
||||||
|
// For full reductions
|
||||||
|
#if defined(EIGEN_USE_GPU) && (defined(EIGEN_GPUCC))
|
||||||
|
static constexpr bool RunningOnGPU = internal::is_same<Device, Eigen::GpuDevice>::value;
|
||||||
|
static constexpr bool RunningOnSycl = false;
|
||||||
|
#elif defined(EIGEN_USE_SYCL)
|
||||||
|
static const bool RunningOnSycl = internal::is_same<typename internal::remove_all<Device>::type, Eigen::SyclDevice>::value;
|
||||||
|
static const bool RunningOnGPU = false;
|
||||||
|
#else
|
||||||
|
static constexpr bool RunningOnGPU = false;
|
||||||
|
static constexpr bool RunningOnSycl = false;
|
||||||
|
#endif
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
IsAligned = false,
|
IsAligned = false,
|
||||||
@ -950,17 +966,6 @@ struct TensorReductionEvaluatorBase<const TensorReductionOp<Op, Dims, ArgType, M
|
|||||||
// Operation to apply for computing the reduction.
|
// Operation to apply for computing the reduction.
|
||||||
Op m_reducer;
|
Op m_reducer;
|
||||||
|
|
||||||
// For full reductions
|
|
||||||
#if defined(EIGEN_USE_GPU) && (defined(EIGEN_GPUCC))
|
|
||||||
static const bool RunningOnGPU = internal::is_same<Device, Eigen::GpuDevice>::value;
|
|
||||||
static const bool RunningOnSycl = false;
|
|
||||||
#elif defined(EIGEN_USE_SYCL)
|
|
||||||
static const bool RunningOnSycl = internal::is_same<typename internal::remove_all<Device>::type, Eigen::SyclDevice>::value;
|
|
||||||
static const bool RunningOnGPU = false;
|
|
||||||
#else
|
|
||||||
static const bool RunningOnGPU = false;
|
|
||||||
static const bool RunningOnSycl = false;
|
|
||||||
#endif
|
|
||||||
EvaluatorPointerType m_result;
|
EvaluatorPointerType m_result;
|
||||||
|
|
||||||
const Device EIGEN_DEVICE_REF m_device;
|
const Device EIGEN_DEVICE_REF m_device;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user