diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index 1d22843af..1d534f8ae 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -64,10 +64,10 @@ template <typename OutputDims> struct DimInitializer {
   }
 };
 
-template <> struct DimInitializer<Sizes<1> > {
+template <> struct DimInitializer<Sizes<0> > {
   template <typename InputDims, typename Index, size_t Rank> EIGEN_DEVICE_FUNC
   static void run(const InputDims& input_dims, const array<bool, Rank>&,
-                  Sizes<1>*, array<Index, Rank>* reduced_dims) {
+                  Sizes<0>*, array<Index, Rank>* reduced_dims) {
     const int NumInputDims = internal::array_size<InputDims>::value;
     for (int i = 0; i < NumInputDims; ++i) {
       (*reduced_dims)[i] = input_dims[i];
@@ -136,6 +136,12 @@ struct GenericDimReducer<0, Self, Op> {
     }
   }
 };
+template <typename Self, typename Op>
+struct GenericDimReducer<-1, Self, Op> {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::CoeffReturnType*) {
+    eigen_assert(false && "should never be called");
+  }
+};
 
 template <typename Self, typename Op, bool Vectorizable = (Self::InputPacketAccess & Op::PacketAccess)>
 struct InnerMostDimReducer {
@@ -192,6 +198,12 @@ struct InnerMostDimPreserver<0, Self, Op, true> {
     }
   }
 };
+template <typename Self, typename Op>
+struct InnerMostDimPreserver<-1, Self, Op, true> {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::PacketReturnType*) {
+    eigen_assert(false && "should never be called");
+  }
+};
 
 // Default full reducer
 template <typename Self, typename Op, typename Device>
@@ -550,8 +562,8 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
   typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
   static const int NumInputDims = internal::array_size<InputDimensions>::value;
   static const int NumReducedDims = internal::array_size<Dims>::value;
-  static const int NumOutputDims = (NumInputDims==NumReducedDims) ? 1 : NumInputDims - NumReducedDims;
-  typedef typename internal::conditional<NumInputDims==NumReducedDims, Sizes<1>, DSizes<Index, NumOutputDims> >::type Dimensions;
+  static const int NumOutputDims = NumInputDims - NumReducedDims;
+  typedef typename internal::conditional<NumOutputDims==0, Sizes<0>, DSizes<Index, NumOutputDims> >::type Dimensions;
   typedef typename XprType::Scalar Scalar;
   typedef TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device> Self;
   static const bool InputPacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess;
@@ -565,7 +577,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
 
   static const bool ReducingInnerMostDims = internal::are_inner_most_dims<Dims, NumInputDims, Layout>::value;
   static const bool PreservingInnerMostDims = internal::preserve_inner_most_dims<Dims, NumInputDims, Layout>::value;
-  static const bool RunningFullReduction = (NumInputDims==NumReducedDims);
+  static const bool RunningFullReduction = (NumOutputDims==0);
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
       : m_impl(op.expression(), device), m_reducer(op.reducer()), m_result(NULL), m_device(device)
@@ -589,51 +601,54 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
     internal::DimInitializer<Dimensions>::run(input_dims, reduced, &m_dimensions, &m_reducedDims);
 
     // Precompute output strides.
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      m_outputStrides[0] = 1;
-      for (int i = 1; i < NumOutputDims; ++i) {
-        m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
-      }
-    } else {
-      m_outputStrides[NumOutputDims - 1] = 1;
-      for (int i = NumOutputDims - 2; i >= 0; --i) {
-        m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
-      }
-    }
-
-    // Precompute input strides.
-    array<Index, NumInputDims> input_strides;
-    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      input_strides[0] = 1;
-      for (int i = 1; i < NumInputDims; ++i) {
-        input_strides[i] = input_strides[i-1] * input_dims[i-1];
-      }
-    } else {
-      input_strides[NumInputDims - 1] = 1;
-      for (int i = NumInputDims - 2; i >= 0; --i) {
-        input_strides[i] = input_strides[i + 1] * input_dims[i + 1];
-      }
-    }
-
-    int outputIndex = 0;
-    int reduceIndex = 0;
-    for (int i = 0; i < NumInputDims; ++i) {
-      if (reduced[i]) {
-        m_reducedStrides[reduceIndex] = input_strides[i];
-        ++reduceIndex;
+    if (NumOutputDims > 0) {
+      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+        m_outputStrides[0] = 1;
+        for (int i = 1; i < NumOutputDims; ++i) {
+          m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
+        }
       } else {
-        m_preservedStrides[outputIndex] = input_strides[i];
-        ++outputIndex;
+        m_outputStrides[NumOutputDims - 1] = 1;
+        for (int i = NumOutputDims - 2; i >= 0; --i) {
+          m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
+        }
+      }
+    }
+
+    // Precompute input strides.
+    if (NumInputDims > 0) {
+      array<Index, NumInputDims> input_strides;
+      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+        input_strides[0] = 1;
+        for (int i = 1; i < NumInputDims; ++i) {
+          input_strides[i] = input_strides[i-1] * input_dims[i-1];
+        }
+      } else {
+        input_strides[NumInputDims - 1] = 1;
+        for (int i = NumInputDims - 2; i >= 0; --i) {
+          input_strides[i] = input_strides[i + 1] * input_dims[i + 1];
+        }
+      }
+
+      int outputIndex = 0;
+      int reduceIndex = 0;
+      for (int i = 0; i < NumInputDims; ++i) {
+        if (reduced[i]) {
+          m_reducedStrides[reduceIndex] = input_strides[i];
+          ++reduceIndex;
+        } else {
+          m_preservedStrides[outputIndex] = input_strides[i];
+          ++outputIndex;
+        }
       }
     }
 
     // Special case for full reductions
-    if (NumInputDims == NumReducedDims) {
-      eigen_assert(m_dimensions[0] == 1);
+    if (NumOutputDims == 0) {
       m_preservedStrides[0] = internal::array_prod(input_dims);
     }
   }
- 
+
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
 
   typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
@@ -674,9 +689,9 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
       return *m_result;
     }
     Op reducer(m_reducer);
-    if (ReducingInnerMostDims) {
+    if (ReducingInnerMostDims || RunningFullReduction) {
       const Index num_values_to_reduce =
-        (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumOutputDims - 1];
+        (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1];
       return internal::InnerMostDimReducer<Self, Op>::reduce(*this, firstInput(index),
                                                              num_values_to_reduce, reducer);
     } else {
@@ -697,7 +712,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
     EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
     if (ReducingInnerMostDims) {
       const Index num_values_to_reduce =
-        (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumOutputDims - 1];
+        (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1];
       const Index firstIndex = firstInput(index);
       for (Index i = 0; i < packetSize; ++i) {
         Op reducer(m_reducer);
@@ -748,7 +763,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
       if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
         return index * m_preservedStrides[0];
       } else {
-        return index * m_preservedStrides[NumOutputDims - 1];
+        return index * m_preservedStrides[NumPreservedStrides - 1];
      }
    }
    // TBD: optimize the case where we preserve the innermost dimensions.
@@ -774,10 +789,10 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
         index -= idx * m_outputStrides[i];
       }
       if (PreservingInnerMostDims) {
-        eigen_assert(m_preservedStrides[NumOutputDims - 1] == 1);
+        eigen_assert(m_preservedStrides[NumPreservedStrides - 1] == 1);
         startInput += index;
       } else {
-        startInput += index * m_preservedStrides[NumOutputDims - 1];
+        startInput += index * m_preservedStrides[NumPreservedStrides - 1];
       }
     }
     return startInput;
@@ -789,7 +804,8 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
   array<Index, NumOutputDims> m_outputStrides;
   // Subset of strides of the input tensor for the non-reduced dimensions.
   // Indexed by output dimensions.
-  array<Index, NumOutputDims> m_preservedStrides;
+  static const int NumPreservedStrides = max_n_1<NumOutputDims>::size;
+  array<Index, NumPreservedStrides> m_preservedStrides;
   // Subset of strides of the input tensor for the reduced dimensions.
   // Indexed by reduced dimensions.
   array<Index, NumReducedDims> m_reducedStrides;
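
For context beyond the patch itself: the user-visible effect is that a full reduction now evaluates to a tensor of rank 0 (`NumOutputDims == 0`) instead of a rank-1 tensor holding a single element. A minimal usage sketch, assuming only the public API of the unsupported Tensor module (this snippet is illustrative and not part of the patch):

```cpp
#include <iostream>
#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 2> t(3, 4);
  t.setRandom();

  // Reducing over every dimension is a full reduction. With this patch the
  // result has rank 0 rather than being a rank-1 tensor with one element.
  Eigen::Tensor<float, 0> total = t.sum();

  // A rank-0 tensor is indexed with no arguments to read the scalar back.
  std::cout << "sum = " << total() << "\n";
  return 0;
}
```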
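
On why the new `GenericDimReducer<-1, ...>` and `InnerMostDimPreserver<-1, ...>` specializations are needed: these helpers recurse on a compile-time dimension index, and with `NumOutputDims == 0` an entry point such as `GenericDimReducer<NumOutputDims - 1, ...>` now names an instantiation at `-1`, even though that code path can never execute at runtime. The pattern in miniature (a standalone sketch, not code from the patch):

```cpp
#include <cassert>

// Compile-time loop over dimensions: each level peels off one dimension
// until the base-case specialization at 0 is reached.
template <int DimIndex>
struct Loop {
  static void run(int* count) {
    Loop<DimIndex - 1>::run(count);  // recurse toward the base case
    ++*count;
  }
};
template <>
struct Loop<0> {
  static void run(int* count) { ++*count; }
};
// A caller written as Loop<NumOutputDims - 1> names Loop<-1> when
// NumOutputDims is 0. Without this specialization the primary template would
// keep instantiating ever more negative indices until the compiler gives up;
// with it, the code compiles and the body is simply never reached.
template <>
struct Loop<-1> {
  static void run(int*) { assert(false && "should never be called"); }
};

int main() {
  int count = 0;
  Loop<2>::run(&count);  // executes levels 2, 1, 0
  return count == 3 ? 0 : 1;
}
```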
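
Finally, `m_preservedStrides` switches from `NumOutputDims` entries to `NumPreservedStrides = max_n_1<NumOutputDims>::size` entries because the full-reduction special case still stores the total input size in `m_preservedStrides[0]`, so the array must keep at least one slot even when `NumOutputDims` is 0. The `max_n_1` helper lives in TensorMeta.h (not part of this diff) and, to the best of my reading, amounts to:

```cpp
// Clamp a compile-time size to at least 1, so that arrays sized by it
// never have zero length.
template <size_t n> struct max_n_1 {
  static const size_t size = n;
};
template <> struct max_n_1<0> {
  static const size_t size = 1;
};
```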