Added support for tensor reductions and concatenations

Benoit Steiner 2014-10-01 20:38:22 -07:00
parent 1c236f4c9a
commit 7caaf6453b
9 changed files with 798 additions and 2 deletions


@@ -34,12 +34,15 @@
#include "unsupported/Eigen/CXX11/src/Tensor/TensorDeviceType.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorDimensions.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorBase.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h"
#include "unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h"


@@ -204,12 +204,40 @@ class TensorBase<Derived, ReadOnlyAccessors>
return TensorSelectOp<const Derived, const ThenDerived, const ElseDerived>(derived(), thenTensor.derived(), elseTensor.derived());
}
// Reductions.
template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorReductionOp<internal::SumReducer<Scalar>, const Dims, const Derived>
sum(const Dims& dims) const {
return TensorReductionOp<internal::SumReducer<Scalar>, const Dims, const Derived>(derived(), dims, internal::SumReducer<Scalar>());
}
template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorReductionOp<internal::MaxReducer<Scalar>, const Dims, const Derived>
maximum(const Dims& dims) const {
return TensorReductionOp<internal::MaxReducer<Scalar>, const Dims, const Derived>(derived(), dims, internal::MaxReducer<Scalar>());
}
template <typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorReductionOp<internal::MinReducer<Scalar>, const Dims, const Derived>
minimum(const Dims& dims) const {
return TensorReductionOp<internal::MinReducer<Scalar>, const Dims, const Derived>(derived(), dims, internal::MinReducer<Scalar>());
}
template <typename Reducer, typename Dims> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorReductionOp<Reducer, const Dims, const Derived>
reduce(const Dims& dims, const Reducer& reducer) const {
return TensorReductionOp<Reducer, const Dims, const Derived>(derived(), dims, reducer);
}
template <typename Broadcast> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorBroadcastingOp<const Broadcast, const Derived>
broadcast(const Broadcast& broadcast) const {
return TensorBroadcastingOp<const Broadcast, const Derived>(derived(), broadcast);
}
template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorConcatenationOp<Axis, const Derived, const OtherDerived>
concatenate(const OtherDerived& other, Axis axis) const {
return TensorConcatenationOp<Axis, const Derived, const OtherDerived>(derived(), other.derived(), axis);
}
// Morphing operators.
template <typename NewDimensions> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorReshapingOp<const NewDimensions, const Derived>
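For orientation, a minimal usage sketch of the new TensorBase entry points (the shapes and float scalar type are illustrative assumptions, not part of the commit):

Eigen::Tensor<float, 3> a(2, 3, 4), b(2, 3, 4);
a.setRandom();
b.setRandom();
Eigen::array<ptrdiff_t, 1> dims{{1}};               // reduce over dimension 1
Eigen::Tensor<float, 2> sums = a.sum(dims);         // 2x4, internal::SumReducer
Eigen::Tensor<float, 2> maxs = a.maximum(dims);     // 2x4, internal::MaxReducer
Eigen::Tensor<float, 3> cat  = a.concatenate(b, 0); // 4x3x4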


@@ -0,0 +1,217 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H
#define EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H
namespace Eigen {
/** \class TensorConcatenationOp
* \ingroup CXX11_Tensor_Module
*
* \brief Tensor concatenation class.
*
*/
namespace internal {
template<typename Axis, typename LhsXprType, typename RhsXprType>
struct traits<TensorConcatenationOp<Axis, LhsXprType, RhsXprType> >
{
// Type promotion to handle the case where the types of the lhs and the rhs are different.
typedef typename promote_storage_type<typename LhsXprType::Scalar,
typename RhsXprType::Scalar>::ret Scalar;
typedef typename packet_traits<Scalar>::type Packet;
typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind,
typename traits<RhsXprType>::StorageKind>::ret StorageKind;
typedef typename promote_index_type<typename traits<LhsXprType>::Index,
typename traits<RhsXprType>::Index>::type Index;
typedef typename LhsXprType::Nested LhsNested;
typedef typename RhsXprType::Nested RhsNested;
typedef typename remove_reference<LhsNested>::type _LhsNested;
typedef typename remove_reference<RhsNested>::type _RhsNested;
enum { Flags = 0 };
};
template<typename Axis, typename LhsXprType, typename RhsXprType>
struct eval<TensorConcatenationOp<Axis, LhsXprType, RhsXprType>, Eigen::Dense>
{
typedef const TensorConcatenationOp<Axis, LhsXprType, RhsXprType>& type;
};
template<typename Axis, typename LhsXprType, typename RhsXprType>
struct nested<TensorConcatenationOp<Axis, LhsXprType, RhsXprType>, 1, typename eval<TensorConcatenationOp<Axis, LhsXprType, RhsXprType> >::type>
{
typedef TensorConcatenationOp<Axis, LhsXprType, RhsXprType> type;
};
} // end namespace internal
template<typename Axis, typename LhsXprType, typename RhsXprType>
class TensorConcatenationOp : public TensorBase<TensorConcatenationOp<Axis, LhsXprType, RhsXprType>, WriteAccessors>
{
public:
typedef typename internal::traits<TensorConcatenationOp>::Scalar Scalar;
typedef typename internal::traits<TensorConcatenationOp>::Packet Packet;
typedef typename internal::traits<TensorConcatenationOp>::StorageKind StorageKind;
typedef typename internal::traits<TensorConcatenationOp>::Index Index;
typedef typename internal::nested<TensorConcatenationOp>::type Nested;
typedef typename internal::promote_storage_type<typename LhsXprType::CoeffReturnType,
typename RhsXprType::CoeffReturnType>::ret CoeffReturnType;
typedef typename internal::promote_storage_type<typename LhsXprType::PacketReturnType,
typename RhsXprType::PacketReturnType>::ret PacketReturnType;
typedef typename NumTraits<Scalar>::Real RealScalar;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConcatenationOp(const LhsXprType& lhs, const RhsXprType& rhs, Axis axis)
: m_lhs_xpr(lhs), m_rhs_xpr(rhs), m_axis(axis) {}
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename LhsXprType::Nested>::type&
lhsExpression() const { return m_lhs_xpr; }
EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename RhsXprType::Nested>::type&
rhsExpression() const { return m_rhs_xpr; }
EIGEN_DEVICE_FUNC Axis axis() const { return m_axis; }
protected:
typename LhsXprType::Nested m_lhs_xpr;
typename RhsXprType::Nested m_rhs_xpr;
const Axis m_axis;
};
// Eval as rvalue
template<typename Axis, typename LeftArgType, typename RightArgType, typename Device>
struct TensorEvaluator<const TensorConcatenationOp<Axis, LeftArgType, RightArgType>, Device>
{
typedef TensorConcatenationOp<Axis, LeftArgType, RightArgType> XprType;
typedef typename XprType::Index Index;
static const int NumDims = internal::array_size<typename TensorEvaluator<LeftArgType, Device>::Dimensions>::value;
static const int RightNumDims = internal::array_size<typename TensorEvaluator<RightArgType, Device>::Dimensions>::value;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename XprType::PacketReturnType PacketReturnType;
enum {
IsAligned = false,
PacketAccess = TensorEvaluator<LeftArgType, Device>::PacketAccess & TensorEvaluator<RightArgType, Device>::PacketAccess,
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_leftImpl(op.lhsExpression(), device), m_rightImpl(op.rhsExpression(), device), m_axis(op.axis())
{
EIGEN_STATIC_ASSERT(NumDims == RightNumDims, YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(0 <= m_axis && m_axis < NumDims);
const Dimensions& lhs_dims = m_leftImpl.dimensions();
const Dimensions& rhs_dims = m_rightImpl.dimensions();
int i = 0;
for (; i < m_axis; ++i) {
eigen_assert(lhs_dims[i] > 0);
eigen_assert(lhs_dims[i] == rhs_dims[i]);
m_dimensions[i] = lhs_dims[i];
}
eigen_assert(lhs_dims[i] > 0); // Now i == m_axis.
eigen_assert(rhs_dims[i] > 0);
m_dimensions[i] = lhs_dims[i] + rhs_dims[i];
for (++i; i < NumDims; ++i) {
eigen_assert(lhs_dims[i] > 0);
eigen_assert(lhs_dims[i] == rhs_dims[i]);
m_dimensions[i] = lhs_dims[i];
}
m_leftStrides[0] = 1;
m_rightStrides[0] = 1;
m_outputStrides[0] = 1;
for (int i = 1; i < NumDims; ++i) {
m_leftStrides[i] = m_leftStrides[i-1] * lhs_dims[i-1];
m_rightStrides[i] = m_rightStrides[i-1] * rhs_dims[i-1];
m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
}
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
// TODO(phli): Add short-circuit memcpy evaluation if underlying data are linear?
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data)
{
m_leftImpl.evalSubExprsIfNeeded(NULL);
m_rightImpl.evalSubExprsIfNeeded(NULL);
return true;
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup()
{
m_leftImpl.cleanup();
m_rightImpl.cleanup();
}
// TODO(phli): attempt to speed this up. The integer divisions and modulo are slow.
// See CL/76180724 comments for more ideas.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
{
// Collect dimension-wise indices (subs).
array<Index, NumDims> subs;
for (int i = NumDims - 1; i > 0; --i) {
subs[i] = index / m_outputStrides[i];
index -= subs[i] * m_outputStrides[i];
}
subs[0] = index;
const Dimensions& left_dims = m_leftImpl.dimensions();
if (subs[m_axis] < left_dims[m_axis]) {
Index left_index = subs[0];
for (int i = 1; i < NumDims; ++i) {
left_index += (subs[i] % left_dims[i]) * m_leftStrides[i];
}
return m_leftImpl.coeff(left_index);
} else {
subs[m_axis] -= left_dims[m_axis];
const Dimensions& right_dims = m_rightImpl.dimensions();
Index right_index = subs[0];
for (int i = 1; i < NumDims; ++i) {
right_index += (subs[i] % right_dims[i]) * m_rightStrides[i];
}
return m_rightImpl.coeff(right_index);
}
}
// TODO(phli): Add a real vectorization.
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
static const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index + packetSize - 1 < dimensions().TotalSize());
EIGEN_ALIGN_DEFAULT CoeffReturnType values[packetSize];
for (int i = 0; i < packetSize; ++i) {
values[i] = coeff(index+i);
}
PacketReturnType rslt = internal::pload<PacketReturnType>(values);
return rslt;
}
Scalar* data() const { return NULL; }
protected:
const Axis m_axis;
Dimensions m_dimensions;
array<Index, NumDims> m_outputStrides;
array<Index, NumDims> m_leftStrides;
array<Index, NumDims> m_rightStrides;
TensorEvaluator<LeftArgType, Device> m_leftImpl;
TensorEvaluator<RightArgType, Device> m_rightImpl;
};
} // end namespace Eigen
#endif // EIGEN_CXX11_TENSOR_TENSOR_CONCATENATION_H
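The column-major index arithmetic in coeff() above can be checked by hand. A self-contained sketch (plain C++, not Eigen code; the 2x3 shapes are assumed for illustration) that mirrors it for a concatenation along axis 0:

#include <cstdio>

int main() {
  // lhs and rhs are both 2x3, concatenated along axis 0 into a 4x3 result.
  const int out_strides[2]  = {1, 4};  // column-major strides of the 4x3 result
  const int left_strides[2] = {1, 2};  // column-major strides of the 2x3 lhs
  const int lhs_dim0 = 2;
  int index = 5;                       // linear index into the result
  int subs[2];
  subs[1] = index / out_strides[1];    // subs[1] = 1
  index  -= subs[1] * out_strides[1];  // remainder 1
  subs[0] = index;                     // subs[0] = 1
  if (subs[0] < lhs_dim0) {            // 1 < 2, so the coefficient lives in lhs
    const int left_index = subs[0] + subs[1] * left_strides[1];
    std::printf("left linear index: %d\n", left_index);  // prints 3, i.e. left(1, 1)
  }
  return 0;
}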


@@ -21,8 +21,9 @@ template<typename NullaryOp, typename PlainObjectType> class TensorCwiseNullaryOp;
template<typename UnaryOp, typename XprType> class TensorCwiseUnaryOp;
template<typename BinaryOp, typename LeftXprType, typename RightXprType> class TensorCwiseBinaryOp;
template<typename IfXprType, typename ThenXprType, typename ElseXprType> class TensorSelectOp;
template<typename Broadcast, typename XprType> class TensorBroadcastingOp;
template<typename Op, typename Dims, typename XprType> class TensorReductionOp;
template<typename Axis, typename LeftXprType, typename RightXprType> class TensorConcatenationOp;
template<typename Dimensions, typename LeftXprType, typename RightXprType> class TensorContractionOp;
template<typename Dimensions, typename InputXprType, typename KernelXprType> class TensorConvolutionOp;
template<typename NewDimensions, typename XprType> class TensorReshapingOp;


@@ -0,0 +1,62 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H
#define EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H
namespace Eigen {
namespace internal {
// Standard reduction functors
template <typename T> struct SumReducer
{
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE SumReducer() : m_sum(0) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t) {
m_sum += t;
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize() const {
return m_sum;
}
private:
T m_sum;
};
template <typename T> struct MaxReducer
{
// Start from lowest() rather than (min)(), which is the smallest *positive* value
// for floating-point types, so that reductions over negative values are correct.
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE MaxReducer() : m_max(std::numeric_limits<T>::lowest()) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t) {
if (t > m_max) { m_max = t; }
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize() const {
return m_max;
}
private:
T m_max;
};
template <typename T> struct MinReducer
{
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE MinReducer() : m_min((std::numeric_limits<T>::max)()) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t) {
if (t < m_min) { m_min = t; }
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize() const {
return m_min;
}
private:
T m_min;
};
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_CXX11_TENSOR_TENSOR_FUNCTORS_H
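The reducers follow a three-step protocol: default-construct, feed coefficients through reduce(), then read the result with finalize(). A standalone sketch of that protocol (MaxReducerSketch is a hypothetical stand-in, not the Eigen type):

#include <initializer_list>
#include <iostream>
#include <limits>

struct MaxReducerSketch {
  float m_max = std::numeric_limits<float>::lowest();
  void reduce(float t) { if (t > m_max) m_max = t; }
  float finalize() const { return m_max; }
};

int main() {
  MaxReducerSketch r;
  for (float v : {1.0f, -2.5f, 7.25f}) r.reduce(v);
  std::cout << r.finalize() << "\n";  // prints 7.25
  return 0;
}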


@@ -0,0 +1,226 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H
#define EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H
namespace Eigen {
/** \class TensorReduction
* \ingroup CXX11_Tensor_Module
*
* \brief Tensor reduction class.
*
*/
namespace internal {
template<typename Op, typename Dims, typename XprType>
struct traits<TensorReductionOp<Op, Dims, XprType> >
: traits<XprType>
{
typedef typename traits<XprType>::Scalar Scalar;
typedef typename internal::packet_traits<Scalar>::type Packet;
typedef typename traits<XprType>::StorageKind StorageKind;
typedef typename traits<XprType>::Index Index;
typedef typename XprType::Nested Nested;
};
template<typename Op, typename Dims, typename XprType>
struct eval<TensorReductionOp<Op, Dims, XprType>, Eigen::Dense>
{
typedef const TensorReductionOp<Op, Dims, XprType>& type;
};
template<typename Op, typename Dims, typename XprType>
struct nested<TensorReductionOp<Op, Dims, XprType>, 1, typename eval<TensorReductionOp<Op, Dims, XprType> >::type>
{
typedef TensorReductionOp<Op, Dims, XprType> type;
};
} // end namespace internal
template <typename Op, typename Dims, typename XprType>
class TensorReductionOp : public TensorBase<TensorReductionOp<Op, Dims, XprType>, ReadOnlyAccessors> {
public:
typedef typename Eigen::internal::traits<TensorReductionOp>::Scalar Scalar;
typedef typename Eigen::internal::traits<TensorReductionOp>::Packet Packet;
typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename XprType::PacketReturnType PacketReturnType;
typedef typename Eigen::internal::nested<TensorReductionOp>::type Nested;
typedef typename Eigen::internal::traits<TensorReductionOp>::StorageKind StorageKind;
typedef typename Eigen::internal::traits<TensorReductionOp>::Index Index;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorReductionOp(const XprType& expr, const Dims& dims) : m_expr(expr), m_dims(dims)
{ }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorReductionOp(const XprType& expr, const Dims& dims, const Op& reducer) : m_expr(expr), m_dims(dims), m_reducer(reducer)
{ }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const XprType& expression() const { return m_expr; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Dims& dims() const { return m_dims; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Op& reducer() const { return m_reducer; }
protected:
typename XprType::Nested m_expr;
const Dims m_dims;
const Op m_reducer;
};
// Eval as rvalue
template<typename Op, typename Dims, typename ArgType, typename Device>
struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
{
typedef TensorReductionOp<Op, Dims, ArgType> XprType;
typedef typename XprType::Index Index;
static const int NumInputDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
static const int NumReducedDims = internal::array_size<Dims>::value;
static const int NumDims = (NumInputDims==NumReducedDims) ? 1 : NumInputDims - NumReducedDims;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename XprType::Scalar Scalar;
enum {
IsAligned = false,
PacketAccess = false, // The code isn't vectorized properly yet
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
: m_impl(op.expression(), device), m_reducer(op.reducer())
{
EIGEN_STATIC_ASSERT(NumInputDims >= NumReducedDims, YOU_MADE_A_PROGRAMMING_MISTAKE);
array<bool, NumInputDims> reduced;
for (int i = 0; i < NumInputDims; ++i) {
reduced[i] = false;
}
for (int i = 0; i < NumReducedDims; ++i) {
eigen_assert(op.dims()[i] >= 0);
eigen_assert(op.dims()[i] < NumInputDims);
reduced[op.dims()[i]] = true;
}
const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
int outputIndex = 0;
int reduceIndex = 0;
for (int i = 0; i < NumInputDims; ++i) {
if (reduced[i]) {
m_reducedDims[reduceIndex] = input_dims[i];
++reduceIndex;
} else {
m_dimensions[outputIndex] = input_dims[i];
++outputIndex;
}
}
m_outputStrides[0] = 1;
for (int i = 1; i < NumDims; ++i) {
m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
}
array<Index, NumInputDims> strides;
strides[0] = 1;
for (int i = 1; i < NumInputDims; ++i) {
strides[i] = strides[i-1] * input_dims[i-1];
}
outputIndex = 0;
reduceIndex = 0;
for (int i = 0; i < NumInputDims; ++i) {
if (reduced[i]) {
m_reducedStrides[reduceIndex] = strides[i];
++reduceIndex;
} else {
m_preservedStrides[outputIndex] = strides[i];
++outputIndex;
}
}
// Special case for full reductions
if (NumInputDims == NumReducedDims) {
m_dimensions[0] = 1;
}
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
m_impl.evalSubExprsIfNeeded(NULL);
return true;
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
m_impl.cleanup();
}
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename XprType::PacketReturnType PacketReturnType;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
{
Op reducer(m_reducer);
reduce(firstInput(index), 0, reducer);
return reducer.finalize();
}
// TODO(bsteiner): provide a more efficient implementation.
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
eigen_assert(index + packetSize - 1 < dimensions().TotalSize());
EIGEN_ALIGN_DEFAULT CoeffReturnType values[packetSize];
for (int i = 0; i < packetSize; ++i) {
values[i] = coeff(index+i);
}
PacketReturnType rslt = internal::pload<PacketReturnType>(values);
return rslt;
}
Scalar* data() const { return NULL; }
private:
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index firstInput(Index index) const {
Index startInput = 0;
for (int i = NumDims - 1; i > 0; --i) {
const Index idx = index / m_outputStrides[i];
startInput += idx * m_preservedStrides[i];
index -= idx * m_outputStrides[i];
}
startInput += index * m_preservedStrides[0];
return startInput;
}
EIGEN_DEVICE_FUNC void reduce(Index firstIndex, int DimIndex, Op& reducer) const {
for (int j = 0; j < m_reducedDims[DimIndex]; ++j) {
const Index input = firstIndex + j * m_reducedStrides[DimIndex];
if (DimIndex < NumReducedDims-1) {
reduce(input, DimIndex+1, reducer);
} else {
reducer.reduce(m_impl.coeff(input));
}
}
}
Dimensions m_dimensions;
array<Index, NumDims> m_outputStrides;
array<Index, NumDims> m_preservedStrides;
array<Index, NumReducedDims> m_reducedStrides;
array<Index, NumReducedDims> m_reducedDims;
Op m_reducer;
TensorEvaluator<ArgType, Device> m_impl;
};
} // end namespace Eigen
#endif // EIGEN_CXX11_TENSOR_TENSOR_REDUCTION_H
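To make the stride bookkeeping concrete, a worked example using the 2x3x5x7 shape from the reduction test below, reducing over dimensions {1, 3} (column-major, as elsewhere in the module): the input strides are {1, 2, 6, 30}, so m_preservedStrides = {1, 6}, m_reducedStrides = {2, 30}, and m_reducedDims = {3, 7}. The 2x5 output has strides {1, 2}, so output index 3 maps to firstInput(3) = (3 / 2) * 6 + (3 % 2) * 1 = 7, and reduce() then visits inputs 7 + 2j + 30l for j in [0, 3) and l in [0, 7), i.e. exactly the coefficients tensor(1, j, 1, l).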


@@ -106,14 +106,16 @@ if(EIGEN_TEST_CXX11)
ei_add_test(cxx11_tensor_convolution "-std=c++0x")
ei_add_test(cxx11_tensor_expr "-std=c++0x")
# ei_add_test(cxx11_tensor_fixed_size "-std=c++0x")
# ei_add_test(cxx11_tensor_of_const_values "-std=c++0x")
ei_add_test(cxx11_tensor_of_strings "-std=c++0x")
ei_add_test(cxx11_tensor_intdiv "-std=c++0x")
ei_add_test(cxx11_tensor_lvalue "-std=c++0x")
ei_add_test(cxx11_tensor_map "-std=c++0x")
ei_add_test(cxx11_tensor_broadcasting "-std=c++0x")
ei_add_test(cxx11_tensor_concatenation "-std=c++0x")
ei_add_test(cxx11_tensor_morphing "-std=c++0x")
ei_add_test(cxx11_tensor_padding "-std=c++0x")
ei_add_test(cxx11_tensor_reduction "-std=c++0x")
# ei_add_test(cxx11_tensor_shuffling "-std=c++0x")
ei_add_test(cxx11_tensor_striding "-std=c++0x")
# ei_add_test(cxx11_tensor_device "-std=c++0x")


@@ -0,0 +1,110 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
static void test_dimension_failures()
{
Tensor<int, 3> left(2, 3, 1);
Tensor<int, 3> right(3, 3, 1);
left.setRandom();
right.setRandom();
// Okay; other dimensions are equal.
Tensor<int, 3> concatenation = left.concatenate(right, 0);
// Dimension mismatches.
VERIFY_RAISES_ASSERT(concatenation = left.concatenate(right, 1));
VERIFY_RAISES_ASSERT(concatenation = left.concatenate(right, 2));
// Axis >= NumDims or < 0.
VERIFY_RAISES_ASSERT(concatenation = left.concatenate(right, 3));
VERIFY_RAISES_ASSERT(concatenation = left.concatenate(right, -1));
}
static void test_static_dimension_failure()
{
Tensor<int, 2> left(2, 3);
Tensor<int, 3> right(2, 3, 1);
#ifdef CXX11_TENSOR_CONCATENATION_STATIC_DIMENSION_FAILURE
// Technically compatible, but we static assert that the inputs have same
// NumDims.
Tensor<int, 3> concatenation = left.concatenate(right, 0);
#endif
// This can be worked around in this case.
Tensor<int, 3> concatenation = left
.reshape(Tensor<int, 3>::Dimensions{{2, 3, 1}})
.concatenate(right, 0);
Tensor<int, 2> alternative = left
.concatenate(right.reshape(Tensor<int, 2>::Dimensions{{2, 3}}), 0);
}
static void test_simple_concatenation()
{
Tensor<int, 3> left(2, 3, 1);
Tensor<int, 3> right(2, 3, 1);
left.setRandom();
right.setRandom();
Tensor<int, 3> concatenation = left.concatenate(right, 0);
VERIFY_IS_EQUAL(concatenation.dimension(0), 4);
VERIFY_IS_EQUAL(concatenation.dimension(1), 3);
VERIFY_IS_EQUAL(concatenation.dimension(2), 1);
for (int j = 0; j < 3; ++j) {
for (int i = 0; i < 2; ++i) {
VERIFY_IS_EQUAL(concatenation(i, j, 0), left(i, j, 0));
}
for (int i = 2; i < 4; ++i) {
VERIFY_IS_EQUAL(concatenation(i, j, 0), right(i - 2, j, 0));
}
}
concatenation = left.concatenate(right, 1);
VERIFY_IS_EQUAL(concatenation.dimension(0), 2);
VERIFY_IS_EQUAL(concatenation.dimension(1), 6);
VERIFY_IS_EQUAL(concatenation.dimension(2), 1);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
VERIFY_IS_EQUAL(concatenation(i, j, 0), left(i, j, 0));
}
for (int j = 3; j < 6; ++j) {
VERIFY_IS_EQUAL(concatenation(i, j, 0), right(i, j - 3, 0));
}
}
concatenation = left.concatenate(right, 2);
VERIFY_IS_EQUAL(concatenation.dimension(0), 2);
VERIFY_IS_EQUAL(concatenation.dimension(1), 3);
VERIFY_IS_EQUAL(concatenation.dimension(2), 2);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
VERIFY_IS_EQUAL(concatenation(i, j, 0), left(i, j, 0));
VERIFY_IS_EQUAL(concatenation(i, j, 1), right(i, j, 0));
}
}
}
// TODO(phli): Add test once we have a real vectorized implementation.
// static void test_vectorized_concatenation() {}
void test_cxx11_tensor_concatenation()
{
CALL_SUBTEST(test_dimension_failures());
CALL_SUBTEST(test_static_dimension_failure());
CALL_SUBTEST(test_simple_concatenation());
// CALL_SUBTEST(test_vectorized_concatenation());
}


@@ -0,0 +1,147 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <limits>
#include <Eigen/CXX11/Tensor>
using Eigen::Tensor;
static void test_simple_reductions()
{
Tensor<float, 4> tensor(2,3,5,7);
tensor.setRandom();
array<ptrdiff_t, 2> reduction_axis;
reduction_axis[0] = 1;
reduction_axis[1] = 3;
Tensor<float, 2> result = tensor.sum(reduction_axis);
VERIFY_IS_EQUAL(result.dimension(0), 2);
VERIFY_IS_EQUAL(result.dimension(1), 5);
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 5; ++j) {
float sum = 0.0f;
for (int k = 0; k < 3; ++k) {
for (int l = 0; l < 7; ++l) {
sum += tensor(i, k, j, l);
}
}
VERIFY_IS_APPROX(result(i, j), sum);
}
}
reduction_axis[0] = 0;
reduction_axis[1] = 2;
result = tensor.maximum(reduction_axis);
VERIFY_IS_EQUAL(result.dimension(0), 3);
VERIFY_IS_EQUAL(result.dimension(1), 7);
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 7; ++j) {
float max_val = std::numeric_limits<float>::lowest();
for (int k = 0; k < 2; ++k) {
for (int l = 0; l < 5; ++l) {
max_val = (std::max)(max_val, tensor(k, i, l, j));
}
}
VERIFY_IS_APPROX(result(i, j), max_val);
}
}
reduction_axis[0] = 0;
reduction_axis[1] = 1;
result = tensor.minimum(reduction_axis);
VERIFY_IS_EQUAL(result.dimension(0), 5);
VERIFY_IS_EQUAL(result.dimension(1), 7);
for (int i = 0; i < 5; ++i) {
for (int j = 0; j < 7; ++j) {
float min_val = (std::numeric_limits<float>::max)();
for (int k = 0; k < 2; ++k) {
for (int l = 0; l < 3; ++l) {
min_val = (std::min)(min_val, tensor(k, l, i, j));
}
}
VERIFY_IS_APPROX(result(i, j), min_val);
}
}
}
static void test_full_reductions()
{
Tensor<float, 2> tensor(2,3);
tensor.setRandom();
array<ptrdiff_t, 2> reduction_axis;
reduction_axis[0] = 0;
reduction_axis[1] = 1;
Tensor<float, 1> result = tensor.sum(reduction_axis);
VERIFY_IS_EQUAL(result.dimension(0), 1);
float sum = 0.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
sum += tensor(i, j);
}
}
VERIFY_IS_APPROX(result(0), sum);
result = tensor.square().sum(reduction_axis).sqrt();
VERIFY_IS_EQUAL(result.dimension(0), 1);
sum = 0.0f;
for (int i = 0; i < 2; ++i) {
for (int j = 0; j < 3; ++j) {
sum += tensor(i, j) * tensor(i, j);
}
}
VERIFY_IS_APPROX(result(0), sqrtf(sum));
}
struct UserReducer {
UserReducer(float offset) : offset_(offset), sum_(0.0f) {}
void reduce(const float val) {
sum_ += val * val;
}
float finalize() const {
return 1.0f / (sum_ + offset_);
}
private:
float offset_;
float sum_;
};
static void test_user_defined_reductions()
{
Tensor<float, 2> tensor(5,7);
tensor.setRandom();
array<ptrdiff_t, 1> reduction_axis;
reduction_axis[0] = 1;
UserReducer reducer(10.0f);
Tensor<float, 1> result = tensor.reduce(reduction_axis, reducer);
VERIFY_IS_EQUAL(result.dimension(0), 5);
for (int i = 0; i < 5; ++i) {
float expected = 10.0f;
for (int j = 0; j < 7; ++j) {
expected += tensor(i, j) * tensor(i, j);
}
expected = 1.0f / expected;
VERIFY_IS_APPROX(result(i), expected);
}
}
void test_cxx11_tensor_reduction()
{
CALL_SUBTEST(test_simple_reductions());
CALL_SUBTEST(test_full_reductions());
CALL_SUBTEST(test_user_defined_reductions());
}