Fix cost evaluation of partial reduxions -> improve performance of vectorwise/replicate expressions involving partial reduxions

2025-09-26 08:13:13 +08:00 · 2013-08-11 19:21:43 +02:00 · 2013-08-11 19:21:43 +02:00 · bffdc491b3
commit bffdc491b3
parent 6719e56b5b
2 changed files with 13 additions and 2 deletions
--- a/Eigen/src/Core/VectorwiseOp.h
+++ b/Eigen/src/Core/VectorwiseOp.h
@ -50,7 +50,7 @@ struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
    MaxColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::MaxColsAtCompileTime,
    Flags0 = (unsigned int)_MatrixTypeNested::Flags & HereditaryBits,
    Flags = (Flags0 & ~RowMajorBit) | (RowsAtCompileTime == 1 ? RowMajorBit : 0),
-    TraversalSize = Direction==Vertical ? RowsAtCompileTime : ColsAtCompileTime
+    TraversalSize = Direction==Vertical ? MatrixType::RowsAtCompileTime :  MatrixType::ColsAtCompileTime
  };
  #if EIGEN_GNUC_AT_LEAST(3,4)
  typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType;
@ -58,7 +58,8 @@ struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
  typedef typename MemberOp::template Cost<InputScalar,TraversalSize> CostOpType;
  #endif
  enum {
-    CoeffReadCost = TraversalSize * traits<_MatrixTypeNested>::CoeffReadCost + int(CostOpType::value)
+    CoeffReadCost = TraversalSize==Dynamic ? Dynamic
+                  : TraversalSize * traits<_MatrixTypeNested>::CoeffReadCost + int(CostOpType::value)
  };
 };
 }
--- a/test/vectorwiseop.cpp
+++ b/test/vectorwiseop.cpp
@ -101,6 +101,16 @@ template<typename ArrayType> void vectorwiseop_array(const ArrayType& m)

  VERIFY_RAISES_ASSERT(m2.rowwise() /= rowvec.transpose());
  VERIFY_RAISES_ASSERT(m1.rowwise() / rowvec.transpose());
+  
+  m2 = m1;
+  // yes, there might be an aliasing issue there but ".rowwise() /="
+  // is suppposed to evaluate " m2.colwise().sum()" into to temporary to avoid
+  // evaluating the reducions multiple times
+  if(ArrayType::RowsAtCompileTime>2 || ArrayType::RowsAtCompileTime==Dynamic)
+  {
+    m2.rowwise() /= m2.colwise().sum();
+    VERIFY_IS_APPROX(m2, m1.rowwise() / m1.colwise().sum());
+  }
 }

 template<typename MatrixType> void vectorwiseop_matrix(const MatrixType& m)