Fix cost evaluation of partial reduxions -> improve performance of vectorwise/replicate expressions involving partial reduxions

This commit is contained in:
Gael Guennebaud 2013-08-11 19:21:43 +02:00
parent 6719e56b5b
commit bffdc491b3
2 changed files with 13 additions and 2 deletions

View File

@ -50,7 +50,7 @@ struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
MaxColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::MaxColsAtCompileTime,
Flags0 = (unsigned int)_MatrixTypeNested::Flags & HereditaryBits,
Flags = (Flags0 & ~RowMajorBit) | (RowsAtCompileTime == 1 ? RowMajorBit : 0),
TraversalSize = Direction==Vertical ? RowsAtCompileTime : ColsAtCompileTime
TraversalSize = Direction==Vertical ? MatrixType::RowsAtCompileTime : MatrixType::ColsAtCompileTime
};
#if EIGEN_GNUC_AT_LEAST(3,4)
typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType;
@ -58,7 +58,8 @@ struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
typedef typename MemberOp::template Cost<InputScalar,TraversalSize> CostOpType;
#endif
enum {
CoeffReadCost = TraversalSize * traits<_MatrixTypeNested>::CoeffReadCost + int(CostOpType::value)
CoeffReadCost = TraversalSize==Dynamic ? Dynamic
: TraversalSize * traits<_MatrixTypeNested>::CoeffReadCost + int(CostOpType::value)
};
};
}

View File

@ -101,6 +101,16 @@ template<typename ArrayType> void vectorwiseop_array(const ArrayType& m)
VERIFY_RAISES_ASSERT(m2.rowwise() /= rowvec.transpose());
VERIFY_RAISES_ASSERT(m1.rowwise() / rowvec.transpose());
m2 = m1;
// yes, there might be an aliasing issue there but ".rowwise() /="
// is suppposed to evaluate " m2.colwise().sum()" into to temporary to avoid
// evaluating the reducions multiple times
if(ArrayType::RowsAtCompileTime>2 || ArrayType::RowsAtCompileTime==Dynamic)
{
m2.rowwise() /= m2.colwise().sum();
VERIFY_IS_APPROX(m2, m1.rowwise() / m1.colwise().sum());
}
}
template<typename MatrixType> void vectorwiseop_matrix(const MatrixType& m)