mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-11 19:29:02 +08:00
Fix cost evaluation of partial reduxions -> improve performance of vectorwise/replicate expressions involving partial reduxions
This commit is contained in:
parent
6719e56b5b
commit
bffdc491b3
@ -50,7 +50,7 @@ struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
|
||||
MaxColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::MaxColsAtCompileTime,
|
||||
Flags0 = (unsigned int)_MatrixTypeNested::Flags & HereditaryBits,
|
||||
Flags = (Flags0 & ~RowMajorBit) | (RowsAtCompileTime == 1 ? RowMajorBit : 0),
|
||||
TraversalSize = Direction==Vertical ? RowsAtCompileTime : ColsAtCompileTime
|
||||
TraversalSize = Direction==Vertical ? MatrixType::RowsAtCompileTime : MatrixType::ColsAtCompileTime
|
||||
};
|
||||
#if EIGEN_GNUC_AT_LEAST(3,4)
|
||||
typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType;
|
||||
@ -58,7 +58,8 @@ struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
|
||||
typedef typename MemberOp::template Cost<InputScalar,TraversalSize> CostOpType;
|
||||
#endif
|
||||
enum {
|
||||
CoeffReadCost = TraversalSize * traits<_MatrixTypeNested>::CoeffReadCost + int(CostOpType::value)
|
||||
CoeffReadCost = TraversalSize==Dynamic ? Dynamic
|
||||
: TraversalSize * traits<_MatrixTypeNested>::CoeffReadCost + int(CostOpType::value)
|
||||
};
|
||||
};
|
||||
}
|
||||
|
@ -101,6 +101,16 @@ template<typename ArrayType> void vectorwiseop_array(const ArrayType& m)
|
||||
|
||||
VERIFY_RAISES_ASSERT(m2.rowwise() /= rowvec.transpose());
|
||||
VERIFY_RAISES_ASSERT(m1.rowwise() / rowvec.transpose());
|
||||
|
||||
m2 = m1;
|
||||
// yes, there might be an aliasing issue there but ".rowwise() /="
|
||||
// is suppposed to evaluate " m2.colwise().sum()" into to temporary to avoid
|
||||
// evaluating the reducions multiple times
|
||||
if(ArrayType::RowsAtCompileTime>2 || ArrayType::RowsAtCompileTime==Dynamic)
|
||||
{
|
||||
m2.rowwise() /= m2.colwise().sum();
|
||||
VERIFY_IS_APPROX(m2, m1.rowwise() / m1.colwise().sum());
|
||||
}
|
||||
}
|
||||
|
||||
template<typename MatrixType> void vectorwiseop_matrix(const MatrixType& m)
|
||||
|
Loading…
x
Reference in New Issue
Block a user