* improved product performance:
  - fall back to the normal product for small dynamic matrices
  - overloaded "c += (a * b).lazy()" to avoid the expensive and useless temporary and setZero()
    in such very common cases.
* fix a couple of issues with the flags
Gael Guennebaud 2008-05-22 14:51:25 +00:00
parent 106a0c1bef
commit 94e1629a1b
5 changed files with 77 additions and 23 deletions
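At the API level, here is a rough sketch of the pattern this commit targets; the helper function and the operands are made up for illustration, while MatrixXf and .lazy() are the Eigen API of that era:

    #include <Eigen/Core>

    // hypothetical helper: accumulate a matrix product into an existing result
    void accumulateProduct(Eigen::MatrixXf& c, const Eigen::MatrixXf& a, const Eigen::MatrixXf& b)
    {
      // without .lazy(): (a * b) is evaluated into a temporary first, which
      // costs an allocation, a setZero(), and a second pass for the addition
      c += a * b;

      // with .lazy() and the new operator+= overload: the product kernel
      // (or the small-matrix fallback) accumulates directly into c
      c += (a * b).lazy();
    }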

@@ -70,7 +70,7 @@ struct ei_traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
     MaxColsAtCompileTime = Lhs::MaxColsAtCompileTime,
     Flags = ((int(LhsFlags) | int(RhsFlags)) & (
                 HereditaryBits
-              | Like1DArrayBit
+              | int(LhsFlags) & int(RhsFlags) & Like1DArrayBit
               | (ei_functor_traits<BinaryOp>::IsVectorizable && ((int(LhsFlags) & RowMajorBit)==(int(RhsFlags) & RowMajorBit))
                   ? int(LhsFlags) & int(RhsFlags) & VectorizableBit : 0))),
     CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + ei_functor_traits<BinaryOp>::Cost
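The changed line is presumably one of the flag issues the commit message mentions: a coefficient-wise binary expression can be addressed like a flat 1D array only if both of its operands can, so Like1DArrayBit has to be AND-ed across the operands instead of inherited from either one. A minimal sketch of the rule, with made-up flag values:

    // made-up values, only to show the propagation rule
    enum { Like1DArrayBit = 0x1 };
    unsigned int lhsFlags = Like1DArrayBit; // lhs has contiguous 1D storage
    unsigned int rhsFlags = 0;              // rhs does not (e.g. a strided block)

    // old rule: the bit survives if either operand has it -- wrong
    unsigned int oldBit = (lhsFlags | rhsFlags) & Like1DArrayBit; // == 1

    // new rule: the bit survives only if both operands have it
    unsigned int newBit = lhsFlags & rhsFlags & Like1DArrayBit;   // == 0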

@@ -62,6 +62,9 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> class Flagged
     inline Flagged(const ExpressionType& matrix) : m_expression(matrix) {}
 
+    /** \internal */
+    inline ExpressionType _expression() const { return m_expression; }
+
   private:
     inline int _rows() const { return m_expression.rows(); }
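The new _expression() accessor simply exposes the wrapped expression; it exists so that the operator+= overload added further down can unwrap the Flagged object produced by .lazy() and reach the underlying Product (the call below is quoted from that overload):

    // how the accessor gets consumed by MatrixBase::operator+= (see below):
    // unwrap the Flagged back to the Product, then accumulate into the destination
    other._expression()._cacheFriendlyEvalAndAdd(const_cast_derived());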

@@ -163,7 +163,7 @@ template<typename Derived> class MatrixBase
      * \sa rows(), cols(), IsVectorAtCompileTime. */
     inline bool isVector() const { return rows()==1 || cols()==1; }
     //@}
 
     /// \name Default return types
     //@{
     /** Represents a constant matrix */
@@ -255,6 +255,9 @@ template<typename Derived> class MatrixBase
     template<typename OtherDerived>
     Derived& operator-=(const MatrixBase<OtherDerived>& other);
 
+    template<typename Lhs,typename Rhs>
+    Derived& operator+=(const Flagged<Product<Lhs,Rhs,CacheFriendlyProduct>, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>& other);
+
     Derived& operator*=(const Scalar& other);
     Derived& operator/=(const Scalar& other);
@@ -407,7 +410,7 @@ template<typename Derived> class MatrixBase
     bool isOrtho(const MatrixBase<OtherDerived>& other,
                  RealScalar prec = precision<Scalar>()) const;
     bool isOrtho(RealScalar prec = precision<Scalar>()) const;
 
     template<typename OtherDerived>
     inline bool operator==(const MatrixBase<OtherDerived>& other) const
     { return derived().cwiseEqualTo(other.derived()).all(); }
@@ -583,8 +586,8 @@ template<typename Derived> class MatrixBase
     //@{
     const QR<typename ei_eval<Derived>::type> qr() const;
     //@}
 
 #ifdef EIGEN_MATRIX_CUSTOM_ADDONS_FILE
 #include EIGEN_MATRIX_CUSTOM_ADDONS_FILE
 #endif

@@ -183,12 +183,12 @@ template<typename T, int n=1> struct ei_product_nested_rhs
 template<typename T, int n=1> struct ei_product_nested_lhs
 {
   typedef typename ei_meta_if<
-    ei_is_temporary<T>::ret && !(ei_traits<T>::Flags & RowMajorBit),
+    ei_is_temporary<T>::ret,
     T,
-    typename ei_meta_if<(
-         (ei_traits<T>::Flags & EvalBeforeNestingBit)
-      || (!(ei_traits<T>::Flags & DirectAccessBit))
-      || (n+1) * (NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * T::CoeffReadCost),
+    typename ei_meta_if<
+         int(ei_traits<T>::Flags) & EvalBeforeNestingBit
+      || (!(int(ei_traits<T>::Flags) & DirectAccessBit))
+      || (n+1) * int(NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * int(T::CoeffReadCost),
       typename ei_eval<T>::type,
       const T&
     >::ret
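The last condition of the inner ei_meta_if is the usual nesting-cost heuristic: evaluate the sub-expression into a temporary when computing it once and re-reading it beats recomputing a coefficient at each of its n uses. A worked instance with made-up costs:

    // made-up numbers, just to exercise the inequality above
    enum {
      n             = 2, // the nested expression is read twice
      ReadCost      = 1, // NumTraits<Scalar>::ReadCost, e.g. for float
      CoeffReadCost = 4, // cost of recomputing one coefficient on the fly
      // (n+1)*ReadCost == 3, (n-1)*CoeffReadCost == 4, and 3 < 4,
      // so the expression gets evaluated into a temporary once
      EvalIntoTemporary = (n+1) * ReadCost < (n-1) * CoeffReadCost // == 1
    };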
@@ -209,8 +209,8 @@ struct ei_traits<Product<Lhs, Rhs, EvalMode> >
     typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct,
       typename ei_product_nested_rhs<Rhs,Lhs::RowsAtCompileTime>::type,
       typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type>::ret RhsNested;
-    typedef typename ei_unref<LhsNested>::type _LhsNested;
-    typedef typename ei_unref<RhsNested>::type _RhsNested;
+    typedef typename ei_unconst<typename ei_unref<LhsNested>::type>::type _LhsNested;
+    typedef typename ei_unconst<typename ei_unref<RhsNested>::type>::type _RhsNested;
     enum {
       LhsCoeffReadCost = _LhsNested::CoeffReadCost,
       RhsCoeffReadCost = _RhsNested::CoeffReadCost,
@@ -224,7 +224,7 @@ struct ei_traits<Product<Lhs, Rhs, EvalMode> >
       // the other one is always vectorized !
       _RhsVectorizable = (RhsFlags & RowMajorBit) && (RhsFlags & VectorizableBit) && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
       _LhsVectorizable = (!(LhsFlags & RowMajorBit)) && (LhsFlags & VectorizableBit) && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
-      _Vectorizable = (_LhsVectorizable || _RhsVectorizable) ? 0 : 0,
+      _Vectorizable = (_LhsVectorizable || _RhsVectorizable) ? 1 : 0,
       _RowMajor = (RhsFlags & RowMajorBit)
                && (EvalMode==(int)CacheFriendlyProduct ? (int)LhsFlags & RowMajorBit : (!_LhsVectorizable)),
       _LostBits = HereditaryBits & ~(
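The "? 0 : 0" turned into "? 1 : 0" looks like another of the flag issues from the commit message: the product expression previously never advertised itself as vectorizable even when one side qualified. Whether a side qualifies couples its storage order with the matching dimension being a multiple of the SIMD packet size; a worked instance with made-up sizes:

    // made-up compile-time values, to exercise the two predicates above
    enum {
      PacketSize        = 4, // e.g. 4 floats per SSE packet
      RowsAtCompileTime = 8, // column-major lhs: columns of length 8 vectorize
      ColsAtCompileTime = 6, // row-major rhs: rows of length 6 do not
      LhsOk = (RowsAtCompileTime % PacketSize == 0), // 8 % 4 == 0 -> 1
      RhsOk = (ColsAtCompileTime % PacketSize == 0), // 6 % 4 != 0 -> 0
      Vectorizable = (LhsOk || RhsOk) ? 1 : 0        // -> 1
    };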
@@ -271,6 +271,9 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignment_operator,
     template<typename DestDerived>
     void _cacheFriendlyEval(DestDerived& res) const;
+    /** \internal */
+    template<typename DestDerived>
+    void _cacheFriendlyEvalAndAdd(DestDerived& res) const;
 
   private:
     inline int _rows() const { return m_lhs.rows(); }
@@ -363,6 +367,16 @@ MatrixBase<Derived>::operator*=(const MatrixBase<OtherDerived> &other)
   return *this = *this * other;
 }
 
+/** \internal */
+template<typename Derived>
+template<typename Lhs,typename Rhs>
+inline Derived&
+MatrixBase<Derived>::operator+=(const Flagged<Product<Lhs,Rhs,CacheFriendlyProduct>, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>& other)
+{
+  other._expression()._cacheFriendlyEvalAndAdd(const_cast_derived());
+  return derived();
+}
+
 template<typename Derived>
 template<typename Lhs, typename Rhs>
 inline Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheFriendlyProduct>& product)
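The overload above is the mechanical core of the "c += (a * b).lazy()" optimization: .lazy() wraps the product in exactly the Flagged type matched here, so the wrapper is intercepted before any temporary is materialized and the product accumulates straight into the destination. Schematically (a sketch of the two execution paths, not literal generated code):

    // before: c += a * b;
    //   tmp.setZero();              // zero a freshly allocated temporary
    //   kernel(a, b, tmp);          // write the product into tmp
    //   c += tmp;                   // second pass over every coefficient
    //
    // after:  c += (a * b).lazy();
    //   kernel_accumulate(a, b, c); // one pass, adding directly into c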
@@ -375,16 +389,50 @@ template<typename Lhs, typename Rhs, int EvalMode>
 template<typename DestDerived>
 inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEval(DestDerived& res) const
 {
-  #ifndef EIGEN_WIP_PRODUCT_DIRTY
-  res.setZero();
-  #endif
+  if (  _rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+     && _cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+     && m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+     )
+  {
+    #ifndef EIGEN_WIP_PRODUCT_DIRTY
+    res.setZero();
+    #endif
 
-  ei_cache_friendly_product<Scalar>(
-    _rows(), _cols(), m_lhs.cols(),
-    _LhsNested::Flags&RowMajorBit, &(m_lhs.const_cast_derived().coeffRef(0,0)), m_lhs.stride(),
-    _RhsNested::Flags&RowMajorBit, &(m_rhs.const_cast_derived().coeffRef(0,0)), m_rhs.stride(),
-    Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride()
-  );
+    ei_cache_friendly_product<Scalar>(
+      _rows(), _cols(), m_lhs.cols(),
+      _LhsNested::Flags&RowMajorBit, &(m_lhs.const_cast_derived().coeffRef(0,0)), m_lhs.stride(),
+      _RhsNested::Flags&RowMajorBit, &(m_rhs.const_cast_derived().coeffRef(0,0)), m_rhs.stride(),
+      Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride()
+    );
+  }
+  else
+  {
+    // lazy product
+    res = Product<_LhsNested,_RhsNested,NormalProduct>(m_lhs, m_rhs).lazy();
+  }
 }
 
+template<typename Lhs, typename Rhs, int EvalMode>
+template<typename DestDerived>
+inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEvalAndAdd(DestDerived& res) const
+{
+  if (  _rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+     && _cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+     && m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
+     )
+  {
+    ei_cache_friendly_product<Scalar>(
+      _rows(), _cols(), m_lhs.cols(),
+      _LhsNested::Flags&RowMajorBit, &(m_lhs.const_cast_derived().coeffRef(0,0)), m_lhs.stride(),
+      _RhsNested::Flags&RowMajorBit, &(m_rhs.const_cast_derived().coeffRef(0,0)), m_rhs.stride(),
+      Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride()
+    );
+  }
+  else
+  {
+    // lazy product
+    res += Product<_LhsNested,_RhsNested,NormalProduct>(m_lhs, m_rhs).lazy();
+  }
+}
+
 #endif // EIGEN_PRODUCT_H
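Both evaluation paths now share the same runtime dispatch: the cache-friendly kernel has setup overhead (plus, in the "=" case, the setZero()), so it only pays off once every dimension of the product reaches EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD; smaller products fall back to the coefficient-wise normal product. A usage-level sketch, with made-up sizes:

    // the operand sizes, known only at run time, pick the path:
    // e.g. 4x4 operands fall below the threshold and take the lazy branch,
    // while 256x256 operands clear it and run the cache-friendly kernel
    void evalProduct(Eigen::MatrixXf& dst, const Eigen::MatrixXf& lhs, const Eigen::MatrixXf& rhs)
    {
      dst  = lhs * rhs;          // _cacheFriendlyEval: setZero + kernel, or lazy copy
      dst += (lhs * rhs).lazy(); // _cacheFriendlyEvalAndAdd: kernel add, or lazy add
    }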

@@ -154,7 +154,7 @@ class ei_corrected_matrix_flags
     enum { is_vectorizable
            = ei_packet_traits<Scalar>::size > 1
              && (Size%ei_packet_traits<Scalar>::size==0),
-           _flags1 = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit)) | Like1DArrayBit
+           _flags1 = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit)) | Like1DArrayBit | DirectAccessBit
     };
   public:
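Adding DirectAccessBit here is plausibly the remaining flag fix: a plain stored matrix does expose its coefficients through a raw pointer plus a stride, which is precisely what ei_cache_friendly_product consumes and what the ei_product_nested_* helpers above test before nesting an expression unevaluated. A sketch of the access pattern the bit guarantees (float scalars and column-major storage assumed; the helper is hypothetical):

    // what DirectAccessBit promises: coefficient (i,j) is reachable by raw
    // pointer arithmetic, bypassing the expression's coeff() machinery
    template<typename MatrixType>
    float coeffViaPointer(MatrixType& m, int i, int j)
    {
      const float* data = &m.coeffRef(0,0); // legal because of DirectAccessBit
      return data[j * m.stride() + i];      // column-major: stride between columns
    }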