mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
* improved product performance:
- fallback to normal product for small dynamic matrices - overloaded "c += (a * b).lazy()" to avoid the expensive and useless temporary and setZero() in such very common cases. * fix a couple of issues with the flags
This commit is contained in:
parent
106a0c1bef
commit
94e1629a1b
@ -70,7 +70,7 @@ struct ei_traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
|
||||
MaxColsAtCompileTime = Lhs::MaxColsAtCompileTime,
|
||||
Flags = ((int(LhsFlags) | int(RhsFlags)) & (
|
||||
HereditaryBits
|
||||
| Like1DArrayBit
|
||||
| int(LhsFlags) & int(RhsFlags) & Like1DArrayBit
|
||||
| (ei_functor_traits<BinaryOp>::IsVectorizable && ((int(LhsFlags) & RowMajorBit)==(int(RhsFlags) & RowMajorBit))
|
||||
? int(LhsFlags) & int(RhsFlags) & VectorizableBit : 0))),
|
||||
CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + ei_functor_traits<BinaryOp>::Cost
|
||||
|
@ -62,6 +62,9 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas
|
||||
|
||||
inline Flagged(const ExpressionType& matrix) : m_expression(matrix) {}
|
||||
|
||||
/** \internal */
|
||||
inline ExpressionType _expression() const { return m_expression; }
|
||||
|
||||
private:
|
||||
|
||||
inline int _rows() const { return m_expression.rows(); }
|
||||
|
@ -255,6 +255,9 @@ template<typename Derived> class MatrixBase
|
||||
template<typename OtherDerived>
|
||||
Derived& operator-=(const MatrixBase<OtherDerived>& other);
|
||||
|
||||
template<typename Lhs,typename Rhs>
|
||||
Derived& operator+=(const Flagged<Product<Lhs,Rhs,CacheFriendlyProduct>, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>& other);
|
||||
|
||||
Derived& operator*=(const Scalar& other);
|
||||
Derived& operator/=(const Scalar& other);
|
||||
|
||||
|
@ -183,12 +183,12 @@ template<typename T, int n=1> struct ei_product_nested_rhs
|
||||
template<typename T, int n=1> struct ei_product_nested_lhs
|
||||
{
|
||||
typedef typename ei_meta_if<
|
||||
ei_is_temporary<T>::ret && !(ei_traits<T>::Flags & RowMajorBit),
|
||||
ei_is_temporary<T>::ret,
|
||||
T,
|
||||
typename ei_meta_if<(
|
||||
(ei_traits<T>::Flags & EvalBeforeNestingBit)
|
||||
|| (!(ei_traits<T>::Flags & DirectAccessBit))
|
||||
|| (n+1) * (NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * T::CoeffReadCost),
|
||||
typename ei_meta_if<
|
||||
int(ei_traits<T>::Flags) & EvalBeforeNestingBit
|
||||
|| (!(int(ei_traits<T>::Flags) & DirectAccessBit))
|
||||
|| (n+1) * int(NumTraits<typename ei_traits<T>::Scalar>::ReadCost) < (n-1) * int(T::CoeffReadCost),
|
||||
typename ei_eval<T>::type,
|
||||
const T&
|
||||
>::ret
|
||||
@ -209,8 +209,8 @@ struct ei_traits<Product<Lhs, Rhs, EvalMode> >
|
||||
typedef typename ei_meta_if<EvalMode==CacheFriendlyProduct,
|
||||
typename ei_product_nested_rhs<Rhs,Lhs::RowsAtCompileTime>::type,
|
||||
typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type>::ret RhsNested;
|
||||
typedef typename ei_unref<LhsNested>::type _LhsNested;
|
||||
typedef typename ei_unref<RhsNested>::type _RhsNested;
|
||||
typedef typename ei_unconst<typename ei_unref<LhsNested>::type>::type _LhsNested;
|
||||
typedef typename ei_unconst<typename ei_unref<RhsNested>::type>::type _RhsNested;
|
||||
enum {
|
||||
LhsCoeffReadCost = _LhsNested::CoeffReadCost,
|
||||
RhsCoeffReadCost = _RhsNested::CoeffReadCost,
|
||||
@ -224,7 +224,7 @@ struct ei_traits<Product<Lhs, Rhs, EvalMode> >
|
||||
// the other one is always vectorized !
|
||||
_RhsVectorizable = (RhsFlags & RowMajorBit) && (RhsFlags & VectorizableBit) && (ColsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
|
||||
_LhsVectorizable = (!(LhsFlags & RowMajorBit)) && (LhsFlags & VectorizableBit) && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
|
||||
_Vectorizable = (_LhsVectorizable || _RhsVectorizable) ? 0 : 0,
|
||||
_Vectorizable = (_LhsVectorizable || _RhsVectorizable) ? 1 : 0,
|
||||
_RowMajor = (RhsFlags & RowMajorBit)
|
||||
&& (EvalMode==(int)CacheFriendlyProduct ? (int)LhsFlags & RowMajorBit : (!_LhsVectorizable)),
|
||||
_LostBits = HereditaryBits & ~(
|
||||
@ -271,6 +271,10 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
|
||||
template<typename DestDerived>
|
||||
void _cacheFriendlyEval(DestDerived& res) const;
|
||||
|
||||
/** \internal */
|
||||
template<typename DestDerived>
|
||||
void _cacheFriendlyEvalAndAdd(DestDerived& res) const;
|
||||
|
||||
private:
|
||||
|
||||
inline int _rows() const { return m_lhs.rows(); }
|
||||
@ -363,6 +367,16 @@ MatrixBase<Derived>::operator*=(const MatrixBase<OtherDerived> &other)
|
||||
return *this = *this * other;
|
||||
}
|
||||
|
||||
/** \internal */
|
||||
template<typename Derived>
|
||||
template<typename Lhs,typename Rhs>
|
||||
inline Derived&
|
||||
MatrixBase<Derived>::operator+=(const Flagged<Product<Lhs,Rhs,CacheFriendlyProduct>, 0, EvalBeforeNestingBit | EvalBeforeAssigningBit>& other)
|
||||
{
|
||||
other._expression()._cacheFriendlyEvalAndAdd(const_cast_derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
template<typename Lhs, typename Rhs>
|
||||
inline Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheFriendlyProduct>& product)
|
||||
@ -375,6 +389,11 @@ template<typename Lhs, typename Rhs, int EvalMode>
|
||||
template<typename DestDerived>
|
||||
inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEval(DestDerived& res) const
|
||||
{
|
||||
if ( _rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
&& _cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
&& m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
)
|
||||
{
|
||||
#ifndef EIGEN_WIP_PRODUCT_DIRTY
|
||||
res.setZero();
|
||||
#endif
|
||||
@ -385,6 +404,35 @@ inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEval(DestDerived& res) cons
|
||||
_RhsNested::Flags&RowMajorBit, &(m_rhs.const_cast_derived().coeffRef(0,0)), m_rhs.stride(),
|
||||
Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride()
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
// lazy product
|
||||
res = Product<_LhsNested,_RhsNested,NormalProduct>(m_lhs, m_rhs).lazy();
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Lhs, typename Rhs, int EvalMode>
|
||||
template<typename DestDerived>
|
||||
inline void Product<Lhs,Rhs,EvalMode>::_cacheFriendlyEvalAndAdd(DestDerived& res) const
|
||||
{
|
||||
if ( _rows()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
&& _cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
&& m_lhs.cols()>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
|
||||
)
|
||||
{
|
||||
ei_cache_friendly_product<Scalar>(
|
||||
_rows(), _cols(), m_lhs.cols(),
|
||||
_LhsNested::Flags&RowMajorBit, &(m_lhs.const_cast_derived().coeffRef(0,0)), m_lhs.stride(),
|
||||
_RhsNested::Flags&RowMajorBit, &(m_rhs.const_cast_derived().coeffRef(0,0)), m_rhs.stride(),
|
||||
Flags&RowMajorBit, &(res.coeffRef(0,0)), res.stride()
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
// lazy product
|
||||
res += Product<_LhsNested,_RhsNested,NormalProduct>(m_lhs, m_rhs).lazy();
|
||||
}
|
||||
}
|
||||
|
||||
#endif // EIGEN_PRODUCT_H
|
||||
|
@ -154,7 +154,7 @@ class ei_corrected_matrix_flags
|
||||
enum { is_vectorizable
|
||||
= ei_packet_traits<Scalar>::size > 1
|
||||
&& (Size%ei_packet_traits<Scalar>::size==0),
|
||||
_flags1 = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit)) | Like1DArrayBit
|
||||
_flags1 = (SuggestedFlags & ~(EvalBeforeNestingBit | EvalBeforeAssigningBit)) | Like1DArrayBit | DirectAccessBit
|
||||
};
|
||||
|
||||
public:
|
||||
|
Loading…
x
Reference in New Issue
Block a user