mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
- merge ei_xpr_copy and ei_eval_if_needed_before_nesting
- make use of CoeffReadCost to determine when to unroll the loops, for now only in Product.h and in OperatorEquals.h. Performance remains the same: generally still not as good as before the big changes.
This commit is contained in:
parent
30ec34de36
commit
371d302efb
@ -83,27 +83,30 @@ template<typename T> struct ei_eval
|
||||
template<typename T> struct ei_unref { typedef T type; };
|
||||
template<typename T> struct ei_unref<T&> { typedef T type; };
|
||||
|
||||
template<typename T> struct ei_xpr_copy
|
||||
template<typename T> struct ei_is_temporary
|
||||
{
|
||||
typedef typename ei_meta_if< ei_traits<T>::Flags & EvalBeforeNestingBit,
|
||||
typename ei_eval<T>::type, const T&>::ret type;
|
||||
enum { ret = 0 };
|
||||
};
|
||||
|
||||
template<typename T> struct ei_xpr_copy<Temporary<T> >
|
||||
template<typename T> struct ei_is_temporary<Temporary<T> >
|
||||
{
|
||||
typedef Temporary<T> type;
|
||||
enum { ret = 1 };
|
||||
};
|
||||
|
||||
template<typename T, int n=1> struct ei_eval_if_needed_before_nesting
|
||||
template<typename T, int n=1> struct ei_xpr_copy
|
||||
{
|
||||
// FIXME should we consider the additional store as well as the creation cost of the temporary ?
|
||||
enum { eval = T::Flags & EvalBeforeNestingBit
|
||||
|| (n+1) * NumTraits<typename ei_traits<T>::Scalar>::ReadCost < (n-1) * T::CoeffReadCost };
|
||||
typedef typename ei_meta_if<eval, typename ei_eval<T>::type, T>::ret XprType;
|
||||
typedef typename ei_meta_if<eval, typename ei_eval<T>::type, typename T::XprCopy>::ret CopyType;
|
||||
typedef typename ei_meta_if<
|
||||
ei_is_temporary<T>::ret,
|
||||
T,
|
||||
typename ei_meta_if<
|
||||
ei_traits<T>::Flags & EvalBeforeNestingBit
|
||||
|| (n+1) * NumTraits<typename ei_traits<T>::Scalar>::ReadCost < (n-1) * T::CoeffReadCost,
|
||||
typename ei_eval<T>::type,
|
||||
const T&
|
||||
>::ret
|
||||
>::ret type;
|
||||
};
|
||||
|
||||
|
||||
template<typename T> struct ei_functor_traits
|
||||
{
|
||||
enum
|
||||
|
@ -102,14 +102,15 @@ template<typename OtherDerived>
|
||||
Derived& MatrixBase<Derived>
|
||||
::lazyAssign(const MatrixBase<OtherDerived>& other)
|
||||
{
|
||||
const bool unroll = SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
|
||||
if(IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime)
|
||||
// copying a vector expression into a vector
|
||||
{
|
||||
ei_assert(size() == other.size());
|
||||
if(SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT)
|
||||
if(unroll)
|
||||
ei_vector_operator_equals_unroller
|
||||
<Derived, OtherDerived,
|
||||
SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT ? SizeAtCompileTime : Dynamic
|
||||
unroll ? SizeAtCompileTime : Dynamic
|
||||
>::run(derived(), other.derived());
|
||||
else
|
||||
for(int i = 0; i < size(); i++)
|
||||
@ -118,11 +119,11 @@ Derived& MatrixBase<Derived>
|
||||
else // copying a matrix expression into a matrix
|
||||
{
|
||||
ei_assert(rows() == other.rows() && cols() == other.cols());
|
||||
if(SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT)
|
||||
if(unroll)
|
||||
{
|
||||
ei_matrix_operator_equals_unroller
|
||||
<Derived, OtherDerived,
|
||||
SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT ? SizeAtCompileTime : Dynamic
|
||||
unroll ? SizeAtCompileTime : Dynamic
|
||||
>::run(derived(), other.derived());
|
||||
}
|
||||
else
|
||||
@ -152,7 +153,7 @@ template<typename OtherDerived>
|
||||
Derived& MatrixBase<Derived>
|
||||
::operator=(const MatrixBase<OtherDerived>& other)
|
||||
{
|
||||
if (OtherDerived::Flags & EvalBeforeAssigningBit)
|
||||
if(OtherDerived::Flags & EvalBeforeAssigningBit)
|
||||
{
|
||||
return lazyAssign(other.derived().eval());
|
||||
}
|
||||
|
@ -84,21 +84,29 @@ template<typename Lhs, typename Rhs, int EvalMode>
|
||||
struct ei_traits<Product<Lhs, Rhs, EvalMode> >
|
||||
{
|
||||
typedef typename Lhs::Scalar Scalar;
|
||||
typedef typename ei_xpr_copy<Lhs,Rhs::ColsAtCompileTime>::type LhsXprCopy;
|
||||
typedef typename ei_xpr_copy<Rhs,Lhs::RowsAtCompileTime>::type RhsXprCopy;
|
||||
typedef typename ei_unref<LhsXprCopy>::type ActualLhs;
|
||||
typedef typename ei_unref<RhsXprCopy>::type ActualRhs;
|
||||
enum {
|
||||
LhsCoeffReadCost = ActualLhs::CoeffReadCost,
|
||||
RhsCoeffReadCost = ActualRhs::CoeffReadCost,
|
||||
LhsFlags = ActualLhs::Flags,
|
||||
RhsFlags = ActualRhs::Flags,
|
||||
RowsAtCompileTime = Lhs::RowsAtCompileTime,
|
||||
ColsAtCompileTime = Rhs::ColsAtCompileTime,
|
||||
MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime,
|
||||
Flags = ( (RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic)
|
||||
? (unsigned int)(Lhs::Flags | Rhs::Flags)
|
||||
: (unsigned int)(Lhs::Flags | Rhs::Flags) & ~LargeBit )
|
||||
? (unsigned int)(LhsFlags | RhsFlags)
|
||||
: (unsigned int)(LhsFlags | RhsFlags) & ~LargeBit )
|
||||
| EvalBeforeAssigningBit
|
||||
| (ei_product_eval_mode<Lhs, Rhs>::value == (int)CacheOptimal ? EvalBeforeNestingBit : 0),
|
||||
CoeffReadCost
|
||||
= Lhs::ColsAtCompileTime == Dynamic
|
||||
? Dynamic
|
||||
: Lhs::ColsAtCompileTime
|
||||
* (NumTraits<Scalar>::MulCost + Lhs::CoeffReadCost + Rhs::CoeffReadCost)
|
||||
* (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
|
||||
+ (Lhs::ColsAtCompileTime - 1) * NumTraits<Scalar>::AddCost
|
||||
};
|
||||
};
|
||||
@ -110,10 +118,8 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
|
||||
|
||||
EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
|
||||
|
||||
typedef typename ei_eval_if_needed_before_nesting<Lhs,Rhs::ColsAtCompileTime>::CopyType CopyLhs;
|
||||
typedef typename ei_eval_if_needed_before_nesting<Rhs,Lhs::RowsAtCompileTime>::CopyType CopyRhs;
|
||||
typedef typename ei_eval_if_needed_before_nesting<Lhs,Rhs::ColsAtCompileTime>::XprType XprLhs;
|
||||
typedef typename ei_eval_if_needed_before_nesting<Rhs,Lhs::RowsAtCompileTime>::XprType XprRhs;
|
||||
typedef typename ei_traits<Product>::LhsXprCopy LhsXprCopy;
|
||||
typedef typename ei_traits<Product>::RhsXprCopy RhsXprCopy;
|
||||
|
||||
Product(const Lhs& lhs, const Rhs& rhs)
|
||||
: m_lhs(lhs), m_rhs(rhs)
|
||||
@ -133,12 +139,15 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
|
||||
const Scalar _coeff(int row, int col) const
|
||||
{
|
||||
Scalar res;
|
||||
if(Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT)
|
||||
const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
|
||||
if(unroll)
|
||||
{
|
||||
ei_product_unroller<Lhs::ColsAtCompileTime-1,
|
||||
Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT
|
||||
? Lhs::ColsAtCompileTime : Dynamic,
|
||||
XprLhs, XprRhs>
|
||||
unroll ? Lhs::ColsAtCompileTime : Dynamic,
|
||||
typename ei_unref<LhsXprCopy>::type,
|
||||
typename ei_unref<RhsXprCopy>::type>
|
||||
::run(row, col, m_lhs, m_rhs, res);
|
||||
}
|
||||
else
|
||||
{
|
||||
res = m_lhs.coeff(row, 0) * m_rhs.coeff(0, col);
|
||||
@ -149,8 +158,8 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
|
||||
}
|
||||
|
||||
protected:
|
||||
const CopyLhs m_lhs;
|
||||
const CopyRhs m_rhs;
|
||||
const LhsXprCopy m_lhs;
|
||||
const RhsXprCopy m_rhs;
|
||||
};
|
||||
|
||||
/** \returns the matrix product of \c *this and \a other.
|
||||
|
@ -31,7 +31,7 @@
|
||||
|
||||
/** Defines the maximal loop size to enable meta unrolling of loops */
|
||||
#ifndef EIGEN_UNROLLING_LIMIT
|
||||
#define EIGEN_UNROLLING_LIMIT 16
|
||||
#define EIGEN_UNROLLING_LIMIT 400
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
|
||||
|
@ -8,6 +8,10 @@
|
||||
using namespace std;
|
||||
USING_PART_OF_NAMESPACE_EIGEN
|
||||
|
||||
#ifndef REPEAT
|
||||
#define REPEAT 40000000
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
Matrix<double,MATSIZE,MATSIZE> I;
|
||||
@ -19,7 +23,7 @@ int main(int argc, char *argv[])
|
||||
m(i,j) = (i+MATSIZE*j);
|
||||
}
|
||||
asm("#begin");
|
||||
for(int a = 0; a < 40000000; a++)
|
||||
for(int a = 0; a < REPEAT; a++)
|
||||
{
|
||||
m = I + 0.00005 * (m + m*m);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user