- merge ei_xpr_copy and ei_eval_if_needed_before_nesting

- make use of CoeffReadCost to determine when to unroll the loops,
  for now only in Product.h and in OperatorEquals.h
- performance remains the same: generally still not as good as before the
  big changes.
This commit is contained in:
Benoit Jacob 2008-04-06 18:01:03 +00:00
parent 30ec34de36
commit 371d302efb
5 changed files with 49 additions and 32 deletions

View File

@ -83,27 +83,30 @@ template<typename T> struct ei_eval
template<typename T> struct ei_unref { typedef T type; }; template<typename T> struct ei_unref { typedef T type; };
template<typename T> struct ei_unref<T&> { typedef T type; }; template<typename T> struct ei_unref<T&> { typedef T type; };
template<typename T> struct ei_xpr_copy template<typename T> struct ei_is_temporary
{ {
typedef typename ei_meta_if< ei_traits<T>::Flags & EvalBeforeNestingBit, enum { ret = 0 };
typename ei_eval<T>::type, const T&>::ret type;
}; };
template<typename T> struct ei_xpr_copy<Temporary<T> > template<typename T> struct ei_is_temporary<Temporary<T> >
{ {
typedef Temporary<T> type; enum { ret = 1 };
}; };
template<typename T, int n=1> struct ei_eval_if_needed_before_nesting template<typename T, int n=1> struct ei_xpr_copy
{ {
// FIXME should we consider the additional store as well as the creation cost of the temporary ? typedef typename ei_meta_if<
enum { eval = T::Flags & EvalBeforeNestingBit ei_is_temporary<T>::ret,
|| (n+1) * NumTraits<typename ei_traits<T>::Scalar>::ReadCost < (n-1) * T::CoeffReadCost }; T,
typedef typename ei_meta_if<eval, typename ei_eval<T>::type, T>::ret XprType; typename ei_meta_if<
typedef typename ei_meta_if<eval, typename ei_eval<T>::type, typename T::XprCopy>::ret CopyType; ei_traits<T>::Flags & EvalBeforeNestingBit
|| (n+1) * NumTraits<typename ei_traits<T>::Scalar>::ReadCost < (n-1) * T::CoeffReadCost,
typename ei_eval<T>::type,
const T&
>::ret
>::ret type;
}; };
template<typename T> struct ei_functor_traits template<typename T> struct ei_functor_traits
{ {
enum enum

View File

@ -102,14 +102,15 @@ template<typename OtherDerived>
Derived& MatrixBase<Derived> Derived& MatrixBase<Derived>
::lazyAssign(const MatrixBase<OtherDerived>& other) ::lazyAssign(const MatrixBase<OtherDerived>& other)
{ {
const bool unroll = SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
if(IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime) if(IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime)
// copying a vector expression into a vector // copying a vector expression into a vector
{ {
ei_assert(size() == other.size()); ei_assert(size() == other.size());
if(SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT) if(unroll)
ei_vector_operator_equals_unroller ei_vector_operator_equals_unroller
<Derived, OtherDerived, <Derived, OtherDerived,
SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT ? SizeAtCompileTime : Dynamic unroll ? SizeAtCompileTime : Dynamic
>::run(derived(), other.derived()); >::run(derived(), other.derived());
else else
for(int i = 0; i < size(); i++) for(int i = 0; i < size(); i++)
@ -118,11 +119,11 @@ Derived& MatrixBase<Derived>
else // copying a matrix expression into a matrix else // copying a matrix expression into a matrix
{ {
ei_assert(rows() == other.rows() && cols() == other.cols()); ei_assert(rows() == other.rows() && cols() == other.cols());
if(SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT) if(unroll)
{ {
ei_matrix_operator_equals_unroller ei_matrix_operator_equals_unroller
<Derived, OtherDerived, <Derived, OtherDerived,
SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT ? SizeAtCompileTime : Dynamic unroll ? SizeAtCompileTime : Dynamic
>::run(derived(), other.derived()); >::run(derived(), other.derived());
} }
else else
@ -152,7 +153,7 @@ template<typename OtherDerived>
Derived& MatrixBase<Derived> Derived& MatrixBase<Derived>
::operator=(const MatrixBase<OtherDerived>& other) ::operator=(const MatrixBase<OtherDerived>& other)
{ {
if (OtherDerived::Flags & EvalBeforeAssigningBit) if(OtherDerived::Flags & EvalBeforeAssigningBit)
{ {
return lazyAssign(other.derived().eval()); return lazyAssign(other.derived().eval());
} }

View File

@ -84,21 +84,29 @@ template<typename Lhs, typename Rhs, int EvalMode>
struct ei_traits<Product<Lhs, Rhs, EvalMode> > struct ei_traits<Product<Lhs, Rhs, EvalMode> >
{ {
typedef typename Lhs::Scalar Scalar; typedef typename Lhs::Scalar Scalar;
typedef typename ei_xpr_copy<Lhs,Rhs::ColsAtCompileTime>::type LhsXprCopy;
typedef typename ei_xpr_copy<Rhs,Lhs::RowsAtCompileTime>::type RhsXprCopy;
typedef typename ei_unref<LhsXprCopy>::type ActualLhs;
typedef typename ei_unref<RhsXprCopy>::type ActualRhs;
enum { enum {
LhsCoeffReadCost = ActualLhs::CoeffReadCost,
RhsCoeffReadCost = ActualRhs::CoeffReadCost,
LhsFlags = ActualLhs::Flags,
RhsFlags = ActualRhs::Flags,
RowsAtCompileTime = Lhs::RowsAtCompileTime, RowsAtCompileTime = Lhs::RowsAtCompileTime,
ColsAtCompileTime = Rhs::ColsAtCompileTime, ColsAtCompileTime = Rhs::ColsAtCompileTime,
MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime, MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime, MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime,
Flags = ( (RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) Flags = ( (RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic)
? (unsigned int)(Lhs::Flags | Rhs::Flags) ? (unsigned int)(LhsFlags | RhsFlags)
: (unsigned int)(Lhs::Flags | Rhs::Flags) & ~LargeBit ) : (unsigned int)(LhsFlags | RhsFlags) & ~LargeBit )
| EvalBeforeAssigningBit | EvalBeforeAssigningBit
| (ei_product_eval_mode<Lhs, Rhs>::value == (int)CacheOptimal ? EvalBeforeNestingBit : 0), | (ei_product_eval_mode<Lhs, Rhs>::value == (int)CacheOptimal ? EvalBeforeNestingBit : 0),
CoeffReadCost CoeffReadCost
= Lhs::ColsAtCompileTime == Dynamic = Lhs::ColsAtCompileTime == Dynamic
? Dynamic ? Dynamic
: Lhs::ColsAtCompileTime : Lhs::ColsAtCompileTime
* (NumTraits<Scalar>::MulCost + Lhs::CoeffReadCost + Rhs::CoeffReadCost) * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
+ (Lhs::ColsAtCompileTime - 1) * NumTraits<Scalar>::AddCost + (Lhs::ColsAtCompileTime - 1) * NumTraits<Scalar>::AddCost
}; };
}; };
@ -110,10 +118,8 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
EIGEN_GENERIC_PUBLIC_INTERFACE(Product) EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
typedef typename ei_eval_if_needed_before_nesting<Lhs,Rhs::ColsAtCompileTime>::CopyType CopyLhs; typedef typename ei_traits<Product>::LhsXprCopy LhsXprCopy;
typedef typename ei_eval_if_needed_before_nesting<Rhs,Lhs::RowsAtCompileTime>::CopyType CopyRhs; typedef typename ei_traits<Product>::RhsXprCopy RhsXprCopy;
typedef typename ei_eval_if_needed_before_nesting<Lhs,Rhs::ColsAtCompileTime>::XprType XprLhs;
typedef typename ei_eval_if_needed_before_nesting<Rhs,Lhs::RowsAtCompileTime>::XprType XprRhs;
Product(const Lhs& lhs, const Rhs& rhs) Product(const Lhs& lhs, const Rhs& rhs)
: m_lhs(lhs), m_rhs(rhs) : m_lhs(lhs), m_rhs(rhs)
@ -133,12 +139,15 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
const Scalar _coeff(int row, int col) const const Scalar _coeff(int row, int col) const
{ {
Scalar res; Scalar res;
if(Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT) const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
if(unroll)
{
ei_product_unroller<Lhs::ColsAtCompileTime-1, ei_product_unroller<Lhs::ColsAtCompileTime-1,
Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT unroll ? Lhs::ColsAtCompileTime : Dynamic,
? Lhs::ColsAtCompileTime : Dynamic, typename ei_unref<LhsXprCopy>::type,
XprLhs, XprRhs> typename ei_unref<RhsXprCopy>::type>
::run(row, col, m_lhs, m_rhs, res); ::run(row, col, m_lhs, m_rhs, res);
}
else else
{ {
res = m_lhs.coeff(row, 0) * m_rhs.coeff(0, col); res = m_lhs.coeff(row, 0) * m_rhs.coeff(0, col);
@ -149,8 +158,8 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
} }
protected: protected:
const CopyLhs m_lhs; const LhsXprCopy m_lhs;
const CopyRhs m_rhs; const RhsXprCopy m_rhs;
}; };
/** \returns the matrix product of \c *this and \a other. /** \returns the matrix product of \c *this and \a other.

View File

@ -31,7 +31,7 @@
/** Defines the maximal loop size to enable meta unrolling of loops */ /** Defines the maximal loop size to enable meta unrolling of loops */
#ifndef EIGEN_UNROLLING_LIMIT #ifndef EIGEN_UNROLLING_LIMIT
#define EIGEN_UNROLLING_LIMIT 16 #define EIGEN_UNROLLING_LIMIT 400
#endif #endif
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR

View File

@ -8,6 +8,10 @@
using namespace std; using namespace std;
USING_PART_OF_NAMESPACE_EIGEN USING_PART_OF_NAMESPACE_EIGEN
#ifndef REPEAT
#define REPEAT 40000000
#endif
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
Matrix<double,MATSIZE,MATSIZE> I; Matrix<double,MATSIZE,MATSIZE> I;
@ -19,7 +23,7 @@ int main(int argc, char *argv[])
m(i,j) = (i+MATSIZE*j); m(i,j) = (i+MATSIZE*j);
} }
asm("#begin"); asm("#begin");
for(int a = 0; a < 40000000; a++) for(int a = 0; a < REPEAT; a++)
{ {
m = I + 0.00005 * (m + m*m); m = I + 0.00005 * (m + m*m);
} }