- merge ei_xpr_copy and ei_eval_if_needed_before_nesting

- make use of CoeffReadCost to determine when to unroll the loops,
  for now only in Product.h and in OperatorEquals.h
Performance remains the same: it is generally still not as good as it was
before the big changes.
This commit is contained in:
Benoit Jacob 2008-04-06 18:01:03 +00:00
parent 30ec34de36
commit 371d302efb
5 changed files with 49 additions and 32 deletions

View File

@ -83,27 +83,30 @@ template<typename T> struct ei_eval
// ei_unref<T>::type strips a reference qualifier from a type:
// the primary template yields T unchanged, and the T& specialization yields T.
template<typename T> struct ei_unref { typedef T type; };
template<typename T> struct ei_unref<T&> { typedef T type; };
template<typename T> struct ei_xpr_copy
template<typename T> struct ei_is_temporary
{
typedef typename ei_meta_if< ei_traits<T>::Flags & EvalBeforeNestingBit,
typename ei_eval<T>::type, const T&>::ret type;
enum { ret = 0 };
};
template<typename T> struct ei_xpr_copy<Temporary<T> >
template<typename T> struct ei_is_temporary<Temporary<T> >
{
typedef Temporary<T> type;
enum { ret = 1 };
};
template<typename T, int n=1> struct ei_eval_if_needed_before_nesting
template<typename T, int n=1> struct ei_xpr_copy
{
// FIXME should we consider the additional store as well as the creation cost of the temporary ?
enum { eval = T::Flags & EvalBeforeNestingBit
|| (n+1) * NumTraits<typename ei_traits<T>::Scalar>::ReadCost < (n-1) * T::CoeffReadCost };
typedef typename ei_meta_if<eval, typename ei_eval<T>::type, T>::ret XprType;
typedef typename ei_meta_if<eval, typename ei_eval<T>::type, typename T::XprCopy>::ret CopyType;
typedef typename ei_meta_if<
ei_is_temporary<T>::ret,
T,
typename ei_meta_if<
ei_traits<T>::Flags & EvalBeforeNestingBit
|| (n+1) * NumTraits<typename ei_traits<T>::Scalar>::ReadCost < (n-1) * T::CoeffReadCost,
typename ei_eval<T>::type,
const T&
>::ret
>::ret type;
};
template<typename T> struct ei_functor_traits
{
enum

View File

@ -102,14 +102,15 @@ template<typename OtherDerived>
Derived& MatrixBase<Derived>
::lazyAssign(const MatrixBase<OtherDerived>& other)
{
const bool unroll = SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
if(IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime)
// copying a vector expression into a vector
{
ei_assert(size() == other.size());
if(SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT)
if(unroll)
ei_vector_operator_equals_unroller
<Derived, OtherDerived,
SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT ? SizeAtCompileTime : Dynamic
unroll ? SizeAtCompileTime : Dynamic
>::run(derived(), other.derived());
else
for(int i = 0; i < size(); i++)
@ -118,11 +119,11 @@ Derived& MatrixBase<Derived>
else // copying a matrix expression into a matrix
{
ei_assert(rows() == other.rows() && cols() == other.cols());
if(SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT)
if(unroll)
{
ei_matrix_operator_equals_unroller
<Derived, OtherDerived,
SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT ? SizeAtCompileTime : Dynamic
unroll ? SizeAtCompileTime : Dynamic
>::run(derived(), other.derived());
}
else

View File

@ -84,21 +84,29 @@ template<typename Lhs, typename Rhs, int EvalMode>
struct ei_traits<Product<Lhs, Rhs, EvalMode> >
{
typedef typename Lhs::Scalar Scalar;
typedef typename ei_xpr_copy<Lhs,Rhs::ColsAtCompileTime>::type LhsXprCopy;
typedef typename ei_xpr_copy<Rhs,Lhs::RowsAtCompileTime>::type RhsXprCopy;
typedef typename ei_unref<LhsXprCopy>::type ActualLhs;
typedef typename ei_unref<RhsXprCopy>::type ActualRhs;
enum {
LhsCoeffReadCost = ActualLhs::CoeffReadCost,
RhsCoeffReadCost = ActualRhs::CoeffReadCost,
LhsFlags = ActualLhs::Flags,
RhsFlags = ActualRhs::Flags,
RowsAtCompileTime = Lhs::RowsAtCompileTime,
ColsAtCompileTime = Rhs::ColsAtCompileTime,
MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime,
Flags = ( (RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic)
? (unsigned int)(Lhs::Flags | Rhs::Flags)
: (unsigned int)(Lhs::Flags | Rhs::Flags) & ~LargeBit )
? (unsigned int)(LhsFlags | RhsFlags)
: (unsigned int)(LhsFlags | RhsFlags) & ~LargeBit )
| EvalBeforeAssigningBit
| (ei_product_eval_mode<Lhs, Rhs>::value == (int)CacheOptimal ? EvalBeforeNestingBit : 0),
CoeffReadCost
= Lhs::ColsAtCompileTime == Dynamic
? Dynamic
: Lhs::ColsAtCompileTime
* (NumTraits<Scalar>::MulCost + Lhs::CoeffReadCost + Rhs::CoeffReadCost)
* (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
+ (Lhs::ColsAtCompileTime - 1) * NumTraits<Scalar>::AddCost
};
};
@ -110,10 +118,8 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
typedef typename ei_eval_if_needed_before_nesting<Lhs,Rhs::ColsAtCompileTime>::CopyType CopyLhs;
typedef typename ei_eval_if_needed_before_nesting<Rhs,Lhs::RowsAtCompileTime>::CopyType CopyRhs;
typedef typename ei_eval_if_needed_before_nesting<Lhs,Rhs::ColsAtCompileTime>::XprType XprLhs;
typedef typename ei_eval_if_needed_before_nesting<Rhs,Lhs::RowsAtCompileTime>::XprType XprRhs;
typedef typename ei_traits<Product>::LhsXprCopy LhsXprCopy;
typedef typename ei_traits<Product>::RhsXprCopy RhsXprCopy;
Product(const Lhs& lhs, const Rhs& rhs)
: m_lhs(lhs), m_rhs(rhs)
@ -133,12 +139,15 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
const Scalar _coeff(int row, int col) const
{
Scalar res;
if(Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT)
const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
if(unroll)
{
ei_product_unroller<Lhs::ColsAtCompileTime-1,
Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT
? Lhs::ColsAtCompileTime : Dynamic,
XprLhs, XprRhs>
unroll ? Lhs::ColsAtCompileTime : Dynamic,
typename ei_unref<LhsXprCopy>::type,
typename ei_unref<RhsXprCopy>::type>
::run(row, col, m_lhs, m_rhs, res);
}
else
{
res = m_lhs.coeff(row, 0) * m_rhs.coeff(0, col);
@ -149,8 +158,8 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
}
protected:
const CopyLhs m_lhs;
const CopyRhs m_rhs;
const LhsXprCopy m_lhs;
const RhsXprCopy m_rhs;
};
/** \returns the matrix product of \c *this and \a other.

View File

@ -31,7 +31,7 @@
/** Defines the maximal estimated evaluation cost (derived from CoeffReadCost, not the raw loop size) up to which loops are meta-unrolled */
#ifndef EIGEN_UNROLLING_LIMIT
#define EIGEN_UNROLLING_LIMIT 16
#define EIGEN_UNROLLING_LIMIT 400
#endif
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR

View File

@ -8,6 +8,10 @@
using namespace std;
USING_PART_OF_NAMESPACE_EIGEN
#ifndef REPEAT
#define REPEAT 40000000
#endif
int main(int argc, char *argv[])
{
Matrix<double,MATSIZE,MATSIZE> I;
@ -19,7 +23,7 @@ int main(int argc, char *argv[])
m(i,j) = (i+MATSIZE*j);
}
asm("#begin");
for(int a = 0; a < 40000000; a++)
for(int a = 0; a < REPEAT; a++)
{
m = I + 0.00005 * (m + m*m);
}