mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
- merge ei_xpr_copy and ei_eval_if_needed_before_nesting
- make use of CoeffReadCost to determine when to unroll the loops, for now only in Product.h and in OperatorEquals.h. Performance remains the same: generally still not as good as before the big changes.
This commit is contained in:
parent
30ec34de36
commit
371d302efb
@ -83,27 +83,30 @@ template<typename T> struct ei_eval
|
|||||||
template<typename T> struct ei_unref { typedef T type; };
|
template<typename T> struct ei_unref { typedef T type; };
|
||||||
template<typename T> struct ei_unref<T&> { typedef T type; };
|
template<typename T> struct ei_unref<T&> { typedef T type; };
|
||||||
|
|
||||||
template<typename T> struct ei_xpr_copy
|
template<typename T> struct ei_is_temporary
|
||||||
{
|
{
|
||||||
typedef typename ei_meta_if< ei_traits<T>::Flags & EvalBeforeNestingBit,
|
enum { ret = 0 };
|
||||||
typename ei_eval<T>::type, const T&>::ret type;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename T> struct ei_xpr_copy<Temporary<T> >
|
template<typename T> struct ei_is_temporary<Temporary<T> >
|
||||||
{
|
{
|
||||||
typedef Temporary<T> type;
|
enum { ret = 1 };
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename T, int n=1> struct ei_eval_if_needed_before_nesting
|
template<typename T, int n=1> struct ei_xpr_copy
|
||||||
{
|
{
|
||||||
// FIXME should we consider the additional store as well as the creation cost of the temporary ?
|
typedef typename ei_meta_if<
|
||||||
enum { eval = T::Flags & EvalBeforeNestingBit
|
ei_is_temporary<T>::ret,
|
||||||
|| (n+1) * NumTraits<typename ei_traits<T>::Scalar>::ReadCost < (n-1) * T::CoeffReadCost };
|
T,
|
||||||
typedef typename ei_meta_if<eval, typename ei_eval<T>::type, T>::ret XprType;
|
typename ei_meta_if<
|
||||||
typedef typename ei_meta_if<eval, typename ei_eval<T>::type, typename T::XprCopy>::ret CopyType;
|
ei_traits<T>::Flags & EvalBeforeNestingBit
|
||||||
|
|| (n+1) * NumTraits<typename ei_traits<T>::Scalar>::ReadCost < (n-1) * T::CoeffReadCost,
|
||||||
|
typename ei_eval<T>::type,
|
||||||
|
const T&
|
||||||
|
>::ret
|
||||||
|
>::ret type;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
template<typename T> struct ei_functor_traits
|
template<typename T> struct ei_functor_traits
|
||||||
{
|
{
|
||||||
enum
|
enum
|
||||||
|
@ -102,14 +102,15 @@ template<typename OtherDerived>
|
|||||||
Derived& MatrixBase<Derived>
|
Derived& MatrixBase<Derived>
|
||||||
::lazyAssign(const MatrixBase<OtherDerived>& other)
|
::lazyAssign(const MatrixBase<OtherDerived>& other)
|
||||||
{
|
{
|
||||||
|
const bool unroll = SizeAtCompileTime * OtherDerived::CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
|
||||||
if(IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime)
|
if(IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime)
|
||||||
// copying a vector expression into a vector
|
// copying a vector expression into a vector
|
||||||
{
|
{
|
||||||
ei_assert(size() == other.size());
|
ei_assert(size() == other.size());
|
||||||
if(SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT)
|
if(unroll)
|
||||||
ei_vector_operator_equals_unroller
|
ei_vector_operator_equals_unroller
|
||||||
<Derived, OtherDerived,
|
<Derived, OtherDerived,
|
||||||
SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT ? SizeAtCompileTime : Dynamic
|
unroll ? SizeAtCompileTime : Dynamic
|
||||||
>::run(derived(), other.derived());
|
>::run(derived(), other.derived());
|
||||||
else
|
else
|
||||||
for(int i = 0; i < size(); i++)
|
for(int i = 0; i < size(); i++)
|
||||||
@ -118,11 +119,11 @@ Derived& MatrixBase<Derived>
|
|||||||
else // copying a matrix expression into a matrix
|
else // copying a matrix expression into a matrix
|
||||||
{
|
{
|
||||||
ei_assert(rows() == other.rows() && cols() == other.cols());
|
ei_assert(rows() == other.rows() && cols() == other.cols());
|
||||||
if(SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT)
|
if(unroll)
|
||||||
{
|
{
|
||||||
ei_matrix_operator_equals_unroller
|
ei_matrix_operator_equals_unroller
|
||||||
<Derived, OtherDerived,
|
<Derived, OtherDerived,
|
||||||
SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT ? SizeAtCompileTime : Dynamic
|
unroll ? SizeAtCompileTime : Dynamic
|
||||||
>::run(derived(), other.derived());
|
>::run(derived(), other.derived());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -152,7 +153,7 @@ template<typename OtherDerived>
|
|||||||
Derived& MatrixBase<Derived>
|
Derived& MatrixBase<Derived>
|
||||||
::operator=(const MatrixBase<OtherDerived>& other)
|
::operator=(const MatrixBase<OtherDerived>& other)
|
||||||
{
|
{
|
||||||
if (OtherDerived::Flags & EvalBeforeAssigningBit)
|
if(OtherDerived::Flags & EvalBeforeAssigningBit)
|
||||||
{
|
{
|
||||||
return lazyAssign(other.derived().eval());
|
return lazyAssign(other.derived().eval());
|
||||||
}
|
}
|
||||||
|
@ -84,21 +84,29 @@ template<typename Lhs, typename Rhs, int EvalMode>
|
|||||||
struct ei_traits<Product<Lhs, Rhs, EvalMode> >
|
struct ei_traits<Product<Lhs, Rhs, EvalMode> >
|
||||||
{
|
{
|
||||||
typedef typename Lhs::Scalar Scalar;
|
typedef typename Lhs::Scalar Scalar;
|
||||||
|
typedef typename ei_xpr_copy<Lhs,Rhs::ColsAtCompileTime>::type LhsXprCopy;
|
||||||
|
typedef typename ei_xpr_copy<Rhs,Lhs::RowsAtCompileTime>::type RhsXprCopy;
|
||||||
|
typedef typename ei_unref<LhsXprCopy>::type ActualLhs;
|
||||||
|
typedef typename ei_unref<RhsXprCopy>::type ActualRhs;
|
||||||
enum {
|
enum {
|
||||||
|
LhsCoeffReadCost = ActualLhs::CoeffReadCost,
|
||||||
|
RhsCoeffReadCost = ActualRhs::CoeffReadCost,
|
||||||
|
LhsFlags = ActualLhs::Flags,
|
||||||
|
RhsFlags = ActualRhs::Flags,
|
||||||
RowsAtCompileTime = Lhs::RowsAtCompileTime,
|
RowsAtCompileTime = Lhs::RowsAtCompileTime,
|
||||||
ColsAtCompileTime = Rhs::ColsAtCompileTime,
|
ColsAtCompileTime = Rhs::ColsAtCompileTime,
|
||||||
MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
|
MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
|
||||||
MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime,
|
MaxColsAtCompileTime = Rhs::MaxColsAtCompileTime,
|
||||||
Flags = ( (RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic)
|
Flags = ( (RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic)
|
||||||
? (unsigned int)(Lhs::Flags | Rhs::Flags)
|
? (unsigned int)(LhsFlags | RhsFlags)
|
||||||
: (unsigned int)(Lhs::Flags | Rhs::Flags) & ~LargeBit )
|
: (unsigned int)(LhsFlags | RhsFlags) & ~LargeBit )
|
||||||
| EvalBeforeAssigningBit
|
| EvalBeforeAssigningBit
|
||||||
| (ei_product_eval_mode<Lhs, Rhs>::value == (int)CacheOptimal ? EvalBeforeNestingBit : 0),
|
| (ei_product_eval_mode<Lhs, Rhs>::value == (int)CacheOptimal ? EvalBeforeNestingBit : 0),
|
||||||
CoeffReadCost
|
CoeffReadCost
|
||||||
= Lhs::ColsAtCompileTime == Dynamic
|
= Lhs::ColsAtCompileTime == Dynamic
|
||||||
? Dynamic
|
? Dynamic
|
||||||
: Lhs::ColsAtCompileTime
|
: Lhs::ColsAtCompileTime
|
||||||
* (NumTraits<Scalar>::MulCost + Lhs::CoeffReadCost + Rhs::CoeffReadCost)
|
* (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
|
||||||
+ (Lhs::ColsAtCompileTime - 1) * NumTraits<Scalar>::AddCost
|
+ (Lhs::ColsAtCompileTime - 1) * NumTraits<Scalar>::AddCost
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
@ -110,10 +118,8 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
|
|||||||
|
|
||||||
EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
|
EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
|
||||||
|
|
||||||
typedef typename ei_eval_if_needed_before_nesting<Lhs,Rhs::ColsAtCompileTime>::CopyType CopyLhs;
|
typedef typename ei_traits<Product>::LhsXprCopy LhsXprCopy;
|
||||||
typedef typename ei_eval_if_needed_before_nesting<Rhs,Lhs::RowsAtCompileTime>::CopyType CopyRhs;
|
typedef typename ei_traits<Product>::RhsXprCopy RhsXprCopy;
|
||||||
typedef typename ei_eval_if_needed_before_nesting<Lhs,Rhs::ColsAtCompileTime>::XprType XprLhs;
|
|
||||||
typedef typename ei_eval_if_needed_before_nesting<Rhs,Lhs::RowsAtCompileTime>::XprType XprRhs;
|
|
||||||
|
|
||||||
Product(const Lhs& lhs, const Rhs& rhs)
|
Product(const Lhs& lhs, const Rhs& rhs)
|
||||||
: m_lhs(lhs), m_rhs(rhs)
|
: m_lhs(lhs), m_rhs(rhs)
|
||||||
@ -133,12 +139,15 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
|
|||||||
const Scalar _coeff(int row, int col) const
|
const Scalar _coeff(int row, int col) const
|
||||||
{
|
{
|
||||||
Scalar res;
|
Scalar res;
|
||||||
if(Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT)
|
const bool unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT;
|
||||||
|
if(unroll)
|
||||||
|
{
|
||||||
ei_product_unroller<Lhs::ColsAtCompileTime-1,
|
ei_product_unroller<Lhs::ColsAtCompileTime-1,
|
||||||
Lhs::ColsAtCompileTime <= EIGEN_UNROLLING_LIMIT
|
unroll ? Lhs::ColsAtCompileTime : Dynamic,
|
||||||
? Lhs::ColsAtCompileTime : Dynamic,
|
typename ei_unref<LhsXprCopy>::type,
|
||||||
XprLhs, XprRhs>
|
typename ei_unref<RhsXprCopy>::type>
|
||||||
::run(row, col, m_lhs, m_rhs, res);
|
::run(row, col, m_lhs, m_rhs, res);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
res = m_lhs.coeff(row, 0) * m_rhs.coeff(0, col);
|
res = m_lhs.coeff(row, 0) * m_rhs.coeff(0, col);
|
||||||
@ -149,8 +158,8 @@ template<typename Lhs, typename Rhs, int EvalMode> class Product : ei_no_assignm
|
|||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
const CopyLhs m_lhs;
|
const LhsXprCopy m_lhs;
|
||||||
const CopyRhs m_rhs;
|
const RhsXprCopy m_rhs;
|
||||||
};
|
};
|
||||||
|
|
||||||
/** \returns the matrix product of \c *this and \a other.
|
/** \returns the matrix product of \c *this and \a other.
|
||||||
|
@ -31,7 +31,7 @@
|
|||||||
|
|
||||||
/** Defines the maximal loop size to enable meta unrolling of loops */
|
/** Defines the maximal loop size to enable meta unrolling of loops */
|
||||||
#ifndef EIGEN_UNROLLING_LIMIT
|
#ifndef EIGEN_UNROLLING_LIMIT
|
||||||
#define EIGEN_UNROLLING_LIMIT 16
|
#define EIGEN_UNROLLING_LIMIT 400
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
|
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
|
||||||
|
@ -8,6 +8,10 @@
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
USING_PART_OF_NAMESPACE_EIGEN
|
USING_PART_OF_NAMESPACE_EIGEN
|
||||||
|
|
||||||
|
#ifndef REPEAT
|
||||||
|
#define REPEAT 40000000
|
||||||
|
#endif
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
Matrix<double,MATSIZE,MATSIZE> I;
|
Matrix<double,MATSIZE,MATSIZE> I;
|
||||||
@ -19,7 +23,7 @@ int main(int argc, char *argv[])
|
|||||||
m(i,j) = (i+MATSIZE*j);
|
m(i,j) = (i+MATSIZE*j);
|
||||||
}
|
}
|
||||||
asm("#begin");
|
asm("#begin");
|
||||||
for(int a = 0; a < 40000000; a++)
|
for(int a = 0; a < REPEAT; a++)
|
||||||
{
|
{
|
||||||
m = I + 0.00005 * (m + m*m);
|
m = I + 0.00005 * (m + m*m);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user