From b8900d0b80734c6cca25595e8613d47f1d48909f Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Thu, 3 Apr 2008 14:17:56 +0000 Subject: [PATCH] More clever evaluation of arguments: now it occurs earlier, in operator*, before the Product<> type is constructed. This resets template depth on each intermediate evaluation, and gives simpler code. Introducing ei_eval_if_expensive which evaluates Derived if it's worth it given that each of its coeffs will be accessed n times. Operator* uses this with adequate values of n to evaluate args exactly when needed. --- Eigen/src/Core/ForwardDeclarations.h | 8 ++++++++ Eigen/src/Core/Product.h | 20 +++++++++++++------- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/Eigen/src/Core/ForwardDeclarations.h b/Eigen/src/Core/ForwardDeclarations.h index 50cffe637..ea0dd9502 100644 --- a/Eigen/src/Core/ForwardDeclarations.h +++ b/Eigen/src/Core/ForwardDeclarations.h @@ -27,6 +27,7 @@ template struct ei_traits; template struct ei_product_eval_mode; +template struct NumTraits; template class Matrix; template class Lazy; @@ -89,6 +90,13 @@ template struct ei_eval ei_traits::MaxColsAtCompileTime> type; }; +template struct ei_eval_if_expensive +{ + enum { eval = n * NumTraits::ReadCost < (n-1) * T::CoeffReadCost }; + typedef typename ei_meta_if::ret type; + typedef typename ei_meta_if::ret reftype; +}; + template struct ei_eval_unless_lazy { typedef typename ei_meta_if::Flags & LazyBit, diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index c71637779..6f21292b1 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -78,6 +78,7 @@ template struct ei_traits > { typedef typename Lhs::Scalar Scalar; +#if 0 typedef typename ei_meta_if< (int)NumTraits::ReadCost < (int)Lhs::CoeffReadCost, typename Lhs::Eval, @@ -95,6 +96,7 @@ struct ei_traits > (int)NumTraits::ReadCost < (int)Rhs::CoeffReadCost, typename Rhs::Eval, typename Rhs::XprCopy>::ret ActualRhsXprCopy; +#endif enum { RowsAtCompileTime = 
Lhs::RowsAtCompileTime, ColsAtCompileTime = Rhs::ColsAtCompileTime, @@ -107,7 +109,7 @@ struct ei_traits > = Lhs::ColsAtCompileTime == Dynamic ? Dynamic : Lhs::ColsAtCompileTime - * (NumTraits::MulCost + ActualLhs::CoeffReadCost + ActualRhs::CoeffReadCost) + * (NumTraits::MulCost + Lhs::CoeffReadCost + Rhs::CoeffReadCost) + (Lhs::ColsAtCompileTime - 1) * NumTraits::AddCost }; }; @@ -115,7 +117,7 @@ struct ei_traits > template struct ei_product_eval_mode { enum{ value = Lhs::MaxRowsAtCompileTime == Dynamic || Rhs::MaxColsAtCompileTime == Dynamic - ? CacheOptimal : UnrolledDotProduct }; + ? CacheOptimal : UnrolledDotProduct }; }; template class Product : ei_no_assignment_operator, @@ -124,11 +126,12 @@ template class Product : ei_no_assignm public: EIGEN_GENERIC_PUBLIC_INTERFACE(Product) +#if 0 typedef typename ei_traits::ActualLhs ActualLhs; typedef typename ei_traits::ActualRhs ActualRhs; typedef typename ei_traits::ActualLhsXprCopy ActualLhsXprCopy; typedef typename ei_traits::ActualRhsXprCopy ActualRhsXprCopy; - +#endif Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) { @@ -153,7 +156,7 @@ template class Product : ei_no_assignm ei_product_unroller + Lhs, Rhs> ::run(row, col, m_lhs, m_rhs, res); else { @@ -165,8 +168,8 @@ template class Product : ei_no_assignm } protected: - const ActualLhsXprCopy m_lhs; - const ActualRhsXprCopy m_rhs; + const typename Lhs::XprCopy m_lhs; + const typename Rhs::XprCopy m_rhs; }; /** \returns the matrix product of \c *this and \a other. @@ -181,7 +184,10 @@ template const typename ei_eval_unless_lazy >::type MatrixBase::operator*(const MatrixBase &other) const { - return Product(derived(), other.derived()).eval(); + typedef ei_eval_if_expensive Lhs; + typedef ei_eval_if_expensive Rhs; + return Product + (typename Lhs::reftype(derived()), typename Rhs::reftype(other.derived())).eval(); } /** replaces \c *this by \c *this * \a other.