finally here is a simple solution making (a*b).diagonal() even faster than a.lazyProduct(b).diagonal() !!

2025-07-31 01:03:38 +08:00 · 2010-02-10 14:08:47 +01:00 · 2010-02-10 14:08:47 +01:00 · 0ca67afe6a
commit 0ca67afe6a
parent 71b64d3498
4 changed files with 40 additions and 12 deletions
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@ -447,17 +447,12 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
 /** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
  *
-  * The coefficients of the product will be computed as requested that is particularly useful when you
+  * The returned product will behave like any other expressions: the coefficients of the product will be
-  * only want to compute a small fraction of the result's coefficients.
+  * computed one at a time as requested. This might be useful in some extremely rare cases when only
-  * Here is an example:
+  * a small and non-coherent fraction of the result's coefficients has to be computed.
  * \code
  * MatrixXf a(10,10), b(10,10);
  * (a*b).diagonal().sum();             // here a*b is entirely computed into a 10x10 temporary matrix
  * a.lazyProduct(b).diagonal().sum();  // here a*b is evaluated in a lazy manner,
  *                                     // so only the diagonal coefficients will be computed
  * \endcode
  *
-  * \warning This version of the matrix product can be much much slower if all coefficients have to be computed anyways.
+  * \warning This version of the matrix product can be much, much slower. Use it only if you know
  * what you are doing and have measured a true speed improvement.
  *
  * \sa operator*(const MatrixBase&)
  */
--- a/Eigen/src/Core/ProductBase.h
+++ b/Eigen/src/Core/ProductBase.h
@ -83,6 +83,9 @@ class ProductBase : public MatrixBase<Derived>
    typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
    typedef typename ei_cleantype<ActualRhsType>::type _ActualRhsType;
    // Diagonal of a product: no need to evaluate the arguments because they are going to be evaluated only once
    typedef CoeffBasedProduct<LhsNested, RhsNested, 0> FullyLazyCoeffBaseProductType;
  public:
    typedef typename Base::PlainMatrixType PlainMatrixType;
@ -121,6 +124,16 @@ class ProductBase : public MatrixBase<Derived>
      return m_result;
    }
    /** \returns a Diagonal expression (diagonal index 0) of this product.
      *
      * The diagonal is not taken on \c *this: a FullyLazyCoeffBaseProductType is built from
      * the stored operands m_lhs and m_rhs, and the return statement relies on the implicit
      * conversion from that product to the Diagonal return type.
      */
    const Diagonal<FullyLazyCoeffBaseProductType,0> diagonal() const
    { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); }
    /** \returns a Diagonal expression of this product for the diagonal selected at
      * compile time by \a Index.
      *
      * As in the non-template overload, a FullyLazyCoeffBaseProductType is built from
      * m_lhs and m_rhs and implicitly converted to the Diagonal return type.
      */
    template<int Index>
    const Diagonal<FullyLazyCoeffBaseProductType,Index> diagonal() const
    { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs); }
    /** \returns a Diagonal expression of this product for the diagonal selected at
      * run time by \a index.
      *
      * Builds a temporary FullyLazyCoeffBaseProductType from m_lhs and m_rhs and forwards
      * to its own diagonal(int) overload.
      */
    // NOTE(review): the value returned by the inner diagonal(index) call must be convertible
    // to Diagonal<FullyLazyCoeffBaseProductType,Dynamic> -- confirm against CoeffBasedProduct.
    const Diagonal<FullyLazyCoeffBaseProductType,Dynamic> diagonal(int index) const
    { return FullyLazyCoeffBaseProductType(m_lhs, m_rhs).diagonal(index); }
  protected:
    const LhsNested m_lhs;
--- a/Eigen/src/Core/products/CoeffBasedProduct.h
+++ b/Eigen/src/Core/products/CoeffBasedProduct.h
@ -127,8 +127,14 @@ class CoeffBasedProduct
                                  Unroll ? InnerSize-1 : Dynamic,
                                  _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
    typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;
  public:
    /** Copy constructor: default-constructs the base class and copies the two nested
      * operands (m_lhs, m_rhs) from \a other. */
    inline CoeffBasedProduct(const CoeffBasedProduct& other)
      : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
    {}
    template<typename Lhs, typename Rhs>
    inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
      : m_lhs(lhs), m_rhs(rhs)
@ -185,6 +191,16 @@ class CoeffBasedProduct
    /** \returns a const reference to the stored left-hand side operand (m_lhs). */
    const _LhsNested& lhs() const { return m_lhs; }
    /** \returns a const reference to the stored right-hand side operand (m_rhs). */
    const _RhsNested& rhs() const { return m_rhs; }
    /** \returns a Diagonal expression (diagonal index 0) of this product.
      *
      * \c *this is reinterpreted as a LazyCoeffBasedProductType, i.e. the same product
      * instantiated with NestByRefBit, and then implicitly converted to the Diagonal
      * return type.
      */
    // NOTE(review): the reinterpret_cast assumes both CoeffBasedProduct instantiations
    // share the same object layout -- confirm.
    const Diagonal<LazyCoeffBasedProductType,0> diagonal() const
    { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
    /** \returns a Diagonal expression of this product for the diagonal selected at
      * compile time by \a Index.
      *
      * \c *this is reinterpreted as a LazyCoeffBasedProductType (the NestByRefBit
      * instantiation of this product) before the implicit conversion to Diagonal.
      */
    // NOTE(review): the reinterpret_cast assumes both CoeffBasedProduct instantiations
    // share the same object layout -- confirm.
    template<int Index>
    const Diagonal<LazyCoeffBasedProductType,Index> diagonal() const
    { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
    /** \returns a Diagonal expression of this product for the diagonal selected at
      * run time by \a index.
      *
      * \c *this is reinterpreted as a LazyCoeffBasedProductType (the NestByRefBit
      * instantiation of this product) and the call is forwarded to that type's
      * diagonal(int).
      */
    // NOTE(review): the reinterpret_cast assumes both CoeffBasedProduct instantiations
    // share the same object layout -- confirm.
    const Diagonal<LazyCoeffBasedProductType,Dynamic> diagonal(int index) const
    { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
  protected:
    const LhsNested m_lhs;
    const RhsNested m_rhs;
--- a/doc/A05_PortingFrom2To3.dox
+++ b/doc/A05_PortingFrom2To3.dox
@ -81,9 +81,9 @@ With Eigen2 you would have written:
 c = (a.cwise().abs().cwise().pow(3)).cwise() * (b.cwise().abs().cwise().sin());
 \endcode
-\section LazyVsNoalias Lazy evaluation versus noalias
+\section LazyVsNoalias Lazy evaluation and noalias
-In Eigen all operations are performed in a lazy fashion except the matrix products which are always evaluated to a temporary by default.
+In Eigen all operations are performed in a lazy fashion except the matrix products which are always evaluated into a temporary by default.
 In Eigen2, lazy evaluation could be enforced by tagging a product using the .lazy() function. However, in complex expressions it was not
 easy to determine where to put the lazy() function. In Eigen3, the lazy() feature has been superseded by the MatrixBase::noalias() function
 which can be used on the left hand side of an assignment when no aliasing can occur. Here is an example:
@ -92,6 +92,10 @@ MatrixXf a, b, c;
 ...
 c.noalias() += 2 * a.transpose() * b;
 \endcode
 However, the noalias mechanism does not cover all the features of the old .lazy(). Indeed, in some extremely rare cases,
 it might be useful to explicitly request a lazy product, i.e., a product which will be evaluated one coefficient at a time, on request,
 just like any other expression. To this end you can use the MatrixBase::lazyProduct() function; however, we strongly discourage you from
 using it unless you are sure of what you are doing, i.e., you have rigorously measured a speed improvement.
 */