implement __gnuc_forget_about_setZero_its_over_now

2025-09-30 02:03:14 +08:00 · 2009-09-18 15:36:05 +02:00 · 2009-09-18 15:36:05 +02:00 · 0b60027f3c
commit 0b60027f3c
parent 6b5f96cb03
2 changed files with 37 additions and 49 deletions
--- a/Eigen/src/Sparse/SparseProduct.h
+++ b/Eigen/src/Sparse/SparseProduct.h
@ -305,23 +305,9 @@ inline Derived& SparseMatrixBase<Derived>::operator=(const SparseProduct<Lhs,Rhs
 }
 // dense = sparse * dense
-// template<typename Derived>
+// Note that here we force no inlining and separate the setZero() because GCC messes up otherwise
-// template<typename Lhs, typename Rhs>
+template<typename Lhs, typename Rhs, typename Dest>
-// Derived& MatrixBase<Derived>::lazyAssign(const SparseProduct<Lhs,Rhs,SparseTimeDenseProduct>& product)
+EIGEN_DONT_INLINE void ei_sparse_time_dense_product(const Lhs& lhs, const Rhs& rhs, Dest& dst)
 // {
 //   typedef typename ei_cleantype<Lhs>::type _Lhs;
 //   typedef typename _Lhs::InnerIterator LhsInnerIterator;
 //   enum { LhsIsRowMajor = (_Lhs::Flags&RowMajorBit)==RowMajorBit };
 //   derived().setZero();
 //   for (int j=0; j<product.lhs().outerSize(); ++j)
 //     for (LhsInnerIterator i(product.lhs(),j); i; ++i)
 //       derived().row(LhsIsRowMajor ? j : i.index()) += i.value() * product.rhs().row(LhsIsRowMajor ? i.index() : j);
 //   return derived();
 // }
 template<typename Derived>
 template<typename Lhs, typename Rhs>
 Derived& MatrixBase<Derived>::lazyAssign(const SparseProduct<Lhs,Rhs,SparseTimeDenseProduct>& product)
 {
  typedef typename ei_cleantype<Lhs>::type _Lhs;
  typedef typename ei_cleantype<Rhs>::type _Rhs;
@ -335,34 +321,44 @@ Derived& MatrixBase<Derived>::lazyAssign(const SparseProduct<Lhs,Rhs,SparseTimeD
          || ( (_Lhs::Flags&LowerTriangularBit) && LhsIsRowMajor) ),
    ProcessSecondHalf = LhsIsSelfAdjoint && (!ProcessFirstHalf)
  };
-  derived().setZero();
+  for (int j=0; j<lhs.outerSize(); ++j)
  for (int j=0; j<product.lhs().outerSize(); ++j)
  {
-    LhsInnerIterator i(product.lhs(),j);
+    LhsInnerIterator i(lhs,j);
    if (ProcessSecondHalf && i && (i.index()==j))
    {
-      derived().row(j) += i.value() * product.rhs().row(j);
+      dst.row(j) += i.value() * rhs.row(j);
      ++i;
    }
-    Block<Derived,1,Derived::ColsAtCompileTime> res(derived().row(LhsIsRowMajor ? j : 0));
+    typename Rhs::Scalar rhs_j = rhs.coeff(j,0);
    Block<Dest,1,Dest::ColsAtCompileTime> dst_j(dst.row(LhsIsRowMajor ? j : 0));
    for(; (ProcessFirstHalf ? i && i.index() < j : i) ; ++i)
    {
      if(LhsIsSelfAdjoint)
      {
        int a = LhsIsRowMajor ? j : i.index();
        int b = LhsIsRowMajor ? i.index() : j;
-        Scalar v = i.value();
+        typename Lhs::Scalar v = i.value();
-        derived().row(a) += (v) * product.rhs().row(b);
+        dst.row(a) += (v) * rhs.row(b);
-        derived().row(b) += ei_conj(v) * product.rhs().row(a);
+        dst.row(b) += ei_conj(v) * rhs.row(a);
      }
      else if(LhsIsRowMajor)
-        res += i.value() * product.rhs().row(i.index());
+        dst_j += i.value() * rhs.row(i.index());
      else if(Rhs::ColsAtCompileTime==1)
        dst.coeffRef(i.index()) += i.value() * rhs_j;
      else
-        derived().row(i.index()) += i.value() * product.rhs().row(j);
+        dst.row(i.index()) += i.value() * rhs.row(j);
    }
    if (ProcessFirstHalf && i && (i.index()==j))
-      derived().row(j) += i.value() * product.rhs().row(j);
+      dst.row(j) += i.value() * rhs.row(j);
  }
 }
 template<typename Derived>
 template<typename Lhs, typename Rhs>
 Derived& MatrixBase<Derived>::lazyAssign(const SparseProduct<Lhs,Rhs,SparseTimeDenseProduct>& product)
 {
  derived().setZero();
  ei_sparse_time_dense_product(product.lhs(), product.rhs(), derived());
  return derived();
 }
--- a/bench/sparse_dense_product.cpp
+++ b/bench/sparse_dense_product.cpp
@ -91,11 +91,11 @@ int main(int argc, char *argv[])
    {
      std::cout << "Eigen sparse\t" << sm1.nonZeros()/float(sm1.rows()*sm1.cols())*100 << "%\n";
-      BENCH(for (int k=0; k<REPEAT; ++k) v2 = sm1 * v1;)
+      BENCH(asm("#myc"); v2 = sm1 * v1; asm("#myd");)
      std::cout << "   a * v:\t" << timer.value() << endl;
-      BENCH(for (int k=0; k<REPEAT; ++k) { asm("#mya"); v2 = sm1.transpose() * v1; asm("#myb"); })
+      BENCH( { asm("#mya"); v2 = sm1.transpose() * v1; asm("#myb"); })
      std::cout << "   a' * v:\t" << timer.value() << endl;
    }
@ -123,18 +123,10 @@ int main(int argc, char *argv[])
      Map<Matrix<Scalar,Dynamic,1> >(&gmmV1[0], cols) = v1;
      Map<Matrix<Scalar,Dynamic,1> >(&gmmV2[0], cols) = v2;
-      timer.reset();
+      BENCH( asm("#myx"); gmm::mult(m1, gmmV1, gmmV2); asm("#myy"); )
      timer.start();
      for (int k=0; k<REPEAT; ++k)
        gmm::mult(m1, gmmV1, gmmV2);
      timer.stop();
      std::cout << "   a * v:\t" << timer.value() << endl;
-      timer.reset();
+      BENCH( gmm::mult(gmm::transposed(m1), gmmV1, gmmV2); )
      timer.start();
      for (int k=0; k<REPEAT; ++k)
        gmm::mult(gmm::transposed(m1), gmmV1, gmmV2);
      timer.stop();
      std::cout << "   a' * v:\t" << timer.value() << endl;
    }
    #endif