In the sparse matrix product, enable sorted insertion when doing two transpositions is definitely not optimal.

2025-08-12 11:49:02 +08:00 · 2014-08-29 11:55:03 +02:00 · 2014-08-29 11:55:03 +02:00 · 1ed9e2d004
commit 1ed9e2d004
parent c3e4080474
1 changed files with 45 additions and 37 deletions
--- a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
+++ b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h
@ -15,7 +15,7 @@ namespace Eigen {
 namespace internal {
 template<typename Lhs, typename Rhs, typename ResultType>
-static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res)
+static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& rhs, ResultType& res, bool sortedInsertion = false)
 {
  typedef typename remove_all<Lhs>::type::Scalar Scalar;
  typedef typename remove_all<Lhs>::type::Index Index;
@ -64,7 +64,8 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
          values[i] += x * y;
      }
    }
-
+    if(!sortedInsertion)
    {
      // unordered insertion
      for(Index k=0; k<nnz; ++k)
      {
@ -72,22 +73,20 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
        res.insertBackByOuterInnerUnordered(j,i) = values[i];
        mask[i] = false;
      }
-
+    }
-#if 0
+    else
    {
      // alternative ordered insertion code:
-
+      const Index t200 = rows/(log2(200)*1.39);
-    Index t200 = rows/(log2(200)*1.39);
+      const Index t = (rows*100)/139;
    Index t = (rows*100)/139;
      // FIXME reserve nnz non zeros
-    // FIXME implement fast sort algorithms for very small nnz
+      // FIXME implement faster sorting algorithms for very small nnz
      // if the result is sparse enough => use a quick sort
      // otherwise => loop through the entire vector
      // In order to avoid to perform an expensive log2 when the
      // result is clearly very sparse we use a linear bound up to 200.
-    //if((nnz<200 && nnz<t200) || nnz * log2(nnz) < t)
+      if((nnz<200 && nnz<t200) || nnz * log2(nnz) < t)
    //res.startVec(j);
    if(true)
      {
        if(nnz>1) std::sort(indices.data(),indices.data()+nnz);
        for(Index k=0; k<nnz; ++k)
@ -109,8 +108,7 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
          }
        }
      }
-#endif
+    }
  }
  res.finalize();
 }
@ -137,11 +135,21 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,C
    typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
    typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
    ColMajorMatrix resCol(lhs.rows(),rhs.cols());
-    internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol);
+    // FIXME, the following heuristic is probably not very good.
-    // sort the non zeros:
+    if(lhs.rows()>=rhs.cols())
    {
      // perform sorted insertion
      internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol, true);
      res = resCol;
    }
    else
    {
      // ressort to transpose to sort the entries
      internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol, false);
      RowMajorMatrix resRow(resCol);
      res = resRow;
    }
  }
 };
 template<typename Lhs, typename Rhs, typename ResultType>