From aec3d90ca65528fdface6013ccbcc33b04ada867 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 29 Aug 2014 14:19:03 +0200 Subject: [PATCH] Optimization in sparse-sparse matrix products for small ones --- .../ConservativeSparseSparseProduct.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h index 608044a95..8067565f9 100644 --- a/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +++ b/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h @@ -24,10 +24,10 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r Index rows = lhs.innerSize(); Index cols = rhs.outerSize(); eigen_assert(lhs.outerSize() == rhs.innerSize()); - - std::vector mask(rows,false); - Matrix values(rows); - Matrix indices(rows); + + ei_declare_aligned_stack_constructed_variable(bool, mask, rows, 0); + ei_declare_aligned_stack_constructed_variable(Scalar, values, rows, 0); + ei_declare_aligned_stack_constructed_variable(Index, indices, rows, 0); // estimate the number of non zero entries // given a rhs column containing Y non zeros, we assume that the respective Y columns @@ -77,7 +77,7 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r else { // alternative ordered insertion code: - const Index t200 = rows/(log2(200)*1.39); + const Index t200 = rows/11; // 11 == (log2(200)*1.39) const Index t = (rows*100)/139; // FIXME reserve nnz non zeros @@ -88,7 +88,7 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r // result is clearly very sparse we use a linear bound up to 200. if((nnz<200 && nnz1) std::sort(indices.data(),indices.data()+nnz); + if(nnz>1) std::sort(indices,indices+nnz); for(Index k=0; k RowMajorMatrix; - typedef SparseMatrix ColMajorMatrix; + typedef SparseMatrix ColMajorMatrixAux; + typedef typename sparse_eval::type ColMajorMatrix; + ColMajorMatrix resCol(lhs.rows(),rhs.cols()); // FIXME, the following heuristic is probably not very good. if(lhs.rows()>=rhs.cols()) { // perform sorted insertion internal::conservative_sparse_sparse_product_impl(lhs, rhs, resCol, true); - res = resCol; + res.swap(resCol); } else {