From 53b930887d118af5204840231f08b3307addce4e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 26 Jun 2015 10:32:34 +0200 Subject: [PATCH] Enable OpenMP parallelization of row-major-sparse * dense products. I observed significant speed-up of the CG solver. --- Eigen/src/SparseCore/SparseDenseProduct.h | 39 +++++++++++++++++++---- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/Eigen/src/SparseCore/SparseDenseProduct.h b/Eigen/src/SparseCore/SparseDenseProduct.h index 731d40f29..b8d71d3f8 100644 --- a/Eigen/src/SparseCore/SparseDenseProduct.h +++ b/Eigen/src/SparseCore/SparseDenseProduct.h @@ -30,23 +30,48 @@ struct sparse_time_dense_product_impl::type Rhs; typedef typename internal::remove_all::type Res; typedef typename evaluator::InnerIterator LhsInnerIterator; + typedef typename evaluator::type LhsEval; static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha) { - typename evaluator::type lhsEval(lhs); + LhsEval lhsEval(lhs); + + Index n = lhs.outerSize(); +#ifdef EIGEN_HAS_OPENMP + Eigen::initParallel(); + Index threads = Eigen::nbThreads(); +#endif + for(Index c=0; c1 && lhs.nonZeros() > 20000) { - typename Res::Scalar tmp(0); - for(LhsInnerIterator it(lhsEval,j); it ;++it) - tmp += it.value() * rhs.coeff(it.index(),c); - res.coeffRef(j,c) += alpha * tmp; + #pragma omp parallel for schedule(static) num_threads(threads) + for(Index i=0; i struct scalar_product_traits > {