mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-13 12:19:12 +08:00
Add multi-threading for sparse-row-major * dense-row-major
This commit is contained in:
parent
2f3287da7d
commit
8810baaed4
@@ -88,10 +88,11 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, A
|
|||||||
typedef typename internal::remove_all<SparseLhsType>::type Lhs;
|
typedef typename internal::remove_all<SparseLhsType>::type Lhs;
|
||||||
typedef typename internal::remove_all<DenseRhsType>::type Rhs;
|
typedef typename internal::remove_all<DenseRhsType>::type Rhs;
|
||||||
typedef typename internal::remove_all<DenseResType>::type Res;
|
typedef typename internal::remove_all<DenseResType>::type Res;
|
||||||
typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
|
typedef evaluator<Lhs> LhsEval;
|
||||||
|
typedef typename LhsEval::InnerIterator LhsInnerIterator;
|
||||||
static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha)
|
static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha)
|
||||||
{
|
{
|
||||||
evaluator<Lhs> lhsEval(lhs);
|
LhsEval lhsEval(lhs);
|
||||||
for(Index c=0; c<rhs.cols(); ++c)
|
for(Index c=0; c<rhs.cols(); ++c)
|
||||||
{
|
{
|
||||||
for(Index j=0; j<lhs.outerSize(); ++j)
|
for(Index j=0; j<lhs.outerSize(); ++j)
|
||||||
@@ -111,16 +112,37 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, t
|
|||||||
typedef typename internal::remove_all<SparseLhsType>::type Lhs;
|
typedef typename internal::remove_all<SparseLhsType>::type Lhs;
|
||||||
typedef typename internal::remove_all<DenseRhsType>::type Rhs;
|
typedef typename internal::remove_all<DenseRhsType>::type Rhs;
|
||||||
typedef typename internal::remove_all<DenseResType>::type Res;
|
typedef typename internal::remove_all<DenseResType>::type Res;
|
||||||
typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
|
typedef evaluator<Lhs> LhsEval;
|
||||||
|
typedef typename LhsEval::InnerIterator LhsInnerIterator;
|
||||||
static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)
|
static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha)
|
||||||
{
|
{
|
||||||
evaluator<Lhs> lhsEval(lhs);
|
Index n = lhs.rows();
|
||||||
for(Index j=0; j<lhs.outerSize(); ++j)
|
LhsEval lhsEval(lhs);
|
||||||
|
|
||||||
|
#ifdef EIGEN_HAS_OPENMP
|
||||||
|
Eigen::initParallel();
|
||||||
|
Index threads = Eigen::nbThreads();
|
||||||
|
// This 20000 threshold has been found experimentally on 2D and 3D Poisson problems.
|
||||||
|
// It basically represents the minimal amount of work to be done to be worth it.
|
||||||
|
if(threads>1 && lhsEval.nonZerosEstimate()*rhs.cols() > 20000)
|
||||||
{
|
{
|
||||||
typename Res::RowXpr res_j(res.row(j));
|
#pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads)
|
||||||
for(LhsInnerIterator it(lhsEval,j); it ;++it)
|
for(Index i=0; i<n; ++i)
|
||||||
res_j += (alpha*it.value()) * rhs.row(it.index());
|
processRow(lhsEval,rhs,res,alpha,i);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
for(Index i=0; i<n; ++i)
|
||||||
|
processRow(lhsEval, rhs, res, alpha, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void processRow(const LhsEval& lhsEval, const DenseRhsType& rhs, Res& res, const typename Res::Scalar& alpha, Index i)
|
||||||
|
{
|
||||||
|
typename Res::RowXpr res_i(res.row(i));
|
||||||
|
for(LhsInnerIterator it(lhsEval,i); it ;++it)
|
||||||
|
res_i += (alpha*it.value()) * rhs.row(it.index());
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user