Small speed-up in row-major sparse dense product

This commit is contained in:
Erik Schultheis 2021-12-15 18:46:25 +00:00 committed by Rasmus Munk Larsen
parent 2d39da8af5
commit e939c06b0e

View File

@ -65,10 +65,18 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, t
static void processRow(const LhsEval& lhsEval, const DenseRhsType& rhs, DenseResType& res, const typename Res::Scalar& alpha, Index i, Index col)
{
typename Res::Scalar tmp(0);
for(LhsInnerIterator it(lhsEval,i); it ;++it)
tmp += it.value() * rhs.coeff(it.index(),col);
res.coeffRef(i,col) += alpha * tmp;
// Two accumulators, which breaks the dependency chain on the accumulator
// and allows more instruction-level parallelism in the following loop
typename Res::Scalar tmp_a(0);
typename Res::Scalar tmp_b(0);
for(LhsInnerIterator it(lhsEval,i); it ;++it) {
tmp_a += it.value() * rhs.coeff(it.index(), col);
++it;
if(it) {
tmp_b += it.value() * rhs.coeff(it.index(), col);
}
}
res.coeffRef(i, col) += alpha * (tmp_a + tmp_b);
}
};