// Mirror of https://gitlab.com/libeigen/eigen.git, synced 2025-06-04 18:54:00 +08:00.
// 315 lines, 13 KiB, C++.
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
#ifndef EIGEN_SPARSEDENSEPRODUCT_H
|
|
#define EIGEN_SPARSEDENSEPRODUCT_H
|
|
|
|
// IWYU pragma: private
|
|
#include "./InternalHeaderCheck.h"
|
|
|
|
namespace Eigen {
|
|
|
|
namespace internal {
|
|
|
|
template <>
|
|
struct product_promote_storage_type<Sparse, Dense, OuterProduct> {
|
|
typedef Sparse ret;
|
|
};
|
|
template <>
|
|
struct product_promote_storage_type<Dense, Sparse, OuterProduct> {
|
|
typedef Sparse ret;
|
|
};
|
|
|
|
template <typename SparseLhsType, typename DenseRhsType, typename DenseResType, typename AlphaType,
|
|
int LhsStorageOrder = ((SparseLhsType::Flags & RowMajorBit) == RowMajorBit) ? RowMajor : ColMajor,
|
|
bool ColPerCol = ((DenseRhsType::Flags & RowMajorBit) == 0) || DenseRhsType::ColsAtCompileTime == 1>
|
|
struct sparse_time_dense_product_impl;
|
|
|
|
template <typename SparseLhsType, typename DenseRhsType, typename DenseResType>
|
|
struct sparse_time_dense_product_impl<SparseLhsType, DenseRhsType, DenseResType, typename DenseResType::Scalar,
|
|
RowMajor, true> {
|
|
typedef internal::remove_all_t<SparseLhsType> Lhs;
|
|
typedef internal::remove_all_t<DenseRhsType> Rhs;
|
|
typedef internal::remove_all_t<DenseResType> Res;
|
|
typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
|
|
typedef evaluator<Lhs> LhsEval;
|
|
static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res,
|
|
const typename Res::Scalar& alpha) {
|
|
LhsEval lhsEval(lhs);
|
|
|
|
Index n = lhs.outerSize();
|
|
#ifdef EIGEN_HAS_OPENMP
|
|
Index threads = Eigen::nbThreads();
|
|
#endif
|
|
|
|
for (Index c = 0; c < rhs.cols(); ++c) {
|
|
#ifdef EIGEN_HAS_OPENMP
|
|
// This 20000 threshold has been found experimentally on 2D and 3D Poisson problems.
|
|
// It basically represents the minimal amount of work to be done to be worth it.
|
|
if (threads > 1 && lhsEval.nonZerosEstimate() > 20000) {
|
|
#pragma omp parallel for schedule(dynamic, (n + threads * 4 - 1) / (threads * 4)) num_threads(threads)
|
|
for (Index i = 0; i < n; ++i) processRow(lhsEval, rhs, res, alpha, i, c);
|
|
} else
|
|
#endif
|
|
{
|
|
for (Index i = 0; i < n; ++i) processRow(lhsEval, rhs, res, alpha, i, c);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void processRow(const LhsEval& lhsEval, const DenseRhsType& rhs, DenseResType& res,
|
|
const typename Res::Scalar& alpha, Index i, Index col) {
|
|
// Two accumulators, which breaks the dependency chain on the accumulator
|
|
// and allows more instruction-level parallelism in the following loop
|
|
typename Res::Scalar tmp_a(0);
|
|
typename Res::Scalar tmp_b(0);
|
|
for (LhsInnerIterator it(lhsEval, i); it; ++it) {
|
|
tmp_a += it.value() * rhs.coeff(it.index(), col);
|
|
++it;
|
|
if (it) {
|
|
tmp_b += it.value() * rhs.coeff(it.index(), col);
|
|
}
|
|
}
|
|
res.coeffRef(i, col) += alpha * (tmp_a + tmp_b);
|
|
}
|
|
};
|
|
|
|
// FIXME: what is the purpose of the following specialization? Is it for the BlockedSparse format?
// -> let's disable it for now as it is conflicting with generic scalar*matrix and matrix*scalar operators
// template<typename T1, typename T2/*, int Options_, typename StrideType_*/>
// struct ScalarBinaryOpTraits<T1, Ref<T2/*, Options_, StrideType_*/> >
// {
//   enum {
//     Defined = 1
//   };
//   typedef typename CwiseUnaryOp<scalar_multiple2_op<T1, typename T2::Scalar>, T2>::PlainObject ReturnType;
// };
|
|
|
|
template <typename SparseLhsType, typename DenseRhsType, typename DenseResType, typename AlphaType>
|
|
struct sparse_time_dense_product_impl<SparseLhsType, DenseRhsType, DenseResType, AlphaType, ColMajor, true> {
|
|
typedef internal::remove_all_t<SparseLhsType> Lhs;
|
|
typedef internal::remove_all_t<DenseRhsType> Rhs;
|
|
typedef internal::remove_all_t<DenseResType> Res;
|
|
typedef evaluator<Lhs> LhsEval;
|
|
typedef typename LhsEval::InnerIterator LhsInnerIterator;
|
|
static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res, const AlphaType& alpha) {
|
|
LhsEval lhsEval(lhs);
|
|
for (Index c = 0; c < rhs.cols(); ++c) {
|
|
for (Index j = 0; j < lhs.outerSize(); ++j) {
|
|
// typename Res::Scalar rhs_j = alpha * rhs.coeff(j,c);
|
|
typename ScalarBinaryOpTraits<AlphaType, typename Rhs::Scalar>::ReturnType rhs_j(alpha * rhs.coeff(j, c));
|
|
for (LhsInnerIterator it(lhsEval, j); it; ++it) res.coeffRef(it.index(), c) += it.value() * rhs_j;
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
template <typename SparseLhsType, typename DenseRhsType, typename DenseResType>
|
|
struct sparse_time_dense_product_impl<SparseLhsType, DenseRhsType, DenseResType, typename DenseResType::Scalar,
|
|
RowMajor, false> {
|
|
typedef internal::remove_all_t<SparseLhsType> Lhs;
|
|
typedef internal::remove_all_t<DenseRhsType> Rhs;
|
|
typedef internal::remove_all_t<DenseResType> Res;
|
|
typedef evaluator<Lhs> LhsEval;
|
|
typedef typename LhsEval::InnerIterator LhsInnerIterator;
|
|
static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res,
|
|
const typename Res::Scalar& alpha) {
|
|
Index n = lhs.rows();
|
|
LhsEval lhsEval(lhs);
|
|
|
|
#ifdef EIGEN_HAS_OPENMP
|
|
Index threads = Eigen::nbThreads();
|
|
// This 20000 threshold has been found experimentally on 2D and 3D Poisson problems.
|
|
// It basically represents the minimal amount of work to be done to be worth it.
|
|
if (threads > 1 && lhsEval.nonZerosEstimate() * rhs.cols() > 20000) {
|
|
#pragma omp parallel for schedule(dynamic, (n + threads * 4 - 1) / (threads * 4)) num_threads(threads)
|
|
for (Index i = 0; i < n; ++i) processRow(lhsEval, rhs, res, alpha, i);
|
|
} else
|
|
#endif
|
|
{
|
|
for (Index i = 0; i < n; ++i) processRow(lhsEval, rhs, res, alpha, i);
|
|
}
|
|
}
|
|
|
|
static void processRow(const LhsEval& lhsEval, const DenseRhsType& rhs, Res& res, const typename Res::Scalar& alpha,
|
|
Index i) {
|
|
typename Res::RowXpr res_i(res.row(i));
|
|
for (LhsInnerIterator it(lhsEval, i); it; ++it) res_i += (alpha * it.value()) * rhs.row(it.index());
|
|
}
|
|
};
|
|
|
|
template <typename SparseLhsType, typename DenseRhsType, typename DenseResType>
|
|
struct sparse_time_dense_product_impl<SparseLhsType, DenseRhsType, DenseResType, typename DenseResType::Scalar,
|
|
ColMajor, false> {
|
|
typedef internal::remove_all_t<SparseLhsType> Lhs;
|
|
typedef internal::remove_all_t<DenseRhsType> Rhs;
|
|
typedef internal::remove_all_t<DenseResType> Res;
|
|
typedef typename evaluator<Lhs>::InnerIterator LhsInnerIterator;
|
|
static void run(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res,
|
|
const typename Res::Scalar& alpha) {
|
|
evaluator<Lhs> lhsEval(lhs);
|
|
for (Index j = 0; j < lhs.outerSize(); ++j) {
|
|
typename Rhs::ConstRowXpr rhs_j(rhs.row(j));
|
|
for (LhsInnerIterator it(lhsEval, j); it; ++it) res.row(it.index()) += (alpha * it.value()) * rhs_j;
|
|
}
|
|
}
|
|
};
|
|
|
|
template <typename SparseLhsType, typename DenseRhsType, typename DenseResType, typename AlphaType>
|
|
inline void sparse_time_dense_product(const SparseLhsType& lhs, const DenseRhsType& rhs, DenseResType& res,
|
|
const AlphaType& alpha) {
|
|
sparse_time_dense_product_impl<SparseLhsType, DenseRhsType, DenseResType, AlphaType>::run(lhs, rhs, res, alpha);
|
|
}
|
|
|
|
} // end namespace internal
|
|
|
|
namespace internal {
|
|
|
|
template <typename Lhs, typename Rhs, int ProductType>
|
|
struct generic_product_impl<Lhs, Rhs, SparseShape, DenseShape, ProductType>
|
|
: generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, SparseShape, DenseShape, ProductType> > {
|
|
typedef typename Product<Lhs, Rhs>::Scalar Scalar;
|
|
|
|
template <typename Dest>
|
|
static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
|
|
typedef typename nested_eval<Lhs, ((Rhs::Flags & RowMajorBit) == 0) ? 1 : Rhs::ColsAtCompileTime>::type LhsNested;
|
|
typedef typename nested_eval<Rhs, ((Lhs::Flags & RowMajorBit) == 0) ? 1 : Dynamic>::type RhsNested;
|
|
LhsNested lhsNested(lhs);
|
|
RhsNested rhsNested(rhs);
|
|
internal::sparse_time_dense_product(lhsNested, rhsNested, dst, alpha);
|
|
}
|
|
};
|
|
|
|
template <typename Lhs, typename Rhs, int ProductType>
|
|
struct generic_product_impl<Lhs, Rhs, SparseTriangularShape, DenseShape, ProductType>
|
|
: generic_product_impl<Lhs, Rhs, SparseShape, DenseShape, ProductType> {};
|
|
|
|
template <typename Lhs, typename Rhs, int ProductType>
|
|
struct generic_product_impl<Lhs, Rhs, DenseShape, SparseShape, ProductType>
|
|
: generic_product_impl_base<Lhs, Rhs, generic_product_impl<Lhs, Rhs, DenseShape, SparseShape, ProductType> > {
|
|
typedef typename Product<Lhs, Rhs>::Scalar Scalar;
|
|
|
|
template <typename Dst>
|
|
static void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) {
|
|
typedef typename nested_eval<Lhs, ((Rhs::Flags & RowMajorBit) == 0) ? Dynamic : 1>::type LhsNested;
|
|
typedef typename nested_eval<Rhs, ((Lhs::Flags & RowMajorBit) == RowMajorBit) ? 1 : Lhs::RowsAtCompileTime>::type
|
|
RhsNested;
|
|
LhsNested lhsNested(lhs);
|
|
RhsNested rhsNested(rhs);
|
|
|
|
// transpose everything
|
|
Transpose<Dst> dstT(dst);
|
|
internal::sparse_time_dense_product(rhsNested.transpose(), lhsNested.transpose(), dstT, alpha);
|
|
}
|
|
};
|
|
|
|
template <typename Lhs, typename Rhs, int ProductType>
|
|
struct generic_product_impl<Lhs, Rhs, DenseShape, SparseTriangularShape, ProductType>
|
|
: generic_product_impl<Lhs, Rhs, DenseShape, SparseShape, ProductType> {};
|
|
|
|
template <typename LhsT, typename RhsT, bool NeedToTranspose>
|
|
struct sparse_dense_outer_product_evaluator {
|
|
protected:
|
|
typedef std::conditional_t<NeedToTranspose, RhsT, LhsT> Lhs1;
|
|
typedef std::conditional_t<NeedToTranspose, LhsT, RhsT> ActualRhs;
|
|
typedef Product<LhsT, RhsT, DefaultProduct> ProdXprType;
|
|
|
|
// if the actual left-hand side is a dense vector,
|
|
// then build a sparse-view so that we can seamlessly iterate over it.
|
|
typedef std::conditional_t<is_same<typename internal::traits<Lhs1>::StorageKind, Sparse>::value, Lhs1,
|
|
SparseView<Lhs1> >
|
|
ActualLhs;
|
|
typedef std::conditional_t<is_same<typename internal::traits<Lhs1>::StorageKind, Sparse>::value, Lhs1 const&,
|
|
SparseView<Lhs1> >
|
|
LhsArg;
|
|
|
|
typedef evaluator<ActualLhs> LhsEval;
|
|
typedef evaluator<ActualRhs> RhsEval;
|
|
typedef typename evaluator<ActualLhs>::InnerIterator LhsIterator;
|
|
typedef typename ProdXprType::Scalar Scalar;
|
|
|
|
public:
|
|
enum { Flags = NeedToTranspose ? RowMajorBit : 0, CoeffReadCost = HugeCost };
|
|
|
|
class InnerIterator : public LhsIterator {
|
|
public:
|
|
InnerIterator(const sparse_dense_outer_product_evaluator& xprEval, Index outer)
|
|
: LhsIterator(xprEval.m_lhsXprImpl, 0),
|
|
m_outer(outer),
|
|
m_empty(false),
|
|
m_factor(get(xprEval.m_rhsXprImpl, outer, typename internal::traits<ActualRhs>::StorageKind())) {}
|
|
|
|
EIGEN_STRONG_INLINE Index outer() const { return m_outer; }
|
|
EIGEN_STRONG_INLINE Index row() const { return NeedToTranspose ? m_outer : LhsIterator::index(); }
|
|
EIGEN_STRONG_INLINE Index col() const { return NeedToTranspose ? LhsIterator::index() : m_outer; }
|
|
|
|
EIGEN_STRONG_INLINE Scalar value() const { return LhsIterator::value() * m_factor; }
|
|
EIGEN_STRONG_INLINE operator bool() const { return LhsIterator::operator bool() && (!m_empty); }
|
|
|
|
protected:
|
|
Scalar get(const RhsEval& rhs, Index outer, Dense = Dense()) const { return rhs.coeff(outer); }
|
|
|
|
Scalar get(const RhsEval& rhs, Index outer, Sparse = Sparse()) {
|
|
typename RhsEval::InnerIterator it(rhs, outer);
|
|
if (it && it.index() == 0 && it.value() != Scalar(0)) return it.value();
|
|
m_empty = true;
|
|
return Scalar(0);
|
|
}
|
|
|
|
Index m_outer;
|
|
bool m_empty;
|
|
Scalar m_factor;
|
|
};
|
|
|
|
sparse_dense_outer_product_evaluator(const Lhs1& lhs, const ActualRhs& rhs)
|
|
: m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs) {
|
|
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
|
|
}
|
|
|
|
// transpose case
|
|
sparse_dense_outer_product_evaluator(const ActualRhs& rhs, const Lhs1& lhs)
|
|
: m_lhs(lhs), m_lhsXprImpl(m_lhs), m_rhsXprImpl(rhs) {
|
|
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
|
|
}
|
|
|
|
protected:
|
|
const LhsArg m_lhs;
|
|
evaluator<ActualLhs> m_lhsXprImpl;
|
|
evaluator<ActualRhs> m_rhsXprImpl;
|
|
};
|
|
|
|
// sparse * dense outer product
|
|
template <typename Lhs, typename Rhs>
|
|
struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, OuterProduct, SparseShape, DenseShape>
|
|
: sparse_dense_outer_product_evaluator<Lhs, Rhs, Lhs::IsRowMajor> {
|
|
typedef sparse_dense_outer_product_evaluator<Lhs, Rhs, Lhs::IsRowMajor> Base;
|
|
|
|
typedef Product<Lhs, Rhs> XprType;
|
|
typedef typename XprType::PlainObject PlainObject;
|
|
|
|
explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs()) {}
|
|
};
|
|
|
|
template <typename Lhs, typename Rhs>
|
|
struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, OuterProduct, DenseShape, SparseShape>
|
|
: sparse_dense_outer_product_evaluator<Lhs, Rhs, Rhs::IsRowMajor> {
|
|
typedef sparse_dense_outer_product_evaluator<Lhs, Rhs, Rhs::IsRowMajor> Base;
|
|
|
|
typedef Product<Lhs, Rhs> XprType;
|
|
typedef typename XprType::PlainObject PlainObject;
|
|
|
|
explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs()) {}
|
|
};
|
|
|
|
} // end namespace internal
|
|
|
|
} // end namespace Eigen
|
|
|
|
#endif // EIGEN_SPARSEDENSEPRODUCT_H
|