* added innerSize / outerSize functions to MatrixBase

* added complete implementation of sparse matrix product
  (with a little glue in Eigen/Core)
* added an exhaustive bench of sparse products including GMM++ and MTL4
  => Eigen outperforms in all transposed/density configurations !
This commit is contained in:
Gael Guennebaud 2008-06-28 23:07:14 +00:00
parent 6917be9113
commit 027818d739
16 changed files with 622 additions and 380 deletions

View File

@ -11,13 +11,13 @@ namespace Eigen {
#include "src/Sparse/SparseUtil.h"
#include "src/Sparse/SparseMatrixBase.h"
#include "src/Sparse/SparseProduct.h"
#include "src/Sparse/SparseArray.h"
#include "src/Sparse/SparseMatrix.h"
#include "src/Sparse/HashMatrix.h"
#include "src/Sparse/LinkedVectorMatrix.h"
#include "src/Sparse/CoreIterators.h"
#include "src/Sparse/SparseSetter.h"
#include "src/Sparse/SparseProduct.h"
} // namespace Eigen

View File

@ -204,8 +204,8 @@ struct ei_assign_impl<Derived1, Derived2, NoVectorization, NoUnrolling>
static void run(Derived1 &dst, const Derived2 &src)
{
const bool rowMajor = int(Derived1::Flags)&RowMajorBit;
const int innerSize = rowMajor ? dst.cols() : dst.rows();
const int outerSize = rowMajor ? dst.rows() : dst.cols();
const int innerSize = dst.innerSize();
const int outerSize = dst.outerSize();
for(int j = 0; j < outerSize; j++)
for(int i = 0; i < innerSize; i++)
{
@ -233,7 +233,7 @@ struct ei_assign_impl<Derived1, Derived2, NoVectorization, InnerUnrolling>
{
const bool rowMajor = int(Derived1::Flags)&RowMajorBit;
const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime;
const int outerSize = rowMajor ? dst.rows() : dst.cols();
const int outerSize = dst.outerSize();
for(int j = 0; j < outerSize; j++)
ei_assign_novec_InnerUnrolling<Derived1, Derived2, 0, innerSize>
::run(dst, src, j);
@ -250,8 +250,8 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, NoUnrolling>
static void run(Derived1 &dst, const Derived2 &src)
{
const bool rowMajor = int(Derived1::Flags)&RowMajorBit;
const int innerSize = rowMajor ? dst.cols() : dst.rows();
const int outerSize = rowMajor ? dst.rows() : dst.cols();
const int innerSize = dst.innerSize();
const int outerSize = dst.outerSize();
const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
for(int j = 0; j < outerSize; j++)
{
@ -282,7 +282,7 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorization, InnerUnrolling>
{
const bool rowMajor = int(Derived1::Flags)&RowMajorBit;
const int innerSize = rowMajor ? Derived1::ColsAtCompileTime : Derived1::RowsAtCompileTime;
const int outerSize = rowMajor ? dst.rows() : dst.cols();
const int outerSize = dst.outerSize();
for(int j = 0; j < outerSize; j++)
ei_assign_innervec_InnerUnrolling<Derived1, Derived2, 0, innerSize>
::run(dst, src, j);
@ -337,8 +337,8 @@ struct ei_assign_impl<Derived1, Derived2, SliceVectorization, NoUnrolling>
{
const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
const bool rowMajor = Derived1::Flags&RowMajorBit;
const int innerSize = rowMajor ? dst.cols() : dst.rows();
const int outerSize = rowMajor ? dst.rows() : dst.cols();
const int innerSize = dst.innerSize();
const int outerSize = dst.outerSize();
const int alignedInnerSize = (innerSize/packetSize)*packetSize;
for(int i = 0; i < outerSize; i++)

View File

@ -154,11 +154,20 @@ template<typename Derived> class MatrixBase
/** \returns the number of coefficients, which is \a rows()*cols().
* \sa rows(), cols(), SizeAtCompileTime. */
inline int size() const { return rows() * cols(); }
/** \returns the number of nonzero coefficients which is in practice the number
* of stored coefficients. */
inline int nonZeros() const { return derived.nonZeros(); }
/** \returns true if either the number of rows or the number of columns is equal to 1.
* In other words, this function returns
* \code rows()==1 || cols()==1 \endcode
* \sa rows(), cols(), IsVectorAtCompileTime. */
inline bool isVector() const { return rows()==1 || cols()==1; }
/** \returns the size of the storage major dimension,
* i.e., the number of columns for a columns major matrix, and the number of rows otherwise */
int outerSize() const { return (int(Flags)&RowMajorBit) ? this->rows() : this->cols(); }
/** \returns the size of the inner dimension according to the storage order,
* i.e., the number of rows for a columns major matrix, and the number of cols otherwise */
int innerSize() const { return (int(Flags)&RowMajorBit) ? this->cols() : this->rows(); }
/** Represents a constant matrix */
typedef CwiseNullaryOp<ei_scalar_constant_op<Scalar>,Derived> ConstantReturnType;
@ -206,6 +215,10 @@ template<typename Derived> class MatrixBase
template<typename Derived1, typename Derived2>
Derived& lazyAssign(const Product<Derived1,Derived2,CacheFriendlyProduct>& product);
/** Overloaded for sparse product evaluation */
template<typename Derived1, typename Derived2>
Derived& lazyAssign(const Product<Derived1,Derived2,SparseProduct>& product);
CommaInitializer operator<< (const Scalar& s);
template<typename OtherDerived>

View File

@ -92,6 +92,8 @@ template<typename Lhs, typename Rhs> struct ei_product_mode
{
enum{ value = ((Rhs::Flags&Diagonal)==Diagonal) || ((Lhs::Flags&Diagonal)==Diagonal)
? DiagonalProduct
: (Rhs::Flags & Lhs::Flags & SparseBit)
? SparseProduct
: Lhs::MaxRowsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
&& Rhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
&& Lhs::MaxColsAtCompileTime >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
@ -147,8 +149,7 @@ struct ei_traits<Product<LhsNested, RhsNested, ProductMode> >
CanVectorizeInner = LhsRowMajor && (!RhsRowMajor) && (LhsFlags & PacketAccessBit) && (RhsFlags & PacketAccessBit)
&& (InnerSize!=Dynamic) && (InnerSize % ei_packet_traits<Scalar>::size == 0),
EvalToRowMajor = (RhsFlags & RowMajorBit)
&& (ProductMode==(int)CacheFriendlyProduct ? (int)LhsFlags & RowMajorBit : (!CanVectorizeLhs)),
EvalToRowMajor = RhsRowMajor && (ProductMode==(int)CacheFriendlyProduct ? LhsRowMajor : (!CanVectorizeLhs)),
RemovedBits = ~((EvalToRowMajor ? 0 : RowMajorBit)
| ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit)),

View File

@ -71,7 +71,7 @@ template<typename MatrixType> class Transpose
inline int rows() const { return m_matrix.cols(); }
inline int cols() const { return m_matrix.rows(); }
inline int nonZeros() const { return m_matrix.nonZeros(); }
inline int stride(void) const { return m_matrix.stride(); }
inline Scalar& coeffRef(int row, int col)

View File

@ -211,6 +211,11 @@ template<typename T> struct ei_unref<T&> { typedef T type; };
template<typename T> struct ei_unconst { typedef T type; };
template<typename T> struct ei_unconst<const T> { typedef T type; };
template<typename T> struct ei_cleantype { typedef T type; };
template<typename T> struct ei_cleantype<const T> { typedef T type; };
template<typename T> struct ei_cleantype<const T&> { typedef T type; };
template<typename T> struct ei_cleantype<T&> { typedef T type; };
template<typename T> struct ei_must_nest_by_value { enum { ret = false }; };
template<typename T> struct ei_must_nest_by_value<NestByValue<T> > { enum { ret = true }; };

View File

@ -34,7 +34,7 @@ struct ei_traits<HashMatrix<_Scalar, _Flags> >
ColsAtCompileTime = Dynamic,
MaxRowsAtCompileTime = Dynamic,
MaxColsAtCompileTime = Dynamic,
Flags = _Flags,
Flags = SparseBit | _Flags,
CoeffReadCost = NumTraits<Scalar>::ReadCost,
SupportedAccessPatterns = RandomAccessPattern
};

View File

@ -34,7 +34,7 @@ struct ei_traits<LinkedVectorMatrix<_Scalar,_Flags> >
ColsAtCompileTime = Dynamic,
MaxRowsAtCompileTime = Dynamic,
MaxColsAtCompileTime = Dynamic,
Flags = _Flags,
Flags = SparseBit | _Flags,
CoeffReadCost = NumTraits<Scalar>::ReadCost,
SupportedAccessPatterns = InnerCoherentAccessPattern
};

View File

@ -43,7 +43,7 @@ struct ei_traits<SparseMatrix<_Scalar, _Flags> >
ColsAtCompileTime = Dynamic,
MaxRowsAtCompileTime = Dynamic,
MaxColsAtCompileTime = Dynamic,
Flags = _Flags,
Flags = SparseBit | _Flags,
CoeffReadCost = NumTraits<Scalar>::ReadCost,
SupportedAccessPatterns = FullyCoherentAccessPattern
};
@ -68,8 +68,9 @@ class SparseMatrix : public SparseMatrixBase<SparseMatrix<_Scalar, _Flags> >
inline int rows() const { return m_rows; }
inline int cols() const { return m_cols; }
inline int innerNonZeros(int j) const { return m_colPtrs[j+1]-m_colPtrs[j]; }
inline const Scalar& coeff(int row, int col) const
inline Scalar coeff(int row, int col) const
{
int id = m_colPtrs[col];
int end = m_colPtrs[col+1];
@ -161,6 +162,13 @@ class SparseMatrix : public SparseMatrixBase<SparseMatrix<_Scalar, _Flags> >
resize(rows, cols);
}
template<typename OtherDerived>
inline SparseMatrix(const MatrixBase<OtherDerived>& other)
: m_rows(0), m_cols(0), m_colPtrs(0)
{
*this = other.derived();
}
inline void shallowCopy(const SparseMatrix& other)
{
EIGEN_DBG_SPARSE(std::cout << "SparseMatrix:: shallowCopy\n");
@ -192,15 +200,7 @@ class SparseMatrix : public SparseMatrixBase<SparseMatrix<_Scalar, _Flags> >
template<typename OtherDerived>
inline SparseMatrix& operator=(const MatrixBase<OtherDerived>& other)
{
return SparseMatrixBase<SparseMatrix>::operator=(other);
}
template<typename OtherDerived>
SparseMatrix<Scalar> operator*(const MatrixBase<OtherDerived>& other)
{
SparseMatrix<Scalar> res(rows(), other.cols());
ei_sparse_product<SparseMatrix,OtherDerived>(*this,other.derived(),res);
return res;
return SparseMatrixBase<SparseMatrix>::operator=(other.derived());
}
friend std::ostream & operator << (std::ostream & s, const SparseMatrix& m)

View File

@ -49,9 +49,6 @@ class SparseMatrixBase : public MatrixBase<Derived>
bool isRValue() const { return m_isRValue; }
Derived& temporary() { m_isRValue = true; return derived(); }
int outerSize() const { return RowMajor ? this->rows() : this->cols(); }
int innerSize() const { return RowMajor ? this->cols() : this->rows(); }
inline Derived& operator=(const Derived& other)
{
if (other.isRValue())
@ -64,31 +61,27 @@ class SparseMatrixBase : public MatrixBase<Derived>
return derived();
}
template<typename OtherDerived>
inline Derived& operator=(const MatrixBase<OtherDerived>& other)
{
const bool transpose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
const int outerSize = (int(OtherDerived::Flags) & RowMajorBit) ? other.rows() : other.cols();
// eval to a temporary and then do a shallow copy
typename ei_meta_if<transpose, LinkedVectorMatrix<Scalar,Flags&RowMajorBit>, Derived>::ret temp(other.rows(), other.cols());
const int outerSize = other.outerSize();
typedef typename ei_meta_if<transpose, LinkedVectorMatrix<Scalar,Flags&RowMajorBit>, Derived>::ret TempType;
TempType temp(other.rows(), other.cols());
temp.startFill(std::max(this->rows(),this->cols())*2);
// std::cout << other.rows() << " xm " << other.cols() << "\n";
for (int j=0; j<outerSize; ++j)
{
for (typename OtherDerived::InnerIterator it(other.derived(), j); it; ++it)
{
// std::cout << other.rows() << " x " << other.cols() << "\n";
// std::cout << it.m_matrix.rows() << "\n";
Scalar v = it.value();
if (v!=Scalar(0))
if (RowMajor) temp.fill(j,it.index()) = v;
if (OtherDerived::Flags & RowMajorBit) temp.fill(j,it.index()) = v;
else temp.fill(it.index(),j) = v;
}
}
temp.endFill();
derived() = temp.temporary();
return derived();
}
@ -97,6 +90,7 @@ class SparseMatrixBase : public MatrixBase<Derived>
inline Derived& operator=(const SparseMatrixBase<OtherDerived>& other)
{
const bool transpose = (Flags & RowMajorBit) != (OtherDerived::Flags & RowMajorBit);
// std::cout << "eval transpose = " << transpose << "\n";
const int outerSize = (int(OtherDerived::Flags) & RowMajorBit) ? other.rows() : other.cols();
if ((!transpose) && other.isRValue())
{
@ -120,12 +114,15 @@ class SparseMatrixBase : public MatrixBase<Derived>
return derived();
}
template<typename Lhs, typename Rhs>
inline Derived& operator=(const Product<Lhs,Rhs,SparseProduct>& product);
friend std::ostream & operator << (std::ostream & s, const SparseMatrixBase& m)
{
if (Flags&RowMajorBit)
{
for (int row=0; row<m.rows(); ++row)
for (int row=0; row<m.outerSize(); ++row)
{
int col = 0;
for (typename Derived::InnerIterator it(m.derived(), row); it; ++it)
@ -158,6 +155,4 @@ class SparseMatrixBase : public MatrixBase<Derived>
mutable bool m_hasBeenCopied;
};
#endif // EIGEN_SPARSEMATRIXBASE_H

View File

@ -25,145 +25,308 @@
#ifndef EIGEN_SPARSEPRODUCT_H
#define EIGEN_SPARSEPRODUCT_H
#define DENSE_TMP 1
#define MAP_TMP 2
#define LIST_TMP 3
#define TMP_TMP 3
template<typename Scalar>
struct ListEl
// sparse product return type specialization
template<typename Lhs, typename Rhs>
struct ProductReturnType<Lhs,Rhs,SparseProduct>
{
int next;
int index;
Scalar value;
typedef typename ei_traits<Lhs>::Scalar Scalar;
enum {
LhsRowMajor = ei_traits<Lhs>::Flags & RowMajorBit,
RhsRowMajor = ei_traits<Rhs>::Flags & RowMajorBit,
TransposeRhs = (!LhsRowMajor) && RhsRowMajor,
TransposeLhs = LhsRowMajor && (!RhsRowMajor)
};
// FIXME if we transpose let's evaluate to a LinkedVectorMatrix since it is the
// type of the temporary to perform the transpose op
typedef typename ei_meta_if<TransposeLhs,
SparseMatrix<Scalar,0>,
typename ei_nested<Lhs,Rhs::RowsAtCompileTime>::type>::ret LhsNested;
typedef typename ei_meta_if<TransposeRhs,
SparseMatrix<Scalar,0>,
typename ei_nested<Rhs,Lhs::RowsAtCompileTime>::type>::ret RhsNested;
typedef Product<typename ei_unconst<LhsNested>::type,
typename ei_unconst<RhsNested>::type, SparseProduct> Type;
};
template<typename Lhs, typename Rhs>
static void ei_sparse_product(const Lhs& lhs, const Rhs& rhs, SparseMatrix<typename ei_traits<Lhs>::Scalar>& res)
template<typename LhsNested, typename RhsNested>
struct ei_traits<Product<LhsNested, RhsNested, SparseProduct> >
{
int rows = lhs.rows();
int cols = rhs.rows();
int size = lhs.cols();
// clean the nested types:
typedef typename ei_unconst<typename ei_unref<LhsNested>::type>::type _LhsNested;
typedef typename ei_unconst<typename ei_unref<RhsNested>::type>::type _RhsNested;
typedef typename _LhsNested::Scalar Scalar;
float ratio = std::max(float(lhs.nonZeros())/float(lhs.rows()*lhs.cols()), float(rhs.nonZeros())/float(rhs.rows()*rhs.cols()));
std::cout << ratio << "\n";
enum {
LhsCoeffReadCost = _LhsNested::CoeffReadCost,
RhsCoeffReadCost = _RhsNested::CoeffReadCost,
LhsFlags = _LhsNested::Flags,
RhsFlags = _RhsNested::Flags,
ei_assert(size == rhs.rows());
typedef typename ei_traits<Lhs>::Scalar Scalar;
#if (TMP_TMP == MAP_TMP)
std::map<int,Scalar> tmp;
#elif (TMP_TMP == LIST_TMP)
std::vector<ListEl<Scalar> > tmp(2*rows);
#else
std::vector<Scalar> tmp(rows);
#endif
res.resize(rows, cols);
res.startFill(2*std::max(rows, cols));
for (int j=0; j<cols; ++j)
{
#if (TMP_TMP == MAP_TMP)
tmp.clear();
#elif (TMP_TMP == LIST_TMP)
int tmp_size = 0;
int tmp_start = -1;
#else
for (int k=0; k<rows; ++k)
tmp[k] = 0;
#endif
for (typename Rhs::InnerIterator rhsIt(rhs, j); rhsIt; ++rhsIt)
RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
InnerSize = EIGEN_ENUM_MIN(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),
MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
LhsRowMajor = LhsFlags & RowMajorBit,
RhsRowMajor = RhsFlags & RowMajorBit,
EvalToRowMajor = (RhsFlags & LhsFlags & RowMajorBit),
RemovedBits = ~((EvalToRowMajor ? 0 : RowMajorBit)
| ((RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) ? 0 : LargeBit)),
Flags = (int(LhsFlags | RhsFlags) & HereditaryBits & RemovedBits)
| EvalBeforeAssigningBit
| EvalBeforeNestingBit,
CoeffReadCost = Dynamic
};
};
template<typename LhsNested, typename RhsNested> class Product<LhsNested,RhsNested,SparseProduct> : ei_no_assignment_operator,
public MatrixBase<Product<LhsNested, RhsNested, SparseProduct> >
{
public:
EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
private:
typedef typename ei_traits<Product>::_LhsNested _LhsNested;
typedef typename ei_traits<Product>::_RhsNested _RhsNested;
public:
template<typename Lhs, typename Rhs>
inline Product(const Lhs& lhs, const Rhs& rhs)
: m_lhs(lhs), m_rhs(rhs)
{
#if (TMP_TMP == MAP_TMP)
typename std::map<int,Scalar>::iterator hint = tmp.begin();
typename std::map<int,Scalar>::iterator r;
#elif (TMP_TMP == LIST_TMP)
int tmp_el = tmp_start;
#endif
for (typename Lhs::InnerIterator lhsIt(lhs, rhsIt.index()); lhsIt; ++lhsIt)
{
#if (TMP_TMP == MAP_TMP)
r = hint;
Scalar v = lhsIt.value() * rhsIt.value();
int id = lhsIt.index();
while (r!=tmp.end() && r->first < id)
++r;
if (r!=tmp.end() && r->first==id)
{
r->second += v;
hint = r;
}
else
hint = tmp.insert(r, std::pair<int,Scalar>(id, v));
++hint;
#elif (TMP_TMP == LIST_TMP)
Scalar v = lhsIt.value() * rhsIt.value();
int id = lhsIt.index();
if (tmp_size==0)
{
tmp_start = 0;
tmp_el = 0;
tmp_size++;
tmp[0].value = v;
tmp[0].index = id;
tmp[0].next = -1;
}
else if (id<tmp[tmp_start].index)
{
tmp[tmp_size].value = v;
tmp[tmp_size].index = id;
tmp[tmp_size].next = tmp_start;
tmp_start = tmp_size;
tmp_size++;
}
else
{
int nextel = tmp[tmp_el].next;
while (nextel >= 0 && tmp[nextel].index<=id)
{
tmp_el = nextel;
nextel = tmp[nextel].next;
}
ei_assert(lhs.cols() == rhs.rows());
}
if (tmp[tmp_el].index==id)
Scalar coeff(int, int) const { ei_assert(false && "eigen internal error"); }
Scalar& coeffRef(int, int) { ei_assert(false && "eigen internal error"); }
inline int rows() const { return m_lhs.rows(); }
inline int cols() const { return m_rhs.cols(); }
const _LhsNested& lhs() const { return m_lhs; }
const _LhsNested& rhs() const { return m_rhs; }
protected:
const LhsNested m_lhs;
const RhsNested m_rhs;
};
const int RowMajor = RowMajorBit;
const int ColMajor = 0;
template<typename Lhs, typename Rhs, typename ResultType,
int LhsStorageOrder = ei_traits<Lhs>::Flags&RowMajorBit,
int RhsStorageOrder = ei_traits<Rhs>::Flags&RowMajorBit,
int ResStorageOrder = ei_traits<ResultType>::Flags&RowMajorBit>
struct ei_sparse_product_selector;
template<typename Lhs, typename Rhs, typename ResultType>
struct ei_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,ColMajor>
{
typedef typename ei_traits<typename ei_cleantype<Lhs>::type>::Scalar Scalar;
struct ListEl
{
int next;
int index;
Scalar value;
};
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
// make sure to call innerSize/outerSize since we fake the storage order.
int rows = lhs.innerSize();
int cols = rhs.outerSize();
int size = lhs.outerSize();
ei_assert(size == rhs.rows());
// allocate a temporary buffer
Scalar* buffer = new Scalar[rows];
// estimate the number of non zero entries
float ratioLhs = float(lhs.nonZeros())/float(lhs.rows()*lhs.cols());
float avgNnzPerRhsColumn = float(rhs.nonZeros())/float(cols);
float ratioRes = std::min(ratioLhs * avgNnzPerRhsColumn, 1.f);
res.resize(rows, cols);
res.startFill(ratioRes*rows*cols);
for (int j=0; j<cols; ++j)
{
// let's do a more accurate determination of the nnz ratio for the current column j of res
//float ratioColRes = std::min(ratioLhs * rhs.innerNonZeros(j), 1.f);
// FIXME find a nice way to get the number of nonzeros of a sub matrix (here an inner vector)
float ratioColRes = ratioRes;
if (ratioColRes>0.1)
{
// dense path, the scalar * columns products are accumulated into a dense column
Scalar* __restrict__ tmp = buffer;
// set to zero
for (int k=0; k<rows; ++k)
tmp[k] = 0;
for (typename Rhs::InnerIterator rhsIt(rhs, j); rhsIt; ++rhsIt)
{
// FIXME should be written like this: tmp += rhsIt.value() * lhs.col(rhsIt.index())
Scalar x = rhsIt.value();
for (typename Lhs::InnerIterator lhsIt(lhs, rhsIt.index()); lhsIt; ++lhsIt)
{
tmp[tmp_el].value += v;
}
else
{
tmp[tmp_size].value = v;
tmp[tmp_size].index = id;
tmp[tmp_size].next = tmp[tmp_el].next;
tmp[tmp_el].next = tmp_size;
tmp_size++;
tmp[lhsIt.index()] += lhsIt.value() * x;
}
}
#else
tmp[lhsIt.index()] += lhsIt.value() * rhsIt.value();
#endif
//res.coeffRef(lhsIt.index(), j) += lhsIt.value() * rhsIt.value();
// copy the temporary to the respective res.col()
for (int k=0; k<rows; ++k)
if (tmp[k]!=0)
res.fill(k, j) = tmp[k];
}
else
{
ListEl* __restrict__ tmp = reinterpret_cast<ListEl*>(buffer);
// sparse path, the scalar * columns products are accumulated into a linked list
int tmp_size = 0;
int tmp_start = -1;
for (typename Rhs::InnerIterator rhsIt(rhs, j); rhsIt; ++rhsIt)
{
int tmp_el = tmp_start;
for (typename Lhs::InnerIterator lhsIt(lhs, rhsIt.index()); lhsIt; ++lhsIt)
{
Scalar v = lhsIt.value() * rhsIt.value();
int id = lhsIt.index();
if (tmp_size==0)
{
tmp_start = 0;
tmp_el = 0;
tmp_size++;
tmp[0].value = v;
tmp[0].index = id;
tmp[0].next = -1;
}
else if (id<tmp[tmp_start].index)
{
tmp[tmp_size].value = v;
tmp[tmp_size].index = id;
tmp[tmp_size].next = tmp_start;
tmp_start = tmp_size;
tmp_size++;
}
else
{
int nextel = tmp[tmp_el].next;
while (nextel >= 0 && tmp[nextel].index<=id)
{
tmp_el = nextel;
nextel = tmp[nextel].next;
}
if (tmp[tmp_el].index==id)
{
tmp[tmp_el].value += v;
}
else
{
tmp[tmp_size].value = v;
tmp[tmp_size].index = id;
tmp[tmp_size].next = tmp[tmp_el].next;
tmp[tmp_el].next = tmp_size;
tmp_size++;
}
}
}
}
int k = tmp_start;
while (k>=0)
{
if (tmp[k].value!=0)
res.fill(tmp[k].index, j) = tmp[k].value;
k = tmp[k].next;
}
}
}
#if (TMP_TMP == MAP_TMP)
for (typename std::map<int,Scalar>::const_iterator k=tmp.begin(); k!=tmp.end(); ++k)
if (k->second!=0)
res.fill(k->first, j) = k->second;
#elif (TMP_TMP == LIST_TMP)
int k = tmp_start;
while (k>=0)
{
if (tmp[k].value!=0)
res.fill(tmp[k].index, j) = tmp[k].value;
k = tmp[k].next;
}
#else
for (int k=0; k<rows; ++k)
if (tmp[k]!=0)
res.fill(k, j) = tmp[k];
#endif
res.endFill();
}
res.endFill();
};
std::cout << " => " << float(res.nonZeros())/float(res.rows()*res.cols()) << "\n";
template<typename Lhs, typename Rhs, typename ResultType>
struct ei_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,ColMajor,RowMajor>
{
typedef SparseMatrix<typename ResultType::Scalar> SparseTemporaryType;
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
SparseTemporaryType _res(res.rows(), res.cols());
ei_sparse_product_selector<Lhs,Rhs,SparseTemporaryType,ColMajor,ColMajor,ColMajor>::run(lhs, rhs, _res);
res = _res;
}
};
template<typename Lhs, typename Rhs, typename ResultType>
struct ei_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor,RowMajor>
{
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
// let's transpose the product and fake the matrices are column major
ei_sparse_product_selector<Rhs,Lhs,ResultType,ColMajor,ColMajor,ColMajor>::run(rhs, lhs, res);
}
};
template<typename Lhs, typename Rhs, typename ResultType>
struct ei_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,RowMajor,ColMajor>
{
typedef SparseMatrix<typename ResultType::Scalar> SparseTemporaryType;
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
{
// let's transpose the product and fake the matrices are column major
ei_sparse_product_selector<Rhs,Lhs,ResultType,ColMajor,ColMajor,RowMajor>::run(rhs, lhs, res);
}
};
// NOTE eventually let's transpose one argument even in this case since it might be expensive if
// the result is not dense.
// template<typename Lhs, typename Rhs, typename ResultType, int ResStorageOrder>
// struct ei_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,ColMajor,ResStorageOrder>
// {
// static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
// {
// // trivial product as lhs.row/rhs.col dot products
// // loop over the prefered order of the result
// }
// };
// NOTE the 2 others cases (col row *) must never occurs since they are catched
// by ProductReturnType which transform it to (col col *) by evaluating rhs.
template<typename Derived>
template<typename Lhs, typename Rhs>
inline Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,SparseProduct>& product)
{
// std::cout << "sparse product to dense\n";
ei_sparse_product_selector<
typename ei_cleantype<Lhs>::type,
typename ei_cleantype<Rhs>::type,
typename ei_cleantype<Derived>::type>::run(product.lhs(),product.rhs(),derived());
return derived();
}
template<typename Derived>
template<typename Lhs, typename Rhs>
inline Derived& SparseMatrixBase<Derived>::operator=(const Product<Lhs,Rhs,SparseProduct>& product)
{
// std::cout << "sparse product to sparse\n";
ei_sparse_product_selector<
typename ei_cleantype<Lhs>::type,
typename ei_cleantype<Rhs>::type,
Derived>::run(product.lhs(),product.rhs(),derived());
return derived();
}
#endif // EIGEN_SPARSEPRODUCT_H

View File

@ -31,6 +31,7 @@
#define EIGEN_DBG_SPARSE(X) X
#endif
template<typename Derived> class SparseMatrixBase;
template<typename _Scalar, int _Flags = 0> class SparseMatrix;
template<typename _Scalar, int _Flags = 0> class HashMatrix;
template<typename _Scalar, int _Flags = 0> class LinkedVectorMatrix;

75
bench/BenchSparseUtil.h Normal file
View File

@ -0,0 +1,75 @@
#include <Eigen/Array>
#include <Eigen/Sparse>
#include <bench/BenchTimer.h>
using namespace std;
using namespace Eigen;
USING_PART_OF_NAMESPACE_EIGEN
#ifndef SIZE
#define SIZE 1024
#endif
#ifndef DENSITY
#define DENSITY 0.01
#endif
#ifndef SCALAR
#define SCALAR float
#endif
typedef SCALAR Scalar;
typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
typedef SparseMatrix<Scalar> EigenSparseMatrix;
void fillMatrix(float density, int rows, int cols, EigenSparseMatrix& dst)
{
dst.startFill(rows*cols*density);
for(int j = 0; j < cols; j++)
{
for(int i = 0; i < rows; i++)
{
Scalar v = (ei_random<float>(0,1) < density) ? ei_random<Scalar>() : 0;
if (v!=0)
dst.fill(i,j) = v;
}
}
dst.endFill();
}
void eiToDense(const EigenSparseMatrix& src, DenseMatrix& dst)
{
dst.setZero();
for (int j=0; j<src.cols(); ++j)
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
dst(it.index(),j) = it.value();
}
#ifndef NOGMM
#include "gmm/gmm.h"
typedef gmm::csc_matrix<Scalar> GmmSparse;
typedef gmm::col_matrix< gmm::wsvector<Scalar> > GmmDynSparse;
void eiToGmm(const EigenSparseMatrix& src, GmmSparse& dst)
{
GmmDynSparse tmp(src.rows(), src.cols());
for (int j=0; j<src.cols(); ++j)
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
tmp(it.index(),j) = it.value();
gmm::copy(tmp, dst);
}
#endif
#ifndef NOMTL
#include <boost/numeric/mtl/mtl.hpp>
typedef mtl::compressed2D<Scalar> MtlSparse;
void eiToMtl(const EigenSparseMatrix& src, MtlSparse& dst)
{
mtl::matrix::inserter<MtlSparse> ins(dst);
for (int j=0; j<src.cols(); ++j)
for (EigenSparseMatrix::InnerIterator it(src.derived(), j); it; ++it)
ins[it.index()][j] = it.value();
}
#endif

View File

@ -1,211 +0,0 @@
// g++ -O3 -DNDEBUG sparse_01.cpp -I .. -o sparse_01 && ./sparse_01
#include <Eigen/Array>
#include <Eigen/Sparse>
#include <bench/BenchTimer.h>
#include "gmm/gmm.h"
using namespace std;
using namespace Eigen;
USING_PART_OF_NAMESPACE_EIGEN
#ifndef REPEAT
#define REPEAT 10
#endif
#define REPEATPRODUCT 1
#define SIZE 10
#define DENSITY 0.2
// #define NODENSEMATRIX
typedef MatrixXf DenseMatrix;
// typedef Matrix<float,SIZE,SIZE> DenseMatrix;
typedef SparseMatrix<float> EigenSparseMatrix;
typedef gmm::csc_matrix<float> GmmSparse;
typedef gmm::col_matrix< gmm::wsvector<float> > GmmDynSparse;
void fillMatrix(float density, int rows, int cols, DenseMatrix* pDenseMatrix, EigenSparseMatrix* pSparseMatrix, GmmSparse* pGmmMatrix=0)
{
GmmDynSparse gmmT(rows, cols);
if (pSparseMatrix)
pSparseMatrix->startFill(rows*cols*density);
for(int j = 0; j < cols; j++)
{
for(int i = 0; i < rows; i++)
{
float v = (ei_random<float>(0,1) < density) ? ei_random<float>() : 0;
if (pDenseMatrix)
(*pDenseMatrix)(i,j) = v;
if (v!=0)
{
if (pSparseMatrix)
pSparseMatrix->fill(i,j) = v;
if (pGmmMatrix)
gmmT(i,j) = v;
}
}
}
if (pSparseMatrix)
pSparseMatrix->endFill();
if (pGmmMatrix)
gmm::copy(gmmT, *pGmmMatrix);
}
int main(int argc, char *argv[])
{
int rows = SIZE;
int cols = SIZE;
float density = DENSITY;
// dense matrices
#ifndef NODENSEMATRIX
DenseMatrix m1(rows,cols), m2(rows,cols), m3(rows,cols), m4(rows,cols);
#endif
// sparse matrices
EigenSparseMatrix sm1(rows,cols), sm2(rows,cols), sm3(rows,cols), sm4(rows,cols);
HashMatrix<float> hm4(rows,cols);
// GMM++ matrices
GmmDynSparse gmmT4(rows,cols);
GmmSparse gmmM1(rows,cols), gmmM2(rows,cols), gmmM3(rows,cols), gmmM4(rows,cols);
#ifndef NODENSEMATRIX
fillMatrix(density, rows, cols, &m1, &sm1, &gmmM1);
fillMatrix(density, rows, cols, &m2, &sm2, &gmmM2);
fillMatrix(density, rows, cols, &m3, &sm3, &gmmM3);
#else
fillMatrix(density, rows, cols, 0, &sm1, &gmmM1);
fillMatrix(density, rows, cols, 0, &sm2, &gmmM2);
fillMatrix(density, rows, cols, 0, &sm3, &gmmM3);
#endif
BenchTimer timer;
//--------------------------------------------------------------------------------
// COEFF WISE OPERATORS
//--------------------------------------------------------------------------------
#if 1
std::cout << "\n\n\"m4 = m1 + m2 + 2 * m3\":\n\n";
timer.reset();
timer.start();
asm("#begin");
for (int k=0; k<REPEAT; ++k)
m4 = m1 + m2 + 2 * m3;
asm("#end");
timer.stop();
std::cout << "Eigen dense = " << timer.value() << endl;
timer.reset();
timer.start();
for (int k=0; k<REPEAT; ++k)
sm4 = sm1 + sm2 + 2 * sm3;
timer.stop();
std::cout << "Eigen sparse = " << timer.value() << endl;
timer.reset();
timer.start();
for (int k=0; k<REPEAT; ++k)
hm4 = sm1 + sm2 + 2 * sm3;
timer.stop();
std::cout << "Eigen hash = " << timer.value() << endl;
LinkedVectorMatrix<float> lm4(rows, cols);
timer.reset();
timer.start();
for (int k=0; k<REPEAT; ++k)
lm4 = sm1 + sm2 + 2 * sm3;
timer.stop();
std::cout << "Eigen linked vector = " << timer.value() << endl;
timer.reset();
timer.start();
for (int k=0; k<REPEAT; ++k)
{
gmm::add(gmmM1, gmmM2, gmmT4);
gmm::add(gmm::scaled(gmmM3,2), gmmT4);
}
timer.stop();
std::cout << "GMM++ sparse = " << timer.value() << endl;
#endif
//--------------------------------------------------------------------------------
// PRODUCT
//--------------------------------------------------------------------------------
#if 0
std::cout << "\n\nProduct:\n\n";
#ifndef NODENSEMATRIX
timer.reset();
timer.start();
asm("#begin");
for (int k=0; k<REPEATPRODUCT; ++k)
m1 = m1 * m2;
asm("#end");
timer.stop();
std::cout << "Eigen dense = " << timer.value() << endl;
#endif
timer.reset();
timer.start();
for (int k=0; k<REPEATPRODUCT; ++k)
sm4 = sm1 * sm2;
timer.stop();
std::cout << "Eigen sparse = " << timer.value() << endl;
// timer.reset();
// timer.start();
// for (int k=0; k<REPEATPRODUCT; ++k)
// hm4 = sm1 * sm2;
// timer.stop();
// std::cout << "Eigen hash = " << timer.value() << endl;
timer.reset();
timer.start();
for (int k=0; k<REPEATPRODUCT; ++k)
{
gmm::csr_matrix<float> R(rows,cols);
gmm::copy(gmmM1, R);
//gmm::mult(gmmM1, gmmM2, gmmT4);
}
timer.stop();
std::cout << "GMM++ sparse = " << timer.value() << endl;
#endif
//--------------------------------------------------------------------------------
// VARIOUS
//--------------------------------------------------------------------------------
#if 1
// sm3 = sm1 + m2;
cout << m4.transpose() << "\n\n";
// sm4 = sm1+sm2;
cout << sm4 << "\n\n";
cout << lm4 << endl;
LinkedVectorMatrix<float,RowMajorBit> lm5(rows, cols);
lm5 = lm4;
lm5 = sm4;
cout << endl << lm5 << endl;
sm3 = sm4.transpose();
cout << endl << lm5 << endl;
cout << endl << "SM1 before random editing: " << endl << sm1 << endl;
{
SparseSetter<EigenSparseMatrix,RandomAccessPattern> w1(sm1);
w1->coeffRef(4,2) = ei_random<float>();
w1->coeffRef(2,6) = ei_random<float>();
w1->coeffRef(0,4) = ei_random<float>();
w1->coeffRef(9,3) = ei_random<float>();
}
cout << endl << "SM1 after random editing: " << endl << sm1 << endl;
#endif
return 0;
}

199
bench/sparse_product.cpp Normal file
View File

@ -0,0 +1,199 @@
//g++ -O3 -g0 -DNDEBUG sparse_product.cpp -I.. -I/home/gael/Coding/LinearAlgebra/mtl4/ -DDENSITY=0.005 -DSIZE=10000 && ./a.out
//g++ -O3 -g0 -DNDEBUG sparse_product.cpp -I.. -I/home/gael/Coding/LinearAlgebra/mtl4/ -DDENSITY=0.05 -DSIZE=2000 && ./a.out
// -DNOGMM -DNOMTL
#ifndef SIZE
#define SIZE 10000
#endif
#ifndef DENSITY
#define DENSITY 0.01
#endif
#ifndef REPEAT
#define REPEAT 1
#endif
#include "BenchSparseUtil.h"
#ifndef MINDENSITY
#define MINDENSITY 0.0004
#endif
int main(int argc, char *argv[])
{
int rows = SIZE;
int cols = SIZE;
float density = DENSITY;
EigenSparseMatrix sm1(rows,cols), sm2(rows,cols), sm3(rows,cols), sm4(rows,cols);
BenchTimer timer;
for (float density = DENSITY; density>=MINDENSITY; density*=0.5)
{
fillMatrix(density, rows, cols, sm1);
fillMatrix(density, rows, cols, sm2);
// dense matrices
#ifdef DENSEMATRIX
{
std::cout << "Eigen Dense\t" << density*100 << "%\n";
DenseMatrix m1(rows,cols), m2(rows,cols), m3(rows,cols);
eiToDense(sm1, m1);
eiToDense(sm2, m2);
timer.reset();
timer.start();
for (int k=0; k<REPEAT; ++k)
m3 = m1 * m2;
timer.stop();
std::cout << " a * b:\t" << timer.value() << endl;
timer.reset();
timer.start();
for (int k=0; k<REPEAT; ++k)
m3 = m1.transpose() * m2;
timer.stop();
std::cout << " a' * b:\t" << timer.value() << endl;
timer.reset();
timer.start();
for (int k=0; k<REPEAT; ++k)
m3 = m1.transpose() * m2.transpose();
timer.stop();
std::cout << " a' * b':\t" << timer.value() << endl;
timer.reset();
timer.start();
for (int k=0; k<REPEAT; ++k)
m3 = m1 * m2.transpose();
timer.stop();
std::cout << " a * b':\t" << timer.value() << endl;
}
#endif
// eigen sparse matrices
{
std::cout << "Eigen sparse\t" << density*100 << "%\n";
timer.reset();
timer.start();
for (int k=0; k<REPEAT; ++k)
sm3 = sm1 * sm2;
timer.stop();
std::cout << " a * b:\t" << timer.value() << endl;
timer.reset();
timer.start();
for (int k=0; k<REPEAT; ++k)
sm3 = sm1.transpose() * sm2;
timer.stop();
std::cout << " a' * b:\t" << timer.value() << endl;
timer.reset();
timer.start();
for (int k=0; k<REPEAT; ++k)
sm3 = sm1.transpose() * sm2.transpose();
timer.stop();
std::cout << " a' * b':\t" << timer.value() << endl;
timer.reset();
timer.start();
for (int k=0; k<REPEAT; ++k)
sm3 = sm1 * sm2.transpose();
timer.stop();
std::cout << " a * b' :\t" << timer.value() << endl;
}
// GMM++
#ifndef NOGMM
{
std::cout << "GMM++ sparse\t" << density*100 << "%\n";
GmmDynSparse gmmT3(rows,cols);
GmmSparse m1(rows,cols), m2(rows,cols), m3(rows,cols);
eiToGmm(sm1, m1);
eiToGmm(sm2, m2);
timer.reset();
timer.start();
for (int k=0; k<REPEAT; ++k)
gmm::mult(m1, m2, gmmT3);
timer.stop();
std::cout << " a * b:\t" << timer.value() << endl;
timer.reset();
timer.start();
for (int k=0; k<REPEAT; ++k)
gmm::mult(gmm::transposed(m1), m2, gmmT3);
timer.stop();
std::cout << " a' * b:\t" << timer.value() << endl;
if (rows<500)
{
timer.reset();
timer.start();
for (int k=0; k<REPEAT; ++k)
gmm::mult(gmm::transposed(m1), gmm::transposed(m2), gmmT3);
timer.stop();
std::cout << " a' * b':\t" << timer.value() << endl;
timer.reset();
timer.start();
for (int k=0; k<REPEAT; ++k)
gmm::mult(m1, gmm::transposed(m2), gmmT3);
timer.stop();
std::cout << " a * b':\t" << timer.value() << endl;
}
else
{
std::cout << " a' * b':\t" << "forever" << endl;
std::cout << " a * b':\t" << "forever" << endl;
}
}
#endif
// MTL4
#ifndef NOMTL
{
std::cout << "MTL4\t" << density*100 << "%\n";
MtlSparse m1(rows,cols), m2(rows,cols), m3(rows,cols);
eiToMtl(sm1, m1);
eiToMtl(sm2, m2);
timer.reset();
timer.start();
for (int k=0; k<REPEAT; ++k)
m3 = m1 * m2;
timer.stop();
std::cout << " a * b:\t" << timer.value() << endl;
timer.reset();
timer.start();
for (int k=0; k<REPEAT; ++k)
m3 = trans(m1) * m2;
timer.stop();
std::cout << " a' * b:\t" << timer.value() << endl;
timer.reset();
timer.start();
for (int k=0; k<REPEAT; ++k)
m3 = trans(m1) * trans(m2);
timer.stop();
std::cout << " a' * b':\t" << timer.value() << endl;
timer.reset();
timer.start();
for (int k=0; k<REPEAT; ++k)
m3 = m1 * trans(m2);
timer.stop();
std::cout << " a * b' :\t" << timer.value() << endl;
}
#endif
std::cout << "\n\n";
}
return 0;
}

View File

@ -24,6 +24,7 @@
#include "main.h"
#include <Eigen/Cholesky>
#include <Eigen/LU>
template<typename MatrixType> void cholesky(const MatrixType& m)
{
@ -34,12 +35,12 @@ template<typename MatrixType> void cholesky(const MatrixType& m)
int cols = m.cols();
typedef typename MatrixType::Scalar Scalar;
typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, MatrixType::ColsAtCompileTime> SquareMatrixType;
typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, 1> VectorType;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> SquareMatrixType;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
MatrixType a = MatrixType::random(rows,cols).transpose();
VectorType b = VectorType::random(cols);
SquareMatrixType covMat = a.adjoint() * a;
MatrixType a = MatrixType::random(rows,cols);
VectorType b = VectorType::random(rows);
SquareMatrixType covMat = a * a.adjoint();
CholeskyWithoutSquareRoot<SquareMatrixType> cholnosqrt(covMat);
VERIFY_IS_APPROX(covMat, cholnosqrt.matrixL() * cholnosqrt.vectorD().asDiagonal() * cholnosqrt.matrixL().adjoint());