Rebase to latest.

Ville Kallioniemi 2016-02-01 19:32:31 -07:00
commit f0fdefa96f
113 changed files with 2894 additions and 1270 deletions

View File

@@ -19,7 +19,7 @@ extern "C" {
  /** \ingroup Support_modules
  * \defgroup CholmodSupport_Module CholmodSupport module
  *
- * This module provides an interface to the Cholmod library which is part of the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">suitesparse</a> package.
+ * This module provides an interface to the Cholmod library which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package.
  * It provides the two following main factorization classes:
  * - class CholmodSupernodalLLT: a supernodal LLT Cholesky factorization.
  * - class CholmodDecomposiiton: a general L(D)LT Cholesky factorization with automatic or explicit runtime selection of the underlying factorization method (supernodal or simplicial).
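For reference, a minimal usage sketch of the CholmodSupernodalLLT class documented above (not part of this patch; the helper name solve_spd is illustrative only):

    #include <stdexcept>
    #include <Eigen/SparseCore>
    #include <Eigen/CholmodSupport>

    // Solve A x = b for a symmetric positive definite sparse A via CHOLMOD.
    Eigen::VectorXd solve_spd(const Eigen::SparseMatrix<double>& A, const Eigen::VectorXd& b)
    {
      Eigen::CholmodSupernodalLLT<Eigen::SparseMatrix<double> > solver;
      solver.compute(A);                       // symbolic analysis + numeric factorization
      if (solver.info() != Eigen::Success)
        throw std::runtime_error("Cholesky factorization failed");
      return solver.solve(b);                  // forward/back substitution inside CHOLMOD
    }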

View File

@@ -17,7 +17,7 @@
  /** \ingroup Support_modules
  * \defgroup SPQRSupport_Module SuiteSparseQR module
  *
- * This module provides an interface to the SPQR library, which is part of the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">suitesparse</a> package.
+ * This module provides an interface to the SPQR library, which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package.
  *
  * \code
  * #include <Eigen/SPQRSupport>
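For reference, a small sketch of a sparse least-squares solve with the SPQR wrapper (illustrative only, not part of this patch):

    #include <Eigen/SparseCore>
    #include <Eigen/SPQRSupport>

    // Minimize ||A*x - b|| for a tall, possibly rank-deficient sparse A.
    Eigen::VectorXd sparse_least_squares(const Eigen::SparseMatrix<double>& A, const Eigen::VectorXd& b)
    {
      Eigen::SPQR<Eigen::SparseMatrix<double> > qr(A);  // analyze + factorize
      return qr.solve(b);
    }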

View File

@@ -19,7 +19,7 @@ extern "C" {
  /** \ingroup Support_modules
  * \defgroup UmfPackSupport_Module UmfPackSupport module
  *
- * This module provides an interface to the UmfPack library which is part of the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">suitesparse</a> package.
+ * This module provides an interface to the UmfPack library which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package.
  * It provides the following factorization class:
  * - class UmfPackLU: a multifrontal sequential LU factorization.
  *
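For reference, a sketch of the UmfPackLU class mentioned above (illustrative only, not part of this patch):

    #include <Eigen/SparseCore>
    #include <Eigen/UmfPackSupport>

    // Solve A x = b for a general square sparse A with UMFPACK's multifrontal LU.
    Eigen::VectorXd solve_general(const Eigen::SparseMatrix<double>& A, const Eigen::VectorXd& b)
    {
      Eigen::UmfPackLU<Eigen::SparseMatrix<double> > lu(A);
      return lu.solve(b);
    }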

View File

@@ -273,9 +273,10 @@ class CholmodBase : public SparseSolverBase<Derived>
  const Index size = m_cholmodFactor->n;
  EIGEN_UNUSED_VARIABLE(size);
  eigen_assert(size==b.rows());
- // note: cd stands for Cholmod Dense
- Rhs& b_ref(b.const_cast_derived());
+ // Cholmod needs column-major storage without inner-stride, which corresponds to the default behavior of Ref.
+ Ref<const Matrix<typename Rhs::Scalar,Dynamic,Dynamic,ColMajor> > b_ref(b.derived());
  cholmod_dense b_cd = viewAsCholmod(b_ref);
  cholmod_dense* x_cd = cholmod_solve(CHOLMOD_A, m_cholmodFactor, &b_cd, &m_cholmod);
  if(!x_cd)
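The switch from a const_cast to Ref<const Matrix<...,ColMajor> > means a right-hand side that is already column-major with unit inner stride is handed to CHOLMOD directly, while anything else is silently evaluated into a suitable temporary. A standalone sketch of that behavior (variable names are illustrative, not from the patch):

    #include <Eigen/Dense>

    int main()
    {
      Eigen::MatrixXd B = Eigen::MatrixXd::Random(8, 3);
      // Binds directly: B already has the required column-major, unit-inner-stride layout.
      Eigen::Ref<const Eigen::MatrixXd> direct(B);
      // Falls back to an internal copy: the expression does not match the required layout.
      Eigen::Ref<const Eigen::MatrixXd> copied(B.transpose());
      return (direct.rows() == 8 && copied.rows() == 3) ? 0 : 1;
    }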

View File

@ -103,7 +103,7 @@ template<typename Derived> class ArrayBase
/** Special case of the template operator=, in order to prevent the compiler /** Special case of the template operator=, in order to prevent the compiler
* from generating a default operator= (issue hit with g++ 4.1) * from generating a default operator= (issue hit with g++ 4.1)
*/ */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator=(const ArrayBase& other) Derived& operator=(const ArrayBase& other)
{ {
internal::call_assignment(derived(), other.derived()); internal::call_assignment(derived(), other.derived());
@ -112,28 +112,28 @@ template<typename Derived> class ArrayBase
/** Set all the entries to \a value. /** Set all the entries to \a value.
* \sa DenseBase::setConstant(), DenseBase::fill() */ * \sa DenseBase::setConstant(), DenseBase::fill() */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator=(const Scalar &value) Derived& operator=(const Scalar &value)
{ Base::setConstant(value); return derived(); } { Base::setConstant(value); return derived(); }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator+=(const Scalar& scalar); Derived& operator+=(const Scalar& scalar);
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator-=(const Scalar& scalar); Derived& operator-=(const Scalar& scalar);
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator+=(const ArrayBase<OtherDerived>& other); Derived& operator+=(const ArrayBase<OtherDerived>& other);
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator-=(const ArrayBase<OtherDerived>& other); Derived& operator-=(const ArrayBase<OtherDerived>& other);
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator*=(const ArrayBase<OtherDerived>& other); Derived& operator*=(const ArrayBase<OtherDerived>& other);
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator/=(const ArrayBase<OtherDerived>& other); Derived& operator/=(const ArrayBase<OtherDerived>& other);
public: public:
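The operators being marked EIGEN_STRONG_INLINE above are the usual coefficient-wise compound assignments; a short reminder of what they do (illustrative snippet, not from the patch):

    #include <Eigen/Dense>

    int main()
    {
      Eigen::ArrayXd a = Eigen::ArrayXd::LinSpaced(5, 0.0, 1.0);
      Eigen::ArrayXd b = Eigen::ArrayXd::Constant(5, 2.0);
      a += 1.0;   // scalar operator+= declared above
      a *= b;     // coefficient-wise operator*= between arrays
      a = 0.5;    // operator=(const Scalar&): sets every entry to 0.5
      return a.size() == 5 ? 0 : 1;
    }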

View File

@@ -52,7 +52,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
  const Scalar
  >::type ScalarWithConstIfNotLvalue;
- typedef typename internal::ref_selector<ExpressionType>::type NestedExpressionType;
+ typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
  EIGEN_DEVICE_FUNC
  explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
@@ -67,7 +67,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
  inline Index innerStride() const { return m_expression.innerStride(); }
  EIGEN_DEVICE_FUNC
- inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); }
+ inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
  EIGEN_DEVICE_FUNC
  inline const Scalar* data() const { return m_expression.data(); }
@@ -80,13 +80,13 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
  EIGEN_DEVICE_FUNC
  inline Scalar& coeffRef(Index rowId, Index colId)
  {
- return m_expression.const_cast_derived().coeffRef(rowId, colId);
+ return m_expression.coeffRef(rowId, colId);
  }
  EIGEN_DEVICE_FUNC
  inline const Scalar& coeffRef(Index rowId, Index colId) const
  {
- return m_expression.const_cast_derived().coeffRef(rowId, colId);
+ return m_expression.coeffRef(rowId, colId);
  }
  EIGEN_DEVICE_FUNC
@@ -98,13 +98,13 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
  EIGEN_DEVICE_FUNC
  inline Scalar& coeffRef(Index index)
  {
- return m_expression.const_cast_derived().coeffRef(index);
+ return m_expression.coeffRef(index);
  }
  EIGEN_DEVICE_FUNC
  inline const Scalar& coeffRef(Index index) const
  {
- return m_expression.const_cast_derived().coeffRef(index);
+ return m_expression.coeffRef(index);
  }
  template<int LoadMode>
@@ -116,7 +116,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
  template<int LoadMode>
  inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
  {
- m_expression.const_cast_derived().template writePacket<LoadMode>(rowId, colId, val);
+ m_expression.template writePacket<LoadMode>(rowId, colId, val);
  }
  template<int LoadMode>
@@ -128,7 +128,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
  template<int LoadMode>
  inline void writePacket(Index index, const PacketScalar& val)
  {
- m_expression.const_cast_derived().template writePacket<LoadMode>(index, val);
+ m_expression.template writePacket<LoadMode>(index, val);
  }
  template<typename Dest>
@@ -145,11 +145,11 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
  /** Forwards the resizing request to the nested expression
    * \sa DenseBase::resize(Index) */
  EIGEN_DEVICE_FUNC
- void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); }
+ void resize(Index newSize) { m_expression.resize(newSize); }
  /** Forwards the resizing request to the nested expression
    * \sa DenseBase::resize(Index,Index)*/
  EIGEN_DEVICE_FUNC
- void resize(Index rows, Index cols) { m_expression.const_cast_derived().resize(rows,cols); }
+ void resize(Index rows, Index cols) { m_expression.resize(rows,cols); }
  protected:
  NestedExpressionType m_expression;
@@ -195,7 +195,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
  const Scalar
  >::type ScalarWithConstIfNotLvalue;
- typedef typename internal::ref_selector<ExpressionType>::type NestedExpressionType;
+ typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
  EIGEN_DEVICE_FUNC
  explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}
@@ -210,7 +210,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
  inline Index innerStride() const { return m_expression.innerStride(); }
  EIGEN_DEVICE_FUNC
- inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); }
+ inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
  EIGEN_DEVICE_FUNC
  inline const Scalar* data() const { return m_expression.data(); }
@@ -223,7 +223,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
  EIGEN_DEVICE_FUNC
  inline Scalar& coeffRef(Index rowId, Index colId)
  {
- return m_expression.const_cast_derived().coeffRef(rowId, colId);
+ return m_expression.coeffRef(rowId, colId);
  }
  EIGEN_DEVICE_FUNC
@@ -241,13 +241,13 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
  EIGEN_DEVICE_FUNC
  inline Scalar& coeffRef(Index index)
  {
- return m_expression.const_cast_derived().coeffRef(index);
+ return m_expression.coeffRef(index);
  }
  EIGEN_DEVICE_FUNC
  inline const Scalar& coeffRef(Index index) const
  {
- return m_expression.const_cast_derived().coeffRef(index);
+ return m_expression.coeffRef(index);
  }
  template<int LoadMode>
@@ -259,7 +259,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
  template<int LoadMode>
  inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
  {
- m_expression.const_cast_derived().template writePacket<LoadMode>(rowId, colId, val);
+ m_expression.template writePacket<LoadMode>(rowId, colId, val);
  }
  template<int LoadMode>
@@ -271,7 +271,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
  template<int LoadMode>
  inline void writePacket(Index index, const PacketScalar& val)
  {
- m_expression.const_cast_derived().template writePacket<LoadMode>(index, val);
+ m_expression.template writePacket<LoadMode>(index, val);
  }
  EIGEN_DEVICE_FUNC
@@ -284,11 +284,11 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
  /** Forwards the resizing request to the nested expression
    * \sa DenseBase::resize(Index) */
  EIGEN_DEVICE_FUNC
- void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); }
+ void resize(Index newSize) { m_expression.resize(newSize); }
  /** Forwards the resizing request to the nested expression
    * \sa DenseBase::resize(Index,Index)*/
  EIGEN_DEVICE_FUNC
- void resize(Index rows, Index cols) { m_expression.const_cast_derived().resize(rows,cols); }
+ void resize(Index rows, Index cols) { m_expression.resize(rows,cols); }
  protected:
  NestedExpressionType m_expression;
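ArrayWrapper and MatrixWrapper are what .array() and .matrix() return; nesting a non-const reference lets writes flow through without the const_cast calls removed above. A tiny sketch (not part of the patch):

    #include <Eigen/Dense>

    int main()
    {
      Eigen::MatrixXd m = Eigen::MatrixXd::Zero(2, 2);
      m.array() += 1.0;        // coefficient-wise update through the ArrayWrapper
      m.array()(0, 0) = 5.0;   // coeffRef forwarded to the nested expression
      return m(0, 0) == 5.0 ? 0 : 1;
    }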

View File

@@ -637,7 +637,7 @@ protected:
  ***************************************************************************/
  template<typename DstXprType, typename SrcXprType, typename Functor>
- EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
  {
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
@@ -654,7 +654,7 @@ EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const S
  }
  template<typename DstXprType, typename SrcXprType>
- EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
  {
  call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar>());
  }
@@ -688,26 +688,30 @@ struct Assignment;
  // does not has to bother about these annoying details.
  template<typename Dst, typename Src>
- EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ void call_assignment(Dst& dst, const Src& src)
  {
  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
  }
  template<typename Dst, typename Src>
- EIGEN_DEVICE_FUNC void call_assignment(const Dst& dst, const Src& src)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ void call_assignment(const Dst& dst, const Src& src)
  {
  call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
  }
  // Deal with "assume-aliasing"
  template<typename Dst, typename Src, typename Func>
- EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
  {
  typename plain_matrix_type<Src>::type tmp(src);
  call_assignment_no_alias(dst, tmp, func);
  }
  template<typename Dst, typename Src, typename Func>
- EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
  {
  call_assignment_no_alias(dst, src, func);
  }
@@ -715,14 +719,16 @@ EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& fun
  // by-pass "assume-aliasing"
  // When there is no aliasing, we require that 'dst' has been properly resized
  template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
- EIGEN_DEVICE_FUNC void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
  {
  call_assignment_no_alias(dst.expression(), src, func);
  }
  template<typename Dst, typename Src, typename Func>
- EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
  {
  enum {
  NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)
@@ -747,13 +753,15 @@ EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const
  Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
  }
  template<typename Dst, typename Src>
- EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ void call_assignment_no_alias(Dst& dst, const Src& src)
  {
  call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar>());
  }
  template<typename Dst, typename Src, typename Func>
- EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
  {
  Index dstRows = src.rows();
  Index dstCols = src.cols();
@@ -767,7 +775,8 @@ EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src
  Assignment<Dst,Src,Func>::run(dst, src, func);
  }
  template<typename Dst, typename Src>
- EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
  {
  call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar>());
  }
@@ -779,7 +788,8 @@ template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, con
  template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
  struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar>
  {
- EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
  {
  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
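The two call_assignment overloads above implement the "assume-aliasing" rule: expressions such as products are first evaluated into a temporary unless the caller opts out with noalias(). Illustrative sketch (not from the patch):

    #include <Eigen/Dense>

    int main()
    {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(3, 3);
      Eigen::MatrixXd B = Eigen::MatrixXd::Random(3, 3);
      Eigen::MatrixXd C(3, 3);
      C = A * B;            // product assumes aliasing: goes through the temporary path
      C.noalias() = A * B;  // NoAlias overload above: evaluated directly into C
      return 0;
    }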

View File

@@ -129,8 +129,8 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
  : Impl(xpr, startRow, startCol)
  {
  EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)
- eigen_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= xpr.rows()
-        && startCol >= 0 && BlockCols >= 1 && startCol + BlockCols <= xpr.cols());
+ eigen_assert(startRow >= 0 && BlockRows >= 0 && startRow + BlockRows <= xpr.rows()
+        && startCol >= 0 && BlockCols >= 0 && startCol + BlockCols <= xpr.cols());
  }
  /** Dynamic-size constructor
@@ -221,15 +221,13 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
  inline Scalar& coeffRef(Index rowId, Index colId)
  {
  EIGEN_STATIC_ASSERT_LVALUE(XprType)
- return m_xpr.const_cast_derived()
-        .coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
+ return m_xpr.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
  }
  EIGEN_DEVICE_FUNC
  inline const Scalar& coeffRef(Index rowId, Index colId) const
  {
- return m_xpr.derived()
-        .coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
+ return m_xpr.derived().coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
  }
  EIGEN_DEVICE_FUNC
@@ -242,39 +240,34 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
  inline Scalar& coeffRef(Index index)
  {
  EIGEN_STATIC_ASSERT_LVALUE(XprType)
- return m_xpr.const_cast_derived()
-        .coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
-                  m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+ return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+                       m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
  }
  EIGEN_DEVICE_FUNC
  inline const Scalar& coeffRef(Index index) const
  {
- return m_xpr.const_cast_derived()
-        .coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
-                  m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+ return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+                       m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
  }
  EIGEN_DEVICE_FUNC
  inline const CoeffReturnType coeff(Index index) const
  {
- return m_xpr
-        .coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
-               m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+ return m_xpr.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+                    m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
  }
  template<int LoadMode>
  inline PacketScalar packet(Index rowId, Index colId) const
  {
- return m_xpr.template packet<Unaligned>
-        (rowId + m_startRow.value(), colId + m_startCol.value());
+ return m_xpr.template packet<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value());
  }
  template<int LoadMode>
  inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
  {
- m_xpr.const_cast_derived().template writePacket<Unaligned>
-        (rowId + m_startRow.value(), colId + m_startCol.value(), val);
+ m_xpr.template writePacket<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value(), val);
  }
  template<int LoadMode>
@@ -288,7 +281,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
  template<int LoadMode>
  inline void writePacket(Index index, const PacketScalar& val)
  {
- m_xpr.const_cast_derived().template writePacket<Unaligned>
+ m_xpr.template writePacket<Unaligned>
  (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
  m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0), val);
  }
@@ -320,7 +313,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
  protected:
- const typename XprType::Nested m_xpr;
+ typename XprType::Nested m_xpr;
  const internal::variable_if_dynamic<StorageIndex, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
  const internal::variable_if_dynamic<StorageIndex, XprType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
  const internal::variable_if_dynamic<StorageIndex, RowsAtCompileTime> m_blockRows;
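Relaxing the assertion from "BlockRows >= 1" to "BlockRows >= 0" allows empty fixed-size blocks, and writes now go through coeffRef on the non-const nested expression. A sketch, assuming zero-sized fixed blocks are accepted as the relaxed assertion suggests:

    #include <Eigen/Dense>

    int main()
    {
      Eigen::Matrix4d m = Eigen::Matrix4d::Zero();
      m.block<2, 2>(1, 1).setIdentity();   // writable fixed-size block
      auto empty = m.block<0, 4>(2, 0);    // zero rows: accepted by the relaxed assert
      return empty.rows() == 0 ? 0 : 1;
    }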

View File

@@ -148,7 +148,8 @@ struct evaluator<PlainObjectBase<Derived> >
  EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
  }
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
  {
  if (IsRowMajor)
  return m_data[row * m_outerStride.value() + col];
@@ -156,12 +157,14 @@ struct evaluator<PlainObjectBase<Derived> >
  return m_data[row + col * m_outerStride.value()];
  }
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
  {
  return m_data[index];
  }
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index row, Index col)
  {
  if (IsRowMajor)
  return const_cast<Scalar*>(m_data)[row * m_outerStride.value() + col];
@@ -169,12 +172,14 @@ struct evaluator<PlainObjectBase<Derived> >
  return const_cast<Scalar*>(m_data)[row + col * m_outerStride.value()];
  }
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index index)
  {
  return const_cast<Scalar*>(m_data)[index];
  }
  template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  PacketType packet(Index row, Index col) const
  {
  if (IsRowMajor)
@@ -184,12 +189,14 @@ struct evaluator<PlainObjectBase<Derived> >
  }
  template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  PacketType packet(Index index) const
  {
  return ploadt<PacketType, LoadMode>(m_data + index);
  }
  template<int StoreMode,typename PacketType>
+ EIGEN_STRONG_INLINE
  void writePacket(Index row, Index col, const PacketType& x)
  {
  if (IsRowMajor)
@@ -201,6 +208,7 @@ struct evaluator<PlainObjectBase<Derived> >
  }
  template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketType& x)
  {
  return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_data) + index, x);
@@ -260,45 +268,53 @@ struct unary_evaluator<Transpose<ArgType>, IndexBased>
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
  {
  return m_argImpl.coeff(col, row);
  }
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
  {
  return m_argImpl.coeff(index);
  }
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index row, Index col)
  {
  return m_argImpl.coeffRef(col, row);
  }
- EIGEN_DEVICE_FUNC typename XprType::Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ typename XprType::Scalar& coeffRef(Index index)
  {
  return m_argImpl.coeffRef(index);
  }
  template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  PacketType packet(Index row, Index col) const
  {
  return m_argImpl.template packet<LoadMode,PacketType>(col, row);
  }
  template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  PacketType packet(Index index) const
  {
  return m_argImpl.template packet<LoadMode,PacketType>(index);
  }
  template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  void writePacket(Index row, Index col, const PacketType& x)
  {
  m_argImpl.template writePacket<StoreMode,PacketType>(col, row, x);
  }
  template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketType& x)
  {
  m_argImpl.template writePacket<StoreMode,PacketType>(index, x);
@@ -338,23 +354,27 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
  typedef typename XprType::CoeffReturnType CoeffReturnType;
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
  {
  return m_functor(row, col);
  }
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
  {
  return m_functor(index);
  }
  template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  PacketType packet(Index row, Index col) const
  {
  return m_functor.template packetOp<Index,PacketType>(row, col);
  }
  template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  PacketType packet(Index index) const
  {
  return m_functor.template packetOp<Index,PacketType>(index);
@@ -380,7 +400,8 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
  Alignment = evaluator<ArgType>::Alignment
  };
- EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ explicit unary_evaluator(const XprType& op)
  : m_functor(op.functor()),
  m_argImpl(op.nestedExpression())
  {
@@ -390,23 +411,27 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
  typedef typename XprType::CoeffReturnType CoeffReturnType;
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
  {
  return m_functor(m_argImpl.coeff(row, col));
  }
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
  {
  return m_functor(m_argImpl.coeff(index));
  }
  template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  PacketType packet(Index row, Index col) const
  {
  return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(row, col));
  }
  template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  PacketType packet(Index index) const
  {
  return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(index));
@@ -466,17 +491,20 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
  typedef typename XprType::CoeffReturnType CoeffReturnType;
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
  {
  return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col));
  }
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
  {
  return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index));
  }
  template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  PacketType packet(Index row, Index col) const
  {
  return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(row, col),
@@ -484,6 +512,7 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
  }
  template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  PacketType packet(Index index) const
  {
  return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(index),
@@ -523,22 +552,26 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
  {
  return m_unaryOp(m_argImpl.coeff(row, col));
  }
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
  {
  return m_unaryOp(m_argImpl.coeff(index));
  }
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index row, Index col)
  {
  return m_unaryOp(m_argImpl.coeffRef(row, col));
  }
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index index)
  {
  return m_unaryOp(m_argImpl.coeffRef(index));
  }
@@ -578,47 +611,55 @@ struct mapbase_evaluator : evaluator_base<Derived>
  EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
  }
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
  {
  return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()];
  }
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
  {
  return m_data[index * m_xpr.innerStride()];
  }
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index row, Index col)
  {
  return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()];
  }
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index index)
  {
  return m_data[index * m_xpr.innerStride()];
  }
  template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  PacketType packet(Index row, Index col) const
  {
  PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
  return internal::ploadt<PacketType, LoadMode>(ptr);
  }
  template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  PacketType packet(Index index) const
  {
  return internal::ploadt<PacketType, LoadMode>(m_data + index * m_xpr.innerStride());
  }
  template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  void writePacket(Index row, Index col, const PacketType& x)
  {
  PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
  return internal::pstoret<Scalar, PacketType, StoreMode>(ptr, x);
  }
  template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketType& x)
  {
  internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_xpr.innerStride(), x);
@@ -767,46 +808,54 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
  RowsAtCompileTime = XprType::RowsAtCompileTime
  };
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
  {
  return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col);
  }
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
  {
  return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
  }
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index row, Index col)
  {
  return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col);
  }
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index index)
  {
  return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
  }
  template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  PacketType packet(Index row, Index col) const
  {
  return m_argImpl.template packet<LoadMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col);
  }
  template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  PacketType packet(Index index) const
  {
  return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
  RowsAtCompileTime == 1 ? index : 0);
  }
  template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  void writePacket(Index row, Index col, const PacketType& x)
  {
  return m_argImpl.template writePacket<StoreMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col, x);
  }
  template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketType& x)
  {
  return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
@@ -859,7 +908,7 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
  Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ThenMatrixType>::Alignment, evaluator<ElseMatrixType>::Alignment)
  };
- inline EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select)
+ EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select)
  : m_conditionImpl(select.conditionMatrix()),
  m_thenImpl(select.thenMatrix()),
  m_elseImpl(select.elseMatrix())
@@ -869,7 +918,8 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
  typedef typename XprType::CoeffReturnType CoeffReturnType;
- inline EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
  {
  if (m_conditionImpl.coeff(row, col))
  return m_thenImpl.coeff(row, col);
@@ -877,7 +927,8 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
  return m_elseImpl.coeff(row, col);
  }
- inline EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
  {
  if (m_conditionImpl.coeff(index))
  return m_thenImpl.coeff(index);
@@ -921,7 +972,8 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
  m_cols(replicate.nestedExpression().cols())
  {}
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
  {
  // try to avoid using modulo; this is a pure optimization strategy
  const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0
@@ -934,7 +986,8 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
  return m_argImpl.coeff(actual_row, actual_col);
  }
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
  {
  // try to avoid using modulo; this is a pure optimization strategy
  const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1
@@ -945,6 +998,7 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
  }
  template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  PacketType packet(Index row, Index col) const
  {
  const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0
@@ -958,6 +1012,7 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
  }
  template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  PacketType packet(Index index) const
  {
  const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1
@@ -994,7 +1049,7 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
  CoeffReadCost = TraversalSize==Dynamic ? HugeCost
  : TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value),
- Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit))),
+ Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit))) | LinearAccessBit,
  Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized
  };
@@ -1008,7 +1063,8 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
  typedef typename XprType::CoeffReturnType CoeffReturnType;
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const Scalar coeff(Index i, Index j) const
  {
  if (Direction==Vertical)
  return m_functor(m_arg.col(j));
@@ -1016,7 +1072,8 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
  return m_functor(m_arg.row(i));
  }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const Scalar coeff(Index index) const
  {
  if (Direction==Vertical)
  return m_functor(m_arg.col(index));
@@ -1051,45 +1108,53 @@ struct evaluator_wrapper_base
  typedef typename ArgType::Scalar Scalar;
  typedef typename ArgType::CoeffReturnType CoeffReturnType;
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
  {
  return m_argImpl.coeff(row, col);
  }
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
  {
  return m_argImpl.coeff(index);
  }
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index row, Index col)
  {
  return m_argImpl.coeffRef(row, col);
  }
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index index)
  {
  return m_argImpl.coeffRef(index);
  }
  template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  PacketType packet(Index row, Index col) const
  {
  return m_argImpl.template packet<LoadMode,PacketType>(row, col);
  }
  template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  PacketType packet(Index index) const
  {
  return m_argImpl.template packet<LoadMode,PacketType>(index);
  }
  template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  void writePacket(Index row, Index col, const PacketType& x)
  {
  m_argImpl.template writePacket<StoreMode>(row, col, x);
  }
  template<int StoreMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketType& x)
  {
  m_argImpl.template writePacket<StoreMode>(index, x);
@@ -1164,29 +1229,34 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
  m_cols(ReverseCol ? reverse.nestedExpression().cols() : 1)
  { }
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index col) const
  {
  return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row,
  ReverseCol ? m_cols.value() - col - 1 : col);
  }
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
  {
  return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1);
  }
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index row, Index col)
  {
  return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row,
  ReverseCol ? m_cols.value() - col - 1 : col);
  }
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index index)
  {
  return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1);
  }
  template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  PacketType packet(Index row, Index col) const
  {
  enum {
@@ -1201,6 +1271,7 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
  }
  template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  PacketType packet(Index index) const
  {
  enum { PacketSize = unpacket_traits<PacketType>::size };
@@ -1208,6 +1279,7 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
  }
  template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  void writePacket(Index row, Index col, const PacketType& x)
  {
  // FIXME we could factorize some code with packet(i,j)
@@ -1224,6 +1296,7 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
  }
  template<int LoadMode, typename PacketType>
+ EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketType& x)
  {
  enum { PacketSize = unpacket_traits<PacketType>::size };
@@ -1267,22 +1340,26 @@ struct evaluator<Diagonal<ArgType, DiagIndex> >
  typedef typename internal::conditional<!internal::is_same<typename ArgType::StorageKind,Sparse>::value,
  typename XprType::CoeffReturnType,Scalar>::type CoeffReturnType;
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index row, Index) const
  {
  return m_argImpl.coeff(row + rowOffset(), row + colOffset());
  }
- EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ CoeffReturnType coeff(Index index) const
  {
  return m_argImpl.coeff(index + rowOffset(), index + colOffset());
  }
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index row, Index)
  {
  return m_argImpl.coeffRef(row + rowOffset(), row + colOffset());
  }
- EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ Scalar& coeffRef(Index index)
  {
  return m_argImpl.coeffRef(index + rowOffset(), index + colOffset());
  }
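Among the evaluator changes above, the PartialReduxExpr evaluator now also advertises LinearAccessBit; that expression type is what partial reductions produce. Illustrative usage (not from the patch):

    #include <Eigen/Dense>

    int main()
    {
      Eigen::MatrixXd m = Eigen::MatrixXd::Random(4, 3);
      Eigen::RowVectorXd colSums = m.colwise().sum();  // PartialReduxExpr, Vertical direction
      Eigen::VectorXd rowNorms = m.rowwise().norm();   // PartialReduxExpr, Horizontal direction
      return (colSums.size() == 3 && rowNorms.size() == 4) ? 0 : 1;
    }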

View File

@@ -32,8 +32,8 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
  // we still want to handle the case when the result type is different.
  typedef typename result_of<
  BinaryOp(
- typename Lhs::Scalar,
- typename Rhs::Scalar
+ const typename Lhs::Scalar&,
+ const typename Rhs::Scalar&
  )
  >::type Scalar;
  typedef typename cwise_promote_storage_type<typename traits<Lhs>::StorageKind,

View File

@ -19,7 +19,7 @@ struct traits<CwiseUnaryOp<UnaryOp, XprType> >
: traits<XprType> : traits<XprType>
{ {
typedef typename result_of< typedef typename result_of<
UnaryOp(typename XprType::Scalar) UnaryOp(const typename XprType::Scalar&)
>::type Scalar; >::type Scalar;
typedef typename XprType::Nested XprTypeNested; typedef typename XprType::Nested XprTypeNested;
typedef typename remove_reference<XprTypeNested>::type _XprTypeNested; typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
@ -58,33 +58,34 @@ class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal
typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base; typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp) EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)
typedef typename internal::ref_selector<XprType>::type XprTypeNested;
typedef typename internal::remove_all<XprType>::type NestedExpression; typedef typename internal::remove_all<XprType>::type NestedExpression;
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
explicit inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
: m_xpr(xpr), m_functor(func) {} : m_xpr(xpr), m_functor(func) {}
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
EIGEN_STRONG_INLINE Index rows() const { return m_xpr.rows(); } Index rows() const { return m_xpr.rows(); }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
EIGEN_STRONG_INLINE Index cols() const { return m_xpr.cols(); } Index cols() const { return m_xpr.cols(); }
/** \returns the functor representing the unary operation */ /** \returns the functor representing the unary operation */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const UnaryOp& functor() const { return m_functor; } const UnaryOp& functor() const { return m_functor; }
/** \returns the nested expression */ /** \returns the nested expression */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const typename internal::remove_all<typename XprType::Nested>::type& const typename internal::remove_all<XprTypeNested>::type&
nestedExpression() const { return m_xpr; } nestedExpression() const { return m_xpr; }
/** \returns the nested expression */ /** \returns the nested expression */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
typename internal::remove_all<typename XprType::Nested>::type& typename internal::remove_all<XprTypeNested>::type&
nestedExpression() { return m_xpr.const_cast_derived(); } nestedExpression() { return m_xpr; }
protected: protected:
typename XprType::Nested m_xpr; XprTypeNested m_xpr;
const UnaryOp m_functor; const UnaryOp m_functor;
}; };
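A short usage sketch (public Eigen/Dense API assumed; Clamp01 is an illustrative functor, not part of Eigen): unaryExpr() wraps a user functor in exactly this kind of CwiseUnaryOp expression, which stays lazy until it is assigned or evaluated.

#include <Eigen/Dense>
#include <iostream>

struct Clamp01 {
  double operator()(const double& x) const { return x < 0.0 ? 0.0 : (x > 1.0 ? 1.0 : x); }
};

int main() {
  Eigen::Vector3d v(-0.5, 0.25, 2.0);
  Eigen::Vector3d w = v.unaryExpr(Clamp01());  // builds a CwiseUnaryOp, evaluated on assignment
  std::cout << w.transpose() << "\n";          // 0 0.25 1
}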

View File

@ -18,7 +18,7 @@ struct traits<CwiseUnaryView<ViewOp, MatrixType> >
: traits<MatrixType> : traits<MatrixType>
{ {
typedef typename result_of< typedef typename result_of<
ViewOp(typename traits<MatrixType>::Scalar) ViewOp(const typename traits<MatrixType>::Scalar&)
>::type Scalar; >::type Scalar;
typedef typename MatrixType::Nested MatrixTypeNested; typedef typename MatrixType::Nested MatrixTypeNested;
typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested; typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
@ -61,6 +61,7 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in
typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base; typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView) EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView)
typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
typedef typename internal::remove_all<MatrixType>::type NestedExpression; typedef typename internal::remove_all<MatrixType>::type NestedExpression;
explicit inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp()) explicit inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp())
@ -75,15 +76,15 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in
const ViewOp& functor() const { return m_functor; } const ViewOp& functor() const { return m_functor; }
/** \returns the nested expression */ /** \returns the nested expression */
const typename internal::remove_all<typename MatrixType::Nested>::type& const typename internal::remove_all<MatrixTypeNested>::type&
nestedExpression() const { return m_matrix; } nestedExpression() const { return m_matrix; }
/** \returns the nested expression */ /** \returns the nested expression */
typename internal::remove_all<typename MatrixType::Nested>::type& typename internal::remove_reference<MatrixTypeNested>::type&
nestedExpression() { return m_matrix.const_cast_derived(); } nestedExpression() { return m_matrix.const_cast_derived(); }
protected: protected:
typename internal::ref_selector<MatrixType>::type m_matrix; MatrixTypeNested m_matrix;
ViewOp m_functor; ViewOp m_functor;
}; };
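A brief sketch of what a writable unary view means in practice, assuming the standard Eigen/Dense API: real() and imag() on a complex matrix return CwiseUnaryView expressions that write through to the original storage.

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::Matrix2cd m = Eigen::Matrix2cd::Zero();
  m.real() << 1, 2,
              3, 4;            // write through the view
  m.imag().setConstant(-1.0);  // likewise for the imaginary parts
  std::cout << m << "\n";
}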

View File

@ -275,13 +275,13 @@ template<typename Derived> class DenseBase
/** Copies \a other into *this. \returns a reference to *this. */ /** Copies \a other into *this. \returns a reference to *this. */
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator=(const DenseBase<OtherDerived>& other); Derived& operator=(const DenseBase<OtherDerived>& other);
/** Special case of the template operator=, in order to prevent the compiler /** Special case of the template operator=, in order to prevent the compiler
* from generating a default operator= (issue hit with g++ 4.1) * from generating a default operator= (issue hit with g++ 4.1)
*/ */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator=(const DenseBase& other); Derived& operator=(const DenseBase& other);
template<typename OtherDerived> template<typename OtherDerived>
@ -388,10 +388,10 @@ template<typename Derived> class DenseBase
inline bool hasNaN() const; inline bool hasNaN() const;
inline bool allFinite() const; inline bool allFinite() const;
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
inline Derived& operator*=(const Scalar& other); Derived& operator*=(const Scalar& other);
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
inline Derived& operator/=(const Scalar& other); Derived& operator/=(const Scalar& other);
typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType; typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;
/** \returns the matrix or vector obtained by evaluating this expression. /** \returns the matrix or vector obtained by evaluating this expression.

View File

@ -103,21 +103,21 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
>::type ScalarWithConstIfNotLvalue; >::type ScalarWithConstIfNotLvalue;
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); } inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.coeffRef(rowOffset(), colOffset())); }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline const Scalar* data() const { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); } inline const Scalar* data() const { return &(m_matrix.coeffRef(rowOffset(), colOffset())); }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline Scalar& coeffRef(Index row, Index) inline Scalar& coeffRef(Index row, Index)
{ {
EIGEN_STATIC_ASSERT_LVALUE(MatrixType) EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset()); return m_matrix.coeffRef(row+rowOffset(), row+colOffset());
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index row, Index) const inline const Scalar& coeffRef(Index row, Index) const
{ {
return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset()); return m_matrix.coeffRef(row+rowOffset(), row+colOffset());
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
@ -130,13 +130,13 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
inline Scalar& coeffRef(Index idx) inline Scalar& coeffRef(Index idx)
{ {
EIGEN_STATIC_ASSERT_LVALUE(MatrixType) EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset()); return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset());
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
inline const Scalar& coeffRef(Index idx) const inline const Scalar& coeffRef(Index idx) const
{ {
return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset()); return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset());
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
@ -159,7 +159,7 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
} }
protected: protected:
typename MatrixType::Nested m_matrix; typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
const internal::variable_if_dynamicindex<Index, DiagIndex> m_index; const internal::variable_if_dynamicindex<Index, DiagIndex> m_index;
private: private:

View File

@ -142,6 +142,52 @@ inline void MatrixBase<Derived>::normalize()
derived() /= numext::sqrt(z); derived() /= numext::sqrt(z);
} }
/** \returns an expression of the quotient of \c *this by its own norm while avoiding underflow and overflow.
*
* \only_for_vectors
*
 * This method is analogous to the normalized() method, but it reduces the risk of
* underflow and overflow when computing the norm.
*
* \warning If the input vector is too small (i.e., this->norm()==0),
* then this function returns a copy of the input.
*
* \sa stableNorm(), stableNormalize(), normalized()
*/
template<typename Derived>
inline const typename MatrixBase<Derived>::PlainObject
MatrixBase<Derived>::stableNormalized() const
{
typedef typename internal::nested_eval<Derived,3>::type _Nested;
_Nested n(derived());
RealScalar w = n.cwiseAbs().maxCoeff();
RealScalar z = (n/w).squaredNorm();
if(z>RealScalar(0))
return n / (numext::sqrt(z)*w);
else
return n;
}
/** Normalizes the vector while avoiding underflow and overflow
*
* \only_for_vectors
*
 * This method is analogous to the normalize() method, but it reduces the risk of
* underflow and overflow when computing the norm.
*
* \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged.
*
* \sa stableNorm(), stableNormalized(), normalize()
*/
template<typename Derived>
inline void MatrixBase<Derived>::stableNormalize()
{
RealScalar w = cwiseAbs().maxCoeff();
RealScalar z = (derived()/w).squaredNorm();
if(z>RealScalar(0))
derived() /= numext::sqrt(z)*w;
}
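A minimal usage sketch of the two new members, assuming this changeset's MatrixBase::stableNormalized()/stableNormalize() are available: rescaling by the largest coefficient first keeps the intermediate squared norm finite where plain normalized() overflows.

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::Vector2d v(1e200, 1e200);                        // v.squaredNorm() overflows to +inf
  std::cout << v.normalized().transpose() << "\n";        // 0 0  (division by inf)
  std::cout << v.stableNormalized().transpose() << "\n";  // ~0.7071 0.7071
  v.stableNormalize();                                    // same rescaling, in place
  std::cout << v.transpose() << "\n";
}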
//---------- implementation of other norms ---------- //---------- implementation of other norms ----------
namespace internal { namespace internal {

View File

@ -75,6 +75,7 @@ struct default_packet_traits
HasCosh = 0, HasCosh = 0,
HasTanh = 0, HasTanh = 0,
HasLGamma = 0, HasLGamma = 0,
HasDiGamma = 0,
HasErf = 0, HasErf = 0,
HasErfc = 0, HasErfc = 0,
@ -284,7 +285,7 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu
{ pstore(to, from); } { pstore(to, from); }
/** \internal tries to do cache prefetching of \a addr */ /** \internal tries to do cache prefetching of \a addr */
template<typename Scalar> inline void prefetch(const Scalar* addr) template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
{ {
#ifdef __CUDA_ARCH__ #ifdef __CUDA_ARCH__
#if defined(__LP64__) #if defined(__LP64__)
@ -439,6 +440,10 @@ Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); } Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); }
/** \internal \returns the derivative of lgamma, psi(\a a) (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pdigamma(const Packet& a) { using numext::digamma; return digamma(a); }
/** \internal \returns the erf(\a a) (coeff-wise) */ /** \internal \returns the erf(\a a) (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet perf(const Packet& a) { using numext::erf; return erf(a); } Packet perf(const Packet& a) { using numext::erf; return erf(a); }

View File

@ -50,6 +50,7 @@ namespace Eigen
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op) EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op)
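A hedged usage sketch of the new coefficient-wise digamma, assuming it is exposed through Eigen/Core for Array types (as the declaration above suggests) and that the toolchain defines EIGEN_HAS_C99_MATH; digamma(1) = -gamma_E ~ -0.5772.

#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::ArrayXd a(3);
  a << 1.0, 2.0, 3.0;
  std::cout << Eigen::digamma(a).transpose() << "\n";  // approx -0.5772 0.4228 0.9228
}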

View File

@ -748,9 +748,9 @@ template<typename T> EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x)
} }
//MSVC defines a _isnan builtin function, but for double only //MSVC defines a _isnan builtin function, but for double only
EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x); } EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x)!=0; }
EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x); } EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x)!=0; }
EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x); } EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x)!=0; }
EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); } EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); }
EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x) { return isinf_msvc_helper(x); } EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x) { return isinf_msvc_helper(x); }
@ -1080,21 +1080,21 @@ struct scalar_fuzzy_impl : scalar_fuzzy_default_impl<Scalar, NumTraits<Scalar>::
template<typename Scalar, typename OtherScalar> EIGEN_DEVICE_FUNC template<typename Scalar, typename OtherScalar> EIGEN_DEVICE_FUNC
inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y,
typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
{ {
return scalar_fuzzy_impl<Scalar>::template isMuchSmallerThan<OtherScalar>(x, y, precision); return scalar_fuzzy_impl<Scalar>::template isMuchSmallerThan<OtherScalar>(x, y, precision);
} }
template<typename Scalar> EIGEN_DEVICE_FUNC template<typename Scalar> EIGEN_DEVICE_FUNC
inline bool isApprox(const Scalar& x, const Scalar& y, inline bool isApprox(const Scalar& x, const Scalar& y,
typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
{ {
return scalar_fuzzy_impl<Scalar>::isApprox(x, y, precision); return scalar_fuzzy_impl<Scalar>::isApprox(x, y, precision);
} }
template<typename Scalar> EIGEN_DEVICE_FUNC template<typename Scalar> EIGEN_DEVICE_FUNC
inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y,
typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
{ {
return scalar_fuzzy_impl<Scalar>::isApproxOrLessThan(x, y, precision); return scalar_fuzzy_impl<Scalar>::isApproxOrLessThan(x, y, precision);
} }

View File

@ -135,14 +135,14 @@ template<typename Derived> class MatrixBase
/** Special case of the template operator=, in order to prevent the compiler /** Special case of the template operator=, in order to prevent the compiler
* from generating a default operator= (issue hit with g++ 4.1) * from generating a default operator= (issue hit with g++ 4.1)
*/ */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator=(const MatrixBase& other); Derived& operator=(const MatrixBase& other);
// We cannot inherit here via Base::operator= since it is causing // We cannot inherit here via Base::operator= since it is causing
// trouble with MSVC. // trouble with MSVC.
template <typename OtherDerived> template <typename OtherDerived>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator=(const DenseBase<OtherDerived>& other); Derived& operator=(const DenseBase<OtherDerived>& other);
template <typename OtherDerived> template <typename OtherDerived>
@ -154,10 +154,10 @@ template<typename Derived> class MatrixBase
Derived& operator=(const ReturnByValue<OtherDerived>& other); Derived& operator=(const ReturnByValue<OtherDerived>& other);
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator+=(const MatrixBase<OtherDerived>& other); Derived& operator+=(const MatrixBase<OtherDerived>& other);
template<typename OtherDerived> template<typename OtherDerived>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Derived& operator-=(const MatrixBase<OtherDerived>& other); Derived& operator-=(const MatrixBase<OtherDerived>& other);
#ifdef __CUDACC__ #ifdef __CUDACC__
@ -204,7 +204,9 @@ template<typename Derived> class MatrixBase
RealScalar blueNorm() const; RealScalar blueNorm() const;
RealScalar hypotNorm() const; RealScalar hypotNorm() const;
EIGEN_DEVICE_FUNC const PlainObject normalized() const; EIGEN_DEVICE_FUNC const PlainObject normalized() const;
EIGEN_DEVICE_FUNC const PlainObject stableNormalized() const;
EIGEN_DEVICE_FUNC void normalize(); EIGEN_DEVICE_FUNC void normalize();
EIGEN_DEVICE_FUNC void stableNormalize();
EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const; EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const;
EIGEN_DEVICE_FUNC void adjointInPlace(); EIGEN_DEVICE_FUNC void adjointInPlace();

View File

@ -32,7 +32,7 @@ namespace internal {
template<typename MatrixType, unsigned int UpLo> template<typename MatrixType, unsigned int UpLo>
struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType> struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType>
{ {
typedef typename ref_selector<MatrixType>::type MatrixTypeNested; typedef typename ref_selector<MatrixType>::non_const_type MatrixTypeNested;
typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned; typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
typedef MatrixType ExpressionType; typedef MatrixType ExpressionType;
typedef typename MatrixType::PlainObject FullMatrixType; typedef typename MatrixType::PlainObject FullMatrixType;
@ -97,7 +97,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
{ {
EIGEN_STATIC_ASSERT_LVALUE(SelfAdjointView); EIGEN_STATIC_ASSERT_LVALUE(SelfAdjointView);
Base::check_coordinates_internal(row, col); Base::check_coordinates_internal(row, col);
return m_matrix.const_cast_derived().coeffRef(row, col); return m_matrix.coeffRef(row, col);
} }
/** \internal */ /** \internal */
@ -107,7 +107,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; } const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); } MatrixTypeNestedCleaned& nestedExpression() { return m_matrix; }
/** Efficient triangular matrix times vector/matrix product */ /** Efficient triangular matrix times vector/matrix product */
template<typename OtherDerived> template<typename OtherDerived>

View File

@ -13,79 +13,349 @@
namespace Eigen { namespace Eigen {
namespace internal { namespace internal {
// Parts of this code are based on the Cephes Math Library.
//
// Cephes Math Library Release 2.8: June, 2000
// Copyright 1984, 1987, 1992, 2000 by Stephen L. Moshier
//
// Permission has been kindly provided by the original author
// to incorporate the Cephes software into the Eigen codebase:
//
// From: Stephen Moshier
// To: Eugene Brevdo
// Subject: Re: Permission to wrap several cephes functions in Eigen
//
// Hello Eugene,
//
// Thank you for writing.
//
// If your licensing is similar to BSD, the formal way that has been
// handled is simply to add a statement to the effect that you are incorporating
// the Cephes software by permission of the author.
//
// Good luck with your project,
// Steve
namespace cephes {
/* polevl (modified for Eigen)
*
* Evaluate polynomial
*
*
*
* SYNOPSIS:
*
* int N;
* Scalar x, y, coef[N+1];
*
* y = polevl<decltype(x), N>( x, coef);
*
*
*
* DESCRIPTION:
*
* Evaluates polynomial of degree N:
*
 * y = C_0 + C_1 x + C_2 x^2 + ... + C_N x^N
 *
 * Coefficients are stored in reverse order:
 *
 * coef[0] = C_N, ..., coef[N] = C_0.
*
* The function p1evl() assumes that coef[N] = 1.0 and is
* omitted from the array. Its calling arguments are
* otherwise the same as polevl().
*
*
* The Eigen implementation is templatized. For best speed, store
* coef as a const array (constexpr), e.g.
*
* const double coef[] = {1.0, 2.0, 3.0, ...};
*
*/
template <typename Scalar, int N>
struct polevl {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
static Scalar run(const Scalar x, const Scalar coef[]) {
EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
return polevl<Scalar, N - 1>::run(x, coef) * x + coef[N];
}
};
template <typename Scalar>
struct polevl<Scalar, 0> {
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
static Scalar run(const Scalar, const Scalar coef[]) {
return coef[0];
}
};
} // end namespace cephes
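A standalone sketch of the same compile-time Horner recursion (illustrative names only, not Eigen's header): the coefficient array is highest-degree-first, matching the cephes::polevl convention documented above.

#include <iostream>

template <typename Scalar, int N>
struct Polevl {
  static Scalar run(Scalar x, const Scalar coef[]) {
    return Polevl<Scalar, N - 1>::run(x, coef) * x + coef[N];  // Horner step
  }
};

template <typename Scalar>
struct Polevl<Scalar, 0> {
  static Scalar run(Scalar, const Scalar coef[]) { return coef[0]; }
};

int main() {
  const double coef[] = {2.0, 3.0, 4.0};                   // p(x) = 2x^2 + 3x + 4
  std::cout << Polevl<double, 2>::run(5.0, coef) << "\n";  // 69
}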
/**************************************************************************** /****************************************************************************
* Implementation of lgamma * * Implementation of lgamma *
****************************************************************************/ ****************************************************************************/
template<typename Scalar> template <typename Scalar>
struct lgamma_impl struct lgamma_impl {
{
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Scalar run(const Scalar&) static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
{
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
THIS_TYPE_IS_NOT_SUPPORTED); THIS_TYPE_IS_NOT_SUPPORTED);
return Scalar(0); return Scalar(0);
} }
}; };
template<typename Scalar> template <typename Scalar>
struct lgamma_retval struct lgamma_retval {
{
typedef Scalar type; typedef Scalar type;
}; };
#ifdef EIGEN_HAS_C99_MATH #ifdef EIGEN_HAS_C99_MATH
template<> template <>
struct lgamma_impl<float> struct lgamma_impl<float> {
{
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE double run(const float& x) { return ::lgammaf(x); } static EIGEN_STRONG_INLINE float run(float x) { return ::lgammaf(x); }
}; };
template<> template <>
struct lgamma_impl<double> struct lgamma_impl<double> {
{
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE double run(const double& x) { return ::lgamma(x); } static EIGEN_STRONG_INLINE double run(double x) { return ::lgamma(x); }
}; };
#endif #endif
/****************************************************************************
* Implementation of digamma (psi) *
****************************************************************************/
#ifdef EIGEN_HAS_C99_MATH
/*
*
* Polynomial evaluation helper for the Psi (digamma) function.
*
* digamma_impl_maybe_poly::run(s) evaluates the asymptotic Psi expansion for
* input Scalar s, assuming s is above 10.0.
*
* If s is above a certain threshold for the given Scalar type, zero
* is returned. Otherwise the polynomial is evaluated with enough
* coefficients for results matching Scalar machine precision.
*
*
*/
template <typename Scalar>
struct digamma_impl_maybe_poly {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
THIS_TYPE_IS_NOT_SUPPORTED);
return Scalar(0);
}
};
template <>
struct digamma_impl_maybe_poly<float> {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE float run(const float s) {
const float A[] = {
-4.16666666666666666667E-3,
3.96825396825396825397E-3,
-8.33333333333333333333E-3,
8.33333333333333333333E-2
};
float z;
if (s < 1.0e8f) {
z = 1.0f / (s * s);
return z * cephes::polevl<float, 3>::run(z, A);
} else return 0.0f;
}
};
template <>
struct digamma_impl_maybe_poly<double> {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE double run(const double s) {
const double A[] = {
8.33333333333333333333E-2,
-2.10927960927960927961E-2,
7.57575757575757575758E-3,
-4.16666666666666666667E-3,
3.96825396825396825397E-3,
-8.33333333333333333333E-3,
8.33333333333333333333E-2
};
double z;
if (s < 1.0e17) {
z = 1.0 / (s * s);
return z * cephes::polevl<double, 6>::run(z, A);
}
else return 0.0;
}
};
#endif // EIGEN_HAS_C99_MATH
template <typename Scalar>
struct digamma_retval {
typedef Scalar type;
};
#ifdef EIGEN_HAS_C99_MATH
template <typename Scalar>
struct digamma_impl {
EIGEN_DEVICE_FUNC
static Scalar run(Scalar x) {
/*
*
* Psi (digamma) function (modified for Eigen)
*
*
* SYNOPSIS:
*
* double x, y, psi();
*
* y = psi( x );
*
*
* DESCRIPTION:
*
 *   psi(x) = d/dx ln Gamma(x)
 *
 * is the logarithmic derivative of the gamma function.
 * For integer x,
 *
 *   psi(n) = -EUL + sum_{k=1}^{n-1} 1/k.
*
* If x is negative, it is transformed to a positive argument by the
* reflection formula psi(1-x) = psi(x) + pi cot(pi x).
* For general positive x, the argument is made greater than 10
* using the recurrence psi(x+1) = psi(x) + 1/x.
* Then the following asymptotic expansion is applied:
*
 *   psi(x) = log(x) - 1/(2x) - sum_{k=1}^{inf} B_{2k} / (2k x^{2k})
 *
 * where the B_{2k} are Bernoulli numbers.
*
* ACCURACY (float):
* Relative error (except absolute when |psi| < 1):
* arithmetic domain # trials peak rms
* IEEE 0,30 30000 1.3e-15 1.4e-16
* IEEE -30,0 40000 1.5e-15 2.2e-16
*
* ACCURACY (double):
* Absolute error, relative when |psi| > 1 :
* arithmetic domain # trials peak rms
* IEEE -33,0 30000 8.2e-7 1.2e-7
* IEEE 0,33 100000 7.3e-7 7.7e-8
*
* ERROR MESSAGES:
* message condition value returned
* psi singularity x integer <=0 INFINITY
*/
Scalar p, q, nz, s, w, y;
bool negative;
const Scalar maxnum = std::numeric_limits<Scalar>::infinity();
const Scalar m_pi = 3.14159265358979323846;
negative = 0;
nz = 0.0;
const Scalar zero = 0.0;
const Scalar one = 1.0;
const Scalar half = 0.5;
if (x <= zero) {
negative = one;
q = x;
p = ::floor(q);
if (p == q) {
return maxnum;
}
/* Remove the zeros of tan(m_pi x)
* by subtracting the nearest integer from x
*/
nz = q - p;
if (nz != half) {
if (nz > half) {
p += one;
nz = q - p;
}
nz = m_pi / ::tan(m_pi * nz);
}
else {
nz = zero;
}
x = one - x;
}
/* use the recurrence psi(x+1) = psi(x) + 1/x. */
s = x;
w = zero;
while (s < Scalar(10)) {
w += one / s;
s += one;
}
y = digamma_impl_maybe_poly<Scalar>::run(s);
y = ::log(s) - (half / s) - y - w;
return (negative) ? y - nz : y;
}
};
#endif // EIGEN_HAS_C99_MATH
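A quick numerical sanity sketch of the two identities the implementation relies on, using only <cmath> (not Eigen): psi is the derivative of lgamma, and psi(x+1) = psi(x) + 1/x, which is the recurrence used above to push the argument past 10 before the asymptotic series is applied.

#include <cmath>
#include <cstdio>

// Central-difference approximation of psi(x) from std::lgamma.
static double psi_approx(double x) {
  const double h = 1e-5;
  return (std::lgamma(x + h) - std::lgamma(x - h)) / (2.0 * h);
}

int main() {
  const double x = 3.25;
  std::printf("psi(x+1)     ~ %.6f\n", psi_approx(x + 1.0));
  std::printf("psi(x) + 1/x ~ %.6f\n", psi_approx(x) + 1.0 / x);  // should agree closely
  std::printf("psi(1)       ~ %.6f (expect -0.577216)\n", psi_approx(1.0));
}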
/**************************************************************************** /****************************************************************************
* Implementation of erf * * Implementation of erf *
****************************************************************************/ ****************************************************************************/
template<typename Scalar> template <typename Scalar>
struct erf_impl struct erf_impl {
{
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Scalar run(const Scalar&) static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
{
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
THIS_TYPE_IS_NOT_SUPPORTED); THIS_TYPE_IS_NOT_SUPPORTED);
return Scalar(0); return Scalar(0);
} }
}; };
template<typename Scalar> template <typename Scalar>
struct erf_retval struct erf_retval {
{
typedef Scalar type; typedef Scalar type;
}; };
#ifdef EIGEN_HAS_C99_MATH #ifdef EIGEN_HAS_C99_MATH
template<> template <>
struct erf_impl<float> struct erf_impl<float> {
{
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE float run(const float& x) { return ::erff(x); } static EIGEN_STRONG_INLINE float run(float x) { return ::erff(x); }
}; };
template<> template <>
struct erf_impl<double> struct erf_impl<double> {
{
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE double run(const double& x) { return ::erf(x); } static EIGEN_STRONG_INLINE double run(double x) { return ::erf(x); }
}; };
#endif // EIGEN_HAS_C99_MATH #endif // EIGEN_HAS_C99_MATH
@ -93,35 +363,30 @@ struct erf_impl<double>
* Implementation of erfc * * Implementation of erfc *
****************************************************************************/ ****************************************************************************/
template<typename Scalar> template <typename Scalar>
struct erfc_impl struct erfc_impl {
{
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Scalar run(const Scalar&) static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
{
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false), EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
THIS_TYPE_IS_NOT_SUPPORTED); THIS_TYPE_IS_NOT_SUPPORTED);
return Scalar(0); return Scalar(0);
} }
}; };
template<typename Scalar> template <typename Scalar>
struct erfc_retval struct erfc_retval {
{
typedef Scalar type; typedef Scalar type;
}; };
#ifdef EIGEN_HAS_C99_MATH #ifdef EIGEN_HAS_C99_MATH
template<> template <>
struct erfc_impl<float> struct erfc_impl<float> {
{
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE float run(const float x) { return ::erfcf(x); } static EIGEN_STRONG_INLINE float run(const float x) { return ::erfcf(x); }
}; };
template<> template <>
struct erfc_impl<double> struct erfc_impl<double> {
{
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE double run(const double x) { return ::erfc(x); } static EIGEN_STRONG_INLINE double run(const double x) { return ::erfc(x); }
}; };
@ -129,27 +394,29 @@ struct erfc_impl<double>
} // end namespace internal } // end namespace internal
namespace numext { namespace numext {
template <typename Scalar>
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(lgamma, Scalar)
lgamma(const Scalar& x) {
  return EIGEN_MATHFUNC_IMPL(lgamma, Scalar)::run(x);
}

template <typename Scalar>
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(digamma, Scalar)
digamma(const Scalar& x) {
  return EIGEN_MATHFUNC_IMPL(digamma, Scalar)::run(x);
}

template <typename Scalar>
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erf, Scalar)
erf(const Scalar& x) {
  return EIGEN_MATHFUNC_IMPL(erf, Scalar)::run(x);
}

template <typename Scalar>
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar)
erfc(const Scalar& x) {
  return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x);
}

View File

@ -54,6 +54,8 @@ template<typename MatrixType> class Transpose
{ {
public: public:
typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base; typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose) EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)
typedef typename internal::remove_all<MatrixType>::type NestedExpression; typedef typename internal::remove_all<MatrixType>::type NestedExpression;
@ -68,16 +70,16 @@ template<typename MatrixType> class Transpose
/** \returns the nested expression */ /** \returns the nested expression */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
const typename internal::remove_all<typename MatrixType::Nested>::type& const typename internal::remove_all<MatrixTypeNested>::type&
nestedExpression() const { return m_matrix; } nestedExpression() const { return m_matrix; }
/** \returns the nested expression */ /** \returns the nested expression */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
typename internal::remove_all<typename MatrixType::Nested>::type& typename internal::remove_reference<MatrixTypeNested>::type&
nestedExpression() { return m_matrix.const_cast_derived(); } nestedExpression() { return m_matrix; }
protected: protected:
typename MatrixType::Nested m_matrix; typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
}; };
namespace internal { namespace internal {

View File

@ -325,7 +325,7 @@ class TranspositionsWrapper
protected: protected:
const typename IndicesType::Nested m_indices; typename IndicesType::Nested m_indices;
}; };

View File

@ -168,7 +168,7 @@ namespace internal {
template<typename MatrixType, unsigned int _Mode> template<typename MatrixType, unsigned int _Mode>
struct traits<TriangularView<MatrixType, _Mode> > : traits<MatrixType> struct traits<TriangularView<MatrixType, _Mode> > : traits<MatrixType>
{ {
typedef typename ref_selector<MatrixType>::type MatrixTypeNested; typedef typename ref_selector<MatrixType>::non_const_type MatrixTypeNested;
typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef; typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef;
typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned; typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
typedef typename MatrixType::PlainObject FullMatrixType; typedef typename MatrixType::PlainObject FullMatrixType;
@ -213,7 +213,6 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
IsVectorAtCompileTime = false IsVectorAtCompileTime = false
}; };
// FIXME This, combined with const_cast_derived in transpose() leads to a const-correctness loophole
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
explicit inline TriangularView(MatrixType& matrix) : m_matrix(matrix) explicit inline TriangularView(MatrixType& matrix) : m_matrix(matrix)
{} {}
@ -235,7 +234,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
/** \returns a reference to the nested expression */ /** \returns a reference to the nested expression */
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
NestedExpression& nestedExpression() { return *const_cast<NestedExpression*>(&m_matrix); } NestedExpression& nestedExpression() { return m_matrix; }
typedef TriangularView<const MatrixConjugateReturnType,Mode> ConjugateReturnType; typedef TriangularView<const MatrixConjugateReturnType,Mode> ConjugateReturnType;
/** \sa MatrixBase::conjugate() const */ /** \sa MatrixBase::conjugate() const */
@ -255,7 +254,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
inline TransposeReturnType transpose() inline TransposeReturnType transpose()
{ {
EIGEN_STATIC_ASSERT_LVALUE(MatrixType) EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
typename MatrixType::TransposeReturnType tmp(m_matrix.const_cast_derived()); typename MatrixType::TransposeReturnType tmp(m_matrix);
return TransposeReturnType(tmp); return TransposeReturnType(tmp);
} }
@ -418,7 +417,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
{ {
EIGEN_STATIC_ASSERT_LVALUE(TriangularViewType); EIGEN_STATIC_ASSERT_LVALUE(TriangularViewType);
Base::check_coordinates_internal(row, col); Base::check_coordinates_internal(row, col);
return derived().nestedExpression().const_cast_derived().coeffRef(row, col); return derived().nestedExpression().coeffRef(row, col);
} }
/** Assigns a triangular matrix to a triangular part of a dense matrix */ /** Assigns a triangular matrix to a triangular part of a dense matrix */

View File

@ -124,7 +124,7 @@ struct member_lpnorm {
template <typename BinaryOp, typename Scalar> template <typename BinaryOp, typename Scalar>
struct member_redux { struct member_redux {
typedef typename result_of< typedef typename result_of<
BinaryOp(Scalar,Scalar) BinaryOp(const Scalar&,const Scalar&)
>::type result_type; >::type result_type;
template<typename _Scalar, int Size> struct Cost template<typename _Scalar, int Size> struct Cost
{ enum { value = (Size-1) * functor_traits<BinaryOp>::Cost }; }; { enum { value = (Size-1) * functor_traits<BinaryOp>::Cost }; };

View File

@ -197,7 +197,7 @@ struct functor_traits<max_coeff_visitor<Scalar> > {
/** \returns the minimum of all coefficients of *this and puts in *row and *col its location. /** \returns the minimum of all coefficients of *this and puts in *row and *col its location.
* \warning the result is undefined if \c *this contains NaN. * \warning the result is undefined if \c *this contains NaN.
* *
* \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visitor(), DenseBase::minCoeff() * \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visit(), DenseBase::minCoeff()
*/ */
template<typename Derived> template<typename Derived>
template<typename IndexType> template<typename IndexType>
@ -215,7 +215,7 @@ DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
/** \returns the minimum of all coefficients of *this and puts in *index its location. /** \returns the minimum of all coefficients of *this and puts in *index its location.
* \warning the result is undefined if \c *this contains NaN. * \warning the result is undefined if \c *this contains NaN.
* *
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::minCoeff() * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::minCoeff()
*/ */
template<typename Derived> template<typename Derived>
template<typename IndexType> template<typename IndexType>
@ -233,7 +233,7 @@ DenseBase<Derived>::minCoeff(IndexType* index) const
/** \returns the maximum of all coefficients of *this and puts in *row and *col its location. /** \returns the maximum of all coefficients of *this and puts in *row and *col its location.
* \warning the result is undefined if \c *this contains NaN. * \warning the result is undefined if \c *this contains NaN.
* *
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff() * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::maxCoeff()
*/ */
template<typename Derived> template<typename Derived>
template<typename IndexType> template<typename IndexType>

View File

@ -78,6 +78,20 @@ double2 plgamma<double2>(const double2& a)
return make_double2(lgamma(a.x), lgamma(a.y)); return make_double2(lgamma(a.x), lgamma(a.y));
} }
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 pdigamma<float4>(const float4& a)
{
using numext::digamma;
return make_float4(digamma(a.x), digamma(a.y), digamma(a.z), digamma(a.w));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 pdigamma<double2>(const double2& a)
{
using numext::digamma;
return make_double2(digamma(a.x), digamma(a.y));
}
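A conceptual sketch only (plain C++, with an illustrative 4-lane struct standing in for the CUDA float4 used above): a packet specialization of a special function simply applies the scalar kernel to each lane of the packet.

#include <cmath>
#include <cstdio>

struct Packet4f { float v[4]; };  // stand-in for a 4-lane packet type

template <typename F>
Packet4f apply_lanes(const Packet4f& a, F f) {
  Packet4f r;
  for (int i = 0; i < 4; ++i) r.v[i] = f(a.v[i]);  // one scalar evaluation per lane
  return r;
}

int main() {
  Packet4f p = {{1.f, 2.f, 3.f, 4.f}};
  Packet4f q = apply_lanes(p, [](float x) { return std::lgamma(x); });
  std::printf("%g %g %g %g\n", q.v[0], q.v[1], q.v[2], q.v[3]);  // 0 0 0.693147 1.79176
}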
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 perf<float4>(const float4& a) float4 perf<float4>(const float4& a)
{ {

View File

@ -40,6 +40,7 @@ template<> struct packet_traits<float> : default_packet_traits
HasSqrt = 1, HasSqrt = 1,
HasRsqrt = 1, HasRsqrt = 1,
HasLGamma = 1, HasLGamma = 1,
HasDiGamma = 1,
HasErf = 1, HasErf = 1,
HasErfc = 1, HasErfc = 1,
@ -63,6 +64,7 @@ template<> struct packet_traits<double> : default_packet_traits
HasSqrt = 1, HasSqrt = 1,
HasRsqrt = 1, HasRsqrt = 1,
HasLGamma = 1, HasLGamma = 1,
HasDiGamma = 1,
HasErf = 1, HasErf = 1,
HasErfc = 1, HasErfc = 1,

View File

@ -37,7 +37,7 @@ template<typename Scalar>
struct functor_traits<scalar_identity_op<Scalar> > struct functor_traits<scalar_identity_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; }; { enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
template <typename Scalar, typename Packet, bool RandomAccess> struct linspaced_op_impl; template <typename Scalar, typename Packet, bool RandomAccess, bool IsInteger> struct linspaced_op_impl;
// linear access for packet ops: // linear access for packet ops:
// 1) initialization // 1) initialization
@ -48,12 +48,12 @@ template <typename Scalar, typename Packet, bool RandomAccess> struct linspaced_
// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp) // TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp)
// in order to avoid the padd() in operator() ? // in order to avoid the padd() in operator() ?
template <typename Scalar, typename Packet> template <typename Scalar, typename Packet>
struct linspaced_op_impl<Scalar,Packet,false> struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/false,/*IsInteger*/false>
{ {
linspaced_op_impl(const Scalar& low, const Scalar& step) : linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
m_low(low), m_step(step), m_low(low), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
m_packetStep(pset1<Packet>(unpacket_traits<Packet>::size*step)), m_packetStep(pset1<Packet>(unpacket_traits<Packet>::size*m_step)),
m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(step),plset<Packet>(-unpacket_traits<Packet>::size)))) {} m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(m_step),plset<Packet>(-unpacket_traits<Packet>::size)))) {}
template<typename Index> template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const
@ -75,11 +75,11 @@ struct linspaced_op_impl<Scalar,Packet,false>
// 1) each step // 1) each step
// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) ) // [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
template <typename Scalar, typename Packet> template <typename Scalar, typename Packet>
struct linspaced_op_impl<Scalar,Packet,true> struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/false>
{ {
linspaced_op_impl(const Scalar& low, const Scalar& step) : linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
m_low(low), m_step(step), m_low(low), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Packet>(0)) {} m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Packet>(0)) {}
template<typename Index> template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
@ -95,6 +95,31 @@ struct linspaced_op_impl<Scalar,Packet,true>
const Packet m_interPacket; const Packet m_interPacket;
}; };
template <typename Scalar, typename Packet>
struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/true>
{
linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
m_low(low), m_length(high-low), m_divisor(num_steps==1?1:num_steps-1), m_interPacket(plset<Packet>(0))
{}
template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Scalar operator() (Index i) const {
return m_low + (m_length*Scalar(i))/m_divisor;
}
template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Packet packetOp(Index i) const {
return internal::padd(pset1<Packet>(m_low), pdiv(pmul(pset1<Packet>(m_length), padd(pset1<Packet>(Scalar(i)),m_interPacket)),
pset1<Packet>(m_divisor))); }
const Scalar m_low;
const Scalar m_length;
const Index m_divisor;
const Packet m_interPacket;
};
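A sketch of the integer path above evaluated by hand (plain C++, not calling Eigen): low + (length * i) / divisor with integer division spreads num_steps values across [low, high] and lands exactly on both endpoints, which is the behaviour requested in bug 698.

#include <cstdio>

int main() {
  const int low = 0, high = 7, num_steps = 4;
  const int length = high - low;
  const int divisor = num_steps == 1 ? 1 : num_steps - 1;
  for (int i = 0; i < num_steps; ++i)
    std::printf("%d ", low + (length * i) / divisor);  // 0 2 4 7
  std::printf("\n");
}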
// ----- Linspace functor ---------------------------------------------------------------- // ----- Linspace functor ----------------------------------------------------------------
// Forward declaration (we default to random access which does not really give // Forward declaration (we default to random access which does not really give
@ -102,10 +127,20 @@ struct linspaced_op_impl<Scalar,Packet,true>
// nested expressions). // nested expressions).
template <typename Scalar, typename PacketType, bool RandomAccess = true> struct linspaced_op; template <typename Scalar, typename PacketType, bool RandomAccess = true> struct linspaced_op;
template <typename Scalar, typename PacketType, bool RandomAccess> struct functor_traits< linspaced_op<Scalar,PacketType,RandomAccess> > template <typename Scalar, typename PacketType, bool RandomAccess> struct functor_traits< linspaced_op<Scalar,PacketType,RandomAccess> >
{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::HasSetLinear, IsRepeatable = true }; }; {
enum
{
Cost = 1,
PacketAccess = packet_traits<Scalar>::HasSetLinear
&& ((!NumTraits<Scalar>::IsInteger) || packet_traits<Scalar>::HasDiv),
IsRepeatable = true
};
};
template <typename Scalar, typename PacketType, bool RandomAccess> struct linspaced_op template <typename Scalar, typename PacketType, bool RandomAccess> struct linspaced_op
{ {
linspaced_op(const Scalar& low, const Scalar& high, Index num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1))) {} linspaced_op(const Scalar& low, const Scalar& high, Index num_steps)
: impl((num_steps==1 ? high : low),high,num_steps)
{}
template<typename Index> template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); }
@ -134,7 +169,9 @@ template <typename Scalar, typename PacketType, bool RandomAccess> struct linspa
// This proxy object handles the actual required temporaries, the different // This proxy object handles the actual required temporaries, the different
// implementations (random vs. sequential access) as well as the // implementations (random vs. sequential access) as well as the
// correct piping to size 2/4 packet operations. // correct piping to size 2/4 packet operations.
const linspaced_op_impl<Scalar,PacketType,RandomAccess> impl; // As long as we don't have a Bresenham-like implementation for linear-access and integer types,
// we have to by-pass RandomAccess for integer types. See bug 698.
const linspaced_op_impl<Scalar,PacketType,(NumTraits<Scalar>::IsInteger?true:RandomAccess),NumTraits<Scalar>::IsInteger> impl;
}; };
// all functors allow linear access, except scalar_identity_op. So we fix here a quick meta // all functors allow linear access, except scalar_identity_op. So we fix here a quick meta

View File

@ -427,6 +427,28 @@ struct functor_traits<scalar_lgamma_op<Scalar> >
}; };
}; };
/** \internal
* \brief Template functor to compute psi, the derivative of lgamma of a scalar.
* \sa class CwiseUnaryOp, Cwise::digamma()
*/
template<typename Scalar> struct scalar_digamma_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_digamma_op)
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
using numext::digamma; return digamma(a);
}
typedef typename packet_traits<Scalar>::type Packet;
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pdigamma(a); }
};
template<typename Scalar>
struct functor_traits<scalar_digamma_op<Scalar> >
{
enum {
// Guesstimate
Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasDiGamma
};
};
/** \internal /** \internal
* \brief Template functor to compute the Gauss error function of a * \brief Template functor to compute the Gauss error function of a
* scalar * scalar
@ -644,7 +666,7 @@ struct functor_traits<scalar_floor_op<Scalar> >
template<typename Scalar> struct scalar_ceil_op { template<typename Scalar> struct scalar_ceil_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_ceil_op) EIGEN_EMPTY_STRUCT_CTOR(scalar_ceil_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::ceil(a); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::ceil(a); }
typedef typename packet_traits<Scalar>::type Packet; template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pceil(a); } EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pceil(a); }
}; };
template<typename Scalar> template<typename Scalar>

View File

@ -252,7 +252,7 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n
// we have both L2 and L3, and problem is small enough to be kept in L2 // we have both L2 and L3, and problem is small enough to be kept in L2
// Let's choose m such that lhs's block fit in 1/3 of L2 // Let's choose m such that lhs's block fit in 1/3 of L2
actual_lm = l2; actual_lm = l2;
max_mc = 576; max_mc = (std::min<Index>)(576,max_mc);
} }
Index mc = (std::min<Index>)(actual_lm/(3*k*sizeof(LhsScalar)), max_mc); Index mc = (std::min<Index>)(actual_lm/(3*k*sizeof(LhsScalar)), max_mc);
if (mc > Traits::mr) mc -= mc % Traits::mr; if (mc > Traits::mr) mc -= mc % Traits::mr;

View File

@ -352,9 +352,8 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
} }
else // no l3 blocking else // no l3 blocking
{ {
Index m = this->m_mc;
Index n = this->m_nc; Index n = this->m_nc;
computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, n, num_threads); computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, n, num_threads);
} }
m_sizeA = this->m_mc * this->m_kc; m_sizeA = this->m_mc * this->m_kc;

View File

@ -42,13 +42,14 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
{ {
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride, static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha) const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride,
const ResScalar& alpha, level3_blocking<LhsScalar,RhsScalar>& blocking)
{ {
general_matrix_matrix_triangular_product<Index, general_matrix_matrix_triangular_product<Index,
RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs, RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs, LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
ColMajor, UpLo==Lower?Upper:Lower> ColMajor, UpLo==Lower?Upper:Lower>
::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha); ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking);
} }
}; };
@ -58,7 +59,8 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
{ {
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar; typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride, static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride, const ResScalar& alpha) const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride,
const ResScalar& alpha, level3_blocking<LhsScalar,RhsScalar>& blocking)
{ {
typedef gebp_traits<LhsScalar,RhsScalar> Traits; typedef gebp_traits<LhsScalar,RhsScalar> Traits;
@ -69,16 +71,18 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
RhsMapper rhs(_rhs,rhsStride); RhsMapper rhs(_rhs,rhsStride);
ResMapper res(_res, resStride); ResMapper res(_res, resStride);
Index kc = depth; // cache block size along the K direction Index kc = blocking.kc();
Index mc = size; // cache block size along the M direction Index mc = (std::min)(size,blocking.mc());
Index nc = size; // cache block size along the N direction
computeProductBlockingSizes<LhsScalar,RhsScalar>(kc, mc, nc, 1);
// !!! mc must be a multiple of nr: // !!! mc must be a multiple of nr:
if(mc > Traits::nr) if(mc > Traits::nr)
mc = (mc/Traits::nr)*Traits::nr; mc = (mc/Traits::nr)*Traits::nr;
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, kc*mc, 0); std::size_t sizeA = kc*mc;
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, kc*size, 0); std::size_t sizeB = kc*size;
ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs; gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
@ -136,7 +140,7 @@ struct tribb_kernel
typedef typename Traits::ResScalar ResScalar; typedef typename Traits::ResScalar ResScalar;
enum { enum {
BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr) BlockSize = meta_least_common_multiple<EIGEN_PLAIN_ENUM_MAX(mr,nr),EIGEN_PLAIN_ENUM_MIN(mr,nr)>::ret
}; };
void operator()(ResScalar* _res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha) void operator()(ResScalar* _res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha)
{ {
@ -256,13 +260,27 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived()); typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
enum {
IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0,
LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0,
RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0
};
Index size = mat.cols();
Index depth = actualLhs.cols();
typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,typename Lhs::Scalar,typename Rhs::Scalar,
MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, _ActualRhs::MaxColsAtCompileTime> BlockingType;
BlockingType blocking(size, size, depth, 1, false);
internal::general_matrix_matrix_triangular_product<Index, internal::general_matrix_matrix_triangular_product<Index,
typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate, typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate, typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo> IsRowMajor ? RowMajor : ColMajor, UpLo>
::run(mat.cols(), actualLhs.cols(), ::run(size, depth,
&actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(), &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
mat.data(), mat.outerStride(), actualAlpha); mat.data(), mat.outerStride(), actualAlpha, blocking);
} }
}; };

View File

@ -291,7 +291,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
const Scalar* lhs, Index lhsStride, const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride, const Scalar* rhs, Index rhsStride,
Scalar* res, Index resStride, Scalar* res, Index resStride,
const Scalar& alpha) const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{ {
product_selfadjoint_matrix<Scalar, Index, product_selfadjoint_matrix<Scalar, Index,
EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor, EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
@ -299,7 +299,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor, EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs), LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
ColMajor> ColMajor>
::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha); ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking);
} }
}; };
@ -314,7 +314,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
const Scalar* _lhs, Index lhsStride, const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride, const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride, Scalar* res, Index resStride,
const Scalar& alpha); const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
}; };
template <typename Scalar, typename Index, template <typename Scalar, typename Index,
@ -325,7 +325,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
const Scalar* _lhs, Index lhsStride, const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride, const Scalar* _rhs, Index rhsStride,
Scalar* _res, Index resStride, Scalar* _res, Index resStride,
const Scalar& alpha) const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{ {
Index size = rows; Index size = rows;
@ -340,17 +340,14 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
RhsMapper rhs(_rhs,rhsStride); RhsMapper rhs(_rhs,rhsStride);
ResMapper res(_res, resStride); ResMapper res(_res, resStride);
Index kc = size; // cache block size along the K direction Index kc = blocking.kc(); // cache block size along the K direction
Index mc = rows; // cache block size along the M direction Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
Index nc = cols; // cache block size along the N direction // kc must be smaller than mc
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc, 1);
// kc must smaller than mc
kc = (std::min)(kc,mc); kc = (std::min)(kc,mc);
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols; std::size_t sizeB = kc*cols;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0); ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0); ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
Scalar* blockB = allocatedBlockB;
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
@ -410,7 +407,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
const Scalar* _lhs, Index lhsStride, const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride, const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride, Scalar* res, Index resStride,
const Scalar& alpha); const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
}; };
template <typename Scalar, typename Index, template <typename Scalar, typename Index,
@ -421,7 +418,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
const Scalar* _lhs, Index lhsStride, const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride, const Scalar* _rhs, Index rhsStride,
Scalar* _res, Index resStride, Scalar* _res, Index resStride,
const Scalar& alpha) const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{ {
Index size = cols; Index size = cols;
@ -432,14 +429,12 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
LhsMapper lhs(_lhs,lhsStride); LhsMapper lhs(_lhs,lhsStride);
ResMapper res(_res,resStride); ResMapper res(_res,resStride);
Index kc = size; // cache block size along the K direction Index kc = blocking.kc(); // cache block size along the K direction
Index mc = rows; // cache block size along the M direction Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
Index nc = cols; // cache block size along the N direction std::size_t sizeA = kc*mc;
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc, 1);
std::size_t sizeB = kc*cols; std::size_t sizeB = kc*cols;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0); ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0); ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
Scalar* blockB = allocatedBlockB;
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
@ -498,6 +493,11 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
* RhsBlasTraits::extractScalarFactor(a_rhs); * RhsBlasTraits::extractScalarFactor(a_rhs);
typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,1> BlockingType;
BlockingType blocking(lhs.rows(), rhs.cols(), lhs.cols(), 1, false);
internal::product_selfadjoint_matrix<Scalar, Index, internal::product_selfadjoint_matrix<Scalar, Index,
EIGEN_LOGICAL_XOR(LhsIsUpper,internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint, EIGEN_LOGICAL_XOR(LhsIsUpper,internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)), NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
@ -509,7 +509,7 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
&rhs.coeffRef(0,0), rhs.outerStride(), // rhs info &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
&dst.coeffRef(0,0), dst.outerStride(), // result info &dst.coeffRef(0,0), dst.outerStride(), // result info
actualAlpha // alpha actualAlpha, blocking // alpha
); );
} }
}; };

View File

@ -92,15 +92,27 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived()); Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 }; enum {
IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0,
OtherIsRowMajor = _ActualOtherType::Flags&RowMajorBit ? 1 : 0
};
Index size = mat.cols();
Index depth = actualOther.cols();
typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,Scalar,Scalar,
MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, _ActualOtherType::MaxColsAtCompileTime> BlockingType;
BlockingType blocking(size, size, depth, 1, false);
internal::general_matrix_matrix_triangular_product<Index, internal::general_matrix_matrix_triangular_product<Index,
Scalar, _ActualOtherType::Flags&RowMajorBit ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex, Scalar, OtherIsRowMajor ? RowMajor : ColMajor, OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
Scalar, _ActualOtherType::Flags&RowMajorBit ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex, Scalar, OtherIsRowMajor ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,
MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo> IsRowMajor ? RowMajor : ColMajor, UpLo>
::run(mat.cols(), actualOther.cols(), ::run(size, depth,
&actualOther.coeffRef(0,0), actualOther.outerStride(), &actualOther.coeffRef(0,0), actualOther.outerStride(), &actualOther.coeffRef(0,0), actualOther.outerStride(), &actualOther.coeffRef(0,0), actualOther.outerStride(),
mat.data(), mat.outerStride(), actualAlpha); mat.data(), mat.outerStride(), actualAlpha, blocking);
} }
}; };

View File

@ -126,6 +126,10 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
Index kc = blocking.kc(); // cache block size along the K direction Index kc = blocking.kc(); // cache block size along the K direction
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
// The small panel size must not be larger than the blocking size.
// In practice this should never happen because SmallPanelWidth^2 is very small
// compared to the L2 cache size, but let's be safe:
Index panelWidth = (std::min)(Index(SmallPanelWidth),(std::min)(kc,mc));
std::size_t sizeA = kc*mc; std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols; std::size_t sizeB = kc*cols;
@ -169,9 +173,9 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
if(IsLower || actual_k2<rows) if(IsLower || actual_k2<rows)
{ {
// for each small vertical panels of lhs // for each small vertical panels of lhs
for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth) for (Index k1=0; k1<actual_kc; k1+=panelWidth)
{ {
Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth); Index actualPanelWidth = std::min<Index>(actual_kc-k1, panelWidth);
Index lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1; Index lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1;
Index startBlock = actual_k2+k1; Index startBlock = actual_k2+k1;
Index blockBOffset = k1; Index blockBOffset = k1;

View File

@ -15,10 +15,11 @@
// 4522 - 'class' : multiple assignment operators specified // 4522 - 'class' : multiple assignment operators specified
// 4700 - uninitialized local variable 'xyz' used // 4700 - uninitialized local variable 'xyz' used
// 4717 - 'function' : recursive on all control paths, function will cause runtime stack overflow // 4717 - 'function' : recursive on all control paths, function will cause runtime stack overflow
// 4800 - 'type' : forcing value to bool 'true' or 'false' (performance warning)
#ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
#pragma warning( push ) #pragma warning( push )
#endif #endif
#pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4717 ) #pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4717 4800)
#elif defined __INTEL_COMPILER #elif defined __INTEL_COMPILER
// 2196 - routine is both "inline" and "noinline" ("noinline" assumed) // 2196 - routine is both "inline" and "noinline" ("noinline" assumed)
// ICC 12 generates this warning even without any inline keyword, when defining class methods 'inline' i.e. inside of class body // ICC 12 generates this warning even without any inline keyword, when defining class methods 'inline' i.e. inside of class body

View File

@ -336,7 +336,6 @@
// Do we support r-value references? // Do we support r-value references?
#if (__has_feature(cxx_rvalue_references) || \ #if (__has_feature(cxx_rvalue_references) || \
(defined(__cplusplus) && __cplusplus >= 201103L) || \ (defined(__cplusplus) && __cplusplus >= 201103L) || \
defined(__GXX_EXPERIMENTAL_CXX0X__) || \
(EIGEN_COMP_MSVC >= 1600)) (EIGEN_COMP_MSVC >= 1600))
#define EIGEN_HAVE_RVALUE_REFERENCES #define EIGEN_HAVE_RVALUE_REFERENCES
#endif #endif

View File

@ -526,9 +526,9 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_align
template<int Alignment, typename Scalar, typename Index> template<int Alignment, typename Scalar, typename Index>
EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size) EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size)
{ {
static const Index ScalarSize = sizeof(Scalar); const Index ScalarSize = sizeof(Scalar);
static const Index AlignmentSize = Alignment / ScalarSize; const Index AlignmentSize = Alignment / ScalarSize;
static const Index AlignmentMask = AlignmentSize-1; const Index AlignmentMask = AlignmentSize-1;
if(AlignmentSize<=1) if(AlignmentSize<=1)
{ {

View File

@ -257,7 +257,7 @@ struct has_std_result_type {int a[2];};
struct has_tr1_result {int a[3];}; struct has_tr1_result {int a[3];};
template<typename Func, typename ArgType, int SizeOf=sizeof(has_none)> template<typename Func, typename ArgType, int SizeOf=sizeof(has_none)>
struct unary_result_of_select {typedef ArgType type;}; struct unary_result_of_select {typedef typename internal::remove_all<ArgType>::type type;};
template<typename Func, typename ArgType> template<typename Func, typename ArgType>
struct unary_result_of_select<Func, ArgType, sizeof(has_std_result_type)> {typedef typename Func::result_type type;}; struct unary_result_of_select<Func, ArgType, sizeof(has_std_result_type)> {typedef typename Func::result_type type;};
@ -279,7 +279,7 @@ struct result_of<Func(ArgType)> {
}; };
template<typename Func, typename ArgType0, typename ArgType1, int SizeOf=sizeof(has_none)> template<typename Func, typename ArgType0, typename ArgType1, int SizeOf=sizeof(has_none)>
struct binary_result_of_select {typedef ArgType0 type;}; struct binary_result_of_select {typedef typename internal::remove_all<ArgType0>::type type;};
template<typename Func, typename ArgType0, typename ArgType1> template<typename Func, typename ArgType0, typename ArgType1>
struct binary_result_of_select<Func, ArgType0, ArgType1, sizeof(has_std_result_type)> struct binary_result_of_select<Func, ArgType0, ArgType1, sizeof(has_std_result_type)>
@ -326,6 +326,22 @@ class meta_sqrt
template<int Y, int InfX, int SupX> template<int Y, int InfX, int SupX>
class meta_sqrt<Y, InfX, SupX, true> { public: enum { ret = (SupX*SupX <= Y) ? SupX : InfX }; }; class meta_sqrt<Y, InfX, SupX, true> { public: enum { ret = (SupX*SupX <= Y) ? SupX : InfX }; };
/** \internal Computes the least common multiple of two positive integers A and B
* at compile time. It implements a naive algorithm that tests all multiples of A,
* so it works best when A>=B.
*/
template<int A, int B, int K=1, bool Done = ((A*K)%B)==0>
struct meta_least_common_multiple
{
enum { ret = meta_least_common_multiple<A,B,K+1>::ret };
};
template<int A, int B, int K>
struct meta_least_common_multiple<A,B,K,true>
{
enum { ret = A*K };
};
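For illustration, the recursion above simply tries K = 1, 2, ... until A*K is divisible by B, so the result is the least common multiple of A and B. A standalone sketch (hypothetical names, compiled outside Eigen with C++11) showing the expected values:

// Re-declaration of the same idea under a hypothetical name, for illustration only.
template<int A, int B, int K=1, bool Done = ((A*K)%B)==0>
struct lcm_meta { enum { ret = lcm_meta<A,B,K+1>::ret }; };

template<int A, int B, int K>
struct lcm_meta<A,B,K,true> { enum { ret = A*K }; };

int main()
{
  // lcm(12,8) = 24 and lcm(7,3) = 21, both resolved at compile time.
  static_assert(lcm_meta<12,8>::ret == 24, "lcm(12,8) == 24");
  static_assert(lcm_meta<7,3>::ret  == 21, "lcm(7,3) == 21");
  return 0;
}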
/** \internal determines whether the product of two numeric types is allowed and what the return type is */ /** \internal determines whether the product of two numeric types is allowed and what the return type is */
template<typename T, typename U> struct scalar_product_traits template<typename T, typename U> struct scalar_product_traits
{ {

View File

@ -26,7 +26,7 @@
#ifndef EIGEN_NO_STATIC_ASSERT #ifndef EIGEN_NO_STATIC_ASSERT
#if defined(__GXX_EXPERIMENTAL_CXX0X__) || (EIGEN_COMP_MSVC >= 1600) #if __has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600)
// if native static_assert is enabled, let's use it // if native static_assert is enabled, let's use it
#define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG); #define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG);

View File

@ -466,17 +466,17 @@ struct special_scalar_op_base : public BaseType
template<typename Derived,typename Scalar,typename OtherScalar, typename BaseType> template<typename Derived,typename Scalar,typename OtherScalar, typename BaseType>
struct special_scalar_op_base<Derived,Scalar,OtherScalar,BaseType,true> : public BaseType struct special_scalar_op_base<Derived,Scalar,OtherScalar,BaseType,true> : public BaseType
{ {
const CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, Derived> const CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, const Derived>
operator*(const OtherScalar& scalar) const operator*(const OtherScalar& scalar) const
{ {
#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN #ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
#endif #endif
return CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, Derived> return CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, const Derived>
(*static_cast<const Derived*>(this), scalar_multiple2_op<Scalar,OtherScalar>(scalar)); (*static_cast<const Derived*>(this), scalar_multiple2_op<Scalar,OtherScalar>(scalar));
} }
inline friend const CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, Derived> inline friend const CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, const Derived>
operator*(const OtherScalar& scalar, const Derived& matrix) operator*(const OtherScalar& scalar, const Derived& matrix)
{ {
#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN #ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
@ -485,13 +485,13 @@ struct special_scalar_op_base<Derived,Scalar,OtherScalar,BaseType,true> : publi
return static_cast<const special_scalar_op_base&>(matrix).operator*(scalar); return static_cast<const special_scalar_op_base&>(matrix).operator*(scalar);
} }
const CwiseUnaryOp<scalar_quotient2_op<Scalar,OtherScalar>, Derived> const CwiseUnaryOp<scalar_quotient2_op<Scalar,OtherScalar>, const Derived>
operator/(const OtherScalar& scalar) const operator/(const OtherScalar& scalar) const
{ {
#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN #ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
#endif #endif
return CwiseUnaryOp<scalar_quotient2_op<Scalar,OtherScalar>, Derived> return CwiseUnaryOp<scalar_quotient2_op<Scalar,OtherScalar>, const Derived>
(*static_cast<const Derived*>(this), scalar_quotient2_op<Scalar,OtherScalar>(scalar)); (*static_cast<const Derived*>(this), scalar_quotient2_op<Scalar,OtherScalar>(scalar));
} }
}; };
@ -526,22 +526,21 @@ template <typename A> struct promote_storage_type<const A, A>
* the functor. * the functor.
* The default rules are as follows: * The default rules are as follows:
* \code * \code
* A op A -> A * A op A -> A
* A op dense -> dense * A op dense -> dense
* dense op B -> dense * dense op B -> dense
* A * dense -> A * sparse op dense -> sparse
* dense * B -> B * dense op sparse -> sparse
* \endcode * \endcode
*/ */
template <typename A, typename B, typename Functor> struct cwise_promote_storage_type; template <typename A, typename B, typename Functor> struct cwise_promote_storage_type;
template <typename A, typename Functor> struct cwise_promote_storage_type<A,A,Functor> { typedef A ret; }; template <typename A, typename Functor> struct cwise_promote_storage_type<A,A,Functor> { typedef A ret; };
template <typename Functor> struct cwise_promote_storage_type<Dense,Dense,Functor> { typedef Dense ret; }; template <typename Functor> struct cwise_promote_storage_type<Dense,Dense,Functor> { typedef Dense ret; };
template <typename ScalarA, typename ScalarB> struct cwise_promote_storage_type<Dense,Dense,scalar_product_op<ScalarA,ScalarB> > { typedef Dense ret; }; template <typename A, typename Functor> struct cwise_promote_storage_type<A,Dense,Functor> { typedef Dense ret; };
template <typename A, typename Functor> struct cwise_promote_storage_type<A,Dense,Functor> { typedef Dense ret; }; template <typename B, typename Functor> struct cwise_promote_storage_type<Dense,B,Functor> { typedef Dense ret; };
template <typename B, typename Functor> struct cwise_promote_storage_type<Dense,B,Functor> { typedef Dense ret; }; template <typename Functor> struct cwise_promote_storage_type<Sparse,Dense,Functor> { typedef Sparse ret; };
template <typename A, typename ScalarA, typename ScalarB> struct cwise_promote_storage_type<A,Dense,scalar_product_op<ScalarA,ScalarB> > { typedef A ret; }; template <typename Functor> struct cwise_promote_storage_type<Dense,Sparse,Functor> { typedef Sparse ret; };
template <typename B, typename ScalarA, typename ScalarB> struct cwise_promote_storage_type<Dense,B,scalar_product_op<ScalarA,ScalarB> > { typedef B ret; };
/** \internal Specify the "storage kind" of multiplying an expression of kind A with kind B. /** \internal Specify the "storage kind" of multiplying an expression of kind A with kind B.
* The template parameter ProductTag permits to specialize the resulting storage kind wrt to * The template parameter ProductTag permits to specialize the resulting storage kind wrt to

View File

@ -129,7 +129,7 @@ public:
* determined by \a prec. * determined by \a prec.
* *
* \sa MatrixBase::isApprox() */ * \sa MatrixBase::isApprox() */
bool isApprox(const ParametrizedLine& other, typename NumTraits<Scalar>::Real prec = NumTraits<Scalar>::dummy_precision()) const bool isApprox(const ParametrizedLine& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
{ return m_origin.isApprox(other.m_origin, prec) && m_direction.isApprox(other.m_direction, prec); } { return m_origin.isApprox(other.m_origin, prec) && m_direction.isApprox(other.m_direction, prec); }
protected: protected:

View File

@ -162,7 +162,7 @@ public:
* determined by \a prec. * determined by \a prec.
* *
* \sa MatrixBase::isApprox() */ * \sa MatrixBase::isApprox() */
bool isApprox(const Translation& other, typename NumTraits<Scalar>::Real prec = NumTraits<Scalar>::dummy_precision()) const bool isApprox(const Translation& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
{ return m_coeffs.isApprox(other.m_coeffs, prec); } { return m_coeffs.isApprox(other.m_coeffs, prec); }
}; };

View File

@ -37,6 +37,8 @@ namespace Eigen {
* and \f$ \beta \f$ be the minimum value of the diagonal. If \f$ \beta > 0 \f$ then, the factorization is directly performed * and \f$ \beta \f$ be the minimum value of the diagonal. If \f$ \beta > 0 \f$ then, the factorization is directly performed
* on the matrix B. Otherwise, the factorization is performed on the shifted matrix \f$ B + (\sigma+|\beta| I \f$ where * on the matrix B. Otherwise, the factorization is performed on the shifted matrix \f$ B + (\sigma+|\beta| I \f$ where
* \f$ \sigma \f$ is the initial shift value as returned and set by setInitialShift() method. The default value is \f$ \sigma = 10^{-3} \f$. * \f$ \sigma \f$ is the initial shift value as returned and set by setInitialShift() method. The default value is \f$ \sigma = 10^{-3} \f$.
* If the factorization fails, the shift is doubled until it succeeds, up to a maximum of ten attempts. If it still fails, as reported by
* the info() method, you can either increase the initial shift or, better, use another preconditioning technique.
* *
*/ */
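A minimal usage sketch of the shifting behaviour described above, assuming the Eigen 3.3-style headers and APIs (ConjugateGradient, preconditioner(), setInitialShift(), info()); the matrix data is made up for illustration:

#include <Eigen/Sparse>
#include <iostream>
using namespace Eigen;

int main()
{
  // Build a small SPD tridiagonal matrix (toy data).
  const int n = 100;
  SparseMatrix<double> A(n,n);
  for(int i=0; i<n; ++i)
  {
    A.insert(i,i) = 4.0;
    if(i+1<n) { A.insert(i,i+1) = -1.0; A.insert(i+1,i) = -1.0; }
  }
  VectorXd b = VectorXd::Ones(n);

  // Use the incomplete Cholesky factorization as a CG preconditioner and
  // raise the initial shift before factorizing, as described above.
  ConjugateGradient<SparseMatrix<double>, Lower|Upper, IncompleteCholesky<double> > cg;
  cg.preconditioner().setInitialShift(1e-2);
  cg.compute(A);
  VectorXd x = cg.solve(b);
  if(cg.info() != Success)
    std::cout << "factorization or solve failed\n";
  else
    std::cout << "#iterations: " << cg.iterations() << "\n";
  return 0;
}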
template <typename Scalar, int _UpLo = Lower, typename _OrderingType = template <typename Scalar, int _UpLo = Lower, typename _OrderingType =
@ -185,6 +187,10 @@ class IncompleteCholesky : public SparseSolverBase<IncompleteCholesky<Scalar,_Up
inline void updateList(Ref<const VectorIx> colPtr, Ref<VectorIx> rowIdx, Ref<VectorSx> vals, const Index& col, const Index& jk, VectorIx& firstElt, VectorList& listCol); inline void updateList(Ref<const VectorIx> colPtr, Ref<VectorIx> rowIdx, Ref<VectorSx> vals, const Index& col, const Index& jk, VectorIx& firstElt, VectorList& listCol);
}; };
// Based on the following paper:
// C-J. Lin and J. J. Moré, Incomplete Cholesky Factorizations with
// Limited memory, SIAM J. Sci. Comput. 21(1), pp. 24-45, 1999
// http://ftp.mcs.anl.gov/pub/tech_reports/reports/P682.pdf
template<typename Scalar, int _UpLo, typename OrderingType> template<typename Scalar, int _UpLo, typename OrderingType>
template<typename _MatrixType> template<typename _MatrixType>
void IncompleteCholesky<Scalar,_UpLo, OrderingType>::factorize(const _MatrixType& mat) void IncompleteCholesky<Scalar,_UpLo, OrderingType>::factorize(const _MatrixType& mat)
@ -240,7 +246,7 @@ void IncompleteCholesky<Scalar,_UpLo, OrderingType>::factorize(const _MatrixType
else else
m_scale(j) = 1; m_scale(j) = 1;
// FIXME disable scaling if not needed, i.e., if it is roughly uniform? (this will make solve() faster) // TODO disable scaling if not needed, i.e., if it is roughly uniform? (this will make solve() faster)
// Scale and compute the shift for the matrix // Scale and compute the shift for the matrix
RealScalar mindiag = NumTraits<RealScalar>::highest(); RealScalar mindiag = NumTraits<RealScalar>::highest();
@ -251,96 +257,122 @@ void IncompleteCholesky<Scalar,_UpLo, OrderingType>::factorize(const _MatrixType
eigen_internal_assert(rowIdx[colPtr[j]]==j && "IncompleteCholesky: only the lower triangular part must be stored"); eigen_internal_assert(rowIdx[colPtr[j]]==j && "IncompleteCholesky: only the lower triangular part must be stored");
mindiag = numext::mini(numext::real(vals[colPtr[j]]), mindiag); mindiag = numext::mini(numext::real(vals[colPtr[j]]), mindiag);
} }
FactorType L_save = m_L;
RealScalar shift = 0; RealScalar shift = 0;
if(mindiag <= RealScalar(0.)) if(mindiag <= RealScalar(0.))
shift = m_initialShift - mindiag; shift = m_initialShift - mindiag;
// Apply the shift to the diagonal elements of the matrix m_info = NumericalIssue;
for (Index j = 0; j < n; j++)
vals[colPtr[j]] += shift; // Try to perform the incomplete factorization using the current shift
int iter = 0;
// jki version of the Cholesky factorization do
for (Index j=0; j < n; ++j) {
{ // Apply the shift to the diagonal elements of the matrix
// Left-looking factorization of the j-th column for (Index j = 0; j < n; j++)
// First, load the j-th column into col_vals vals[colPtr[j]] += shift;
Scalar diag = vals[colPtr[j]]; // It is assumed that only the lower part is stored
col_nnz = 0; // jki version of the Cholesky factorization
for (Index i = colPtr[j] + 1; i < colPtr[j+1]; i++) Index j=0;
for (; j < n; ++j)
{ {
StorageIndex l = rowIdx[i]; // Left-looking factorization of the j-th column
col_vals(col_nnz) = vals[i]; // First, load the j-th column into col_vals
col_irow(col_nnz) = l; Scalar diag = vals[colPtr[j]]; // It is assumed that only the lower part is stored
col_pattern(l) = col_nnz; col_nnz = 0;
col_nnz++; for (Index i = colPtr[j] + 1; i < colPtr[j+1]; i++)
}
{
typename std::list<StorageIndex>::iterator k;
// Browse all previous columns that will update column j
for(k = listCol[j].begin(); k != listCol[j].end(); k++)
{ {
Index jk = firstElt(*k); // First element to use in the column StorageIndex l = rowIdx[i];
eigen_internal_assert(rowIdx[jk]==j); col_vals(col_nnz) = vals[i];
Scalar v_j_jk = numext::conj(vals[jk]); col_irow(col_nnz) = l;
col_pattern(l) = col_nnz;
jk += 1; col_nnz++;
for (Index i = jk; i < colPtr[*k+1]; i++)
{
StorageIndex l = rowIdx[i];
if(col_pattern[l]<0)
{
col_vals(col_nnz) = vals[i] * v_j_jk;
col_irow[col_nnz] = l;
col_pattern(l) = col_nnz;
col_nnz++;
}
else
col_vals(col_pattern[l]) -= vals[i] * v_j_jk;
}
updateList(colPtr,rowIdx,vals, *k, jk, firstElt, listCol);
} }
{
typename std::list<StorageIndex>::iterator k;
// Browse all previous columns that will update column j
for(k = listCol[j].begin(); k != listCol[j].end(); k++)
{
Index jk = firstElt(*k); // First element to use in the column
eigen_internal_assert(rowIdx[jk]==j);
Scalar v_j_jk = numext::conj(vals[jk]);
jk += 1;
for (Index i = jk; i < colPtr[*k+1]; i++)
{
StorageIndex l = rowIdx[i];
if(col_pattern[l]<0)
{
col_vals(col_nnz) = vals[i] * v_j_jk;
col_irow[col_nnz] = l;
col_pattern(l) = col_nnz;
col_nnz++;
}
else
col_vals(col_pattern[l]) -= vals[i] * v_j_jk;
}
updateList(colPtr,rowIdx,vals, *k, jk, firstElt, listCol);
}
}
// Scale the current column
if(numext::real(diag) <= 0)
{
if(++iter>=10)
return;
// increase shift
shift = numext::maxi(m_initialShift,RealScalar(2)*shift);
// restore m_L, col_pattern, and listCol
vals = Map<const VectorSx>(L_save.valuePtr(), nnz);
rowIdx = Map<const VectorIx>(L_save.innerIndexPtr(), nnz);
colPtr = Map<const VectorIx>(L_save.outerIndexPtr(), n+1);
col_pattern.fill(-1);
for(Index i=0; i<n; ++i)
listCol[i].clear();
break;
}
RealScalar rdiag = sqrt(numext::real(diag));
vals[colPtr[j]] = rdiag;
for (Index k = 0; k<col_nnz; ++k)
{
Index i = col_irow[k];
//Scale
col_vals(k) /= rdiag;
//Update the remaining diagonals with col_vals
vals[colPtr[i]] -= numext::abs2(col_vals(k));
}
// Select the largest p elements
// p is the original number of elements in the column (without the diagonal)
Index p = colPtr[j+1] - colPtr[j] - 1 ;
Ref<VectorSx> cvals = col_vals.head(col_nnz);
Ref<VectorIx> cirow = col_irow.head(col_nnz);
internal::QuickSplit(cvals,cirow, p);
// Insert the largest p elements in the matrix
Index cpt = 0;
for (Index i = colPtr[j]+1; i < colPtr[j+1]; i++)
{
vals[i] = col_vals(cpt);
rowIdx[i] = col_irow(cpt);
// restore col_pattern:
col_pattern(col_irow(cpt)) = -1;
cpt++;
}
// Get the first smallest row index and put it after the diagonal element
Index jk = colPtr(j)+1;
updateList(colPtr,rowIdx,vals,j,jk,firstElt,listCol);
} }
// Scale the current column if(j==n)
if(numext::real(diag) <= 0)
{ {
m_info = NumericalIssue; m_factorizationIsOk = true;
return; m_info = Success;
} }
} while(m_info!=Success);
RealScalar rdiag = sqrt(numext::real(diag));
vals[colPtr[j]] = rdiag;
for (Index k = 0; k<col_nnz; ++k)
{
Index i = col_irow[k];
//Scale
col_vals(k) /= rdiag;
//Update the remaining diagonals with col_vals
vals[colPtr[i]] -= numext::abs2(col_vals(k));
}
// Select the largest p elements
// p is the original number of elements in the column (without the diagonal)
Index p = colPtr[j+1] - colPtr[j] - 1 ;
Ref<VectorSx> cvals = col_vals.head(col_nnz);
Ref<VectorIx> cirow = col_irow.head(col_nnz);
internal::QuickSplit(cvals,cirow, p);
// Insert the largest p elements in the matrix
Index cpt = 0;
for (Index i = colPtr[j]+1; i < colPtr[j+1]; i++)
{
vals[i] = col_vals(cpt);
rowIdx[i] = col_irow(cpt);
// restore col_pattern:
col_pattern(col_irow(cpt)) = -1;
cpt++;
}
// Get the first smallest row index and put it after the diagonal element
Index jk = colPtr(j)+1;
updateList(colPtr,rowIdx,vals,j,jk,firstElt,listCol);
}
m_factorizationIsOk = true;
m_info = Success;
} }
template<typename Scalar, int _UpLo, typename OrderingType> template<typename Scalar, int _UpLo, typename OrderingType>

View File

@ -8,7 +8,7 @@
NOTE: this routine has been adapted from the CSparse library: NOTE: this routine has been adapted from the CSparse library:
Copyright (c) 2006, Timothy A. Davis. Copyright (c) 2006, Timothy A. Davis.
http://www.cise.ufl.edu/research/sparse/CSparse http://www.suitesparse.com
CSparse is free software; you can redistribute it and/or CSparse is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public modify it under the terms of the GNU Lesser General Public

View File

@ -41,12 +41,8 @@
// //
// The colamd/symamd library is available at // The colamd/symamd library is available at
// //
// http://www.cise.ufl.edu/research/sparse/colamd/ // http://www.suitesparse.com
// This is the http://www.cise.ufl.edu/research/sparse/colamd/colamd.h
// file. It is required by the colamd.c, colamdmex.c, and symamdmex.c
// files, and by any C code that calls the routines whose prototypes are
// listed below, or that uses the colamd/symamd definitions listed below.
#ifndef EIGEN_COLAMD_H #ifndef EIGEN_COLAMD_H
#define EIGEN_COLAMD_H #define EIGEN_COLAMD_H
@ -102,9 +98,6 @@ namespace internal {
/* === Definitions ========================================================== */ /* === Definitions ========================================================== */
/* ========================================================================== */ /* ========================================================================== */
#define COLAMD_MAX(a,b) (((a) > (b)) ? (a) : (b))
#define COLAMD_MIN(a,b) (((a) < (b)) ? (a) : (b))
#define ONES_COMPLEMENT(r) (-(r)-1) #define ONES_COMPLEMENT(r) (-(r)-1)
/* -------------------------------------------------------------------------- */ /* -------------------------------------------------------------------------- */
@ -739,8 +732,8 @@ static void init_scoring
/* === Extract knobs ==================================================== */ /* === Extract knobs ==================================================== */
dense_row_count = COLAMD_MAX (0, COLAMD_MIN (knobs [COLAMD_DENSE_ROW] * n_col, n_col)) ; dense_row_count = numext::maxi(IndexType(0), numext::mini(IndexType(knobs [COLAMD_DENSE_ROW] * n_col), n_col)) ;
dense_col_count = COLAMD_MAX (0, COLAMD_MIN (knobs [COLAMD_DENSE_COL] * n_row, n_row)) ; dense_col_count = numext::maxi(IndexType(0), numext::mini(IndexType(knobs [COLAMD_DENSE_COL] * n_row), n_row)) ;
COLAMD_DEBUG1 (("colamd: densecount: %d %d\n", dense_row_count, dense_col_count)) ; COLAMD_DEBUG1 (("colamd: densecount: %d %d\n", dense_row_count, dense_col_count)) ;
max_deg = 0 ; max_deg = 0 ;
n_col2 = n_col ; n_col2 = n_col ;
@ -804,7 +797,7 @@ static void init_scoring
else else
{ {
/* keep track of max degree of remaining rows */ /* keep track of max degree of remaining rows */
max_deg = COLAMD_MAX (max_deg, deg) ; max_deg = numext::maxi(max_deg, deg) ;
} }
} }
COLAMD_DEBUG1 (("colamd: Dense and null rows killed: %d\n", n_row - n_row2)) ; COLAMD_DEBUG1 (("colamd: Dense and null rows killed: %d\n", n_row - n_row2)) ;
@ -842,7 +835,7 @@ static void init_scoring
/* add row's external degree */ /* add row's external degree */
score += Row [row].shared1.degree - 1 ; score += Row [row].shared1.degree - 1 ;
/* guard against integer overflow */ /* guard against integer overflow */
score = COLAMD_MIN (score, n_col) ; score = numext::mini(score, n_col) ;
} }
/* determine pruned column length */ /* determine pruned column length */
col_length = (IndexType) (new_cp - &A [Col [c].start]) ; col_length = (IndexType) (new_cp - &A [Col [c].start]) ;
@ -914,7 +907,7 @@ static void init_scoring
head [score] = c ; head [score] = c ;
/* see if this score is less than current min */ /* see if this score is less than current min */
min_score = COLAMD_MIN (min_score, score) ; min_score = numext::mini(min_score, score) ;
} }
@ -1040,7 +1033,7 @@ static IndexType find_ordering /* return the number of garbage collections */
/* === Garbage_collection, if necessary ============================= */ /* === Garbage_collection, if necessary ============================= */
needed_memory = COLAMD_MIN (pivot_col_score, n_col - k) ; needed_memory = numext::mini(pivot_col_score, n_col - k) ;
if (pfree + needed_memory >= Alen) if (pfree + needed_memory >= Alen)
{ {
pfree = Eigen::internal::garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ; pfree = Eigen::internal::garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ;
@ -1099,7 +1092,7 @@ static IndexType find_ordering /* return the number of garbage collections */
/* clear tag on pivot column */ /* clear tag on pivot column */
Col [pivot_col].shared1.thickness = pivot_col_thickness ; Col [pivot_col].shared1.thickness = pivot_col_thickness ;
max_deg = COLAMD_MAX (max_deg, pivot_row_degree) ; max_deg = numext::maxi(max_deg, pivot_row_degree) ;
/* === Kill all rows used to construct pivot row ==================== */ /* === Kill all rows used to construct pivot row ==================== */
@ -1273,7 +1266,7 @@ static IndexType find_ordering /* return the number of garbage collections */
/* add set difference */ /* add set difference */
cur_score += row_mark - tag_mark ; cur_score += row_mark - tag_mark ;
/* integer overflow... */ /* integer overflow... */
cur_score = COLAMD_MIN (cur_score, n_col) ; cur_score = numext::mini(cur_score, n_col) ;
} }
/* recompute the column's length */ /* recompute the column's length */
@ -1386,7 +1379,7 @@ static IndexType find_ordering /* return the number of garbage collections */
cur_score -= Col [col].shared1.thickness ; cur_score -= Col [col].shared1.thickness ;
/* make sure score is less or equal than the max score */ /* make sure score is less or equal than the max score */
cur_score = COLAMD_MIN (cur_score, max_score) ; cur_score = numext::mini(cur_score, max_score) ;
COLAMD_ASSERT (cur_score >= 0) ; COLAMD_ASSERT (cur_score >= 0) ;
/* store updated score */ /* store updated score */
@ -1409,7 +1402,7 @@ static IndexType find_ordering /* return the number of garbage collections */
head [cur_score] = col ; head [cur_score] = col ;
/* see if this score is less than current min */ /* see if this score is less than current min */
min_score = COLAMD_MIN (min_score, cur_score) ; min_score = numext::mini(min_score, cur_score) ;
} }

View File

@ -100,11 +100,11 @@ protected:
enum { OuterSize = IsRowMajor ? BlockRows : BlockCols }; enum { OuterSize = IsRowMajor ? BlockRows : BlockCols };
public: public:
inline sparse_matrix_block_impl(const SparseMatrixType& xpr, Index i) inline sparse_matrix_block_impl(SparseMatrixType& xpr, Index i)
: m_matrix(xpr), m_outerStart(convert_index(i)), m_outerSize(OuterSize) : m_matrix(xpr), m_outerStart(convert_index(i)), m_outerSize(OuterSize)
{} {}
inline sparse_matrix_block_impl(const SparseMatrixType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols) inline sparse_matrix_block_impl(SparseMatrixType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
: m_matrix(xpr), m_outerStart(convert_index(IsRowMajor ? startRow : startCol)), m_outerSize(convert_index(IsRowMajor ? blockRows : blockCols)) : m_matrix(xpr), m_outerStart(convert_index(IsRowMajor ? startRow : startCol)), m_outerSize(convert_index(IsRowMajor ? blockRows : blockCols))
{} {}
@ -112,7 +112,7 @@ public:
inline BlockType& operator=(const SparseMatrixBase<OtherDerived>& other) inline BlockType& operator=(const SparseMatrixBase<OtherDerived>& other)
{ {
typedef typename internal::remove_all<typename SparseMatrixType::Nested>::type _NestedMatrixType; typedef typename internal::remove_all<typename SparseMatrixType::Nested>::type _NestedMatrixType;
_NestedMatrixType& matrix = const_cast<_NestedMatrixType&>(m_matrix);; _NestedMatrixType& matrix = m_matrix;
// This assignment is slow if this vector set is not empty // This assignment is slow if this vector set is not empty
// and/or it is not at the end of the nonzeros of the underlying matrix. // and/or it is not at the end of the nonzeros of the underlying matrix.
@ -209,28 +209,28 @@ public:
inline const Scalar* valuePtr() const inline const Scalar* valuePtr() const
{ return m_matrix.valuePtr(); } { return m_matrix.valuePtr(); }
inline Scalar* valuePtr() inline Scalar* valuePtr()
{ return m_matrix.const_cast_derived().valuePtr(); } { return m_matrix.valuePtr(); }
inline const StorageIndex* innerIndexPtr() const inline const StorageIndex* innerIndexPtr() const
{ return m_matrix.innerIndexPtr(); } { return m_matrix.innerIndexPtr(); }
inline StorageIndex* innerIndexPtr() inline StorageIndex* innerIndexPtr()
{ return m_matrix.const_cast_derived().innerIndexPtr(); } { return m_matrix.innerIndexPtr(); }
inline const StorageIndex* outerIndexPtr() const inline const StorageIndex* outerIndexPtr() const
{ return m_matrix.outerIndexPtr() + m_outerStart; } { return m_matrix.outerIndexPtr() + m_outerStart; }
inline StorageIndex* outerIndexPtr() inline StorageIndex* outerIndexPtr()
{ return m_matrix.const_cast_derived().outerIndexPtr() + m_outerStart; } { return m_matrix.outerIndexPtr() + m_outerStart; }
inline const StorageIndex* innerNonZeroPtr() const inline const StorageIndex* innerNonZeroPtr() const
{ return isCompressed() ? 0 : (m_matrix.innerNonZeroPtr()+m_outerStart); } { return isCompressed() ? 0 : (m_matrix.innerNonZeroPtr()+m_outerStart); }
inline StorageIndex* innerNonZeroPtr() inline StorageIndex* innerNonZeroPtr()
{ return isCompressed() ? 0 : (m_matrix.const_cast_derived().innerNonZeroPtr()+m_outerStart); } { return isCompressed() ? 0 : (m_matrix.innerNonZeroPtr()+m_outerStart); }
bool isCompressed() const { return m_matrix.innerNonZeroPtr()==0; } bool isCompressed() const { return m_matrix.innerNonZeroPtr()==0; }
inline Scalar& coeffRef(Index row, Index col) inline Scalar& coeffRef(Index row, Index col)
{ {
return m_matrix.const_cast_derived().coeffRef(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart)); return m_matrix.coeffRef(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart));
} }
inline const Scalar coeff(Index row, Index col) const inline const Scalar coeff(Index row, Index col) const
@ -264,7 +264,7 @@ public:
protected: protected:
typename SparseMatrixType::Nested m_matrix; typename internal::ref_selector<SparseMatrixType>::non_const_type m_matrix;
Index m_outerStart; Index m_outerStart;
const internal::variable_if_dynamic<Index, OuterSize> m_outerSize; const internal::variable_if_dynamic<Index, OuterSize> m_outerSize;
@ -373,7 +373,7 @@ public:
/** Column or Row constructor /** Column or Row constructor
*/ */
inline BlockImpl(const XprType& xpr, Index i) inline BlockImpl(XprType& xpr, Index i)
: m_matrix(xpr), : m_matrix(xpr),
m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? convert_index(i) : 0), m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? convert_index(i) : 0),
m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? convert_index(i) : 0), m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? convert_index(i) : 0),
@ -383,7 +383,7 @@ public:
/** Dynamic-size constructor /** Dynamic-size constructor
*/ */
inline BlockImpl(const XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols) inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
: m_matrix(xpr), m_startRow(convert_index(startRow)), m_startCol(convert_index(startCol)), m_blockRows(convert_index(blockRows)), m_blockCols(convert_index(blockCols)) : m_matrix(xpr), m_startRow(convert_index(startRow)), m_startCol(convert_index(startCol)), m_blockRows(convert_index(blockRows)), m_blockCols(convert_index(blockCols))
{} {}
@ -392,8 +392,7 @@ public:
inline Scalar& coeffRef(Index row, Index col) inline Scalar& coeffRef(Index row, Index col)
{ {
return m_matrix.const_cast_derived() return m_matrix.coeffRef(row + m_startRow.value(), col + m_startCol.value());
.coeffRef(row + m_startRow.value(), col + m_startCol.value());
} }
inline const Scalar coeff(Index row, Index col) const inline const Scalar coeff(Index row, Index col) const
@ -403,16 +402,14 @@ public:
inline Scalar& coeffRef(Index index) inline Scalar& coeffRef(Index index)
{ {
return m_matrix.const_cast_derived() return m_matrix.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
} }
inline const Scalar coeff(Index index) const inline const Scalar coeff(Index index) const
{ {
return m_matrix return m_matrix.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
} }
inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; } inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; }
@ -430,7 +427,7 @@ public:
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl) EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
typename XprType::Nested m_matrix; typename internal::ref_selector<XprType>::non_const_type m_matrix;
const internal::variable_if_dynamic<Index, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow; const internal::variable_if_dynamic<Index, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
const internal::variable_if_dynamic<Index, XprType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol; const internal::variable_if_dynamic<Index, XprType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_blockRows; const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_blockRows;

View File

@ -117,6 +117,24 @@ template<typename Derived>
class SparseCompressedBase<Derived>::InnerIterator class SparseCompressedBase<Derived>::InnerIterator
{ {
public: public:
InnerIterator()
: m_values(0), m_indices(0), m_outer(0), m_id(0), m_end(0)
{}
InnerIterator(const InnerIterator& other)
: m_values(other.m_values), m_indices(other.m_indices), m_outer(other.m_outer), m_id(other.m_id), m_end(other.m_end)
{}
InnerIterator& operator=(const InnerIterator& other)
{
m_values = other.m_values;
m_indices = other.m_indices;
const_cast<OuterType&>(m_outer).setValue(other.m_outer.value());
m_id = other.m_id;
m_end = other.m_end;
return *this;
}
InnerIterator(const SparseCompressedBase& mat, Index outer) InnerIterator(const SparseCompressedBase& mat, Index outer)
: m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(outer) : m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(outer)
{ {
@ -162,7 +180,8 @@ class SparseCompressedBase<Derived>::InnerIterator
protected: protected:
const Scalar* m_values; const Scalar* m_values;
const StorageIndex* m_indices; const StorageIndex* m_indices;
const internal::variable_if_dynamic<Index,Derived::IsVectorAtCompileTime?0:Dynamic> m_outer; typedef internal::variable_if_dynamic<Index,Derived::IsVectorAtCompileTime?0:Dynamic> OuterType;
const OuterType m_outer;
Index m_id; Index m_id;
Index m_end; Index m_end;
private: private:
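The new default constructor, copy constructor and assignment operator make these iterators copyable. For context, the canonical pattern they support (a standard Eigen idiom, not part of this change) is to walk the stored entries of each outer vector:

#include <Eigen/Sparse>
#include <iostream>

int main()
{
  Eigen::SparseMatrix<double> A(3,3);
  A.insert(0,0) = 1.0;
  A.insert(2,1) = 5.0;
  A.makeCompressed();

  // Iterate over the nonzeros column by column (A is column-major by default).
  for (int k = 0; k < A.outerSize(); ++k)
    for (Eigen::SparseMatrix<double>::InnerIterator it(A,k); it; ++it)
      std::cout << "(" << it.row() << "," << it.col() << ") = " << it.value() << "\n";
  return 0;
}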

View File

@ -49,17 +49,10 @@ class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Sparse>
namespace internal { namespace internal {
template<typename BinaryOp, typename Lhs, typename Rhs, typename Derived,
typename _LhsStorageMode = typename traits<Lhs>::StorageKind,
typename _RhsStorageMode = typename traits<Rhs>::StorageKind>
class sparse_cwise_binary_op_inner_iterator_selector;
} // end namespace internal
namespace internal {
// Generic "sparse OP sparse" // Generic "sparse OP sparse"
template<typename XprType> struct binary_sparse_evaluator;
template<typename BinaryOp, typename Lhs, typename Rhs> template<typename BinaryOp, typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IteratorBased, IteratorBased> struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IteratorBased, IteratorBased>
: evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> > : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
@ -153,6 +146,182 @@ protected:
evaluator<Rhs> m_rhsImpl; evaluator<Rhs> m_rhsImpl;
}; };
// dense op sparse
template<typename BinaryOp, typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IteratorBased>
: evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
{
protected:
typedef typename evaluator<Rhs>::InnerIterator RhsIterator;
typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
typedef typename traits<XprType>::Scalar Scalar;
typedef typename XprType::StorageIndex StorageIndex;
public:
class ReverseInnerIterator;
class InnerIterator
{
enum { IsRowMajor = (int(Rhs::Flags)&RowMajorBit)==RowMajorBit };
public:
EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer)
: m_lhsEval(aEval.m_lhsImpl), m_rhsIter(aEval.m_rhsImpl,outer), m_functor(aEval.m_functor), m_id(-1), m_innerSize(aEval.m_expr.rhs().innerSize())
{
this->operator++();
}
EIGEN_STRONG_INLINE InnerIterator& operator++()
{
++m_id;
if(m_id<m_innerSize)
{
Scalar lhsVal = m_lhsEval.coeff(IsRowMajor?m_rhsIter.outer():m_id,
IsRowMajor?m_id:m_rhsIter.outer());
if(m_rhsIter && m_rhsIter.index()==m_id)
{
m_value = m_functor(lhsVal, m_rhsIter.value());
++m_rhsIter;
}
else
m_value = m_functor(lhsVal, Scalar(0));
}
return *this;
}
EIGEN_STRONG_INLINE Scalar value() const { return m_value; }
EIGEN_STRONG_INLINE StorageIndex index() const { return m_id; }
EIGEN_STRONG_INLINE Index row() const { return IsRowMajor ? m_rhsIter.outer() : m_id; }
EIGEN_STRONG_INLINE Index col() const { return IsRowMajor ? m_id : m_rhsIter.outer(); }
EIGEN_STRONG_INLINE operator bool() const { return m_id<m_innerSize; }
protected:
const evaluator<Lhs> &m_lhsEval;
RhsIterator m_rhsIter;
const BinaryOp& m_functor;
Scalar m_value;
StorageIndex m_id;
StorageIndex m_innerSize;
};
enum {
CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
// Expose storage order of the sparse expression
Flags = (XprType::Flags & ~RowMajorBit) | (int(Rhs::Flags)&RowMajorBit)
};
explicit binary_evaluator(const XprType& xpr)
: m_functor(xpr.functor()),
m_lhsImpl(xpr.lhs()),
m_rhsImpl(xpr.rhs()),
m_expr(xpr)
{
EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost);
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
}
inline Index nonZerosEstimate() const {
return m_expr.size();
}
protected:
const BinaryOp m_functor;
evaluator<Lhs> m_lhsImpl;
evaluator<Rhs> m_rhsImpl;
const XprType &m_expr;
};
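The iterator above effectively densifies one inner vector of the sparse operand on the fly: it visits every inner index, pulling the matching sparse value when the iterator's index coincides and an explicit zero otherwise. A minimal standalone sketch of that merge (plain C++, hypothetical data, not Eigen internals):

#include <vector>
#include <utility>
#include <cstdio>

int main()
{
  // A dense column of length 6 and a sparse column given as (index, value) pairs.
  double dense[6] = {1, 2, 3, 4, 5, 6};
  std::vector<std::pair<int,double> > sparse;
  sparse.push_back(std::make_pair(1, 10.0));
  sparse.push_back(std::make_pair(4, 40.0));

  // Walk every inner index; advance the sparse cursor only when its index matches.
  std::size_t k = 0;
  for (int i = 0; i < 6; ++i)
  {
    double s = (k < sparse.size() && sparse[k].first == i) ? sparse[k++].second : 0.0;
    std::printf("%d: %g\n", i, dense[i] + s);
  }
  return 0;
}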
// sparse op dense
template<typename BinaryOp, typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IteratorBased, IndexBased>
: evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
{
protected:
typedef typename evaluator<Lhs>::InnerIterator LhsIterator;
typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
typedef typename traits<XprType>::Scalar Scalar;
typedef typename XprType::StorageIndex StorageIndex;
public:
class ReverseInnerIterator;
class InnerIterator
{
enum { IsRowMajor = (int(Lhs::Flags)&RowMajorBit)==RowMajorBit };
public:
EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer)
: m_lhsIter(aEval.m_lhsImpl,outer), m_rhsEval(aEval.m_rhsImpl), m_functor(aEval.m_functor), m_id(-1), m_innerSize(aEval.m_expr.lhs().innerSize())
{
this->operator++();
}
EIGEN_STRONG_INLINE InnerIterator& operator++()
{
++m_id;
if(m_id<m_innerSize)
{
Scalar rhsVal = m_rhsEval.coeff(IsRowMajor?m_lhsIter.outer():m_id,
IsRowMajor?m_id:m_lhsIter.outer());
if(m_lhsIter && m_lhsIter.index()==m_id)
{
m_value = m_functor(m_lhsIter.value(), rhsVal);
++m_lhsIter;
}
else
m_value = m_functor(Scalar(0),rhsVal);
}
return *this;
}
EIGEN_STRONG_INLINE Scalar value() const { return m_value; }
EIGEN_STRONG_INLINE StorageIndex index() const { return m_id; }
EIGEN_STRONG_INLINE Index row() const { return IsRowMajor ? m_lhsIter.outer() : m_id; }
EIGEN_STRONG_INLINE Index col() const { return IsRowMajor ? m_id : m_lhsIter.outer(); }
EIGEN_STRONG_INLINE operator bool() const { return m_id<m_innerSize; }
protected:
LhsIterator m_lhsIter;
const evaluator<Rhs> &m_rhsEval;
const BinaryOp& m_functor;
Scalar m_value;
StorageIndex m_id;
StorageIndex m_innerSize;
};
enum {
CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
// Expose storage order of the sparse expression
Flags = (XprType::Flags & ~RowMajorBit) | (int(Lhs::Flags)&RowMajorBit)
};
explicit binary_evaluator(const XprType& xpr)
: m_functor(xpr.functor()),
m_lhsImpl(xpr.lhs()),
m_rhsImpl(xpr.rhs()),
m_expr(xpr)
{
EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost);
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
}
inline Index nonZerosEstimate() const {
return m_expr.size();
}
protected:
const BinaryOp m_functor;
evaluator<Lhs> m_lhsImpl;
evaluator<Rhs> m_rhsImpl;
const XprType &m_expr;
};
// "sparse .* sparse" // "sparse .* sparse"
template<typename T, typename Lhs, typename Rhs> template<typename T, typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs>, IteratorBased, IteratorBased> struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs>, IteratorBased, IteratorBased>
@ -287,7 +456,8 @@ public:
enum { enum {
CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost, CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
Flags = XprType::Flags // Expose storage order of the sparse expression
Flags = (XprType::Flags & ~RowMajorBit) | (int(Rhs::Flags)&RowMajorBit)
}; };
explicit binary_evaluator(const XprType& xpr) explicit binary_evaluator(const XprType& xpr)
@ -360,7 +530,8 @@ public:
enum { enum {
CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost, CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
Flags = XprType::Flags // Expose storage order of the sparse expression
Flags = (XprType::Flags & ~RowMajorBit) | (int(Lhs::Flags)&RowMajorBit)
}; };
explicit binary_evaluator(const XprType& xpr) explicit binary_evaluator(const XprType& xpr)
@ -428,6 +599,34 @@ SparseMatrixBase<Derived>::cwiseProduct(const MatrixBase<OtherDerived> &other) c
return typename CwiseProductDenseReturnType<OtherDerived>::Type(derived(), other.derived()); return typename CwiseProductDenseReturnType<OtherDerived>::Type(derived(), other.derived());
} }
template<typename DenseDerived, typename SparseDerived>
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_sum_op<typename DenseDerived::Scalar>, const DenseDerived, const SparseDerived>
operator+(const MatrixBase<DenseDerived> &a, const SparseMatrixBase<SparseDerived> &b)
{
return CwiseBinaryOp<internal::scalar_sum_op<typename DenseDerived::Scalar>, const DenseDerived, const SparseDerived>(a.derived(), b.derived());
}
template<typename SparseDerived, typename DenseDerived>
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_sum_op<typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>
operator+(const SparseMatrixBase<SparseDerived> &a, const MatrixBase<DenseDerived> &b)
{
return CwiseBinaryOp<internal::scalar_sum_op<typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>(a.derived(), b.derived());
}
template<typename DenseDerived, typename SparseDerived>
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_difference_op<typename DenseDerived::Scalar>, const DenseDerived, const SparseDerived>
operator-(const MatrixBase<DenseDerived> &a, const SparseMatrixBase<SparseDerived> &b)
{
return CwiseBinaryOp<internal::scalar_difference_op<typename DenseDerived::Scalar>, const DenseDerived, const SparseDerived>(a.derived(), b.derived());
}
template<typename SparseDerived, typename DenseDerived>
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_difference_op<typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>
operator-(const SparseMatrixBase<SparseDerived> &a, const MatrixBase<DenseDerived> &b)
{
return CwiseBinaryOp<internal::scalar_difference_op<typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>(a.derived(), b.derived());
}
} // end namespace Eigen } // end namespace Eigen
#endif // EIGEN_SPARSE_CWISE_BINARY_OP_H #endif // EIGEN_SPARSE_CWISE_BINARY_OP_H
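The operator+ and operator- overloads added above make it possible to mix dense and sparse expressions directly. The following standalone sketch is not part of the patch; matrix sizes and values are arbitrary and only illustrate the intended usage:

#include <Eigen/Dense>
#include <Eigen/Sparse>

int main() {
  Eigen::MatrixXd D = Eigen::MatrixXd::Random(4, 4);   // dense operand
  Eigen::SparseMatrix<double> S(4, 4);                  // sparse operand
  S.insert(0, 0) = 1.0;
  S.insert(2, 3) = 2.0;

  // Mixed dense/sparse sums now build a CwiseBinaryOp as defined above.
  Eigen::MatrixXd sum  = D + S;   // dense + sparse
  Eigen::MatrixXd diff = S - D;   // sparse - dense
  return 0;
}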

View File

@ -48,7 +48,7 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, t
// It basically represents the minimal amount of work to be done to be worth it. // It basically represents the minimal amount of work to be done to be worth it.
if(threads>1 && lhsEval.nonZerosEstimate() > 20000) if(threads>1 && lhsEval.nonZerosEstimate() > 20000)
{ {
#pragma omp parallel for schedule(static) num_threads(threads) #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads)
for(Index i=0; i<n; ++i) for(Index i=0; i<n; ++i)
processRow(lhsEval,rhs,res,alpha,i,c); processRow(lhsEval,rhs,res,alpha,i,c);
} }

View File

@ -538,7 +538,12 @@ class SparseMatrix
} }
/** Resizes the matrix to a \a rows x \a cols matrix leaving old values untouched. /** Resizes the matrix to a \a rows x \a cols matrix leaving old values untouched.
* \sa reserve(), setZero() *
* If the sizes of the matrix are decreased, then the matrix is turned into \b uncompressed-mode
* and the storage of the out-of-bounds coefficients is kept and reserved.
* Call makeCompressed() to pack the entries and squeeze extra memory.
*
* \sa reserve(), setZero(), makeCompressed()
*/ */
void conservativeResize(Index rows, Index cols) void conservativeResize(Index rows, Index cols)
{ {
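A minimal usage sketch of the behaviour documented above (sizes and values are made up): shrinking with conservativeResize() keeps the out-of-bounds storage around, and makeCompressed() packs it afterwards.

#include <Eigen/Sparse>

int main() {
  Eigen::SparseMatrix<double> A(10, 10);
  A.insert(1, 1) = 1.0;
  A.insert(8, 9) = 2.0;          // ends up out of bounds after shrinking
  A.conservativeResize(5, 5);    // in-range values untouched, matrix now uncompressed
  A.makeCompressed();            // pack the entries and squeeze the extra memory
  return 0;
}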

View File

@ -55,10 +55,10 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
typedef typename MatrixType::Scalar Scalar; typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::StorageIndex StorageIndex; typedef typename MatrixType::StorageIndex StorageIndex;
typedef Matrix<StorageIndex,Dynamic,1> VectorI; typedef Matrix<StorageIndex,Dynamic,1> VectorI;
typedef typename MatrixType::Nested MatrixTypeNested; typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
typedef typename internal::remove_all<MatrixTypeNested>::type _MatrixTypeNested; typedef typename internal::remove_all<MatrixTypeNested>::type _MatrixTypeNested;
explicit inline SparseSelfAdjointView(const MatrixType& matrix) : m_matrix(matrix) explicit inline SparseSelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
{ {
eigen_assert(rows()==cols() && "SelfAdjointView is only for squared matrices"); eigen_assert(rows()==cols() && "SelfAdjointView is only for squared matrices");
} }
@ -68,7 +68,7 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
/** \internal \returns a reference to the nested matrix */ /** \internal \returns a reference to the nested matrix */
const _MatrixTypeNested& matrix() const { return m_matrix; } const _MatrixTypeNested& matrix() const { return m_matrix; }
_MatrixTypeNested& matrix() { return m_matrix.const_cast_derived(); } typename internal::remove_reference<MatrixTypeNested>::type& matrix() { return m_matrix; }
/** \returns an expression of the matrix product between a sparse self-adjoint matrix \c *this and a sparse matrix \a rhs. /** \returns an expression of the matrix product between a sparse self-adjoint matrix \c *this and a sparse matrix \a rhs.
* *
@ -158,7 +158,7 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
protected: protected:
typename MatrixType::Nested m_matrix; MatrixTypeNested m_matrix;
//mutable VectorI m_countPerRow; //mutable VectorI m_countPerRow;
//mutable VectorI m_countPerCol; //mutable VectorI m_countPerCol;
private: private:
@ -194,9 +194,9 @@ SparseSelfAdjointView<MatrixType,Mode>::rankUpdate(const SparseMatrixBase<Derive
{ {
SparseMatrix<Scalar,(MatrixType::Flags&RowMajorBit)?RowMajor:ColMajor> tmp = u * u.adjoint(); SparseMatrix<Scalar,(MatrixType::Flags&RowMajorBit)?RowMajor:ColMajor> tmp = u * u.adjoint();
if(alpha==Scalar(0)) if(alpha==Scalar(0))
m_matrix.const_cast_derived() = tmp.template triangularView<Mode>(); m_matrix = tmp.template triangularView<Mode>();
else else
m_matrix.const_cast_derived() += alpha * tmp.template triangularView<Mode>(); m_matrix += alpha * tmp.template triangularView<Mode>();
return *this; return *this;
} }

View File

@ -205,23 +205,54 @@ class SparseVector
inline void finalize() {} inline void finalize() {}
/** \copydoc SparseMatrix::prune(const Scalar&,const RealScalar&) */
void prune(const Scalar& reference, const RealScalar& epsilon = NumTraits<RealScalar>::dummy_precision()) void prune(const Scalar& reference, const RealScalar& epsilon = NumTraits<RealScalar>::dummy_precision())
{ {
m_data.prune(reference,epsilon); m_data.prune(reference,epsilon);
} }
/** Resizes the sparse vector to \a rows x \a cols
*
* This method is provided for compatibility with matrices.
* For a column vector, \a cols must be equal to 1.
* For a row vector, \a rows must be equal to 1.
*
* \sa resize(Index)
*/
void resize(Index rows, Index cols) void resize(Index rows, Index cols)
{ {
eigen_assert((IsColVector ? cols : rows)==1 && "Outer dimension must equal 1"); eigen_assert((IsColVector ? cols : rows)==1 && "Outer dimension must equal 1");
resize(IsColVector ? rows : cols); resize(IsColVector ? rows : cols);
} }
/** Resizes the sparse vector to \a newSize.
* This method deletes all entries, thus leaving an empty sparse vector.
*
* \sa conservativeResize(), setZero() */
void resize(Index newSize) void resize(Index newSize)
{ {
m_size = newSize; m_size = newSize;
m_data.clear(); m_data.clear();
} }
/** Resizes the sparse vector to \a newSize, while leaving old values untouched.
*
* If the size of the vector is decreased, then the storage of the out-of-bounds coefficients is kept and reserved.
* Call .data().squeeze() to free extra memory.
*
* \sa reserve(), setZero()
*/
void conservativeResize(Index newSize)
{
if (newSize < m_size)
{
Index i = 0;
while (i<m_data.size() && m_data.index(i)<newSize) ++i;
m_data.resize(i);
}
m_size = newSize;
}
void resizeNonZeros(Index size) { m_data.resize(size); } void resizeNonZeros(Index size) { m_data.resize(size); }
inline SparseVector() : m_size(0) { check_template_parameters(); resize(0); } inline SparseVector() : m_size(0) { check_template_parameters(); resize(0); }
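A short sketch of the new SparseVector::conservativeResize() documented above (indices and values are illustrative only):

#include <Eigen/Sparse>

int main() {
  Eigen::SparseVector<double> v(10);
  v.insert(2) = 1.0;
  v.insert(7) = 3.0;          // index 7 falls outside the new size below
  v.conservativeResize(5);    // entries with index < 5 are preserved
  v.data().squeeze();         // free the reserved extra memory, as the doc suggests
  return 0;
}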

View File

@ -38,7 +38,7 @@ public:
typedef typename internal::remove_all<MatrixType>::type NestedExpression; typedef typename internal::remove_all<MatrixType>::type NestedExpression;
explicit SparseView(const MatrixType& mat, const Scalar& reference = Scalar(0), explicit SparseView(const MatrixType& mat, const Scalar& reference = Scalar(0),
RealScalar epsilon = NumTraits<Scalar>::dummy_precision()) const RealScalar &epsilon = NumTraits<Scalar>::dummy_precision())
: m_matrix(mat), m_reference(reference), m_epsilon(epsilon) {} : m_matrix(mat), m_reference(reference), m_epsilon(epsilon) {}
inline Index rows() const { return m_matrix.rows(); } inline Index rows() const { return m_matrix.rows(); }

View File

@ -128,6 +128,17 @@ class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >
inline Index cols() const { return m_pmat.cols();} inline Index cols() const { return m_pmat.cols();}
/** \returns a const reference to the \b sparse upper triangular matrix R of the QR factorization. /** \returns a const reference to the \b sparse upper triangular matrix R of the QR factorization.
* \warning The entries of the returned matrix are not sorted. This means that using it in algorithms
* expecting sorted entries will fail. This includes random coefficient accesses (SparseMatrix::coeff()),
* and coefficient-wise operations. Matrix products and triangular solves are fine though.
*
* To sort the entries, you can assign it to a row-major matrix, and if a column-major matrix
* is required, you can copy it again:
* \code
* SparseMatrix<double> R = qr.matrixR(); // column-major, not sorted!
* SparseMatrix<double,RowMajor> Rr = qr.matrixR(); // row-major, sorted
* SparseMatrix<double> Rc = Rr; // column-major, sorted
* \endcode
*/ */
const QRMatrixType& matrixR() const { return m_R; } const QRMatrixType& matrixR() const { return m_R; }

View File

@ -22,6 +22,7 @@ typedef CwiseUnaryOp<internal::scalar_tanh_op<Scalar>, const Derived> TanhReturn
typedef CwiseUnaryOp<internal::scalar_sinh_op<Scalar>, const Derived> SinhReturnType; typedef CwiseUnaryOp<internal::scalar_sinh_op<Scalar>, const Derived> SinhReturnType;
typedef CwiseUnaryOp<internal::scalar_cosh_op<Scalar>, const Derived> CoshReturnType; typedef CwiseUnaryOp<internal::scalar_cosh_op<Scalar>, const Derived> CoshReturnType;
typedef CwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, const Derived> LgammaReturnType; typedef CwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, const Derived> LgammaReturnType;
typedef CwiseUnaryOp<internal::scalar_digamma_op<Scalar>, const Derived> DigammaReturnType;
typedef CwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived> ErfReturnType; typedef CwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived> ErfReturnType;
typedef CwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived> ErfcReturnType; typedef CwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived> ErfcReturnType;
typedef CwiseUnaryOp<internal::scalar_pow_op<Scalar>, const Derived> PowReturnType; typedef CwiseUnaryOp<internal::scalar_pow_op<Scalar>, const Derived> PowReturnType;
@ -318,6 +319,16 @@ lgamma() const
return LgammaReturnType(derived()); return LgammaReturnType(derived());
} }
/** \returns an expression of the coefficient-wise digamma (psi, derivative of lgamma).
*
* \sa cos(), sin(), tan()
*/
inline const DigammaReturnType
digamma() const
{
return DigammaReturnType(derived());
}
/** \returns an expression of the coefficient-wise Gauss error /** \returns an expression of the coefficient-wise Gauss error
* function of *this. * function of *this.
* *
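A hedged usage sketch of the digamma() member added above, assuming the corresponding scalar_digamma_op functor is available in this build of Eigen (values are arbitrary):

#include <iostream>
#include <Eigen/Core>

int main() {
  Eigen::ArrayXd x(3);
  x << 1.0, 2.0, 3.0;
  Eigen::ArrayXd psi = x.digamma();  // coefficient-wise psi(x), e.g. psi(1) = -gamma
  std::cout << psi << std::endl;
  return 0;
}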

8
bench/tensors/README Normal file
View File

@ -0,0 +1,8 @@
Each benchmark comes in two flavors: one that runs on the CPU and one that runs on the GPU.
To compile the CPU benchmarks, simply call:
g++ tensor_benchmarks_cpu.cc benchmark_main.cc -I ../../ -std=c++11 -O3 -DNDEBUG -pthread -mavx -o benchmarks_cpu
To compile the GPU benchmarks, simply call:
nvcc tensor_benchmarks_gpu.cu benchmark_main.cc -I ../../ -std=c++11 -O2 -DNDEBUG -arch compute_35 -o benchmarks_gpu

49
bench/tensors/benchmark.h Normal file
View File

@ -0,0 +1,49 @@
/*
* Copyright (C) 2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stddef.h>
#include <stdint.h>
#include <vector>
namespace testing {
class Benchmark {
public:
Benchmark(const char* name, void (*fn)(int)) {
Register(name, fn, NULL);
}
Benchmark(const char* name, void (*fn_range)(int, int)) {
Register(name, NULL, fn_range);
}
Benchmark* Arg(int x);
Benchmark* Range(int lo, int hi);
const char* Name();
bool ShouldRun(int argc, char* argv[]);
void Run();
private:
const char* name_;
void (*fn_)(int);
void (*fn_range_)(int, int);
std::vector<int> args_;
void Register(const char* name, void (*fn)(int), void (*fn_range)(int, int));
void RunRepeatedlyWithArg(int iterations, int arg);
void RunWithArg(int arg);
};
} // namespace testing
void SetBenchmarkFlopsProcessed(int64_t);
void StopBenchmarkTiming();
void StartBenchmarkTiming();
#define BENCHMARK(f) \
static ::testing::Benchmark* _benchmark_##f __attribute__((unused)) = \
(new ::testing::Benchmark(#f, f))
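A sketch of how a benchmark is registered with this harness; the function body and sizes are invented, and the translation unit is meant to be linked together with benchmark_main.cc, which provides main():

#include <stdint.h>
#include <vector>
#include "benchmark.h"

static void BM_vector_sum(int iters, int n) {
  StopBenchmarkTiming();                     // exclude setup from the timing
  std::vector<float> v(n, 1.0f);
  StartBenchmarkTiming();
  volatile float acc = 0.0f;                 // volatile keeps the loop from being optimized away
  for (int i = 0; i < iters; ++i)
    for (int j = 0; j < n; ++j) acc = acc + v[j];
  SetBenchmarkFlopsProcessed(static_cast<int64_t>(iters) * n);
}
BENCHMARK(BM_vector_sum)->Range(10, 5000);   // same pattern as the BENCHMARK_RANGE macro used by the tensor benchmarks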

View File

@ -0,0 +1,237 @@
/*
* Copyright (C) 2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "benchmark.h"
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <inttypes.h>
#include <time.h>
#include <map>
#include <algorithm> // for std::max / std::min used in RunWithArg() and main()
static int64_t g_flops_processed;
static int64_t g_benchmark_total_time_ns;
static int64_t g_benchmark_start_time_ns;
typedef std::map<std::string, ::testing::Benchmark*> BenchmarkMap;
typedef BenchmarkMap::iterator BenchmarkMapIt;
BenchmarkMap& gBenchmarks() {
static BenchmarkMap g_benchmarks;
return g_benchmarks;
}
static int g_name_column_width = 20;
static int Round(int n) {
int base = 1;
while (base*10 < n) {
base *= 10;
}
if (n < 2*base) {
return 2*base;
}
if (n < 5*base) {
return 5*base;
}
return 10*base;
}
#ifdef __APPLE__
#include <mach/mach_time.h>
static mach_timebase_info_data_t g_time_info;
static void __attribute__((constructor)) init_info() {
mach_timebase_info(&g_time_info);
}
#endif
static int64_t NanoTime() {
#if defined(__APPLE__)
uint64_t t = mach_absolute_time();
return t * g_time_info.numer / g_time_info.denom;
#else
struct timespec t;
t.tv_sec = t.tv_nsec = 0;
clock_gettime(CLOCK_MONOTONIC, &t);
return static_cast<int64_t>(t.tv_sec) * 1000000000LL + t.tv_nsec;
#endif
}
namespace testing {
Benchmark* Benchmark::Arg(int arg) {
args_.push_back(arg);
return this;
}
Benchmark* Benchmark::Range(int lo, int hi) {
const int kRangeMultiplier = 8;
if (hi < lo) {
int temp = hi;
hi = lo;
lo = temp;
}
while (lo < hi) {
args_.push_back(lo);
lo *= kRangeMultiplier;
}
// We always run the hi number.
args_.push_back(hi);
return this;
}
const char* Benchmark::Name() {
return name_;
}
bool Benchmark::ShouldRun(int argc, char* argv[]) {
if (argc == 1) {
return true; // With no arguments, we run all benchmarks.
}
// Otherwise, we interpret each argument as a regular expression and
// see if any of our benchmarks match.
for (int i = 1; i < argc; i++) {
regex_t re;
if (regcomp(&re, argv[i], 0) != 0) {
fprintf(stderr, "couldn't compile \"%s\" as a regular expression!\n", argv[i]);
exit(EXIT_FAILURE);
}
int match = regexec(&re, name_, 0, NULL, 0);
regfree(&re);
if (match != REG_NOMATCH) {
return true;
}
}
return false;
}
void Benchmark::Register(const char* name, void (*fn)(int), void (*fn_range)(int, int)) {
name_ = name;
fn_ = fn;
fn_range_ = fn_range;
if (fn_ == NULL && fn_range_ == NULL) {
fprintf(stderr, "%s: missing function\n", name_);
exit(EXIT_FAILURE);
}
gBenchmarks().insert(std::make_pair(name, this));
}
void Benchmark::Run() {
if (fn_ != NULL) {
RunWithArg(0);
} else {
if (args_.empty()) {
fprintf(stderr, "%s: no args!\n", name_);
exit(EXIT_FAILURE);
}
for (size_t i = 0; i < args_.size(); ++i) {
RunWithArg(args_[i]);
}
}
}
void Benchmark::RunRepeatedlyWithArg(int iterations, int arg) {
g_flops_processed = 0;
g_benchmark_total_time_ns = 0;
g_benchmark_start_time_ns = NanoTime();
if (fn_ != NULL) {
fn_(iterations);
} else {
fn_range_(iterations, arg);
}
if (g_benchmark_start_time_ns != 0) {
g_benchmark_total_time_ns += NanoTime() - g_benchmark_start_time_ns;
}
}
void Benchmark::RunWithArg(int arg) {
// run once in case it's expensive
int iterations = 1;
RunRepeatedlyWithArg(iterations, arg);
while (g_benchmark_total_time_ns < 1e9 && iterations < 1e9) {
int last = iterations;
if (g_benchmark_total_time_ns/iterations == 0) {
iterations = 1e9;
} else {
iterations = 1e9 / (g_benchmark_total_time_ns/iterations);
}
iterations = std::max(last + 1, std::min(iterations + iterations/2, 100*last));
iterations = Round(iterations);
RunRepeatedlyWithArg(iterations, arg);
}
char throughput[100];
throughput[0] = '\0';
if (g_benchmark_total_time_ns > 0 && g_flops_processed > 0) {
double mflops_processed = static_cast<double>(g_flops_processed)/1e6;
double seconds = static_cast<double>(g_benchmark_total_time_ns)/1e9;
snprintf(throughput, sizeof(throughput), " %8.2f MFlops/s", mflops_processed/seconds);
}
char full_name[100];
if (fn_range_ != NULL) {
if (arg >= (1<<20)) {
snprintf(full_name, sizeof(full_name), "%s/%dM", name_, arg/(1<<20));
} else if (arg >= (1<<10)) {
snprintf(full_name, sizeof(full_name), "%s/%dK", name_, arg/(1<<10));
} else {
snprintf(full_name, sizeof(full_name), "%s/%d", name_, arg);
}
} else {
snprintf(full_name, sizeof(full_name), "%s", name_);
}
printf("%-*s %10d %10" PRId64 "%s\n", g_name_column_width, full_name,
iterations, g_benchmark_total_time_ns/iterations, throughput);
fflush(stdout);
}
} // namespace testing
void SetBenchmarkFlopsProcessed(int64_t x) {
g_flops_processed = x;
}
void StopBenchmarkTiming() {
if (g_benchmark_start_time_ns != 0) {
g_benchmark_total_time_ns += NanoTime() - g_benchmark_start_time_ns;
}
g_benchmark_start_time_ns = 0;
}
void StartBenchmarkTiming() {
if (g_benchmark_start_time_ns == 0) {
g_benchmark_start_time_ns = NanoTime();
}
}
int main(int argc, char* argv[]) {
if (gBenchmarks().empty()) {
fprintf(stderr, "No benchmarks registered!\n");
exit(EXIT_FAILURE);
}
for (BenchmarkMapIt it = gBenchmarks().begin(); it != gBenchmarks().end(); ++it) {
int name_width = static_cast<int>(strlen(it->second->Name()));
g_name_column_width = std::max(g_name_column_width, name_width);
}
bool need_header = true;
for (BenchmarkMapIt it = gBenchmarks().begin(); it != gBenchmarks().end(); ++it) {
::testing::Benchmark* b = it->second;
if (b->ShouldRun(argc, argv)) {
if (need_header) {
printf("%-*s %10s %10s\n", g_name_column_width, "", "iterations", "ns/op");
fflush(stdout);
need_header = false;
}
b->Run();
}
}
if (need_header) {
fprintf(stderr, "No matching benchmarks!\n");
fprintf(stderr, "Available benchmarks:\n");
for (BenchmarkMapIt it = gBenchmarks().begin(); it != gBenchmarks().end(); ++it) {
fprintf(stderr, " %s\n", it->second->Name());
}
exit(EXIT_FAILURE);
}
return 0;
}
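To make the calibration loop in RunWithArg() above concrete, here is a worked trace with made-up timings of 2 ms per iteration:

  1 iteration     ->     2,000,000 ns total; target 1e9 / 2e6 = 500, clamped to max(2, min(750, 100)) = 100, Round(100) = 100
  100 iterations  ->   200,000,000 ns total; target 500, clamped to max(101, min(750, 10000)) = 750, Round(750) = 1000
  1000 iterations -> 2,000,000,000 ns total; the loop exits and the result line is printed for 1000 iterations.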

View File

@ -4,13 +4,15 @@
typedef int TensorIndex; typedef int TensorIndex;
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "unsupported/Eigen/CXX11/Tensor"
#include "testing/base/public/benchmark.h" #include "benchmark.h"
#define BENCHMARK_RANGE(bench, lo, hi) \
BENCHMARK(bench)->Range(lo, hi)
using Eigen::Tensor; using Eigen::Tensor;
using Eigen::TensorMap; using Eigen::TensorMap;
// TODO(bsteiner): also templatize on the input type since we have users // TODO(bsteiner): also templatize on the input type since we have users
// for int8 as well as floats. // for int8 as well as floats.
template <typename Device> class BenchmarkSuite { template <typename Device> class BenchmarkSuite {
@ -38,12 +40,26 @@ template <typename Device> class BenchmarkSuite {
device_.memcpy(c_, a_, m_ * m_ * sizeof(float)); device_.memcpy(c_, a_, m_ * m_ * sizeof(float));
} }
// Record the number of values copied per second // Record the number of values copied per second
finalizeBenchmark(m_ * m_ * num_iters); finalizeBenchmark(static_cast<int64_t>(m_) * m_ * num_iters);
}
void typeCasting(int num_iters) {
eigen_assert(m_ == n_);
const Eigen::array<TensorIndex, 2> sizes = {{m_, k_}};
const TensorMap<Tensor<float, 2, 0, TensorIndex>, Eigen::Aligned> A(a_, sizes);
TensorMap<Tensor<int, 2, 0, TensorIndex>, Eigen::Aligned> B((int*)b_, sizes);
StartBenchmarkTiming();
for (int iter = 0; iter < num_iters; ++iter) {
B.device(device_) = A.cast<int>();
}
// Record the number of values copied per second
finalizeBenchmark(static_cast<int64_t>(m_) * k_ * num_iters);
} }
void random(int num_iters) { void random(int num_iters) {
eigen_assert(m_ == k_ && k_ == n_); eigen_assert(m_ == k_ && k_ == n_);
const Eigen::array<TensorIndex, 2> sizes(m_, m_); const Eigen::array<TensorIndex, 2> sizes = {{m_, m_}};
TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, sizes); TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, sizes);
StartBenchmarkTiming(); StartBenchmarkTiming();
@ -51,21 +67,21 @@ template <typename Device> class BenchmarkSuite {
C.device(device_) = C.random(); C.device(device_) = C.random();
} }
// Record the number of random numbers generated per second // Record the number of random numbers generated per second
finalizeBenchmark(m_ * m_ * num_iters); finalizeBenchmark(static_cast<int64_t>(m_) * m_ * num_iters);
} }
void slicing(int num_iters) { void slicing(int num_iters) {
eigen_assert(m_ == k_ && k_ == n_); eigen_assert(m_ == k_ && k_ == n_);
const Eigen::array<TensorIndex, 2> sizes(m_, m_); const Eigen::array<TensorIndex, 2> sizes = {{m_, m_}};
const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, sizes); const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, sizes);
const TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, sizes); const TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, sizes);
TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, sizes); TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, sizes);
const Eigen::DSizes<TensorIndex, 2> quarter_sizes(Eigen::array<TensorIndex, 2>(m_/2, m_/2)); const Eigen::DSizes<TensorIndex, 2> quarter_sizes(m_/2, m_/2);
const Eigen::DSizes<TensorIndex, 2> first_quadrant(Eigen::array<TensorIndex, 2>(0, 0)); const Eigen::DSizes<TensorIndex, 2> first_quadrant(0, 0);
const Eigen::DSizes<TensorIndex, 2> second_quadrant(Eigen::array<TensorIndex, 2>(0, m_/2)); const Eigen::DSizes<TensorIndex, 2> second_quadrant(0, m_/2);
const Eigen::DSizes<TensorIndex, 2> third_quadrant(Eigen::array<TensorIndex, 2>(m_/2, 0)); const Eigen::DSizes<TensorIndex, 2> third_quadrant(m_/2, 0);
const Eigen::DSizes<TensorIndex, 2> fourth_quadrant(Eigen::array<TensorIndex, 2>(m_/2, m_/2)); const Eigen::DSizes<TensorIndex, 2> fourth_quadrant(m_/2, m_/2);
StartBenchmarkTiming(); StartBenchmarkTiming();
for (int iter = 0; iter < num_iters; ++iter) { for (int iter = 0; iter < num_iters; ++iter) {
@ -80,31 +96,59 @@ template <typename Device> class BenchmarkSuite {
} }
// Record the number of values copied from the rhs slice to the lhs slice // Record the number of values copied from the rhs slice to the lhs slice
// each second // each second
finalizeBenchmark(m_ * m_ * num_iters); finalizeBenchmark(static_cast<int64_t>(m_) * m_ * num_iters);
}
void rowChip(int num_iters) {
const Eigen::array<TensorIndex, 2> input_size = {{k_, n_}};
const TensorMap<Tensor<float, 2, 0, TensorIndex>, Eigen::Aligned> B(b_, input_size);
const Eigen::array<TensorIndex, 1> output_size = {{n_}};
TensorMap<Tensor<float, 1, 0, TensorIndex>, Eigen::Aligned> C(c_, output_size);
StartBenchmarkTiming();
for (int iter = 0; iter < num_iters; ++iter) {
C.device(device_) = B.chip(iter % k_, 0);
}
// Record the number of values copied from the rhs chip to the lhs.
finalizeBenchmark(static_cast<int64_t>(n_) * num_iters);
}
void colChip(int num_iters) {
const Eigen::array<TensorIndex, 2> input_size= {{k_, n_}};
const TensorMap<Tensor<float, 2, 0, TensorIndex>, Eigen::Aligned> B(b_, input_size);
const Eigen::array<TensorIndex, 1> output_size = {{n_}};
TensorMap<Tensor<float, 1, 0, TensorIndex>, Eigen::Aligned> C(c_, output_size);
StartBenchmarkTiming();
for (int iter = 0; iter < num_iters; ++iter) {
C.device(device_) = B.chip(iter % n_, 1);
}
// Record the number of values copied from the rhs chip to the lhs.
finalizeBenchmark(static_cast<int64_t>(n_) * num_iters);
} }
void shuffling(int num_iters) { void shuffling(int num_iters) {
eigen_assert(m_ == n_); eigen_assert(m_ == n_);
const Eigen::array<TensorIndex, 2> size_a(m_, k_); const Eigen::array<TensorIndex, 2> size_a = {{m_, k_}};
const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, size_a); const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, size_a);
const Eigen::array<TensorIndex, 2> size_b(k_, m_); const Eigen::array<TensorIndex, 2> size_b = {{k_, m_}};
TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, size_b); TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, size_b);
const Eigen::array<int, 2> shuffle(1, 0); const Eigen::array<int, 2> shuffle = {{1, 0}};
StartBenchmarkTiming(); StartBenchmarkTiming();
for (int iter = 0; iter < num_iters; ++iter) { for (int iter = 0; iter < num_iters; ++iter) {
B.device(device_) = A.shuffle(shuffle); B.device(device_) = A.shuffle(shuffle);
} }
// Record the number of values shuffled from A and copied to B each second // Record the number of values shuffled from A and copied to B each second
finalizeBenchmark(m_ * k_ * num_iters); finalizeBenchmark(static_cast<int64_t>(m_) * k_ * num_iters);
} }
void padding(int num_iters) { void padding(int num_iters) {
eigen_assert(m_ == k_); eigen_assert(m_ == k_);
const Eigen::array<TensorIndex, 2> size_a(m_, k_-3); const Eigen::array<TensorIndex, 2> size_a = {{m_, k_-3}};
const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, size_a); const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, size_a);
const Eigen::array<TensorIndex, 2> size_b(k_, m_); const Eigen::array<TensorIndex, 2> size_b = {{k_, m_}};
TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, size_b); TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, size_b);
Eigen::array<Eigen::IndexPair<TensorIndex>, 2> paddings; Eigen::array<Eigen::IndexPair<TensorIndex>, 2> paddings;
@ -116,35 +160,34 @@ template <typename Device> class BenchmarkSuite {
B.device(device_) = A.pad(paddings); B.device(device_) = A.pad(paddings);
} }
// Record the number of values copied from the padded tensor A each second // Record the number of values copied from the padded tensor A each second
finalizeBenchmark(m_ * k_ * num_iters); finalizeBenchmark(static_cast<int64_t>(m_) * k_ * num_iters);
} }
void striding(int num_iters) { void striding(int num_iters) {
eigen_assert(m_ == k_); eigen_assert(m_ == k_);
const Eigen::array<TensorIndex, 2> size_a(m_, k_); const Eigen::array<TensorIndex, 2> size_a = {{m_, k_}};
const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, size_a); const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, size_a);
const Eigen::array<TensorIndex, 2> size_b(m_, k_ / 2); const Eigen::array<TensorIndex, 2> size_b = {{m_, k_ / 2}};
TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, size_b); TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, size_b);
const Eigen::array<TensorIndex, 2> strides(1, 2); const Eigen::array<TensorIndex, 2> strides = {{1, 2}};
StartBenchmarkTiming(); StartBenchmarkTiming();
for (int iter = 0; iter < num_iters; ++iter) { for (int iter = 0; iter < num_iters; ++iter) {
B.device(device_) = A.stride(strides); B.device(device_) = A.stride(strides);
} }
// Record the number of values copied from the padded tensor A each second // Record the number of values copied from the padded tensor A each second
finalizeBenchmark(m_ * k_ * num_iters); finalizeBenchmark(static_cast<int64_t>(m_) * k_ * num_iters);
} }
void broadcasting(int num_iters) { void broadcasting(int num_iters) {
const Eigen::array<TensorIndex, 2> size_a(m_, 1); const Eigen::array<TensorIndex, 2> size_a = {{m_, 1}};
const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, size_a); const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, size_a);
const Eigen::array<TensorIndex, 2> size_c(m_, n_); const Eigen::array<TensorIndex, 2> size_c = {{m_, n_}};
TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, size_c); TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, size_c);
#if defined(__CUDACC__) #ifndef EIGEN_HAS_INDEX_LIST
// nvcc doesn't support cxx11 const Eigen::array<int, 2> broadcast = {{1, n_}};
const Eigen::array<int, 2> broadcast(1, n_);
#else #else
// Take advantage of cxx11 to give the compiler information it can use to // Take advantage of cxx11 to give the compiler information it can use to
// optimize the code. // optimize the code.
@ -157,12 +200,12 @@ template <typename Device> class BenchmarkSuite {
C.device(device_) = A.broadcast(broadcast); C.device(device_) = A.broadcast(broadcast);
} }
// Record the number of values broadcasted from A and copied to C each second // Record the number of values broadcasted from A and copied to C each second
finalizeBenchmark(m_ * n_ * num_iters); finalizeBenchmark(static_cast<int64_t>(m_) * n_ * num_iters);
} }
void coeffWiseOp(int num_iters) { void coeffWiseOp(int num_iters) {
eigen_assert(m_ == k_ && k_ == n_); eigen_assert(m_ == k_ && k_ == n_);
const Eigen::array<TensorIndex, 2> sizes(m_, m_); const Eigen::array<TensorIndex, 2> sizes = {{m_, m_}};
const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, sizes); const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, sizes);
const TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, sizes); const TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, sizes);
TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, sizes); TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, sizes);
@ -173,12 +216,12 @@ template <typename Device> class BenchmarkSuite {
} }
// Record the number of FLOP executed per second (2 multiplications and // Record the number of FLOP executed per second (2 multiplications and
// 1 addition per value) // 1 addition per value)
finalizeBenchmark(3 * m_ * m_ * num_iters); finalizeBenchmark(static_cast<int64_t>(3) * m_ * m_ * num_iters);
} }
void algebraicFunc(int num_iters) { void algebraicFunc(int num_iters) {
eigen_assert(m_ == k_ && k_ == n_); eigen_assert(m_ == k_ && k_ == n_);
const Eigen::array<TensorIndex, 2> sizes(m_, m_); const Eigen::array<TensorIndex, 2> sizes = {{m_, m_}};
const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, sizes); const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, sizes);
const TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, sizes); const TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, sizes);
TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, sizes); TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, sizes);
@ -189,12 +232,12 @@ template <typename Device> class BenchmarkSuite {
} }
// Record the number of FLOP executed per second (assuming one operation // Record the number of FLOP executed per second (assuming one operation
// per value) // per value)
finalizeBenchmark(m_ * m_ * num_iters); finalizeBenchmark(static_cast<int64_t>(m_) * m_ * num_iters);
} }
void transcendentalFunc(int num_iters) { void transcendentalFunc(int num_iters) {
eigen_assert(m_ == k_ && k_ == n_); eigen_assert(m_ == k_ && k_ == n_);
const Eigen::array<TensorIndex, 2> sizes(m_, m_); const Eigen::array<TensorIndex, 2> sizes = {{m_, m_}};
const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, sizes); const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, sizes);
const TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, sizes); const TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, sizes);
TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, sizes); TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, sizes);
@ -205,17 +248,23 @@ template <typename Device> class BenchmarkSuite {
} }
// Record the number of FLOP executed per second (assuming one operation // Record the number of FLOP executed per second (assuming one operation
// per value) // per value)
finalizeBenchmark(m_ * m_ * num_iters); finalizeBenchmark(static_cast<int64_t>(m_) * m_ * num_iters);
} }
// Simple reduction // Row reduction
void reduction(int num_iters) { void rowReduction(int num_iters) {
const Eigen::array<TensorIndex, 2> input_size(k_, n_); const Eigen::array<TensorIndex, 2> input_size = {{k_, n_}};
const TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, input_size); const TensorMap<Tensor<float, 2, 0, TensorIndex>, Eigen::Aligned> B(b_, input_size);
const Eigen::array<TensorIndex, 1> output_size(n_); const Eigen::array<TensorIndex, 1> output_size = {{n_}};
TensorMap<Tensor<float, 1>, Eigen::Aligned> C(c_, output_size); TensorMap<Tensor<float, 1, 0, TensorIndex>, Eigen::Aligned> C(c_, output_size);
const Eigen::array<TensorIndex, 1> sum_along_dim(0); #ifndef EIGEN_HAS_INDEX_LIST
const Eigen::array<TensorIndex, 1> sum_along_dim = {{0}};
#else
// Take advantage of cxx11 to give the compiler information it can use to
// optimize the code.
Eigen::IndexList<Eigen::type2index<0>> sum_along_dim;
#endif
StartBenchmarkTiming(); StartBenchmarkTiming();
for (int iter = 0; iter < num_iters; ++iter) { for (int iter = 0; iter < num_iters; ++iter) {
@ -223,21 +272,47 @@ template <typename Device> class BenchmarkSuite {
} }
// Record the number of FLOP executed per second (assuming one operation // Record the number of FLOP executed per second (assuming one operation
// per value) // per value)
finalizeBenchmark(m_ * m_ * num_iters); finalizeBenchmark(static_cast<int64_t>(k_) * n_ * num_iters);
}
// Column reduction
void colReduction(int num_iters) {
const Eigen::array<TensorIndex, 2> input_size = {{k_, n_}};
const TensorMap<Tensor<float, 2, 0, TensorIndex>, Eigen::Aligned> B(
b_, input_size);
const Eigen::array<TensorIndex, 1> output_size = {{k_}};
TensorMap<Tensor<float, 1, 0, TensorIndex>, Eigen::Aligned> C(
c_, output_size);
#ifndef EIGEN_HAS_INDEX_LIST
const Eigen::array<TensorIndex, 1> sum_along_dim = {{1}};
#else
// Take advantage of cxx11 to give the compiler information it can use to
// optimize the code.
Eigen::IndexList<Eigen::type2index<1>> sum_along_dim;
#endif
StartBenchmarkTiming();
for (int iter = 0; iter < num_iters; ++iter) {
C.device(device_) = B.sum(sum_along_dim);
}
// Record the number of FLOP executed per second (assuming one operation
// per value)
finalizeBenchmark(static_cast<int64_t>(k_) * n_ * num_iters);
} }
// do a contraction which is equivalent to a matrix multiplication // do a contraction which is equivalent to a matrix multiplication
void contraction(int num_iters) { void contraction(int num_iters) {
const Eigen::array<TensorIndex, 2> sizeA(m_, k_); const Eigen::array<TensorIndex, 2> sizeA = {{m_, k_}};
const Eigen::array<TensorIndex, 2> sizeB(k_, n_); const Eigen::array<TensorIndex, 2> sizeB = {{k_, n_}};
const Eigen::array<TensorIndex, 2> sizeC(m_, n_); const Eigen::array<TensorIndex, 2> sizeC = {{m_, n_}};
const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, sizeA); const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, sizeA);
const TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, sizeB); const TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, sizeB);
TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, sizeC); TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, sizeC);
typedef typename Tensor<float, 2>::DimensionPair DimPair; typedef typename Tensor<float, 2>::DimensionPair DimPair;
const Eigen::array<DimPair, 1> dims(DimPair(1, 0)); const Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
StartBenchmarkTiming(); StartBenchmarkTiming();
for (int iter = 0; iter < num_iters; ++iter) { for (int iter = 0; iter < num_iters; ++iter) {
@ -245,18 +320,18 @@ template <typename Device> class BenchmarkSuite {
} }
// Record the number of FLOP executed per second (size_ multiplications and // Record the number of FLOP executed per second (size_ multiplications and
// additions for each value in the resulting tensor) // additions for each value in the resulting tensor)
finalizeBenchmark(static_cast<int64>(2) * m_ * n_ * k_ * num_iters); finalizeBenchmark(static_cast<int64_t>(2) * m_ * n_ * k_ * num_iters);
} }
void convolution(int num_iters, int kernel_x, int kernel_y) { void convolution(int num_iters, int kernel_x, int kernel_y) {
const Eigen::array<TensorIndex, 2> input_sizes(m_, n_); const Eigen::array<TensorIndex, 2> input_sizes = {{m_, n_}};
TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, input_sizes); TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, input_sizes);
const Eigen::array<TensorIndex, 2> kernel_sizes(kernel_x, kernel_y); const Eigen::array<TensorIndex, 2> kernel_sizes = {{kernel_x, kernel_y}};
TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, kernel_sizes); TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, kernel_sizes);
const Eigen::array<TensorIndex, 2> result_sizes( const Eigen::array<TensorIndex, 2> result_sizes =
m_ - kernel_x + 1, n_ - kernel_y + 1); {{m_ - kernel_x + 1, n_ - kernel_y + 1}};
TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, result_sizes); TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, result_sizes);
Eigen::array<Tensor<float, 2>::Index, 2> dims(0, 1); Eigen::array<Tensor<float, 2>::Index, 2> dims = {{0, 1}};
StartBenchmarkTiming(); StartBenchmarkTiming();
for (int iter = 0; iter < num_iters; ++iter) { for (int iter = 0; iter < num_iters; ++iter) {
@ -264,8 +339,8 @@ template <typename Device> class BenchmarkSuite {
} }
// Record the number of FLOP executed per second (kernel_size // Record the number of FLOP executed per second (kernel_size
// multiplications and additions for each value in the resulting tensor) // multiplications and additions for each value in the resulting tensor)
finalizeBenchmark( finalizeBenchmark(static_cast<int64_t>(2) *
(m_ - kernel_x + 1) * (n_ - kernel_y + 1) * kernel_x * kernel_y * 2 * num_iters); (m_ - kernel_x + 1) * (n_ - kernel_y + 1) * kernel_x * kernel_y * num_iters);
} }
private: private:
@ -280,23 +355,23 @@ template <typename Device> class BenchmarkSuite {
device_.memset(b_, 23, k_ * n_ * sizeof(float)); device_.memset(b_, 23, k_ * n_ * sizeof(float));
device_.memset(c_, 31, m_ * n_ * sizeof(float)); device_.memset(c_, 31, m_ * n_ * sizeof(float));
BenchmarkUseRealTime(); //BenchmarkUseRealTime();
} }
inline void finalizeBenchmark(int64 num_items) { inline void finalizeBenchmark(int64_t num_items) {
#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) #if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
if (Eigen::internal::is_same<Device, Eigen::GpuDevice>::value) { if (Eigen::internal::is_same<Device, Eigen::GpuDevice>::value) {
device_.synchronize(); device_.synchronize();
} }
#endif #endif
StopBenchmarkTiming(); StopBenchmarkTiming();
SetBenchmarkItemsProcessed(num_items); SetBenchmarkFlopsProcessed(num_items);
} }
size_t m_; TensorIndex m_;
size_t k_; TensorIndex k_;
size_t n_; TensorIndex n_;
float* a_; float* a_;
float* b_; float* b_;
float* c_; float* c_;
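The hunks above consistently switch to brace-initialized Eigen::array dimensions and, when EIGEN_HAS_INDEX_LIST is defined, to compile-time IndexList reduction dimensions. A standalone sketch of that pattern (sizes are arbitrary; compile with -std=c++11):

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 2> B(64, 32);
  B.setRandom();
#ifndef EIGEN_HAS_INDEX_LIST
  const Eigen::array<int, 1> sum_along_dim = {{0}};          // run-time dimension list
#else
  Eigen::IndexList<Eigen::type2index<0> > sum_along_dim;     // dimension known at compile time
#endif
  Eigen::Tensor<float, 1> C = B.sum(sum_along_dim);          // row reduction, result has 32 entries
  return 0;
}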

View File

@ -1,19 +1,12 @@
#define EIGEN_USE_THREADS #define EIGEN_USE_THREADS
#include "base/sysinfo.h" #include <string>
#include "strings/strcat.h"
#include "third_party/eigen3/tensor_benchmarks.h" #include "tensor_benchmarks.h"
#include "thread/threadpool.h"
#ifdef __ANDROID__
#define CREATE_THREAD_POOL(threads) \ #define CREATE_THREAD_POOL(threads) \
Eigen::ThreadPoolDevice device(threads); Eigen::ThreadPool pool(threads); \
#else Eigen::ThreadPoolDevice device(&pool, threads);
#define CREATE_THREAD_POOL(threads) \
ThreadPool tp(threads); \
tp.StartWorkers(); \
Eigen::ThreadPoolDevice device(&tp, threads);
#endif
// Simple functions // Simple functions
#define BM_FuncCPU(FUNC, THREADS) \ #define BM_FuncCPU(FUNC, THREADS) \
@ -22,7 +15,6 @@ Eigen::ThreadPoolDevice device(&tp, threads);
CREATE_THREAD_POOL(THREADS); \ CREATE_THREAD_POOL(THREADS); \
BenchmarkSuite<Eigen::ThreadPoolDevice> suite(device, N); \ BenchmarkSuite<Eigen::ThreadPoolDevice> suite(device, N); \
suite.FUNC(iters); \ suite.FUNC(iters); \
SetBenchmarkLabel(StrCat("using ", THREADS, " threads")); \
} \ } \
BENCHMARK_RANGE(BM_##FUNC##_##THREADS##T, 10, 5000); BENCHMARK_RANGE(BM_##FUNC##_##THREADS##T, 10, 5000);
@ -30,6 +22,10 @@ BM_FuncCPU(memcpy, 4);
BM_FuncCPU(memcpy, 8); BM_FuncCPU(memcpy, 8);
BM_FuncCPU(memcpy, 12); BM_FuncCPU(memcpy, 12);
BM_FuncCPU(typeCasting, 4);
BM_FuncCPU(typeCasting, 8);
BM_FuncCPU(typeCasting, 12);
BM_FuncCPU(random, 4); BM_FuncCPU(random, 4);
BM_FuncCPU(random, 8); BM_FuncCPU(random, 8);
BM_FuncCPU(random, 12); BM_FuncCPU(random, 12);
@ -38,6 +34,14 @@ BM_FuncCPU(slicing, 4);
BM_FuncCPU(slicing, 8); BM_FuncCPU(slicing, 8);
BM_FuncCPU(slicing, 12); BM_FuncCPU(slicing, 12);
BM_FuncCPU(rowChip, 4);
BM_FuncCPU(rowChip, 8);
BM_FuncCPU(rowChip, 12);
BM_FuncCPU(colChip, 4);
BM_FuncCPU(colChip, 8);
BM_FuncCPU(colChip, 12);
BM_FuncCPU(shuffling, 4); BM_FuncCPU(shuffling, 4);
BM_FuncCPU(shuffling, 8); BM_FuncCPU(shuffling, 8);
BM_FuncCPU(shuffling, 12); BM_FuncCPU(shuffling, 12);
@ -66,9 +70,13 @@ BM_FuncCPU(transcendentalFunc, 4);
BM_FuncCPU(transcendentalFunc, 8); BM_FuncCPU(transcendentalFunc, 8);
BM_FuncCPU(transcendentalFunc, 12); BM_FuncCPU(transcendentalFunc, 12);
BM_FuncCPU(reduction, 4); BM_FuncCPU(rowReduction, 4);
BM_FuncCPU(reduction, 8); BM_FuncCPU(rowReduction, 8);
BM_FuncCPU(reduction, 12); BM_FuncCPU(rowReduction, 12);
BM_FuncCPU(colReduction, 4);
BM_FuncCPU(colReduction, 8);
BM_FuncCPU(colReduction, 12);
// Contractions // Contractions
@ -84,7 +92,6 @@ BM_FuncCPU(reduction, 12);
BenchmarkSuite<Eigen::ThreadPoolDevice> suite(device, D1, D2, D3); \ BenchmarkSuite<Eigen::ThreadPoolDevice> suite(device, D1, D2, D3); \
suite.FUNC(iters); \ suite.FUNC(iters); \
} \ } \
SetBenchmarkLabel(StrCat("using ", THREADS, " threads")); \
} \ } \
BENCHMARK_RANGE(BM_##FUNC##_##D1##x##D2##x##D3##_##THREADS##T, 10, 5000); BENCHMARK_RANGE(BM_##FUNC##_##D1##x##D2##x##D3##_##THREADS##T, 10, 5000);
@ -107,6 +114,12 @@ BM_FuncWithInputDimsCPU(contraction, N, 64, N, 8);
BM_FuncWithInputDimsCPU(contraction, N, 64, N, 12); BM_FuncWithInputDimsCPU(contraction, N, 64, N, 12);
BM_FuncWithInputDimsCPU(contraction, N, 64, N, 16); BM_FuncWithInputDimsCPU(contraction, N, 64, N, 16);
BM_FuncWithInputDimsCPU(contraction, N, N, 64, 1);
BM_FuncWithInputDimsCPU(contraction, N, N, 64, 4);
BM_FuncWithInputDimsCPU(contraction, N, N, 64, 8);
BM_FuncWithInputDimsCPU(contraction, N, N, 64, 12);
BM_FuncWithInputDimsCPU(contraction, N, N, 64, 16);
BM_FuncWithInputDimsCPU(contraction, 1, N, N, 1); BM_FuncWithInputDimsCPU(contraction, 1, N, N, 1);
BM_FuncWithInputDimsCPU(contraction, 1, N, N, 4); BM_FuncWithInputDimsCPU(contraction, 1, N, N, 4);
BM_FuncWithInputDimsCPU(contraction, 1, N, N, 8); BM_FuncWithInputDimsCPU(contraction, 1, N, N, 8);
@ -127,7 +140,6 @@ BM_FuncWithInputDimsCPU(contraction, N, N, 1, 16);
CREATE_THREAD_POOL(THREADS); \ CREATE_THREAD_POOL(THREADS); \
BenchmarkSuite<Eigen::ThreadPoolDevice> suite(device, N); \ BenchmarkSuite<Eigen::ThreadPoolDevice> suite(device, N); \
suite.FUNC(iters, DIM1, DIM2); \ suite.FUNC(iters, DIM1, DIM2); \
SetBenchmarkLabel(StrCat("using ", THREADS, " threads")); \
} \ } \
BENCHMARK_RANGE(BM_##FUNC##_##DIM1##x##DIM2##_##THREADS##T, 128, 5000); BENCHMARK_RANGE(BM_##FUNC##_##DIM1##x##DIM2##_##THREADS##T, 128, 5000);
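The rewritten CREATE_THREAD_POOL macro above builds an Eigen::ThreadPool and hands it to a ThreadPoolDevice. A minimal sketch of that setup outside the benchmark macros (tensor sizes are arbitrary):

#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::ThreadPool pool(4);                  // 4 worker threads
  Eigen::ThreadPoolDevice device(&pool, 4);
  Eigen::Tensor<float, 2> a(256, 256), b(256, 256), c(256, 256);
  a.setRandom();
  b.setRandom();
  c.device(device) = a + b;                   // expression evaluated on the pool
  return 0;
}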

View File

@ -3,47 +3,47 @@
#include <cuda.h> #include <cuda.h>
#include <cuda_runtime.h> #include <cuda_runtime.h>
#include <iostream> #include <iostream>
#include "strings/strcat.h"
#include "third_party/eigen3/tensor_benchmarks.h"
#include "tensor_benchmarks.h"
// Simple functions // Simple functions
#define BM_FuncGPU(FUNC) \ #define BM_FuncGPU(FUNC) \
static void BM_##FUNC(int iters, int N) { \ static void BM_##FUNC(int iters, int N) { \
StopBenchmarkTiming(); \ StopBenchmarkTiming(); \
cudaStream_t stream; \ Eigen::CudaStreamDevice stream; \
cudaStreamCreate(&stream); \
Eigen::GpuDevice device(&stream); \ Eigen::GpuDevice device(&stream); \
BenchmarkSuite<Eigen::GpuDevice> suite(device, N); \ BenchmarkSuite<Eigen::GpuDevice> suite(device, N); \
cudaDeviceSynchronize(); \ cudaDeviceSynchronize(); \
suite.FUNC(iters); \ suite.FUNC(iters); \
cudaStreamDestroy(stream); \
} \ } \
BENCHMARK_RANGE(BM_##FUNC, 10, 5000); BENCHMARK_RANGE(BM_##FUNC, 10, 5000);
BM_FuncGPU(memcpy); BM_FuncGPU(memcpy);
BM_FuncGPU(typeCasting);
BM_FuncGPU(random); BM_FuncGPU(random);
BM_FuncGPU(slicing); BM_FuncGPU(slicing);
BM_FuncGPU(rowChip);
BM_FuncGPU(colChip);
BM_FuncGPU(shuffling); BM_FuncGPU(shuffling);
BM_FuncGPU(padding); BM_FuncGPU(padding);
BM_FuncGPU(striding); BM_FuncGPU(striding);
BM_FuncGPU(broadcasting); BM_FuncGPU(broadcasting);
BM_FuncGPU(coeffWiseOp); BM_FuncGPU(coeffWiseOp);
BM_FuncGPU(reduction); BM_FuncGPU(algebraicFunc);
BM_FuncGPU(transcendentalFunc);
BM_FuncGPU(rowReduction);
BM_FuncGPU(colReduction);
// Contractions // Contractions
#define BM_FuncWithInputDimsGPU(FUNC, D1, D2, D3) \ #define BM_FuncWithInputDimsGPU(FUNC, D1, D2, D3) \
static void BM_##FUNC##_##D1##x##D2##x##D3(int iters, int N) { \ static void BM_##FUNC##_##D1##x##D2##x##D3(int iters, int N) { \
StopBenchmarkTiming(); \ StopBenchmarkTiming(); \
cudaStream_t stream; \ Eigen::CudaStreamDevice stream; \
cudaStreamCreate(&stream); \
Eigen::GpuDevice device(&stream); \ Eigen::GpuDevice device(&stream); \
BenchmarkSuite<Eigen::GpuDevice> suite(device, D1, D2, D3); \ BenchmarkSuite<Eigen::GpuDevice> suite(device, D1, D2, D3); \
cudaDeviceSynchronize(); \ cudaDeviceSynchronize(); \
suite.FUNC(iters); \ suite.FUNC(iters); \
cudaStreamDestroy(stream); \
} \ } \
BENCHMARK_RANGE(BM_##FUNC##_##D1##x##D2##x##D3, 10, 5000); BENCHMARK_RANGE(BM_##FUNC##_##D1##x##D2##x##D3, 10, 5000);
@ -51,19 +51,18 @@ BM_FuncGPU(reduction);
BM_FuncWithInputDimsGPU(contraction, N, N, N); BM_FuncWithInputDimsGPU(contraction, N, N, N);
BM_FuncWithInputDimsGPU(contraction, 64, N, N); BM_FuncWithInputDimsGPU(contraction, 64, N, N);
BM_FuncWithInputDimsGPU(contraction, N, 64, N); BM_FuncWithInputDimsGPU(contraction, N, 64, N);
BM_FuncWithInputDimsGPU(contraction, N, N, 64);
// Convolutions // Convolutions
#define BM_FuncWithKernelDimsGPU(FUNC, DIM1, DIM2) \ #define BM_FuncWithKernelDimsGPU(FUNC, DIM1, DIM2) \
static void BM_##FUNC##_##DIM1##x##DIM2(int iters, int N) { \ static void BM_##FUNC##_##DIM1##x##DIM2(int iters, int N) { \
StopBenchmarkTiming(); \ StopBenchmarkTiming(); \
cudaStream_t stream; \ Eigen::CudaStreamDevice stream; \
cudaStreamCreate(&stream); \
Eigen::GpuDevice device(&stream); \ Eigen::GpuDevice device(&stream); \
BenchmarkSuite<Eigen::GpuDevice> suite(device, N); \ BenchmarkSuite<Eigen::GpuDevice> suite(device, N); \
cudaDeviceSynchronize(); \ cudaDeviceSynchronize(); \
suite.FUNC(iters, DIM1, DIM2); \ suite.FUNC(iters, DIM1, DIM2); \
cudaStreamDestroy(stream); \
} \ } \
BENCHMARK_RANGE(BM_##FUNC##_##DIM1##x##DIM2, 128, 5000); BENCHMARK_RANGE(BM_##FUNC##_##DIM1##x##DIM2, 128, 5000);
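Likewise, the GPU macros above now wrap an Eigen::CudaStreamDevice in a GpuDevice. A hedged sketch of that setup (must be compiled with nvcc; d_a, d_b and d_c are assumed to be device allocations):

#define EIGEN_USE_GPU
#include <unsupported/Eigen/CXX11/Tensor>

void add_on_gpu(float* d_a, float* d_b, float* d_c, int n) {
  Eigen::CudaStreamDevice stream;             // default CUDA stream
  Eigen::GpuDevice device(&stream);
  Eigen::TensorMap<Eigen::Tensor<float, 1> > A(d_a, n), B(d_b, n), C(d_c, n);
  C.device(device) = A + B;                   // kernel launched on the stream
  device.synchronize();                       // wait for completion
}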

View File

@ -19,19 +19,12 @@
int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy) int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy)
{ {
typedef void (*functype)(int, const Scalar*, int, const Scalar*, Scalar*, Scalar); typedef void (*functype)(int, const Scalar*, int, const Scalar*, Scalar*, Scalar);
static functype func[2]; static const functype func[2] = {
// array index: UP
static bool init = false; (internal::selfadjoint_matrix_vector_product<Scalar,int,ColMajor,Upper,false,false>::run),
if(!init) // array index: LO
{ (internal::selfadjoint_matrix_vector_product<Scalar,int,ColMajor,Lower,false,false>::run),
for(int k=0; k<2; ++k) };
func[k] = 0;
func[UP] = (internal::selfadjoint_matrix_vector_product<Scalar,int,ColMajor,Upper,false,false>::run);
func[LO] = (internal::selfadjoint_matrix_vector_product<Scalar,int,ColMajor,Lower,false,false>::run);
init = true;
}
Scalar* a = reinterpret_cast<Scalar*>(pa); Scalar* a = reinterpret_cast<Scalar*>(pa);
Scalar* x = reinterpret_cast<Scalar*>(px); Scalar* x = reinterpret_cast<Scalar*>(px);
@ -111,19 +104,12 @@ int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa
int EIGEN_BLAS_FUNC(hpr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pap) int EIGEN_BLAS_FUNC(hpr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pap)
{ {
typedef void (*functype)(int, Scalar*, const Scalar*, RealScalar); typedef void (*functype)(int, Scalar*, const Scalar*, RealScalar);
static functype func[2]; static const functype func[2] = {
// array index: UP
static bool init = false; (internal::selfadjoint_packed_rank1_update<Scalar,int,ColMajor,Upper,false,Conj>::run),
if(!init) // array index: LO
{ (internal::selfadjoint_packed_rank1_update<Scalar,int,ColMajor,Lower,false,Conj>::run),
for(int k=0; k<2; ++k) };
func[k] = 0;
func[UP] = (internal::selfadjoint_packed_rank1_update<Scalar,int,ColMajor,Upper,false,Conj>::run);
func[LO] = (internal::selfadjoint_packed_rank1_update<Scalar,int,ColMajor,Lower,false,Conj>::run);
init = true;
}
Scalar* x = reinterpret_cast<Scalar*>(px); Scalar* x = reinterpret_cast<Scalar*>(px);
Scalar* ap = reinterpret_cast<Scalar*>(pap); Scalar* ap = reinterpret_cast<Scalar*>(pap);
@ -162,19 +148,12 @@ int EIGEN_BLAS_FUNC(hpr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px,
int EIGEN_BLAS_FUNC(hpr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pap) int EIGEN_BLAS_FUNC(hpr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pap)
{ {
typedef void (*functype)(int, Scalar*, const Scalar*, const Scalar*, Scalar); typedef void (*functype)(int, Scalar*, const Scalar*, const Scalar*, Scalar);
static functype func[2]; static const functype func[2] = {
// array index: UP
static bool init = false; (internal::packed_rank2_update_selector<Scalar,int,Upper>::run),
if(!init) // array index: LO
{ (internal::packed_rank2_update_selector<Scalar,int,Lower>::run),
for(int k=0; k<2; ++k) };
func[k] = 0;
func[UP] = (internal::packed_rank2_update_selector<Scalar,int,Upper>::run);
func[LO] = (internal::packed_rank2_update_selector<Scalar,int,Lower>::run);
init = true;
}
Scalar* x = reinterpret_cast<Scalar*>(px); Scalar* x = reinterpret_cast<Scalar*>(px);
Scalar* y = reinterpret_cast<Scalar*>(py); Scalar* y = reinterpret_cast<Scalar*>(py);
@ -217,19 +196,12 @@ int EIGEN_BLAS_FUNC(hpr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px
int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pa, int *lda) int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pa, int *lda)
{ {
typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, const Scalar&); typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, const Scalar&);
static functype func[2]; static const functype func[2] = {
// array index: UP
static bool init = false; (selfadjoint_rank1_update<Scalar,int,ColMajor,Upper,false,Conj>::run),
if(!init) // array index: LO
{ (selfadjoint_rank1_update<Scalar,int,ColMajor,Lower,false,Conj>::run),
for(int k=0; k<2; ++k) };
func[k] = 0;
func[UP] = (selfadjoint_rank1_update<Scalar,int,ColMajor,Upper,false,Conj>::run);
func[LO] = (selfadjoint_rank1_update<Scalar,int,ColMajor,Lower,false,Conj>::run);
init = true;
}
Scalar* x = reinterpret_cast<Scalar*>(px); Scalar* x = reinterpret_cast<Scalar*>(px);
Scalar* a = reinterpret_cast<Scalar*>(pa); Scalar* a = reinterpret_cast<Scalar*>(pa);
@ -271,19 +243,12 @@ int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px,
int EIGEN_BLAS_FUNC(her2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pa, int *lda) int EIGEN_BLAS_FUNC(her2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pa, int *lda)
{ {
typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, Scalar); typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, Scalar);
static functype func[2]; static const functype func[2] = {
// array index: UP
static bool init = false; (internal::rank2_update_selector<Scalar,int,Upper>::run),
if(!init) // array index: LO
{ (internal::rank2_update_selector<Scalar,int,Lower>::run),
for(int k=0; k<2; ++k) };
func[k] = 0;
func[UP] = (internal::rank2_update_selector<Scalar,int,Upper>::run);
func[LO] = (internal::rank2_update_selector<Scalar,int,Lower>::run);
init = true;
}
Scalar* x = reinterpret_cast<Scalar*>(px); Scalar* x = reinterpret_cast<Scalar*>(px);
Scalar* y = reinterpret_cast<Scalar*>(py); Scalar* y = reinterpret_cast<Scalar*>(py);
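The change above replaces lazily-initialized function-pointer arrays with static const tables indexed by the decoded UPLO/TRANS codes, which is thread-safe and avoids the init flag. A self-contained sketch of the same pattern (the enum values and impl functions are invented for illustration):

#include <cstdio>

enum { UP = 0, LO = 1 };

static void upper_impl(int n) { std::printf("upper, n=%d\n", n); }
static void lower_impl(int n) { std::printf("lower, n=%d\n", n); }

int main() {
  typedef void (*functype)(int);
  static const functype func[2] = {
    upper_impl,   // array index: UP
    lower_impl    // array index: LO
  };
  int uplo = LO;  // in the BLAS wrappers this comes from decoding the *uplo char
  func[uplo](5);  // dispatch through the const table
  return 0;
}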

View File

@ -26,20 +26,15 @@ struct general_matrix_vector_product_wrapper
int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *incb, RealScalar *pbeta, RealScalar *pc, int *incc) int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *incb, RealScalar *pbeta, RealScalar *pc, int *incc)
{ {
typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int , Scalar *, int, Scalar); typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int , Scalar *, int, Scalar);
static functype func[4]; static const functype func[4] = {
// array index: NOTR
static bool init = false; (general_matrix_vector_product_wrapper<int,Scalar,ColMajor,false,false>::run),
if(!init) // array index: TR
{ (general_matrix_vector_product_wrapper<int,Scalar,RowMajor,false,false>::run),
for(int k=0; k<4; ++k) // array index: ADJ
func[k] = 0; (general_matrix_vector_product_wrapper<int,Scalar,RowMajor,Conj ,false>::run),
0
func[NOTR] = (general_matrix_vector_product_wrapper<int,Scalar,ColMajor,false,false>::run); };
func[TR ] = (general_matrix_vector_product_wrapper<int,Scalar,RowMajor,false,false>::run);
func[ADJ ] = (general_matrix_vector_product_wrapper<int,Scalar,RowMajor,Conj ,false>::run);
init = true;
}
Scalar* a = reinterpret_cast<Scalar*>(pa); Scalar* a = reinterpret_cast<Scalar*>(pa);
Scalar* b = reinterpret_cast<Scalar*>(pb); Scalar* b = reinterpret_cast<Scalar*>(pb);
@ -90,32 +85,36 @@ int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealSca
int EIGEN_BLAS_FUNC(trsv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pa, int *lda, RealScalar *pb, int *incb) int EIGEN_BLAS_FUNC(trsv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pa, int *lda, RealScalar *pb, int *incb)
{ {
typedef void (*functype)(int, const Scalar *, int, Scalar *); typedef void (*functype)(int, const Scalar *, int, Scalar *);
static functype func[16]; static const functype func[16] = {
// array index: NOTR | (UP << 2) | (NUNIT << 3)
static bool init = false; (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, false,ColMajor>::run),
if(!init) // array index: TR | (UP << 2) | (NUNIT << 3)
{ (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, false,RowMajor>::run),
for(int k=0; k<16; ++k) // array index: ADJ | (UP << 2) | (NUNIT << 3)
func[k] = 0; (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, Conj, RowMajor>::run),
0,
func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, false,ColMajor>::run); // array index: NOTR | (LO << 2) | (NUNIT << 3)
func[TR | (UP << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, false,RowMajor>::run); (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, false,ColMajor>::run),
func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, Conj, RowMajor>::run); // array index: TR | (LO << 2) | (NUNIT << 3)
(internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, false,RowMajor>::run),
func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, false,ColMajor>::run); // array index: ADJ | (LO << 2) | (NUNIT << 3)
func[TR | (LO << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, false,RowMajor>::run); (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, Conj, RowMajor>::run),
func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, Conj, RowMajor>::run); 0,
// array index: NOTR | (UP << 2) | (UNIT << 3)
func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,false,ColMajor>::run); (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,false,ColMajor>::run),
func[TR | (UP << 2) | (UNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,false,RowMajor>::run); // array index: TR | (UP << 2) | (UNIT << 3)
func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,Conj, RowMajor>::run); (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,false,RowMajor>::run),
// array index: ADJ | (UP << 2) | (UNIT << 3)
func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,false,ColMajor>::run); (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,Conj, RowMajor>::run),
func[TR | (LO << 2) | (UNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,false,RowMajor>::run); 0,
func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,Conj, RowMajor>::run); // array index: NOTR | (LO << 2) | (UNIT << 3)
(internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,false,ColMajor>::run),
init = true; // array index: TR | (LO << 2) | (UNIT << 3)
} (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,false,RowMajor>::run),
// array index: ADJ | (LO << 2) | (UNIT << 3)
(internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,Conj, RowMajor>::run),
0
};
Scalar* a = reinterpret_cast<Scalar*>(pa); Scalar* a = reinterpret_cast<Scalar*>(pa);
Scalar* b = reinterpret_cast<Scalar*>(pb); Scalar* b = reinterpret_cast<Scalar*>(pb);
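The trsv table above, like the trmv, tbsv, tpmv and tpsv tables that follow, is const and fully initialized at compile time, so there is no longer a mutable static array guarded by an unsynchronized init flag, and the zero entries simply pad the index values that no valid argument combination can produce. A minimal sketch of the lookup, assuming the NOTR/TR/ADJ, UP/LO and NUNIT/UNIT codes and the OP()/UPLO()/DIAG() character decoders from the wrapper's common header; stride and error handling are simplified:

    // illustrative only: bits 0-1 = op, bit 2 = triangle, bit 3 = diagonal kind
    int code = OP(*opa) | (UPLO(*uplo) << 2) | (DIAG(*diag) << 3);
    if(code >= 16 || func[code] == 0)      // the 0 slots above catch invalid combinations
      return 0;
    func[code](*n, a, *lda, b);            // selected triangular_solve_vector kernel (unit-stride b assumed)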
@ -145,32 +144,36 @@ int EIGEN_BLAS_FUNC(trsv)(char *uplo, char *opa, char *diag, int *n, RealScalar
int EIGEN_BLAS_FUNC(trmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pa, int *lda, RealScalar *pb, int *incb) int EIGEN_BLAS_FUNC(trmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pa, int *lda, RealScalar *pb, int *incb)
{ {
typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, const Scalar&); typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, const Scalar&);
static functype func[16]; static const functype func[16] = {
// array index: NOTR | (UP << 2) | (NUNIT << 3)
static bool init = false; (internal::triangular_matrix_vector_product<int,Upper|0, Scalar,false,Scalar,false,ColMajor>::run),
if(!init) // array index: TR | (UP << 2) | (NUNIT << 3)
{ (internal::triangular_matrix_vector_product<int,Lower|0, Scalar,false,Scalar,false,RowMajor>::run),
for(int k=0; k<16; ++k) // array index: ADJ | (UP << 2) | (NUNIT << 3)
func[k] = 0; (internal::triangular_matrix_vector_product<int,Lower|0, Scalar,Conj, Scalar,false,RowMajor>::run),
0,
func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product<int,Upper|0, Scalar,false,Scalar,false,ColMajor>::run); // array index: NOTR | (LO << 2) | (NUNIT << 3)
func[TR | (UP << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product<int,Lower|0, Scalar,false,Scalar,false,RowMajor>::run); (internal::triangular_matrix_vector_product<int,Lower|0, Scalar,false,Scalar,false,ColMajor>::run),
func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product<int,Lower|0, Scalar,Conj, Scalar,false,RowMajor>::run); // array index: TR | (LO << 2) | (NUNIT << 3)
(internal::triangular_matrix_vector_product<int,Upper|0, Scalar,false,Scalar,false,RowMajor>::run),
func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product<int,Lower|0, Scalar,false,Scalar,false,ColMajor>::run); // array index: ADJ | (LO << 2) | (NUNIT << 3)
func[TR | (LO << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product<int,Upper|0, Scalar,false,Scalar,false,RowMajor>::run); (internal::triangular_matrix_vector_product<int,Upper|0, Scalar,Conj, Scalar,false,RowMajor>::run),
func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product<int,Upper|0, Scalar,Conj, Scalar,false,RowMajor>::run); 0,
// array index: NOTR | (UP << 2) | (UNIT << 3)
func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,false,Scalar,false,ColMajor>::run); (internal::triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,false,Scalar,false,ColMajor>::run),
func[TR | (UP << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,false,Scalar,false,RowMajor>::run); // array index: TR | (UP << 2) | (UNIT << 3)
func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,Conj, Scalar,false,RowMajor>::run); (internal::triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,false,Scalar,false,RowMajor>::run),
// array index: ADJ | (UP << 2) | (UNIT << 3)
func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,false,Scalar,false,ColMajor>::run); (internal::triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,Conj, Scalar,false,RowMajor>::run),
func[TR | (LO << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,false,Scalar,false,RowMajor>::run); 0,
func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,Conj, Scalar,false,RowMajor>::run); // array index: NOTR | (LO << 2) | (UNIT << 3)
(internal::triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,false,Scalar,false,ColMajor>::run),
init = true; // array index: TR | (LO << 2) | (UNIT << 3)
} (internal::triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,false,Scalar,false,RowMajor>::run),
// array index: ADJ | (LO << 2) | (UNIT << 3)
(internal::triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,Conj, Scalar,false,RowMajor>::run),
0
};
Scalar* a = reinterpret_cast<Scalar*>(pa); Scalar* a = reinterpret_cast<Scalar*>(pa);
Scalar* b = reinterpret_cast<Scalar*>(pb); Scalar* b = reinterpret_cast<Scalar*>(pb);
@ -346,32 +349,36 @@ int EIGEN_BLAS_FUNC(tbmv)(char *uplo, char *opa, char *diag, int *n, int *k, Rea
int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *op, char *diag, int *n, int *k, RealScalar *pa, int *lda, RealScalar *px, int *incx) int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *op, char *diag, int *n, int *k, RealScalar *pa, int *lda, RealScalar *px, int *incx)
{ {
typedef void (*functype)(int, int, const Scalar *, int, Scalar *); typedef void (*functype)(int, int, const Scalar *, int, Scalar *);
static functype func[16]; static const functype func[16] = {
// array index: NOTR | (UP << 2) | (NUNIT << 3)
static bool init = false; (internal::band_solve_triangular_selector<int,Upper|0, Scalar,false,Scalar,ColMajor>::run),
if(!init) // array index: TR | (UP << 2) | (NUNIT << 3)
{ (internal::band_solve_triangular_selector<int,Lower|0, Scalar,false,Scalar,RowMajor>::run),
for(int i=0; i<16; ++i) // array index: ADJ | (UP << 2) | (NUNIT << 3)
func[i] = 0; (internal::band_solve_triangular_selector<int,Lower|0, Scalar,Conj, Scalar,RowMajor>::run),
0,
func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|0, Scalar,false,Scalar,ColMajor>::run); // array index: NOTR | (LO << 2) | (NUNIT << 3)
func[TR | (UP << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|0, Scalar,false,Scalar,RowMajor>::run); (internal::band_solve_triangular_selector<int,Lower|0, Scalar,false,Scalar,ColMajor>::run),
func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|0, Scalar,Conj, Scalar,RowMajor>::run); // array index: TR | (LO << 2) | (NUNIT << 3)
(internal::band_solve_triangular_selector<int,Upper|0, Scalar,false,Scalar,RowMajor>::run),
func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|0, Scalar,false,Scalar,ColMajor>::run); // array index: ADJ | (LO << 2) | (NUNIT << 3)
func[TR | (LO << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|0, Scalar,false,Scalar,RowMajor>::run); (internal::band_solve_triangular_selector<int,Upper|0, Scalar,Conj, Scalar,RowMajor>::run),
func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|0, Scalar,Conj, Scalar,RowMajor>::run); 0,
// array index: NOTR | (UP << 2) | (UNIT << 3)
func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|UnitDiag,Scalar,false,Scalar,ColMajor>::run); (internal::band_solve_triangular_selector<int,Upper|UnitDiag,Scalar,false,Scalar,ColMajor>::run),
func[TR | (UP << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|UnitDiag,Scalar,false,Scalar,RowMajor>::run); // array index: TR | (UP << 2) | (UNIT << 3)
func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|UnitDiag,Scalar,Conj, Scalar,RowMajor>::run); (internal::band_solve_triangular_selector<int,Lower|UnitDiag,Scalar,false,Scalar,RowMajor>::run),
// array index: ADJ | (UP << 2) | (UNIT << 3)
func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|UnitDiag,Scalar,false,Scalar,ColMajor>::run); (internal::band_solve_triangular_selector<int,Lower|UnitDiag,Scalar,Conj, Scalar,RowMajor>::run),
func[TR | (LO << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|UnitDiag,Scalar,false,Scalar,RowMajor>::run); 0,
func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|UnitDiag,Scalar,Conj, Scalar,RowMajor>::run); // array index: NOTR | (LO << 2) | (UNIT << 3)
(internal::band_solve_triangular_selector<int,Lower|UnitDiag,Scalar,false,Scalar,ColMajor>::run),
init = true; // array index: TR | (LO << 2) | (UNIT << 3)
} (internal::band_solve_triangular_selector<int,Upper|UnitDiag,Scalar,false,Scalar,RowMajor>::run),
// array index: ADJ | (LO << 2) | (UNIT << 3)
(internal::band_solve_triangular_selector<int,Upper|UnitDiag,Scalar,Conj, Scalar,RowMajor>::run),
0,
};
Scalar* a = reinterpret_cast<Scalar*>(pa); Scalar* a = reinterpret_cast<Scalar*>(pa);
Scalar* x = reinterpret_cast<Scalar*>(px); Scalar* x = reinterpret_cast<Scalar*>(px);
@ -416,32 +423,36 @@ int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *op, char *diag, int *n, int *k, Real
int EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx) int EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx)
{ {
typedef void (*functype)(int, const Scalar*, const Scalar*, Scalar*, Scalar); typedef void (*functype)(int, const Scalar*, const Scalar*, Scalar*, Scalar);
static functype func[16]; static const functype func[16] = {
// array index: NOTR | (UP << 2) | (NUNIT << 3)
static bool init = false; (internal::packed_triangular_matrix_vector_product<int,Upper|0, Scalar,false,Scalar,false,ColMajor>::run),
if(!init) // array index: TR | (UP << 2) | (NUNIT << 3)
{ (internal::packed_triangular_matrix_vector_product<int,Lower|0, Scalar,false,Scalar,false,RowMajor>::run),
for(int k=0; k<16; ++k) // array index: ADJ | (UP << 2) | (NUNIT << 3)
func[k] = 0; (internal::packed_triangular_matrix_vector_product<int,Lower|0, Scalar,Conj, Scalar,false,RowMajor>::run),
0,
func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Upper|0, Scalar,false,Scalar,false,ColMajor>::run); // array index: NOTR | (LO << 2) | (NUNIT << 3)
func[TR | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Lower|0, Scalar,false,Scalar,false,RowMajor>::run); (internal::packed_triangular_matrix_vector_product<int,Lower|0, Scalar,false,Scalar,false,ColMajor>::run),
func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Lower|0, Scalar,Conj, Scalar,false,RowMajor>::run); // array index: TR | (LO << 2) | (NUNIT << 3)
(internal::packed_triangular_matrix_vector_product<int,Upper|0, Scalar,false,Scalar,false,RowMajor>::run),
func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Lower|0, Scalar,false,Scalar,false,ColMajor>::run); // array index: ADJ | (LO << 2) | (NUNIT << 3)
func[TR | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Upper|0, Scalar,false,Scalar,false,RowMajor>::run); (internal::packed_triangular_matrix_vector_product<int,Upper|0, Scalar,Conj, Scalar,false,RowMajor>::run),
func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Upper|0, Scalar,Conj, Scalar,false,RowMajor>::run); 0,
// array index: NOTR | (UP << 2) | (UNIT << 3)
func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,false,Scalar,false,ColMajor>::run); (internal::packed_triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,false,Scalar,false,ColMajor>::run),
func[TR | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,false,Scalar,false,RowMajor>::run); // array index: TR | (UP << 2) | (UNIT << 3)
func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,Conj, Scalar,false,RowMajor>::run); (internal::packed_triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,false,Scalar,false,RowMajor>::run),
// array index: ADJ | (UP << 2) | (UNIT << 3)
func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,false,Scalar,false,ColMajor>::run); (internal::packed_triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,Conj, Scalar,false,RowMajor>::run),
func[TR | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,false,Scalar,false,RowMajor>::run); 0,
func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,Conj, Scalar,false,RowMajor>::run); // array index: NOTR | (LO << 2) | (UNIT << 3)
(internal::packed_triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,false,Scalar,false,ColMajor>::run),
init = true; // array index: TR | (LO << 2) | (UNIT << 3)
} (internal::packed_triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,false,Scalar,false,RowMajor>::run),
// array index: ADJ | (LO << 2) | (UNIT << 3)
(internal::packed_triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,Conj, Scalar,false,RowMajor>::run),
0
};
Scalar* ap = reinterpret_cast<Scalar*>(pap); Scalar* ap = reinterpret_cast<Scalar*>(pap);
Scalar* x = reinterpret_cast<Scalar*>(px); Scalar* x = reinterpret_cast<Scalar*>(px);
@ -487,32 +498,36 @@ int EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *opa, char *diag, int *n, RealScalar
int EIGEN_BLAS_FUNC(tpsv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx) int EIGEN_BLAS_FUNC(tpsv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx)
{ {
typedef void (*functype)(int, const Scalar*, Scalar*); typedef void (*functype)(int, const Scalar*, Scalar*);
static functype func[16]; static const functype func[16] = {
// array index: NOTR | (UP << 2) | (NUNIT << 3)
static bool init = false; (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, false,ColMajor>::run),
if(!init) // array index: TR | (UP << 2) | (NUNIT << 3)
{ (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, false,RowMajor>::run),
for(int k=0; k<16; ++k) // array index: ADJ | (UP << 2) | (NUNIT << 3)
func[k] = 0; (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, Conj, RowMajor>::run),
0,
func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, false,ColMajor>::run); // array index: NOTR | (LO << 2) | (NUNIT << 3)
func[TR | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, false,RowMajor>::run); (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, false,ColMajor>::run),
func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, Conj, RowMajor>::run); // array index: TR | (LO << 2) | (NUNIT << 3)
(internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, false,RowMajor>::run),
func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, false,ColMajor>::run); // array index: ADJ | (LO << 2) | (NUNIT << 3)
func[TR | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, false,RowMajor>::run); (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, Conj, RowMajor>::run),
func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, Conj, RowMajor>::run); 0,
// array index: NOTR | (UP << 2) | (UNIT << 3)
func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,false,ColMajor>::run); (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,false,ColMajor>::run),
func[TR | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,false,RowMajor>::run); // array index: TR | (UP << 2) | (UNIT << 3)
func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,Conj, RowMajor>::run); (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,false,RowMajor>::run),
// array index: ADJ | (UP << 2) | (UNIT << 3)
func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,false,ColMajor>::run); (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,Conj, RowMajor>::run),
func[TR | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,false,RowMajor>::run); 0,
func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,Conj, RowMajor>::run); // array index: NOTR | (LO << 2) | (UNIT << 3)
(internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,false,ColMajor>::run),
init = true; // array index: TR | (LO << 2) | (UNIT << 3)
} (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,false,RowMajor>::run),
// array index: ADJ | (LO << 2) | (UNIT << 3)
(internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,Conj, RowMajor>::run),
0
};
Scalar* ap = reinterpret_cast<Scalar*>(pap); Scalar* ap = reinterpret_cast<Scalar*>(pap);
Scalar* x = reinterpret_cast<Scalar*>(px); Scalar* x = reinterpret_cast<Scalar*>(px);

View File

@ -13,19 +13,12 @@
int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy) int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy)
{ {
typedef void (*functype)(int, const Scalar*, int, const Scalar*, Scalar*, Scalar); typedef void (*functype)(int, const Scalar*, int, const Scalar*, Scalar*, Scalar);
static functype func[2]; static const functype func[2] = {
// array index: UP
static bool init = false; (internal::selfadjoint_matrix_vector_product<Scalar,int,ColMajor,Upper,false,false>::run),
if(!init) // array index: LO
{ (internal::selfadjoint_matrix_vector_product<Scalar,int,ColMajor,Lower,false,false>::run),
for(int k=0; k<2; ++k) };
func[k] = 0;
func[UP] = (internal::selfadjoint_matrix_vector_product<Scalar,int,ColMajor,Upper,false,false>::run);
func[LO] = (internal::selfadjoint_matrix_vector_product<Scalar,int,ColMajor,Lower,false,false>::run);
init = true;
}
Scalar* a = reinterpret_cast<Scalar*>(pa); Scalar* a = reinterpret_cast<Scalar*>(pa);
Scalar* x = reinterpret_cast<Scalar*>(px); Scalar* x = reinterpret_cast<Scalar*>(px);
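The symmetric level-2 routines (symv here, and syr, syr2, spr, spr2 below) use the same scheme with a two-slot table indexed only by the triangle selector. A minimal sketch, again assuming the UP/LO codes and the UPLO() decoder, and assuming y has already been scaled by beta:

    int code = UPLO(*uplo);                 // 0 = upper, 1 = lower
    if(code >= 2 || func[code] == 0)
      return 0;
    func[code](*n, a, *lda, x, y, alpha);   // e.g. selfadjoint_matrix_vector_product for symv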
@ -71,34 +64,13 @@ int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, RealScalar *p
int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pc, int *ldc) int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pc, int *ldc)
{ {
// typedef void (*functype)(int, const Scalar *, int, Scalar *, int, Scalar);
// static functype func[2];
// static bool init = false;
// if(!init)
// {
// for(int k=0; k<2; ++k)
// func[k] = 0;
//
// func[UP] = (internal::selfadjoint_product<Scalar,ColMajor,ColMajor,false,UpperTriangular>::run);
// func[LO] = (internal::selfadjoint_product<Scalar,ColMajor,ColMajor,false,LowerTriangular>::run);
// init = true;
// }
typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, const Scalar&); typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, const Scalar&);
static functype func[2]; static const functype func[2] = {
// array index: UP
static bool init = false; (selfadjoint_rank1_update<Scalar,int,ColMajor,Upper,false,Conj>::run),
if(!init) // array index: LO
{ (selfadjoint_rank1_update<Scalar,int,ColMajor,Lower,false,Conj>::run),
for(int k=0; k<2; ++k) };
func[k] = 0;
func[UP] = (selfadjoint_rank1_update<Scalar,int,ColMajor,Upper,false,Conj>::run);
func[LO] = (selfadjoint_rank1_update<Scalar,int,ColMajor,Lower,false,Conj>::run);
init = true;
}
Scalar* x = reinterpret_cast<Scalar*>(px); Scalar* x = reinterpret_cast<Scalar*>(px);
Scalar* c = reinterpret_cast<Scalar*>(pc); Scalar* c = reinterpret_cast<Scalar*>(pc);
@ -131,34 +103,13 @@ int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px,
// C := alpha*x*y' + alpha*y*x' + C // C := alpha*x*y' + alpha*y*x' + C
int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, int *ldc) int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, int *ldc)
{ {
// typedef void (*functype)(int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar);
// static functype func[2];
//
// static bool init = false;
// if(!init)
// {
// for(int k=0; k<2; ++k)
// func[k] = 0;
//
// func[UP] = (internal::selfadjoint_product<Scalar,ColMajor,ColMajor,false,UpperTriangular>::run);
// func[LO] = (internal::selfadjoint_product<Scalar,ColMajor,ColMajor,false,LowerTriangular>::run);
//
// init = true;
// }
typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, Scalar); typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, Scalar);
static functype func[2]; static const functype func[2] = {
// array index: UP
static bool init = false; (internal::rank2_update_selector<Scalar,int,Upper>::run),
if(!init) // array index: LO
{ (internal::rank2_update_selector<Scalar,int,Lower>::run),
for(int k=0; k<2; ++k) };
func[k] = 0;
func[UP] = (internal::rank2_update_selector<Scalar,int,Upper>::run);
func[LO] = (internal::rank2_update_selector<Scalar,int,Lower>::run);
init = true;
}
Scalar* x = reinterpret_cast<Scalar*>(px); Scalar* x = reinterpret_cast<Scalar*>(px);
Scalar* y = reinterpret_cast<Scalar*>(py); Scalar* y = reinterpret_cast<Scalar*>(py);
@ -234,19 +185,12 @@ int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px
int EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *palpha, Scalar *px, int *incx, Scalar *pap) int EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *palpha, Scalar *px, int *incx, Scalar *pap)
{ {
typedef void (*functype)(int, Scalar*, const Scalar*, Scalar); typedef void (*functype)(int, Scalar*, const Scalar*, Scalar);
static functype func[2]; static const functype func[2] = {
// array index: UP
static bool init = false; (internal::selfadjoint_packed_rank1_update<Scalar,int,ColMajor,Upper,false,false>::run),
if(!init) // array index: LO
{ (internal::selfadjoint_packed_rank1_update<Scalar,int,ColMajor,Lower,false,false>::run),
for(int k=0; k<2; ++k) };
func[k] = 0;
func[UP] = (internal::selfadjoint_packed_rank1_update<Scalar,int,ColMajor,Upper,false,false>::run);
func[LO] = (internal::selfadjoint_packed_rank1_update<Scalar,int,ColMajor,Lower,false,false>::run);
init = true;
}
Scalar* x = reinterpret_cast<Scalar*>(px); Scalar* x = reinterpret_cast<Scalar*>(px);
Scalar* ap = reinterpret_cast<Scalar*>(pap); Scalar* ap = reinterpret_cast<Scalar*>(pap);
@ -285,19 +229,12 @@ int EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *palpha, Scalar *px, int *in
int EIGEN_BLAS_FUNC(spr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pap) int EIGEN_BLAS_FUNC(spr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pap)
{ {
typedef void (*functype)(int, Scalar*, const Scalar*, const Scalar*, Scalar); typedef void (*functype)(int, Scalar*, const Scalar*, const Scalar*, Scalar);
static functype func[2]; static const functype func[2] = {
// array index: UP
static bool init = false; (internal::packed_rank2_update_selector<Scalar,int,Upper>::run),
if(!init) // array index: LO
{ (internal::packed_rank2_update_selector<Scalar,int,Lower>::run),
for(int k=0; k<2; ++k) };
func[k] = 0;
func[UP] = (internal::packed_rank2_update_selector<Scalar,int,Upper>::run);
func[LO] = (internal::packed_rank2_update_selector<Scalar,int,Lower>::run);
init = true;
}
Scalar* x = reinterpret_cast<Scalar*>(px); Scalar* x = reinterpret_cast<Scalar*>(px);
Scalar* y = reinterpret_cast<Scalar*>(py); Scalar* y = reinterpret_cast<Scalar*>(py);

View File

@ -13,24 +13,29 @@ int EIGEN_BLAS_FUNC(gemm)(char *opa, char *opb, int *m, int *n, int *k, RealScal
{ {
// std::cerr << "in gemm " << *opa << " " << *opb << " " << *m << " " << *n << " " << *k << " " << *lda << " " << *ldb << " " << *ldc << " " << *palpha << " " << *pbeta << "\n"; // std::cerr << "in gemm " << *opa << " " << *opb << " " << *m << " " << *n << " " << *k << " " << *lda << " " << *ldb << " " << *ldc << " " << *palpha << " " << *pbeta << "\n";
typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, Scalar, internal::level3_blocking<Scalar,Scalar>&, Eigen::internal::GemmParallelInfo<DenseIndex>*); typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, Scalar, internal::level3_blocking<Scalar,Scalar>&, Eigen::internal::GemmParallelInfo<DenseIndex>*);
static functype func[12]; static const functype func[12] = {
// array index: NOTR | (NOTR << 2)
static bool init = false; (internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,ColMajor,false,ColMajor>::run),
if(!init) // array index: TR | (NOTR << 2)
{ (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,false,ColMajor>::run),
for(int i=0; i<12; ++i) // array index: ADJ | (NOTR << 2)
func[i] = 0; (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor>::run),
func[NOTR | (NOTR << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,ColMajor,false,ColMajor>::run); 0,
func[TR | (NOTR << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,false,ColMajor>::run); // array index: NOTR | (TR << 2)
func[ADJ | (NOTR << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor>::run); (internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,false,ColMajor>::run),
func[NOTR | (TR << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,false,ColMajor>::run); // array index: TR | (TR << 2)
func[TR | (TR << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,RowMajor,false,ColMajor>::run); (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,RowMajor,false,ColMajor>::run),
func[ADJ | (TR << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,RowMajor,false,ColMajor>::run); // array index: ADJ | (TR << 2)
func[NOTR | (ADJ << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor>::run); (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,RowMajor,false,ColMajor>::run),
func[TR | (ADJ << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,RowMajor,Conj, ColMajor>::run); 0,
func[ADJ | (ADJ << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,RowMajor,Conj, ColMajor>::run); // array index: NOTR | (ADJ << 2)
init = true; (internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor>::run),
} // array index: TR | (ADJ << 2)
(internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,RowMajor,Conj, ColMajor>::run),
// array index: ADJ | (ADJ << 2)
(internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,RowMajor,Conj, ColMajor>::run),
0
};
Scalar* a = reinterpret_cast<Scalar*>(pa); Scalar* a = reinterpret_cast<Scalar*>(pa);
Scalar* b = reinterpret_cast<Scalar*>(pb); Scalar* b = reinterpret_cast<Scalar*>(pb);
@ -73,49 +78,64 @@ int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m,
{ {
// std::cerr << "in trsm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << "," << *n << " " << *palpha << " " << *lda << " " << *ldb<< "\n"; // std::cerr << "in trsm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << "," << *n << " " << *palpha << " " << *lda << " " << *ldb<< "\n";
typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, internal::level3_blocking<Scalar,Scalar>&); typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, internal::level3_blocking<Scalar,Scalar>&);
static functype func[32]; static const functype func[32] = {
// array index: NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
static bool init = false; (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, false,ColMajor,ColMajor>::run),
if(!init) // array index: TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
{ (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, false,RowMajor,ColMajor>::run),
for(int i=0; i<32; ++i) // array index: ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
func[i] = 0; (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, Conj, RowMajor,ColMajor>::run),
0,
func[NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, false,ColMajor,ColMajor>::run); // array index: NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
func[TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, false,RowMajor,ColMajor>::run); (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, false,ColMajor,ColMajor>::run),
func[ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, Conj, RowMajor,ColMajor>::run); // array index: TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, false,RowMajor,ColMajor>::run),
func[NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, false,ColMajor,ColMajor>::run); // array index: ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
func[TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, false,RowMajor,ColMajor>::run); (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, Conj, RowMajor,ColMajor>::run),
func[ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, Conj, RowMajor,ColMajor>::run); 0,
// array index: NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
func[NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, false,ColMajor,ColMajor>::run); (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, false,ColMajor,ColMajor>::run),
func[TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, false,RowMajor,ColMajor>::run); // array index: TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
func[ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, Conj, RowMajor,ColMajor>::run); (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, false,RowMajor,ColMajor>::run),
// array index: ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
func[NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, false,ColMajor,ColMajor>::run); (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, Conj, RowMajor,ColMajor>::run),
func[TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, false,RowMajor,ColMajor>::run); 0,
func[ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, Conj, RowMajor,ColMajor>::run); // array index: NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, false,ColMajor,ColMajor>::run),
// array index: TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
func[NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,false,ColMajor,ColMajor>::run); (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, false,RowMajor,ColMajor>::run),
func[TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,false,RowMajor,ColMajor>::run); // array index: ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
func[ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,Conj, RowMajor,ColMajor>::run); (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, Conj, RowMajor,ColMajor>::run),
0,
func[NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,false,ColMajor,ColMajor>::run); // array index: NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)
func[TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,false,RowMajor,ColMajor>::run); (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,false,ColMajor,ColMajor>::run),
func[ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,Conj, RowMajor,ColMajor>::run); // array index: TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,false,RowMajor,ColMajor>::run),
func[NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,false,ColMajor,ColMajor>::run); // array index: ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)
func[TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,false,RowMajor,ColMajor>::run); (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,Conj, RowMajor,ColMajor>::run),
func[ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,Conj, RowMajor,ColMajor>::run); 0,
// array index: NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
func[NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,false,ColMajor,ColMajor>::run); (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,false,ColMajor,ColMajor>::run),
func[TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,false,RowMajor,ColMajor>::run); // array index: TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
func[ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,Conj, RowMajor,ColMajor>::run); (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,false,RowMajor,ColMajor>::run),
// array index: ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
init = true; (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,Conj, RowMajor,ColMajor>::run),
} 0,
// array index: NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,false,ColMajor,ColMajor>::run),
// array index: TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,false,RowMajor,ColMajor>::run),
// array index: ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,Conj, RowMajor,ColMajor>::run),
0,
// array index: NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,false,ColMajor,ColMajor>::run),
// array index: TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,false,RowMajor,ColMajor>::run),
// array index: ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,Conj, RowMajor,ColMajor>::run),
0
};
Scalar* a = reinterpret_cast<Scalar*>(pa); Scalar* a = reinterpret_cast<Scalar*>(pa);
Scalar* b = reinterpret_cast<Scalar*>(pb); Scalar* b = reinterpret_cast<Scalar*>(pb);
@ -162,47 +182,64 @@ int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m,
{ {
// std::cerr << "in trmm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << " " << *n << " " << *lda << " " << *ldb << " " << *palpha << "\n"; // std::cerr << "in trmm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << " " << *n << " " << *lda << " " << *ldb << " " << *palpha << "\n";
typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking<Scalar,Scalar>&); typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking<Scalar,Scalar>&);
static functype func[32]; static const functype func[32] = {
static bool init = false; // array index: NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
if(!init) (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, ColMajor,false,ColMajor,false,ColMajor>::run),
{ // array index: TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
for(int k=0; k<32; ++k) (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, RowMajor,false,ColMajor,false,ColMajor>::run),
func[k] = 0; // array index: ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, RowMajor,Conj, ColMajor,false,ColMajor>::run),
func[NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, ColMajor,false,ColMajor,false,ColMajor>::run); 0,
func[TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, RowMajor,false,ColMajor,false,ColMajor>::run); // array index: NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
func[ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, RowMajor,Conj, ColMajor,false,ColMajor>::run); (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,ColMajor,false,ColMajor>::run),
// array index: TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
func[NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,ColMajor,false,ColMajor>::run); (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,RowMajor,false,ColMajor>::run),
func[TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,RowMajor,false,ColMajor>::run); // array index: ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
func[ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,RowMajor,Conj, ColMajor>::run); (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,RowMajor,Conj, ColMajor>::run),
0,
func[NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, ColMajor,false,ColMajor,false,ColMajor>::run); // array index: NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
func[TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, RowMajor,false,ColMajor,false,ColMajor>::run); (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, ColMajor,false,ColMajor,false,ColMajor>::run),
func[ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, RowMajor,Conj, ColMajor,false,ColMajor>::run); // array index: TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, RowMajor,false,ColMajor,false,ColMajor>::run),
func[NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,ColMajor,false,ColMajor>::run); // array index: ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
func[TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,RowMajor,false,ColMajor>::run); (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, RowMajor,Conj, ColMajor,false,ColMajor>::run),
func[ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,RowMajor,Conj, ColMajor>::run); 0,
// array index: NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
func[NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, ColMajor,false,ColMajor,false,ColMajor>::run); (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,ColMajor,false,ColMajor>::run),
func[TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, RowMajor,false,ColMajor,false,ColMajor>::run); // array index: TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
func[ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, RowMajor,Conj, ColMajor,false,ColMajor>::run); (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,RowMajor,false,ColMajor>::run),
// array index: ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
func[NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,ColMajor,false,ColMajor>::run); (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,RowMajor,Conj, ColMajor>::run),
func[TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,RowMajor,false,ColMajor>::run); 0,
func[ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,RowMajor,Conj, ColMajor>::run); // array index: NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, ColMajor,false,ColMajor,false,ColMajor>::run),
func[NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, ColMajor,false,ColMajor,false,ColMajor>::run); // array index: TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)
func[TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, RowMajor,false,ColMajor,false,ColMajor>::run); (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, RowMajor,false,ColMajor,false,ColMajor>::run),
func[ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, RowMajor,Conj, ColMajor,false,ColMajor>::run); // array index: ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, RowMajor,Conj, ColMajor,false,ColMajor>::run),
func[NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,ColMajor,false,ColMajor>::run); 0,
func[TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,RowMajor,false,ColMajor>::run); // array index: NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
func[ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,RowMajor,Conj, ColMajor>::run); (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,ColMajor,false,ColMajor>::run),
// array index: TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
init = true; (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,RowMajor,false,ColMajor>::run),
} // array index: ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,RowMajor,Conj, ColMajor>::run),
0,
// array index: NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, ColMajor,false,ColMajor,false,ColMajor>::run),
// array index: TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, RowMajor,false,ColMajor,false,ColMajor>::run),
// array index: ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, RowMajor,Conj, ColMajor,false,ColMajor>::run),
0,
// array index: NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,ColMajor,false,ColMajor>::run),
// array index: TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,RowMajor,false,ColMajor>::run),
// array index: ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,RowMajor,Conj, ColMajor>::run),
0
};
Scalar* a = reinterpret_cast<Scalar*>(pa); Scalar* a = reinterpret_cast<Scalar*>(pa);
Scalar* b = reinterpret_cast<Scalar*>(pb); Scalar* b = reinterpret_cast<Scalar*>(pb);
@ -275,9 +312,9 @@ int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *pa
return 1; return 1;
} }
int size = (SIDE(*side)==LEFT) ? (*m) : (*n);
#if ISCOMPLEX #if ISCOMPLEX
// FIXME add support for symmetric complex matrix // FIXME add support for symmetric complex matrix
int size = (SIDE(*side)==LEFT) ? (*m) : (*n);
Matrix<Scalar,Dynamic,Dynamic,ColMajor> matA(size,size); Matrix<Scalar,Dynamic,Dynamic,ColMajor> matA(size,size);
if(UPLO(*uplo)==UP) if(UPLO(*uplo)==UP)
{ {
@ -294,13 +331,15 @@ int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *pa
else if(SIDE(*side)==RIGHT) else if(SIDE(*side)==RIGHT)
matrix(c, *m, *n, *ldc) += alpha * matrix(b, *m, *n, *ldb) * matA; matrix(c, *m, *n, *ldc) += alpha * matrix(b, *m, *n, *ldb) * matA;
#else #else
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic> blocking(*m,*n,size,1,false);
if(SIDE(*side)==LEFT) if(SIDE(*side)==LEFT)
if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, RowMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, RowMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking);
else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking);
else return 0; else return 0;
else if(SIDE(*side)==RIGHT) else if(SIDE(*side)==RIGHT)
if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, RowMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, RowMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking);
else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, ColMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, ColMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking);
else return 0; else return 0;
else else
return 0; return 0;
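The functional change in symm, and in hemm below, is that each product_selfadjoint_matrix kernel now receives a caller-constructed level3_blocking object, so the packing buffers are sized once for the m-by-n-by-size problem instead of being managed inside the kernel. Restating the pattern from the hunk above, with the argument roles spelled out in comments (the role names are descriptive, not Eigen identifiers):

    // rows, cols and depth of the product, one thread, default blocking strategy
    internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic> blocking(*m,*n,size,1,false);
    // every branch then forwards the same object, e.g. the LEFT/UP case:
    internal::product_selfadjoint_matrix<Scalar, DenseIndex, RowMajor,true,false, ColMajor,false,false, ColMajor>
      ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking);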
@ -315,25 +354,23 @@ int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palp
{ {
// std::cerr << "in syrk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n"; // std::cerr << "in syrk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n";
#if !ISCOMPLEX #if !ISCOMPLEX
typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&); typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking<Scalar,Scalar>&);
static functype func[8]; static const functype func[8] = {
// array index: NOTR | (UP << 2)
static bool init = false; (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,ColMajor,Conj, Upper>::run),
if(!init) // array index: TR | (UP << 2)
{ (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,ColMajor,Conj, Upper>::run),
for(int i=0; i<8; ++i) // array index: ADJ | (UP << 2)
func[i] = 0; (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,ColMajor,false,Upper>::run),
0,
func[NOTR | (UP << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,ColMajor,Conj, Upper>::run); // array index: NOTR | (LO << 2)
func[TR | (UP << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,ColMajor,Conj, Upper>::run); (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,ColMajor,Conj, Lower>::run),
func[ADJ | (UP << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,ColMajor,false,Upper>::run); // array index: TR | (LO << 2)
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,ColMajor,Conj, Lower>::run),
func[NOTR | (LO << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,ColMajor,Conj, Lower>::run); // array index: ADJ | (LO << 2)
func[TR | (LO << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,ColMajor,Conj, Lower>::run); (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,ColMajor,false,Lower>::run),
func[ADJ | (LO << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,ColMajor,false,Lower>::run); 0
};
init = true;
}
#endif #endif
Scalar* a = reinterpret_cast<Scalar*>(pa); Scalar* a = reinterpret_cast<Scalar*>(pa);
@ -381,8 +418,10 @@ int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palp
matrix(c, *n, *n, *ldc).triangularView<Lower>() += alpha * matrix(a,*k,*n,*lda).transpose() * matrix(a,*k,*n,*lda); matrix(c, *n, *n, *ldc).triangularView<Lower>() += alpha * matrix(a,*k,*n,*lda).transpose() * matrix(a,*k,*n,*lda);
} }
#else #else
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic> blocking(*n,*n,*k,1,false);
int code = OP(*op) | (UPLO(*uplo) << 2); int code = OP(*op) | (UPLO(*uplo) << 2);
func[code](*n, *k, a, *lda, a, *lda, c, *ldc, alpha); func[code](*n, *k, a, *lda, a, *lda, c, *ldc, alpha, blocking);
#endif #endif
return 0; return 0;
@ -486,20 +525,23 @@ int EIGEN_BLAS_FUNC(hemm)(char *side, char *uplo, int *m, int *n, RealScalar *pa
return 1; return 1;
} }
int size = (SIDE(*side)==LEFT) ? (*m) : (*n);
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic> blocking(*m,*n,size,1,false);
if(SIDE(*side)==LEFT) if(SIDE(*side)==LEFT)
{ {
if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar,DenseIndex,RowMajor,true,Conj, ColMajor,false,false, ColMajor> if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar,DenseIndex,RowMajor,true,Conj, ColMajor,false,false, ColMajor>
::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking);
else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,true,false, ColMajor,false,false, ColMajor> else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,true,false, ColMajor,false,false, ColMajor>
::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking);
else return 0; else return 0;
} }
else if(SIDE(*side)==RIGHT) else if(SIDE(*side)==RIGHT)
{ {
if(UPLO(*uplo)==UP) matrix(c,*m,*n,*ldc) += alpha * matrix(b,*m,*n,*ldb) * matrix(a,*n,*n,*lda).selfadjointView<Upper>();/*internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,false,false, RowMajor,true,Conj, ColMajor> if(UPLO(*uplo)==UP) matrix(c,*m,*n,*ldc) += alpha * matrix(b,*m,*n,*ldb) * matrix(a,*n,*n,*lda).selfadjointView<Upper>();/*internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,false,false, RowMajor,true,Conj, ColMajor>
::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha);*/ ::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking);*/
else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,false,false, ColMajor,true,false, ColMajor> else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,false,false, ColMajor,true,false, ColMajor>
::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); ::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking);
else return 0; else return 0;
} }
else else
@ -516,23 +558,21 @@ int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palp
{ {
// std::cerr << "in herk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n"; // std::cerr << "in herk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n";
typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&); typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking<Scalar,Scalar>&);
static functype func[8]; static const functype func[8] = {
// array index: NOTR | (UP << 2)
static bool init = false; (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor,Upper>::run),
if(!init) 0,
{ // array index: ADJ | (UP << 2)
for(int i=0; i<8; ++i) (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor,Upper>::run),
func[i] = 0; 0,
// array index: NOTR | (LO << 2)
func[NOTR | (UP << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor,Upper>::run); (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor,Lower>::run),
func[ADJ | (UP << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor,Upper>::run); 0,
// array index: ADJ | (LO << 2)
func[NOTR | (LO << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor,Lower>::run); (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor,Lower>::run),
func[ADJ | (LO << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor,Lower>::run); 0
};
init = true;
}
Scalar* a = reinterpret_cast<Scalar*>(pa); Scalar* a = reinterpret_cast<Scalar*>(pa);
Scalar* c = reinterpret_cast<Scalar*>(pc); Scalar* c = reinterpret_cast<Scalar*>(pc);
@ -571,7 +611,8 @@ int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palp
if(*k>0 && alpha!=RealScalar(0)) if(*k>0 && alpha!=RealScalar(0))
{ {
func[code](*n, *k, a, *lda, a, *lda, c, *ldc, alpha); internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic> blocking(*n,*n,*k,1,false);
func[code](*n, *k, a, *lda, a, *lda, c, *ldc, alpha, blocking);
matrix(c, *n, *n, *ldc).diagonal().imag().setZero(); matrix(c, *n, *n, *ldc).diagonal().imag().setZero();
} }
return 0; return 0;
View File
@ -44,7 +44,7 @@ C.setRandom(rows,cols) // C = rand(rows,cols)*2-1
VectorXd::LinSpaced(size,low,high) // linspace(low,high,size)' VectorXd::LinSpaced(size,low,high) // linspace(low,high,size)'
v.setLinSpaced(size,low,high) // v = linspace(low,high,size)' v.setLinSpaced(size,low,high) // v = linspace(low,high,size)'
VectorXi::LinSpaced(((hi-low)/step)+1, // low:step:hi VectorXi::LinSpaced(((hi-low)/step)+1, // low:step:hi
low,low+step*(size-1)) low,low+step*(size-1)) //
// Matrix slicing and blocks. All expressions listed here are read/write. // Matrix slicing and blocks. All expressions listed here are read/write.
@ -94,6 +94,8 @@ R.transpose() // R.' or conj(R') // Read-write
R.diagonal() // diag(R) // Read-write R.diagonal() // diag(R) // Read-write
x.asDiagonal() // diag(x) x.asDiagonal() // diag(x)
R.transpose().colwise().reverse() // rot90(R) // Read-write R.transpose().colwise().reverse() // rot90(R) // Read-write
R.rowwise().reverse() // fliplr(R)
R.colwise().reverse() // flipud(R)
R.replicate(i,j) // repmat(P,i,j) R.replicate(i,j) // repmat(P,i,j)
@ -139,6 +141,7 @@ R.cwiseAbs2() // abs(P.^2)
R.array().abs2() // abs(P.^2) R.array().abs2() // abs(P.^2)
(R.array() < s).select(P,Q ); // (R < s ? P : Q) (R.array() < s).select(P,Q ); // (R < s ? P : Q)
R = (Q.array()==0).select(P,A) // R(Q==0) = P(Q==0) R = (Q.array()==0).select(P,A) // R(Q==0) = P(Q==0)
R = P.unaryExpr(ptr_fun(func)) // R = arrayfun(func, P) // with: scalar func(const scalar &x);
// Reductions. // Reductions.
View File
@ -65,17 +65,17 @@ They are summarized in the following tables:
<td>Requires the <a href="http://pastix.gforge.inria.fr">PaStiX</a> package, \b CeCILL-C </td> <td>Requires the <a href="http://pastix.gforge.inria.fr">PaStiX</a> package, \b CeCILL-C </td>
<td>optimized for tough problems and symmetric patterns</td></tr> <td>optimized for tough problems and symmetric patterns</td></tr>
<tr><td>CholmodSupernodalLLT</td><td>\link CholmodSupport_Module CholmodSupport \endlink</td><td>Direct LLt factorization</td><td>SPD</td><td>Fill-in reducing, Leverage fast dense algebra</td> <tr><td>CholmodSupernodalLLT</td><td>\link CholmodSupport_Module CholmodSupport \endlink</td><td>Direct LLt factorization</td><td>SPD</td><td>Fill-in reducing, Leverage fast dense algebra</td>
<td>Requires the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">SuiteSparse</a> package, \b GPL </td> <td>Requires the <a href="http://www.suitesparse.com">SuiteSparse</a> package, \b GPL </td>
<td></td></tr> <td></td></tr>
<tr><td>UmfPackLU</td><td>\link UmfPackSupport_Module UmfPackSupport \endlink</td><td>Direct LU factorization</td><td>Square</td><td>Fill-in reducing, Leverage fast dense algebra</td> <tr><td>UmfPackLU</td><td>\link UmfPackSupport_Module UmfPackSupport \endlink</td><td>Direct LU factorization</td><td>Square</td><td>Fill-in reducing, Leverage fast dense algebra</td>
<td>Requires the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">SuiteSparse</a> package, \b GPL </td> <td>Requires the <a href="http://www.suitesparse.com">SuiteSparse</a> package, \b GPL </td>
<td></td></tr> <td></td></tr>
<tr><td>SuperLU</td><td>\link SuperLUSupport_Module SuperLUSupport \endlink</td><td>Direct LU factorization</td><td>Square</td><td>Fill-in reducing, Leverage fast dense algebra</td> <tr><td>SuperLU</td><td>\link SuperLUSupport_Module SuperLUSupport \endlink</td><td>Direct LU factorization</td><td>Square</td><td>Fill-in reducing, Leverage fast dense algebra</td>
<td>Requires the <a href="http://crd-legacy.lbl.gov/~xiaoye/SuperLU/">SuperLU</a> library, (BSD-like)</td> <td>Requires the <a href="http://crd-legacy.lbl.gov/~xiaoye/SuperLU/">SuperLU</a> library, (BSD-like)</td>
<td></td></tr> <td></td></tr>
<tr><td>SPQR</td><td>\link SPQRSupport_Module SPQRSupport \endlink </td> <td> QR factorization </td> <tr><td>SPQR</td><td>\link SPQRSupport_Module SPQRSupport \endlink </td> <td> QR factorization </td>
<td> Any, rectangular</td><td>fill-in reducing, multithreaded, fast dense algebra</td> <td> Any, rectangular</td><td>fill-in reducing, multithreaded, fast dense algebra</td>
<td> requires the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">SuiteSparse</a> package, \b GPL </td><td>recommended for linear least-squares problems, has a rank-revealing feature</tr> <td> requires the <a href="http://www.suitesparse.com">SuiteSparse</a> package, \b GPL </td><td>recommended for linear least-squares problems, has a rank-revealing feature</tr>
</table> </table>
Here \c SPD means symmetric positive definite. Here \c SPD means symmetric positive definite.
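All of these wrappers share the same compute/solve pattern. As a minimal sketch (assuming SuiteSparse is installed and linked, and picking UmfPackLU from the table above as an arbitrary example):
\code
#include <Eigen/Sparse>
#include <Eigen/UmfPackSupport>

Eigen::SparseMatrix<double> A;   // assumed to be filled elsewhere
Eigen::VectorXd b, x;            // right-hand side and solution

Eigen::UmfPackLU<Eigen::SparseMatrix<double> > solver;
solver.compute(A);               // analyze the pattern and factorize
if(solver.info() == Eigen::Success)
  x = solver.solve(b);           // solve A x = b
\endcode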
View File
@ -153,10 +153,11 @@ not necessary to evaluate the right-hand side explicitly.
\section TopicAliasingMatrixMult Aliasing and matrix multiplication \section TopicAliasingMatrixMult Aliasing and matrix multiplication
Matrix multiplication is the only operation in %Eigen that assumes aliasing by default. Thus, if \c matA is a Matrix multiplication is the only operation in %Eigen that assumes aliasing by default, <strong>under the
matrix, then the statement <tt>matA = matA * matA;</tt> is safe. All other operations in %Eigen assume that condition that the destination matrix is not resized</strong>.
there are no aliasing problems, either because the result is assigned to a different matrix or because it is a Thus, if \c matA is a \b square matrix, then the statement <tt>matA = matA * matA;</tt> is safe.
component-wise operation. All other operations in %Eigen assume that there are no aliasing problems,
either because the result is assigned to a different matrix or because it is a component-wise operation.
<table class="example"> <table class="example">
<tr><th>Example</th><th>Output</th></tr> <tr><th>Example</th><th>Output</th></tr>
@ -198,6 +199,27 @@ may get wrong results:
\verbinclude TopicAliasing_mult3.out \verbinclude TopicAliasing_mult3.out
</td></tr></table> </td></tr></table>
Moreover, starting in Eigen 3.3, aliasing is \b not assumed if the destination matrix is resized and the product is not directly assigned to the destination.
Therefore, the following example is also wrong:
<table class="example">
<tr><th>Example</th><th>Output</th></tr>
<tr><td>
\include TopicAliasing_mult4.cpp
</td>
<td>
\verbinclude TopicAliasing_mult4.out
</td></tr></table>
As for any aliasing issue, you can resolve it by explicitly evaluating the expression prior to assignment:
<table class="example">
<tr><th>Example</th><th>Output</th></tr>
<tr><td>
\include TopicAliasing_mult5.cpp
</td>
<td>
\verbinclude TopicAliasing_mult5.out
</td></tr></table>
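In short, a minimal sketch contrasting the two situations discussed above (sizes chosen arbitrarily):
\code
MatrixXf matA(2,2), A(2,2), B(3,2);
matA.setRandom(); A.setRandom(); B.setRandom();

matA = matA * matA;             // safe: matA keeps its size, so aliasing is taken care of
A = (B * A).eval().cwiseAbs();  // safe: the product is evaluated into a temporary
                                // before A is resized and overwritten
\endcode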
\section TopicAliasingSummary Summary \section TopicAliasingSummary Summary
View File
@ -101,17 +101,16 @@ row and column position are to be stored. These variables should be of type
\verbinclude Tutorial_ReductionsVisitorsBroadcasting_visitors.out \verbinclude Tutorial_ReductionsVisitorsBroadcasting_visitors.out
</td></tr></table> </td></tr></table>
Note that both functions also return the value of the minimum or maximum coefficient if needed, Both functions also return the value of the minimum or maximum coefficient.
as if it was a typical reduction operation.
\section TutorialReductionsVisitorsBroadcastingPartialReductions Partial reductions \section TutorialReductionsVisitorsBroadcastingPartialReductions Partial reductions
Partial reductions are reductions that can operate column- or row-wise on a Matrix or Partial reductions are reductions that can operate column- or row-wise on a Matrix or
Array, applying the reduction operation on each column or row and Array, applying the reduction operation on each column or row and
returning a column or row-vector with the corresponding values. Partial reductions are applied returning a column or row vector with the corresponding values. Partial reductions are applied
with \link DenseBase::colwise() colwise() \endlink or \link DenseBase::rowwise() rowwise() \endlink. with \link DenseBase::colwise() colwise() \endlink or \link DenseBase::rowwise() rowwise() \endlink.
A simple example is obtaining the maximum of the elements A simple example is obtaining the maximum of the elements
in each column in a given matrix, storing the result in a row-vector: in each column in a given matrix, storing the result in a row vector:
<table class="example"> <table class="example">
<tr><th>Example:</th><th>Output:</th></tr> <tr><th>Example:</th><th>Output:</th></tr>
@ -133,8 +132,7 @@ The same operation can be performed row-wise:
\verbinclude Tutorial_ReductionsVisitorsBroadcasting_rowwise.out \verbinclude Tutorial_ReductionsVisitorsBroadcasting_rowwise.out
</td></tr></table> </td></tr></table>
<b>Note that column-wise operations return a 'row-vector' while row-wise operations <b>Note that column-wise operations return a row vector, while row-wise operations return a column vector.</b>
return a 'column-vector'</b>
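As a small sketch of both directions (values chosen arbitrarily):
\code
Eigen::MatrixXf m(2,3);
m << 1, 2, 3,
     4, 5, 6;
Eigen::RowVector3f col_max = m.colwise().maxCoeff(); // row vector:    [4 5 6]
Eigen::Vector2f    row_sum = m.rowwise().sum();      // column vector: [6 15]^T
\endcode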
\subsection TutorialReductionsVisitorsBroadcastingPartialReductionsCombined Combining partial reductions with other operations \subsection TutorialReductionsVisitorsBroadcastingPartialReductionsCombined Combining partial reductions with other operations
It is also possible to use the result of a partial reduction to do further processing. It is also possible to use the result of a partial reduction to do further processing.
@ -176,7 +174,7 @@ The concept behind broadcasting is similar to partial reductions, with the diffe
constructs an expression where a vector (column or row) is interpreted as a matrix by replicating it in constructs an expression where a vector (column or row) is interpreted as a matrix by replicating it in
one direction. one direction.
A simple example is to add a certain column-vector to each column in a matrix. A simple example is to add a certain column vector to each column in a matrix.
This can be accomplished with: This can be accomplished with:
<table class="example"> <table class="example">
@ -253,7 +251,7 @@ is a new matrix whose size is the same as matrix <tt>m</tt>: \f[
\f] \f]
- <tt>(m.colwise() - v).colwise().squaredNorm()</tt> is a partial reduction, computing the squared norm column-wise. The result of - <tt>(m.colwise() - v).colwise().squaredNorm()</tt> is a partial reduction, computing the squared norm column-wise. The result of
this operation is a row-vector where each coefficient is the squared Euclidean distance between each column in <tt>m</tt> and <tt>v</tt>: \f[ this operation is a row vector where each coefficient is the squared Euclidean distance between each column in <tt>m</tt> and <tt>v</tt>: \f[
\mbox{(m.colwise() - v).colwise().squaredNorm()} = \mbox{(m.colwise() - v).colwise().squaredNorm()} =
\begin{bmatrix} \begin{bmatrix}
1 & 505 & 32 & 50 1 & 505 & 32 & 50
View File
@ -257,7 +257,14 @@ Binary coefficient wise operators can also mix sparse and dense expressions:
\code \code
sm2 = sm1.cwiseProduct(dm1); sm2 = sm1.cwiseProduct(dm1);
dm2 = sm1 + dm1; dm2 = sm1 + dm1;
dm2 = dm1 - sm1;
\endcode \endcode
Performance-wise, adding or subtracting a sparse and a dense matrix is better performed in two steps. For instance, instead of <tt>dm2 = sm1 + dm1</tt>, prefer:
\code
dm2 = dm1;
dm2 += sm1;
\endcode
This version has the advantage of fully exploiting the higher performance of dense storage (no indirection, SIMD, etc.), while paying the cost of the slower sparse evaluation only on the few non-zeros of the sparse matrix.
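With concrete types, a sketch of this pattern (sizes and fill-in are arbitrary):
\code
Eigen::SparseMatrix<double> sm1(1000,1000);   // assumed to be filled elsewhere
Eigen::MatrixXd dm1 = Eigen::MatrixXd::Random(1000,1000), dm2;

dm2 = dm1;    // dense copy: contiguous storage, vectorizable
dm2 += sm1;   // sparse update: only the non-zeros of sm1 are visited
\endcode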
%Sparse expressions also support transposition: %Sparse expressions also support transposition:
View File
@ -52,7 +52,7 @@ When doing so, a number of Eigen's algorithms are silently substituted with call
These substitutions apply only for \b Dynamic \b or \b large enough objects with one of the following four standard scalar types: \c float, \c double, \c complex<float>, and \c complex<double>. These substitutions apply only for \b Dynamic \b or \b large enough objects with one of the following four standard scalar types: \c float, \c double, \c complex<float>, and \c complex<double>.
Operations on other scalar types or mixing reals and complexes will continue to use the built-in algorithms. Operations on other scalar types or mixing reals and complexes will continue to use the built-in algorithms.
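For instance, a minimal sketch of enabling one of the substitution macros listed below (here \c EIGEN_USE_BLAS; compiling and linking against a suitable implementation such as Intel MKL is assumed):
\code
#define EIGEN_USE_BLAS  // must be defined before any Eigen header is included
#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(1024,1024),
                  B = Eigen::MatrixXd::Random(1024,1024);
  Eigen::MatrixXd C = A * B;  // large dynamic-size product: candidate for a ?gemm call
  return 0;
}
\endcode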
In addition you can coarsely select choose which parts will be substituted by defining one or multiple of the following macros: In addition you can choose which parts will be substituted by defining one or multiple of the following macros:
<table class="manual"> <table class="manual">
<tr><td>\c EIGEN_USE_BLAS </td><td>Enables the use of external BLAS level 2 and 3 routines (currently works with Intel MKL only)</td></tr> <tr><td>\c EIGEN_USE_BLAS </td><td>Enables the use of external BLAS level 2 and 3 routines (currently works with Intel MKL only)</td></tr>
View File
@ -0,0 +1,5 @@
MatrixXf A(2,2), B(3,2);
B << 2, 0, 0, 3, 1, 1;
A << 2, 0, 0, -2;
A = (B * A).cwiseAbs();
cout << A;
View File
@ -0,0 +1,5 @@
MatrixXf A(2,2), B(3,2);
B << 2, 0, 0, 3, 1, 1;
A << 2, 0, 0, -2;
A = (B * A).eval().cwiseAbs();
cout << A;
View File
@ -45,12 +45,14 @@ template<> struct adjoint_specific<false> {
// check null inputs // check null inputs
VERIFY_IS_APPROX((v1*0).normalized(), (v1*0)); VERIFY_IS_APPROX((v1*0).normalized(), (v1*0));
#if (!EIGEN_ARCH_i386) || defined(EIGEN_VECTORIZE)
RealScalar very_small = (std::numeric_limits<RealScalar>::min)(); RealScalar very_small = (std::numeric_limits<RealScalar>::min)();
VERIFY( (v1*very_small).norm() == 0 ); VERIFY( (v1*very_small).norm() == 0 );
VERIFY_IS_APPROX((v1*very_small).normalized(), (v1*very_small)); VERIFY_IS_APPROX((v1*very_small).normalized(), (v1*very_small));
v3 = v1*very_small; v3 = v1*very_small;
v3.normalize(); v3.normalize();
VERIFY_IS_APPROX(v3, (v1*very_small)); VERIFY_IS_APPROX(v3, (v1*very_small));
#endif
// check compatibility of dot and adjoint // check compatibility of dot and adjoint
ref = NumTraits<Scalar>::IsInteger ? 0 : (std::max)((std::max)(v1.norm(),v2.norm()),(std::max)((square * v2).norm(),(square.adjoint() * v1).norm())); ref = NumTraits<Scalar>::IsInteger ? 0 : (std::max)((std::max)(v1.norm(),v2.norm()),(std::max)((square * v2).norm(),(square.adjoint() * v1).norm()));
View File
@ -219,6 +219,7 @@ template<typename ArrayType> void array_real(const ArrayType& m)
VERIFY_IS_APPROX(m1.tanh(), tanh(m1)); VERIFY_IS_APPROX(m1.tanh(), tanh(m1));
#ifdef EIGEN_HAS_C99_MATH #ifdef EIGEN_HAS_C99_MATH
VERIFY_IS_APPROX(m1.lgamma(), lgamma(m1)); VERIFY_IS_APPROX(m1.lgamma(), lgamma(m1));
VERIFY_IS_APPROX(m1.digamma(), digamma(m1));
VERIFY_IS_APPROX(m1.erf(), erf(m1)); VERIFY_IS_APPROX(m1.erf(), erf(m1));
VERIFY_IS_APPROX(m1.erfc(), erfc(m1)); VERIFY_IS_APPROX(m1.erfc(), erfc(m1));
#endif // EIGEN_HAS_C99_MATH #endif // EIGEN_HAS_C99_MATH
@ -309,7 +310,22 @@ template<typename ArrayType> void array_real(const ArrayType& m)
s1 += Scalar(tiny); s1 += Scalar(tiny);
m1 += ArrayType::Constant(rows,cols,Scalar(tiny)); m1 += ArrayType::Constant(rows,cols,Scalar(tiny));
VERIFY_IS_APPROX(s1/m1, s1 * m1.inverse()); VERIFY_IS_APPROX(s1/m1, s1 * m1.inverse());
// check special functions (comparing against numpy implementation)
#ifdef EIGEN_HAS_C99_MATH
if (!NumTraits<Scalar>::IsComplex) {
VERIFY_IS_APPROX(numext::digamma(Scalar(1)), RealScalar(-0.5772156649015329));
VERIFY_IS_APPROX(numext::digamma(Scalar(1.5)), RealScalar(0.03648997397857645));
VERIFY_IS_APPROX(numext::digamma(Scalar(4)), RealScalar(1.2561176684318));
VERIFY_IS_APPROX(numext::digamma(Scalar(-10.5)), RealScalar(2.398239129535781));
VERIFY_IS_APPROX(numext::digamma(Scalar(10000.5)), RealScalar(9.210340372392849));
VERIFY_IS_EQUAL(numext::digamma(Scalar(0)),
std::numeric_limits<RealScalar>::infinity());
VERIFY_IS_EQUAL(numext::digamma(Scalar(-1)),
std::numeric_limits<RealScalar>::infinity());
}
#endif // EIGEN_HAS_C99_MATH
// check inplace transpose // check inplace transpose
m3 = m1; m3 = m1;
m3.transposeInPlace(); m3.transposeInPlace();
@ -336,8 +352,6 @@ template<typename ArrayType> void array_complex(const ArrayType& m)
Array<RealScalar, -1, -1> m3(rows, cols); Array<RealScalar, -1, -1> m3(rows, cols);
Scalar s1 = internal::random<Scalar>();
for (Index i = 0; i < m.rows(); ++i) for (Index i = 0; i < m.rows(); ++i)
for (Index j = 0; j < m.cols(); ++j) for (Index j = 0; j < m.cols(); ++j)
m2(i,j) = sqrt(m1(i,j)); m2(i,j) = sqrt(m1(i,j));
@ -410,6 +424,7 @@ template<typename ArrayType> void array_complex(const ArrayType& m)
VERIFY_IS_APPROX( m1.sign() * m1.abs(), m1); VERIFY_IS_APPROX( m1.sign() * m1.abs(), m1);
// scalar by array division // scalar by array division
Scalar s1 = internal::random<Scalar>();
const RealScalar tiny = sqrt(std::numeric_limits<RealScalar>::epsilon()); const RealScalar tiny = sqrt(std::numeric_limits<RealScalar>::epsilon());
s1 += Scalar(tiny); s1 += Scalar(tiny);
m1 += ArrayType::Constant(rows,cols,Scalar(tiny)); m1 += ArrayType::Constant(rows,cols,Scalar(tiny));
View File
@ -68,6 +68,16 @@ template<typename MatrixType> void array_for_matrix(const MatrixType& m)
const Scalar& ref_a2 = m.array().matrix().coeffRef(0,0); const Scalar& ref_a2 = m.array().matrix().coeffRef(0,0);
VERIFY(&ref_a1 == &ref_m1); VERIFY(&ref_a1 == &ref_m1);
VERIFY(&ref_a2 == &ref_m2); VERIFY(&ref_a2 == &ref_m2);
// Check write accessors:
m1.array().coeffRef(0,0) = 1;
VERIFY_IS_APPROX(m1(0,0),Scalar(1));
m1.array()(0,0) = 2;
VERIFY_IS_APPROX(m1(0,0),Scalar(2));
m1.array().matrix().coeffRef(0,0) = 3;
VERIFY_IS_APPROX(m1(0,0),Scalar(3));
m1.array().matrix()(0,0) = 4;
VERIFY_IS_APPROX(m1(0,0),Scalar(4));
} }
template<typename MatrixType> void comparisons(const MatrixType& m) template<typename MatrixType> void comparisons(const MatrixType& m)
View File
@ -20,6 +20,8 @@ template<typename MatrixType> void diagonal(const MatrixType& m)
MatrixType m1 = MatrixType::Random(rows, cols), MatrixType m1 = MatrixType::Random(rows, cols),
m2 = MatrixType::Random(rows, cols); m2 = MatrixType::Random(rows, cols);
Scalar s1 = internal::random<Scalar>();
//check diagonal() //check diagonal()
VERIFY_IS_APPROX(m1.diagonal(), m1.transpose().diagonal()); VERIFY_IS_APPROX(m1.diagonal(), m1.transpose().diagonal());
m2.diagonal() = 2 * m1.diagonal(); m2.diagonal() = 2 * m1.diagonal();
@ -58,6 +60,11 @@ template<typename MatrixType> void diagonal(const MatrixType& m)
VERIFY_IS_APPROX(m2.template diagonal<N2>(), static_cast<Scalar>(2) * m1.diagonal(N2)); VERIFY_IS_APPROX(m2.template diagonal<N2>(), static_cast<Scalar>(2) * m1.diagonal(N2));
m2.diagonal(N2)[0] *= 3; m2.diagonal(N2)[0] *= 3;
VERIFY_IS_APPROX(m2.diagonal(N2)[0], static_cast<Scalar>(6) * m1.diagonal(N2)[0]); VERIFY_IS_APPROX(m2.diagonal(N2)[0], static_cast<Scalar>(6) * m1.diagonal(N2)[0]);
m2.diagonal(N2).x() = s1;
VERIFY_IS_APPROX(m2.diagonal(N2).x(), s1);
m2.diagonal(N2).coeffRef(0) = Scalar(2)*s1;
VERIFY_IS_APPROX(m2.diagonal(N2).coeff(0), Scalar(2)*s1);
} }
} }
View File
@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library // This file is part of Eigen, a lightweight C++ template library
// for linear algebra. // for linear algebra.
// //
// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2015-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
// //
// This Source Code Form is subject to the terms of the Mozilla // This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed // Public License v. 2.0. If a copy of the MPL was not distributed
@ -34,4 +34,32 @@ void test_incomplete_cholesky()
CALL_SUBTEST_1(( test_incomplete_cholesky_T<double,int>() )); CALL_SUBTEST_1(( test_incomplete_cholesky_T<double,int>() ));
CALL_SUBTEST_2(( test_incomplete_cholesky_T<std::complex<double>, int>() )); CALL_SUBTEST_2(( test_incomplete_cholesky_T<std::complex<double>, int>() ));
CALL_SUBTEST_3(( test_incomplete_cholesky_T<double,long int>() )); CALL_SUBTEST_3(( test_incomplete_cholesky_T<double,long int>() ));
#ifdef EIGEN_TEST_PART_1
// regression for bug 1150
for(int N = 1; N<20; ++N)
{
Eigen::MatrixXd b( N, N );
b.setOnes();
Eigen::SparseMatrix<double> m( N, N );
m.reserve(Eigen::VectorXi::Constant(N,4));
for( int i = 0; i < N; ++i )
{
m.insert( i, i ) = 1;
m.coeffRef( i, i / 2 ) = 2;
m.coeffRef( i, i / 3 ) = 2;
m.coeffRef( i, i / 4 ) = 2;
}
Eigen::SparseMatrix<double> A;
A = m * m.transpose();
Eigen::ConjugateGradient<Eigen::SparseMatrix<double>,
Eigen::Lower | Eigen::Upper,
Eigen::IncompleteCholesky<double> > solver( A );
VERIFY(solver.preconditioner().info() == Eigen::Success);
VERIFY(solver.info() == Eigen::Success);
}
#endif
} }
View File
@ -44,6 +44,7 @@ template<int SizeAtCompileType> void mixingtypes(int size = SizeAtCompileType)
Mat_d md = mf.template cast<double>(); Mat_d md = mf.template cast<double>();
Mat_cf mcf = Mat_cf::Random(size,size); Mat_cf mcf = Mat_cf::Random(size,size);
Mat_cd mcd = mcf.template cast<complex<double> >(); Mat_cd mcd = mcf.template cast<complex<double> >();
Mat_cd rcd = mcd;
Vec_f vf = Vec_f::Random(size,1); Vec_f vf = Vec_f::Random(size,1);
Vec_d vd = vf.template cast<double>(); Vec_d vd = vf.template cast<double>();
Vec_cf vcf = Vec_cf::Random(size,1); Vec_cf vcf = Vec_cf::Random(size,1);
@ -103,24 +104,23 @@ template<int SizeAtCompileType> void mixingtypes(int size = SizeAtCompileType)
VERIFY_IS_APPROX(mcd.array() *= md.array(), mcd2.array() *= md.array().template cast<std::complex<double> >()); VERIFY_IS_APPROX(mcd.array() *= md.array(), mcd2.array() *= md.array().template cast<std::complex<double> >());
// check matrix-matrix products // check matrix-matrix products
VERIFY_IS_APPROX(sd*md*mcd, (sd*md).template cast<CD>().eval()*mcd); VERIFY_IS_APPROX(sd*md*mcd, (sd*md).template cast<CD>().eval()*mcd);
VERIFY_IS_APPROX(sd*mcd*md, sd*mcd*md.template cast<CD>()); VERIFY_IS_APPROX(sd*mcd*md, sd*mcd*md.template cast<CD>());
VERIFY_IS_APPROX(scd*md*mcd, scd*md.template cast<CD>().eval()*mcd); VERIFY_IS_APPROX(scd*md*mcd, scd*md.template cast<CD>().eval()*mcd);
VERIFY_IS_APPROX(scd*mcd*md, scd*mcd*md.template cast<CD>()); VERIFY_IS_APPROX(scd*mcd*md, scd*mcd*md.template cast<CD>());
VERIFY_IS_APPROX(sf*mf*mcf, sf*mf.template cast<CF>()*mcf); VERIFY_IS_APPROX(sf*mf*mcf, sf*mf.template cast<CF>()*mcf);
VERIFY_IS_APPROX(sf*mcf*mf, sf*mcf*mf.template cast<CF>()); VERIFY_IS_APPROX(sf*mcf*mf, sf*mcf*mf.template cast<CF>());
VERIFY_IS_APPROX(scf*mf*mcf, scf*mf.template cast<CF>()*mcf); VERIFY_IS_APPROX(scf*mf*mcf, scf*mf.template cast<CF>()*mcf);
VERIFY_IS_APPROX(scf*mcf*mf, scf*mcf*mf.template cast<CF>()); VERIFY_IS_APPROX(scf*mcf*mf, scf*mcf*mf.template cast<CF>());
VERIFY_IS_APPROX(sd*md.adjoint()*mcd, (sd*md).template cast<CD>().eval().adjoint()*mcd); VERIFY_IS_APPROX(sd*md.adjoint()*mcd, (sd*md).template cast<CD>().eval().adjoint()*mcd);
VERIFY_IS_APPROX(sd*mcd.adjoint()*md, sd*mcd.adjoint()*md.template cast<CD>()); VERIFY_IS_APPROX(sd*mcd.adjoint()*md, sd*mcd.adjoint()*md.template cast<CD>());
VERIFY_IS_APPROX(sd*md.adjoint()*mcd.adjoint(), (sd*md).template cast<CD>().eval().adjoint()*mcd.adjoint()); VERIFY_IS_APPROX(sd*md.adjoint()*mcd.adjoint(), (sd*md).template cast<CD>().eval().adjoint()*mcd.adjoint());
VERIFY_IS_APPROX(sd*mcd.adjoint()*md.adjoint(), sd*mcd.adjoint()*md.template cast<CD>().adjoint()); VERIFY_IS_APPROX(sd*mcd.adjoint()*md.adjoint(), sd*mcd.adjoint()*md.template cast<CD>().adjoint());
VERIFY_IS_APPROX(sd*md*mcd.adjoint(), (sd*md).template cast<CD>().eval()*mcd.adjoint()); VERIFY_IS_APPROX(sd*md*mcd.adjoint(), (sd*md).template cast<CD>().eval()*mcd.adjoint());
VERIFY_IS_APPROX(sd*mcd*md.adjoint(), sd*mcd*md.template cast<CD>().adjoint()); VERIFY_IS_APPROX(sd*mcd*md.adjoint(), sd*mcd*md.template cast<CD>().adjoint());
VERIFY_IS_APPROX(sf*mf.adjoint()*mcf, (sf*mf).template cast<CF>().eval().adjoint()*mcf); VERIFY_IS_APPROX(sf*mf.adjoint()*mcf, (sf*mf).template cast<CF>().eval().adjoint()*mcf);
VERIFY_IS_APPROX(sf*mcf.adjoint()*mf, sf*mcf.adjoint()*mf.template cast<CF>()); VERIFY_IS_APPROX(sf*mcf.adjoint()*mf, sf*mcf.adjoint()*mf.template cast<CF>());
VERIFY_IS_APPROX(sf*mf.adjoint()*mcf.adjoint(), (sf*mf).template cast<CF>().eval().adjoint()*mcf.adjoint()); VERIFY_IS_APPROX(sf*mf.adjoint()*mcf.adjoint(), (sf*mf).template cast<CF>().eval().adjoint()*mcf.adjoint());
@ -147,6 +147,39 @@ template<int SizeAtCompileType> void mixingtypes(int size = SizeAtCompileType)
VERIFY_IS_APPROX(scd*vcd.adjoint()*md, scd*vcd.adjoint()*md.template cast<CD>().eval()); VERIFY_IS_APPROX(scd*vcd.adjoint()*md, scd*vcd.adjoint()*md.template cast<CD>().eval());
VERIFY_IS_APPROX(sd*vd.adjoint()*mcd, sd*vd.adjoint().template cast<CD>().eval()*mcd); VERIFY_IS_APPROX(sd*vd.adjoint()*mcd, sd*vd.adjoint().template cast<CD>().eval()*mcd);
VERIFY_IS_APPROX(scd*vd.adjoint()*mcd, scd*vd.adjoint().template cast<CD>().eval()*mcd); VERIFY_IS_APPROX(scd*vd.adjoint()*mcd, scd*vd.adjoint().template cast<CD>().eval()*mcd);
VERIFY_IS_APPROX(sd*vcd.adjoint()*md.template triangularView<Upper>(), sd*vcd.adjoint()*md.template cast<CD>().eval().template triangularView<Upper>());
VERIFY_IS_APPROX(scd*vcd.adjoint()*md.template triangularView<Lower>(), scd*vcd.adjoint()*md.template cast<CD>().eval().template triangularView<Lower>());
VERIFY_IS_APPROX(sd*vd.adjoint()*mcd.template triangularView<Lower>(), sd*vd.adjoint().template cast<CD>().eval()*mcd.template triangularView<Lower>());
VERIFY_IS_APPROX(scd*vd.adjoint()*mcd.template triangularView<Upper>(), scd*vd.adjoint().template cast<CD>().eval()*mcd.template triangularView<Upper>());
// Not supported yet: trmm
// VERIFY_IS_APPROX(sd*mcd*md.template triangularView<Lower>(), sd*mcd*md.template cast<CD>().eval().template triangularView<Lower>());
// VERIFY_IS_APPROX(scd*mcd*md.template triangularView<Upper>(), scd*mcd*md.template cast<CD>().eval().template triangularView<Upper>());
// VERIFY_IS_APPROX(sd*md*mcd.template triangularView<Lower>(), sd*md.template cast<CD>().eval()*mcd.template triangularView<Lower>());
// VERIFY_IS_APPROX(scd*md*mcd.template triangularView<Upper>(), scd*md.template cast<CD>().eval()*mcd.template triangularView<Upper>());
// Not supported yet: symv
// VERIFY_IS_APPROX(sd*vcd.adjoint()*md.template selfadjointView<Upper>(), sd*vcd.adjoint()*md.template cast<CD>().eval().template selfadjointView<Upper>());
// VERIFY_IS_APPROX(scd*vcd.adjoint()*md.template selfadjointView<Lower>(), scd*vcd.adjoint()*md.template cast<CD>().eval().template selfadjointView<Lower>());
// VERIFY_IS_APPROX(sd*vd.adjoint()*mcd.template selfadjointView<Lower>(), sd*vd.adjoint().template cast<CD>().eval()*mcd.template selfadjointView<Lower>());
// VERIFY_IS_APPROX(scd*vd.adjoint()*mcd.template selfadjointView<Upper>(), scd*vd.adjoint().template cast<CD>().eval()*mcd.template selfadjointView<Upper>());
// Not supported yet: symm
// VERIFY_IS_APPROX(sd*vcd.adjoint()*md.template selfadjointView<Upper>(), sd*vcd.adjoint()*md.template cast<CD>().eval().template selfadjointView<Upper>());
// VERIFY_IS_APPROX(scd*vcd.adjoint()*md.template selfadjointView<Upper>(), scd*vcd.adjoint()*md.template cast<CD>().eval().template selfadjointView<Upper>());
// VERIFY_IS_APPROX(sd*vd.adjoint()*mcd.template selfadjointView<Upper>(), sd*vd.adjoint().template cast<CD>().eval()*mcd.template selfadjointView<Upper>());
// VERIFY_IS_APPROX(scd*vd.adjoint()*mcd.template selfadjointView<Upper>(), scd*vd.adjoint().template cast<CD>().eval()*mcd.template selfadjointView<Upper>());
rcd.setZero();
VERIFY_IS_APPROX(Mat_cd(rcd.template triangularView<Upper>() = sd * mcd * md),
Mat_cd((sd * mcd * md.template cast<CD>().eval()).template triangularView<Upper>()));
VERIFY_IS_APPROX(Mat_cd(rcd.template triangularView<Upper>() = sd * md * mcd),
Mat_cd((sd * md.template cast<CD>().eval() * mcd).template triangularView<Upper>()));
VERIFY_IS_APPROX(Mat_cd(rcd.template triangularView<Upper>() = scd * mcd * md),
Mat_cd((scd * mcd * md.template cast<CD>().eval()).template triangularView<Upper>()));
VERIFY_IS_APPROX(Mat_cd(rcd.template triangularView<Upper>() = scd * md * mcd),
Mat_cd((scd * md.template cast<CD>().eval() * mcd).template triangularView<Upper>()));
} }
void test_mixingtypes() void test_mixingtypes()
View File
@ -78,14 +78,15 @@ template<typename MatrixType> void nomalloc(const MatrixType& m)
VERIFY_IS_APPROX(m2,m2); VERIFY_IS_APPROX(m2,m2);
m2.template selfadjointView<Lower>().rankUpdate(m1.col(0),-1); m2.template selfadjointView<Lower>().rankUpdate(m1.col(0),-1);
m2.template selfadjointView<Lower>().rankUpdate(m1.row(0),-1); m2.template selfadjointView<Upper>().rankUpdate(m1.row(0),-1);
m2.template selfadjointView<Lower>().rankUpdate(m1.col(0), m1.col(0)); // rank-2
// The following fancy matrix-matrix products are not safe yet regarding static allocation // The following fancy matrix-matrix products are not safe yet regarding static allocation
// m1 += m1.template triangularView<Upper>() * m2.col(; m2.template selfadjointView<Lower>().rankUpdate(m1);
// m1.template selfadjointView<Lower>().rankUpdate(m2); m2 += m2.template triangularView<Upper>() * m1;
// m1 += m1.template triangularView<Upper>() * m2; m2.template triangularView<Upper>() = m2 * m2;
// m1 += m1.template selfadjointView<Lower>() * m2; m1 += m1.template selfadjointView<Lower>() * m2;
// VERIFY_IS_APPROX(m1,m1); VERIFY_IS_APPROX(m2,m2);
} }
template<typename Scalar> template<typename Scalar>
View File
@ -48,30 +48,32 @@ void testVectorType(const VectorType& base)
VectorType m(base); VectorType m(base);
m.setLinSpaced(size,low,high); m.setLinSpaced(size,low,high);
if(!NumTraits<Scalar>::IsInteger)
{
VectorType n(size);
for (int i=0; i<size; ++i)
n(i) = low+i*step;
VERIFY_IS_APPROX(m,n);
}
VectorType n(size); VectorType n(size);
for (int i=0; i<size; ++i) for (int i=0; i<size; ++i)
n(i) = low+i*step; n(i) = size==1 ? low : (low + ((high-low)*Scalar(i))/(size-1));
VERIFY_IS_APPROX(m,n); VERIFY_IS_APPROX(m,n);
// random access version // random access version
m = VectorType::LinSpaced(size,low,high); m = VectorType::LinSpaced(size,low,high);
VERIFY_IS_APPROX(m,n); VERIFY_IS_APPROX(m,n);
// Assignment of a RowVectorXd to a MatrixXd (regression test for bug #79). VERIFY( internal::isApprox(m(m.size()-1),high) );
VERIFY( (MatrixXd(RowVectorXd::LinSpaced(3, 0, 1)) - RowVector3d(0, 0.5, 1)).norm() < std::numeric_limits<Scalar>::epsilon() ); VERIFY( size==1 || internal::isApprox(m(0),low) );
// These guys sometimes fail! This is not good. Any ideas how to fix them!?
//VERIFY( m(m.size()-1) == high );
//VERIFY( m(0) == low );
// sequential access version // sequential access version
m = VectorType::LinSpaced(Sequential,size,low,high); m = VectorType::LinSpaced(Sequential,size,low,high);
VERIFY_IS_APPROX(m,n); VERIFY_IS_APPROX(m,n);
// These guys sometimes fail! This is not good. Any ideas how to fix them!? VERIFY( internal::isApprox(m(m.size()-1),high) );
//VERIFY( m(m.size()-1) == high ); VERIFY( size==1 || internal::isApprox(m(0),low) );
//VERIFY( m(0) == low );
// check whether everything works with row and col major vectors // check whether everything works with row and col major vectors
Matrix<Scalar,Dynamic,1> row_vector(size); Matrix<Scalar,Dynamic,1> row_vector(size);
@ -126,5 +128,13 @@ void test_nullary()
CALL_SUBTEST_8( testVectorType(Vector4f()) ); CALL_SUBTEST_8( testVectorType(Vector4f()) );
CALL_SUBTEST_8( testVectorType(Matrix<float,8,1>()) ); CALL_SUBTEST_8( testVectorType(Matrix<float,8,1>()) );
CALL_SUBTEST_8( testVectorType(Matrix<float,1,1>()) ); CALL_SUBTEST_8( testVectorType(Matrix<float,1,1>()) );
CALL_SUBTEST_9( testVectorType(VectorXi(internal::random<int>(1,300))) );
CALL_SUBTEST_9( testVectorType(Matrix<int,1,1>()) );
} }
#ifdef EIGEN_TEST_PART_6
// Assignment of a RowVectorXd to a MatrixXd (regression test for bug #79).
VERIFY( (MatrixXd(RowVectorXd::LinSpaced(3, 0, 1)) - RowVector3d(0, 0.5, 1)).norm() < std::numeric_limits<double>::epsilon() );
#endif
} }
View File
@ -192,6 +192,11 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
VERIFY_IS_APPROX(refM4.cwiseProduct(m3), refM4.cwiseProduct(refM3)); VERIFY_IS_APPROX(refM4.cwiseProduct(m3), refM4.cwiseProduct(refM3));
// VERIFY_IS_APPROX(m3.cwise()/refM4, refM3.cwise()/refM4); // VERIFY_IS_APPROX(m3.cwise()/refM4, refM3.cwise()/refM4);
VERIFY_IS_APPROX(refM4 + m3, refM4 + refM3);
VERIFY_IS_APPROX(m3 + refM4, refM3 + refM4);
VERIFY_IS_APPROX(refM4 - m3, refM4 - refM3);
VERIFY_IS_APPROX(m3 - refM4, refM3 - refM4);
// test aliasing // test aliasing
VERIFY_IS_APPROX((m1 = -m1), (refM1 = -refM1)); VERIFY_IS_APPROX((m1 = -m1), (refM1 = -refM1));
VERIFY_IS_APPROX((m1 = m1.transpose()), (refM1 = refM1.transpose().eval())); VERIFY_IS_APPROX((m1 = m1.transpose()), (refM1 = refM1.transpose().eval()));
@ -455,6 +460,33 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
refMat1.setIdentity(); refMat1.setIdentity();
VERIFY_IS_APPROX(m1, refMat1); VERIFY_IS_APPROX(m1, refMat1);
} }
// test array/vector of InnerIterator
{
typedef typename SparseMatrixType::InnerIterator IteratorType;
DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
SparseMatrixType m2(rows, cols);
initSparse<Scalar>(density, refMat2, m2);
IteratorType static_array[2];
static_array[0] = IteratorType(m2,0);
static_array[1] = IteratorType(m2,m2.outerSize()-1);
VERIFY( static_array[0] || m2.innerVector(static_array[0].outer()).nonZeros() == 0 );
VERIFY( static_array[1] || m2.innerVector(static_array[1].outer()).nonZeros() == 0 );
if(static_array[0] && static_array[1])
{
++(static_array[1]);
static_array[1] = IteratorType(m2,0);
VERIFY( static_array[1] );
VERIFY( static_array[1].index() == static_array[0].index() );
VERIFY( static_array[1].outer() == static_array[0].outer() );
VERIFY( static_array[1].value() == static_array[0].value() );
}
std::vector<IteratorType> iters(2);
iters[0] = IteratorType(m2,0);
iters[1] = IteratorType(m2,m2.outerSize()-1);
}
} }
View File
@ -9,14 +9,14 @@
#include "sparse.h" #include "sparse.h"
template<typename Scalar,typename Index> void sparse_vector(int rows, int cols) template<typename Scalar,typename StorageIndex> void sparse_vector(int rows, int cols)
{ {
double densityMat = (std::max)(8./(rows*cols), 0.01); double densityMat = (std::max)(8./(rows*cols), 0.01);
double densityVec = (std::max)(8./float(rows), 0.1); double densityVec = (std::max)(8./float(rows), 0.1);
typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix; typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
typedef Matrix<Scalar,Dynamic,1> DenseVector; typedef Matrix<Scalar,Dynamic,1> DenseVector;
typedef SparseVector<Scalar,0,Index> SparseVectorType; typedef SparseVector<Scalar,0,StorageIndex> SparseVectorType;
typedef SparseMatrix<Scalar,0,Index> SparseMatrixType; typedef SparseMatrix<Scalar,0,StorageIndex> SparseMatrixType;
Scalar eps = 1e-6; Scalar eps = 1e-6;
SparseMatrixType m1(rows,rows); SparseMatrixType m1(rows,rows);
@ -87,8 +87,10 @@ template<typename Scalar,typename Index> void sparse_vector(int rows, int cols)
VERIFY_IS_APPROX(m1*v2, refM1*refV2); VERIFY_IS_APPROX(m1*v2, refM1*refV2);
VERIFY_IS_APPROX(v1.dot(m1*v2), refV1.dot(refM1*refV2)); VERIFY_IS_APPROX(v1.dot(m1*v2), refV1.dot(refM1*refV2));
int i = internal::random<int>(0,rows-1); {
VERIFY_IS_APPROX(v1.dot(m1.col(i)), refV1.dot(refM1.col(i))); int i = internal::random<int>(0,rows-1);
VERIFY_IS_APPROX(v1.dot(m1.col(i)), refV1.dot(refM1.col(i)));
}
VERIFY_IS_APPROX(v1.squaredNorm(), refV1.squaredNorm()); VERIFY_IS_APPROX(v1.squaredNorm(), refV1.squaredNorm());
@ -111,15 +113,51 @@ template<typename Scalar,typename Index> void sparse_vector(int rows, int cols)
VERIFY_IS_APPROX(refV3 = v1.transpose(),v1.toDense()); VERIFY_IS_APPROX(refV3 = v1.transpose(),v1.toDense());
VERIFY_IS_APPROX(DenseVector(v1),v1.toDense()); VERIFY_IS_APPROX(DenseVector(v1),v1.toDense());
// test conservative resize
{
std::vector<StorageIndex> inc;
if(rows > 3)
inc.push_back(-3);
inc.push_back(0);
inc.push_back(3);
inc.push_back(1);
inc.push_back(10);
for(std::size_t i = 0; i< inc.size(); i++) {
StorageIndex incRows = inc[i];
SparseVectorType vec1(rows);
DenseVector refVec1 = DenseVector::Zero(rows);
initSparse<Scalar>(densityVec, refVec1, vec1);
vec1.conservativeResize(rows+incRows);
refVec1.conservativeResize(rows+incRows);
if (incRows > 0) refVec1.tail(incRows).setZero();
VERIFY_IS_APPROX(vec1, refVec1);
// Insert new values
if (incRows > 0)
vec1.insert(vec1.rows()-1) = refVec1(refVec1.rows()-1) = 1;
VERIFY_IS_APPROX(vec1, refVec1);
}
}
} }
void test_sparse_vector() void test_sparse_vector()
{ {
for(int i = 0; i < g_repeat; i++) { for(int i = 0; i < g_repeat; i++) {
int r = Eigen::internal::random<int>(1,500), c = Eigen::internal::random<int>(1,500);
if(Eigen::internal::random<int>(0,4) == 0) {
r = c; // check square matrices in 25% of tries
}
EIGEN_UNUSED_VARIABLE(r+c);
CALL_SUBTEST_1(( sparse_vector<double,int>(8, 8) )); CALL_SUBTEST_1(( sparse_vector<double,int>(8, 8) ));
CALL_SUBTEST_2(( sparse_vector<std::complex<double>, int>(16, 16) )); CALL_SUBTEST_2(( sparse_vector<std::complex<double>, int>(r, c) ));
CALL_SUBTEST_1(( sparse_vector<double,long int>(299, 535) )); CALL_SUBTEST_1(( sparse_vector<double,long int>(r, c) ));
CALL_SUBTEST_1(( sparse_vector<double,short>(299, 535) )); CALL_SUBTEST_1(( sparse_vector<double,short>(r, c) ));
} }
} }
View File
@ -163,6 +163,21 @@ template<typename MatrixType> void stable_norm(const MatrixType& m)
VERIFY(!(numext::isfinite)(v.blueNorm())); VERIFY((numext::isnan)(v.blueNorm())); VERIFY(!(numext::isfinite)(v.blueNorm())); VERIFY((numext::isnan)(v.blueNorm()));
VERIFY(!(numext::isfinite)(v.hypotNorm())); VERIFY((numext::isnan)(v.hypotNorm())); VERIFY(!(numext::isfinite)(v.hypotNorm())); VERIFY((numext::isnan)(v.hypotNorm()));
} }
// stableNormalize[d]
{
VERIFY_IS_APPROX(vrand.stableNormalized(), vrand.normalized());
MatrixType vcopy(vrand);
vcopy.stableNormalize();
VERIFY_IS_APPROX(vcopy, vrand.normalized());
VERIFY_IS_APPROX((vrand.stableNormalized()).norm(), RealScalar(1));
VERIFY_IS_APPROX(vcopy.norm(), RealScalar(1));
VERIFY_IS_APPROX((vbig.stableNormalized()).norm(), RealScalar(1));
VERIFY_IS_APPROX((vsmall.stableNormalized()).norm(), RealScalar(1));
RealScalar big_scaling = ((std::numeric_limits<RealScalar>::max)() * RealScalar(1e-4));
VERIFY_IS_APPROX(vbig/big_scaling, (vbig.stableNorm() * vbig.stableNormalized()).eval()/big_scaling);
VERIFY_IS_APPROX(vsmall, vsmall.stableNorm() * vsmall.stableNormalized());
}
} }
void test_stable_norm() void test_stable_norm()
View File
@ -210,6 +210,9 @@ template<typename MatrixType> void vectorwiseop_matrix(const MatrixType& m)
VERIFY_IS_APPROX(m1.cwiseAbs().colwise().maxCoeff(), m1.colwise().template lpNorm<Infinity>()); VERIFY_IS_APPROX(m1.cwiseAbs().colwise().maxCoeff(), m1.colwise().template lpNorm<Infinity>());
VERIFY_IS_APPROX(m1.cwiseAbs().rowwise().maxCoeff(), m1.rowwise().template lpNorm<Infinity>()); VERIFY_IS_APPROX(m1.cwiseAbs().rowwise().maxCoeff(), m1.rowwise().template lpNorm<Infinity>());
// regression for bug 1158
VERIFY_IS_APPROX(m1.cwiseAbs().colwise().sum().x(), m1.col(0).cwiseAbs().sum());
// test normalized // test normalized
m2 = m1.colwise().normalized(); m2 = m1.colwise().normalized();
VERIFY_IS_APPROX(m2.col(c), m1.col(c).normalized()); VERIFY_IS_APPROX(m2.col(c), m1.col(c).normalized());
View File
@ -25,6 +25,7 @@ template<typename MatrixType> void zeroReduction(const MatrixType& m) {
template<typename MatrixType> void zeroSizedMatrix() template<typename MatrixType> void zeroSizedMatrix()
{ {
MatrixType t1; MatrixType t1;
typedef typename MatrixType::Scalar Scalar;
if (MatrixType::SizeAtCompileTime == Dynamic || MatrixType::SizeAtCompileTime == 0) if (MatrixType::SizeAtCompileTime == Dynamic || MatrixType::SizeAtCompileTime == 0)
{ {
@ -45,6 +46,23 @@ template<typename MatrixType> void zeroSizedMatrix()
VERIFY(t1==t2); VERIFY(t1==t2);
} }
} }
if(MatrixType::MaxColsAtCompileTime!=0 && MatrixType::MaxRowsAtCompileTime!=0)
{
Index rows = MatrixType::RowsAtCompileTime==Dynamic ? internal::random<Index>(1,10) : MatrixType::RowsAtCompileTime;
Index cols = MatrixType::ColsAtCompileTime==Dynamic ? internal::random<Index>(1,10) : MatrixType::ColsAtCompileTime;
MatrixType m(rows,cols);
zeroReduction(m.template block<0,MatrixType::ColsAtCompileTime>(0,0,0,cols));
zeroReduction(m.template block<MatrixType::RowsAtCompileTime,0>(0,0,rows,0));
zeroReduction(m.template block<0,1>(0,0));
zeroReduction(m.template block<1,0>(0,0));
Matrix<Scalar,Dynamic,Dynamic> prod = m.template block<MatrixType::RowsAtCompileTime,0>(0,0,rows,0) * m.template block<0,MatrixType::ColsAtCompileTime>(0,0,0,cols);
VERIFY(prod.rows()==rows && prod.cols()==cols);
VERIFY(prod.isZero());
prod = m.template block<1,0>(0,0) * m.template block<0,1>(0,0);
VERIFY(prod.size()==1);
VERIFY(prod.isZero());
}
} }
template<typename VectorType> void zeroSizedVector() template<typename VectorType> void zeroSizedVector()
View File
@ -188,7 +188,7 @@ template<typename _Scalar> class AlignedVector3
} }
template<typename Derived> template<typename Derived>
inline bool isApprox(const MatrixBase<Derived>& other, RealScalar eps=NumTraits<Scalar>::dummy_precision()) const inline bool isApprox(const MatrixBase<Derived>& other, const RealScalar& eps=NumTraits<Scalar>::dummy_precision()) const
{ {
return m_coeffs.template head<3>().isApprox(other,eps); return m_coeffs.template head<3>().isApprox(other,eps);
} }
View File
@ -25,6 +25,16 @@ template <typename T, size_t n> class array {
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { return values[index]; } EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { return values[index]; }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE T& front() { return values[0]; }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const T& front() const { return values[0]; }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE T& back() { return values[n-1]; }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const T& back() const { return values[n-1]; }
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
static std::size_t size() { return n; } static std::size_t size() { return n; }
@ -123,13 +133,33 @@ template <typename T> class array<T, 0> {
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE T& operator[] (size_t) { EIGEN_STRONG_INLINE T& operator[] (size_t) {
eigen_assert(false && "Can't index a zero size array"); eigen_assert(false && "Can't index a zero size array");
return *static_cast<T*>(NULL); return dummy;
} }
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const T& operator[] (size_t) const { EIGEN_STRONG_INLINE const T& operator[] (size_t) const {
eigen_assert(false && "Can't index a zero size array"); eigen_assert(false && "Can't index a zero size array");
return *static_cast<const T*>(NULL); return dummy;
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE T& front() {
eigen_assert(false && "Can't index a zero size array");
return dummy;
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const T& front() const {
eigen_assert(false && "Can't index a zero size array");
return dummy;
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE T& back() {
eigen_assert(false && "Can't index a zero size array");
return dummy;
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const T& back() const {
eigen_assert(false && "Can't index a zero size array");
return dummy;
} }
static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::size_t size() { return 0; } static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::size_t size() { return 0; }
@ -142,6 +172,9 @@ template <typename T> class array<T, 0> {
eigen_assert(l.size() == 0); eigen_assert(l.size() == 0);
} }
#endif #endif
private:
T dummy;
}; };
namespace internal { namespace internal {
View File
@ -128,6 +128,12 @@ class TensorBase<Derived, ReadOnlyAccessors>
return unaryExpr(internal::scalar_lgamma_op<Scalar>()); return unaryExpr(internal::scalar_lgamma_op<Scalar>());
} }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_digamma_op<Scalar>, const Derived>
digamma() const {
return unaryExpr(internal::scalar_digamma_op<Scalar>());
}
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived> EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived>
erf() const { erf() const {
View File
@ -378,7 +378,7 @@ struct TensorContractionEvaluatorBase
} }
template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment> template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment>
void evalGemv(Scalar* buffer) const { EIGEN_DEVICE_FUNC void evalGemv(Scalar* buffer) const {
const Index rows = m_i_size; const Index rows = m_i_size;
const Index cols = m_k_size; const Index cols = m_k_size;
@ -516,7 +516,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
Base(op, device) { } Base(op, device) { }
template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment> template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment>
void evalProduct(Scalar* buffer) const { EIGEN_DEVICE_FUNC void evalProduct(Scalar* buffer) const {
if (this->m_j_size == 1) { if (this->m_j_size == 1) {
this->template evalGemv<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer); this->template evalGemv<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer);
return; return;
@ -582,10 +582,8 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
OutputMapper output(buffer, m); OutputMapper output(buffer, m);
typedef typename internal::gemm_blocking_space<ColMajor, LhsScalar, RhsScalar, Dynamic, Dynamic, Dynamic> BlockingType;
// Sizes of the blocks to load in cache. See the Goto paper for details. // Sizes of the blocks to load in cache. See the Goto paper for details.
BlockingType blocking(m, n, k, 1, true); internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index, internal::ShardByCol> blocking(k, m, n, 1);
const Index kc = blocking.kc(); const Index kc = blocking.kc();
const Index mc = numext::mini(m, blocking.mc()); const Index mc = numext::mini(m, blocking.mc());
const Index nc = numext::mini(n, blocking.nc()); const Index nc = numext::mini(n, blocking.nc());
View File
@ -28,7 +28,7 @@ class TensorContractionBlocking {
typedef typename LhsMapper::Scalar LhsScalar; typedef typename LhsMapper::Scalar LhsScalar;
typedef typename RhsMapper::Scalar RhsScalar; typedef typename RhsMapper::Scalar RhsScalar;
TensorContractionBlocking(Index k, Index m, Index n, Index num_threads = 1) : EIGEN_DEVICE_FUNC TensorContractionBlocking(Index k, Index m, Index n, Index num_threads = 1) :
kc_(k), mc_(m), nc_(n) kc_(k), mc_(m), nc_(n)
{ {
if (ShardingType == ShardByCol) { if (ShardingType == ShardByCol) {
@ -41,9 +41,9 @@ class TensorContractionBlocking {
} }
} }
EIGEN_ALWAYS_INLINE Index kc() const { return kc_; } EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index kc() const { return kc_; }
EIGEN_ALWAYS_INLINE Index mc() const { return mc_; } EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index mc() const { return mc_; }
EIGEN_ALWAYS_INLINE Index nc() const { return nc_; } EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index nc() const { return nc_; }
private: private:
Index kc_; Index kc_;
View File
@ -426,15 +426,16 @@ class TensorContractionSubMapper {
}; };
template<typename Scalar, typename Index, int side, template<typename Scalar_, typename Index, int side,
typename Tensor, typename Tensor,
typename nocontract_t, typename contract_t, typename nocontract_t, typename contract_t,
int packet_size, int packet_size,
bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment> bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
class TensorContractionInputMapper class TensorContractionInputMapper
: public BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> { : public BaseTensorContractionMapper<Scalar_, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> {
public: public:
typedef Scalar_ Scalar;
typedef BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Base; typedef BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Base;
typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> SubMapper; typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> SubMapper;
typedef SubMapper VectorMapper; typedef SubMapper VectorMapper;
View File
@ -176,10 +176,10 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
// compute block sizes (which depend on number of threads) // compute block sizes (which depend on number of threads)
const Index num_threads = this->m_device.numThreads(); const Index num_threads = this->m_device.numThreads();
Index mc = m; internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index, internal::ShardByCol> blocking(k, m, n, num_threads);
Index nc = n; Index mc = blocking.mc();
Index kc = k; Index nc = blocking.nc();
internal::computeProductBlockingSizes<LhsScalar,RhsScalar,1>(kc, mc, nc, num_threads); Index kc = blocking.kc();
eigen_assert(mc <= m); eigen_assert(mc <= m);
eigen_assert(nc <= n); eigen_assert(nc <= n);
eigen_assert(kc <= k); eigen_assert(kc <= k);
View File
@ -21,7 +21,7 @@ namespace Eigen {
*/ */
namespace internal { namespace internal {
template <typename Index, typename InputDims, size_t NumKernelDims, int Layout> template <typename Index, typename InputDims, int NumKernelDims, int Layout>
class IndexMapper { class IndexMapper {
public: public:
IndexMapper(const InputDims& input_dims, const array<Index, NumKernelDims>& kernel_dims, IndexMapper(const InputDims& input_dims, const array<Index, NumKernelDims>& kernel_dims,
@ -123,7 +123,7 @@ class IndexMapper {
} }
inputIndex += p * m_inputStrides[NumKernelDims]; inputIndex += p * m_inputStrides[NumKernelDims];
} else { } else {
int limit = 0; std::ptrdiff_t limit = 0;
if (NumKernelDims < NumDims) { if (NumKernelDims < NumDims) {
limit = NumDims - NumKernelDims - 1; limit = NumDims - NumKernelDims - 1;
} }
@ -147,7 +147,7 @@ class IndexMapper {
} }
outputIndex += p * m_outputStrides[NumKernelDims]; outputIndex += p * m_outputStrides[NumKernelDims];
} else { } else {
int limit = 0; std::ptrdiff_t limit = 0;
if (NumKernelDims < NumDims) { if (NumKernelDims < NumDims) {
limit = NumDims - NumKernelDims - 1; limit = NumDims - NumKernelDims - 1;
} }
@ -206,7 +206,7 @@ class IndexMapper {
} }
private: private:
static const size_t NumDims = internal::array_size<InputDims>::value; static const int NumDims = internal::array_size<InputDims>::value;
array<Index, NumDims> m_inputStrides; array<Index, NumDims> m_inputStrides;
array<Index, NumDims> m_outputStrides; array<Index, NumDims> m_outputStrides;
array<Index, NumDims> m_cudaInputStrides; array<Index, NumDims> m_cudaInputStrides;
View File
@ -109,10 +109,12 @@ class CudaStreamDevice : public StreamInterface {
struct GpuDevice { struct GpuDevice {
// The StreamInterface is not owned: the caller is // The StreamInterface is not owned: the caller is
// responsible for its initialization and eventual destruction. // responsible for its initialization and eventual destruction.
explicit GpuDevice(const StreamInterface* stream) : stream_(stream) { explicit GpuDevice(const StreamInterface* stream) : stream_(stream), max_blocks_(INT_MAX) {
eigen_assert(stream);
}
explicit GpuDevice(const StreamInterface* stream, int num_blocks) : stream_(stream), max_blocks_(num_blocks) {
eigen_assert(stream); eigen_assert(stream);
} }
// TODO(bsteiner): This is an internal API, we should not expose it. // TODO(bsteiner): This is an internal API, we should not expose it.
EIGEN_STRONG_INLINE const cudaStream_t& stream() const { EIGEN_STRONG_INLINE const cudaStream_t& stream() const {
return stream_->stream(); return stream_->stream();
@ -246,6 +248,10 @@ struct GpuDevice {
#endif #endif
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxBlocks() const {
return max_blocks_;
}
// This function checks if the CUDA runtime recorded an error for the // This function checks if the CUDA runtime recorded an error for the
// underlying stream device. // underlying stream device.
inline bool ok() const { inline bool ok() const {
@ -259,7 +265,7 @@ struct GpuDevice {
private: private:
const StreamInterface* stream_; const StreamInterface* stream_;
int max_blocks_;
}; };
#ifndef __CUDA_ARCH__ #ifndef __CUDA_ARCH__
View File
@ -136,7 +136,7 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device>
} }
template<int LoadMode> template<int LoadMode>
EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{ {
return internal::ploadt<Packet, LoadMode>(m_buffer + index); return internal::ploadt<Packet, LoadMode>(m_buffer + index);
} }
View File
@ -220,7 +220,7 @@ EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, false>::run(
if (needs_assign) if (needs_assign)
{ {
const int block_size = device.maxCudaThreadsPerBlock(); const int block_size = device.maxCudaThreadsPerBlock();
const int max_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size; const int max_blocks = numext::maxi<int>(device.maxBlocks(), device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size);
const Index size = array_prod(evaluator.dimensions()); const Index size = array_prod(evaluator.dimensions());
// Create at least one block to ensure we won't crash if we're called with tensors of size 0. // Create at least one block to ensure we won't crash if we're called with tensors of size 0.
const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, (size + block_size - 1) / block_size), 1); const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, (size + block_size - 1) / block_size), 1);
@ -239,7 +239,7 @@ EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, true>::run(c
if (needs_assign) if (needs_assign)
{ {
const int block_size = device.maxCudaThreadsPerBlock(); const int block_size = device.maxCudaThreadsPerBlock();
const int max_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size; const int max_blocks = numext::maxi<int>(device.maxBlocks(), device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size);
const Index size = array_prod(evaluator.dimensions()); const Index size = array_prod(evaluator.dimensions());
// Create at least one block to ensure we won't crash if we're called with tensors of size 0. // Create at least one block to ensure we won't crash if we're called with tensors of size 0.
const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, (size + block_size - 1) / block_size), 1); const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, (size + block_size - 1) / block_size), 1);
View File
@ -106,7 +106,6 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
m_impl.evalSubExprsIfNeeded(NULL);
const Index numValues = m_impl.dimensions().TotalSize(); const Index numValues = m_impl.dimensions().TotalSize();
m_buffer = (CoeffReturnType*)m_device.allocate(numValues * sizeof(CoeffReturnType)); m_buffer = (CoeffReturnType*)m_device.allocate(numValues * sizeof(CoeffReturnType));
// Should initialize the memory in case we're dealing with non POD types. // Should initialize the memory in case we're dealing with non POD types.
@ -119,7 +118,6 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
EvalTo evalToTmp(m_buffer, m_op); EvalTo evalToTmp(m_buffer, m_op);
const bool PacketAccess = internal::IsVectorizable<Device, const ArgType>::value; const bool PacketAccess = internal::IsVectorizable<Device, const ArgType>::value;
internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device); internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device);
m_impl.cleanup();
return true; return true;
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
Some files were not shown because too many files have changed in this diff.