Mirror of https://gitlab.com/libeigen/eigen.git
Synced 2025-08-14 12:46:00 +08:00

Commit f0fdefa96f: "Rebase to latest."
@@ -19,7 +19,7 @@ extern "C" {
 /** \ingroup Support_modules
   * \defgroup CholmodSupport_Module CholmodSupport module
   *
-  * This module provides an interface to the Cholmod library which is part of the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">suitesparse</a> package.
+  * This module provides an interface to the Cholmod library which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package.
   * It provides the two following main factorization classes:
   * - class CholmodSupernodalLLT: a supernodal LLT Cholesky factorization.
   * - class CholmodDecomposition: a general L(D)LT Cholesky factorization with automatic or explicit runtime selection of the underlying factorization method (supernodal or simplicial).

@@ -17,7 +17,7 @@
 /** \ingroup Support_modules
   * \defgroup SPQRSupport_Module SuiteSparseQR module
   *
-  * This module provides an interface to the SPQR library, which is part of the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">suitesparse</a> package.
+  * This module provides an interface to the SPQR library, which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package.
   *
   * \code
   * #include <Eigen/SPQRSupport>

@@ -19,7 +19,7 @@ extern "C" {
 /** \ingroup Support_modules
   * \defgroup UmfPackSupport_Module UmfPackSupport module
   *
-  * This module provides an interface to the UmfPack library which is part of the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">suitesparse</a> package.
+  * This module provides an interface to the UmfPack library which is part of the <a href="http://www.suitesparse.com">suitesparse</a> package.
   * It provides the following factorization class:
   * - class UmfPackLU: a multifrontal sequential LU factorization.
   *
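Note: the three hunks above only swap the dead University-of-Florida SuiteSparse URL for www.suitesparse.com in the module docs; no code changes. As a reminder of what these modules wrap, here is a minimal sketch of solving an SPD sparse system through CholmodSupport (it assumes CHOLMOD is installed and linked; the tiny tridiagonal test matrix is ours, not part of the commit):

    #include <Eigen/Sparse>
    #include <Eigen/CholmodSupport>

    int main()
    {
      // Build a small SPD (tridiagonal) matrix and solve A x = b.
      int n = 5;
      Eigen::SparseMatrix<double> A(n, n);
      for (int i = 0; i < n; ++i) {
        A.insert(i, i) = 2.0;
        if (i + 1 < n) { A.insert(i, i + 1) = -1.0; A.insert(i + 1, i) = -1.0; }
      }
      A.makeCompressed();
      Eigen::VectorXd b = Eigen::VectorXd::Ones(n);

      Eigen::CholmodSupernodalLLT<Eigen::SparseMatrix<double> > solver;
      solver.compute(A);                    // symbolic + numeric factorization
      Eigen::VectorXd x = solver.solve(b);  // calls cholmod_solve under the hood
      return solver.info() == Eigen::Success ? 0 : 1;
    }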
@@ -273,9 +273,10 @@ class CholmodBase : public SparseSolverBase<Derived>
       const Index size = m_cholmodFactor->n;
       EIGEN_UNUSED_VARIABLE(size);
       eigen_assert(size==b.rows());
 
-      // note: cd stands for Cholmod Dense
-      Rhs& b_ref(b.const_cast_derived());
+      // Cholmod needs column-major storage without inner-stride, which corresponds to the default behavior of Ref.
+      Ref<const Matrix<typename Rhs::Scalar,Dynamic,Dynamic,ColMajor> > b_ref(b.derived());
+
       cholmod_dense b_cd = viewAsCholmod(b_ref);
       cholmod_dense* x_cd = cholmod_solve(CHOLMOD_A, m_cholmodFactor, &b_cd, &m_cholmod);
       if(!x_cd)
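The hunk above is the one behavioral change in this file: instead of const-casting the right-hand side (b.const_cast_derived()) and handing its storage to CHOLMOD as-is, the solve path now binds it to Ref<const Matrix<...,ColMajor> >, which guarantees column-major layout with unit inner stride and makes a copy only when the argument does not already match. A small sketch of the same Ref idiom in user code (first_coeff is our name, not an Eigen API):

    #include <Eigen/Dense>
    using namespace Eigen;

    // Accepts any dense expression; materializes a copy only if the argument
    // is not already column-major with unit inner stride.
    double first_coeff(Ref<const MatrixXd> m)
    {
      return m(0, 0);
    }

    int main()
    {
      MatrixXd a = MatrixXd::Random(4, 4);
      double v1 = first_coeff(a);             // no copy: already column-major
      double v2 = first_coeff(a.transpose()); // copy: layout does not match
      return (v1 == v2) ? 0 : 1;
    }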
@@ -103,7 +103,7 @@ template<typename Derived> class ArrayBase
     /** Special case of the template operator=, in order to prevent the compiler
       * from generating a default operator= (issue hit with g++ 4.1)
       */
-    EIGEN_DEVICE_FUNC
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator=(const ArrayBase& other)
     {
       internal::call_assignment(derived(), other.derived());

@@ -112,28 +112,28 @@ template<typename Derived> class ArrayBase
 
     /** Set all the entries to \a value.
       * \sa DenseBase::setConstant(), DenseBase::fill() */
-    EIGEN_DEVICE_FUNC
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator=(const Scalar &value)
     { Base::setConstant(value); return derived(); }
 
-    EIGEN_DEVICE_FUNC
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator+=(const Scalar& scalar);
-    EIGEN_DEVICE_FUNC
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator-=(const Scalar& scalar);
 
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator+=(const ArrayBase<OtherDerived>& other);
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator-=(const ArrayBase<OtherDerived>& other);
 
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator*=(const ArrayBase<OtherDerived>& other);
 
     template<typename OtherDerived>
-    EIGEN_DEVICE_FUNC
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
     Derived& operator/=(const ArrayBase<OtherDerived>& other);
 
   public:
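These two hunks add EIGEN_STRONG_INLINE (__forceinline on MSVC and ICC, plain inline elsewhere) to ArrayBase's assignment and compound-assignment operators, so the one-line forwarders into internal::call_assignment reliably vanish at higher optimization levels. Semantics are untouched; for reference, what the affected operators do:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      ArrayXXf a = ArrayXXf::Random(3, 3), b = ArrayXXf::Random(3, 3);
      a = 1.0f;   // operator=(Scalar): fills every entry via setConstant()
      a += 2.0f;  // operator+=(Scalar)
      a *= b;     // coefficient-wise product, unlike MatrixBase::operator*=
      a /= b;     // coefficient-wise quotient
      return int(a(0, 0));
    }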
@@ -52,7 +52,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
                          const Scalar
                        >::type ScalarWithConstIfNotLvalue;
 
-    typedef typename internal::ref_selector<ExpressionType>::type NestedExpressionType;
+    typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
 
     EIGEN_DEVICE_FUNC
     explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}

@@ -67,7 +67,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
     inline Index innerStride() const { return m_expression.innerStride(); }
 
     EIGEN_DEVICE_FUNC
-    inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); }
+    inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
     EIGEN_DEVICE_FUNC
     inline const Scalar* data() const { return m_expression.data(); }
 

@@ -80,13 +80,13 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
     EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index rowId, Index colId)
     {
-      return m_expression.const_cast_derived().coeffRef(rowId, colId);
+      return m_expression.coeffRef(rowId, colId);
     }
 
     EIGEN_DEVICE_FUNC
     inline const Scalar& coeffRef(Index rowId, Index colId) const
     {
-      return m_expression.const_cast_derived().coeffRef(rowId, colId);
+      return m_expression.coeffRef(rowId, colId);
     }
 
     EIGEN_DEVICE_FUNC

@@ -98,13 +98,13 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
     EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index index)
     {
-      return m_expression.const_cast_derived().coeffRef(index);
+      return m_expression.coeffRef(index);
     }
 
     EIGEN_DEVICE_FUNC
     inline const Scalar& coeffRef(Index index) const
     {
-      return m_expression.const_cast_derived().coeffRef(index);
+      return m_expression.coeffRef(index);
     }
 
     template<int LoadMode>

@@ -116,7 +116,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
     template<int LoadMode>
     inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
     {
-      m_expression.const_cast_derived().template writePacket<LoadMode>(rowId, colId, val);
+      m_expression.template writePacket<LoadMode>(rowId, colId, val);
     }
 
     template<int LoadMode>

@@ -128,7 +128,7 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
     template<int LoadMode>
     inline void writePacket(Index index, const PacketScalar& val)
     {
-      m_expression.const_cast_derived().template writePacket<LoadMode>(index, val);
+      m_expression.template writePacket<LoadMode>(index, val);
     }
 
     template<typename Dest>

@@ -145,11 +145,11 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
     /** Forwards the resizing request to the nested expression
       * \sa DenseBase::resize(Index) */
     EIGEN_DEVICE_FUNC
-    void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); }
+    void resize(Index newSize) { m_expression.resize(newSize); }
     /** Forwards the resizing request to the nested expression
       * \sa DenseBase::resize(Index,Index)*/
     EIGEN_DEVICE_FUNC
-    void resize(Index rows, Index cols) { m_expression.const_cast_derived().resize(rows,cols); }
+    void resize(Index rows, Index cols) { m_expression.resize(rows,cols); }
 
   protected:
     NestedExpressionType m_expression;
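The pattern in all the ArrayWrapper hunks: NestedExpressionType becomes ref_selector<...>::non_const_type, so the wrapper stores a non-const nested reference and every mutating member (data(), coeffRef(), writePacket(), resize()) can drop the m_expression.const_cast_derived() detour. ArrayWrapper is the type behind .array() on a matrix; a small sketch of why its accessors must write through to the wrapped object:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      MatrixXd m = MatrixXd::Constant(2, 2, 4.0);
      // m.array() is an ArrayWrapper<MatrixXd>; writes go through the
      // wrapper's coeffRef() straight into m's storage.
      m.array() += 1.0;          // in-place, no temporary
      m.array()(0, 0) = 0.0;     // coeffRef(Index,Index) on the wrapper
      return int(m.sum());       // 0 + 5 + 5 + 5 = 15
    }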
@@ -195,7 +195,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
                          const Scalar
                        >::type ScalarWithConstIfNotLvalue;
 
-    typedef typename internal::ref_selector<ExpressionType>::type NestedExpressionType;
+    typedef typename internal::ref_selector<ExpressionType>::non_const_type NestedExpressionType;
 
     EIGEN_DEVICE_FUNC
     explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {}

@@ -210,7 +210,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
     inline Index innerStride() const { return m_expression.innerStride(); }
 
     EIGEN_DEVICE_FUNC
-    inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); }
+    inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
     EIGEN_DEVICE_FUNC
     inline const Scalar* data() const { return m_expression.data(); }
 

@@ -223,7 +223,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
     EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index rowId, Index colId)
     {
-      return m_expression.const_cast_derived().coeffRef(rowId, colId);
+      return m_expression.coeffRef(rowId, colId);
     }
 
     EIGEN_DEVICE_FUNC

@@ -241,13 +241,13 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
     EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index index)
     {
-      return m_expression.const_cast_derived().coeffRef(index);
+      return m_expression.coeffRef(index);
     }
 
     EIGEN_DEVICE_FUNC
     inline const Scalar& coeffRef(Index index) const
     {
-      return m_expression.const_cast_derived().coeffRef(index);
+      return m_expression.coeffRef(index);
     }
 
     template<int LoadMode>

@@ -259,7 +259,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
     template<int LoadMode>
     inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
     {
-      m_expression.const_cast_derived().template writePacket<LoadMode>(rowId, colId, val);
+      m_expression.template writePacket<LoadMode>(rowId, colId, val);
     }
 
     template<int LoadMode>

@@ -271,7 +271,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
     template<int LoadMode>
     inline void writePacket(Index index, const PacketScalar& val)
     {
-      m_expression.const_cast_derived().template writePacket<LoadMode>(index, val);
+      m_expression.template writePacket<LoadMode>(index, val);
     }
 
     EIGEN_DEVICE_FUNC

@@ -284,11 +284,11 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
     /** Forwards the resizing request to the nested expression
       * \sa DenseBase::resize(Index) */
     EIGEN_DEVICE_FUNC
-    void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); }
+    void resize(Index newSize) { m_expression.resize(newSize); }
     /** Forwards the resizing request to the nested expression
       * \sa DenseBase::resize(Index,Index)*/
     EIGEN_DEVICE_FUNC
-    void resize(Index rows, Index cols) { m_expression.const_cast_derived().resize(rows,cols); }
+    void resize(Index rows, Index cols) { m_expression.resize(rows,cols); }
 
   protected:
     NestedExpressionType m_expression;
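MatrixWrapper, the type behind .matrix() on arrays, gets the symmetric treatment: a non-const nested expression plus direct forwarding in the mutating accessors. Sketch:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      ArrayXXd a = ArrayXXd::Constant(2, 2, 1.0);
      VectorXd v(2); v << 1.0, 2.0;
      // a.matrix() is a MatrixWrapper<ArrayXXd>: linear algebra on array storage.
      a.matrix().col(0) = v;        // writes land in 'a' through the wrapper
      double n = a.matrix().norm();
      return n > 0 ? 0 : 1;
    }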
@@ -637,7 +637,7 @@ protected:
 ***************************************************************************/
 
 template<typename DstXprType, typename SrcXprType, typename Functor>
-EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func)
 {
   eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
 

@@ -654,7 +654,7 @@ EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const S
 }
 
 template<typename DstXprType, typename SrcXprType>
-EIGEN_DEVICE_FUNC void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
 {
   call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar>());
 }

@@ -688,26 +688,30 @@ struct Assignment;
 // does not has to bother about these annoying details.
 
 template<typename Dst, typename Src>
-EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(Dst& dst, const Src& src)
 {
   call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
 }
 template<typename Dst, typename Src>
-EIGEN_DEVICE_FUNC void call_assignment(const Dst& dst, const Src& src)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(const Dst& dst, const Src& src)
 {
   call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
 }
 
 // Deal with "assume-aliasing"
 template<typename Dst, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if< evaluator_assume_aliasing<Src>::value, void*>::type = 0)
 {
   typename plain_matrix_type<Src>::type tmp(src);
   call_assignment_no_alias(dst, tmp, func);
 }
 
 template<typename Dst, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(Dst& dst, const Src& src, const Func& func, typename enable_if<!evaluator_assume_aliasing<Src>::value, void*>::type = 0)
 {
   call_assignment_no_alias(dst, src, func);
 }

@@ -715,14 +719,16 @@ EIGEN_DEVICE_FUNC void call_assignment(Dst& dst, const Src& src, const Func& fun
 // by-pass "assume-aliasing"
 // When there is no aliasing, we require that 'dst' has been properly resized
 template<typename Dst, template <typename> class StorageBase, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment(NoAlias<Dst,StorageBase>& dst, const Src& src, const Func& func)
 {
   call_assignment_no_alias(dst.expression(), src, func);
 }
 
 
 template<typename Dst, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment_no_alias(Dst& dst, const Src& src, const Func& func)
 {
   enum {
     NeedToTranspose = ( (int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1)

@@ -747,13 +753,15 @@ EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src, const
   Assignment<ActualDstTypeCleaned,Src,Func>::run(actualDst, src, func);
 }
 template<typename Dst, typename Src>
-EIGEN_DEVICE_FUNC void call_assignment_no_alias(Dst& dst, const Src& src)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment_no_alias(Dst& dst, const Src& src)
 {
   call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar>());
 }
 
 template<typename Dst, typename Src, typename Func>
-EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, const Func& func)
 {
   Index dstRows = src.rows();
   Index dstCols = src.cols();

@@ -767,7 +775,8 @@ EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src
   Assignment<Dst,Src,Func>::run(dst, src, func);
 }
 template<typename Dst, typename Src>
-EIGEN_DEVICE_FUNC void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
 {
   call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar>());
 }

@@ -779,7 +788,8 @@ template<typename Dst, typename Src> void check_for_aliasing(const Dst &dst, con
 template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
 struct Assignment<DstXprType, SrcXprType, Functor, Dense2Dense, Scalar>
 {
-  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  static void run(DstXprType &dst, const SrcXprType &src, const Functor &func)
   {
     eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
 
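Every entry point of the assignment kernel (call_assignment, call_assignment_no_alias, the NoAlias overload, and Dense2Dense Assignment::run) is now EIGEN_STRONG_INLINE, with the macros moved onto their own line to keep the long signatures readable. These are the functions behind every '=' on dense expressions; the aliasing dispatch they implement is what noalias() steers:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      MatrixXd a = MatrixXd::Random(3, 3), b = MatrixXd::Random(3, 3);
      MatrixXd c(3, 3);
      // Products "assume aliasing": call_assignment evaluates a*b into a
      // temporary first (the enable_if branch with tmp above), then copies.
      c = a * b;
      // noalias() routes through the NoAlias overload, skipping the temporary.
      c.noalias() = a * b;
      return int(c(0, 0));
    }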
@@ -129,8 +129,8 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
       : Impl(xpr, startRow, startCol)
     {
       EIGEN_STATIC_ASSERT(RowsAtCompileTime!=Dynamic && ColsAtCompileTime!=Dynamic,THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE)
-      eigen_assert(startRow >= 0 && BlockRows >= 1 && startRow + BlockRows <= xpr.rows()
-             && startCol >= 0 && BlockCols >= 1 && startCol + BlockCols <= xpr.cols());
+      eigen_assert(startRow >= 0 && BlockRows >= 0 && startRow + BlockRows <= xpr.rows()
+             && startCol >= 0 && BlockCols >= 0 && startCol + BlockCols <= xpr.cols());
     }
 
     /** Dynamic-size constructor

@@ -221,15 +221,13 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
     inline Scalar& coeffRef(Index rowId, Index colId)
     {
       EIGEN_STATIC_ASSERT_LVALUE(XprType)
-      return m_xpr.const_cast_derived()
-                  .coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
+      return m_xpr.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
     }
 
     EIGEN_DEVICE_FUNC
     inline const Scalar& coeffRef(Index rowId, Index colId) const
     {
-      return m_xpr.derived()
-                  .coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
+      return m_xpr.derived().coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
     }
 
     EIGEN_DEVICE_FUNC

@@ -242,39 +240,34 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
     inline Scalar& coeffRef(Index index)
     {
      EIGEN_STATIC_ASSERT_LVALUE(XprType)
-      return m_xpr.const_cast_derived()
-            .coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
-                      m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+      return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+                            m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
     }
 
     EIGEN_DEVICE_FUNC
     inline const Scalar& coeffRef(Index index) const
     {
-      return m_xpr.const_cast_derived()
-            .coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
-                      m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+      return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+                            m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
     }
 
     EIGEN_DEVICE_FUNC
     inline const CoeffReturnType coeff(Index index) const
     {
-      return m_xpr
-            .coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
-                   m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
+      return m_xpr.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
+                         m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
     }
 
     template<int LoadMode>
     inline PacketScalar packet(Index rowId, Index colId) const
     {
-      return m_xpr.template packet<Unaligned>
-              (rowId + m_startRow.value(), colId + m_startCol.value());
+      return m_xpr.template packet<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value());
     }
 
     template<int LoadMode>
     inline void writePacket(Index rowId, Index colId, const PacketScalar& val)
     {
-      m_xpr.const_cast_derived().template writePacket<Unaligned>
-               (rowId + m_startRow.value(), colId + m_startCol.value(), val);
+      m_xpr.template writePacket<Unaligned>(rowId + m_startRow.value(), colId + m_startCol.value(), val);
     }
 
     template<int LoadMode>

@@ -288,7 +281,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
     template<int LoadMode>
     inline void writePacket(Index index, const PacketScalar& val)
     {
-      m_xpr.const_cast_derived().template writePacket<Unaligned>
+      m_xpr.template writePacket<Unaligned>
          (m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
           m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0), val);
     }

@@ -320,7 +313,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
 
   protected:
 
-    const typename XprType::Nested m_xpr;
+    typename XprType::Nested m_xpr;
     const internal::variable_if_dynamic<StorageIndex, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
     const internal::variable_if_dynamic<StorageIndex, XprType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
     const internal::variable_if_dynamic<StorageIndex, RowsAtCompileTime> m_blockRows;
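In Block.h, the fixed-size constructor's assertion is relaxed from BlockRows >= 1 to BlockRows >= 0 (empty blocks become legal), m_xpr loses its top-level const so that the accessors above can write through it without const_cast_derived(), and the multi-line return statements are folded onto single lines. Usage sketch for the fixed-size path:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      MatrixXd m = MatrixXd::Random(4, 4);
      // Fixed-size block: dimensions are compile-time, offsets are run-time.
      // The constructor above now asserts BlockRows >= 0 rather than >= 1,
      // so degenerate (empty) fixed-size blocks no longer trip the assertion.
      Matrix2d top = m.block<2, 2>(0, 0);
      m.block<2, 2>(2, 2).setZero();   // writable: coeffRef goes through m_xpr
      return int(top.size());
    }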
@@ -148,7 +148,8 @@ struct evaluator<PlainObjectBase<Derived> >
     EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
   }
 
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
   {
     if (IsRowMajor)
       return m_data[row * m_outerStride.value() + col];

@@ -156,12 +157,14 @@ struct evaluator<PlainObjectBase<Derived> >
       return m_data[row + col * m_outerStride.value()];
   }
 
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
   {
     return m_data[index];
   }
 
-  EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index row, Index col)
   {
     if (IsRowMajor)
       return const_cast<Scalar*>(m_data)[row * m_outerStride.value() + col];

@@ -169,12 +172,14 @@ struct evaluator<PlainObjectBase<Derived> >
       return const_cast<Scalar*>(m_data)[row + col * m_outerStride.value()];
   }
 
-  EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index index)
   {
     return const_cast<Scalar*>(m_data)[index];
   }
 
   template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   PacketType packet(Index row, Index col) const
   {
     if (IsRowMajor)

@@ -184,12 +189,14 @@ struct evaluator<PlainObjectBase<Derived> >
   }
 
   template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   PacketType packet(Index index) const
   {
     return ploadt<PacketType, LoadMode>(m_data + index);
   }
 
   template<int StoreMode,typename PacketType>
+  EIGEN_STRONG_INLINE
   void writePacket(Index row, Index col, const PacketType& x)
   {
     if (IsRowMajor)

@@ -201,6 +208,7 @@ struct evaluator<PlainObjectBase<Derived> >
   }
 
   template<int StoreMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   void writePacket(Index index, const PacketType& x)
   {
     return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_data) + index, x);
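From here on the commit decorates the CoreEvaluators.h accessors with EIGEN_STRONG_INLINE; for evaluator<PlainObjectBase> the coeff/coeffRef bodies show the actual addressing rule. A standalone sketch of that rule (our helper, not Eigen code):

    #include <cassert>
    #include <cstddef>

    // One formula, selected by storage order, mirroring the hunks above.
    double coeff(const double* data, std::ptrdiff_t outerStride,
                 std::ptrdiff_t row, std::ptrdiff_t col, bool isRowMajor)
    {
      return isRowMajor ? data[row * outerStride + col]
                        : data[row + col * outerStride];
    }

    int main()
    {
      double colMajor[6] = {1, 2, 3, 4, 5, 6};       // 3x2, columns contiguous
      assert(coeff(colMajor, 3, 2, 1, false) == 6);  // row 2, col 1
      return 0;
    }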
@@ -260,45 +268,53 @@ struct unary_evaluator<Transpose<ArgType>, IndexBased>
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
 
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
   {
     return m_argImpl.coeff(col, row);
   }
 
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
   {
     return m_argImpl.coeff(index);
   }
 
-  EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index row, Index col)
  {
     return m_argImpl.coeffRef(col, row);
   }
 
-  EIGEN_DEVICE_FUNC typename XprType::Scalar& coeffRef(Index index)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  typename XprType::Scalar& coeffRef(Index index)
   {
     return m_argImpl.coeffRef(index);
   }
 
   template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   PacketType packet(Index row, Index col) const
   {
     return m_argImpl.template packet<LoadMode,PacketType>(col, row);
   }
 
   template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   PacketType packet(Index index) const
   {
     return m_argImpl.template packet<LoadMode,PacketType>(index);
   }
 
   template<int StoreMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   void writePacket(Index row, Index col, const PacketType& x)
   {
     m_argImpl.template writePacket<StoreMode,PacketType>(col, row, x);
   }
 
   template<int StoreMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   void writePacket(Index index, const PacketType& x)
   {
     m_argImpl.template writePacket<StoreMode,PacketType>(index, x);
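The transpose evaluator is pure index bookkeeping: every coeff/coeffRef/packet/writePacket forwards to the nested evaluator with row and col swapped, which is why forcing inlining matters more than anything else here. Sketch of the user-visible effect:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      MatrixXd m = MatrixXd::Random(2, 3);
      // Transpose is a view: coeff(row, col) forwards to coeff(col, row)
      // of the nested evaluator, as in the hunk above. No data is moved.
      double x = m.transpose()(2, 0);   // same storage cell as m(0, 2)
      m.transpose()(1, 1) = 7.0;        // writable: coeffRef swaps indices too
      return x == m(0, 2) && m(1, 1) == 7.0 ? 0 : 1;
    }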
@@ -338,23 +354,27 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
 
   typedef typename XprType::CoeffReturnType CoeffReturnType;
 
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
   {
     return m_functor(row, col);
   }
 
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
   {
     return m_functor(index);
   }
 
   template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   PacketType packet(Index row, Index col) const
   {
     return m_functor.template packetOp<Index,PacketType>(row, col);
   }
 
   template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   PacketType packet(Index index) const
   {
     return m_functor.template packetOp<Index,PacketType>(index);
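A CwiseNullaryOp has no operands at all: coeff() and packet() just invoke the stored functor, which is how Constant, Identity, Random and user-defined NullaryExpr expressions are produced. Sketch (the lambda-based NullaryExpr form assumes Eigen 3.3-style functor support):

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      // Nullary evaluation: coeff(i, j) is just m_functor(i, j).
      MatrixXd ones = MatrixXd::Constant(2, 2, 1.0);
      MatrixXd id = MatrixXd::NullaryExpr(3, 3,
          [](Index i, Index j) { return i == j ? 1.0 : 0.0; });
      return int(ones.sum() + id.trace());   // 4 + 3
    }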
@@ -380,7 +400,8 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
     Alignment = evaluator<ArgType>::Alignment
   };
 
-  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit unary_evaluator(const XprType& op)
     : m_functor(op.functor()),
       m_argImpl(op.nestedExpression())
   {

@@ -390,23 +411,27 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased >
 
   typedef typename XprType::CoeffReturnType CoeffReturnType;
 
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
   {
     return m_functor(m_argImpl.coeff(row, col));
   }
 
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
   {
     return m_functor(m_argImpl.coeff(index));
   }
 
   template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   PacketType packet(Index row, Index col) const
   {
     return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(row, col));
   }
 
   template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   PacketType packet(Index index) const
   {
     return m_functor.packetOp(m_argImpl.template packet<LoadMode, PacketType>(index));
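CwiseUnaryOp evaluation is one functor application per coefficient (or per packet) wrapped around the nested evaluator, exactly the coeff()/packet() forwarding shown above. Sketch:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      ArrayXd a = ArrayXd::LinSpaced(4, -2.0, 1.0);
      ArrayXd b = a.abs();                                      // built-in functor
      ArrayXd c = a.unaryExpr([](double v) { return v * v; });  // custom functor
      return int(b.sum() + c.sum());
    }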
@@ -466,17 +491,20 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
 
   typedef typename XprType::CoeffReturnType CoeffReturnType;
 
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
   {
     return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col));
   }
 
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
   {
     return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index));
   }
 
   template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   PacketType packet(Index row, Index col) const
   {
     return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(row, col),

@@ -484,6 +512,7 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
   }
 
   template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   PacketType packet(Index index) const
   {
     return m_functor.packetOp(m_lhsImpl.template packet<LoadMode,PacketType>(index),
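The binary evaluator holds one evaluator per operand and merges the two streams through the functor; this is the machinery behind a + b as well as custom binaryExpr() calls. Sketch:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      ArrayXd a = ArrayXd::Constant(4, 2.0), b = ArrayXd::Constant(4, 3.0);
      // Each coeff (or packet) combines one coeff from each side.
      ArrayXd sum = a + b;
      ArrayXd lo = a.binaryExpr(b, [](double x, double y) { return x < y ? x : y; });
      return int(sum.sum() + lo.sum());   // 20 + 8
    }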
@@ -523,22 +552,26 @@ struct unary_evaluator<CwiseUnaryView<UnaryOp, ArgType>, IndexBased>
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::CoeffReturnType CoeffReturnType;
 
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
   {
     return m_unaryOp(m_argImpl.coeff(row, col));
   }
 
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
   {
     return m_unaryOp(m_argImpl.coeff(index));
   }
 
-  EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index row, Index col)
   {
     return m_unaryOp(m_argImpl.coeffRef(row, col));
   }
 
-  EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index index)
   {
     return m_unaryOp(m_argImpl.coeffRef(index));
   }
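CwiseUnaryView differs from CwiseUnaryOp in that it is writable: coeffRef() pipes the nested coeffRef() through the view functor, which is what lets .real() and .imag() appear on the left of an assignment. Sketch:

    #include <Eigen/Dense>
    #include <complex>
    using namespace Eigen;

    int main()
    {
      MatrixXcd m = MatrixXcd::Zero(2, 2);
      // A writable lens into the complex coefficients:
      m.real() = MatrixXd::Ones(2, 2);
      m.imag()(0, 0) = 5.0;
      return int(m(0, 0).real() + m(0, 0).imag());   // 1 + 5
    }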
@@ -578,47 +611,55 @@ struct mapbase_evaluator : evaluator_base<Derived>
     EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
   }
 
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
   {
     return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()];
   }
 
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
   {
     return m_data[index * m_xpr.innerStride()];
   }
 
-  EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index row, Index col)
   {
     return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()];
   }
 
-  EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index index)
   {
     return m_data[index * m_xpr.innerStride()];
   }
 
   template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   PacketType packet(Index row, Index col) const
   {
     PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
     return internal::ploadt<PacketType, LoadMode>(ptr);
   }
 
   template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   PacketType packet(Index index) const
   {
     return internal::ploadt<PacketType, LoadMode>(m_data + index * m_xpr.innerStride());
   }
 
   template<int StoreMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   void writePacket(Index row, Index col, const PacketType& x)
   {
     PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
     return internal::pstoret<Scalar, PacketType, StoreMode>(ptr, x);
   }
 
   template<int StoreMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   void writePacket(Index index, const PacketType& x)
   {
     internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_xpr.innerStride(), x);
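mapbase_evaluator serves Map, Ref and friends: coefficients are addressed as data[col*colStride + row*rowStride] and packets go through ploadt/pstoret on the computed pointer. A strided Map sketch:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      double data[12];
      for (int i = 0; i < 12; ++i) data[i] = i;
      // Each 3-entry column starts 6 doubles after the previous one, so the
      // evaluator above reads data[col * 6 + row * 1].
      Map<MatrixXd, 0, OuterStride<> > view(data, 3, 2, OuterStride<>(6));
      return int(view(2, 1));   // data[1 * 6 + 2] = 8
    }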
@@ -767,46 +808,54 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
     RowsAtCompileTime = XprType::RowsAtCompileTime
   };
 
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
   {
     return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col);
   }
 
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
   {
     return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
   }
 
-  EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index row, Index col)
   {
     return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col);
   }
 
-  EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index index)
   {
     return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0);
   }
 
   template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   PacketType packet(Index row, Index col) const
   {
     return m_argImpl.template packet<LoadMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col);
   }
 
   template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   PacketType packet(Index index) const
   {
     return packet<LoadMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
                                        RowsAtCompileTime == 1 ? index : 0);
   }
 
   template<int StoreMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   void writePacket(Index row, Index col, const PacketType& x)
   {
     return m_argImpl.template writePacket<StoreMode,PacketType>(m_startRow.value() + row, m_startCol.value() + col, x);
   }
 
   template<int StoreMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   void writePacket(Index index, const PacketType& x)
   {
     return writePacket<StoreMode,PacketType>(RowsAtCompileTime == 1 ? 0 : index,
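The block evaluator adds the stored row/column offsets before delegating; the RowsAtCompileTime == 1 branches map a linear index onto the one free dimension of vector-shaped blocks. Sketch of that linear path:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      MatrixXd m = MatrixXd::Random(4, 4);
      // For a single-row block the linear index becomes a column offset
      // (RowsAtCompileTime == 1 selects the second ternary branch above).
      double a = m.row(1)(3);    // coeff(Index): forwards to m(1, 3)
      m.row(1)(0) = -1.0;        // coeffRef(Index) through the same mapping
      return a == m(1, 3) && m(1, 0) == -1.0 ? 0 : 1;
    }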
@@ -859,7 +908,7 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
     Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ThenMatrixType>::Alignment, evaluator<ElseMatrixType>::Alignment)
   };
 
-  inline EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select)
+  EIGEN_DEVICE_FUNC explicit evaluator(const XprType& select)
     : m_conditionImpl(select.conditionMatrix()),
       m_thenImpl(select.thenMatrix()),
       m_elseImpl(select.elseMatrix())

@@ -869,7 +918,8 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
 
   typedef typename XprType::CoeffReturnType CoeffReturnType;
 
-  inline EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
   {
     if (m_conditionImpl.coeff(row, col))
       return m_thenImpl.coeff(row, col);

@@ -877,7 +927,8 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
       return m_elseImpl.coeff(row, col);
   }
 
-  inline EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
   {
     if (m_conditionImpl.coeff(index))
       return m_thenImpl.coeff(index);
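The Select evaluator reads the condition coefficient first and then touches exactly one of the two branches, mirroring coeff() above. Sketch:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      ArrayXXd a = ArrayXXd::Random(3, 3);
      // Per coefficient: if the condition holds, read 'then', else 'else'.
      ArrayXXd clamped = (a > 0.0).select(a, 0.0);   // ReLU-style clamp
      return clamped.minCoeff() >= 0.0 ? 0 : 1;
    }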
@@ -921,7 +972,8 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
       m_cols(replicate.nestedExpression().cols())
   {}
 
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
   {
     // try to avoid using modulo; this is a pure optimization strategy
     const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0

@@ -934,7 +986,8 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
     return m_argImpl.coeff(actual_row, actual_col);
   }
 
-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
   {
     // try to avoid using modulo; this is a pure optimization strategy
     const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1

@@ -945,6 +998,7 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
   }
 
   template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   PacketType packet(Index row, Index col) const
   {
     const Index actual_row = internal::traits<XprType>::RowsAtCompileTime==1 ? 0

@@ -958,6 +1012,7 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor> >
   }
 
   template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   PacketType packet(Index index) const
   {
     const Index actual_index = internal::traits<XprType>::RowsAtCompileTime==1
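Replicate's evaluator recovers the source index with comparisons instead of the modulo operator whenever a compile-time factor makes that possible ("try to avoid using modulo" above). Sketch:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      VectorXd v(2); v << 1.0, 2.0;
      // 2x3 result: v repeated as three columns, no data copied until assignment.
      MatrixXd tiled = v.replicate(1, 3);
      return int(tiled.sum());   // 3 * (1 + 2) = 9
    }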
@ -994,7 +1049,7 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
     CoeffReadCost = TraversalSize==Dynamic ? HugeCost
                   : TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value),

-    Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit))),
+    Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit))) | LinearAccessBit,

     Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized
   };
@ -1008,7 +1063,8 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >

   typedef typename XprType::CoeffReturnType CoeffReturnType;

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  const Scalar coeff(Index i, Index j) const
   {
     if (Direction==Vertical)
       return m_functor(m_arg.col(j));
@ -1016,7 +1072,8 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
     return m_functor(m_arg.row(i));
   }

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  const Scalar coeff(Index index) const
   {
     if (Direction==Vertical)
       return m_functor(m_arg.col(index));
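PartialReduxExpr is the expression behind colwise()/rowwise() reductions; each coefficient read re-runs the reduction functor on a whole column or row, and the newly added LinearAccessBit lets such vector-shaped results be addressed with a single index. A usage sketch (mine, not from the patch):

    #include <Eigen/Dense>
    int main() {
      Eigen::MatrixXd m(2,3);
      m << 1, 2, 3,
           4, 5, 6;
      // coeff(j) of the colwise expression evaluates m_functor(m_arg.col(j)).
      Eigen::RowVector3d colSums = m.colwise().sum();      // 5 7 9
      Eigen::Vector2d    rowMax  = m.rowwise().maxCoeff(); // 3, 6
    }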
@ -1051,45 +1108,53 @@ struct evaluator_wrapper_base
   typedef typename ArgType::Scalar Scalar;
   typedef typename ArgType::CoeffReturnType CoeffReturnType;

-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
   {
     return m_argImpl.coeff(row, col);
   }

-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
   {
     return m_argImpl.coeff(index);
   }

-  EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index row, Index col)
   {
     return m_argImpl.coeffRef(row, col);
   }

-  EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index index)
   {
     return m_argImpl.coeffRef(index);
   }

   template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   PacketType packet(Index row, Index col) const
   {
     return m_argImpl.template packet<LoadMode,PacketType>(row, col);
   }

   template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   PacketType packet(Index index) const
   {
     return m_argImpl.template packet<LoadMode,PacketType>(index);
   }

   template<int StoreMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   void writePacket(Index row, Index col, const PacketType& x)
   {
     m_argImpl.template writePacket<StoreMode>(row, col, x);
   }

   template<int StoreMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   void writePacket(Index index, const PacketType& x)
   {
     m_argImpl.template writePacket<StoreMode>(index, x);
@ -1164,29 +1229,34 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
     m_cols(ReverseCol ? reverse.nestedExpression().cols() : 1)
   { }

-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index col) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index col) const
   {
     return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row,
                            ReverseCol ? m_cols.value() - col - 1 : col);
   }

-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
   {
     return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1);
   }

-  EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index col)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index row, Index col)
   {
     return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row,
                               ReverseCol ? m_cols.value() - col - 1 : col);
   }

-  EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index index)
   {
     return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1);
   }

   template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   PacketType packet(Index row, Index col) const
   {
     enum {
@ -1201,6 +1271,7 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
   }

   template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   PacketType packet(Index index) const
   {
     enum { PacketSize = unpacket_traits<PacketType>::size };
@ -1208,6 +1279,7 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
   }

   template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   void writePacket(Index row, Index col, const PacketType& x)
   {
     // FIXME we could factorize some code with packet(i,j)
@ -1224,6 +1296,7 @@ struct unary_evaluator<Reverse<ArgType, Direction> >
   }

   template<int LoadMode, typename PacketType>
+  EIGEN_STRONG_INLINE
   void writePacket(Index index, const PacketType& x)
   {
     enum { PacketSize = unpacket_traits<PacketType>::size };
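The mirrored index arithmetic above is what backs reverse(): reading entry i of the reversed expression fetches entry size-1-i of the nested expression. A quick sketch:

    #include <Eigen/Dense>
    int main() {
      Eigen::Vector3d v(1, 2, 3);
      // coeff(i) of the reversed expression reads v(v.size() - i - 1).
      Eigen::Vector3d r = v.reverse();   // 3, 2, 1
    }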
@ -1267,22 +1340,26 @@ struct evaluator<Diagonal<ArgType, DiagIndex> >
   typedef typename internal::conditional<!internal::is_same<typename ArgType::StorageKind,Sparse>::value,
                                          typename XprType::CoeffReturnType,Scalar>::type CoeffReturnType;

-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index row, Index) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index row, Index) const
   {
     return m_argImpl.coeff(row + rowOffset(), row + colOffset());
   }

-  EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  CoeffReturnType coeff(Index index) const
   {
     return m_argImpl.coeff(index + rowOffset(), index + colOffset());
   }

-  EIGEN_DEVICE_FUNC Scalar& coeffRef(Index row, Index)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index row, Index)
   {
     return m_argImpl.coeffRef(row + rowOffset(), row + colOffset());
   }

-  EIGEN_DEVICE_FUNC Scalar& coeffRef(Index index)
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Scalar& coeffRef(Index index)
   {
     return m_argImpl.coeffRef(index + rowOffset(), index + colOffset());
   }
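rowOffset()/colOffset() shift the linear scan onto the requested diagonal: for a super-diagonal the column offset equals the diagonal index, for a sub-diagonal the row offset does. A sketch of the API this evaluator serves:

    #include <Eigen/Dense>
    int main() {
      Eigen::Matrix3d m;
      m << 1, 2, 3,
           4, 5, 6,
           7, 8, 9;
      // Entry i of diagonal(1) maps to m(i + 0, i + 1): here (2, 6).
      Eigen::Vector2d superDiag = m.diagonal(1);
      m.diagonal(-1).setZero();  // coeffRef path: zero the sub-diagonal in place
    }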
@ -32,8 +32,8 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
   // we still want to handle the case when the result type is different.
   typedef typename result_of<
                      BinaryOp(
-                       typename Lhs::Scalar,
-                       typename Rhs::Scalar
+                       const typename Lhs::Scalar&,
+                       const typename Rhs::Scalar&
                      )
                    >::type Scalar;
   typedef typename cwise_promote_storage_type<typename traits<Lhs>::StorageKind,
@ -19,7 +19,7 @@ struct traits<CwiseUnaryOp<UnaryOp, XprType> >
   : traits<XprType>
 {
   typedef typename result_of<
-                     UnaryOp(typename XprType::Scalar)
+                     UnaryOp(const typename XprType::Scalar&)
                    >::type Scalar;
   typedef typename XprType::Nested XprTypeNested;
   typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
@ -58,33 +58,34 @@ class CwiseUnaryOp : public CwiseUnaryOpImpl<UnaryOp, XprType, typename internal

   typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base;
   EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)
+  typedef typename internal::ref_selector<XprType>::type XprTypeNested;
   typedef typename internal::remove_all<XprType>::type NestedExpression;

-  EIGEN_DEVICE_FUNC
-  explicit inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
     : m_xpr(xpr), m_functor(func) {}

-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE Index rows() const { return m_xpr.rows(); }
-  EIGEN_DEVICE_FUNC
-  EIGEN_STRONG_INLINE Index cols() const { return m_xpr.cols(); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Index rows() const { return m_xpr.rows(); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Index cols() const { return m_xpr.cols(); }

   /** \returns the functor representing the unary operation */
-  EIGEN_DEVICE_FUNC
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   const UnaryOp& functor() const { return m_functor; }

   /** \returns the nested expression */
-  EIGEN_DEVICE_FUNC
-  const typename internal::remove_all<typename XprType::Nested>::type&
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  const typename internal::remove_all<XprTypeNested>::type&
   nestedExpression() const { return m_xpr; }

   /** \returns the nested expression */
-  EIGEN_DEVICE_FUNC
-  typename internal::remove_all<typename XprType::Nested>::type&
-  nestedExpression() { return m_xpr.const_cast_derived(); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  typename internal::remove_all<XprTypeNested>::type&
+  nestedExpression() { return m_xpr; }

   protected:
-    typename XprType::Nested m_xpr;
+    XprTypeNested m_xpr;
     const UnaryOp m_functor;
 };

@ -18,7 +18,7 @@ struct traits<CwiseUnaryView<ViewOp, MatrixType> >
   : traits<MatrixType>
 {
   typedef typename result_of<
-                     ViewOp(typename traits<MatrixType>::Scalar)
+                     ViewOp(const typename traits<MatrixType>::Scalar&)
                    >::type Scalar;
   typedef typename MatrixType::Nested MatrixTypeNested;
   typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
@ -61,6 +61,7 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in

   typedef typename CwiseUnaryViewImpl<ViewOp, MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
   EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView)
+  typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
   typedef typename internal::remove_all<MatrixType>::type NestedExpression;

   explicit inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp())
@ -75,15 +76,15 @@ class CwiseUnaryView : public CwiseUnaryViewImpl<ViewOp, MatrixType, typename in
   const ViewOp& functor() const { return m_functor; }

   /** \returns the nested expression */
-  const typename internal::remove_all<typename MatrixType::Nested>::type&
+  const typename internal::remove_all<MatrixTypeNested>::type&
   nestedExpression() const { return m_matrix; }

   /** \returns the nested expression */
-  typename internal::remove_all<typename MatrixType::Nested>::type&
+  typename internal::remove_reference<MatrixTypeNested>::type&
   nestedExpression() { return m_matrix.const_cast_derived(); }

   protected:
-    typename internal::ref_selector<MatrixType>::type m_matrix;
+    MatrixTypeNested m_matrix;
     ViewOp m_functor;
 };

@ -275,13 +275,13 @@ template<typename Derived> class DenseBase

   /** Copies \a other into *this. \returns a reference to *this. */
   template<typename OtherDerived>
-  EIGEN_DEVICE_FUNC
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   Derived& operator=(const DenseBase<OtherDerived>& other);

   /** Special case of the template operator=, in order to prevent the compiler
    * from generating a default operator= (issue hit with g++ 4.1)
    */
-  EIGEN_DEVICE_FUNC
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   Derived& operator=(const DenseBase& other);

   template<typename OtherDerived>
@ -388,10 +388,10 @@ template<typename Derived> class DenseBase
   inline bool hasNaN() const;
   inline bool allFinite() const;

-  EIGEN_DEVICE_FUNC
-  inline Derived& operator*=(const Scalar& other);
-  EIGEN_DEVICE_FUNC
-  inline Derived& operator/=(const Scalar& other);
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Derived& operator*=(const Scalar& other);
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  Derived& operator/=(const Scalar& other);

   typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;
   /** \returns the matrix or vector obtained by evaluating this expression.
@ -103,21 +103,21 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
   >::type ScalarWithConstIfNotLvalue;

   EIGEN_DEVICE_FUNC
-  inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
+  inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.coeffRef(rowOffset(), colOffset())); }
   EIGEN_DEVICE_FUNC
-  inline const Scalar* data() const { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
+  inline const Scalar* data() const { return &(m_matrix.coeffRef(rowOffset(), colOffset())); }

   EIGEN_DEVICE_FUNC
   inline Scalar& coeffRef(Index row, Index)
   {
     EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
-    return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
+    return m_matrix.coeffRef(row+rowOffset(), row+colOffset());
   }

   EIGEN_DEVICE_FUNC
   inline const Scalar& coeffRef(Index row, Index) const
   {
-    return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
+    return m_matrix.coeffRef(row+rowOffset(), row+colOffset());
   }

   EIGEN_DEVICE_FUNC
@ -130,13 +130,13 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
   inline Scalar& coeffRef(Index idx)
   {
     EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
-    return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset());
+    return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset());
   }

   EIGEN_DEVICE_FUNC
   inline const Scalar& coeffRef(Index idx) const
   {
-    return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset());
+    return m_matrix.coeffRef(idx+rowOffset(), idx+colOffset());
   }

   EIGEN_DEVICE_FUNC
@ -159,7 +159,7 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
   }

 protected:
-  typename MatrixType::Nested m_matrix;
+  typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
   const internal::variable_if_dynamicindex<Index, DiagIndex> m_index;

 private:
@ -142,6 +142,52 @@ inline void MatrixBase<Derived>::normalize()
   derived() /= numext::sqrt(z);
 }

+/** \returns an expression of the quotient of \c *this by its own norm while avoiding underflow and overflow.
+  *
+  * \only_for_vectors
+  *
+  * This method is analogous to the normalized() method, but it reduces the risk of
+  * underflow and overflow when computing the norm.
+  *
+  * \warning If the input vector is too small (i.e., this->norm()==0),
+  * then this function returns a copy of the input.
+  *
+  * \sa stableNorm(), stableNormalize(), normalized()
+  */
+template<typename Derived>
+inline const typename MatrixBase<Derived>::PlainObject
+MatrixBase<Derived>::stableNormalized() const
+{
+  typedef typename internal::nested_eval<Derived,3>::type _Nested;
+  _Nested n(derived());
+  RealScalar w = n.cwiseAbs().maxCoeff();
+  RealScalar z = (n/w).squaredNorm();
+  if(z>RealScalar(0))
+    return n / (numext::sqrt(z)*w);
+  else
+    return n;
+}
+
+/** Normalizes the vector while avoiding underflow and overflow
+  *
+  * \only_for_vectors
+  *
+  * This method is analogous to the normalize() method, but it reduces the risk of
+  * underflow and overflow when computing the norm.
+  *
+  * \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged.
+  *
+  * \sa stableNorm(), stableNormalized(), normalize()
+  */
+template<typename Derived>
+inline void MatrixBase<Derived>::stableNormalize()
+{
+  RealScalar w = cwiseAbs().maxCoeff();
+  RealScalar z = (derived()/w).squaredNorm();
+  if(z>RealScalar(0))
+    derived() /= numext::sqrt(z)*w;
+}

 //---------- implementation of other norms ----------

 namespace internal {
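Why the two-step scaling in stableNormalize() matters: squaring very large or very small coefficients overflows or underflows before the square root is taken; factoring out the largest magnitude first keeps the intermediate squaredNorm finite. A minimal sketch using the API added above:

    #include <Eigen/Dense>
    int main() {
      Eigen::Vector2d v(3e200, 4e200);
      // v.squaredNorm() overflows to +inf, so v.normalized() degenerates,
      // while dividing by w = 4e200 first recovers the direction (0.6, 0.8).
      Eigen::Vector2d n = v.stableNormalized();
      v.stableNormalize();  // in-place variant added by the same change
    }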
@ -75,6 +75,7 @@ struct default_packet_traits
     HasCosh = 0,
     HasTanh = 0,
     HasLGamma = 0,
+    HasDiGamma = 0,
     HasErf = 0,
     HasErfc = 0,

@ -284,7 +285,7 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu
 { pstore(to, from); }

 /** \internal tries to do cache prefetching of \a addr */
-template<typename Scalar> inline void prefetch(const Scalar* addr)
+template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
 {
 #ifdef __CUDA_ARCH__
 #if defined(__LP64__)
@ -439,6 +440,10 @@ Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); }

+/** \internal \returns the derivative of lgamma, psi(\a a) (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pdigamma(const Packet& a) { using numext::digamma; return digamma(a); }
+
 /** \internal \returns the erf(\a a) (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet perf(const Packet& a) { using numext::erf; return erf(a); }
@ -50,6 +50,7 @@ namespace Eigen
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh,scalar_cosh_op)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh,scalar_tanh_op)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma,scalar_lgamma_op)
+  EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op)
   EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op)
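Each EIGEN_ARRAY_DECLARE_GLOBAL_UNARY line exposes a scalar kernel as a coefficient-wise free function on arrays, so the new digamma slots in next to lgamma. A sketch, assuming a C99 math library is available (EIGEN_HAS_C99_MATH):

    #include <Eigen/Dense>
    int main() {
      Eigen::ArrayXd a = Eigen::ArrayXd::LinSpaced(3, 1.0, 3.0);
      Eigen::ArrayXd lg = Eigen::lgamma(a);   // coefficient-wise log-gamma
      Eigen::ArrayXd dg = Eigen::digamma(a);  // coefficient-wise psi, new here
    }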
@ -748,9 +748,9 @@ template<typename T> EIGEN_DEVICE_FUNC bool isinf_msvc_helper(T x)
 }

 //MSVC defines a _isnan builtin function, but for double only
-EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x); }
-EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x); }
-EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x); }
+EIGEN_DEVICE_FUNC inline bool isnan_impl(const long double& x) { return _isnan(x)!=0; }
+EIGEN_DEVICE_FUNC inline bool isnan_impl(const double& x) { return _isnan(x)!=0; }
+EIGEN_DEVICE_FUNC inline bool isnan_impl(const float& x) { return _isnan(x)!=0; }

 EIGEN_DEVICE_FUNC inline bool isinf_impl(const long double& x) { return isinf_msvc_helper(x); }
 EIGEN_DEVICE_FUNC inline bool isinf_impl(const double& x) { return isinf_msvc_helper(x); }
@ -1080,21 +1080,21 @@ struct scalar_fuzzy_impl : scalar_fuzzy_default_impl<Scalar, NumTraits<Scalar>::

 template<typename Scalar, typename OtherScalar> EIGEN_DEVICE_FUNC
 inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y,
-                              typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
+                              const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
 {
   return scalar_fuzzy_impl<Scalar>::template isMuchSmallerThan<OtherScalar>(x, y, precision);
 }

 template<typename Scalar> EIGEN_DEVICE_FUNC
 inline bool isApprox(const Scalar& x, const Scalar& y,
-                     typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
+                     const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
 {
   return scalar_fuzzy_impl<Scalar>::isApprox(x, y, precision);
 }

 template<typename Scalar> EIGEN_DEVICE_FUNC
 inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y,
-                               typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
+                               const typename NumTraits<Scalar>::Real &precision = NumTraits<Scalar>::dummy_precision())
 {
   return scalar_fuzzy_impl<Scalar>::isApproxOrLessThan(x, y, precision);
 }
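These fuzzy comparisons are relative: two values count as approximately equal when their difference is small compared to their magnitudes, with dummy_precision() as the default tolerance. A usage sketch (the tolerance values are illustrative):

    #include <Eigen/Dense>
    int main() {
      double a = 1.0, b = 1.0 + 1e-13;
      bool loose = Eigen::internal::isApprox(a, b);         // true at default precision
      bool tight = Eigen::internal::isApprox(a, b, 1e-16);  // false with a tighter bound
    }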
@ -135,14 +135,14 @@ template<typename Derived> class MatrixBase
   /** Special case of the template operator=, in order to prevent the compiler
    * from generating a default operator= (issue hit with g++ 4.1)
    */
-  EIGEN_DEVICE_FUNC
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   Derived& operator=(const MatrixBase& other);

   // We cannot inherit here via Base::operator= since it is causing
   // trouble with MSVC.

   template <typename OtherDerived>
-  EIGEN_DEVICE_FUNC
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   Derived& operator=(const DenseBase<OtherDerived>& other);

   template <typename OtherDerived>
@ -154,10 +154,10 @@ template<typename Derived> class MatrixBase
   Derived& operator=(const ReturnByValue<OtherDerived>& other);

   template<typename OtherDerived>
-  EIGEN_DEVICE_FUNC
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   Derived& operator+=(const MatrixBase<OtherDerived>& other);
   template<typename OtherDerived>
-  EIGEN_DEVICE_FUNC
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
   Derived& operator-=(const MatrixBase<OtherDerived>& other);

 #ifdef __CUDACC__
@ -204,7 +204,9 @@ template<typename Derived> class MatrixBase
   RealScalar blueNorm() const;
   RealScalar hypotNorm() const;
   EIGEN_DEVICE_FUNC const PlainObject normalized() const;
+  EIGEN_DEVICE_FUNC const PlainObject stableNormalized() const;
   EIGEN_DEVICE_FUNC void normalize();
+  EIGEN_DEVICE_FUNC void stableNormalize();

   EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const;
   EIGEN_DEVICE_FUNC void adjointInPlace();
@ -32,7 +32,7 @@ namespace internal {
 template<typename MatrixType, unsigned int UpLo>
 struct traits<SelfAdjointView<MatrixType, UpLo> > : traits<MatrixType>
 {
-  typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
+  typedef typename ref_selector<MatrixType>::non_const_type MatrixTypeNested;
   typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
   typedef MatrixType ExpressionType;
   typedef typename MatrixType::PlainObject FullMatrixType;
@ -97,7 +97,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
   {
     EIGEN_STATIC_ASSERT_LVALUE(SelfAdjointView);
     Base::check_coordinates_internal(row, col);
-    return m_matrix.const_cast_derived().coeffRef(row, col);
+    return m_matrix.coeffRef(row, col);
   }

   /** \internal */
@ -107,7 +107,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
   EIGEN_DEVICE_FUNC
   const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
   EIGEN_DEVICE_FUNC
-  MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }
+  MatrixTypeNestedCleaned& nestedExpression() { return m_matrix; }

   /** Efficient triangular matrix times vector/matrix product */
   template<typename OtherDerived>
@ -13,79 +13,349 @@
 namespace Eigen {
 namespace internal {

+// Parts of this code are based on the Cephes Math Library.
+//
+// Cephes Math Library Release 2.8: June, 2000
+// Copyright 1984, 1987, 1992, 2000 by Stephen L. Moshier
+//
+// Permission has been kindly provided by the original author
+// to incorporate the Cephes software into the Eigen codebase:
+//
+// From: Stephen Moshier
+// To: Eugene Brevdo
+// Subject: Re: Permission to wrap several cephes functions in Eigen
+//
+// Hello Eugene,
+//
+// Thank you for writing.
+//
+// If your licensing is similar to BSD, the formal way that has been
+// handled is simply to add a statement to the effect that you are incorporating
+// the Cephes software by permission of the author.
+//
+// Good luck with your project,
+// Steve
+
+namespace cephes {
+
+/* polevl (modified for Eigen)
+ *
+ * Evaluate polynomial
+ *
+ * SYNOPSIS:
+ *
+ *   int N;
+ *   Scalar x, y, coef[N+1];
+ *
+ *   y = polevl<decltype(x), N>( x, coef);
+ *
+ * DESCRIPTION:
+ *
+ * Evaluates polynomial of degree N:
+ *
+ *   y = C_0 + C_1 x + C_2 x^2 + ... + C_N x^N
+ *
+ * Coefficients are stored in reverse order:
+ *
+ *   coef[0] = C_N, ..., coef[N] = C_0.
+ *
+ * The function p1evl() assumes that coef[N] = 1.0 and is
+ * omitted from the array. Its calling arguments are
+ * otherwise the same as polevl().
+ *
+ * The Eigen implementation is templatized. For best speed, store
+ * coef as a const array (constexpr), e.g.
+ *
+ *   const double coef[] = {1.0, 2.0, 3.0, ...};
+ */
+template <typename Scalar, int N>
+struct polevl {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  static Scalar run(const Scalar x, const Scalar coef[]) {
+    EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+    return polevl<Scalar, N - 1>::run(x, coef) * x + coef[N];
+  }
+};
+
+template <typename Scalar>
+struct polevl<Scalar, 0> {
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  static Scalar run(const Scalar, const Scalar coef[]) {
+    return coef[0];
+  }
+};
+
+} // end namespace cephes
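The template recursion above unrolls to Horner's scheme at compile time. A quick standalone check (my example, not from the patch):

    #include <Eigen/Dense>
    #include <cassert>
    int main() {
      // p(x) = 2x^2 + 3x + 5, highest-degree coefficient first, as documented.
      const double coef[] = {2.0, 3.0, 5.0};
      // Unrolls to (2.0*x + 3.0)*x + 5.0; at x = 2 this is 19.
      double y = Eigen::internal::cephes::polevl<double, 2>::run(2.0, coef);
      assert(y == 19.0);
    }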
 /****************************************************************************
 * Implementation of lgamma                                                  *
 ****************************************************************************/

-template<typename Scalar>
-struct lgamma_impl
-{
+template <typename Scalar>
+struct lgamma_impl {
   EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE Scalar run(const Scalar&)
-  {
+  static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
     EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
                         THIS_TYPE_IS_NOT_SUPPORTED);
     return Scalar(0);
   }
 };

-template<typename Scalar>
-struct lgamma_retval
-{
+template <typename Scalar>
+struct lgamma_retval {
   typedef Scalar type;
 };

 #ifdef EIGEN_HAS_C99_MATH
-template<>
-struct lgamma_impl<float>
-{
+template <>
+struct lgamma_impl<float> {
   EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE double run(const float& x) { return ::lgammaf(x); }
+  static EIGEN_STRONG_INLINE float run(float x) { return ::lgammaf(x); }
 };

-template<>
-struct lgamma_impl<double>
-{
+template <>
+struct lgamma_impl<double> {
   EIGEN_DEVICE_FUNC
-  static EIGEN_STRONG_INLINE double run(const double& x) { return ::lgamma(x); }
+  static EIGEN_STRONG_INLINE double run(double x) { return ::lgamma(x); }
 };
 #endif

+/****************************************************************************
+* Implementation of digamma (psi)                                           *
+****************************************************************************/
+
+#ifdef EIGEN_HAS_C99_MATH
+
+/*
+ * Polynomial evaluation helper for the Psi (digamma) function.
+ *
+ * digamma_impl_maybe_poly::run(s) evaluates the asymptotic Psi expansion for
+ * input Scalar s, assuming s is above 10.0.
+ *
+ * If s is above a certain threshold for the given Scalar type, zero
+ * is returned.  Otherwise the polynomial is evaluated with enough
+ * coefficients for results matching Scalar machine precision.
+ */
+template <typename Scalar>
+struct digamma_impl_maybe_poly {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
+    EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+                        THIS_TYPE_IS_NOT_SUPPORTED);
+    return Scalar(0);
+  }
+};
+
+template <>
+struct digamma_impl_maybe_poly<float> {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE float run(const float s) {
+    const float A[] = {
+      -4.16666666666666666667E-3,
+       3.96825396825396825397E-3,
+      -8.33333333333333333333E-3,
+       8.33333333333333333333E-2
+    };
+
+    float z;
+    if (s < 1.0e8f) {
+      z = 1.0f / (s * s);
+      return z * cephes::polevl<float, 3>::run(z, A);
+    } else return 0.0f;
+  }
+};
+
+template <>
+struct digamma_impl_maybe_poly<double> {
+  EIGEN_DEVICE_FUNC
+  static EIGEN_STRONG_INLINE double run(const double s) {
+    const double A[] = {
+       8.33333333333333333333E-2,
+      -2.10927960927960927961E-2,
+       7.57575757575757575758E-3,
+      -4.16666666666666666667E-3,
+       3.96825396825396825397E-3,
+      -8.33333333333333333333E-3,
+       8.33333333333333333333E-2
+    };
+
+    double z;
+    if (s < 1.0e17) {
+      z = 1.0 / (s * s);
+      return z * cephes::polevl<double, 6>::run(z, A);
+    }
+    else return 0.0;
+  }
+};
+
+#endif // EIGEN_HAS_C99_MATH
+
+template <typename Scalar>
+struct digamma_retval {
+  typedef Scalar type;
+};
+
+#ifdef EIGEN_HAS_C99_MATH
+template <typename Scalar>
+struct digamma_impl {
+  EIGEN_DEVICE_FUNC
+  static Scalar run(Scalar x) {
+    /*
+     * Psi (digamma) function (modified for Eigen)
+     *
+     * SYNOPSIS:
+     *
+     *   double x, y, psi();
+     *
+     *   y = psi( x );
+     *
+     * DESCRIPTION:
+     *
+     *   psi(x) = d/dx ln(Gamma(x))
+     *
+     * is the logarithmic derivative of the gamma function.
+     * For integer x,
+     *
+     *   psi(n) = -EUL + sum_{k=1}^{n-1} 1/k.
+     *
+     * If x is negative, it is transformed to a positive argument by the
+     * reflection formula psi(1-x) = psi(x) + pi cot(pi x).
+     * For general positive x, the argument is made greater than 10
+     * using the recurrence psi(x+1) = psi(x) + 1/x.
+     * Then the following asymptotic expansion is applied:
+     *
+     *   psi(x) = log(x) - 1/(2x) - sum_{k=1}^{inf} B_{2k} / (2k x^{2k})
+     *
+     * where the B_{2k} are Bernoulli numbers.
+     *
+     * ACCURACY (float):
+     *   Relative error (except absolute when |psi| < 1):
+     *   arithmetic   domain    # trials    peak       rms
+     *      IEEE      0,30       30000    1.3e-15    1.4e-16
+     *      IEEE      -30,0      40000    1.5e-15    2.2e-16
+     *
+     * ACCURACY (double):
+     *   Absolute error, relative when |psi| > 1 :
+     *   arithmetic   domain    # trials    peak       rms
+     *      IEEE      -33,0      30000    8.2e-7     1.2e-7
+     *      IEEE      0,33      100000    7.3e-7     7.7e-8
+     *
+     * ERROR MESSAGES:
+     *   message           condition       value returned
+     *   psi singularity   x integer <=0   INFINITY
+     */
+
+    Scalar p, q, nz, s, w, y;
+    bool negative;
+
+    const Scalar maxnum = std::numeric_limits<Scalar>::infinity();
+    const Scalar m_pi = 3.14159265358979323846;
+
+    negative = 0;
+    nz = 0.0;
+
+    const Scalar zero = 0.0;
+    const Scalar one = 1.0;
+    const Scalar half = 0.5;
+
+    if (x <= zero) {
+      negative = one;
+      q = x;
+      p = ::floor(q);
+      if (p == q) {
+        return maxnum;
+      }
+      /* Remove the zeros of tan(m_pi x)
+       * by subtracting the nearest integer from x
+       */
+      nz = q - p;
+      if (nz != half) {
+        if (nz > half) {
+          p += one;
+          nz = q - p;
+        }
+        nz = m_pi / ::tan(m_pi * nz);
+      }
+      else {
+        nz = zero;
+      }
+      x = one - x;
+    }
+
+    /* use the recurrence psi(x+1) = psi(x) + 1/x. */
+    s = x;
+    w = zero;
+    while (s < Scalar(10)) {
+      w += one / s;
+      s += one;
+    }
+
+    y = digamma_impl_maybe_poly<Scalar>::run(s);
+
+    y = ::log(s) - (half / s) - y - w;
+
+    return (negative) ? y - nz : y;
+  }
+};
+
+#endif // EIGEN_HAS_C99_MATH
+
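A quick sanity check of the algorithm above: psi(1) equals minus the Euler-Mascheroni constant, and the same recurrence the code uses gives psi(2) = psi(1) + 1. A sketch via the numext wrapper introduced later in this patch (assuming EIGEN_HAS_C99_MATH):

    #include <Eigen/Dense>
    int main() {
      double psi1 = Eigen::numext::digamma(1.0);  // ~ -0.5772156649
      double psi2 = Eigen::numext::digamma(2.0);  // psi(1) + 1 ~ 0.4227843351
    }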
/****************************************************************************
|
/****************************************************************************
|
||||||
* Implementation of erf *
|
* Implementation of erf *
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
|
||||||
template<typename Scalar>
|
template <typename Scalar>
|
||||||
struct erf_impl
|
struct erf_impl {
|
||||||
{
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
static EIGEN_STRONG_INLINE Scalar run(const Scalar&)
|
static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
|
||||||
{
|
|
||||||
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
|
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
|
||||||
THIS_TYPE_IS_NOT_SUPPORTED);
|
THIS_TYPE_IS_NOT_SUPPORTED);
|
||||||
return Scalar(0);
|
return Scalar(0);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Scalar>
|
template <typename Scalar>
|
||||||
struct erf_retval
|
struct erf_retval {
|
||||||
{
|
|
||||||
typedef Scalar type;
|
typedef Scalar type;
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef EIGEN_HAS_C99_MATH
|
#ifdef EIGEN_HAS_C99_MATH
|
||||||
template<>
|
template <>
|
||||||
struct erf_impl<float>
|
struct erf_impl<float> {
|
||||||
{
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
static EIGEN_STRONG_INLINE float run(const float& x) { return ::erff(x); }
|
static EIGEN_STRONG_INLINE float run(float x) { return ::erff(x); }
|
||||||
};
|
};
|
||||||
|
|
||||||
template<>
|
template <>
|
||||||
struct erf_impl<double>
|
struct erf_impl<double> {
|
||||||
{
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
static EIGEN_STRONG_INLINE double run(const double& x) { return ::erf(x); }
|
static EIGEN_STRONG_INLINE double run(double x) { return ::erf(x); }
|
||||||
};
|
};
|
||||||
#endif // EIGEN_HAS_C99_MATH
|
#endif // EIGEN_HAS_C99_MATH
|
||||||
|
|
||||||
@ -93,35 +363,30 @@ struct erf_impl<double>
|
|||||||
* Implementation of erfc *
|
* Implementation of erfc *
|
||||||
****************************************************************************/
|
****************************************************************************/
|
||||||
|
|
||||||
template<typename Scalar>
|
template <typename Scalar>
|
||||||
struct erfc_impl
|
struct erfc_impl {
|
||||||
{
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
static EIGEN_STRONG_INLINE Scalar run(const Scalar&)
|
static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
|
||||||
{
|
|
||||||
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
|
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
|
||||||
THIS_TYPE_IS_NOT_SUPPORTED);
|
THIS_TYPE_IS_NOT_SUPPORTED);
|
||||||
return Scalar(0);
|
return Scalar(0);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Scalar>
|
template <typename Scalar>
|
||||||
struct erfc_retval
|
struct erfc_retval {
|
||||||
{
|
|
||||||
typedef Scalar type;
|
typedef Scalar type;
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef EIGEN_HAS_C99_MATH
|
#ifdef EIGEN_HAS_C99_MATH
|
||||||
template<>
|
template <>
|
||||||
struct erfc_impl<float>
|
struct erfc_impl<float> {
|
||||||
{
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
static EIGEN_STRONG_INLINE float run(const float x) { return ::erfcf(x); }
|
static EIGEN_STRONG_INLINE float run(const float x) { return ::erfcf(x); }
|
||||||
};
|
};
|
||||||
|
|
||||||
template<>
|
template <>
|
||||||
struct erfc_impl<double>
|
struct erfc_impl<double> {
|
||||||
{
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
static EIGEN_STRONG_INLINE double run(const double x) { return ::erfc(x); }
|
static EIGEN_STRONG_INLINE double run(const double x) { return ::erfc(x); }
|
||||||
};
|
};
|
||||||
@ -129,27 +394,29 @@ struct erfc_impl<double>
|
|||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
|
|
||||||
namespace numext {
|
namespace numext {
|
||||||
|
|
||||||
template<typename Scalar>
|
template <typename Scalar>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(lgamma, Scalar)
|
||||||
inline EIGEN_MATHFUNC_RETVAL(lgamma, Scalar) lgamma(const Scalar& x)
|
lgamma(const Scalar& x) {
|
||||||
{
|
|
||||||
return EIGEN_MATHFUNC_IMPL(lgamma, Scalar)::run(x);
|
return EIGEN_MATHFUNC_IMPL(lgamma, Scalar)::run(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Scalar>
|
template <typename Scalar>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(digamma, Scalar)
|
||||||
inline EIGEN_MATHFUNC_RETVAL(erf, Scalar) erf(const Scalar& x)
|
digamma(const Scalar& x) {
|
||||||
{
|
return EIGEN_MATHFUNC_IMPL(digamma, Scalar)::run(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Scalar>
|
||||||
|
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erf, Scalar)
|
||||||
|
erf(const Scalar& x) {
|
||||||
return EIGEN_MATHFUNC_IMPL(erf, Scalar)::run(x);
|
return EIGEN_MATHFUNC_IMPL(erf, Scalar)::run(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Scalar>
|
template <typename Scalar>
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar)
|
||||||
inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar) erfc(const Scalar& x)
|
erfc(const Scalar& x) {
|
||||||
{
|
|
||||||
return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x);
|
return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -54,6 +54,8 @@ template<typename MatrixType> class Transpose
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
|
||||||
|
|
||||||
typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
|
typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
|
||||||
EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)
|
EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)
|
||||||
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
|
typedef typename internal::remove_all<MatrixType>::type NestedExpression;
|
||||||
@ -68,16 +70,16 @@ template<typename MatrixType> class Transpose
|
|||||||
|
|
||||||
/** \returns the nested expression */
|
/** \returns the nested expression */
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
const typename internal::remove_all<typename MatrixType::Nested>::type&
|
const typename internal::remove_all<MatrixTypeNested>::type&
|
||||||
nestedExpression() const { return m_matrix; }
|
nestedExpression() const { return m_matrix; }
|
||||||
|
|
||||||
/** \returns the nested expression */
|
/** \returns the nested expression */
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
typename internal::remove_all<typename MatrixType::Nested>::type&
|
typename internal::remove_reference<MatrixTypeNested>::type&
|
||||||
nestedExpression() { return m_matrix.const_cast_derived(); }
|
nestedExpression() { return m_matrix; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
typename MatrixType::Nested m_matrix;
|
typename internal::ref_selector<MatrixType>::non_const_type m_matrix;
|
||||||
};
|
};
|
||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
@@ -325,7 +325,7 @@ class TranspositionsWrapper
 
   protected:
 
-    const typename IndicesType::Nested m_indices;
+    typename IndicesType::Nested m_indices;
 };
 
 
@@ -168,7 +168,7 @@ namespace internal {
 template<typename MatrixType, unsigned int _Mode>
 struct traits<TriangularView<MatrixType, _Mode> > : traits<MatrixType>
 {
-  typedef typename ref_selector<MatrixType>::type MatrixTypeNested;
+  typedef typename ref_selector<MatrixType>::non_const_type MatrixTypeNested;
   typedef typename remove_reference<MatrixTypeNested>::type MatrixTypeNestedNonRef;
   typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
   typedef typename MatrixType::PlainObject FullMatrixType;
@@ -213,7 +213,6 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
       IsVectorAtCompileTime = false
     };
 
-    // FIXME This, combined with const_cast_derived in transpose() leads to a const-correctness loophole
     EIGEN_DEVICE_FUNC
     explicit inline TriangularView(MatrixType& matrix) : m_matrix(matrix)
     {}
@@ -235,7 +234,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
 
     /** \returns a reference to the nested expression */
     EIGEN_DEVICE_FUNC
-    NestedExpression& nestedExpression() { return *const_cast<NestedExpression*>(&m_matrix); }
+    NestedExpression& nestedExpression() { return m_matrix; }
 
     typedef TriangularView<const MatrixConjugateReturnType,Mode> ConjugateReturnType;
     /** \sa MatrixBase::conjugate() const */
@@ -255,7 +254,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
     inline TransposeReturnType transpose()
     {
       EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
-      typename MatrixType::TransposeReturnType tmp(m_matrix.const_cast_derived());
+      typename MatrixType::TransposeReturnType tmp(m_matrix);
       return TransposeReturnType(tmp);
     }
 
@@ -418,7 +417,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
     {
       EIGEN_STATIC_ASSERT_LVALUE(TriangularViewType);
       Base::check_coordinates_internal(row, col);
-      return derived().nestedExpression().const_cast_derived().coeffRef(row, col);
+      return derived().nestedExpression().coeffRef(row, col);
     }
 
     /** Assigns a triangular matrix to a triangular part of a dense matrix */
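The TriangularView changes above make the nested expression non-const by construction, so writable views no longer route through const_cast. A minimal sketch of the user-facing behavior this path serves (standard Eigen API; the compound assignment on the view is assumed available in this release line):

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
      Eigen::Matrix3d m = Eigen::Matrix3d::Zero();
      // Writes go through the view; with ref_selector<MatrixType>::non_const_type
      // above, no const_cast is involved anywhere on this path.
      m.triangularView<Eigen::Upper>() = Eigen::Matrix3d::Constant(1.0);
      m.triangularView<Eigen::Upper>() *= 2.0;  // in-place scaling of the upper part
      std::cout << m << "\n";                   // strictly lower part stays zero
      return 0;
    }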
@@ -124,7 +124,7 @@ struct member_lpnorm {
 template <typename BinaryOp, typename Scalar>
 struct member_redux {
   typedef typename result_of<
-                     BinaryOp(Scalar,Scalar)
+                     BinaryOp(const Scalar&,const Scalar&)
                    >::type  result_type;
   template<typename _Scalar, int Size> struct Cost
   { enum { value = (Size-1) * functor_traits<BinaryOp>::Cost }; };
@@ -197,7 +197,7 @@ struct functor_traits<max_coeff_visitor<Scalar> > {
 /** \returns the minimum of all coefficients of *this and puts in *row and *col its location.
   * \warning the result is undefined if \c *this contains NaN.
   *
-  * \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visitor(), DenseBase::minCoeff()
+  * \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visit(), DenseBase::minCoeff()
   */
 template<typename Derived>
 template<typename IndexType>
@@ -215,7 +215,7 @@ DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
 /** \returns the minimum of all coefficients of *this and puts in *index its location.
   * \warning the result is undefined if \c *this contains NaN.
   *
-  * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::minCoeff()
+  * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::minCoeff()
   */
 template<typename Derived>
 template<typename IndexType>
@@ -233,7 +233,7 @@ DenseBase<Derived>::minCoeff(IndexType* index) const
 /** \returns the maximum of all coefficients of *this and puts in *row and *col its location.
   * \warning the result is undefined if \c *this contains NaN.
   *
-  * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff()
+  * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::maxCoeff()
   */
 template<typename Derived>
 template<typename IndexType>
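The \sa fixes above point to DenseBase::visit(), the visitor mechanism that backs these coefficient searches; typical use of the documented API:

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
      Eigen::MatrixXf m(2,2);
      m << 3, -1,
           5,  2;
      Eigen::Index row, col;             // plays the role of the IndexType parameter
      float v = m.minCoeff(&row, &col);  // visitor-based scan, see DenseBase::visit()
      std::cout << v << " at (" << row << "," << col << ")\n";  // -1 at (0,1)
      return 0;
    }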
@@ -78,6 +78,20 @@ double2 plgamma<double2>(const double2& a)
   return make_double2(lgamma(a.x), lgamma(a.y));
 }
 
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pdigamma<float4>(const float4& a)
+{
+  using numext::digamma;
+  return make_float4(digamma(a.x), digamma(a.y), digamma(a.z), digamma(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pdigamma<double2>(const double2& a)
+{
+  using numext::digamma;
+  return make_double2(digamma(a.x), digamma(a.y));
+}
+
 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 float4 perf<float4>(const float4& a)
 {
@@ -40,6 +40,7 @@ template<> struct packet_traits<float> : default_packet_traits
     HasSqrt   = 1,
     HasRsqrt  = 1,
     HasLGamma = 1,
+    HasDiGamma = 1,
     HasErf    = 1,
     HasErfc   = 1,
 
@@ -63,6 +64,7 @@ template<> struct packet_traits<double> : default_packet_traits
     HasSqrt   = 1,
     HasRsqrt  = 1,
     HasLGamma = 1,
+    HasDiGamma = 1,
     HasErf    = 1,
     HasErfc   = 1,
 
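The new HasDiGamma flag is the compile-time switch that functor_traits reads to decide whether the digamma functor may dispatch to the packet kernels above. A self-contained sketch of that gating pattern (the *_demo names are made up for this sketch, simplified from the Eigen internals):

    // Demo of how a packet_traits capability flag gates vectorization of a
    // unary functor at compile time.
    template<typename Scalar> struct packet_traits_demo { enum { HasDiGamma = 0 }; };
    template<> struct packet_traits_demo<float>  { enum { HasDiGamma = 1 }; };
    template<> struct packet_traits_demo<double> { enum { HasDiGamma = 1 }; };

    template<typename Scalar> struct digamma_traits_demo {
      enum { PacketAccess = packet_traits_demo<Scalar>::HasDiGamma };
    };

    static_assert(digamma_traits_demo<float>::PacketAccess  == 1, "float4 path enabled");
    static_assert(digamma_traits_demo<double>::PacketAccess == 1, "double2 path enabled");
    static_assert(digamma_traits_demo<int>::PacketAccess    == 0, "no integer digamma packets");

    int main() { return 0; }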
@@ -37,7 +37,7 @@ template<typename Scalar>
 struct functor_traits<scalar_identity_op<Scalar> >
 { enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
 
-template <typename Scalar, typename Packet, bool RandomAccess> struct linspaced_op_impl;
+template <typename Scalar, typename Packet, bool RandomAccess, bool IsInteger> struct linspaced_op_impl;
 
 // linear access for packet ops:
 // 1) initialization
@@ -48,12 +48,12 @@ template <typename Scalar, typename Packet, bool RandomAccess> struct linspaced_
 // TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp)
 //       in order to avoid the padd() in operator() ?
 template <typename Scalar, typename Packet>
-struct linspaced_op_impl<Scalar,Packet,false>
+struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/false,/*IsInteger*/false>
 {
-  linspaced_op_impl(const Scalar& low, const Scalar& step) :
-  m_low(low), m_step(step),
-  m_packetStep(pset1<Packet>(unpacket_traits<Packet>::size*step)),
-  m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(step),plset<Packet>(-unpacket_traits<Packet>::size)))) {}
+  linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
+  m_low(low), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
+  m_packetStep(pset1<Packet>(unpacket_traits<Packet>::size*m_step)),
+  m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(m_step),plset<Packet>(-unpacket_traits<Packet>::size)))) {}
 
   template<typename Index>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const
@@ -75,11 +75,11 @@ struct linspaced_op_impl<Scalar,Packet,false>
 //    1) each step
 //   [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
 template <typename Scalar, typename Packet>
-struct linspaced_op_impl<Scalar,Packet,true>
+struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/false>
 {
-  linspaced_op_impl(const Scalar& low, const Scalar& step) :
-  m_low(low), m_step(step),
+  linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
+  m_low(low), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
   m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Packet>(0)) {}
 
   template<typename Index>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
@@ -95,6 +95,31 @@ struct linspaced_op_impl<Scalar,Packet,true>
   const Packet m_interPacket;
 };
 
+template <typename Scalar, typename Packet>
+struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/true>
+{
+  linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
+  m_low(low), m_length(high-low), m_divisor(num_steps==1?1:num_steps-1), m_interPacket(plset<Packet>(0))
+  {}
+
+  template<typename Index>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  const Scalar operator() (Index i) const {
+    return m_low + (m_length*Scalar(i))/m_divisor;
+  }
+
+  template<typename Index>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+  const Packet packetOp(Index i) const {
+    return internal::padd(pset1<Packet>(m_low), pdiv(pmul(pset1<Packet>(m_length), padd(pset1<Packet>(Scalar(i)),m_interPacket)),
+                                                     pset1<Packet>(m_divisor))); }
+
+  const Scalar m_low;
+  const Scalar m_length;
+  const Index  m_divisor;
+  const Packet m_interPacket;
+};
+
 // ----- Linspace functor ----------------------------------------------------------------
 
 // Forward declaration (we default to random access which does not really give
@@ -102,10 +127,20 @@ struct linspaced_op_impl<Scalar,Packet,true>
 // nested expressions).
 template <typename Scalar, typename PacketType, bool RandomAccess = true> struct linspaced_op;
 template <typename Scalar, typename PacketType, bool RandomAccess> struct functor_traits< linspaced_op<Scalar,PacketType,RandomAccess> >
-{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::HasSetLinear, IsRepeatable = true }; };
+{
+  enum
+  {
+    Cost = 1,
+    PacketAccess =    packet_traits<Scalar>::HasSetLinear
+                   && ((!NumTraits<Scalar>::IsInteger) || packet_traits<Scalar>::HasDiv),
+    IsRepeatable = true
+  };
+};
 template <typename Scalar, typename PacketType, bool RandomAccess> struct linspaced_op
 {
-  linspaced_op(const Scalar& low, const Scalar& high, Index num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1))) {}
+  linspaced_op(const Scalar& low, const Scalar& high, Index num_steps)
+    : impl((num_steps==1 ? high : low),high,num_steps)
+  {}
 
   template<typename Index>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); }
@@ -134,7 +169,9 @@ template <typename Scalar, typename PacketType, bool RandomAccess> struct linspa
   // This proxy object handles the actual required temporaries, the different
   // implementations (random vs. sequential access) as well as the
   // correct piping to size 2/4 packet operations.
-  const linspaced_op_impl<Scalar,PacketType,RandomAccess> impl;
+  // As long as we don't have a Bresenham-like implementation for linear-access and integer types,
+  // we have to by-pass RandomAccess for integer types. See bug 698.
+  const linspaced_op_impl<Scalar,PacketType,(NumTraits<Scalar>::IsInteger?true:RandomAccess),NumTraits<Scalar>::IsInteger> impl;
 };
 
 // all functors allow linear access, except scalar_identity_op. So we fix here a quick meta
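The IsInteger specialization evaluates m_low + (m_length*i)/m_divisor with an exact integer division per coefficient instead of accumulating a pre-rounded step, which is the point of bug 698; a small standalone comparison:

    #include <cstdio>

    // Sketch of the two strategies for an integer "linspace" (illustrative only).
    int main()
    {
      const int low = 0, high = 10, num_steps = 4;
      const int divisor = num_steps - 1;
      const int step = (high - low) / divisor;  // truncates to 3

      for (int i = 0; i < num_steps; ++i)
      {
        int naive = low + i * step;                      // 0 3 6 9  : never reaches high
        int exact = low + (i * (high - low)) / divisor;  // 0 3 6 10 : endpoint is exact
        std::printf("%d %d\n", naive, exact);
      }
      return 0;
    }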
@@ -427,6 +427,28 @@ struct functor_traits<scalar_lgamma_op<Scalar> >
   };
 };
 
+/** \internal
+ * \brief Template functor to compute psi, the derivative of lgamma of a scalar.
+ * \sa class CwiseUnaryOp, Cwise::digamma()
+ */
+template<typename Scalar> struct scalar_digamma_op {
+  EIGEN_EMPTY_STRUCT_CTOR(scalar_digamma_op)
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
+    using numext::digamma; return digamma(a);
+  }
+  typedef typename packet_traits<Scalar>::type Packet;
+  EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pdigamma(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_digamma_op<Scalar> >
+{
+  enum {
+    // Guesstimate
+    Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
+    PacketAccess = packet_traits<Scalar>::HasDiGamma
+  };
+};
+
 /** \internal
  * \brief Template functor to compute the Gauss error function of a
  * scalar
@@ -644,7 +666,7 @@ struct functor_traits<scalar_floor_op<Scalar> >
 template<typename Scalar> struct scalar_ceil_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_ceil_op)
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return numext::ceil(a); }
-  typedef typename packet_traits<Scalar>::type Packet;
+  template <typename Packet>
   EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pceil(a); }
 };
 template<typename Scalar>
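For reference, this functor sits behind the array-level digamma entry point added elsewhere in this series; a usage sketch under that assumption (in later releases the special functions live in the unsupported SpecialFunctions module, so the member call below may need that header):

    #include <Eigen/Core>
    #include <iostream>

    int main()
    {
      Eigen::ArrayXd x(3);
      x << 1.0, 2.0, 10.0;
      // psi(1) = -gamma (Euler-Mascheroni constant), psi(2) = 1 - gamma, ...
      std::cout << x.digamma().transpose() << "\n";
      return 0;
    }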
@@ -252,7 +252,7 @@ void evaluateProductBlockingSizesHeuristic(Index& k, Index& m, Index& n, Index n
       // we have both L2 and L3, and problem is small enough to be kept in L2
       // Let's choose m such that lhs's block fit in 1/3 of L2
       actual_lm = l2;
-      max_mc = 576;
+      max_mc = (std::min<Index>)(576,max_mc);
     }
     Index mc = (std::min<Index>)(actual_lm/(3*k*sizeof(LhsScalar)), max_mc);
     if (mc > Traits::mr) mc -= mc % Traits::mr;
@@ -352,9 +352,8 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
       }
       else  // no l3 blocking
       {
-        Index m = this->m_mc;
         Index n = this->m_nc;
-        computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, n, num_threads);
+        computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, this->m_mc, n, num_threads);
       }
 
       m_sizeA = this->m_mc * this->m_kc;
@@ -42,13 +42,14 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
 {
   typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
   static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
-                                      const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha)
+                                      const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride,
+                                      const ResScalar& alpha, level3_blocking<LhsScalar,RhsScalar>& blocking)
   {
     general_matrix_matrix_triangular_product<Index,
         RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
         LhsScalar, LhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateLhs,
         ColMajor, UpLo==Lower?Upper:Lower>
-      ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha);
+      ::run(size,depth,rhs,rhsStride,lhs,lhsStride,res,resStride,alpha,blocking);
   }
 };
 
@@ -58,7 +59,8 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
 {
   typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
   static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
-                                      const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride, const ResScalar& alpha)
+                                      const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride,
+                                      const ResScalar& alpha, level3_blocking<LhsScalar,RhsScalar>& blocking)
   {
     typedef gebp_traits<LhsScalar,RhsScalar> Traits;
 
@@ -69,16 +71,18 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
     RhsMapper rhs(_rhs,rhsStride);
     ResMapper res(_res, resStride);
 
-    Index kc = depth; // cache block size along the K direction
-    Index mc = size;  // cache block size along the M direction
-    Index nc = size;  // cache block size along the N direction
-    computeProductBlockingSizes<LhsScalar,RhsScalar>(kc, mc, nc, 1);
+    Index kc = blocking.kc();
+    Index mc = (std::min)(size,blocking.mc());
     // !!! mc must be a multiple of nr:
     if(mc > Traits::nr)
       mc = (mc/Traits::nr)*Traits::nr;
 
-    ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, kc*mc, 0);
-    ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, kc*size, 0);
+    std::size_t sizeA = kc*mc;
+    std::size_t sizeB = kc*size;
+
+    ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
+    ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
 
     gemm_pack_lhs<LhsScalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
     gemm_pack_rhs<RhsScalar, Index, RhsMapper, Traits::nr, RhsStorageOrder> pack_rhs;
@@ -136,7 +140,7 @@ struct tribb_kernel
   typedef typename Traits::ResScalar ResScalar;
 
   enum {
-    BlockSize  = EIGEN_PLAIN_ENUM_MAX(mr,nr)
+    BlockSize  = meta_least_common_multiple<EIGEN_PLAIN_ENUM_MAX(mr,nr),EIGEN_PLAIN_ENUM_MIN(mr,nr)>::ret
   };
   void operator()(ResScalar* _res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha)
   {
@@ -256,13 +260,27 @@ struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
 
     typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());
 
+    enum {
+      IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0,
+      LhsIsRowMajor = _ActualLhs::Flags&RowMajorBit ? 1 : 0,
+      RhsIsRowMajor = _ActualRhs::Flags&RowMajorBit ? 1 : 0
+    };
+
+    Index size = mat.cols();
+    Index depth = actualLhs.cols();
+
+    typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,typename Lhs::Scalar,typename Rhs::Scalar,
+              MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, _ActualRhs::MaxColsAtCompileTime> BlockingType;
+
+    BlockingType blocking(size, size, depth, 1, false);
+
     internal::general_matrix_matrix_triangular_product<Index,
-      typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
-      typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
-      MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
-      ::run(mat.cols(), actualLhs.cols(),
+      typename Lhs::Scalar, LhsIsRowMajor ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
+      typename Rhs::Scalar, RhsIsRowMajor ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
+      IsRowMajor ? RowMajor : ColMajor, UpLo>
+      ::run(size, depth,
             &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
-            mat.data(), mat.outerStride(), actualAlpha);
+            mat.data(), mat.outerStride(), actualAlpha, blocking);
   }
 };
 
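The recurring change in this file is that run() now receives a level3_blocking instead of computing kc/mc/nc itself, so the caller sizes and allocates the packed-panel buffers once for the whole product. A self-contained analogy of that ownership pattern (names are illustrative, not Eigen's):

    #include <cstddef>
    #include <vector>

    // The caller owns the cache-blocking sizes and the packing buffers;
    // every kernel invocation in the chain reuses them.
    struct BlockingDemo {
      std::ptrdiff_t kc, mc;
      std::vector<double> blockA, blockB;  // packed panels of lhs / rhs
      BlockingDemo(std::ptrdiff_t kc_, std::ptrdiff_t mc_, std::ptrdiff_t cols)
        : kc(kc_), mc(mc_), blockA(std::size_t(kc_*mc_)), blockB(std::size_t(kc_*cols)) {}
    };

    // Stand-in for a product kernel: would pack into blocking.blockA/blockB and run.
    void kernel_demo(const double*, const double*, double*,
                     std::ptrdiff_t, std::ptrdiff_t, BlockingDemo&) {}

    int main()
    {
      const std::ptrdiff_t n = 64, d = 32;
      std::vector<double> A(std::size_t(n*d)), B(std::size_t(d*n)), C(std::size_t(n*n));
      BlockingDemo blocking(16, 32, n);                           // sized/allocated once
      kernel_demo(A.data(), B.data(), C.data(), n, d, blocking);  // reused here...
      kernel_demo(A.data(), B.data(), C.data(), n, d, blocking);  // ...and here
      return 0;
    }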
@@ -291,7 +291,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
     const Scalar* lhs, Index lhsStride,
     const Scalar* rhs, Index rhsStride,
     Scalar* res,       Index resStride,
-    const Scalar& alpha)
+    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
   {
     product_selfadjoint_matrix<Scalar, Index,
       EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
@@ -299,7 +299,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
       EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
       LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
       ColMajor>
-      ::run(cols, rows,  rhs, rhsStride,  lhs, lhsStride,  res, resStride,  alpha);
+      ::run(cols, rows,  rhs, rhsStride,  lhs, lhsStride,  res, resStride,  alpha, blocking);
   }
 };
 
@@ -314,7 +314,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
     const Scalar* _lhs, Index lhsStride,
     const Scalar* _rhs, Index rhsStride,
     Scalar* res,        Index resStride,
-    const Scalar& alpha);
+    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
 };
 
 template <typename Scalar, typename Index,
@@ -325,7 +325,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
     const Scalar* _lhs, Index lhsStride,
     const Scalar* _rhs, Index rhsStride,
     Scalar* _res,       Index resStride,
-    const Scalar& alpha)
+    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
   {
     Index size = rows;
 
@@ -340,17 +340,14 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
     RhsMapper rhs(_rhs,rhsStride);
     ResMapper res(_res, resStride);
 
-    Index kc = size;  // cache block size along the K direction
-    Index mc = rows;  // cache block size along the M direction
-    Index nc = cols;  // cache block size along the N direction
-    computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc, 1);
-    // kc must smaller than mc
+    Index kc = blocking.kc();                   // cache block size along the K direction
+    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
+    // kc must be smaller than mc
     kc = (std::min)(kc,mc);
+    std::size_t sizeA = kc*mc;
     std::size_t sizeB = kc*cols;
-    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
-    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
-    Scalar* blockB = allocatedBlockB;
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
 
     gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
     symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
@@ -410,7 +407,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
     const Scalar* _lhs, Index lhsStride,
     const Scalar* _rhs, Index rhsStride,
     Scalar* res,        Index resStride,
-    const Scalar& alpha);
+    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
 };
 
 template <typename Scalar, typename Index,
@@ -421,7 +418,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
     const Scalar* _lhs, Index lhsStride,
     const Scalar* _rhs, Index rhsStride,
     Scalar* _res,       Index resStride,
-    const Scalar& alpha)
+    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
   {
     Index size = cols;
 
@@ -432,14 +429,12 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
     LhsMapper lhs(_lhs,lhsStride);
     ResMapper res(_res,resStride);
 
-    Index kc = size;  // cache block size along the K direction
-    Index mc = rows;  // cache block size along the M direction
-    Index nc = cols;  // cache block size along the N direction
-    computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc, 1);
+    Index kc = blocking.kc();                   // cache block size along the K direction
+    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
+    std::size_t sizeA = kc*mc;
     std::size_t sizeB = kc*cols;
-    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
-    ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0);
-    Scalar* blockB = allocatedBlockB;
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
+    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
 
     gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
     gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
@@ -498,6 +493,11 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
     Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
                                * RhsBlasTraits::extractScalarFactor(a_rhs);
 
+    typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
+              Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,1> BlockingType;
+
+    BlockingType blocking(lhs.rows(), rhs.cols(), lhs.cols(), 1, false);
+
     internal::product_selfadjoint_matrix<Scalar, Index,
       EIGEN_LOGICAL_XOR(LhsIsUpper,internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
       NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
@@ -509,7 +509,7 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
       &lhs.coeffRef(0,0), lhs.outerStride(),  // lhs info
       &rhs.coeffRef(0,0), rhs.outerStride(),  // rhs info
       &dst.coeffRef(0,0), dst.outerStride(),  // result info
-      actualAlpha                             // alpha
+      actualAlpha, blocking                   // alpha
       );
   }
 };
@@ -92,15 +92,27 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
 
     Scalar actualAlpha = alpha * OtherBlasTraits::extractScalarFactor(other.derived());
 
-    enum { IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0 };
+    enum {
+      IsRowMajor = (internal::traits<MatrixType>::Flags&RowMajorBit) ? 1 : 0,
+      OtherIsRowMajor = _ActualOtherType::Flags&RowMajorBit ? 1 : 0
+    };
+
+    Index size = mat.cols();
+    Index depth = actualOther.cols();
+
+    typedef internal::gemm_blocking_space<IsRowMajor ? RowMajor : ColMajor,Scalar,Scalar,
+              MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime, _ActualOtherType::MaxColsAtCompileTime> BlockingType;
+
+    BlockingType blocking(size, size, depth, 1, false);
+
 
     internal::general_matrix_matrix_triangular_product<Index,
-      Scalar, _ActualOtherType::Flags&RowMajorBit ? RowMajor : ColMajor,   OtherBlasTraits::NeedToConjugate  && NumTraits<Scalar>::IsComplex,
-      Scalar, _ActualOtherType::Flags&RowMajorBit ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,
-      MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
-      ::run(mat.cols(), actualOther.cols(),
+      Scalar, OtherIsRowMajor ? RowMajor : ColMajor,   OtherBlasTraits::NeedToConjugate  && NumTraits<Scalar>::IsComplex,
+      Scalar, OtherIsRowMajor ? ColMajor : RowMajor, (!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex,
+      IsRowMajor ? RowMajor : ColMajor, UpLo>
+      ::run(size, depth,
            &actualOther.coeffRef(0,0), actualOther.outerStride(), &actualOther.coeffRef(0,0), actualOther.outerStride(),
-            mat.data(), mat.outerStride(), actualAlpha);
+            mat.data(), mat.outerStride(), actualAlpha, blocking);
   }
 };
 
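This selector is reached through the public rank-update API; a minimal example of the call it accelerates:

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
      Eigen::MatrixXd m = Eigen::MatrixXd::Zero(3,3);
      Eigen::MatrixXd u = Eigen::MatrixXd::Random(3,2);
      // Lower triangle of m += 2.5 * u * u^T, computed triangle-only through
      // general_matrix_matrix_triangular_product with the blocking object above.
      m.selfadjointView<Eigen::Lower>().rankUpdate(u, 2.5);
      std::cout << m << "\n";
      return 0;
    }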
@@ -126,6 +126,10 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
 
     Index kc = blocking.kc();                   // cache block size along the K direction
     Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
+    // The small panel size must not be larger than blocking size.
+    // Usually this should never be the case because SmallPanelWidth^2 is very small
+    // compared to L2 cache size, but let's be safe:
+    Index panelWidth = (std::min)(Index(SmallPanelWidth),(std::min)(kc,mc));
 
     std::size_t sizeA = kc*mc;
     std::size_t sizeB = kc*cols;
@@ -169,9 +173,9 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
       if(IsLower || actual_k2<rows)
       {
         // for each small vertical panels of lhs
-        for (Index k1=0; k1<actual_kc; k1+=SmallPanelWidth)
+        for (Index k1=0; k1<actual_kc; k1+=panelWidth)
         {
-          Index actualPanelWidth = std::min<Index>(actual_kc-k1, SmallPanelWidth);
+          Index actualPanelWidth = std::min<Index>(actual_kc-k1, panelWidth);
           Index lengthTarget = IsLower ? actual_kc-k1-actualPanelWidth : k1;
           Index startBlock  = actual_k2+k1;
           Index blockBOffset = k1;
@@ -15,10 +15,11 @@
 //  4522 - 'class' : multiple assignment operators specified
 //  4700 - uninitialized local variable 'xyz' used
 //  4717 - 'function' : recursive on all control paths, function will cause runtime stack overflow
+//  4800 - 'type' : forcing value to bool 'true' or 'false' (performance warning)
 #ifndef EIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS
   #pragma warning( push )
 #endif
-#pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4717 )
+#pragma warning( disable : 4100 4101 4127 4181 4211 4244 4273 4324 4503 4512 4522 4700 4717 4800)
 #elif defined __INTEL_COMPILER
 //  2196 - routine is both "inline" and "noinline" ("noinline" assumed)
 //    ICC 12 generates this warning even without any inline keyword, when defining class methods 'inline' i.e. inside of class body
@@ -336,7 +336,6 @@
 // Do we support r-value references?
 #if (__has_feature(cxx_rvalue_references) || \
     (defined(__cplusplus) && __cplusplus >= 201103L) || \
-    defined(__GXX_EXPERIMENTAL_CXX0X__) || \
     (EIGEN_COMP_MSVC >= 1600))
   #define EIGEN_HAVE_RVALUE_REFERENCES
 #endif
@@ -526,9 +526,9 @@ template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_align
 template<int Alignment, typename Scalar, typename Index>
 EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size)
 {
-  static const Index ScalarSize = sizeof(Scalar);
-  static const Index AlignmentSize = Alignment / ScalarSize;
-  static const Index AlignmentMask = AlignmentSize-1;
+  const Index ScalarSize = sizeof(Scalar);
+  const Index AlignmentSize = Alignment / ScalarSize;
+  const Index AlignmentMask = AlignmentSize-1;
 
   if(AlignmentSize<=1)
  {
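first_aligned() returns the index of the first element of `array` that sits on an Alignment-byte boundary, so vectorized loops can begin after a short scalar prologue; a simplified, non-template sketch of the same computation (demo helper, not Eigen's):

    #include <cstdint>
    #include <cstdio>

    static std::ptrdiff_t first_aligned_demo(const float* array, std::ptrdiff_t size)
    {
      const std::ptrdiff_t AlignmentSize = 16 / sizeof(float);  // 4 floats per 16 bytes
      const std::ptrdiff_t AlignmentMask = AlignmentSize - 1;
      const std::ptrdiff_t offset =
          std::ptrdiff_t((reinterpret_cast<std::uintptr_t>(array) / sizeof(float)) & AlignmentMask);
      const std::ptrdiff_t first = (AlignmentSize - offset) & AlignmentMask;
      return first < size ? first : size;
    }

    int main()
    {
      alignas(16) float buf[32];
      std::printf("%td\n", first_aligned_demo(buf, 32));      // 0: already aligned
      std::printf("%td\n", first_aligned_demo(buf + 1, 31));  // 3: skip to buf+4
      return 0;
    }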
@@ -257,7 +257,7 @@ struct has_std_result_type {int a[2];};
 struct has_tr1_result {int a[3];};
 
 template<typename Func, typename ArgType, int SizeOf=sizeof(has_none)>
-struct unary_result_of_select {typedef ArgType type;};
+struct unary_result_of_select {typedef typename internal::remove_all<ArgType>::type type;};
 
 template<typename Func, typename ArgType>
 struct unary_result_of_select<Func, ArgType, sizeof(has_std_result_type)> {typedef typename Func::result_type type;};
@@ -279,7 +279,7 @@ struct result_of<Func(ArgType)> {
 };
 
 template<typename Func, typename ArgType0, typename ArgType1, int SizeOf=sizeof(has_none)>
-struct binary_result_of_select {typedef ArgType0 type;};
+struct binary_result_of_select {typedef typename internal::remove_all<ArgType0>::type type;};
 
 template<typename Func, typename ArgType0, typename ArgType1>
 struct binary_result_of_select<Func, ArgType0, ArgType1, sizeof(has_std_result_type)>
@@ -326,6 +326,22 @@ class meta_sqrt
 template<int Y, int InfX, int SupX>
 class meta_sqrt<Y, InfX, SupX, true> { public: enum { ret = (SupX*SupX <= Y) ? SupX : InfX }; };
 
+
+/** \internal Computes the least common multiple of two positive integers A and B
+  * at compile-time. It implements a naive algorithm testing all multiples of A.
+  * It thus works better if A>=B.
+  */
+template<int A, int B, int K=1, bool Done = ((A*K)%B)==0>
+struct meta_least_common_multiple
+{
+  enum { ret = meta_least_common_multiple<A,B,K+1>::ret };
+};
+template<int A, int B, int K>
+struct meta_least_common_multiple<A,B,K,true>
+{
+  enum { ret = A*K };
+};
+
 /** \internal determines whether the product of two numeric types is allowed and what the return type is */
 template<typename T, typename U> struct scalar_product_traits
 {
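meta_least_common_multiple is what tribb_kernel uses above to pick BlockSize = lcm(mr,nr); a self-contained compile-time check of the recursion (restated here so it builds on its own):

    template<int A, int B, int K=1, bool Done = ((A*K)%B)==0>
    struct meta_least_common_multiple
    { enum { ret = meta_least_common_multiple<A,B,K+1>::ret }; };
    template<int A, int B, int K>
    struct meta_least_common_multiple<A,B,K,true>
    { enum { ret = A*K }; };

    static_assert(meta_least_common_multiple<8,6>::ret  == 24, "tests multiples of A: 8, 16, 24");
    static_assert(meta_least_common_multiple<4,4>::ret  == 4,  "A itself when B divides A");
    static_assert(meta_least_common_multiple<12,8>::ret == 24, "cheapest when A >= B");

    int main() { return 0; }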
@@ -26,7 +26,7 @@
 
 #ifndef EIGEN_NO_STATIC_ASSERT
 
-  #if defined(__GXX_EXPERIMENTAL_CXX0X__) || (EIGEN_COMP_MSVC >= 1600)
+  #if __has_feature(cxx_static_assert) || (defined(__cplusplus) && __cplusplus >= 201103L) || (EIGEN_COMP_MSVC >= 1600)
 
     // if native static_assert is enabled, let's use it
     #define EIGEN_STATIC_ASSERT(X,MSG) static_assert(X,#MSG);
@@ -466,17 +466,17 @@ struct special_scalar_op_base : public BaseType
 template<typename Derived,typename Scalar,typename OtherScalar, typename BaseType>
 struct special_scalar_op_base<Derived,Scalar,OtherScalar,BaseType,true> : public BaseType
 {
-  const CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, Derived>
+  const CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, const Derived>
   operator*(const OtherScalar& scalar) const
   {
 #ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
     EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
 #endif
-    return CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, Derived>
+    return CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, const Derived>
       (*static_cast<const Derived*>(this), scalar_multiple2_op<Scalar,OtherScalar>(scalar));
   }
 
-  inline friend const CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, Derived>
+  inline friend const CwiseUnaryOp<scalar_multiple2_op<Scalar,OtherScalar>, const Derived>
   operator*(const OtherScalar& scalar, const Derived& matrix)
   {
 #ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
@@ -485,13 +485,13 @@ struct special_scalar_op_base<Derived,Scalar,OtherScalar,BaseType,true> : publi
     return static_cast<const special_scalar_op_base&>(matrix).operator*(scalar);
   }
 
-  const CwiseUnaryOp<scalar_quotient2_op<Scalar,OtherScalar>, Derived>
+  const CwiseUnaryOp<scalar_quotient2_op<Scalar,OtherScalar>, const Derived>
   operator/(const OtherScalar& scalar) const
   {
 #ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
     EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
 #endif
-    return CwiseUnaryOp<scalar_quotient2_op<Scalar,OtherScalar>, Derived>
+    return CwiseUnaryOp<scalar_quotient2_op<Scalar,OtherScalar>, const Derived>
      (*static_cast<const Derived*>(this), scalar_quotient2_op<Scalar,OtherScalar>(scalar));
   }
 };
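With the relaxed detection above, any C++11 compiler takes the native static_assert branch, so failures report MSG as a readable string at compile time. An illustrative use, assuming a C++11 toolchain (the MSG token below is made up for the example; the pre-C++11 fallback only accepts its own predefined tokens):

    #include <Eigen/Core>

    template<typename MatrixType>
    void requires_dynamic(const MatrixType&)
    {
      EIGEN_STATIC_ASSERT(MatrixType::RowsAtCompileTime == Eigen::Dynamic,
                          THIS_FUNCTION_NEEDS_A_DYNAMIC_SIZE_MATRIX)
    }

    int main()
    {
      Eigen::MatrixXd ok(2,2);
      requires_dynamic(ok);        // fine
      // Eigen::Matrix2d bad;
      // requires_dynamic(bad);    // would fail to compile with the message above
      return 0;
    }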
@@ -526,22 +526,21 @@ template <typename A> struct promote_storage_type<const A, A>
   * the functor.
   * The default rules are as follows:
   * \code
   * A      op A      -> A
   * A      op dense  -> dense
   * dense  op B      -> dense
-  * A      *  dense  -> A
-  * dense  *  B      -> B
+  * sparse op dense  -> sparse
+  * dense  op sparse -> sparse
   * \endcode
   */
 template <typename A, typename B, typename Functor> struct cwise_promote_storage_type;
 
 template <typename A, typename Functor> struct cwise_promote_storage_type<A,A,Functor>          { typedef A      ret; };
 template <typename Functor>             struct cwise_promote_storage_type<Dense,Dense,Functor>  { typedef Dense  ret; };
-template <typename ScalarA, typename ScalarB>             struct cwise_promote_storage_type<Dense,Dense,scalar_product_op<ScalarA,ScalarB> > { typedef Dense ret; };
-template <typename A, typename Functor>                   struct cwise_promote_storage_type<A,Dense,Functor>                                 { typedef Dense ret; };
-template <typename B, typename Functor>                   struct cwise_promote_storage_type<Dense,B,Functor>                                 { typedef Dense ret; };
-template <typename A, typename ScalarA, typename ScalarB> struct cwise_promote_storage_type<A,Dense,scalar_product_op<ScalarA,ScalarB> >     { typedef A     ret; };
-template <typename B, typename ScalarA, typename ScalarB> struct cwise_promote_storage_type<Dense,B,scalar_product_op<ScalarA,ScalarB> >     { typedef B     ret; };
+template <typename A, typename Functor> struct cwise_promote_storage_type<A,Dense,Functor>      { typedef Dense  ret; };
+template <typename B, typename Functor> struct cwise_promote_storage_type<Dense,B,Functor>      { typedef Dense  ret; };
+template <typename Functor>             struct cwise_promote_storage_type<Sparse,Dense,Functor> { typedef Sparse ret; };
+template <typename Functor>             struct cwise_promote_storage_type<Dense,Sparse,Functor> { typedef Sparse ret; };
 
 /** \internal Specify the "storage kind" of multiplying an expression of kind A with kind B.
   * The template parameter ProductTag permits to specialize the resulting storage kind wrt to
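The new sparse/dense rules are what back mixed expressions such as the coefficient-wise product below, whose result keeps the sparse storage kind:

    #include <Eigen/Sparse>
    #include <iostream>

    int main()
    {
      Eigen::SparseMatrix<double> s(3,3);
      s.insert(0,0) = 2.0;
      s.insert(2,1) = 4.0;
      Eigen::MatrixXd d = Eigen::MatrixXd::Constant(3,3,10.0);

      Eigen::SparseMatrix<double> r = s.cwiseProduct(d);  // sparse op dense -> sparse
      std::cout << r.nonZeros() << "\n";                  // still 2 stored entries
      return 0;
    }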
@@ -129,7 +129,7 @@ public:
     * determined by \a prec.
     *
     * \sa MatrixBase::isApprox() */
-  bool isApprox(const ParametrizedLine& other, typename NumTraits<Scalar>::Real prec = NumTraits<Scalar>::dummy_precision()) const
+  bool isApprox(const ParametrizedLine& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
   { return m_origin.isApprox(other.m_origin, prec) && m_direction.isApprox(other.m_direction, prec); }
 
 protected:
@@ -162,7 +162,7 @@ public:
     * determined by \a prec.
     *
     * \sa MatrixBase::isApprox() */
-  bool isApprox(const Translation& other, typename NumTraits<Scalar>::Real prec = NumTraits<Scalar>::dummy_precision()) const
+  bool isApprox(const Translation& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
   { return m_coeffs.isApprox(other.m_coeffs, prec); }
 
 };
@@ -37,6 +37,8 @@ namespace Eigen {
   * and \f$ \beta \f$ be the minimum value of the diagonal. If \f$ \beta > 0 \f$ then, the factorization is directly performed
   * on the matrix B. Otherwise, the factorization is performed on the shifted matrix \f$ B + (\sigma+|\beta|) I \f$ where
   * \f$ \sigma \f$ is the initial shift value as returned and set by setInitialShift() method. The default value is \f$ \sigma = 10^{-3} \f$.
+  * If the factorization fails, then the shift is doubled until it succeeds or a maximum of ten attempts is reached. If it still fails,
+  * as reported by the info() method, then you can either increase the initial shift, or better use another preconditioning technique.
   *
   */
 template <typename Scalar, int _UpLo = Lower, typename _OrderingType =
@@ -185,6 +187,10 @@ class IncompleteCholesky : public SparseSolverBase<IncompleteCholesky<Scalar,_Up
     inline void updateList(Ref<const VectorIx> colPtr, Ref<VectorIx> rowIdx, Ref<VectorSx> vals, const Index& col, const Index& jk, VectorIx& firstElt, VectorList& listCol);
 };
 
+// Based on the following paper:
+//   C-J. Lin and J. J. Moré, Incomplete Cholesky Factorizations with
+//   Limited memory, SIAM J. Sci. Comput. 21(1), pp. 24-45, 1999
+//   http://ftp.mcs.anl.gov/pub/tech_reports/reports/P682.pdf
 template<typename Scalar, int _UpLo, typename OrderingType>
 template<typename _MatrixType>
 void IncompleteCholesky<Scalar,_UpLo, OrderingType>::factorize(const _MatrixType& mat)
@@ -240,7 +246,7 @@ void IncompleteCholesky<Scalar,_UpLo, OrderingType>::factorize(const _MatrixType
     else
       m_scale(j) = 1;
 
-  // FIXME disable scaling if not needed, i.e., if it is roughly uniform? (this will make solve() faster)
+  // TODO disable scaling if not needed, i.e., if it is roughly uniform? (this will make solve() faster)
 
   // Scale and compute the shift for the matrix
   RealScalar mindiag = NumTraits<RealScalar>::highest();
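A usage sketch for the shifted incomplete Cholesky described above, as a conjugate-gradient preconditioner (setInitialShift() sets the sigma of the documentation; on failure the factorization doubles it, up to ten tries):

    #include <Eigen/Sparse>
    #include <Eigen/IterativeLinearSolvers>
    #include <iostream>

    int main()
    {
      typedef Eigen::SparseMatrix<double> SpMat;
      SpMat A(2,2);
      A.insert(0,0) = 4.0; A.insert(1,1) = 3.0;
      A.insert(1,0) = 1.0; A.insert(0,1) = 1.0;
      Eigen::VectorXd b = Eigen::VectorXd::Ones(2);

      Eigen::ConjugateGradient<SpMat, Eigen::Lower|Eigen::Upper,
                               Eigen::IncompleteCholesky<double> > cg;
      cg.preconditioner().setInitialShift(1e-3);
      cg.compute(A);
      std::cout << cg.solve(b).transpose() << "\n";
      return 0;
    }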
@@ -251,96 +257,122 @@ void IncompleteCholesky<Scalar,_UpLo, OrderingType>::factorize(const _MatrixType
     eigen_internal_assert(rowIdx[colPtr[j]]==j && "IncompleteCholesky: only the lower triangular part must be stored");
     mindiag = numext::mini(numext::real(vals[colPtr[j]]), mindiag);
   }
 
+  FactorType L_save = m_L;
+
   RealScalar shift = 0;
   if(mindiag <= RealScalar(0.))
     shift = m_initialShift - mindiag;
 
-  // Apply the shift to the diagonal elements of the matrix
-  for (Index j = 0; j < n; j++)
-    vals[colPtr[j]] += shift;
-
-  // jki version of the Cholesky factorization
-  for (Index j=0; j < n; ++j)
-  {
-    // Left-looking factorization of the j-th column
-    // First, load the j-th column into col_vals
-    Scalar diag = vals[colPtr[j]];  // It is assumed that only the lower part is stored
-    col_nnz = 0;
-    for (Index i = colPtr[j] + 1; i < colPtr[j+1]; i++)
-    {
-      StorageIndex l = rowIdx[i];
-      col_vals(col_nnz) = vals[i];
-      col_irow(col_nnz) = l;
-      col_pattern(l) = col_nnz;
-      col_nnz++;
-    }
-    {
-      typename std::list<StorageIndex>::iterator k;
-      // Browse all previous columns that will update column j
-      for(k = listCol[j].begin(); k != listCol[j].end(); k++)
-      {
-        Index jk = firstElt(*k); // First element to use in the column
-        eigen_internal_assert(rowIdx[jk]==j);
-        Scalar v_j_jk = numext::conj(vals[jk]);
-
-        jk += 1;
-        for (Index i = jk; i < colPtr[*k+1]; i++)
-        {
-          StorageIndex l = rowIdx[i];
-          if(col_pattern[l]<0)
-          {
-            col_vals(col_nnz) = vals[i] * v_j_jk;
-            col_irow[col_nnz] = l;
-            col_pattern(l) = col_nnz;
-            col_nnz++;
-          }
-          else
-            col_vals(col_pattern[l]) -= vals[i] * v_j_jk;
-        }
-        updateList(colPtr,rowIdx,vals, *k, jk, firstElt, listCol);
-      }
-    }
-
-    // Scale the current column
-    if(numext::real(diag) <= 0)
-    {
-      m_info = NumericalIssue;
-      return;
-    }
-
-    RealScalar rdiag = sqrt(numext::real(diag));
-    vals[colPtr[j]] = rdiag;
-    for (Index k = 0; k<col_nnz; ++k)
-    {
-      Index i = col_irow[k];
-      //Scale
-      col_vals(k) /= rdiag;
-      //Update the remaining diagonals with col_vals
-      vals[colPtr[i]] -= numext::abs2(col_vals(k));
-    }
-    // Select the largest p elements
-    // p is the original number of elements in the column (without the diagonal)
-    Index p = colPtr[j+1] - colPtr[j] - 1 ;
-    Ref<VectorSx> cvals = col_vals.head(col_nnz);
-    Ref<VectorIx> cirow = col_irow.head(col_nnz);
-    internal::QuickSplit(cvals,cirow, p);
-    // Insert the largest p elements in the matrix
-    Index cpt = 0;
-    for (Index i = colPtr[j]+1; i < colPtr[j+1]; i++)
-    {
-      vals[i] = col_vals(cpt);
-      rowIdx[i] = col_irow(cpt);
-      // restore col_pattern:
-      col_pattern(col_irow(cpt)) = -1;
-      cpt++;
-    }
-    // Get the first smallest row index and put it after the diagonal element
-    Index jk = colPtr(j)+1;
-    updateList(colPtr,rowIdx,vals,j,jk,firstElt,listCol);
-  }
-  m_factorizationIsOk = true;
-  m_info = Success;
+  m_info = NumericalIssue;
+
+  // Try to perform the incomplete factorization using the current shift
+  int iter = 0;
+  do
+  {
+    // Apply the shift to the diagonal elements of the matrix
+    for (Index j = 0; j < n; j++)
+      vals[colPtr[j]] += shift;
+
+    // jki version of the Cholesky factorization
+    Index j=0;
+    for (; j < n; ++j)
+    {
+      // Left-looking factorization of the j-th column
+      // First, load the j-th column into col_vals
+      Scalar diag = vals[colPtr[j]];  // It is assumed that only the lower part is stored
+      col_nnz = 0;
+      for (Index i = colPtr[j] + 1; i < colPtr[j+1]; i++)
+      {
+        StorageIndex l = rowIdx[i];
+        col_vals(col_nnz) = vals[i];
+        col_irow(col_nnz) = l;
+        col_pattern(l) = col_nnz;
+        col_nnz++;
+      }
+      {
+        typename std::list<StorageIndex>::iterator k;
+        // Browse all previous columns that will update column j
+        for(k = listCol[j].begin(); k != listCol[j].end(); k++)
+        {
+          Index jk = firstElt(*k); // First element to use in the column
+          eigen_internal_assert(rowIdx[jk]==j);
+          Scalar v_j_jk = numext::conj(vals[jk]);
+
+          jk += 1;
+          for (Index i = jk; i < colPtr[*k+1]; i++)
+          {
+            StorageIndex l = rowIdx[i];
+            if(col_pattern[l]<0)
+            {
+              col_vals(col_nnz) = vals[i] * v_j_jk;
+              col_irow[col_nnz] = l;
+              col_pattern(l) = col_nnz;
+              col_nnz++;
+            }
+            else
+              col_vals(col_pattern[l]) -= vals[i] * v_j_jk;
+          }
+          updateList(colPtr,rowIdx,vals, *k, jk, firstElt, listCol);
+        }
+      }
+
+      // Scale the current column
+      if(numext::real(diag) <= 0)
+      {
+        if(++iter>=10)
+          return;
+
+        // increase shift
+        shift = numext::maxi(m_initialShift,RealScalar(2)*shift);
+        // restore m_L, col_pattern, and listCol
+        vals = Map<const VectorSx>(L_save.valuePtr(), nnz);
+        rowIdx = Map<const VectorIx>(L_save.innerIndexPtr(), nnz);
+        colPtr = Map<const VectorIx>(L_save.outerIndexPtr(), n+1);
+        col_pattern.fill(-1);
+        for(Index i=0; i<n; ++i)
+          listCol[i].clear();
+
+        break;
+      }
+
+      RealScalar rdiag = sqrt(numext::real(diag));
+      vals[colPtr[j]] = rdiag;
+      for (Index k = 0; k<col_nnz; ++k)
+      {
+        Index i = col_irow[k];
+        //Scale
+        col_vals(k) /= rdiag;
+        //Update the remaining diagonals with col_vals
+        vals[colPtr[i]] -= numext::abs2(col_vals(k));
+      }
+      // Select the largest p elements
+      // p is the original number of elements in the column (without the diagonal)
+      Index p = colPtr[j+1] - colPtr[j] - 1 ;
+      Ref<VectorSx> cvals = col_vals.head(col_nnz);
+      Ref<VectorIx> cirow = col_irow.head(col_nnz);
+      internal::QuickSplit(cvals,cirow, p);
+      // Insert the largest p elements in the matrix
+      Index cpt = 0;
+      for (Index i = colPtr[j]+1; i < colPtr[j+1]; i++)
+      {
+        vals[i] = col_vals(cpt);
+        rowIdx[i] = col_irow(cpt);
+        // restore col_pattern:
+        col_pattern(col_irow(cpt)) = -1;
+        cpt++;
+      }
+      // Get the first smallest row index and put it after the diagonal element
+      Index jk = colPtr(j)+1;
+      updateList(colPtr,rowIdx,vals,j,jk,firstElt,listCol);
+    }
+
+    if(j==n)
+    {
+      m_factorizationIsOk = true;
+      m_info = Success;
+    }
+  } while(m_info!=Success);
 }
 
 template<typename Scalar, int _UpLo, typename OrderingType>
|
template<typename Scalar, int _UpLo, typename OrderingType>
|
||||||
|
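The retry loop above doubles the diagonal shift and restarts the factorization whenever a non-positive pivot shows up, giving up after ten attempts. A minimal usage sketch, assuming the public IncompleteCholesky preconditioner and its setInitialShift() knob as exposed through <Eigen/IterativeLinearSolvers>:

#include <Eigen/Sparse>
#include <Eigen/IterativeLinearSolvers>
#include <iostream>

int main()
{
  // A tiny SPD system (example data only).
  Eigen::SparseMatrix<double> A(3,3);
  A.insert(0,0) = 4.0; A.insert(1,1) = 4.0; A.insert(2,2) = 4.0;
  A.insert(1,0) = 1.0; A.insert(0,1) = 1.0;
  A.makeCompressed();
  Eigen::VectorXd b = Eigen::VectorXd::Ones(3);

  // CG preconditioned by the incomplete Cholesky factorization above;
  // the initial shift feeds the retry loop's starting value.
  Eigen::ConjugateGradient<Eigen::SparseMatrix<double>,
                           Eigen::Lower|Eigen::Upper,
                           Eigen::IncompleteCholesky<double> > cg;
  cg.preconditioner().setInitialShift(1e-3);
  cg.compute(A);
  Eigen::VectorXd x = cg.solve(b);
  std::cout << "#iterations: " << cg.iterations() << "\n";
  return 0;
}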
@ -8,7 +8,7 @@
NOTE: this routine has been adapted from the CSparse library:

Copyright (c) 2006, Timothy A. Davis.
http://www.suitesparse.com

CSparse is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@ -41,12 +41,8 @@
//
// The colamd/symamd library is available at
//
// http://www.suitesparse.com

#ifndef EIGEN_COLAMD_H
#define EIGEN_COLAMD_H
@ -102,9 +98,6 @@ namespace internal {
/* === Definitions ========================================================== */
/* ========================================================================== */

#define ONES_COMPLEMENT(r) (-(r)-1)

/* -------------------------------------------------------------------------- */
@ -739,8 +732,8 @@ static void init_scoring

/* === Extract knobs ==================================================== */

dense_row_count = numext::maxi(IndexType(0), numext::mini(IndexType(knobs [COLAMD_DENSE_ROW] * n_col), n_col)) ;
dense_col_count = numext::maxi(IndexType(0), numext::mini(IndexType(knobs [COLAMD_DENSE_COL] * n_row), n_row)) ;
COLAMD_DEBUG1 (("colamd: densecount: %d %d\n", dense_row_count, dense_col_count)) ;
max_deg = 0 ;
n_col2 = n_col ;
@ -804,7 +797,7 @@ static void init_scoring
else
{
  /* keep track of max degree of remaining rows */
  max_deg = numext::maxi(max_deg, deg) ;
}
}
COLAMD_DEBUG1 (("colamd: Dense and null rows killed: %d\n", n_row - n_row2)) ;
@ -842,7 +835,7 @@ static void init_scoring
/* add row's external degree */
score += Row [row].shared1.degree - 1 ;
/* guard against integer overflow */
score = numext::mini(score, n_col) ;
}
/* determine pruned column length */
col_length = (IndexType) (new_cp - &A [Col [c].start]) ;
@ -914,7 +907,7 @@ static void init_scoring
head [score] = c ;

/* see if this score is less than current min */
min_score = numext::mini(min_score, score) ;

}
@ -1040,7 +1033,7 @@ static IndexType find_ordering /* return the number of garbage collections */

/* === Garbage_collection, if necessary ============================= */

needed_memory = numext::mini(pivot_col_score, n_col - k) ;
if (pfree + needed_memory >= Alen)
{
  pfree = Eigen::internal::garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ;
@ -1099,7 +1092,7 @@ static IndexType find_ordering /* return the number of garbage collections */

/* clear tag on pivot column */
Col [pivot_col].shared1.thickness = pivot_col_thickness ;
max_deg = numext::maxi(max_deg, pivot_row_degree) ;

/* === Kill all rows used to construct pivot row ==================== */
@ -1273,7 +1266,7 @@ static IndexType find_ordering /* return the number of garbage collections */
/* add set difference */
cur_score += row_mark - tag_mark ;
/* integer overflow... */
cur_score = numext::mini(cur_score, n_col) ;
}

/* recompute the column's length */
@ -1386,7 +1379,7 @@ static IndexType find_ordering /* return the number of garbage collections */
cur_score -= Col [col].shared1.thickness ;

/* make sure score is less or equal than the max score */
cur_score = numext::mini(cur_score, max_score) ;
COLAMD_ASSERT (cur_score >= 0) ;

/* store updated score */
@ -1409,7 +1402,7 @@ static IndexType find_ordering /* return the number of garbage collections */
head [cur_score] = col ;

/* see if this score is less than current min */
min_score = numext::mini(min_score, cur_score) ;

}
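The COLAMD hunks above systematically replace the textual COLAMD_MIN/COLAMD_MAX macros with the numext::mini/numext::maxi function templates. A self-contained sketch of the pitfall this avoids (illustrative names, not Eigen code):

#include <iostream>

// Function-style macro: each argument can be evaluated more than once.
#define NAIVE_MAX(a,b) (((a) > (b)) ? (a) : (b))

// Function-template equivalent: each argument is evaluated exactly once,
// and both operands are forced to the same type T.
template <typename T> T safe_max(T a, T b) { return a > b ? a : b; }

int main()
{
  int i = 0, j = 0;
  int m1 = NAIVE_MAX(i++, -1); // expands i++ twice: m1 == 1, i == 2
  int m2 = safe_max(j++, -1);  // j++ runs once:     m2 == 0, j == 1
  std::cout << m1 << " " << m2 << " i=" << i << " j=" << j << "\n";
  return 0;
}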
@ -100,11 +100,11 @@ protected:
enum { OuterSize = IsRowMajor ? BlockRows : BlockCols };
public:

inline sparse_matrix_block_impl(SparseMatrixType& xpr, Index i)
  : m_matrix(xpr), m_outerStart(convert_index(i)), m_outerSize(OuterSize)
{}

inline sparse_matrix_block_impl(SparseMatrixType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
  : m_matrix(xpr), m_outerStart(convert_index(IsRowMajor ? startRow : startCol)), m_outerSize(convert_index(IsRowMajor ? blockRows : blockCols))
{}

@ -112,7 +112,7 @@ public:
inline BlockType& operator=(const SparseMatrixBase<OtherDerived>& other)
{
  typedef typename internal::remove_all<typename SparseMatrixType::Nested>::type _NestedMatrixType;
  _NestedMatrixType& matrix = m_matrix;
  // This assignment is slow if this vector set is not empty
  // and/or it is not at the end of the nonzeros of the underlying matrix.
@ -209,28 +209,28 @@ public:
inline const Scalar* valuePtr() const
{ return m_matrix.valuePtr(); }
inline Scalar* valuePtr()
{ return m_matrix.valuePtr(); }

inline const StorageIndex* innerIndexPtr() const
{ return m_matrix.innerIndexPtr(); }
inline StorageIndex* innerIndexPtr()
{ return m_matrix.innerIndexPtr(); }

inline const StorageIndex* outerIndexPtr() const
{ return m_matrix.outerIndexPtr() + m_outerStart; }
inline StorageIndex* outerIndexPtr()
{ return m_matrix.outerIndexPtr() + m_outerStart; }

inline const StorageIndex* innerNonZeroPtr() const
{ return isCompressed() ? 0 : (m_matrix.innerNonZeroPtr()+m_outerStart); }
inline StorageIndex* innerNonZeroPtr()
{ return isCompressed() ? 0 : (m_matrix.innerNonZeroPtr()+m_outerStart); }

bool isCompressed() const { return m_matrix.innerNonZeroPtr()==0; }

inline Scalar& coeffRef(Index row, Index col)
{
  return m_matrix.coeffRef(row + (IsRowMajor ? m_outerStart : 0), col + (IsRowMajor ? 0 : m_outerStart));
}

inline const Scalar coeff(Index row, Index col) const

@ -264,7 +264,7 @@ public:

protected:

typename internal::ref_selector<SparseMatrixType>::non_const_type m_matrix;
Index m_outerStart;
const internal::variable_if_dynamic<Index, OuterSize> m_outerSize;
@ -373,7 +373,7 @@ public:

/** Column or Row constructor
  */
inline BlockImpl(XprType& xpr, Index i)
  : m_matrix(xpr),
    m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? convert_index(i) : 0),
    m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? convert_index(i) : 0),

@ -383,7 +383,7 @@ public:

/** Dynamic-size constructor
  */
inline BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, Index blockCols)
  : m_matrix(xpr), m_startRow(convert_index(startRow)), m_startCol(convert_index(startCol)), m_blockRows(convert_index(blockRows)), m_blockCols(convert_index(blockCols))
{}

@ -392,8 +392,7 @@ public:

inline Scalar& coeffRef(Index row, Index col)
{
  return m_matrix.coeffRef(row + m_startRow.value(), col + m_startCol.value());
}

inline const Scalar coeff(Index row, Index col) const

@ -403,16 +402,14 @@ public:

inline Scalar& coeffRef(Index index)
{
  return m_matrix.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
                           m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
}

inline const Scalar coeff(Index index) const
{
  return m_matrix.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index),
                        m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
}

inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; }

@ -430,7 +427,7 @@ public:

EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)

typename internal::ref_selector<XprType>::non_const_type m_matrix;
const internal::variable_if_dynamic<Index, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
const internal::variable_if_dynamic<Index, XprType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_blockRows;
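The net effect of these Block hunks is that sparse block expressions now store a proper non-const nested reference instead of casting constness away at each access. A short usage sketch (example values; in-place scaling of a sparse inner vector is assumed to be supported, as it is for compressed storage):

#include <Eigen/Sparse>
#include <iostream>

int main()
{
  Eigen::SparseMatrix<double> m(4,4);
  m.insert(0,1) = 1.0;
  m.insert(2,1) = 2.0;
  m.makeCompressed();

  // col(1) is a Block holding a non-const reference to m, so the write
  // goes through without any internal const_cast.
  m.col(1) *= 2.0;
  std::cout << m.coeff(2,1) << "\n"; // prints 4
  return 0;
}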
@ -117,6 +117,24 @@ template<typename Derived>
class SparseCompressedBase<Derived>::InnerIterator
{
  public:
    InnerIterator()
      : m_values(0), m_indices(0), m_outer(0), m_id(0), m_end(0)
    {}

    InnerIterator(const InnerIterator& other)
      : m_values(other.m_values), m_indices(other.m_indices), m_outer(other.m_outer), m_id(other.m_id), m_end(other.m_end)
    {}

    InnerIterator& operator=(const InnerIterator& other)
    {
      m_values = other.m_values;
      m_indices = other.m_indices;
      const_cast<OuterType&>(m_outer).setValue(other.m_outer.value());
      m_id = other.m_id;
      m_end = other.m_end;
      return *this;
    }

    InnerIterator(const SparseCompressedBase& mat, Index outer)
      : m_values(mat.valuePtr()), m_indices(mat.innerIndexPtr()), m_outer(outer)
    {

@ -162,7 +180,8 @@ class SparseCompressedBase<Derived>::InnerIterator
  protected:
    const Scalar* m_values;
    const StorageIndex* m_indices;
    typedef internal::variable_if_dynamic<Index,Derived::IsVectorAtCompileTime?0:Dynamic> OuterType;
    const OuterType m_outer;
    Index m_id;
    Index m_end;
  private:
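With the new default constructor and assignment operator, an InnerIterator can be declared once and re-seated, which generic code occasionally needs. A minimal sketch:

#include <Eigen/Sparse>
#include <iostream>

int main()
{
  typedef Eigen::SparseMatrix<double> SpMat;
  SpMat m(3,3);
  m.insert(0,0) = 1.0;
  m.insert(2,1) = 3.0;
  m.makeCompressed();

  SpMat::InnerIterator it;                           // default-constructed (new)
  for (int k = 0; k < m.outerSize(); ++k)
    for (it = SpMat::InnerIterator(m,k); it; ++it)   // re-seated via operator= (new)
      std::cout << "(" << it.row() << "," << it.col() << ") = "
                << it.value() << "\n";
  return 0;
}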
@ -49,17 +49,10 @@ class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Sparse>

namespace internal {

// Generic "sparse OP sparse"
template<typename XprType> struct binary_sparse_evaluator;

template<typename BinaryOp, typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IteratorBased, IteratorBased>
  : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
@ -153,6 +146,182 @@ protected:
  evaluator<Rhs> m_rhsImpl;
};

// dense op sparse
template<typename BinaryOp, typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IteratorBased>
  : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
{
protected:
  typedef typename evaluator<Rhs>::InnerIterator  RhsIterator;
  typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
  typedef typename traits<XprType>::Scalar Scalar;
  typedef typename XprType::StorageIndex StorageIndex;
public:

  class ReverseInnerIterator;
  class InnerIterator
  {
    enum { IsRowMajor = (int(Rhs::Flags)&RowMajorBit)==RowMajorBit };
  public:

    EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer)
      : m_lhsEval(aEval.m_lhsImpl), m_rhsIter(aEval.m_rhsImpl,outer), m_functor(aEval.m_functor), m_id(-1), m_innerSize(aEval.m_expr.rhs().innerSize())
    {
      this->operator++();
    }

    EIGEN_STRONG_INLINE InnerIterator& operator++()
    {
      ++m_id;
      if(m_id<m_innerSize)
      {
        Scalar lhsVal = m_lhsEval.coeff(IsRowMajor?m_rhsIter.outer():m_id,
                                        IsRowMajor?m_id:m_rhsIter.outer());
        if(m_rhsIter && m_rhsIter.index()==m_id)
        {
          m_value = m_functor(lhsVal, m_rhsIter.value());
          ++m_rhsIter;
        }
        else
          m_value = m_functor(lhsVal, Scalar(0));
      }

      return *this;
    }

    EIGEN_STRONG_INLINE Scalar value() const { return m_value; }

    EIGEN_STRONG_INLINE StorageIndex index() const { return m_id; }
    EIGEN_STRONG_INLINE Index row() const { return IsRowMajor ? m_rhsIter.outer() : m_id; }
    EIGEN_STRONG_INLINE Index col() const { return IsRowMajor ? m_id : m_rhsIter.outer(); }

    EIGEN_STRONG_INLINE operator bool() const { return m_id<m_innerSize; }

  protected:
    const evaluator<Lhs> &m_lhsEval;
    RhsIterator m_rhsIter;
    const BinaryOp& m_functor;
    Scalar m_value;
    StorageIndex m_id;
    StorageIndex m_innerSize;
  };


  enum {
    CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
    // Expose storage order of the sparse expression
    Flags = (XprType::Flags & ~RowMajorBit) | (int(Rhs::Flags)&RowMajorBit)
  };

  explicit binary_evaluator(const XprType& xpr)
    : m_functor(xpr.functor()),
      m_lhsImpl(xpr.lhs()),
      m_rhsImpl(xpr.rhs()),
      m_expr(xpr)
  {
    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost);
    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
  }

  inline Index nonZerosEstimate() const {
    return m_expr.size();
  }

protected:
  const BinaryOp m_functor;
  evaluator<Lhs> m_lhsImpl;
  evaluator<Rhs> m_rhsImpl;
  const XprType &m_expr;
};

// sparse op dense
template<typename BinaryOp, typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IteratorBased, IndexBased>
  : evaluator_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
{
protected:
  typedef typename evaluator<Lhs>::InnerIterator  LhsIterator;
  typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
  typedef typename traits<XprType>::Scalar Scalar;
  typedef typename XprType::StorageIndex StorageIndex;
public:

  class ReverseInnerIterator;
  class InnerIterator
  {
    enum { IsRowMajor = (int(Lhs::Flags)&RowMajorBit)==RowMajorBit };
  public:

    EIGEN_STRONG_INLINE InnerIterator(const binary_evaluator& aEval, Index outer)
      : m_lhsIter(aEval.m_lhsImpl,outer), m_rhsEval(aEval.m_rhsImpl), m_functor(aEval.m_functor), m_id(-1), m_innerSize(aEval.m_expr.lhs().innerSize())
    {
      this->operator++();
    }

    EIGEN_STRONG_INLINE InnerIterator& operator++()
    {
      ++m_id;
      if(m_id<m_innerSize)
      {
        Scalar rhsVal = m_rhsEval.coeff(IsRowMajor?m_lhsIter.outer():m_id,
                                        IsRowMajor?m_id:m_lhsIter.outer());
        if(m_lhsIter && m_lhsIter.index()==m_id)
        {
          m_value = m_functor(m_lhsIter.value(), rhsVal);
          ++m_lhsIter;
        }
        else
          m_value = m_functor(Scalar(0),rhsVal);
      }

      return *this;
    }

    EIGEN_STRONG_INLINE Scalar value() const { return m_value; }

    EIGEN_STRONG_INLINE StorageIndex index() const { return m_id; }
    EIGEN_STRONG_INLINE Index row() const { return IsRowMajor ? m_lhsIter.outer() : m_id; }
    EIGEN_STRONG_INLINE Index col() const { return IsRowMajor ? m_id : m_lhsIter.outer(); }

    EIGEN_STRONG_INLINE operator bool() const { return m_id<m_innerSize; }

  protected:
    LhsIterator m_lhsIter;
    const evaluator<Rhs> &m_rhsEval;
    const BinaryOp& m_functor;
    Scalar m_value;
    StorageIndex m_id;
    StorageIndex m_innerSize;
  };


  enum {
    CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
    // Expose storage order of the sparse expression
    Flags = (XprType::Flags & ~RowMajorBit) | (int(Lhs::Flags)&RowMajorBit)
  };

  explicit binary_evaluator(const XprType& xpr)
    : m_functor(xpr.functor()),
      m_lhsImpl(xpr.lhs()),
      m_rhsImpl(xpr.rhs()),
      m_expr(xpr)
  {
    EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits<BinaryOp>::Cost);
    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
  }

  inline Index nonZerosEstimate() const {
    return m_expr.size();
  }

protected:
  const BinaryOp m_functor;
  evaluator<Lhs> m_lhsImpl;
  evaluator<Rhs> m_rhsImpl;
  const XprType &m_expr;
};

// "sparse .* sparse"
template<typename T, typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs>, IteratorBased, IteratorBased>
@ -287,7 +456,8 @@ public:

enum {
  CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
  // Expose storage order of the sparse expression
  Flags = (XprType::Flags & ~RowMajorBit) | (int(Rhs::Flags)&RowMajorBit)
};

explicit binary_evaluator(const XprType& xpr)

@ -360,7 +530,8 @@ public:

enum {
  CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
  // Expose storage order of the sparse expression
  Flags = (XprType::Flags & ~RowMajorBit) | (int(Lhs::Flags)&RowMajorBit)
};

explicit binary_evaluator(const XprType& xpr)
@ -428,6 +599,34 @@ SparseMatrixBase<Derived>::cwiseProduct(const MatrixBase<OtherDerived> &other) c
  return typename CwiseProductDenseReturnType<OtherDerived>::Type(derived(), other.derived());
}

template<typename DenseDerived, typename SparseDerived>
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_sum_op<typename DenseDerived::Scalar>, const DenseDerived, const SparseDerived>
operator+(const MatrixBase<DenseDerived> &a, const SparseMatrixBase<SparseDerived> &b)
{
  return CwiseBinaryOp<internal::scalar_sum_op<typename DenseDerived::Scalar>, const DenseDerived, const SparseDerived>(a.derived(), b.derived());
}

template<typename SparseDerived, typename DenseDerived>
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_sum_op<typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>
operator+(const SparseMatrixBase<SparseDerived> &a, const MatrixBase<DenseDerived> &b)
{
  return CwiseBinaryOp<internal::scalar_sum_op<typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>(a.derived(), b.derived());
}

template<typename DenseDerived, typename SparseDerived>
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_difference_op<typename DenseDerived::Scalar>, const DenseDerived, const SparseDerived>
operator-(const MatrixBase<DenseDerived> &a, const SparseMatrixBase<SparseDerived> &b)
{
  return CwiseBinaryOp<internal::scalar_difference_op<typename DenseDerived::Scalar>, const DenseDerived, const SparseDerived>(a.derived(), b.derived());
}

template<typename SparseDerived, typename DenseDerived>
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_difference_op<typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>
operator-(const SparseMatrixBase<SparseDerived> &a, const MatrixBase<DenseDerived> &b)
{
  return CwiseBinaryOp<internal::scalar_difference_op<typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>(a.derived(), b.derived());
}

} // end namespace Eigen

#endif // EIGEN_SPARSE_CWISE_BINARY_OP_H
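Together with the dense-op-sparse evaluators above, these overloads let dense and sparse expressions be mixed directly in sums and differences. A minimal sketch:

#include <Eigen/Dense>
#include <Eigen/Sparse>
#include <iostream>

int main()
{
  Eigen::MatrixXd d = Eigen::MatrixXd::Ones(2,2);
  Eigen::SparseMatrix<double> s(2,2);
  s.insert(0,0) = 3.0;
  s.makeCompressed();

  // Both directions now compile without converting s to dense first;
  // the result evaluates as a dense expression.
  Eigen::MatrixXd sum  = d + s;
  Eigen::MatrixXd diff = s - d;
  std::cout << sum(0,0) << " " << diff(0,0) << "\n"; // 4 2
  return 0;
}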
@ -48,7 +48,7 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, t
// It basically represents the minimal amount of work to be done to be worth it.
if(threads>1 && lhsEval.nonZerosEstimate() > 20000)
{
  #pragma omp parallel for schedule(dynamic,(n+threads*4-1)/(threads*4)) num_threads(threads)
  for(Index i=0; i<n; ++i)
    processRow(lhsEval,rhs,res,alpha,i,c);
}
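The chunk size in the new schedule clause hands each thread roughly four chunks of rows, so a few unusually dense rows no longer serialize the whole product. The same chunking rule in isolation (hypothetical per-row work):

// Compile with: g++ -fopenmp chunks.cpp
#include <omp.h>
#include <cstdio>

int main()
{
  const int n = 1000;
  int threads = omp_get_max_threads();
  // Same formula as the pragma above: about 4 chunks per thread.
  int chunk = (n + threads*4 - 1) / (threads*4);
  double sum = 0.0;
  #pragma omp parallel for schedule(dynamic,chunk) reduction(+:sum)
  for (int i = 0; i < n; ++i)
    sum += i; // stand-in for processRow(...)
  std::printf("%g (chunk=%d)\n", sum, chunk);
  return 0;
}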
@ -538,7 +538,12 @@ class SparseMatrix
}

/** Resizes the matrix to a \a rows x \a cols matrix leaving old values untouched.
  *
  * If the sizes of the matrix are decreased, then the matrix is turned to \b uncompressed-mode
  * and the storage of the out of bounds coefficients is kept and reserved.
  * Call makeCompressed() to pack the entries and squeeze extra memory.
  *
  * \sa reserve(), setZero(), makeCompressed()
  */
void conservativeResize(Index rows, Index cols)
{
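A usage sketch of the documented behavior (example values):

#include <Eigen/Sparse>
#include <iostream>

int main()
{
  Eigen::SparseMatrix<double> m(4,4);
  m.insert(1,1) = 5.0;
  m.makeCompressed();

  m.conservativeResize(2,2);       // entry (1,1) survives; storage is kept
  std::cout << m.isCompressed();   // per the doc above, shrinking switches to uncompressed mode
  m.makeCompressed();              // pack entries and squeeze extra memory
  std::cout << " " << m.coeff(1,1) << "\n"; // 5
  return 0;
}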
@ -55,10 +55,10 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::StorageIndex StorageIndex;
typedef Matrix<StorageIndex,Dynamic,1> VectorI;
typedef typename internal::ref_selector<MatrixType>::non_const_type MatrixTypeNested;
typedef typename internal::remove_all<MatrixTypeNested>::type _MatrixTypeNested;

explicit inline SparseSelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
{
  eigen_assert(rows()==cols() && "SelfAdjointView is only for squared matrices");
}

@ -68,7 +68,7 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView

/** \internal \returns a reference to the nested matrix */
const _MatrixTypeNested& matrix() const { return m_matrix; }
typename internal::remove_reference<MatrixTypeNested>::type& matrix() { return m_matrix; }

/** \returns an expression of the matrix product between a sparse self-adjoint matrix \c *this and a sparse matrix \a rhs.
  *

@ -158,7 +158,7 @@ template<typename MatrixType, unsigned int _Mode> class SparseSelfAdjointView

protected:

MatrixTypeNested m_matrix;
//mutable VectorI m_countPerRow;
//mutable VectorI m_countPerCol;
private:

@ -194,9 +194,9 @@ SparseSelfAdjointView<MatrixType,Mode>::rankUpdate(const SparseMatrixBase<Derive
{
  SparseMatrix<Scalar,(MatrixType::Flags&RowMajorBit)?RowMajor:ColMajor> tmp = u * u.adjoint();
  if(alpha==Scalar(0))
    m_matrix = tmp.template triangularView<Mode>();
  else
    m_matrix += alpha * tmp.template triangularView<Mode>();

  return *this;
}
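A brief sketch of the rank update path above, which now writes through the non-const nested reference (example data):

#include <Eigen/Sparse>
#include <iostream>

int main()
{
  Eigen::SparseMatrix<double> m(3,3);
  m.insert(0,0) = 1.0; m.insert(1,1) = 1.0; m.insert(2,2) = 1.0;
  m.makeCompressed();

  Eigen::SparseMatrix<double> u(3,1);
  u.insert(1,0) = 2.0;
  u.makeCompressed();

  // Lower triangle of m gets m += 0.5 * u * u^T.
  m.selfadjointView<Eigen::Lower>().rankUpdate(u, 0.5);
  std::cout << m.coeff(1,1) << "\n"; // 1 + 0.5*4 = 3
  return 0;
}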
@ -205,23 +205,54 @@ class SparseVector

inline void finalize() {}

/** \copydoc SparseMatrix::prune(const Scalar&,const RealScalar&) */
void prune(const Scalar& reference, const RealScalar& epsilon = NumTraits<RealScalar>::dummy_precision())
{
  m_data.prune(reference,epsilon);
}

/** Resizes the sparse vector to \a rows x \a cols
  *
  * This method is provided for compatibility with matrices.
  * For a column vector, \a cols must be equal to 1.
  * For a row vector, \a rows must be equal to 1.
  *
  * \sa resize(Index)
  */
void resize(Index rows, Index cols)
{
  eigen_assert((IsColVector ? cols : rows)==1 && "Outer dimension must equal 1");
  resize(IsColVector ? rows : cols);
}

/** Resizes the sparse vector to \a newSize
  * This method deletes all entries, thus leaving an empty sparse vector
  *
  * \sa conservativeResize(), setZero() */
void resize(Index newSize)
{
  m_size = newSize;
  m_data.clear();
}

/** Resizes the sparse vector to \a newSize, while leaving old values untouched.
  *
  * If the size of the vector is decreased, then the storage of the out-of-bounds coefficients is kept and reserved.
  * Call .data().squeeze() to free extra memory.
  *
  * \sa reserve(), setZero()
  */
void conservativeResize(Index newSize)
{
  if (newSize < m_size)
  {
    Index i = 0;
    while (i<m_data.size() && m_data.index(i)<newSize) ++i;
    m_data.resize(i);
  }
  m_size = newSize;
}

void resizeNonZeros(Index size) { m_data.resize(size); }

inline SparseVector() : m_size(0) { check_template_parameters(); resize(0); }
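And the vector counterpart in action (example values):

#include <Eigen/Sparse>
#include <iostream>

int main()
{
  Eigen::SparseVector<double> v(10);
  v.insert(2) = 1.0;
  v.insert(7) = 2.0;

  v.conservativeResize(5);   // the entry at index 7 falls out of bounds
  v.data().squeeze();        // free the storage that was kept reserved
  std::cout << v.nonZeros() << " " << v.coeff(2) << "\n"; // 1 1
  return 0;
}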
@ -38,7 +38,7 @@ public:
typedef typename internal::remove_all<MatrixType>::type NestedExpression;

explicit SparseView(const MatrixType& mat, const Scalar& reference = Scalar(0),
                    const RealScalar &epsilon = NumTraits<Scalar>::dummy_precision())
  : m_matrix(mat), m_reference(reference), m_epsilon(epsilon) {}

inline Index rows() const { return m_matrix.rows(); }
@ -128,6 +128,17 @@ class SparseQR : public SparseSolverBase<SparseQR<_MatrixType,_OrderingType> >
inline Index cols() const { return m_pmat.cols();}

/** \returns a const reference to the \b sparse upper triangular matrix R of the QR factorization.
  * \warning The entries of the returned matrix are not sorted. This means that using it in algorithms
  * expecting sorted entries will fail. This includes random coefficient accesses (SparseMatrix::coeff()),
  * and coefficient-wise operations. Matrix products and triangular solves are fine though.
  *
  * To sort the entries, you can assign it to a row-major matrix, and if a column-major matrix
  * is required, you can copy it again:
  * \code
  * SparseMatrix<double>          R  = qr.matrixR();  // column-major, not sorted!
  * SparseMatrix<double,RowMajor> Rr = qr.matrixR();  // row-major, sorted
  * SparseMatrix<double>          Rc = Rr;            // column-major, sorted
  * \endcode
  */
const QRMatrixType& matrixR() const { return m_R; }
@ -22,6 +22,7 @@ typedef CwiseUnaryOp<internal::scalar_tanh_op<Scalar>, const Derived> TanhReturn
typedef CwiseUnaryOp<internal::scalar_sinh_op<Scalar>, const Derived> SinhReturnType;
typedef CwiseUnaryOp<internal::scalar_cosh_op<Scalar>, const Derived> CoshReturnType;
typedef CwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, const Derived> LgammaReturnType;
typedef CwiseUnaryOp<internal::scalar_digamma_op<Scalar>, const Derived> DigammaReturnType;
typedef CwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived> ErfReturnType;
typedef CwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived> ErfcReturnType;
typedef CwiseUnaryOp<internal::scalar_pow_op<Scalar>, const Derived> PowReturnType;
@ -318,6 +319,16 @@ lgamma() const
  return LgammaReturnType(derived());
}

/** \returns an expression of the coefficient-wise digamma (psi, derivative of lgamma).
  *
  * \sa cos(), sin(), tan()
  */
inline const DigammaReturnType
digamma() const
{
  return DigammaReturnType(derived());
}

/** \returns an expression of the coefficient-wise Gauss error
  * function of *this.
  *
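At this revision digamma() is part of the core Array API, so a sketch needs only the dense module (in later releases the special functions moved to unsupported/Eigen/SpecialFunctions):

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::ArrayXd a(2);
  a << 1.0, 2.0;
  // digamma(1) = -gamma ~ -0.5772, digamma(2) = 1 - gamma ~ 0.4228
  std::cout << a.digamma().transpose() << "\n";
  return 0;
}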
8
bench/tensors/README
Normal file
@ -0,0 +1,8 @@
Each benchmark comes in 2 flavors: one that runs on CPU, and one that runs on GPU.

To compile the CPU benchmarks, simply call:
g++ tensor_benchmarks_cpu.cc benchmark_main.cc -I ../../ -std=c++11 -O3 -DNDEBUG -pthread -mavx -o benchmarks_cpu

To compile the GPU benchmarks, simply call:
nvcc tensor_benchmarks_gpu.cu benchmark_main.cc -I ../../ -std=c++11 -O2 -DNDEBUG -arch compute_35 -o benchmarks_gpu
49
bench/tensors/benchmark.h
Normal file
@ -0,0 +1,49 @@
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <stddef.h>
#include <stdint.h>
#include <vector>

namespace testing {
class Benchmark {
 public:
  Benchmark(const char* name, void (*fn)(int)) {
    Register(name, fn, NULL);
  }
  Benchmark(const char* name, void (*fn_range)(int, int)) {
    Register(name, NULL, fn_range);
  }
  Benchmark* Arg(int x);
  Benchmark* Range(int lo, int hi);
  const char* Name();
  bool ShouldRun(int argc, char* argv[]);
  void Run();
 private:
  const char* name_;
  void (*fn_)(int);
  void (*fn_range_)(int, int);
  std::vector<int> args_;
  void Register(const char* name, void (*fn)(int), void (*fn_range)(int, int));
  void RunRepeatedlyWithArg(int iterations, int arg);
  void RunWithArg(int arg);
};
}  // namespace testing
void SetBenchmarkFlopsProcessed(int64_t);
void StopBenchmarkTiming();
void StartBenchmarkTiming();
#define BENCHMARK(f) \
    static ::testing::Benchmark* _benchmark_##f __attribute__((unused)) = \
        (new ::testing::Benchmark(#f, f))
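A toy benchmark registered against this harness could look as follows (hypothetical workload); linking benchmark_main.cc below provides main():

#include "benchmark.h"

// The harness passes the iteration count it settled on as the argument.
static void BM_sum(int iters)
{
  StartBenchmarkTiming();
  long long acc = 0;
  for (int i = 0; i < iters; ++i)
    for (int j = 0; j < 1000; ++j)
      acc += j;
  StopBenchmarkTiming();
  SetBenchmarkFlopsProcessed(static_cast<int64_t>(iters) * 1000);
  volatile long long sink = acc; (void)sink; // defeat over-aggressive optimization
}
BENCHMARK(BM_sum);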
237
bench/tensors/benchmark_main.cc
Normal file
@ -0,0 +1,237 @@
/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "benchmark.h"
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <inttypes.h>
#include <time.h>
#include <map>

static int64_t g_flops_processed;
static int64_t g_benchmark_total_time_ns;
static int64_t g_benchmark_start_time_ns;
typedef std::map<std::string, ::testing::Benchmark*> BenchmarkMap;
typedef BenchmarkMap::iterator BenchmarkMapIt;

BenchmarkMap& gBenchmarks() {
  static BenchmarkMap g_benchmarks;
  return g_benchmarks;
}

static int g_name_column_width = 20;

static int Round(int n) {
  int base = 1;
  while (base*10 < n) {
    base *= 10;
  }
  if (n < 2*base) {
    return 2*base;
  }
  if (n < 5*base) {
    return 5*base;
  }
  return 10*base;
}

#ifdef __APPLE__
  #include <mach/mach_time.h>
  static mach_timebase_info_data_t g_time_info;
  static void __attribute__((constructor)) init_info() {
    mach_timebase_info(&g_time_info);
  }
#endif

static int64_t NanoTime() {
#if defined(__APPLE__)
  uint64_t t = mach_absolute_time();
  return t * g_time_info.numer / g_time_info.denom;
#else
  struct timespec t;
  t.tv_sec = t.tv_nsec = 0;
  clock_gettime(CLOCK_MONOTONIC, &t);
  return static_cast<int64_t>(t.tv_sec) * 1000000000LL + t.tv_nsec;
#endif
}

namespace testing {
Benchmark* Benchmark::Arg(int arg) {
  args_.push_back(arg);
  return this;
}

Benchmark* Benchmark::Range(int lo, int hi) {
  const int kRangeMultiplier = 8;
  if (hi < lo) {
    int temp = hi;
    hi = lo;
    lo = temp;
  }
  while (lo < hi) {
    args_.push_back(lo);
    lo *= kRangeMultiplier;
  }
  // We always run the hi number.
  args_.push_back(hi);
  return this;
}

const char* Benchmark::Name() {
  return name_;
}
bool Benchmark::ShouldRun(int argc, char* argv[]) {
  if (argc == 1) {
    return true;  // With no arguments, we run all benchmarks.
  }
  // Otherwise, we interpret each argument as a regular expression and
  // see if any of our benchmarks match.
  for (int i = 1; i < argc; i++) {
    regex_t re;
    if (regcomp(&re, argv[i], 0) != 0) {
      fprintf(stderr, "couldn't compile \"%s\" as a regular expression!\n", argv[i]);
      exit(EXIT_FAILURE);
    }
    int match = regexec(&re, name_, 0, NULL, 0);
    regfree(&re);
    if (match != REG_NOMATCH) {
      return true;
    }
  }
  return false;
}
void Benchmark::Register(const char* name, void (*fn)(int), void (*fn_range)(int, int)) {
  name_ = name;
  fn_ = fn;
  fn_range_ = fn_range;
  if (fn_ == NULL && fn_range_ == NULL) {
    fprintf(stderr, "%s: missing function\n", name_);
    exit(EXIT_FAILURE);
  }
  gBenchmarks().insert(std::make_pair(name, this));
}
void Benchmark::Run() {
  if (fn_ != NULL) {
    RunWithArg(0);
  } else {
    if (args_.empty()) {
      fprintf(stderr, "%s: no args!\n", name_);
      exit(EXIT_FAILURE);
    }
    for (size_t i = 0; i < args_.size(); ++i) {
      RunWithArg(args_[i]);
    }
  }
}
void Benchmark::RunRepeatedlyWithArg(int iterations, int arg) {
  g_flops_processed = 0;
  g_benchmark_total_time_ns = 0;
  g_benchmark_start_time_ns = NanoTime();
  if (fn_ != NULL) {
    fn_(iterations);
  } else {
    fn_range_(iterations, arg);
  }
  if (g_benchmark_start_time_ns != 0) {
    g_benchmark_total_time_ns += NanoTime() - g_benchmark_start_time_ns;
  }
}
void Benchmark::RunWithArg(int arg) {
  // run once in case it's expensive
  int iterations = 1;
  RunRepeatedlyWithArg(iterations, arg);
  while (g_benchmark_total_time_ns < 1e9 && iterations < 1e9) {
    int last = iterations;
    if (g_benchmark_total_time_ns/iterations == 0) {
      iterations = 1e9;
    } else {
      iterations = 1e9 / (g_benchmark_total_time_ns/iterations);
    }
    iterations = std::max(last + 1, std::min(iterations + iterations/2, 100*last));
    iterations = Round(iterations);
    RunRepeatedlyWithArg(iterations, arg);
  }
  char throughput[100];
  throughput[0] = '\0';
  if (g_benchmark_total_time_ns > 0 && g_flops_processed > 0) {
    double mflops_processed = static_cast<double>(g_flops_processed)/1e6;
    double seconds = static_cast<double>(g_benchmark_total_time_ns)/1e9;
    snprintf(throughput, sizeof(throughput), " %8.2f MFlops/s", mflops_processed/seconds);
  }
  char full_name[100];
  if (fn_range_ != NULL) {
    if (arg >= (1<<20)) {
      snprintf(full_name, sizeof(full_name), "%s/%dM", name_, arg/(1<<20));
    } else if (arg >= (1<<10)) {
      snprintf(full_name, sizeof(full_name), "%s/%dK", name_, arg/(1<<10));
    } else {
      snprintf(full_name, sizeof(full_name), "%s/%d", name_, arg);
    }
  } else {
    snprintf(full_name, sizeof(full_name), "%s", name_);
  }
  printf("%-*s %10d %10" PRId64 "%s\n", g_name_column_width, full_name,
         iterations, g_benchmark_total_time_ns/iterations, throughput);
  fflush(stdout);
}
}  // namespace testing
void SetBenchmarkFlopsProcessed(int64_t x) {
  g_flops_processed = x;
}
void StopBenchmarkTiming() {
  if (g_benchmark_start_time_ns != 0) {
    g_benchmark_total_time_ns += NanoTime() - g_benchmark_start_time_ns;
  }
  g_benchmark_start_time_ns = 0;
}
void StartBenchmarkTiming() {
  if (g_benchmark_start_time_ns == 0) {
    g_benchmark_start_time_ns = NanoTime();
  }
}
int main(int argc, char* argv[]) {
  if (gBenchmarks().empty()) {
    fprintf(stderr, "No benchmarks registered!\n");
    exit(EXIT_FAILURE);
  }
  for (BenchmarkMapIt it = gBenchmarks().begin(); it != gBenchmarks().end(); ++it) {
    int name_width = static_cast<int>(strlen(it->second->Name()));
    g_name_column_width = std::max(g_name_column_width, name_width);
  }
  bool need_header = true;
  for (BenchmarkMapIt it = gBenchmarks().begin(); it != gBenchmarks().end(); ++it) {
    ::testing::Benchmark* b = it->second;
    if (b->ShouldRun(argc, argv)) {
      if (need_header) {
        printf("%-*s %10s %10s\n", g_name_column_width, "", "iterations", "ns/op");
        fflush(stdout);
        need_header = false;
      }
      b->Run();
    }
  }
  if (need_header) {
    fprintf(stderr, "No matching benchmarks!\n");
    fprintf(stderr, "Available benchmarks:\n");
    for (BenchmarkMapIt it = gBenchmarks().begin(); it != gBenchmarks().end(); ++it) {
      fprintf(stderr, "  %s\n", it->second->Name());
    }
    exit(EXIT_FAILURE);
  }
  return 0;
}
@@ -4,13 +4,15 @@
 typedef int TensorIndex;
 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
 
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
-#include "testing/base/public/benchmark.h"
+#include "unsupported/Eigen/CXX11/Tensor"
+#include "benchmark.h"
 
+#define BENCHMARK_RANGE(bench, lo, hi) \
+  BENCHMARK(bench)->Range(lo, hi)
 
 using Eigen::Tensor;
 using Eigen::TensorMap;
 
 
 // TODO(bsteiner): also templatize on the input type since we have users
 // for int8 as well as floats.
 template <typename Device> class BenchmarkSuite {
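
BENCHMARK_RANGE is the registration entry point used by every BM_* macro later in this patch. A hypothetical benchmark written directly against it could look like this sketch (BM_demo and its body are illustrative, not part of the patch; the timing and reporting calls come from benchmark.h above):

#include <vector>

static void BM_demo(int iters, int N) {
  StopBenchmarkTiming();                   // exclude setup from the timed region
  std::vector<float> v(N, 1.0f);
  StartBenchmarkTiming();
  float sum = 0.f;
  for (int i = 0; i < iters; ++i)
    for (int j = 0; j < N; ++j) sum += v[j];
  StopBenchmarkTiming();
  volatile float sink = sum; (void)sink;   // defeat dead-code elimination
  SetBenchmarkFlopsProcessed(static_cast<int64_t>(N) * iters);
}
BENCHMARK_RANGE(BM_demo, 10, 5000);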
@@ -38,12 +40,26 @@ template <typename Device> class BenchmarkSuite {
       device_.memcpy(c_, a_, m_ * m_ * sizeof(float));
     }
     // Record the number of values copied per second
-    finalizeBenchmark(m_ * m_ * num_iters);
+    finalizeBenchmark(static_cast<int64_t>(m_) * m_ * num_iters);
+  }
+
+  void typeCasting(int num_iters) {
+    eigen_assert(m_ == n_);
+    const Eigen::array<TensorIndex, 2> sizes = {{m_, k_}};
+    const TensorMap<Tensor<float, 2, 0, TensorIndex>, Eigen::Aligned> A(a_, sizes);
+    TensorMap<Tensor<int, 2, 0, TensorIndex>, Eigen::Aligned> B((int*)b_, sizes);
+
+    StartBenchmarkTiming();
+    for (int iter = 0; iter < num_iters; ++iter) {
+      B.device(device_) = A.cast<int>();
+    }
+    // Record the number of values copied per second
+    finalizeBenchmark(static_cast<int64_t>(m_) * k_ * num_iters);
   }
 
   void random(int num_iters) {
     eigen_assert(m_ == k_ && k_ == n_);
-    const Eigen::array<TensorIndex, 2> sizes(m_, m_);
+    const Eigen::array<TensorIndex, 2> sizes = {{m_, m_}};
     TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, sizes);
 
     StartBenchmarkTiming();
@@ -51,21 +67,21 @@ template <typename Device> class BenchmarkSuite {
       C.device(device_) = C.random();
     }
     // Record the number of random numbers generated per second
-    finalizeBenchmark(m_ * m_ * num_iters);
+    finalizeBenchmark(static_cast<int64_t>(m_) * m_ * num_iters);
   }
 
   void slicing(int num_iters) {
     eigen_assert(m_ == k_ && k_ == n_);
-    const Eigen::array<TensorIndex, 2> sizes(m_, m_);
+    const Eigen::array<TensorIndex, 2> sizes = {{m_, m_}};
     const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, sizes);
     const TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, sizes);
     TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, sizes);
 
-    const Eigen::DSizes<TensorIndex, 2> quarter_sizes(Eigen::array<TensorIndex, 2>(m_/2, m_/2));
-    const Eigen::DSizes<TensorIndex, 2> first_quadrant(Eigen::array<TensorIndex, 2>(0, 0));
-    const Eigen::DSizes<TensorIndex, 2> second_quadrant(Eigen::array<TensorIndex, 2>(0, m_/2));
-    const Eigen::DSizes<TensorIndex, 2> third_quadrant(Eigen::array<TensorIndex, 2>(m_/2, 0));
-    const Eigen::DSizes<TensorIndex, 2> fourth_quadrant(Eigen::array<TensorIndex, 2>(m_/2, m_/2));
+    const Eigen::DSizes<TensorIndex, 2> quarter_sizes(m_/2, m_/2);
+    const Eigen::DSizes<TensorIndex, 2> first_quadrant(0, 0);
+    const Eigen::DSizes<TensorIndex, 2> second_quadrant(0, m_/2);
+    const Eigen::DSizes<TensorIndex, 2> third_quadrant(m_/2, 0);
+    const Eigen::DSizes<TensorIndex, 2> fourth_quadrant(m_/2, m_/2);
 
     StartBenchmarkTiming();
     for (int iter = 0; iter < num_iters; ++iter) {
@@ -80,31 +96,59 @@ template <typename Device> class BenchmarkSuite {
     }
     // Record the number of values copied from the rhs slice to the lhs slice
    // each second
-    finalizeBenchmark(m_ * m_ * num_iters);
+    finalizeBenchmark(static_cast<int64_t>(m_) * m_ * num_iters);
+  }
+
+  void rowChip(int num_iters) {
+    const Eigen::array<TensorIndex, 2> input_size = {{k_, n_}};
+    const TensorMap<Tensor<float, 2, 0, TensorIndex>, Eigen::Aligned> B(b_, input_size);
+    const Eigen::array<TensorIndex, 1> output_size = {{n_}};
+    TensorMap<Tensor<float, 1, 0, TensorIndex>, Eigen::Aligned> C(c_, output_size);
+
+    StartBenchmarkTiming();
+    for (int iter = 0; iter < num_iters; ++iter) {
+      C.device(device_) = B.chip(iter % k_, 0);
+    }
+    // Record the number of values copied from the rhs chip to the lhs.
+    finalizeBenchmark(static_cast<int64_t>(n_) * num_iters);
+  }
+
+  void colChip(int num_iters) {
+    const Eigen::array<TensorIndex, 2> input_size = {{k_, n_}};
+    const TensorMap<Tensor<float, 2, 0, TensorIndex>, Eigen::Aligned> B(b_, input_size);
+    const Eigen::array<TensorIndex, 1> output_size = {{n_}};
+    TensorMap<Tensor<float, 1, 0, TensorIndex>, Eigen::Aligned> C(c_, output_size);
+
+    StartBenchmarkTiming();
+    for (int iter = 0; iter < num_iters; ++iter) {
+      C.device(device_) = B.chip(iter % n_, 1);
+    }
+    // Record the number of values copied from the rhs chip to the lhs.
+    finalizeBenchmark(static_cast<int64_t>(n_) * num_iters);
   }
 
   void shuffling(int num_iters) {
     eigen_assert(m_ == n_);
-    const Eigen::array<TensorIndex, 2> size_a(m_, k_);
+    const Eigen::array<TensorIndex, 2> size_a = {{m_, k_}};
     const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, size_a);
-    const Eigen::array<TensorIndex, 2> size_b(k_, m_);
+    const Eigen::array<TensorIndex, 2> size_b = {{k_, m_}};
     TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, size_b);
 
-    const Eigen::array<int, 2> shuffle(1, 0);
+    const Eigen::array<int, 2> shuffle = {{1, 0}};
 
     StartBenchmarkTiming();
     for (int iter = 0; iter < num_iters; ++iter) {
       B.device(device_) = A.shuffle(shuffle);
     }
     // Record the number of values shuffled from A and copied to B each second
-    finalizeBenchmark(m_ * k_ * num_iters);
+    finalizeBenchmark(static_cast<int64_t>(m_) * k_ * num_iters);
   }
 
   void padding(int num_iters) {
     eigen_assert(m_ == k_);
-    const Eigen::array<TensorIndex, 2> size_a(m_, k_-3);
+    const Eigen::array<TensorIndex, 2> size_a = {{m_, k_-3}};
     const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, size_a);
-    const Eigen::array<TensorIndex, 2> size_b(k_, m_);
+    const Eigen::array<TensorIndex, 2> size_b = {{k_, m_}};
     TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, size_b);
 
     Eigen::array<Eigen::IndexPair<TensorIndex>, 2> paddings;
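
The new rowChip and colChip benchmarks exercise Tensor::chip, which selects the i-th slice along a dimension and drops that dimension. A quick reference sketch, assuming the unsupported Tensor module (chip_demo is illustrative):

#include <unsupported/Eigen/CXX11/Tensor>

void chip_demo() {
  Eigen::Tensor<float, 2> t(3, 4);
  t.setRandom();
  Eigen::Tensor<float, 1> row = t.chip(1, 0);  // the 4 coefficients t(1, j)
  Eigen::Tensor<float, 1> col = t.chip(2, 1);  // the 3 coefficients t(i, 2)
}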
@@ -116,35 +160,34 @@ template <typename Device> class BenchmarkSuite {
       B.device(device_) = A.pad(paddings);
     }
     // Record the number of values copied from the padded tensor A each second
-    finalizeBenchmark(m_ * k_ * num_iters);
+    finalizeBenchmark(static_cast<int64_t>(m_) * k_ * num_iters);
   }
 
   void striding(int num_iters) {
     eigen_assert(m_ == k_);
-    const Eigen::array<TensorIndex, 2> size_a(m_, k_);
+    const Eigen::array<TensorIndex, 2> size_a = {{m_, k_}};
     const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, size_a);
-    const Eigen::array<TensorIndex, 2> size_b(m_, k_ / 2);
+    const Eigen::array<TensorIndex, 2> size_b = {{m_, k_ / 2}};
     TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, size_b);
 
-    const Eigen::array<TensorIndex, 2> strides(1, 2);
+    const Eigen::array<TensorIndex, 2> strides = {{1, 2}};
 
     StartBenchmarkTiming();
     for (int iter = 0; iter < num_iters; ++iter) {
       B.device(device_) = A.stride(strides);
     }
     // Record the number of values copied from the padded tensor A each second
-    finalizeBenchmark(m_ * k_ * num_iters);
+    finalizeBenchmark(static_cast<int64_t>(m_) * k_ * num_iters);
   }
 
   void broadcasting(int num_iters) {
-    const Eigen::array<TensorIndex, 2> size_a(m_, 1);
+    const Eigen::array<TensorIndex, 2> size_a = {{m_, 1}};
     const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, size_a);
-    const Eigen::array<TensorIndex, 2> size_c(m_, n_);
+    const Eigen::array<TensorIndex, 2> size_c = {{m_, n_}};
     TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, size_c);
 
-#if defined(__CUDACC__)
-    // nvcc doesn't support cxx11
-    const Eigen::array<int, 2> broadcast(1, n_);
+#ifndef EIGEN_HAS_INDEX_LIST
+    const Eigen::array<int, 2> broadcast = {{1, n_}};
 #else
     // Take advantage of cxx11 to give the compiler information it can use to
     // optimize the code.
@@ -157,12 +200,12 @@ template <typename Device> class BenchmarkSuite {
       C.device(device_) = A.broadcast(broadcast);
     }
     // Record the number of values broadcasted from A and copied to C each second
-    finalizeBenchmark(m_ * n_ * num_iters);
+    finalizeBenchmark(static_cast<int64_t>(m_) * n_ * num_iters);
   }
 
   void coeffWiseOp(int num_iters) {
     eigen_assert(m_ == k_ && k_ == n_);
-    const Eigen::array<TensorIndex, 2> sizes(m_, m_);
+    const Eigen::array<TensorIndex, 2> sizes = {{m_, m_}};
     const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, sizes);
     const TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, sizes);
     TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, sizes);
|
|||||||
}
|
}
|
||||||
// Record the number of FLOP executed per second (2 multiplications and
|
// Record the number of FLOP executed per second (2 multiplications and
|
||||||
// 1 addition per value)
|
// 1 addition per value)
|
||||||
finalizeBenchmark(3 * m_ * m_ * num_iters);
|
finalizeBenchmark(static_cast<int64_t>(3) * m_ * m_ * num_iters);
|
||||||
}
|
}
|
||||||
|
|
||||||
void algebraicFunc(int num_iters) {
|
void algebraicFunc(int num_iters) {
|
||||||
eigen_assert(m_ == k_ && k_ == n_);
|
eigen_assert(m_ == k_ && k_ == n_);
|
||||||
const Eigen::array<TensorIndex, 2> sizes(m_, m_);
|
const Eigen::array<TensorIndex, 2> sizes = {{m_, m_}};
|
||||||
const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, sizes);
|
const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, sizes);
|
||||||
const TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, sizes);
|
const TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, sizes);
|
||||||
TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, sizes);
|
TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, sizes);
|
||||||
@ -189,12 +232,12 @@ template <typename Device> class BenchmarkSuite {
|
|||||||
}
|
}
|
||||||
// Record the number of FLOP executed per second (assuming one operation
|
// Record the number of FLOP executed per second (assuming one operation
|
||||||
// per value)
|
// per value)
|
||||||
finalizeBenchmark(m_ * m_ * num_iters);
|
finalizeBenchmark(static_cast<int64_t>(m_) * m_ * num_iters);
|
||||||
}
|
}
|
||||||
|
|
||||||
void transcendentalFunc(int num_iters) {
|
void transcendentalFunc(int num_iters) {
|
||||||
eigen_assert(m_ == k_ && k_ == n_);
|
eigen_assert(m_ == k_ && k_ == n_);
|
||||||
const Eigen::array<TensorIndex, 2> sizes(m_, m_);
|
const Eigen::array<TensorIndex, 2> sizes = {{m_, m_}};
|
||||||
const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, sizes);
|
const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, sizes);
|
||||||
const TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, sizes);
|
const TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, sizes);
|
||||||
TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, sizes);
|
TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, sizes);
|
||||||
@ -205,17 +248,23 @@ template <typename Device> class BenchmarkSuite {
|
|||||||
}
|
}
|
||||||
// Record the number of FLOP executed per second (assuming one operation
|
// Record the number of FLOP executed per second (assuming one operation
|
||||||
// per value)
|
// per value)
|
||||||
finalizeBenchmark(m_ * m_ * num_iters);
|
finalizeBenchmark(static_cast<int64_t>(m_) * m_ * num_iters);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Simple reduction
|
// Row reduction
|
||||||
void reduction(int num_iters) {
|
void rowReduction(int num_iters) {
|
||||||
const Eigen::array<TensorIndex, 2> input_size(k_, n_);
|
const Eigen::array<TensorIndex, 2> input_size = {{k_, n_}};
|
||||||
const TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, input_size);
|
const TensorMap<Tensor<float, 2, 0, TensorIndex>, Eigen::Aligned> B(b_, input_size);
|
||||||
const Eigen::array<TensorIndex, 1> output_size(n_);
|
const Eigen::array<TensorIndex, 1> output_size = {{n_}};
|
||||||
TensorMap<Tensor<float, 1>, Eigen::Aligned> C(c_, output_size);
|
TensorMap<Tensor<float, 1, 0, TensorIndex>, Eigen::Aligned> C(c_, output_size);
|
||||||
|
|
||||||
const Eigen::array<TensorIndex, 1> sum_along_dim(0);
|
#ifndef EIGEN_HAS_INDEX_LIST
|
||||||
|
const Eigen::array<TensorIndex, 1> sum_along_dim = {{0}};
|
||||||
|
#else
|
||||||
|
// Take advantage of cxx11 to give the compiler information it can use to
|
||||||
|
// optimize the code.
|
||||||
|
Eigen::IndexList<Eigen::type2index<0>> sum_along_dim;
|
||||||
|
#endif
|
||||||
|
|
||||||
StartBenchmarkTiming();
|
StartBenchmarkTiming();
|
||||||
for (int iter = 0; iter < num_iters; ++iter) {
|
for (int iter = 0; iter < num_iters; ++iter) {
|
||||||
@ -223,21 +272,47 @@ template <typename Device> class BenchmarkSuite {
|
|||||||
}
|
}
|
||||||
// Record the number of FLOP executed per second (assuming one operation
|
// Record the number of FLOP executed per second (assuming one operation
|
||||||
// per value)
|
// per value)
|
||||||
finalizeBenchmark(m_ * m_ * num_iters);
|
finalizeBenchmark(static_cast<int64_t>(k_) * n_ * num_iters);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Column reduction
|
||||||
|
void colReduction(int num_iters) {
|
||||||
|
const Eigen::array<TensorIndex, 2> input_size = {{k_, n_}};
|
||||||
|
const TensorMap<Tensor<float, 2, 0, TensorIndex>, Eigen::Aligned> B(
|
||||||
|
b_, input_size);
|
||||||
|
const Eigen::array<TensorIndex, 1> output_size = {{k_}};
|
||||||
|
TensorMap<Tensor<float, 1, 0, TensorIndex>, Eigen::Aligned> C(
|
||||||
|
c_, output_size);
|
||||||
|
|
||||||
|
#ifndef EIGEN_HAS_INDEX_LIST
|
||||||
|
const Eigen::array<TensorIndex, 1> sum_along_dim = {{1}};
|
||||||
|
#else
|
||||||
|
// Take advantage of cxx11 to give the compiler information it can use to
|
||||||
|
// optimize the code.
|
||||||
|
Eigen::IndexList<Eigen::type2index<1>> sum_along_dim;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
StartBenchmarkTiming();
|
||||||
|
for (int iter = 0; iter < num_iters; ++iter) {
|
||||||
|
C.device(device_) = B.sum(sum_along_dim);
|
||||||
|
}
|
||||||
|
// Record the number of FLOP executed per second (assuming one operation
|
||||||
|
// per value)
|
||||||
|
finalizeBenchmark(static_cast<int64_t>(k_) * n_ * num_iters);
|
||||||
}
|
}
|
||||||
|
|
||||||
// do a contraction which is equivalent to a matrix multiplication
|
// do a contraction which is equivalent to a matrix multiplication
|
||||||
void contraction(int num_iters) {
|
void contraction(int num_iters) {
|
||||||
const Eigen::array<TensorIndex, 2> sizeA(m_, k_);
|
const Eigen::array<TensorIndex, 2> sizeA = {{m_, k_}};
|
||||||
const Eigen::array<TensorIndex, 2> sizeB(k_, n_);
|
const Eigen::array<TensorIndex, 2> sizeB = {{k_, n_}};
|
||||||
const Eigen::array<TensorIndex, 2> sizeC(m_, n_);
|
const Eigen::array<TensorIndex, 2> sizeC = {{m_, n_}};
|
||||||
|
|
||||||
const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, sizeA);
|
const TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, sizeA);
|
||||||
const TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, sizeB);
|
const TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, sizeB);
|
||||||
TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, sizeC);
|
TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, sizeC);
|
||||||
|
|
||||||
typedef typename Tensor<float, 2>::DimensionPair DimPair;
|
typedef typename Tensor<float, 2>::DimensionPair DimPair;
|
||||||
const Eigen::array<DimPair, 1> dims(DimPair(1, 0));
|
const Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
|
||||||
|
|
||||||
StartBenchmarkTiming();
|
StartBenchmarkTiming();
|
||||||
for (int iter = 0; iter < num_iters; ++iter) {
|
for (int iter = 0; iter < num_iters; ++iter) {
|
||||||
@ -245,18 +320,18 @@ template <typename Device> class BenchmarkSuite {
|
|||||||
}
|
}
|
||||||
// Record the number of FLOP executed per second (size_ multiplications and
|
// Record the number of FLOP executed per second (size_ multiplications and
|
||||||
// additions for each value in the resulting tensor)
|
// additions for each value in the resulting tensor)
|
||||||
finalizeBenchmark(static_cast<int64>(2) * m_ * n_ * k_ * num_iters);
|
finalizeBenchmark(static_cast<int64_t>(2) * m_ * n_ * k_ * num_iters);
|
||||||
}
|
}
|
||||||
|
|
||||||
void convolution(int num_iters, int kernel_x, int kernel_y) {
|
void convolution(int num_iters, int kernel_x, int kernel_y) {
|
||||||
const Eigen::array<TensorIndex, 2> input_sizes(m_, n_);
|
const Eigen::array<TensorIndex, 2> input_sizes = {{m_, n_}};
|
||||||
TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, input_sizes);
|
TensorMap<Tensor<float, 2>, Eigen::Aligned> A(a_, input_sizes);
|
||||||
const Eigen::array<TensorIndex, 2> kernel_sizes(kernel_x, kernel_y);
|
const Eigen::array<TensorIndex, 2> kernel_sizes = {{kernel_x, kernel_y}};
|
||||||
TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, kernel_sizes);
|
TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, kernel_sizes);
|
||||||
const Eigen::array<TensorIndex, 2> result_sizes(
|
const Eigen::array<TensorIndex, 2> result_sizes =
|
||||||
m_ - kernel_x + 1, n_ - kernel_y + 1);
|
{{m_ - kernel_x + 1, n_ - kernel_y + 1}};
|
||||||
TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, result_sizes);
|
TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, result_sizes);
|
||||||
Eigen::array<Tensor<float, 2>::Index, 2> dims(0, 1);
|
Eigen::array<Tensor<float, 2>::Index, 2> dims = {{0, 1}};
|
||||||
|
|
||||||
StartBenchmarkTiming();
|
StartBenchmarkTiming();
|
||||||
for (int iter = 0; iter < num_iters; ++iter) {
|
for (int iter = 0; iter < num_iters; ++iter) {
|
||||||
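
The corrected count above follows the usual FLOP convention for an (m x k) * (k x n) product: each of the m*n output coefficients costs k multiplies and roughly k additions. As a hypothetical helper making that explicit:

#include <cstdint>

// ~2*m*n*k floating-point operations per contraction, times the iterations.
inline int64_t contraction_flops(int64_t m, int64_t k, int64_t n, int64_t iters) {
  return 2 * m * n * k * iters;
}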
@@ -264,8 +339,8 @@ template <typename Device> class BenchmarkSuite {
     }
     // Record the number of FLOP executed per second (kernel_size
     // multiplications and additions for each value in the resulting tensor)
-    finalizeBenchmark(
-        (m_ - kernel_x + 1) * (n_ - kernel_y + 1) * kernel_x * kernel_y * 2 * num_iters);
+    finalizeBenchmark(static_cast<int64_t>(2) *
+        (m_ - kernel_x + 1) * (n_ - kernel_y + 1) * kernel_x * kernel_y * num_iters);
   }
 
  private:
@@ -280,23 +355,23 @@ template <typename Device> class BenchmarkSuite {
     device_.memset(b_, 23, k_ * n_ * sizeof(float));
     device_.memset(c_, 31, m_ * n_ * sizeof(float));
 
-    BenchmarkUseRealTime();
+    //BenchmarkUseRealTime();
   }
 
-  inline void finalizeBenchmark(int64 num_items) {
+  inline void finalizeBenchmark(int64_t num_items) {
 #if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
     if (Eigen::internal::is_same<Device, Eigen::GpuDevice>::value) {
       device_.synchronize();
     }
 #endif
     StopBenchmarkTiming();
-    SetBenchmarkItemsProcessed(num_items);
+    SetBenchmarkFlopsProcessed(num_items);
   }
 
 
-  size_t m_;
-  size_t k_;
-  size_t n_;
+  TensorIndex m_;
+  TensorIndex k_;
+  TensorIndex n_;
   float* a_;
   float* b_;
   float* c_;
@@ -1,19 +1,12 @@
 #define EIGEN_USE_THREADS
 
-#include "base/sysinfo.h"
-#include "strings/strcat.h"
-#include "third_party/eigen3/tensor_benchmarks.h"
-#include "thread/threadpool.h"
+#include <string>
+
+#include "tensor_benchmarks.h"
 
-#ifdef __ANDROID__
 #define CREATE_THREAD_POOL(threads) \
-Eigen::ThreadPoolDevice device(threads);
-#else
-#define CREATE_THREAD_POOL(threads) \
-ThreadPool tp(threads); \
-tp.StartWorkers(); \
-Eigen::ThreadPoolDevice device(&tp, threads);
-#endif
+Eigen::ThreadPool pool(threads); \
+Eigen::ThreadPoolDevice device(&pool, threads);
 
 // Simple functions
 #define BM_FuncCPU(FUNC, THREADS) \
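
The rewritten macro matches the ThreadPoolDevice constructor that borrows an externally owned pool rather than spawning its own threads. Expanded for eight threads, CREATE_THREAD_POOL(8) amounts to this sketch:

Eigen::ThreadPool pool(8);                 // owns the worker threads
Eigen::ThreadPoolDevice device(&pool, 8);  // borrows the pool by pointer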
@@ -22,7 +15,6 @@ Eigen::ThreadPoolDevice device(&tp, threads);
     CREATE_THREAD_POOL(THREADS); \
     BenchmarkSuite<Eigen::ThreadPoolDevice> suite(device, N); \
     suite.FUNC(iters); \
-    SetBenchmarkLabel(StrCat("using ", THREADS, " threads")); \
   } \
   BENCHMARK_RANGE(BM_##FUNC##_##THREADS##T, 10, 5000);
 
@@ -30,6 +22,10 @@ BM_FuncCPU(memcpy, 4);
 BM_FuncCPU(memcpy, 8);
 BM_FuncCPU(memcpy, 12);
 
+BM_FuncCPU(typeCasting, 4);
+BM_FuncCPU(typeCasting, 8);
+BM_FuncCPU(typeCasting, 12);
+
 BM_FuncCPU(random, 4);
 BM_FuncCPU(random, 8);
 BM_FuncCPU(random, 12);
@@ -38,6 +34,14 @@ BM_FuncCPU(slicing, 4);
 BM_FuncCPU(slicing, 8);
 BM_FuncCPU(slicing, 12);
 
+BM_FuncCPU(rowChip, 4);
+BM_FuncCPU(rowChip, 8);
+BM_FuncCPU(rowChip, 12);
+
+BM_FuncCPU(colChip, 4);
+BM_FuncCPU(colChip, 8);
+BM_FuncCPU(colChip, 12);
+
 BM_FuncCPU(shuffling, 4);
 BM_FuncCPU(shuffling, 8);
 BM_FuncCPU(shuffling, 12);
@@ -66,9 +70,13 @@ BM_FuncCPU(transcendentalFunc, 4);
 BM_FuncCPU(transcendentalFunc, 8);
 BM_FuncCPU(transcendentalFunc, 12);
 
-BM_FuncCPU(reduction, 4);
-BM_FuncCPU(reduction, 8);
-BM_FuncCPU(reduction, 12);
+BM_FuncCPU(rowReduction, 4);
+BM_FuncCPU(rowReduction, 8);
+BM_FuncCPU(rowReduction, 12);
+
+BM_FuncCPU(colReduction, 4);
+BM_FuncCPU(colReduction, 8);
+BM_FuncCPU(colReduction, 12);
 
 
 // Contractions
@@ -84,7 +92,6 @@ BM_FuncCPU(reduction, 12);
     BenchmarkSuite<Eigen::ThreadPoolDevice> suite(device, D1, D2, D3); \
     suite.FUNC(iters); \
   } \
-  SetBenchmarkLabel(StrCat("using ", THREADS, " threads")); \
 } \
 BENCHMARK_RANGE(BM_##FUNC##_##D1##x##D2##x##D3##_##THREADS##T, 10, 5000);
 
@@ -107,6 +114,12 @@ BM_FuncWithInputDimsCPU(contraction, N, 64, N, 8);
 BM_FuncWithInputDimsCPU(contraction, N, 64, N, 12);
 BM_FuncWithInputDimsCPU(contraction, N, 64, N, 16);
 
+BM_FuncWithInputDimsCPU(contraction, N, N, 64, 1);
+BM_FuncWithInputDimsCPU(contraction, N, N, 64, 4);
+BM_FuncWithInputDimsCPU(contraction, N, N, 64, 8);
+BM_FuncWithInputDimsCPU(contraction, N, N, 64, 12);
+BM_FuncWithInputDimsCPU(contraction, N, N, 64, 16);
+
 BM_FuncWithInputDimsCPU(contraction, 1, N, N, 1);
 BM_FuncWithInputDimsCPU(contraction, 1, N, N, 4);
 BM_FuncWithInputDimsCPU(contraction, 1, N, N, 8);
@@ -127,7 +140,6 @@ BM_FuncWithInputDimsCPU(contraction, N, N, 1, 16);
     CREATE_THREAD_POOL(THREADS); \
     BenchmarkSuite<Eigen::ThreadPoolDevice> suite(device, N); \
     suite.FUNC(iters, DIM1, DIM2); \
-    SetBenchmarkLabel(StrCat("using ", THREADS, " threads")); \
   } \
 BENCHMARK_RANGE(BM_##FUNC##_##DIM1##x##DIM2##_##THREADS##T, 128, 5000);
 
@@ -3,47 +3,47 @@
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include <iostream>
-#include "strings/strcat.h"
-#include "third_party/eigen3/tensor_benchmarks.h"
 
+#include "tensor_benchmarks.h"
 
 // Simple functions
 #define BM_FuncGPU(FUNC) \
   static void BM_##FUNC(int iters, int N) { \
     StopBenchmarkTiming(); \
-    cudaStream_t stream; \
-    cudaStreamCreate(&stream); \
+    Eigen::CudaStreamDevice stream; \
     Eigen::GpuDevice device(&stream); \
     BenchmarkSuite<Eigen::GpuDevice> suite(device, N); \
     cudaDeviceSynchronize(); \
     suite.FUNC(iters); \
-    cudaStreamDestroy(stream); \
   } \
   BENCHMARK_RANGE(BM_##FUNC, 10, 5000);
 
 BM_FuncGPU(memcpy);
+BM_FuncGPU(typeCasting);
 BM_FuncGPU(random);
 BM_FuncGPU(slicing);
+BM_FuncGPU(rowChip);
+BM_FuncGPU(colChip);
 BM_FuncGPU(shuffling);
 BM_FuncGPU(padding);
 BM_FuncGPU(striding);
 BM_FuncGPU(broadcasting);
 BM_FuncGPU(coeffWiseOp);
-BM_FuncGPU(reduction);
+BM_FuncGPU(algebraicFunc);
+BM_FuncGPU(transcendentalFunc);
+BM_FuncGPU(rowReduction);
+BM_FuncGPU(colReduction);
 
 
 // Contractions
 #define BM_FuncWithInputDimsGPU(FUNC, D1, D2, D3) \
   static void BM_##FUNC##_##D1##x##D2##x##D3(int iters, int N) { \
     StopBenchmarkTiming(); \
-    cudaStream_t stream; \
-    cudaStreamCreate(&stream); \
+    Eigen::CudaStreamDevice stream; \
     Eigen::GpuDevice device(&stream); \
     BenchmarkSuite<Eigen::GpuDevice> suite(device, D1, D2, D3); \
     cudaDeviceSynchronize(); \
     suite.FUNC(iters); \
-    cudaStreamDestroy(stream); \
   } \
   BENCHMARK_RANGE(BM_##FUNC##_##D1##x##D2##x##D3, 10, 5000);
 
@@ -51,19 +51,18 @@ BM_FuncGPU(reduction);
 BM_FuncWithInputDimsGPU(contraction, N, N, N);
 BM_FuncWithInputDimsGPU(contraction, 64, N, N);
 BM_FuncWithInputDimsGPU(contraction, N, 64, N);
+BM_FuncWithInputDimsGPU(contraction, N, N, 64);
 
 
 // Convolutions
 #define BM_FuncWithKernelDimsGPU(FUNC, DIM1, DIM2) \
   static void BM_##FUNC##_##DIM1##x##DIM2(int iters, int N) { \
     StopBenchmarkTiming(); \
-    cudaStream_t stream; \
-    cudaStreamCreate(&stream); \
+    Eigen::CudaStreamDevice stream; \
     Eigen::GpuDevice device(&stream); \
     BenchmarkSuite<Eigen::GpuDevice> suite(device, N); \
     cudaDeviceSynchronize(); \
     suite.FUNC(iters, DIM1, DIM2); \
-    cudaStreamDestroy(stream); \
   } \
   BENCHMARK_RANGE(BM_##FUNC##_##DIM1##x##DIM2, 128, 5000);
 
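
All three GPU macros now build their device on Eigen::CudaStreamDevice, which manages the underlying cudaStream_t itself, so the explicit cudaStreamCreate/cudaStreamDestroy pair disappears. Each benchmark's setup reduces to this sketch:

Eigen::CudaStreamDevice stream;    // creates and owns a CUDA stream by default
Eigen::GpuDevice device(&stream);  // device handed to BenchmarkSuite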
@@ -19,19 +19,12 @@
 int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy)
 {
   typedef void (*functype)(int, const Scalar*, int, const Scalar*, Scalar*, Scalar);
-  static functype func[2];
-
-  static bool init = false;
-  if(!init)
-  {
-    for(int k=0; k<2; ++k)
-      func[k] = 0;
-
-    func[UP] = (internal::selfadjoint_matrix_vector_product<Scalar,int,ColMajor,Upper,false,false>::run);
-    func[LO] = (internal::selfadjoint_matrix_vector_product<Scalar,int,ColMajor,Lower,false,false>::run);
-
-    init = true;
-  }
+  static const functype func[2] = {
+    // array index: UP
+    (internal::selfadjoint_matrix_vector_product<Scalar,int,ColMajor,Upper,false,false>::run),
+    // array index: LO
+    (internal::selfadjoint_matrix_vector_product<Scalar,int,ColMajor,Lower,false,false>::run),
+  };
 
   Scalar* a = reinterpret_cast<Scalar*>(pa);
   Scalar* x = reinterpret_cast<Scalar*>(px);
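
The same rewrite is applied to every wrapper below: a mutable function table filled on first call (racy when two threads make their first BLAS call concurrently, since the init flag is unsynchronized) becomes a static const aggregate initialized before main(). The shape of the change, reduced to a toy with hypothetical kernels:

typedef void (*kernel_t)(int);

void up_kernel(int n);  // stand-ins for the internal::...::run instantiations
void lo_kernel(int n);

enum { UP = 0, LO = 1 };  // mirrors the enums used by these wrappers

void dispatch(int uplo, int n) {
  // Initialized by the loader, not on first call: no init flag, no race.
  static const kernel_t table[2] = { up_kernel, lo_kernel };
  table[uplo](n);
}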
@@ -111,19 +104,12 @@ int EIGEN_BLAS_FUNC(hemv)(char *uplo, int *n, RealScalar *palpha, RealScalar *pa
 int EIGEN_BLAS_FUNC(hpr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pap)
 {
   typedef void (*functype)(int, Scalar*, const Scalar*, RealScalar);
-  static functype func[2];
-
-  static bool init = false;
-  if(!init)
-  {
-    for(int k=0; k<2; ++k)
-      func[k] = 0;
-
-    func[UP] = (internal::selfadjoint_packed_rank1_update<Scalar,int,ColMajor,Upper,false,Conj>::run);
-    func[LO] = (internal::selfadjoint_packed_rank1_update<Scalar,int,ColMajor,Lower,false,Conj>::run);
-
-    init = true;
-  }
+  static const functype func[2] = {
+    // array index: UP
+    (internal::selfadjoint_packed_rank1_update<Scalar,int,ColMajor,Upper,false,Conj>::run),
+    // array index: LO
+    (internal::selfadjoint_packed_rank1_update<Scalar,int,ColMajor,Lower,false,Conj>::run),
+  };
 
   Scalar* x = reinterpret_cast<Scalar*>(px);
   Scalar* ap = reinterpret_cast<Scalar*>(pap);
@@ -162,19 +148,12 @@ int EIGEN_BLAS_FUNC(hpr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px,
 int EIGEN_BLAS_FUNC(hpr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pap)
 {
   typedef void (*functype)(int, Scalar*, const Scalar*, const Scalar*, Scalar);
-  static functype func[2];
-
-  static bool init = false;
-  if(!init)
-  {
-    for(int k=0; k<2; ++k)
-      func[k] = 0;
-
-    func[UP] = (internal::packed_rank2_update_selector<Scalar,int,Upper>::run);
-    func[LO] = (internal::packed_rank2_update_selector<Scalar,int,Lower>::run);
-
-    init = true;
-  }
+  static const functype func[2] = {
+    // array index: UP
+    (internal::packed_rank2_update_selector<Scalar,int,Upper>::run),
+    // array index: LO
+    (internal::packed_rank2_update_selector<Scalar,int,Lower>::run),
+  };
 
   Scalar* x = reinterpret_cast<Scalar*>(px);
   Scalar* y = reinterpret_cast<Scalar*>(py);
|
|||||||
int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pa, int *lda)
|
int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pa, int *lda)
|
||||||
{
|
{
|
||||||
typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, const Scalar&);
|
typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, const Scalar&);
|
||||||
static functype func[2];
|
static const functype func[2] = {
|
||||||
|
// array index: UP
|
||||||
static bool init = false;
|
(selfadjoint_rank1_update<Scalar,int,ColMajor,Upper,false,Conj>::run),
|
||||||
if(!init)
|
// array index: LO
|
||||||
{
|
(selfadjoint_rank1_update<Scalar,int,ColMajor,Lower,false,Conj>::run),
|
||||||
for(int k=0; k<2; ++k)
|
};
|
||||||
func[k] = 0;
|
|
||||||
|
|
||||||
func[UP] = (selfadjoint_rank1_update<Scalar,int,ColMajor,Upper,false,Conj>::run);
|
|
||||||
func[LO] = (selfadjoint_rank1_update<Scalar,int,ColMajor,Lower,false,Conj>::run);
|
|
||||||
|
|
||||||
init = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
Scalar* x = reinterpret_cast<Scalar*>(px);
|
Scalar* x = reinterpret_cast<Scalar*>(px);
|
||||||
Scalar* a = reinterpret_cast<Scalar*>(pa);
|
Scalar* a = reinterpret_cast<Scalar*>(pa);
|
||||||
@@ -271,19 +243,12 @@ int EIGEN_BLAS_FUNC(her)(char *uplo, int *n, RealScalar *palpha, RealScalar *px,
 int EIGEN_BLAS_FUNC(her2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pa, int *lda)
 {
   typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, Scalar);
-  static functype func[2];
-
-  static bool init = false;
-  if(!init)
-  {
-    for(int k=0; k<2; ++k)
-      func[k] = 0;
-
-    func[UP] = (internal::rank2_update_selector<Scalar,int,Upper>::run);
-    func[LO] = (internal::rank2_update_selector<Scalar,int,Lower>::run);
-
-    init = true;
-  }
+  static const functype func[2] = {
+    // array index: UP
+    (internal::rank2_update_selector<Scalar,int,Upper>::run),
+    // array index: LO
+    (internal::rank2_update_selector<Scalar,int,Lower>::run),
+  };
 
   Scalar* x = reinterpret_cast<Scalar*>(px);
   Scalar* y = reinterpret_cast<Scalar*>(py);
@@ -26,20 +26,15 @@ struct general_matrix_vector_product_wrapper
 int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *pb, int *incb, RealScalar *pbeta, RealScalar *pc, int *incc)
 {
   typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int , Scalar *, int, Scalar);
-  static functype func[4];
-
-  static bool init = false;
-  if(!init)
-  {
-    for(int k=0; k<4; ++k)
-      func[k] = 0;
-
-    func[NOTR] = (general_matrix_vector_product_wrapper<int,Scalar,ColMajor,false,false>::run);
-    func[TR  ] = (general_matrix_vector_product_wrapper<int,Scalar,RowMajor,false,false>::run);
-    func[ADJ ] = (general_matrix_vector_product_wrapper<int,Scalar,RowMajor,Conj ,false>::run);
-
-    init = true;
-  }
+  static const functype func[4] = {
+    // array index: NOTR
+    (general_matrix_vector_product_wrapper<int,Scalar,ColMajor,false,false>::run),
+    // array index: TR
+    (general_matrix_vector_product_wrapper<int,Scalar,RowMajor,false,false>::run),
+    // array index: ADJ
+    (general_matrix_vector_product_wrapper<int,Scalar,RowMajor,Conj ,false>::run),
+    0
+  };
 
   Scalar* a = reinterpret_cast<Scalar*>(pa);
   Scalar* b = reinterpret_cast<Scalar*>(pb);
@@ -90,32 +85,36 @@ int EIGEN_BLAS_FUNC(gemv)(char *opa, int *m, int *n, RealScalar *palpha, RealSca
 int EIGEN_BLAS_FUNC(trsv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pa, int *lda, RealScalar *pb, int *incb)
 {
   typedef void (*functype)(int, const Scalar *, int, Scalar *);
-  static functype func[16];
-
-  static bool init = false;
-  if(!init)
-  {
-    for(int k=0; k<16; ++k)
-      func[k] = 0;
-
-    func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, false,ColMajor>::run);
-    func[TR | (UP << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, false,RowMajor>::run);
-    func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, Conj, RowMajor>::run);
-
-    func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, false,ColMajor>::run);
-    func[TR | (LO << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, false,RowMajor>::run);
-    func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, Conj, RowMajor>::run);
-
-    func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,false,ColMajor>::run);
-    func[TR | (UP << 2) | (UNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,false,RowMajor>::run);
-    func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,Conj, RowMajor>::run);
-
-    func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,false,ColMajor>::run);
-    func[TR | (LO << 2) | (UNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,false,RowMajor>::run);
-    func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,Conj, RowMajor>::run);
-
-    init = true;
-  }
+  static const functype func[16] = {
+    // array index: NOTR | (UP << 2) | (NUNIT << 3)
+    (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, false,ColMajor>::run),
+    // array index: TR | (UP << 2) | (NUNIT << 3)
+    (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, false,RowMajor>::run),
+    // array index: ADJ | (UP << 2) | (NUNIT << 3)
+    (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, Conj, RowMajor>::run),
+    0,
+    // array index: NOTR | (LO << 2) | (NUNIT << 3)
+    (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, false,ColMajor>::run),
+    // array index: TR | (LO << 2) | (NUNIT << 3)
+    (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, false,RowMajor>::run),
+    // array index: ADJ | (LO << 2) | (NUNIT << 3)
+    (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, Conj, RowMajor>::run),
+    0,
+    // array index: NOTR | (UP << 2) | (UNIT << 3)
+    (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,false,ColMajor>::run),
+    // array index: TR | (UP << 2) | (UNIT << 3)
+    (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,false,RowMajor>::run),
+    // array index: ADJ | (UP << 2) | (UNIT << 3)
+    (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,Conj, RowMajor>::run),
+    0,
+    // array index: NOTR | (LO << 2) | (UNIT << 3)
+    (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,false,ColMajor>::run),
+    // array index: TR | (LO << 2) | (UNIT << 3)
+    (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,false,RowMajor>::run),
+    // array index: ADJ | (LO << 2) | (UNIT << 3)
+    (internal::triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,Conj, RowMajor>::run),
+    0
+  };
 
   Scalar* a = reinterpret_cast<Scalar*>(pa);
   Scalar* b = reinterpret_cast<Scalar*>(pb);
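
For the 16-entry tables used by trsv and the wrappers below, the lookup index packs the three BLAS option characters as op | (uplo << 2) | (diag << 3), with NOTR/TR/ADJ = 0/1/2, UP/LO = 0/1 and NUNIT/UNIT = 0/1 (the values these shifts assume). Since op never equals 3, slots 3, 7, 11 and 15 hold null pointers. A hypothetical helper making the computation explicit:

inline int triangular_table_index(int op, int uplo, int diag) {
  return op | (uplo << 2) | (diag << 3);  // e.g. ADJ, UP, NUNIT -> 2
}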
@@ -145,32 +144,36 @@ int EIGEN_BLAS_FUNC(trsv)(char *uplo, char *opa, char *diag, int *n, RealScalar
 int EIGEN_BLAS_FUNC(trmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pa, int *lda, RealScalar *pb, int *incb)
 {
   typedef void (*functype)(int, int, const Scalar *, int, const Scalar *, int, Scalar *, int, const Scalar&);
-  static functype func[16];
-
-  static bool init = false;
-  if(!init)
-  {
-    for(int k=0; k<16; ++k)
-      func[k] = 0;
-
-    func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product<int,Upper|0, Scalar,false,Scalar,false,ColMajor>::run);
-    func[TR | (UP << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product<int,Lower|0, Scalar,false,Scalar,false,RowMajor>::run);
-    func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product<int,Lower|0, Scalar,Conj, Scalar,false,RowMajor>::run);
-
-    func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product<int,Lower|0, Scalar,false,Scalar,false,ColMajor>::run);
-    func[TR | (LO << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product<int,Upper|0, Scalar,false,Scalar,false,RowMajor>::run);
-    func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::triangular_matrix_vector_product<int,Upper|0, Scalar,Conj, Scalar,false,RowMajor>::run);
-
-    func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,false,Scalar,false,ColMajor>::run);
-    func[TR | (UP << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,false,Scalar,false,RowMajor>::run);
-    func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,Conj, Scalar,false,RowMajor>::run);
-
-    func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,false,Scalar,false,ColMajor>::run);
-    func[TR | (LO << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,false,Scalar,false,RowMajor>::run);
-    func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,Conj, Scalar,false,RowMajor>::run);
-
-    init = true;
-  }
+  static const functype func[16] = {
+    // array index: NOTR | (UP << 2) | (NUNIT << 3)
+    (internal::triangular_matrix_vector_product<int,Upper|0, Scalar,false,Scalar,false,ColMajor>::run),
+    // array index: TR | (UP << 2) | (NUNIT << 3)
+    (internal::triangular_matrix_vector_product<int,Lower|0, Scalar,false,Scalar,false,RowMajor>::run),
+    // array index: ADJ | (UP << 2) | (NUNIT << 3)
+    (internal::triangular_matrix_vector_product<int,Lower|0, Scalar,Conj, Scalar,false,RowMajor>::run),
+    0,
+    // array index: NOTR | (LO << 2) | (NUNIT << 3)
+    (internal::triangular_matrix_vector_product<int,Lower|0, Scalar,false,Scalar,false,ColMajor>::run),
+    // array index: TR | (LO << 2) | (NUNIT << 3)
+    (internal::triangular_matrix_vector_product<int,Upper|0, Scalar,false,Scalar,false,RowMajor>::run),
+    // array index: ADJ | (LO << 2) | (NUNIT << 3)
+    (internal::triangular_matrix_vector_product<int,Upper|0, Scalar,Conj, Scalar,false,RowMajor>::run),
+    0,
+    // array index: NOTR | (UP << 2) | (UNIT << 3)
+    (internal::triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,false,Scalar,false,ColMajor>::run),
+    // array index: TR | (UP << 2) | (UNIT << 3)
+    (internal::triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,false,Scalar,false,RowMajor>::run),
+    // array index: ADJ | (UP << 2) | (UNIT << 3)
+    (internal::triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,Conj, Scalar,false,RowMajor>::run),
+    0,
+    // array index: NOTR | (LO << 2) | (UNIT << 3)
+    (internal::triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,false,Scalar,false,ColMajor>::run),
+    // array index: TR | (LO << 2) | (UNIT << 3)
+    (internal::triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,false,Scalar,false,RowMajor>::run),
+    // array index: ADJ | (LO << 2) | (UNIT << 3)
+    (internal::triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,Conj, Scalar,false,RowMajor>::run),
+    0
+  };
 
   Scalar* a = reinterpret_cast<Scalar*>(pa);
   Scalar* b = reinterpret_cast<Scalar*>(pb);
@@ -346,32 +349,36 @@ int EIGEN_BLAS_FUNC(tbmv)(char *uplo, char *opa, char *diag, int *n, int *k, Rea
 int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *op, char *diag, int *n, int *k, RealScalar *pa, int *lda, RealScalar *px, int *incx)
 {
   typedef void (*functype)(int, int, const Scalar *, int, Scalar *);
-  static functype func[16];
-
-  static bool init = false;
-  if(!init)
-  {
-    for(int i=0; i<16; ++i)
-      func[i] = 0;
-
-    func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|0, Scalar,false,Scalar,ColMajor>::run);
-    func[TR | (UP << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|0, Scalar,false,Scalar,RowMajor>::run);
-    func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|0, Scalar,Conj, Scalar,RowMajor>::run);
-
-    func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|0, Scalar,false,Scalar,ColMajor>::run);
-    func[TR | (LO << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|0, Scalar,false,Scalar,RowMajor>::run);
-    func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|0, Scalar,Conj, Scalar,RowMajor>::run);
-
-    func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|UnitDiag,Scalar,false,Scalar,ColMajor>::run);
-    func[TR | (UP << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|UnitDiag,Scalar,false,Scalar,RowMajor>::run);
-    func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|UnitDiag,Scalar,Conj, Scalar,RowMajor>::run);
-
-    func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector<int,Lower|UnitDiag,Scalar,false,Scalar,ColMajor>::run);
-    func[TR | (LO << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|UnitDiag,Scalar,false,Scalar,RowMajor>::run);
-    func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::band_solve_triangular_selector<int,Upper|UnitDiag,Scalar,Conj, Scalar,RowMajor>::run);
-
-    init = true;
-  }
+  static const functype func[16] = {
+    // array index: NOTR | (UP << 2) | (NUNIT << 3)
+    (internal::band_solve_triangular_selector<int,Upper|0, Scalar,false,Scalar,ColMajor>::run),
+    // array index: TR | (UP << 2) | (NUNIT << 3)
+    (internal::band_solve_triangular_selector<int,Lower|0, Scalar,false,Scalar,RowMajor>::run),
+    // array index: ADJ | (UP << 2) | (NUNIT << 3)
+    (internal::band_solve_triangular_selector<int,Lower|0, Scalar,Conj, Scalar,RowMajor>::run),
+    0,
+    // array index: NOTR | (LO << 2) | (NUNIT << 3)
+    (internal::band_solve_triangular_selector<int,Lower|0, Scalar,false,Scalar,ColMajor>::run),
+    // array index: TR | (LO << 2) | (NUNIT << 3)
+    (internal::band_solve_triangular_selector<int,Upper|0, Scalar,false,Scalar,RowMajor>::run),
+    // array index: ADJ | (LO << 2) | (NUNIT << 3)
+    (internal::band_solve_triangular_selector<int,Upper|0, Scalar,Conj, Scalar,RowMajor>::run),
+    0,
+    // array index: NOTR | (UP << 2) | (UNIT << 3)
+    (internal::band_solve_triangular_selector<int,Upper|UnitDiag,Scalar,false,Scalar,ColMajor>::run),
+    // array index: TR | (UP << 2) | (UNIT << 3)
+    (internal::band_solve_triangular_selector<int,Lower|UnitDiag,Scalar,false,Scalar,RowMajor>::run),
+    // array index: ADJ | (UP << 2) | (UNIT << 3)
+    (internal::band_solve_triangular_selector<int,Lower|UnitDiag,Scalar,Conj, Scalar,RowMajor>::run),
+    0,
+    // array index: NOTR | (LO << 2) | (UNIT << 3)
+    (internal::band_solve_triangular_selector<int,Lower|UnitDiag,Scalar,false,Scalar,ColMajor>::run),
+    // array index: TR | (LO << 2) | (UNIT << 3)
+    (internal::band_solve_triangular_selector<int,Upper|UnitDiag,Scalar,false,Scalar,RowMajor>::run),
+    // array index: ADJ | (LO << 2) | (UNIT << 3)
+    (internal::band_solve_triangular_selector<int,Upper|UnitDiag,Scalar,Conj, Scalar,RowMajor>::run),
+    0,
+  };
 
   Scalar* a = reinterpret_cast<Scalar*>(pa);
   Scalar* x = reinterpret_cast<Scalar*>(px);
@@ -416,32 +423,36 @@ int EIGEN_BLAS_FUNC(tbsv)(char *uplo, char *op, char *diag, int *n, int *k, Real
 int EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx)
 {
   typedef void (*functype)(int, const Scalar*, const Scalar*, Scalar*, Scalar);
-  static functype func[16];
-
-  static bool init = false;
-  if(!init)
-  {
-    for(int k=0; k<16; ++k)
-      func[k] = 0;
-
-    func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Upper|0, Scalar,false,Scalar,false,ColMajor>::run);
-    func[TR | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Lower|0, Scalar,false,Scalar,false,RowMajor>::run);
-    func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Lower|0, Scalar,Conj, Scalar,false,RowMajor>::run);
-
-    func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Lower|0, Scalar,false,Scalar,false,ColMajor>::run);
-    func[TR | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Upper|0, Scalar,false,Scalar,false,RowMajor>::run);
-    func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Upper|0, Scalar,Conj, Scalar,false,RowMajor>::run);
-
-    func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,false,Scalar,false,ColMajor>::run);
-    func[TR | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,false,Scalar,false,RowMajor>::run);
-    func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,Conj, Scalar,false,RowMajor>::run);
-
-    func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,false,Scalar,false,ColMajor>::run);
-    func[TR | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,false,Scalar,false,RowMajor>::run);
-    func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,Conj, Scalar,false,RowMajor>::run);
-
-    init = true;
-  }
+  static const functype func[16] = {
+    // array index: NOTR | (UP << 2) | (NUNIT << 3)
+    (internal::packed_triangular_matrix_vector_product<int,Upper|0, Scalar,false,Scalar,false,ColMajor>::run),
+    // array index: TR | (UP << 2) | (NUNIT << 3)
+    (internal::packed_triangular_matrix_vector_product<int,Lower|0, Scalar,false,Scalar,false,RowMajor>::run),
+    // array index: ADJ | (UP << 2) | (NUNIT << 3)
+    (internal::packed_triangular_matrix_vector_product<int,Lower|0, Scalar,Conj, Scalar,false,RowMajor>::run),
+    0,
+    // array index: NOTR | (LO << 2) | (NUNIT << 3)
+    (internal::packed_triangular_matrix_vector_product<int,Lower|0, Scalar,false,Scalar,false,ColMajor>::run),
+    // array index: TR | (LO << 2) | (NUNIT << 3)
+    (internal::packed_triangular_matrix_vector_product<int,Upper|0, Scalar,false,Scalar,false,RowMajor>::run),
+    // array index: ADJ | (LO << 2) | (NUNIT << 3)
+    (internal::packed_triangular_matrix_vector_product<int,Upper|0, Scalar,Conj, Scalar,false,RowMajor>::run),
+    0,
+    // array index: NOTR | (UP << 2) | (UNIT << 3)
+    (internal::packed_triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,false,Scalar,false,ColMajor>::run),
+    // array index: TR | (UP << 2) | (UNIT << 3)
+    (internal::packed_triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,false,Scalar,false,RowMajor>::run),
+    // array index: ADJ | (UP << 2) | (UNIT << 3)
+    (internal::packed_triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,Conj, Scalar,false,RowMajor>::run),
+    0,
+    // array index: NOTR | (LO << 2) | (UNIT << 3)
+    (internal::packed_triangular_matrix_vector_product<int,Lower|UnitDiag,Scalar,false,Scalar,false,ColMajor>::run),
+    // array index: TR | (LO << 2) | (UNIT << 3)
+    (internal::packed_triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,false,Scalar,false,RowMajor>::run),
+    // array index: ADJ | (LO << 2) | (UNIT << 3)
+    (internal::packed_triangular_matrix_vector_product<int,Upper|UnitDiag,Scalar,Conj, Scalar,false,RowMajor>::run),
+    0
+  };
 
   Scalar* ap = reinterpret_cast<Scalar*>(pap);
   Scalar* x = reinterpret_cast<Scalar*>(px);
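
The shape of this change is identical in every function below: a mutable table filled on first call (a data race if two threads enter the wrapper concurrently, and never marked const) becomes a constant-initialized `static const` array. A reduced before/after sketch, independent of the Eigen internals:

    typedef void (*fn)(int);
    void f0(int) {}   // stand-ins for the internal kernels
    void f1(int) {}

    // Before: lazily filled on first use; racy and mutable.
    fn lookup_before(int code)
    {
      static fn func[2];
      static bool init = false;
      if(!init) { func[0] = f0; func[1] = f1; init = true; }
      return func[code];
    }

    // After: constant-initialized, immutable, safe to share between threads.
    fn lookup_after(int code)
    {
      static const fn func[2] = { f0, f1 };
      return func[code];
    }
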
@@ -487,32 +498,36 @@ int EIGEN_BLAS_FUNC(tpmv)(char *uplo, char *opa, char *diag, int *n, RealScalar
 int EIGEN_BLAS_FUNC(tpsv)(char *uplo, char *opa, char *diag, int *n, RealScalar *pap, RealScalar *px, int *incx)
 {
   typedef void (*functype)(int, const Scalar*, Scalar*);
-  static functype func[16];
-
-  static bool init = false;
-  if(!init)
-  {
-    for(int k=0; k<16; ++k)
-      func[k] = 0;
-
-    func[NOTR | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, false,ColMajor>::run);
-    func[TR | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, false,RowMajor>::run);
-    func[ADJ | (UP << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, Conj, RowMajor>::run);
-
-    func[NOTR | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, false,ColMajor>::run);
-    func[TR | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, false,RowMajor>::run);
-    func[ADJ | (LO << 2) | (NUNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, Conj, RowMajor>::run);
-
-    func[NOTR | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,false,ColMajor>::run);
-    func[TR | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,false,RowMajor>::run);
-    func[ADJ | (UP << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,Conj, RowMajor>::run);
-
-    func[NOTR | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,false,ColMajor>::run);
-    func[TR | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,false,RowMajor>::run);
-    func[ADJ | (LO << 2) | (UNIT << 3)] = (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,Conj, RowMajor>::run);
-
-    init = true;
-  }
+  static const functype func[16] = {
+    // array index: NOTR | (UP << 2) | (NUNIT << 3)
+    (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, false,ColMajor>::run),
+    // array index: TR | (UP << 2) | (NUNIT << 3)
+    (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, false,RowMajor>::run),
+    // array index: ADJ | (UP << 2) | (NUNIT << 3)
+    (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, Conj, RowMajor>::run),
+    0,
+    // array index: NOTR | (LO << 2) | (NUNIT << 3)
+    (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|0, false,ColMajor>::run),
+    // array index: TR | (LO << 2) | (NUNIT << 3)
+    (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, false,RowMajor>::run),
+    // array index: ADJ | (LO << 2) | (NUNIT << 3)
+    (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|0, Conj, RowMajor>::run),
+    0,
+    // array index: NOTR | (UP << 2) | (UNIT << 3)
+    (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,false,ColMajor>::run),
+    // array index: TR | (UP << 2) | (UNIT << 3)
+    (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,false,RowMajor>::run),
+    // array index: ADJ | (UP << 2) | (UNIT << 3)
+    (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,Conj, RowMajor>::run),
+    0,
+    // array index: NOTR | (LO << 2) | (UNIT << 3)
+    (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Lower|UnitDiag,false,ColMajor>::run),
+    // array index: TR | (LO << 2) | (UNIT << 3)
+    (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,false,RowMajor>::run),
+    // array index: ADJ | (LO << 2) | (UNIT << 3)
+    (internal::packed_triangular_solve_vector<Scalar,Scalar,int,OnTheLeft, Upper|UnitDiag,Conj, RowMajor>::run),
+    0
+  };
 
   Scalar* ap = reinterpret_cast<Scalar*>(pap);
  Scalar* x = reinterpret_cast<Scalar*>(px);
@@ -13,19 +13,12 @@
 int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, RealScalar *pa, int *lda, RealScalar *px, int *incx, RealScalar *pbeta, RealScalar *py, int *incy)
 {
   typedef void (*functype)(int, const Scalar*, int, const Scalar*, Scalar*, Scalar);
-  static functype func[2];
-
-  static bool init = false;
-  if(!init)
-  {
-    for(int k=0; k<2; ++k)
-      func[k] = 0;
-
-    func[UP] = (internal::selfadjoint_matrix_vector_product<Scalar,int,ColMajor,Upper,false,false>::run);
-    func[LO] = (internal::selfadjoint_matrix_vector_product<Scalar,int,ColMajor,Lower,false,false>::run);
-
-    init = true;
-  }
+  static const functype func[2] = {
+    // array index: UP
+    (internal::selfadjoint_matrix_vector_product<Scalar,int,ColMajor,Upper,false,false>::run),
+    // array index: LO
+    (internal::selfadjoint_matrix_vector_product<Scalar,int,ColMajor,Lower,false,false>::run),
+  };
 
   Scalar* a = reinterpret_cast<Scalar*>(pa);
   Scalar* x = reinterpret_cast<Scalar*>(px);
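
What symv computes, written against Eigen's public API rather than the internal kernel (a sketch of the semantics only, not the wrapper's actual code path):

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      const int n = 4;
      MatrixXd A = MatrixXd::Random(n,n);
      VectorXd x = VectorXd::Random(n), y = VectorXd::Random(n);
      double alpha = 2.0, beta = 0.5;
      // SYMV with uplo=='U': only the upper triangle of A is referenced.
      y = beta*y + alpha * A.selfadjointView<Upper>() * x;
      return 0;
    }
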
@@ -71,34 +64,13 @@ int EIGEN_BLAS_FUNC(symv) (char *uplo, int *n, RealScalar *palpha, RealScalar *p
 int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *pc, int *ldc)
 {
-
-// typedef void (*functype)(int, const Scalar *, int, Scalar *, int, Scalar);
-// static functype func[2];
-
-// static bool init = false;
-// if(!init)
-// {
-//   for(int k=0; k<2; ++k)
-//     func[k] = 0;
-//
-//   func[UP] = (internal::selfadjoint_product<Scalar,ColMajor,ColMajor,false,UpperTriangular>::run);
-//   func[LO] = (internal::selfadjoint_product<Scalar,ColMajor,ColMajor,false,LowerTriangular>::run);
-
-//   init = true;
-// }
   typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, const Scalar&);
-  static functype func[2];
-
-  static bool init = false;
-  if(!init)
-  {
-    for(int k=0; k<2; ++k)
-      func[k] = 0;
-
-    func[UP] = (selfadjoint_rank1_update<Scalar,int,ColMajor,Upper,false,Conj>::run);
-    func[LO] = (selfadjoint_rank1_update<Scalar,int,ColMajor,Lower,false,Conj>::run);
-
-    init = true;
-  }
+  static const functype func[2] = {
+    // array index: UP
+    (selfadjoint_rank1_update<Scalar,int,ColMajor,Upper,false,Conj>::run),
+    // array index: LO
+    (selfadjoint_rank1_update<Scalar,int,ColMajor,Lower,false,Conj>::run),
+  };
 
   Scalar* x = reinterpret_cast<Scalar*>(px);
   Scalar* c = reinterpret_cast<Scalar*>(pc);
@@ -131,34 +103,13 @@ int EIGEN_BLAS_FUNC(syr)(char *uplo, int *n, RealScalar *palpha, RealScalar *px,
 // C := alpha*x*y' + alpha*y*x' + C
 int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pc, int *ldc)
 {
-// typedef void (*functype)(int, const Scalar *, int, const Scalar *, int, Scalar *, int, Scalar);
-// static functype func[2];
-//
-// static bool init = false;
-// if(!init)
-// {
-//   for(int k=0; k<2; ++k)
-//     func[k] = 0;
-//
-//   func[UP] = (internal::selfadjoint_product<Scalar,ColMajor,ColMajor,false,UpperTriangular>::run);
-//   func[LO] = (internal::selfadjoint_product<Scalar,ColMajor,ColMajor,false,LowerTriangular>::run);
-//
-//   init = true;
-// }
   typedef void (*functype)(int, Scalar*, int, const Scalar*, const Scalar*, Scalar);
-  static functype func[2];
-
-  static bool init = false;
-  if(!init)
-  {
-    for(int k=0; k<2; ++k)
-      func[k] = 0;
-
-    func[UP] = (internal::rank2_update_selector<Scalar,int,Upper>::run);
-    func[LO] = (internal::rank2_update_selector<Scalar,int,Lower>::run);
-
-    init = true;
-  }
+  static const functype func[2] = {
+    // array index: UP
+    (internal::rank2_update_selector<Scalar,int,Upper>::run),
+    // array index: LO
+    (internal::rank2_update_selector<Scalar,int,Lower>::run),
+  };
 
   Scalar* x = reinterpret_cast<Scalar*>(px);
   Scalar* y = reinterpret_cast<Scalar*>(py);
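
The `C := alpha*x*y' + alpha*y*x' + C` update that syr2 wraps is also reachable through the public `rankUpdate` of a `selfadjointView`; a short sketch:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      const int n = 4;
      MatrixXd C = MatrixXd::Random(n,n);
      VectorXd x = VectorXd::Random(n), y = VectorXd::Random(n);
      // Rank-2 update touching only the upper triangle, as SYR2 with uplo=='U':
      C.selfadjointView<Upper>().rankUpdate(x, y, 0.5);
      return 0;
    }
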
@@ -234,19 +185,12 @@ int EIGEN_BLAS_FUNC(syr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px
 int EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *palpha, Scalar *px, int *incx, Scalar *pap)
 {
   typedef void (*functype)(int, Scalar*, const Scalar*, Scalar);
-  static functype func[2];
-
-  static bool init = false;
-  if(!init)
-  {
-    for(int k=0; k<2; ++k)
-      func[k] = 0;
-
-    func[UP] = (internal::selfadjoint_packed_rank1_update<Scalar,int,ColMajor,Upper,false,false>::run);
-    func[LO] = (internal::selfadjoint_packed_rank1_update<Scalar,int,ColMajor,Lower,false,false>::run);
-
-    init = true;
-  }
+  static const functype func[2] = {
+    // array index: UP
+    (internal::selfadjoint_packed_rank1_update<Scalar,int,ColMajor,Upper,false,false>::run),
+    // array index: LO
+    (internal::selfadjoint_packed_rank1_update<Scalar,int,ColMajor,Lower,false,false>::run),
+  };
 
   Scalar* x = reinterpret_cast<Scalar*>(px);
   Scalar* ap = reinterpret_cast<Scalar*>(pap);
@@ -285,19 +229,12 @@ int EIGEN_BLAS_FUNC(spr)(char *uplo, int *n, Scalar *palpha, Scalar *px, int *in
 int EIGEN_BLAS_FUNC(spr2)(char *uplo, int *n, RealScalar *palpha, RealScalar *px, int *incx, RealScalar *py, int *incy, RealScalar *pap)
 {
   typedef void (*functype)(int, Scalar*, const Scalar*, const Scalar*, Scalar);
-  static functype func[2];
-
-  static bool init = false;
-  if(!init)
-  {
-    for(int k=0; k<2; ++k)
-      func[k] = 0;
-
-    func[UP] = (internal::packed_rank2_update_selector<Scalar,int,Upper>::run);
-    func[LO] = (internal::packed_rank2_update_selector<Scalar,int,Lower>::run);
-
-    init = true;
-  }
+  static const functype func[2] = {
+    // array index: UP
+    (internal::packed_rank2_update_selector<Scalar,int,Upper>::run),
+    // array index: LO
+    (internal::packed_rank2_update_selector<Scalar,int,Lower>::run),
+  };
 
   Scalar* x = reinterpret_cast<Scalar*>(px);
   Scalar* y = reinterpret_cast<Scalar*>(py);
@@ -13,24 +13,29 @@ int EIGEN_BLAS_FUNC(gemm)(char *opa, char *opb, int *m, int *n, int *k, RealScal
 {
 // std::cerr << "in gemm " << *opa << " " << *opb << " " << *m << " " << *n << " " << *k << " " << *lda << " " << *ldb << " " << *ldc << " " << *palpha << " " << *pbeta << "\n";
   typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, Scalar, internal::level3_blocking<Scalar,Scalar>&, Eigen::internal::GemmParallelInfo<DenseIndex>*);
-  static functype func[12];
-
-  static bool init = false;
-  if(!init)
-  {
-    for(int i=0; i<12; ++i)
-      func[i] = 0;
-    func[NOTR | (NOTR << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,ColMajor,false,ColMajor>::run);
-    func[TR | (NOTR << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,false,ColMajor>::run);
-    func[ADJ | (NOTR << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor>::run);
-    func[NOTR | (TR << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,false,ColMajor>::run);
-    func[TR | (TR << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,RowMajor,false,ColMajor>::run);
-    func[ADJ | (TR << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,RowMajor,false,ColMajor>::run);
-    func[NOTR | (ADJ << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor>::run);
-    func[TR | (ADJ << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,RowMajor,Conj, ColMajor>::run);
-    func[ADJ | (ADJ << 2)] = (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,RowMajor,Conj, ColMajor>::run);
-    init = true;
-  }
+  static const functype func[12] = {
+    // array index: NOTR | (NOTR << 2)
+    (internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,ColMajor,false,ColMajor>::run),
+    // array index: TR | (NOTR << 2)
+    (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,false,ColMajor>::run),
+    // array index: ADJ | (NOTR << 2)
+    (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor>::run),
+    0,
+    // array index: NOTR | (TR << 2)
+    (internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,false,ColMajor>::run),
+    // array index: TR | (TR << 2)
+    (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,RowMajor,false,ColMajor>::run),
+    // array index: ADJ | (TR << 2)
+    (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,RowMajor,false,ColMajor>::run),
+    0,
+    // array index: NOTR | (ADJ << 2)
+    (internal::general_matrix_matrix_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor>::run),
+    // array index: TR | (ADJ << 2)
+    (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,false,Scalar,RowMajor,Conj, ColMajor>::run),
+    // array index: ADJ | (ADJ << 2)
+    (internal::general_matrix_matrix_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,RowMajor,Conj, ColMajor>::run),
+    0
+  };
 
   Scalar* a = reinterpret_cast<Scalar*>(pa);
   Scalar* b = reinterpret_cast<Scalar*>(pb);
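
gemm's table is indexed by two packed op codes, one per operand. A sketch of the index computation, with the `OP` decoder treated as an assumption (the real one lives in the BLAS layer's common header; here it only needs to map 'N'/'T'/'C' to NOTR/TR/ADJ):

    // Hypothetical decoder, consistent with the table layout above:
    inline int OP(char c)
    {
      return c=='N' ? 0 /*NOTR*/ : c=='T' ? 1 /*TR*/ : c=='C' ? 2 /*ADJ*/ : 3;
    }

    inline int gemm_index(char opa, char opb)
    {
      // The largest valid index is ADJ | (ADJ << 2) == 10, hence the 12-entry
      // table, with 0 sentinels guarding the op == 3 combinations.
      return OP(opa) | (OP(opb) << 2);
    }
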
@@ -73,49 +78,64 @@ int EIGEN_BLAS_FUNC(trsm)(char *side, char *uplo, char *opa, char *diag, int *m,
 {
 // std::cerr << "in trsm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << "," << *n << " " << *palpha << " " << *lda << " " << *ldb<< "\n";
   typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, internal::level3_blocking<Scalar,Scalar>&);
-  static functype func[32];
-
-  static bool init = false;
-  if(!init)
-  {
-    for(int i=0; i<32; ++i)
-      func[i] = 0;
-
-    func[NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, false,ColMajor,ColMajor>::run);
-    func[TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, false,RowMajor,ColMajor>::run);
-    func[ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, Conj, RowMajor,ColMajor>::run);
-
-    func[NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, false,ColMajor,ColMajor>::run);
-    func[TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, false,RowMajor,ColMajor>::run);
-    func[ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, Conj, RowMajor,ColMajor>::run);
-
-    func[NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, false,ColMajor,ColMajor>::run);
-    func[TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, false,RowMajor,ColMajor>::run);
-    func[ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, Conj, RowMajor,ColMajor>::run);
-
-    func[NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, false,ColMajor,ColMajor>::run);
-    func[TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, false,RowMajor,ColMajor>::run);
-    func[ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, Conj, RowMajor,ColMajor>::run);
-
-    func[NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,false,ColMajor,ColMajor>::run);
-    func[TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,false,RowMajor,ColMajor>::run);
-    func[ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,Conj, RowMajor,ColMajor>::run);
-
-    func[NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,false,ColMajor,ColMajor>::run);
-    func[TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,false,RowMajor,ColMajor>::run);
-    func[ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,Conj, RowMajor,ColMajor>::run);
-
-    func[NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,false,ColMajor,ColMajor>::run);
-    func[TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,false,RowMajor,ColMajor>::run);
-    func[ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,Conj, RowMajor,ColMajor>::run);
-
-    func[NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,false,ColMajor,ColMajor>::run);
-    func[TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,false,RowMajor,ColMajor>::run);
-    func[ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,Conj, RowMajor,ColMajor>::run);
-
-    init = true;
-  }
+  static const functype func[32] = {
+    // array index: NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
+    (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, false,ColMajor,ColMajor>::run),
+    // array index: TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
+    (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, false,RowMajor,ColMajor>::run),
+    // array index: ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
+    (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, Conj, RowMajor,ColMajor>::run),
+    0,
+    // array index: NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
+    (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, false,ColMajor,ColMajor>::run),
+    // array index: TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
+    (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, false,RowMajor,ColMajor>::run),
+    // array index: ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
+    (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, Conj, RowMajor,ColMajor>::run),
+    0,
+    // array index: NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
+    (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, false,ColMajor,ColMajor>::run),
+    // array index: TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
+    (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, false,RowMajor,ColMajor>::run),
+    // array index: ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
+    (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, Conj, RowMajor,ColMajor>::run),
+    0,
+    // array index: NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
+    (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, false,ColMajor,ColMajor>::run),
+    // array index: TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
+    (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, false,RowMajor,ColMajor>::run),
+    // array index: ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
+    (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, Conj, RowMajor,ColMajor>::run),
+    0,
+    // array index: NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)
+    (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,false,ColMajor,ColMajor>::run),
+    // array index: TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)
+    (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,false,RowMajor,ColMajor>::run),
+    // array index: ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)
+    (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,Conj, RowMajor,ColMajor>::run),
+    0,
+    // array index: NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
+    (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,false,ColMajor,ColMajor>::run),
+    // array index: TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
+    (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,false,RowMajor,ColMajor>::run),
+    // array index: ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
+    (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,Conj, RowMajor,ColMajor>::run),
+    0,
+    // array index: NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)
+    (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,false,ColMajor,ColMajor>::run),
+    // array index: TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)
+    (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,false,RowMajor,ColMajor>::run),
+    // array index: ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)
+    (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,Conj, RowMajor,ColMajor>::run),
+    0,
+    // array index: NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
+    (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,false,ColMajor,ColMajor>::run),
+    // array index: TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
+    (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,false,RowMajor,ColMajor>::run),
+    // array index: ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
+    (internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,Conj, RowMajor,ColMajor>::run),
+    0
+  };
 
   Scalar* a = reinterpret_cast<Scalar*>(pa);
   Scalar* b = reinterpret_cast<Scalar*>(pb);
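
A recurring trick in these tables: a TR or ADJ op is implemented not by forming the transpose but by flipping both the triangle (Upper/Lower) and the storage order (ColMajor/RowMajor), since a column-major upper-triangular matrix read transposed is a row-major lower-triangular matrix. The identity being exploited, checked with the public API:

    #include <Eigen/Dense>
    #include <iostream>
    using namespace Eigen;

    int main()
    {
      MatrixXd A = MatrixXd::Random(5,5);
      VectorXd b = VectorXd::Random(5);
      // Solving with U^T is the same as solving with the lower triangle of A^T:
      VectorXd y1 = A.triangularView<Upper>().transpose().solve(b);
      VectorXd y2 = A.transpose().triangularView<Lower>().solve(b);
      std::cout << (y1 - y2).norm() << "\n";  // ~0 up to rounding
      return 0;
    }
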
@@ -162,47 +182,64 @@ int EIGEN_BLAS_FUNC(trmm)(char *side, char *uplo, char *opa, char *diag, int *m,
 {
 // std::cerr << "in trmm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << " " << *n << " " << *lda << " " << *ldb << " " << *palpha << "\n";
   typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking<Scalar,Scalar>&);
-  static functype func[32];
-  static bool init = false;
-  if(!init)
-  {
-    for(int k=0; k<32; ++k)
-      func[k] = 0;
-
-    func[NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, ColMajor,false,ColMajor,false,ColMajor>::run);
-    func[TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, RowMajor,false,ColMajor,false,ColMajor>::run);
-    func[ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, RowMajor,Conj, ColMajor,false,ColMajor>::run);
-
-    func[NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,ColMajor,false,ColMajor>::run);
-    func[TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,RowMajor,false,ColMajor>::run);
-    func[ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,RowMajor,Conj, ColMajor>::run);
-
-    func[NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, ColMajor,false,ColMajor,false,ColMajor>::run);
-    func[TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, RowMajor,false,ColMajor,false,ColMajor>::run);
-    func[ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, RowMajor,Conj, ColMajor,false,ColMajor>::run);
-
-    func[NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,ColMajor,false,ColMajor>::run);
-    func[TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,RowMajor,false,ColMajor>::run);
-    func[ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,RowMajor,Conj, ColMajor>::run);
-
-    func[NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, ColMajor,false,ColMajor,false,ColMajor>::run);
-    func[TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, RowMajor,false,ColMajor,false,ColMajor>::run);
-    func[ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, RowMajor,Conj, ColMajor,false,ColMajor>::run);
-
-    func[NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,ColMajor,false,ColMajor>::run);
-    func[TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,RowMajor,false,ColMajor>::run);
-    func[ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,RowMajor,Conj, ColMajor>::run);
-
-    func[NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, ColMajor,false,ColMajor,false,ColMajor>::run);
-    func[TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, RowMajor,false,ColMajor,false,ColMajor>::run);
-    func[ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, RowMajor,Conj, ColMajor,false,ColMajor>::run);
-
-    func[NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,ColMajor,false,ColMajor>::run);
-    func[TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,RowMajor,false,ColMajor>::run);
-    func[ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)] = (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,RowMajor,Conj, ColMajor>::run);
-
-    init = true;
-  }
+  static const functype func[32] = {
+    // array index: NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
+    (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, ColMajor,false,ColMajor,false,ColMajor>::run),
+    // array index: TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
+    (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, RowMajor,false,ColMajor,false,ColMajor>::run),
+    // array index: ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
+    (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, RowMajor,Conj, ColMajor,false,ColMajor>::run),
+    0,
+    // array index: NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
+    (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,ColMajor,false,ColMajor>::run),
+    // array index: TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
+    (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,RowMajor,false,ColMajor>::run),
+    // array index: ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
+    (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,RowMajor,Conj, ColMajor>::run),
+    0,
+    // array index: NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
+    (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, ColMajor,false,ColMajor,false,ColMajor>::run),
+    // array index: TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
+    (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, RowMajor,false,ColMajor,false,ColMajor>::run),
+    // array index: ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
+    (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, RowMajor,Conj, ColMajor,false,ColMajor>::run),
+    0,
+    // array index: NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
+    (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,ColMajor,false,ColMajor>::run),
+    // array index: TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
+    (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,RowMajor,false,ColMajor>::run),
+    // array index: ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
+    (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,RowMajor,Conj, ColMajor>::run),
+    0,
+    // array index: NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)
+    (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, ColMajor,false,ColMajor,false,ColMajor>::run),
+    // array index: TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)
+    (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, RowMajor,false,ColMajor,false,ColMajor>::run),
+    // array index: ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)
+    (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, RowMajor,Conj, ColMajor,false,ColMajor>::run),
+    0,
+    // array index: NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
+    (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,ColMajor,false,ColMajor>::run),
+    // array index: TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
+    (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,RowMajor,false,ColMajor>::run),
+    // array index: ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
+    (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,RowMajor,Conj, ColMajor>::run),
+    0,
+    // array index: NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)
+    (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, ColMajor,false,ColMajor,false,ColMajor>::run),
+    // array index: TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)
+    (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, RowMajor,false,ColMajor,false,ColMajor>::run),
+    // array index: ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)
+    (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, RowMajor,Conj, ColMajor,false,ColMajor>::run),
+    0,
+    // array index: NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
+    (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,ColMajor,false,ColMajor>::run),
+    // array index: TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
+    (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,RowMajor,false,ColMajor>::run),
+    // array index: ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
+    (internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,RowMajor,Conj, ColMajor>::run),
+    0
+  };
 
   Scalar* a = reinterpret_cast<Scalar*>(pa);
   Scalar* b = reinterpret_cast<Scalar*>(pb);
@@ -275,9 +312,9 @@ int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *pa
     return 1;
   }
 
+  int size = (SIDE(*side)==LEFT) ? (*m) : (*n);
 #if ISCOMPLEX
   // FIXME add support for symmetric complex matrix
-  int size = (SIDE(*side)==LEFT) ? (*m) : (*n);
   Matrix<Scalar,Dynamic,Dynamic,ColMajor> matA(size,size);
   if(UPLO(*uplo)==UP)
   {
@@ -294,13 +331,15 @@ int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *pa
   else if(SIDE(*side)==RIGHT)
     matrix(c, *m, *n, *ldc) += alpha * matrix(b, *m, *n, *ldb) * matA;
 #else
+  internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic> blocking(*m,*n,size,1,false);
+
   if(SIDE(*side)==LEFT)
-    if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, RowMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha);
-    else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha);
+    if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, RowMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking);
+    else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking);
     else return 0;
   else if(SIDE(*side)==RIGHT)
-    if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, RowMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha);
-    else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, ColMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha);
+    if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, RowMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking);
+    else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, ColMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking);
     else return 0;
   else
     return 0;
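
The other theme of the level-3 hunks: each wrapper now builds one `gemm_blocking_space` and threads it through every kernel call, instead of letting each kernel allocate its own packing buffers. A generic sketch of the idea with deliberately simplified stand-in types (not Eigen's actual internals; in particular the meaning of the trailing `1,false` constructor arguments is not spelled out by this patch):

    #include <vector>

    // Stand-in for internal::level3_blocking: owns the panel-packing buffers.
    struct Blocking {
      std::vector<double> packedA, packedB;
      Blocking(int rows, int cols, int depth)
        : packedA(static_cast<std::size_t>(rows) * depth),
          packedB(static_cast<std::size_t>(depth) * cols) {}
    };

    void kernel(const double* a, const double* b, double* c,
                int m, int n, int k, Blocking& blk)
    {
      // ... pack panels of a and b into blk.packedA/blk.packedB, then multiply ...
      (void)a; (void)b; (void)c; (void)m; (void)n; (void)k; (void)blk;
    }

    void wrapper(const double* a, const double* b, double* c, int m, int n, int k)
    {
      Blocking blocking(m, n, k);         // allocated once at the call site...
      kernel(a, b, c, m, n, k, blocking); // ...and reused by every kernel call
    }
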
@ -315,25 +354,23 @@ int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palp
|
|||||||
{
|
{
|
||||||
// std::cerr << "in syrk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n";
|
// std::cerr << "in syrk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n";
|
||||||
#if !ISCOMPLEX
|
#if !ISCOMPLEX
|
||||||
typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&);
|
typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking<Scalar,Scalar>&);
|
||||||
static functype func[8];
|
static const functype func[8] = {
|
||||||
|
// array index: NOTR | (UP << 2)
|
||||||
static bool init = false;
|
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,ColMajor,Conj, Upper>::run),
|
||||||
if(!init)
|
// array index: TR | (UP << 2)
|
||||||
{
|
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,ColMajor,Conj, Upper>::run),
|
||||||
for(int i=0; i<8; ++i)
|
// array index: ADJ | (UP << 2)
|
||||||
func[i] = 0;
|
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,ColMajor,false,Upper>::run),
|
||||||
|
0,
|
||||||
func[NOTR | (UP << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,ColMajor,Conj, Upper>::run);
|
// array index: NOTR | (LO << 2)
|
||||||
func[TR | (UP << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,ColMajor,Conj, Upper>::run);
|
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,ColMajor,Conj, Lower>::run),
|
||||||
func[ADJ | (UP << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,ColMajor,false,Upper>::run);
|
// array index: TR | (LO << 2)
|
||||||
|
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,ColMajor,Conj, Lower>::run),
|
||||||
func[NOTR | (LO << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,ColMajor,Conj, Lower>::run);
|
// array index: ADJ | (LO << 2)
|
||||||
func[TR | (LO << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,false,Scalar,ColMajor,ColMajor,Conj, Lower>::run);
|
(internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,ColMajor,false,Lower>::run),
|
||||||
func[ADJ | (LO << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,ColMajor,false,Lower>::run);
|
0
|
||||||
|
};
|
||||||
init = true;
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
Scalar* a = reinterpret_cast<Scalar*>(pa);
|
Scalar* a = reinterpret_cast<Scalar*>(pa);
|
||||||
@@ -381,8 +418,10 @@ int EIGEN_BLAS_FUNC(syrk)(char *uplo, char *op, int *n, int *k, RealScalar *palp
       matrix(c, *n, *n, *ldc).triangularView<Lower>() += alpha * matrix(a,*k,*n,*lda).transpose() * matrix(a,*k,*n,*lda);
   }
 #else
+  internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic> blocking(*n,*n,*k,1,false);
+
   int code = OP(*op) | (UPLO(*uplo) << 2);
-  func[code](*n, *k, a, *lda, a, *lda, c, *ldc, alpha);
+  func[code](*n, *k, a, *lda, a, *lda, c, *ldc, alpha, blocking);
 #endif
 
   return 0;
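
The non-complex fallback above spells the rank-k update out with `triangularView`; through the public API the same SYRK-style update is one call:

    #include <Eigen/Dense>
    using namespace Eigen;

    int main()
    {
      const int n = 4, k = 3;
      MatrixXd A = MatrixXd::Random(n,k);
      MatrixXd C = MatrixXd::Zero(n,n);
      // C := 1.5*A*A^T + C, writing only the lower triangle (SYRK with op=='N'):
      C.selfadjointView<Lower>().rankUpdate(A, 1.5);
      return 0;
    }
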
@@ -486,20 +525,23 @@ int EIGEN_BLAS_FUNC(hemm)(char *side, char *uplo, int *m, int *n, RealScalar *pa
     return 1;
   }
 
+  int size = (SIDE(*side)==LEFT) ? (*m) : (*n);
+  internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic> blocking(*m,*n,size,1,false);
+
   if(SIDE(*side)==LEFT)
   {
     if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar,DenseIndex,RowMajor,true,Conj, ColMajor,false,false, ColMajor>
-      ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha);
+      ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking);
     else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,true,false, ColMajor,false,false, ColMajor>
-      ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha);
+      ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking);
     else return 0;
   }
   else if(SIDE(*side)==RIGHT)
   {
     if(UPLO(*uplo)==UP) matrix(c,*m,*n,*ldc) += alpha * matrix(b,*m,*n,*ldb) * matrix(a,*n,*n,*lda).selfadjointView<Upper>();/*internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,false,false, RowMajor,true,Conj, ColMajor>
-      ::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha);*/
+      ::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking);*/
     else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,false,false, ColMajor,true,false, ColMajor>
-      ::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha);
+      ::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking);
     else return 0;
   }
   else
@@ -516,23 +558,21 @@ int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palp
 {
 // std::cerr << "in herk " << *uplo << " " << *op << " " << *n << " " << *k << " " << *palpha << " " << *lda << " " << *pbeta << " " << *ldc << "\n";
 
-  typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&);
-  static functype func[8];
-
-  static bool init = false;
-  if(!init)
-  {
-    for(int i=0; i<8; ++i)
-      func[i] = 0;
-
-    func[NOTR | (UP << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor,Upper>::run);
-    func[ADJ | (UP << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor,Upper>::run);
-
-    func[NOTR | (LO << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor,Lower>::run);
-    func[ADJ | (LO << 2)] = (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor,Lower>::run);
-
-    init = true;
-  }
+  typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking<Scalar,Scalar>&);
+  static const functype func[8] = {
+    // array index: NOTR | (UP << 2)
+    (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor,Upper>::run),
+    0,
+    // array index: ADJ | (UP << 2)
+    (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor,Upper>::run),
+    0,
+    // array index: NOTR | (LO << 2)
+    (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,ColMajor,false,Scalar,RowMajor,Conj, ColMajor,Lower>::run),
+    0,
+    // array index: ADJ | (LO << 2)
+    (internal::general_matrix_matrix_triangular_product<DenseIndex,Scalar,RowMajor,Conj, Scalar,ColMajor,false,ColMajor,Lower>::run),
+    0
+  };
 
   Scalar* a = reinterpret_cast<Scalar*>(pa);
   Scalar* c = reinterpret_cast<Scalar*>(pc);
@@ -571,7 +611,8 @@ int EIGEN_BLAS_FUNC(herk)(char *uplo, char *op, int *n, int *k, RealScalar *palp
 
   if(*k>0 && alpha!=RealScalar(0))
   {
-    func[code](*n, *k, a, *lda, a, *lda, c, *ldc, alpha);
+    internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic> blocking(*n,*n,*k,1,false);
+    func[code](*n, *k, a, *lda, a, *lda, c, *ldc, alpha, blocking);
     matrix(c, *n, *n, *ldc).diagonal().imag().setZero();
   }
   return 0;
@ -44,7 +44,7 @@ C.setRandom(rows,cols) // C = rand(rows,cols)*2-1
|
|||||||
VectorXd::LinSpaced(size,low,high) // linspace(low,high,size)'
|
VectorXd::LinSpaced(size,low,high) // linspace(low,high,size)'
|
||||||
v.setLinSpaced(size,low,high) // v = linspace(low,high,size)'
|
v.setLinSpaced(size,low,high) // v = linspace(low,high,size)'
|
||||||
VectorXi::LinSpaced(((hi-low)/step)+1, // low:step:hi
|
VectorXi::LinSpaced(((hi-low)/step)+1, // low:step:hi
|
||||||
low,low+step*(size-1))
|
low,low+step*(size-1)) //
|
||||||
|
|
||||||
|
|
||||||
// Matrix slicing and blocks. All expressions listed here are read/write.
|
// Matrix slicing and blocks. All expressions listed here are read/write.
|
@@ -94,6 +94,8 @@ R.transpose() // R.' or conj(R') // Read-write
 R.diagonal() // diag(R) // Read-write
 x.asDiagonal() // diag(x)
 R.transpose().colwise().reverse() // rot90(R) // Read-write
+R.rowwise().reverse() // fliplr(R)
+R.colwise().reverse() // flipud(R)
 R.replicate(i,j) // repmat(P,i,j)
 
 
@@ -139,6 +141,7 @@ R.cwiseAbs2() // abs(P.^2)
 R.array().abs2() // abs(P.^2)
 (R.array() < s).select(P,Q ); // (R < s ? P : Q)
 R = (Q.array()==0).select(P,A) // R(Q==0) = P(Q==0)
+R = P.unaryExpr(ptr_fun(func)) // R = arrayfun(func, P) // with: scalar func(const scalar &x);
 
 
 // Reductions.
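
One caveat on the new quick-reference entry: `std::ptr_fun` was deprecated in C++11 and removed in C++17, so on a current compiler the same line is better written with a lambda or a plain function pointer:

    #include <Eigen/Dense>
    #include <cmath>
    using namespace Eigen;

    double func(double x) { return std::tanh(x); }  // any scalar func(const scalar&)

    int main()
    {
      MatrixXd P = MatrixXd::Random(3,3), R;
      R = P.unaryExpr([](double x) { return func(x); });  // R = arrayfun(func, P)
      R = P.unaryExpr(&func);                             // equivalent
      return 0;
    }
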
@@ -65,17 +65,17 @@ They are summarized in the following tables:
 <td>Requires the <a href="http://pastix.gforge.inria.fr">PaStiX</a> package, \b CeCILL-C </td>
 <td>optimized for tough problems and symmetric patterns</td></tr>
 <tr><td>CholmodSupernodalLLT</td><td>\link CholmodSupport_Module CholmodSupport \endlink</td><td>Direct LLt factorization</td><td>SPD</td><td>Fill-in reducing, Leverage fast dense algebra</td>
-<td>Requires the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">SuiteSparse</a> package, \b GPL </td>
+<td>Requires the <a href="http://www.suitesparse.com">SuiteSparse</a> package, \b GPL </td>
 <td></td></tr>
 <tr><td>UmfPackLU</td><td>\link UmfPackSupport_Module UmfPackSupport \endlink</td><td>Direct LU factorization</td><td>Square</td><td>Fill-in reducing, Leverage fast dense algebra</td>
-<td>Requires the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">SuiteSparse</a> package, \b GPL </td>
+<td>Requires the <a href="http://www.suitesparse.com">SuiteSparse</a> package, \b GPL </td>
 <td></td></tr>
 <tr><td>SuperLU</td><td>\link SuperLUSupport_Module SuperLUSupport \endlink</td><td>Direct LU factorization</td><td>Square</td><td>Fill-in reducing, Leverage fast dense algebra</td>
 <td>Requires the <a href="http://crd-legacy.lbl.gov/~xiaoye/SuperLU/">SuperLU</a> library, (BSD-like)</td>
 <td></td></tr>
 <tr><td>SPQR</td><td>\link SPQRSupport_Module SPQRSupport \endlink </td> <td> QR factorization </td>
 <td> Any, rectangular</td><td>fill-in reducing, multithreaded, fast dense algebra</td>
-<td> requires the <a href="http://www.cise.ufl.edu/research/sparse/SuiteSparse/">SuiteSparse</a> package, \b GPL </td><td>recommended for linear least-squares problems, has a rank-revealing feature</tr>
+<td> requires the <a href="http://www.suitesparse.com">SuiteSparse</a> package, \b GPL </td><td>recommended for linear least-squares problems, has a rank-revealing feature</tr>
 </table>

 Here \c SPD means symmetric positive definite.
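All solvers in this table share the compute()/solve() interface of SparseSolverBase, so switching between them is essentially a one-line change; a minimal sketch using the Cholmod wrapper named above (the matrix and right-hand side are placeholders):

#include <Eigen/Dense>
#include <Eigen/Sparse>
#include <Eigen/CholmodSupport>  // CholmodSupernodalLLT, needs SuiteSparse
using namespace Eigen;

VectorXd solve_spd(const SparseMatrix<double>& A, const VectorXd& b)
{
  // Supernodal LLt, suited to symmetric positive definite A
  CholmodSupernodalLLT<SparseMatrix<double> > solver;
  solver.compute(A);                       // analyze pattern + factorize
  if(solver.info() != Success)
    return VectorXd();                     // factorization failed
  return solver.solve(b);                  // forward/back substitution
}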
@@ -153,10 +153,11 @@ not necessary to evaluate the right-hand side explicitly.

 \section TopicAliasingMatrixMult Aliasing and matrix multiplication

-Matrix multiplication is the only operation in %Eigen that assumes aliasing by default. Thus, if \c matA is a
-matrix, then the statement <tt>matA = matA * matA;</tt> is safe. All other operations in %Eigen assume that
-there are no aliasing problems, either because the result is assigned to a different matrix or because it is a
-component-wise operation.
+Matrix multiplication is the only operation in %Eigen that assumes aliasing by default, <strong>under the
+condition that the destination matrix is not resized</strong>.
+Thus, if \c matA is a \b square matrix, then the statement <tt>matA = matA * matA;</tt> is safe.
+All other operations in %Eigen assume that there are no aliasing problems,
+either because the result is assigned to a different matrix or because it is a component-wise operation.

 <table class="example">
 <tr><th>Example</th><th>Output</th></tr>
@@ -198,6 +199,27 @@ may get wrong results:
 \verbinclude TopicAliasing_mult3.out
 </td></tr></table>

+Moreover, starting in Eigen 3.3, aliasing is \b not assumed if the destination matrix is resized and the product is not directly assigned to the destination.
+Therefore, the following example is also wrong:
+
+<table class="example">
+<tr><th>Example</th><th>Output</th></tr>
+<tr><td>
+\include TopicAliasing_mult4.cpp
+</td>
+<td>
+\verbinclude TopicAliasing_mult4.out
+</td></tr></table>
+
+As for any aliasing issue, you can resolve it by explicitly evaluating the expression prior to assignment:
+<table class="example">
+<tr><th>Example</th><th>Output</th></tr>
+<tr><td>
+\include TopicAliasing_mult5.cpp
+</td>
+<td>
+\verbinclude TopicAliasing_mult5.out
+</td></tr></table>
+
 \section TopicAliasingSummary Summary

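A compact illustration of the rule documented above: products introduce a temporary by default, and noalias() opts out only when the destination provably does not alias the operands (the matrix values are ours):

#include <iostream>
#include <Eigen/Dense>
using namespace Eigen;

int main()
{
  MatrixXf matA(2,2);
  matA << 2, 0,  0, 2;

  // Safe: matrix products assume aliasing, so a temporary is created automatically.
  matA = matA * matA;            // matA is now its own square

  // The temporary may be skipped explicitly only when the destination does not
  // appear on the right-hand side:
  MatrixXf matB(2,2);
  matB.noalias() = matA * matA;  // fine: matB does not alias matA

  std::cout << matB << "\n";
  return 0;
}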
@@ -101,17 +101,16 @@ row and column position are to be stored. These variables should be of type
 \verbinclude Tutorial_ReductionsVisitorsBroadcasting_visitors.out
 </td></tr></table>

-Note that both functions also return the value of the minimum or maximum coefficient if needed,
-as if it was a typical reduction operation.
+Both functions also return the value of the minimum or maximum coefficient.

 \section TutorialReductionsVisitorsBroadcastingPartialReductions Partial reductions
 Partial reductions are reductions that can operate column- or row-wise on a Matrix or
 Array, applying the reduction operation on each column or row and
-returning a column or row-vector with the corresponding values. Partial reductions are applied
+returning a column or row vector with the corresponding values. Partial reductions are applied
 with \link DenseBase::colwise() colwise() \endlink or \link DenseBase::rowwise() rowwise() \endlink.

 A simple example is obtaining the maximum of the elements
-in each column in a given matrix, storing the result in a row-vector:
+in each column in a given matrix, storing the result in a row vector:

 <table class="example">
 <tr><th>Example:</th><th>Output:</th></tr>
@@ -133,8 +132,7 @@ The same operation can be performed row-wise:
 \verbinclude Tutorial_ReductionsVisitorsBroadcasting_rowwise.out
 </td></tr></table>

-<b>Note that column-wise operations return a 'row-vector' while row-wise operations
-return a 'column-vector'</b>
+<b>Note that column-wise operations return a row vector, while row-wise operations return a column vector.</b>

 \subsection TutorialReductionsVisitorsBroadcastingPartialReductionsCombined Combining partial reductions with other operations
 It is also possible to use the result of a partial reduction to do further processing.
@@ -176,7 +174,7 @@ The concept behind broadcasting is similar to partial reductions, with the diffe
 constructs an expression where a vector (column or row) is interpreted as a matrix by replicating it in
 one direction.

-A simple example is to add a certain column-vector to each column in a matrix.
+A simple example is to add a certain column vector to each column in a matrix.
 This can be accomplished with:

 <table class="example">
@@ -253,7 +251,7 @@ is a new matrix whose size is the same as matrix <tt>m</tt>: \f[
 \f]

 - <tt>(m.colwise() - v).colwise().squaredNorm()</tt> is a partial reduction, computing the squared norm column-wise. The result of
-this operation is a row-vector where each coefficient is the squared Euclidean distance between each column in <tt>m</tt> and <tt>v</tt>: \f[
+this operation is a row vector where each coefficient is the squared Euclidean distance between each column in <tt>m</tt> and <tt>v</tt>: \f[
 \mbox{(m.colwise() - v).colwise().squaredNorm()} =
 \begin{bmatrix}
   1 & 505 & 32 & 50
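Combining the partial reductions and broadcasting described above yields the nearest-column search that produces exactly the row vector [1 505 32 50] shown in this hunk; a minimal sketch (the values are the tutorial's):

#include <iostream>
#include <Eigen/Dense>
using namespace Eigen;

int main()
{
  MatrixXf m(2,4);
  m << 1, 23, 6, 9,
       3, 11, 7, 2;
  VectorXf v(2);
  v << 2, 3;

  // Broadcast v across the columns, then reduce each column to its squared norm.
  MatrixXf::Index index;
  (m.colwise() - v).colwise().squaredNorm().minCoeff(&index);

  std::cout << "Nearest neighbour is column " << index << ":\n"
            << m.col(index) << "\n";   // column 0, distance 1
  return 0;
}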
@@ -257,7 +257,14 @@ Binary coefficient wise operators can also mix sparse and dense expressions:
 \code
 sm2 = sm1.cwiseProduct(dm1);
 dm2 = sm1 + dm1;
+dm2 = dm1 - sm1;
 \endcode
+Performance-wise, adding/subtracting sparse and dense matrices is better performed in two steps. For instance, instead of doing <tt>dm2 = sm1 + dm1</tt>, better write:
+\code
+dm2 = dm1;
+dm2 += sm1;
+\endcode
+This version has the advantage of fully exploiting the higher performance of dense storage (no indirection, SIMD, etc.), and of paying the cost of slow sparse evaluation on the few non-zeros of the sparse matrix only.

 %Sparse expressions also support transposition:
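A self-contained sketch of the recommended two-step pattern (sizes and values are ours):

#include <Eigen/Dense>
#include <Eigen/Sparse>
using namespace Eigen;

void add_sparse_into_dense()
{
  MatrixXd dm1 = MatrixXd::Random(1000,1000);
  SparseMatrix<double> sm1(1000,1000);
  sm1.insert(3,7)  = 1.0;   // only a few non-zeros
  sm1.insert(42,1) = 2.0;

  // One vectorized dense copy, then an in-place sparse update that
  // touches only the non-zeros of sm1.
  MatrixXd dm2 = dm1;
  dm2 += sm1;
}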
@@ -52,7 +52,7 @@ When doing so, a number of Eigen's algorithms are silently substituted with call
 These substitutions apply only for \b Dynamic \b or \b large enough objects with one of the following four standard scalar types: \c float, \c double, \c complex<float>, and \c complex<double>.
 Operations on other scalar types or mixing reals and complexes will continue to use the built-in algorithms.

-In addition you can coarsely select choose which parts will be substituted by defining one or multiple of the following macros:
+In addition you can choose which parts will be substituted by defining one or multiple of the following macros:

 <table class="manual">
 <tr><td>\c EIGEN_USE_BLAS </td><td>Enables the use of external BLAS level 2 and 3 routines (currently works with Intel MKL only)</td></tr>
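These macros are plain preprocessor defines, so the selection can live either in the source or in the build flags; a minimal sketch (the link line is an assumption for one possible MKL setup and will vary):

// Either define the macro before any Eigen header...
#define EIGEN_USE_BLAS
#include <Eigen/Dense>
using namespace Eigen;

int main()
{
  // Large dynamic-size products like this one are forwarded to BLAS gemm.
  MatrixXd a = MatrixXd::Random(512,512), b = MatrixXd::Random(512,512);
  MatrixXd c = a * b;
  return c.size() == 0;
}
// ...or pass it on the command line and link the BLAS implementation, e.g.:
//   g++ -O2 -DEIGEN_USE_BLAS prog.cpp -lmkl_rt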
doc/snippets/TopicAliasing_mult4.cpp (new file, 5 lines)
@@ -0,0 +1,5 @@
+MatrixXf A(2,2), B(3,2);
+B << 2, 0,  0, 3,  1, 1;
+A << 2, 0,  0, -2;
+A = (B * A).cwiseAbs();
+cout << A;

doc/snippets/TopicAliasing_mult5.cpp (new file, 5 lines)
@@ -0,0 +1,5 @@
+MatrixXf A(2,2), B(3,2);
+B << 2, 0,  0, 3,  1, 1;
+A << 2, 0,  0, -2;
+A = (B * A).eval().cwiseAbs();
+cout << A;
@@ -45,12 +45,14 @@ template<> struct adjoint_specific<false> {

   // check null inputs
   VERIFY_IS_APPROX((v1*0).normalized(), (v1*0));
+#if (!EIGEN_ARCH_i386) || defined(EIGEN_VECTORIZE)
   RealScalar very_small = (std::numeric_limits<RealScalar>::min)();
   VERIFY( (v1*very_small).norm() == 0 );
   VERIFY_IS_APPROX((v1*very_small).normalized(), (v1*very_small));
   v3 = v1*very_small;
   v3.normalize();
   VERIFY_IS_APPROX(v3, (v1*very_small));
+#endif

   // check compatibility of dot and adjoint
   ref = NumTraits<Scalar>::IsInteger ? 0 : (std::max)((std::max)(v1.norm(),v2.norm()),(std::max)((square * v2).norm(),(square.adjoint() * v1).norm()));
@@ -219,6 +219,7 @@ template<typename ArrayType> void array_real(const ArrayType& m)
   VERIFY_IS_APPROX(m1.tanh(), tanh(m1));
 #ifdef EIGEN_HAS_C99_MATH
   VERIFY_IS_APPROX(m1.lgamma(), lgamma(m1));
+  VERIFY_IS_APPROX(m1.digamma(), digamma(m1));
   VERIFY_IS_APPROX(m1.erf(), erf(m1));
   VERIFY_IS_APPROX(m1.erfc(), erfc(m1));
 #endif // EIGEN_HAS_C99_MATH
@@ -309,7 +310,22 @@ template<typename ArrayType> void array_real(const ArrayType& m)
   s1 += Scalar(tiny);
   m1 += ArrayType::Constant(rows,cols,Scalar(tiny));
   VERIFY_IS_APPROX(s1/m1, s1 * m1.inverse());

+  // check special functions (comparing against numpy implementation)
+#ifdef EIGEN_HAS_C99_MATH
+  if (!NumTraits<Scalar>::IsComplex) {
+    VERIFY_IS_APPROX(numext::digamma(Scalar(1)), RealScalar(-0.5772156649015329));
+    VERIFY_IS_APPROX(numext::digamma(Scalar(1.5)), RealScalar(0.03648997397857645));
+    VERIFY_IS_APPROX(numext::digamma(Scalar(4)), RealScalar(1.2561176684318));
+    VERIFY_IS_APPROX(numext::digamma(Scalar(-10.5)), RealScalar(2.398239129535781));
+    VERIFY_IS_APPROX(numext::digamma(Scalar(10000.5)), RealScalar(9.210340372392849));
+    VERIFY_IS_EQUAL(numext::digamma(Scalar(0)),
+                    std::numeric_limits<RealScalar>::infinity());
+    VERIFY_IS_EQUAL(numext::digamma(Scalar(-1)),
+                    std::numeric_limits<RealScalar>::infinity());
+  }
+#endif // EIGEN_HAS_C99_MATH
+
   // check inplace transpose
   m3 = m1;
   m3.transposeInPlace();
@@ -336,8 +352,6 @@ template<typename ArrayType> void array_complex(const ArrayType& m)

   Array<RealScalar, -1, -1> m3(rows, cols);

-  Scalar  s1 = internal::random<Scalar>();
-
   for (Index i = 0; i < m.rows(); ++i)
     for (Index j = 0; j < m.cols(); ++j)
       m2(i,j) = sqrt(m1(i,j));
@@ -410,6 +424,7 @@ template<typename ArrayType> void array_complex(const ArrayType& m)
   VERIFY_IS_APPROX( m1.sign() * m1.abs(), m1);

   // scalar by array division
+  Scalar  s1 = internal::random<Scalar>();
   const RealScalar tiny = sqrt(std::numeric_limits<RealScalar>::epsilon());
   s1 += Scalar(tiny);
   m1 += ArrayType::Constant(rows,cols,Scalar(tiny));
@@ -68,6 +68,16 @@ template<typename MatrixType> void array_for_matrix(const MatrixType& m)
   const Scalar& ref_a2 = m.array().matrix().coeffRef(0,0);
   VERIFY(&ref_a1 == &ref_m1);
   VERIFY(&ref_a2 == &ref_m2);
+
+  // Check write accessors:
+  m1.array().coeffRef(0,0) = 1;
+  VERIFY_IS_APPROX(m1(0,0),Scalar(1));
+  m1.array()(0,0) = 2;
+  VERIFY_IS_APPROX(m1(0,0),Scalar(2));
+  m1.array().matrix().coeffRef(0,0) = 3;
+  VERIFY_IS_APPROX(m1(0,0),Scalar(3));
+  m1.array().matrix()(0,0) = 4;
+  VERIFY_IS_APPROX(m1(0,0),Scalar(4));
 }

 template<typename MatrixType> void comparisons(const MatrixType& m)
@@ -20,6 +20,8 @@ template<typename MatrixType> void diagonal(const MatrixType& m)
   MatrixType m1 = MatrixType::Random(rows, cols),
              m2 = MatrixType::Random(rows, cols);

+  Scalar s1 = internal::random<Scalar>();
+
   //check diagonal()
   VERIFY_IS_APPROX(m1.diagonal(), m1.transpose().diagonal());
   m2.diagonal() = 2 * m1.diagonal();
@@ -58,6 +60,11 @@ template<typename MatrixType> void diagonal(const MatrixType& m)
       VERIFY_IS_APPROX(m2.template diagonal<N2>(), static_cast<Scalar>(2) * m1.diagonal(N2));
       m2.diagonal(N2)[0] *= 3;
       VERIFY_IS_APPROX(m2.diagonal(N2)[0], static_cast<Scalar>(6) * m1.diagonal(N2)[0]);
+
+      m2.diagonal(N2).x() = s1;
+      VERIFY_IS_APPROX(m2.diagonal(N2).x(), s1);
+      m2.diagonal(N2).coeffRef(0) = Scalar(2)*s1;
+      VERIFY_IS_APPROX(m2.diagonal(N2).coeff(0), Scalar(2)*s1);
     }
   }

@@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2015-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@@ -34,4 +34,32 @@ void test_incomplete_cholesky()
   CALL_SUBTEST_1(( test_incomplete_cholesky_T<double,int>() ));
   CALL_SUBTEST_2(( test_incomplete_cholesky_T<std::complex<double>, int>() ));
   CALL_SUBTEST_3(( test_incomplete_cholesky_T<double,long int>() ));
+
+#ifdef EIGEN_TEST_PART_1
+  // regression for bug 1150
+  for(int N = 1; N<20; ++N)
+  {
+    Eigen::MatrixXd b( N, N );
+    b.setOnes();
+
+    Eigen::SparseMatrix<double> m( N, N );
+    m.reserve(Eigen::VectorXi::Constant(N,4));
+    for( int i = 0; i < N; ++i )
+    {
+      m.insert( i, i ) = 1;
+      m.coeffRef( i, i / 2 ) = 2;
+      m.coeffRef( i, i / 3 ) = 2;
+      m.coeffRef( i, i / 4 ) = 2;
+    }
+
+    Eigen::SparseMatrix<double> A;
+    A = m * m.transpose();
+
+    Eigen::ConjugateGradient<Eigen::SparseMatrix<double>,
+        Eigen::Lower | Eigen::Upper,
+        Eigen::IncompleteCholesky<double> > solver( A );
+    VERIFY(solver.preconditioner().info() == Eigen::Success);
+    VERIFY(solver.info() == Eigen::Success);
+  }
+#endif
 }
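The regression test above doubles as a usage recipe: ConjugateGradient accepts IncompleteCholesky as its preconditioner type parameter. A stripped-down sketch of that pairing (matrix assembly is left to the caller):

#include <Eigen/Sparse>
#include <Eigen/IterativeLinearSolvers>
using namespace Eigen;

VectorXd cg_with_ic(const SparseMatrix<double>& A, const VectorXd& b)
{
  // Both triangular halves of the symmetric A are used; the incomplete
  // Cholesky factor serves as the preconditioner.
  ConjugateGradient<SparseMatrix<double>,
                    Lower | Upper,
                    IncompleteCholesky<double> > solver(A);
  return solver.solve(b);
}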
@@ -44,6 +44,7 @@ template<int SizeAtCompileType> void mixingtypes(int size = SizeAtCompileType)
   Mat_d md = mf.template cast<double>();
   Mat_cf mcf = Mat_cf::Random(size,size);
   Mat_cd mcd = mcf.template cast<complex<double> >();
+  Mat_cd rcd = mcd;
   Vec_f vf = Vec_f::Random(size,1);
   Vec_d vd = vf.template cast<double>();
   Vec_cf vcf = Vec_cf::Random(size,1);
@@ -103,24 +104,23 @@ template<int SizeAtCompileType> void mixingtypes(int size = SizeAtCompileType)
   VERIFY_IS_APPROX(mcd.array() *= md.array(), mcd2.array() *= md.array().template cast<std::complex<double> >());

   // check matrix-matrix products
-
   VERIFY_IS_APPROX(sd*md*mcd, (sd*md).template cast<CD>().eval()*mcd);
   VERIFY_IS_APPROX(sd*mcd*md, sd*mcd*md.template cast<CD>());
   VERIFY_IS_APPROX(scd*md*mcd, scd*md.template cast<CD>().eval()*mcd);
   VERIFY_IS_APPROX(scd*mcd*md, scd*mcd*md.template cast<CD>());

   VERIFY_IS_APPROX(sf*mf*mcf, sf*mf.template cast<CF>()*mcf);
   VERIFY_IS_APPROX(sf*mcf*mf, sf*mcf*mf.template cast<CF>());
   VERIFY_IS_APPROX(scf*mf*mcf, scf*mf.template cast<CF>()*mcf);
   VERIFY_IS_APPROX(scf*mcf*mf, scf*mcf*mf.template cast<CF>());

   VERIFY_IS_APPROX(sd*md.adjoint()*mcd, (sd*md).template cast<CD>().eval().adjoint()*mcd);
   VERIFY_IS_APPROX(sd*mcd.adjoint()*md, sd*mcd.adjoint()*md.template cast<CD>());
   VERIFY_IS_APPROX(sd*md.adjoint()*mcd.adjoint(), (sd*md).template cast<CD>().eval().adjoint()*mcd.adjoint());
   VERIFY_IS_APPROX(sd*mcd.adjoint()*md.adjoint(), sd*mcd.adjoint()*md.template cast<CD>().adjoint());
   VERIFY_IS_APPROX(sd*md*mcd.adjoint(), (sd*md).template cast<CD>().eval()*mcd.adjoint());
   VERIFY_IS_APPROX(sd*mcd*md.adjoint(), sd*mcd*md.template cast<CD>().adjoint());

   VERIFY_IS_APPROX(sf*mf.adjoint()*mcf, (sf*mf).template cast<CF>().eval().adjoint()*mcf);
   VERIFY_IS_APPROX(sf*mcf.adjoint()*mf, sf*mcf.adjoint()*mf.template cast<CF>());
   VERIFY_IS_APPROX(sf*mf.adjoint()*mcf.adjoint(), (sf*mf).template cast<CF>().eval().adjoint()*mcf.adjoint());
@@ -147,6 +147,39 @@ template<int SizeAtCompileType> void mixingtypes(int size = SizeAtCompileType)
   VERIFY_IS_APPROX(scd*vcd.adjoint()*md, scd*vcd.adjoint()*md.template cast<CD>().eval());
   VERIFY_IS_APPROX(sd*vd.adjoint()*mcd, sd*vd.adjoint().template cast<CD>().eval()*mcd);
   VERIFY_IS_APPROX(scd*vd.adjoint()*mcd, scd*vd.adjoint().template cast<CD>().eval()*mcd);

+  VERIFY_IS_APPROX(sd*vcd.adjoint()*md.template triangularView<Upper>(),  sd*vcd.adjoint()*md.template cast<CD>().eval().template triangularView<Upper>());
+  VERIFY_IS_APPROX(scd*vcd.adjoint()*md.template triangularView<Lower>(), scd*vcd.adjoint()*md.template cast<CD>().eval().template triangularView<Lower>());
+  VERIFY_IS_APPROX(sd*vd.adjoint()*mcd.template triangularView<Lower>(),  sd*vd.adjoint().template cast<CD>().eval()*mcd.template triangularView<Lower>());
+  VERIFY_IS_APPROX(scd*vd.adjoint()*mcd.template triangularView<Upper>(), scd*vd.adjoint().template cast<CD>().eval()*mcd.template triangularView<Upper>());
+
+  // Not supported yet: trmm
+//   VERIFY_IS_APPROX(sd*mcd*md.template triangularView<Lower>(),  sd*mcd*md.template cast<CD>().eval().template triangularView<Lower>());
+//   VERIFY_IS_APPROX(scd*mcd*md.template triangularView<Upper>(), scd*mcd*md.template cast<CD>().eval().template triangularView<Upper>());
+//   VERIFY_IS_APPROX(sd*md*mcd.template triangularView<Lower>(),  sd*md.template cast<CD>().eval()*mcd.template triangularView<Lower>());
+//   VERIFY_IS_APPROX(scd*md*mcd.template triangularView<Upper>(), scd*md.template cast<CD>().eval()*mcd.template triangularView<Upper>());
+
+  // Not supported yet: symv
+//   VERIFY_IS_APPROX(sd*vcd.adjoint()*md.template selfadjointView<Upper>(),  sd*vcd.adjoint()*md.template cast<CD>().eval().template selfadjointView<Upper>());
+//   VERIFY_IS_APPROX(scd*vcd.adjoint()*md.template selfadjointView<Lower>(), scd*vcd.adjoint()*md.template cast<CD>().eval().template selfadjointView<Lower>());
+//   VERIFY_IS_APPROX(sd*vd.adjoint()*mcd.template selfadjointView<Lower>(),  sd*vd.adjoint().template cast<CD>().eval()*mcd.template selfadjointView<Lower>());
+//   VERIFY_IS_APPROX(scd*vd.adjoint()*mcd.template selfadjointView<Upper>(), scd*vd.adjoint().template cast<CD>().eval()*mcd.template selfadjointView<Upper>());
+
+  // Not supported yet: symm
+//   VERIFY_IS_APPROX(sd*vcd.adjoint()*md.template selfadjointView<Upper>(),  sd*vcd.adjoint()*md.template cast<CD>().eval().template selfadjointView<Upper>());
+//   VERIFY_IS_APPROX(scd*vcd.adjoint()*md.template selfadjointView<Upper>(), scd*vcd.adjoint()*md.template cast<CD>().eval().template selfadjointView<Upper>());
+//   VERIFY_IS_APPROX(sd*vd.adjoint()*mcd.template selfadjointView<Upper>(),  sd*vd.adjoint().template cast<CD>().eval()*mcd.template selfadjointView<Upper>());
+//   VERIFY_IS_APPROX(scd*vd.adjoint()*mcd.template selfadjointView<Upper>(), scd*vd.adjoint().template cast<CD>().eval()*mcd.template selfadjointView<Upper>());
+
+  rcd.setZero();
+  VERIFY_IS_APPROX(Mat_cd(rcd.template triangularView<Upper>() = sd * mcd * md),
+                   Mat_cd((sd * mcd * md.template cast<CD>().eval()).template triangularView<Upper>()));
+  VERIFY_IS_APPROX(Mat_cd(rcd.template triangularView<Upper>() = sd * md * mcd),
+                   Mat_cd((sd * md.template cast<CD>().eval() * mcd).template triangularView<Upper>()));
+  VERIFY_IS_APPROX(Mat_cd(rcd.template triangularView<Upper>() = scd * mcd * md),
+                   Mat_cd((scd * mcd * md.template cast<CD>().eval()).template triangularView<Upper>()));
+  VERIFY_IS_APPROX(Mat_cd(rcd.template triangularView<Upper>() = scd * md * mcd),
+                   Mat_cd((scd * md.template cast<CD>().eval() * mcd).template triangularView<Upper>()));
 }

 void test_mixingtypes()
@@ -78,14 +78,15 @@ template<typename MatrixType> void nomalloc(const MatrixType& m)
   VERIFY_IS_APPROX(m2,m2);

   m2.template selfadjointView<Lower>().rankUpdate(m1.col(0),-1);
-  m2.template selfadjointView<Lower>().rankUpdate(m1.row(0),-1);
+  m2.template selfadjointView<Upper>().rankUpdate(m1.row(0),-1);
+  m2.template selfadjointView<Lower>().rankUpdate(m1.col(0), m1.col(0)); // rank-2

   // The following fancy matrix-matrix products are not safe yet regarding static allocation
-//   m1 += m1.template triangularView<Upper>() * m2.col(;
-//   m1.template selfadjointView<Lower>().rankUpdate(m2);
-//   m1 += m1.template triangularView<Upper>() * m2;
-//   m1 += m1.template selfadjointView<Lower>() * m2;
-//   VERIFY_IS_APPROX(m1,m1);
+  m2.template selfadjointView<Lower>().rankUpdate(m1);
+  m2 += m2.template triangularView<Upper>() * m1;
+  m2.template triangularView<Upper>() = m2 * m2;
+  m1 += m1.template selfadjointView<Lower>() * m2;
+  VERIFY_IS_APPROX(m2,m2);
 }

 template<typename Scalar>
@@ -48,30 +48,32 @@ void testVectorType(const VectorType& base)
   VectorType m(base);
   m.setLinSpaced(size,low,high);

+  if(!NumTraits<Scalar>::IsInteger)
+  {
+    VectorType n(size);
+    for (int i=0; i<size; ++i)
+      n(i) = low+i*step;
+    VERIFY_IS_APPROX(m,n);
+  }
+
   VectorType n(size);
   for (int i=0; i<size; ++i)
-    n(i) = low+i*step;
+    n(i) = size==1 ? low : (low + ((high-low)*Scalar(i))/(size-1));

   VERIFY_IS_APPROX(m,n);

   // random access version
   m = VectorType::LinSpaced(size,low,high);
   VERIFY_IS_APPROX(m,n);
-
-  // Assignment of a RowVectorXd to a MatrixXd (regression test for bug #79).
-  VERIFY( (MatrixXd(RowVectorXd::LinSpaced(3, 0, 1)) - RowVector3d(0, 0.5, 1)).norm() < std::numeric_limits<Scalar>::epsilon() );
-
-  // These guys sometimes fail! This is not good. Any ideas how to fix them!?
-  //VERIFY( m(m.size()-1) == high );
-  //VERIFY( m(0) == low );
+  VERIFY( internal::isApprox(m(m.size()-1),high) );
+  VERIFY( size==1 || internal::isApprox(m(0),low) );

   // sequential access version
   m = VectorType::LinSpaced(Sequential,size,low,high);
   VERIFY_IS_APPROX(m,n);
-
-  // These guys sometimes fail! This is not good. Any ideas how to fix them!?
-  //VERIFY( m(m.size()-1) == high );
-  //VERIFY( m(0) == low );
+  VERIFY( internal::isApprox(m(m.size()-1),high) );
+  VERIFY( size==1 || internal::isApprox(m(0),low) );

   // check whether everything works with row and col major vectors
   Matrix<Scalar,Dynamic,1> row_vector(size);
@@ -126,5 +128,13 @@ void test_nullary()
     CALL_SUBTEST_8( testVectorType(Vector4f()) );
     CALL_SUBTEST_8( testVectorType(Matrix<float,8,1>()) );
     CALL_SUBTEST_8( testVectorType(Matrix<float,1,1>()) );
+
+    CALL_SUBTEST_9( testVectorType(VectorXi(internal::random<int>(1,300))) );
+    CALL_SUBTEST_9( testVectorType(Matrix<int,1,1>()) );
   }
+
+#ifdef EIGEN_TEST_PART_6
+  // Assignment of a RowVectorXd to a MatrixXd (regression test for bug #79).
+  VERIFY( (MatrixXd(RowVectorXd::LinSpaced(3, 0, 1)) - RowVector3d(0, 0.5, 1)).norm() < std::numeric_limits<double>::epsilon() );
+#endif
 }
@@ -192,6 +192,11 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
     VERIFY_IS_APPROX(refM4.cwiseProduct(m3), refM4.cwiseProduct(refM3));
     // VERIFY_IS_APPROX(m3.cwise()/refM4,   refM3.cwise()/refM4);

+    VERIFY_IS_APPROX(refM4 + m3, refM4 + refM3);
+    VERIFY_IS_APPROX(m3 + refM4, refM3 + refM4);
+    VERIFY_IS_APPROX(refM4 - m3, refM4 - refM3);
+    VERIFY_IS_APPROX(m3 - refM4, refM3 - refM4);
+
     // test aliasing
     VERIFY_IS_APPROX((m1 = -m1), (refM1 = -refM1));
     VERIFY_IS_APPROX((m1 = m1.transpose()), (refM1 = refM1.transpose().eval()));
@@ -455,6 +460,33 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
     refMat1.setIdentity();
     VERIFY_IS_APPROX(m1, refMat1);
   }

+  // test array/vector of InnerIterator
+  {
+    typedef typename SparseMatrixType::InnerIterator IteratorType;
+
+    DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
+    SparseMatrixType m2(rows, cols);
+    initSparse<Scalar>(density, refMat2, m2);
+    IteratorType static_array[2];
+    static_array[0] = IteratorType(m2,0);
+    static_array[1] = IteratorType(m2,m2.outerSize()-1);
+    VERIFY( static_array[0] || m2.innerVector(static_array[0].outer()).nonZeros() == 0 );
+    VERIFY( static_array[1] || m2.innerVector(static_array[1].outer()).nonZeros() == 0 );
+    if(static_array[0] && static_array[1])
+    {
+      ++(static_array[1]);
+      static_array[1] = IteratorType(m2,0);
+      VERIFY( static_array[1] );
+      VERIFY( static_array[1].index() == static_array[0].index() );
+      VERIFY( static_array[1].outer() == static_array[0].outer() );
+      VERIFY( static_array[1].value() == static_array[0].value() );
+    }
+
+    std::vector<IteratorType> iters(2);
+    iters[0] = IteratorType(m2,0);
+    iters[1] = IteratorType(m2,m2.outerSize()-1);
+  }
 }

@@ -9,14 +9,14 @@

 #include "sparse.h"

-template<typename Scalar,typename Index> void sparse_vector(int rows, int cols)
+template<typename Scalar,typename StorageIndex> void sparse_vector(int rows, int cols)
 {
   double densityMat = (std::max)(8./(rows*cols), 0.01);
   double densityVec = (std::max)(8./float(rows), 0.1);
   typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
   typedef Matrix<Scalar,Dynamic,1> DenseVector;
-  typedef SparseVector<Scalar,0,Index> SparseVectorType;
-  typedef SparseMatrix<Scalar,0,Index> SparseMatrixType;
+  typedef SparseVector<Scalar,0,StorageIndex> SparseVectorType;
+  typedef SparseMatrix<Scalar,0,StorageIndex> SparseMatrixType;
   Scalar eps = 1e-6;

   SparseMatrixType m1(rows,rows);
@@ -87,8 +87,10 @@ template<typename Scalar,typename Index> void sparse_vector(int rows, int cols)

   VERIFY_IS_APPROX(m1*v2, refM1*refV2);
   VERIFY_IS_APPROX(v1.dot(m1*v2), refV1.dot(refM1*refV2));
-  int i = internal::random<int>(0,rows-1);
-  VERIFY_IS_APPROX(v1.dot(m1.col(i)), refV1.dot(refM1.col(i)));
+  {
+    int i = internal::random<int>(0,rows-1);
+    VERIFY_IS_APPROX(v1.dot(m1.col(i)), refV1.dot(refM1.col(i)));
+  }

   VERIFY_IS_APPROX(v1.squaredNorm(), refV1.squaredNorm());
@@ -111,15 +113,51 @@ template<typename Scalar,typename Index> void sparse_vector(int rows, int cols)
   VERIFY_IS_APPROX(refV3 = v1.transpose(),v1.toDense());
   VERIFY_IS_APPROX(DenseVector(v1),v1.toDense());

+  // test conservative resize
+  {
+    std::vector<StorageIndex> inc;
+    if(rows > 3)
+      inc.push_back(-3);
+    inc.push_back(0);
+    inc.push_back(3);
+    inc.push_back(1);
+    inc.push_back(10);
+
+    for(std::size_t i = 0; i< inc.size(); i++) {
+      StorageIndex incRows = inc[i];
+      SparseVectorType vec1(rows);
+      DenseVector refVec1 = DenseVector::Zero(rows);
+      initSparse<Scalar>(densityVec, refVec1, vec1);
+
+      vec1.conservativeResize(rows+incRows);
+      refVec1.conservativeResize(rows+incRows);
+      if (incRows > 0) refVec1.tail(incRows).setZero();
+
+      VERIFY_IS_APPROX(vec1, refVec1);
+
+      // Insert new values
+      if (incRows > 0)
+        vec1.insert(vec1.rows()-1) = refVec1(refVec1.rows()-1) = 1;
+
+      VERIFY_IS_APPROX(vec1, refVec1);
+    }
+  }
 }

 void test_sparse_vector()
 {
   for(int i = 0; i < g_repeat; i++) {
+    int r = Eigen::internal::random<int>(1,500), c = Eigen::internal::random<int>(1,500);
+    if(Eigen::internal::random<int>(0,4) == 0) {
+      r = c; // check square matrices in 25% of tries
+    }
+    EIGEN_UNUSED_VARIABLE(r+c);
+
     CALL_SUBTEST_1(( sparse_vector<double,int>(8, 8) ));
-    CALL_SUBTEST_2(( sparse_vector<std::complex<double>, int>(16, 16) ));
-    CALL_SUBTEST_1(( sparse_vector<double,long int>(299, 535) ));
-    CALL_SUBTEST_1(( sparse_vector<double,short>(299, 535) ));
+    CALL_SUBTEST_2(( sparse_vector<std::complex<double>, int>(r, c) ));
+    CALL_SUBTEST_1(( sparse_vector<double,long int>(r, c) ));
+    CALL_SUBTEST_1(( sparse_vector<double,short>(r, c) ));
   }
 }

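The new block exercises SparseVector::conservativeResize(), which keeps the existing non-zeros while growing or shrinking the vector; a minimal usage sketch (the values are ours):

#include <Eigen/Sparse>
using namespace Eigen;

void grow_sparse_vector()
{
  SparseVector<double> v(10);
  v.insert(2) = 1.0;
  v.insert(7) = 2.0;

  v.conservativeResize(15);   // old non-zeros survive, new tail is empty
  v.insert(14) = 3.0;         // the new room can be filled as usual

  v.conservativeResize(5);    // shrinking drops non-zeros past the new size
}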
@@ -163,6 +163,21 @@ template<typename MatrixType> void stable_norm(const MatrixType& m)
     VERIFY(!(numext::isfinite)(v.blueNorm()));   VERIFY((numext::isnan)(v.blueNorm()));
     VERIFY(!(numext::isfinite)(v.hypotNorm()));  VERIFY((numext::isnan)(v.hypotNorm()));
   }

+  // stableNormalize[d]
+  {
+    VERIFY_IS_APPROX(vrand.stableNormalized(), vrand.normalized());
+    MatrixType vcopy(vrand);
+    vcopy.stableNormalize();
+    VERIFY_IS_APPROX(vcopy, vrand.normalized());
+    VERIFY_IS_APPROX((vrand.stableNormalized()).norm(), RealScalar(1));
+    VERIFY_IS_APPROX(vcopy.norm(), RealScalar(1));
+    VERIFY_IS_APPROX((vbig.stableNormalized()).norm(),   RealScalar(1));
+    VERIFY_IS_APPROX((vsmall.stableNormalized()).norm(), RealScalar(1));
+    RealScalar big_scaling = ((std::numeric_limits<RealScalar>::max)() * RealScalar(1e-4));
+    VERIFY_IS_APPROX(vbig/big_scaling, (vbig.stableNorm() * vbig.stableNormalized()).eval()/big_scaling);
+    VERIFY_IS_APPROX(vsmall, vsmall.stableNorm() * vsmall.stableNormalized());
+  }
 }

 void test_stable_norm()
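The stableNormalize()/stableNormalized() members tested above exist because naive normalization overflows or underflows at extreme magnitudes; a minimal sketch of the failure mode the test targets (the values are ours):

#include <iostream>
#include <limits>
#include <Eigen/Dense>
using namespace Eigen;

int main()
{
  Vector3d vbig = Vector3d::Constant(0.5 * std::numeric_limits<double>::max());

  // Naive: the intermediate squared norm overflows to inf, so normalized()
  // collapses the whole vector to zero.
  std::cout << vbig.normalized().transpose() << "\n";       // 0 0 0

  // Stable: rescales internally before dividing, yielding a unit vector.
  std::cout << vbig.stableNormalized().transpose() << "\n"; // ~0.577 each
  return 0;
}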
@@ -210,6 +210,9 @@ template<typename MatrixType> void vectorwiseop_matrix(const MatrixType& m)
   VERIFY_IS_APPROX(m1.cwiseAbs().colwise().maxCoeff(), m1.colwise().template lpNorm<Infinity>());
   VERIFY_IS_APPROX(m1.cwiseAbs().rowwise().maxCoeff(), m1.rowwise().template lpNorm<Infinity>());

+  // regression for bug 1158
+  VERIFY_IS_APPROX(m1.cwiseAbs().colwise().sum().x(), m1.col(0).cwiseAbs().sum());
+
   // test normalized
   m2 = m1.colwise().normalized();
   VERIFY_IS_APPROX(m2.col(c), m1.col(c).normalized());
@ -25,6 +25,7 @@ template<typename MatrixType> void zeroReduction(const MatrixType& m) {
|
|||||||
template<typename MatrixType> void zeroSizedMatrix()
|
template<typename MatrixType> void zeroSizedMatrix()
|
||||||
{
|
{
|
||||||
MatrixType t1;
|
MatrixType t1;
|
||||||
|
typedef typename MatrixType::Scalar Scalar;
|
||||||
|
|
||||||
if (MatrixType::SizeAtCompileTime == Dynamic || MatrixType::SizeAtCompileTime == 0)
|
if (MatrixType::SizeAtCompileTime == Dynamic || MatrixType::SizeAtCompileTime == 0)
|
||||||
{
|
{
|
||||||
@ -45,6 +46,23 @@ template<typename MatrixType> void zeroSizedMatrix()
|
|||||||
VERIFY(t1==t2);
|
VERIFY(t1==t2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(MatrixType::MaxColsAtCompileTime!=0 && MatrixType::MaxRowsAtCompileTime!=0)
|
||||||
|
{
|
||||||
|
Index rows = MatrixType::RowsAtCompileTime==Dynamic ? internal::random<Index>(1,10) : MatrixType::RowsAtCompileTime;
|
||||||
|
Index cols = MatrixType::ColsAtCompileTime==Dynamic ? internal::random<Index>(1,10) : MatrixType::ColsAtCompileTime;
|
||||||
|
MatrixType m(rows,cols);
|
||||||
|
zeroReduction(m.template block<0,MatrixType::ColsAtCompileTime>(0,0,0,cols));
|
||||||
|
zeroReduction(m.template block<MatrixType::RowsAtCompileTime,0>(0,0,rows,0));
|
||||||
|
zeroReduction(m.template block<0,1>(0,0));
|
||||||
|
zeroReduction(m.template block<1,0>(0,0));
|
||||||
|
Matrix<Scalar,Dynamic,Dynamic> prod = m.template block<MatrixType::RowsAtCompileTime,0>(0,0,rows,0) * m.template block<0,MatrixType::ColsAtCompileTime>(0,0,0,cols);
|
||||||
|
VERIFY(prod.rows()==rows && prod.cols()==cols);
|
||||||
|
VERIFY(prod.isZero());
|
||||||
|
prod = m.template block<1,0>(0,0) * m.template block<0,1>(0,0);
|
||||||
|
VERIFY(prod.size()==1);
|
||||||
|
VERIFY(prod.isZero());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename VectorType> void zeroSizedVector()
|
template<typename VectorType> void zeroSizedVector()
|
||||||
@@ -188,7 +188,7 @@ template<typename _Scalar> class AlignedVector3
   }

   template<typename Derived>
-  inline bool isApprox(const MatrixBase<Derived>& other, RealScalar eps=NumTraits<Scalar>::dummy_precision()) const
+  inline bool isApprox(const MatrixBase<Derived>& other, const RealScalar& eps=NumTraits<Scalar>::dummy_precision()) const
   {
     return m_coeffs.template head<3>().isApprox(other,eps);
   }
@@ -25,6 +25,16 @@ template <typename T, size_t n> class array {
   EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE const T& operator[] (size_t index) const { return values[index]; }

+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE T& front() { return values[0]; }
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE const T& front() const { return values[0]; }
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE T& back() { return values[n-1]; }
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE const T& back() const { return values[n-1]; }
+
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
   static std::size_t size() { return n; }

@@ -123,13 +133,33 @@ template <typename T> class array<T, 0> {
   EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE T& operator[] (size_t) {
     eigen_assert(false && "Can't index a zero size array");
-    return *static_cast<T*>(NULL);
+    return dummy;
   }

   EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE const T& operator[] (size_t) const {
     eigen_assert(false && "Can't index a zero size array");
-    return *static_cast<const T*>(NULL);
+    return dummy;
+  }
+
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE T& front() {
+    eigen_assert(false && "Can't index a zero size array");
+    return dummy;
+  }
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE const T& front() const {
+    eigen_assert(false && "Can't index a zero size array");
+    return dummy;
+  }
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE T& back() {
+    eigen_assert(false && "Can't index a zero size array");
+    return dummy;
+  }
+  EIGEN_DEVICE_FUNC
+  EIGEN_STRONG_INLINE const T& back() const {
+    eigen_assert(false && "Can't index a zero size array");
+    return dummy;
   }

   static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::size_t size() { return 0; }
@@ -142,6 +172,9 @@ template <typename T> class array<T, 0> {
     eigen_assert(l.size() == 0);
   }
 #endif
+
+ private:
+  T dummy;
 };

 namespace internal {
@@ -128,6 +128,12 @@ class TensorBase<Derived, ReadOnlyAccessors>
       return unaryExpr(internal::scalar_lgamma_op<Scalar>());
     }

+    EIGEN_DEVICE_FUNC
+    EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_digamma_op<Scalar>, const Derived>
+    digamma() const {
+      return unaryExpr(internal::scalar_digamma_op<Scalar>());
+    }
+
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived>
     erf() const {
@@ -378,7 +378,7 @@ struct TensorContractionEvaluatorBase
   }

   template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment>
-  void evalGemv(Scalar* buffer) const {
+  EIGEN_DEVICE_FUNC void evalGemv(Scalar* buffer) const {
     const Index rows = m_i_size;
     const Index cols = m_k_size;

@@ -516,7 +516,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
       Base(op, device) { }

   template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment>
-  void evalProduct(Scalar* buffer) const {
+  EIGEN_DEVICE_FUNC void evalProduct(Scalar* buffer) const {
     if (this->m_j_size == 1) {
       this->template evalGemv<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer);
       return;
@@ -582,10 +582,8 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT

     OutputMapper output(buffer, m);

-    typedef typename internal::gemm_blocking_space<ColMajor, LhsScalar, RhsScalar, Dynamic, Dynamic, Dynamic> BlockingType;
-
     // Sizes of the blocks to load in cache. See the Goto paper for details.
-    BlockingType blocking(m, n, k, 1, true);
+    internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index, internal::ShardByCol> blocking(k, m, n, 1);
     const Index kc = blocking.kc();
     const Index mc = numext::mini(m, blocking.mc());
     const Index nc = numext::mini(n, blocking.nc());
@@ -28,7 +28,7 @@ class TensorContractionBlocking {
   typedef typename LhsMapper::Scalar LhsScalar;
   typedef typename RhsMapper::Scalar RhsScalar;

-  TensorContractionBlocking(Index k, Index m, Index n, Index num_threads = 1) :
+  EIGEN_DEVICE_FUNC TensorContractionBlocking(Index k, Index m, Index n, Index num_threads = 1) :
       kc_(k), mc_(m), nc_(n)
   {
     if (ShardingType == ShardByCol) {
@@ -41,9 +41,9 @@ class TensorContractionBlocking {
     }
   }

-  EIGEN_ALWAYS_INLINE Index kc() const { return kc_; }
-  EIGEN_ALWAYS_INLINE Index mc() const { return mc_; }
-  EIGEN_ALWAYS_INLINE Index nc() const { return nc_; }
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index kc() const { return kc_; }
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index mc() const { return mc_; }
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index nc() const { return nc_; }

  private:
   Index kc_;
@@ -426,15 +426,16 @@ class TensorContractionSubMapper {
 };


-template<typename Scalar, typename Index, int side,
+template<typename Scalar_, typename Index, int side,
          typename Tensor,
          typename nocontract_t, typename contract_t,
          int packet_size,
          bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
 class TensorContractionInputMapper
-  : public BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> {
+  : public BaseTensorContractionMapper<Scalar_, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> {

  public:
+  typedef Scalar_ Scalar;
   typedef BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Base;
   typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> SubMapper;
   typedef SubMapper VectorMapper;
@@ -176,10 +176,10 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT

     // compute block sizes (which depend on number of threads)
     const Index num_threads = this->m_device.numThreads();
-    Index mc = m;
-    Index nc = n;
-    Index kc = k;
-    internal::computeProductBlockingSizes<LhsScalar,RhsScalar,1>(kc, mc, nc, num_threads);
+    internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index, internal::ShardByCol> blocking(k, m, n, num_threads);
+    Index mc = blocking.mc();
+    Index nc = blocking.nc();
+    Index kc = blocking.kc();
     eigen_assert(mc <= m);
     eigen_assert(nc <= n);
     eigen_assert(kc <= k);
@ -21,7 +21,7 @@ namespace Eigen {
|
|||||||
*/
|
*/
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
template <typename Index, typename InputDims, size_t NumKernelDims, int Layout>
|
template <typename Index, typename InputDims, int NumKernelDims, int Layout>
|
||||||
class IndexMapper {
|
class IndexMapper {
|
||||||
public:
|
public:
|
||||||
IndexMapper(const InputDims& input_dims, const array<Index, NumKernelDims>& kernel_dims,
|
IndexMapper(const InputDims& input_dims, const array<Index, NumKernelDims>& kernel_dims,
|
||||||
@ -123,7 +123,7 @@ class IndexMapper {
|
|||||||
}
|
}
|
||||||
inputIndex += p * m_inputStrides[NumKernelDims];
|
inputIndex += p * m_inputStrides[NumKernelDims];
|
||||||
} else {
|
} else {
|
||||||
int limit = 0;
|
std::ptrdiff_t limit = 0;
|
||||||
if (NumKernelDims < NumDims) {
|
if (NumKernelDims < NumDims) {
|
||||||
limit = NumDims - NumKernelDims - 1;
|
limit = NumDims - NumKernelDims - 1;
|
||||||
}
|
}
|
||||||
@@ -147,7 +147,7 @@ class IndexMapper {
       }
       outputIndex += p * m_outputStrides[NumKernelDims];
     } else {
-      int limit = 0;
+      std::ptrdiff_t limit = 0;
       if (NumKernelDims < NumDims) {
         limit = NumDims - NumKernelDims - 1;
       }
@@ -206,7 +206,7 @@ class IndexMapper {
   }
 
  private:
-  static const size_t NumDims = internal::array_size<InputDims>::value;
+  static const int NumDims = internal::array_size<InputDims>::value;
   array<Index, NumDims> m_inputStrides;
   array<Index, NumDims> m_outputStrides;
   array<Index, NumDims> m_cudaInputStrides;
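These three IndexMapper hunks share one motivation: with an unsigned `size_t NumDims` and signed kernel-dimension counts, mixed comparisons promote to unsigned, and an expression like `NumDims - NumKernelDims - 1` wraps to a huge value instead of going negative. Making everything signed (`int` for the dimension counts, `std::ptrdiff_t` for `limit`) keeps the arithmetic well-behaved; in the guarded code above the `if` prevents the wrap in practice, so the change mainly silences mixed signed/unsigned comparisons and keeps intermediates signed. A minimal demonstration of the pitfall being avoided:

    #include <cstddef>
    #include <iostream>

    int main() {
      std::size_t num_dims = 2;  // unsigned, as NumDims was before the change
      int kernel_dims = 3;

      // The subtraction happens in size_t: 2 - 3 - 1 wraps around to
      // 18446744073709551614 on a 64-bit platform instead of -2.
      std::size_t wrapped = num_dims - kernel_dims - 1;

      // With signed operands the result is the expected -2.
      std::ptrdiff_t ok = static_cast<std::ptrdiff_t>(num_dims) - kernel_dims - 1;

      std::cout << wrapped << " vs " << ok << "\n";
    }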
@@ -109,10 +109,12 @@ class CudaStreamDevice : public StreamInterface {
 struct GpuDevice {
   // The StreamInterface is not owned: the caller is
   // responsible for its initialization and eventual destruction.
-  explicit GpuDevice(const StreamInterface* stream) : stream_(stream) {
+  explicit GpuDevice(const StreamInterface* stream) : stream_(stream), max_blocks_(INT_MAX) {
+    eigen_assert(stream);
+  }
+  explicit GpuDevice(const StreamInterface* stream, int num_blocks) : stream_(stream), max_blocks_(num_blocks) {
     eigen_assert(stream);
   }
 
   // TODO(bsteiner): This is an internal API, we should not expose it.
   EIGEN_STRONG_INLINE const cudaStream_t& stream() const {
     return stream_->stream();
@@ -246,6 +248,10 @@ struct GpuDevice {
 #endif
   }
 
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxBlocks() const {
+    return max_blocks_;
+  }
+
   // This function checks if the CUDA runtime recorded an error for the
   // underlying stream device.
   inline bool ok() const {
@@ -259,7 +265,7 @@ struct GpuDevice {
 
  private:
   const StreamInterface* stream_;
+  int max_blocks_;
 };
 
 #ifndef __CUDA_ARCH__
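Taken together, these GpuDevice hunks let callers cap the number of CUDA blocks a kernel launch may use, with the `INT_MAX` default preserving the old unbounded behavior. A usage sketch, assuming a CUDA-enabled build compiled with nvcc (`CudaStreamDevice` is Eigen's default `StreamInterface` implementation):

    #define EIGEN_USE_GPU
    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::CudaStreamDevice stream;          // wraps the default CUDA stream
      Eigen::GpuDevice capped(&stream, 128);   // new ctor: at most 128 blocks per launch
      Eigen::GpuDevice unbounded(&stream);     // old behavior: max_blocks_ == INT_MAX

      // Expressions evaluated on `capped` will have their launch grid
      // clamped via maxBlocks() in TensorExecutor (see the hunks below).
      return 0;
    }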
@@ -136,7 +136,7 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device>
   }
 
   template<int LoadMode>
-  EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
   {
     return internal::ploadt<Packet, LoadMode>(m_buffer + index);
   }
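`EIGEN_DEVICE_FUNC` expands to `__host__ __device__` under NVCC, so the annotated accessor becomes callable from CUDA kernels as well as host code; without it, device-side evaluators could not read packets from the buffer. A reduced sketch of the annotation pattern (macro name hypothetical, simplified from Eigen's actual definition):

    // Simplified host/device annotation, in the spirit of EIGEN_DEVICE_FUNC.
    #if defined(__CUDACC__)
    #define MY_DEVICE_FUNC __host__ __device__
    #else
    #define MY_DEVICE_FUNC
    #endif

    // Callable from both host code and CUDA kernels when built with nvcc.
    MY_DEVICE_FUNC inline float load_at(const float* buffer, int index) {
      return buffer[index];
    }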
@@ -220,7 +220,7 @@ EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, false>::run(
   if (needs_assign)
   {
     const int block_size = device.maxCudaThreadsPerBlock();
-    const int max_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size;
+    const int max_blocks = numext::maxi<int>(device.maxBlocks(), device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size);
     const Index size = array_prod(evaluator.dimensions());
     // Create at least one block to ensure we won't crash if we're called with tensors of size 0.
     const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, (size + block_size - 1) / block_size), 1);
@@ -239,7 +239,7 @@ EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, true>::run(c
   if (needs_assign)
   {
     const int block_size = device.maxCudaThreadsPerBlock();
-    const int max_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size;
+    const int max_blocks = numext::maxi<int>(device.maxBlocks(), device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size);
     const Index size = array_prod(evaluator.dimensions());
     // Create at least one block to ensure we won't crash if we're called with tensors of size 0.
     const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, (size + block_size - 1) / block_size), 1);
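The launch configuration clamps the block count from both sides: at least one block, so empty tensors never launch a zero-sized grid, and at most `max_blocks`. A standalone version of the arithmetic with worked examples:

    #include <algorithm>
    #include <cassert>

    // Mirrors the num_blocks computation in the executor hunks above.
    int num_blocks(long long size, int block_size, int max_blocks) {
      int wanted = static_cast<int>((size + block_size - 1) / block_size);  // ceil-div
      return std::max(std::min(max_blocks, wanted), 1);
    }

    int main() {
      assert(num_blocks(0, 1024, 65535) == 1);        // empty tensor still gets one block
      assert(num_blocks(1500, 1024, 65535) == 2);     // ceil(1500 / 1024) = 2
      assert(num_blocks(1 << 24, 1024, 128) == 128);  // capped by max_blocks
      return 0;
    }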
@@ -106,7 +106,6 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
   EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
-    m_impl.evalSubExprsIfNeeded(NULL);
     const Index numValues = m_impl.dimensions().TotalSize();
     m_buffer = (CoeffReturnType*)m_device.allocate(numValues * sizeof(CoeffReturnType));
     // Should initialize the memory in case we're dealing with non POD types.
@@ -119,7 +118,6 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
     EvalTo evalToTmp(m_buffer, m_op);
     const bool PacketAccess = internal::IsVectorizable<Device, const ArgType>::value;
     internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device);
-    m_impl.cleanup();
     return true;
   }
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
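The forced-eval evaluator materializes its subexpression once into `m_buffer` and serves all subsequent reads from it; the `evalSubExprsIfNeeded`/`cleanup` calls on `m_impl` are dropped here, presumably because the nested `TensorExecutor::run` already drives the subexpression's lifecycle through the evaluators it builds for `evalToTmp`. At the user level, this materialize-once behavior is what `.eval()` on a tensor expression provides (it creates a `TensorForcedEvalOp`), e.g.:

    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::Tensor<float, 2> a(64, 64), b(64, 64);
      a.setRandom();
      b.setRandom();

      // Forces (a + b) into a concrete temporary, mirroring the internal
      // buffer the evaluator above allocates: the sum is computed once,
      // and later reads hit the stored values.
      Eigen::Tensor<float, 2> sum = (a + b).eval();
      float first = sum(0, 0);
      (void)first;
      return 0;
    }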