mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-20 08:39:37 +08:00
bug #1741: fix self-adjoint*matrix, triangular*matrix, and triangular^-1*matrix with a destination having a non-trivial inner-stride
This commit is contained in:
parent
459b2bcc08
commit
031f17117d
@ -19,7 +19,7 @@ namespace internal {
|
|||||||
template<typename LhsScalar, typename RhsScalar, typename Index, int Side, int Mode, bool Conjugate, int StorageOrder>
|
template<typename LhsScalar, typename RhsScalar, typename Index, int Side, int Mode, bool Conjugate, int StorageOrder>
|
||||||
struct triangular_solve_vector;
|
struct triangular_solve_vector;
|
||||||
|
|
||||||
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherStorageOrder>
|
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherStorageOrder, int OtherInnerStride>
|
||||||
struct triangular_solve_matrix;
|
struct triangular_solve_matrix;
|
||||||
|
|
||||||
// small helper struct extracting some traits on the underlying solver operation
|
// small helper struct extracting some traits on the underlying solver operation
|
||||||
@ -98,8 +98,8 @@ struct triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,Dynamic>
|
|||||||
BlockingType blocking(rhs.rows(), rhs.cols(), size, 1, false);
|
BlockingType blocking(rhs.rows(), rhs.cols(), size, 1, false);
|
||||||
|
|
||||||
triangular_solve_matrix<Scalar,Index,Side,Mode,LhsProductTraits::NeedToConjugate,(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor,
|
triangular_solve_matrix<Scalar,Index,Side,Mode,LhsProductTraits::NeedToConjugate,(int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor,
|
||||||
(Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor>
|
(Rhs::Flags&RowMajorBit) ? RowMajor : ColMajor, Rhs::InnerStrideAtCompileTime>
|
||||||
::run(size, othersize, &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.outerStride(), blocking);
|
::run(size, othersize, &actualLhs.coeffRef(0,0), actualLhs.outerStride(), &rhs.coeffRef(0,0), rhs.innerStride(), rhs.outerStride(), blocking);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -294,20 +294,21 @@ struct symm_pack_rhs
|
|||||||
template <typename Scalar, typename Index,
|
template <typename Scalar, typename Index,
|
||||||
int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
|
int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
|
||||||
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
|
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
|
||||||
int ResStorageOrder>
|
int ResStorageOrder, int ResInnerStride>
|
||||||
struct product_selfadjoint_matrix;
|
struct product_selfadjoint_matrix;
|
||||||
|
|
||||||
template <typename Scalar, typename Index,
|
template <typename Scalar, typename Index,
|
||||||
int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
|
int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
|
||||||
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs>
|
int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
|
||||||
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor>
|
int ResInnerStride>
|
||||||
|
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor,ResInnerStride>
|
||||||
{
|
{
|
||||||
|
|
||||||
static EIGEN_STRONG_INLINE void run(
|
static EIGEN_STRONG_INLINE void run(
|
||||||
Index rows, Index cols,
|
Index rows, Index cols,
|
||||||
const Scalar* lhs, Index lhsStride,
|
const Scalar* lhs, Index lhsStride,
|
||||||
const Scalar* rhs, Index rhsStride,
|
const Scalar* rhs, Index rhsStride,
|
||||||
Scalar* res, Index resStride,
|
Scalar* res, Index resIncr, Index resStride,
|
||||||
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
|
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
|
||||||
{
|
{
|
||||||
product_selfadjoint_matrix<Scalar, Index,
|
product_selfadjoint_matrix<Scalar, Index,
|
||||||
@ -315,33 +316,35 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
|
|||||||
RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
|
RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsSelfAdjoint,ConjugateRhs),
|
||||||
EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
|
EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
|
||||||
LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
|
LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
|
||||||
ColMajor>
|
ColMajor,ResInnerStride>
|
||||||
::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking);
|
::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Scalar, typename Index,
|
template <typename Scalar, typename Index,
|
||||||
int LhsStorageOrder, bool ConjugateLhs,
|
int LhsStorageOrder, bool ConjugateLhs,
|
||||||
int RhsStorageOrder, bool ConjugateRhs>
|
int RhsStorageOrder, bool ConjugateRhs,
|
||||||
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>
|
int ResInnerStride>
|
||||||
|
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>
|
||||||
{
|
{
|
||||||
|
|
||||||
static EIGEN_DONT_INLINE void run(
|
static EIGEN_DONT_INLINE void run(
|
||||||
Index rows, Index cols,
|
Index rows, Index cols,
|
||||||
const Scalar* _lhs, Index lhsStride,
|
const Scalar* _lhs, Index lhsStride,
|
||||||
const Scalar* _rhs, Index rhsStride,
|
const Scalar* _rhs, Index rhsStride,
|
||||||
Scalar* res, Index resStride,
|
Scalar* res, Index resIncr, Index resStride,
|
||||||
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
|
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Scalar, typename Index,
|
template <typename Scalar, typename Index,
|
||||||
int LhsStorageOrder, bool ConjugateLhs,
|
int LhsStorageOrder, bool ConjugateLhs,
|
||||||
int RhsStorageOrder, bool ConjugateRhs>
|
int RhsStorageOrder, bool ConjugateRhs,
|
||||||
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>::run(
|
int ResInnerStride>
|
||||||
|
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>::run(
|
||||||
Index rows, Index cols,
|
Index rows, Index cols,
|
||||||
const Scalar* _lhs, Index lhsStride,
|
const Scalar* _lhs, Index lhsStride,
|
||||||
const Scalar* _rhs, Index rhsStride,
|
const Scalar* _rhs, Index rhsStride,
|
||||||
Scalar* _res, Index resStride,
|
Scalar* _res, Index resIncr, Index resStride,
|
||||||
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
|
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
|
||||||
{
|
{
|
||||||
Index size = rows;
|
Index size = rows;
|
||||||
@ -351,11 +354,11 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
|
|||||||
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
|
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
|
||||||
typedef const_blas_data_mapper<Scalar, Index, (LhsStorageOrder == RowMajor) ? ColMajor : RowMajor> LhsTransposeMapper;
|
typedef const_blas_data_mapper<Scalar, Index, (LhsStorageOrder == RowMajor) ? ColMajor : RowMajor> LhsTransposeMapper;
|
||||||
typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
|
typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
|
||||||
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
|
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
|
||||||
LhsMapper lhs(_lhs,lhsStride);
|
LhsMapper lhs(_lhs,lhsStride);
|
||||||
LhsTransposeMapper lhs_transpose(_lhs,lhsStride);
|
LhsTransposeMapper lhs_transpose(_lhs,lhsStride);
|
||||||
RhsMapper rhs(_rhs,rhsStride);
|
RhsMapper rhs(_rhs,rhsStride);
|
||||||
ResMapper res(_res, resStride);
|
ResMapper res(_res, resStride, resIncr);
|
||||||
|
|
||||||
Index kc = blocking.kc(); // cache block size along the K direction
|
Index kc = blocking.kc(); // cache block size along the K direction
|
||||||
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
|
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
|
||||||
@ -415,26 +418,28 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
|
|||||||
// matrix * selfadjoint product
|
// matrix * selfadjoint product
|
||||||
template <typename Scalar, typename Index,
|
template <typename Scalar, typename Index,
|
||||||
int LhsStorageOrder, bool ConjugateLhs,
|
int LhsStorageOrder, bool ConjugateLhs,
|
||||||
int RhsStorageOrder, bool ConjugateRhs>
|
int RhsStorageOrder, bool ConjugateRhs,
|
||||||
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>
|
int ResInnerStride>
|
||||||
|
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>
|
||||||
{
|
{
|
||||||
|
|
||||||
static EIGEN_DONT_INLINE void run(
|
static EIGEN_DONT_INLINE void run(
|
||||||
Index rows, Index cols,
|
Index rows, Index cols,
|
||||||
const Scalar* _lhs, Index lhsStride,
|
const Scalar* _lhs, Index lhsStride,
|
||||||
const Scalar* _rhs, Index rhsStride,
|
const Scalar* _rhs, Index rhsStride,
|
||||||
Scalar* res, Index resStride,
|
Scalar* res, Index resIncr, Index resStride,
|
||||||
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
|
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Scalar, typename Index,
|
template <typename Scalar, typename Index,
|
||||||
int LhsStorageOrder, bool ConjugateLhs,
|
int LhsStorageOrder, bool ConjugateLhs,
|
||||||
int RhsStorageOrder, bool ConjugateRhs>
|
int RhsStorageOrder, bool ConjugateRhs,
|
||||||
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>::run(
|
int ResInnerStride>
|
||||||
|
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>::run(
|
||||||
Index rows, Index cols,
|
Index rows, Index cols,
|
||||||
const Scalar* _lhs, Index lhsStride,
|
const Scalar* _lhs, Index lhsStride,
|
||||||
const Scalar* _rhs, Index rhsStride,
|
const Scalar* _rhs, Index rhsStride,
|
||||||
Scalar* _res, Index resStride,
|
Scalar* _res, Index resIncr, Index resStride,
|
||||||
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
|
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
|
||||||
{
|
{
|
||||||
Index size = cols;
|
Index size = cols;
|
||||||
@ -442,9 +447,9 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
|
|||||||
typedef gebp_traits<Scalar,Scalar> Traits;
|
typedef gebp_traits<Scalar,Scalar> Traits;
|
||||||
|
|
||||||
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
|
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
|
||||||
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
|
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
|
||||||
LhsMapper lhs(_lhs,lhsStride);
|
LhsMapper lhs(_lhs,lhsStride);
|
||||||
ResMapper res(_res,resStride);
|
ResMapper res(_res,resStride, resIncr);
|
||||||
|
|
||||||
Index kc = blocking.kc(); // cache block size along the K direction
|
Index kc = blocking.kc(); // cache block size along the K direction
|
||||||
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
|
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
|
||||||
@ -520,12 +525,13 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
|
|||||||
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
|
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
|
||||||
EIGEN_LOGICAL_XOR(RhsIsUpper,internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
|
EIGEN_LOGICAL_XOR(RhsIsUpper,internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
|
||||||
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
|
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(RhsIsUpper,bool(RhsBlasTraits::NeedToConjugate)),
|
||||||
internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor>
|
internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor,
|
||||||
|
Dest::InnerStrideAtCompileTime>
|
||||||
::run(
|
::run(
|
||||||
lhs.rows(), rhs.cols(), // sizes
|
lhs.rows(), rhs.cols(), // sizes
|
||||||
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
|
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
|
||||||
&rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
|
&rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
|
||||||
&dst.coeffRef(0,0), dst.outerStride(), // result info
|
&dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(), // result info
|
||||||
actualAlpha, blocking // alpha
|
actualAlpha, blocking // alpha
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -44,16 +44,18 @@ namespace internal {
|
|||||||
template <typename Index, \
|
template <typename Index, \
|
||||||
int LhsStorageOrder, bool ConjugateLhs, \
|
int LhsStorageOrder, bool ConjugateLhs, \
|
||||||
int RhsStorageOrder, bool ConjugateRhs> \
|
int RhsStorageOrder, bool ConjugateRhs> \
|
||||||
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
|
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor,1> \
|
||||||
{\
|
{\
|
||||||
\
|
\
|
||||||
static void run( \
|
static void run( \
|
||||||
Index rows, Index cols, \
|
Index rows, Index cols, \
|
||||||
const EIGTYPE* _lhs, Index lhsStride, \
|
const EIGTYPE* _lhs, Index lhsStride, \
|
||||||
const EIGTYPE* _rhs, Index rhsStride, \
|
const EIGTYPE* _rhs, Index rhsStride, \
|
||||||
EIGTYPE* res, Index resStride, \
|
EIGTYPE* res, Index resIncr, Index resStride, \
|
||||||
EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
|
EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
|
||||||
{ \
|
{ \
|
||||||
|
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
|
||||||
|
eigen_assert(resIncr == 1); \
|
||||||
char side='L', uplo='L'; \
|
char side='L', uplo='L'; \
|
||||||
BlasIndex m, n, lda, ldb, ldc; \
|
BlasIndex m, n, lda, ldb, ldc; \
|
||||||
const EIGTYPE *a, *b; \
|
const EIGTYPE *a, *b; \
|
||||||
@ -91,15 +93,17 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLh
|
|||||||
template <typename Index, \
|
template <typename Index, \
|
||||||
int LhsStorageOrder, bool ConjugateLhs, \
|
int LhsStorageOrder, bool ConjugateLhs, \
|
||||||
int RhsStorageOrder, bool ConjugateRhs> \
|
int RhsStorageOrder, bool ConjugateRhs> \
|
||||||
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
|
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor,1> \
|
||||||
{\
|
{\
|
||||||
static void run( \
|
static void run( \
|
||||||
Index rows, Index cols, \
|
Index rows, Index cols, \
|
||||||
const EIGTYPE* _lhs, Index lhsStride, \
|
const EIGTYPE* _lhs, Index lhsStride, \
|
||||||
const EIGTYPE* _rhs, Index rhsStride, \
|
const EIGTYPE* _rhs, Index rhsStride, \
|
||||||
EIGTYPE* res, Index resStride, \
|
EIGTYPE* res, Index resIncr, Index resStride, \
|
||||||
EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
|
EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
|
||||||
{ \
|
{ \
|
||||||
|
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
|
||||||
|
eigen_assert(resIncr == 1); \
|
||||||
char side='L', uplo='L'; \
|
char side='L', uplo='L'; \
|
||||||
BlasIndex m, n, lda, ldb, ldc; \
|
BlasIndex m, n, lda, ldb, ldc; \
|
||||||
const EIGTYPE *a, *b; \
|
const EIGTYPE *a, *b; \
|
||||||
@ -167,16 +171,18 @@ EIGEN_BLAS_HEMM_L(scomplex, float, cf, chemm_)
|
|||||||
template <typename Index, \
|
template <typename Index, \
|
||||||
int LhsStorageOrder, bool ConjugateLhs, \
|
int LhsStorageOrder, bool ConjugateLhs, \
|
||||||
int RhsStorageOrder, bool ConjugateRhs> \
|
int RhsStorageOrder, bool ConjugateRhs> \
|
||||||
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
|
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor,1> \
|
||||||
{\
|
{\
|
||||||
\
|
\
|
||||||
static void run( \
|
static void run( \
|
||||||
Index rows, Index cols, \
|
Index rows, Index cols, \
|
||||||
const EIGTYPE* _lhs, Index lhsStride, \
|
const EIGTYPE* _lhs, Index lhsStride, \
|
||||||
const EIGTYPE* _rhs, Index rhsStride, \
|
const EIGTYPE* _rhs, Index rhsStride, \
|
||||||
EIGTYPE* res, Index resStride, \
|
EIGTYPE* res, Index resIncr, Index resStride, \
|
||||||
EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
|
EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
|
||||||
{ \
|
{ \
|
||||||
|
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
|
||||||
|
eigen_assert(resIncr == 1); \
|
||||||
char side='R', uplo='L'; \
|
char side='R', uplo='L'; \
|
||||||
BlasIndex m, n, lda, ldb, ldc; \
|
BlasIndex m, n, lda, ldb, ldc; \
|
||||||
const EIGTYPE *a, *b; \
|
const EIGTYPE *a, *b; \
|
||||||
@ -213,15 +219,17 @@ struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateL
|
|||||||
template <typename Index, \
|
template <typename Index, \
|
||||||
int LhsStorageOrder, bool ConjugateLhs, \
|
int LhsStorageOrder, bool ConjugateLhs, \
|
||||||
int RhsStorageOrder, bool ConjugateRhs> \
|
int RhsStorageOrder, bool ConjugateRhs> \
|
||||||
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
|
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor,1> \
|
||||||
{\
|
{\
|
||||||
static void run( \
|
static void run( \
|
||||||
Index rows, Index cols, \
|
Index rows, Index cols, \
|
||||||
const EIGTYPE* _lhs, Index lhsStride, \
|
const EIGTYPE* _lhs, Index lhsStride, \
|
||||||
const EIGTYPE* _rhs, Index rhsStride, \
|
const EIGTYPE* _rhs, Index rhsStride, \
|
||||||
EIGTYPE* res, Index resStride, \
|
EIGTYPE* res, Index resIncr, Index resStride, \
|
||||||
EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
|
EIGTYPE alpha, level3_blocking<EIGTYPE, EIGTYPE>& /*blocking*/) \
|
||||||
{ \
|
{ \
|
||||||
|
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
|
||||||
|
eigen_assert(resIncr == 1); \
|
||||||
char side='R', uplo='L'; \
|
char side='R', uplo='L'; \
|
||||||
BlasIndex m, n, lda, ldb, ldc; \
|
BlasIndex m, n, lda, ldb, ldc; \
|
||||||
const EIGTYPE *a, *b; \
|
const EIGTYPE *a, *b; \
|
||||||
|
@ -45,22 +45,24 @@ template <typename Scalar, typename Index,
|
|||||||
int Mode, bool LhsIsTriangular,
|
int Mode, bool LhsIsTriangular,
|
||||||
int LhsStorageOrder, bool ConjugateLhs,
|
int LhsStorageOrder, bool ConjugateLhs,
|
||||||
int RhsStorageOrder, bool ConjugateRhs,
|
int RhsStorageOrder, bool ConjugateRhs,
|
||||||
int ResStorageOrder, int Version = Specialized>
|
int ResStorageOrder, int ResInnerStride,
|
||||||
|
int Version = Specialized>
|
||||||
struct product_triangular_matrix_matrix;
|
struct product_triangular_matrix_matrix;
|
||||||
|
|
||||||
template <typename Scalar, typename Index,
|
template <typename Scalar, typename Index,
|
||||||
int Mode, bool LhsIsTriangular,
|
int Mode, bool LhsIsTriangular,
|
||||||
int LhsStorageOrder, bool ConjugateLhs,
|
int LhsStorageOrder, bool ConjugateLhs,
|
||||||
int RhsStorageOrder, bool ConjugateRhs, int Version>
|
int RhsStorageOrder, bool ConjugateRhs,
|
||||||
|
int ResInnerStride, int Version>
|
||||||
struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
|
struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
|
||||||
LhsStorageOrder,ConjugateLhs,
|
LhsStorageOrder,ConjugateLhs,
|
||||||
RhsStorageOrder,ConjugateRhs,RowMajor,Version>
|
RhsStorageOrder,ConjugateRhs,RowMajor,ResInnerStride,Version>
|
||||||
{
|
{
|
||||||
static EIGEN_STRONG_INLINE void run(
|
static EIGEN_STRONG_INLINE void run(
|
||||||
Index rows, Index cols, Index depth,
|
Index rows, Index cols, Index depth,
|
||||||
const Scalar* lhs, Index lhsStride,
|
const Scalar* lhs, Index lhsStride,
|
||||||
const Scalar* rhs, Index rhsStride,
|
const Scalar* rhs, Index rhsStride,
|
||||||
Scalar* res, Index resStride,
|
Scalar* res, Index resIncr, Index resStride,
|
||||||
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
|
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
|
||||||
{
|
{
|
||||||
product_triangular_matrix_matrix<Scalar, Index,
|
product_triangular_matrix_matrix<Scalar, Index,
|
||||||
@ -70,18 +72,19 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
|
|||||||
ConjugateRhs,
|
ConjugateRhs,
|
||||||
LhsStorageOrder==RowMajor ? ColMajor : RowMajor,
|
LhsStorageOrder==RowMajor ? ColMajor : RowMajor,
|
||||||
ConjugateLhs,
|
ConjugateLhs,
|
||||||
ColMajor>
|
ColMajor, ResInnerStride>
|
||||||
::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking);
|
::run(cols, rows, depth, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// implements col-major += alpha * op(triangular) * op(general)
|
// implements col-major += alpha * op(triangular) * op(general)
|
||||||
template <typename Scalar, typename Index, int Mode,
|
template <typename Scalar, typename Index, int Mode,
|
||||||
int LhsStorageOrder, bool ConjugateLhs,
|
int LhsStorageOrder, bool ConjugateLhs,
|
||||||
int RhsStorageOrder, bool ConjugateRhs, int Version>
|
int RhsStorageOrder, bool ConjugateRhs,
|
||||||
|
int ResInnerStride, int Version>
|
||||||
struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||||
LhsStorageOrder,ConjugateLhs,
|
LhsStorageOrder,ConjugateLhs,
|
||||||
RhsStorageOrder,ConjugateRhs,ColMajor,Version>
|
RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>
|
||||||
{
|
{
|
||||||
|
|
||||||
typedef gebp_traits<Scalar,Scalar> Traits;
|
typedef gebp_traits<Scalar,Scalar> Traits;
|
||||||
@ -95,20 +98,21 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
|||||||
Index _rows, Index _cols, Index _depth,
|
Index _rows, Index _cols, Index _depth,
|
||||||
const Scalar* _lhs, Index lhsStride,
|
const Scalar* _lhs, Index lhsStride,
|
||||||
const Scalar* _rhs, Index rhsStride,
|
const Scalar* _rhs, Index rhsStride,
|
||||||
Scalar* res, Index resStride,
|
Scalar* res, Index resIncr, Index resStride,
|
||||||
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
|
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Scalar, typename Index, int Mode,
|
template <typename Scalar, typename Index, int Mode,
|
||||||
int LhsStorageOrder, bool ConjugateLhs,
|
int LhsStorageOrder, bool ConjugateLhs,
|
||||||
int RhsStorageOrder, bool ConjugateRhs, int Version>
|
int RhsStorageOrder, bool ConjugateRhs,
|
||||||
|
int ResInnerStride, int Version>
|
||||||
EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
||||||
LhsStorageOrder,ConjugateLhs,
|
LhsStorageOrder,ConjugateLhs,
|
||||||
RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
|
RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>::run(
|
||||||
Index _rows, Index _cols, Index _depth,
|
Index _rows, Index _cols, Index _depth,
|
||||||
const Scalar* _lhs, Index lhsStride,
|
const Scalar* _lhs, Index lhsStride,
|
||||||
const Scalar* _rhs, Index rhsStride,
|
const Scalar* _rhs, Index rhsStride,
|
||||||
Scalar* _res, Index resStride,
|
Scalar* _res, Index resIncr, Index resStride,
|
||||||
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
|
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
|
||||||
{
|
{
|
||||||
// strip zeros
|
// strip zeros
|
||||||
@ -119,10 +123,10 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
|||||||
|
|
||||||
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
|
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
|
||||||
typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
|
typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
|
||||||
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
|
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
|
||||||
LhsMapper lhs(_lhs,lhsStride);
|
LhsMapper lhs(_lhs,lhsStride);
|
||||||
RhsMapper rhs(_rhs,rhsStride);
|
RhsMapper rhs(_rhs,rhsStride);
|
||||||
ResMapper res(_res, resStride);
|
ResMapper res(_res, resStride, resIncr);
|
||||||
|
|
||||||
Index kc = blocking.kc(); // cache block size along the K direction
|
Index kc = blocking.kc(); // cache block size along the K direction
|
||||||
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
|
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
|
||||||
@ -235,10 +239,11 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
|||||||
// implements col-major += alpha * op(general) * op(triangular)
|
// implements col-major += alpha * op(general) * op(triangular)
|
||||||
template <typename Scalar, typename Index, int Mode,
|
template <typename Scalar, typename Index, int Mode,
|
||||||
int LhsStorageOrder, bool ConjugateLhs,
|
int LhsStorageOrder, bool ConjugateLhs,
|
||||||
int RhsStorageOrder, bool ConjugateRhs, int Version>
|
int RhsStorageOrder, bool ConjugateRhs,
|
||||||
|
int ResInnerStride, int Version>
|
||||||
struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
||||||
LhsStorageOrder,ConjugateLhs,
|
LhsStorageOrder,ConjugateLhs,
|
||||||
RhsStorageOrder,ConjugateRhs,ColMajor,Version>
|
RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>
|
||||||
{
|
{
|
||||||
typedef gebp_traits<Scalar,Scalar> Traits;
|
typedef gebp_traits<Scalar,Scalar> Traits;
|
||||||
enum {
|
enum {
|
||||||
@ -251,20 +256,21 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
|||||||
Index _rows, Index _cols, Index _depth,
|
Index _rows, Index _cols, Index _depth,
|
||||||
const Scalar* _lhs, Index lhsStride,
|
const Scalar* _lhs, Index lhsStride,
|
||||||
const Scalar* _rhs, Index rhsStride,
|
const Scalar* _rhs, Index rhsStride,
|
||||||
Scalar* res, Index resStride,
|
Scalar* res, Index resIncr, Index resStride,
|
||||||
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
|
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Scalar, typename Index, int Mode,
|
template <typename Scalar, typename Index, int Mode,
|
||||||
int LhsStorageOrder, bool ConjugateLhs,
|
int LhsStorageOrder, bool ConjugateLhs,
|
||||||
int RhsStorageOrder, bool ConjugateRhs, int Version>
|
int RhsStorageOrder, bool ConjugateRhs,
|
||||||
|
int ResInnerStride, int Version>
|
||||||
EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
||||||
LhsStorageOrder,ConjugateLhs,
|
LhsStorageOrder,ConjugateLhs,
|
||||||
RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
|
RhsStorageOrder,ConjugateRhs,ColMajor,ResInnerStride,Version>::run(
|
||||||
Index _rows, Index _cols, Index _depth,
|
Index _rows, Index _cols, Index _depth,
|
||||||
const Scalar* _lhs, Index lhsStride,
|
const Scalar* _lhs, Index lhsStride,
|
||||||
const Scalar* _rhs, Index rhsStride,
|
const Scalar* _rhs, Index rhsStride,
|
||||||
Scalar* _res, Index resStride,
|
Scalar* _res, Index resIncr, Index resStride,
|
||||||
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
|
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
|
||||||
{
|
{
|
||||||
const Index PacketBytes = packet_traits<Scalar>::size*sizeof(Scalar);
|
const Index PacketBytes = packet_traits<Scalar>::size*sizeof(Scalar);
|
||||||
@ -276,10 +282,10 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
|||||||
|
|
||||||
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
|
typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
|
||||||
typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
|
typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
|
||||||
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor> ResMapper;
|
typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
|
||||||
LhsMapper lhs(_lhs,lhsStride);
|
LhsMapper lhs(_lhs,lhsStride);
|
||||||
RhsMapper rhs(_rhs,rhsStride);
|
RhsMapper rhs(_rhs,rhsStride);
|
||||||
ResMapper res(_res, resStride);
|
ResMapper res(_res, resStride, resIncr);
|
||||||
|
|
||||||
Index kc = blocking.kc(); // cache block size along the K direction
|
Index kc = blocking.kc(); // cache block size along the K direction
|
||||||
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
|
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
|
||||||
@ -433,12 +439,12 @@ struct triangular_product_impl<Mode,LhsIsTriangular,Lhs,false,Rhs,false>
|
|||||||
Mode, LhsIsTriangular,
|
Mode, LhsIsTriangular,
|
||||||
(internal::traits<ActualLhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
|
(internal::traits<ActualLhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
|
||||||
(internal::traits<ActualRhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
|
(internal::traits<ActualRhsTypeCleaned>::Flags&RowMajorBit) ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
|
||||||
(internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor>
|
(internal::traits<Dest >::Flags&RowMajorBit) ? RowMajor : ColMajor, Dest::InnerStrideAtCompileTime>
|
||||||
::run(
|
::run(
|
||||||
stripedRows, stripedCols, stripedDepth, // sizes
|
stripedRows, stripedCols, stripedDepth, // sizes
|
||||||
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
|
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
|
||||||
&rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
|
&rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
|
||||||
&dst.coeffRef(0,0), dst.outerStride(), // result info
|
&dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(), // result info
|
||||||
actualAlpha, blocking
|
actualAlpha, blocking
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -46,7 +46,7 @@ template <typename Scalar, typename Index,
|
|||||||
struct product_triangular_matrix_matrix_trmm :
|
struct product_triangular_matrix_matrix_trmm :
|
||||||
product_triangular_matrix_matrix<Scalar,Index,Mode,
|
product_triangular_matrix_matrix<Scalar,Index,Mode,
|
||||||
LhsIsTriangular,LhsStorageOrder,ConjugateLhs,
|
LhsIsTriangular,LhsStorageOrder,ConjugateLhs,
|
||||||
RhsStorageOrder, ConjugateRhs, ResStorageOrder, BuiltIn> {};
|
RhsStorageOrder, ConjugateRhs, ResStorageOrder, 1, BuiltIn> {};
|
||||||
|
|
||||||
|
|
||||||
// try to go to BLAS specialization
|
// try to go to BLAS specialization
|
||||||
@ -55,13 +55,15 @@ template <typename Index, int Mode, \
|
|||||||
int LhsStorageOrder, bool ConjugateLhs, \
|
int LhsStorageOrder, bool ConjugateLhs, \
|
||||||
int RhsStorageOrder, bool ConjugateRhs> \
|
int RhsStorageOrder, bool ConjugateRhs> \
|
||||||
struct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \
|
struct product_triangular_matrix_matrix<Scalar,Index, Mode, LhsIsTriangular, \
|
||||||
LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,Specialized> { \
|
LhsStorageOrder,ConjugateLhs, RhsStorageOrder,ConjugateRhs,ColMajor,1,Specialized> { \
|
||||||
static inline void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\
|
static inline void run(Index _rows, Index _cols, Index _depth, const Scalar* _lhs, Index lhsStride,\
|
||||||
const Scalar* _rhs, Index rhsStride, Scalar* res, Index resStride, Scalar alpha, level3_blocking<Scalar,Scalar>& blocking) { \
|
const Scalar* _rhs, Index rhsStride, Scalar* res, Index resIncr, Index resStride, Scalar alpha, level3_blocking<Scalar,Scalar>& blocking) { \
|
||||||
|
EIGEN_ONLY_USED_FOR_DEBUG(resIncr); \
|
||||||
|
eigen_assert(resIncr == 1); \
|
||||||
product_triangular_matrix_matrix_trmm<Scalar,Index,Mode, \
|
product_triangular_matrix_matrix_trmm<Scalar,Index,Mode, \
|
||||||
LhsIsTriangular,LhsStorageOrder,ConjugateLhs, \
|
LhsIsTriangular,LhsStorageOrder,ConjugateLhs, \
|
||||||
RhsStorageOrder, ConjugateRhs, ColMajor>::run( \
|
RhsStorageOrder, ConjugateRhs, ColMajor>::run( \
|
||||||
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
|
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
|
||||||
} \
|
} \
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -115,8 +117,8 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
|
|||||||
if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \
|
if (((nthr==1) && (((std::max)(rows,depth)-diagSize)/(double)diagSize < 0.5))) { \
|
||||||
/* Most likely no benefit to call TRMM or GEMM from BLAS */ \
|
/* Most likely no benefit to call TRMM or GEMM from BLAS */ \
|
||||||
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \
|
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,true, \
|
||||||
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
|
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, 1, BuiltIn>::run( \
|
||||||
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
|
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, 1, resStride, alpha, blocking); \
|
||||||
/*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \
|
/*std::cout << "TRMM_L: A is not square! Go to Eigen TRMM implementation!\n";*/ \
|
||||||
} else { \
|
} else { \
|
||||||
/* Make sense to call GEMM */ \
|
/* Make sense to call GEMM */ \
|
||||||
@ -232,8 +234,8 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
|
|||||||
if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \
|
if ((nthr==1) && (((std::max)(cols,depth)-diagSize)/(double)diagSize < 0.5)) { \
|
||||||
/* Most likely no benefit to call TRMM or GEMM from BLAS*/ \
|
/* Most likely no benefit to call TRMM or GEMM from BLAS*/ \
|
||||||
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \
|
product_triangular_matrix_matrix<EIGTYPE,Index,Mode,false, \
|
||||||
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, BuiltIn>::run( \
|
LhsStorageOrder,ConjugateLhs, RhsStorageOrder, ConjugateRhs, ColMajor, 1, BuiltIn>::run( \
|
||||||
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, resStride, alpha, blocking); \
|
_rows, _cols, _depth, _lhs, lhsStride, _rhs, rhsStride, res, 1, resStride, alpha, blocking); \
|
||||||
/*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \
|
/*std::cout << "TRMM_R: A is not square! Go to Eigen TRMM implementation!\n";*/ \
|
||||||
} else { \
|
} else { \
|
||||||
/* Make sense to call GEMM */ \
|
/* Make sense to call GEMM */ \
|
||||||
|
@ -15,48 +15,48 @@ namespace Eigen {
|
|||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
// if the rhs is row major, let's transpose the product
|
// if the rhs is row major, let's transpose the product
|
||||||
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder>
|
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
|
||||||
struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor>
|
struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor,OtherInnerStride>
|
||||||
{
|
{
|
||||||
static void run(
|
static void run(
|
||||||
Index size, Index cols,
|
Index size, Index cols,
|
||||||
const Scalar* tri, Index triStride,
|
const Scalar* tri, Index triStride,
|
||||||
Scalar* _other, Index otherStride,
|
Scalar* _other, Index otherIncr, Index otherStride,
|
||||||
level3_blocking<Scalar,Scalar>& blocking)
|
level3_blocking<Scalar,Scalar>& blocking)
|
||||||
{
|
{
|
||||||
triangular_solve_matrix<
|
triangular_solve_matrix<
|
||||||
Scalar, Index, Side==OnTheLeft?OnTheRight:OnTheLeft,
|
Scalar, Index, Side==OnTheLeft?OnTheRight:OnTheLeft,
|
||||||
(Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper),
|
(Mode&UnitDiag) | ((Mode&Upper) ? Lower : Upper),
|
||||||
NumTraits<Scalar>::IsComplex && Conjugate,
|
NumTraits<Scalar>::IsComplex && Conjugate,
|
||||||
TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor>
|
TriStorageOrder==RowMajor ? ColMajor : RowMajor, ColMajor, OtherInnerStride>
|
||||||
::run(size, cols, tri, triStride, _other, otherStride, blocking);
|
::run(size, cols, tri, triStride, _other, otherIncr, otherStride, blocking);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Optimized triangular solver with multiple right hand side and the triangular matrix on the left
|
/* Optimized triangular solver with multiple right hand side and the triangular matrix on the left
|
||||||
*/
|
*/
|
||||||
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
|
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder,int OtherInnerStride>
|
||||||
struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>
|
struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>
|
||||||
{
|
{
|
||||||
static EIGEN_DONT_INLINE void run(
|
static EIGEN_DONT_INLINE void run(
|
||||||
Index size, Index otherSize,
|
Index size, Index otherSize,
|
||||||
const Scalar* _tri, Index triStride,
|
const Scalar* _tri, Index triStride,
|
||||||
Scalar* _other, Index otherStride,
|
Scalar* _other, Index otherIncr, Index otherStride,
|
||||||
level3_blocking<Scalar,Scalar>& blocking);
|
level3_blocking<Scalar,Scalar>& blocking);
|
||||||
};
|
};
|
||||||
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
|
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
|
||||||
EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>::run(
|
EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>::run(
|
||||||
Index size, Index otherSize,
|
Index size, Index otherSize,
|
||||||
const Scalar* _tri, Index triStride,
|
const Scalar* _tri, Index triStride,
|
||||||
Scalar* _other, Index otherStride,
|
Scalar* _other, Index otherIncr, Index otherStride,
|
||||||
level3_blocking<Scalar,Scalar>& blocking)
|
level3_blocking<Scalar,Scalar>& blocking)
|
||||||
{
|
{
|
||||||
Index cols = otherSize;
|
Index cols = otherSize;
|
||||||
|
|
||||||
typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> TriMapper;
|
typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> TriMapper;
|
||||||
typedef blas_data_mapper<Scalar, Index, ColMajor> OtherMapper;
|
typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> OtherMapper;
|
||||||
TriMapper tri(_tri, triStride);
|
TriMapper tri(_tri, triStride);
|
||||||
OtherMapper other(_other, otherStride);
|
OtherMapper other(_other, otherStride, otherIncr);
|
||||||
|
|
||||||
typedef gebp_traits<Scalar,Scalar> Traits;
|
typedef gebp_traits<Scalar,Scalar> Traits;
|
||||||
|
|
||||||
@ -128,19 +128,19 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conju
|
|||||||
{
|
{
|
||||||
Scalar b(0);
|
Scalar b(0);
|
||||||
const Scalar* l = &tri(i,s);
|
const Scalar* l = &tri(i,s);
|
||||||
Scalar* r = &other(s,j);
|
typename OtherMapper::LinearMapper r = other.getLinearMapper(s,j);
|
||||||
for (Index i3=0; i3<k; ++i3)
|
for (Index i3=0; i3<k; ++i3)
|
||||||
b += conj(l[i3]) * r[i3];
|
b += conj(l[i3]) * r(i3);
|
||||||
|
|
||||||
other(i,j) = (other(i,j) - b)*a;
|
other(i,j) = (other(i,j) - b)*a;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
Scalar b = (other(i,j) *= a);
|
Scalar b = (other(i,j) *= a);
|
||||||
Scalar* r = &other(s,j);
|
typename OtherMapper::LinearMapper r = other.getLinearMapper(s,j);
|
||||||
const Scalar* l = &tri(s,i);
|
typename TriMapper::LinearMapper l = tri.getLinearMapper(s,i);
|
||||||
for (Index i3=0;i3<rs;++i3)
|
for (Index i3=0;i3<rs;++i3)
|
||||||
r[i3] -= b * conj(l[i3]);
|
r(i3) -= b * conj(l(i3));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -185,28 +185,28 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conju
|
|||||||
|
|
||||||
/* Optimized triangular solver with multiple left hand sides and the triangular matrix on the right
|
/* Optimized triangular solver with multiple left hand sides and the triangular matrix on the right
|
||||||
*/
|
*/
|
||||||
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
|
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
|
||||||
struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>
|
struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>
|
||||||
{
|
{
|
||||||
static EIGEN_DONT_INLINE void run(
|
static EIGEN_DONT_INLINE void run(
|
||||||
Index size, Index otherSize,
|
Index size, Index otherSize,
|
||||||
const Scalar* _tri, Index triStride,
|
const Scalar* _tri, Index triStride,
|
||||||
Scalar* _other, Index otherStride,
|
Scalar* _other, Index otherIncr, Index otherStride,
|
||||||
level3_blocking<Scalar,Scalar>& blocking);
|
level3_blocking<Scalar,Scalar>& blocking);
|
||||||
};
|
};
|
||||||
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
|
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder, int OtherInnerStride>
|
||||||
EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>::run(
|
EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,OtherInnerStride>::run(
|
||||||
Index size, Index otherSize,
|
Index size, Index otherSize,
|
||||||
const Scalar* _tri, Index triStride,
|
const Scalar* _tri, Index triStride,
|
||||||
Scalar* _other, Index otherStride,
|
Scalar* _other, Index otherIncr, Index otherStride,
|
||||||
level3_blocking<Scalar,Scalar>& blocking)
|
level3_blocking<Scalar,Scalar>& blocking)
|
||||||
{
|
{
|
||||||
Index rows = otherSize;
|
Index rows = otherSize;
|
||||||
typedef typename NumTraits<Scalar>::Real RealScalar;
|
typedef typename NumTraits<Scalar>::Real RealScalar;
|
||||||
|
|
||||||
typedef blas_data_mapper<Scalar, Index, ColMajor> LhsMapper;
|
typedef blas_data_mapper<Scalar, Index, ColMajor, Unaligned, OtherInnerStride> LhsMapper;
|
||||||
typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> RhsMapper;
|
typedef const_blas_data_mapper<Scalar, Index, TriStorageOrder> RhsMapper;
|
||||||
LhsMapper lhs(_other, otherStride);
|
LhsMapper lhs(_other, otherStride, otherIncr);
|
||||||
RhsMapper rhs(_tri, triStride);
|
RhsMapper rhs(_tri, triStride);
|
||||||
|
|
||||||
typedef gebp_traits<Scalar,Scalar> Traits;
|
typedef gebp_traits<Scalar,Scalar> Traits;
|
||||||
@ -297,24 +297,24 @@ EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conj
|
|||||||
{
|
{
|
||||||
Index j = IsLower ? absolute_j2+actualPanelWidth-k-1 : absolute_j2+k;
|
Index j = IsLower ? absolute_j2+actualPanelWidth-k-1 : absolute_j2+k;
|
||||||
|
|
||||||
Scalar* r = &lhs(i2,j);
|
typename LhsMapper::LinearMapper r = lhs.getLinearMapper(i2,j);
|
||||||
for (Index k3=0; k3<k; ++k3)
|
for (Index k3=0; k3<k; ++k3)
|
||||||
{
|
{
|
||||||
Scalar b = conj(rhs(IsLower ? j+1+k3 : absolute_j2+k3,j));
|
Scalar b = conj(rhs(IsLower ? j+1+k3 : absolute_j2+k3,j));
|
||||||
Scalar* a = &lhs(i2,IsLower ? j+1+k3 : absolute_j2+k3);
|
typename LhsMapper::LinearMapper a = lhs.getLinearMapper(i2,IsLower ? j+1+k3 : absolute_j2+k3);
|
||||||
for (Index i=0; i<actual_mc; ++i)
|
for (Index i=0; i<actual_mc; ++i)
|
||||||
r[i] -= a[i] * b;
|
r(i) -= a(i) * b;
|
||||||
}
|
}
|
||||||
if((Mode & UnitDiag)==0)
|
if((Mode & UnitDiag)==0)
|
||||||
{
|
{
|
||||||
Scalar inv_rjj = RealScalar(1)/conj(rhs(j,j));
|
Scalar inv_rjj = RealScalar(1)/conj(rhs(j,j));
|
||||||
for (Index i=0; i<actual_mc; ++i)
|
for (Index i=0; i<actual_mc; ++i)
|
||||||
r[i] *= inv_rjj;
|
r(i) *= inv_rjj;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// pack the just computed part of lhs to A
|
// pack the just computed part of lhs to A
|
||||||
pack_lhs_panel(blockA, LhsMapper(_other+absolute_j2*otherStride+i2, otherStride),
|
pack_lhs_panel(blockA, lhs.getSubMapper(i2,absolute_j2),
|
||||||
actualPanelWidth, actual_mc,
|
actualPanelWidth, actual_mc,
|
||||||
actual_kc, j2);
|
actual_kc, j2);
|
||||||
}
|
}
|
||||||
|
@ -40,7 +40,7 @@ namespace internal {
|
|||||||
// implements LeftSide op(triangular)^-1 * general
|
// implements LeftSide op(triangular)^-1 * general
|
||||||
#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASFUNC) \
|
#define EIGEN_BLAS_TRSM_L(EIGTYPE, BLASTYPE, BLASFUNC) \
|
||||||
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
|
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
|
||||||
struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor> \
|
struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor,1> \
|
||||||
{ \
|
{ \
|
||||||
enum { \
|
enum { \
|
||||||
IsLower = (Mode&Lower) == Lower, \
|
IsLower = (Mode&Lower) == Lower, \
|
||||||
@ -51,8 +51,10 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage
|
|||||||
static void run( \
|
static void run( \
|
||||||
Index size, Index otherSize, \
|
Index size, Index otherSize, \
|
||||||
const EIGTYPE* _tri, Index triStride, \
|
const EIGTYPE* _tri, Index triStride, \
|
||||||
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
|
EIGTYPE* _other, Index otherIncr, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
|
||||||
{ \
|
{ \
|
||||||
|
EIGEN_ONLY_USED_FOR_DEBUG(otherIncr); \
|
||||||
|
eigen_assert(otherIncr == 1); \
|
||||||
BlasIndex m = convert_index<BlasIndex>(size), n = convert_index<BlasIndex>(otherSize), lda, ldb; \
|
BlasIndex m = convert_index<BlasIndex>(size), n = convert_index<BlasIndex>(otherSize), lda, ldb; \
|
||||||
char side = 'L', uplo, diag='N', transa; \
|
char side = 'L', uplo, diag='N', transa; \
|
||||||
/* Set alpha_ */ \
|
/* Set alpha_ */ \
|
||||||
@ -99,7 +101,7 @@ EIGEN_BLAS_TRSM_L(scomplex, float, ctrsm_)
|
|||||||
// implements RightSide general * op(triangular)^-1
|
// implements RightSide general * op(triangular)^-1
|
||||||
#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASFUNC) \
|
#define EIGEN_BLAS_TRSM_R(EIGTYPE, BLASTYPE, BLASFUNC) \
|
||||||
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
|
template <typename Index, int Mode, bool Conjugate, int TriStorageOrder> \
|
||||||
struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor> \
|
struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor,1> \
|
||||||
{ \
|
{ \
|
||||||
enum { \
|
enum { \
|
||||||
IsLower = (Mode&Lower) == Lower, \
|
IsLower = (Mode&Lower) == Lower, \
|
||||||
@ -110,8 +112,10 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag
|
|||||||
static void run( \
|
static void run( \
|
||||||
Index size, Index otherSize, \
|
Index size, Index otherSize, \
|
||||||
const EIGTYPE* _tri, Index triStride, \
|
const EIGTYPE* _tri, Index triStride, \
|
||||||
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
|
EIGTYPE* _other, Index otherIncr, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
|
||||||
{ \
|
{ \
|
||||||
|
EIGEN_ONLY_USED_FOR_DEBUG(otherIncr); \
|
||||||
|
eigen_assert(otherIncr == 1); \
|
||||||
BlasIndex m = convert_index<BlasIndex>(otherSize), n = convert_index<BlasIndex>(size), lda, ldb; \
|
BlasIndex m = convert_index<BlasIndex>(otherSize), n = convert_index<BlasIndex>(size), lda, ldb; \
|
||||||
char side = 'R', uplo, diag='N', transa; \
|
char side = 'R', uplo, diag='N', transa; \
|
||||||
/* Set alpha_ */ \
|
/* Set alpha_ */ \
|
||||||
|
@ -79,63 +79,63 @@ int EIGEN_BLAS_FUNC(trsm)(const char *side, const char *uplo, const char *opa, c
|
|||||||
const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb, const int *ldb)
|
const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb, const int *ldb)
|
||||||
{
|
{
|
||||||
// std::cerr << "in trsm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << "," << *n << " " << *palpha << " " << *lda << " " << *ldb<< "\n";
|
// std::cerr << "in trsm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << "," << *n << " " << *palpha << " " << *lda << " " << *ldb<< "\n";
|
||||||
typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, internal::level3_blocking<Scalar,Scalar>&);
|
typedef void (*functype)(DenseIndex, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, DenseIndex, internal::level3_blocking<Scalar,Scalar>&);
|
||||||
static const functype func[32] = {
|
static const functype func[32] = {
|
||||||
// array index: NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
|
// array index: NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
|
||||||
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, false,ColMajor,ColMajor>::run),
|
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, false,ColMajor,ColMajor,1>::run),
|
||||||
// array index: TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
|
// array index: TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
|
||||||
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, false,RowMajor,ColMajor>::run),
|
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, false,RowMajor,ColMajor,1>::run),
|
||||||
// array index: ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
|
// array index: ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
|
||||||
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, Conj, RowMajor,ColMajor>::run),\
|
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, Conj, RowMajor,ColMajor,1>::run),\
|
||||||
0,
|
0,
|
||||||
// array index: NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
|
// array index: NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
|
||||||
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, false,ColMajor,ColMajor>::run),
|
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, false,ColMajor,ColMajor,1>::run),
|
||||||
// array index: TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
|
// array index: TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
|
||||||
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, false,RowMajor,ColMajor>::run),
|
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, false,RowMajor,ColMajor,1>::run),
|
||||||
// array index: ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
|
// array index: ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
|
||||||
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, Conj, RowMajor,ColMajor>::run),
|
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, Conj, RowMajor,ColMajor,1>::run),
|
||||||
0,
|
0,
|
||||||
// array index: NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
|
// array index: NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
|
||||||
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, false,ColMajor,ColMajor>::run),
|
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|0, false,ColMajor,ColMajor,1>::run),
|
||||||
// array index: TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
|
// array index: TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
|
||||||
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, false,RowMajor,ColMajor>::run),
|
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, false,RowMajor,ColMajor,1>::run),
|
||||||
// array index: ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
|
// array index: ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
|
||||||
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, Conj, RowMajor,ColMajor>::run),
|
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|0, Conj, RowMajor,ColMajor,1>::run),
|
||||||
0,
|
0,
|
||||||
// array index: NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
|
// array index: NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
|
||||||
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, false,ColMajor,ColMajor>::run),
|
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|0, false,ColMajor,ColMajor,1>::run),
|
||||||
// array index: TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
|
// array index: TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
|
||||||
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, false,RowMajor,ColMajor>::run),
|
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, false,RowMajor,ColMajor,1>::run),
|
||||||
// array index: ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
|
// array index: ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
|
||||||
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, Conj, RowMajor,ColMajor>::run),
|
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|0, Conj, RowMajor,ColMajor,1>::run),
|
||||||
0,
|
0,
|
||||||
// array index: NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)
|
// array index: NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)
|
||||||
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,false,ColMajor,ColMajor>::run),
|
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,false,ColMajor,ColMajor,1>::run),
|
||||||
// array index: TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)
|
// array index: TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)
|
||||||
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,false,RowMajor,ColMajor>::run),
|
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,false,RowMajor,ColMajor,1>::run),
|
||||||
// array index: ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)
|
// array index: ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)
|
||||||
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,Conj, RowMajor,ColMajor>::run),
|
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,Conj, RowMajor,ColMajor,1>::run),
|
||||||
0,
|
0,
|
||||||
// array index: NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
|
// array index: NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
|
||||||
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,false,ColMajor,ColMajor>::run),
|
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,false,ColMajor,ColMajor,1>::run),
|
||||||
// array index: TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
|
// array index: TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
|
||||||
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,false,RowMajor,ColMajor>::run),
|
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,false,RowMajor,ColMajor,1>::run),
|
||||||
// array index: ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
|
// array index: ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
|
||||||
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,Conj, RowMajor,ColMajor>::run),
|
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,Conj, RowMajor,ColMajor,1>::run),
|
||||||
0,
|
0,
|
||||||
// array index: NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)
|
// array index: NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)
|
||||||
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,false,ColMajor,ColMajor>::run),
|
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Lower|UnitDiag,false,ColMajor,ColMajor,1>::run),
|
||||||
// array index: TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)
|
// array index: TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)
|
||||||
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,false,RowMajor,ColMajor>::run),
|
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,false,RowMajor,ColMajor,1>::run),
|
||||||
// array index: ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)
|
// array index: ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)
|
||||||
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,Conj, RowMajor,ColMajor>::run),
|
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheLeft, Upper|UnitDiag,Conj, RowMajor,ColMajor,1>::run),
|
||||||
0,
|
0,
|
||||||
// array index: NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
|
// array index: NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
|
||||||
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,false,ColMajor,ColMajor>::run),
|
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Lower|UnitDiag,false,ColMajor,ColMajor,1>::run),
|
||||||
// array index: TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
|
// array index: TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
|
||||||
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,false,RowMajor,ColMajor>::run),
|
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,false,RowMajor,ColMajor,1>::run),
|
||||||
// array index: ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
|
// array index: ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
|
||||||
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,Conj, RowMajor,ColMajor>::run),
|
(internal::triangular_solve_matrix<Scalar,DenseIndex,OnTheRight,Upper|UnitDiag,Conj, RowMajor,ColMajor,1>::run),
|
||||||
0
|
0
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -163,12 +163,12 @@ int EIGEN_BLAS_FUNC(trsm)(const char *side, const char *uplo, const char *opa, c
|
|||||||
if(SIDE(*side)==LEFT)
|
if(SIDE(*side)==LEFT)
|
||||||
{
|
{
|
||||||
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*m,1,false);
|
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*m,1,false);
|
||||||
func[code](*m, *n, a, *lda, b, *ldb, blocking);
|
func[code](*m, *n, a, *lda, b, 1, *ldb, blocking);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*n,1,false);
|
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*n,1,false);
|
||||||
func[code](*n, *m, a, *lda, b, *ldb, blocking);
|
func[code](*n, *m, a, *lda, b, 1, *ldb, blocking);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(alpha!=Scalar(1))
|
if(alpha!=Scalar(1))
|
||||||
@ -184,63 +184,63 @@ int EIGEN_BLAS_FUNC(trmm)(const char *side, const char *uplo, const char *opa, c
|
|||||||
const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb, const int *ldb)
|
const RealScalar *palpha, const RealScalar *pa, const int *lda, RealScalar *pb, const int *ldb)
|
||||||
{
|
{
|
||||||
// std::cerr << "in trmm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << " " << *n << " " << *lda << " " << *ldb << " " << *palpha << "\n";
|
// std::cerr << "in trmm " << *side << " " << *uplo << " " << *opa << " " << *diag << " " << *m << " " << *n << " " << *lda << " " << *ldb << " " << *palpha << "\n";
|
||||||
typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, const Scalar&, internal::level3_blocking<Scalar,Scalar>&);
|
typedef void (*functype)(DenseIndex, DenseIndex, DenseIndex, const Scalar *, DenseIndex, const Scalar *, DenseIndex, Scalar *, DenseIndex, DenseIndex, const Scalar&, internal::level3_blocking<Scalar,Scalar>&);
|
||||||
static const functype func[32] = {
|
static const functype func[32] = {
|
||||||
// array index: NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
|
// array index: NOTR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
|
||||||
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, ColMajor,false,ColMajor,false,ColMajor>::run),
|
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, ColMajor,false,ColMajor,false,ColMajor,1>::run),
|
||||||
// array index: TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
|
// array index: TR | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
|
||||||
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, RowMajor,false,ColMajor,false,ColMajor>::run),
|
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, RowMajor,false,ColMajor,false,ColMajor,1>::run),
|
||||||
// array index: ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
|
// array index: ADJ | (LEFT << 2) | (UP << 3) | (NUNIT << 4)
|
||||||
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, RowMajor,Conj, ColMajor,false,ColMajor>::run),
|
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, RowMajor,Conj, ColMajor,false,ColMajor,1>::run),
|
||||||
0,
|
0,
|
||||||
// array index: NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
|
// array index: NOTR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
|
||||||
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,ColMajor,false,ColMajor>::run),
|
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,ColMajor,false,ColMajor,1>::run),
|
||||||
// array index: TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
|
// array index: TR | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
|
||||||
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,RowMajor,false,ColMajor>::run),
|
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,RowMajor,false,ColMajor,1>::run),
|
||||||
// array index: ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
|
// array index: ADJ | (RIGHT << 2) | (UP << 3) | (NUNIT << 4)
|
||||||
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,RowMajor,Conj, ColMajor>::run),
|
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,RowMajor,Conj, ColMajor,1>::run),
|
||||||
0,
|
0,
|
||||||
// array index: NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
|
// array index: NOTR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
|
||||||
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, ColMajor,false,ColMajor,false,ColMajor>::run),
|
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, true, ColMajor,false,ColMajor,false,ColMajor,1>::run),
|
||||||
// array index: TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
|
// array index: TR | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
|
||||||
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, RowMajor,false,ColMajor,false,ColMajor>::run),
|
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, RowMajor,false,ColMajor,false,ColMajor,1>::run),
|
||||||
// array index: ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
|
// array index: ADJ | (LEFT << 2) | (LO << 3) | (NUNIT << 4)
|
||||||
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, RowMajor,Conj, ColMajor,false,ColMajor>::run),
|
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, true, RowMajor,Conj, ColMajor,false,ColMajor,1>::run),
|
||||||
0,
|
0,
|
||||||
// array index: NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
|
// array index: NOTR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
|
||||||
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,ColMajor,false,ColMajor>::run),
|
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|0, false,ColMajor,false,ColMajor,false,ColMajor,1>::run),
|
||||||
// array index: TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
|
// array index: TR | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
|
||||||
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,RowMajor,false,ColMajor>::run),
|
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,RowMajor,false,ColMajor,1>::run),
|
||||||
// array index: ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
|
// array index: ADJ | (RIGHT << 2) | (LO << 3) | (NUNIT << 4)
|
||||||
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,RowMajor,Conj, ColMajor>::run),
|
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|0, false,ColMajor,false,RowMajor,Conj, ColMajor,1>::run),
|
||||||
0,
|
0,
|
||||||
// array index: NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)
|
// array index: NOTR | (LEFT << 2) | (UP << 3) | (UNIT << 4)
|
||||||
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, ColMajor,false,ColMajor,false,ColMajor>::run),
|
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, ColMajor,false,ColMajor,false,ColMajor,1>::run),
|
||||||
// array index: TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)
|
// array index: TR | (LEFT << 2) | (UP << 3) | (UNIT << 4)
|
||||||
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, RowMajor,false,ColMajor,false,ColMajor>::run),
|
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, RowMajor,false,ColMajor,false,ColMajor,1>::run),
|
||||||
// array index: ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)
|
// array index: ADJ | (LEFT << 2) | (UP << 3) | (UNIT << 4)
|
||||||
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, RowMajor,Conj, ColMajor,false,ColMajor>::run),
|
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, RowMajor,Conj, ColMajor,false,ColMajor,1>::run),
|
||||||
0,
|
0,
|
||||||
// array index: NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
|
// array index: NOTR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
|
||||||
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,ColMajor,false,ColMajor>::run),
|
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,ColMajor,false,ColMajor,1>::run),
|
||||||
// array index: TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
|
// array index: TR | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
|
||||||
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,RowMajor,false,ColMajor>::run),
|
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,RowMajor,false,ColMajor,1>::run),
|
||||||
// array index: ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
|
// array index: ADJ | (RIGHT << 2) | (UP << 3) | (UNIT << 4)
|
||||||
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,RowMajor,Conj, ColMajor>::run),
|
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,RowMajor,Conj, ColMajor,1>::run),
|
||||||
0,
|
0,
|
||||||
// array index: NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)
|
// array index: NOTR | (LEFT << 2) | (LO << 3) | (UNIT << 4)
|
||||||
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, ColMajor,false,ColMajor,false,ColMajor>::run),
|
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,true, ColMajor,false,ColMajor,false,ColMajor,1>::run),
|
||||||
// array index: TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)
|
// array index: TR | (LEFT << 2) | (LO << 3) | (UNIT << 4)
|
||||||
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, RowMajor,false,ColMajor,false,ColMajor>::run),
|
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, RowMajor,false,ColMajor,false,ColMajor,1>::run),
|
||||||
// array index: ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)
|
// array index: ADJ | (LEFT << 2) | (LO << 3) | (UNIT << 4)
|
||||||
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, RowMajor,Conj, ColMajor,false,ColMajor>::run),
|
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,true, RowMajor,Conj, ColMajor,false,ColMajor,1>::run),
|
||||||
0,
|
0,
|
||||||
// array index: NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
|
// array index: NOTR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
|
||||||
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,ColMajor,false,ColMajor>::run),
|
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Lower|UnitDiag,false,ColMajor,false,ColMajor,false,ColMajor,1>::run),
|
||||||
// array index: TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
|
// array index: TR | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
|
||||||
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,RowMajor,false,ColMajor>::run),
|
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,RowMajor,false,ColMajor,1>::run),
|
||||||
// array index: ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
|
// array index: ADJ | (RIGHT << 2) | (LO << 3) | (UNIT << 4)
|
||||||
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,RowMajor,Conj, ColMajor>::run),
|
(internal::product_triangular_matrix_matrix<Scalar,DenseIndex,Upper|UnitDiag,false,ColMajor,false,RowMajor,Conj, ColMajor,1>::run),
|
||||||
0
|
0
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -272,12 +272,12 @@ int EIGEN_BLAS_FUNC(trmm)(const char *side, const char *uplo, const char *opa, c
|
|||||||
if(SIDE(*side)==LEFT)
|
if(SIDE(*side)==LEFT)
|
||||||
{
|
{
|
||||||
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*m,1,false);
|
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*m,1,false);
|
||||||
func[code](*m, *n, *m, a, *lda, tmp.data(), tmp.outerStride(), b, *ldb, alpha, blocking);
|
func[code](*m, *n, *m, a, *lda, tmp.data(), tmp.outerStride(), b, 1, *ldb, alpha, blocking);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*n,1,false);
|
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic,4> blocking(*m,*n,*n,1,false);
|
||||||
func[code](*m, *n, *n, tmp.data(), tmp.outerStride(), a, *lda, b, *ldb, alpha, blocking);
|
func[code](*m, *n, *n, tmp.data(), tmp.outerStride(), a, *lda, b, 1, *ldb, alpha, blocking);
|
||||||
}
|
}
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@ -338,12 +338,12 @@ int EIGEN_BLAS_FUNC(symm)(const char *side, const char *uplo, const int *m, cons
|
|||||||
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic> blocking(*m,*n,size,1,false);
|
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic> blocking(*m,*n,size,1,false);
|
||||||
|
|
||||||
if(SIDE(*side)==LEFT)
|
if(SIDE(*side)==LEFT)
|
||||||
if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, RowMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking);
|
if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, RowMajor,true,false, ColMajor,false,false, ColMajor,1>::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking);
|
||||||
else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking);
|
else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,true,false, ColMajor,false,false, ColMajor,1>::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking);
|
||||||
else return 0;
|
else return 0;
|
||||||
else if(SIDE(*side)==RIGHT)
|
else if(SIDE(*side)==RIGHT)
|
||||||
if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, RowMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking);
|
if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, RowMajor,true,false, ColMajor,1>::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha, blocking);
|
||||||
else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, ColMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking);
|
else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, ColMajor,true,false, ColMajor,1>::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha, blocking);
|
||||||
else return 0;
|
else return 0;
|
||||||
else
|
else
|
||||||
return 0;
|
return 0;
|
||||||
@ -537,18 +537,18 @@ int EIGEN_BLAS_FUNC(hemm)(const char *side, const char *uplo, const int *m, cons
|
|||||||
|
|
||||||
if(SIDE(*side)==LEFT)
|
if(SIDE(*side)==LEFT)
|
||||||
{
|
{
|
||||||
if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar,DenseIndex,RowMajor,true,Conj, ColMajor,false,false, ColMajor>
|
if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar,DenseIndex,RowMajor,true,Conj, ColMajor,false,false, ColMajor, 1>
|
||||||
::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking);
|
::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking);
|
||||||
else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,true,false, ColMajor,false,false, ColMajor>
|
else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,true,false, ColMajor,false,false, ColMajor,1>
|
||||||
::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking);
|
::run(*m, *n, a, *lda, b, *ldb, c, 1, *ldc, alpha, blocking);
|
||||||
else return 0;
|
else return 0;
|
||||||
}
|
}
|
||||||
else if(SIDE(*side)==RIGHT)
|
else if(SIDE(*side)==RIGHT)
|
||||||
{
|
{
|
||||||
if(UPLO(*uplo)==UP) matrix(c,*m,*n,*ldc) += alpha * matrix(b,*m,*n,*ldb) * matrix(a,*n,*n,*lda).selfadjointView<Upper>();/*internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,false,false, RowMajor,true,Conj, ColMajor>
|
if(UPLO(*uplo)==UP) matrix(c,*m,*n,*ldc) += alpha * matrix(b,*m,*n,*ldb) * matrix(a,*n,*n,*lda).selfadjointView<Upper>();/*internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,false,false, RowMajor,true,Conj, ColMajor, 1>
|
||||||
::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking);*/
|
::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha, blocking);*/
|
||||||
else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,false,false, ColMajor,true,false, ColMajor>
|
else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,false,false, ColMajor,true,false, ColMajor,1>
|
||||||
::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking);
|
::run(*m, *n, b, *ldb, a, *lda, c, 1, *ldc, alpha, blocking);
|
||||||
else return 0;
|
else return 0;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -75,12 +75,12 @@ template<typename Scalar, int Size, int OtherSize> void symm(int size = Size, in
|
|||||||
rhs13 = (s1*m1.adjoint()) * (s2*rhs2.adjoint()));
|
rhs13 = (s1*m1.adjoint()) * (s2*rhs2.adjoint()));
|
||||||
|
|
||||||
// test row major = <...>
|
// test row major = <...>
|
||||||
m2 = m1.template triangularView<Lower>(); rhs12.setRandom(); rhs13 = rhs12;
|
m2 = m1.template triangularView<Lower>(); rhs32.setRandom(); rhs13 = rhs32;
|
||||||
VERIFY_IS_APPROX(rhs12 -= (s1*m2).template selfadjointView<Lower>() * (s2*rhs3),
|
VERIFY_IS_APPROX(rhs32.noalias() -= (s1*m2).template selfadjointView<Lower>() * (s2*rhs3),
|
||||||
rhs13 -= (s1*m1) * (s2 * rhs3));
|
rhs13 -= (s1*m1) * (s2 * rhs3));
|
||||||
|
|
||||||
m2 = m1.template triangularView<Upper>();
|
m2 = m1.template triangularView<Upper>();
|
||||||
VERIFY_IS_APPROX(rhs12 = (s1*m2.adjoint()).template selfadjointView<Lower>() * (s2*rhs3).conjugate(),
|
VERIFY_IS_APPROX(rhs32.noalias() = (s1*m2.adjoint()).template selfadjointView<Lower>() * (s2*rhs3).conjugate(),
|
||||||
rhs13 = (s1*m1.adjoint()) * (s2*rhs3).conjugate());
|
rhs13 = (s1*m1.adjoint()) * (s2*rhs3).conjugate());
|
||||||
|
|
||||||
|
|
||||||
@ -92,6 +92,20 @@ template<typename Scalar, int Size, int OtherSize> void symm(int size = Size, in
|
|||||||
VERIFY_IS_APPROX(rhs22 = (rhs2) * (m2).template selfadjointView<Lower>(), rhs23 = (rhs2) * (m1));
|
VERIFY_IS_APPROX(rhs22 = (rhs2) * (m2).template selfadjointView<Lower>(), rhs23 = (rhs2) * (m1));
|
||||||
VERIFY_IS_APPROX(rhs22 = (s2*rhs2) * (s1*m2).template selfadjointView<Lower>(), rhs23 = (s2*rhs2) * (s1*m1));
|
VERIFY_IS_APPROX(rhs22 = (s2*rhs2) * (s1*m2).template selfadjointView<Lower>(), rhs23 = (s2*rhs2) * (s1*m1));
|
||||||
|
|
||||||
|
// destination with a non-default inner-stride
|
||||||
|
// see bug 1741
|
||||||
|
{
|
||||||
|
typedef Matrix<Scalar,Dynamic,Dynamic> MatrixX;
|
||||||
|
MatrixX buffer(2*cols,2*othersize);
|
||||||
|
Map<Rhs1,0,Stride<Dynamic,2> > map1(buffer.data(),cols,othersize,Stride<Dynamic,2>(2*rows,2));
|
||||||
|
buffer.setZero();
|
||||||
|
VERIFY_IS_APPROX( map1.noalias() = (s1*m2).template selfadjointView<Lower>() * (s2*rhs1),
|
||||||
|
rhs13 = (s1*m1) * (s2*rhs1));
|
||||||
|
|
||||||
|
Map<Rhs2,0,Stride<Dynamic,2> > map2(buffer.data(),rhs22.rows(),rhs22.cols(),Stride<Dynamic,2>(2*rhs22.outerStride(),2));
|
||||||
|
buffer.setZero();
|
||||||
|
VERIFY_IS_APPROX(map2 = (rhs2) * (m2).template selfadjointView<Lower>(), rhs23 = (rhs2) * (m1));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DECLARE_TEST(product_symm)
|
EIGEN_DECLARE_TEST(product_symm)
|
||||||
|
@ -76,8 +76,18 @@ void trmm(int rows=get_random_size<Scalar>(),
|
|||||||
VERIFY_IS_APPROX( ge_xs = (s1*mat).adjoint().template triangularView<Mode>() * ge_left.adjoint(), numext::conj(s1) * triTr.conjugate() * ge_left.adjoint());
|
VERIFY_IS_APPROX( ge_xs = (s1*mat).adjoint().template triangularView<Mode>() * ge_left.adjoint(), numext::conj(s1) * triTr.conjugate() * ge_left.adjoint());
|
||||||
VERIFY_IS_APPROX( ge_xs = (s1*mat).transpose().template triangularView<Mode>() * ge_left.adjoint(), s1triTr * ge_left.adjoint());
|
VERIFY_IS_APPROX( ge_xs = (s1*mat).transpose().template triangularView<Mode>() * ge_left.adjoint(), s1triTr * ge_left.adjoint());
|
||||||
|
|
||||||
|
|
||||||
// TODO check with sub-matrix expressions ?
|
// TODO check with sub-matrix expressions ?
|
||||||
|
|
||||||
|
// destination with a non-default inner-stride
|
||||||
|
// see bug 1741
|
||||||
|
{
|
||||||
|
VERIFY_IS_APPROX( ge_xs.noalias() = mat.template triangularView<Mode>() * ge_right, tri * ge_right);
|
||||||
|
typedef Matrix<Scalar,Dynamic,Dynamic> MatrixX;
|
||||||
|
MatrixX buffer(2*ge_xs.rows(),2*ge_xs.cols());
|
||||||
|
Map<ResXS,0,Stride<Dynamic,2> > map1(buffer.data(),ge_xs.rows(),ge_xs.cols(),Stride<Dynamic,2>(2*ge_xs.outerStride(),2));
|
||||||
|
buffer.setZero();
|
||||||
|
VERIFY_IS_APPROX( map1.noalias() = mat.template triangularView<Mode>() * ge_right, tri * ge_right);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Scalar, int Mode, int TriOrder>
|
template<typename Scalar, int Mode, int TriOrder>
|
||||||
|
@ -72,6 +72,19 @@ template<typename Scalar,int Size, int Cols> void trsolve(int size=Size,int cols
|
|||||||
VERIFY_TRSM(rmLhs.template triangularView<Lower>(), rmRhs.col(c));
|
VERIFY_TRSM(rmLhs.template triangularView<Lower>(), rmRhs.col(c));
|
||||||
VERIFY_TRSM(cmLhs.template triangularView<Lower>(), rmRhs.col(c));
|
VERIFY_TRSM(cmLhs.template triangularView<Lower>(), rmRhs.col(c));
|
||||||
|
|
||||||
|
// destination with a non-default inner-stride
|
||||||
|
// see bug 1741
|
||||||
|
{
|
||||||
|
typedef Matrix<Scalar,Dynamic,Dynamic> MatrixX;
|
||||||
|
MatrixX buffer(2*cmRhs.rows(),2*cmRhs.cols());
|
||||||
|
Map<Matrix<Scalar,Size,Cols,colmajor>,0,Stride<Dynamic,2> > map1(buffer.data(),cmRhs.rows(),cmRhs.cols(),Stride<Dynamic,2>(2*cmRhs.outerStride(),2));
|
||||||
|
Map<Matrix<Scalar,Size,Cols,rowmajor>,0,Stride<Dynamic,2> > map2(buffer.data(),rmRhs.rows(),rmRhs.cols(),Stride<Dynamic,2>(2*rmRhs.outerStride(),2));
|
||||||
|
buffer.setZero();
|
||||||
|
VERIFY_TRSM(cmLhs.conjugate().template triangularView<Lower>(), map1);
|
||||||
|
buffer.setZero();
|
||||||
|
VERIFY_TRSM(cmLhs .template triangularView<Lower>(), map2);
|
||||||
|
}
|
||||||
|
|
||||||
if(Size==Dynamic)
|
if(Size==Dynamic)
|
||||||
{
|
{
|
||||||
cmLhs.resize(0,0);
|
cmLhs.resize(0,0);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user