bug #51: add block preallocation mechanism to selfadjoint*matrix product.

This commit is contained in:
Gael Guennebaud 2016-01-25 22:06:42 +01:00
parent 2f9e6314b1
commit 8328caa618
3 changed files with 39 additions and 32 deletions

View File

@@ -291,7 +291,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
const Scalar* lhs, Index lhsStride, const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride, const Scalar* rhs, Index rhsStride,
Scalar* res, Index resStride, Scalar* res, Index resStride,
const Scalar& alpha) const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{ {
product_selfadjoint_matrix<Scalar, Index, product_selfadjoint_matrix<Scalar, Index,
EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor, EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
@@ -299,7 +299,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor, EIGEN_LOGICAL_XOR(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs), LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsSelfAdjoint,ConjugateLhs),
ColMajor> ColMajor>
::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha); ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resStride, alpha, blocking);
} }
}; };
@@ -314,7 +314,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
const Scalar* _lhs, Index lhsStride, const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride, const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride, Scalar* res, Index resStride,
const Scalar& alpha); const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
}; };
template <typename Scalar, typename Index, template <typename Scalar, typename Index,
@@ -325,7 +325,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
const Scalar* _lhs, Index lhsStride, const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride, const Scalar* _rhs, Index rhsStride,
Scalar* _res, Index resStride, Scalar* _res, Index resStride,
const Scalar& alpha) const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{ {
Index size = rows; Index size = rows;
@@ -340,17 +340,16 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,t
RhsMapper rhs(_rhs,rhsStride); RhsMapper rhs(_rhs,rhsStride);
ResMapper res(_res, resStride); ResMapper res(_res, resStride);
Index kc = size; // cache block size along the K direction Index kc = blocking.kc(); // cache block size along the K direction
Index mc = rows; // cache block size along the M direction Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
Index nc = cols; // cache block size along the N direction // kc must be smaller than mc
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc, 1);
// kc must smaller than mc
kc = (std::min)(kc,mc); kc = (std::min)(kc,mc);
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols; std::size_t sizeB = kc*cols;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0);
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0); ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
Scalar* blockB = allocatedBlockB; ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
@@ -410,7 +409,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
const Scalar* _lhs, Index lhsStride, const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride, const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride, Scalar* res, Index resStride,
const Scalar& alpha); const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
}; };
template <typename Scalar, typename Index, template <typename Scalar, typename Index,
@@ -421,7 +420,7 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
const Scalar* _lhs, Index lhsStride, const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride, const Scalar* _rhs, Index rhsStride,
Scalar* _res, Index resStride, Scalar* _res, Index resStride,
const Scalar& alpha) const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{ {
Index size = cols; Index size = cols;
@@ -432,14 +431,12 @@ EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,f
LhsMapper lhs(_lhs,lhsStride); LhsMapper lhs(_lhs,lhsStride);
ResMapper res(_res,resStride); ResMapper res(_res,resStride);
Index kc = size; // cache block size along the K direction Index kc = blocking.kc(); // cache block size along the K direction
Index mc = rows; // cache block size along the M direction Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
Index nc = cols; // cache block size along the N direction std::size_t sizeA = kc*mc;
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc, 1);
std::size_t sizeB = kc*cols; std::size_t sizeB = kc*cols;
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, kc*mc, 0); ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, allocatedBlockB, sizeB, 0); ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
Scalar* blockB = allocatedBlockB;
gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs; gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
@@ -498,6 +495,11 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs) Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
* RhsBlasTraits::extractScalarFactor(a_rhs); * RhsBlasTraits::extractScalarFactor(a_rhs);
typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,1> BlockingType;
BlockingType blocking(lhs.rows(), rhs.cols(), lhs.cols(), 1, false);
internal::product_selfadjoint_matrix<Scalar, Index, internal::product_selfadjoint_matrix<Scalar, Index,
EIGEN_LOGICAL_XOR(LhsIsUpper,internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint, EIGEN_LOGICAL_XOR(LhsIsUpper,internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)), NumTraits<Scalar>::IsComplex && EIGEN_LOGICAL_XOR(LhsIsUpper,bool(LhsBlasTraits::NeedToConjugate)),
@@ -509,7 +511,7 @@ struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
&lhs.coeffRef(0,0), lhs.outerStride(), // lhs info &lhs.coeffRef(0,0), lhs.outerStride(), // lhs info
&rhs.coeffRef(0,0), rhs.outerStride(), // rhs info &rhs.coeffRef(0,0), rhs.outerStride(), // rhs info
&dst.coeffRef(0,0), dst.outerStride(), // result info &dst.coeffRef(0,0), dst.outerStride(), // result info
actualAlpha // alpha actualAlpha, blocking // alpha
); );
} }
}; };

View File

@@ -275,9 +275,9 @@ int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *pa
return 1; return 1;
} }
int size = (SIDE(*side)==LEFT) ? (*m) : (*n);
#if ISCOMPLEX #if ISCOMPLEX
// FIXME add support for symmetric complex matrix // FIXME add support for symmetric complex matrix
int size = (SIDE(*side)==LEFT) ? (*m) : (*n);
Matrix<Scalar,Dynamic,Dynamic,ColMajor> matA(size,size); Matrix<Scalar,Dynamic,Dynamic,ColMajor> matA(size,size);
if(UPLO(*uplo)==UP) if(UPLO(*uplo)==UP)
{ {
@@ -294,13 +294,15 @@ int EIGEN_BLAS_FUNC(symm)(char *side, char *uplo, int *m, int *n, RealScalar *pa
else if(SIDE(*side)==RIGHT) else if(SIDE(*side)==RIGHT)
matrix(c, *m, *n, *ldc) += alpha * matrix(b, *m, *n, *ldb) * matA; matrix(c, *m, *n, *ldc) += alpha * matrix(b, *m, *n, *ldb) * matA;
#else #else
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic> blocking(*m,*n,size,1,false);
if(SIDE(*side)==LEFT) if(SIDE(*side)==LEFT)
if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, RowMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, RowMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking);
else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,true,false, ColMajor,false,false, ColMajor>::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking);
else return 0; else return 0;
else if(SIDE(*side)==RIGHT) else if(SIDE(*side)==RIGHT)
if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, RowMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, RowMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking);
else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, ColMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar, DenseIndex, ColMajor,false,false, ColMajor,true,false, ColMajor>::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking);
else return 0; else return 0;
else else
return 0; return 0;
@@ -488,20 +490,23 @@ int EIGEN_BLAS_FUNC(hemm)(char *side, char *uplo, int *m, int *n, RealScalar *pa
return 1; return 1;
} }
int size = (SIDE(*side)==LEFT) ? (*m) : (*n);
internal::gemm_blocking_space<ColMajor,Scalar,Scalar,Dynamic,Dynamic,Dynamic> blocking(*m,*n,size,1,false);
if(SIDE(*side)==LEFT) if(SIDE(*side)==LEFT)
{ {
if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar,DenseIndex,RowMajor,true,Conj, ColMajor,false,false, ColMajor> if(UPLO(*uplo)==UP) internal::product_selfadjoint_matrix<Scalar,DenseIndex,RowMajor,true,Conj, ColMajor,false,false, ColMajor>
::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking);
else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,true,false, ColMajor,false,false, ColMajor> else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,true,false, ColMajor,false,false, ColMajor>
::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha); ::run(*m, *n, a, *lda, b, *ldb, c, *ldc, alpha, blocking);
else return 0; else return 0;
} }
else if(SIDE(*side)==RIGHT) else if(SIDE(*side)==RIGHT)
{ {
if(UPLO(*uplo)==UP) matrix(c,*m,*n,*ldc) += alpha * matrix(b,*m,*n,*ldb) * matrix(a,*n,*n,*lda).selfadjointView<Upper>();/*internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,false,false, RowMajor,true,Conj, ColMajor> if(UPLO(*uplo)==UP) matrix(c,*m,*n,*ldc) += alpha * matrix(b,*m,*n,*ldb) * matrix(a,*n,*n,*lda).selfadjointView<Upper>();/*internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,false,false, RowMajor,true,Conj, ColMajor>
::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha);*/ ::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking);*/
else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,false,false, ColMajor,true,false, ColMajor> else if(UPLO(*uplo)==LO) internal::product_selfadjoint_matrix<Scalar,DenseIndex,ColMajor,false,false, ColMajor,true,false, ColMajor>
::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha); ::run(*m, *n, b, *ldb, a, *lda, c, *ldc, alpha, blocking);
else return 0; else return 0;
} }
else else

View File

@@ -85,7 +85,7 @@ template<typename MatrixType> void nomalloc(const MatrixType& m)
m2.template selfadjointView<Lower>().rankUpdate(m1); m2.template selfadjointView<Lower>().rankUpdate(m1);
m2 += m2.template triangularView<Upper>() * m1; m2 += m2.template triangularView<Upper>() * m1;
m2.template triangularView<Upper>() = m2 * m2; m2.template triangularView<Upper>() = m2 * m2;
// m1 += m1.template selfadjointView<Lower>() * m2; m1 += m1.template selfadjointView<Lower>() * m2;
VERIFY_IS_APPROX(m2,m2); VERIFY_IS_APPROX(m2,m2);
} }