Fix trmm and symm with respect to LHS packing

This commit is contained in:
Gael Guennebaud 2010-07-20 10:06:41 +02:00
parent 76eb9c9fd9
commit 872523844a
2 changed files with 11 additions and 13 deletions

View File

@ -26,10 +26,9 @@
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_H #define EIGEN_SELFADJOINT_MATRIX_MATRIX_H
// pack a selfadjoint block diagonal for use with the gebp_kernel // pack a selfadjoint block diagonal for use with the gebp_kernel
template<typename Scalar, typename Index, int mr, int StorageOrder> template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder>
struct ei_symm_pack_lhs struct ei_symm_pack_lhs
{ {
enum { PacketSize = ei_packet_traits<Scalar>::size };
template<int BlockRows> inline template<int BlockRows> inline
void pack(Scalar* blockA, const ei_const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count) void pack(Scalar* blockA, const ei_const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
{ {
@ -59,16 +58,16 @@ struct ei_symm_pack_lhs
{ {
ei_const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride); ei_const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
Index count = 0; Index count = 0;
Index peeled_mc = (rows/mr)*mr; Index peeled_mc = (rows/Pack1)*Pack1;
for(Index i=0; i<peeled_mc; i+=mr) for(Index i=0; i<peeled_mc; i+=Pack1)
{ {
pack<mr>(blockA, lhs, cols, i, count); pack<Pack1>(blockA, lhs, cols, i, count);
} }
if(rows-peeled_mc>=PacketSize) if(rows-peeled_mc>=Pack2)
{ {
pack<mr/2>(blockA, lhs, cols, peeled_mc, count); pack<Pack2>(blockA, lhs, cols, peeled_mc, count);
peeled_mc += PacketSize; peeled_mc += Pack2;
} }
// do the same with mr==1 // do the same with mr==1
@ -269,9 +268,9 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
Scalar* blockB = allocatedBlockB + sizeW; Scalar* blockB = allocatedBlockB + sizeW;
ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel; ei_gebp_kernel<Scalar, Scalar, Index, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
ei_symm_pack_lhs<Scalar, Index, Traits::mr,LhsStorageOrder> pack_lhs; ei_symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs; ei_gemm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr,LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed; ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;
for(Index k2=0; k2<size; k2+=kc) for(Index k2=0; k2<size; k2+=kc)
{ {
@ -306,7 +305,7 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
for(Index i2=k2+kc; i2<size; i2+=mc) for(Index i2=k2+kc; i2<size; i2+=mc)
{ {
const Index actual_mc = std::min(i2+mc,size)-i2; const Index actual_mc = std::min(i2+mc,size)-i2;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr,LhsStorageOrder,false>() ei_gemm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder,false>()
(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc); (blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha); gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);

View File

@ -120,7 +120,6 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true,
std::size_t sizeW = kc*Traits::WorkSpaceFactor; std::size_t sizeW = kc*Traits::WorkSpaceFactor;
std::size_t sizeB = sizeW + kc*cols; std::size_t sizeB = sizeW + kc*cols;
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB); Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
// Scalar* allocatedBlockB = new Scalar[sizeB];
Scalar* blockB = allocatedBlockB + sizeW; Scalar* blockB = allocatedBlockB + sizeW;
Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer; Matrix<Scalar,SmallPanelWidth,SmallPanelWidth,LhsStorageOrder> triangularBuffer;
@ -196,7 +195,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true,
for(Index i2=start; i2<end; i2+=mc) for(Index i2=start; i2<end; i2+=mc)
{ {
const Index actual_mc = std::min(i2+mc,end)-i2; const Index actual_mc = std::min(i2+mc,end)-i2;
ei_gemm_pack_lhs<Scalar, Index, Traits::mr,LhsStorageOrder,false>() ei_gemm_pack_lhs<Scalar, Index, Traits::mr,Traits::LhsProgress, LhsStorageOrder,false>()
(blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc); (blockA, &lhs(i2, actual_k2), lhsStride, actual_kc, actual_mc);
gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha); gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha);