* make all products use the new API to set the blocking sizes

* fix an issue preventing multithreading (Dynamic is now -1, so the MaxRowsAtCompileTime>32 test was always false for dynamically sized destinations)
This commit is contained in:
Gael Guennebaud 2010-06-22 16:08:35 +02:00
parent fd9a9fa0ae
commit b4fe53f561
7 changed files with 45 additions and 26 deletions

View File

@ -284,7 +284,7 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
_ActualRhsType,
Dest> GemmFunctor;
ei_parallelize_gemm<(Dest::MaxRowsAtCompileTime>32)>(GemmFunctor(lhs, rhs, dst, actualAlpha), this->rows(), this->cols());
ei_parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>(GemmFunctor(lhs, rhs, dst, actualAlpha), this->rows(), this->cols());
}
};

View File

@ -258,8 +258,10 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,Conjugate
typedef ei_product_blocking_traits<Scalar> Blocking;
Index kc = std::min<Index>(Blocking::Max_kc,size); // cache block size along the K direction
Index mc = std::min<Index>(Blocking::Max_mc,rows); // cache block size along the M direction
Index kc = size; // cache block size along the K direction
Index mc = rows; // cache block size along the M direction
Index nc = cols; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
@ -339,8 +341,10 @@ struct ei_product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,Conjugat
typedef ei_product_blocking_traits<Scalar> Blocking;
Index kc = std::min<Index>(Blocking::Max_kc,size); // cache block size along the K direction
Index mc = std::min<Index>(Blocking::Max_mc,rows); // cache block size along the M direction
Index kc = size; // cache block size along the K direction
Index mc = rows; // cache block size along the M direction
Index nc = cols; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;

View File

@ -70,8 +70,10 @@ struct ei_selfadjoint_product<Scalar, Index, MatStorageOrder, ColMajor, AAT, UpL
typedef ei_product_blocking_traits<Scalar> Blocking;
Index kc = std::min<Index>(Blocking::Max_kc,depth); // cache block size along the K direction
Index mc = std::min<Index>(Blocking::Max_mc,size); // cache block size along the M direction
Index kc = depth; // cache block size along the K direction
Index mc = size; // cache block size along the M direction
Index nc = size; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*size;

View File

@ -114,8 +114,12 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true,
IsLower = (Mode&Lower) == Lower
};
Index kc = std::min<Index>(Blocking::Max_kc/4,depth); // cache block size along the K direction
Index mc = std::min<Index>(Blocking::Max_mc,rows); // cache block size along the M direction
Index kc = depth; // cache block size along the K direction
Index mc = rows; // cache block size along the M direction
Index nc = cols; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
// it is better to use smaller blocks along the diagonal
kc /= 4;
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
@ -238,8 +242,12 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false,
IsLower = (Mode&Lower) == Lower
};
Index kc = std::min<Index>(Blocking::Max_kc/4,depth); // cache block size along the K direction
Index mc = std::min<Index>(Blocking::Max_mc,rows); // cache block size along the M direction
Index kc = depth; // cache block size along the K direction
Index mc = rows; // cache block size along the M direction
Index nc = cols; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
// it is better to use smaller blocks along the diagonal
kc /= 4;
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;

View File

@ -63,8 +63,12 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStora
IsLower = (Mode&Lower) == Lower
};
Index kc = std::min<Index>(Blocking::Max_kc/4,size); // cache block size along the K direction
Index mc = std::min<Index>(Blocking::Max_mc,size); // cache block size along the M direction
Index kc = size; // cache block size along the K direction
Index mc = size; // cache block size along the M direction
Index nc = cols; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
// it is better to use smaller blocks along the diagonal
kc /= 4;
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
@ -196,8 +200,15 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStor
IsLower = (Mode&Lower) == Lower
};
Index kc = std::min<Index>(Blocking::Max_kc/4,size); // cache block size along the K direction
Index mc = std::min<Index>(Blocking::Max_mc,size); // cache block size along the M direction
// Index kc = std::min<Index>(Blocking::Max_kc/4,size); // cache block size along the K direction
// Index mc = std::min<Index>(Blocking::Max_mc,size); // cache block size along the M direction
// TODO: check that the blocking sizes chosen below are correct for this case
Index kc = size; // cache block size along the K direction
Index mc = size; // cache block size along the M direction
Index nc = rows; // cache block size along the N direction
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
// it is better to use smaller blocks along the diagonal
kc /= 4;
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*size;

View File

@ -123,7 +123,7 @@ class ei_const_blas_data_mapper
Index m_stride;
};
// Defines various constant controlling level 3 blocking
// Defines various constants controlling register blocking for matrix-matrix algorithms.
template<typename Scalar>
struct ei_product_blocking_traits
{
@ -136,13 +136,7 @@ struct ei_product_blocking_traits
nr = NumberOfRegisters/4,
// register block size along the M direction (currently, this one cannot be modified)
mr = 2 * PacketSize,
// max cache block size along the K direction
Max_kc = 4 * ei_meta_sqrt<EIGEN_TUNE_FOR_CPU_CACHE_SIZE/(64*sizeof(Scalar))>::ret,
// max cache block size along the M direction
Max_mc = 2*Max_kc
mr = 2 * PacketSize
};
};