mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-07-31 01:03:38 +08:00
fix computation of blocking sizes for small triangular matrices
This commit is contained in:
parent
0068d3ccf6
commit
d44fce501b
@ -101,7 +101,7 @@ inline void setCpuCacheSizes(std::ptrdiff_t l1, std::ptrdiff_t l2)
|
|||||||
* - the number of scalars that fit into a packet (when vectorization is enabled).
|
* - the number of scalars that fit into a packet (when vectorization is enabled).
|
||||||
*
|
*
|
||||||
* \sa setCpuCacheSizes */
|
* \sa setCpuCacheSizes */
|
||||||
template<typename LhsScalar, typename RhsScalar>
|
template<typename LhsScalar, typename RhsScalar, int KcFactor>
|
||||||
void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrdiff_t& n)
|
void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrdiff_t& n)
|
||||||
{
|
{
|
||||||
// Explanations:
|
// Explanations:
|
||||||
@ -114,7 +114,7 @@ void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrd
|
|||||||
std::ptrdiff_t l1, l2;
|
std::ptrdiff_t l1, l2;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
kdiv = 2 * ei_product_blocking_traits<RhsScalar>::nr
|
kdiv = KcFactor * 2 * ei_product_blocking_traits<RhsScalar>::nr
|
||||||
* ei_packet_traits<RhsScalar>::size * sizeof(RhsScalar),
|
* ei_packet_traits<RhsScalar>::size * sizeof(RhsScalar),
|
||||||
mr = ei_product_blocking_traits<LhsScalar>::mr,
|
mr = ei_product_blocking_traits<LhsScalar>::mr,
|
||||||
mr_mask = (0xffffffff/mr)*mr
|
mr_mask = (0xffffffff/mr)*mr
|
||||||
@ -127,6 +127,12 @@ void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrd
|
|||||||
n = n;
|
n = n;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename LhsScalar, typename RhsScalar>
|
||||||
|
inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrdiff_t& n)
|
||||||
|
{
|
||||||
|
computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef EIGEN_HAS_FUSE_CJMADD
|
#ifdef EIGEN_HAS_FUSE_CJMADD
|
||||||
#define CJMADD(A,B,C,T) C = cj.pmadd(A,B,C);
|
#define CJMADD(A,B,C,T) C = cj.pmadd(A,B,C);
|
||||||
#else
|
#else
|
||||||
|
@ -117,9 +117,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,true,
|
|||||||
Index kc = depth; // cache block size along the K direction
|
Index kc = depth; // cache block size along the K direction
|
||||||
Index mc = rows; // cache block size along the M direction
|
Index mc = rows; // cache block size along the M direction
|
||||||
Index nc = cols; // cache block size along the N direction
|
Index nc = cols; // cache block size along the N direction
|
||||||
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
|
computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
|
||||||
// it is better to use smaller blocks along the diagonal
|
|
||||||
kc /= 4;
|
|
||||||
|
|
||||||
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
||||||
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
|
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
|
||||||
@ -245,9 +243,7 @@ struct ei_product_triangular_matrix_matrix<Scalar,Index,Mode,false,
|
|||||||
Index kc = depth; // cache block size along the K direction
|
Index kc = depth; // cache block size along the K direction
|
||||||
Index mc = rows; // cache block size along the M direction
|
Index mc = rows; // cache block size along the M direction
|
||||||
Index nc = cols; // cache block size along the N direction
|
Index nc = cols; // cache block size along the N direction
|
||||||
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
|
computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
|
||||||
// it is better to use smaller blocks along the diagonal
|
|
||||||
kc /= 4;
|
|
||||||
|
|
||||||
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
||||||
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
|
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
|
||||||
|
@ -66,9 +66,7 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStora
|
|||||||
Index kc = size; // cache block size along the K direction
|
Index kc = size; // cache block size along the K direction
|
||||||
Index mc = size; // cache block size along the M direction
|
Index mc = size; // cache block size along the M direction
|
||||||
Index nc = cols; // cache block size along the N direction
|
Index nc = cols; // cache block size along the N direction
|
||||||
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
|
computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
|
||||||
// it is better to use smaller blocks along the diagonal
|
|
||||||
kc /= 4;
|
|
||||||
|
|
||||||
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
||||||
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
|
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
|
||||||
@ -206,9 +204,7 @@ struct ei_triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStor
|
|||||||
Index kc = size; // cache block size along the K direction
|
Index kc = size; // cache block size along the K direction
|
||||||
Index mc = size; // cache block size along the M direction
|
Index mc = size; // cache block size along the M direction
|
||||||
Index nc = rows; // cache block size along the N direction
|
Index nc = rows; // cache block size along the N direction
|
||||||
computeProductBlockingSizes<Scalar,Scalar>(kc, mc, nc);
|
computeProductBlockingSizes<Scalar,Scalar,4>(kc, mc, nc);
|
||||||
// it is better to use smaller blocks along the diagonal
|
|
||||||
kc /= 4;
|
|
||||||
|
|
||||||
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
||||||
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*size;
|
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*size;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user