mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-05-17 10:16:43 +08:00
fix typo
This commit is contained in:
parent
68eaefa5d4
commit
022e2f5ef4
@ -50,7 +50,7 @@ struct ei_gebp_kernel
|
|||||||
const int peeled_kc = (depth/4)*4;
|
const int peeled_kc = (depth/4)*4;
|
||||||
|
|
||||||
Scalar* unpackedB = const_cast<Scalar*>(blockB - strideB * nr * PacketSize);
|
Scalar* unpackedB = const_cast<Scalar*>(blockB - strideB * nr * PacketSize);
|
||||||
|
|
||||||
// loops on each micro vertical panel of rhs (depth x nr)
|
// loops on each micro vertical panel of rhs (depth x nr)
|
||||||
for(int j2=0; j2<packet_cols; j2+=nr)
|
for(int j2=0; j2<packet_cols; j2+=nr)
|
||||||
{
|
{
|
||||||
@ -193,7 +193,7 @@ struct ei_gebp_kernel
|
|||||||
{
|
{
|
||||||
PacketType B0, B1, B2, B3, A0, A1;
|
PacketType B0, B1, B2, B3, A0, A1;
|
||||||
PacketType T0;
|
PacketType T0;
|
||||||
|
|
||||||
A0 = ei_pload(&blA[0*PacketSize]);
|
A0 = ei_pload(&blA[0*PacketSize]);
|
||||||
A1 = ei_pload(&blA[1*PacketSize]);
|
A1 = ei_pload(&blA[1*PacketSize]);
|
||||||
B0 = ei_pload(&blB[0*PacketSize]);
|
B0 = ei_pload(&blB[0*PacketSize]);
|
||||||
@ -516,7 +516,7 @@ struct ei_gebp_kernel
|
|||||||
for(int k=0; k<depth; k++)
|
for(int k=0; k<depth; k++)
|
||||||
ei_pstore(&unpackedB[k*PacketSize], ei_pset1(blB[k]));
|
ei_pstore(&unpackedB[k*PacketSize], ei_pset1(blB[k]));
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int i=0; i<peeled_mc; i+=mr)
|
for(int i=0; i<peeled_mc; i+=mr)
|
||||||
{
|
{
|
||||||
const Scalar* blA = &blockA[i*strideA+offsetA*mr];
|
const Scalar* blA = &blockA[i*strideA+offsetA*mr];
|
||||||
|
@ -78,7 +78,7 @@ static void run(int rows, int cols, int depth,
|
|||||||
int kc = std::min<int>(Blocking::Max_kc,depth); // cache block size along the K direction
|
int kc = std::min<int>(Blocking::Max_kc,depth); // cache block size along the K direction
|
||||||
int mc = std::min<int>(Blocking::Max_mc,rows); // cache block size along the M direction
|
int mc = std::min<int>(Blocking::Max_mc,rows); // cache block size along the M direction
|
||||||
|
|
||||||
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc*8);
|
Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
|
||||||
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
|
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
|
||||||
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
|
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
|
||||||
Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr;
|
Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr;
|
||||||
|
@ -232,7 +232,7 @@ inline static Integer ei_first_aligned(const Scalar* array, Integer size)
|
|||||||
enum { PacketSize = ei_packet_traits<Scalar>::size,
|
enum { PacketSize = ei_packet_traits<Scalar>::size,
|
||||||
PacketAlignedMask = PacketSize-1
|
PacketAlignedMask = PacketSize-1
|
||||||
};
|
};
|
||||||
|
|
||||||
if(PacketSize==1)
|
if(PacketSize==1)
|
||||||
{
|
{
|
||||||
// Either there is no vectorization, or a packet consists of exactly 1 scalar so that all elements
|
// Either there is no vectorization, or a packet consists of exactly 1 scalar so that all elements
|
||||||
|
Loading…
x
Reference in New Issue
Block a user