This commit is contained in:
Gael Guennebaud 2010-02-23 18:24:15 +01:00
parent 68eaefa5d4
commit 022e2f5ef4
3 changed files with 5 additions and 5 deletions

View File

@@ -50,7 +50,7 @@ struct ei_gebp_kernel
const int peeled_kc = (depth/4)*4;
Scalar* unpackedB = const_cast<Scalar*>(blockB - strideB * nr * PacketSize);
// loops on each micro vertical panel of rhs (depth x nr)
for(int j2=0; j2<packet_cols; j2+=nr)
{
@@ -193,7 +193,7 @@ struct ei_gebp_kernel
{
PacketType B0, B1, B2, B3, A0, A1;
PacketType T0;
A0 = ei_pload(&blA[0*PacketSize]);
A1 = ei_pload(&blA[1*PacketSize]);
B0 = ei_pload(&blB[0*PacketSize]);
@@ -516,7 +516,7 @@ struct ei_gebp_kernel
for(int k=0; k<depth; k++)
ei_pstore(&unpackedB[k*PacketSize], ei_pset1(blB[k]));
}
for(int i=0; i<peeled_mc; i+=mr)
{
const Scalar* blA = &blockA[i*strideA+offsetA*mr];

View File

@@ -78,7 +78,7 @@ static void run(int rows, int cols, int depth,
int kc = std::min<int>(Blocking::Max_kc,depth); // cache block size along the K direction
int mc = std::min<int>(Blocking::Max_mc,rows); // cache block size along the M direction
-Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc*8);
+Scalar* blockA = ei_aligned_stack_new(Scalar, kc*mc);
std::size_t sizeB = kc*Blocking::PacketSize*Blocking::nr + kc*cols;
Scalar* allocatedBlockB = ei_aligned_stack_new(Scalar, sizeB);
Scalar* blockB = allocatedBlockB + kc*Blocking::PacketSize*Blocking::nr;

View File

@@ -232,7 +232,7 @@ inline static Integer ei_first_aligned(const Scalar* array, Integer size)
enum { PacketSize = ei_packet_traits<Scalar>::size,
PacketAlignedMask = PacketSize-1
};
if(PacketSize==1)
{
// Either there is no vectorization, or a packet consists of exactly 1 scalar so that all elements