mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-07-05 20:55:12 +08:00
Don't optimize the processing of the last rows of a matrix-matrix product in cases that violate the assumptions made by the optimized code path.
This commit is contained in:
parent
7aa5bc9558
commit
5d51a7f12c
@ -1625,9 +1625,14 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
||||
prefetch(&blA[0]);
|
||||
const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
|
||||
|
||||
if( (SwappedTraits::LhsProgress % 4)==0 )
|
||||
// The following piece of code won't work for 512-bit registers.
|
||||
// Moreover, it assumes that there is a half packet of the same size
|
||||
// as nr (which is currently 4) for the return type.
|
||||
typedef typename unpacket_traits<SResPacket>::half SResPacketHalf;
|
||||
if ((SwappedTraits::LhsProgress % 4) == 0 &&
|
||||
(SwappedTraits::LhsProgress <= 8) &&
|
||||
unpacket_traits<SResPacketHalf>::size==4)
|
||||
{
|
||||
// NOTE: The following piece of code won't work for 512-bit registers
|
||||
SAccPacket C0, C1, C2, C3;
|
||||
straits.initAcc(C0);
|
||||
straits.initAcc(C1);
|
||||
|
Loading…
x
Reference in New Issue
Block a user