mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-07-05 20:55:12 +08:00
Don't optimize the processing of the last rows of a matrix-matrix product in cases that violate the assumptions made by the optimized code path.
This commit is contained in:
parent
7aa5bc9558
commit
5d51a7f12c
@ -1625,9 +1625,14 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
|||||||
prefetch(&blA[0]);
|
prefetch(&blA[0]);
|
||||||
const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
|
const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
|
||||||
|
|
||||||
if( (SwappedTraits::LhsProgress % 4)==0 )
|
// The following piece of code won't work for 512-bit registers
|
||||||
|
// Moreover it assumes that there is a half packet of the same size
|
||||||
|
// as nr (which is currently 4) for the return type.
|
||||||
|
typedef typename unpacket_traits<SResPacket>::half SResPacketHalf;
|
||||||
|
if ((SwappedTraits::LhsProgress % 4) == 0 &&
|
||||||
|
(SwappedTraits::LhsProgress <= 8) &&
|
||||||
|
unpacket_traits<SResPacketHalf>::size==4)
|
||||||
{
|
{
|
||||||
// NOTE The following piece of code won't work for 512-bit registers
|
|
||||||
SAccPacket C0, C1, C2, C3;
|
SAccPacket C0, C1, C2, C3;
|
||||||
straits.initAcc(C0);
|
straits.initAcc(C0);
|
||||||
straits.initAcc(C1);
|
straits.initAcc(C1);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user