mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-23 18:19:34 +08:00
Fix performance regression in dgemm introduced by changeset 5d51a7f12c69138ed2a43df240bdf27a5313f7ce
This commit is contained in:
parent
672076db5d
commit
0fa9e4a15c
@ -1526,12 +1526,12 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
|
|||||||
const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
|
const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
|
||||||
|
|
||||||
// The following piece of code won't work for 512-bit registers
|
// The following piece of code won't work for 512-bit registers
|
||||||
// Moreover it assumes that there is a half packet of the same size
|
// Moreover, if LhsProgress==8 it assumes that there is a half packet of the same size
|
||||||
// as nr (which is currently 4) for the return type.
|
// as nr (which is currently 4) for the return type.
|
||||||
typedef typename unpacket_traits<SResPacket>::half SResPacketHalf;
|
typedef typename unpacket_traits<SResPacket>::half SResPacketHalf;
|
||||||
if ((SwappedTraits::LhsProgress % 4) == 0 &&
|
if ((SwappedTraits::LhsProgress % 4) == 0 &&
|
||||||
(SwappedTraits::LhsProgress <= 8) &&
|
(SwappedTraits::LhsProgress <= 8) &&
|
||||||
unpacket_traits<SResPacketHalf>::size==4)
|
(SwappedTraits::LhsProgress!=8 || unpacket_traits<SResPacketHalf>::size==nr))
|
||||||
{
|
{
|
||||||
SAccPacket C0, C1, C2, C3;
|
SAccPacket C0, C1, C2, C3;
|
||||||
straits.initAcc(C0);
|
straits.initAcc(C0);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user