Don't optimize the processing of the last rows of a matrix-matrix product in cases that violate the assumptions made by the optimized code path.

2025-07-05 20:55:12 +08:00 · 2016-05-23 15:13:16 -07:00 · 2016-05-23 15:13:16 -07:00 · 5d51a7f12c
commit 5d51a7f12c
parent 7aa5bc9558
1 changed files with 7 additions and 2 deletions
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -1625,9 +1625,14 @@ void gebp_kernel<LhsScalar,RhsScalar,Index,DataMapper,mr,nr,ConjugateLhs,Conjuga
          prefetch(&blA[0]);
          const RhsScalar* blB = &blockB[j2*strideB+offsetB*nr];
-          if( (SwappedTraits::LhsProgress % 4)==0 )
+          // The following piece of code wont work for 512 bit registers
+          // Moreover it assumes that there is a half packet of the same size
+          // as nr (which is currently 4) for the return type.
+          typedef typename unpacket_traits<SResPacket>::half SResPacketHalf;
+          if ((SwappedTraits::LhsProgress % 4) == 0 &&
+              (SwappedTraits::LhsProgress <= 8) &&
+              unpacket_traits<SResPacketHalf>::size==4)
          {
-            // NOTE The following piece of code wont work for 512 bit registers
            SAccPacket C0, C1, C2, C3;
            straits.initAcc(C0);
            straits.initAcc(C1);