From 14bc4b9704b7e347ffcfe3c52588790e27e5118b Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 26 Mar 2014 17:35:18 -0700 Subject: [PATCH] Made sure that the version of gemm_pack_rhs specialized for row major matrices is vectorized when nr == 2*PacketSize (which is the case for SSE when compiling in 64bit mode). --- Eigen/src/Core/products/GeneralBlockPanelKernel.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 569cfea71..d17752489 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -1091,7 +1091,11 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, Pan if (nr == PacketSize) { Packet A = ploadu<Packet>(&rhs[k*rhsStride + j2]); pstoreu(blockB+count, cj.pconj(A)); - count += PacketSize; + } else if (nr == 2*PacketSize) { + Packet A = ploadu<Packet>(&rhs[k*rhsStride + j2]); + Packet B = ploadu<Packet>(&rhs[k*rhsStride + j2 + PacketSize]); + pstoreu(blockB+count, cj.pconj(A)); + pstoreu(blockB+count+PacketSize, cj.pconj(B)); } else { const Scalar* b0 = &rhs[k*rhsStride + j2]; blockB[count+0] = cj(b0[0]); @@ -1102,8 +1106,8 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, Pan if(nr>=8) blockB[count+5] = cj(b0[5]); if(nr>=8) blockB[count+6] = cj(b0[6]); if(nr>=8) blockB[count+7] = cj(b0[7]); - count += nr; } + count += nr; } // skip what we have after if(PanelMode) count += nr * (stride-offset-depth);