From 3e1fe8e416eb79a64be1d9e1092217ca2dbd1dfc Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 27 Mar 2014 10:38:41 -0700 Subject: [PATCH] Vectorized the packing of a col-major matrix used as the right-hand side argument in a matrix-matrix product when AVX instructions are used. No vectorization takes place when SSE instructions are used; however, this doesn't seem to impact performance. --- .../Core/products/GeneralBlockPanelKernel.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index eeeb5290f..28c2a913e 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -1033,6 +1033,7 @@ EIGEN_DONT_INLINE void gemm_pack_rhs::IsComplex && Conjugate> cj; Index packet_cols = (cols/nr) * nr; Index count = 0; + const Index peeled_k = (depth/PacketSize)*PacketSize; for(Index j2=0; j2 kernel; + for (int p = 0; p < PacketSize; ++p) { + kernel.packet[p] = ploadu(&rhs[(j2+p)*rhsStride+k]); + } + ptranspose(kernel); + for (int p = 0; p < PacketSize; ++p) { + pstoreu(blockB+count, cj.pconj(kernel.packet[p])); + count+=PacketSize; + } + } + } + for(; k