mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 11:49:02 +08:00
Made sure that the specialization of gemm_pack_rhs for row-major matrices is vectorized when nr == 2*PacketSize (which is the case for SSE when compiling in 64-bit mode).
This commit is contained in:
parent
e45a6bed45
commit
14bc4b9704
@ -1091,7 +1091,11 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, Pan
|
|||||||
if (nr == PacketSize) {
|
if (nr == PacketSize) {
|
||||||
Packet A = ploadu<Packet>(&rhs[k*rhsStride + j2]);
|
Packet A = ploadu<Packet>(&rhs[k*rhsStride + j2]);
|
||||||
pstoreu(blockB+count, cj.pconj(A));
|
pstoreu(blockB+count, cj.pconj(A));
|
||||||
count += PacketSize;
|
} else if (nr == 2*PacketSize) {
|
||||||
|
Packet A = ploadu<Packet>(&rhs[k*rhsStride + j2]);
|
||||||
|
Packet B = ploadu<Packet>(&rhs[k*rhsStride + j2 + PacketSize]);
|
||||||
|
pstoreu(blockB+count, cj.pconj(A));
|
||||||
|
pstoreu(blockB+count+PacketSize, cj.pconj(B));
|
||||||
} else {
|
} else {
|
||||||
const Scalar* b0 = &rhs[k*rhsStride + j2];
|
const Scalar* b0 = &rhs[k*rhsStride + j2];
|
||||||
blockB[count+0] = cj(b0[0]);
|
blockB[count+0] = cj(b0[0]);
|
||||||
@ -1102,8 +1106,8 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, Pan
|
|||||||
if(nr>=8) blockB[count+5] = cj(b0[5]);
|
if(nr>=8) blockB[count+5] = cj(b0[5]);
|
||||||
if(nr>=8) blockB[count+6] = cj(b0[6]);
|
if(nr>=8) blockB[count+6] = cj(b0[6]);
|
||||||
if(nr>=8) blockB[count+7] = cj(b0[7]);
|
if(nr>=8) blockB[count+7] = cj(b0[7]);
|
||||||
count += nr;
|
|
||||||
}
|
}
|
||||||
|
count += nr;
|
||||||
}
|
}
|
||||||
// skip what we have after
|
// skip what we have after
|
||||||
if(PanelMode) count += nr * (stride-offset-depth);
|
if(PanelMode) count += nr * (stride-offset-depth);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user