mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-11 19:29:02 +08:00
rename indices to a common scheme
This commit is contained in:
parent
0916d69ca5
commit
bec3f9bfe4
@ -91,7 +91,7 @@ EIGEN_DONT_INLINE static void run(
|
|||||||
const Index ResPacketAlignedMask = ResPacketSize-1;
|
const Index ResPacketAlignedMask = ResPacketSize-1;
|
||||||
const Index PeelAlignedMask = ResPacketSize*peels-1;
|
const Index PeelAlignedMask = ResPacketSize*peels-1;
|
||||||
const Index size = rows;
|
const Index size = rows;
|
||||||
|
|
||||||
// How many coeffs of the result do we have to skip to be aligned.
|
// How many coeffs of the result do we have to skip to be aligned.
|
||||||
// Here we assume data are at least aligned on the base scalar type.
|
// Here we assume data are at least aligned on the base scalar type.
|
||||||
Index alignedStart = ei_first_aligned(res,size);
|
Index alignedStart = ei_first_aligned(res,size);
|
||||||
@ -244,30 +244,29 @@ EIGEN_DONT_INLINE static void run(
|
|||||||
Index start = columnBound;
|
Index start = columnBound;
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
for (Index i=start; i<end; ++i)
|
for (Index k=start; k<end; ++k)
|
||||||
{
|
{
|
||||||
RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[i*rhsIncr]);
|
RhsPacket ptmp0 = ei_pset1<RhsPacket>(alpha*rhs[k*rhsIncr]);
|
||||||
const LhsScalar* lhs0 = lhs + i*lhsStride;
|
const LhsScalar* lhs0 = lhs + k*lhsStride;
|
||||||
|
|
||||||
if (Vectorizable)
|
if (Vectorizable)
|
||||||
{
|
{
|
||||||
/* explicit vectorization */
|
/* explicit vectorization */
|
||||||
// process first unaligned result's coeffs
|
// process first unaligned result's coeffs
|
||||||
for (Index j=0; j<alignedStart; ++j)
|
for (Index j=0; j<alignedStart; ++j)
|
||||||
res[j] += cj.pmul(lhs0[j], ei_pfirst(ptmp0));
|
res[j] += cj.pmul(lhs0[j], rhs[k*rhsIncr]/*ei_pfirst(ptmp0)*/);
|
||||||
|
|
||||||
// process aligned result's coeffs
|
// process aligned result's coeffs
|
||||||
if ((size_t(lhs0+alignedStart)%sizeof(LhsPacket))==0)
|
if ((size_t(lhs0+alignedStart)%sizeof(LhsPacket))==0)
|
||||||
for (Index j = alignedStart;j<alignedSize;j+=ResPacketSize)
|
for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
|
||||||
ei_pstore(&res[j], pcj.pmadd(ei_pload<LhsPacket>(&lhs0[j]), ptmp0, ei_pload<ResPacket>(&res[j])));
|
ei_pstore(&res[i], pcj.pmadd(ei_ploadu<LhsPacket>(&lhs0[i]), ptmp0, ei_pload<ResPacket>(&res[i])));
|
||||||
else
|
else
|
||||||
for (Index j = alignedStart;j<alignedSize;j+=ResPacketSize)
|
for (Index i = alignedStart;i<alignedSize;i+=ResPacketSize)
|
||||||
ei_pstore(&res[j], pcj.pmadd(ei_ploadu<LhsPacket>(&lhs0[j]), ptmp0, ei_pload<ResPacket>(&res[j])));
|
ei_pstore(&res[i], pcj.pmadd(ei_ploadu<LhsPacket>(&lhs0[i]), ptmp0, ei_pload<ResPacket>(&res[i])));
|
||||||
}
|
}
|
||||||
|
|
||||||
// process remaining scalars (or all if no explicit vectorization)
|
// process remaining scalars (or all if no explicit vectorization)
|
||||||
for (Index j=alignedSize; j<size; ++j)
|
for (Index i=alignedSize; i<size; ++i)
|
||||||
res[j] += cj.pmul(lhs0[j], ei_pfirst(ptmp0));
|
res[i] += cj.pmul(lhs0[i], ei_pfirst(ptmp0));
|
||||||
}
|
}
|
||||||
if (skipColumns)
|
if (skipColumns)
|
||||||
{
|
{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user