mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
Fix -Waggressive-loop-optimizations
This commit is contained in:
parent
6e7abeae69
commit
4e9e493b4a
@ -361,6 +361,10 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
|||||||
HasQuarter = (int)ResPacketSizeQuarter < (int)ResPacketSizeHalf
|
HasQuarter = (int)ResPacketSizeQuarter < (int)ResPacketSizeHalf
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const Index fullColBlockEnd = LhsPacketSize * (cols / LhsPacketSize);
|
||||||
|
const Index halfColBlockEnd = LhsPacketSizeHalf * (cols / LhsPacketSizeHalf);
|
||||||
|
const Index quarterColBlockEnd = LhsPacketSizeQuarter * (cols / LhsPacketSizeQuarter);
|
||||||
|
|
||||||
Index i=0;
|
Index i=0;
|
||||||
for(; i<n8; i+=8)
|
for(; i<n8; i+=8)
|
||||||
{
|
{
|
||||||
@ -373,8 +377,7 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
|||||||
c6 = pset1<ResPacket>(ResScalar(0)),
|
c6 = pset1<ResPacket>(ResScalar(0)),
|
||||||
c7 = pset1<ResPacket>(ResScalar(0));
|
c7 = pset1<ResPacket>(ResScalar(0));
|
||||||
|
|
||||||
Index j=0;
|
for (Index j = 0; j < fullColBlockEnd; j += LhsPacketSize)
|
||||||
for(; j+LhsPacketSize<=cols; j+=LhsPacketSize)
|
|
||||||
{
|
{
|
||||||
RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0);
|
RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0);
|
||||||
|
|
||||||
@ -395,7 +398,8 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
|||||||
ResScalar cc5 = predux(c5);
|
ResScalar cc5 = predux(c5);
|
||||||
ResScalar cc6 = predux(c6);
|
ResScalar cc6 = predux(c6);
|
||||||
ResScalar cc7 = predux(c7);
|
ResScalar cc7 = predux(c7);
|
||||||
for(; j<cols; ++j)
|
|
||||||
|
for (Index j = fullColBlockEnd; j < cols; ++j)
|
||||||
{
|
{
|
||||||
RhsScalar b0 = rhs(j,0);
|
RhsScalar b0 = rhs(j,0);
|
||||||
|
|
||||||
@ -424,8 +428,7 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
|||||||
c2 = pset1<ResPacket>(ResScalar(0)),
|
c2 = pset1<ResPacket>(ResScalar(0)),
|
||||||
c3 = pset1<ResPacket>(ResScalar(0));
|
c3 = pset1<ResPacket>(ResScalar(0));
|
||||||
|
|
||||||
Index j=0;
|
for (Index j = 0; j < fullColBlockEnd; j += LhsPacketSize)
|
||||||
for(; j+LhsPacketSize<=cols; j+=LhsPacketSize)
|
|
||||||
{
|
{
|
||||||
RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0);
|
RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0);
|
||||||
|
|
||||||
@ -438,7 +441,8 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
|||||||
ResScalar cc1 = predux(c1);
|
ResScalar cc1 = predux(c1);
|
||||||
ResScalar cc2 = predux(c2);
|
ResScalar cc2 = predux(c2);
|
||||||
ResScalar cc3 = predux(c3);
|
ResScalar cc3 = predux(c3);
|
||||||
for(; j<cols; ++j)
|
|
||||||
|
for(Index j = fullColBlockEnd; j < cols; ++j)
|
||||||
{
|
{
|
||||||
RhsScalar b0 = rhs(j,0);
|
RhsScalar b0 = rhs(j,0);
|
||||||
|
|
||||||
@ -457,8 +461,7 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
|||||||
ResPacket c0 = pset1<ResPacket>(ResScalar(0)),
|
ResPacket c0 = pset1<ResPacket>(ResScalar(0)),
|
||||||
c1 = pset1<ResPacket>(ResScalar(0));
|
c1 = pset1<ResPacket>(ResScalar(0));
|
||||||
|
|
||||||
Index j=0;
|
for (Index j = 0; j < fullColBlockEnd; j += LhsPacketSize)
|
||||||
for(; j+LhsPacketSize<=cols; j+=LhsPacketSize)
|
|
||||||
{
|
{
|
||||||
RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0);
|
RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0);
|
||||||
|
|
||||||
@ -467,7 +470,8 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
|||||||
}
|
}
|
||||||
ResScalar cc0 = predux(c0);
|
ResScalar cc0 = predux(c0);
|
||||||
ResScalar cc1 = predux(c1);
|
ResScalar cc1 = predux(c1);
|
||||||
for(; j<cols; ++j)
|
|
||||||
|
for(Index j = fullColBlockEnd; j < cols; ++j)
|
||||||
{
|
{
|
||||||
RhsScalar b0 = rhs(j,0);
|
RhsScalar b0 = rhs(j,0);
|
||||||
|
|
||||||
@ -482,15 +486,15 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
|||||||
ResPacket c0 = pset1<ResPacket>(ResScalar(0));
|
ResPacket c0 = pset1<ResPacket>(ResScalar(0));
|
||||||
ResPacketHalf c0_h = pset1<ResPacketHalf>(ResScalar(0));
|
ResPacketHalf c0_h = pset1<ResPacketHalf>(ResScalar(0));
|
||||||
ResPacketQuarter c0_q = pset1<ResPacketQuarter>(ResScalar(0));
|
ResPacketQuarter c0_q = pset1<ResPacketQuarter>(ResScalar(0));
|
||||||
Index j=0;
|
|
||||||
for(; j+LhsPacketSize<=cols; j+=LhsPacketSize)
|
for (Index j = 0; j < fullColBlockEnd; j += LhsPacketSize)
|
||||||
{
|
{
|
||||||
RhsPacket b0 = rhs.template load<RhsPacket,Unaligned>(j,0);
|
RhsPacket b0 = rhs.template load<RhsPacket,Unaligned>(j,0);
|
||||||
c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i,j),b0,c0);
|
c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i,j),b0,c0);
|
||||||
}
|
}
|
||||||
ResScalar cc0 = predux(c0);
|
ResScalar cc0 = predux(c0);
|
||||||
if (HasHalf) {
|
if (HasHalf) {
|
||||||
for(; j+LhsPacketSizeHalf<=cols; j+=LhsPacketSizeHalf)
|
for (Index j = fullColBlockEnd; j < halfColBlockEnd; j += LhsPacketSizeHalf)
|
||||||
{
|
{
|
||||||
RhsPacketHalf b0 = rhs.template load<RhsPacketHalf,Unaligned>(j,0);
|
RhsPacketHalf b0 = rhs.template load<RhsPacketHalf,Unaligned>(j,0);
|
||||||
c0_h = pcj_half.pmadd(lhs.template load<LhsPacketHalf,LhsAlignment>(i,j),b0,c0_h);
|
c0_h = pcj_half.pmadd(lhs.template load<LhsPacketHalf,LhsAlignment>(i,j),b0,c0_h);
|
||||||
@ -498,14 +502,14 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
|||||||
cc0 += predux(c0_h);
|
cc0 += predux(c0_h);
|
||||||
}
|
}
|
||||||
if (HasQuarter) {
|
if (HasQuarter) {
|
||||||
for(; j+LhsPacketSizeQuarter<=cols; j+=LhsPacketSizeQuarter)
|
for (Index j = halfColBlockEnd; j < quarterColBlockEnd; j += LhsPacketSizeQuarter)
|
||||||
{
|
{
|
||||||
RhsPacketQuarter b0 = rhs.template load<RhsPacketQuarter,Unaligned>(j,0);
|
RhsPacketQuarter b0 = rhs.template load<RhsPacketQuarter,Unaligned>(j,0);
|
||||||
c0_q = pcj_quarter.pmadd(lhs.template load<LhsPacketQuarter,LhsAlignment>(i,j),b0,c0_q);
|
c0_q = pcj_quarter.pmadd(lhs.template load<LhsPacketQuarter,LhsAlignment>(i,j),b0,c0_q);
|
||||||
}
|
}
|
||||||
cc0 += predux(c0_q);
|
cc0 += predux(c0_q);
|
||||||
}
|
}
|
||||||
for(; j<cols; ++j)
|
for (Index j = quarterColBlockEnd; j < cols; ++j)
|
||||||
{
|
{
|
||||||
cc0 += cj.pmul(lhs(i,j), rhs(j,0));
|
cc0 += cj.pmul(lhs(i,j), rhs(j,0));
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user