mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
Fix -Waggressive-loop-optimizations
This commit is contained in:
parent
6e7abeae69
commit
4e9e493b4a
@@ -361,6 +361,10 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
||||
HasQuarter = (int)ResPacketSizeQuarter < (int)ResPacketSizeHalf
|
||||
};
|
||||
|
||||
const Index fullColBlockEnd = LhsPacketSize * (cols / LhsPacketSize);
|
||||
const Index halfColBlockEnd = LhsPacketSizeHalf * (cols / LhsPacketSizeHalf);
|
||||
const Index quarterColBlockEnd = LhsPacketSizeQuarter * (cols / LhsPacketSizeQuarter);
|
||||
|
||||
Index i=0;
|
||||
for(; i<n8; i+=8)
|
||||
{
|
||||
@@ -373,8 +377,7 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
||||
c6 = pset1<ResPacket>(ResScalar(0)),
|
||||
c7 = pset1<ResPacket>(ResScalar(0));
|
||||
|
||||
Index j=0;
|
||||
for(; j+LhsPacketSize<=cols; j+=LhsPacketSize)
|
||||
for (Index j = 0; j < fullColBlockEnd; j += LhsPacketSize)
|
||||
{
|
||||
RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0);
|
||||
|
||||
@@ -395,7 +398,8 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
||||
ResScalar cc5 = predux(c5);
|
||||
ResScalar cc6 = predux(c6);
|
||||
ResScalar cc7 = predux(c7);
|
||||
for(; j<cols; ++j)
|
||||
|
||||
for (Index j = fullColBlockEnd; j < cols; ++j)
|
||||
{
|
||||
RhsScalar b0 = rhs(j,0);
|
||||
|
||||
@@ -424,8 +428,7 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
||||
c2 = pset1<ResPacket>(ResScalar(0)),
|
||||
c3 = pset1<ResPacket>(ResScalar(0));
|
||||
|
||||
Index j=0;
|
||||
for(; j+LhsPacketSize<=cols; j+=LhsPacketSize)
|
||||
for (Index j = 0; j < fullColBlockEnd; j += LhsPacketSize)
|
||||
{
|
||||
RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0);
|
||||
|
||||
@@ -438,7 +441,8 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
||||
ResScalar cc1 = predux(c1);
|
||||
ResScalar cc2 = predux(c2);
|
||||
ResScalar cc3 = predux(c3);
|
||||
for(; j<cols; ++j)
|
||||
|
||||
for(Index j = fullColBlockEnd; j < cols; ++j)
|
||||
{
|
||||
RhsScalar b0 = rhs(j,0);
|
||||
|
||||
@@ -457,8 +461,7 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
||||
ResPacket c0 = pset1<ResPacket>(ResScalar(0)),
|
||||
c1 = pset1<ResPacket>(ResScalar(0));
|
||||
|
||||
Index j=0;
|
||||
for(; j+LhsPacketSize<=cols; j+=LhsPacketSize)
|
||||
for (Index j = 0; j < fullColBlockEnd; j += LhsPacketSize)
|
||||
{
|
||||
RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0);
|
||||
|
||||
@@ -467,7 +470,8 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
||||
}
|
||||
ResScalar cc0 = predux(c0);
|
||||
ResScalar cc1 = predux(c1);
|
||||
for(; j<cols; ++j)
|
||||
|
||||
for(Index j = fullColBlockEnd; j < cols; ++j)
|
||||
{
|
||||
RhsScalar b0 = rhs(j,0);
|
||||
|
||||
@@ -482,15 +486,15 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
||||
ResPacket c0 = pset1<ResPacket>(ResScalar(0));
|
||||
ResPacketHalf c0_h = pset1<ResPacketHalf>(ResScalar(0));
|
||||
ResPacketQuarter c0_q = pset1<ResPacketQuarter>(ResScalar(0));
|
||||
Index j=0;
|
||||
for(; j+LhsPacketSize<=cols; j+=LhsPacketSize)
|
||||
|
||||
for (Index j = 0; j < fullColBlockEnd; j += LhsPacketSize)
|
||||
{
|
||||
RhsPacket b0 = rhs.template load<RhsPacket,Unaligned>(j,0);
|
||||
c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i,j),b0,c0);
|
||||
}
|
||||
ResScalar cc0 = predux(c0);
|
||||
if (HasHalf) {
|
||||
for(; j+LhsPacketSizeHalf<=cols; j+=LhsPacketSizeHalf)
|
||||
for (Index j = fullColBlockEnd; j < halfColBlockEnd; j += LhsPacketSizeHalf)
|
||||
{
|
||||
RhsPacketHalf b0 = rhs.template load<RhsPacketHalf,Unaligned>(j,0);
|
||||
c0_h = pcj_half.pmadd(lhs.template load<LhsPacketHalf,LhsAlignment>(i,j),b0,c0_h);
|
||||
@@ -498,14 +502,14 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
||||
cc0 += predux(c0_h);
|
||||
}
|
||||
if (HasQuarter) {
|
||||
for(; j+LhsPacketSizeQuarter<=cols; j+=LhsPacketSizeQuarter)
|
||||
for (Index j = halfColBlockEnd; j < quarterColBlockEnd; j += LhsPacketSizeQuarter)
|
||||
{
|
||||
RhsPacketQuarter b0 = rhs.template load<RhsPacketQuarter,Unaligned>(j,0);
|
||||
c0_q = pcj_quarter.pmadd(lhs.template load<LhsPacketQuarter,LhsAlignment>(i,j),b0,c0_q);
|
||||
}
|
||||
cc0 += predux(c0_q);
|
||||
}
|
||||
for(; j<cols; ++j)
|
||||
for (Index j = quarterColBlockEnd; j < cols; ++j)
|
||||
{
|
||||
cc0 += cj.pmul(lhs(i,j), rhs(j,0));
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user