mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-07-15 17:41:48 +08:00
Fix -Waggressive-loop-optimizations
(cherry picked from commit 4e9e493b4abc57dba377fc326082b40d08193619)
This commit is contained in:
parent
2e3f1d8044
commit
7c6020e424
@ -359,6 +359,10 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
||||
HasQuarter = (int)ResPacketSizeQuarter < (int)ResPacketSizeHalf
|
||||
};
|
||||
|
||||
const Index fullColBlockEnd = LhsPacketSize * (cols / LhsPacketSize);
|
||||
const Index halfColBlockEnd = LhsPacketSizeHalf * (cols / LhsPacketSizeHalf);
|
||||
const Index quarterColBlockEnd = LhsPacketSizeQuarter * (cols / LhsPacketSizeQuarter);
|
||||
|
||||
Index i=0;
|
||||
for(; i<n8; i+=8)
|
||||
{
|
||||
@ -371,8 +375,7 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
||||
c6 = pset1<ResPacket>(ResScalar(0)),
|
||||
c7 = pset1<ResPacket>(ResScalar(0));
|
||||
|
||||
Index j=0;
|
||||
for(; j+LhsPacketSize<=cols; j+=LhsPacketSize)
|
||||
for (Index j = 0; j < fullColBlockEnd; j += LhsPacketSize)
|
||||
{
|
||||
RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0);
|
||||
|
||||
@ -393,7 +396,8 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
||||
ResScalar cc5 = predux(c5);
|
||||
ResScalar cc6 = predux(c6);
|
||||
ResScalar cc7 = predux(c7);
|
||||
for(; j<cols; ++j)
|
||||
|
||||
for (Index j = fullColBlockEnd; j < cols; ++j)
|
||||
{
|
||||
RhsScalar b0 = rhs(j,0);
|
||||
|
||||
@ -422,8 +426,7 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
||||
c2 = pset1<ResPacket>(ResScalar(0)),
|
||||
c3 = pset1<ResPacket>(ResScalar(0));
|
||||
|
||||
Index j=0;
|
||||
for(; j+LhsPacketSize<=cols; j+=LhsPacketSize)
|
||||
for (Index j = 0; j < fullColBlockEnd; j += LhsPacketSize)
|
||||
{
|
||||
RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0);
|
||||
|
||||
@ -436,7 +439,8 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
||||
ResScalar cc1 = predux(c1);
|
||||
ResScalar cc2 = predux(c2);
|
||||
ResScalar cc3 = predux(c3);
|
||||
for(; j<cols; ++j)
|
||||
|
||||
for(Index j = fullColBlockEnd; j < cols; ++j)
|
||||
{
|
||||
RhsScalar b0 = rhs(j,0);
|
||||
|
||||
@ -455,8 +459,7 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
||||
ResPacket c0 = pset1<ResPacket>(ResScalar(0)),
|
||||
c1 = pset1<ResPacket>(ResScalar(0));
|
||||
|
||||
Index j=0;
|
||||
for(; j+LhsPacketSize<=cols; j+=LhsPacketSize)
|
||||
for (Index j = 0; j < fullColBlockEnd; j += LhsPacketSize)
|
||||
{
|
||||
RhsPacket b0 = rhs.template load<RhsPacket, Unaligned>(j,0);
|
||||
|
||||
@ -465,7 +468,8 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
||||
}
|
||||
ResScalar cc0 = predux(c0);
|
||||
ResScalar cc1 = predux(c1);
|
||||
for(; j<cols; ++j)
|
||||
|
||||
for(Index j = fullColBlockEnd; j < cols; ++j)
|
||||
{
|
||||
RhsScalar b0 = rhs(j,0);
|
||||
|
||||
@ -480,15 +484,15 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
||||
ResPacket c0 = pset1<ResPacket>(ResScalar(0));
|
||||
ResPacketHalf c0_h = pset1<ResPacketHalf>(ResScalar(0));
|
||||
ResPacketQuarter c0_q = pset1<ResPacketQuarter>(ResScalar(0));
|
||||
Index j=0;
|
||||
for(; j+LhsPacketSize<=cols; j+=LhsPacketSize)
|
||||
|
||||
for (Index j = 0; j < fullColBlockEnd; j += LhsPacketSize)
|
||||
{
|
||||
RhsPacket b0 = rhs.template load<RhsPacket,Unaligned>(j,0);
|
||||
c0 = pcj.pmadd(lhs.template load<LhsPacket,LhsAlignment>(i,j),b0,c0);
|
||||
}
|
||||
ResScalar cc0 = predux(c0);
|
||||
if (HasHalf) {
|
||||
for(; j+LhsPacketSizeHalf<=cols; j+=LhsPacketSizeHalf)
|
||||
for (Index j = fullColBlockEnd; j < halfColBlockEnd; j += LhsPacketSizeHalf)
|
||||
{
|
||||
RhsPacketHalf b0 = rhs.template load<RhsPacketHalf,Unaligned>(j,0);
|
||||
c0_h = pcj_half.pmadd(lhs.template load<LhsPacketHalf,LhsAlignment>(i,j),b0,c0_h);
|
||||
@ -496,14 +500,14 @@ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE void general_matrix_vector_product<Index,Lhs
|
||||
cc0 += predux(c0_h);
|
||||
}
|
||||
if (HasQuarter) {
|
||||
for(; j+LhsPacketSizeQuarter<=cols; j+=LhsPacketSizeQuarter)
|
||||
for (Index j = halfColBlockEnd; j < quarterColBlockEnd; j += LhsPacketSizeQuarter)
|
||||
{
|
||||
RhsPacketQuarter b0 = rhs.template load<RhsPacketQuarter,Unaligned>(j,0);
|
||||
c0_q = pcj_quarter.pmadd(lhs.template load<LhsPacketQuarter,LhsAlignment>(i,j),b0,c0_q);
|
||||
}
|
||||
cc0 += predux(c0_q);
|
||||
}
|
||||
for(; j<cols; ++j)
|
||||
for (Index j = quarterColBlockEnd; j < cols; ++j)
|
||||
{
|
||||
cc0 += cj.pmul(lhs(i,j), rhs(j,0));
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user