Bug #1090: fix a shortcoming in the redux logic that allowed slice-vectorization to be selected together with unrolling.

This commit is contained in:
Gael Guennebaud 2015-10-21 20:58:33 +02:00
parent b178cc3479
commit e78bc111f1
2 changed files with 11 additions and 2 deletions

View File

@ -269,8 +269,9 @@ struct redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
} }
}; };
template<typename Func, typename Derived> // NOTE: for SliceVectorizedTraversal we simply bypass unrolling
struct redux_impl<Func, Derived, SliceVectorizedTraversal, NoUnrolling> template<typename Func, typename Derived, int Unrolling>
struct redux_impl<Func, Derived, SliceVectorizedTraversal, Unrolling>
{ {
typedef typename Derived::Scalar Scalar; typedef typename Derived::Scalar Scalar;
typedef typename packet_traits<Scalar>::type PacketType; typedef typename packet_traits<Scalar>::type PacketType;

View File

@ -56,6 +56,14 @@ template<typename MatrixType> void matrixRedux(const MatrixType& m)
VERIFY_IS_APPROX(m1_for_prod.block(r0,c0,r1,c1).prod(), m1_for_prod.block(r0,c0,r1,c1).eval().prod()); VERIFY_IS_APPROX(m1_for_prod.block(r0,c0,r1,c1).prod(), m1_for_prod.block(r0,c0,r1,c1).eval().prod());
VERIFY_IS_APPROX(m1.block(r0,c0,r1,c1).real().minCoeff(), m1.block(r0,c0,r1,c1).real().eval().minCoeff()); VERIFY_IS_APPROX(m1.block(r0,c0,r1,c1).real().minCoeff(), m1.block(r0,c0,r1,c1).real().eval().minCoeff());
VERIFY_IS_APPROX(m1.block(r0,c0,r1,c1).real().maxCoeff(), m1.block(r0,c0,r1,c1).real().eval().maxCoeff()); VERIFY_IS_APPROX(m1.block(r0,c0,r1,c1).real().maxCoeff(), m1.block(r0,c0,r1,c1).real().eval().maxCoeff());
// regression for bug 1090
const int R1 = MatrixType::RowsAtCompileTime>=2 ? MatrixType::RowsAtCompileTime/2 : 6;
const int C1 = MatrixType::ColsAtCompileTime>=2 ? MatrixType::ColsAtCompileTime/2 : 6;
if(R1<=rows-r0 && C1<=cols-c0)
{
VERIFY_IS_APPROX( (m1.template block<R1,C1>(r0,c0).sum()), m1.block(r0,c0,R1,C1).sum() );
}
// test empty objects // test empty objects
VERIFY_IS_APPROX(m1.block(r0,c0,0,0).sum(), Scalar(0)); VERIFY_IS_APPROX(m1.block(r0,c0,0,0).sum(), Scalar(0));