Fix packet and alignment propagation logic of Block<Xpr> expressions. In particular, (A+B).col(j) lost vectorisation.

This commit is contained in:
Gael Guennebaud 2017-12-14 14:24:33 +01:00
parent 76c7dae600
commit 9c3aed9d48
2 changed files with 10 additions and 2 deletions

View File

@ -1034,7 +1034,7 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
OuterStrideAtCompileTime = HasSameStorageOrderAsArgType
? int(outer_stride_at_compile_time<ArgType>::ret)
: int(inner_stride_at_compile_time<ArgType>::ret),
MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0,
MaskPacketAccessBit = (InnerStrideAtCompileTime == 1 || HasSameStorageOrderAsArgType) ? PacketAccessBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
FlagsRowMajorBit = XprType::Flags&RowMajorBit,
@ -1044,7 +1044,9 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit,
PacketAlignment = unpacket_traits<PacketScalar>::alignment,
Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic)
&& (OuterStrideAtCompileTime!=0)
&& (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0,
Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator<ArgType>::Alignment, Alignment0)
};
typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;

View File

@ -207,6 +207,12 @@ struct vectorization_logic
VERIFY(test_redux(Vector1(),
LinearVectorizedTraversal,CompleteUnrolling));
VERIFY(test_redux(Vector1().array()*Vector1().array(),
LinearVectorizedTraversal,CompleteUnrolling));
VERIFY(test_redux((Vector1().array()*Vector1().array()).col(0),
LinearVectorizedTraversal,CompleteUnrolling));
VERIFY(test_redux(Matrix<Scalar,PacketSize,3>(),
LinearVectorizedTraversal,CompleteUnrolling));