From 9c9e90f6db7e93617cdfd617083013ca5b176adb Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 14 Dec 2017 14:24:33 +0100 Subject: [PATCH] Fix packet and alignment propagation logic of Block expressions. In particular, (A+B).col(j) lost vectorisation. (grafted from 9c3aed9d48d7dbc0f88d2fb92ca232dcbf0d402e ) --- Eigen/src/Core/CoreEvaluators.h | 6 ++++-- test/vectorization_logic.cpp | 6 ++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index f7c1effca..90e70fb4f 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -977,7 +977,7 @@ struct evaluator > OuterStrideAtCompileTime = HasSameStorageOrderAsArgType ? int(outer_stride_at_compile_time::ret) : int(inner_stride_at_compile_time::ret), - MaskPacketAccessBit = (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0, + MaskPacketAccessBit = (InnerStrideAtCompileTime == 1 || HasSameStorageOrderAsArgType) ? PacketAccessBit : 0, FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator::Flags&LinearAccessBit))) ? LinearAccessBit : 0, FlagsRowMajorBit = XprType::Flags&RowMajorBit, @@ -987,7 +987,9 @@ struct evaluator > Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit, PacketAlignment = unpacket_traits::alignment, - Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0, + Alignment0 = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) + && (OuterStrideAtCompileTime!=0) + && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) ? int(PacketAlignment) : 0, Alignment = EIGEN_PLAIN_ENUM_MIN(evaluator::Alignment, Alignment0) }; typedef block_evaluator block_evaluator_type; diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp index 83c1439ad..37e7495f5 100644 --- a/test/vectorization_logic.cpp +++ b/test/vectorization_logic.cpp @@ -207,6 +207,12 @@ struct vectorization_logic VERIFY(test_redux(Vector1(), LinearVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_redux(Vector1().array()*Vector1().array(), + LinearVectorizedTraversal,CompleteUnrolling)); + + VERIFY(test_redux((Vector1().array()*Vector1().array()).col(0), + LinearVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_redux(Matrix(), LinearVectorizedTraversal,CompleteUnrolling));