diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 918d48881..5740510c0 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -188,8 +188,32 @@ template struct packet_traits; template struct unpacket_traits; +// If we vectorize regardless of alignment, pick the full-sized packet if: +// +// * The size is large enough; +// * Picking it will result in fewer operations than picking the half size. +// Consider the case where the size is 12, the full packet is 8, and the +// half packet is 4. If we pick the full packet we'd have 1 + 4 operations, +// but only 3 operations if we pick the half-packet. +// +// The reason why we only do this with EIGEN_UNALIGNED_VECTORIZE is that if +// we choose packets which do not divide the data size exactly we're going to +// be left with some possibly unaligned data at the end. +#if EIGEN_UNALIGNED_VECTORIZE +template= unpacket_traits::size && + // If the packet size is 1 we're always good -- it will always divide things perfectly. + // We have this check since otherwise 1/2 would be 0 in the division below. + (unpacket_traits::size == 1 || + (Size/unpacket_traits::size + Size%unpacket_traits::size) <= + (Size/(unpacket_traits::size/2) + Size%(unpacket_traits::size/2)))) || + is_same::half>::value> +#else template::size)==0 || is_same::half>::value> +#endif struct find_best_packet_helper; template< int Size, typename PacketType> diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp index 602e9f15c..62d3f60ff 100644 --- a/test/vectorization_logic.cpp +++ b/test/vectorization_logic.cpp @@ -258,7 +258,38 @@ struct vectorization_logic VERIFY(test_redux(VectorX(10), LinearVectorizedTraversal,NoUnrolling)); + + // Some static checks for packet-picking -- see + // for context. 
+ + // Any multiple of the packet size itself will result in the normal packet + STATIC_CHECK(( + internal::is_same::type, PacketType>::value + )); + STATIC_CHECK(( + internal::is_same::type, PacketType>::value + )); + STATIC_CHECK(( + internal::is_same::type, PacketType>::value + )); + // Moreover, situations where the size is _not_ a multiple but picking the full packet + // is convenient will also work, but only with unaligned vectorize + STATIC_CHECK(( + !(EIGEN_UNALIGNED_VECTORIZE || PacketSize == HalfPacketSize) || + internal::is_same::type, PacketType>::value + )); + STATIC_CHECK(( + !(EIGEN_UNALIGNED_VECTORIZE || PacketSize == HalfPacketSize) || + internal::is_same::type, PacketType>::value + )); + // In situations where picking the full packet would be detrimental, the half-packet + // is chosen. + STATIC_CHECK(( + !(PacketSize > 2) || + internal::is_same::type, HalfPacketType>::value + )); } + }; template struct vectorization_logic