From 543bd28a24314c211c7eb0843fb445309104778e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 8 Dec 2015 11:28:05 +0100 Subject: [PATCH] Fix Alignment in coeff-based product, and enable unaligned vectorization --- Eigen/src/Core/ProductEvaluators.h | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index bd1e1c85d..a55af911e 100755 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -447,7 +447,7 @@ struct product_evaluator, ProductTag, DenseShape, MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime, PacketSize = packet_traits::size, - + LhsCoeffReadCost = LhsEtorType::CoeffReadCost, RhsCoeffReadCost = RhsEtorType::CoeffReadCost, CoeffReadCost = InnerSize==0 ? NumTraits::ReadCost @@ -463,19 +463,16 @@ struct product_evaluator, ProductTag, DenseShape, LhsAlignment = LhsEtorType::Alignment, RhsAlignment = RhsEtorType::Alignment, - LhsIsAligned = int(LhsAlignment) >= int(unpacket_traits::alignment), - RhsIsAligned = int(RhsAlignment) >= int(unpacket_traits::alignment), - LhsRowMajor = LhsFlags & RowMajorBit, RhsRowMajor = RhsFlags & RowMajorBit, SameType = is_same::value, CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) - && (ColsAtCompileTime == Dynamic || ( (ColsAtCompileTime % PacketSize) == 0 && RhsIsAligned ) ), + && (ColsAtCompileTime == Dynamic || ((ColsAtCompileTime % PacketSize) == 0) ), CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) - && (RowsAtCompileTime == Dynamic || ( (RowsAtCompileTime % PacketSize) == 0 && LhsIsAligned ) ), + && (RowsAtCompileTime == Dynamic || ((RowsAtCompileTime % PacketSize) == 0) ), EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 @@ -487,10 +484,13 @@ struct product_evaluator, ProductTag, DenseShape, | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0), - Alignment = CanVectorizeLhs ? LhsAlignment - : CanVectorizeRhs ? RhsAlignment + LhsOuterStrideBytes = LhsNestedCleaned::OuterStrideAtCompileTime * sizeof(typename LhsNestedCleaned::Scalar), + RhsOuterStrideBytes = RhsNestedCleaned::OuterStrideAtCompileTime * sizeof(typename RhsNestedCleaned::Scalar), + + Alignment = CanVectorizeLhs ? (LhsOuterStrideBytes<0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment) + : CanVectorizeRhs ? (RhsOuterStrideBytes<0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment) : 0, - + /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect @@ -500,7 +500,6 @@ struct product_evaluator, ProductTag, DenseShape, && LhsRowMajor && (!RhsRowMajor) && (LhsFlags & RhsFlags & ActualPacketAccessBit) - && (LhsIsAligned && RhsIsAligned) && (InnerSize % packet_traits::size == 0) }; @@ -524,10 +523,9 @@ struct product_evaluator, ProductTag, DenseShape, const PacketType packet(Index row, Index col) const { PacketType res; - typedef etor_product_packet_impl PacketImpl; - PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); return res; }