diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 3029a7758..766980e28 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -81,10 +81,10 @@ private: MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0 && int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0 - && int(JointAlignment)>=int(InnerRequiredAlignment), + && (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)), MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit), MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess - && ((int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic), + && (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic), /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, so it's only good for large enough sizes. */ MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess) @@ -130,8 +130,9 @@ public: : int(NoUnrolling) ) : int(Traversal) == int(LinearVectorizedTraversal) - ? ( bool(MayUnrollCompletely) && (int(DstAlignment)>=int(LinearRequiredAlignment)) ? int(CompleteUnrolling) - : int(NoUnrolling) ) + ? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment))) + ? int(CompleteUnrolling) + : int(NoUnrolling) ) : int(Traversal) == int(LinearTraversal) ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) ) @@ -156,6 +157,7 @@ public: EIGEN_DEBUG_VAR(InnerMaxSize) EIGEN_DEBUG_VAR(LinearPacketSize) EIGEN_DEBUG_VAR(InnerPacketSize) + EIGEN_DEBUG_VAR(ActualPacketSize) EIGEN_DEBUG_VAR(StorageOrdersAgree) EIGEN_DEBUG_VAR(MightVectorize) EIGEN_DEBUG_VAR(MayLinearize) diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h index ac40c82f0..86fce9571 100644 --- a/Eigen/src/Core/Matrix.h +++ b/Eigen/src/Core/Matrix.h @@ -27,7 +27,7 @@ private: default_alignment = compute_default_alignment<_Scalar,max_size>::value, actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0, required_alignment = unpacket_traits::alignment, - packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0 + packet_access_bit = (packet_traits<_Scalar>::Vectorizable && (EIGEN_UNALIGNED_VECTORIZE || (actual_alignment>=required_alignment))) ? PacketAccessBit : 0 }; public: diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 466ca73ac..2d2d30cd2 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -756,6 +756,11 @@ namespace Eigen { #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES #endif + +#ifndef EIGEN_UNALIGNED_VECTORIZE +#define EIGEN_UNALIGNED_VECTORIZE 1 +#endif + //---------------------------------------------------------------------- diff --git a/test/vectorization_logic.cpp b/test/vectorization_logic.cpp index ee446c3c1..24a7641ff 100644 --- a/test/vectorization_logic.cpp +++ b/test/vectorization_logic.cpp @@ -7,6 +7,14 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#ifdef EIGEN_TEST_PART_1 +#define EIGEN_UNALIGNED_VECTORIZE 1 +#endif + +#ifdef EIGEN_TEST_PART_2 +#define EIGEN_UNALIGNED_VECTORIZE 0 +#endif + #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR #undef EIGEN_DEFAULT_TO_ROW_MAJOR #endif @@ -144,10 +152,16 @@ struct vectorization_logic InnerVectorizedTraversal,InnerUnrolling)); VERIFY(test_assign(Matrix44u(),Matrix44()+Matrix44(), - LinearTraversal,NoUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearTraversal, + EIGEN_UNALIGNED_VECTORIZE ? InnerUnrolling : NoUnrolling)); + + VERIFY(test_assign(Matrix1(),Matrix1()+Matrix1(), + (Matrix1::InnerSizeAtCompileTime % PacketSize)==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal, + CompleteUnrolling)); VERIFY(test_assign(Matrix1u(),Matrix1()+Matrix1(), - LinearTraversal,CompleteUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? ((Matrix1::InnerSizeAtCompileTime % PacketSize)==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal) + : LinearTraversal, CompleteUnrolling)); VERIFY(test_assign(Matrix44c().col(1),Matrix44c().col(2)+Matrix44c().col(3), InnerVectorizedTraversal,CompleteUnrolling)); @@ -158,19 +172,30 @@ struct vectorization_logic if(PacketSize>1) { typedef Matrix Matrix33c; + typedef Matrix Vector3; VERIFY(test_assign(Matrix33c().row(2),Matrix33c().row(1)+Matrix33c().row(1), LinearTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector3(),Vector3()+Vector3(), + EIGEN_UNALIGNED_VECTORIZE ? (HalfPacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : (HalfPacketSize==1 ? InnerVectorizedTraversal : LinearTraversal), CompleteUnrolling)); VERIFY(test_assign(Matrix33c().col(0),Matrix33c().col(1)+Matrix33c().col(1), - LinearTraversal,CompleteUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? (HalfPacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : (HalfPacketSize==1 ? SliceVectorizedTraversal : LinearTraversal), + ((!EIGEN_UNALIGNED_VECTORIZE) && HalfPacketSize==1) ? NoUnrolling : CompleteUnrolling)); VERIFY(test_assign(Matrix3(),Matrix3().cwiseProduct(Matrix3()), LinearVectorizedTraversal,CompleteUnrolling)); VERIFY(test_assign(Matrix(),Matrix()+Matrix(), - HalfPacketSize==1 ? InnerVectorizedTraversal : LinearTraversal,NoUnrolling)); + HalfPacketSize==1 ? InnerVectorizedTraversal : + EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : + LinearTraversal, + NoUnrolling)); + + VERIFY(test_assign(Matrix11(), Matrix11()+Matrix11(),InnerVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Matrix11(),Matrix().template block(2,3)+Matrix().template block(8,4), - DefaultTraversal,PacketSize>4?InnerUnrolling:CompleteUnrolling)); + (EIGEN_UNALIGNED_VECTORIZE) ? InnerVectorizedTraversal : DefaultTraversal, + (EIGEN_UNALIGNED_VECTORIZE || PacketSize<=4) ? CompleteUnrolling : InnerUnrolling )); VERIFY(test_assign(Vector1(),Matrix11()*Vector1(), InnerVectorizedTraversal,CompleteUnrolling)); @@ -270,6 +295,12 @@ struct vectorization_logic_half InnerVectorizedTraversal,CompleteUnrolling)); VERIFY(test_assign(Vector1(),Vector1()+Vector1(), InnerVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),Vector1().template segment(0).derived(), + EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),Scalar(2.1)*Vector1()-Vector1(), + InnerVectorizedTraversal,CompleteUnrolling)); + VERIFY(test_assign(Vector1(),(Scalar(2.1)*Vector1().template segment(0)-Vector1().template segment(0)).derived(), + EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearVectorizedTraversal,CompleteUnrolling)); VERIFY(test_assign(Vector1(),Vector1().cwiseProduct(Vector1()), InnerVectorizedTraversal,CompleteUnrolling)); VERIFY(test_assign(Vector1(),Vector1().template cast(), @@ -287,10 +318,11 @@ struct vectorization_logic_half InnerVectorizedTraversal,InnerUnrolling)); VERIFY(test_assign(Matrix57u(),Matrix57()+Matrix57(), - LinearTraversal,NoUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearTraversal, + EIGEN_UNALIGNED_VECTORIZE ? InnerUnrolling : NoUnrolling)); VERIFY(test_assign(Matrix1u(),Matrix1()+Matrix1(), - LinearTraversal,CompleteUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? ((Matrix1::InnerSizeAtCompileTime % PacketSize)==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : LinearTraversal,CompleteUnrolling)); if(PacketSize>1) { @@ -298,16 +330,17 @@ struct vectorization_logic_half VERIFY(test_assign(Matrix33c().row(2),Matrix33c().row(1)+Matrix33c().row(1), LinearTraversal,CompleteUnrolling)); VERIFY(test_assign(Matrix33c().col(0),Matrix33c().col(1)+Matrix33c().col(1), - LinearTraversal,CompleteUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? (PacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : LinearTraversal,CompleteUnrolling)); VERIFY(test_assign(Matrix3(),Matrix3().cwiseQuotient(Matrix3()), PacketTraits::HasDiv ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling)); VERIFY(test_assign(Matrix(),Matrix()+Matrix(), - LinearTraversal,NoUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? (PacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : LinearTraversal, + NoUnrolling)); VERIFY(test_assign(Matrix11(),Matrix().template block(2,3)+Matrix().template block(8,4), - DefaultTraversal,PacketSize>4?InnerUnrolling:CompleteUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : DefaultTraversal,PacketSize>4?InnerUnrolling:CompleteUnrolling)); VERIFY(test_assign(Vector1(),Matrix11()*Vector1(), InnerVectorizedTraversal,CompleteUnrolling)); @@ -367,19 +400,19 @@ void test_vectorization_logic() if(internal::packet_traits::Vectorizable) { VERIFY(test_assign(Matrix(),Matrix()+Matrix(), - LinearTraversal,CompleteUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling)); VERIFY(test_redux(Matrix(), - DefaultTraversal,CompleteUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : DefaultTraversal,CompleteUnrolling)); } if(internal::packet_traits::Vectorizable) { VERIFY(test_assign(Matrix(),Matrix()+Matrix(), - LinearTraversal,CompleteUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling)); VERIFY(test_redux(Matrix(), - DefaultTraversal,CompleteUnrolling)); + EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : DefaultTraversal,CompleteUnrolling)); } #endif // EIGEN_VECTORIZE