mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-07-20 20:04:26 +08:00
bug #256: enable vectorization with unaligned loads/stores.
This applies to all architectures and all sizes. The new behavior can be disabled by defining EIGEN_UNALIGNED_VECTORIZE=0.
This commit is contained in:
parent
78390e4189
commit
e68e165a23
@ -81,10 +81,10 @@ private:
|
|||||||
MayInnerVectorize = MightVectorize
|
MayInnerVectorize = MightVectorize
|
||||||
&& int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
|
&& int(InnerSize)!=Dynamic && int(InnerSize)%int(InnerPacketSize)==0
|
||||||
&& int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
|
&& int(OuterStride)!=Dynamic && int(OuterStride)%int(InnerPacketSize)==0
|
||||||
&& int(JointAlignment)>=int(InnerRequiredAlignment),
|
&& (EIGEN_UNALIGNED_VECTORIZE || int(JointAlignment)>=int(InnerRequiredAlignment)),
|
||||||
MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
|
MayLinearize = bool(StorageOrdersAgree) && (int(DstFlags) & int(SrcFlags) & LinearAccessBit),
|
||||||
MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess
|
MayLinearVectorize = bool(MightVectorize) && MayLinearize && DstHasDirectAccess
|
||||||
&& ((int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
|
&& (EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)) || MaxSizeAtCompileTime == Dynamic),
|
||||||
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
|
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
|
||||||
so it's only good for large enough sizes. */
|
so it's only good for large enough sizes. */
|
||||||
MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess)
|
MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess)
|
||||||
@ -130,7 +130,8 @@ public:
|
|||||||
: int(NoUnrolling)
|
: int(NoUnrolling)
|
||||||
)
|
)
|
||||||
: int(Traversal) == int(LinearVectorizedTraversal)
|
: int(Traversal) == int(LinearVectorizedTraversal)
|
||||||
? ( bool(MayUnrollCompletely) && (int(DstAlignment)>=int(LinearRequiredAlignment)) ? int(CompleteUnrolling)
|
? ( bool(MayUnrollCompletely) && ( EIGEN_UNALIGNED_VECTORIZE || (int(DstAlignment)>=int(LinearRequiredAlignment)))
|
||||||
|
? int(CompleteUnrolling)
|
||||||
: int(NoUnrolling) )
|
: int(NoUnrolling) )
|
||||||
: int(Traversal) == int(LinearTraversal)
|
: int(Traversal) == int(LinearTraversal)
|
||||||
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
|
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
|
||||||
@ -156,6 +157,7 @@ public:
|
|||||||
EIGEN_DEBUG_VAR(InnerMaxSize)
|
EIGEN_DEBUG_VAR(InnerMaxSize)
|
||||||
EIGEN_DEBUG_VAR(LinearPacketSize)
|
EIGEN_DEBUG_VAR(LinearPacketSize)
|
||||||
EIGEN_DEBUG_VAR(InnerPacketSize)
|
EIGEN_DEBUG_VAR(InnerPacketSize)
|
||||||
|
EIGEN_DEBUG_VAR(ActualPacketSize)
|
||||||
EIGEN_DEBUG_VAR(StorageOrdersAgree)
|
EIGEN_DEBUG_VAR(StorageOrdersAgree)
|
||||||
EIGEN_DEBUG_VAR(MightVectorize)
|
EIGEN_DEBUG_VAR(MightVectorize)
|
||||||
EIGEN_DEBUG_VAR(MayLinearize)
|
EIGEN_DEBUG_VAR(MayLinearize)
|
||||||
|
@ -27,7 +27,7 @@ private:
|
|||||||
default_alignment = compute_default_alignment<_Scalar,max_size>::value,
|
default_alignment = compute_default_alignment<_Scalar,max_size>::value,
|
||||||
actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
|
actual_alignment = ((_Options&DontAlign)==0) ? default_alignment : 0,
|
||||||
required_alignment = unpacket_traits<PacketScalar>::alignment,
|
required_alignment = unpacket_traits<PacketScalar>::alignment,
|
||||||
packet_access_bit = packet_traits<_Scalar>::Vectorizable && (actual_alignment>=required_alignment) ? PacketAccessBit : 0
|
packet_access_bit = (packet_traits<_Scalar>::Vectorizable && (EIGEN_UNALIGNED_VECTORIZE || (actual_alignment>=required_alignment))) ? PacketAccessBit : 0
|
||||||
};
|
};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
@ -756,6 +756,11 @@ namespace Eigen {
|
|||||||
#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
|
#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef EIGEN_UNALIGNED_VECTORIZE
|
||||||
|
#define EIGEN_UNALIGNED_VECTORIZE 1
|
||||||
|
#endif
|
||||||
|
|
||||||
//----------------------------------------------------------------------
|
//----------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@ -7,6 +7,14 @@
|
|||||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
|
||||||
|
#ifdef EIGEN_TEST_PART_1
|
||||||
|
#define EIGEN_UNALIGNED_VECTORIZE 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef EIGEN_TEST_PART_2
|
||||||
|
#define EIGEN_UNALIGNED_VECTORIZE 0
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
|
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
|
||||||
#undef EIGEN_DEFAULT_TO_ROW_MAJOR
|
#undef EIGEN_DEFAULT_TO_ROW_MAJOR
|
||||||
#endif
|
#endif
|
||||||
@ -144,10 +152,16 @@ struct vectorization_logic
|
|||||||
InnerVectorizedTraversal,InnerUnrolling));
|
InnerVectorizedTraversal,InnerUnrolling));
|
||||||
|
|
||||||
VERIFY(test_assign(Matrix44u(),Matrix44()+Matrix44(),
|
VERIFY(test_assign(Matrix44u(),Matrix44()+Matrix44(),
|
||||||
LinearTraversal,NoUnrolling));
|
EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearTraversal,
|
||||||
|
EIGEN_UNALIGNED_VECTORIZE ? InnerUnrolling : NoUnrolling));
|
||||||
|
|
||||||
|
VERIFY(test_assign(Matrix1(),Matrix1()+Matrix1(),
|
||||||
|
(Matrix1::InnerSizeAtCompileTime % PacketSize)==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal,
|
||||||
|
CompleteUnrolling));
|
||||||
|
|
||||||
VERIFY(test_assign(Matrix1u(),Matrix1()+Matrix1(),
|
VERIFY(test_assign(Matrix1u(),Matrix1()+Matrix1(),
|
||||||
LinearTraversal,CompleteUnrolling));
|
EIGEN_UNALIGNED_VECTORIZE ? ((Matrix1::InnerSizeAtCompileTime % PacketSize)==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal)
|
||||||
|
: LinearTraversal, CompleteUnrolling));
|
||||||
|
|
||||||
VERIFY(test_assign(Matrix44c().col(1),Matrix44c().col(2)+Matrix44c().col(3),
|
VERIFY(test_assign(Matrix44c().col(1),Matrix44c().col(2)+Matrix44c().col(3),
|
||||||
InnerVectorizedTraversal,CompleteUnrolling));
|
InnerVectorizedTraversal,CompleteUnrolling));
|
||||||
@ -158,19 +172,30 @@ struct vectorization_logic
|
|||||||
if(PacketSize>1)
|
if(PacketSize>1)
|
||||||
{
|
{
|
||||||
typedef Matrix<Scalar,3,3,ColMajor> Matrix33c;
|
typedef Matrix<Scalar,3,3,ColMajor> Matrix33c;
|
||||||
|
typedef Matrix<Scalar,3,1,ColMajor> Vector3;
|
||||||
VERIFY(test_assign(Matrix33c().row(2),Matrix33c().row(1)+Matrix33c().row(1),
|
VERIFY(test_assign(Matrix33c().row(2),Matrix33c().row(1)+Matrix33c().row(1),
|
||||||
LinearTraversal,CompleteUnrolling));
|
LinearTraversal,CompleteUnrolling));
|
||||||
|
VERIFY(test_assign(Vector3(),Vector3()+Vector3(),
|
||||||
|
EIGEN_UNALIGNED_VECTORIZE ? (HalfPacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : (HalfPacketSize==1 ? InnerVectorizedTraversal : LinearTraversal), CompleteUnrolling));
|
||||||
VERIFY(test_assign(Matrix33c().col(0),Matrix33c().col(1)+Matrix33c().col(1),
|
VERIFY(test_assign(Matrix33c().col(0),Matrix33c().col(1)+Matrix33c().col(1),
|
||||||
LinearTraversal,CompleteUnrolling));
|
EIGEN_UNALIGNED_VECTORIZE ? (HalfPacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : (HalfPacketSize==1 ? SliceVectorizedTraversal : LinearTraversal),
|
||||||
|
((!EIGEN_UNALIGNED_VECTORIZE) && HalfPacketSize==1) ? NoUnrolling : CompleteUnrolling));
|
||||||
|
|
||||||
VERIFY(test_assign(Matrix3(),Matrix3().cwiseProduct(Matrix3()),
|
VERIFY(test_assign(Matrix3(),Matrix3().cwiseProduct(Matrix3()),
|
||||||
LinearVectorizedTraversal,CompleteUnrolling));
|
LinearVectorizedTraversal,CompleteUnrolling));
|
||||||
|
|
||||||
VERIFY(test_assign(Matrix<Scalar,17,17>(),Matrix<Scalar,17,17>()+Matrix<Scalar,17,17>(),
|
VERIFY(test_assign(Matrix<Scalar,17,17>(),Matrix<Scalar,17,17>()+Matrix<Scalar,17,17>(),
|
||||||
HalfPacketSize==1 ? InnerVectorizedTraversal : LinearTraversal,NoUnrolling));
|
HalfPacketSize==1 ? InnerVectorizedTraversal :
|
||||||
|
EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal :
|
||||||
|
LinearTraversal,
|
||||||
|
NoUnrolling));
|
||||||
|
|
||||||
|
VERIFY(test_assign(Matrix11(), Matrix11()+Matrix11(),InnerVectorizedTraversal,CompleteUnrolling));
|
||||||
|
|
||||||
|
|
||||||
VERIFY(test_assign(Matrix11(),Matrix<Scalar,17,17>().template block<PacketSize,PacketSize>(2,3)+Matrix<Scalar,17,17>().template block<PacketSize,PacketSize>(8,4),
|
VERIFY(test_assign(Matrix11(),Matrix<Scalar,17,17>().template block<PacketSize,PacketSize>(2,3)+Matrix<Scalar,17,17>().template block<PacketSize,PacketSize>(8,4),
|
||||||
DefaultTraversal,PacketSize>4?InnerUnrolling:CompleteUnrolling));
|
(EIGEN_UNALIGNED_VECTORIZE) ? InnerVectorizedTraversal : DefaultTraversal,
|
||||||
|
(EIGEN_UNALIGNED_VECTORIZE || PacketSize<=4) ? CompleteUnrolling : InnerUnrolling ));
|
||||||
|
|
||||||
VERIFY(test_assign(Vector1(),Matrix11()*Vector1(),
|
VERIFY(test_assign(Vector1(),Matrix11()*Vector1(),
|
||||||
InnerVectorizedTraversal,CompleteUnrolling));
|
InnerVectorizedTraversal,CompleteUnrolling));
|
||||||
@ -270,6 +295,12 @@ struct vectorization_logic_half
|
|||||||
InnerVectorizedTraversal,CompleteUnrolling));
|
InnerVectorizedTraversal,CompleteUnrolling));
|
||||||
VERIFY(test_assign(Vector1(),Vector1()+Vector1(),
|
VERIFY(test_assign(Vector1(),Vector1()+Vector1(),
|
||||||
InnerVectorizedTraversal,CompleteUnrolling));
|
InnerVectorizedTraversal,CompleteUnrolling));
|
||||||
|
VERIFY(test_assign(Vector1(),Vector1().template segment<PacketSize>(0).derived(),
|
||||||
|
EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearVectorizedTraversal,CompleteUnrolling));
|
||||||
|
VERIFY(test_assign(Vector1(),Scalar(2.1)*Vector1()-Vector1(),
|
||||||
|
InnerVectorizedTraversal,CompleteUnrolling));
|
||||||
|
VERIFY(test_assign(Vector1(),(Scalar(2.1)*Vector1().template segment<PacketSize>(0)-Vector1().template segment<PacketSize>(0)).derived(),
|
||||||
|
EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearVectorizedTraversal,CompleteUnrolling));
|
||||||
VERIFY(test_assign(Vector1(),Vector1().cwiseProduct(Vector1()),
|
VERIFY(test_assign(Vector1(),Vector1().cwiseProduct(Vector1()),
|
||||||
InnerVectorizedTraversal,CompleteUnrolling));
|
InnerVectorizedTraversal,CompleteUnrolling));
|
||||||
VERIFY(test_assign(Vector1(),Vector1().template cast<Scalar>(),
|
VERIFY(test_assign(Vector1(),Vector1().template cast<Scalar>(),
|
||||||
@ -287,10 +318,11 @@ struct vectorization_logic_half
|
|||||||
InnerVectorizedTraversal,InnerUnrolling));
|
InnerVectorizedTraversal,InnerUnrolling));
|
||||||
|
|
||||||
VERIFY(test_assign(Matrix57u(),Matrix57()+Matrix57(),
|
VERIFY(test_assign(Matrix57u(),Matrix57()+Matrix57(),
|
||||||
LinearTraversal,NoUnrolling));
|
EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : LinearTraversal,
|
||||||
|
EIGEN_UNALIGNED_VECTORIZE ? InnerUnrolling : NoUnrolling));
|
||||||
|
|
||||||
VERIFY(test_assign(Matrix1u(),Matrix1()+Matrix1(),
|
VERIFY(test_assign(Matrix1u(),Matrix1()+Matrix1(),
|
||||||
LinearTraversal,CompleteUnrolling));
|
EIGEN_UNALIGNED_VECTORIZE ? ((Matrix1::InnerSizeAtCompileTime % PacketSize)==0 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : LinearTraversal,CompleteUnrolling));
|
||||||
|
|
||||||
if(PacketSize>1)
|
if(PacketSize>1)
|
||||||
{
|
{
|
||||||
@ -298,16 +330,17 @@ struct vectorization_logic_half
|
|||||||
VERIFY(test_assign(Matrix33c().row(2),Matrix33c().row(1)+Matrix33c().row(1),
|
VERIFY(test_assign(Matrix33c().row(2),Matrix33c().row(1)+Matrix33c().row(1),
|
||||||
LinearTraversal,CompleteUnrolling));
|
LinearTraversal,CompleteUnrolling));
|
||||||
VERIFY(test_assign(Matrix33c().col(0),Matrix33c().col(1)+Matrix33c().col(1),
|
VERIFY(test_assign(Matrix33c().col(0),Matrix33c().col(1)+Matrix33c().col(1),
|
||||||
LinearTraversal,CompleteUnrolling));
|
EIGEN_UNALIGNED_VECTORIZE ? (PacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : LinearTraversal,CompleteUnrolling));
|
||||||
|
|
||||||
VERIFY(test_assign(Matrix3(),Matrix3().cwiseQuotient(Matrix3()),
|
VERIFY(test_assign(Matrix3(),Matrix3().cwiseQuotient(Matrix3()),
|
||||||
PacketTraits::HasDiv ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
|
PacketTraits::HasDiv ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
|
||||||
|
|
||||||
VERIFY(test_assign(Matrix<Scalar,17,17>(),Matrix<Scalar,17,17>()+Matrix<Scalar,17,17>(),
|
VERIFY(test_assign(Matrix<Scalar,17,17>(),Matrix<Scalar,17,17>()+Matrix<Scalar,17,17>(),
|
||||||
LinearTraversal,NoUnrolling));
|
EIGEN_UNALIGNED_VECTORIZE ? (PacketSize==1 ? InnerVectorizedTraversal : LinearVectorizedTraversal) : LinearTraversal,
|
||||||
|
NoUnrolling));
|
||||||
|
|
||||||
VERIFY(test_assign(Matrix11(),Matrix<Scalar,17,17>().template block<PacketSize,PacketSize>(2,3)+Matrix<Scalar,17,17>().template block<PacketSize,PacketSize>(8,4),
|
VERIFY(test_assign(Matrix11(),Matrix<Scalar,17,17>().template block<PacketSize,PacketSize>(2,3)+Matrix<Scalar,17,17>().template block<PacketSize,PacketSize>(8,4),
|
||||||
DefaultTraversal,PacketSize>4?InnerUnrolling:CompleteUnrolling));
|
EIGEN_UNALIGNED_VECTORIZE ? InnerVectorizedTraversal : DefaultTraversal,PacketSize>4?InnerUnrolling:CompleteUnrolling));
|
||||||
|
|
||||||
VERIFY(test_assign(Vector1(),Matrix11()*Vector1(),
|
VERIFY(test_assign(Vector1(),Matrix11()*Vector1(),
|
||||||
InnerVectorizedTraversal,CompleteUnrolling));
|
InnerVectorizedTraversal,CompleteUnrolling));
|
||||||
@ -367,19 +400,19 @@ void test_vectorization_logic()
|
|||||||
if(internal::packet_traits<float>::Vectorizable)
|
if(internal::packet_traits<float>::Vectorizable)
|
||||||
{
|
{
|
||||||
VERIFY(test_assign(Matrix<float,3,3>(),Matrix<float,3,3>()+Matrix<float,3,3>(),
|
VERIFY(test_assign(Matrix<float,3,3>(),Matrix<float,3,3>()+Matrix<float,3,3>(),
|
||||||
LinearTraversal,CompleteUnrolling));
|
EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
|
||||||
|
|
||||||
VERIFY(test_redux(Matrix<float,5,2>(),
|
VERIFY(test_redux(Matrix<float,5,2>(),
|
||||||
DefaultTraversal,CompleteUnrolling));
|
EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : DefaultTraversal,CompleteUnrolling));
|
||||||
}
|
}
|
||||||
|
|
||||||
if(internal::packet_traits<double>::Vectorizable)
|
if(internal::packet_traits<double>::Vectorizable)
|
||||||
{
|
{
|
||||||
VERIFY(test_assign(Matrix<double,3,3>(),Matrix<double,3,3>()+Matrix<double,3,3>(),
|
VERIFY(test_assign(Matrix<double,3,3>(),Matrix<double,3,3>()+Matrix<double,3,3>(),
|
||||||
LinearTraversal,CompleteUnrolling));
|
EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
|
||||||
|
|
||||||
VERIFY(test_redux(Matrix<double,7,3>(),
|
VERIFY(test_redux(Matrix<double,7,3>(),
|
||||||
DefaultTraversal,CompleteUnrolling));
|
EIGEN_UNALIGNED_VECTORIZE ? LinearVectorizedTraversal : DefaultTraversal,CompleteUnrolling));
|
||||||
}
|
}
|
||||||
#endif // EIGEN_VECTORIZE
|
#endif // EIGEN_VECTORIZE
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user