mirror of https://gitlab.com/libeigen/eigen.git
bug #973, improve AVX support by enabling vectorization of Vector4i-like types, and enforcing alignment of Vector4f/Vector2d-like types to preserve compatibility with SSE and future Eigen versions that will vectorize them with AVX enabled.
This commit is contained in:
parent d99ab35f9e
commit 1330f8bbd1
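The core of the change is a recursive compute_default_alignment helper (added in one of the hunks below): starting from the widest packet available for a scalar type, it falls back to half-packets until the packet width divides the object size, so a 16-byte object such as Vector4f or Vector4i keeps 16-byte alignment even when AVX offers 32-byte packets. The following standalone sketch mimics that selection logic with mock packet traits (MockPacket, mock_unpacket_traits and the 8/4 widths are illustrative stand-ins, not Eigen's actual types):

#include <iostream>

// Mock packet hierarchy standing in for Eigen's packet_traits/unpacket_traits.
template<int N> struct MockPacket {};

template<typename Packet> struct mock_unpacket_traits;
template<> struct mock_unpacket_traits< MockPacket<8> > { enum { size = 8 }; typedef MockPacket<4> half; };
template<> struct mock_unpacket_traits< MockPacket<4> > { enum { size = 4 }; typedef MockPacket<4> half; }; // smallest packet is its own half

// Same selection pattern as the commit's compute_default_alignment:
// use the packet only if its width divides the object size, otherwise retry
// with the half-packet; give up (alignment 0) when nothing fits.
template<typename T, int Size, typename Packet,
         bool Match   = ((Size % int(mock_unpacket_traits<Packet>::size)) == 0),
         bool TryHalf = (int(mock_unpacket_traits<Packet>::size) > Size)
                     && (int(mock_unpacket_traits<Packet>::size) >
                         int(mock_unpacket_traits<typename mock_unpacket_traits<Packet>::half>::size)) >
struct default_alignment { enum { value = 0 }; };        // no packet fits -> no special alignment

template<typename T, int Size, typename Packet>
struct default_alignment<T, Size, Packet, true, false>   // packet width divides the size
{ enum { value = sizeof(T) * mock_unpacket_traits<Packet>::size }; };

template<typename T, int Size, typename Packet>
struct default_alignment<T, Size, Packet, false, true>   // packet too wide -> recurse on the half-packet
{ enum { value = default_alignment<T, Size, typename mock_unpacket_traits<Packet>::half>::value }; };

int main()
{
  // With a hypothetical 8-wide float packet (AVX-like):
  std::cout << default_alignment<float, 8, MockPacket<8> >::value << "\n"; // 32: the full packet fits (Matrix4f-sized objects)
  std::cout << default_alignment<float, 4, MockPacket<8> >::value << "\n"; // 16: falls back to the 4-wide half-packet (Vector4f)
  std::cout << default_alignment<float, 3, MockPacket<8> >::value << "\n"; // 0:  no packet divides the size (Vector3f)
  return 0;
}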
@@ -647,11 +647,15 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
     HasNoStride = HasNoInnerStride && HasNoOuterStride,
     IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
     IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
+
+    // TODO: should check for smaller packet types once we can handle multi-sized packet types
+    AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),
+
     KeepsPacketAccess = bool(HasNoInnerStride)
                         && ( bool(IsDynamicSize)
                            || HasNoOuterStride
                            || ( OuterStrideAtCompileTime!=Dynamic
-                              && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%EIGEN_ALIGN_BYTES)==0 ) ),
+                              && ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime) % AlignBytes)==0 ) ),
     Flags0 = evaluator<PlainObjectType>::Flags,
     Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
     Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
@@ -717,7 +721,10 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
                       && (InnerStrideAtCompileTime == 1)
                        ? PacketAccessBit : 0,
 
-    MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0)) ? AlignedBit : 0,
+    // TODO: should check for smaller packet types once we can handle multi-sized packet types
+    AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),
+
+    MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % AlignBytes) == 0)) ? AlignedBit : 0,
     FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || (InnerPanel && (evaluator<ArgType>::Flags&LinearAccessBit))) ? LinearAccessBit : 0,
     FlagsRowMajorBit = XprType::Flags&RowMajorBit,
     Flags0 = evaluator<ArgType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
@@ -825,12 +832,15 @@ struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAc
                        typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject>
 {
   typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
+  typedef typename XprType::Scalar Scalar;
 
   EIGEN_DEVICE_FUNC explicit block_evaluator(const XprType& block)
     : mapbase_evaluator<XprType, typename XprType::PlainObject>(block)
   {
+    // TODO: should check for smaller packet types once we can handle multi-sized packet types
+    const int AlignBytes = int(packet_traits<Scalar>::size) * sizeof(Scalar);
     // FIXME this should be an internal assertion
-    eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned");
+    eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % AlignBytes) == 0) && "data is not aligned");
   }
 };
 
@@ -34,14 +34,35 @@ void check_static_allocation_size()
 #endif
 }
 
+template<typename T, int Size, typename Packet = typename packet_traits<T>::type,
+         bool Match = bool((Size%unpacket_traits<Packet>::size)==0),
+         bool TryHalf = bool(unpacket_traits<Packet>::size > Size)
+                     && bool(unpacket_traits<Packet>::size > unpacket_traits<typename unpacket_traits<Packet>::half>::size) >
+struct compute_default_alignment
+{
+  enum { value = 0 };
+};
+
+template<typename T, int Size, typename Packet>
+struct compute_default_alignment<T, Size, Packet, true, false> // Match
+{
+  enum { value = sizeof(T) * unpacket_traits<Packet>::size };
+};
+
+template<typename T, int Size, typename Packet>
+struct compute_default_alignment<T, Size, Packet, false, true>
+{
+  // current packet too large, try with an half-packet
+  enum { value = compute_default_alignment<T, Size, typename unpacket_traits<Packet>::half>::value };
+};
+
 /** \internal
   * Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned:
   * to 16 bytes boundary if the total size is a multiple of 16 bytes.
   */
 template <typename T, int Size, int MatrixOrArrayOptions,
           int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0
-                        : (((Size*sizeof(T))%EIGEN_ALIGN_BYTES)==0) ? EIGEN_ALIGN_BYTES
-                        : 0 >
+                        : compute_default_alignment<T,Size>::value >
 struct plain_array
 {
   T array[Size];
@@ -81,14 +102,71 @@ struct plain_array
 #endif
 
 template <typename T, int Size, int MatrixOrArrayOptions>
-struct plain_array<T, Size, MatrixOrArrayOptions, EIGEN_ALIGN_BYTES>
+struct plain_array<T, Size, MatrixOrArrayOptions, 8>
 {
-  EIGEN_USER_ALIGN_DEFAULT T array[Size];
+  EIGEN_ALIGN_TO_BOUNDARY(8) T array[Size];
 
   EIGEN_DEVICE_FUNC
   plain_array()
   {
-    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(EIGEN_ALIGN_BYTES-1);
+    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(7);
+    check_static_allocation_size<T,Size>();
+  }
+
+  EIGEN_DEVICE_FUNC
+  plain_array(constructor_without_unaligned_array_assert)
+  {
+    check_static_allocation_size<T,Size>();
+  }
+};
+
+template <typename T, int Size, int MatrixOrArrayOptions>
+struct plain_array<T, Size, MatrixOrArrayOptions, 16>
+{
+  EIGEN_ALIGN_TO_BOUNDARY(16) T array[Size];
+
+  EIGEN_DEVICE_FUNC
+  plain_array()
+  {
+    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(15);
+    check_static_allocation_size<T,Size>();
+  }
+
+  EIGEN_DEVICE_FUNC
+  plain_array(constructor_without_unaligned_array_assert)
+  {
+    check_static_allocation_size<T,Size>();
+  }
+};
+
+template <typename T, int Size, int MatrixOrArrayOptions>
+struct plain_array<T, Size, MatrixOrArrayOptions, 32>
+{
+  EIGEN_ALIGN_TO_BOUNDARY(32) T array[Size];
+
+  EIGEN_DEVICE_FUNC
+  plain_array()
+  {
+    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31);
+    check_static_allocation_size<T,Size>();
+  }
+
+  EIGEN_DEVICE_FUNC
+  plain_array(constructor_without_unaligned_array_assert)
+  {
+    check_static_allocation_size<T,Size>();
+  }
+};
+
+template <typename T, int Size, int MatrixOrArrayOptions>
+struct plain_array<T, Size, MatrixOrArrayOptions, 64>
+{
+  EIGEN_ALIGN_TO_BOUNDARY(64) T array[Size];
+
+  EIGEN_DEVICE_FUNC
+  plain_array()
+  {
+    EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(63);
     check_static_allocation_size<T,Size>();
   }
 
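The explicit 8/16/32/64-byte plain_array specializations above request the alignment through EIGEN_ALIGN_TO_BOUNDARY and then assert it in the constructor. A minimal standalone sketch of the same idea, using standard alignas as a stand-in for the Eigen macro (the struct name and the 32-byte width are illustrative only):

#include <cassert>
#include <cstdint>

// Stand-in for plain_array<T, Size, Options, 32>: a raw buffer whose first
// element is guaranteed to sit on a 32-byte boundary.
template<typename T, int Size>
struct aligned_plain_array
{
  alignas(32) T array[Size];

  aligned_plain_array()
  {
    // Mirrors the spirit of EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(31):
    // the array address must have its low five bits clear.
    assert((reinterpret_cast<std::uintptr_t>(array) & 31) == 0);
  }
};

int main()
{
  aligned_plain_array<float, 8> a;   // e.g. storage for an 8-float (32-byte) object
  a.array[0] = 1.0f;
  return 0;
}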
@@ -318,6 +318,9 @@
 // Defined the boundary (in bytes) on which the data needs to be aligned. Note
 // that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
 // aligned at all regardless of the value of this #define.
+// TODO should be renamed EIGEN_MAXIMAL_ALIGN_BYTES,
+// for instance with AVX 1 EIGEN_MAXIMAL_ALIGN_BYTES=32 while for 'int' 16 bytes alignment is always enough,
+// and 16 bytes alignment is also enough for Vector4f.
 #define EIGEN_ALIGN_BYTES 16
 
 #ifdef EIGEN_DONT_ALIGN
@@ -160,12 +160,15 @@ class compute_matrix_evaluator_flags
     row_major_bit = Options&RowMajor ? RowMajorBit : 0,
     is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic,
 
+    // TODO: should check for smaller packet types once we can handle multi-sized packet types
+    align_bytes = int(packet_traits<Scalar>::size) * sizeof(Scalar),
+
     aligned_bit =
     (
         ((Options&DontAlign)==0)
         && (
 #if EIGEN_ALIGN_STATICALLY
-             ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % EIGEN_ALIGN_BYTES) == 0))
+             ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % align_bytes) == 0))
 #else
              0
 #endif
@@ -81,7 +81,7 @@ void construct_at_boundary(int boundary)
 
 void unalignedassert()
 {
 #if EIGEN_ALIGN_STATICALLY
   construct_at_boundary<Vector2f>(4);
   construct_at_boundary<Vector3f>(4);
   construct_at_boundary<Vector4f>(16);
@@ -100,7 +100,7 @@ void unalignedassert()
   construct_at_boundary<Vector3cf>(4);
   construct_at_boundary<Vector2cd>(EIGEN_ALIGN_BYTES);
   construct_at_boundary<Vector3cd>(16);
 #endif
 
   check_unalignedassert_good<TestNew1>();
   check_unalignedassert_good<TestNew2>();
@@ -112,11 +112,12 @@ void unalignedassert()
   check_unalignedassert_good<Depends<true> >();
 
 #if EIGEN_ALIGN_STATICALLY
-  if(EIGEN_ALIGN_BYTES==16)
+  if(EIGEN_ALIGN_BYTES>=16)
   {
     VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4f>(8));
     VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2d>(8));
     VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2cf>(8));
+    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4i>(8));
   }
   for(int b=8; b<EIGEN_ALIGN_BYTES; b+=8)
   {
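Since Vector4i-like types are now expected to be 16-byte aligned (the new VERIFY_RAISES_ASSERT above checks exactly that), the usual rules for fixed-size vectorizable members extend to them. A user-side illustration, not part of the commit (the Node struct is hypothetical):

#include <Eigen/Core>

struct Node
{
  Eigen::Vector4i coords;          // now an over-aligned (16-byte) fixed-size member
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW  // keep heap-allocated Node instances aligned
};

int main()
{
  Node* n = new Node;              // aligned thanks to the macro above
  n->coords.setZero();
  delete n;
  return 0;
}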
@@ -214,7 +214,7 @@ template<typename Scalar, bool Enable = internal::packet_traits<Scalar>::Vectori
   >(DefaultTraversal,CompleteUnrolling)));
 
   VERIFY((test_assign(Matrix11(), Matrix<Scalar,PacketSize,EIGEN_PLAIN_ENUM_MIN(2,PacketSize)>()*Matrix<Scalar,EIGEN_PLAIN_ENUM_MIN(2,PacketSize),PacketSize>(),
-                      PacketSize>=EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD?DefaultTraversal:InnerVectorizedTraversal, CompleteUnrolling)));
+                      InnerVectorizedTraversal, CompleteUnrolling)));
 #endif
 
   VERIFY(test_assign(MatrixXX(10,10),MatrixXX(20,20).block(10,10,2,3),