mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
We were already aligning to a 16-byte boundary those fixed-size objects whose size is a multiple of 16 bytes;
now we also align to an 8-byte boundary those fixed-size objects whose size is a multiple of 8 bytes. For now that is only useful for double, not e.g. for Vector2f, but it didn't seem to hurt. Am I missing something? Do you prefer that we don't align Vector2f at all? Also, improvements in test_unalignedassert.
This commit is contained in:
parent
a9a9ba8453
commit
d41577819b
@ -29,32 +29,48 @@
|
|||||||
struct ei_constructor_without_unaligned_array_assert {};
|
struct ei_constructor_without_unaligned_array_assert {};
|
||||||
|
|
||||||
/** \internal
|
/** \internal
|
||||||
* Static array automatically aligned if the total byte size is a multiple of 16 and the matrix options require auto alignment
|
* Static array. If the MatrixOptions require auto-alignment, the array will be automatically aligned:
|
||||||
|
* - to a 16-byte boundary, if the total size is a multiple of 16 bytes;
|
||||||
|
* - or else to an 8-byte boundary, if the total size is a multiple of 8 bytes.
|
||||||
*/
|
*/
|
||||||
template <typename T, int Size, int MatrixOptions,
|
template <typename T, int Size, int MatrixOptions,
|
||||||
bool Align = (!(MatrixOptions&DontAlign)) && (((Size*sizeof(T))&0xf)==0)
|
int Alignment = (MatrixOptions&DontAlign) ? 0
|
||||||
> struct ei_matrix_array
|
: (((Size*sizeof(T))%16)==0) ? 16
|
||||||
{
|
: (((Size*sizeof(T))%8)==0) ? 8
|
||||||
EIGEN_ALIGN_128 T array[Size];
|
: 0 >
|
||||||
|
struct ei_matrix_array
|
||||||
ei_matrix_array()
|
|
||||||
{
|
|
||||||
#ifndef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
|
|
||||||
ei_assert((reinterpret_cast<size_t>(array) & 0xf) == 0
|
|
||||||
&& "this assertion is explained here: http://eigen.tuxfamily.org/dox/UnalignedArrayAssert.html **** READ THIS WEB PAGE !!! ****");
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
ei_matrix_array(ei_constructor_without_unaligned_array_assert) {}
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename T, int Size, int MatrixOptions> struct ei_matrix_array<T,Size,MatrixOptions,false>
|
|
||||||
{
|
{
|
||||||
T array[Size];
|
T array[Size];
|
||||||
ei_matrix_array() {}
|
ei_matrix_array() {}
|
||||||
ei_matrix_array(ei_constructor_without_unaligned_array_assert) {}
|
ei_matrix_array(ei_constructor_without_unaligned_array_assert) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
|
||||||
|
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask)
|
||||||
|
#else
|
||||||
|
#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) \
|
||||||
|
ei_assert((reinterpret_cast<size_t>(array) & sizemask) == 0 \
|
||||||
|
&& "this assertion is explained here: " \
|
||||||
|
"http://eigen.tuxfamily.org/dox/UnalignedArrayAssert.html" \
|
||||||
|
" **** READ THIS WEB PAGE !!! ****");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
template <typename T, int Size, int MatrixOptions>
|
||||||
|
struct ei_matrix_array<T, Size, MatrixOptions, 16>
|
||||||
|
{
|
||||||
|
EIGEN_ALIGN16 T array[Size];
|
||||||
|
ei_matrix_array() { EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(0xf) }
|
||||||
|
ei_matrix_array(ei_constructor_without_unaligned_array_assert) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T, int Size, int MatrixOptions>
|
||||||
|
struct ei_matrix_array<T, Size, MatrixOptions, 8>
|
||||||
|
{
|
||||||
|
EIGEN_ALIGN8 T array[Size];
|
||||||
|
ei_matrix_array() { EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(0x7) }
|
||||||
|
ei_matrix_array(ei_constructor_without_unaligned_array_assert) {}
|
||||||
|
};
|
||||||
|
|
||||||
/** \internal
|
/** \internal
|
||||||
*
|
*
|
||||||
* \class ei_matrix_storage
|
* \class ei_matrix_storage
|
||||||
|
@ -265,14 +265,14 @@ template<> inline void ei_pstoreu(int* to , const v4i& from )
|
|||||||
|
|
||||||
template<> inline float ei_pfirst(const v4f& a)
|
template<> inline float ei_pfirst(const v4f& a)
|
||||||
{
|
{
|
||||||
float EIGEN_ALIGN_128 af[4];
|
float EIGEN_ALIGN16 af[4];
|
||||||
vec_st(a, 0, af);
|
vec_st(a, 0, af);
|
||||||
return af[0];
|
return af[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> inline int ei_pfirst(const v4i& a)
|
template<> inline int ei_pfirst(const v4i& a)
|
||||||
{
|
{
|
||||||
int EIGEN_ALIGN_128 ai[4];
|
int EIGEN_ALIGN16 ai[4];
|
||||||
vec_st(a, 0, ai);
|
vec_st(a, 0, ai);
|
||||||
return ai[0];
|
return ai[0];
|
||||||
}
|
}
|
||||||
@ -373,7 +373,7 @@ inline float ei_predux_mul(const v4f& a)
|
|||||||
|
|
||||||
inline int ei_predux_mul(const v4i& a)
|
inline int ei_predux_mul(const v4i& a)
|
||||||
{
|
{
|
||||||
EIGEN_ALIGN_128 int aux[4];
|
EIGEN_ALIGN16 int aux[4];
|
||||||
ei_pstore(aux, a);
|
ei_pstore(aux, a);
|
||||||
return aux[0] * aux[1] * aux[2] * aux[3];
|
return aux[0] * aux[1] * aux[2] * aux[3];
|
||||||
}
|
}
|
||||||
|
@ -359,7 +359,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_mul<Packet4i>(const Packet4i& a)
|
|||||||
// after some experiments, it is seems this is the fastest way to implement it
|
// after some experiments, it is seems this is the fastest way to implement it
|
||||||
// for GCC (eg., reusing ei_pmul is very slow !)
|
// for GCC (eg., reusing ei_pmul is very slow !)
|
||||||
// TODO try to call _mm_mul_epu32 directly
|
// TODO try to call _mm_mul_epu32 directly
|
||||||
EIGEN_ALIGN_128 int aux[4];
|
EIGEN_ALIGN16 int aux[4];
|
||||||
ei_pstore(aux, a);
|
ei_pstore(aux, a);
|
||||||
return (aux[0] * aux[1]) * (aux[2] * aux[3]);;
|
return (aux[0] * aux[1]) * (aux[2] * aux[3]);;
|
||||||
}
|
}
|
||||||
@ -378,7 +378,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_min<Packet4i>(const Packet4i& a)
|
|||||||
{
|
{
|
||||||
// after some experiments, it is seems this is the fastest way to implement it
|
// after some experiments, it is seems this is the fastest way to implement it
|
||||||
// for GCC (eg., it does not like using std::min after the ei_pstore !!)
|
// for GCC (eg., it does not like using std::min after the ei_pstore !!)
|
||||||
EIGEN_ALIGN_128 int aux[4];
|
EIGEN_ALIGN16 int aux[4];
|
||||||
ei_pstore(aux, a);
|
ei_pstore(aux, a);
|
||||||
register int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
|
register int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
|
||||||
register int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
|
register int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
|
||||||
@ -399,7 +399,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a)
|
|||||||
{
|
{
|
||||||
// after some experiments, it is seems this is the fastest way to implement it
|
// after some experiments, it is seems this is the fastest way to implement it
|
||||||
// for GCC (eg., it does not like using std::min after the ei_pstore !!)
|
// for GCC (eg., it does not like using std::min after the ei_pstore !!)
|
||||||
EIGEN_ALIGN_128 int aux[4];
|
EIGEN_ALIGN16 int aux[4];
|
||||||
ei_pstore(aux, a);
|
ei_pstore(aux, a);
|
||||||
register int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
|
register int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
|
||||||
register int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
|
register int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
|
||||||
|
@ -202,25 +202,29 @@ using Eigen::ei_cos;
|
|||||||
#define EIGEN_ASM_COMMENT(X)
|
#define EIGEN_ASM_COMMENT(X)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* EIGEN_ALIGN_128 forces data to be 16-byte aligned, EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
|
/* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements.
|
||||||
|
* However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
|
||||||
* so that vectorization doesn't affect binary compatibility.
|
* so that vectorization doesn't affect binary compatibility.
|
||||||
*
|
*
|
||||||
* If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
|
* If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link
|
||||||
* vectorized and non-vectorized code.
|
* vectorized and non-vectorized code.
|
||||||
*/
|
*/
|
||||||
#if !EIGEN_ALIGN
|
#if !EIGEN_ALIGN
|
||||||
#define EIGEN_ALIGN_128
|
#define EIGEN_ALIGN_TO_BOUNDARY(n)
|
||||||
#elif (defined __GNUC__)
|
#elif (defined __GNUC__)
|
||||||
#define EIGEN_ALIGN_128 __attribute__((aligned(16)))
|
#define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
|
||||||
#elif (defined _MSC_VER)
|
#elif (defined _MSC_VER)
|
||||||
#define EIGEN_ALIGN_128 __declspec(align(16))
|
#define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))
|
||||||
#elif (defined __SUNPRO_CC)
|
#elif (defined __SUNPRO_CC)
|
||||||
// FIXME not sure about this one:
|
// FIXME not sure about this one:
|
||||||
#define EIGEN_ALIGN_128 __attribute__((aligned(16)))
|
#define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
|
||||||
#else
|
#else
|
||||||
#error Please tell me what is the equivalent of __attribute__((aligned(16))) for your compiler
|
#error Please tell me what is the equivalent of __attribute__((aligned(n))) for your compiler
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
|
||||||
|
#define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8)
|
||||||
|
|
||||||
#ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD
|
#ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD
|
||||||
#define EIGEN_RESTRICT
|
#define EIGEN_RESTRICT
|
||||||
#endif
|
#endif
|
||||||
|
@ -99,10 +99,10 @@ template<typename Scalar> void packetmath()
|
|||||||
const int PacketSize = ei_packet_traits<Scalar>::size;
|
const int PacketSize = ei_packet_traits<Scalar>::size;
|
||||||
|
|
||||||
const int size = PacketSize*4;
|
const int size = PacketSize*4;
|
||||||
EIGEN_ALIGN_128 Scalar data1[ei_packet_traits<Scalar>::size*4];
|
EIGEN_ALIGN16 Scalar data1[ei_packet_traits<Scalar>::size*4];
|
||||||
EIGEN_ALIGN_128 Scalar data2[ei_packet_traits<Scalar>::size*4];
|
EIGEN_ALIGN16 Scalar data2[ei_packet_traits<Scalar>::size*4];
|
||||||
EIGEN_ALIGN_128 Packet packets[PacketSize*2];
|
EIGEN_ALIGN16 Packet packets[PacketSize*2];
|
||||||
EIGEN_ALIGN_128 Scalar ref[ei_packet_traits<Scalar>::size*4];
|
EIGEN_ALIGN16 Scalar ref[ei_packet_traits<Scalar>::size*4];
|
||||||
for (int i=0; i<size; ++i)
|
for (int i=0; i<size; ++i)
|
||||||
{
|
{
|
||||||
data1[i] = ei_random<Scalar>();
|
data1[i] = ei_random<Scalar>();
|
||||||
@ -202,9 +202,9 @@ template<typename Scalar> void packetmath_real()
|
|||||||
const int PacketSize = ei_packet_traits<Scalar>::size;
|
const int PacketSize = ei_packet_traits<Scalar>::size;
|
||||||
|
|
||||||
const int size = PacketSize*4;
|
const int size = PacketSize*4;
|
||||||
EIGEN_ALIGN_128 Scalar data1[ei_packet_traits<Scalar>::size*4];
|
EIGEN_ALIGN16 Scalar data1[ei_packet_traits<Scalar>::size*4];
|
||||||
EIGEN_ALIGN_128 Scalar data2[ei_packet_traits<Scalar>::size*4];
|
EIGEN_ALIGN16 Scalar data2[ei_packet_traits<Scalar>::size*4];
|
||||||
EIGEN_ALIGN_128 Scalar ref[ei_packet_traits<Scalar>::size*4];
|
EIGEN_ALIGN16 Scalar ref[ei_packet_traits<Scalar>::size*4];
|
||||||
|
|
||||||
for (int i=0; i<size; ++i)
|
for (int i=0; i<size; ++i)
|
||||||
{
|
{
|
||||||
|
@ -24,52 +24,38 @@
|
|||||||
|
|
||||||
#include "main.h"
|
#include "main.h"
|
||||||
|
|
||||||
struct Good1
|
struct TestNew1
|
||||||
{
|
{
|
||||||
MatrixXd m; // good: m will allocate its own array, taking care of alignment.
|
MatrixXd m; // good: m will allocate its own array, taking care of alignment.
|
||||||
Good1() : m(20,20) {}
|
TestNew1() : m(20,20) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Good2
|
struct TestNew2
|
||||||
{
|
{
|
||||||
Matrix3d m; // good: m's size isn't a multiple of 16 bytes, so m doesn't have to be aligned
|
Matrix3d m; // good: m's size isn't a multiple of 16 bytes, so m doesn't have to be 16-byte aligned,
|
||||||
|
// 8-byte alignment is good enough here, which we'll get automatically
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Good3
|
struct TestNew3
|
||||||
{
|
{
|
||||||
Vector2f m; // good: same reason
|
Vector2f m; // good: m's size isn't a multiple of 16 bytes, so m doesn't have to be 16-byte aligned
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Bad4
|
struct TestNew4
|
||||||
{
|
|
||||||
Vector2d m; // bad: sizeof(m)%16==0 so alignment is required
|
|
||||||
};
|
|
||||||
|
|
||||||
struct Bad5
|
|
||||||
{
|
|
||||||
Matrix<float, 2, 6> m; // bad: same reason
|
|
||||||
};
|
|
||||||
|
|
||||||
struct Bad6
|
|
||||||
{
|
|
||||||
Matrix<double, 3, 4> m; // bad: same reason
|
|
||||||
};
|
|
||||||
|
|
||||||
struct Good7
|
|
||||||
{
|
{
|
||||||
EIGEN_MAKE_ALIGNED_OPERATOR_NEW
|
EIGEN_MAKE_ALIGNED_OPERATOR_NEW
|
||||||
Vector2d m;
|
Vector2d m;
|
||||||
float f; // make the struct have sizeof%16!=0 to make it a little more tricky when we allow an array of 2 such objects
|
float f; // make the struct have sizeof%16!=0 to make it a little more tricky when we allow an array of 2 such objects
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Good8
|
struct TestNew5
|
||||||
{
|
{
|
||||||
EIGEN_MAKE_ALIGNED_OPERATOR_NEW
|
EIGEN_MAKE_ALIGNED_OPERATOR_NEW
|
||||||
float f; // try the f at first -- the EIGEN_ALIGN_128 attribute of m should make that still work
|
float f; // try the f at first -- the EIGEN_ALIGN16 attribute of m should make that still work
|
||||||
Matrix4f m;
|
Matrix4f m;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Good9
|
struct TestNew6
|
||||||
{
|
{
|
||||||
Matrix<float,2,2,DontAlign> m; // good: no alignment requested
|
Matrix<float,2,2,DontAlign> m; // good: no alignment requested
|
||||||
float f;
|
float f;
|
||||||
@ -94,34 +80,56 @@ void check_unalignedassert_good()
|
|||||||
|
|
||||||
#if EIGEN_ALIGN
|
#if EIGEN_ALIGN
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void check_unalignedassert_bad()
|
void construct_at_boundary(int boundary)
|
||||||
{
|
{
|
||||||
float buf[sizeof(T)+16];
|
char buf[sizeof(T)+256];
|
||||||
float *unaligned = buf;
|
size_t _buf = reinterpret_cast<size_t>(buf);
|
||||||
while((reinterpret_cast<size_t>(unaligned)&0xf)==0) ++unaligned; // make sure unaligned is really unaligned
|
_buf += (16 - (_buf % 16)); // make 16-byte aligned
|
||||||
T *x = ::new(static_cast<void*>(unaligned)) T;
|
_buf += boundary; // make exact boundary-aligned
|
||||||
|
T *x = ::new(reinterpret_cast<void*>(_buf)) T;
|
||||||
x->~T();
|
x->~T();
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void unalignedassert()
|
void unalignedassert()
|
||||||
{
|
{
|
||||||
check_unalignedassert_good<Good1>();
|
construct_at_boundary<Vector2f>(8);
|
||||||
check_unalignedassert_good<Good2>();
|
construct_at_boundary<Vector3f>(4);
|
||||||
check_unalignedassert_good<Good3>();
|
construct_at_boundary<Vector4f>(16);
|
||||||
#if EIGEN_ALIGN
|
construct_at_boundary<Matrix2f>(16);
|
||||||
VERIFY_RAISES_ASSERT(check_unalignedassert_bad<Bad4>());
|
construct_at_boundary<Matrix3f>(4);
|
||||||
VERIFY_RAISES_ASSERT(check_unalignedassert_bad<Bad5>());
|
construct_at_boundary<Matrix4f>(16);
|
||||||
VERIFY_RAISES_ASSERT(check_unalignedassert_bad<Bad6>());
|
|
||||||
#endif
|
construct_at_boundary<Vector2d>(16);
|
||||||
|
construct_at_boundary<Vector3d>(8);
|
||||||
|
construct_at_boundary<Vector4d>(16);
|
||||||
|
construct_at_boundary<Matrix2d>(16);
|
||||||
|
construct_at_boundary<Matrix3d>(8);
|
||||||
|
construct_at_boundary<Matrix4d>(16);
|
||||||
|
|
||||||
|
check_unalignedassert_good<TestNew1>();
|
||||||
|
check_unalignedassert_good<TestNew2>();
|
||||||
|
check_unalignedassert_good<TestNew3>();
|
||||||
|
|
||||||
check_unalignedassert_good<Good7>();
|
check_unalignedassert_good<TestNew4>();
|
||||||
check_unalignedassert_good<Good8>();
|
check_unalignedassert_good<TestNew5>();
|
||||||
check_unalignedassert_good<Good9>();
|
check_unalignedassert_good<TestNew6>();
|
||||||
check_unalignedassert_good<Depends<true> >();
|
check_unalignedassert_good<Depends<true> >();
|
||||||
|
|
||||||
#if EIGEN_ALIGN
|
#if EIGEN_ALIGN
|
||||||
VERIFY_RAISES_ASSERT(check_unalignedassert_bad<Depends<false> >());
|
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2f>(4));
|
||||||
|
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4f>(4));
|
||||||
|
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4f>(8));
|
||||||
|
VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix2f>(4));
|
||||||
|
VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix2f>(8));
|
||||||
|
VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix4f>(4));
|
||||||
|
VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix4f>(8));
|
||||||
|
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2d>(8));
|
||||||
|
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector3d>(4));
|
||||||
|
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4d>(8));
|
||||||
|
VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix2d>(8));
|
||||||
|
VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix3d>(4));
|
||||||
|
VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix4d>(8));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user