bug #973: update macro-level control of alignement by introducing user-controllable EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES macros. This changeset also removes EIGEN_ALIGN (replaced by EIGEN_MAX_ALIGN_BYTES>0), EIGEN_ALIGN_STATICALLY (replaced by EIGEN_MAX_STATIC_ALIGN_BYTES>0), EIGEN_USER_ALIGN*, EIGEN_ALIGN_DEFAULT (replaced by EIGEN_ALIGN_MAX).

This commit is contained in:
Gael Guennebaud 2015-07-29 10:22:25 +02:00
parent 76874b128e
commit 175ed636ea
14 changed files with 210 additions and 133 deletions

View File

@ -73,9 +73,9 @@
// and inclusion of their respective header files
#include "src/Core/util/MKL_support.h"
// if alignment is disabled, then disable vectorization. Note: EIGEN_ALIGN is the proper check, it takes into
// account both the user's will (EIGEN_DONT_ALIGN) and our own platform checks
#if !EIGEN_ALIGN
// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into
// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks
#if EIGEN_MAX_ALIGN_BYTES==0
#ifndef EIGEN_DONT_VECTORIZE
#define EIGEN_DONT_VECTORIZE
#endif

View File

@ -641,7 +641,7 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
HasNoInnerStride = InnerStrideAtCompileTime == 1,
HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
HasNoStride = HasNoInnerStride && HasNoOuterStride,
IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) && ((int(MapOptions)&Aligned)==Aligned),
IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
// TODO: should check for smaller packet types once we can handle multi-sized packet types

View File

@ -34,26 +34,25 @@ void check_static_allocation_size()
#endif
}
template<typename T, int Size, typename Packet = typename packet_traits<T>::type,
bool Match = bool((Size%unpacket_traits<Packet>::size)==0),
bool TryHalf = bool(int(unpacket_traits<Packet>::size) > 1)
&& bool(int(unpacket_traits<Packet>::size) > int(unpacket_traits<typename unpacket_traits<Packet>::half>::size)) >
template<int ArrayBytes, int AlignmentBytes,
bool Match = bool((ArrayBytes%AlignmentBytes)==0),
bool TryHalf = bool(AlignmentBytes>EIGEN_MIN_ALIGN_BYTES) >
struct compute_default_alignment
{
enum { value = 0 };
};
template<typename T, int Size, typename Packet, bool TryHalf>
struct compute_default_alignment<T, Size, Packet, true, TryHalf> // Match
template<int ArrayBytes, int AlignmentBytes, bool TryHalf>
struct compute_default_alignment<ArrayBytes, AlignmentBytes, true, TryHalf> // Match
{
enum { value = sizeof(T) * unpacket_traits<Packet>::size };
enum { value = AlignmentBytes };
};
template<typename T, int Size, typename Packet>
struct compute_default_alignment<T, Size, Packet, false, true> // Try-half
template<int ArrayBytes, int AlignmentBytes>
struct compute_default_alignment<ArrayBytes, AlignmentBytes, false, true> // Try-half
{
// current packet too large, try with an half-packet
enum { value = compute_default_alignment<T, Size, typename unpacket_traits<Packet>::half>::value };
enum { value = compute_default_alignment<ArrayBytes, AlignmentBytes/2>::value };
};
/** \internal
@ -62,7 +61,7 @@ struct compute_default_alignment<T, Size, Packet, false, true> // Try-half
*/
template <typename T, int Size, int MatrixOrArrayOptions,
int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0
: compute_default_alignment<T,Size>::value >
: compute_default_alignment<Size*sizeof(T), EIGEN_PLAIN_ENUM_MAX(packet_traits<T>::size*sizeof(T), EIGEN_MAX_STATIC_ALIGN_BYTES) >::value >
struct plain_array
{
T array[Size];
@ -180,7 +179,7 @@ struct plain_array<T, Size, MatrixOrArrayOptions, 64>
template <typename T, int MatrixOrArrayOptions, int Alignment>
struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
{
EIGEN_USER_ALIGN_DEFAULT T array[1];
T array[1];
EIGEN_DEVICE_FUNC plain_array() {}
EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
};

View File

@ -183,7 +183,7 @@ struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
template<typename Scalar,int Size,int MaxSize>
struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
{
#if EIGEN_ALIGN_STATICALLY
#if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
#else
@ -196,7 +196,7 @@ struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
EIGEN_STRONG_INLINE Scalar* data() {
return ForceAlignment
? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES)
? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
: m_data.array;
}
#endif

View File

@ -77,7 +77,7 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
? int(PlainObjectType::OuterStrideAtCompileTime)
: int(StrideType::OuterStrideAtCompileTime),
IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) && ((int(MapOptions)&Aligned)==Aligned),
Flags0 = TraitsBase::Flags & (~NestByRefBit),
Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
};

View File

@ -160,7 +160,10 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
EIGEN_DEVICE_FUNC
void checkSanity() const
{
eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::IsAligned, (size_t(m_data) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned");
// TODO "IsAligned" should be replaced to handle arbitrary alignment
#if EIGEN_MAX_ALIGN_BYTES>0
eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::IsAligned, (size_t(m_data) % EIGEN_MAX_ALIGN_BYTES) == 0) && "data is not aligned");
#endif
}
PointerType m_data;

View File

@ -293,8 +293,8 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
SizeB = ActualCols * MaxDepth
};
EIGEN_ALIGN_DEFAULT LhsScalar m_staticA[SizeA];
EIGEN_ALIGN_DEFAULT RhsScalar m_staticB[SizeB];
EIGEN_ALIGN_MAX LhsScalar m_staticA[SizeA];
EIGEN_ALIGN_MAX RhsScalar m_staticB[SizeB];
public:

View File

@ -463,7 +463,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,R
Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
{
EIGEN_ALIGN_DEFAULT ResScalar tmp0 = ResScalar(0);
// FIXME: what is the purpose of this EIGEN_ALIGN_DEFAULT ??
EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0);
ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0);
// this helps the compiler generating good binary code
@ -572,7 +573,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,R
{
for (Index i=start; i<end; ++i)
{
EIGEN_ALIGN_DEFAULT ResScalar tmp0 = ResScalar(0);
EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0);
ResPacket ptmp0 = pset1<ResPacket>(tmp0);
const LhsScalars lhs0 = lhs.getVectorMapper(i, 0);
// process first unaligned result's coeffs

View File

@ -274,7 +274,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
Index mc = (std::min)(rows,blocking.mc()); // cache block size along the M direction
std::size_t sizeA = kc*mc;
std::size_t sizeB = kc*cols+EIGEN_ALIGN_BYTES/sizeof(Scalar);
std::size_t sizeB = kc*cols+EIGEN_MAX_ALIGN_BYTES/sizeof(Scalar);
ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
@ -311,7 +311,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;
Scalar* geb = blockB+ts*ts;
geb = geb + internal::first_aligned(geb,EIGEN_ALIGN_BYTES/sizeof(Scalar));
geb = geb + internal::first_aligned(geb,EIGEN_MAX_ALIGN_BYTES/sizeof(Scalar));
pack_rhs(geb, rhs.getSubMapper(actual_k2,IsLower ? 0 : k2), actual_kc, rs);

View File

@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
@ -306,68 +306,10 @@
#define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1
#endif
// 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
// 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
// enable alignment, but it can be a cause of problems on some platforms, so we just disable it in
// certain common platform (compiler+architecture combinations) to avoid these problems.
// Only static alignment is really problematic (relies on nonstandard compiler extensions that don't
// work everywhere, for example don't work on GCC/ARM), try to keep heap alignment even
// when we have to disable static alignment.
#if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64)
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
#else
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
#endif
// static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX
#if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
&& !EIGEN_GCC3_OR_OLDER \
&& !EIGEN_COMP_SUNCC \
&& !EIGEN_OS_QNX
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
#else
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
#endif
// Defined the boundary (in bytes) on which the data needs to be aligned. Note
// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
// aligned at all regardless of the value of this #define.
// TODO should be renamed EIGEN_MAXIMAL_ALIGN_BYTES,
// for instance with AVX 1 EIGEN_MAXIMAL_ALIGN_BYTES=32 while for 'int' 16 bytes alignment is always enough,
// and 16 bytes alignment is also enough for Vector4f.
#define EIGEN_ALIGN_BYTES 16
#ifdef EIGEN_DONT_ALIGN
#ifndef EIGEN_DONT_ALIGN_STATICALLY
#define EIGEN_DONT_ALIGN_STATICALLY
#endif
#define EIGEN_ALIGN 0
#elif !defined(EIGEN_DONT_VECTORIZE)
#if defined(__AVX__)
#undef EIGEN_ALIGN_BYTES
#define EIGEN_ALIGN_BYTES 32
#endif
#define EIGEN_ALIGN 1
#else
#define EIGEN_ALIGN 0
#endif
// This macro can be used to prevent from macro expansion, e.g.:
// std::max EIGEN_NOT_A_MACRO(a,b)
#define EIGEN_NOT_A_MACRO
// EIGEN_ALIGN_STATICALLY is the true test whether we want to align arrays on the stack or not. It takes into account both the user choice to explicitly disable
// alignment (EIGEN_DONT_ALIGN_STATICALLY) and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). Henceforth, only EIGEN_ALIGN_STATICALLY should be used.
#if EIGEN_ARCH_WANTS_STACK_ALIGNMENT && !defined(EIGEN_DONT_ALIGN_STATICALLY)
#define EIGEN_ALIGN_STATICALLY 1
#else
#define EIGEN_ALIGN_STATICALLY 0
#ifndef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
#define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
#endif
#endif
#ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::RowMajor
#else
@ -585,6 +527,20 @@ namespace Eigen {
#endif
#endif
//------------------------------------------------------------------------------------------
// Static and dynamic alignment control
//
// The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES
// as the maximal boundary in bytes on which dynamically and statically allocated data may be alignment respectively.
// The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not,
// a default value is automatically computed based on architecture, compiler, and OS.
//
// This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,_MAX}
// to be used to declare statically aligned buffers.
//------------------------------------------------------------------------------------------
/* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements.
* However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
* so that vectorization doesn't affect binary compatibility.
@ -605,23 +561,116 @@ namespace Eigen {
#error Please tell me what is the equivalent of __attribute__((aligned(n))) for your compiler
#endif
// If the user explicitly disable vectorization, then we also disable alignment
#if defined(EIGEN_DONT_VECTORIZE)
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
#elif defined(__AVX__)
// 32 bytes static alignmeent is preferred only if really required
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
#else
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
#endif
// EIGEN_MIN_ALIGN_BYTES defines the minimal value for which the notion of explicit alignment makes sense
#define EIGEN_MIN_ALIGN_BYTES 16
// Defined the boundary (in bytes) on which the data needs to be aligned. Note
// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
// aligned at all regardless of the value of this #define.
#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0
#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
#endif
// EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprectated
// They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0
#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
#ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
#undef EIGEN_MAX_STATIC_ALIGN_BYTES
#endif
#define EIGEN_MAX_STATIC_ALIGN_BYTES 0
#endif
#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
// Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES
// 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
// 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
// enable alignment, but it can be a cause of problems on some platforms, so we just disable it in
// certain common platform (compiler+architecture combinations) to avoid these problems.
// Only static alignment is really problematic (relies on nonstandard compiler extensions that don't
// work everywhere, for example don't work on GCC/ARM), try to keep heap alignment even
// when we have to disable static alignment.
#if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64)
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
#else
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
#endif
// static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX
#if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
&& !EIGEN_GCC3_OR_OLDER \
&& !EIGEN_COMP_SUNCC \
&& !EIGEN_OS_QNX
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
#else
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
#endif
#if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
#endif
#endif
// If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_ALIGN_BYTES
#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES
#undef EIGEN_MAX_STATIC_ALIGN_BYTES
#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
#endif
#if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
#define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
#endif
// At this stage, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test whether we want to align arrays on the stack or not.
// It takes into account both the user choice to explicitly enable/disable alignment (by settting EIGEN_MAX_STATIC_ALIGN_BYTES)
// and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT).
// Henceforth, only EIGEN_MAX_STATIC_ALIGN_BYTES should be used.
// Shortcuts to EIGEN_ALIGN_TO_BOUNDARY
#define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8)
#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
#define EIGEN_ALIGN_DEFAULT EIGEN_ALIGN_TO_BOUNDARY(EIGEN_ALIGN_BYTES)
#if EIGEN_ALIGN_STATICALLY
#define EIGEN_USER_ALIGN_TO_BOUNDARY(n) EIGEN_ALIGN_TO_BOUNDARY(n)
#define EIGEN_USER_ALIGN16 EIGEN_ALIGN16
#define EIGEN_USER_ALIGN32 EIGEN_ALIGN32
#define EIGEN_USER_ALIGN_DEFAULT EIGEN_ALIGN_DEFAULT
#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
#define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
#else
#define EIGEN_USER_ALIGN_TO_BOUNDARY(n)
#define EIGEN_USER_ALIGN16
#define EIGEN_USER_ALIGN32
#define EIGEN_USER_ALIGN_DEFAULT
#define EIGEN_ALIGN_MAX
#endif
// Dynamic alignment control
#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0
#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
#endif
#ifdef EIGEN_DONT_ALIGN
#ifdef EIGEN_MAX_ALIGN_BYTES
#undef EIGEN_MAX_ALIGN_BYTES
#endif
#define EIGEN_MAX_ALIGN_BYTES 0
#elif !defined(EIGEN_MAX_ALIGN_BYTES)
#define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
#endif
//----------------------------------------------------------------------
#ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD
#define EIGEN_RESTRICT
#endif

View File

@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com>
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
@ -32,7 +32,7 @@
// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed
// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
&& defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_ALIGN_BYTES == 16)
&& defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_MAX_ALIGN_BYTES == 16)
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
#else
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
@ -42,14 +42,14 @@
// See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup
// FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures
// See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_ALIGN_BYTES == 16)
#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_MAX_ALIGN_BYTES == 16)
#define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
#else
#define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
#endif
#if (EIGEN_OS_MAC && (EIGEN_ALIGN_BYTES == 16)) \
|| (EIGEN_OS_WIN64 && (EIGEN_ALIGN_BYTES == 16)) \
#if (EIGEN_OS_MAC && (EIGEN_MAX_ALIGN_BYTES == 16)) \
|| (EIGEN_OS_WIN64 && (EIGEN_MAX_ALIGN_BYTES == 16)) \
|| EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED \
|| EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
#define EIGEN_MALLOC_ALREADY_ALIGNED 1
@ -107,9 +107,9 @@ inline void throw_std_bad_alloc()
*/
inline void* handmade_aligned_malloc(std::size_t size)
{
void *original = std::malloc(size+EIGEN_ALIGN_BYTES);
void *original = std::malloc(size+EIGEN_MAX_ALIGN_BYTES);
if (original == 0) return 0;
void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES);
void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES);
*(reinterpret_cast<void**>(aligned) - 1) = original;
return aligned;
}
@ -130,9 +130,9 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t =
if (ptr == 0) return handmade_aligned_malloc(size);
void *original = *(reinterpret_cast<void**>(ptr) - 1);
std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
original = std::realloc(original,size+EIGEN_ALIGN_BYTES);
original = std::realloc(original,size+EIGEN_MAX_ALIGN_BYTES);
if (original == 0) return 0;
void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES);
void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES);
void *previous_aligned = static_cast<char *>(original)+previous_offset;
if(aligned!=previous_aligned)
std::memmove(aligned, previous_aligned, size);
@ -218,16 +218,16 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size)
check_that_malloc_is_allowed();
void *result;
#if !EIGEN_ALIGN
#if EIGEN_MAX_ALIGN_BYTES==0
result = std::malloc(size);
#elif EIGEN_MALLOC_ALREADY_ALIGNED
result = std::malloc(size);
#elif EIGEN_HAS_POSIX_MEMALIGN
if(posix_memalign(&result, EIGEN_ALIGN_BYTES, size)) result = 0;
if(posix_memalign(&result, EIGEN_MAX_ALIGN_BYTES, size)) result = 0;
#elif EIGEN_HAS_MM_MALLOC
result = _mm_malloc(size, EIGEN_ALIGN_BYTES);
result = _mm_malloc(size, EIGEN_MAX_ALIGN_BYTES);
#elif EIGEN_OS_WIN_STRICT
result = _aligned_malloc(size, EIGEN_ALIGN_BYTES);
result = _aligned_malloc(size, EIGEN_MAX_ALIGN_BYTES);
#else
result = handmade_aligned_malloc(size);
#endif
@ -241,7 +241,7 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size)
/** \internal Frees memory allocated with aligned_malloc. */
EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
{
#if !EIGEN_ALIGN
#if EIGEN_MAX_ALIGN_BYTES==0
std::free(ptr);
#elif EIGEN_MALLOC_ALREADY_ALIGNED
std::free(ptr);
@ -266,7 +266,7 @@ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
EIGEN_UNUSED_VARIABLE(old_size);
void *result;
#if !EIGEN_ALIGN
#if EIGEN_MAX_ALIGN_BYTES==0
result = std::realloc(ptr,new_size);
#elif EIGEN_MALLOC_ALREADY_ALIGNED
result = std::realloc(ptr,new_size);
@ -277,12 +277,12 @@ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
// implements _mm_malloc/_mm_free based on the corresponding _aligned_
// functions. This may not always be the case and we just try to be safe.
#if EIGEN_OS_WIN_STRICT && defined(_mm_free)
result = _aligned_realloc(ptr,new_size,EIGEN_ALIGN_BYTES);
result = _aligned_realloc(ptr,new_size,EIGEN_MAX_ALIGN_BYTES);
#else
result = generic_aligned_realloc(ptr,new_size,old_size);
#endif
#elif EIGEN_OS_WIN_STRICT
result = _aligned_realloc(ptr,new_size,EIGEN_ALIGN_BYTES);
result = _aligned_realloc(ptr,new_size,EIGEN_MAX_ALIGN_BYTES);
#else
result = handmade_aligned_realloc(ptr,new_size,old_size);
#endif
@ -691,7 +691,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
#ifdef EIGEN_ALLOCA
// We always manually re-align the result of EIGEN_ALLOCA.
// If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
#define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA(SIZE+EIGEN_ALIGN_BYTES-1)) + EIGEN_ALIGN_BYTES-1) & ~(size_t(EIGEN_ALIGN_BYTES-1)))
#define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA(SIZE+EIGEN_MAX_ALIGN_BYTES-1)) + EIGEN_MAX_ALIGN_BYTES-1) & ~(size_t(EIGEN_MAX_ALIGN_BYTES-1)))
#define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
@ -715,7 +715,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] ***
*****************************************************************************/
#if EIGEN_ALIGN
#if EIGEN_MAX_ALIGN_BYTES!=0
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
void* operator new(size_t size, const std::nothrow_t&) throw() { \
EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
@ -751,7 +751,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_ALIGN_BYTES==0)))
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_MAX_ALIGN_BYTES==0)))
/****************************************************************************/

View File

@ -167,7 +167,7 @@ class compute_matrix_evaluator_flags
(
((Options&DontAlign)==0)
&& (
#if EIGEN_ALIGN_STATICALLY
#if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % align_bytes) == 0))
#else
0
@ -175,7 +175,7 @@ class compute_matrix_evaluator_flags
||
#if EIGEN_ALIGN
#if EIGEN_MAX_ALIGN_BYTES!=0
is_dynamic_size_storage
#else
0

View File

@ -21,6 +21,17 @@ template<typename MatrixType> void verifySizeOf(const MatrixType&)
void test_sizeof()
{
CALL_SUBTEST(verifySizeOf(Matrix<float, 1, 1>()) );
CALL_SUBTEST(verifySizeOf(Array<float, 2, 1>()) );
CALL_SUBTEST(verifySizeOf(Array<float, 3, 1>()) );
CALL_SUBTEST(verifySizeOf(Array<float, 4, 1>()) );
CALL_SUBTEST(verifySizeOf(Array<float, 5, 1>()) );
CALL_SUBTEST(verifySizeOf(Array<float, 6, 1>()) );
CALL_SUBTEST(verifySizeOf(Array<float, 7, 1>()) );
CALL_SUBTEST(verifySizeOf(Array<float, 8, 1>()) );
CALL_SUBTEST(verifySizeOf(Array<float, 9, 1>()) );
CALL_SUBTEST(verifySizeOf(Array<float, 10, 1>()) );
CALL_SUBTEST(verifySizeOf(Array<float, 11, 1>()) );
CALL_SUBTEST(verifySizeOf(Array<float, 12, 1>()) );
CALL_SUBTEST(verifySizeOf(Vector2d()) );
CALL_SUBTEST(verifySizeOf(Vector4f()) );
CALL_SUBTEST(verifySizeOf(Matrix4d()) );

View File

@ -2,11 +2,25 @@
// for linear algebra.
//
// Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#if defined(EIGEN_TEST_PART_1)
// default
#elif defined(EIGEN_TEST_PART_2)
#define EIGEN_MAX_STATIC_ALIGN_BYTES 16
#define EIGEN_MAX_ALIGN_BYTES 16
#elif defined(EIGEN_TEST_PART_3)
#define EIGEN_MAX_STATIC_ALIGN_BYTES 32
#define EIGEN_MAX_ALIGN_BYTES 32
#elif defined(EIGEN_TEST_PART_4)
#define EIGEN_MAX_STATIC_ALIGN_BYTES 64
#define EIGEN_MAX_ALIGN_BYTES 64
#endif
#include "main.h"
typedef Matrix<float, 6,1> Vector6f;
@ -48,7 +62,7 @@ struct TestNew4
struct TestNew5
{
EIGEN_MAKE_ALIGNED_OPERATOR_NEW
float f; // try the f at first -- the EIGEN_ALIGN16 attribute of m should make that still work
float f; // try the f at first -- the EIGEN_ALIGN_MAX attribute of m should make that still work
Matrix4f m;
};
@ -75,13 +89,13 @@ void check_unalignedassert_good()
delete[] y;
}
#if EIGEN_ALIGN_STATICALLY
#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
template<typename T>
void construct_at_boundary(int boundary)
{
char buf[sizeof(T)+256];
size_t _buf = reinterpret_cast<size_t>(buf);
_buf += (EIGEN_ALIGN_BYTES - (_buf % EIGEN_ALIGN_BYTES)); // make 16/32-byte aligned
_buf += (EIGEN_MAX_ALIGN_BYTES - (_buf % EIGEN_MAX_ALIGN_BYTES)); // make 16/32/...-byte aligned
_buf += boundary; // make exact boundary-aligned
T *x = ::new(reinterpret_cast<void*>(_buf)) T;
x[0].setZero(); // just in order to silence warnings
@ -91,34 +105,34 @@ void construct_at_boundary(int boundary)
void unalignedassert()
{
#if EIGEN_ALIGN_STATICALLY
#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
construct_at_boundary<Vector2f>(4);
construct_at_boundary<Vector3f>(4);
construct_at_boundary<Vector4f>(16);
construct_at_boundary<Vector6f>(4);
construct_at_boundary<Vector8f>(EIGEN_ALIGN_BYTES);
construct_at_boundary<Vector8f>(EIGEN_MAX_ALIGN_BYTES);
construct_at_boundary<Vector12f>(16);
construct_at_boundary<Matrix2f>(16);
construct_at_boundary<Matrix3f>(4);
construct_at_boundary<Matrix4f>(EIGEN_ALIGN_BYTES);
construct_at_boundary<Matrix4f>(EIGEN_MAX_ALIGN_BYTES);
construct_at_boundary<Vector2d>(16);
construct_at_boundary<Vector3d>(4);
construct_at_boundary<Vector4d>(EIGEN_ALIGN_BYTES);
construct_at_boundary<Vector4d>(EIGEN_MAX_ALIGN_BYTES);
construct_at_boundary<Vector5d>(4);
construct_at_boundary<Vector6d>(16);
construct_at_boundary<Vector7d>(4);
construct_at_boundary<Vector8d>(EIGEN_ALIGN_BYTES);
construct_at_boundary<Vector8d>(EIGEN_MAX_ALIGN_BYTES);
construct_at_boundary<Vector9d>(4);
construct_at_boundary<Vector10d>(16);
construct_at_boundary<Vector12d>(EIGEN_ALIGN_BYTES);
construct_at_boundary<Matrix2d>(EIGEN_ALIGN_BYTES);
construct_at_boundary<Vector12d>(EIGEN_MAX_ALIGN_BYTES);
construct_at_boundary<Matrix2d>(EIGEN_MAX_ALIGN_BYTES);
construct_at_boundary<Matrix3d>(4);
construct_at_boundary<Matrix4d>(EIGEN_ALIGN_BYTES);
construct_at_boundary<Matrix4d>(EIGEN_MAX_ALIGN_BYTES);
construct_at_boundary<Vector2cf>(16);
construct_at_boundary<Vector3cf>(4);
construct_at_boundary<Vector2cd>(EIGEN_ALIGN_BYTES);
construct_at_boundary<Vector2cd>(EIGEN_MAX_ALIGN_BYTES);
construct_at_boundary<Vector3cd>(16);
#endif
@ -131,8 +145,8 @@ void unalignedassert()
check_unalignedassert_good<TestNew6>();
check_unalignedassert_good<Depends<true> >();
#if EIGEN_ALIGN_STATICALLY
if(EIGEN_ALIGN_BYTES>=16)
#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
if(EIGEN_MAX_ALIGN_BYTES>=16)
{
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4f>(8));
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector8f>(8));
@ -146,7 +160,7 @@ void unalignedassert()
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2cf>(8));
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4i>(8));
}
for(int b=8; b<EIGEN_ALIGN_BYTES; b+=8)
for(int b=8; b<EIGEN_MAX_ALIGN_BYTES; b+=8)
{
VERIFY_RAISES_ASSERT(construct_at_boundary<Vector8f>(b));
VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix4f>(b));