bug #973: update macro-level control of alignment by introducing user-controllable EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES macros. This changeset also removes EIGEN_ALIGN (replaced by EIGEN_MAX_ALIGN_BYTES>0), EIGEN_ALIGN_STATICALLY (replaced by EIGEN_MAX_STATIC_ALIGN_BYTES>0), EIGEN_USER_ALIGN*, and EIGEN_ALIGN_DEFAULT (replaced by EIGEN_ALIGN_MAX).

2025-07-14 09:01:47 +08:00 · 2015-07-29 10:22:25 +02:00 · 2015-07-29 10:22:25 +02:00 · 175ed636ea
commit 175ed636ea
parent 76874b128e
14 changed files with 210 additions and 133 deletions
--- a/Eigen/Core
+++ b/Eigen/Core
@ -73,9 +73,9 @@
 // and inclusion of their respective header files
 #include "src/Core/util/MKL_support.h"

-// if alignment is disabled, then disable vectorization. Note: EIGEN_ALIGN is the proper check, it takes into
-// account both the user's will (EIGEN_DONT_ALIGN) and our own platform checks
-#if !EIGEN_ALIGN
+// if alignment is disabled, then disable vectorization. Note: EIGEN_MAX_ALIGN_BYTES is the proper check, it takes into
+// account both the user's will (EIGEN_MAX_ALIGN_BYTES,EIGEN_DONT_ALIGN) and our own platform checks
+#if EIGEN_MAX_ALIGN_BYTES==0
  #ifndef EIGEN_DONT_VECTORIZE
    #define EIGEN_DONT_VECTORIZE
  #endif
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@ -641,7 +641,7 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
    HasNoInnerStride = InnerStrideAtCompileTime == 1,
    HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
    HasNoStride = HasNoInnerStride && HasNoOuterStride,
-    IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
+    IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) && ((int(MapOptions)&Aligned)==Aligned),
    IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
    
    // TODO: should check for smaller packet types once we can handle multi-sized packet types
--- a/Eigen/src/Core/DenseStorage.h
+++ b/Eigen/src/Core/DenseStorage.h
@ -34,26 +34,25 @@ void check_static_allocation_size()
  #endif
 }

-template<typename T, int Size, typename Packet = typename packet_traits<T>::type,
-         bool Match     =  bool((Size%unpacket_traits<Packet>::size)==0),
-         bool TryHalf   =  bool(int(unpacket_traits<Packet>::size) > 1)
-                        && bool(int(unpacket_traits<Packet>::size) > int(unpacket_traits<typename unpacket_traits<Packet>::half>::size)) >
+template<int ArrayBytes, int AlignmentBytes,
+         bool Match     =  bool((ArrayBytes%AlignmentBytes)==0),
+         bool TryHalf   =  bool(AlignmentBytes>EIGEN_MIN_ALIGN_BYTES) >
 struct compute_default_alignment
 {
  enum { value = 0 };
 };

-template<typename T, int Size, typename Packet, bool TryHalf>
-struct compute_default_alignment<T, Size, Packet, true, TryHalf> // Match
+template<int ArrayBytes, int AlignmentBytes, bool TryHalf>
+struct compute_default_alignment<ArrayBytes, AlignmentBytes, true, TryHalf> // Match
 {
-  enum { value = sizeof(T) * unpacket_traits<Packet>::size };
+  enum { value = AlignmentBytes };
 };

-template<typename T, int Size, typename Packet>
-struct compute_default_alignment<T, Size, Packet, false, true> // Try-half
+template<int ArrayBytes, int AlignmentBytes>
+struct compute_default_alignment<ArrayBytes, AlignmentBytes, false, true> // Try-half
 {
  // current packet too large, try with an half-packet
-  enum { value = compute_default_alignment<T, Size, typename unpacket_traits<Packet>::half>::value };
+  enum { value = compute_default_alignment<ArrayBytes, AlignmentBytes/2>::value };
 };

 /** \internal
@ -62,7 +61,7 @@ struct compute_default_alignment<T, Size, Packet, false, true> // Try-half
  */
 template <typename T, int Size, int MatrixOrArrayOptions,
          int Alignment = (MatrixOrArrayOptions&DontAlign) ? 0
-                        : compute_default_alignment<T,Size>::value >
+                        : compute_default_alignment<Size*sizeof(T), EIGEN_PLAIN_ENUM_MAX(packet_traits<T>::size*sizeof(T), EIGEN_MAX_STATIC_ALIGN_BYTES) >::value >
 struct plain_array
 {
  T array[Size];
@ -180,7 +179,7 @@ struct plain_array<T, Size, MatrixOrArrayOptions, 64>
 template <typename T, int MatrixOrArrayOptions, int Alignment>
 struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
 {
-  EIGEN_USER_ALIGN_DEFAULT T array[1];
+  T array[1];
  EIGEN_DEVICE_FUNC plain_array() {}
  EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
 };
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@ -183,7 +183,7 @@ struct gemv_static_vector_if<Scalar,Size,Dynamic,true>
 template<typename Scalar,int Size,int MaxSize>
 struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
 {
-  #if EIGEN_ALIGN_STATICALLY
+  #if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize),0> m_data;
  EIGEN_STRONG_INLINE Scalar* data() { return m_data.array; }
  #else
@ -196,7 +196,7 @@ struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
  internal::plain_array<Scalar,EIGEN_SIZE_MIN_PREFER_FIXED(Size,MaxSize)+(ForceAlignment?PacketSize:0),0> m_data;
  EIGEN_STRONG_INLINE Scalar* data() {
    return ForceAlignment
-            ? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES)
+            ? reinterpret_cast<Scalar*>((reinterpret_cast<size_t>(m_data.array) & ~(size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES)
            : m_data.array;
  }
  #endif
--- a/Eigen/src/Core/Map.h
+++ b/Eigen/src/Core/Map.h
@ -77,7 +77,7 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
    OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
                             ? int(PlainObjectType::OuterStrideAtCompileTime)
                             : int(StrideType::OuterStrideAtCompileTime),
-    IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
+    IsAligned = bool(EIGEN_MAX_ALIGN_BYTES>0) && ((int(MapOptions)&Aligned)==Aligned),
    Flags0 = TraitsBase::Flags & (~NestByRefBit),
    Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
  };
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@ -160,7 +160,10 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
    EIGEN_DEVICE_FUNC
    void checkSanity() const
    {
-      eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::IsAligned, (size_t(m_data) % EIGEN_ALIGN_BYTES) == 0) && "data is not aligned");
+      // TODO "IsAligned" should be replaced to handle arbitrary alignment
+#if EIGEN_MAX_ALIGN_BYTES>0
+      eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::IsAligned, (size_t(m_data) % EIGEN_MAX_ALIGN_BYTES) == 0) && "data is not aligned");
+#endif
    }

    PointerType m_data;
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@ -293,8 +293,8 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
      SizeB = ActualCols * MaxDepth
    };

-    EIGEN_ALIGN_DEFAULT LhsScalar m_staticA[SizeA];
-    EIGEN_ALIGN_DEFAULT RhsScalar m_staticB[SizeB];
+    EIGEN_ALIGN_MAX LhsScalar m_staticA[SizeA];
+    EIGEN_ALIGN_MAX RhsScalar m_staticB[SizeB];

  public:

--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@ -463,7 +463,8 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,R
  Index rowBound = ((rows-skipRows)/rowsAtOnce)*rowsAtOnce + skipRows;
  for (Index i=skipRows; i<rowBound; i+=rowsAtOnce)
  {
-    EIGEN_ALIGN_DEFAULT ResScalar tmp0 = ResScalar(0);
+    // FIXME: what is the purpose of this EIGEN_ALIGN_MAX (formerly EIGEN_ALIGN_DEFAULT)?
+    EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0);
    ResScalar tmp1 = ResScalar(0), tmp2 = ResScalar(0), tmp3 = ResScalar(0);

    // this helps the compiler generating good binary code
@ -572,7 +573,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,R
  {
    for (Index i=start; i<end; ++i)
    {
-      EIGEN_ALIGN_DEFAULT ResScalar tmp0 = ResScalar(0);
+      EIGEN_ALIGN_MAX ResScalar tmp0 = ResScalar(0);
      ResPacket ptmp0 = pset1<ResPacket>(tmp0);
      const LhsScalars lhs0 = lhs.getVectorMapper(i, 0);
      // process first unaligned result's coeffs
--- a/Eigen/src/Core/products/TriangularMatrixMatrix.h
+++ b/Eigen/src/Core/products/TriangularMatrixMatrix.h
@ -274,7 +274,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction

    std::size_t sizeA = kc*mc;
-    std::size_t sizeB = kc*cols+EIGEN_ALIGN_BYTES/sizeof(Scalar);
+    std::size_t sizeB = kc*cols+EIGEN_MAX_ALIGN_BYTES/sizeof(Scalar);

    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());
@ -311,7 +311,7 @@ EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
      Index ts = (IsLower && actual_k2>=cols) ? 0 : actual_kc;

      Scalar* geb = blockB+ts*ts;
-      geb = geb + internal::first_aligned(geb,EIGEN_ALIGN_BYTES/sizeof(Scalar));
+      geb = geb + internal::first_aligned(geb,EIGEN_MAX_ALIGN_BYTES/sizeof(Scalar));

      pack_rhs(geb, rhs.getSubMapper(actual_k2,IsLower ? 0 : k2), actual_kc, rs);

--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // This Source Code Form is subject to the terms of the Mozilla
@ -306,68 +306,10 @@
  #define EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO 1
 #endif

-// 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
-// 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
-// enable alignment, but it can be a cause of problems on some platforms, so we just disable it in
-// certain common platform (compiler+architecture combinations) to avoid these problems.
-// Only static alignment is really problematic (relies on nonstandard compiler extensions that don't
-// work everywhere, for example don't work on GCC/ARM), try to keep heap alignment even
-// when we have to disable static alignment.
-#if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64)
-#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
-#else
-#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
-#endif
-
-// static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX
-#if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
- && !EIGEN_GCC3_OR_OLDER \
- && !EIGEN_COMP_SUNCC \
- && !EIGEN_OS_QNX
-  #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
-#else
-  #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
-#endif
-
-// Defined the boundary (in bytes) on which the data needs to be aligned. Note
-// that unless EIGEN_ALIGN is defined and not equal to 0, the data may not be
-// aligned at all regardless of the value of this #define.
-// TODO should be renamed EIGEN_MAXIMAL_ALIGN_BYTES,
-//      for instance with AVX 1 EIGEN_MAXIMAL_ALIGN_BYTES=32 while for 'int' 16 bytes alignment is always enough,
-//      and 16 bytes alignment is also enough for Vector4f.
-#define EIGEN_ALIGN_BYTES 16
-
-#ifdef EIGEN_DONT_ALIGN
-  #ifndef EIGEN_DONT_ALIGN_STATICALLY
-    #define EIGEN_DONT_ALIGN_STATICALLY
-  #endif
-  #define EIGEN_ALIGN 0
-#elif !defined(EIGEN_DONT_VECTORIZE)
-  #if defined(__AVX__)
-    #undef EIGEN_ALIGN_BYTES
-    #define EIGEN_ALIGN_BYTES 32
-  #endif
-  #define EIGEN_ALIGN 1
-#else
-  #define EIGEN_ALIGN 0
-#endif
-
-
 // This macro can be used to prevent from macro expansion, e.g.:
 //   std::max EIGEN_NOT_A_MACRO(a,b)
 #define EIGEN_NOT_A_MACRO

-// EIGEN_ALIGN_STATICALLY is the true test whether we want to align arrays on the stack or not. It takes into account both the user choice to explicitly disable
-// alignment (EIGEN_DONT_ALIGN_STATICALLY) and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). Henceforth, only EIGEN_ALIGN_STATICALLY should be used.
-#if EIGEN_ARCH_WANTS_STACK_ALIGNMENT && !defined(EIGEN_DONT_ALIGN_STATICALLY)
-  #define EIGEN_ALIGN_STATICALLY 1
-#else
-  #define EIGEN_ALIGN_STATICALLY 0
-  #ifndef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
-    #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
-  #endif
-#endif
-
 #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
 #define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::RowMajor
 #else
@ -585,6 +527,20 @@ namespace Eigen {
  #endif
 #endif

+
+//------------------------------------------------------------------------------------------
+// Static and dynamic alignment control
+// 
+// The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES
+// as the maximal boundary in bytes on which dynamically and statically allocated data may be aligned, respectively.
+// The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not,
+// a default value is automatically computed based on architecture, compiler, and OS.
+// 
+// This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,_MAX}
+// to be used to declare statically aligned buffers.
+//------------------------------------------------------------------------------------------
+
+
 /* EIGEN_ALIGN_TO_BOUNDARY(n) forces data to be n-byte aligned. This is used to satisfy SIMD requirements.
 * However, we do that EVEN if vectorization (EIGEN_VECTORIZE) is disabled,
 * so that vectorization doesn't affect binary compatibility.
@ -605,23 +561,116 @@ namespace Eigen {
  #error Please tell me what is the equivalent of __attribute__((aligned(n))) for your compiler
 #endif

+// If the user explicitly disables vectorization, then we also disable alignment
+#if defined(EIGEN_DONT_VECTORIZE)
+  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
+#elif defined(__AVX__)
+  // 32 bytes static alignment is preferred only if really required
+  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
+#else
+  #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
+#endif
+
+
+// EIGEN_MIN_ALIGN_BYTES defines the minimal value for which the notion of explicit alignment makes sense
+#define EIGEN_MIN_ALIGN_BYTES 16
+
+// The following defines the maximal boundary (in bytes) on which statically allocated data may be aligned.
+// Note that a value of 0 (e.g. implied by EIGEN_DONT_ALIGN or EIGEN_DONT_ALIGN_STATICALLY)
+// means that statically allocated data is not explicitly aligned at all.
+
+#if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN))  && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0
+#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
+#endif
+
+// EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprecated
+// They imply EIGEN_MAX_STATIC_ALIGN_BYTES=0
+#if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
+  #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
+    #undef EIGEN_MAX_STATIC_ALIGN_BYTES
+  #endif
+  #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
+#endif
+
+#ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
+
+  // Try to automatically guess what is the best default value for EIGEN_MAX_STATIC_ALIGN_BYTES
+  
+  // 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable
+  // 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always
+  // enable alignment, but it can be a cause of problems on some platforms, so we just disable it in
+  // certain common platform (compiler+architecture combinations) to avoid these problems.
+  // Only static alignment is really problematic (relies on nonstandard compiler extensions that don't
+  // work everywhere, for example don't work on GCC/ARM), try to keep heap alignment even
+  // when we have to disable static alignment.
+  #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64)
+  #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
+  #else
+  #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
+  #endif
+
+  // static alignment is completely disabled with GCC 3, Sun Studio, and QCC/QNX
+  #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
+  && !EIGEN_GCC3_OR_OLDER \
+  && !EIGEN_COMP_SUNCC \
+  && !EIGEN_OS_QNX
+    #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
+  #else
+    #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
+  #endif
+  
+  #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
+    #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
+  #endif
+  
+#endif
+
+// If EIGEN_MAX_ALIGN_BYTES is defined, then it is considered as an upper bound for EIGEN_MAX_STATIC_ALIGN_BYTES
+#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES
+#undef EIGEN_MAX_STATIC_ALIGN_BYTES
+#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
+#endif
+
+#if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
+  #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
+#endif
+
+// At this stage, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test whether we want to align arrays on the stack or not.
+// It takes into account both the user choice to explicitly enable/disable alignment (by setting EIGEN_MAX_STATIC_ALIGN_BYTES)
+// and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT).
+// Henceforth, only EIGEN_MAX_STATIC_ALIGN_BYTES should be used.
+
+
+// Shortcuts to EIGEN_ALIGN_TO_BOUNDARY
 #define EIGEN_ALIGN8  EIGEN_ALIGN_TO_BOUNDARY(8)
 #define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
 #define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
-#define EIGEN_ALIGN_DEFAULT EIGEN_ALIGN_TO_BOUNDARY(EIGEN_ALIGN_BYTES)
-
-#if EIGEN_ALIGN_STATICALLY
-#define EIGEN_USER_ALIGN_TO_BOUNDARY(n) EIGEN_ALIGN_TO_BOUNDARY(n)
-#define EIGEN_USER_ALIGN16 EIGEN_ALIGN16
-#define EIGEN_USER_ALIGN32 EIGEN_ALIGN32
-#define EIGEN_USER_ALIGN_DEFAULT EIGEN_ALIGN_DEFAULT
+#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
+#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
+#define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
 #else
-#define EIGEN_USER_ALIGN_TO_BOUNDARY(n)
-#define EIGEN_USER_ALIGN16
-#define EIGEN_USER_ALIGN32
-#define EIGEN_USER_ALIGN_DEFAULT
+#define EIGEN_ALIGN_MAX
 #endif

+
+// Dynamic alignment control
+
+#if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0
+#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
+#endif
+
+#ifdef EIGEN_DONT_ALIGN
+  #ifdef EIGEN_MAX_ALIGN_BYTES
+    #undef EIGEN_MAX_ALIGN_BYTES
+  #endif
+  #define EIGEN_MAX_ALIGN_BYTES 0
+#elif !defined(EIGEN_MAX_ALIGN_BYTES)
+  #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
+#endif
+
+//----------------------------------------------------------------------
+
+
 #ifdef EIGEN_DONT_USE_RESTRICT_KEYWORD
  #define EIGEN_RESTRICT
 #endif
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008-2014 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
 // Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
 // Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com>
 // Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
@ -32,7 +32,7 @@
 // page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed
 // quite safe, at least within the context of glibc, to equate 64-bit with LP64.
 #if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
- && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_ALIGN_BYTES == 16)
+ && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_MAX_ALIGN_BYTES == 16)
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
 #else
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
@ -42,14 +42,14 @@
 //   See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup
 // FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures
 //   See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
-#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_ALIGN_BYTES == 16)
+#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_MAX_ALIGN_BYTES == 16)
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
 #else
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
 #endif

-#if (EIGEN_OS_MAC && (EIGEN_ALIGN_BYTES == 16))     \
- || (EIGEN_OS_WIN64 && (EIGEN_ALIGN_BYTES == 16))   \
+#if (EIGEN_OS_MAC && (EIGEN_MAX_ALIGN_BYTES == 16))     \
+ || (EIGEN_OS_WIN64 && (EIGEN_MAX_ALIGN_BYTES == 16))   \
 || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED              \
 || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
  #define EIGEN_MALLOC_ALREADY_ALIGNED 1
@ -107,9 +107,9 @@ inline void throw_std_bad_alloc()
  */
 inline void* handmade_aligned_malloc(std::size_t size)
 {
-  void *original = std::malloc(size+EIGEN_ALIGN_BYTES);
+  void *original = std::malloc(size+EIGEN_MAX_ALIGN_BYTES);
  if (original == 0) return 0;
-  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES);
+  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES);
  *(reinterpret_cast<void**>(aligned) - 1) = original;
  return aligned;
 }
@ -130,9 +130,9 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t =
  if (ptr == 0) return handmade_aligned_malloc(size);
  void *original = *(reinterpret_cast<void**>(ptr) - 1);
  std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
-  original = std::realloc(original,size+EIGEN_ALIGN_BYTES);
+  original = std::realloc(original,size+EIGEN_MAX_ALIGN_BYTES);
  if (original == 0) return 0;
-  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_ALIGN_BYTES-1))) + EIGEN_ALIGN_BYTES);
+  void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES-1))) + EIGEN_MAX_ALIGN_BYTES);
  void *previous_aligned = static_cast<char *>(original)+previous_offset;
  if(aligned!=previous_aligned)
    std::memmove(aligned, previous_aligned, size);
@ -218,16 +218,16 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size)
  check_that_malloc_is_allowed();

  void *result;
-  #if !EIGEN_ALIGN
+  #if EIGEN_MAX_ALIGN_BYTES==0
    result = std::malloc(size);
  #elif EIGEN_MALLOC_ALREADY_ALIGNED
    result = std::malloc(size);
  #elif EIGEN_HAS_POSIX_MEMALIGN
-    if(posix_memalign(&result, EIGEN_ALIGN_BYTES, size)) result = 0;
+    if(posix_memalign(&result, EIGEN_MAX_ALIGN_BYTES, size)) result = 0;
  #elif EIGEN_HAS_MM_MALLOC
-    result = _mm_malloc(size, EIGEN_ALIGN_BYTES);
+    result = _mm_malloc(size, EIGEN_MAX_ALIGN_BYTES);
  #elif EIGEN_OS_WIN_STRICT
-    result = _aligned_malloc(size, EIGEN_ALIGN_BYTES);
+    result = _aligned_malloc(size, EIGEN_MAX_ALIGN_BYTES);
  #else
    result = handmade_aligned_malloc(size);
  #endif
@ -241,7 +241,7 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(size_t size)
 /** \internal Frees memory allocated with aligned_malloc. */
 EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
 {
-  #if !EIGEN_ALIGN
+  #if EIGEN_MAX_ALIGN_BYTES==0
    std::free(ptr);
  #elif EIGEN_MALLOC_ALREADY_ALIGNED
    std::free(ptr);
@ -266,7 +266,7 @@ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
  EIGEN_UNUSED_VARIABLE(old_size);

  void *result;
-#if !EIGEN_ALIGN
+#if EIGEN_MAX_ALIGN_BYTES==0
  result = std::realloc(ptr,new_size);
 #elif EIGEN_MALLOC_ALREADY_ALIGNED
  result = std::realloc(ptr,new_size);
@ -277,12 +277,12 @@ inline void* aligned_realloc(void *ptr, size_t new_size, size_t old_size)
  // implements _mm_malloc/_mm_free based on the corresponding _aligned_
  // functions. This may not always be the case and we just try to be safe.
  #if EIGEN_OS_WIN_STRICT && defined(_mm_free)
-    result = _aligned_realloc(ptr,new_size,EIGEN_ALIGN_BYTES);
+    result = _aligned_realloc(ptr,new_size,EIGEN_MAX_ALIGN_BYTES);
  #else
    result = generic_aligned_realloc(ptr,new_size,old_size);
  #endif
 #elif EIGEN_OS_WIN_STRICT
-  result = _aligned_realloc(ptr,new_size,EIGEN_ALIGN_BYTES);
+  result = _aligned_realloc(ptr,new_size,EIGEN_MAX_ALIGN_BYTES);
 #else
  result = handmade_aligned_realloc(ptr,new_size,old_size);
 #endif
@ -691,7 +691,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
 #ifdef EIGEN_ALLOCA
  // We always manually re-align the result of EIGEN_ALLOCA.
  // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
-  #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA(SIZE+EIGEN_ALIGN_BYTES-1)) + EIGEN_ALIGN_BYTES-1) & ~(size_t(EIGEN_ALIGN_BYTES-1)))
+  #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA(SIZE+EIGEN_MAX_ALIGN_BYTES-1)) + EIGEN_MAX_ALIGN_BYTES-1) & ~(size_t(EIGEN_MAX_ALIGN_BYTES-1)))

  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
@ -715,7 +715,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
 *** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF]                ***
 *****************************************************************************/

-#if EIGEN_ALIGN
+#if EIGEN_MAX_ALIGN_BYTES!=0
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
      void* operator new(size_t size, const std::nothrow_t&) throw() { \
        EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
@ -751,7 +751,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)

 #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
 #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
-  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_ALIGN_BYTES==0)))
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(((Size)!=Eigen::Dynamic) && ((sizeof(Scalar)*(Size))%EIGEN_MAX_ALIGN_BYTES==0)))

 /****************************************************************************/

--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@ -167,7 +167,7 @@ class compute_matrix_evaluator_flags
      (
            ((Options&DontAlign)==0)
        && (
-#if EIGEN_ALIGN_STATICALLY
+#if EIGEN_MAX_STATIC_ALIGN_BYTES!=0
             ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % align_bytes) == 0))
 #else
             0
@ -175,7 +175,7 @@ class compute_matrix_evaluator_flags

          ||

-#if EIGEN_ALIGN
+#if EIGEN_MAX_ALIGN_BYTES!=0
             is_dynamic_size_storage
 #else
             0
--- a/test/sizeof.cpp
+++ b/test/sizeof.cpp
@ -21,6 +21,17 @@ template<typename MatrixType> void verifySizeOf(const MatrixType&)
 void test_sizeof()
 {
  CALL_SUBTEST(verifySizeOf(Matrix<float, 1, 1>()) );
+  CALL_SUBTEST(verifySizeOf(Array<float, 2, 1>()) );
+  CALL_SUBTEST(verifySizeOf(Array<float, 3, 1>()) );
+  CALL_SUBTEST(verifySizeOf(Array<float, 4, 1>()) );
+  CALL_SUBTEST(verifySizeOf(Array<float, 5, 1>()) );
+  CALL_SUBTEST(verifySizeOf(Array<float, 6, 1>()) );
+  CALL_SUBTEST(verifySizeOf(Array<float, 7, 1>()) );
+  CALL_SUBTEST(verifySizeOf(Array<float, 8, 1>()) );
+  CALL_SUBTEST(verifySizeOf(Array<float, 9, 1>()) );
+  CALL_SUBTEST(verifySizeOf(Array<float, 10, 1>()) );
+  CALL_SUBTEST(verifySizeOf(Array<float, 11, 1>()) );
+  CALL_SUBTEST(verifySizeOf(Array<float, 12, 1>()) );
  CALL_SUBTEST(verifySizeOf(Vector2d()) );
  CALL_SUBTEST(verifySizeOf(Vector4f()) );
  CALL_SUBTEST(verifySizeOf(Matrix4d()) );
--- a/test/unalignedassert.cpp
+++ b/test/unalignedassert.cpp
@ -2,11 +2,25 @@
 // for linear algebra.
 //
 // Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
+// Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

+#if defined(EIGEN_TEST_PART_1)
+  // default
+#elif defined(EIGEN_TEST_PART_2)
+  #define EIGEN_MAX_STATIC_ALIGN_BYTES 16
+  #define EIGEN_MAX_ALIGN_BYTES 16
+#elif defined(EIGEN_TEST_PART_3)
+  #define EIGEN_MAX_STATIC_ALIGN_BYTES 32
+  #define EIGEN_MAX_ALIGN_BYTES 32
+#elif defined(EIGEN_TEST_PART_4)
+  #define EIGEN_MAX_STATIC_ALIGN_BYTES 64
+  #define EIGEN_MAX_ALIGN_BYTES 64
+#endif
+
 #include "main.h"

 typedef Matrix<float,  6,1> Vector6f;
@ -48,7 +62,7 @@ struct TestNew4
 struct TestNew5
 {
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW
-  float f; // try the f at first -- the EIGEN_ALIGN16 attribute of m should make that still work
+  float f; // try the f at first -- the EIGEN_ALIGN_MAX attribute of m should make that still work
  Matrix4f m;
 };

@ -75,13 +89,13 @@ void check_unalignedassert_good()
  delete[] y;
 }

-#if EIGEN_ALIGN_STATICALLY
+#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
 template<typename T>
 void construct_at_boundary(int boundary)
 {
  char buf[sizeof(T)+256];
  size_t _buf = reinterpret_cast<size_t>(buf);
-  _buf += (EIGEN_ALIGN_BYTES - (_buf % EIGEN_ALIGN_BYTES)); // make 16/32-byte aligned
+  _buf += (EIGEN_MAX_ALIGN_BYTES - (_buf % EIGEN_MAX_ALIGN_BYTES)); // make 16/32/...-byte aligned
  _buf += boundary; // make exact boundary-aligned
  T *x = ::new(reinterpret_cast<void*>(_buf)) T;
  x[0].setZero(); // just in order to silence warnings
@ -91,34 +105,34 @@ void construct_at_boundary(int boundary)

 void unalignedassert()
 {
-#if EIGEN_ALIGN_STATICALLY
+#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
  construct_at_boundary<Vector2f>(4);
  construct_at_boundary<Vector3f>(4);
  construct_at_boundary<Vector4f>(16);
  construct_at_boundary<Vector6f>(4);
-  construct_at_boundary<Vector8f>(EIGEN_ALIGN_BYTES);
+  construct_at_boundary<Vector8f>(EIGEN_MAX_ALIGN_BYTES);
  construct_at_boundary<Vector12f>(16);
  construct_at_boundary<Matrix2f>(16);
  construct_at_boundary<Matrix3f>(4);
-  construct_at_boundary<Matrix4f>(EIGEN_ALIGN_BYTES);
+  construct_at_boundary<Matrix4f>(EIGEN_MAX_ALIGN_BYTES);

  construct_at_boundary<Vector2d>(16);
  construct_at_boundary<Vector3d>(4);
-  construct_at_boundary<Vector4d>(EIGEN_ALIGN_BYTES);
+  construct_at_boundary<Vector4d>(EIGEN_MAX_ALIGN_BYTES);
  construct_at_boundary<Vector5d>(4);
  construct_at_boundary<Vector6d>(16);
  construct_at_boundary<Vector7d>(4);
-  construct_at_boundary<Vector8d>(EIGEN_ALIGN_BYTES);
+  construct_at_boundary<Vector8d>(EIGEN_MAX_ALIGN_BYTES);
  construct_at_boundary<Vector9d>(4);
  construct_at_boundary<Vector10d>(16);
-  construct_at_boundary<Vector12d>(EIGEN_ALIGN_BYTES);
-  construct_at_boundary<Matrix2d>(EIGEN_ALIGN_BYTES);
+  construct_at_boundary<Vector12d>(EIGEN_MAX_ALIGN_BYTES);
+  construct_at_boundary<Matrix2d>(EIGEN_MAX_ALIGN_BYTES);
  construct_at_boundary<Matrix3d>(4);
-  construct_at_boundary<Matrix4d>(EIGEN_ALIGN_BYTES);
+  construct_at_boundary<Matrix4d>(EIGEN_MAX_ALIGN_BYTES);

  construct_at_boundary<Vector2cf>(16);
  construct_at_boundary<Vector3cf>(4);
-  construct_at_boundary<Vector2cd>(EIGEN_ALIGN_BYTES);
+  construct_at_boundary<Vector2cd>(EIGEN_MAX_ALIGN_BYTES);
  construct_at_boundary<Vector3cd>(16);
 #endif

@ -131,8 +145,8 @@ void unalignedassert()
  check_unalignedassert_good<TestNew6>();
  check_unalignedassert_good<Depends<true> >();

-#if EIGEN_ALIGN_STATICALLY
-  if(EIGEN_ALIGN_BYTES>=16)
+#if EIGEN_MAX_STATIC_ALIGN_BYTES>0
+  if(EIGEN_MAX_ALIGN_BYTES>=16)
  {
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4f>(8));
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector8f>(8));
@ -146,7 +160,7 @@ void unalignedassert()
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector2cf>(8));
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector4i>(8));
  }
-  for(int b=8; b<EIGEN_ALIGN_BYTES; b+=8)
+  for(int b=8; b<EIGEN_MAX_ALIGN_BYTES; b+=8)
  {
    VERIFY_RAISES_ASSERT(construct_at_boundary<Vector8f>(b));
    VERIFY_RAISES_ASSERT(construct_at_boundary<Matrix4f>(b));