diff --git a/CMakeLists.txt b/CMakeLists.txt index 4e9c4533d..a85bbf222 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -140,7 +140,13 @@ option(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION "Disable explicit vectorization in t if(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION) add_definitions(-DEIGEN_DONT_VECTORIZE=1) message("Disabling vectorization in tests/examples") -endif(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION) +endif() + +option(EIGEN_TEST_NO_EXPLICIT_ALIGNMENT "Disable explicit alignment (hence vectorization) in tests/examples" OFF) +if(EIGEN_TEST_NO_EXPLICIT_ALIGNMENT) + add_definitions(-DEIGEN_DONT_ALIGN=1) + message("Disabling alignment in tests/examples") +endif() option(EIGEN_TEST_C++0x "Enables all C++0x features." OFF) diff --git a/Eigen/src/Core/MatrixStorage.h b/Eigen/src/Core/MatrixStorage.h index ece603ffa..3303b2663 100644 --- a/Eigen/src/Core/MatrixStorage.h +++ b/Eigen/src/Core/MatrixStorage.h @@ -50,12 +50,6 @@ struct ei_matrix_array ei_matrix_array(ei_constructor_without_unaligned_array_assert) {} }; -// FIXME!!! This is a hack because ARM gcc does not honour __attribute__((aligned(16))) properly -#ifdef __ARM_NEON__ - #ifndef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT - #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT - #endif -#endif #ifdef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT #define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(sizemask) #else diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 7968d6604..7236b42f2 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -36,13 +36,17 @@ (EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \ EIGEN_MINOR_VERSION>=z)))) -// 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable 16 byte alignment on all -// platforms where vectorization might be enabled. 
In theory we could always enable alignment, but it can be a cause of problems -// on some platforms, so we just disable it in certain common platform (compiler+architecture combinations) to avoid these problems. -#if defined(__GNUC__) && !(defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || defined(__ppc__) || defined(__ia64__) || defined(__ARM_NEON__)) -#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_ALIGNMENT 1 +// 16 byte alignment is only useful for vectorization. Since it affects the ABI, we need to enable +// 16 byte alignment on all platforms where vectorization might be enabled. In theory we could always +// enable alignment, but it can be a cause of problems on some platforms, so we just disable it in +// certain common platforms (compiler+architecture combinations) to avoid these problems. +// Only stack alignment is really problematic (it relies on nonstandard compiler extensions that don't +// work everywhere, for example they don't work on GCC/ARM), so we try to keep heap alignment even +// when we have to disable stack alignment. 
+#if defined(__GNUC__) && !(defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || defined(__ppc__) || defined(__ia64__)) +#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1 #else -#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_ALIGNMENT 0 +#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0 #endif #if defined(__GNUC__) && (__GNUC__ <= 3) @@ -51,27 +55,42 @@ #define EIGEN_GCC3_OR_OLDER 0 #endif -// FIXME vectorization + alignment is completely disabled with sun studio -#if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_ALIGNMENT && !EIGEN_GCC3_OR_OLDER && !defined(__SUNPRO_CC) - #define EIGEN_ARCH_WANTS_ALIGNMENT 1 +// FIXME vectorization + stack alignment is completely disabled with sun studio +#if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT && !EIGEN_GCC3_OR_OLDER && !defined(__SUNPRO_CC) + #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1 #else - #define EIGEN_ARCH_WANTS_ALIGNMENT 0 + #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0 #endif -// EIGEN_ALIGN is the true test whether we want to align or not. It takes into account both the user choice to explicitly disable -// alignment (EIGEN_DONT_ALIGN) and the architecture config (EIGEN_ARCH_WANTS_ALIGNMENT). Henceforth, only EIGEN_ALIGN should be used. -#if EIGEN_ARCH_WANTS_ALIGNMENT && !defined(EIGEN_DONT_ALIGN) - #define EIGEN_ALIGN 1 +#ifdef EIGEN_DONT_ALIGN + #ifndef EIGEN_DONT_ALIGN_STACK + #define EIGEN_DONT_ALIGN_STACK + #endif + #ifndef EIGEN_DONT_ALIGN_HEAP + #define EIGEN_DONT_ALIGN_HEAP + #endif +#endif + +// EIGEN_ALIGN_STACK is the true test whether we want to align arrays on the stack or not. It takes into account both the user choice to explicitly disable +// alignment (EIGEN_DONT_ALIGN_STACK) and the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT). Henceforth, only EIGEN_ALIGN_STACK should be used. 
+#if EIGEN_ARCH_WANTS_STACK_ALIGNMENT && !defined(EIGEN_DONT_ALIGN_STACK) + #define EIGEN_ALIGN_STACK 1 #else - #define EIGEN_ALIGN 0 + #define EIGEN_ALIGN_STACK 0 #ifdef EIGEN_VECTORIZE - #error "Vectorization enabled, but our platform checks say that we don't do 16 byte alignment on this platform. If you added vectorization for another architecture, you also need to edit this platform check." + #error "Vectorization enabled, but our platform checks say that we don't do 16 byte stack alignment on this platform. If you added vectorization for another architecture, you also need to edit this platform check." #endif #ifndef EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT #endif #endif +#ifndef EIGEN_DONT_ALIGN_HEAP + #define EIGEN_ALIGN_HEAP 1 +#else + #define EIGEN_ALIGN_HEAP 0 +#endif + #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR #define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION RowMajor #else @@ -185,7 +204,7 @@ using Eigen::ei_cos; * If we made alignment depend on whether or not EIGEN_VECTORIZE is defined, it would be impossible to link * vectorized and non-vectorized code. */ -#if !EIGEN_ALIGN +#if !EIGEN_ALIGN_STACK #define EIGEN_ALIGN_TO_BOUNDARY(n) #elif (defined __GNUC__) || (defined __PGI) #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n))) diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index aa0073d44..9442dffb6 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -172,7 +172,7 @@ inline void* ei_aligned_malloc(size_t size) #endif void *result; - #if !EIGEN_ALIGN + #if !EIGEN_ALIGN_HEAP result = std::malloc(size); #elif EIGEN_MALLOC_ALREADY_ALIGNED result = std::malloc(size); @@ -196,7 +196,7 @@ inline void* ei_aligned_malloc(size_t size) /** \internal Frees memory allocated with ei_aligned_malloc. 
*/ inline void ei_aligned_free(void *ptr) { - #if !EIGEN_ALIGN + #if !EIGEN_ALIGN_HEAP std::free(ptr); #elif EIGEN_MALLOC_ALREADY_ALIGNED std::free(ptr); @@ -221,7 +221,7 @@ inline void* ei_aligned_realloc(void *ptr, size_t new_size, size_t old_size) (void)old_size; // Suppress 'unused variable' warning. Seen in boost tee. void *result; -#if !EIGEN_ALIGN +#if !EIGEN_ALIGN_HEAP result = std::realloc(ptr,new_size); #elif EIGEN_MALLOC_ALREADY_ALIGNED result = std::realloc(ptr,new_size); @@ -443,7 +443,7 @@ inline static Integer ei_first_aligned(const Scalar* array, Integer size) *** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF] *** *****************************************************************************/ -#if EIGEN_ALIGN +#if EIGEN_ALIGN_HEAP #ifdef EIGEN_EXCEPTIONS #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \ void* operator new(size_t size, const std::nothrow_t&) throw() { \ diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index eff055b04..b4f72b62c 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -88,15 +88,20 @@ class ei_compute_matrix_flags enum { row_major_bit = Options&RowMajor ? RowMajorBit : 0, is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic, -#if !defined(__ARM_NEON__) +#if EIGEN_ALIGN_STACK is_fixed_size_aligned = (!is_dynamic_size_storage) && (((MaxCols*MaxRows) % ei_packet_traits::size) == 0), #else -// FIXME!!! This is a hack because ARM gcc does not honour __attribute__((aligned(16))) properly is_fixed_size_aligned = 0, #endif +#if EIGEN_ALIGN_HEAP + is_dynamic_size_aligned = is_dynamic_size_storage, +#else + is_dynamic_size_aligned = 0, +#endif + aligned_bit = (((Options&DontAlign)==0) - && (is_dynamic_size_storage || is_fixed_size_aligned)) + && (is_dynamic_size_aligned || is_fixed_size_aligned)) ? AlignedBit : 0, packet_access_bit = ei_packet_traits::size > 1 && aligned_bit ? 
PacketAccessBit : 0 }; diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake index b08f8c340..3bb9aed2b 100644 --- a/cmake/EigenTesting.cmake +++ b/cmake/EigenTesting.cmake @@ -154,53 +154,55 @@ macro(ei_testing_print_summary) message("Default order: Column-major") endif() - if(EIGEN_TEST_SSE2) - message("SSE2: ON") + if(EIGEN_TEST_NO_EXPLICIT_ALIGNMENT) + message("Explicit alignment (hence vectorization) disabled") + elseif(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION) + message("Explicit vectorization disabled (alignment kept enabled)") else() - message("SSE2: Using architecture defaults") - endif() - if(EIGEN_TEST_SSE3) - message("SSE3: ON") - else() - message("SSE3: Using architecture defaults") - endif() + if(EIGEN_TEST_SSE2) + message("SSE2: ON") + else() + message("SSE2: Using architecture defaults") + endif() - if(EIGEN_TEST_SSSE3) - message("SSSE3: ON") - else() - message("SSSE3: Using architecture defaults") - endif() + if(EIGEN_TEST_SSE3) + message("SSE3: ON") + else() + message("SSE3: Using architecture defaults") + endif() - if(EIGEN_TEST_SSE4_1) - message("SSE4.1: ON") - else() - message("SSE4.1: Using architecture defaults") - endif() + if(EIGEN_TEST_SSSE3) + message("SSSE3: ON") + else() + message("SSSE3: Using architecture defaults") + endif() - if(EIGEN_TEST_SSE4_2) - message("SSE4.2: ON") - else() - message("SSE4.2: Using architecture defaults") - endif() + if(EIGEN_TEST_SSE4_1) + message("SSE4.1: ON") + else() + message("SSE4.1: Using architecture defaults") + endif() - if(EIGEN_TEST_ALTIVEC) - message("Altivec: ON") - else() - message("Altivec: Using architecture defaults") - endif() + if(EIGEN_TEST_SSE4_2) + message("SSE4.2: ON") + else() + message("SSE4.2: Using architecture defaults") + endif() - if(EIGEN_TEST_NEON) - message("ARM NEON: ON") - else() - message("ARM NEON: Using architecture defaults") - endif() + if(EIGEN_TEST_ALTIVEC) + message("Altivec: ON") + else() + message("Altivec: Using architecture defaults") + endif() - 
if(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION) - message("Explicit vec: OFF") - else() - message("Explicit vec: Using architecture defaults") - endif() + if(EIGEN_TEST_NEON) + message("ARM NEON: ON") + else() + message("ARM NEON: Using architecture defaults") + endif() + + endif() # vectorization / alignment options message("\n${EIGEN_TESTING_SUMMARY}") # message("CXX: ${CMAKE_CXX_COMPILER}") diff --git a/test/dynalloc.cpp b/test/dynalloc.cpp index e0a9f9f86..85a39ec83 100644 --- a/test/dynalloc.cpp +++ b/test/dynalloc.cpp @@ -24,7 +24,7 @@ #include "main.h" -#if EIGEN_ALIGN +#if EIGEN_ALIGN_HEAP #define ALIGNMENT 16 #else #define ALIGNMENT 1 diff --git a/test/unalignedassert.cpp b/test/unalignedassert.cpp index 85a83b7b5..497c5a5aa 100644 --- a/test/unalignedassert.cpp +++ b/test/unalignedassert.cpp @@ -78,7 +78,7 @@ void check_unalignedassert_good() delete[] y; } -#if EIGEN_ALIGN +#if EIGEN_ALIGN_STACK template void construct_at_boundary(int boundary) { @@ -94,7 +94,7 @@ void construct_at_boundary(int boundary) void unalignedassert() { - #if EIGEN_ALIGN + #if EIGEN_ALIGN_STACK construct_at_boundary(4); construct_at_boundary(4); construct_at_boundary(16); @@ -124,7 +124,7 @@ void unalignedassert() check_unalignedassert_good(); check_unalignedassert_good >(); -#if EIGEN_ALIGN +#if EIGEN_ALIGN_STACK VERIFY_RAISES_ASSERT(construct_at_boundary(8)); VERIFY_RAISES_ASSERT(construct_at_boundary(8)); VERIFY_RAISES_ASSERT(construct_at_boundary(8));