From 74207a31fa1278a027a8761dfc74d62466e1ad31 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Sat, 2 Jan 2010 12:45:49 -0500 Subject: [PATCH] backport the fix to bug #79, and the unit test --- Eigen/src/Core/util/Memory.h | 49 +++++++++++++++++++++------ test/CMakeLists.txt | 1 + test/first_aligned.cpp | 64 ++++++++++++++++++++++++++++++++++++ 3 files changed, 104 insertions(+), 10 deletions(-) create mode 100644 test/first_aligned.cpp diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 0a43e7f7b..d12a2ec26 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -209,18 +209,47 @@ template inline void ei_conditional_aligned_delete(T *pt ei_conditional_aligned_free(ptr); } -/** \internal \returns the number of elements which have to be skipped such that data are 16 bytes aligned */ -template -inline static int ei_alignmentOffset(const Scalar* ptr, int maxOffset) +/** \internal \returns the index of the first element of the array that is well aligned for vectorization. + * + * \param array the address of the start of the array + * \param size the size of the array + * + * \note If no element of the array is well aligned, the size of the array is returned. Typically, + * for example with SSE, "well aligned" means 16-byte-aligned. If vectorization is disabled or if the + * packet size for the given scalar type is 1, then everything is considered well-aligned. + * + * \note If the scalar type is vectorizable, we rely on the following assumptions: sizeof(Scalar) is a + * power of 2, the packet size in bytes is also a power of 2, and is a multiple of sizeof(Scalar). On the + * other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails for + * example with Scalar=double on certain 32-bit platforms, see bug #79. + * + * There is also the variant ei_first_aligned(const MatrixBase&, Integer) defined in Coeffs.h. + */ +template +inline static Integer ei_alignmentOffset(const Scalar* array, Integer size) { typedef typename ei_packet_traits::type Packet; - const int PacketSize = ei_packet_traits::size; - const int PacketAlignedMask = PacketSize-1; - const bool Vectorized = PacketSize>1; - return Vectorized - ? std::min( (PacketSize - (int((size_t(ptr)/sizeof(Scalar))) & PacketAlignedMask)) - & PacketAlignedMask, maxOffset) - : 0; + enum { PacketSize = ei_packet_traits::size, + PacketAlignedMask = PacketSize-1 + }; + + if(PacketSize==1) + { + // Either there is no vectorization, or a packet consists of exactly 1 scalar so that all elements + // of the array have the same aligment. + return 0; + } + else if(size_t(array) & (sizeof(Scalar)-1)) + { + // There is vectorization for this scalar type, but the array is not aligned to the size of a single scalar. + // Consequently, no element of the array is well aligned. + return size; + } + else + { + return std::min( (PacketSize - (Integer((size_t(array)/sizeof(Scalar))) & PacketAlignedMask)) + & PacketAlignedMask, size); + } } /** \internal diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ffcd77654..711660c4a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -180,6 +180,7 @@ ei_add_test(meta) ei_add_test(sizeof) ei_add_test(dynalloc) ei_add_test(nomalloc) +ei_add_test(first_aligned) ei_add_test(mixingtypes) ei_add_test(packetmath) ei_add_test(unalignedassert) diff --git a/test/first_aligned.cpp b/test/first_aligned.cpp new file mode 100644 index 000000000..047266c6d --- /dev/null +++ b/test/first_aligned.cpp @@ -0,0 +1,64 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Benoit Jacob +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#include "main.h" + +template +void test_first_aligned_helper(Scalar *array, int size) +{ + const int packet_size = sizeof(Scalar) * ei_packet_traits::size; + VERIFY(((size_t(array) + sizeof(Scalar) * ei_alignmentOffset(array, size)) % packet_size) == 0); +} + +template +void test_none_aligned_helper(Scalar *array, int size) +{ + VERIFY(ei_packet_traits::size == 1 || ei_alignmentOffset(array, size) == size); +} + +struct some_non_vectorizable_type { float x; }; + +void test_first_aligned() +{ + EIGEN_ALIGN_128 float array_float[100]; + test_first_aligned_helper(array_float, 50); + test_first_aligned_helper(array_float+1, 50); + test_first_aligned_helper(array_float+2, 50); + test_first_aligned_helper(array_float+3, 50); + test_first_aligned_helper(array_float+4, 50); + test_first_aligned_helper(array_float+5, 50); + + EIGEN_ALIGN_128 double array_double[100]; + test_first_aligned_helper(array_float, 50); + test_first_aligned_helper(array_float+1, 50); + test_first_aligned_helper(array_float+2, 50); + + double *array_double_plus_4_bytes = (double*)(size_t(array_double)+4); + test_none_aligned_helper(array_double_plus_4_bytes, 50); + test_none_aligned_helper(array_double_plus_4_bytes+1, 50); + + some_non_vectorizable_type array_nonvec[100]; + test_first_aligned_helper(array_nonvec, 100); + test_none_aligned_helper(array_nonvec, 100); +}