mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-11 03:09:01 +08:00
* Fix bug #79: ei_alignmentOffset was assuming that ptr is multiple of
sizeof(Scalar), and that assumption breaks with double on linux x86-32. * Rename ei_alignmentOffset to ei_first_aligned * Rewrite its documentation and part of its body * The variant taking a MatrixBase doesn't need a separate size argument.
This commit is contained in:
parent
ed5c972801
commit
25f8adfa6c
@ -386,14 +386,22 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling
|
|||||||
const int size = dst.size();
|
const int size = dst.size();
|
||||||
const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
|
const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
|
||||||
const int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
|
const int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
|
||||||
: ei_alignmentOffset(&dst.coeffRef(0), size);
|
: ei_first_aligned(&dst.coeffRef(0), size);
|
||||||
const int alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
|
const int alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
|
||||||
|
|
||||||
|
EIGEN_DEBUG_VAR(&dst.coeffRef(0));
|
||||||
|
EIGEN_DEBUG_VAR(size);
|
||||||
|
EIGEN_DEBUG_VAR(packetSize);
|
||||||
|
EIGEN_DEBUG_VAR(alignedStart);
|
||||||
|
EIGEN_DEBUG_VAR(alignedEnd);
|
||||||
|
|
||||||
for(int index = 0; index < alignedStart; ++index)
|
for(int index = 0; index < alignedStart; ++index)
|
||||||
dst.copyCoeff(index, src);
|
dst.copyCoeff(index, src);
|
||||||
|
|
||||||
for(int index = alignedStart; index < alignedEnd; index += packetSize)
|
for(int index = alignedStart; index < alignedEnd; index += packetSize)
|
||||||
{
|
{
|
||||||
|
EIGEN_DEBUG_VAR(index);
|
||||||
|
EIGEN_DEBUG_VAR(&dst.coeffRef(index));
|
||||||
dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::SrcAlignment>(index, src);
|
dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::SrcAlignment>(index, src);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -431,7 +439,7 @@ struct ei_assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
|
|||||||
const int outerSize = dst.outerSize();
|
const int outerSize = dst.outerSize();
|
||||||
const int alignedStep = (packetSize - dst.stride() % packetSize) & packetAlignedMask;
|
const int alignedStep = (packetSize - dst.stride() % packetSize) & packetAlignedMask;
|
||||||
int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
|
int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
|
||||||
: ei_alignmentOffset(&dst.coeffRef(0,0), innerSize);
|
: ei_first_aligned(&dst.coeffRef(0,0), innerSize);
|
||||||
|
|
||||||
for(int i = 0; i < outerSize; ++i)
|
for(int i = 0; i < outerSize; ++i)
|
||||||
{
|
{
|
||||||
|
@ -380,35 +380,33 @@ EIGEN_STRONG_INLINE void MatrixBase<Derived>::copyPacket(int index, const Matrix
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<typename Derived, typename Integer, bool JustReturnZero>
|
template<typename Derived, bool JustReturnZero>
|
||||||
struct ei_alignmentOffset_impl
|
struct ei_first_aligned_impl
|
||||||
{
|
{
|
||||||
inline static Integer run(const MatrixBase<Derived>&, Integer)
|
inline static int run(const MatrixBase<Derived>&)
|
||||||
{ return 0; }
|
{ return 0; }
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Derived, typename Integer>
|
template<typename Derived>
|
||||||
struct ei_alignmentOffset_impl<Derived, Integer, false>
|
struct ei_first_aligned_impl<Derived, false>
|
||||||
{
|
{
|
||||||
inline static Integer run(const MatrixBase<Derived>& m, Integer maxOffset)
|
inline static int run(const MatrixBase<Derived>& m)
|
||||||
{
|
{
|
||||||
return ei_alignmentOffset(&m.const_cast_derived().coeffRef(0,0), maxOffset);
|
return ei_first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size());
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/** \internal \returns the number of elements which have to be skipped, starting
|
/** \internal \returns the index of the first element of the array that is well aligned for vectorization.
|
||||||
* from the address of coeffRef(0,0), to find the first 16-byte aligned element.
|
|
||||||
*
|
*
|
||||||
* \note If the expression doesn't have the DirectAccessBit, this function returns 0.
|
* There is also the variant ei_first_aligned(const Scalar*, Integer) defined in Memory.h. See it for more
|
||||||
*
|
* documentation.
|
||||||
* There is also the variant ei_alignmentOffset(const Scalar*, Integer) defined in Memory.h.
|
|
||||||
*/
|
*/
|
||||||
template<typename Derived, typename Integer>
|
template<typename Derived>
|
||||||
inline static Integer ei_alignmentOffset(const MatrixBase<Derived>& m, Integer maxOffset)
|
inline static int ei_first_aligned(const MatrixBase<Derived>& m)
|
||||||
{
|
{
|
||||||
return ei_alignmentOffset_impl<Derived, Integer,
|
return ei_first_aligned_impl
|
||||||
(Derived::Flags & AlignedBit) || !(Derived::Flags & DirectAccessBit)>
|
<Derived, (Derived::Flags & AlignedBit) || !(Derived::Flags & DirectAccessBit)>
|
||||||
::run(m, maxOffset);
|
::run(m);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -209,7 +209,7 @@ struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
|
|||||||
{
|
{
|
||||||
const int size = mat.size();
|
const int size = mat.size();
|
||||||
const int packetSize = ei_packet_traits<Scalar>::size;
|
const int packetSize = ei_packet_traits<Scalar>::size;
|
||||||
const int alignedStart = ei_alignmentOffset(mat,size);
|
const int alignedStart = ei_first_aligned(mat);
|
||||||
enum {
|
enum {
|
||||||
alignment = (Derived::Flags & DirectAccessBit) || (Derived::Flags & AlignedBit)
|
alignment = (Derived::Flags & DirectAccessBit) || (Derived::Flags & AlignedBit)
|
||||||
? Aligned : Unaligned
|
? Aligned : Unaligned
|
||||||
|
@ -65,7 +65,7 @@ MatrixBase<Derived>::stableNorm() const
|
|||||||
int bi=0;
|
int bi=0;
|
||||||
if ((int(Flags)&DirectAccessBit) && !(int(Flags)&AlignedBit))
|
if ((int(Flags)&DirectAccessBit) && !(int(Flags)&AlignedBit))
|
||||||
{
|
{
|
||||||
bi = ei_alignmentOffset(&const_cast_derived().coeffRef(0), n);
|
bi = ei_first_aligned(&const_cast_derived().coeffRef(0), n);
|
||||||
if (bi>0)
|
if (bi>0)
|
||||||
ei_stable_norm_kernel(start(bi), ssq, scale, invScale);
|
ei_stable_norm_kernel(start(bi), ssq, scale, invScale);
|
||||||
}
|
}
|
||||||
|
@ -69,7 +69,7 @@ void ei_cache_friendly_product_colmajor_times_vector(
|
|||||||
|
|
||||||
// How many coeffs of the result do we have to skip to be aligned.
|
// How many coeffs of the result do we have to skip to be aligned.
|
||||||
// Here we assume data are at least aligned on the base scalar type.
|
// Here we assume data are at least aligned on the base scalar type.
|
||||||
int alignedStart = ei_alignmentOffset(res,size);
|
int alignedStart = ei_first_aligned(res,size);
|
||||||
int alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0;
|
int alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0;
|
||||||
const int peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
|
const int peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
|
||||||
|
|
||||||
@ -79,7 +79,7 @@ void ei_cache_friendly_product_colmajor_times_vector(
|
|||||||
: FirstAligned;
|
: FirstAligned;
|
||||||
|
|
||||||
// we cannot assume the first element is aligned because of sub-matrices
|
// we cannot assume the first element is aligned because of sub-matrices
|
||||||
const int lhsAlignmentOffset = ei_alignmentOffset(lhs,size);
|
const int lhsAlignmentOffset = ei_first_aligned(lhs,size);
|
||||||
|
|
||||||
// find how many columns do we have to skip to be aligned with the result (if possible)
|
// find how many columns do we have to skip to be aligned with the result (if possible)
|
||||||
int skipColumns = 0;
|
int skipColumns = 0;
|
||||||
@ -282,7 +282,7 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
|
|||||||
// How many coeffs of the result do we have to skip to be aligned.
|
// How many coeffs of the result do we have to skip to be aligned.
|
||||||
// Here we assume data are at least aligned on the base scalar type
|
// Here we assume data are at least aligned on the base scalar type
|
||||||
// if that's not the case then vectorization is discarded, see below.
|
// if that's not the case then vectorization is discarded, see below.
|
||||||
int alignedStart = ei_alignmentOffset(rhs, size);
|
int alignedStart = ei_first_aligned(rhs, size);
|
||||||
int alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0;
|
int alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0;
|
||||||
const int peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
|
const int peeledSize = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
|
||||||
|
|
||||||
@ -292,7 +292,7 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
|
|||||||
: FirstAligned;
|
: FirstAligned;
|
||||||
|
|
||||||
// we cannot assume the first element is aligned because of sub-matrices
|
// we cannot assume the first element is aligned because of sub-matrices
|
||||||
const int lhsAlignmentOffset = ei_alignmentOffset(lhs,size);
|
const int lhsAlignmentOffset = ei_first_aligned(lhs,size);
|
||||||
|
|
||||||
// find how many rows do we have to skip to be aligned with rhs (if possible)
|
// find how many rows do we have to skip to be aligned with rhs (if possible)
|
||||||
int skipRows = 0;
|
int skipRows = 0;
|
||||||
|
@ -86,7 +86,7 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
|
|||||||
size_t starti = FirstTriangular ? 0 : j+2;
|
size_t starti = FirstTriangular ? 0 : j+2;
|
||||||
size_t endi = FirstTriangular ? j : size;
|
size_t endi = FirstTriangular ? j : size;
|
||||||
size_t alignedEnd = starti;
|
size_t alignedEnd = starti;
|
||||||
size_t alignedStart = (starti) + ei_alignmentOffset(&res[starti], endi-starti);
|
size_t alignedStart = (starti) + ei_first_aligned(&res[starti], endi-starti);
|
||||||
alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
|
alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
|
||||||
|
|
||||||
res[j] += cj0.pmul(A0[j], t0);
|
res[j] += cj0.pmul(A0[j], t0);
|
||||||
|
@ -209,27 +209,53 @@ template<typename T, bool Align> inline void ei_conditional_aligned_delete(T *pt
|
|||||||
ei_conditional_aligned_free<Align>(ptr);
|
ei_conditional_aligned_free<Align>(ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \internal \returns the number of elements which have to be skipped to
|
/** \internal \returns the index of the first element of the array that is well aligned for vectorization.
|
||||||
* find the first 16-byte aligned element
|
|
||||||
*
|
*
|
||||||
* There is also the variant ei_alignmentOffset(const MatrixBase&, Integer) defined in Coeffs.h.
|
* \param array the address of the start of the array
|
||||||
|
* \param size the size of the array
|
||||||
|
*
|
||||||
|
* \note If no element of the array is well aligned, the size of the array is returned. Typically,
|
||||||
|
* for example with SSE, "well aligned" means 16-byte-aligned. If vectorization is disabled or if the
|
||||||
|
* packet size for the given scalar type is 1, then everything is considered well-aligned.
|
||||||
|
*
|
||||||
|
* \note If the scalar type is vectorizable, we rely on the following assumptions: sizeof(Scalar) is a
|
||||||
|
* power of 2, the packet size in bytes is also a power of 2, and is a multiple of sizeof(Scalar). On the
|
||||||
|
* other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails for
|
||||||
|
* example with Scalar=double on certain 32-bit platforms, see bug #79.
|
||||||
|
*
|
||||||
|
* There is also the variant ei_first_aligned(const MatrixBase&, Integer) defined in Coeffs.h.
|
||||||
*/
|
*/
|
||||||
template<typename Scalar, typename Integer>
|
template<typename Scalar, typename Integer>
|
||||||
inline static Integer ei_alignmentOffset(const Scalar* ptr, Integer maxOffset)
|
inline static Integer ei_first_aligned(const Scalar* array, Integer size)
|
||||||
{
|
{
|
||||||
typedef typename ei_packet_traits<Scalar>::type Packet;
|
typedef typename ei_packet_traits<Scalar>::type Packet;
|
||||||
const Integer PacketSize = ei_packet_traits<Scalar>::size;
|
enum { PacketSize = ei_packet_traits<Scalar>::size,
|
||||||
const Integer PacketAlignedMask = PacketSize-1;
|
PacketAlignedMask = PacketSize-1
|
||||||
const bool Vectorized = PacketSize>1;
|
};
|
||||||
return Vectorized
|
|
||||||
? std::min<Integer>( (PacketSize - (Integer((size_t(ptr)/sizeof(Scalar))) & PacketAlignedMask))
|
if(PacketSize==1)
|
||||||
& PacketAlignedMask, maxOffset)
|
{
|
||||||
: 0;
|
// Either there is no vectorization, or a packet consists of exactly 1 scalar so that all elements
|
||||||
|
// of the array have the same aligment.
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
else if(size_t(array) & (sizeof(Scalar)-1))
|
||||||
|
{
|
||||||
|
// There is vectorization for this scalar type, but the array is not aligned to the size of a single scalar.
|
||||||
|
// Consequently, no element of the array is well aligned.
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return std::min<Integer>( (PacketSize - (Integer((size_t(array)/sizeof(Scalar))) & PacketAlignedMask))
|
||||||
|
& PacketAlignedMask, size);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \internal
|
/** \internal
|
||||||
* ei_aligned_stack_alloc(SIZE) allocates an aligned buffer of SIZE bytes
|
* ei_aligned_stack_alloc(SIZE) allocates an aligned buffer of SIZE bytes
|
||||||
* on the stack if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT.
|
* on the stack if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and
|
||||||
|
* if stack allocation is supported by the platform (currently, this is linux only).
|
||||||
* Otherwise the memory is allocated on the heap.
|
* Otherwise the memory is allocated on the heap.
|
||||||
* Data allocated with ei_aligned_stack_alloc \b must be freed by calling ei_aligned_stack_free(PTR,SIZE).
|
* Data allocated with ei_aligned_stack_alloc \b must be freed by calling ei_aligned_stack_free(PTR,SIZE).
|
||||||
* \code
|
* \code
|
||||||
|
@ -318,7 +318,7 @@ void /*EIGEN_DONT_INLINE*/ ei_apply_rotation_in_the_plane(VectorX& _x, VectorY&
|
|||||||
typedef typename ei_packet_traits<Scalar>::type Packet;
|
typedef typename ei_packet_traits<Scalar>::type Packet;
|
||||||
enum { PacketSize = ei_packet_traits<Scalar>::size, Peeling = 2 };
|
enum { PacketSize = ei_packet_traits<Scalar>::size, Peeling = 2 };
|
||||||
|
|
||||||
int alignedStart = ei_alignmentOffset(y, size);
|
int alignedStart = ei_first_aligned(y, size);
|
||||||
int alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize;
|
int alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize;
|
||||||
|
|
||||||
const Packet pc = ei_pset1(Scalar(j.c()));
|
const Packet pc = ei_pset1(Scalar(j.c()));
|
||||||
@ -336,7 +336,7 @@ void /*EIGEN_DONT_INLINE*/ ei_apply_rotation_in_the_plane(VectorX& _x, VectorY&
|
|||||||
Scalar* px = x + alignedStart;
|
Scalar* px = x + alignedStart;
|
||||||
Scalar* py = y + alignedStart;
|
Scalar* py = y + alignedStart;
|
||||||
|
|
||||||
if(ei_alignmentOffset(x, size)==alignedStart)
|
if(ei_first_aligned(x, size)==alignedStart)
|
||||||
{
|
{
|
||||||
for(int i=alignedStart; i<alignedEnd; i+=PacketSize)
|
for(int i=alignedStart; i<alignedEnd; i+=PacketSize)
|
||||||
{
|
{
|
||||||
|
@ -124,7 +124,7 @@ public :
|
|||||||
Scalar* A0 = dst.data() + j*dst.stride();
|
Scalar* A0 = dst.data() + j*dst.stride();
|
||||||
int starti = j;
|
int starti = j;
|
||||||
int alignedEnd = starti;
|
int alignedEnd = starti;
|
||||||
int alignedStart = (starti) + ei_alignmentOffset(&A0[starti], size-starti);
|
int alignedStart = (starti) + ei_first_aligned(&A0[starti], size-starti);
|
||||||
alignedEnd = alignedStart + ((size-alignedStart)/(2*PacketSize))*(PacketSize*2);
|
alignedEnd = alignedStart + ((size-alignedStart)/(2*PacketSize))*(PacketSize*2);
|
||||||
|
|
||||||
// do the non-vectorizable part of the assignment
|
// do the non-vectorizable part of the assignment
|
||||||
|
@ -265,7 +265,7 @@ public :
|
|||||||
|
|
||||||
int starti = j+2;
|
int starti = j+2;
|
||||||
int alignedEnd = starti;
|
int alignedEnd = starti;
|
||||||
int alignedStart = (starti) + ei_alignmentOffset(&X[starti], N-starti);
|
int alignedStart = (starti) + ei_first_aligned(&X[starti], N-starti);
|
||||||
alignedEnd = alignedStart + ((N-alignedStart)/(PacketSize))*(PacketSize);
|
alignedEnd = alignedStart + ((N-alignedStart)/(PacketSize))*(PacketSize);
|
||||||
|
|
||||||
X[j] += t0 * A0[j];
|
X[j] += t0 * A0[j];
|
||||||
|
@ -343,7 +343,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
|
|||||||
const int size = dst.size();
|
const int size = dst.size();
|
||||||
const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
|
const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
|
||||||
const int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
|
const int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
|
||||||
: ei_alignmentOffset(&dst.coeffRef(0), size);
|
: ei_first_aligned(&dst.coeffRef(0), size);
|
||||||
const int alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
|
const int alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
|
||||||
|
|
||||||
for(int index = 0; index < alignedStart; index++)
|
for(int index = 0; index < alignedStart; index++)
|
||||||
|
@ -88,6 +88,7 @@ ei_add_test(meta)
|
|||||||
ei_add_test(sizeof)
|
ei_add_test(sizeof)
|
||||||
ei_add_test(dynalloc)
|
ei_add_test(dynalloc)
|
||||||
ei_add_test(nomalloc)
|
ei_add_test(nomalloc)
|
||||||
|
ei_add_test(first_aligned)
|
||||||
ei_add_test(mixingtypes)
|
ei_add_test(mixingtypes)
|
||||||
ei_add_test(packetmath)
|
ei_add_test(packetmath)
|
||||||
ei_add_test(unalignedassert)
|
ei_add_test(unalignedassert)
|
||||||
|
64
test/first_aligned.cpp
Normal file
64
test/first_aligned.cpp
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
// This file is part of Eigen, a lightweight C++ template library
|
||||||
|
// for linear algebra.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
|
||||||
|
//
|
||||||
|
// Eigen is free software; you can redistribute it and/or
|
||||||
|
// modify it under the terms of the GNU Lesser General Public
|
||||||
|
// License as published by the Free Software Foundation; either
|
||||||
|
// version 3 of the License, or (at your option) any later version.
|
||||||
|
//
|
||||||
|
// Alternatively, you can redistribute it and/or
|
||||||
|
// modify it under the terms of the GNU General Public License as
|
||||||
|
// published by the Free Software Foundation; either version 2 of
|
||||||
|
// the License, or (at your option) any later version.
|
||||||
|
//
|
||||||
|
// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
|
||||||
|
// GNU General Public License for more details.
|
||||||
|
//
|
||||||
|
// You should have received a copy of the GNU Lesser General Public
|
||||||
|
// License and a copy of the GNU General Public License along with
|
||||||
|
// Eigen. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
#include "main.h"
|
||||||
|
|
||||||
|
template<typename Scalar>
|
||||||
|
void test_first_aligned_helper(Scalar *array, int size)
|
||||||
|
{
|
||||||
|
const int packet_size = sizeof(Scalar) * ei_packet_traits<Scalar>::size;
|
||||||
|
VERIFY(((size_t(array) + sizeof(Scalar) * ei_first_aligned(array, size)) % packet_size) == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Scalar>
|
||||||
|
void test_none_aligned_helper(Scalar *array, int size)
|
||||||
|
{
|
||||||
|
VERIFY(ei_packet_traits<Scalar>::size == 1 || ei_first_aligned(array, size) == size);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct some_non_vectorizable_type { float x; };
|
||||||
|
|
||||||
|
void test_first_aligned()
|
||||||
|
{
|
||||||
|
EIGEN_ALIGN16 float array_float[100];
|
||||||
|
test_first_aligned_helper(array_float, 50);
|
||||||
|
test_first_aligned_helper(array_float+1, 50);
|
||||||
|
test_first_aligned_helper(array_float+2, 50);
|
||||||
|
test_first_aligned_helper(array_float+3, 50);
|
||||||
|
test_first_aligned_helper(array_float+4, 50);
|
||||||
|
test_first_aligned_helper(array_float+5, 50);
|
||||||
|
|
||||||
|
EIGEN_ALIGN16 double array_double[100];
|
||||||
|
test_first_aligned_helper(array_float, 50);
|
||||||
|
test_first_aligned_helper(array_float+1, 50);
|
||||||
|
test_first_aligned_helper(array_float+2, 50);
|
||||||
|
|
||||||
|
double *array_double_plus_4_bytes = (double*)(size_t(array_double)+4);
|
||||||
|
test_none_aligned_helper(array_double_plus_4_bytes, 50);
|
||||||
|
test_none_aligned_helper(array_double_plus_4_bytes+1, 50);
|
||||||
|
|
||||||
|
some_non_vectorizable_type array_nonvec[100];
|
||||||
|
test_first_aligned_helper(array_nonvec, 100);
|
||||||
|
test_none_aligned_helper(array_nonvec, 100);
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user