From 626942d9ddcc17c21c2d79a690537e54237275bc Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 28 Sep 2018 16:57:32 +0200 Subject: [PATCH] fix alignment issue in ploaddup for AVX512 --- Eigen/src/Core/arch/AVX512/PacketMath.h | 4 +++- test/packetmath.cpp | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index 9fbb256a1..e3f8c1c87 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -466,7 +466,9 @@ EIGEN_STRONG_INLINE Packet16i ploadu<Packet16i>(const int* from) { // {a0, a0 a1, a1, a2, a2, a3, a3, a4, a4, a5, a5, a6, a6, a7, a7} template <> EIGEN_STRONG_INLINE Packet16f ploaddup<Packet16f>(const float* from) { - __m256i low_half = _mm256_load_si256(reinterpret_cast<const __m256i*>(from)); + // an unaligned load is required here as there is no requirement + // on the alignment of input pointer 'from' + __m256i low_half = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(from)); __m512 even_elements = _mm512_castsi512_ps(_mm512_cvtepu32_epi64(low_half)); __m512 pairs = _mm512_permute_ps(even_elements, _MM_SHUFFLE(2, 2, 0, 0)); return pairs; diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 2b0dda573..babb7c20e 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -227,6 +227,7 @@ template void packetmath() if(PacketSize>1) { + // apply different offsets to check that ploaddup is robust to unaligned inputs for(int offset=0;offset<4;++offset) { for(int i=0;i void packetmath() if(PacketSize>2) { + // apply different offsets to check that ploadquad is robust to unaligned inputs for(int offset=0;offset<4;++offset) { for(int i=0;i