From f91500d3035fd34683210eea6064b95a7aad4306 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 30 Nov 2018 14:32:06 +0100 Subject: [PATCH] Fix pandnot order in AVX512 --- Eigen/Core | 2 ++ Eigen/src/Core/arch/AVX512/PacketMath.h | 10 +++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Eigen/Core b/Eigen/Core index 41529bb63..bc6cf8a96 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -154,8 +154,10 @@ using std::ptrdiff_t; #if defined EIGEN_VECTORIZE_AVX512 #include "src/Core/arch/SSE/PacketMath.h" #include "src/Core/arch/SSE/TypeCasting.h" + #include "src/Core/arch/SSE/Complex.h" #include "src/Core/arch/AVX/PacketMath.h" #include "src/Core/arch/AVX/TypeCasting.h" + #include "src/Core/arch/AVX/Complex.h" #include "src/Core/arch/AVX512/PacketMath.h" #include "src/Core/arch/SSE/MathFunctions.h" #include "src/Core/arch/AVX/MathFunctions.h" diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index 86cefba92..9a053fb1a 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -393,24 +393,24 @@ template <> EIGEN_STRONG_INLINE Packet16f pandnot(const Packet16f& a, const Packet16f& b) { #ifdef EIGEN_VECTORIZE_AVX512DQ - return _mm512_andnot_ps(a, b); + return _mm512_andnot_ps(b, a); #else Packet16f res = _mm512_undefined_ps(); Packet4f lane0_a = _mm512_extractf32x4_ps(a, 0); Packet4f lane0_b = _mm512_extractf32x4_ps(b, 0); - res = _mm512_insertf32x4(res, _mm_andnot_ps(lane0_a, lane0_b), 0); + res = _mm512_insertf32x4(res, pandnot(lane0_a, lane0_b), 0); Packet4f lane1_a = _mm512_extractf32x4_ps(a, 1); Packet4f lane1_b = _mm512_extractf32x4_ps(b, 1); - res = _mm512_insertf32x4(res, _mm_andnot_ps(lane1_a, lane1_b), 1); + res = _mm512_insertf32x4(res, pandnot(lane1_a, lane1_b), 1); Packet4f lane2_a = _mm512_extractf32x4_ps(a, 2); Packet4f lane2_b = _mm512_extractf32x4_ps(b, 2); - res = _mm512_insertf32x4(res, _mm_andnot_ps(lane2_a, lane2_b), 2); + res = _mm512_insertf32x4(res, pandnot(lane2_a, lane2_b), 2); Packet4f lane3_a = _mm512_extractf32x4_ps(a, 3); Packet4f lane3_b = _mm512_extractf32x4_ps(b, 3); - res = _mm512_insertf32x4(res, _mm_andnot_ps(lane3_a, lane3_b), 3); + res = _mm512_insertf32x4(res, pandnot(lane3_a, lane3_b), 3); return res; #endif