From 394aabb0a3976d95a5c6f286d49e43bb49558cc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20S=C3=A1nchez?= Date: Fri, 10 Mar 2023 22:36:57 +0000 Subject: [PATCH] Fix failing MSVC tests due to compiler bugs. --- Eigen/src/Core/arch/AVX/PacketMath.h | 13 ++++++++++--- Eigen/src/Core/arch/AVX512/PacketMath.h | 11 +++++++++-- test/packet_ostream.h | 22 ++++++++++++++++++++++ test/packetmath.cpp | 1 + 4 files changed, 42 insertions(+), 5 deletions(-) create mode 100644 test/packet_ostream.h diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 89963eb7a..3eb439434 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -984,13 +984,20 @@ template<> EIGEN_STRONG_INLINE void pstoreu(int* to, const Packet8i& template<> EIGEN_STRONG_INLINE void pstoreu(float* to, const Packet8f& from, uint8_t umask) { #ifdef EIGEN_VECTORIZE_AVX512 __mmask16 mask = static_cast<__mmask16>(umask & 0x00FF); - EIGEN_DEBUG_UNALIGNED_STORE return _mm512_mask_storeu_ps(to, mask, _mm512_castps256_ps512(from)); + EIGEN_DEBUG_UNALIGNED_STORE _mm512_mask_storeu_ps(to, mask, _mm512_castps256_ps512(from)); #else Packet8i mask = _mm256_set1_epi8(static_cast(umask)); - const Packet8i bit_mask = _mm256_set_epi32(0xffffff7f, 0xffffffbf, 0xffffffdf, 0xffffffef, 0xfffffff7, 0xfffffffb, 0xfffffffd, 0xfffffffe); + const Packet8i bit_mask = _mm256_set_epi32(0x7f7f7f7f, 0xbfbfbfbf, 0xdfdfdfdf, 0xefefefef, 0xf7f7f7f7, 0xfbfbfbfb, 0xfdfdfdfd, 0xfefefefe); mask = por(mask, bit_mask); mask = pcmp_eq(mask, _mm256_set1_epi32(0xffffffff)); - EIGEN_DEBUG_UNALIGNED_STORE return _mm256_maskstore_ps(to, mask, from); +#if EIGEN_COMP_MSVC + // MSVC sometimes seems to use a bogus mask with maskstore. 
+ const __m256i ifrom = _mm256_castps_si256(from); + EIGEN_DEBUG_UNALIGNED_STORE _mm_maskmoveu_si128(_mm256_extractf128_si256(ifrom, 0), _mm256_extractf128_si256(mask, 0), reinterpret_cast(to)); + EIGEN_DEBUG_UNALIGNED_STORE _mm_maskmoveu_si128(_mm256_extractf128_si256(ifrom, 1), _mm256_extractf128_si256(mask, 1), reinterpret_cast(to + 4)); +#else + EIGEN_DEBUG_UNALIGNED_STORE _mm256_maskstore_ps(to, mask, from); +#endif #endif } diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index 262211705..98b55ea06 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -336,12 +336,19 @@ EIGEN_STRONG_INLINE Packet16i psub(const Packet16i& a, template <> EIGEN_STRONG_INLINE Packet16f pnegate(const Packet16f& a) { - const __m512i mask = _mm512_set1_epi32(0x80000000); + // NOTE: MSVC seems to struggle with _mm512_set1_epi32, leading to random results. + // The Intel docs give it a relatively high latency as well, so we're probably + // better off using _mm512_set_epi32 directly anyway. 
+ const __m512i mask = _mm512_set_epi32(0x80000000,0x80000000,0x80000000,0x80000000, + 0x80000000,0x80000000,0x80000000,0x80000000, + 0x80000000,0x80000000,0x80000000,0x80000000, + 0x80000000,0x80000000,0x80000000,0x80000000); return _mm512_castsi512_ps(_mm512_xor_epi32(_mm512_castps_si512(a), mask)); } template <> EIGEN_STRONG_INLINE Packet8d pnegate(const Packet8d& a) { - const __m512i mask = _mm512_set1_epi64(0x8000000000000000ULL); + const __m512i mask = _mm512_set_epi64(0x8000000000000000ULL, 0x8000000000000000ULL, 0x8000000000000000ULL, 0x8000000000000000ULL, + 0x8000000000000000ULL, 0x8000000000000000ULL, 0x8000000000000000ULL, 0x8000000000000000ULL); return _mm512_castsi512_pd(_mm512_xor_epi64(_mm512_castpd_si512(a), mask)); } template <> diff --git a/test/packet_ostream.h b/test/packet_ostream.h new file mode 100644 index 000000000..ece499575 --- /dev/null +++ b/test/packet_ostream.h @@ -0,0 +1,22 @@ +#ifndef TEST_PACKET_OSTREAM +#define TEST_PACKET_OSTREAM + +#include +#include + +// Include this header to be able to print Packets while debugging. + +template::vectorizable> > +std::ostream& operator<<(std::ostream& os, const Packet& packet) { + using Scalar = typename Eigen::internal::unpacket_traits::type; + Scalar v[Eigen::internal::unpacket_traits::size]; + Eigen::internal::pstoreu(v, packet); + os << "{" << v[0]; + for (int i=1; i::size; ++i) { + os << "," << v[i]; + } + os << "}"; + return os; +} + +#endif // TEST_PACKET_OSTREAM \ No newline at end of file diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 76332d7f7..a98a014df 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -10,6 +10,7 @@ #include "packetmath_test_shared.h" #include "random_without_cast_overflow.h" +#include "packet_ostream.h" template inline T REF_ADD(const T& a, const T& b) {