From 8f178429b9d0517a5a63da6dd73adbd84a15b375 Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen
Date: Wed, 9 Jan 2019 16:34:23 -0800
Subject: [PATCH] Collapsed revision * Collapsed revision * Add packet up
 "pones". Write pnot(a) as pxor(pones(a), a). * Collapsed revision * Simplify
 a bit. * Undo useless diffs. * Fix typo.

---
Review notes (this region is ignored when the patch is applied):
 * pcmp_lt_or_nan(Packet16f) uses _CMP_NGE_UQ. The predicate _CMP_NGT_UQ is
   "not greater than", which is also true for a == b, so it would compute
   "a <= b or NaN" instead of "a < b or NaN".
 * pcmp_eq(Packet16i) passes the integer predicate _MM_CMPINT_EQ to
   _mm512_cmp_epi32_mask rather than the floating-point predicate _CMP_EQ_OQ.
 * The template<typename Packet> parameter lists in the GenericPacketMath.h
   hunks are spelled out; without them the declarations are not valid C++.
 * Only the text of added lines in AVX512/PacketMath.h changed, so the hunk
   line counts and the diffstat are unaffected. The post-image hash on that
   file's index line was not recomputed.

 Eigen/src/Core/GenericPacketMath.h      |  8 ++++----
 Eigen/src/Core/arch/AVX512/PacketMath.h | 23 +++++++++++------------
 2 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 048fc5157..8bcceaa7b 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -214,6 +214,10 @@ pxor(const Packet& a, const Packet& b) { return a ^ b; }
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pandnot(const Packet& a, const Packet& b) { return a & (~b); }
 
+/** \internal \returns ones */
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
+pones(const Packet& /*a*/) { Packet b; memset(&b, 0xff, sizeof(b)); return b;}
+
 /** \internal \returns the bitwise not of \a a */
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pnot(const Packet& a) { return pxor(pones(a), a);}
@@ -246,10 +250,6 @@ pldexp(const Packet &a, const Packet &exponent) { return std::ldexp(a,exponent);
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pzero(const Packet& a) { return pxor(a,a); }
 
-/** \internal \returns ones */
-template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
-pones(const Packet& /*a*/) { Packet b; memset(&b, 0xff, sizeof(b)); return b;}
-
 /** \internal \returns bits of \a or \b according to the input bit mask \a mask */
 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pselect(const Packet& mask, const Packet& a, const Packet& b) {
diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h
index d258fd07b..9666c4e22 100644
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@@ -284,27 +284,26 @@ EIGEN_STRONG_INLINE Packet16f cat256(Packet8f a, Packet8f b) {
 #endif
 
 template<> EIGEN_STRONG_INLINE Packet16f pcmp_le(const Packet16f& a, const Packet16f& b) {
-  __m256 lo = pcmp_le(extract256<0>(a), extract256<0>(b));
-  __m256 hi = pcmp_le(extract256<1>(a), extract256<1>(b));
-  return cat256(lo, hi);
+  __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_LE_OQ);
+  return _mm512_castsi512_ps(
+      _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu));
 }
 
 template<> EIGEN_STRONG_INLINE Packet16f pcmp_lt(const Packet16f& a, const Packet16f& b) {
-  __m256 lo = pcmp_lt(extract256<0>(a), extract256<0>(b));
-  __m256 hi = pcmp_lt(extract256<1>(a), extract256<1>(b));
-  return cat256(lo, hi);
+  __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ);
+  return _mm512_castsi512_ps(
+      _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu));
 }
 
 template<> EIGEN_STRONG_INLINE Packet16f pcmp_lt_or_nan(const Packet16f& a, const Packet16f& b) {
-  __m256 lo = pcmp_lt_or_nan(extract256<0>(a), extract256<0>(b));
-  __m256 hi = pcmp_lt_or_nan(extract256<1>(a), extract256<1>(b));
-  return cat256(lo, hi);
+  __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_NGE_UQ);  // !(a >= b): a < b, or a/b is NaN
+  return _mm512_castsi512_ps(
+      _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu));
 }
 
 template<> EIGEN_STRONG_INLINE Packet16i pcmp_eq(const Packet16i& a, const Packet16i& b) {
-  __m256i lo = _mm256_cmpeq_epi32(_mm512_extracti64x4_epi64(a, 0), _mm512_extracti64x4_epi64(b, 0));
-  __m256i hi = _mm256_cmpeq_epi32(_mm512_extracti64x4_epi64(a, 1), _mm512_extracti64x4_epi64(b, 1));
-  return _mm512_inserti64x4(_mm512_castsi256_si512(lo), hi, 1);
+  __mmask16 mask = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_EQ);  // integer compare predicate
+  return _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, 0xffffffffu);
 }
 
 template <>