From e618c4a5e9bc688d5b7fb5a599f7e0a1a2d6f014 Mon Sep 17 00:00:00 2001
From: Ilya Tokar
Date: Fri, 29 Jul 2022 18:45:33 +0000
Subject: [PATCH] Improve pblend AVX implementation

---
Note: _mm256_cmpeq_epi32 and _mm256_cmpeq_epi64 are AVX2 instructions.
The integer-mask path is therefore only compiled when
EIGEN_VECTORIZE_AVX2 is defined; AVX-only builds keep the original
floating-point compare so that this file still compiles without AVX2.

 Eigen/src/Core/arch/AVX/PacketMath.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 4a646c0dc..9ca4b7c9d 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -1334,16 +1334,34 @@ ptranspose(PacketBlock& kernel) {
 }
 
 template<> EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) {
+#ifdef EIGEN_VECTORIZE_AVX2
+  // _mm256_cmpeq_epi32 is an AVX2 instruction: build the "selector == 0" mask with integer ops only when AVX2 is enabled.
+  const __m256i zero = _mm256_setzero_si256();
+  const __m256i select = _mm256_set_epi32(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+  __m256i false_mask = _mm256_cmpeq_epi32(zero, select);
+  return _mm256_blendv_ps(thenPacket, elsePacket, _mm256_castsi256_ps(false_mask));
+#else
+  // Plain AVX fallback: compare the selector against zero in floating point.
   const __m256 zero = _mm256_setzero_ps();
   const __m256 select = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
   __m256 false_mask = _mm256_cmp_ps(select, zero, _CMP_EQ_UQ);
   return _mm256_blendv_ps(thenPacket, elsePacket, false_mask);
+#endif
 }
 template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket) {
+#ifdef EIGEN_VECTORIZE_AVX2
+  // _mm256_cmpeq_epi64 is an AVX2 instruction: build the "selector == 0" mask with integer ops only when AVX2 is enabled.
+  const __m256i zero = _mm256_setzero_si256();
+  const __m256i select = _mm256_set_epi64x(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
+  __m256i false_mask = _mm256_cmpeq_epi64(select, zero);
+  return _mm256_blendv_pd(thenPacket, elsePacket, _mm256_castsi256_pd(false_mask));
+#else
+  // Plain AVX fallback: compare the selector against zero in floating point.
   const __m256d zero = _mm256_setzero_pd();
   const __m256d select = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
   __m256d false_mask = _mm256_cmp_pd(select, zero, _CMP_EQ_UQ);
   return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
+#endif
 }
 
 // Packet math for Eigen::half