diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 0e0744db0..1542215ce 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -1338,16 +1338,31 @@ ptranspose(PacketBlock& kernel) { } template<> EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) { +#ifdef EIGEN_VECTORIZE_AVX2 const __m256i zero = _mm256_setzero_si256(); const __m256i select = _mm256_set_epi32(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]); __m256i false_mask = _mm256_cmpeq_epi32(zero, select); return _mm256_blendv_ps(thenPacket, elsePacket, _mm256_castsi256_ps(false_mask)); +#else + const __m256 zero = _mm256_setzero_ps(); + const __m256 select = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]); + __m256 false_mask = _mm256_cmp_ps(select, zero, _CMP_EQ_UQ); + return _mm256_blendv_ps(thenPacket, elsePacket, false_mask); +#endif } + template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket) { +#ifdef EIGEN_VECTORIZE_AVX2 const __m256i zero = _mm256_setzero_si256(); const __m256i select = _mm256_set_epi64x(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]); __m256i false_mask = _mm256_cmpeq_epi64(select, zero); return _mm256_blendv_pd(thenPacket, elsePacket, _mm256_castsi256_pd(false_mask)); +#else + const __m256d zero = _mm256_setzero_pd(); + const __m256d select = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]); + __m256d false_mask = _mm256_cmp_pd(select, zero, _CMP_EQ_UQ); + return _mm256_blendv_pd(thenPacket, elsePacket, false_mask); +#endif } // Packet math for Eigen::half