Protect new pblend implementation with EIGEN_VECTORIZE_AVX2

This commit is contained in:
Rasmus Munk Larsen 2022-08-22 18:28:03 +00:00
parent 7c67dc67ae
commit 1a09defce7

View File

@@ -1338,16 +1338,31 @@ ptranspose(PacketBlock<Packet4d,4>& kernel) {
} }
// Lane-wise select over eight floats: a lane whose ifPacket.select entry is
// zero takes its value from elsePacket, every other lane from thenPacket.
template<> EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) {
#ifdef EIGEN_VECTORIZE_AVX2
  // The 256-bit integer compare is only used when AVX2 is enabled; the
  // resulting all-ones/all-zeros lanes are reinterpreted as a float mask.
  const __m256i sel_lanes = _mm256_set_epi32(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4],
                                             ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
  const __m256 pick_else = _mm256_castsi256_ps(_mm256_cmpeq_epi32(_mm256_setzero_si256(), sel_lanes));
#else
  // Without AVX2 the selector entries are converted to float and compared
  // against 0.0f with a floating-point compare instead.
  const __m256 sel_lanes = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4],
                                         ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
  const __m256 pick_else = _mm256_cmp_ps(sel_lanes, _mm256_setzero_ps(), _CMP_EQ_UQ);
#endif
  // blendv takes the second operand in every lane where the mask is set.
  return _mm256_blendv_ps(thenPacket, elsePacket, pick_else);
}
// Lane-wise select over four doubles: a lane whose ifPacket.select entry is
// zero takes its value from elsePacket, every other lane from thenPacket.
template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket) {
#ifdef EIGEN_VECTORIZE_AVX2
  // The 256-bit 64-bit-lane integer compare is only used when AVX2 is
  // enabled; the result is reinterpreted as a double-precision mask.
  const __m256i sel_lanes = _mm256_set_epi64x(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
  const __m256d pick_else = _mm256_castsi256_pd(_mm256_cmpeq_epi64(sel_lanes, _mm256_setzero_si256()));
#else
  // Without AVX2 the selector entries are converted to double and compared
  // against 0.0 with a floating-point compare instead.
  const __m256d sel_lanes = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
  const __m256d pick_else = _mm256_cmp_pd(sel_lanes, _mm256_setzero_pd(), _CMP_EQ_UQ);
#endif
  // blendv takes the second operand in every lane where the mask is set.
  return _mm256_blendv_pd(thenPacket, elsePacket, pick_else);
}
// Packet math for Eigen::half // Packet math for Eigen::half