Improve pblend AVX implementation

This commit is contained in:
Ilya Tokar 2022-07-29 18:45:33 +00:00 committed by Rasmus Munk Larsen
parent ef4654bae7
commit e618c4a5e9

View File

@ -1334,16 +1334,16 @@ ptranspose(PacketBlock<Packet4d,4>& kernel) {
}
template<> EIGEN_STRONG_INLINE Packet8f pblend(const Selector<8>& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket) {
const __m256 zero = _mm256_setzero_ps();
const __m256 select = _mm256_set_ps(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
__m256 false_mask = _mm256_cmp_ps(select, zero, _CMP_EQ_UQ);
return _mm256_blendv_ps(thenPacket, elsePacket, false_mask);
const __m256i zero = _mm256_setzero_si256();
const __m256i select = _mm256_set_epi32(ifPacket.select[7], ifPacket.select[6], ifPacket.select[5], ifPacket.select[4], ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
__m256i false_mask = _mm256_cmpeq_epi32(zero, select);
return _mm256_blendv_ps(thenPacket, elsePacket, _mm256_castsi256_ps(false_mask));
}
template<> EIGEN_STRONG_INLINE Packet4d pblend(const Selector<4>& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket) {
const __m256d zero = _mm256_setzero_pd();
const __m256d select = _mm256_set_pd(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
__m256d false_mask = _mm256_cmp_pd(select, zero, _CMP_EQ_UQ);
return _mm256_blendv_pd(thenPacket, elsePacket, false_mask);
const __m256i zero = _mm256_setzero_si256();
const __m256i select = _mm256_set_epi64x(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
__m256i false_mask = _mm256_cmpeq_epi64(select, zero);
return _mm256_blendv_pd(thenPacket, elsePacket, _mm256_castsi256_pd(false_mask));
}
// Packet math for Eigen::half