Use packet ops instead of AVX2 intrinsics

This commit is contained in:
Eugene Zhulenev 2019-04-23 11:41:02 -07:00
parent 8c7a6feb8e
commit 68a2a8c445

View File

@ -387,10 +387,10 @@ template<> EIGEN_STRONG_INLINE Packet4d ploadu<Packet4d>(const double* from) { E
// Unaligned integer packet load: reads 256 bits starting at `from` with no
// alignment requirement on the pointer (_mm256_loadu_si256).
template<> EIGEN_STRONG_INLINE Packet8i ploadu<Packet8i>(const int* from)
{
  const __m256i* src = reinterpret_cast<const __m256i*>(from);
  EIGEN_DEBUG_UNALIGNED_LOAD
  return _mm256_loadu_si256(src);
}
// Masked unaligned float packet load.
//
// Bit i of `umask` selects whether 32-bit lane i is read from `from + i`.
// Lanes whose bit is clear are not loaded and come back as zero
// (_mm256_maskload_ps semantics).
//
// The 8-bit mask is expanded to a per-lane all-ones / all-zeros mask using
// packet ops (por / pcmp_eq) rather than raw AVX2 integer intrinsics.
template<> EIGEN_STRONG_INLINE Packet8f ploadu<Packet8f>(const float* from, uint8_t umask) {
  // Replicate the 8 mask bits into every byte of the 256-bit register.
  Packet8i mask = _mm256_set1_epi8(static_cast<char>(umask));
  // _mm256_set_epi32 takes the highest lane first, so lane i receives a word
  // with every bit set except bit i.
  const Packet8i bit_mask = _mm256_set_epi32(0xffffff7f, 0xffffffbf, 0xffffffdf, 0xffffffef, 0xfffffff7, 0xfffffffb, 0xfffffffd, 0xfffffffe);
  // After the OR, lane i is all ones exactly when bit i of umask is set.
  mask = por<Packet8i>(mask, bit_mask);
  // Turn "all ones" into a full-lane true mask and anything else into zero.
  mask = pcmp_eq<Packet8i>(mask, _mm256_set1_epi32(0xffffffff));
  EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_maskload_ps(from, mask);
}