mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-13 20:26:03 +08:00
Add missing pcmp_lt_or_nan for AVX512
This commit is contained in:
parent
0f6f75bd8a
commit
961ff567e8
@ -262,6 +262,26 @@ EIGEN_STRONG_INLINE Packet8d pmax<Packet8d>(const Packet8d& a,
|
||||
return _mm512_max_pd(b, a);
|
||||
}
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_AVX512DQ
|
||||
template<int I> Packet8f extract256(Packet16f x) { return _mm512_extractf32x8_ps(x,I); }
|
||||
Packet16f cat256(Packet8f a, Packet8f b) { return _mm512_insertf32x8(_mm512_castps256_ps512(a),b,1); }
|
||||
#else
|
||||
// AVX512F does not define _mm512_extractf32x8_ps to extract _m256 from _m512
|
||||
template<int I> Packet8f extract256(Packet16f x) {
|
||||
return _mm256_castsi256_ps(_mm512_extracti64x4_epi64( _mm512_castps_si512(x),I));
|
||||
}
|
||||
Packet16f cat256(Packet8f a, Packet8f b) {
|
||||
return _mm512_castsi512_ps(_mm512_inserti64x4(_mm512_castsi256_si512(_mm256_castps_si256(a)),
|
||||
_mm256_castps_si256(b),1));
|
||||
}
|
||||
#endif
|
||||
|
||||
Packet16f pcmp_lt_or_nan(const Packet16f& a, const Packet16f& b) {
|
||||
__m256 lo = _mm256_cmp_ps(extract256<0>(a), extract256<0>(b), _CMP_NGE_UQ);
|
||||
__m256 hi = _mm256_cmp_ps(extract256<1>(a), extract256<1>(b), _CMP_NGE_UQ);
|
||||
return cat256(lo, hi);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet16i pcmp_eq(const Packet16i& a, const Packet16i& b) {
|
||||
__m256i lo = _mm256_cmpeq_epi32(_mm512_extracti64x4_epi64(a, 0), _mm512_extracti64x4_epi64(b, 0));
|
||||
__m256i hi = _mm256_cmpeq_epi32(_mm512_extracti64x4_epi64(a, 1), _mm512_extracti64x4_epi64(b, 1));
|
||||
|
Loading…
x
Reference in New Issue
Block a user