Collapsed revision

* Collapsed revision
* Add packet op "pones". Write pnot(a) as pxor(pones(a), a).
* Collapsed revision
* Simplify a bit.
* Undo useless diffs.
* Fix typo.
This commit is contained in:
Rasmus Munk Larsen 2019-01-09 16:34:23 -08:00
parent 1119c73d22
commit 8f178429b9
2 changed files with 15 additions and 16 deletions

View File

@ -214,6 +214,10 @@ pxor(const Packet& a, const Packet& b) { return a ^ b; }
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pandnot(const Packet& a, const Packet& b)
{
  // Keep the bits of a that are clear in b: complement b, then AND it with a.
  return a & (~b);
}
/** \internal \returns a packet with every bit set; the argument only selects the packet type */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pones(const Packet& /*a*/)
{
  Packet all_set;
  // Fill every byte of the packet with 0xff so that all bits end up set.
  memset(&all_set, 0xff, sizeof(all_set));
  return all_set;
}
/** \internal \returns the bitwise complement of \a a, computed as \a a XOR an all-ones packet */
template <typename Packet> EIGEN_DEVICE_FUNC inline Packet
pnot(const Packet& a)
{
  const Packet all_set = pones(a);
  return pxor(all_set, a);
}
@ -246,10 +250,6 @@ pldexp(const Packet &a, const Packet &exponent) { return std::ldexp(a,exponent);
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pzero(const Packet& a)
{
  // XOR-ing the argument with itself is how the zero packet is produced here.
  return pxor(a, a);
}
/** \internal \returns a packet with every bit set; the argument only selects the packet type */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pones(const Packet& /*a*/)
{
  Packet all_set;
  // Fill every byte of the packet with 0xff so that all bits end up set.
  memset(&all_set, 0xff, sizeof(all_set));
  return all_set;
}
/** \internal \returns bits of \a a or \a b according to the input bit mask \a mask */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pselect(const Packet& mask, const Packet& a, const Packet& b) {

View File

@ -284,27 +284,26 @@ EIGEN_STRONG_INLINE Packet16f cat256(Packet8f a, Packet8f b) {
#endif
// Lane-wise a <= b over 16 floats. Uses the ordered, non-signaling predicate
// (_CMP_LE_OQ), so a lane with a NaN operand compares false. Each result lane
// has all 32 bits set where the comparison holds and all bits clear otherwise.
template<> EIGEN_STRONG_INLINE Packet16f pcmp_le(const Packet16f& a, const Packet16f& b) {
  // One mask bit per lane.
  __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_LE_OQ);
  // Expand each mask bit to a full lane: -1 (all ones) where set, 0 elsewhere.
  // The broadcast value is an int parameter, hence -1 rather than 0xffffffffu.
  return _mm512_castsi512_ps(
      _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, -1));
}
// Lane-wise a < b over 16 floats. Uses the ordered, non-signaling predicate
// (_CMP_LT_OQ), so a lane with a NaN operand compares false. Each result lane
// has all 32 bits set where the comparison holds and all bits clear otherwise.
template<> EIGEN_STRONG_INLINE Packet16f pcmp_lt(const Packet16f& a, const Packet16f& b) {
  // One mask bit per lane.
  __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ);
  // Expand each mask bit to a full lane: -1 (all ones) where set, 0 elsewhere.
  // The broadcast value is an int parameter, hence -1 rather than 0xffffffffu.
  return _mm512_castsi512_ps(
      _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, -1));
}
// Lane-wise "a < b, or either operand is NaN" over 16 floats. Each result lane
// has all 32 bits set where that holds and all bits clear otherwise.
template<> EIGEN_STRONG_INLINE Packet16f pcmp_lt_or_nan(const Packet16f& a, const Packet16f& b) {
  // "Not greater-or-equal, unordered" is exactly (a < b) || isnan(a) || isnan(b).
  // _CMP_NGT_UQ would be wrong here: it means !(a > b) and so also accepts a == b.
  __mmask16 mask = _mm512_cmp_ps_mask(a, b, _CMP_NGE_UQ);
  // Expand each mask bit to a full lane: -1 (all ones) where set, 0 elsewhere.
  // The broadcast value is an int parameter, hence -1 rather than 0xffffffffu.
  return _mm512_castsi512_ps(
      _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, -1));
}
// Lane-wise a == b over 16 32-bit integers. Each result lane has all 32 bits
// set where the lanes are equal and all bits clear otherwise.
template<> EIGEN_STRONG_INLINE Packet16i pcmp_eq(const Packet16i& a, const Packet16i& b) {
  // Integer compares take an _MM_CMPINT_* predicate; the _CMP_*_OQ constants
  // belong to the floating-point compare intrinsics.
  __mmask16 mask = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_EQ);
  // Expand each mask bit to a full lane: -1 (all ones) where set, 0 elsewhere.
  // The broadcast value is an int parameter, hence -1 rather than 0xffffffffu.
  return _mm512_mask_set1_epi32(_mm512_set1_epi32(0), mask, -1);
}
template <>