Define pcmp_le generically in terms of pcmp_eq and pcmp_lt.

This commit is contained in:
Rasmus Munk Larsen 2025-09-23 14:34:57 +00:00
parent ea869e183b
commit 2d170aea11
2 changed files with 29 additions and 29 deletions

View File

@ -65,7 +65,7 @@ struct default_packet_traits {
HasAbsDiff = 0, HasAbsDiff = 0,
HasBlend = 0, HasBlend = 0,
// This flag is used to indicate whether packet comparison is supported. // This flag is used to indicate whether packet comparison is supported.
// pcmp_eq, pcmp_lt and pcmp_le should be defined for it to be true. // pcmp_eq and pcmp_lt should be defined for it to be true.
HasCmp = 0, HasCmp = 0,
HasDiv = 0, HasDiv = 0,
@ -432,30 +432,6 @@ EIGEN_DEVICE_FUNC inline Packet pzero(const Packet& a) {
return pzero_impl<Packet>::run(a); return pzero_impl<Packet>::run(a);
} }
/** \internal
 * Generic comparison "a <= b" producing a bit mask.
 * \returns ptrue(a) when the comparison holds, pzero(a) otherwise.
 */
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet pcmp_le(const Packet& a, const Packet& b) {
  if (a <= b) {
    return ptrue(a);
  }
  return pzero(a);
}
/** \internal
 * Generic comparison "a < b" producing a bit mask.
 * \returns ptrue(a) when a is strictly less than b, pzero(a) otherwise.
 */
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet pcmp_lt(const Packet& a, const Packet& b) {
  if (a < b) {
    return ptrue(a);
  } else {
    return pzero(a);
  }
}
/** \internal
 * Generic comparison "a == b" producing a bit mask.
 * \returns ptrue(a) when both operands compare equal, pzero(a) otherwise.
 */
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet pcmp_eq(const Packet& a, const Packet& b) {
  if (a == b) {
    return ptrue(a);
  }
  return pzero(a);
}
/** \internal
 * Generic comparison "a < b, or either operand is NaN" producing a bit mask.
 * \returns pzero(a) when a >= b holds, ptrue(a) in every other case.
 */
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet pcmp_lt_or_nan(const Packet& a, const Packet& b) {
  // The test is deliberately phrased as "a >= b": when that comparison does
  // not hold (which includes the NaN case named in the contract above), the
  // fall-through below reports "true".
  if (a >= b) {
    return pzero(a);
  }
  return ptrue(a);
}
template <typename T> template <typename T>
struct bit_and { struct bit_and {
EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a & b; } EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a & b; }
@ -582,6 +558,30 @@ EIGEN_DEVICE_FUNC inline Packet pandnot(const Packet& a, const Packet& b) {
return pand(a, pnot(b)); return pand(a, pnot(b));
} }
/** \internal
 * Generic comparison "a < b" producing a bit mask.
 * \returns ptrue(a) when a is strictly less than b, pzero(a) otherwise.
 */
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet pcmp_lt(const Packet& a, const Packet& b) {
  if (a < b) {
    return ptrue(a);
  }
  return pzero(a);
}
/** \internal
 * Generic comparison "a == b" producing a bit mask.
 * \returns ptrue(a) when both operands compare equal, pzero(a) otherwise.
 */
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet pcmp_eq(const Packet& a, const Packet& b) {
  if (a == b) {
    return ptrue(a);
  } else {
    return pzero(a);
  }
}
/** \internal
 * Generic comparison "a <= b" producing a bit mask, composed from the
 * equality and strict less-than primitives.
 * \returns por(pcmp_eq(a, b), pcmp_lt(a, b)).
 */
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet pcmp_le(const Packet& a, const Packet& b) {
  const Packet equal_mask = pcmp_eq(a, b);
  const Packet less_mask = pcmp_lt(a, b);
  return por(equal_mask, less_mask);
}
/** \internal
 * Generic comparison "a < b, or either operand is NaN" producing a bit mask.
 * \returns pzero(a) when a >= b holds, ptrue(a) in every other case.
 */
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet pcmp_lt_or_nan(const Packet& a, const Packet& b) {
  // Testing "a >= b" and returning the "true" mask on failure is what lets
  // the NaN case named in the contract above end up in the "true" branch.
  if (a >= b) {
    return pzero(a);
  } else {
    return ptrue(a);
  }
}
// In the general case, use bitwise select. // In the general case, use bitwise select.
template <typename Packet, bool is_scalar = is_scalar<Packet>::value> template <typename Packet, bool is_scalar = is_scalar<Packet>::value>
struct pselect_impl { struct pselect_impl {

View File

@ -934,7 +934,11 @@ EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4i& b) {
} }
template <> template <>
EIGEN_STRONG_INLINE Packet4i pcmp_le(const Packet4i& a, const Packet4i& b) { EIGEN_STRONG_INLINE Packet4i pcmp_le(const Packet4i& a, const Packet4i& b) {
#ifdef EIGEN_VECTORIZE_SSE4_1
return _mm_cmpeq_epi32(a, _mm_min_epi32(a, b));
#else
return por(pcmp_lt(a, b), pcmp_eq(a, b)); return por(pcmp_lt(a, b), pcmp_eq(a, b));
#endif
} }
template <> template <>
EIGEN_STRONG_INLINE Packet2l pcmp_lt(const Packet2l& a, const Packet2l& b) { EIGEN_STRONG_INLINE Packet2l pcmp_lt(const Packet2l& a, const Packet2l& b) {
@ -970,10 +974,6 @@ EIGEN_STRONG_INLINE Packet16b pcmp_eq(const Packet16b& a, const Packet16b& b) {
return _mm_and_si128(_mm_cmpeq_epi8(a, b), kBoolMask); return _mm_and_si128(_mm_cmpeq_epi8(a, b), kBoolMask);
} }
template <> template <>
EIGEN_STRONG_INLINE Packet16b pcmp_le(const Packet16b& a, const Packet16b& b) {
return por(pcmp_lt(a, b), pcmp_eq(a, b));
}
template <>
EIGEN_STRONG_INLINE Packet4ui pcmp_eq(const Packet4ui& a, const Packet4ui& b) { EIGEN_STRONG_INLINE Packet4ui pcmp_eq(const Packet4ui& a, const Packet4ui& b) {
return _mm_cmpeq_epi32(a, b); return _mm_cmpeq_epi32(a, b);
} }