From 2d170aea1123ea21bac1a0c414d31fb885e5ef6f Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen
Date: Tue, 23 Sep 2025 14:34:57 +0000
Subject: [PATCH] Define pcmp_le generically in terms of pcmp_eq and pcmp_lt.

Move the generic pcmp_lt, pcmp_eq, pcmp_le and pcmp_lt_or_nan templates
from after pzero to after pandnot, and implement the generic pcmp_le as
por(pcmp_eq(a, b), pcmp_lt(a, b)). The HasCmp comment is updated to
match: only pcmp_eq and pcmp_lt need to be defined.

SSE: when EIGEN_VECTORIZE_SSE4_1 is defined, compute the Packet4i
pcmp_le as _mm_cmpeq_epi32(a, _mm_min_epi32(a, b)), i.e. a <= b exactly
when min(a, b) == a; otherwise keep the por(pcmp_lt, pcmp_eq) form.
Remove the Packet16b pcmp_le specialization, whose body was the same
por(pcmp_lt, pcmp_eq) combination the generic template now provides.
---
 Eigen/src/Core/GenericPacketMath.h   | 50 ++++++++++++++--------------
 Eigen/src/Core/arch/SSE/PacketMath.h |  8 ++---
 2 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 139b10e8a..64e11231e 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -65,7 +65,7 @@ struct default_packet_traits {
     HasAbsDiff = 0,
     HasBlend = 0,
     // This flag is used to indicate whether packet comparison is supported.
-    // pcmp_eq, pcmp_lt and pcmp_le should be defined for it to be true.
+    // pcmp_eq and pcmp_lt should be defined for it to be true.
     HasCmp = 0,
 
     HasDiv = 0,
@@ -432,30 +432,6 @@ EIGEN_DEVICE_FUNC inline Packet pzero(const Packet& a) {
   return pzero_impl<Packet>::run(a);
 }
 
-/** \internal \returns a <= b as a bit mask */
-template <typename Packet>
-EIGEN_DEVICE_FUNC inline Packet pcmp_le(const Packet& a, const Packet& b) {
-  return a <= b ? ptrue(a) : pzero(a);
-}
-
-/** \internal \returns a < b as a bit mask */
-template <typename Packet>
-EIGEN_DEVICE_FUNC inline Packet pcmp_lt(const Packet& a, const Packet& b) {
-  return a < b ? ptrue(a) : pzero(a);
-}
-
-/** \internal \returns a == b as a bit mask */
-template <typename Packet>
-EIGEN_DEVICE_FUNC inline Packet pcmp_eq(const Packet& a, const Packet& b) {
-  return a == b ? ptrue(a) : pzero(a);
-}
-
-/** \internal \returns a < b or a==NaN or b==NaN as a bit mask */
-template <typename Packet>
-EIGEN_DEVICE_FUNC inline Packet pcmp_lt_or_nan(const Packet& a, const Packet& b) {
-  return a >= b ? pzero(a) : ptrue(a);
-}
-
 template <typename T>
 struct bit_and {
   EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a & b; }
@@ -582,6 +558,30 @@ EIGEN_DEVICE_FUNC inline Packet pandnot(const Packet& a, const Packet& b) {
   return pand(a, pnot(b));
 }
 
+/** \internal \returns a < b as a bit mask */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pcmp_lt(const Packet& a, const Packet& b) {
+  return a < b ? ptrue(a) : pzero(a);
+}
+
+/** \internal \returns a == b as a bit mask */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pcmp_eq(const Packet& a, const Packet& b) {
+  return a == b ? ptrue(a) : pzero(a);
+}
+
+/** \internal \returns a <= b as a bit mask */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pcmp_le(const Packet& a, const Packet& b) {
+  return por(pcmp_eq(a, b), pcmp_lt(a, b));
+}
+
+/** \internal \returns a < b or a==NaN or b==NaN as a bit mask */
+template <typename Packet>
+EIGEN_DEVICE_FUNC inline Packet pcmp_lt_or_nan(const Packet& a, const Packet& b) {
+  return a >= b ? pzero(a) : ptrue(a);
+}
+
 // In the general case, use bitwise select.
 template <typename Packet, bool is_scalar = is_scalar<Packet>::value>
 struct pselect_impl {
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 129e1be39..e0119dd76 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -934,7 +934,11 @@ EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4i& b) {
 }
 template <>
 EIGEN_STRONG_INLINE Packet4i pcmp_le(const Packet4i& a, const Packet4i& b) {
+#ifdef EIGEN_VECTORIZE_SSE4_1
+  return _mm_cmpeq_epi32(a, _mm_min_epi32(a, b));
+#else
   return por(pcmp_lt(a, b), pcmp_eq(a, b));
+#endif
 }
 template <>
 EIGEN_STRONG_INLINE Packet2l pcmp_lt(const Packet2l& a, const Packet2l& b) {
@@ -970,10 +974,6 @@ EIGEN_STRONG_INLINE Packet16b pcmp_eq(const Packet16b& a, const Packet16b& b) {
   return _mm_and_si128(_mm_cmpeq_epi8(a, b), kBoolMask);
 }
 template <>
-EIGEN_STRONG_INLINE Packet16b pcmp_le(const Packet16b& a, const Packet16b& b) {
-  return por(pcmp_lt(a, b), pcmp_eq(a, b));
-}
-template <>
 EIGEN_STRONG_INLINE Packet4ui pcmp_eq(const Packet4ui& a, const Packet4ui& b) {
   return _mm_cmpeq_epi32(a, b);
 }