diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 3a302c077..4b56f0f7d 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -376,6 +376,12 @@ struct ptrue_impl { } }; +// For booleans, we can only directly set a valid `bool` value to avoid UB. +template <> +struct ptrue_impl { + static EIGEN_DEVICE_FUNC inline bool run(const bool& /*a*/) { return true; } +}; + // For non-trivial scalars, set to Scalar(1) (i.e. a non-zero value). // Although this is technically not a valid bitmask, the scalar path for pselect // uses a comparison to zero, so this should still work in most cases. We don't @@ -458,6 +464,32 @@ struct bit_not { EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a) const { return ~a; } }; +template <> +struct bit_and { + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { + return a && b; + } +}; + +template <> +struct bit_or { + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { + return a || b; + } +}; + +template <> +struct bit_xor { + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { + return a != b; + } +}; + +template <> +struct bit_not { + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE bool operator()(const bool& a) const { return !a; } +}; + // Use operators &, |, ^, ~. template struct operator_bitwise_helper { diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index be8183cd7..bdbf75956 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -527,7 +527,7 @@ EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { template <> EIGEN_STRONG_INLINE Packet16b pnegate(const Packet16b& a) { - return psub(pset1(false), a); + return a; } template <> @@ -677,18 +677,6 @@ template <> EIGEN_DEVICE_FUNC inline Packet2d pselect(const Packet2d& mask, const Packet2d& a, const Packet2d& b) { return _mm_blendv_pd(b, a, mask); } - -template <> -EIGEN_DEVICE_FUNC inline Packet16b pselect(const Packet16b& mask, const Packet16b& a, const Packet16b& b) { - return _mm_blendv_epi8(b, a, mask); -} -#else -template <> -EIGEN_DEVICE_FUNC inline Packet16b pselect(const Packet16b& mask, const Packet16b& a, const Packet16b& b) { - Packet16b a_part = _mm_and_si128(mask, a); - Packet16b b_part = _mm_andnot_si128(mask, b); - return _mm_or_si128(a_part, b_part); -} #endif template <> @@ -696,8 +684,8 @@ EIGEN_STRONG_INLINE Packet4i ptrue(const Packet4i& a) { return _mm_cmpeq_epi32(a, a); } template <> -EIGEN_STRONG_INLINE Packet16b ptrue(const Packet16b& a) { - return _mm_cmpeq_epi8(a, a); +EIGEN_STRONG_INLINE Packet16b ptrue(const Packet16b& /*a*/) { + return pset1(true); } template <> EIGEN_STRONG_INLINE Packet4f ptrue(const Packet4f& a) { @@ -838,7 +826,9 @@ EIGEN_STRONG_INLINE Packet4ui pcmp_eq(const Packet4ui& a, const Packet4ui& b) { } template <> EIGEN_STRONG_INLINE Packet16b pcmp_eq(const Packet16b& a, const Packet16b& b) { - return _mm_cmpeq_epi8(a, b); + // Mask out invalid bool bits to avoid UB. + const Packet16b kBoolMask = pset1(true); + return _mm_and_si128(_mm_cmpeq_epi8(a, b), kBoolMask); } template <> EIGEN_STRONG_INLINE Packet4i pcmp_le(const Packet4i& a, const Packet4i& b) { @@ -1377,7 +1367,7 @@ EIGEN_STRONG_INLINE void pstoreu(uint32_t* to, const Packet4ui& from) } template <> EIGEN_STRONG_INLINE void pstoreu(bool* to, const Packet16b& from) { - EIGEN_DEBUG_ALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); + EIGEN_DEBUG_UNALIGNED_STORE _mm_storeu_si128(reinterpret_cast<__m128i*>(to), from); } template diff --git a/test/packetmath.cpp b/test/packetmath.cpp index f68d51391..bea00f284 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -382,12 +382,6 @@ struct packetmath_boolean_mask_ops_notcomplex_test< } }; -// Packet16b representing bool does not support ptrue, pandnot or pcmp_eq, since -// the scalar path (for some compilers) compute the bitwise and with 0x1 of the -// results to keep the value in [0,1]. -template <> -void packetmath_boolean_mask_ops::type>() {} - template struct packetmath_minus_zero_add_test { static void run() {} @@ -662,11 +656,11 @@ void packetmath() { { for (int i = 0; i < PacketSize; ++i) { // "if" mask - unsigned char v = internal::random() ? 0xff : 0; - char* bytes = (char*)(data1 + i); - for (int k = 0; k < int(sizeof(Scalar)); ++k) { - bytes[k] = v; - } + // Note: it's UB to load 0xFF directly into a `bool`. + uint8_t v = + internal::random() ? (std::is_same::value ? static_cast(true) : 0xff) : 0; + // Avoid strict aliasing violation by using memset. + memset(data1 + i, v, sizeof(Scalar)); // "then" packet data1[i + PacketSize] = internal::random(); // "else" packet