From 3d98a6ef5ce0ba85acaee4ffffc53f0f21bd8fd2 Mon Sep 17 00:00:00 2001 From: Antonio Sanchez Date: Thu, 8 Jul 2021 20:32:23 -0700 Subject: [PATCH] Modify scalar pzero, ptrue, pselect, and p operations to avoid memset. The `memset` function and bitwise manipulation only apply to POD types that do not require initialization, otherwise resulting in UB. We currently violate this in `ptrue` and `pzero`, we assume bitmasks for `pselect`, and bitwise operations are applied byte-by-byte in the generic implementations. This is causing issues for scalar types that do require initialization or that contain non-POD info such as pointers (#2201). We either break them, or force specializations of these functions for custom scalars, even if they are not vectorized. Here we modify these functions for scalars only - instead using only scalar operations: - `pzero`: `Scalar(0)` for all scalars. - `ptrue`: `Scalar(1)` for non-trivial scalars, bitset to one bits for trivial scalars. - `pselect`: ternary select comparing mask to `Scalar(0)` for all scalars - `pand`, `por`, `pxor`, `pnot`: use operators `&`, `|`, `^`, `~` for all integer or non-trivial scalars, otherwise apply bytewise. For non-scalar types, the original implementations are used to maintain compatibility and minimize the number of changes. Fixes #2201. --- Eigen/src/Core/GenericPacketMath.h | 222 +++++++++++++++++++++++------ Eigen/src/Core/util/XprHelper.h | 14 +- test/AnnoyingScalar.h | 6 +- 3 files changed, 177 insertions(+), 65 deletions(-) diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 53800a005..cf677a190 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -129,6 +129,22 @@ template struct packet_traits : default_packet_traits template struct packet_traits : packet_traits { }; +template struct unpacket_traits +{ + typedef T type; + typedef T half; + enum + { + size = 1, + alignment = 1, + vectorizable = false, + masked_load_available=false, + masked_store_available=false + }; +}; + +template struct unpacket_traits : unpacket_traits { }; + template struct type_casting_traits { enum { VectorizedCast = 0, @@ -154,6 +170,18 @@ struct eigen_packet_wrapper T m_val; }; + +/** \internal A convenience utility for determining if the type is a scalar. + * This is used to enable some generic packet implementations. + */ +template +struct is_scalar { + typedef typename unpacket_traits::type Scalar; + enum { + value = internal::is_same::value + }; +}; + /** \internal \returns static_cast(a) (coeff-wise) */ template EIGEN_DEVICE_FUNC inline TgtPacket @@ -215,13 +243,59 @@ pmul(const bool& a, const bool& b) { return a && b; } template EIGEN_DEVICE_FUNC inline Packet pdiv(const Packet& a, const Packet& b) { return a/b; } -/** \internal \returns one bits */ -template EIGEN_DEVICE_FUNC inline Packet -ptrue(const Packet& /*a*/) { Packet b; memset((void*)&b, 0xff, sizeof(b)); return b;} +// In the generic case, memset to all one bits. +template +struct ptrue_impl { + static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/){ + Packet b; + memset(static_cast(&b), 0xff, sizeof(Packet)); + return b; + } +}; -/** \internal \returns zero bits */ +// For non-trivial scalars, set to Scalar(1) (i.e. a non-zero value). +// Although this is technically not a valid bitmask, the scalar path for pselect +// uses a comparison to zero, so this should still work in most cases. We don't +// have another option, since the scalar type requires initialization. +template +struct ptrue_impl::value && NumTraits::RequireInitialization>::type > { + static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/){ + return T(1); + } +}; + +/** \internal \returns one bits. */ template EIGEN_DEVICE_FUNC inline Packet -pzero(const Packet& /*a*/) { Packet b; memset((void*)&b, 0, sizeof(b)); return b;} +ptrue(const Packet& a) { + return ptrue_impl::run(a); +} + +// In the general case, memset to zero. +template +struct pzero_impl { + static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) { + Packet b; + memset(static_cast(&b), 0x00, sizeof(Packet)); + return b; + } +}; + +// For scalars, explicitly set to Scalar(0), since the underlying representation +// for zero may not consist of all-zero bits. +template +struct pzero_impl::value>::type> { + static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) { + return T(0); + } +}; + +/** \internal \returns packet of zeros */ +template EIGEN_DEVICE_FUNC inline Packet +pzero(const Packet& a) { + return pzero_impl::run(a); +} /** \internal \returns a <= b as a bit mask */ template EIGEN_DEVICE_FUNC inline Packet @@ -238,33 +312,6 @@ pcmp_eq(const Packet& a, const Packet& b) { return a==b ? ptrue(a) : pzero(a); } /** \internal \returns a < b or a==NaN or b==NaN as a bit mask */ template EIGEN_DEVICE_FUNC inline Packet pcmp_lt_or_nan(const Packet& a, const Packet& b) { return a>=b ? pzero(a) : ptrue(a); } -template<> EIGEN_DEVICE_FUNC inline float pzero(const float& a) { - EIGEN_UNUSED_VARIABLE(a) - return 0.f; -} - -template<> EIGEN_DEVICE_FUNC inline double pzero(const double& a) { - EIGEN_UNUSED_VARIABLE(a) - return 0.; -} - -template -EIGEN_DEVICE_FUNC inline std::complex ptrue(const std::complex& /*a*/) { - RealScalar b = ptrue(RealScalar(0)); - return std::complex(b, b); -} - -template -EIGEN_DEVICE_FUNC inline Packet bitwise_helper(const Packet& a, const Packet& b, Op op) { - const unsigned char* a_ptr = reinterpret_cast(&a); - const unsigned char* b_ptr = reinterpret_cast(&b); - Packet c; - unsigned char* c_ptr = reinterpret_cast(&c); - for (size_t i = 0; i < sizeof(Packet); ++i) { - *c_ptr++ = op(*a_ptr++, *b_ptr++); - } - return c; -} template struct bit_and { @@ -287,42 +334,123 @@ struct bit_xor { } }; +template +struct bit_not { + EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a) const { + return ~a; + } +}; + +// Use operators &, |, ^, ~. +template +struct operator_bitwise_helper { + EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) { return bit_and()(a, b); } + EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return bit_or()(a, b); } + EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) { return bit_xor()(a, b); } + EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return bit_not()(a); } +}; + +// Apply binary operations byte-by-byte +template +struct bytewise_bitwise_helper { + EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) { + return binary(a, b, bit_and()); + } + EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { + return binary(a, b, bit_or()); + } + EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) { + return binary(a, b, bit_xor()); + } + EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { + return unary(a,bit_not()); + } + + private: + template + EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) { + const unsigned char* a_ptr = reinterpret_cast(&a); + T c; + unsigned char* c_ptr = reinterpret_cast(&c); + for (size_t i = 0; i < sizeof(T); ++i) { + *c_ptr++ = op(*a_ptr++); + } + return c; + } + + template + EIGEN_DEVICE_FUNC static inline T binary(const T& a, const T& b, Op op) { + const unsigned char* a_ptr = reinterpret_cast(&a); + const unsigned char* b_ptr = reinterpret_cast(&b); + T c; + unsigned char* c_ptr = reinterpret_cast(&c); + for (size_t i = 0; i < sizeof(T); ++i) { + *c_ptr++ = op(*a_ptr++, *b_ptr++); + } + return c; + } +}; + +// In the general case, use byte-by-byte manipulation. +template +struct bitwise_helper : public bytewise_bitwise_helper {}; + +// For integers or non-trivial scalars, use binary operators. +template +struct bitwise_helper::value && (NumTraits::IsInteger || NumTraits::RequireInitialization)>::type + > : public operator_bitwise_helper {}; + /** \internal \returns the bitwise and of \a a and \a b */ template EIGEN_DEVICE_FUNC inline Packet pand(const Packet& a, const Packet& b) { - return bitwise_helper(a, b, bit_and()); + return bitwise_helper::bitwise_and(a, b); } /** \internal \returns the bitwise or of \a a and \a b */ template EIGEN_DEVICE_FUNC inline Packet por(const Packet& a, const Packet& b) { - return bitwise_helper(a ,b, bit_or()); + return bitwise_helper::bitwise_or(a, b); } /** \internal \returns the bitwise xor of \a a and \a b */ template EIGEN_DEVICE_FUNC inline Packet pxor(const Packet& a, const Packet& b) { - return bitwise_helper(a ,b, bit_xor()); + return bitwise_helper::bitwise_xor(a, b); +} + +/** \internal \returns the bitwise not of \a a */ +template EIGEN_DEVICE_FUNC inline Packet +pnot(const Packet& a) { + return bitwise_helper::bitwise_not(a); } /** \internal \returns the bitwise and of \a a and not \a b */ template EIGEN_DEVICE_FUNC inline Packet -pandnot(const Packet& a, const Packet& b) { return pand(a, pxor(ptrue(b), b)); } +pandnot(const Packet& a, const Packet& b) { return pand(a, pnot(b)); } + +// In the general case, use bitwise select. +template +struct pselect_impl { + static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) { + return por(pand(a,mask),pandnot(b,mask)); + } +}; + +// For scalars, use ternary select. +template +struct pselect_impl::value>::type > { + static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) { + return numext::equal_strict(mask, Packet(0)) ? b : a; + } +}; /** \internal \returns \a or \b for each field in packet according to \mask */ template EIGEN_DEVICE_FUNC inline Packet pselect(const Packet& mask, const Packet& a, const Packet& b) { - return por(pand(a,mask),pandnot(b,mask)); -} - -template<> EIGEN_DEVICE_FUNC inline float pselect( - const float& cond, const float& a, const float&b) { - return numext::equal_strict(cond,0.f) ? b : a; -} - -template<> EIGEN_DEVICE_FUNC inline double pselect( - const double& cond, const double& a, const double& b) { - return numext::equal_strict(cond,0.) ? b : a; + return pselect_impl::run(mask, a, b); } template<> EIGEN_DEVICE_FUNC inline bool pselect( diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index f2323174e..71c32b8a1 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -184,19 +184,7 @@ template struct functor_traits template struct packet_traits; -template struct unpacket_traits -{ - typedef T type; - typedef T half; - enum - { - size = 1, - alignment = 1, - vectorizable = false, - masked_load_available=false, - masked_store_available=false - }; -}; +template struct unpacket_traits; template::size)==0 || is_same::half>::value> diff --git a/test/AnnoyingScalar.h b/test/AnnoyingScalar.h index 0f8e70d36..7ace083c5 100644 --- a/test/AnnoyingScalar.h +++ b/test/AnnoyingScalar.h @@ -126,7 +126,7 @@ template<> struct NumTraits : NumTraits { enum { - RequireInitialization = true + RequireInitialization = 1, }; typedef AnnoyingScalar Real; typedef AnnoyingScalar Nested; @@ -145,10 +145,6 @@ bool (isfinite)(const AnnoyingScalar& x) { } namespace internal { - template<> EIGEN_STRONG_INLINE AnnoyingScalar pcmp_eq(const AnnoyingScalar& a, const AnnoyingScalar& b) - { return AnnoyingScalar(pcmp_eq(*a.v, *b.v)); } - template<> EIGEN_STRONG_INLINE AnnoyingScalar pselect(const AnnoyingScalar& mask, const AnnoyingScalar& a, const AnnoyingScalar& b) - { return numext::equal_strict(*mask.v, 0.f) ? b : a; } template<> EIGEN_STRONG_INLINE double cast(const AnnoyingScalar& x) { return double(*x.v); } template<> EIGEN_STRONG_INLINE float cast(const AnnoyingScalar& x) { return *x.v; } }