mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
Modify scalar pzero, ptrue, pselect, and p<binary> operations to avoid memset.
The `memset` function and bitwise manipulation are only valid for POD types that do not require initialization; applying them to any other type results in undefined behavior (UB). We currently violate this in `ptrue` and `pzero`, we assume bitmasks in `pselect`, and the generic implementations apply bitwise operations byte-by-byte. This causes problems for scalar types that do require initialization or that contain non-POD data such as pointers (#2201). We either break such types, or force users to specialize these functions for their custom scalars, even when those scalars are not vectorized.

Here we modify these functions for scalars only, using only scalar operations instead:
- `pzero`: `Scalar(0)` for all scalars.
- `ptrue`: `Scalar(1)` for non-trivial scalars; all bits set to one for trivial scalars.
- `pselect`: ternary select comparing the mask to `Scalar(0)`, for all scalars.
- `pand`, `por`, `pxor`, `pnot`: use the operators `&`, `|`, `^`, `~` for all integer or non-trivial scalars; otherwise apply the operation bytewise.

For non-scalar (packet) types, the original implementations are kept to maintain compatibility and minimize the number of changes.

Fixes #2201.
This commit is contained in:
parent
7880f10526
commit
3d98a6ef5c
@ -129,6 +129,22 @@ template<typename T> struct packet_traits : default_packet_traits
|
|||||||
|
|
||||||
template<typename T> struct packet_traits<const T> : packet_traits<T> { };
|
template<typename T> struct packet_traits<const T> : packet_traits<T> { };
|
||||||
|
|
||||||
|
template<typename T> struct unpacket_traits
|
||||||
|
{
|
||||||
|
typedef T type;
|
||||||
|
typedef T half;
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
size = 1,
|
||||||
|
alignment = 1,
|
||||||
|
vectorizable = false,
|
||||||
|
masked_load_available=false,
|
||||||
|
masked_store_available=false
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename T> struct unpacket_traits<const T> : unpacket_traits<T> { };
|
||||||
|
|
||||||
template <typename Src, typename Tgt> struct type_casting_traits {
|
template <typename Src, typename Tgt> struct type_casting_traits {
|
||||||
enum {
|
enum {
|
||||||
VectorizedCast = 0,
|
VectorizedCast = 0,
|
||||||
@ -154,6 +170,18 @@ struct eigen_packet_wrapper
|
|||||||
T m_val;
|
T m_val;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/** \internal A convenience utility for determining if the type is a scalar.
|
||||||
|
* This is used to enable some generic packet implementations.
|
||||||
|
*/
|
||||||
|
template<typename Packet>
|
||||||
|
struct is_scalar {
|
||||||
|
typedef typename unpacket_traits<Packet>::type Scalar;
|
||||||
|
enum {
|
||||||
|
value = internal::is_same<Packet, Scalar>::value
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
/** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
|
/** \internal \returns static_cast<TgtType>(a) (coeff-wise) */
|
||||||
template <typename SrcPacket, typename TgtPacket>
|
template <typename SrcPacket, typename TgtPacket>
|
||||||
EIGEN_DEVICE_FUNC inline TgtPacket
|
EIGEN_DEVICE_FUNC inline TgtPacket
|
||||||
@ -215,13 +243,59 @@ pmul(const bool& a, const bool& b) { return a && b; }
|
|||||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||||
pdiv(const Packet& a, const Packet& b) { return a/b; }
|
pdiv(const Packet& a, const Packet& b) { return a/b; }
|
||||||
|
|
||||||
/** \internal \returns one bits */
|
// In the generic case, memset to all one bits.
|
||||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
template<typename Packet, typename EnableIf = void>
|
||||||
ptrue(const Packet& /*a*/) { Packet b; memset((void*)&b, 0xff, sizeof(b)); return b;}
|
struct ptrue_impl {
|
||||||
|
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/){
|
||||||
|
Packet b;
|
||||||
|
memset(static_cast<void*>(&b), 0xff, sizeof(Packet));
|
||||||
|
return b;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
/** \internal \returns zero bits */
|
// For non-trivial scalars, set to Scalar(1) (i.e. a non-zero value).
|
||||||
|
// Although this is technically not a valid bitmask, the scalar path for pselect
|
||||||
|
// uses a comparison to zero, so this should still work in most cases. We don't
|
||||||
|
// have another option, since the scalar type requires initialization.
|
||||||
|
template<typename T>
|
||||||
|
struct ptrue_impl<T,
|
||||||
|
typename internal::enable_if<is_scalar<T>::value && NumTraits<T>::RequireInitialization>::type > {
|
||||||
|
static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/){
|
||||||
|
return T(1);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/** \internal \returns one bits. */
|
||||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||||
pzero(const Packet& /*a*/) { Packet b; memset((void*)&b, 0, sizeof(b)); return b;}
|
ptrue(const Packet& a) {
|
||||||
|
return ptrue_impl<Packet>::run(a);
|
||||||
|
}
|
||||||
|
|
||||||
|
// In the general case, memset to zero.
|
||||||
|
template<typename Packet, typename EnableIf = void>
|
||||||
|
struct pzero_impl {
|
||||||
|
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
|
||||||
|
Packet b;
|
||||||
|
memset(static_cast<void*>(&b), 0x00, sizeof(Packet));
|
||||||
|
return b;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// For scalars, explicitly set to Scalar(0), since the underlying representation
|
||||||
|
// for zero may not consist of all-zero bits.
|
||||||
|
template<typename T>
|
||||||
|
struct pzero_impl<T,
|
||||||
|
typename internal::enable_if<is_scalar<T>::value>::type> {
|
||||||
|
static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) {
|
||||||
|
return T(0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/** \internal \returns packet of zeros */
|
||||||
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||||
|
pzero(const Packet& a) {
|
||||||
|
return pzero_impl<Packet>::run(a);
|
||||||
|
}
|
||||||
|
|
||||||
/** \internal \returns a <= b as a bit mask */
|
/** \internal \returns a <= b as a bit mask */
|
||||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||||
@ -238,33 +312,6 @@ pcmp_eq(const Packet& a, const Packet& b) { return a==b ? ptrue(a) : pzero(a); }
|
|||||||
/** \internal \returns a < b or a==NaN or b==NaN as a bit mask */
|
/** \internal \returns a < b or a==NaN or b==NaN as a bit mask */
|
||||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||||
pcmp_lt_or_nan(const Packet& a, const Packet& b) { return a>=b ? pzero(a) : ptrue(a); }
|
pcmp_lt_or_nan(const Packet& a, const Packet& b) { return a>=b ? pzero(a) : ptrue(a); }
|
||||||
template<> EIGEN_DEVICE_FUNC inline float pzero<float>(const float& a) {
|
|
||||||
EIGEN_UNUSED_VARIABLE(a)
|
|
||||||
return 0.f;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline double pzero<double>(const double& a) {
|
|
||||||
EIGEN_UNUSED_VARIABLE(a)
|
|
||||||
return 0.;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename RealScalar>
|
|
||||||
EIGEN_DEVICE_FUNC inline std::complex<RealScalar> ptrue(const std::complex<RealScalar>& /*a*/) {
|
|
||||||
RealScalar b = ptrue(RealScalar(0));
|
|
||||||
return std::complex<RealScalar>(b, b);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename Packet, typename Op>
|
|
||||||
EIGEN_DEVICE_FUNC inline Packet bitwise_helper(const Packet& a, const Packet& b, Op op) {
|
|
||||||
const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
|
|
||||||
const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);
|
|
||||||
Packet c;
|
|
||||||
unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
|
|
||||||
for (size_t i = 0; i < sizeof(Packet); ++i) {
|
|
||||||
*c_ptr++ = op(*a_ptr++, *b_ptr++);
|
|
||||||
}
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
struct bit_and {
|
struct bit_and {
|
||||||
@ -287,42 +334,123 @@ struct bit_xor {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
struct bit_not {
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR EIGEN_ALWAYS_INLINE T operator()(const T& a) const {
|
||||||
|
return ~a;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Use operators &, |, ^, ~.
|
||||||
|
template<typename T>
|
||||||
|
struct operator_bitwise_helper {
|
||||||
|
EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) { return bit_and<T>()(a, b); }
|
||||||
|
EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return bit_or<T>()(a, b); }
|
||||||
|
EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) { return bit_xor<T>()(a, b); }
|
||||||
|
EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return bit_not<T>()(a); }
|
||||||
|
};
|
||||||
|
|
||||||
|
// Apply binary operations byte-by-byte
|
||||||
|
template<typename T>
|
||||||
|
struct bytewise_bitwise_helper {
|
||||||
|
EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) {
|
||||||
|
return binary(a, b, bit_and<unsigned char>());
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) {
|
||||||
|
return binary(a, b, bit_or<unsigned char>());
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) {
|
||||||
|
return binary(a, b, bit_xor<unsigned char>());
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) {
|
||||||
|
return unary(a,bit_not<unsigned char>());
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
template<typename Op>
|
||||||
|
EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) {
|
||||||
|
const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
|
||||||
|
T c;
|
||||||
|
unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
|
||||||
|
for (size_t i = 0; i < sizeof(T); ++i) {
|
||||||
|
*c_ptr++ = op(*a_ptr++);
|
||||||
|
}
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Op>
|
||||||
|
EIGEN_DEVICE_FUNC static inline T binary(const T& a, const T& b, Op op) {
|
||||||
|
const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
|
||||||
|
const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);
|
||||||
|
T c;
|
||||||
|
unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
|
||||||
|
for (size_t i = 0; i < sizeof(T); ++i) {
|
||||||
|
*c_ptr++ = op(*a_ptr++, *b_ptr++);
|
||||||
|
}
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// In the general case, use byte-by-byte manipulation.
|
||||||
|
template<typename T, typename EnableIf = void>
|
||||||
|
struct bitwise_helper : public bytewise_bitwise_helper<T> {};
|
||||||
|
|
||||||
|
// For integers or non-trivial scalars, use binary operators.
|
||||||
|
template<typename T>
|
||||||
|
struct bitwise_helper<T,
|
||||||
|
typename internal::enable_if<
|
||||||
|
is_scalar<T>::value && (NumTraits<T>::IsInteger || NumTraits<T>::RequireInitialization)>::type
|
||||||
|
> : public operator_bitwise_helper<T> {};
|
||||||
|
|
||||||
/** \internal \returns the bitwise and of \a a and \a b */
|
/** \internal \returns the bitwise and of \a a and \a b */
|
||||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||||
pand(const Packet& a, const Packet& b) {
|
pand(const Packet& a, const Packet& b) {
|
||||||
return bitwise_helper(a, b, bit_and<unsigned char>());
|
return bitwise_helper<Packet>::bitwise_and(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \internal \returns the bitwise or of \a a and \a b */
|
/** \internal \returns the bitwise or of \a a and \a b */
|
||||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||||
por(const Packet& a, const Packet& b) {
|
por(const Packet& a, const Packet& b) {
|
||||||
return bitwise_helper(a ,b, bit_or<unsigned char>());
|
return bitwise_helper<Packet>::bitwise_or(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \internal \returns the bitwise xor of \a a and \a b */
|
/** \internal \returns the bitwise xor of \a a and \a b */
|
||||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||||
pxor(const Packet& a, const Packet& b) {
|
pxor(const Packet& a, const Packet& b) {
|
||||||
return bitwise_helper(a ,b, bit_xor<unsigned char>());
|
return bitwise_helper<Packet>::bitwise_xor(a, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \internal \returns the bitwise not of \a a */
|
||||||
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||||
|
pnot(const Packet& a) {
|
||||||
|
return bitwise_helper<Packet>::bitwise_not(a);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \internal \returns the bitwise and of \a a and not \a b */
|
/** \internal \returns the bitwise and of \a a and not \a b */
|
||||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||||
pandnot(const Packet& a, const Packet& b) { return pand(a, pxor(ptrue(b), b)); }
|
pandnot(const Packet& a, const Packet& b) { return pand(a, pnot(b)); }
|
||||||
|
|
||||||
|
// In the general case, use bitwise select.
|
||||||
|
template<typename Packet, typename EnableIf = void>
|
||||||
|
struct pselect_impl {
|
||||||
|
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
|
||||||
|
return por(pand(a,mask),pandnot(b,mask));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// For scalars, use ternary select.
|
||||||
|
template<typename Packet>
|
||||||
|
struct pselect_impl<Packet,
|
||||||
|
typename internal::enable_if<is_scalar<Packet>::value>::type > {
|
||||||
|
static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
|
||||||
|
return numext::equal_strict(mask, Packet(0)) ? b : a;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
/** \internal \returns \a or \b for each field in packet according to \mask */
|
/** \internal \returns \a or \b for each field in packet according to \mask */
|
||||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||||
pselect(const Packet& mask, const Packet& a, const Packet& b) {
|
pselect(const Packet& mask, const Packet& a, const Packet& b) {
|
||||||
return por(pand(a,mask),pandnot(b,mask));
|
return pselect_impl<Packet>::run(mask, a, b);
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline float pselect<float>(
|
|
||||||
const float& cond, const float& a, const float&b) {
|
|
||||||
return numext::equal_strict(cond,0.f) ? b : a;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline double pselect<double>(
|
|
||||||
const double& cond, const double& a, const double& b) {
|
|
||||||
return numext::equal_strict(cond,0.) ? b : a;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC inline bool pselect<bool>(
|
template<> EIGEN_DEVICE_FUNC inline bool pselect<bool>(
|
||||||
|
@ -184,19 +184,7 @@ template<typename T> struct functor_traits
|
|||||||
|
|
||||||
template<typename T> struct packet_traits;
|
template<typename T> struct packet_traits;
|
||||||
|
|
||||||
template<typename T> struct unpacket_traits
|
template<typename T> struct unpacket_traits;
|
||||||
{
|
|
||||||
typedef T type;
|
|
||||||
typedef T half;
|
|
||||||
enum
|
|
||||||
{
|
|
||||||
size = 1,
|
|
||||||
alignment = 1,
|
|
||||||
vectorizable = false,
|
|
||||||
masked_load_available=false,
|
|
||||||
masked_store_available=false
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
template<int Size, typename PacketType,
|
template<int Size, typename PacketType,
|
||||||
bool Stop = Size==Dynamic || (Size%unpacket_traits<PacketType>::size)==0 || is_same<PacketType,typename unpacket_traits<PacketType>::half>::value>
|
bool Stop = Size==Dynamic || (Size%unpacket_traits<PacketType>::size)==0 || is_same<PacketType,typename unpacket_traits<PacketType>::half>::value>
|
||||||
|
@ -126,7 +126,7 @@ template<>
|
|||||||
struct NumTraits<AnnoyingScalar> : NumTraits<float>
|
struct NumTraits<AnnoyingScalar> : NumTraits<float>
|
||||||
{
|
{
|
||||||
enum {
|
enum {
|
||||||
RequireInitialization = true
|
RequireInitialization = 1,
|
||||||
};
|
};
|
||||||
typedef AnnoyingScalar Real;
|
typedef AnnoyingScalar Real;
|
||||||
typedef AnnoyingScalar Nested;
|
typedef AnnoyingScalar Nested;
|
||||||
@ -145,10 +145,6 @@ bool (isfinite)(const AnnoyingScalar& x) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
template<> EIGEN_STRONG_INLINE AnnoyingScalar pcmp_eq(const AnnoyingScalar& a, const AnnoyingScalar& b)
|
|
||||||
{ return AnnoyingScalar(pcmp_eq(*a.v, *b.v)); }
|
|
||||||
template<> EIGEN_STRONG_INLINE AnnoyingScalar pselect(const AnnoyingScalar& mask, const AnnoyingScalar& a, const AnnoyingScalar& b)
|
|
||||||
{ return numext::equal_strict(*mask.v, 0.f) ? b : a; }
|
|
||||||
template<> EIGEN_STRONG_INLINE double cast(const AnnoyingScalar& x) { return double(*x.v); }
|
template<> EIGEN_STRONG_INLINE double cast(const AnnoyingScalar& x) { return double(*x.v); }
|
||||||
template<> EIGEN_STRONG_INLINE float cast(const AnnoyingScalar& x) { return *x.v; }
|
template<> EIGEN_STRONG_INLINE float cast(const AnnoyingScalar& x) { return *x.v; }
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user