Add missing logical packet ops for GPU and NEON.

2025-10-19 11:31:07 +08:00 · 2019-01-17 17:45:08 -08:00 · 2019-01-17 17:45:08 -08:00 · 2eccbaf3f7
commit 2eccbaf3f7
parent d575505d25
2 changed files with 113 additions and 0 deletions
--- a/Eigen/src/Core/arch/GPU/PacketMath.h
+++ b/Eigen/src/Core/arch/GPU/PacketMath.h
@ -100,6 +100,117 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const do
  return make_double2(from, from);
 }
 // Scalar bit-manipulation helpers used by the float4/double2 logical packet
 // ops (pand, por, pxor, pandnot, pcmp_eq). Each helper reinterprets its
 // floating-point operands as same-width integers, applies the integer
 // operation, and reinterprets the result back to floating point.
 //
 // These helpers are intentionally NOT placed in an anonymous namespace. This
 // is a header: helpers with internal linkage would be distinct entities in
 // every translation unit, while the inline packet-op specializations that
 // call them are shared across translation units, which is a one-definition
 // rule hazard. The helpers are declared EIGEN_STRONG_INLINE (assumed to imply
 // `inline`, as elsewhere in Eigen), so defining them in a header is fine.

 // Bitwise AND of the 32-bit patterns of a and b.
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_and(const float& a,
                                                         const float& b) {
  return __int_as_float(__float_as_int(a) & __float_as_int(b));
 }
 // Bitwise AND of the 64-bit patterns of a and b.
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_and(const double& a,
                                                          const double& b) {
  return __longlong_as_double(__double_as_longlong(a) &
                              __double_as_longlong(b));
 }
 // Bitwise OR of the 32-bit patterns of a and b.
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_or(const float& a,
                                                        const float& b) {
  return __int_as_float(__float_as_int(a) | __float_as_int(b));
 }
 // Bitwise OR of the 64-bit patterns of a and b.
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_or(const double& a,
                                                         const double& b) {
  return __longlong_as_double(__double_as_longlong(a) |
                              __double_as_longlong(b));
 }
 // Bitwise XOR of the 32-bit patterns of a and b.
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_xor(const float& a,
                                                         const float& b) {
  return __int_as_float(__float_as_int(a) ^ __float_as_int(b));
 }
 // Bitwise XOR of the 64-bit patterns of a and b.
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_xor(const double& a,
                                                          const double& b) {
  return __longlong_as_double(__double_as_longlong(a) ^
                              __double_as_longlong(b));
 }
 // Returns the bits of a that are not set in b, i.e. a & ~b (32-bit).
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_andnot(const float& a,
                                                            const float& b) {
  return __int_as_float(__float_as_int(a) & ~__float_as_int(b));
 }
 // Returns the bits of a that are not set in b, i.e. a & ~b (64-bit).
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_andnot(const double& a,
                                                             const double& b) {
  return __longlong_as_double(__double_as_longlong(a) &
                              ~__double_as_longlong(b));
 }
 // Returns a float whose bit pattern is all ones when a == b and all zeros
 // otherwise. NaN never compares equal, so NaN operands give the zero mask.
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float eq_mask(const float& a,
                                                     const float& b) {
  // ~0 is the all-ones int. A signed constant matches the int argument the
  // intrinsic is fed and avoids an out-of-range unsigned-to-int conversion.
  return __int_as_float(a == b ? ~0 : 0);
 }
 // Returns a double whose bit pattern is all ones when a == b and all zeros
 // otherwise. NaN never compares equal, so NaN operands give the zero mask.
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double eq_mask(const double& a,
                                                      const double& b) {
  // ~0ll is the all-ones long long; same rationale as the float overload.
  return __longlong_as_double(a == b ? ~0ll : 0ll);
 }
 // Lane-wise bitwise AND of two float4 packets. Each component is combined on
 // its raw bit pattern via the scalar bitwise_and helper.
 template <>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pand<float4>(const float4& a,
                                                           const float4& b) {
  const float lane_x = bitwise_and(a.x, b.x);
  const float lane_y = bitwise_and(a.y, b.y);
  const float lane_z = bitwise_and(a.z, b.z);
  const float lane_w = bitwise_and(a.w, b.w);
  return make_float4(lane_x, lane_y, lane_z, lane_w);
 }
 // Lane-wise bitwise AND of two double2 packets. Each component is combined on
 // its raw bit pattern via the scalar bitwise_and helper.
 template <>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pand<double2>(const double2& a,
                                                             const double2& b) {
  const double lane_x = bitwise_and(a.x, b.x);
  const double lane_y = bitwise_and(a.y, b.y);
  return make_double2(lane_x, lane_y);
 }
 // Lane-wise bitwise OR of two float4 packets. Each component is combined on
 // its raw bit pattern via the scalar bitwise_or helper.
 template <>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 por<float4>(const float4& a,
                                                          const float4& b) {
  const float lane_x = bitwise_or(a.x, b.x);
  const float lane_y = bitwise_or(a.y, b.y);
  const float lane_z = bitwise_or(a.z, b.z);
  const float lane_w = bitwise_or(a.w, b.w);
  return make_float4(lane_x, lane_y, lane_z, lane_w);
 }
 // Lane-wise bitwise OR of two double2 packets. Each component is combined on
 // its raw bit pattern via the scalar bitwise_or helper.
 template <>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 por<double2>(const double2& a,
                                                            const double2& b) {
  const double lane_x = bitwise_or(a.x, b.x);
  const double lane_y = bitwise_or(a.y, b.y);
  return make_double2(lane_x, lane_y);
 }
 // Lane-wise bitwise XOR of two float4 packets. Each component is combined on
 // its raw bit pattern via the scalar bitwise_xor helper.
 template <>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pxor<float4>(const float4& a,
                                                           const float4& b) {
  const float lane_x = bitwise_xor(a.x, b.x);
  const float lane_y = bitwise_xor(a.y, b.y);
  const float lane_z = bitwise_xor(a.z, b.z);
  const float lane_w = bitwise_xor(a.w, b.w);
  return make_float4(lane_x, lane_y, lane_z, lane_w);
 }
 // Lane-wise bitwise XOR of two double2 packets. Each component is combined on
 // its raw bit pattern via the scalar bitwise_xor helper.
 template <>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pxor<double2>(const double2& a,
                                                             const double2& b) {
  const double lane_x = bitwise_xor(a.x, b.x);
  const double lane_y = bitwise_xor(a.y, b.y);
  return make_double2(lane_x, lane_y);
 }
 // Lane-wise "and-not" of two float4 packets: every result component keeps the
 // bits of a's component that are clear in b's component (a & ~b), computed by
 // the scalar bitwise_andnot helper.
 template <>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pandnot<float4>(const float4& a,
                                                              const float4& b) {
  const float lane_x = bitwise_andnot(a.x, b.x);
  const float lane_y = bitwise_andnot(a.y, b.y);
  const float lane_z = bitwise_andnot(a.z, b.z);
  const float lane_w = bitwise_andnot(a.w, b.w);
  return make_float4(lane_x, lane_y, lane_z, lane_w);
 }
 // Lane-wise "and-not" of two double2 packets: every result component keeps
 // the bits of a's component that are clear in b's component (a & ~b),
 // computed by the scalar bitwise_andnot helper.
 template <>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
 pandnot<double2>(const double2& a, const double2& b) {
  const double lane_x = bitwise_andnot(a.x, b.x);
  const double lane_y = bitwise_andnot(a.y, b.y);
  return make_double2(lane_x, lane_y);
 }
 // Lane-wise equality comparison of two float4 packets. Each result component
 // is the eq_mask of the matching input components: all bits set where they
 // compare equal, all bits clear otherwise.
 template <>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcmp_eq<float4>(const float4& a,
                                                              const float4& b) {
  const float mask_x = eq_mask(a.x, b.x);
  const float mask_y = eq_mask(a.y, b.y);
  const float mask_z = eq_mask(a.z, b.z);
  const float mask_w = eq_mask(a.w, b.w);
  return make_float4(mask_x, mask_y, mask_z, mask_w);
 }
 // Lane-wise equality comparison of two double2 packets. Each result component
 // is the eq_mask of the matching input components: all bits set where they
 // compare equal, all bits clear otherwise.
 template <>
 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
 pcmp_eq<double2>(const double2& a, const double2& b) {
  const double mask_x = eq_mask(a.x, b.x);
  const double mask_y = eq_mask(a.y, b.y);
  return make_double2(mask_x, mask_y);
 }
 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float4>(const float& a) {
  return make_float4(a, a+1, a+2, a+3);
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@ -711,6 +711,8 @@ template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, con
  return vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
 }
 // Lane-wise equality comparison of two Packet2d values. The unsigned 64-bit
 // per-lane result of vceqq_f64 is reinterpreted (not converted) as doubles.
 template<>
 EIGEN_STRONG_INLINE Packet2d pcmp_eq(const Packet2d& a, const Packet2d& b) {
  const Packet2d lane_mask = vreinterpretq_f64_u64(vceqq_f64(a, b));
  return lane_mask;
 }
 // Loads a Packet2d from `from` using vld1q_f64.
 // NOTE(review): EIGEN_DEBUG_ALIGNED_LOAD is defined elsewhere; presumably a
 // debug hook for aligned loads -- confirm against its definition.
 template<>
 EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) {
  EIGEN_DEBUG_ALIGNED_LOAD
  const Packet2d loaded = vld1q_f64(from);
  return loaded;
 }
 // Loads a Packet2d from `from` using vld1q_f64.
 // NOTE(review): EIGEN_DEBUG_UNALIGNED_LOAD is defined elsewhere; presumably a
 // debug hook for unaligned loads -- confirm against its definition.
 template<>
 EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) {
  EIGEN_DEBUG_UNALIGNED_LOAD
  const Packet2d loaded = vld1q_f64(from);
  return loaded;
 }