mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-29 07:14:12 +08:00
Add missing logical packet ops for GPU and NEON.
This commit is contained in:
parent
d575505d25
commit
2eccbaf3f7
@ -100,6 +100,117 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const do
|
||||
return make_double2(from, from);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_and(const float& a,
|
||||
const float& b) {
|
||||
return __int_as_float(__float_as_int(a) & __float_as_int(b));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_and(const double& a,
|
||||
const double& b) {
|
||||
return __longlong_as_double(__double_as_longlong(a) &
|
||||
__double_as_longlong(b));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_or(const float& a,
|
||||
const float& b) {
|
||||
return __int_as_float(__float_as_int(a) | __float_as_int(b));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_or(const double& a,
|
||||
const double& b) {
|
||||
return __longlong_as_double(__double_as_longlong(a) |
|
||||
__double_as_longlong(b));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_xor(const float& a,
|
||||
const float& b) {
|
||||
return __int_as_float(__float_as_int(a) ^ __float_as_int(b));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_xor(const double& a,
|
||||
const double& b) {
|
||||
return __longlong_as_double(__double_as_longlong(a) ^
|
||||
__double_as_longlong(b));
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_andnot(const float& a,
|
||||
const float& b) {
|
||||
return __int_as_float(__float_as_int(a) & ~__float_as_int(b));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_andnot(const double& a,
|
||||
const double& b) {
|
||||
return __longlong_as_double(__double_as_longlong(a) &
|
||||
~__double_as_longlong(b));
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float eq_mask(const float& a,
|
||||
const float& b) {
|
||||
return __int_as_float(a == b ? 0xffffffffu : 0u);
|
||||
}
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double eq_mask(const double& a,
|
||||
const double& b) {
|
||||
return __longlong_as_double(a == b ? 0xffffffffffffffffull : 0ull);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pand<float4>(const float4& a,
|
||||
const float4& b) {
|
||||
return make_float4(bitwise_and(a.x, b.x), bitwise_and(a.y, b.y),
|
||||
bitwise_and(a.z, b.z), bitwise_and(a.w, b.w));
|
||||
}
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pand<double2>(const double2& a,
|
||||
const double2& b) {
|
||||
return make_double2(bitwise_and(a.x, b.x), bitwise_and(a.y, b.y));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 por<float4>(const float4& a,
|
||||
const float4& b) {
|
||||
return make_float4(bitwise_or(a.x, b.x), bitwise_or(a.y, b.y),
|
||||
bitwise_or(a.z, b.z), bitwise_or(a.w, b.w));
|
||||
}
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 por<double2>(const double2& a,
|
||||
const double2& b) {
|
||||
return make_double2(bitwise_or(a.x, b.x), bitwise_or(a.y, b.y));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pxor<float4>(const float4& a,
|
||||
const float4& b) {
|
||||
return make_float4(bitwise_xor(a.x, b.x), bitwise_xor(a.y, b.y),
|
||||
bitwise_xor(a.z, b.z), bitwise_xor(a.w, b.w));
|
||||
}
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pxor<double2>(const double2& a,
|
||||
const double2& b) {
|
||||
return make_double2(bitwise_xor(a.x, b.x), bitwise_xor(a.y, b.y));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pandnot<float4>(const float4& a,
|
||||
const float4& b) {
|
||||
return make_float4(bitwise_andnot(a.x, b.x), bitwise_andnot(a.y, b.y),
|
||||
bitwise_andnot(a.z, b.z), bitwise_andnot(a.w, b.w));
|
||||
}
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
|
||||
pandnot<double2>(const double2& a, const double2& b) {
|
||||
return make_double2(bitwise_andnot(a.x, b.x), bitwise_andnot(a.y, b.y));
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcmp_eq<float4>(const float4& a,
|
||||
const float4& b) {
|
||||
return make_float4(eq_mask(a.x, b.x), eq_mask(a.y, b.y), eq_mask(a.z, b.z),
|
||||
eq_mask(a.w, b.w));
|
||||
}
|
||||
template <>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
|
||||
pcmp_eq<double2>(const double2& a, const double2& b) {
|
||||
return make_double2(eq_mask(a.x, b.x), eq_mask(a.y, b.y));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float4>(const float& a) {
|
||||
return make_float4(a, a+1, a+2, a+3);
|
||||
|
@ -711,6 +711,8 @@ template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, con
|
||||
return vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
|
||||
}
|
||||
|
||||
// Equality comparison of two Packet2d values: the unsigned 64-bit mask
// produced by vceqq_f64 is reinterpreted as a Packet2d.
template<> EIGEN_STRONG_INLINE Packet2d pcmp_eq(const Packet2d& a, const Packet2d& b)
{
  const uint64x2_t equal_mask = vceqq_f64(a, b);
  return vreinterpretq_f64_u64(equal_mask);
}
|
||||
|
||||
// Loads a Packet2d from `from` with vld1q_f64, after invoking the
// EIGEN_DEBUG_ALIGNED_LOAD hook (a macro defined elsewhere).
template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
{
  EIGEN_DEBUG_ALIGNED_LOAD
  return vld1q_f64(from);
}
|
||||
|
||||
// Loads a Packet2d from `from` with vld1q_f64, after invoking the
// EIGEN_DEBUG_UNALIGNED_LOAD hook (a macro defined elsewhere).
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
{
  EIGEN_DEBUG_UNALIGNED_LOAD
  return vld1q_f64(from);
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user