mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-30 07:44:10 +08:00
Add missing logical packet ops for GPU and NEON.
This commit is contained in:
parent
d575505d25
commit
2eccbaf3f7
@ -100,6 +100,117 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1<double2>(const do
|
|||||||
return make_double2(from, from);
|
return make_double2(from, from);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_and(const float& a,
|
||||||
|
const float& b) {
|
||||||
|
return __int_as_float(__float_as_int(a) & __float_as_int(b));
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_and(const double& a,
|
||||||
|
const double& b) {
|
||||||
|
return __longlong_as_double(__double_as_longlong(a) &
|
||||||
|
__double_as_longlong(b));
|
||||||
|
}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_or(const float& a,
|
||||||
|
const float& b) {
|
||||||
|
return __int_as_float(__float_as_int(a) | __float_as_int(b));
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_or(const double& a,
|
||||||
|
const double& b) {
|
||||||
|
return __longlong_as_double(__double_as_longlong(a) |
|
||||||
|
__double_as_longlong(b));
|
||||||
|
}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_xor(const float& a,
|
||||||
|
const float& b) {
|
||||||
|
return __int_as_float(__float_as_int(a) ^ __float_as_int(b));
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_xor(const double& a,
|
||||||
|
const double& b) {
|
||||||
|
return __longlong_as_double(__double_as_longlong(a) ^
|
||||||
|
__double_as_longlong(b));
|
||||||
|
}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float bitwise_andnot(const float& a,
|
||||||
|
const float& b) {
|
||||||
|
return __int_as_float(__float_as_int(a) & ~__float_as_int(b));
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double bitwise_andnot(const double& a,
|
||||||
|
const double& b) {
|
||||||
|
return __longlong_as_double(__double_as_longlong(a) &
|
||||||
|
~__double_as_longlong(b));
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float eq_mask(const float& a,
|
||||||
|
const float& b) {
|
||||||
|
return __int_as_float(a == b ? 0xffffffffu : 0u);
|
||||||
|
}
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double eq_mask(const double& a,
|
||||||
|
const double& b) {
|
||||||
|
return __longlong_as_double(a == b ? 0xffffffffffffffffull : 0ull);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
template <>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pand<float4>(const float4& a,
|
||||||
|
const float4& b) {
|
||||||
|
return make_float4(bitwise_and(a.x, b.x), bitwise_and(a.y, b.y),
|
||||||
|
bitwise_and(a.z, b.z), bitwise_and(a.w, b.w));
|
||||||
|
}
|
||||||
|
template <>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pand<double2>(const double2& a,
|
||||||
|
const double2& b) {
|
||||||
|
return make_double2(bitwise_and(a.x, b.x), bitwise_and(a.y, b.y));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 por<float4>(const float4& a,
|
||||||
|
const float4& b) {
|
||||||
|
return make_float4(bitwise_or(a.x, b.x), bitwise_or(a.y, b.y),
|
||||||
|
bitwise_or(a.z, b.z), bitwise_or(a.w, b.w));
|
||||||
|
}
|
||||||
|
template <>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 por<double2>(const double2& a,
|
||||||
|
const double2& b) {
|
||||||
|
return make_double2(bitwise_or(a.x, b.x), bitwise_or(a.y, b.y));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pxor<float4>(const float4& a,
|
||||||
|
const float4& b) {
|
||||||
|
return make_float4(bitwise_xor(a.x, b.x), bitwise_xor(a.y, b.y),
|
||||||
|
bitwise_xor(a.z, b.z), bitwise_xor(a.w, b.w));
|
||||||
|
}
|
||||||
|
template <>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pxor<double2>(const double2& a,
|
||||||
|
const double2& b) {
|
||||||
|
return make_double2(bitwise_xor(a.x, b.x), bitwise_xor(a.y, b.y));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pandnot<float4>(const float4& a,
|
||||||
|
const float4& b) {
|
||||||
|
return make_float4(bitwise_andnot(a.x, b.x), bitwise_andnot(a.y, b.y),
|
||||||
|
bitwise_andnot(a.z, b.z), bitwise_andnot(a.w, b.w));
|
||||||
|
}
|
||||||
|
template <>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
|
||||||
|
pandnot<double2>(const double2& a, const double2& b) {
|
||||||
|
return make_double2(bitwise_andnot(a.x, b.x), bitwise_andnot(a.y, b.y));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcmp_eq<float4>(const float4& a,
|
||||||
|
const float4& b) {
|
||||||
|
return make_float4(eq_mask(a.x, b.x), eq_mask(a.y, b.y), eq_mask(a.z, b.z),
|
||||||
|
eq_mask(a.w, b.w));
|
||||||
|
}
|
||||||
|
template <>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2
|
||||||
|
pcmp_eq<double2>(const double2& a, const double2& b) {
|
||||||
|
return make_double2(eq_mask(a.x, b.x), eq_mask(a.y, b.y));
|
||||||
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float4>(const float& a) {
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset<float4>(const float& a) {
|
||||||
return make_float4(a, a+1, a+2, a+3);
|
return make_float4(a, a+1, a+2, a+3);
|
||||||
|
@ -711,6 +711,8 @@ template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, con
|
|||||||
return vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
|
return vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a),vreinterpretq_u64_f64(b)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Equality comparison of two Packet2d values: the lane-wise result of
// vceqq_f64(a, b) is reinterpreted as a Packet2d via vreinterpretq_f64_u64.
template<> EIGEN_STRONG_INLINE Packet2d pcmp_eq(const Packet2d& a, const Packet2d& b)
{
  return vreinterpretq_f64_u64(
      vceqq_f64(a, b));
}
|
||||||
|
|
||||||
// Loads a Packet2d from `from` with vld1q_f64. The EIGEN_DEBUG_ALIGNED_LOAD
// macro (defined elsewhere) is expanded before the load.
// NOTE(review): presumably `from` is expected to be suitably aligned, as the
// macro name suggests -- confirm against the macro's definition.
template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
{
  EIGEN_DEBUG_ALIGNED_LOAD
  return vld1q_f64(from);
}
|
||||||
|
|
||||||
// Loads a Packet2d from `from` with vld1q_f64. The EIGEN_DEBUG_UNALIGNED_LOAD
// macro (defined elsewhere) is expanded before the load.
// NOTE(review): presumably this is the variant for pointers without an
// alignment guarantee, as the macro name suggests -- confirm.
template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
{
  EIGEN_DEBUG_UNALIGNED_LOAD
  return vld1q_f64(from);
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user