mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 03:39:01 +08:00
Add a few missing packet ops: cmp_eq for NEON. pfloor for GPU.
This commit is contained in:
parent
2a39659d79
commit
4d7f317102
@ -53,6 +53,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
HasBetaInc = 1,
|
||||
|
||||
HasBlend = 0,
|
||||
HasFloor = 1,
|
||||
};
|
||||
};
|
||||
|
||||
@ -86,6 +87,7 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
HasBetaInc = 1,
|
||||
|
||||
HasBlend = 0,
|
||||
HasFloor = 1,
|
||||
};
|
||||
};
|
||||
|
||||
@ -408,6 +410,13 @@ template<> EIGEN_DEVICE_FUNC inline double2 pabs<double2>(const double2& a) {
|
||||
return make_double2(fabs(a.x), fabs(a.y));
|
||||
}
|
||||
|
||||
// Component-wise floor of a float4 packet: floorf is applied to each of the
// four members (x, y, z, w) and the results are repacked with make_float4.
template<> EIGEN_DEVICE_FUNC inline float4 pfloor<float4>(const float4& a) {
  const float floored_x = floorf(a.x);
  const float floored_y = floorf(a.y);
  const float floored_z = floorf(a.z);
  const float floored_w = floorf(a.w);
  return make_float4(floored_x, floored_y, floored_z, floored_w);
}
|
||||
// Component-wise floor of a double2 packet: floor is applied to each of the
// two members (x, y) and the results are repacked with make_double2.
template<> EIGEN_DEVICE_FUNC inline double2 pfloor<double2>(const double2& a) {
  const double floored_x = floor(a.x);
  const double floored_y = floor(a.y);
  return make_double2(floored_x, floored_y);
}
|
||||
|
||||
EIGEN_DEVICE_FUNC inline void
|
||||
ptranspose(PacketBlock<float4,4>& kernel) {
|
||||
float tmp = kernel.packet[0].y;
|
||||
|
@ -101,6 +101,18 @@ template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, con
|
||||
return Packet2cf(vaddq_f32(v1, v2));
|
||||
}
|
||||
|
||||
// Equality comparison for a packet of two complex floats.
// The returned mask for a complex element is the bitwise AND of the masks of
// its real-part and imaginary-part comparisons, replicated over both scalars
// of that element.
template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b)
{
  // Scalar-wise comparison mask, laid out as:
  // [re(a[0])==re(b[0]), im(a[0])==im(b[0]), re(a[1])==re(b[1]), im(a[1])==im(b[1])]
  const Packet4f scalar_mask = pcmp_eq<Packet4f>(a.v, b.v);
  // Exchange the real/imag entries inside each complex element to get:
  // [im(a[0])==im(b[0]), re(a[0])==re(b[0]), im(a[1])==im(b[1]), re(a[1])==re(b[1])]
  const Packet4f partner_mask = vrev64q_f32(scalar_mask);
  // AND-ing a mask with its partner yields re(a)==re(b) && im(a)==im(b) in
  // both scalar slots of every complex element.
  return Packet2cf(pand<Packet4f>(scalar_mask, partner_mask));
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
|
||||
{
|
||||
return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
|
||||
@ -361,6 +373,18 @@ template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, con
|
||||
return Packet1cd(vaddq_f64(v1, v2));
|
||||
}
|
||||
|
||||
// Equality comparison for a packet holding a single complex double.
// The returned packet carries, in both of its 64-bit lanes, the bitwise AND of
// the real-part comparison mask and the imaginary-part comparison mask, i.e.
// re(a)==re(b) && im(a)==im(b).
template<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b)
{
  // Compare real and imaginary parts of a and b to get the mask vector:
  // [re(a)==re(b), im(a)==im(b)]
  Packet2d eq = pcmp_eq<Packet2d>(a.v, b.v);
  // Swap the two 64-bit lanes of the mask to get:
  // [im(a)==im(b), re(a)==re(b)]
  // vextq_f64(eq, eq, 1) yields [eq[1], eq[0]]. The previous vrev64q_u32 call
  // only reversed the 32-bit halves *inside* each 64-bit lane, so it never
  // exchanged the real and imaginary lanes (and it was fed a float64x2_t
  // although it expects a uint32x4_t).
  Packet2d eq_swapped = vextq_f64(eq, eq, 1);
  // Return re(a)==re(b) & im(a)==im(b) by computing bitwise AND of eq and eq_swapped
  return Packet1cd(pand<Packet2d>(eq, eq_swapped));
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b)
|
||||
{
|
||||
return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v))));
|
||||
|
Loading…
x
Reference in New Issue
Block a user