mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-07-16 10:01:49 +08:00
More NEON packetmath fixes.
(cherry picked from commit 384269937f707669fb1ab65bee7e9bfca2c2dfa1)
This commit is contained in:
parent
a659b5dbb2
commit
2dfdaa2abf
@ -57,16 +57,6 @@ typedef eigen_packet_wrapper<uint32x4_t ,15> Packet4ui;
|
||||
typedef eigen_packet_wrapper<int64x2_t ,16> Packet2l;
|
||||
typedef eigen_packet_wrapper<uint64x2_t ,17> Packet2ul;
|
||||
|
||||
// Builds a Packet4f from four scalars, given in lane order (a first).
EIGEN_ALWAYS_INLINE Packet4f make_packet4f(float a, float b, float c, float d) {
  // Stage the scalars contiguously so a single vld1q_f32 can load them.
  const float lanes[4] = {a, b, c, d};
  return vld1q_f32(lanes);
}
|
||||
|
||||
// Builds a Packet2f from two scalars, given in lane order (a first).
EIGEN_ALWAYS_INLINE Packet2f make_packet2f(float a, float b) {
  // Stage the scalars contiguously so a single vld1_f32 can load them.
  const float lanes[2] = {a, b};
  return vld1_f32(lanes);
}
|
||||
|
||||
#else
|
||||
|
||||
typedef float32x2_t Packet2f;
|
||||
@ -88,11 +78,18 @@ typedef uint32x4_t Packet4ui;
|
||||
typedef int64x2_t Packet2l;
|
||||
typedef uint64x2_t Packet2ul;
|
||||
|
||||
// Builds a Packet4f by list-initializing it from the four scalars.
EIGEN_ALWAYS_INLINE Packet4f make_packet4f(float a, float b, float c, float d) {
  const Packet4f packed = {a, b, c, d};
  return packed;
}
|
||||
// Builds a Packet2f by list-initializing it from the two scalars.
EIGEN_ALWAYS_INLINE Packet2f make_packet2f(float a, float b) {
  const Packet2f packed = {a, b};
  return packed;
}
|
||||
|
||||
#endif // EIGEN_COMP_MSVC_STRICT
|
||||
|
||||
// Builds a Packet4f from four scalars, given in lane order (a first).
EIGEN_ALWAYS_INLINE Packet4f make_packet4f(float a, float b, float c, float d) {
  // Stage the scalars contiguously so a single vld1q_f32 can load them.
  const float lanes[4] = {a, b, c, d};
  return vld1q_f32(lanes);
}
|
||||
|
||||
// Builds a Packet2f from two scalars, given in lane order (a first).
EIGEN_ALWAYS_INLINE Packet2f make_packet2f(float a, float b) {
  // Stage the scalars contiguously so a single vld1_f32 can load them.
  const float lanes[2] = {a, b};
  return vld1_f32(lanes);
}
|
||||
|
||||
EIGEN_STRONG_INLINE Packet4f shuffle1(const Packet4f& m, int mask){
|
||||
const float* a = reinterpret_cast<const float*>(&m);
|
||||
Packet4f res = make_packet4f(*(a + (mask & 3)), *(a + ((mask >> 2) & 3)), *(a + ((mask >> 4) & 3 )), *(a + ((mask >> 6) & 3)));
|
||||
@ -3665,20 +3662,16 @@ template <typename T> float64x2_t vreinterpretq_f64_u64(T a) { return (float64x2
|
||||
#if EIGEN_COMP_MSVC_STRICT
|
||||
typedef eigen_packet_wrapper<float64x2_t, 18> Packet2d;
|
||||
typedef eigen_packet_wrapper<float64x1_t, 19> Packet1d;
|
||||
#else
|
||||
typedef float64x2_t Packet2d;
|
||||
typedef float64x1_t Packet1d;
|
||||
#endif
|
||||
|
||||
// Builds a Packet2d from two doubles, given in lane order (a first).
EIGEN_ALWAYS_INLINE Packet2d make_packet2d(double a, double b) {
  // Stage the scalars contiguously so a single vld1q_f64 can load them.
  const double lanes[2] = {a, b};
  return vld1q_f64(lanes);
}
|
||||
|
||||
#else
|
||||
typedef float64x2_t Packet2d;
|
||||
typedef float64x1_t Packet1d;
|
||||
|
||||
// Builds a Packet2d by list-initializing it from the two doubles.
EIGEN_ALWAYS_INLINE Packet2d make_packet2d(double a, double b) {
  const Packet2d packed = {a, b};
  return packed;
}
|
||||
#endif
|
||||
|
||||
|
||||
// functionally equivalent to _mm_shuffle_pd in SSE (i.e. shuffle(m, n, mask) equals _mm_shuffle_pd(m,n,mask))
|
||||
// Currently used in LU/arch/InverseSize4.h to enable a shared implementation
|
||||
// for fast inversion of matrices of size 4.
|
||||
|
@ -41,7 +41,7 @@ EIGEN_STRONG_INLINE Packet4c preinterpret<Packet4c, Packet4uc>(const Packet4uc&
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8c preinterpret<Packet8c, Packet8uc>(const Packet8uc& a) {
|
||||
return Packet8c(preinterpret<Packet8c>(a));
|
||||
return Packet8c(vreinterpret_s8_u8(a));
|
||||
}
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16c preinterpret<Packet16c, Packet16uc>(const Packet16uc& a) {
|
||||
@ -1373,7 +1373,7 @@ struct type_casting_traits<numext::uint16_t, double> {
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d pcast<Packet8us, Packet2d>(const Packet8us& a) {
|
||||
// Discard all but first two values.
|
||||
Packet2f tmp = pcast<Packet4us, Packet2f>(vget_low_s16(a));
|
||||
Packet2f tmp = pcast<Packet4us, Packet2f>(vget_low_u16(a));
|
||||
return vcvt_f64_f32(tmp);
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user