More NEON packetmath fixes.

(cherry picked from commit 384269937f707669fb1ab65bee7e9bfca2c2dfa1)
This commit is contained in:
Antonio Sánchez 2023-02-14 21:45:25 +00:00 committed by Antonio Sanchez
parent a659b5dbb2
commit 2dfdaa2abf
2 changed files with 16 additions and 23 deletions

View File

@ -57,16 +57,6 @@ typedef eigen_packet_wrapper<uint32x4_t ,15> Packet4ui;
typedef eigen_packet_wrapper<int64x2_t ,16> Packet2l;
typedef eigen_packet_wrapper<uint64x2_t ,17> Packet2ul;
// Builds a Packet4f from four scalars by staging them in a local array and
// loading that array with vld1q_f32 (a ends up first, d last).
// NOTE(review): a definition with the same signature also appears after the
// `#endif // EIGEN_COMP_MSVC_STRICT` further down — confirm which one is meant
// to remain.
EIGEN_ALWAYS_INLINE Packet4f make_packet4f(float a, float b, float c, float d) {
  float lanes[4] = {a, b, c, d};
  return vld1q_f32(lanes);
}
// Builds a Packet2f from two scalars by staging them in a local array and
// loading that array with vld1_f32 (a first, b second).
// NOTE(review): a definition with the same signature also appears after the
// `#endif // EIGEN_COMP_MSVC_STRICT` further down — confirm which one is meant
// to remain.
EIGEN_ALWAYS_INLINE Packet2f make_packet2f(float a, float b) {
  float lanes[2] = {a, b};
  return vld1_f32(lanes);
}
#else
typedef float32x2_t Packet2f;
@ -88,11 +78,18 @@ typedef uint32x4_t Packet4ui;
typedef int64x2_t Packet2l;
typedef uint64x2_t Packet2ul;
#endif // EIGEN_COMP_MSVC_STRICT

// The packet constructors below are defined once, outside the conditional
// section, and go through an explicit load from a local array instead of
// brace-initialising the vector type. The load form works for both the wrapped
// and the raw packet typedefs, so no per-branch variant is needed.

// Builds a Packet4f whose elements are a, b, c, d in that order.
EIGEN_ALWAYS_INLINE Packet4f make_packet4f(float a, float b, float c, float d) {
  float from[4] = {a, b, c, d};
  return vld1q_f32(from);
}

// Builds a Packet2f whose elements are a, b in that order.
EIGEN_ALWAYS_INLINE Packet2f make_packet2f(float a, float b) {
  float from[2] = {a, b};
  return vld1_f32(from);
}
EIGEN_STRONG_INLINE Packet4f shuffle1(const Packet4f& m, int mask){
const float* a = reinterpret_cast<const float*>(&m);
Packet4f res = make_packet4f(*(a + (mask & 3)), *(a + ((mask >> 2) & 3)), *(a + ((mask >> 4) & 3 )), *(a + ((mask >> 6) & 3)));
@ -3665,20 +3662,16 @@ template <typename T> float64x2_t vreinterpretq_f64_u64(T a) { return (float64x2
// Double-precision packet types: wrapped in eigen_packet_wrapper when
// EIGEN_COMP_MSVC_STRICT is set, plain NEON vector types otherwise.
#if EIGEN_COMP_MSVC_STRICT
typedef eigen_packet_wrapper<float64x2_t, 18> Packet2d;
typedef eigen_packet_wrapper<float64x1_t, 19> Packet1d;
#else
typedef float64x2_t Packet2d;
typedef float64x1_t Packet1d;
#endif

// Builds a Packet2d whose elements are a, b in that order.
// Defined once, after the conditional typedefs, using an explicit load from a
// local array rather than brace-initialising the vector type, so the same
// definition serves both typedef branches.
EIGEN_ALWAYS_INLINE Packet2d make_packet2d(double a, double b) {
  double from[2] = {a, b};
  return vld1q_f64(from);
}
// functionally equivalent to _mm_shuffle_pd in SSE (i.e. shuffle(m, n, mask) equals _mm_shuffle_pd(m,n,mask))
// Currently used in LU/arch/InverseSize4.h to enable a shared implementation
// for fast inversion of matrices of size 4.

View File

@ -41,7 +41,7 @@ EIGEN_STRONG_INLINE Packet4c preinterpret<Packet4c, Packet4uc>(const Packet4uc&
}
// Reinterprets an unsigned byte packet (Packet8uc) as its signed counterpart
// (Packet8c) without changing the underlying bits.
template <>
EIGEN_STRONG_INLINE Packet8c preinterpret<Packet8c, Packet8uc>(const Packet8uc& a) {
  // Go straight to the NEON reinterpret intrinsic. Calling
  // preinterpret<Packet8c>(a) here would resolve back to this very
  // specialization and recurse without end.
  return Packet8c(vreinterpret_s8_u8(a));
}
template <>
EIGEN_STRONG_INLINE Packet16c preinterpret<Packet16c, Packet16uc>(const Packet16uc& a) {
@ -1373,7 +1373,7 @@ struct type_casting_traits<numext::uint16_t, double> {
// Casts a Packet8us to a Packet2d. Only the leading elements of the input
// contribute to the result; the rest are dropped.
template <>
EIGEN_STRONG_INLINE Packet2d pcast<Packet8us, Packet2d>(const Packet8us& a) {
  // Discard all but first two values.
  // The source lanes are unsigned 16-bit, so the low half must be extracted
  // with the unsigned intrinsic (vget_low_u16), not the signed _s16 variant.
  Packet2f tmp = pcast<Packet4us, Packet2f>(vget_low_u16(a));
  // Widen the two floats to doubles.
  return vcvt_f64_f32(tmp);
}