More NEON packetmath fixes.

(cherry picked from commit 384269937f707669fb1ab65bee7e9bfca2c2dfa1)
2025-07-16 10:01:49 +08:00 · 2023-02-14 21:45:25 +00:00 · 2023-02-14 21:45:25 +00:00 · 2dfdaa2abf
commit 2dfdaa2abf
parent a659b5dbb2
2 changed files with 16 additions and 23 deletions
--- a/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/Eigen/src/Core/arch/NEON/PacketMath.h
@ -57,16 +57,6 @@ typedef eigen_packet_wrapper<uint32x4_t ,15> Packet4ui;
 typedef eigen_packet_wrapper<int64x2_t  ,16> Packet2l;
 typedef eigen_packet_wrapper<uint64x2_t ,17> Packet2ul;

-EIGEN_ALWAYS_INLINE Packet4f make_packet4f(float a, float b, float c, float d) {
-  float from[4] = {a, b, c, d};
-  return vld1q_f32(from);
-}
-
-EIGEN_ALWAYS_INLINE Packet2f make_packet2f(float a, float b) {
-  float from[2] = {a, b};
-  return vld1_f32(from);
-}
-
 #else

 typedef float32x2_t                          Packet2f;
@ -88,11 +78,18 @@ typedef uint32x4_t                           Packet4ui;
 typedef int64x2_t                            Packet2l;
 typedef uint64x2_t                           Packet2ul;

-EIGEN_ALWAYS_INLINE Packet4f make_packet4f(float a, float b, float c, float d) { return {a, b, c, d}; }
-EIGEN_ALWAYS_INLINE Packet2f make_packet2f(float a, float b) { return {a, b}; }
-
 #endif // EIGEN_COMP_MSVC_STRICT

+EIGEN_ALWAYS_INLINE Packet4f make_packet4f(float a, float b, float c, float d) {
+  float from[4] = {a, b, c, d};
+  return vld1q_f32(from);
+}
+
+EIGEN_ALWAYS_INLINE Packet2f make_packet2f(float a, float b) {
+  float from[2] = {a, b};
+  return vld1_f32(from);
+}
+
 EIGEN_STRONG_INLINE Packet4f shuffle1(const Packet4f& m, int mask){
  const float* a = reinterpret_cast<const float*>(&m);
  Packet4f res = make_packet4f(*(a + (mask & 3)), *(a + ((mask >> 2) & 3)), *(a + ((mask >> 4) & 3 )), *(a + ((mask >> 6) & 3)));
@ -3665,20 +3662,16 @@ template <typename T> float64x2_t vreinterpretq_f64_u64(T a) { return (float64x2
 #if EIGEN_COMP_MSVC_STRICT
 typedef eigen_packet_wrapper<float64x2_t, 18> Packet2d;
 typedef eigen_packet_wrapper<float64x1_t, 19> Packet1d;
+#else
+typedef float64x2_t Packet2d;
+typedef float64x1_t Packet1d;
+#endif

 EIGEN_ALWAYS_INLINE Packet2d make_packet2d(double a, double b) {
  double from[2] = {a, b};
  return vld1q_f64(from);
 }

-#else
-typedef float64x2_t Packet2d;
-typedef float64x1_t Packet1d;
-
-EIGEN_ALWAYS_INLINE Packet2d make_packet2d(double a, double b) { return {a, b}; }
-#endif
-
-
 // functionally equivalent to _mm_shuffle_pd in SSE (i.e. shuffle(m, n, mask) equals _mm_shuffle_pd(m,n,mask))
 // Currently used in LU/arch/InverseSize4.h to enable a shared implementation
 // for fast inversion of matrices of size 4.
--- a/Eigen/src/Core/arch/NEON/TypeCasting.h
+++ b/Eigen/src/Core/arch/NEON/TypeCasting.h
@ -41,7 +41,7 @@ EIGEN_STRONG_INLINE Packet4c preinterpret<Packet4c, Packet4uc>(const Packet4uc&
 }
 template <>
 EIGEN_STRONG_INLINE Packet8c preinterpret<Packet8c, Packet8uc>(const Packet8uc& a) {
-  return Packet8c(preinterpret<Packet8c>(a));
+  return Packet8c(vreinterpret_s8_u8(a));
 }
 template <>
 EIGEN_STRONG_INLINE Packet16c preinterpret<Packet16c, Packet16uc>(const Packet16uc& a) {
@ -1373,7 +1373,7 @@ struct type_casting_traits<numext::uint16_t, double> {
 template <>
 EIGEN_STRONG_INLINE Packet2d pcast<Packet8us, Packet2d>(const Packet8us& a) {
  // Discard all but first two values.
-  Packet2f tmp = pcast<Packet4us, Packet2f>(vget_low_s16(a));
+  Packet2f tmp = pcast<Packet4us, Packet2f>(vget_low_u16(a));
  return vcvt_f64_f32(tmp);
 }