Fix a bug in pcmp_lt_or_nan and add sqrt support for SVE

2025-09-12 17:33:15 +08:00 · 2024-09-04 21:45:39 +00:00 · 2024-09-04 21:45:39 +00:00 · 072ec9d954
commit 072ec9d954
parent 9315389795
1 changed files with 8 additions and 3 deletions
--- a/Eigen/src/Core/arch/SVE/PacketMath.h
+++ b/Eigen/src/Core/arch/SVE/PacketMath.h
@@ -358,7 +358,7 @@ struct packet_traits<float> : default_packet_traits {
    HasCos = EIGEN_FAST_MATH,
    HasLog = 1,
    HasExp = 1,
-    HasSqrt = 0,
+    HasSqrt = 1,
    HasTanh = EIGEN_FAST_MATH,
    HasErf = EIGEN_FAST_MATH
  };
@@ -478,12 +478,12 @@ EIGEN_STRONG_INLINE PacketXf pcmp_eq<PacketXf>(const PacketXf& a, const PacketXf
  return svreinterpret_f32_u32(svdup_n_u32_z(svcmpeq_f32(svptrue_b32(), a, b), 0xffffffffu));
 }

-// Do a predicate inverse (svnot_b_x) on the predicate resulted from the
+// Do a predicate inverse (svnot_b_z) on the predicate resulting from the
 // greater/equal comparison (svcmpge_f32). Then fill a float vector with the
 // active elements.
 template <>
 EIGEN_STRONG_INLINE PacketXf pcmp_lt_or_nan<PacketXf>(const PacketXf& a, const PacketXf& b) {
-  return svreinterpret_f32_u32(svdup_n_u32_z(svnot_b_x(svptrue_b32(), svcmpge_f32(svptrue_b32(), a, b)), 0xffffffffu));
+  return svreinterpret_f32_u32(svdup_n_u32_z(svnot_b_z(svptrue_b32(), svcmpge_f32(svptrue_b32(), a, b)), 0xffffffffu));
 }

 template <>
@@ -660,6 +660,11 @@ EIGEN_STRONG_INLINE PacketXf pldexp<PacketXf>(const PacketXf& a, const PacketXf&
  return pldexp_generic(a, exponent);
 }

+template <>
+EIGEN_STRONG_INLINE PacketXf psqrt<PacketXf>(const PacketXf& a) {
+  return svsqrt_f32_x(svptrue_b32(), a);
+}
+
 }  // namespace internal
 }  // namespace Eigen