Add a macro to set the nr trait in the GEBP kernel for NEON.

2025-09-23 14:53:13 +08:00 · 2022-09-22 23:56:34 +00:00 · 2022-09-22 23:56:34 +00:00 · e2ea866515
commit e2ea866515
parent 23299632c2
1 changed file with 9 additions and 11 deletions
--- a/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h
@@ -5,6 +5,10 @@ namespace internal {
 #if EIGEN_ARCH_ARM && EIGEN_COMP_CLANG
+#ifndef EIGEN_NEON_GEBP_NR
+#define EIGEN_NEON_GEBP_NR 8
+#endif
 // Clang seems to excessively spill registers in the GEBP kernel on 32-bit arm.
 // Here we specialize gebp_traits to eliminate these register spills.
 // See #2138.
@@ -49,11 +53,8 @@ struct gebp_traits <float,float,false,false,Architecture::NEON,GEBPPacketFull>
 {
  typedef float RhsPacket;
  typedef float32x4_t RhsPacketx4;
-  enum {
+  enum { nr = EIGEN_NEON_GEBP_NR };
-    nr = 8
+  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const {
-  };
-  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
-  {
    dest = *b;
  }
@@ -115,9 +116,7 @@ struct gebp_traits <double,double,false,false,Architecture::NEON>
 : gebp_traits<double,double,false,false,Architecture::Generic>
 {
  typedef double RhsPacket;
-  enum {
+  enum { nr = EIGEN_NEON_GEBP_NR };
-    nr = 8
-  };
  struct RhsPacketx4 {
    float64x2_t B_0, B_1;
  };
@@ -193,9 +192,8 @@ struct gebp_traits <half,half,false,false,Architecture::NEON>
  typedef half RhsPacket;
  typedef float16x4_t RhsPacketx4;
  typedef float16x4_t PacketHalf;
-  enum {
+  enum { nr = EIGEN_NEON_GEBP_NR };
-    nr = 8
+
-  };
  EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
  {
    dest = *b;