Add a macro to set the nr trait in the GEBP kernel for NEON.

This commit is contained in:
Rasmus Munk Larsen 2022-09-22 23:56:34 +00:00
parent 23299632c2
commit e2ea866515

View File

@ -5,6 +5,10 @@ namespace internal {
#if EIGEN_ARCH_ARM && EIGEN_COMP_CLANG #if EIGEN_ARCH_ARM && EIGEN_COMP_CLANG
// Value used for the `nr` enumerator of the NEON gebp_traits specializations
// below. Defaults to 8; a definition of EIGEN_NEON_GEBP_NR made before this
// point takes precedence and is left untouched.
#if !defined(EIGEN_NEON_GEBP_NR)
#  define EIGEN_NEON_GEBP_NR 8
#endif  // !defined(EIGEN_NEON_GEBP_NR)
// Clang seems to excessively spill registers in the GEBP kernel on 32-bit arm. // Clang seems to excessively spill registers in the GEBP kernel on 32-bit arm.
// Here we specialize gebp_traits to eliminate these register spills. // Here we specialize gebp_traits to eliminate these register spills.
// See #2138. // See #2138.
@ -49,11 +53,8 @@ struct gebp_traits <float,float,false,false,Architecture::NEON,GEBPPacketFull>
{ {
typedef float RhsPacket; typedef float RhsPacket;
typedef float32x4_t RhsPacketx4; typedef float32x4_t RhsPacketx4;
enum { enum { nr = EIGEN_NEON_GEBP_NR };
nr = 8 EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const {
};
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
{
dest = *b; dest = *b;
} }
@ -115,9 +116,7 @@ struct gebp_traits <double,double,false,false,Architecture::NEON>
: gebp_traits<double,double,false,false,Architecture::Generic> : gebp_traits<double,double,false,false,Architecture::Generic>
{ {
typedef double RhsPacket; typedef double RhsPacket;
enum { enum { nr = EIGEN_NEON_GEBP_NR };
nr = 8
};
struct RhsPacketx4 { struct RhsPacketx4 {
float64x2_t B_0, B_1; float64x2_t B_0, B_1;
}; };
@ -193,9 +192,8 @@ struct gebp_traits <half,half,false,false,Architecture::NEON>
typedef half RhsPacket; typedef half RhsPacket;
typedef float16x4_t RhsPacketx4; typedef float16x4_t RhsPacketx4;
typedef float16x4_t PacketHalf; typedef float16x4_t PacketHalf;
enum { enum { nr = EIGEN_NEON_GEBP_NR };
nr = 8
};
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
{ {
dest = *b; dest = *b;