mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-09-23 14:53:13 +08:00
Add a macro to set the nr trait in the GEBP kernel for NEON.
This commit is contained in:
parent
23299632c2
commit
e2ea866515
@ -5,6 +5,10 @@ namespace internal {
|
|||||||
|
|
||||||
#if EIGEN_ARCH_ARM && EIGEN_COMP_CLANG
|
#if EIGEN_ARCH_ARM && EIGEN_COMP_CLANG
|
||||||
|
|
||||||
|
#ifndef EIGEN_NEON_GEBP_NR
|
||||||
|
#define EIGEN_NEON_GEBP_NR 8
|
||||||
|
#endif
|
||||||
|
|
||||||
// Clang seems to excessively spill registers in the GEBP kernel on 32-bit arm.
|
// Clang seems to excessively spill registers in the GEBP kernel on 32-bit arm.
|
||||||
// Here we specialize gebp_traits to eliminate these register spills.
|
// Here we specialize gebp_traits to eliminate these register spills.
|
||||||
// See #2138.
|
// See #2138.
|
||||||
@ -49,11 +53,8 @@ struct gebp_traits <float,float,false,false,Architecture::NEON,GEBPPacketFull>
|
|||||||
{
|
{
|
||||||
typedef float RhsPacket;
|
typedef float RhsPacket;
|
||||||
typedef float32x4_t RhsPacketx4;
|
typedef float32x4_t RhsPacketx4;
|
||||||
enum {
|
enum { nr = EIGEN_NEON_GEBP_NR };
|
||||||
nr = 8
|
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const {
|
||||||
};
|
|
||||||
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
|
|
||||||
{
|
|
||||||
dest = *b;
|
dest = *b;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -115,9 +116,7 @@ struct gebp_traits <double,double,false,false,Architecture::NEON>
|
|||||||
: gebp_traits<double,double,false,false,Architecture::Generic>
|
: gebp_traits<double,double,false,false,Architecture::Generic>
|
||||||
{
|
{
|
||||||
typedef double RhsPacket;
|
typedef double RhsPacket;
|
||||||
enum {
|
enum { nr = EIGEN_NEON_GEBP_NR };
|
||||||
nr = 8
|
|
||||||
};
|
|
||||||
struct RhsPacketx4 {
|
struct RhsPacketx4 {
|
||||||
float64x2_t B_0, B_1;
|
float64x2_t B_0, B_1;
|
||||||
};
|
};
|
||||||
@ -193,9 +192,8 @@ struct gebp_traits <half,half,false,false,Architecture::NEON>
|
|||||||
typedef half RhsPacket;
|
typedef half RhsPacket;
|
||||||
typedef float16x4_t RhsPacketx4;
|
typedef float16x4_t RhsPacketx4;
|
||||||
typedef float16x4_t PacketHalf;
|
typedef float16x4_t PacketHalf;
|
||||||
enum {
|
enum { nr = EIGEN_NEON_GEBP_NR };
|
||||||
nr = 8
|
|
||||||
};
|
|
||||||
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
|
EIGEN_STRONG_INLINE void loadRhs(const RhsScalar* b, RhsPacket& dest) const
|
||||||
{
|
{
|
||||||
dest = *b;
|
dest = *b;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user