mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
Fix the bug using neon instruction fmla for data type half
This commit is contained in:
parent
31ab62d347
commit
d194167149
@ -183,7 +183,11 @@ struct gebp_traits <double,double,false,false,Architecture::NEON>
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
|
// The register at operand 3 of fmla for data type half must be v0~v15; the compiler may not
|
||||||
|
// allocate a required register for the '%2' of inline asm 'fmla %0.8h, %1.8h, %2.h[id]',
|
||||||
|
// so inline assembly can't be used here to avoid the bug that vfmaq_lane_f16 is implemented
|
||||||
|
// through a costly dup in the GCC compiler.
|
||||||
|
#if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC && EIGEN_COMP_CLANG
|
||||||
|
|
||||||
template<>
|
template<>
|
||||||
struct gebp_traits <half,half,false,false,Architecture::NEON>
|
struct gebp_traits <half,half,false,false,Architecture::NEON>
|
||||||
@ -240,19 +244,10 @@ struct gebp_traits <half,half,false,false,Architecture::NEON>
|
|||||||
template<int LaneID>
|
template<int LaneID>
|
||||||
EIGEN_STRONG_INLINE void madd_helper(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c) const
|
EIGEN_STRONG_INLINE void madd_helper(const LhsPacket& a, const RhsPacketx4& b, AccPacket& c) const
|
||||||
{
|
{
|
||||||
#if EIGEN_COMP_GNUC_STRICT
|
|
||||||
// 1. vfmaq_lane_f16 is implemented through a costly dup
|
|
||||||
// 2. workaround the gcc register split problem on arm64-neon
|
|
||||||
if(LaneID==0) asm("fmla %0.8h, %1.8h, %2.h[0]\n" : "+w" (c) : "w" (a), "w" (b) : );
|
|
||||||
else if(LaneID==1) asm("fmla %0.8h, %1.8h, %2.h[1]\n" : "+w" (c) : "w" (a), "w" (b) : );
|
|
||||||
else if(LaneID==2) asm("fmla %0.8h, %1.8h, %2.h[2]\n" : "+w" (c) : "w" (a), "w" (b) : );
|
|
||||||
else if(LaneID==3) asm("fmla %0.8h, %1.8h, %2.h[3]\n" : "+w" (c) : "w" (a), "w" (b) : );
|
|
||||||
#else
|
|
||||||
c = vfmaq_lane_f16(c, a, b, LaneID);
|
c = vfmaq_lane_f16(c, a, b, LaneID);
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
#endif // EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
|
#endif // EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC && EIGEN_COMP_CLANG
|
||||||
#endif // EIGEN_ARCH_ARM64
|
#endif // EIGEN_ARCH_ARM64
|
||||||
|
|
||||||
} // namespace internal
|
} // namespace internal
|
||||||
|
Loading…
x
Reference in New Issue
Block a user