mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-07-13 00:21:49 +08:00
bug #1637: work around register spilling in gebp with clang>=6.0+AVX+FMA
(grafted from f233c6194db032f31c14d06b5e962197b66ea296)
This commit is contained in:
parent
24d56f2e0e
commit
c4ea9a916f
@ -159,11 +159,12 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, co
|
|||||||
|
|
||||||
#ifdef __FMA__
|
#ifdef __FMA__
|
||||||
template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
|
template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
|
||||||
#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
|
#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )
|
||||||
// clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
|
// Clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
|
||||||
// and gcc stupidly generates a vfmadd132ps instruction,
|
// and even register spilling with clang>=6.0 (bug 1637).
|
||||||
// so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate
|
// Gcc stupidly generates a vfmadd132ps instruction.
|
||||||
// the result of the product.
|
// So let's enforce it to generate a vfmadd231ps instruction since the most common use
|
||||||
|
// case is to accumulate the result of the product.
|
||||||
Packet8f res = c;
|
Packet8f res = c;
|
||||||
__asm__("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
|
__asm__("vfmadd231ps %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
|
||||||
return res;
|
return res;
|
||||||
@ -172,7 +173,7 @@ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f&
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
|
template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
|
||||||
#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
|
#if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG) )
|
||||||
// see above
|
// see above
|
||||||
Packet4d res = c;
|
Packet4d res = c;
|
||||||
__asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
|
__asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user