Disable fma gcc's workaround for gcc >= 8 (based on GEMM benchmarks)

This commit is contained in:
Gael Guennebaud 2018-11-28 17:56:24 +01:00
parent 41052f63b7
commit e19ece822d

View File

@ -174,7 +174,7 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, co
#ifdef __FMA__ #ifdef __FMA__
template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) { template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) {
#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) ) #if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
// clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers, // clang stupidly generates a vfmadd213ps instruction plus some vmovaps on registers,
// and gcc stupidly generates a vfmadd132ps instruction, // and gcc stupidly generates a vfmadd132ps instruction,
// so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate // so let's enforce it to generate a vfmadd231ps instruction since the most common use case is to accumulate
@ -187,7 +187,7 @@ template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f&
#endif #endif
} }
template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) { template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) {
#if ( EIGEN_COMP_GNUC_STRICT || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) ) #if ( (EIGEN_COMP_GNUC_STRICT && EIGEN_COMP_GNUC<80) || (EIGEN_COMP_CLANG && (EIGEN_COMP_CLANG<308)) )
// see above // see above
Packet4d res = c; Packet4d res = c;
__asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b)); __asm__("vfmadd231pd %[a], %[b], %[c]" : [c] "+x" (res) : [a] "x" (a), [b] "x" (b));