Enable fused madd for AltiVec

2025-09-28 01:03:14 +08:00 · 2014-04-24 23:17:18 +02:00 · 2014-04-24 23:17:18 +02:00 · b0e19db1cf
commit b0e19db1cf
parent 8d85ce88e1
4 changed files with 21 additions and 5 deletions
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -22,6 +22,12 @@ namespace internal {
 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
 #endif
+#ifdef EIGEN_VECTORIZE_FMA
+#ifndef EIGEN_HAS_FUSED_MADD
+#define EIGEN_HAS_FUSED_MADD 1
+#endif
+#endif
 typedef __m256  Packet8f;
 typedef __m256i Packet8i;
 typedef __m256d Packet4d;
--- a/Eigen/src/Core/arch/AltiVec/PacketMath.h
+++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h
@@ -18,6 +18,10 @@ namespace internal {
 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
 #endif
+#ifndef EIGEN_HAS_FUSED_MADD
+#define EIGEN_HAS_FUSED_MADD 1
+#endif
 #ifndef EIGEN_HAS_FUSE_CJMADD
 #define EIGEN_HAS_FUSE_CJMADD 1
 #endif
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -22,6 +22,12 @@ namespace internal {
 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
 #endif
+#ifdef EIGEN_VECTORIZE_FMA
+#ifndef EIGEN_HAS_FUSED_MADD
+#define EIGEN_HAS_FUSED_MADD 1
+#endif
+#endif
 #if defined EIGEN_VECTORIZE_AVX && defined __GNUC__ && !(defined __clang__ || defined __INTEL_COMPILER)
 // With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot
 // have overloads for both types without linking error.
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -188,7 +188,7 @@ public:
    nr = 4,
    // register block size along the M direction (currently, this one cannot be modified)
-#ifdef __FMA__
+#ifdef EIGEN_HAS_FUSED_MADD
    // we assume 16 registers
    mr = 3*LhsPacketSize,
 #else
@@ -254,7 +254,7 @@ public:
    // let gcc allocate the register in which to store the result of the pmul
    // (in the case where there is no FMA) gcc fails to figure out how to avoid
    // spilling register.
-#ifdef EIGEN_VECTORIZE_FMA
+#ifdef EIGEN_HAS_FUSED_MADD
    EIGEN_UNUSED_VARIABLE(tmp);
    c = pmadd(a,b,c);
 #else
@@ -296,7 +296,7 @@ public:
    NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
    nr = 4,
-#ifdef __FMA__
+#ifdef EIGEN_HAS_FUSED_MADD
    // we assume 16 registers
    mr = 3*LhsPacketSize,
 #else
@@ -359,7 +359,7 @@ public:
  EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
  {
-#ifdef EIGEN_VECTORIZE_FMA
+#ifdef EIGEN_HAS_FUSED_MADD
    EIGEN_UNUSED_VARIABLE(tmp);
    c.v = pmadd(a.v,b,c.v);
 #else
@@ -635,7 +635,7 @@ public:
  EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
  {
-#ifdef EIGEN_VECTORIZE_FMA
+#ifdef EIGEN_HAS_FUSED_MADD
    EIGEN_UNUSED_VARIABLE(tmp);
    c.v = pmadd(a,b.v,c.v);
 #else