Enable fused madd for Altivec

This commit is contained in:
Gael Guennebaud 2014-04-24 23:17:18 +02:00
parent 8d85ce88e1
commit b0e19db1cf
4 changed files with 21 additions and 5 deletions

View File

@ -22,6 +22,12 @@ namespace internal {
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*)) #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
#endif #endif
// When EIGEN_VECTORIZE_FMA is defined, set EIGEN_HAS_FUSED_MADD to 1.
// The inner #ifndef keeps any value that was already defined before this
// point, so an earlier definition of the macro is never overridden here.
#ifdef EIGEN_VECTORIZE_FMA
#ifndef EIGEN_HAS_FUSED_MADD
#define EIGEN_HAS_FUSED_MADD 1
#endif
#endif
typedef __m256 Packet8f; typedef __m256 Packet8f;
typedef __m256i Packet8i; typedef __m256i Packet8i;
typedef __m256d Packet4d; typedef __m256d Packet4d;

View File

@ -18,6 +18,10 @@ namespace internal {
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
#endif #endif
// Set EIGEN_HAS_FUSED_MADD to 1 whenever it is not already defined.
// There is no additional feature-test condition around this guard: merely
// reaching this point is enough to enable the macro.
// NOTE(review): presumably this hunk belongs to the AltiVec packet header,
// going by the commit title -- confirm against the file name.
#ifndef EIGEN_HAS_FUSED_MADD
#define EIGEN_HAS_FUSED_MADD 1
#endif
#ifndef EIGEN_HAS_FUSE_CJMADD #ifndef EIGEN_HAS_FUSE_CJMADD
#define EIGEN_HAS_FUSE_CJMADD 1 #define EIGEN_HAS_FUSE_CJMADD 1
#endif #endif

View File

@ -22,6 +22,12 @@ namespace internal {
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*)) #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
#endif #endif
// When EIGEN_VECTORIZE_FMA is defined, set EIGEN_HAS_FUSED_MADD to 1.
// The inner #ifndef keeps any value that was already defined before this
// point, so an earlier definition of the macro is never overridden here.
#ifdef EIGEN_VECTORIZE_FMA
#ifndef EIGEN_HAS_FUSED_MADD
#define EIGEN_HAS_FUSED_MADD 1
#endif
#endif
#if defined EIGEN_VECTORIZE_AVX && defined __GNUC__ && !(defined __clang__ || defined __INTEL_COMPILER) #if defined EIGEN_VECTORIZE_AVX && defined __GNUC__ && !(defined __clang__ || defined __INTEL_COMPILER)
// With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot // With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot
// have overloads for both types without linking error. // have overloads for both types without linking error.

View File

@ -188,7 +188,7 @@ public:
nr = 4, nr = 4,
// register block size along the M direction (currently, this one cannot be modified) // register block size along the M direction (currently, this one cannot be modified)
#ifdef __FMA__ #ifdef EIGEN_HAS_FUSED_MADD
// we assume 16 registers // we assume 16 registers
mr = 3*LhsPacketSize, mr = 3*LhsPacketSize,
#else #else
@ -254,7 +254,7 @@ public:
// let gcc allocate the register in which to store the result of the pmul // let gcc allocate the register in which to store the result of the pmul
// (in the case where there is no FMA) gcc fails to figure out how to avoid // (in the case where there is no FMA) gcc fails to figure out how to avoid
// spilling register. // spilling register.
#ifdef EIGEN_VECTORIZE_FMA #ifdef EIGEN_HAS_FUSED_MADD
EIGEN_UNUSED_VARIABLE(tmp); EIGEN_UNUSED_VARIABLE(tmp);
c = pmadd(a,b,c); c = pmadd(a,b,c);
#else #else
@ -296,7 +296,7 @@ public:
NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS, NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
nr = 4, nr = 4,
#ifdef __FMA__ #ifdef EIGEN_HAS_FUSED_MADD
// we assume 16 registers // we assume 16 registers
mr = 3*LhsPacketSize, mr = 3*LhsPacketSize,
#else #else
@ -359,7 +359,7 @@ public:
EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
{ {
#ifdef EIGEN_VECTORIZE_FMA #ifdef EIGEN_HAS_FUSED_MADD
EIGEN_UNUSED_VARIABLE(tmp); EIGEN_UNUSED_VARIABLE(tmp);
c.v = pmadd(a.v,b,c.v); c.v = pmadd(a.v,b,c.v);
#else #else
@ -635,7 +635,7 @@ public:
EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
{ {
#ifdef EIGEN_VECTORIZE_FMA #ifdef EIGEN_HAS_FUSED_MADD
EIGEN_UNUSED_VARIABLE(tmp); EIGEN_UNUSED_VARIABLE(tmp);
c.v = pmadd(a,b.v,c.v); c.v = pmadd(a,b.v,c.v);
#else #else