Enable fused madd for Altivec

This commit is contained in:
Gael Guennebaud 2014-04-24 23:17:18 +02:00
parent 8d85ce88e1
commit b0e19db1cf
4 changed files with 21 additions and 5 deletions

View File

@@ -22,6 +22,12 @@ namespace internal {
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
#endif
+#ifdef EIGEN_VECTORIZE_FMA
+#ifndef EIGEN_HAS_FUSED_MADD
+#define EIGEN_HAS_FUSED_MADD 1
+#endif
+#endif
typedef __m256 Packet8f;
typedef __m256i Packet8i;
typedef __m256d Packet4d;

View File

@@ -18,6 +18,10 @@ namespace internal {
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
#endif
+#ifndef EIGEN_HAS_FUSED_MADD
+#define EIGEN_HAS_FUSED_MADD 1
+#endif
#ifndef EIGEN_HAS_FUSE_CJMADD
#define EIGEN_HAS_FUSE_CJMADD 1
#endif

View File

@@ -22,6 +22,12 @@ namespace internal {
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
#endif
+#ifdef EIGEN_VECTORIZE_FMA
+#ifndef EIGEN_HAS_FUSED_MADD
+#define EIGEN_HAS_FUSED_MADD 1
+#endif
+#endif
#if defined EIGEN_VECTORIZE_AVX && defined __GNUC__ && !(defined __clang__ || defined __INTEL_COMPILER)
// With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot
// have overloads for both types without linking error.

View File

@@ -188,7 +188,7 @@ public:
nr = 4,
// register block size along the M direction (currently, this one cannot be modified)
-#ifdef __FMA__
+#ifdef EIGEN_HAS_FUSED_MADD
// we assume 16 registers
mr = 3*LhsPacketSize,
#else
@@ -254,7 +254,7 @@ public:
// let gcc allocate the register in which to store the result of the pmul
// (in the case where there is no FMA) gcc fails to figure out how to avoid
// spilling register.
-#ifdef EIGEN_VECTORIZE_FMA
+#ifdef EIGEN_HAS_FUSED_MADD
EIGEN_UNUSED_VARIABLE(tmp);
c = pmadd(a,b,c);
#else
@@ -296,7 +296,7 @@ public:
NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
nr = 4,
-#ifdef __FMA__
+#ifdef EIGEN_HAS_FUSED_MADD
// we assume 16 registers
mr = 3*LhsPacketSize,
#else
@@ -359,7 +359,7 @@ public:
EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
{
-#ifdef EIGEN_VECTORIZE_FMA
+#ifdef EIGEN_HAS_FUSED_MADD
EIGEN_UNUSED_VARIABLE(tmp);
c.v = pmadd(a.v,b,c.v);
#else
@@ -635,7 +635,7 @@ public:
EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
{
-#ifdef EIGEN_VECTORIZE_FMA
+#ifdef EIGEN_HAS_FUSED_MADD
EIGEN_UNUSED_VARIABLE(tmp);
c.v = pmadd(a,b.v,c.v);
#else