mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-13 20:26:03 +08:00
Enable fused madd for Altivec
This commit is contained in:
parent
8d85ce88e1
commit
b0e19db1cf
@ -22,6 +22,12 @@ namespace internal {
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
#ifndef EIGEN_HAS_FUSED_MADD
|
||||
#define EIGEN_HAS_FUSED_MADD 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
typedef __m256 Packet8f;
|
||||
typedef __m256i Packet8i;
|
||||
typedef __m256d Packet4d;
|
||||
|
@ -18,6 +18,10 @@ namespace internal {
|
||||
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_HAS_FUSED_MADD
|
||||
#define EIGEN_HAS_FUSED_MADD 1
|
||||
#endif
|
||||
|
||||
#ifndef EIGEN_HAS_FUSE_CJMADD
|
||||
#define EIGEN_HAS_FUSE_CJMADD 1
|
||||
#endif
|
||||
|
@ -22,6 +22,12 @@ namespace internal {
|
||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
#ifndef EIGEN_HAS_FUSED_MADD
|
||||
#define EIGEN_HAS_FUSED_MADD 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined EIGEN_VECTORIZE_AVX && defined __GNUC__ && !(defined __clang__ || defined __INTEL_COMPILER)
|
||||
// With GCC's default ABI version, __m128 and __m256 are the same type and therefore we cannot
|
||||
// have overloads for both types without a linking error.
|
||||
|
@ -188,7 +188,7 @@ public:
|
||||
nr = 4,
|
||||
|
||||
// register block size along the M direction (currently, this one cannot be modified)
|
||||
#ifdef __FMA__
|
||||
#ifdef EIGEN_HAS_FUSED_MADD
|
||||
// we assume 16 registers
|
||||
mr = 3*LhsPacketSize,
|
||||
#else
|
||||
@ -254,7 +254,7 @@ public:
|
||||
// let gcc allocate the register in which to store the result of the pmul
|
||||
// (in the case where there is no FMA) gcc fails to figure out how to avoid
|
||||
// spilling registers.
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
#ifdef EIGEN_HAS_FUSED_MADD
|
||||
EIGEN_UNUSED_VARIABLE(tmp);
|
||||
c = pmadd(a,b,c);
|
||||
#else
|
||||
@ -296,7 +296,7 @@ public:
|
||||
|
||||
NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
|
||||
nr = 4,
|
||||
#ifdef __FMA__
|
||||
#ifdef EIGEN_HAS_FUSED_MADD
|
||||
// we assume 16 registers
|
||||
mr = 3*LhsPacketSize,
|
||||
#else
|
||||
@ -359,7 +359,7 @@ public:
|
||||
|
||||
EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
#ifdef EIGEN_HAS_FUSED_MADD
|
||||
EIGEN_UNUSED_VARIABLE(tmp);
|
||||
c.v = pmadd(a.v,b,c.v);
|
||||
#else
|
||||
@ -635,7 +635,7 @@ public:
|
||||
|
||||
EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
#ifdef EIGEN_HAS_FUSED_MADD
|
||||
EIGEN_UNUSED_VARIABLE(tmp);
|
||||
c.v = pmadd(a,b.v,c.v);
|
||||
#else
|
||||
|
Loading…
x
Reference in New Issue
Block a user