mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-13 12:19:12 +08:00
Enable fused madd for Altivec
This commit is contained in:
parent
8d85ce88e1
commit
b0e19db1cf
@ -22,6 +22,12 @@ namespace internal {
|
|||||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
|
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef EIGEN_VECTORIZE_FMA
|
||||||
|
#ifndef EIGEN_HAS_FUSED_MADD
|
||||||
|
#define EIGEN_HAS_FUSED_MADD 1
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
typedef __m256 Packet8f;
|
typedef __m256 Packet8f;
|
||||||
typedef __m256i Packet8i;
|
typedef __m256i Packet8i;
|
||||||
typedef __m256d Packet4d;
|
typedef __m256d Packet4d;
|
||||||
|
@ -18,6 +18,10 @@ namespace internal {
|
|||||||
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
|
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 4
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef EIGEN_HAS_FUSED_MADD
|
||||||
|
#define EIGEN_HAS_FUSED_MADD 1
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef EIGEN_HAS_FUSE_CJMADD
|
#ifndef EIGEN_HAS_FUSE_CJMADD
|
||||||
#define EIGEN_HAS_FUSE_CJMADD 1
|
#define EIGEN_HAS_FUSE_CJMADD 1
|
||||||
#endif
|
#endif
|
||||||
|
@ -22,6 +22,12 @@ namespace internal {
|
|||||||
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
|
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef EIGEN_VECTORIZE_FMA
|
||||||
|
#ifndef EIGEN_HAS_FUSED_MADD
|
||||||
|
#define EIGEN_HAS_FUSED_MADD 1
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined EIGEN_VECTORIZE_AVX && defined __GNUC__ && !(defined __clang__ || defined __INTEL_COMPILER)
|
#if defined EIGEN_VECTORIZE_AVX && defined __GNUC__ && !(defined __clang__ || defined __INTEL_COMPILER)
|
||||||
// With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot
|
// With GCC's default ABI version, a __m128 or __m256 are the same types and therefore we cannot
|
||||||
// have overloads for both types without linking error.
|
// have overloads for both types without linking error.
|
||||||
|
@ -188,7 +188,7 @@ public:
|
|||||||
nr = 4,
|
nr = 4,
|
||||||
|
|
||||||
// register block size along the M direction (currently, this one cannot be modified)
|
// register block size along the M direction (currently, this one cannot be modified)
|
||||||
#ifdef __FMA__
|
#ifdef EIGEN_HAS_FUSED_MADD
|
||||||
// we assume 16 registers
|
// we assume 16 registers
|
||||||
mr = 3*LhsPacketSize,
|
mr = 3*LhsPacketSize,
|
||||||
#else
|
#else
|
||||||
@ -254,7 +254,7 @@ public:
|
|||||||
// let gcc allocate the register in which to store the result of the pmul
|
// let gcc allocate the register in which to store the result of the pmul
|
||||||
// (in the case where there is no FMA) gcc fails to figure out how to avoid
|
// (in the case where there is no FMA) gcc fails to figure out how to avoid
|
||||||
// spilling register.
|
// spilling register.
|
||||||
#ifdef EIGEN_VECTORIZE_FMA
|
#ifdef EIGEN_HAS_FUSED_MADD
|
||||||
EIGEN_UNUSED_VARIABLE(tmp);
|
EIGEN_UNUSED_VARIABLE(tmp);
|
||||||
c = pmadd(a,b,c);
|
c = pmadd(a,b,c);
|
||||||
#else
|
#else
|
||||||
@ -296,7 +296,7 @@ public:
|
|||||||
|
|
||||||
NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
|
NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS,
|
||||||
nr = 4,
|
nr = 4,
|
||||||
#ifdef __FMA__
|
#ifdef EIGEN_HAS_FUSED_MADD
|
||||||
// we assume 16 registers
|
// we assume 16 registers
|
||||||
mr = 3*LhsPacketSize,
|
mr = 3*LhsPacketSize,
|
||||||
#else
|
#else
|
||||||
@ -359,7 +359,7 @@ public:
|
|||||||
|
|
||||||
EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
|
EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
|
||||||
{
|
{
|
||||||
#ifdef EIGEN_VECTORIZE_FMA
|
#ifdef EIGEN_HAS_FUSED_MADD
|
||||||
EIGEN_UNUSED_VARIABLE(tmp);
|
EIGEN_UNUSED_VARIABLE(tmp);
|
||||||
c.v = pmadd(a.v,b,c.v);
|
c.v = pmadd(a.v,b,c.v);
|
||||||
#else
|
#else
|
||||||
@ -635,7 +635,7 @@ public:
|
|||||||
|
|
||||||
EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
|
EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
|
||||||
{
|
{
|
||||||
#ifdef EIGEN_VECTORIZE_FMA
|
#ifdef EIGEN_HAS_FUSED_MADD
|
||||||
EIGEN_UNUSED_VARIABLE(tmp);
|
EIGEN_UNUSED_VARIABLE(tmp);
|
||||||
c.v = pmadd(a,b.v,c.v);
|
c.v = pmadd(a,b.v,c.v);
|
||||||
#else
|
#else
|
||||||
|
Loading…
x
Reference in New Issue
Block a user