mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
Added support for FMA instructions
This commit is contained in:
parent
7ed9441ea4
commit
db7d49efbb
@ -117,6 +117,9 @@
|
||||
#define EIGEN_VECTORIZE_SSE4_1
|
||||
#define EIGEN_VECTORIZE_SSE4_2
|
||||
#endif
|
||||
#ifdef __FMA__
|
||||
#define EIGEN_VECTORIZE_FMA
|
||||
#endif
|
||||
// include files
|
||||
|
||||
// This extern "C" works around a MINGW-w64 compilation issue
|
||||
|
@ -120,6 +120,11 @@ template<> EIGEN_STRONG_INLINE Packet8i pdiv<Packet8i>(const Packet8i& /*a*/, co
|
||||
return pset1<Packet8i>(0);
|
||||
}
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pmadd(const Packet8f& a, const Packet8f& b, const Packet8f& c) { return _mm256_fmadd_ps(a,b,c); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pmadd(const Packet4d& a, const Packet4d& b, const Packet4d& c) { return _mm256_fmadd_pd(a,b,c); }
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8f pmin<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_min_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_min_pd(a,b); }
|
||||
|
||||
|
@ -179,6 +179,10 @@ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& /*a*/, co
|
||||
|
||||
// for some weird raisons, it has to be overloaded for packet of integers
|
||||
template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd(pmul(a,b), c); }
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) { return _mm_fmadd_ps(a,b,c); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return _mm_fmadd_pd(a,b,c); }
|
||||
#endif
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_min_ps(a,b); }
|
||||
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
|
||||
|
@ -205,7 +205,15 @@ public:
|
||||
|
||||
EIGEN_STRONG_INLINE void madd(const LhsPacket& a, const RhsPacket& b, AccPacket& c, AccPacket& tmp) const
|
||||
{
|
||||
// It would be a lot cleaner to call pmadd all the time. Unfortunately if we
|
||||
// let gcc allocate the register in which to store the result of the pmul
|
||||
// (in the case where there is no FMA) gcc fails to figure out how to avoid
|
||||
// spilling register.
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
c = pmadd(a,b,c);
|
||||
#else
|
||||
tmp = b; tmp = pmul(a,tmp); c = padd(c,tmp);
|
||||
#endif
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void acc(const AccPacket& c, const ResPacket& alpha, ResPacket& r) const
|
||||
@ -281,7 +289,11 @@ public:
|
||||
|
||||
EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
c.v = pmadd(a.v,b,c.v);
|
||||
#else
|
||||
tmp = b; tmp = pmul(a.v,tmp); c.v = padd(c.v,tmp);
|
||||
#endif
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const false_type&) const
|
||||
@ -486,7 +498,11 @@ public:
|
||||
|
||||
EIGEN_STRONG_INLINE void madd_impl(const LhsPacket& a, const RhsPacket& b, AccPacket& c, RhsPacket& tmp, const true_type&) const
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
c = pmadd(a,b,c);
|
||||
#else
|
||||
tmp = b; tmp.v = pmul(a,tmp.v); c = padd(c,tmp);
|
||||
#endif
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void madd_impl(const LhsScalar& a, const RhsScalar& b, ResScalar& c, RhsScalar& /*tmp*/, const false_type&) const
|
||||
|
Loading…
x
Reference in New Issue
Block a user