mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-20 00:29:38 +08:00
optimize non fused MADD, and add a flatten attribute macro to enforce
inlining within a function
This commit is contained in:
parent
b72b7ab76f
commit
36d9b51a44
@ -284,7 +284,7 @@ template<> EIGEN_STRONG_INLINE Packet1cd ei_por <Packet1cd>(const Packet1cd&
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ei_pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ei_pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_andnot_pd(a.v,b.v)); }
|
||||
|
||||
// FIXME force unaligned load, this is a temporary fix
|
||||
// FIXME force unaligned load, this is a temporary fix
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ei_pload <Packet1cd>(const std::complex<double>* from)
|
||||
{ EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(ei_ploadu<Packet2d>((const double*)from)); }
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd ei_ploadu<Packet1cd>(const std::complex<double>* from)
|
||||
|
@ -133,14 +133,34 @@ inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, st
|
||||
computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n);
|
||||
}
|
||||
|
||||
// FIXME
|
||||
#ifndef EIGEN_HAS_FUSE_CJMADD
|
||||
#define EIGEN_HAS_FUSE_CJMADD
|
||||
#endif
|
||||
#ifdef EIGEN_HAS_FUSE_CJMADD
|
||||
#define MADD(CJ,A,B,C,T) C = CJ.pmadd(A,B,C);
|
||||
#else
|
||||
#define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = ei_padd(C,T);
|
||||
|
||||
// FIXME (a bit overkill maybe ?)
|
||||
|
||||
template<typename CJ, typename A, typename B, typename C, typename T> struct ei_gebp_madd_selector {
|
||||
EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
|
||||
{
|
||||
c = cj.pmadd(a,b,c);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename CJ, typename T> struct ei_gebp_madd_selector<CJ,T,T,T,T> {
|
||||
EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, T& a, T& b, T& c, T& t)
|
||||
{
|
||||
t = b; t = cj.pmul(a,t); c = ei_padd(c,t);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename CJ, typename A, typename B, typename C, typename T>
|
||||
EIGEN_STRONG_INLINE void ei_gebp_madd(const CJ& cj, A& a, B& b, C& c, T& t)
|
||||
{
|
||||
ei_gebp_madd_selector<CJ,A,B,C,T>::run(cj,a,b,c,t);
|
||||
}
|
||||
|
||||
#define MADD(CJ,A,B,C,T) ei_gebp_madd(CJ,A,B,C,T);
|
||||
// #define MADD(CJ,A,B,C,T) T = B; T = CJ.pmul(A,T); C = ei_padd(C,T);
|
||||
#endif
|
||||
|
||||
/* optimized GEneral packed Block * packed Panel product kernel
|
||||
@ -170,6 +190,7 @@ struct ei_gebp_kernel
|
||||
typedef typename ei_meta_if<Vectorizable,_RhsPacket,RhsScalar>::ret RhsPacket;
|
||||
typedef typename ei_meta_if<Vectorizable,_ResPacket,ResScalar>::ret ResPacket;
|
||||
|
||||
EIGEN_FLATTEN_ATTRIB
|
||||
void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha,
|
||||
Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB = 0)
|
||||
{
|
||||
|
@ -147,6 +147,12 @@
|
||||
#define EIGEN_ALWAYS_INLINE_ATTRIB
|
||||
#endif
|
||||
|
||||
#if EIGEN_GNUC_AT_LEAST(4,0)
|
||||
#define EIGEN_FLATTEN_ATTRIB __attribute__((flatten))
|
||||
#else
|
||||
#define EIGEN_FLATTEN_ATTRIB
|
||||
#endif
|
||||
|
||||
// EIGEN_FORCE_INLINE means "inline as much as possible"
|
||||
#if (defined _MSC_VER) || (defined __intel_compiler)
|
||||
#define EIGEN_STRONG_INLINE __forceinline
|
||||
|
Loading…
x
Reference in New Issue
Block a user