mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-13 12:19:12 +08:00
Refactor force-inlining macros and use EIGEN_ALWAYS_INLINE to force inlining of the integer overflow helpers, whose non-inlining caused major performance problems, see the mailing list thread 'Significant perf regression probably due to bug #363 patches'
This commit is contained in:
parent
6021b5c467
commit
7764885d04
@ -35,7 +35,7 @@
|
||||
namespace internal {
|
||||
|
||||
template<typename Index>
|
||||
inline void check_rows_cols_for_overflow(Index rows, Index cols)
|
||||
EIGEN_ALWAYS_INLINE void check_rows_cols_for_overflow(Index rows, Index cols)
|
||||
{
|
||||
// http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
|
||||
// we assume Index is signed
|
||||
|
@ -118,14 +118,14 @@ inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, st
|
||||
// FIXME (a bit overkill maybe ?)
|
||||
|
||||
template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector {
|
||||
EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
|
||||
EIGEN_ALWAYS_INLINE static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
|
||||
{
|
||||
c = cj.pmadd(a,b,c);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename CJ, typename T> struct gebp_madd_selector<CJ,T,T,T,T> {
|
||||
EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, T& a, T& b, T& c, T& t)
|
||||
EIGEN_ALWAYS_INLINE static void run(const CJ& cj, T& a, T& b, T& c, T& t)
|
||||
{
|
||||
t = b; t = cj.pmul(a,t); c = padd(c,t);
|
||||
}
|
||||
|
@ -130,31 +130,34 @@
|
||||
#define EIGEN_MAKESTRING2(a) #a
|
||||
#define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a)
|
||||
|
||||
// EIGEN_ALWAYS_INLINE_ATTRIB should be use in the declaration of function
|
||||
// which should be inlined even in debug mode.
|
||||
// FIXME with the always_inline attribute,
|
||||
// gcc 3.4.x reports the following compilation error:
|
||||
// Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
|
||||
// : function body not available
|
||||
#if EIGEN_GNUC_AT_LEAST(4,0)
|
||||
#define EIGEN_ALWAYS_INLINE_ATTRIB __attribute__((always_inline))
|
||||
#else
|
||||
#define EIGEN_ALWAYS_INLINE_ATTRIB
|
||||
#endif
|
||||
|
||||
#if EIGEN_GNUC_AT_LEAST(4,1) && !defined(__clang__) && !defined(__INTEL_COMPILER)
|
||||
#define EIGEN_FLATTEN_ATTRIB __attribute__((flatten))
|
||||
#else
|
||||
#define EIGEN_FLATTEN_ATTRIB
|
||||
#endif
|
||||
|
||||
// EIGEN_FORCE_INLINE means "inline as much as possible"
|
||||
// EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC,
|
||||
// but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline
|
||||
// but GCC is still doing fine with just inline.
|
||||
#if (defined _MSC_VER) || (defined __INTEL_COMPILER)
|
||||
#define EIGEN_STRONG_INLINE __forceinline
|
||||
#else
|
||||
#define EIGEN_STRONG_INLINE inline
|
||||
#endif
|
||||
|
||||
// EIGEN_ALWAYS_INLINE is the stronget, it has the effect of making the function inline and adding every possible
|
||||
// attribute to maximize inlining. This should only be used when really necessary: in particular,
|
||||
// it uses __attribute__((always_inline)) on GCC, which most of the time is useless and can severely harm compile times.
|
||||
// FIXME with the always_inline attribute,
|
||||
// gcc 3.4.x reports the following compilation error:
|
||||
// Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
|
||||
// : function body not available
|
||||
#if EIGEN_GNUC_AT_LEAST(4,0)
|
||||
#define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline
|
||||
#else
|
||||
#define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE
|
||||
#endif
|
||||
|
||||
#if (defined __GNUC__)
|
||||
#define EIGEN_DONT_INLINE __attribute__((noinline))
|
||||
#elif (defined _MSC_VER)
|
||||
|
@ -354,7 +354,7 @@ template<typename T> inline void destruct_elements_of_array(T *ptr, size_t size)
|
||||
*****************************************************************************/
|
||||
|
||||
template<typename T>
|
||||
inline void check_size_for_overflow(size_t size)
|
||||
EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size)
|
||||
{
|
||||
if(size > size_t(-1) / sizeof(T))
|
||||
throw_std_bad_alloc();
|
||||
|
Loading…
x
Reference in New Issue
Block a user