Refactor force-inlining macros and use EIGEN_ALWAYS_INLINE to force inlining of the integer overflow helpers, whose non-inlining caused major performance problems, see the mailing list thread 'Significant perf regression probably due to bug #363 patches'

This commit is contained in:
Benoit Jacob 2011-11-06 16:27:41 -05:00
parent 6021b5c467
commit 7764885d04
4 changed files with 20 additions and 17 deletions

View File

@ -35,7 +35,7 @@
namespace internal {
template<typename Index>
inline void check_rows_cols_for_overflow(Index rows, Index cols)
EIGEN_ALWAYS_INLINE void check_rows_cols_for_overflow(Index rows, Index cols)
{
// http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
// we assume Index is signed

View File

@ -118,14 +118,14 @@ inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, st
// FIXME (a bit overkill maybe ?)
template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector {
EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
EIGEN_ALWAYS_INLINE static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
{
c = cj.pmadd(a,b,c);
}
};
template<typename CJ, typename T> struct gebp_madd_selector<CJ,T,T,T,T> {
EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, T& a, T& b, T& c, T& t)
EIGEN_ALWAYS_INLINE static void run(const CJ& cj, T& a, T& b, T& c, T& t)
{
t = b; t = cj.pmul(a,t); c = padd(c,t);
}

View File

@ -130,31 +130,34 @@
#define EIGEN_MAKESTRING2(a) #a
#define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a)
// EIGEN_ALWAYS_INLINE_ATTRIB should be use in the declaration of function
// which should be inlined even in debug mode.
// FIXME with the always_inline attribute,
// gcc 3.4.x reports the following compilation error:
// Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
// : function body not available
#if EIGEN_GNUC_AT_LEAST(4,0)
#define EIGEN_ALWAYS_INLINE_ATTRIB __attribute__((always_inline))
#else
#define EIGEN_ALWAYS_INLINE_ATTRIB
#endif
#if EIGEN_GNUC_AT_LEAST(4,1) && !defined(__clang__) && !defined(__INTEL_COMPILER)
#define EIGEN_FLATTEN_ATTRIB __attribute__((flatten))
#else
#define EIGEN_FLATTEN_ATTRIB
#endif
// EIGEN_FORCE_INLINE means "inline as much as possible"
// EIGEN_STRONG_INLINE is a stronger version of the inline, using __forceinline on MSVC,
// but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline
// but GCC is still doing fine with just inline.
#if (defined _MSC_VER) || (defined __INTEL_COMPILER)
#define EIGEN_STRONG_INLINE __forceinline
#else
#define EIGEN_STRONG_INLINE inline
#endif
// EIGEN_ALWAYS_INLINE is the stronget, it has the effect of making the function inline and adding every possible
// attribute to maximize inlining. This should only be used when really necessary: in particular,
// it uses __attribute__((always_inline)) on GCC, which most of the time is useless and can severely harm compile times.
// FIXME with the always_inline attribute,
// gcc 3.4.x reports the following compilation error:
// Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
// : function body not available
#if EIGEN_GNUC_AT_LEAST(4,0)
#define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline
#else
#define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE
#endif
#if (defined __GNUC__)
#define EIGEN_DONT_INLINE __attribute__((noinline))
#elif (defined _MSC_VER)

View File

@ -354,7 +354,7 @@ template<typename T> inline void destruct_elements_of_array(T *ptr, size_t size)
*****************************************************************************/
template<typename T>
inline void check_size_for_overflow(size_t size)
EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size)
{
if(size > size_t(-1) / sizeof(T))
throw_std_bad_alloc();