mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-14 12:46:00 +08:00
Refactor force-inlining macros and use EIGEN_ALWAYS_INLINE to force inlining of the integer overflow helpers. Their not being inlined caused major performance problems; see the mailing list thread 'Significant perf regression probably due to bug #363 patches'.
This commit is contained in:
parent
6021b5c467
commit
7764885d04
@ -35,7 +35,7 @@
|
|||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
template<typename Index>
|
template<typename Index>
|
||||||
inline void check_rows_cols_for_overflow(Index rows, Index cols)
|
EIGEN_ALWAYS_INLINE void check_rows_cols_for_overflow(Index rows, Index cols)
|
||||||
{
|
{
|
||||||
// http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
|
// http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
|
||||||
// we assume Index is signed
|
// we assume Index is signed
|
||||||
|
@ -118,14 +118,14 @@ inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, st
|
|||||||
// FIXME (a bit overkill maybe ?)
|
// FIXME (a bit overkill maybe ?)
|
||||||
|
|
||||||
template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector {
|
template<typename CJ, typename A, typename B, typename C, typename T> struct gebp_madd_selector {
|
||||||
EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
|
EIGEN_ALWAYS_INLINE static void run(const CJ& cj, A& a, B& b, C& c, T& /*t*/)
|
||||||
{
|
{
|
||||||
c = cj.pmadd(a,b,c);
|
c = cj.pmadd(a,b,c);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename CJ, typename T> struct gebp_madd_selector<CJ,T,T,T,T> {
|
template<typename CJ, typename T> struct gebp_madd_selector<CJ,T,T,T,T> {
|
||||||
EIGEN_STRONG_INLINE EIGEN_ALWAYS_INLINE_ATTRIB static void run(const CJ& cj, T& a, T& b, T& c, T& t)
|
EIGEN_ALWAYS_INLINE static void run(const CJ& cj, T& a, T& b, T& c, T& t)
|
||||||
{
|
{
|
||||||
t = b; t = cj.pmul(a,t); c = padd(c,t);
|
t = b; t = cj.pmul(a,t); c = padd(c,t);
|
||||||
}
|
}
|
||||||
|
@ -130,31 +130,34 @@
|
|||||||
#define EIGEN_MAKESTRING2(a) #a
|
#define EIGEN_MAKESTRING2(a) #a
|
||||||
#define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a)
|
#define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a)
|
||||||
|
|
||||||
// EIGEN_ALWAYS_INLINE_ATTRIB should be used in the declaration of functions
|
|
||||||
// which should be inlined even in debug mode.
|
|
||||||
// FIXME with the always_inline attribute,
|
|
||||||
// gcc 3.4.x reports the following compilation error:
|
|
||||||
// Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
|
|
||||||
// : function body not available
|
|
||||||
#if EIGEN_GNUC_AT_LEAST(4,0)
|
|
||||||
#define EIGEN_ALWAYS_INLINE_ATTRIB __attribute__((always_inline))
|
|
||||||
#else
|
|
||||||
#define EIGEN_ALWAYS_INLINE_ATTRIB
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if EIGEN_GNUC_AT_LEAST(4,1) && !defined(__clang__) && !defined(__INTEL_COMPILER)
|
#if EIGEN_GNUC_AT_LEAST(4,1) && !defined(__clang__) && !defined(__INTEL_COMPILER)
|
||||||
#define EIGEN_FLATTEN_ATTRIB __attribute__((flatten))
|
#define EIGEN_FLATTEN_ATTRIB __attribute__((flatten))
|
||||||
#else
|
#else
|
||||||
#define EIGEN_FLATTEN_ATTRIB
|
#define EIGEN_FLATTEN_ATTRIB
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// EIGEN_FORCE_INLINE means "inline as much as possible"
|
// EIGEN_STRONG_INLINE is a stronger version of inline, using __forceinline on MSVC and the Intel compiler,
|
||||||
|
// but it still doesn't use GCC's always_inline. This is useful in (common) situations where MSVC needs forceinline
|
||||||
|
// but GCC is still doing fine with just inline.
|
||||||
#if (defined _MSC_VER) || (defined __INTEL_COMPILER)
|
#if (defined _MSC_VER) || (defined __INTEL_COMPILER)
|
||||||
#define EIGEN_STRONG_INLINE __forceinline
|
#define EIGEN_STRONG_INLINE __forceinline
|
||||||
#else
|
#else
|
||||||
#define EIGEN_STRONG_INLINE inline
|
#define EIGEN_STRONG_INLINE inline
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// EIGEN_ALWAYS_INLINE is the strongest, it has the effect of making the function inline and adding every possible
|
||||||
|
// attribute to maximize inlining. This should only be used when really necessary: in particular,
|
||||||
|
// it uses __attribute__((always_inline)) on GCC, which most of the time is useless and can severely harm compile times.
|
||||||
|
// FIXME with the always_inline attribute,
|
||||||
|
// gcc 3.4.x reports the following compilation error:
|
||||||
|
// Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval<Derived> Eigen::MatrixBase<Scalar, Derived>::eval() const'
|
||||||
|
// : function body not available
|
||||||
|
#if EIGEN_GNUC_AT_LEAST(4,0)
|
||||||
|
#define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline
|
||||||
|
#else
|
||||||
|
#define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE
|
||||||
|
#endif
|
||||||
|
|
||||||
#if (defined __GNUC__)
|
#if (defined __GNUC__)
|
||||||
#define EIGEN_DONT_INLINE __attribute__((noinline))
|
#define EIGEN_DONT_INLINE __attribute__((noinline))
|
||||||
#elif (defined _MSC_VER)
|
#elif (defined _MSC_VER)
|
||||||
|
@ -354,7 +354,7 @@ template<typename T> inline void destruct_elements_of_array(T *ptr, size_t size)
|
|||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
inline void check_size_for_overflow(size_t size)
|
EIGEN_ALWAYS_INLINE void check_size_for_overflow(size_t size)
|
||||||
{
|
{
|
||||||
if(size > size_t(-1) / sizeof(T))
|
if(size > size_t(-1) / sizeof(T))
|
||||||
throw_std_bad_alloc();
|
throw_std_bad_alloc();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user