From 759bd92a85393617a56405ec0372e87416cfaebb Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Fri, 30 Jan 2015 17:27:56 -0500 Subject: [PATCH] bug #935: Add asm comments in GEBP kernels to work around a bug in both GCC and Clang on ARM/NEON, whereby they spill registers, severely harming performance. The reason why the asm comments make a difference is that they prevent the compiler from reordering code across these boundaries; without them, such reordering extends the lifetime of local variables and increases register pressure on this register-tight code. --- .../Core/products/GeneralBlockPanelKernel.h | 160 +++++++++++------- Eigen/src/Core/util/Macros.h | 8 +- 2 files changed, 108 insertions(+), 60 deletions(-) diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 7b2ed6728..1b39642fb 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -760,31 +760,36 @@ void gebp_kernel for(Index k=0; k blB += pk*4*RhsProgress; blA += pk*3*Traits::LhsProgress; + + EIGEN_ASM_COMMENT("end gebp micro kernel 3pX4"); } // process remaining peeled loop for(Index k=peeled_kc; k for(Index k=0; k blB += pk*RhsProgress; blA += pk*3*Traits::LhsProgress; + + EIGEN_ASM_COMMENT("end gebp micro kernel 3pX1"); } // process remaining peeled loop @@ -963,21 +977,26 @@ void gebp_kernel for(Index k=0; k blB += pk*4*RhsProgress; blA += pk*(2*Traits::LhsProgress); + + EIGEN_ASM_COMMENT("end gebp micro kernel 2pX4"); } // process remaining peeled loop for(Index k=peeled_kc; k for(Index k=0; k blB += pk*RhsProgress; blA += pk*2*Traits::LhsProgress; + + EIGEN_ASM_COMMENT("end gebp micro kernel 2pX1"); } // process remaining peeled loop @@ -1137,16 +1165,21 @@ void gebp_kernel for(Index k=0; k blB += pk*4*RhsProgress; blA += pk*1*LhsProgress; + + EIGEN_ASM_COMMENT("end gebp micro kernel 1pX4"); } // process remaining peeled loop for(Index k=peeled_kc; k for(Index k=0; k 
blB += pk*RhsProgress; blA += pk*1*Traits::LhsProgress; + + EIGEN_ASM_COMMENT("end gebp micro kernel 2pX1"); } // process remaining peeled loop diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 687ba41dd..13f8fdd4e 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -160,6 +160,12 @@ #define EIGEN_ARCH_ARM64 0 #endif +#if EIGEN_ARCH_ARM || EIGEN_ARCH_ARM64 + #define EIGEN_ARCH_ARM_OR_ARM64 1 +#else + #define EIGEN_ARCH_ARM_OR_ARM64 0 +#endif + /// \internal EIGEN_ARCH_MIPS set to 1 if the architecture is MIPS #if defined(__mips__) || defined(__mips) #define EIGEN_ARCH_MIPS 1 @@ -526,7 +532,7 @@ namespace Eigen { #define EIGEN_UNUSED_VARIABLE(var) Eigen::internal::ignore_unused_variable(var); #if !defined(EIGEN_ASM_COMMENT) - #if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64 + #if EIGEN_COMP_GNUC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64) #define EIGEN_ASM_COMMENT(X) __asm__("#" X) #else #define EIGEN_ASM_COMMENT(X)