From ea8cad51518cfae6eb7406268aef6c28ff62389f Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 4 Mar 2010 18:58:12 +0100 Subject: [PATCH] make the number of registers easier to configure per architecture --- Eigen/src/Core/arch/Default/Settings.h | 13 +++++++++++++ Eigen/src/Core/arch/NEON/PacketMath.h | 6 ++++++ Eigen/src/Core/util/BlasUtil.h | 8 ++------ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/Eigen/src/Core/arch/Default/Settings.h b/Eigen/src/Core/arch/Default/Settings.h index 1e7cebdba..1ab2877b6 100644 --- a/Eigen/src/Core/arch/Default/Settings.h +++ b/Eigen/src/Core/arch/Default/Settings.h @@ -62,4 +62,17 @@ #define EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH 8 #endif + +/** Defines the default number of registers available for that architecture. + * Currently it must be 8 or 16. Other values will fail. + */ +#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS +#if (defined __i386__) +#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8 +#else +#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16 +#endif + +#endif + #endif // EIGEN_DEFAULT_SETTINGS_H diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index f71b92a75..2acb3633a 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -35,6 +35,12 @@ #define EIGEN_TUNE_FOR_CPU_CACHE_SIZE 4*96*96 #endif +// FIXME NEON has 16 quad registers, but since the current register allocator +// is so bad, it is much better to reduce it to 8 +#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS +#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8 +#endif + typedef float32x4_t Packet4f; typedef int32x4_t Packet4i; diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index 4d216d77a..95ff446c7 100644 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -130,14 +130,10 @@ struct ei_product_blocking_traits typedef typename ei_packet_traits<Scalar>::type PacketType; enum { PacketSize = 
sizeof(PacketType)/sizeof(Scalar), - #if (defined __i386__) - HalfRegisterCount = 4, - #else - HalfRegisterCount = 8, - #endif + NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS, // register block size along the N direction (must be either 2 or 4) - nr = HalfRegisterCount/2, + nr = NumberOfRegisters/4, // register block size along the M direction (currently, this one cannot be modified) mr = 2 * PacketSize,