From 17230686944e63cca8d9ce1c981d6e869fcbcb62 Mon Sep 17 00:00:00 2001 From: Hauke Heibel Date: Thu, 4 Mar 2010 18:33:51 +0100 Subject: [PATCH 1/2] Moved x()/y()/z() and w() access functions to DenseBase; they are now available for Arrays as well. --- Eigen/src/Core/Coeffs.h | 16 ++++++++-------- Eigen/src/Core/DenseBase.h | 9 +++++++++ Eigen/src/Core/MatrixBase.h | 9 --------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/Eigen/src/Core/Coeffs.h b/Eigen/src/Core/Coeffs.h index da7b9153f..727dea75e 100644 --- a/Eigen/src/Core/Coeffs.h +++ b/Eigen/src/Core/Coeffs.h @@ -239,42 +239,42 @@ EIGEN_STRONG_INLINE typename ei_traits::Scalar& DenseBase /** equivalent to operator[](0). */ template -EIGEN_STRONG_INLINE const typename MatrixBase::CoeffReturnType MatrixBase +EIGEN_STRONG_INLINE const typename DenseBase::CoeffReturnType DenseBase ::x() const { return (*this)[0]; } /** equivalent to operator[](1). */ template -EIGEN_STRONG_INLINE const typename MatrixBase::CoeffReturnType MatrixBase +EIGEN_STRONG_INLINE const typename DenseBase::CoeffReturnType DenseBase ::y() const { return (*this)[1]; } /** equivalent to operator[](2). */ template -EIGEN_STRONG_INLINE const typename MatrixBase::CoeffReturnType MatrixBase +EIGEN_STRONG_INLINE const typename DenseBase::CoeffReturnType DenseBase ::z() const { return (*this)[2]; } /** equivalent to operator[](3). */ template -EIGEN_STRONG_INLINE const typename MatrixBase::CoeffReturnType MatrixBase +EIGEN_STRONG_INLINE const typename DenseBase::CoeffReturnType DenseBase ::w() const { return (*this)[3]; } /** equivalent to operator[](0). */ template -EIGEN_STRONG_INLINE typename ei_traits::Scalar& MatrixBase +EIGEN_STRONG_INLINE typename ei_traits::Scalar& DenseBase ::x() { return (*this)[0]; } /** equivalent to operator[](1). */ template -EIGEN_STRONG_INLINE typename ei_traits::Scalar& MatrixBase +EIGEN_STRONG_INLINE typename ei_traits::Scalar& DenseBase ::y() { return (*this)[1]; } /** equivalent to operator[](2). */ template -EIGEN_STRONG_INLINE typename ei_traits::Scalar& MatrixBase +EIGEN_STRONG_INLINE typename ei_traits::Scalar& DenseBase ::z() { return (*this)[2]; } /** equivalent to operator[](3). */ template -EIGEN_STRONG_INLINE typename ei_traits::Scalar& MatrixBase +EIGEN_STRONG_INLINE typename ei_traits::Scalar& DenseBase ::w() { return (*this)[3]; } /** \returns the packet of coefficients starting at the given row and column. It is your responsibility diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index 67540bd8c..52a883811 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -280,6 +280,15 @@ template class DenseBase typedef Block::ColsAtCompileTime> RowXpr; #endif // not EIGEN_PARSED_BY_DOXYGEN + const CoeffReturnType x() const; + const CoeffReturnType y() const; + const CoeffReturnType z() const; + const CoeffReturnType w() const; + Scalar& x(); + Scalar& y(); + Scalar& z(); + Scalar& w(); + /** Copies \a other into *this. \returns a reference to *this. */ template Derived& operator=(const DenseBase& other); diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index 9c62163ba..ac79de66d 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -169,15 +169,6 @@ template class MatrixBase Derived& lazyAssign(const ProductBase& other); #endif // not EIGEN_PARSED_BY_DOXYGEN - const CoeffReturnType x() const; - const CoeffReturnType y() const; - const CoeffReturnType z() const; - const CoeffReturnType w() const; - Scalar& x(); - Scalar& y(); - Scalar& z(); - Scalar& w(); - template Derived& operator+=(const MatrixBase& other); template From ea8cad51518cfae6eb7406268aef6c28ff62389f Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 4 Mar 2010 18:58:12 +0100 Subject: [PATCH 2/2] make the number of registers easier to configure per architectures --- Eigen/src/Core/arch/Default/Settings.h | 13 +++++++++++++ Eigen/src/Core/arch/NEON/PacketMath.h | 6 ++++++ Eigen/src/Core/util/BlasUtil.h | 8 ++------ 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/Eigen/src/Core/arch/Default/Settings.h b/Eigen/src/Core/arch/Default/Settings.h index 1e7cebdba..1ab2877b6 100644 --- a/Eigen/src/Core/arch/Default/Settings.h +++ b/Eigen/src/Core/arch/Default/Settings.h @@ -62,4 +62,17 @@ #define EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH 8 #endif + +/** Defines the default number of registers available for that architecture. + * Currently it must be 8 or 16. Other values will fail. + */ +#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS +#if (defined __i386__) +#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8 +#else +#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 16 +#endif + +#endif + #endif // EIGEN_DEFAULT_SETTINGS_H diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index f71b92a75..2acb3633a 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -35,6 +35,12 @@ #define EIGEN_TUNE_FOR_CPU_CACHE_SIZE 4*96*96 #endif +// FIXME NEON has 16 quad registers, but since the current register allocator +// is so bad, it is much better to reduce it to 8 +#ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS +#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 8 +#endif + typedef float32x4_t Packet4f; typedef int32x4_t Packet4i; diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h index 4d216d77a..95ff446c7 100644 --- a/Eigen/src/Core/util/BlasUtil.h +++ b/Eigen/src/Core/util/BlasUtil.h @@ -130,14 +130,10 @@ struct ei_product_blocking_traits typedef typename ei_packet_traits::type PacketType; enum { PacketSize = sizeof(PacketType)/sizeof(Scalar), - #if (defined __i386__) - HalfRegisterCount = 4, - #else - HalfRegisterCount = 8, - #endif + NumberOfRegisters = EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS, // register block size along the N direction (must be either 2 or 4) - nr = HalfRegisterCount/2, + nr = NumberOfRegisters/4, // register block size along the M direction (currently, this one cannot be modified) mr = 2 * PacketSize,