From d9e5fd393a48db368dd90cf7119ebb3d774111cb Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Sat, 3 Jan 2009 22:33:08 +0000 Subject: [PATCH] * In LU solvers: no need anymore to use row-major matrices * Matrix: always inherit WithAlignedOperatorNew, regardless of vectorization or not * rename ei_alloc_stack to ei_aligned_stack_alloc * mixingtypes test: disable vectorization as SSE intrinsics don't allow mixing types and we just get compile errors there. --- Eigen/src/Core/CacheFriendlyProduct.h | 8 ++++---- Eigen/src/Core/Matrix.h | 3 +-- Eigen/src/Core/Product.h | 16 ++++++++-------- Eigen/src/Core/util/Memory.h | 16 ++++++++-------- Eigen/src/LU/LU.h | 10 +++++----- Eigen/src/Sparse/SparseLDLT.h | 16 ++++++++-------- test/mixingtypes.cpp | 1 + 7 files changed, 35 insertions(+), 35 deletions(-) diff --git a/Eigen/src/Core/CacheFriendlyProduct.h b/Eigen/src/Core/CacheFriendlyProduct.h index 0daccaf5f..f23bdc9ea 100644 --- a/Eigen/src/Core/CacheFriendlyProduct.h +++ b/Eigen/src/Core/CacheFriendlyProduct.h @@ -95,9 +95,9 @@ static void ei_cache_friendly_product( const bool needRhsCopy = (PacketSize>1) && ((rhsStride%PacketSize!=0) || (size_t(rhs)%16!=0)); Scalar* EIGEN_RESTRICT block = 0; const int allocBlockSize = l2BlockRows*size; - block = ei_alloc_stack(Scalar, allocBlockSize); + block = ei_aligned_stack_alloc(Scalar, allocBlockSize); Scalar* EIGEN_RESTRICT rhsCopy - = ei_alloc_stack(Scalar, l2BlockSizeAligned*l2BlockSizeAligned); + = ei_aligned_stack_alloc(Scalar, l2BlockSizeAligned*l2BlockSizeAligned); // loops on each L2 cache friendly blocks of the result for(int l2i=0; l2i class Matrix : public MatrixBase > - #ifdef EIGEN_VECTORIZE , public ei_with_aligned_operator_new<_Scalar,ei_size_at_compile_time<_Rows,_Cols>::ret> - #endif { public: EIGEN_GENERIC_PUBLIC_INTERFACE(Matrix) + enum { StorageOrder = _StorageOrder }; friend class Eigen::Map; typedef class Eigen::Map UnalignedMapType; friend class Eigen::Map; diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index 77c18817d..0e4aa6c20 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -573,7 +573,7 @@ struct ei_cache_friendly_product_selector >(_res, res.size()) = res; } ei_cache_friendly_product_colmajor_times_vector(res.size(), @@ -583,7 +583,7 @@ struct ei_cache_friendly_product_selector >(_res, res.size()); - ei_free_stack(_res, Scalar, res.size()); + ei_aligned_stack_free(_res, Scalar, res.size()); } } }; @@ -619,7 +619,7 @@ struct ei_cache_friendly_product_selector >(_res, res.size()) = res; } ei_cache_friendly_product_colmajor_times_vector(res.size(), @@ -629,7 +629,7 @@ struct ei_cache_friendly_product_selector >(_res, res.size()); - ei_free_stack(_res, Scalar, res.size()); + ei_aligned_stack_free(_res, Scalar, res.size()); } } }; @@ -652,13 +652,13 @@ struct ei_cache_friendly_product_selector >(_rhs, product.rhs().size()) = product.rhs(); } ei_cache_friendly_product_rowmajor_times_vector(&product.lhs().const_cast_derived().coeffRef(0,0), product.lhs().stride(), _rhs, product.rhs().size(), res); - if (!UseRhsDirectly) ei_free_stack(_rhs, Scalar, product.rhs().size()); + if (!UseRhsDirectly) ei_aligned_stack_free(_rhs, Scalar, product.rhs().size()); } }; @@ -680,13 +680,13 @@ struct ei_cache_friendly_product_selector >(_lhs, product.lhs().size()) = product.lhs(); } ei_cache_friendly_product_rowmajor_times_vector(&product.rhs().const_cast_derived().coeffRef(0,0), product.rhs().stride(), _lhs, product.lhs().size(), res); - if(!UseLhsDirectly) ei_free_stack(_lhs, Scalar, product.lhs().size()); + if(!UseLhsDirectly) ei_aligned_stack_free(_lhs, Scalar, product.lhs().size()); } }; diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h index 36dd87632..35b6b4ab9 100644 --- a/Eigen/src/Core/util/Memory.h +++ b/Eigen/src/Core/util/Memory.h @@ -139,24 +139,24 @@ inline static int ei_alignmentOffset(const Scalar* ptr, int maxOffset) } /** \internal - * ei_alloc_stack(TYPE,SIZE) allocates an aligned buffer of sizeof(TYPE)*SIZE bytes + * ei_aligned_stack_alloc(TYPE,SIZE) allocates an aligned buffer of sizeof(TYPE)*SIZE bytes * on the stack if sizeof(TYPE)*SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT. * Otherwise the memory is allocated on the heap. - * Data allocated with ei_alloc_stack \b must be freed by calling ei_free_stack(PTR,TYPE,SIZE). + * Data allocated with ei_aligned_stack_alloc \b must be freed by calling ei_aligned_stack_free(PTR,TYPE,SIZE). * \code - * float * data = ei_alloc_stack(float,array.size()); + * float * data = ei_aligned_stack_alloc(float,array.size()); * // ... - * ei_free_stack(data,float,array.size()); + * ei_aligned_stack_free(data,float,array.size()); * \endcode */ #ifdef __linux__ - #define ei_alloc_stack(TYPE,SIZE) ((sizeof(TYPE)*(SIZE)>EIGEN_STACK_ALLOCATION_LIMIT) \ + #define ei_aligned_stack_alloc(TYPE,SIZE) ((sizeof(TYPE)*(SIZE)>EIGEN_STACK_ALLOCATION_LIMIT) \ ? ei_aligned_malloc(SIZE) \ : (TYPE*)alloca(sizeof(TYPE)*(SIZE))) - #define ei_free_stack(PTR,TYPE,SIZE) if (sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT) ei_aligned_free(PTR,SIZE) + #define ei_aligned_stack_free(PTR,TYPE,SIZE) if (sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT) ei_aligned_free(PTR,SIZE) #else - #define ei_alloc_stack(TYPE,SIZE) ei_aligned_malloc(SIZE) - #define ei_free_stack(PTR,TYPE,SIZE) ei_aligned_free(PTR,SIZE) + #define ei_aligned_stack_alloc(TYPE,SIZE) ei_aligned_malloc(SIZE) + #define ei_aligned_stack_free(PTR,TYPE,SIZE) ei_aligned_free(PTR,SIZE) #endif /** \class WithAlignedOperatorNew diff --git a/Eigen/src/LU/LU.h b/Eigen/src/LU/LU.h index f3c93a4e8..1ac14034f 100644 --- a/Eigen/src/LU/LU.h +++ b/Eigen/src/LU/LU.h @@ -76,7 +76,7 @@ template class LU MatrixType::ColsAtCompileTime, // the number of rows in the "kernel matrix" is the number of cols of the original matrix // so that the product "matrix * kernel = zero" makes sense Dynamic, // we don't know at compile-time the dimension of the kernel - MatrixType::Flags&RowMajorBit, + MatrixType::StorageOrder, MatrixType::MaxColsAtCompileTime, // see explanation for 2nd template parameter MatrixType::MaxColsAtCompileTime // the kernel is a subspace of the domain space, whose dimension is the number // of columns of the original matrix @@ -86,7 +86,7 @@ template class LU MatrixType::RowsAtCompileTime, // the image is a subspace of the destination space, whose dimension is the number // of rows of the original matrix Dynamic, // we don't know at compile time the dimension of the image (the rank) - MatrixType::Flags, + MatrixType::StorageOrder, MatrixType::MaxRowsAtCompileTime, // the image matrix will consist of columns from the original matrix, MatrixType::MaxColsAtCompileTime // so it has the same number of rows and at most as many columns. > ImageResultType; @@ -436,7 +436,7 @@ void LU::computeKernel(KernelMatrixType *result) const * independent vectors in Ker U. */ - Matrix y(-m_lu.corner(TopRight, m_rank, dimker)); @@ -504,7 +504,7 @@ bool LU::solve( // Step 2 Matrix l(rows, rows); l.setZero(); @@ -523,7 +523,7 @@ bool LU::solve( return false; } Matrix d(c.corner(TopLeft, m_rank, c.cols())); m_lu.corner(TopLeft, m_rank, m_rank) diff --git a/Eigen/src/Sparse/SparseLDLT.h b/Eigen/src/Sparse/SparseLDLT.h index b1f58b4b1..2441646be 100644 --- a/Eigen/src/Sparse/SparseLDLT.h +++ b/Eigen/src/Sparse/SparseLDLT.h @@ -192,7 +192,7 @@ void SparseLDLT::_symbolic(const MatrixType& a) m_matrix.resize(size, size); m_parent.resize(size); m_nonZerosPerCol.resize(size); - int * tags = ei_alloc_stack(int, size); + int * tags = ei_aligned_stack_alloc(int, size); const int* Ap = a._outerIndexPtr(); const int* Ai = a._innerIndexPtr(); @@ -238,7 +238,7 @@ void SparseLDLT::_symbolic(const MatrixType& a) Lp[k+1] = Lp[k] + m_nonZerosPerCol[k]; m_matrix.resizeNonZeros(Lp[size]); - ei_free_stack(tags, int, size); + ei_aligned_stack_free(tags, int, size); } template @@ -257,9 +257,9 @@ bool SparseLDLT::_numeric(const MatrixType& a) Scalar* Lx = m_matrix._valuePtr(); m_diag.resize(size); - Scalar * y = ei_alloc_stack(Scalar, size); - int * pattern = ei_alloc_stack(int, size); - int * tags = ei_alloc_stack(int, size); + Scalar * y = ei_aligned_stack_alloc(Scalar, size); + int * pattern = ei_aligned_stack_alloc(int, size); + int * tags = ei_aligned_stack_alloc(int, size); const int* P = 0; const int* Pinv = 0; @@ -315,9 +315,9 @@ bool SparseLDLT::_numeric(const MatrixType& a) } } - ei_free_stack(y, Scalar, size); - ei_free_stack(pattern, int, size); - ei_free_stack(tags, int, size); + ei_aligned_stack_free(y, Scalar, size); + ei_aligned_stack_free(pattern, int, size); + ei_aligned_stack_free(tags, int, size); return ok; /* success, diagonal of D is all nonzero */ } diff --git a/test/mixingtypes.cpp b/test/mixingtypes.cpp index ddf1c2b4d..686839161 100644 --- a/test/mixingtypes.cpp +++ b/test/mixingtypes.cpp @@ -24,6 +24,7 @@ // Eigen. If not, see . #define EIGEN_NO_STATIC_ASSERT // turn static asserts into runtime asserts in order to check them +#define EIGEN_DONT_VECTORIZE // SSE intrinsics aren't designed to allow mixing types #include "main.h"