Mirror of https://gitlab.com/libeigen/eigen.git (synced 2025-04-21 17:19:36 +08:00)
* In LU solvers: no need to use row-major matrices anymore
* Matrix: always inherit WithAlignedOperatorNew, regardless of vectorization
* rename ei_alloc_stack to ei_aligned_stack_alloc (and ei_free_stack to ei_aligned_stack_free)
* mixingtypes test: disable vectorization, since SSE intrinsics don't allow mixing types and we would just get compile errors there
This commit is contained in:
parent fd7eba3394
commit d9e5fd393a
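As orientation for the rename: the macro pair allocates a temporary buffer on the stack when it is small enough, and on the heap otherwise. A minimal usage sketch, assuming the ei_aligned_stack_alloc / ei_aligned_stack_free macros from the Memory.h hunk below are in scope; the function and variable names here are hypothetical, for illustration only:

    #include <cstddef>
    #include <vector>

    // Sketch of the renamed temporary-buffer idiom. TYPE and SIZE must match
    // between the alloc and free calls (see the caveat after the Memory.h hunk).
    void scale(const std::vector<float>& values, std::vector<float>& out)
    {
      float* tmp = ei_aligned_stack_alloc(float, values.size());
      for (std::size_t i = 0; i < values.size(); ++i)
        tmp[i] = 2.0f * values[i];          // work in the aligned scratch buffer
      out.assign(tmp, tmp + values.size());
      ei_aligned_stack_free(tmp, float, values.size());
    }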
@@ -95,9 +95,9 @@ static void ei_cache_friendly_product(
   const bool needRhsCopy = (PacketSize>1) && ((rhsStride%PacketSize!=0) || (size_t(rhs)%16!=0));
   Scalar* EIGEN_RESTRICT block = 0;
   const int allocBlockSize = l2BlockRows*size;
-  block = ei_alloc_stack(Scalar, allocBlockSize);
+  block = ei_aligned_stack_alloc(Scalar, allocBlockSize);
   Scalar* EIGEN_RESTRICT rhsCopy
-    = ei_alloc_stack(Scalar, l2BlockSizeAligned*l2BlockSizeAligned);
+    = ei_aligned_stack_alloc(Scalar, l2BlockSizeAligned*l2BlockSizeAligned);

   // loops on each L2 cache friendly blocks of the result
   for(int l2i=0; l2i<rows; l2i+=l2BlockRows)
@@ -338,8 +338,8 @@ static void ei_cache_friendly_product(
     }
   }

-  ei_free_stack(block, Scalar, allocBlockSize);
-  ei_free_stack(rhsCopy, Scalar, l2BlockSizeAligned*l2BlockSizeAligned);
+  ei_aligned_stack_free(block, Scalar, allocBlockSize);
+  ei_aligned_stack_free(rhsCopy, Scalar, l2BlockSizeAligned*l2BlockSizeAligned);
 }

 #endif // EIGEN_EXTERN_INSTANTIATIONS
@@ -122,12 +122,11 @@ struct ei_traits<Matrix<_Scalar, _Rows, _Cols, _StorageOrder, _MaxRows, _MaxCols
 template<typename _Scalar, int _Rows, int _Cols, int _StorageOrder, int _MaxRows, int _MaxCols>
 class Matrix
   : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _StorageOrder, _MaxRows, _MaxCols> >
-#ifdef EIGEN_VECTORIZE
   , public ei_with_aligned_operator_new<_Scalar,ei_size_at_compile_time<_Rows,_Cols>::ret>
-#endif
 {
   public:
     EIGEN_GENERIC_PUBLIC_INTERFACE(Matrix)
+    enum { StorageOrder = _StorageOrder };
     friend class Eigen::Map<Matrix, Unaligned>;
     typedef class Eigen::Map<Matrix, Unaligned> UnalignedMapType;
     friend class Eigen::Map<Matrix, Aligned>;
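For context on the second bullet of the commit message: Matrix now inherits the aligned operator new unconditionally. A simplified sketch of what such a base class provides, assuming a POSIX platform; this is an illustration only, not Eigen's actual ei_with_aligned_operator_new, which also dispatches on the scalar type and compile-time size:

    #include <stdlib.h>
    #include <new>

    // Illustration: a base whose operator new guarantees 16-byte alignment,
    // matching the SSE packet requirement visible elsewhere in this diff
    // (the size_t(rhs)%16 check in ei_cache_friendly_product).
    struct AlignedOperatorNewSketch
    {
      void* operator new(std::size_t size)
      {
        void* ptr = 0;
        if (posix_memalign(&ptr, 16, size) != 0)
          throw std::bad_alloc();
        return ptr;
      }
      void operator delete(void* ptr) { free(ptr); }
    };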
@@ -573,7 +573,7 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,ColMajor,HasDirect
       _res = &res.coeffRef(0);
     else
     {
-      _res = ei_alloc_stack(Scalar,res.size());
+      _res = ei_aligned_stack_alloc(Scalar,res.size());
       Map<Matrix<Scalar,DestDerived::RowsAtCompileTime,1> >(_res, res.size()) = res;
     }
     ei_cache_friendly_product_colmajor_times_vector(res.size(),
@@ -583,7 +583,7 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,ColMajor,HasDirect
     if (!EvalToRes)
     {
       res = Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size());
-      ei_free_stack(_res, Scalar, res.size());
+      ei_aligned_stack_free(_res, Scalar, res.size());
     }
   }
 };
@@ -619,7 +619,7 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
       _res = &res.coeffRef(0);
     else
     {
-      _res = ei_alloc_stack(Scalar, res.size());
+      _res = ei_aligned_stack_alloc(Scalar, res.size());
       Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size()) = res;
     }
     ei_cache_friendly_product_colmajor_times_vector(res.size(),
@@ -629,7 +629,7 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
     if (!EvalToRes)
     {
       res = Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size());
-      ei_free_stack(_res, Scalar, res.size());
+      ei_aligned_stack_free(_res, Scalar, res.size());
     }
   }
 };
@@ -652,13 +652,13 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,RowMajor,HasDirect
       _rhs = &product.rhs().const_cast_derived().coeffRef(0);
     else
     {
-      _rhs = ei_alloc_stack(Scalar, product.rhs().size());
+      _rhs = ei_aligned_stack_alloc(Scalar, product.rhs().size());
       Map<Matrix<Scalar,Rhs::SizeAtCompileTime,1> >(_rhs, product.rhs().size()) = product.rhs();
     }
     ei_cache_friendly_product_rowmajor_times_vector(&product.lhs().const_cast_derived().coeffRef(0,0), product.lhs().stride(),
                                                     _rhs, product.rhs().size(), res);

-    if (!UseRhsDirectly) ei_free_stack(_rhs, Scalar, product.rhs().size());
+    if (!UseRhsDirectly) ei_aligned_stack_free(_rhs, Scalar, product.rhs().size());
   }
 };

@@ -680,13 +680,13 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
       _lhs = &product.lhs().const_cast_derived().coeffRef(0);
     else
     {
-      _lhs = ei_alloc_stack(Scalar, product.lhs().size());
+      _lhs = ei_aligned_stack_alloc(Scalar, product.lhs().size());
       Map<Matrix<Scalar,Lhs::SizeAtCompileTime,1> >(_lhs, product.lhs().size()) = product.lhs();
     }
     ei_cache_friendly_product_rowmajor_times_vector(&product.rhs().const_cast_derived().coeffRef(0,0), product.rhs().stride(),
                                                     _lhs, product.lhs().size(), res);

-    if(!UseLhsDirectly) ei_free_stack(_lhs, Scalar, product.lhs().size());
+    if(!UseLhsDirectly) ei_aligned_stack_free(_lhs, Scalar, product.lhs().size());
   }
 };
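All four selector hunks above follow the same copy-in/compute/copy-out pattern: when the destination or operand cannot be handed to the kernel directly, it is first copied into an aligned scratch buffer. A condensed sketch of that pattern, where `kernel` is a hypothetical stand-in for ei_cache_friendly_product_colmajor_times_vector and the macros come from the Memory.h hunk below:

    // Condensed sketch of the selectors' buffering pattern; illustration only.
    template<typename Scalar, typename Dest>
    void run_buffered(Dest& res, bool evalToRes)
    {
      Scalar* _res;
      if (evalToRes)
        _res = &res.coeffRef(0);             // kernel can write into res directly
      else
      {
        _res = ei_aligned_stack_alloc(Scalar, res.size());
        for (int i = 0; i < res.size(); ++i) // the kernel accumulates, so copy in
          _res[i] = res.coeff(i);
      }
      kernel(_res, res.size());              // hypothetical cache-friendly kernel
      if (!evalToRes)
      {
        for (int i = 0; i < res.size(); ++i) // copy the result back out
          res.coeffRef(i) = _res[i];
        ei_aligned_stack_free(_res, Scalar, res.size());
      }
    }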
@@ -139,24 +139,24 @@ inline static int ei_alignmentOffset(const Scalar* ptr, int maxOffset)
 }

 /** \internal
-  * ei_alloc_stack(TYPE,SIZE) allocates an aligned buffer of sizeof(TYPE)*SIZE bytes
+  * ei_aligned_stack_alloc(TYPE,SIZE) allocates an aligned buffer of sizeof(TYPE)*SIZE bytes
   * on the stack if sizeof(TYPE)*SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT.
   * Otherwise the memory is allocated on the heap.
-  * Data allocated with ei_alloc_stack \b must be freed by calling ei_free_stack(PTR,TYPE,SIZE).
+  * Data allocated with ei_aligned_stack_alloc \b must be freed by calling ei_aligned_stack_free(PTR,TYPE,SIZE).
   * \code
-  * float * data = ei_alloc_stack(float,array.size());
+  * float * data = ei_aligned_stack_alloc(float,array.size());
   * // ...
-  * ei_free_stack(data,float,array.size());
+  * ei_aligned_stack_free(data,float,array.size());
   * \endcode
   */
 #ifdef __linux__
-  #define ei_alloc_stack(TYPE,SIZE) ((sizeof(TYPE)*(SIZE)>EIGEN_STACK_ALLOCATION_LIMIT) \
+  #define ei_aligned_stack_alloc(TYPE,SIZE) ((sizeof(TYPE)*(SIZE)>EIGEN_STACK_ALLOCATION_LIMIT) \
                                     ? ei_aligned_malloc<TYPE>(SIZE) \
                                     : (TYPE*)alloca(sizeof(TYPE)*(SIZE)))
-  #define ei_free_stack(PTR,TYPE,SIZE) if (sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT) ei_aligned_free(PTR,SIZE)
+  #define ei_aligned_stack_free(PTR,TYPE,SIZE) if (sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT) ei_aligned_free(PTR,SIZE)
 #else
-  #define ei_alloc_stack(TYPE,SIZE) ei_aligned_malloc<TYPE>(SIZE)
-  #define ei_free_stack(PTR,TYPE,SIZE) ei_aligned_free(PTR,SIZE)
+  #define ei_aligned_stack_alloc(TYPE,SIZE) ei_aligned_malloc<TYPE>(SIZE)
+  #define ei_aligned_stack_free(PTR,TYPE,SIZE) ei_aligned_free(PTR,SIZE)
 #endif

 /** \class WithAlignedOperatorNew
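A caveat implied by the __linux__ branch (my reading, not stated in the patch): the small-size path uses alloca, so the buffer lives in the calling function's stack frame, and both macros recompute the same threshold test. The alloc/free pair must therefore sit in the same function with identical TYPE and SIZE arguments:

    // Hypothetical misuse, for illustration only. A mismatched SIZE can flip
    // which branch the free-side threshold test takes, handing stack memory
    // to ei_aligned_free or leaking a heap block.
    void example(int n)
    {
      double* buf = ei_aligned_stack_alloc(double, n);
      // ... use buf ...
      // ei_aligned_stack_free(buf, double, n + 1000);  // WRONG: SIZE differs
      ei_aligned_stack_free(buf, double, n);            // correct: same TYPE, SIZE
    }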
@@ -76,7 +76,7 @@ template<typename MatrixType> class LU
     MatrixType::ColsAtCompileTime, // the number of rows in the "kernel matrix" is the number of cols of the original matrix
                                    // so that the product "matrix * kernel = zero" makes sense
     Dynamic,                       // we don't know at compile-time the dimension of the kernel
-    MatrixType::Flags&RowMajorBit,
+    MatrixType::StorageOrder,
     MatrixType::MaxColsAtCompileTime, // see explanation for 2nd template parameter
     MatrixType::MaxColsAtCompileTime  // the kernel is a subspace of the domain space, whose dimension is the number
                                       // of columns of the original matrix
@@ -86,7 +86,7 @@ template<typename MatrixType> class LU
     MatrixType::RowsAtCompileTime, // the image is a subspace of the destination space, whose dimension is the number
                                    // of rows of the original matrix
     Dynamic,                       // we don't know at compile time the dimension of the image (the rank)
-    MatrixType::Flags,
+    MatrixType::StorageOrder,
     MatrixType::MaxRowsAtCompileTime, // the image matrix will consist of columns from the original matrix,
     MatrixType::MaxColsAtCompileTime  // so it has the same number of rows and at most as many columns.
   > ImageResultType;
@@ -436,7 +436,7 @@ void LU<MatrixType>::computeKernel(KernelMatrixType *result) const
   * independent vectors in Ker U.
   */

-  Matrix<Scalar, Dynamic, Dynamic, MatrixType::Flags&RowMajorBit,
+  Matrix<Scalar, Dynamic, Dynamic, MatrixType::StorageOrder,
          MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime>
     y(-m_lu.corner(TopRight, m_rank, dimker));

@@ -504,7 +504,7 @@ bool LU<MatrixType>::solve(

   // Step 2
   Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime,
-         MatrixType::Flags&RowMajorBit,
+         MatrixType::StorageOrder,
          MatrixType::MaxRowsAtCompileTime,
          MatrixType::MaxRowsAtCompileTime> l(rows, rows);
   l.setZero();
@@ -523,7 +523,7 @@ bool LU<MatrixType>::solve(
     return false;
   }
   Matrix<Scalar, Dynamic, OtherDerived::ColsAtCompileTime,
-         MatrixType::Flags&RowMajorBit,
+         MatrixType::StorageOrder,
          MatrixType::MaxRowsAtCompileTime, OtherDerived::MaxColsAtCompileTime>
     d(c.corner(TopLeft, m_rank, c.cols()));
   m_lu.corner(TopLeft, m_rank, m_rank)
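The LU hunks above replace the bit-twiddled MatrixType::Flags&RowMajorBit with the StorageOrder enum introduced in the Matrix hunk, so dependent types inherit the storage order directly instead of re-deriving it from a flags bitfield. A minimal sketch of the idea, with illustrative names only:

    // Illustrative names only, not Eigen's actual types.
    template<typename Scalar, int _StorageOrder>
    struct MatrixSketch
    {
      enum { StorageOrder = _StorageOrder };
    };

    template<typename MatrixType>
    struct KernelResultSketch
    {
      // The result type picks up the decomposed matrix's storage order
      // through the dedicated enum.
      typedef MatrixSketch<double, MatrixType::StorageOrder> type;
    };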
@@ -192,7 +192,7 @@ void SparseLDLT<MatrixType,Backend>::_symbolic(const MatrixType& a)
   m_matrix.resize(size, size);
   m_parent.resize(size);
   m_nonZerosPerCol.resize(size);
-  int * tags = ei_alloc_stack(int, size);
+  int * tags = ei_aligned_stack_alloc(int, size);

   const int* Ap = a._outerIndexPtr();
   const int* Ai = a._innerIndexPtr();
@@ -238,7 +238,7 @@ void SparseLDLT<MatrixType,Backend>::_symbolic(const MatrixType& a)
     Lp[k+1] = Lp[k] + m_nonZerosPerCol[k];

   m_matrix.resizeNonZeros(Lp[size]);
-  ei_free_stack(tags, int, size);
+  ei_aligned_stack_free(tags, int, size);
 }

 template<typename MatrixType, int Backend>
@@ -257,9 +257,9 @@ bool SparseLDLT<MatrixType,Backend>::_numeric(const MatrixType& a)
   Scalar* Lx = m_matrix._valuePtr();
   m_diag.resize(size);

-  Scalar * y = ei_alloc_stack(Scalar, size);
-  int * pattern = ei_alloc_stack(int, size);
-  int * tags = ei_alloc_stack(int, size);
+  Scalar * y = ei_aligned_stack_alloc(Scalar, size);
+  int * pattern = ei_aligned_stack_alloc(int, size);
+  int * tags = ei_aligned_stack_alloc(int, size);

   const int* P = 0;
   const int* Pinv = 0;
@@ -315,9 +315,9 @@ bool SparseLDLT<MatrixType,Backend>::_numeric(const MatrixType& a)
     }
   }

-  ei_free_stack(y, Scalar, size);
-  ei_free_stack(pattern, int, size);
-  ei_free_stack(tags, int, size);
+  ei_aligned_stack_free(y, Scalar, size);
+  ei_aligned_stack_free(pattern, int, size);
+  ei_aligned_stack_free(tags, int, size);

   return ok; /* success, diagonal of D is all nonzero */
 }
@@ -24,6 +24,7 @@
 // Eigen. If not, see <http://www.gnu.org/licenses/>.

 #define EIGEN_NO_STATIC_ASSERT // turn static asserts into runtime asserts in order to check them
+#define EIGEN_DONT_VECTORIZE   // SSE intrinsics aren't designed to allow mixing types
 #include "main.h"
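Why the test has to opt out of vectorization (my gloss on the comment in the hunk above): SSE packet types are homogeneous, so there is no intrinsic that combines a float packet with a double packet, and a mixed-type expression therefore cannot be lowered to packet ops:

    #include <xmmintrin.h>  // SSE:  __m128  holds 4 floats
    #include <emmintrin.h>  // SSE2: __m128d holds 2 doubles

    __m128  add_floats (__m128  a, __m128  b) { return _mm_add_ps(a, b); }
    __m128d add_doubles(__m128d a, __m128d b) { return _mm_add_pd(a, b); }

    // __m128 add_mixed(__m128 a, __m128d b) { return _mm_add_ps(a, b); }
    // ^ does not compile: there is no conversion between __m128 and __m128d,
    //   and no mixed-type add intrinsic exists, hence EIGEN_DONT_VECTORIZE.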