From d9e5fd393a48db368dd90cf7119ebb3d774111cb Mon Sep 17 00:00:00 2001
From: Benoit Jacob <jacob.benoit.1@gmail.com>
Date: Sat, 3 Jan 2009 22:33:08 +0000
Subject: [PATCH] * In LU solvers: no longer need to use row-major matrices *
 Matrix: always inherit WithAlignedOperatorNew, whether or not vectorization
 is enabled * rename ei_alloc_stack to ei_aligned_stack_alloc (and
 ei_free_stack to ei_aligned_stack_free) * mixingtypes test: disable
 vectorization, as SSE intrinsics don't allow mixing types and we would just
 get compile errors there.

---
 Eigen/src/Core/CacheFriendlyProduct.h |  8 ++++----
 Eigen/src/Core/Matrix.h               |  3 +--
 Eigen/src/Core/Product.h              | 16 ++++++++--------
 Eigen/src/Core/util/Memory.h          | 16 ++++++++--------
 Eigen/src/LU/LU.h                     | 10 +++++-----
 Eigen/src/Sparse/SparseLDLT.h         | 16 ++++++++--------
 test/mixingtypes.cpp                  |  1 +
 7 files changed, 35 insertions(+), 35 deletions(-)
diff --git a/Eigen/src/Core/CacheFriendlyProduct.h b/Eigen/src/Core/CacheFriendlyProduct.h
index 0daccaf5f..f23bdc9ea 100644
--- a/Eigen/src/Core/CacheFriendlyProduct.h
+++ b/Eigen/src/Core/CacheFriendlyProduct.h
@@ -95,9 +95,9 @@ static void ei_cache_friendly_product(
   const bool needRhsCopy = (PacketSize>1) && ((rhsStride%PacketSize!=0) || (size_t(rhs)%16!=0));
   Scalar* EIGEN_RESTRICT block = 0;
   const int allocBlockSize = l2BlockRows*size;
-  block = ei_alloc_stack(Scalar, allocBlockSize);
+  block = ei_aligned_stack_alloc(Scalar, allocBlockSize);
   Scalar* EIGEN_RESTRICT rhsCopy
-    = ei_alloc_stack(Scalar, l2BlockSizeAligned*l2BlockSizeAligned);
+    = ei_aligned_stack_alloc(Scalar, l2BlockSizeAligned*l2BlockSizeAligned);
 
   // loops on each L2 cache friendly blocks of the result
   for(int l2i=0; l2i<rows; l2i+=l2BlockRows)
@@ -338,8 +338,8 @@ static void ei_cache_friendly_product(
     }
   }
 
-  ei_free_stack(block, Scalar, allocBlockSize);
-  ei_free_stack(rhsCopy, Scalar, l2BlockSizeAligned*l2BlockSizeAligned);
+  ei_aligned_stack_free(block, Scalar, allocBlockSize);
+  ei_aligned_stack_free(rhsCopy, Scalar, l2BlockSizeAligned*l2BlockSizeAligned);
 }
 
 #endif // EIGEN_EXTERN_INSTANTIATIONS
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index 39b3da6fb..888729ee7 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -122,12 +122,11 @@ struct ei_traits<Matrix<_Scalar, _Rows, _Cols, _StorageOrder, _MaxRows, _MaxCols
 template<typename _Scalar, int _Rows, int _Cols, int _StorageOrder, int _MaxRows, int _MaxCols>
 class Matrix
   : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _StorageOrder, _MaxRows, _MaxCols> >
-    #ifdef EIGEN_VECTORIZE
     , public ei_with_aligned_operator_new<_Scalar,ei_size_at_compile_time<_Rows,_Cols>::ret>
-    #endif
 {
   public:
     EIGEN_GENERIC_PUBLIC_INTERFACE(Matrix)
+    enum { StorageOrder = _StorageOrder };
     friend class Eigen::Map<Matrix, Unaligned>;
     typedef class Eigen::Map<Matrix, Unaligned> UnalignedMapType;
     friend class Eigen::Map<Matrix, Aligned>;
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index 77c18817d..0e4aa6c20 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -573,7 +573,7 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,ColMajor,HasDirect
        _res = &res.coeffRef(0);
     else
     {
-      _res = ei_alloc_stack(Scalar,res.size());
+      _res = ei_aligned_stack_alloc(Scalar,res.size());
       Map<Matrix<Scalar,DestDerived::RowsAtCompileTime,1> >(_res, res.size()) = res;
     }
     ei_cache_friendly_product_colmajor_times_vector(res.size(),
@@ -583,7 +583,7 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,ColMajor,HasDirect
     if (!EvalToRes)
     {
       res = Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size());
-      ei_free_stack(_res, Scalar, res.size());
+      ei_aligned_stack_free(_res, Scalar, res.size());
     }
   }
 };
@@ -619,7 +619,7 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
        _res = &res.coeffRef(0);
     else
     {
-      _res = ei_alloc_stack(Scalar, res.size());
+      _res = ei_aligned_stack_alloc(Scalar, res.size());
       Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size()) = res;
     }
     ei_cache_friendly_product_colmajor_times_vector(res.size(),
@@ -629,7 +629,7 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
     if (!EvalToRes)
     {
       res = Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size());
-      ei_free_stack(_res, Scalar, res.size());
+      ei_aligned_stack_free(_res, Scalar, res.size());
     }
   }
 };
@@ -652,13 +652,13 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,RowMajor,HasDirect
        _rhs = &product.rhs().const_cast_derived().coeffRef(0);
     else
     {
-      _rhs = ei_alloc_stack(Scalar, product.rhs().size());
+      _rhs = ei_aligned_stack_alloc(Scalar, product.rhs().size());
       Map<Matrix<Scalar,Rhs::SizeAtCompileTime,1> >(_rhs, product.rhs().size()) = product.rhs();
     }
     ei_cache_friendly_product_rowmajor_times_vector(&product.lhs().const_cast_derived().coeffRef(0,0), product.lhs().stride(),
                                                     _rhs, product.rhs().size(), res);
 
-    if (!UseRhsDirectly) ei_free_stack(_rhs, Scalar, product.rhs().size());
+    if (!UseRhsDirectly) ei_aligned_stack_free(_rhs, Scalar, product.rhs().size());
   }
 };
 
@@ -680,13 +680,13 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
        _lhs = &product.lhs().const_cast_derived().coeffRef(0);
     else
     {
-      _lhs = ei_alloc_stack(Scalar, product.lhs().size());
+      _lhs = ei_aligned_stack_alloc(Scalar, product.lhs().size());
       Map<Matrix<Scalar,Lhs::SizeAtCompileTime,1> >(_lhs, product.lhs().size()) = product.lhs();
     }
     ei_cache_friendly_product_rowmajor_times_vector(&product.rhs().const_cast_derived().coeffRef(0,0), product.rhs().stride(),
                                                     _lhs, product.lhs().size(), res);
 
-    if(!UseLhsDirectly) ei_free_stack(_lhs, Scalar, product.lhs().size());
+    if(!UseLhsDirectly) ei_aligned_stack_free(_lhs, Scalar, product.lhs().size());
   }
 };
 
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index 36dd87632..35b6b4ab9 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -139,24 +139,24 @@ inline static int ei_alignmentOffset(const Scalar* ptr, int maxOffset)
 }
 
 /** \internal
-  * ei_alloc_stack(TYPE,SIZE) allocates an aligned buffer of sizeof(TYPE)*SIZE bytes
+  * ei_aligned_stack_alloc(TYPE,SIZE) allocates an aligned buffer of sizeof(TYPE)*SIZE bytes
   * on the stack if sizeof(TYPE)*SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT.
   * Otherwise the memory is allocated on the heap.
-  * Data allocated with ei_alloc_stack \b must be freed by calling ei_free_stack(PTR,TYPE,SIZE).
+  * Data allocated with ei_aligned_stack_alloc \b must be freed by calling ei_aligned_stack_free(PTR,TYPE,SIZE).
   * \code
-  * float * data = ei_alloc_stack(float,array.size());
+  * float * data = ei_aligned_stack_alloc(float,array.size());
   * // ...
-  * ei_free_stack(data,float,array.size());
+  * ei_aligned_stack_free(data,float,array.size());
   * \endcode
   */
 #ifdef __linux__
-  #define ei_alloc_stack(TYPE,SIZE) ((sizeof(TYPE)*(SIZE)>EIGEN_STACK_ALLOCATION_LIMIT) \
+  #define ei_aligned_stack_alloc(TYPE,SIZE) ((sizeof(TYPE)*(SIZE)>EIGEN_STACK_ALLOCATION_LIMIT) \
                                     ? ei_aligned_malloc<TYPE>(SIZE) \
                                     : (TYPE*)alloca(sizeof(TYPE)*(SIZE)))
-  #define ei_free_stack(PTR,TYPE,SIZE) if (sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT) ei_aligned_free(PTR,SIZE)
+  #define ei_aligned_stack_free(PTR,TYPE,SIZE) do { if (sizeof(TYPE)*(SIZE)>EIGEN_STACK_ALLOCATION_LIMIT) ei_aligned_free(PTR,SIZE); } while(0) // (SIZE) parenthesized so the heap/stack test matches ei_aligned_stack_alloc; do/while(0) keeps the macro a single statement (no dangling else)
 #else
-  #define ei_alloc_stack(TYPE,SIZE) ei_aligned_malloc<TYPE>(SIZE)
-  #define ei_free_stack(PTR,TYPE,SIZE) ei_aligned_free(PTR,SIZE)
+  #define ei_aligned_stack_alloc(TYPE,SIZE) ei_aligned_malloc<TYPE>(SIZE)
+  #define ei_aligned_stack_free(PTR,TYPE,SIZE) ei_aligned_free(PTR,SIZE)
 #endif
 
 /** \class WithAlignedOperatorNew
diff --git a/Eigen/src/LU/LU.h b/Eigen/src/LU/LU.h
index f3c93a4e8..1ac14034f 100644
--- a/Eigen/src/LU/LU.h
+++ b/Eigen/src/LU/LU.h
@@ -76,7 +76,7 @@ template<typename MatrixType> class LU
                   MatrixType::ColsAtCompileTime, // the number of rows in the "kernel matrix" is the number of cols of the original matrix
                                                  // so that the product "matrix * kernel = zero" makes sense
                   Dynamic,                       // we don't know at compile-time the dimension of the kernel
-                  MatrixType::Flags&RowMajorBit,
+                  MatrixType::StorageOrder,
                   MatrixType::MaxColsAtCompileTime, // see explanation for 2nd template parameter
                   MatrixType::MaxColsAtCompileTime // the kernel is a subspace of the domain space, whose dimension is the number
                                                    // of columns of the original matrix
@@ -86,7 +86,7 @@ template<typename MatrixType> class LU
                    MatrixType::RowsAtCompileTime, // the image is a subspace of the destination space, whose dimension is the number
                                                   // of rows of the original matrix
                    Dynamic,                       // we don't know at compile time the dimension of the image (the rank)
-                   MatrixType::Flags,
+                   MatrixType::StorageOrder,
                    MatrixType::MaxRowsAtCompileTime, // the image matrix will consist of columns from the original matrix,
                    MatrixType::MaxColsAtCompileTime  // so it has the same number of rows and at most as many columns.
     > ImageResultType;
@@ -436,7 +436,7 @@ void LU<MatrixType>::computeKernel(KernelMatrixType *result) const
     * independent vectors in Ker U.
     */
 
-  Matrix<Scalar, Dynamic, Dynamic, MatrixType::Flags&RowMajorBit,
+  Matrix<Scalar, Dynamic, Dynamic, MatrixType::StorageOrder,
          MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime>
     y(-m_lu.corner(TopRight, m_rank, dimker));
 
@@ -504,7 +504,7 @@ bool LU<MatrixType>::solve(
 
   // Step 2
   Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime,
-         MatrixType::Flags&RowMajorBit,
+         MatrixType::StorageOrder,
          MatrixType::MaxRowsAtCompileTime,
          MatrixType::MaxRowsAtCompileTime> l(rows, rows);
   l.setZero();
@@ -523,7 +523,7 @@ bool LU<MatrixType>::solve(
           return false;
   }
   Matrix<Scalar, Dynamic, OtherDerived::ColsAtCompileTime,
-         MatrixType::Flags&RowMajorBit,
+         MatrixType::StorageOrder,
          MatrixType::MaxRowsAtCompileTime, OtherDerived::MaxColsAtCompileTime>
     d(c.corner(TopLeft, m_rank, c.cols()));
   m_lu.corner(TopLeft, m_rank, m_rank)
diff --git a/Eigen/src/Sparse/SparseLDLT.h b/Eigen/src/Sparse/SparseLDLT.h
index b1f58b4b1..2441646be 100644
--- a/Eigen/src/Sparse/SparseLDLT.h
+++ b/Eigen/src/Sparse/SparseLDLT.h
@@ -192,7 +192,7 @@ void SparseLDLT<MatrixType,Backend>::_symbolic(const MatrixType& a)
   m_matrix.resize(size, size);
   m_parent.resize(size);
   m_nonZerosPerCol.resize(size);
-  int * tags = ei_alloc_stack(int, size);
+  int * tags = ei_aligned_stack_alloc(int, size);
 
   const int* Ap = a._outerIndexPtr();
   const int* Ai = a._innerIndexPtr();
@@ -238,7 +238,7 @@ void SparseLDLT<MatrixType,Backend>::_symbolic(const MatrixType& a)
     Lp[k+1] = Lp[k] + m_nonZerosPerCol[k];
 
   m_matrix.resizeNonZeros(Lp[size]);
-  ei_free_stack(tags, int, size);
+  ei_aligned_stack_free(tags, int, size);
 }
 
 template<typename MatrixType, int Backend>
@@ -257,9 +257,9 @@ bool SparseLDLT<MatrixType,Backend>::_numeric(const MatrixType& a)
   Scalar* Lx = m_matrix._valuePtr();
   m_diag.resize(size);
 
-  Scalar * y = ei_alloc_stack(Scalar, size);
-  int * pattern = ei_alloc_stack(int, size);
-  int * tags = ei_alloc_stack(int, size);
+  Scalar * y = ei_aligned_stack_alloc(Scalar, size);
+  int * pattern = ei_aligned_stack_alloc(int, size);
+  int * tags = ei_aligned_stack_alloc(int, size);
 
   const int* P = 0;
   const int* Pinv = 0;
@@ -315,9 +315,9 @@ bool SparseLDLT<MatrixType,Backend>::_numeric(const MatrixType& a)
     }
   }
 
-  ei_free_stack(y, Scalar, size);
-  ei_free_stack(pattern, int, size);
-  ei_free_stack(tags, int, size);
+  ei_aligned_stack_free(y, Scalar, size);
+  ei_aligned_stack_free(pattern, int, size);
+  ei_aligned_stack_free(tags, int, size);
 
   return ok;  /* success, diagonal of D is all nonzero */
 }
diff --git a/test/mixingtypes.cpp b/test/mixingtypes.cpp
index ddf1c2b4d..686839161 100644
--- a/test/mixingtypes.cpp
+++ b/test/mixingtypes.cpp
@@ -24,6 +24,7 @@
 // Eigen. If not, see <http://www.gnu.org/licenses/>.
 
 #define EIGEN_NO_STATIC_ASSERT // turn static asserts into runtime asserts in order to check them
+#define EIGEN_DONT_VECTORIZE // SSE intrinsics aren't designed to allow mixing types
 #include "main.h"