Mirror of https://gitlab.com/libeigen/eigen.git (synced 2025-04-21 17:19:36 +08:00)
* In LU solvers: no need to use row-major matrices anymore
* Matrix: always inherit WithAlignedOperatorNew, regardless of vectorization
* rename ei_alloc_stack to ei_aligned_stack_alloc (and ei_free_stack to ei_aligned_stack_free)
* mixingtypes test: disable vectorization, since SSE intrinsics don't allow mixing types and we would just get compile errors there
This commit is contained in:
parent fd7eba3394
commit d9e5fd393a
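As orientation for the rename: the macro pair allocates a temporary buffer on the stack when it is small enough, and on the heap otherwise. A minimal usage sketch, assuming the ei_aligned_stack_alloc / ei_aligned_stack_free macros from the Memory.h hunk below are in scope; the function and variable names here are hypothetical, for illustration only:

    #include <cstddef>
    #include <vector>

    // Sketch of the renamed temporary-buffer idiom. TYPE and SIZE must match
    // between the alloc and free calls (see the caveat after the Memory.h hunk).
    void scale(const std::vector<float>& values, std::vector<float>& out)
    {
      float* tmp = ei_aligned_stack_alloc(float, values.size());
      for (std::size_t i = 0; i < values.size(); ++i)
        tmp[i] = 2.0f * values[i];          // work in the aligned scratch buffer
      out.assign(tmp, tmp + values.size());
      ei_aligned_stack_free(tmp, float, values.size());
    }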
@@ -95,9 +95,9 @@ static void ei_cache_friendly_product(
   const bool needRhsCopy = (PacketSize>1) && ((rhsStride%PacketSize!=0) || (size_t(rhs)%16!=0));
   Scalar* EIGEN_RESTRICT block = 0;
   const int allocBlockSize = l2BlockRows*size;
-  block = ei_alloc_stack(Scalar, allocBlockSize);
+  block = ei_aligned_stack_alloc(Scalar, allocBlockSize);
   Scalar* EIGEN_RESTRICT rhsCopy
-    = ei_alloc_stack(Scalar, l2BlockSizeAligned*l2BlockSizeAligned);
+    = ei_aligned_stack_alloc(Scalar, l2BlockSizeAligned*l2BlockSizeAligned);

   // loops on each L2 cache friendly blocks of the result
   for(int l2i=0; l2i<rows; l2i+=l2BlockRows)
@@ -338,8 +338,8 @@ static void ei_cache_friendly_product(
     }
   }

-  ei_free_stack(block, Scalar, allocBlockSize);
-  ei_free_stack(rhsCopy, Scalar, l2BlockSizeAligned*l2BlockSizeAligned);
+  ei_aligned_stack_free(block, Scalar, allocBlockSize);
+  ei_aligned_stack_free(rhsCopy, Scalar, l2BlockSizeAligned*l2BlockSizeAligned);
 }

 #endif // EIGEN_EXTERN_INSTANTIATIONS
@@ -122,12 +122,11 @@ struct ei_traits<Matrix<_Scalar, _Rows, _Cols, _StorageOrder, _MaxRows, _MaxCols
 template<typename _Scalar, int _Rows, int _Cols, int _StorageOrder, int _MaxRows, int _MaxCols>
 class Matrix
   : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _StorageOrder, _MaxRows, _MaxCols> >
-#ifdef EIGEN_VECTORIZE
   , public ei_with_aligned_operator_new<_Scalar,ei_size_at_compile_time<_Rows,_Cols>::ret>
-#endif
 {
   public:
     EIGEN_GENERIC_PUBLIC_INTERFACE(Matrix)
+    enum { StorageOrder = _StorageOrder };
     friend class Eigen::Map<Matrix, Unaligned>;
     typedef class Eigen::Map<Matrix, Unaligned> UnalignedMapType;
     friend class Eigen::Map<Matrix, Aligned>;
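For context on the second bullet of the commit message: Matrix now inherits the aligned operator new unconditionally. A simplified sketch of what such a base class provides, assuming a POSIX platform; this is an illustration only, not Eigen's actual ei_with_aligned_operator_new, which also dispatches on the scalar type and compile-time size:

    #include <stdlib.h>
    #include <new>

    // Illustration: a base whose operator new guarantees 16-byte alignment,
    // matching the SSE packet requirement visible elsewhere in this diff
    // (the size_t(rhs)%16 check in ei_cache_friendly_product).
    struct AlignedOperatorNewSketch
    {
      void* operator new(std::size_t size)
      {
        void* ptr = 0;
        if (posix_memalign(&ptr, 16, size) != 0)
          throw std::bad_alloc();
        return ptr;
      }
      void operator delete(void* ptr) { free(ptr); }
    };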
@@ -573,7 +573,7 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,ColMajor,HasDirect
       _res = &res.coeffRef(0);
     else
     {
-      _res = ei_alloc_stack(Scalar,res.size());
+      _res = ei_aligned_stack_alloc(Scalar,res.size());
       Map<Matrix<Scalar,DestDerived::RowsAtCompileTime,1> >(_res, res.size()) = res;
     }
     ei_cache_friendly_product_colmajor_times_vector(res.size(),
@@ -583,7 +583,7 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,ColMajor,HasDirect
     if (!EvalToRes)
     {
       res = Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size());
-      ei_free_stack(_res, Scalar, res.size());
+      ei_aligned_stack_free(_res, Scalar, res.size());
     }
   }
 };
@@ -619,7 +619,7 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
       _res = &res.coeffRef(0);
     else
     {
-      _res = ei_alloc_stack(Scalar, res.size());
+      _res = ei_aligned_stack_alloc(Scalar, res.size());
       Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size()) = res;
     }
     ei_cache_friendly_product_colmajor_times_vector(res.size(),
@@ -629,7 +629,7 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
     if (!EvalToRes)
     {
       res = Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size());
-      ei_free_stack(_res, Scalar, res.size());
+      ei_aligned_stack_free(_res, Scalar, res.size());
     }
   }
 };
@@ -652,13 +652,13 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,RowMajor,HasDirect
       _rhs = &product.rhs().const_cast_derived().coeffRef(0);
     else
     {
-      _rhs = ei_alloc_stack(Scalar, product.rhs().size());
+      _rhs = ei_aligned_stack_alloc(Scalar, product.rhs().size());
       Map<Matrix<Scalar,Rhs::SizeAtCompileTime,1> >(_rhs, product.rhs().size()) = product.rhs();
     }
     ei_cache_friendly_product_rowmajor_times_vector(&product.lhs().const_cast_derived().coeffRef(0,0), product.lhs().stride(),
                                                     _rhs, product.rhs().size(), res);

-    if (!UseRhsDirectly) ei_free_stack(_rhs, Scalar, product.rhs().size());
+    if (!UseRhsDirectly) ei_aligned_stack_free(_rhs, Scalar, product.rhs().size());
   }
 };

@@ -680,13 +680,13 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
       _lhs = &product.lhs().const_cast_derived().coeffRef(0);
     else
     {
-      _lhs = ei_alloc_stack(Scalar, product.lhs().size());
+      _lhs = ei_aligned_stack_alloc(Scalar, product.lhs().size());
       Map<Matrix<Scalar,Lhs::SizeAtCompileTime,1> >(_lhs, product.lhs().size()) = product.lhs();
     }
     ei_cache_friendly_product_rowmajor_times_vector(&product.rhs().const_cast_derived().coeffRef(0,0), product.rhs().stride(),
                                                     _lhs, product.lhs().size(), res);

-    if(!UseLhsDirectly) ei_free_stack(_lhs, Scalar, product.lhs().size());
+    if(!UseLhsDirectly) ei_aligned_stack_free(_lhs, Scalar, product.lhs().size());
   }
 };
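All four selector hunks above follow the same copy-in/compute/copy-out pattern: when the destination or operand cannot be handed to the kernel directly, it is first copied into an aligned scratch buffer. A condensed sketch of that pattern, where `kernel` is a hypothetical stand-in for ei_cache_friendly_product_colmajor_times_vector and the macros come from the Memory.h hunk below:

    // Condensed sketch of the selectors' buffering pattern; illustration only.
    template<typename Scalar, typename Dest>
    void run_buffered(Dest& res, bool evalToRes)
    {
      Scalar* _res;
      if (evalToRes)
        _res = &res.coeffRef(0);             // kernel can write into res directly
      else
      {
        _res = ei_aligned_stack_alloc(Scalar, res.size());
        for (int i = 0; i < res.size(); ++i) // the kernel accumulates, so copy in
          _res[i] = res.coeff(i);
      }
      kernel(_res, res.size());              // hypothetical cache-friendly kernel
      if (!evalToRes)
      {
        for (int i = 0; i < res.size(); ++i) // copy the result back out
          res.coeffRef(i) = _res[i];
        ei_aligned_stack_free(_res, Scalar, res.size());
      }
    }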
@@ -139,24 +139,24 @@ inline static int ei_alignmentOffset(const Scalar* ptr, int maxOffset)
 }

 /** \internal
-  * ei_alloc_stack(TYPE,SIZE) allocates an aligned buffer of sizeof(TYPE)*SIZE bytes
+  * ei_aligned_stack_alloc(TYPE,SIZE) allocates an aligned buffer of sizeof(TYPE)*SIZE bytes
   * on the stack if sizeof(TYPE)*SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT.
   * Otherwise the memory is allocated on the heap.
-  * Data allocated with ei_alloc_stack \b must be freed by calling ei_free_stack(PTR,TYPE,SIZE).
+  * Data allocated with ei_aligned_stack_alloc \b must be freed by calling ei_aligned_stack_free(PTR,TYPE,SIZE).
   * \code
-  * float * data = ei_alloc_stack(float,array.size());
+  * float * data = ei_aligned_stack_alloc(float,array.size());
   * // ...
-  * ei_free_stack(data,float,array.size());
+  * ei_aligned_stack_free(data,float,array.size());
   * \endcode
   */
 #ifdef __linux__
-  #define ei_alloc_stack(TYPE,SIZE) ((sizeof(TYPE)*(SIZE)>EIGEN_STACK_ALLOCATION_LIMIT) \
+  #define ei_aligned_stack_alloc(TYPE,SIZE) ((sizeof(TYPE)*(SIZE)>EIGEN_STACK_ALLOCATION_LIMIT) \
                                     ? ei_aligned_malloc<TYPE>(SIZE) \
                                     : (TYPE*)alloca(sizeof(TYPE)*(SIZE)))
-  #define ei_free_stack(PTR,TYPE,SIZE) if (sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT) ei_aligned_free(PTR,SIZE)
+  #define ei_aligned_stack_free(PTR,TYPE,SIZE) if (sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT) ei_aligned_free(PTR,SIZE)
 #else
-  #define ei_alloc_stack(TYPE,SIZE) ei_aligned_malloc<TYPE>(SIZE)
-  #define ei_free_stack(PTR,TYPE,SIZE) ei_aligned_free(PTR,SIZE)
+  #define ei_aligned_stack_alloc(TYPE,SIZE) ei_aligned_malloc<TYPE>(SIZE)
+  #define ei_aligned_stack_free(PTR,TYPE,SIZE) ei_aligned_free(PTR,SIZE)
 #endif

 /** \class WithAlignedOperatorNew
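A caveat implied by the __linux__ branch (my reading, not stated in the patch): the small-size path uses alloca, so the buffer lives in the calling function's stack frame, and both macros recompute the same threshold test. The alloc/free pair must therefore sit in the same function with identical TYPE and SIZE arguments:

    // Hypothetical misuse, for illustration only. A mismatched SIZE can flip
    // which branch the free-side threshold test takes, handing stack memory
    // to ei_aligned_free or leaking a heap block.
    void example(int n)
    {
      double* buf = ei_aligned_stack_alloc(double, n);
      // ... use buf ...
      // ei_aligned_stack_free(buf, double, n + 1000);  // WRONG: SIZE differs
      ei_aligned_stack_free(buf, double, n);            // correct: same TYPE, SIZE
    }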
@@ -76,7 +76,7 @@ template<typename MatrixType> class LU
     MatrixType::ColsAtCompileTime, // the number of rows in the "kernel matrix" is the number of cols of the original matrix
                                    // so that the product "matrix * kernel = zero" makes sense
     Dynamic,                       // we don't know at compile-time the dimension of the kernel
-    MatrixType::Flags&RowMajorBit,
+    MatrixType::StorageOrder,
     MatrixType::MaxColsAtCompileTime, // see explanation for 2nd template parameter
     MatrixType::MaxColsAtCompileTime  // the kernel is a subspace of the domain space, whose dimension is the number
                                       // of columns of the original matrix
@@ -86,7 +86,7 @@ template<typename MatrixType> class LU
     MatrixType::RowsAtCompileTime, // the image is a subspace of the destination space, whose dimension is the number
                                    // of rows of the original matrix
     Dynamic,                       // we don't know at compile time the dimension of the image (the rank)
-    MatrixType::Flags,
+    MatrixType::StorageOrder,
     MatrixType::MaxRowsAtCompileTime, // the image matrix will consist of columns from the original matrix,
     MatrixType::MaxColsAtCompileTime  // so it has the same number of rows and at most as many columns.
   > ImageResultType;
@@ -436,7 +436,7 @@ void LU<MatrixType>::computeKernel(KernelMatrixType *result) const
   * independent vectors in Ker U.
   */

-  Matrix<Scalar, Dynamic, Dynamic, MatrixType::Flags&RowMajorBit,
+  Matrix<Scalar, Dynamic, Dynamic, MatrixType::StorageOrder,
          MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime>
     y(-m_lu.corner(TopRight, m_rank, dimker));

@@ -504,7 +504,7 @@ bool LU<MatrixType>::solve(

   // Step 2
   Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime,
-         MatrixType::Flags&RowMajorBit,
+         MatrixType::StorageOrder,
          MatrixType::MaxRowsAtCompileTime,
          MatrixType::MaxRowsAtCompileTime> l(rows, rows);
   l.setZero();
@@ -523,7 +523,7 @@ bool LU<MatrixType>::solve(
     return false;
   }
   Matrix<Scalar, Dynamic, OtherDerived::ColsAtCompileTime,
-         MatrixType::Flags&RowMajorBit,
+         MatrixType::StorageOrder,
          MatrixType::MaxRowsAtCompileTime, OtherDerived::MaxColsAtCompileTime>
     d(c.corner(TopLeft, m_rank, c.cols()));
   m_lu.corner(TopLeft, m_rank, m_rank)
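The LU hunks above replace the bit-twiddled MatrixType::Flags&RowMajorBit with the StorageOrder enum introduced in the Matrix hunk, so dependent types inherit the storage order directly instead of re-deriving it from a flags bitfield. A minimal sketch of the idea, with illustrative names only:

    // Illustrative names only, not Eigen's actual types.
    template<typename Scalar, int _StorageOrder>
    struct MatrixSketch
    {
      enum { StorageOrder = _StorageOrder };
    };

    template<typename MatrixType>
    struct KernelResultSketch
    {
      // The result type picks up the decomposed matrix's storage order
      // through the dedicated enum.
      typedef MatrixSketch<double, MatrixType::StorageOrder> type;
    };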
@@ -192,7 +192,7 @@ void SparseLDLT<MatrixType,Backend>::_symbolic(const MatrixType& a)
   m_matrix.resize(size, size);
   m_parent.resize(size);
   m_nonZerosPerCol.resize(size);
-  int * tags = ei_alloc_stack(int, size);
+  int * tags = ei_aligned_stack_alloc(int, size);

   const int* Ap = a._outerIndexPtr();
   const int* Ai = a._innerIndexPtr();
@@ -238,7 +238,7 @@ void SparseLDLT<MatrixType,Backend>::_symbolic(const MatrixType& a)
     Lp[k+1] = Lp[k] + m_nonZerosPerCol[k];

   m_matrix.resizeNonZeros(Lp[size]);
-  ei_free_stack(tags, int, size);
+  ei_aligned_stack_free(tags, int, size);
 }

 template<typename MatrixType, int Backend>
@@ -257,9 +257,9 @@ bool SparseLDLT<MatrixType,Backend>::_numeric(const MatrixType& a)
   Scalar* Lx = m_matrix._valuePtr();
   m_diag.resize(size);

-  Scalar * y = ei_alloc_stack(Scalar, size);
-  int * pattern = ei_alloc_stack(int, size);
-  int * tags = ei_alloc_stack(int, size);
+  Scalar * y = ei_aligned_stack_alloc(Scalar, size);
+  int * pattern = ei_aligned_stack_alloc(int, size);
+  int * tags = ei_aligned_stack_alloc(int, size);

   const int* P = 0;
   const int* Pinv = 0;
@@ -315,9 +315,9 @@ bool SparseLDLT<MatrixType,Backend>::_numeric(const MatrixType& a)
     }
   }

-  ei_free_stack(y, Scalar, size);
-  ei_free_stack(pattern, int, size);
-  ei_free_stack(tags, int, size);
+  ei_aligned_stack_free(y, Scalar, size);
+  ei_aligned_stack_free(pattern, int, size);
+  ei_aligned_stack_free(tags, int, size);

   return ok; /* success, diagonal of D is all nonzero */
 }
@@ -24,6 +24,7 @@
 // Eigen. If not, see <http://www.gnu.org/licenses/>.

 #define EIGEN_NO_STATIC_ASSERT // turn static asserts into runtime asserts in order to check them
+#define EIGEN_DONT_VECTORIZE   // SSE intrinsics aren't designed to allow mixing types
 #include "main.h"
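Why the test has to opt out of vectorization (my gloss on the comment in the hunk above): SSE packet types are homogeneous, so there is no intrinsic that combines a float packet with a double packet, and a mixed-type expression therefore cannot be lowered to packet ops:

    #include <xmmintrin.h>  // SSE:  __m128  holds 4 floats
    #include <emmintrin.h>  // SSE2: __m128d holds 2 doubles

    __m128  add_floats (__m128  a, __m128  b) { return _mm_add_ps(a, b); }
    __m128d add_doubles(__m128d a, __m128d b) { return _mm_add_pd(a, b); }

    // __m128 add_mixed(__m128 a, __m128d b) { return _mm_add_ps(a, b); }
    // ^ does not compile: there is no conversion between __m128 and __m128d,
    //   and no mixed-type add intrinsic exists, hence EIGEN_DONT_VECTORIZE.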