Mirror of https://gitlab.com/libeigen/eigen.git, synced 2025-04-22 01:29:35 +08:00
* In LU solvers: no need anymore to use row-major matrices
* Matrix: always inherit WithAlignedOperatorNew, regardless of vectorization
* Rename ei_alloc_stack to ei_aligned_stack_alloc (and ei_free_stack to ei_aligned_stack_free)
* mixingtypes test: disable vectorization, since the SSE intrinsics don't allow mixing types and we would just get compile errors there
commit d9e5fd393a (parent fd7eba3394)
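The renamed helpers keep the strict alloc/free pairing that the updated Memory.h documentation (in the diff below) describes. As orientation, here is a minimal usage sketch of the renamed pair; the function and its arguments are illustrative, not part of this commit, and it assumes Eigen's internal core headers are in scope:

// Doubles `src` into `dst` through a temporary aligned buffer.
// ei_aligned_stack_alloc uses alloca() when sizeof(float)*n is below
// EIGEN_STACK_ALLOCATION_LIMIT (on __linux__) and an aligned heap
// allocation otherwise.
void scale_through_buffer(const float* src, float* dst, int n)
{
  float* tmp = ei_aligned_stack_alloc(float, n);
  for (int i = 0; i < n; ++i)
    tmp[i] = 2.0f * src[i];
  for (int i = 0; i < n; ++i)
    dst[i] = tmp[i];
  // TYPE and SIZE must be repeated here: the macro re-evaluates the
  // stack-vs-heap condition to decide whether ei_aligned_free is needed.
  ei_aligned_stack_free(tmp, float, n);
}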
@@ -95,9 +95,9 @@ static void ei_cache_friendly_product(
   const bool needRhsCopy = (PacketSize>1) && ((rhsStride%PacketSize!=0) || (size_t(rhs)%16!=0));
   Scalar* EIGEN_RESTRICT block = 0;
   const int allocBlockSize = l2BlockRows*size;
-  block = ei_alloc_stack(Scalar, allocBlockSize);
+  block = ei_aligned_stack_alloc(Scalar, allocBlockSize);
   Scalar* EIGEN_RESTRICT rhsCopy
-    = ei_alloc_stack(Scalar, l2BlockSizeAligned*l2BlockSizeAligned);
+    = ei_aligned_stack_alloc(Scalar, l2BlockSizeAligned*l2BlockSizeAligned);

   // loops on each L2 cache friendly blocks of the result
   for(int l2i=0; l2i<rows; l2i+=l2BlockRows)
@@ -338,8 +338,8 @@ static void ei_cache_friendly_product(
     }
   }

-  ei_free_stack(block, Scalar, allocBlockSize);
-  ei_free_stack(rhsCopy, Scalar, l2BlockSizeAligned*l2BlockSizeAligned);
+  ei_aligned_stack_free(block, Scalar, allocBlockSize);
+  ei_aligned_stack_free(rhsCopy, Scalar, l2BlockSizeAligned*l2BlockSizeAligned);
 }

 #endif // EIGEN_EXTERN_INSTANTIATIONS
@@ -122,12 +122,11 @@ struct ei_traits<Matrix<_Scalar, _Rows, _Cols, _StorageOrder, _MaxRows, _MaxCols
 template<typename _Scalar, int _Rows, int _Cols, int _StorageOrder, int _MaxRows, int _MaxCols>
 class Matrix
   : public MatrixBase<Matrix<_Scalar, _Rows, _Cols, _StorageOrder, _MaxRows, _MaxCols> >
-#ifdef EIGEN_VECTORIZE
   , public ei_with_aligned_operator_new<_Scalar,ei_size_at_compile_time<_Rows,_Cols>::ret>
-#endif
 {
   public:
     EIGEN_GENERIC_PUBLIC_INTERFACE(Matrix)
+    enum { StorageOrder = _StorageOrder };
     friend class Eigen::Map<Matrix, Unaligned>;
     typedef class Eigen::Map<Matrix, Unaligned> UnalignedMapType;
     friend class Eigen::Map<Matrix, Aligned>;
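With the #ifdef EIGEN_VECTORIZE / #endif pair deleted above, Matrix inherits ei_with_aligned_operator_new unconditionally, so heap-allocated fixed-size matrices get aligned storage in non-vectorized builds too. A hedged sketch of the guarantee this is meant to provide, assuming the 16-byte alignment the product kernels test for elsewhere in this commit (the size_t(rhs)%16 checks):

#include <Eigen/Core>
#include <cassert>
#include <cstddef>

int main()
{
  // operator new is now supplied by ei_with_aligned_operator_new whether
  // or not EIGEN_VECTORIZE is defined, so this pointer should satisfy
  // the same %16 alignment the vectorized code paths rely on.
  Eigen::Matrix4f* m = new Eigen::Matrix4f;
  assert(size_t(m) % 16 == 0);
  delete m;
  return 0;
}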
@@ -573,7 +573,7 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,ColMajor,HasDirect
       _res = &res.coeffRef(0);
     else
     {
-      _res = ei_alloc_stack(Scalar,res.size());
+      _res = ei_aligned_stack_alloc(Scalar,res.size());
       Map<Matrix<Scalar,DestDerived::RowsAtCompileTime,1> >(_res, res.size()) = res;
     }
     ei_cache_friendly_product_colmajor_times_vector(res.size(),
@@ -583,7 +583,7 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,ColMajor,HasDirect
     if (!EvalToRes)
     {
       res = Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size());
-      ei_free_stack(_res, Scalar, res.size());
+      ei_aligned_stack_free(_res, Scalar, res.size());
     }
   }
 };
@@ -619,7 +619,7 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
       _res = &res.coeffRef(0);
     else
     {
-      _res = ei_alloc_stack(Scalar, res.size());
+      _res = ei_aligned_stack_alloc(Scalar, res.size());
       Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size()) = res;
     }
     ei_cache_friendly_product_colmajor_times_vector(res.size(),
@@ -629,7 +629,7 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
     if (!EvalToRes)
     {
       res = Map<Matrix<Scalar,DestDerived::SizeAtCompileTime,1> >(_res, res.size());
-      ei_free_stack(_res, Scalar, res.size());
+      ei_aligned_stack_free(_res, Scalar, res.size());
     }
   }
 };
@@ -652,13 +652,13 @@ struct ei_cache_friendly_product_selector<ProductType,LhsRows,RowMajor,HasDirect
       _rhs = &product.rhs().const_cast_derived().coeffRef(0);
     else
     {
-      _rhs = ei_alloc_stack(Scalar, product.rhs().size());
+      _rhs = ei_aligned_stack_alloc(Scalar, product.rhs().size());
       Map<Matrix<Scalar,Rhs::SizeAtCompileTime,1> >(_rhs, product.rhs().size()) = product.rhs();
     }
     ei_cache_friendly_product_rowmajor_times_vector(&product.lhs().const_cast_derived().coeffRef(0,0), product.lhs().stride(),
                                                     _rhs, product.rhs().size(), res);

-    if (!UseRhsDirectly) ei_free_stack(_rhs, Scalar, product.rhs().size());
+    if (!UseRhsDirectly) ei_aligned_stack_free(_rhs, Scalar, product.rhs().size());
   }
 };

@@ -680,13 +680,13 @@ struct ei_cache_friendly_product_selector<ProductType,1,LhsOrder,LhsAccess,RhsCo
       _lhs = &product.lhs().const_cast_derived().coeffRef(0);
     else
     {
-      _lhs = ei_alloc_stack(Scalar, product.lhs().size());
+      _lhs = ei_aligned_stack_alloc(Scalar, product.lhs().size());
       Map<Matrix<Scalar,Lhs::SizeAtCompileTime,1> >(_lhs, product.lhs().size()) = product.lhs();
     }
     ei_cache_friendly_product_rowmajor_times_vector(&product.rhs().const_cast_derived().coeffRef(0,0), product.rhs().stride(),
                                                     _lhs, product.lhs().size(), res);

-    if(!UseLhsDirectly) ei_free_stack(_lhs, Scalar, product.lhs().size());
+    if(!UseLhsDirectly) ei_aligned_stack_free(_lhs, Scalar, product.lhs().size());
   }
 };

@@ -139,24 +139,24 @@ inline static int ei_alignmentOffset(const Scalar* ptr, int maxOffset)
 }

 /** \internal
-  * ei_alloc_stack(TYPE,SIZE) allocates an aligned buffer of sizeof(TYPE)*SIZE bytes
+  * ei_aligned_stack_alloc(TYPE,SIZE) allocates an aligned buffer of sizeof(TYPE)*SIZE bytes
   * on the stack if sizeof(TYPE)*SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT.
   * Otherwise the memory is allocated on the heap.
-  * Data allocated with ei_alloc_stack \b must be freed by calling ei_free_stack(PTR,TYPE,SIZE).
+  * Data allocated with ei_aligned_stack_alloc \b must be freed by calling ei_aligned_stack_free(PTR,TYPE,SIZE).
   * \code
-  * float * data = ei_alloc_stack(float,array.size());
+  * float * data = ei_aligned_stack_alloc(float,array.size());
   * // ...
-  * ei_free_stack(data,float,array.size());
+  * ei_aligned_stack_free(data,float,array.size());
   * \endcode
   */
 #ifdef __linux__
-  #define ei_alloc_stack(TYPE,SIZE) ((sizeof(TYPE)*(SIZE)>EIGEN_STACK_ALLOCATION_LIMIT) \
+  #define ei_aligned_stack_alloc(TYPE,SIZE) ((sizeof(TYPE)*(SIZE)>EIGEN_STACK_ALLOCATION_LIMIT) \
                                     ? ei_aligned_malloc<TYPE>(SIZE) \
                                     : (TYPE*)alloca(sizeof(TYPE)*(SIZE)))
-  #define ei_free_stack(PTR,TYPE,SIZE) if (sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT) ei_aligned_free(PTR,SIZE)
+  #define ei_aligned_stack_free(PTR,TYPE,SIZE) if (sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT) ei_aligned_free(PTR,SIZE)
 #else
-  #define ei_alloc_stack(TYPE,SIZE) ei_aligned_malloc<TYPE>(SIZE)
-  #define ei_free_stack(PTR,TYPE,SIZE) ei_aligned_free(PTR,SIZE)
+  #define ei_aligned_stack_alloc(TYPE,SIZE) ei_aligned_malloc<TYPE>(SIZE)
+  #define ei_aligned_stack_free(PTR,TYPE,SIZE) ei_aligned_free(PTR,SIZE)
 #endif

 /** \class WithAlignedOperatorNew
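A note on the design of the macros changed above: the free macro takes PTR, TYPE and SIZE because no bookkeeping is stored with the pointer; the stack-vs-heap decision is simply recomputed at the free site. A hand-expansion of the __linux__ branch for a hypothetical float buffer of length n makes the pairing explicit (assumes Eigen's internal ei_aligned_malloc/ei_aligned_free and EIGEN_STACK_ALLOCATION_LIMIT are in scope):

#include <alloca.h> // alloca, per the __linux__ branch

void expanded_example(int n)
{
  // What ei_aligned_stack_alloc(float, n) expands to on __linux__:
  float* p = (sizeof(float)*(n) > EIGEN_STACK_ALLOCATION_LIMIT)
               ? ei_aligned_malloc<float>(n)          // heap branch, aligned
               : (float*)alloca(sizeof(float)*(n));   // stack branch, freed on return
  // ... use p ...
  // What ei_aligned_stack_free(p, float, n) expands to:
  if (sizeof(float)*n > EIGEN_STACK_ALLOCATION_LIMIT) ei_aligned_free(p, n);
}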
@@ -76,7 +76,7 @@ template<typename MatrixType> class LU
     MatrixType::ColsAtCompileTime, // the number of rows in the "kernel matrix" is the number of cols of the original matrix
                                    // so that the product "matrix * kernel = zero" makes sense
     Dynamic, // we don't know at compile-time the dimension of the kernel
-    MatrixType::Flags&RowMajorBit,
+    MatrixType::StorageOrder,
     MatrixType::MaxColsAtCompileTime, // see explanation for 2nd template parameter
     MatrixType::MaxColsAtCompileTime // the kernel is a subspace of the domain space, whose dimension is the number
                                      // of columns of the original matrix
@@ -86,7 +86,7 @@ template<typename MatrixType> class LU
     MatrixType::RowsAtCompileTime, // the image is a subspace of the destination space, whose dimension is the number
                                    // of rows of the original matrix
     Dynamic, // we don't know at compile time the dimension of the image (the rank)
-    MatrixType::Flags,
+    MatrixType::StorageOrder,
     MatrixType::MaxRowsAtCompileTime, // the image matrix will consist of columns from the original matrix,
     MatrixType::MaxColsAtCompileTime // so it has the same number of rows and at most as many columns.
   > ImageResultType;
@@ -436,7 +436,7 @@ void LU<MatrixType>::computeKernel(KernelMatrixType *result) const
     * independent vectors in Ker U.
     */

-  Matrix<Scalar, Dynamic, Dynamic, MatrixType::Flags&RowMajorBit,
+  Matrix<Scalar, Dynamic, Dynamic, MatrixType::StorageOrder,
          MatrixType::MaxColsAtCompileTime, MatrixType::MaxColsAtCompileTime>
     y(-m_lu.corner(TopRight, m_rank, dimker));

@@ -504,7 +504,7 @@ bool LU<MatrixType>::solve(

   // Step 2
   Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime,
-         MatrixType::Flags&RowMajorBit,
+         MatrixType::StorageOrder,
          MatrixType::MaxRowsAtCompileTime,
          MatrixType::MaxRowsAtCompileTime> l(rows, rows);
   l.setZero();
@@ -523,7 +523,7 @@ bool LU<MatrixType>::solve(
     return false;
   }
   Matrix<Scalar, Dynamic, OtherDerived::ColsAtCompileTime,
-         MatrixType::Flags&RowMajorBit,
+         MatrixType::StorageOrder,
          MatrixType::MaxRowsAtCompileTime, OtherDerived::MaxColsAtCompileTime>
     d(c.corner(TopLeft, m_rank, c.cols()));
   m_lu.corner(TopLeft, m_rank, m_rank)
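The LU hunks above all make the same substitution: the storage-order template argument is now taken from the StorageOrder enum added to Matrix by this commit, instead of being reconstructed as MatrixType::Flags&RowMajorBit (or, in two places, passed as the whole MatrixType::Flags bitfield). A minimal sketch of the resulting pattern; PlainTemporary is a hypothetical helper, not from this commit:

#include <Eigen/Core>

// A dense temporary matching MatrixType's scalar type and storage order,
// declared without any flag arithmetic.
template<typename MatrixType>
struct PlainTemporary
{
  typedef Eigen::Matrix<typename MatrixType::Scalar,
                        Eigen::Dynamic, Eigen::Dynamic,
                        MatrixType::StorageOrder, // enum added by this commit
                        MatrixType::MaxRowsAtCompileTime,
                        MatrixType::MaxColsAtCompileTime> type;
};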
@@ -192,7 +192,7 @@ void SparseLDLT<MatrixType,Backend>::_symbolic(const MatrixType& a)
   m_matrix.resize(size, size);
   m_parent.resize(size);
   m_nonZerosPerCol.resize(size);
-  int * tags = ei_alloc_stack(int, size);
+  int * tags = ei_aligned_stack_alloc(int, size);

   const int* Ap = a._outerIndexPtr();
   const int* Ai = a._innerIndexPtr();
@@ -238,7 +238,7 @@ void SparseLDLT<MatrixType,Backend>::_symbolic(const MatrixType& a)
     Lp[k+1] = Lp[k] + m_nonZerosPerCol[k];

   m_matrix.resizeNonZeros(Lp[size]);
-  ei_free_stack(tags, int, size);
+  ei_aligned_stack_free(tags, int, size);
 }

 template<typename MatrixType, int Backend>
@@ -257,9 +257,9 @@ bool SparseLDLT<MatrixType,Backend>::_numeric(const MatrixType& a)
   Scalar* Lx = m_matrix._valuePtr();
   m_diag.resize(size);

-  Scalar * y = ei_alloc_stack(Scalar, size);
-  int * pattern = ei_alloc_stack(int, size);
-  int * tags = ei_alloc_stack(int, size);
+  Scalar * y = ei_aligned_stack_alloc(Scalar, size);
+  int * pattern = ei_aligned_stack_alloc(int, size);
+  int * tags = ei_aligned_stack_alloc(int, size);

   const int* P = 0;
   const int* Pinv = 0;
@@ -315,9 +315,9 @@ bool SparseLDLT<MatrixType,Backend>::_numeric(const MatrixType& a)
     }
   }

-  ei_free_stack(y, Scalar, size);
-  ei_free_stack(pattern, int, size);
-  ei_free_stack(tags, int, size);
+  ei_aligned_stack_free(y, Scalar, size);
+  ei_aligned_stack_free(pattern, int, size);
+  ei_aligned_stack_free(tags, int, size);

   return ok; /* success, diagonal of D is all nonzero */
 }
@@ -24,6 +24,7 @@
 // Eigen. If not, see <http://www.gnu.org/licenses/>.

 #define EIGEN_NO_STATIC_ASSERT // turn static asserts into runtime asserts in order to check them
+#define EIGEN_DONT_VECTORIZE // SSE intrinsics aren't designed to allow mixing types
 #include "main.h"

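The added EIGEN_DONT_VECTORIZE keeps this test compiling: with SSE packets in play, a mixed float/double expression fails inside the intrinsics at compile time, whereas the test (via EIGEN_NO_STATIC_ASSERT above) wants to observe the corresponding runtime asserts. A hypothetical illustration of the kind of expression involved, not taken from the test file:

#include <Eigen/Core>

int main()
{
  Eigen::Vector4f vf = Eigen::Vector4f::Ones();
  Eigen::Vector4d vd = Eigen::Vector4d::Ones();
  // Mixing scalar types directly (e.g. `vf + vd`) is the case under test:
  // with SSE vectorization it produces compile errors inside the packet
  // code; with EIGEN_DONT_VECTORIZE it reaches the (runtime-converted)
  // asserts the test checks for. Explicit casts remain the supported way
  // to mix scalar types:
  Eigen::Vector4f sum = vf + vd.cast<float>();
  return (sum[0] == 2.0f) ? 0 : 1;
}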