mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-09-15 02:43:14 +08:00
implements a blocked version of PartialLU
This commit is contained in:
parent
0103de8512
commit
ce1dc1ab16
@ -201,6 +201,100 @@ PartialLU<MatrixType>::PartialLU(const MatrixType& matrix)
|
|||||||
compute(matrix);
|
compute(matrix);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** \internal performs the LU decomposition in place of the matrix \a lu.
|
||||||
|
* In addition, this function returns the row transpositions in the
|
||||||
|
* vector \a row_transpositions which must have a size equal to the number
|
||||||
|
* of columns of the matrix \a lu, and an integer \a nb_transpositions
|
||||||
|
* which returns the actual number of transpositions.
|
||||||
|
*/
|
||||||
|
template<typename MatrixType, typename IntVector>
|
||||||
|
void ei_lu_unblocked(MatrixType& lu, IntVector& row_transpositions, int& nb_transpositions)
|
||||||
|
{
|
||||||
|
const int rows = lu.rows();
|
||||||
|
const int size = std::min(lu.rows(),lu.cols());
|
||||||
|
nb_transpositions = 0;
|
||||||
|
for(int k = 0; k < size; ++k)
|
||||||
|
{
|
||||||
|
int row_of_biggest_in_col;
|
||||||
|
lu.block(k,k,rows-k,1).cwise().abs().maxCoeff(&row_of_biggest_in_col);
|
||||||
|
row_of_biggest_in_col += k;
|
||||||
|
|
||||||
|
row_transpositions.coeffRef(k) = row_of_biggest_in_col;
|
||||||
|
|
||||||
|
if(k != row_of_biggest_in_col)
|
||||||
|
{
|
||||||
|
lu.row(k).swap(lu.row(row_of_biggest_in_col));
|
||||||
|
++nb_transpositions;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(k<rows-1)
|
||||||
|
{
|
||||||
|
lu.col(k).end(rows-k-1) /= lu.coeff(k,k);
|
||||||
|
for(int col = k + 1; col < size; ++col)
|
||||||
|
lu.col(col).end(rows-k-1) -= lu.col(k).end(rows-k-1) * lu.coeff(k,col);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** This is the blocked version of ei_lu_unblocked() */
|
||||||
|
template<typename MatrixType, typename IntVector>
|
||||||
|
void ei_lu_blocked(MatrixType& lu, IntVector& row_transpositions, int& nb_transpositions)
|
||||||
|
{
|
||||||
|
const int size = lu.rows();
|
||||||
|
|
||||||
|
// automatically adjust the number of subdivisions to the size
|
||||||
|
// of the matrix so that there is enough sub blocks:
|
||||||
|
int blockSize = size/8;
|
||||||
|
blockSize = (blockSize/16)*16;
|
||||||
|
blockSize = std::min(std::max(blockSize,8), 256);
|
||||||
|
// if the matrix is too small, no blocking:
|
||||||
|
if(size<32)
|
||||||
|
blockSize = size;
|
||||||
|
|
||||||
|
nb_transpositions = 0;
|
||||||
|
for(int k = 0; k < size; k+=blockSize)
|
||||||
|
{
|
||||||
|
int bs = std::min(size-k,blockSize);
|
||||||
|
int ps = size - k;
|
||||||
|
int rs = size - k - bs;
|
||||||
|
// partition the matrix:
|
||||||
|
// A00 | A01 | A02
|
||||||
|
// lu = A10 | A11 | A12
|
||||||
|
// A20 | A21 | A22
|
||||||
|
Block<MatrixType,Dynamic,Dynamic> A_0(lu,0,0,size,k);
|
||||||
|
Block<MatrixType,Dynamic,Dynamic> A11_21(lu,k,k,ps,bs);
|
||||||
|
Block<MatrixType,Dynamic,Dynamic> A_2(lu,0,k+bs,size,rs);
|
||||||
|
Block<MatrixType,Dynamic,Dynamic> A11(lu,k,k,bs,bs);
|
||||||
|
Block<MatrixType,Dynamic,Dynamic> A12(lu,k,k+bs,bs,rs);
|
||||||
|
Block<MatrixType,Dynamic,Dynamic> A21(lu,k+bs,k,rs,bs);
|
||||||
|
Block<MatrixType,Dynamic,Dynamic> A22(lu,k+bs,k+bs,rs,rs);
|
||||||
|
|
||||||
|
VectorBlock<IntVector,Dynamic> row_transpositions_in_panel(row_transpositions,k,bs);
|
||||||
|
int nb_transpositions_in_panel;
|
||||||
|
ei_lu_unblocked(A11_21, row_transpositions_in_panel, nb_transpositions_in_panel);
|
||||||
|
nb_transpositions_in_panel += nb_transpositions_in_panel;
|
||||||
|
|
||||||
|
// update permutations and apply them to A10
|
||||||
|
for(int i=k;i<k+bs; ++i)
|
||||||
|
{
|
||||||
|
int piv = (row_transpositions.coeffRef(i) += k);
|
||||||
|
A_0.row(i).swap(A_0.row(piv));
|
||||||
|
}
|
||||||
|
|
||||||
|
if(rs)
|
||||||
|
{
|
||||||
|
// apply permutations to A_2
|
||||||
|
for(int i=k;i<k+bs; ++i)
|
||||||
|
A_2.row(i).swap(A_2.row(row_transpositions.coeff(i)));
|
||||||
|
|
||||||
|
// A12 = A11^-1 A12
|
||||||
|
A11.template triangularView<UnitLowerTriangular>().solveInPlace(A12);
|
||||||
|
|
||||||
|
A22 -= A21 * A12;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template<typename MatrixType>
|
template<typename MatrixType>
|
||||||
void PartialLU<MatrixType>::compute(const MatrixType& matrix)
|
void PartialLU<MatrixType>::compute(const MatrixType& matrix)
|
||||||
{
|
{
|
||||||
@ -211,40 +305,15 @@ void PartialLU<MatrixType>::compute(const MatrixType& matrix)
|
|||||||
const int size = matrix.rows();
|
const int size = matrix.rows();
|
||||||
|
|
||||||
IntColVectorType rows_transpositions(size);
|
IntColVectorType rows_transpositions(size);
|
||||||
int number_of_transpositions = 0;
|
|
||||||
|
|
||||||
for(int k = 0; k < size; ++k)
|
int nb_transpositions;
|
||||||
{
|
ei_lu_blocked(m_lu, rows_transpositions, nb_transpositions);
|
||||||
int row_of_biggest_in_col;
|
m_det_p = (nb_transpositions%2) ? -1 : 1;
|
||||||
m_lu.block(k,k,size-k,1).cwise().abs().maxCoeff(&row_of_biggest_in_col);
|
|
||||||
row_of_biggest_in_col += k;
|
|
||||||
|
|
||||||
rows_transpositions.coeffRef(k) = row_of_biggest_in_col;
|
|
||||||
|
|
||||||
if(k != row_of_biggest_in_col) {
|
|
||||||
m_lu.row(k).swap(m_lu.row(row_of_biggest_in_col));
|
|
||||||
++number_of_transpositions;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(k<size-1) {
|
|
||||||
m_lu.col(k).end(size-k-1) /= m_lu.coeff(k,k);
|
|
||||||
/* I know it's tempting to replace this for loop by a single matrix product. But actually there's no reason why it
|
|
||||||
* should be faster because it's just an exterior vector product; and in practice this gives much slower code with
|
|
||||||
* GCC 4.2-4.4 (this is weird, would be interesting to investigate). On the other hand, it would be worth having a variant
|
|
||||||
* for row-major matrices, traversing in the other direction for better performance, with a meta selector to compile only
|
|
||||||
* one path
|
|
||||||
*/
|
|
||||||
for(int col = k + 1; col < size; ++col)
|
|
||||||
m_lu.col(col).end(size-k-1) -= m_lu.col(k).end(size-k-1) * m_lu.coeff(k,col);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for(int k = 0; k < size; ++k) m_p.coeffRef(k) = k;
|
for(int k = 0; k < size; ++k) m_p.coeffRef(k) = k;
|
||||||
for(int k = size-1; k >= 0; --k)
|
for(int k = size-1; k >= 0; --k)
|
||||||
std::swap(m_p.coeffRef(k), m_p.coeffRef(rows_transpositions.coeff(k)));
|
std::swap(m_p.coeffRef(k), m_p.coeffRef(rows_transpositions.coeff(k)));
|
||||||
|
|
||||||
m_det_p = (number_of_transpositions%2) ? -1 : 1;
|
|
||||||
|
|
||||||
m_isInitialized = true;
|
m_isInitialized = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -86,8 +86,8 @@ void test_inverse()
|
|||||||
CALL_SUBTEST( inverse(Matrix2d()) );
|
CALL_SUBTEST( inverse(Matrix2d()) );
|
||||||
CALL_SUBTEST( inverse(Matrix3f()) );
|
CALL_SUBTEST( inverse(Matrix3f()) );
|
||||||
CALL_SUBTEST( inverse(Matrix4f()) );
|
CALL_SUBTEST( inverse(Matrix4f()) );
|
||||||
CALL_SUBTEST( inverse(MatrixXf(8,8)) );
|
CALL_SUBTEST( inverse(MatrixXf(72,72)) );
|
||||||
CALL_SUBTEST( inverse(MatrixXcd(7,7)) );
|
CALL_SUBTEST( inverse(MatrixXcd(56,56)) );
|
||||||
}
|
}
|
||||||
|
|
||||||
// test some tricky cases for 4x4 matrices
|
// test some tricky cases for 4x4 matrices
|
||||||
|
Loading…
x
Reference in New Issue
Block a user