diff --git a/Eigen/src/LU/PartialLU.h b/Eigen/src/LU/PartialLU.h index b3a40f0e5..337438e2f 100644 --- a/Eigen/src/LU/PartialLU.h +++ b/Eigen/src/LU/PartialLU.h @@ -201,6 +201,100 @@ PartialLU::PartialLU(const MatrixType& matrix) compute(matrix); } +/** \internal performs the LU decomposition in place of the matrix \a lu. + * In addition, this function returns the row transpositions in the + * vector \a row_transpositions which must have a size equal to the number + * of columns of the matrix \a lu, and an integer \a nb_transpositions + * which returns the actual number of transpositions. + */ +template +void ei_lu_unblocked(MatrixType& lu, IntVector& row_transpositions, int& nb_transpositions) +{ + const int rows = lu.rows(); + const int size = std::min(lu.rows(),lu.cols()); + nb_transpositions = 0; + for(int k = 0; k < size; ++k) + { + int row_of_biggest_in_col; + lu.block(k,k,rows-k,1).cwise().abs().maxCoeff(&row_of_biggest_in_col); + row_of_biggest_in_col += k; + + row_transpositions.coeffRef(k) = row_of_biggest_in_col; + + if(k != row_of_biggest_in_col) + { + lu.row(k).swap(lu.row(row_of_biggest_in_col)); + ++nb_transpositions; + } + + if(k +void ei_lu_blocked(MatrixType& lu, IntVector& row_transpositions, int& nb_transpositions) +{ + const int size = lu.rows(); + + // automatically adjust the number of subdivisions to the size + // of the matrix so that there is enough sub blocks: + int blockSize = size/8; + blockSize = (blockSize/16)*16; + blockSize = std::min(std::max(blockSize,8), 256); + // if the matrix is too small, no blocking: + if(size<32) + blockSize = size; + + nb_transpositions = 0; + for(int k = 0; k < size; k+=blockSize) + { + int bs = std::min(size-k,blockSize); + int ps = size - k; + int rs = size - k - bs; + // partition the matrix: + // A00 | A01 | A02 + // lu = A10 | A11 | A12 + // A20 | A21 | A22 + Block A_0(lu,0,0,size,k); + Block A11_21(lu,k,k,ps,bs); + Block A_2(lu,0,k+bs,size,rs); + Block A11(lu,k,k,bs,bs); + Block A12(lu,k,k+bs,bs,rs); + Block A21(lu,k+bs,k,rs,bs); + Block A22(lu,k+bs,k+bs,rs,rs); + + VectorBlock row_transpositions_in_panel(row_transpositions,k,bs); + int nb_transpositions_in_panel; + ei_lu_unblocked(A11_21, row_transpositions_in_panel, nb_transpositions_in_panel); + nb_transpositions_in_panel += nb_transpositions_in_panel; + + // update permutations and apply them to A10 + for(int i=k;i().solveInPlace(A12); + + A22 -= A21 * A12; + } + } +} + template void PartialLU::compute(const MatrixType& matrix) { @@ -211,40 +305,15 @@ void PartialLU::compute(const MatrixType& matrix) const int size = matrix.rows(); IntColVectorType rows_transpositions(size); - int number_of_transpositions = 0; - for(int k = 0; k < size; ++k) - { - int row_of_biggest_in_col; - m_lu.block(k,k,size-k,1).cwise().abs().maxCoeff(&row_of_biggest_in_col); - row_of_biggest_in_col += k; - - rows_transpositions.coeffRef(k) = row_of_biggest_in_col; - - if(k != row_of_biggest_in_col) { - m_lu.row(k).swap(m_lu.row(row_of_biggest_in_col)); - ++number_of_transpositions; - } - - if(k= 0; --k) std::swap(m_p.coeffRef(k), m_p.coeffRef(rows_transpositions.coeff(k))); - m_det_p = (number_of_transpositions%2) ? -1 : 1; - m_isInitialized = true; } diff --git a/test/inverse.cpp b/test/inverse.cpp index 352887d45..b4eef73b6 100644 --- a/test/inverse.cpp +++ b/test/inverse.cpp @@ -86,8 +86,8 @@ void test_inverse() CALL_SUBTEST( inverse(Matrix2d()) ); CALL_SUBTEST( inverse(Matrix3f()) ); CALL_SUBTEST( inverse(Matrix4f()) ); - CALL_SUBTEST( inverse(MatrixXf(8,8)) ); - CALL_SUBTEST( inverse(MatrixXcd(7,7)) ); + CALL_SUBTEST( inverse(MatrixXf(72,72)) ); + CALL_SUBTEST( inverse(MatrixXcd(56,56)) ); } // test some tricky cases for 4x4 matrices