diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h index b414b5c46..94c30616a 100644 --- a/Eigen/src/LU/PartialPivLU.h +++ b/Eigen/src/LU/PartialPivLU.h @@ -331,17 +331,15 @@ PartialPivLU::PartialPivLU(EigenBase& matrix) namespace internal { /** \internal This is the blocked version of fullpivlu_unblocked() */ -template +template struct partial_lu_impl { - // FIXME add a stride to Map, so that the following mapping becomes easier, - // another option would be to create an expression being able to automatically - // warp any Map, Matrix, and Block expressions as a unique type, but since that's exactly - // a Map + stride, why not adding a stride to Map, and convenient ctors from a Matrix, - // and Block. - typedef Map > MapLU; - typedef Block MatrixType; - typedef Block BlockType; + static const int UnBlockedBound = 16; + static const bool UnBlockedAtCompileTime = SizeAtCompileTime!=Dynamic && SizeAtCompileTime<=UnBlockedBound; + static const int ActualSizeAtCompileTime = UnBlockedAtCompileTime ? SizeAtCompileTime : Dynamic; + typedef Matrix MatrixType; + typedef Ref MatrixTypeRef; + typedef Ref > BlockType; typedef typename MatrixType::RealScalar RealScalar; /** \internal performs the LU decomposition in-place of the matrix \a lu @@ -354,7 +352,7 @@ struct partial_lu_impl * * \returns The index of the first pivot which is exactly zero if any, or a negative number otherwise. */ - static Index unblocked_lu(MatrixType& lu, PivIndex* row_transpositions, PivIndex& nb_transpositions) + static Index unblocked_lu(MatrixTypeRef& lu, PivIndex* row_transpositions, PivIndex& nb_transpositions) { typedef scalar_score_coeff_op Scoring; typedef typename Scoring::result_type Score; @@ -417,13 +415,12 @@ struct partial_lu_impl */ static Index blocked_lu(Index rows, Index cols, Scalar* lu_data, Index luStride, PivIndex* row_transpositions, PivIndex& nb_transpositions, Index maxBlockSize=256) { - MapLU lu1(lu_data,StorageOrder==RowMajor?rows:luStride,StorageOrder==RowMajor?luStride:cols); - MatrixType lu(lu1,0,0,rows,cols); + MatrixTypeRef lu = MatrixType::Map(lu_data,rows, cols, OuterStride<>(luStride)); const Index size = (std::min)(rows,cols); // if the matrix is too small, no blocking: - if(size<=16) + if(UnBlockedAtCompileTime || size<=UnBlockedBound) { return unblocked_lu(lu, row_transpositions, nb_transpositions); } @@ -449,12 +446,12 @@ struct partial_lu_impl // A00 | A01 | A02 // lu = A_0 | A_1 | A_2 = A10 | A11 | A12 // A20 | A21 | A22 - BlockType A_0(lu,0,0,rows,k); - BlockType A_2(lu,0,k+bs,rows,tsize); - BlockType A11(lu,k,k,bs,bs); - BlockType A12(lu,k,k+bs,bs,tsize); - BlockType A21(lu,k+bs,k,trows,bs); - BlockType A22(lu,k+bs,k+bs,trows,tsize); + BlockType A_0 = lu.block(0,0,rows,k); + BlockType A_2 = lu.block(0,k+bs,rows,tsize); + BlockType A11 = lu.block(k,k,bs,bs); + BlockType A12 = lu.block(k,k+bs,bs,tsize); + BlockType A21 = lu.block(k+bs,k,trows,bs); + BlockType A22 = lu.block(k+bs,k+bs,trows,tsize); PivIndex nb_transpositions_in_panel; // recursively call the blocked LU algorithm on [A11^T A21^T]^T @@ -497,7 +494,9 @@ void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, t eigen_assert((&row_transpositions.coeffRef(1)-&row_transpositions.coeffRef(0)) == 1); partial_lu_impl - + < typename MatrixType::Scalar, MatrixType::Flags&RowMajorBit?RowMajor:ColMajor, + typename TranspositionType::StorageIndex, + EIGEN_SIZE_MIN_PREFER_FIXED(MatrixType::RowsAtCompileTime,MatrixType::ColsAtCompileTime)> ::blocked_lu(lu.rows(), lu.cols(), &lu.coeffRef(0,0), lu.outerStride(), &row_transpositions.coeffRef(0), nb_transpositions); }