Change Eigen's ColPivHouseholderQR to use the numerically stable norm downdate formula from http://www.netlib.org/lapack/lawnspdf/lawn176.pdf, which has been used in LAPACK's xGEQPF and xGEQP3 since 2006. With the old formula, the code chooses the wrong pivots and fails to correctly determine rank on graded matrices.

This change also adds additional checks for non-increasing diagonal in R11 to existing unit tests, and adds a new unit test with the Kahan matrix, which consistently fails for the original code.

Benchmark timings on Intel(R) Xeon(R) CPU E5-1650 v3 @ 3.50GHz. Code compiled with AVX & FMA. I just ran on square matrices of 3 difference sizes.

Benchmark               Time(ns)     CPU(ns) Iterations
-------------------------------------------------------
Before:
BM_EigencolPivQR/64        53677       53627      12890
BM_EigencolPivQR/512    15265408    15250784         46
BM_EigencolPivQR/4k  15403556228 15388788368          2

After (non-vectorized version):
Benchmark               Time(ns)     CPU(ns) Iterations  Degradation
--------------------------------------------------------------------
BM_EigencolPivQR/64        63736       63669      10844         18.5%
BM_EigencolPivQR/512    16052546    16037381         43          5.1%
BM_EigencolPivQR/4k  15149263620 15132025316          2         -2.0%

Performance-wise there seems to be a ~18.5% degradation for small (64x64) matrices, probably due to the cost of more O(min(m,n)^2) sqrt operations that are not needed for the unstable formula.
This commit is contained in:
Rasmus Munk Larsen 2016-01-28 15:07:26 -08:00
parent b908e071a8
commit acce4dd050
2 changed files with 155 additions and 52 deletions

View File

@ -86,7 +86,7 @@ template<typename _MatrixType> class ColPivHouseholderQR
m_colsPermutation(),
m_colsTranspositions(),
m_temp(),
m_colSqNorms(),
m_colNorms(),
m_isInitialized(false),
m_usePrescribedThreshold(false) {}
@ -102,7 +102,7 @@ template<typename _MatrixType> class ColPivHouseholderQR
m_colsPermutation(PermIndexType(cols)),
m_colsTranspositions(cols),
m_temp(cols),
m_colSqNorms(cols),
m_colNorms(cols),
m_isInitialized(false),
m_usePrescribedThreshold(false) {}
@ -125,7 +125,7 @@ template<typename _MatrixType> class ColPivHouseholderQR
m_colsPermutation(PermIndexType(matrix.cols())),
m_colsTranspositions(matrix.cols()),
m_temp(matrix.cols()),
m_colSqNorms(matrix.cols()),
m_colNorms(matrix.cols()),
m_isInitialized(false),
m_usePrescribedThreshold(false)
{
@ -413,7 +413,7 @@ template<typename _MatrixType> class ColPivHouseholderQR
PermutationType m_colsPermutation;
IntRowVectorType m_colsTranspositions;
RowVectorType m_temp;
RealRowVectorType m_colSqNorms;
RealRowVectorType m_colNorms;
bool m_isInitialized, m_usePrescribedThreshold;
RealScalar m_prescribedThreshold, m_maxpivot;
Index m_nonzero_pivots;
@ -463,6 +463,7 @@ template<typename MatrixType>
void ColPivHouseholderQR<MatrixType>::computeInPlace()
{
using std::abs;
Index rows = m_qr.rows();
Index cols = m_qr.cols();
Index size = m_qr.diagonalSize();
@ -474,31 +475,24 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace()
m_colsTranspositions.resize(m_qr.cols());
Index number_of_transpositions = 0;
m_colSqNorms.resize(cols);
m_colNorms.resize(cols);
for (Index k = 0; k < cols; ++k)
m_colSqNorms.coeffRef(k) = m_qr.col(k).squaredNorm();
m_colNorms.coeffRef(k) = m_qr.col(k).norm();
RealRowVectorType colNormsMostRecentDirect(m_colNorms);
RealScalar threshold_helper = m_colSqNorms.maxCoeff() * numext::abs2(NumTraits<Scalar>::epsilon()) / RealScalar(rows);
RealScalar threshold_helper = numext::abs2(m_colNorms.maxCoeff() * NumTraits<Scalar>::epsilon()) / RealScalar(rows);
RealScalar norm_downdate_threshold = numext::sqrt(NumTraits<Scalar>::epsilon());
m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case)
m_maxpivot = RealScalar(0);
for(Index k = 0; k < size; ++k)
{
// first, we look up in our table m_colSqNorms which column has the biggest squared norm
// first, we look up in our table m_colNorms which column has the biggest norm
Index biggest_col_index;
RealScalar biggest_col_sq_norm = m_colSqNorms.tail(cols-k).maxCoeff(&biggest_col_index);
RealScalar biggest_col_sq_norm = numext::abs2(m_colNorms.tail(cols-k).maxCoeff(&biggest_col_index));
biggest_col_index += k;
// since our table m_colSqNorms accumulates imprecision at every step, we must now recompute
// the actual squared norm of the selected column.
// Note that not doing so does result in solve() sometimes returning inf/nan values
// when running the unit test with 1000 repetitions.
biggest_col_sq_norm = m_qr.col(biggest_col_index).tail(rows-k).squaredNorm();
// we store that back into our table: it can't hurt to correct our table.
m_colSqNorms.coeffRef(biggest_col_index) = biggest_col_sq_norm;
// Track the number of meaningful pivots but do not stop the decomposition to make
// sure that the initial matrix is properly reproduced. See bug 941.
if(m_nonzero_pivots==size && biggest_col_sq_norm < threshold_helper * RealScalar(rows-k))
@ -508,7 +502,9 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace()
m_colsTranspositions.coeffRef(k) = biggest_col_index;
if(k != biggest_col_index) {
m_qr.col(k).swap(m_qr.col(biggest_col_index));
std::swap(m_colSqNorms.coeffRef(k), m_colSqNorms.coeffRef(biggest_col_index));
std::swap(m_colNorms.coeffRef(k), m_colNorms.coeffRef(biggest_col_index));
std::swap(colNormsMostRecentDirect.coeffRef(k),
colNormsMostRecentDirect.coeffRef(biggest_col_index));
++number_of_transpositions;
}
@ -526,8 +522,29 @@ void ColPivHouseholderQR<MatrixType>::computeInPlace()
m_qr.bottomRightCorner(rows-k, cols-k-1)
.applyHouseholderOnTheLeft(m_qr.col(k).tail(rows-k-1), m_hCoeffs.coeffRef(k), &m_temp.coeffRef(k+1));
// update our table of squared norms of the columns
m_colSqNorms.tail(cols-k-1) -= m_qr.row(k).tail(cols-k-1).cwiseAbs2();
// update our table of norms of the columns
for (Index j = k + 1; j < cols; ++j) {
// The following implements the stable norm downgrade step discussed in
// http://www.netlib.org/lapack/lawnspdf/lawn176.pdf
// and used in LAPACK routines xGEQPF and xGEQP3.
// See lines 278-297 in http://www.netlib.org/lapack/explore-html/dc/df4/sgeqpf_8f_source.html
if (m_colNorms.coeffRef(j) != 0) {
RealScalar temp = abs(m_qr.coeffRef(k, j)) / m_colNorms.coeffRef(j);
temp = (RealScalar(1) + temp) * (RealScalar(1) - temp);
temp = temp < 0 ? 0 : temp;
RealScalar temp2 =
temp * numext::abs2(m_colNorms.coeffRef(j) /
colNormsMostRecentDirect.coeffRef(j));
if (temp2 <= norm_downdate_threshold) {
// The updated norm has become to inaccurate so re-compute the column
// norm directly.
m_colNorms.coeffRef(j) = m_qr.col(j).tail(rows - k - 1).norm();
colNormsMostRecentDirect.coeffRef(j) = m_colNorms.coeffRef(j);
} else {
m_colNorms.coeffRef(j) *= numext::sqrt(temp);
}
}
}
}
m_colsPermutation.setIdentity(PermIndexType(cols));

View File

@ -19,6 +19,7 @@ template<typename MatrixType> void qr()
Index rank = internal::random<Index>(1, (std::min)(rows, cols)-1);
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> MatrixQType;
MatrixType m1;
createRandomPIMatrixOfRank(rank,rows,cols,m1);
@ -36,6 +37,24 @@ template<typename MatrixType> void qr()
MatrixType c = q * r * qr.colsPermutation().inverse();
VERIFY_IS_APPROX(m1, c);
// Verify that the absolute value of the diagonal elements in R are
// non-increasing until they reach the singularity threshold.
RealScalar threshold =
std::sqrt(RealScalar(rows)) * (std::abs)(r(0, 0)) * NumTraits<Scalar>::epsilon();
for (Index i = 0; i < (std::min)(rows, cols) - 1; ++i) {
RealScalar x = (std::abs)(r(i, i));
RealScalar y = (std::abs)(r(i + 1, i + 1));
if (x < threshold && y < threshold) continue;
if (test_isApproxOrLessThan(x, y)) {
for (Index j = 0; j < (std::min)(rows, cols); ++j) {
std::cout << "i = " << j << ", |r_ii| = " << (std::abs)(r(j, j)) << std::endl;
}
std::cout << "Failure at i=" << i << ", rank=" << rank
<< ", threshold=" << threshold << std::endl;
}
VERIFY_IS_APPROX_OR_LESS_THAN(y, x);
}
MatrixType m2 = MatrixType::Random(cols,cols2);
MatrixType m3 = m1*m2;
m2 = MatrixType::Random(cols,cols2);
@ -47,6 +66,7 @@ template<typename MatrixType, int Cols2> void qr_fixedsize()
{
enum { Rows = MatrixType::RowsAtCompileTime, Cols = MatrixType::ColsAtCompileTime };
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
int rank = internal::random<int>(1, (std::min)(int(Rows), int(Cols))-1);
Matrix<Scalar,Rows,Cols> m1;
createRandomPIMatrixOfRank(rank,Rows,Cols,m1);
@ -66,6 +86,67 @@ template<typename MatrixType, int Cols2> void qr_fixedsize()
m2 = Matrix<Scalar,Cols,Cols2>::Random(Cols,Cols2);
m2 = qr.solve(m3);
VERIFY_IS_APPROX(m3, m1*m2);
// Verify that the absolute value of the diagonal elements in R are
// non-increasing until they reache the singularity threshold.
RealScalar threshold =
std::sqrt(RealScalar(Rows)) * (std::abs)(r(0, 0)) * NumTraits<Scalar>::epsilon();
for (Index i = 0; i < (std::min)(int(Rows), int(Cols)) - 1; ++i) {
RealScalar x = (std::abs)(r(i, i));
RealScalar y = (std::abs)(r(i + 1, i + 1));
if (x < threshold && y < threshold) continue;
if (test_isApproxOrLessThan(x, y)) {
for (Index j = 0; j < (std::min)(int(Rows), int(Cols)); ++j) {
std::cout << "i = " << j << ", |r_ii| = " << (std::abs)(r(j, j)) << std::endl;
}
std::cout << "Failure at i=" << i << ", rank=" << rank
<< ", threshold=" << threshold << std::endl;
}
VERIFY_IS_APPROX_OR_LESS_THAN(y, x);
}
}
// This test is meant to verify that pivots are chosen such that
// even for a graded matrix, the diagonal of R falls of roughly
// monotonically until it reaches the threshold for singularity.
// We use the so-called Kahan matrix, which is a famous counter-example
// for rank-revealing QR. See
// http://www.netlib.org/lapack/lawnspdf/lawn176.pdf
// page 3 for more detail.
template<typename MatrixType> void qr_kahan_matrix()
{
typedef typename MatrixType::Index Index;
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
Index rows = 300, cols = rows;
MatrixType m1;
m1.setZero(rows,cols);
RealScalar s = std::pow(NumTraits<RealScalar>::epsilon(), 1.0 / rows);
RealScalar c = std::sqrt(1 - s*s);
for (Index i = 0; i < rows; ++i) {
m1(i, i) = pow(s, i);
m1.row(i).tail(rows - i - 1) = -pow(s, i) * c * MatrixType::Ones(1, rows - i - 1);
}
m1 = (m1 + m1.transpose()).eval();
ColPivHouseholderQR<MatrixType> qr(m1);
MatrixType r = qr.matrixQR().template triangularView<Upper>();
RealScalar threshold =
std::sqrt(RealScalar(rows)) * (std::abs)(r(0, 0)) * NumTraits<Scalar>::epsilon();
for (Index i = 0; i < (std::min)(rows, cols) - 1; ++i) {
RealScalar x = (std::abs)(r(i, i));
RealScalar y = (std::abs)(r(i + 1, i + 1));
if (x < threshold && y < threshold) continue;
if (test_isApproxOrLessThan(x, y)) {
for (Index j = 0; j < (std::min)(rows, cols); ++j) {
std::cout << "i = " << j << ", |r_ii| = " << (std::abs)(r(j, j)) << std::endl;
}
std::cout << "Failure at i=" << i << ", rank=" << qr.rank()
<< ", threshold=" << threshold << std::endl;
}
VERIFY_IS_APPROX_OR_LESS_THAN(y, x);
}
}
template<typename MatrixType> void qr_invertible()
@ -131,6 +212,11 @@ void test_qr_colpivoting()
CALL_SUBTEST_5(( qr_fixedsize<Matrix<double,1,1>, 1 >() ));
}
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST_1( qr_kahan_matrix<MatrixXf>() );
CALL_SUBTEST_2( qr_kahan_matrix<MatrixXd>() );
}
for(int i = 0; i < g_repeat; i++) {
CALL_SUBTEST_1( qr_invertible<MatrixXf>() );
CALL_SUBTEST_2( qr_invertible<MatrixXd>() );