From abba11bdcf3e9f98a616e59570086edbf0d762b5 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 22 Sep 2014 15:22:52 +0200 Subject: [PATCH] Many improvements in Divide&Conquer SVD: - Fix many numerical issues, in particular regarding deflation. - Add heavy debugging output to help track numerical issues (there are still fews) - Make use of Eiegn's apply-inplane-rotation feature. --- unsupported/Eigen/src/BDCSVD/BDCSVD.h | 536 +++++++++++++++++--------- 1 file changed, 364 insertions(+), 172 deletions(-) diff --git a/unsupported/Eigen/src/BDCSVD/BDCSVD.h b/unsupported/Eigen/src/BDCSVD/BDCSVD.h index 0167872af..b13ee415e 100644 --- a/unsupported/Eigen/src/BDCSVD/BDCSVD.h +++ b/unsupported/Eigen/src/BDCSVD/BDCSVD.h @@ -11,6 +11,7 @@ // Copyright (C) 2013 Jean Ceccato // Copyright (C) 2013 Pierre Zoppitelli // Copyright (C) 2013 Jitse Niesen +// Copyright (C) 2014 Gael Guennebaud // // Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed @@ -18,7 +19,8 @@ #ifndef EIGEN_BDCSVD_H #define EIGEN_BDCSVD_H - +// #define EIGEN_BDCSVD_DEBUG_VERBOSE +// #define EIGEN_BDCSVD_SANITY_CHECKS namespace Eigen { template class BDCSVD; @@ -165,6 +167,7 @@ private: template void copyUV(const HouseholderU &householderU, const HouseholderV &householderV, const NaiveU &naiveU, const NaiveV &naivev); static void structured_update(Block A, const MatrixXr &B, Index n1); + static RealScalar secularEq(RealScalar x, const ArrayXr& col0, const ArrayXr& diag, const ArrayXr& diagShifted, RealScalar shift, Index n); protected: MatrixXr m_naiveU, m_naiveV; @@ -189,11 +192,12 @@ public: }; //end class BDCSVD -// Methode to allocate ans initialize matrix and attributs +// Methode to allocate ans initialize matrix and attributes template void BDCSVD::allocate(Index rows, Index cols, unsigned int computationOptions) { m_isTranspose = (cols > rows); + if (Base::allocate(rows, cols, computationOptions)) return; @@ -233,7 +237,7 @@ BDCSVD& BDCSVD::compute(const MatrixType& matrix, unsign allocate(matrix.rows(), matrix.cols(), computationOptions); using std::abs; - //**** step 1 Bidiagonalization m_isTranspose = (matrix.cols()>matrix.rows()) ; + //**** step 1 Bidiagonalization MatrixType copy; if (m_isTranspose) copy = matrix.adjoint(); else copy = matrix; @@ -264,8 +268,11 @@ BDCSVD& BDCSVD::compute(const MatrixType& matrix, unsign break; } } +// std::cout << "m_naiveU\n" << m_naiveU << "\n\n"; +// std::cout << "m_naiveV\n" << m_naiveV << "\n\n"; if(m_isTranspose) copyUV(bid.householderV(), bid.householderU(), m_naiveV, m_naiveU); else copyUV(bid.householderU(), bid.householderV(), m_naiveU, m_naiveV); +// std::cout << "DONE\n"; m_isInitialized = true; return *this; }// end compute @@ -278,18 +285,16 @@ void BDCSVD::copyUV(const HouseholderU &householderU, const Househol // Note exchange of U and V: m_matrixU is set from m_naiveV and vice versa if (computeU()) { - Index Ucols = m_computeThinU ? m_nonzeroSingularValues : householderU.cols(); + Index Ucols = m_computeThinU ? m_diagSize : householderU.cols(); m_matrixU = MatrixX::Identity(householderU.cols(), Ucols); - Index blockCols = m_computeThinU ? m_nonzeroSingularValues : m_diagSize; - m_matrixU.topLeftCorner(m_diagSize, blockCols) = naiveV.template cast().topLeftCorner(m_diagSize, blockCols); + m_matrixU.topLeftCorner(m_diagSize, m_diagSize) = naiveV.template cast().topLeftCorner(m_diagSize, m_diagSize); householderU.applyThisOnTheLeft(m_matrixU); } if (computeV()) { - Index Vcols = m_computeThinV ? m_nonzeroSingularValues : householderV.cols(); + Index Vcols = m_computeThinV ? m_diagSize : householderV.cols(); m_matrixV = MatrixX::Identity(householderV.cols(), Vcols); - Index blockCols = m_computeThinV ? m_nonzeroSingularValues : m_diagSize; - m_matrixV.topLeftCorner(m_diagSize, blockCols) = naiveU.template cast().topLeftCorner(m_diagSize, blockCols); + m_matrixV.topLeftCorner(m_diagSize, m_diagSize) = naiveU.template cast().topLeftCorner(m_diagSize, m_diagSize); householderV.applyThisOnTheLeft(m_matrixV); } } @@ -308,7 +313,7 @@ void BDCSVD::structured_update(Block A, co Index n = A.rows(); if(n>100) { - // If the matrices are large enough, let's exploit the sparse strucure of A by + // If the matrices are large enough, let's exploit the sparse structure of A by // splitting it in half (wrt n1), and packing the non-zero columns. DenseIndex n2 = n - n1; MatrixXr A1(n1,n), A2(n2,n), B1(n,n), B2(n,n); @@ -385,6 +390,7 @@ void BDCSVD::divide (Index firstCol, Index lastCol, Index firstRowW, // right submatrix before the left one. divide(k + 1 + firstCol, lastCol, k + 1 + firstRowW, k + 1 + firstColW, shift); divide(firstCol, k - 1 + firstCol, firstRowW, firstColW + 1, shift + 1); + if (m_compU) { lambda = m_naiveU(firstCol + k, firstCol + k); @@ -417,6 +423,13 @@ void BDCSVD::divide (Index firstCol, Index lastCol, Index firstRowW, c0 = alphaK * lambda / r0; s0 = betaK * phi / r0; } + +#ifdef EIGEN_BDCSVD_SANITY_CHECKS + assert(m_naiveU.allFinite()); + assert(m_naiveV.allFinite()); + assert(m_computed.allFinite()); +#endif + if (m_compU) { MatrixXr q1 (m_naiveU.col(firstCol + k).segment(firstCol, k + 1)); @@ -449,6 +462,13 @@ void BDCSVD::divide (Index firstCol, Index lastCol, Index firstRowW, m_naiveU.row(1).segment(firstCol + 1, k).setZero(); m_naiveU.row(0).segment(firstCol + k + 1, n - k - 1).setZero(); } + +#ifdef EIGEN_BDCSVD_SANITY_CHECKS + assert(m_naiveU.allFinite()); + assert(m_naiveV.allFinite()); + assert(m_computed.allFinite()); +#endif + m_computed(firstCol + shift, firstCol + shift) = r0; m_computed.col(firstCol + shift).segment(firstCol + shift + 1, k) = alphaK * l.transpose().real(); m_computed.col(firstCol + shift).segment(firstCol + shift + k + 1, n - k - 1) = betaK * f.transpose().real(); @@ -461,11 +481,22 @@ void BDCSVD::divide (Index firstCol, Index lastCol, Index firstRowW, VectorType singVals; computeSVDofM(firstCol + shift, n, UofSVD, singVals, VofSVD); +#ifdef EIGEN_BDCSVD_SANITY_CHECKS + assert(UofSVD.allFinite()); + assert(VofSVD.allFinite()); +#endif + if (m_compU) structured_update(m_naiveU.block(firstCol, firstCol, n + 1, n + 1), UofSVD, (n+2)/2); else m_naiveU.middleCols(firstCol, n + 1) *= UofSVD; // FIXME this requires a temporary, and exploit that there are 2 rows at compile time if (m_compV) structured_update(m_naiveV.block(firstRowW, firstColW, n, n), VofSVD, (n+1)/2); +#ifdef EIGEN_BDCSVD_SANITY_CHECKS + assert(m_naiveU.allFinite()); + assert(m_naiveV.allFinite()); + assert(m_computed.allFinite()); +#endif + m_computed.block(firstCol + shift, firstCol + shift, n, n).setZero(); m_computed.block(firstCol + shift, firstCol + shift, n, n).diagonal() = singVals; }// end divide @@ -491,18 +522,78 @@ void BDCSVD::computeSVDofM(Index firstCol, Index n, MatrixXr& U, Vec U.resize(n+1, n+1); if (m_compV) V.resize(n, n); - if (col0.hasNaN() || diag.hasNaN()) return; + if (col0.hasNaN() || diag.hasNaN()) { std::cout << "\n\nHAS NAN\n\n"; return; } - ArrayXr shifts(n), mus(n), zhat(n); - computeSingVals(col0, diag, singVals, shifts, mus); - perturbCol0(col0, diag, singVals, shifts, mus, zhat); - computeSingVecs(zhat, diag, singVals, shifts, mus, U, V); + ArrayXr shifts(n), mus(n), zhat(n); +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + std::cout << "computeSVDofM using:\n"; + std::cout << " z: " << col0.transpose() << "\n"; + std::cout << " d: " << diag.transpose() << "\n"; +#endif + + // Compute singVals, shifts, and mus + computeSingVals(col0, diag, singVals, shifts, mus); + +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + std::cout << " sing-val: " << singVals.transpose() << "\n"; + std::cout << " mu: " << mus.transpose() << "\n"; + std::cout << " shift: " << shifts.transpose() << "\n"; +#endif + +#ifdef EIGEN_BDCSVD_SANITY_CHECKS + assert(singVals.allFinite()); + assert(mus.allFinite()); + assert(shifts.allFinite()); +#endif + + // Compute zhat + perturbCol0(col0, diag, singVals, shifts, mus, zhat); +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + std::cout << " zhat: " << zhat.transpose() << "\n"; + { + Index actual_n = n; + while(actual_n>1 && col0(actual_n-1)==0) --actual_n; + std::cout << "\n\n mus: " << mus.head(actual_n).transpose() << "\n\n"; + std::cout << " check1: " << ((singVals.array()-(shifts+mus)) / singVals.array()).head(actual_n).transpose() << "\n\n"; + std::cout << " check2: " << ((singVals.array()-diag) / singVals.array()).head(actual_n).transpose() << "\n\n"; + std::cout << " check3: " << ((diag.segment(1,actual_n-1)-singVals.head(actual_n-1).array()) / singVals.head(actual_n-1).array()).transpose() << "\n\n\n"; + } +#endif + +#ifdef EIGEN_BDCSVD_SANITY_CHECKS + assert(zhat.allFinite()); +#endif + + computeSingVecs(zhat, diag, singVals, shifts, mus, U, V); + +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + std::cout << "U^T U: " << (U.transpose() * U - MatrixType(MatrixType::Identity(U.cols(),U.cols()))).norm() << "\n"; + std::cout << "V^T V: " << (V.transpose() * V - MatrixType(MatrixType::Identity(V.cols(),V.cols()))).norm() << "\n"; +#endif + +#ifdef EIGEN_BDCSVD_SANITY_CHECKS + assert((U.transpose() * U - MatrixType(MatrixType::Identity(U.cols(),U.cols()))).norm() < 1e-14 * n); + assert((V.transpose() * V - MatrixType(MatrixType::Identity(V.cols(),V.cols()))).norm() < 1e-14 * n); + assert(m_naiveU.allFinite()); + assert(m_naiveV.allFinite()); + assert(m_computed.allFinite()); +#endif + // Reverse order so that singular values in increased order - singVals.reverseInPlace(); - U.leftCols(n) = U.leftCols(n).rowwise().reverse().eval(); // FIXME this requires a temporary - if (m_compV) V = V.rowwise().reverse().eval(); // FIXME this requires a temporary + // Because of deflation, the zeros singular-values are already at the end + Index actual_n = n; + while(actual_n>1 && singVals(actual_n-1)==0) --actual_n; + singVals.head(actual_n).reverseInPlace(); + U.leftCols(actual_n) = U.leftCols(actual_n).rowwise().reverse().eval(); // FIXME this requires a temporary + if (m_compV) V.leftCols(actual_n) = V.leftCols(actual_n).rowwise().reverse().eval(); // FIXME this requires a temporary +} + +template +typename BDCSVD::RealScalar BDCSVD::secularEq(RealScalar mu, const ArrayXr& col0, const ArrayXr& diag, const ArrayXr& diagShifted, RealScalar shift, Index n) +{ + return 1 + (col0.square() / ((diagShifted - mu) )/( (diag + shift + mu))).head(n).sum(); } template @@ -514,6 +605,19 @@ void BDCSVD::computeSingVals(const ArrayXr& col0, const ArrayXr& dia using std::max; Index n = col0.size(); + Index actual_n = n; + while(actual_n>1 && col0(actual_n-1)==0) --actual_n; + Index m = 0; + Array perm(actual_n); + { + for(Index k=0;k::computeSingVals(const ArrayXr& col0, const ArrayXr& dia // otherwise, use secular equation to find singular value RealScalar left = diag(k); - RealScalar right = (k != n-1) ? diag(k+1) : (diag(n-1) + col0.matrix().norm()); + RealScalar right = (k != actual_n-1) ? diag(k+1) : (diag(actual_n-1) + col0.matrix().norm()); // first decide whether it's closer to the left end or the right end RealScalar mid = left + (right-left) / 2; - RealScalar fMid = 1 + (col0.square() / ((diag + mid) * (diag - mid))).sum(); + RealScalar fMid = 1 + (col0.square() / ((diag + mid) * (diag - mid))).head(actual_n).sum(); - RealScalar shift = (k == n-1 || fMid > 0) ? left : right; + RealScalar shift = (k == actual_n-1 || fMid > 0) ? left : right; // measure everything relative to shift ArrayXr diagShifted = diag - shift; @@ -543,7 +647,7 @@ void BDCSVD::computeSingVals(const ArrayXr& col0, const ArrayXr& dia if (shift == left) { muPrev = (right - left) * 0.1; - if (k == n-1) muCur = right - left; + if (k == actual_n-1) muCur = right - left; else muCur = (right - left) * 0.5; } else @@ -552,8 +656,8 @@ void BDCSVD::computeSingVals(const ArrayXr& col0, const ArrayXr& dia muCur = -(right - left) * 0.5; } - RealScalar fPrev = 1 + (col0.square() / ((diagShifted - muPrev) * (diag + shift + muPrev))).sum(); - RealScalar fCur = 1 + (col0.square() / ((diagShifted - muCur) * (diag + shift + muCur))).sum(); + RealScalar fPrev = secularEq(muPrev, col0, diag, diagShifted, shift, actual_n); + RealScalar fCur = secularEq(muCur, col0, diag, diagShifted, shift, actual_n); if (abs(fPrev) < abs(fCur)) { swap(fPrev, fCur); @@ -563,7 +667,7 @@ void BDCSVD::computeSingVals(const ArrayXr& col0, const ArrayXr& dia // rational interpolation: fit a function of the form a / mu + b through the two previous // iterates and use its zero to compute the next iterate bool useBisection = false; - while (abs(muCur - muPrev) > 8 * NumTraits::epsilon() * (max)(abs(muCur), abs(muPrev)) && fCur != fPrev && !useBisection) + while (abs(muCur - muPrev) > 8 * NumTraits::epsilon() * (max)(abs(muCur), abs(muPrev)) && abs(fCur - fPrev)>NumTraits::epsilon() && !useBisection) { ++m_numIters; @@ -573,7 +677,7 @@ void BDCSVD::computeSingVals(const ArrayXr& col0, const ArrayXr& dia muPrev = muCur; fPrev = fCur; muCur = -a / b; - fCur = 1 + (col0.square() / ((diagShifted - muCur) * (diag + shift + muCur))).sum(); + fCur = secularEq(muCur, col0, diag, diagShifted, shift, actual_n); if (shift == left && (muCur < 0 || muCur > right - left)) useBisection = true; if (shift == right && (muCur < -(right - left) || muCur > 0)) useBisection = true; @@ -595,14 +699,19 @@ void BDCSVD::computeSingVals(const ArrayXr& col0, const ArrayXr& dia rightShifted = -1e-30; } - RealScalar fLeft = 1 + (col0.square() / ((diagShifted - leftShifted) * (diag + shift + leftShifted))).sum(); - RealScalar fRight = 1 + (col0.square() / ((diagShifted - rightShifted) * (diag + shift + rightShifted))).sum(); - assert(fLeft * fRight < 0); + RealScalar fLeft = secularEq(leftShifted, col0, diag, diagShifted, shift, actual_n); + RealScalar fRight = secularEq(rightShifted, col0, diag, diagShifted, shift, actual_n); + +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + if(fLeft * fRight>0) + std::cout << fLeft << " * " << fRight << " == " << fLeft * fRight << " ; " << left << " - " << right << " -> " << leftShifted << " " << rightShifted << " shift=" << shift << "\n"; +#endif + eigen_internal_assert(fLeft * fRight < 0); while (rightShifted - leftShifted > 2 * NumTraits::epsilon() * (max)(abs(leftShifted), abs(rightShifted))) { RealScalar midShifted = (leftShifted + rightShifted) / 2; - RealScalar fMid = 1 + (col0.square() / ((diagShifted - midShifted) * (diag + shift + midShifted))).sum(); + RealScalar fMid = secularEq(midShifted, col0, diag, diagShifted, shift, actual_n); if (fLeft * fMid < 0) { rightShifted = midShifted; @@ -638,27 +747,53 @@ void BDCSVD::perturbCol0 { using std::sqrt; Index n = col0.size(); + + // Ignore trailing zeros: + Index actual_n = n; + while(actual_n>1 && col0(actual_n-1)==0) --actual_n; + // Deflated non-zero singular values are kept in-place, + // we thus compute an indirection array to properly ignore all deflated entries. + // TODO compute it once! + Index m = 0; // size of the deflated problem + Array perm(actual_n); + { + for(Index k=0;k 0) zhat(k) = tmp; - else zhat(k) = -tmp; + RealScalar dk = diag(k); + RealScalar prod = (singVals(actual_n-1) + dk) * (mus(actual_n-1) + (shifts(actual_n-1) - dk)); + + for(Index l = 0; l 0.9 ) + std::cout << " " << ((singVals(j)+dk)*(mus(j)+(shifts(j)-dk)))/((diag(i)+dk)*(diag(i)-dk)) << " == (" << (singVals(j)+dk) << " * " << (mus(j)+(shifts(j)-dk)) << ") / (" << (diag(i)+dk) << " * " << (diag(i)-dk) << ")\n"; +#endif + } + } +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + std::cout << "zhat(" << k << ") = sqrt( " << prod << ") ; " << (singVals(actual_n-1) + dk) << " * " << mus(actual_n-1) + shifts(actual_n-1) << " - " << dk << "\n"; +#endif + RealScalar tmp = sqrt(prod); + zhat(k) = col0(k) > 0 ? tmp : -tmp; } } } @@ -670,6 +805,23 @@ void BDCSVD::computeSingVecs const ArrayXr& shifts, const ArrayXr& mus, MatrixXr& U, MatrixXr& V) { Index n = zhat.size(); + + // Deflated non-zero singular values are kept in-place, + // we thus compute an indirection array to properly ignore all deflated entries. + // TODO compute it once! + Index actual_n = n; + while(actual_n>1 && zhat(actual_n-1)==0) --actual_n; + Index m = 0; + Array perm(actual_n); + { + for(Index k=0;k::computeSingVecs } else { - U.col(k).head(n) = zhat / (((diag - shifts(k)) - mus(k)) * (diag + singVals[k])); - U(n,k) = 0; + U.col(k).setZero(); + for(Index l=0;l::deflation43(Index firstCol, Index shift, Index i, Index using std::abs; using std::sqrt; using std::pow; - RealScalar c = m_computed(firstCol + shift, firstCol + shift); - RealScalar s = m_computed(i, firstCol + shift); - RealScalar r = sqrt(pow(abs(c), 2) + pow(abs(s), 2)); + Index start = firstCol + shift; + RealScalar c = m_computed(start, start); + RealScalar s = m_computed(start+i, start); + RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s)); if (r == 0) { - m_computed(i, i) = 0; + m_computed(start+i, start+i) = 0; return; } - c/=r; - s/=r; - m_computed(firstCol + shift, firstCol + shift) = r; - m_computed(i, firstCol + shift) = 0; - m_computed(i, i) = 0; - if (m_compU) - { - m_naiveU.col(firstCol).segment(firstCol,size) = - c * m_naiveU.col(firstCol).segment(firstCol, size) - - s * m_naiveU.col(i).segment(firstCol, size) ; - - m_naiveU.col(i).segment(firstCol, size) = - (c + s*s/c) * m_naiveU.col(i).segment(firstCol, size) + - (s/c) * m_naiveU.col(firstCol).segment(firstCol,size); - } + m_computed(start,start) = r; + m_computed(start+i, start) = 0; + m_computed(start+i, start+i) = 0; + + JacobiRotation J(c/r,-s/r); + if (m_compU) m_naiveU.middleRows(firstCol, size+1).applyOnTheRight(firstCol, firstCol+i, J); + else m_naiveU.applyOnTheRight(firstCol, firstCol+i, J); }// end deflation 43 // page 13 // i,j >= 1, i != j and |di - dj| < epsilon * norm2(M) // We apply two rotations to have zj = 0; +// TODO deflation44 is still broken and not properly tested template void BDCSVD::deflation44(Index firstColu , Index firstColm, Index firstRowW, Index firstColW, Index i, Index j, Index size) { @@ -740,9 +898,20 @@ void BDCSVD::deflation44(Index firstColu , Index firstColm, Index fi using std::sqrt; using std::conj; using std::pow; - RealScalar c = m_computed(firstColm, firstColm + j - 1); - RealScalar s = m_computed(firstColm, firstColm + i - 1); - RealScalar r = sqrt(pow(abs(c), 2) + pow(abs(s), 2)); + RealScalar c = m_computed(firstColm+i, firstColm); + RealScalar s = m_computed(firstColm+j, firstColm); + RealScalar r = sqrt(numext::abs2(c) + numext::abs2(s)); +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + std::cout << "deflation 4.4: " << i << "," << j << " -> " << c << " " << s << " " << r << " ; " + << m_computed(firstColm + i-1, firstColm) << " " + << m_computed(firstColm + i, firstColm) << " " + << m_computed(firstColm + i+1, firstColm) << " " + << m_computed(firstColm + i+2, firstColm) << "\n"; + std::cout << m_computed(firstColm + i-1, firstColm + i-1) << " " + << m_computed(firstColm + i, firstColm+i) << " " + << m_computed(firstColm + i+1, firstColm+i+1) << " " + << m_computed(firstColm + i+2, firstColm+i+2) << "\n"; +#endif if (r==0) { m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j); @@ -753,25 +922,15 @@ void BDCSVD::deflation44(Index firstColu , Index firstColm, Index fi m_computed(firstColm + i, firstColm) = r; m_computed(firstColm + i, firstColm + i) = m_computed(firstColm + j, firstColm + j); m_computed(firstColm + j, firstColm) = 0; + + JacobiRotation J(c,s); if (m_compU) { - m_naiveU.col(firstColu + i).segment(firstColu, size) = - c * m_naiveU.col(firstColu + i).segment(firstColu, size) - - s * m_naiveU.col(firstColu + j).segment(firstColu, size) ; - - m_naiveU.col(firstColu + j).segment(firstColu, size) = - (c + s*s/c) * m_naiveU.col(firstColu + j).segment(firstColu, size) + - (s/c) * m_naiveU.col(firstColu + i).segment(firstColu, size); + m_naiveU.middleRows(firstColu, size).applyOnTheRight(firstColu + i, firstColu + j, J); } if (m_compV) { - m_naiveV.col(firstColW + i).segment(firstRowW, size - 1) = - c * m_naiveV.col(firstColW + i).segment(firstRowW, size - 1) + - s * m_naiveV.col(firstColW + j).segment(firstRowW, size - 1) ; - - m_naiveV.col(firstColW + j).segment(firstRowW, size - 1) = - (c + s*s/c) * m_naiveV.col(firstColW + j).segment(firstRowW, size - 1) - - (s/c) * m_naiveV.col(firstColW + i).segment(firstRowW, size - 1); + m_naiveU.middleRows(firstRowW, size-1).applyOnTheRight(firstColW + i, firstColW + j, J.transpose()); } }// end deflation 44 @@ -780,104 +939,137 @@ void BDCSVD::deflation44(Index firstColu , Index firstColm, Index fi template void BDCSVD::deflation(Index firstCol, Index lastCol, Index k, Index firstRowW, Index firstColW, Index shift) { - //condition 4.1 using std::sqrt; using std::abs; + using std::max; const Index length = lastCol + 1 - firstCol; - RealScalar norm1 = m_computed.block(firstCol+shift, firstCol+shift, length, 1).squaredNorm(); - RealScalar norm2 = m_computed.block(firstCol+shift, firstCol+shift, length, length).diagonal().squaredNorm(); - RealScalar epsilon = 10 * NumTraits::epsilon() * sqrt(norm1 + norm2); - if (m_computed(firstCol + shift, firstCol + shift) < epsilon) - m_computed(firstCol + shift, firstCol + shift) = epsilon; + +#ifdef EIGEN_BDCSVD_SANITY_CHECKS + assert(m_naiveU.allFinite()); + assert(m_naiveV.allFinite()); + assert(m_computed.allFinite()); +#endif + + Block col0(m_computed, firstCol+shift, firstCol+shift, length, 1); + Diagonal fulldiag(m_computed); + VectorBlock,Dynamic> diag(fulldiag, firstCol+shift, length); + + RealScalar epsilon = 8 * NumTraits::epsilon() * (max)(col0.cwiseAbs().maxCoeff(), diag.cwiseAbs().maxCoeff()); + + //condition 4.1 + if (diag(0) < epsilon) + { +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + std::cout << "deflation 4.1, because " << diag(0) << " < " << epsilon << "\n"; +#endif + diag(0) = epsilon; + } //condition 4.2 - for (Index i=firstCol + shift + 1;i<=lastCol + shift;i++) - if (abs(m_computed(i, firstCol + shift)) < epsilon) - m_computed(i, firstCol + shift) = 0; + for (Index i=1;i firstCol + shift + k) permutation[p] = j++; - else if (j> lastCol + shift) permutation[p] = i++; - else if (m_computed(i, i) < m_computed(j, j)) permutation[p] = j++; - else permutation[p] = i++; - } - //we do the permutation - RealScalar aux; - //we stock the current index of each col - //and the column of each index - Index *realInd = new Index[length]; // FIXME avoid repeated dynamic memory allocation - Index *realCol = new Index[length]; // FIXME avoid repeated dynamic memory allocation - for (int pos = 0; pos< length; pos++) - { - realCol[pos] = pos + firstCol + shift; - realInd[pos] = pos; - } - const Index Zero = firstCol + shift; - VectorType temp; - for (int i = 1; i < length - 1; i++) - { - const Index I = i + Zero; - const Index realI = realInd[i]; - const Index j = permutation[length - i] - Zero; - const Index J = realCol[j]; - - //diag displace - aux = m_computed(I, I); - m_computed(I, I) = m_computed(J, J); - m_computed(J, J) = aux; - - //firstrow displace - aux = m_computed(I, Zero); - m_computed(I, Zero) = m_computed(J, Zero); - m_computed(J, Zero) = aux; - - // change columns - if (m_compU) - { - temp = m_naiveU.col(I - shift).segment(firstCol, length + 1); - m_naiveU.col(I - shift).segment(firstCol, length + 1) = m_naiveU.col(J - shift).segment(firstCol, length + 1); - m_naiveU.col(J - shift).segment(firstCol, length + 1) = temp; - } - else - { - temp = m_naiveU.col(I - shift).segment(0, 2); - m_naiveU.col(I - shift).template head<2>() = m_naiveU.col(J - shift).segment(0, 2); - m_naiveU.col(J - shift).template head<2>() = temp; } - if (m_compV) - { - const Index CWI = I + firstColW - Zero; - const Index CWJ = J + firstColW - Zero; - temp = m_naiveV.col(CWI).segment(firstRowW, length); - m_naiveV.col(CWI).segment(firstRowW, length) = m_naiveV.col(CWJ).segment(firstRowW, length); - m_naiveV.col(CWJ).segment(firstRowW, length) = temp; - } - - //update real pos - realCol[realI] = J; - realCol[j] = I; - realInd[J - Zero] = realI; - realInd[I - Zero] = j; - } - for (Index i = firstCol + shift + 1; i k) permutation[p] = j++; + else if (j >= length) permutation[p] = i++; + else if (diag(i) < diag(j)) permutation[p] = j++; + else permutation[p] = i++; + } + } + + // Current index of each col, and current column of each index + Index *realInd = new Index[length]; // FIXME avoid repeated dynamic memory allocation + Index *realCol = new Index[length]; // FIXME avoid repeated dynamic memory allocation + + for(int pos = 0; pos< length; pos++) + { + realCol[pos] = pos; + realInd[pos] = pos; + } + + for(Index i = 1; i < length - 1; i++) + { + const Index pi = permutation[length - i]; + const Index J = realCol[pi]; + + using std::swap; + // swap diaognal and first column entries: + swap(diag(i), diag(J)); + swap(col0(i), col0(J)); + + // change columns + if (m_compU) m_naiveU.col(firstCol+i).segment(firstCol, length + 1).swap(m_naiveU.col(firstCol+J).segment(firstCol, length + 1)); + else m_naiveU.col(firstCol+i).segment(0, 2) .swap(m_naiveU.col(firstCol+J).segment(0, 2)); + if (m_compV) m_naiveV.col(firstColW + i).segment(firstRowW, length).swap(m_naiveV.col(firstColW + J).segment(firstRowW, length)); + + //update real pos + const Index realI = realInd[i]; + realCol[realI] = J; + realCol[pi] = i; + realInd[J] = realI; + realInd[i] = pi; + } + delete[] permutation; + delete[] realInd; + delete[] realCol; + } + +#ifdef EIGEN_BDCSVD_SANITY_CHECKS + for(int k=2;k::epsilon()*diag(i+1)) +// if ((diag(i+1) - diag(i)) < epsilon) + { +#ifdef EIGEN_BDCSVD_DEBUG_VERBOSE + std::cout << "deflation 4.4 with i = " << i << " because " << (diag(i+1) - diag(i)) << " < " << epsilon << "\n"; +#endif + deflation44(firstCol, firstCol + shift, firstRowW, firstColW, i, i + 1, length); + } + +#ifdef EIGEN_BDCSVD_SANITY_CHECKS + assert(m_naiveU.allFinite()); + assert(m_naiveV.allFinite()); + assert(m_computed.allFinite()); +#endif }//end deflation