diff --git a/unsupported/Eigen/IterativeSolvers b/unsupported/Eigen/IterativeSolvers index ad7a0be27..8f6336f94 100644 --- a/unsupported/Eigen/IterativeSolvers +++ b/unsupported/Eigen/IterativeSolvers @@ -25,6 +25,7 @@ * - a BiCGSTAB(L) implementation * - a DGMRES implementation * - a MINRES implementation + * - a IDRSTABL implementation * * Choosing the best solver for solving \c A \c x = \c b depends a lot on the preconditioner chosen as well as the properties of \c A. The following flowchart might help you. * \dot width=50% @@ -89,6 +90,7 @@ #include "src/IterativeSolvers/MINRES.h" #include "src/IterativeSolvers/IDRS.h" #include "src/IterativeSolvers/BiCGSTABL.h" +#include "src/IterativeSolvers/IDRSTABL.h" #include "../../Eigen/src/Core/util/ReenableStupidWarnings.h" diff --git a/unsupported/Eigen/src/IterativeSolvers/IDRSTABL.h b/unsupported/Eigen/src/IterativeSolvers/IDRSTABL.h new file mode 100755 index 000000000..712c17181 --- /dev/null +++ b/unsupported/Eigen/src/IterativeSolvers/IDRSTABL.h @@ -0,0 +1,476 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2020 Chris Schoutrop +// Copyright (C) 2020 Mischa Senders +// Copyright (C) 2020 Lex Kuijpers +// Copyright (C) 2020 Jens Wehner +// Copyright (C) 2020 Jan van Dijk +// Copyright (C) 2020 Adithya Vijaykumar +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +/* + +The IDR(S)Stab(L) method is a combination of IDR(S) and BiCGStab(L) + +This implementation of IDRSTABL is based on +1. Aihara, K., Abe, K., & Ishiwata, E. (2014). A variant of IDRstab with +reliable update strategies for solving sparse linear systems. Journal of +Computational and Applied Mathematics, 259, 244-258. + doi:10.1016/j.cam.2013.08.028 + 2. Aihara, K., Abe, K., & Ishiwata, E. (2015). Preconditioned +IDRSTABL Algorithms for Solving Nonsymmetric Linear Systems. International +Journal of Applied Mathematics, 45(3). + 3. Saad, Y. (2003). Iterative Methods for Sparse Linear Systems: +Second Edition. Philadelphia, PA: SIAM. + 4. Sonneveld, P., & Van Gijzen, M. B. (2009). IDR(s): A Family +of Simple and Fast Algorithms for Solving Large Nonsymmetric Systems of Linear +Equations. SIAM Journal on Scientific Computing, 31(2), 1035-1062. + doi:10.1137/070685804 + 5. Sonneveld, P. (2012). On the convergence behavior of IDR (s) +and related methods. SIAM Journal on Scientific Computing, 34(5), A2576-A2598. + + Right-preconditioning based on Ref. 3 is implemented here. +*/ + +#ifndef EIGEN_IDRSTABL_H +#define EIGEN_IDRSTABL_H + +namespace Eigen { + +namespace internal { + +template +bool idrstabl(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditioner &precond, Index &iters, + typename Dest::RealScalar &tol_error, Index L, Index S) { + /* + Setup and type definitions. + */ + using numext::abs; + using numext::sqrt; + typedef typename Dest::Scalar Scalar; + typedef typename Dest::RealScalar RealScalar; + typedef Matrix VectorType; + typedef Matrix DenseMatrixType; + + const Index N = x.rows(); + + Index k = 0; // Iteration counter + const Index maxIters = iters; + + const RealScalar rhs_norm = rhs.stableNorm(); + const RealScalar tol = tol_error * rhs_norm; + + if (rhs_norm == 0) { + /* + If b==0, then the exact solution is x=0. + rhs_norm is needed for other calculations anyways, this exit is a freebie. + */ + x.setZero(); + tol_error = 0.0; + return true; + } + // Construct decomposition objects beforehand. + FullPivLU lu_solver; + + if (S >= N || L >= N) { + /* + The matrix is very small, or the choice of L and S is very poor + in that case solving directly will be best. + */ + lu_solver.compute(DenseMatrixType(mat)); + x = lu_solver.solve(rhs); + tol_error = (rhs - mat * x).stableNorm() / rhs_norm; + return true; + } + + // Define maximum sizes to prevent any reallocation later on. + DenseMatrixType u(N, L + 1); + DenseMatrixType r(N, L + 1); + + DenseMatrixType V(N * (L + 1), S); + + VectorType alpha(S); + VectorType gamma(L); + VectorType update(N); + + /* + Main IDRSTABL algorithm + */ + // Set up the initial residual + VectorType x0 = x; + r.col(0) = rhs - mat * x; + x.setZero(); // The final solution will be x0+x + + tol_error = r.col(0).stableNorm(); + + // FOM = Full orthogonalisation method + DenseMatrixType h_FOM = DenseMatrixType::Zero(S, S - 1); + + // Construct an initial U matrix of size N x S + DenseMatrixType U(N * (L + 1), S); + for (Index col_index = 0; col_index < S; ++col_index) { + // Arnoldi-like process to generate a set of orthogonal vectors spanning + // {u,A*u,A*A*u,...,A^(S-1)*u}. This construction can be combined with the + // Full Orthogonalization Method (FOM) from Ref.3 to provide a possible + // early exit with no additional MV. + if (col_index != 0) { + /* + Modified Gram-Schmidt strategy: + */ + VectorType w = mat * precond.solve(u.col(0)); + for (Index i = 0; i < col_index; ++i) { + auto v = U.col(i).head(N); + h_FOM(i, col_index - 1) = v.dot(w); + w -= h_FOM(i, col_index - 1) * v; + } + u.col(0) = w; + h_FOM(col_index, col_index - 1) = u.col(0).stableNorm(); + + if (abs(h_FOM(col_index, col_index - 1)) != RealScalar(0)) { + /* + This only happens if u is NOT exactly zero. In case it is exactly zero + it would imply that that this u has no component in the direction of the + current residual. + + By then setting u to zero it will not contribute any further (as it + should). Whereas attempting to normalize results in division by zero. + + Such cases occur if: + 1. The basis of dimension 1) { + // Check for early FOM exit. + Scalar beta = r.col(0).stableNorm(); + VectorType e1 = VectorType::Zero(S - 1); + e1(0) = beta; + lu_solver.compute(h_FOM.topLeftCorner(S - 1, S - 1)); + VectorType y = lu_solver.solve(e1); + VectorType x2 = x + U.topLeftCorner(N, S - 1) * y; + + // Using proposition 6.7 in Saad, one MV can be saved to calculate the + // residual + RealScalar FOM_residual = (h_FOM(S - 1, S - 2) * y(S - 2) * U.col(S - 1).head(N)).stableNorm(); + + if (FOM_residual < tol) { + // Exit, the FOM algorithm was already accurate enough + iters = k; + // Convert back to the unpreconditioned solution + x = precond.solve(x2); + // x contains the updates to x0, add those back to obtain the solution + x += x0; + tol_error = FOM_residual / rhs_norm; + return true; + } + } + + /* + Select an initial (N x S) matrix R0. + 1. Generate random R0, orthonormalize the result. + 2. This results in R0, however to save memory and compute we only need the + adjoint of R0. This is given by the matrix R_T.\ Additionally, the matrix + (mat.adjoint()*R_tilde).adjoint()=R_tilde.adjoint()*mat by the + anti-distributivity property of the adjoint. This results in AR_T, which is + constant if R_T does not have to be regenerated and can be precomputed. + Based on reference 4, this has zero probability in exact arithmetic. + */ + + // Original IDRSTABL and Kensuke choose S random vectors: + const HouseholderQR qr(DenseMatrixType::Random(N, S)); + DenseMatrixType R_T = (qr.householderQ() * DenseMatrixType::Identity(N, S)).adjoint(); + DenseMatrixType AR_T = DenseMatrixType(R_T * mat); + + // Pre-allocate sigma. + DenseMatrixType sigma(S, S); + + bool reset_while = false; // Should the while loop be reset for some reason? + + while (k < maxIters) { + for (Index j = 1; j <= L; ++j) { + /* + The IDR Step + */ + // Construction of the sigma-matrix, and the decomposition of sigma. + for (Index i = 0; i < S; ++i) { + sigma.col(i).noalias() = AR_T * precond.solve(U.block(N * (j - 1), i, N, 1)); + } + + lu_solver.compute(sigma); + // Obtain the update coefficients alpha + if (j == 1) { + // alpha=inverse(sigma)*(R_T*r_0); + alpha.noalias() = lu_solver.solve(R_T * r.col(0)); + } else { + // alpha=inverse(sigma)*(AR_T*r_{j-2}) + alpha.noalias() = lu_solver.solve(AR_T * precond.solve(r.col(j - 2))); + } + + // Obtain new solution and residual from this update + update.noalias() = U.topRows(N) * alpha; + r.col(0) -= mat * precond.solve(update); + x += update; + + for (Index i = 1; i <= j - 2; ++i) { + // This only affects the case L>2 + r.col(i) -= U.block(N * (i + 1), 0, N, S) * alpha; + } + if (j > 1) { + // r=[r;A*r_{j-2}] + r.col(j - 1).noalias() = mat * precond.solve(r.col(j - 2)); + } + tol_error = r.col(0).stableNorm(); + + if (tol_error < tol) { + // If at this point the algorithm has converged, exit. + reset_while = true; + break; + } + + bool break_normalization = false; + for (Index q = 1; q <= S; ++q) { + if (q == 1) { + // u = r; + u.leftCols(j + 1) = r.leftCols(j + 1); + } else { + // u=[u_1;u_2;...;u_j] + u.leftCols(j) = u.middleCols(1, j); + } + + // Obtain the update coefficients beta implicitly + // beta=lu_sigma.solve(AR_T * u.block(N * (j - 1), 0, N, 1) + u.reshaped().head(u.rows() * j) -= U.topRows(N * j) * lu_solver.solve(AR_T * precond.solve(u.col(j - 1))); + + // u=[u;Au_{j-1}] + u.col(j).noalias() = mat * precond.solve(u.col(j - 1)); + + // Orthonormalize u_j to the columns of V_j(:,1:q-1) + if (q > 1) { + /* + Modified Gram-Schmidt-like procedure to make u orthogonal to the + columns of V from Ref. 1. + + The vector mu from Ref. 1 is obtained implicitly: + mu=V.block(N * j, 0, N, q - 1).adjoint() * u.block(N * j, 0, N, 1). + */ + for (Index i = 0; i <= q - 2; ++i) { + auto v = V.col(i).segment(N * j, N); + Scalar h = v.squaredNorm(); + h = v.dot(u.col(j)) / h; + u.reshaped().head(u.rows() * (j + 1)) -= h * V.block(0, i, N * (j + 1), 1); + } + } + // Normalize u and assign to a column of V + Scalar normalization_constant = u.col(j).stableNorm(); + // If u is exactly zero, this will lead to a NaN. Small, non-zero u is + // fine. + if (normalization_constant == RealScalar(0.0)) { + break_normalization = true; + break; + } else { + u.leftCols(j + 1) /= normalization_constant; + } + + V.block(0, q - 1, N * (j + 1), 1).noalias() = u.reshaped().head(u.rows() * (j + 1)); + } + + if (break_normalization == false) { + U = V; + } + } + if (reset_while) { + break; + } + + // r=[r;mat*r_{L-1}] + r.col(L).noalias() = mat * precond.solve(r.col(L - 1)); + + /* + The polynomial step + */ + ColPivHouseholderQR qr_solver(r.rightCols(L)); + gamma.noalias() = qr_solver.solve(r.col(0)); + + // Update solution and residual using the "minimized residual coefficients" + update.noalias() = r.leftCols(L) * gamma; + x += update; + r.col(0) -= mat * precond.solve(update); + + // Update iteration info + ++k; + tol_error = r.col(0).stableNorm(); + + if (tol_error < tol) { + // Slightly early exit by moving the criterion before the update of U, + // after the main while loop the result of that calculation would not be + // needed. + break; + } + + /* + U=U0-sum(gamma_j*U_j) + Consider the first iteration. Then U only contains U0, so at the start of + the while-loop U should be U0. Therefore only the first N rows of U have to + be updated. + */ + for (Index i = 1; i <= L; ++i) { + U.topRows(N) -= U.block(N * i, 0, N, S) * gamma(i - 1); + } + } + + /* + Exit after the while loop terminated. + */ + iters = k; + // Convert back to the unpreconditioned solution + x = precond.solve(x); + // x contains the updates to x0, add those back to obtain the solution + x += x0; + tol_error = tol_error / rhs_norm; + return true; +} + +} // namespace internal + +template > +class IDRSTABL; + +namespace internal { + +template +struct traits> { + typedef MatrixType_ MatrixType; + typedef Preconditioner_ Preconditioner; +}; + +} // namespace internal + +/** \ingroup IterativeLinearSolvers_Module + * \brief The IDR(s)STAB(l) is a combination of IDR(s) and BiCGSTAB(l). It is a + * short-recurrences Krylov method for sparse square problems. It can outperform + * both IDR(s) and BiCGSTAB(l). IDR(s)STAB(l) generally closely follows the + * optimal GMRES convergence in terms of the number of Matrix-Vector products. + * However, without the increasing cost per iteration of GMRES. IDR(s)STAB(l) is + * suitable for both indefinite systems and systems with complex eigenvalues. + * + * This class allows solving for A.x = b sparse linear problems. The vectors x + * and b can be either dense or sparse. + * + * \tparam MatrixType_ the type of the sparse matrix A, can be a dense or a + * sparse matrix. \tparam Preconditioner_ the type of the preconditioner. + * Default is DiagonalPreconditioner + * + * \implsparsesolverconcept + * + * The maximum number of iterations and tolerance value can be controlled via + * the setMaxIterations() and setTolerance() methods. The defaults are the size + * of the problem for the maximum number of iterations and + * NumTraits::epsilon() for the tolerance. + * + * The tolerance is the maximum relative residual error: |Ax-b|/|b| for which + * the linear system is considered solved. + * + * \b Performance: When using sparse matrices, best performance is achieved for + * a row-major sparse matrix format. Moreover, in this case multi-threading can + * be exploited if the user code is compiled with OpenMP enabled. See \ref + * TopicMultiThreading for details. + * + * By default the iterations start with x=0 as an initial guess of the solution. + * One can control the start using the solveWithGuess() method. + * + * IDR(s)STAB(l) can also be used in a matrix-free context, see the following + * \link MatrixfreeSolverExample example \endlink. + * + * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner + */ + +template +class IDRSTABL : public IterativeSolverBase> { + typedef IterativeSolverBase Base; + using Base::m_error; + using Base::m_info; + using Base::m_isInitialized; + using Base::m_iterations; + using Base::matrix; + Index m_L; + Index m_S; + + public: + typedef MatrixType_ MatrixType; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef Preconditioner_ Preconditioner; + + public: + /** Default constructor. */ + IDRSTABL() : m_L(2), m_S(4) {} + + /** Initialize the solver with matrix \a A for further \c Ax=b solving. + + This constructor is a shortcut for the default constructor followed + by a call to compute(). + + \warning this class stores a reference to the matrix A as well as some + precomputed values that depend on it. Therefore, if \a A is changed + this class becomes invalid. Call compute() to update it with the new + matrix A, or modify a copy of A. + */ + template + explicit IDRSTABL(const EigenBase &A) : Base(A.derived()), m_L(2), m_S(4) {} + + /** \internal */ + /** Loops over the number of columns of b and does the following: + 1. sets the tolerance and maxIterations + 2. Calls the function that has the core solver + routine + */ + template + void _solve_vector_with_guess_impl(const Rhs &b, Dest &x) const { + m_iterations = Base::maxIterations(); + m_error = Base::m_tolerance; + bool ret = internal::idrstabl(matrix(), b, x, Base::m_preconditioner, m_iterations, m_error, m_L, m_S); + + m_info = (!ret) ? NumericalIssue : m_error <= 10 * Base::m_tolerance ? Success : NoConvergence; + } + + /** Sets the parameter L, indicating the amount of minimize residual steps are + * used. */ + void setL(Index L) { + eigen_assert(L >= 1 && "L needs to be positive"); + m_L = L; + } + /** Sets the parameter S, indicating the dimension of the shadow residual + * space.. */ + void setS(Index S) { + eigen_assert(S >= 1 && "S needs to be positive"); + m_S = S; + } +}; + +} // namespace Eigen + +#endif /* EIGEN_IDRSTABL_H */ diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 6bd8430cb..0f05c2885 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -100,6 +100,7 @@ ei_add_test(dgmres) ei_add_test(minres) ei_add_test(idrs) ei_add_test(bicgstabl) +ei_add_test(idrstabl) ei_add_test(levenberg_marquardt) ei_add_test(kronecker_product) ei_add_test(bessel_functions) diff --git a/unsupported/test/idrstabl.cpp b/unsupported/test/idrstabl.cpp new file mode 100644 index 000000000..7e40dd64e --- /dev/null +++ b/unsupported/test/idrstabl.cpp @@ -0,0 +1,28 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "../../test/sparse_solver.h" +#include + +template +void test_idrstabl_T() { + IDRSTABL, DiagonalPreconditioner > idrstabl_colmajor_diag; + IDRSTABL, IncompleteLUT > idrstabl_colmajor_ilut; + + idrstabl_colmajor_diag.setTolerance(NumTraits::epsilon() * 4); + idrstabl_colmajor_ilut.setTolerance(NumTraits::epsilon() * 4); + + CALL_SUBTEST(check_sparse_square_solving(idrstabl_colmajor_diag)); + CALL_SUBTEST(check_sparse_square_solving(idrstabl_colmajor_ilut)); +} + +EIGEN_DECLARE_TEST(idrstabl) { + CALL_SUBTEST_1((test_idrstabl_T())); + CALL_SUBTEST_2((test_idrstabl_T >())); +}