From 69714ff6139bea82b4231a37f959ce29d65772c1 Mon Sep 17 00:00:00 2001
From: Julian Kent <jkflying@gmail.com>
Date: Thu, 28 Jul 2022 18:04:35 +0000
Subject: [PATCH] Add Sparse Subset of Matrix Inverse

---
 Eigen/src/SparseLU/SparseLU.h                 |  51 ++++
 unsupported/Eigen/SparseExtra                 |   2 +
 .../Eigen/src/SparseExtra/SparseInverse.h     | 231 ++++++++++++++++++
 unsupported/test/sparse_extra.cpp             |  45 ++++
 4 files changed, 329 insertions(+)
 create mode 100644 unsupported/Eigen/src/SparseExtra/SparseInverse.h
diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h
index 6fc7d3327..a0ca79a5b 100644
--- a/Eigen/src/SparseLU/SparseLU.h
+++ b/Eigen/src/SparseLU/SparseLU.h
@@ -816,6 +816,31 @@ struct SparseLUMatrixLReturnType : internal::no_assignment_operator
     m_mapL.template solveTransposedInPlace<Conjugate>(X);
   }
 
+  SparseMatrix<Scalar, ColMajor, Index> toSparse() const {
+    ArrayXi colCount = ArrayXi::Ones(cols());
+    for (Index i = 0; i < cols(); i++) {
+      typename MappedSupernodalType::InnerIterator iter(m_mapL, i);
+      for (; iter; ++iter) {
+        if (iter.row() > iter.col()) {
+          colCount(iter.col())++;
+        }
+      }
+    }
+    SparseMatrix<Scalar, ColMajor, Index> sL(rows(), cols());
+    sL.reserve(colCount);
+    for (Index i = 0; i < cols(); i++) {
+      sL.insert(i, i) = 1.0;
+      typename MappedSupernodalType::InnerIterator iter(m_mapL, i);
+      for (; iter; ++iter) {
+        if (iter.row() > iter.col()) {
+          sL.insert(iter.row(), iter.col()) = iter.value();
+        }
+      }
+    }
+    sL.makeCompressed();
+    return sL;
+  }
+
   const MappedSupernodalType& m_mapL;
 };
 
@@ -915,6 +940,32 @@ struct SparseLUMatrixUReturnType : internal::no_assignment_operator
     }// End For U-solve
   }
 
+  SparseMatrix<Scalar, RowMajor, Index> toSparse() {
+    ArrayXi rowCount = ArrayXi::Zero(rows());
+    for (Index i = 0; i < cols(); i++) {
+      typename MatrixLType::InnerIterator iter(m_mapL, i);
+      for (; iter; ++iter) {
+        if (iter.row() <= iter.col()) {
+          rowCount(iter.row())++;
+        }
+      }
+    }
+
+    SparseMatrix<Scalar, RowMajor, Index> sU(rows(), cols());
+    sU.reserve(rowCount);
+    for (Index i = 0; i < cols(); i++) {
+      typename MatrixLType::InnerIterator iter(m_mapL, i);
+      for (; iter; ++iter) {
+        if (iter.row() <= iter.col()) {
+          sU.insert(iter.row(), iter.col()) = iter.value();
+        }
+      }
+    }
+    sU.makeCompressed();
+    const SparseMatrix<Scalar, RowMajor, Index> u = m_mapU;  // convert to RowMajor
+    sU += u;
+    return sU;
+  }
 
   const MatrixLType& m_mapL;
   const MatrixUType& m_mapU;
diff --git a/unsupported/Eigen/SparseExtra b/unsupported/Eigen/SparseExtra
index 2fa8f9f6a..f4920de70 100644
--- a/unsupported/Eigen/SparseExtra
+++ b/unsupported/Eigen/SparseExtra
@@ -33,6 +33,7 @@
   *
   * This module contains some experimental features extending the sparse module:
   * - A RandomSetter which is a wrapper object allowing to set/update a sparse matrix with random access.
+  * - A SparseInverse which calculates a sparse subset of the inverse of a sparse matrix corresponding to nonzeros of the input
   * - MatrixMarket format(https://math.nist.gov/MatrixMarket/formats.html) readers and writers for sparse and dense matrices.
   *
   * \code
@@ -42,6 +43,7 @@
 
 
 #include "src/SparseExtra/RandomSetter.h"
+#include "src/SparseExtra/SparseInverse.h"
 
 #include "src/SparseExtra/MarketIO.h"
 
diff --git a/unsupported/Eigen/src/SparseExtra/SparseInverse.h b/unsupported/Eigen/src/SparseExtra/SparseInverse.h
new file mode 100644
index 000000000..c8a4920ba
--- /dev/null
+++ b/unsupported/Eigen/src/SparseExtra/SparseInverse.h
@@ -0,0 +1,231 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2022 Julian Kent <jkflying@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SPARSEINVERSE_H
+#define EIGEN_SPARSEINVERSE_H
+
+#include "./InternalHeaderCheck.h"
+
+#include "../../../../Eigen/Sparse"
+#include "../../../../Eigen/SparseLU"
+
+namespace Eigen {
+
+/**
+ * @brief Kahan algorithm based accumulator
+ *
+ * The Kahan sum algorithm guarantees to bound the error from floating point
+ * accumulation to a fixed value, regardless of the number of accumulations
+ * performed. Naive accumulation accumulates errors O(N), and pairwise O(logN).
+ * However pairwise also requires O(logN) memory while Kahan summation requires
+ * O(1) memory, but 4x the operations / latency.
+ *
+ * NB! Do not enable associative math optimizations, they may cause the Kahan
+ * summation to be optimized out leaving you with naive summation again.
+ *
+ */
+template <typename Scalar>
+class KahanSum {
+  // Straighforward Kahan summation for accurate accumulation of a sum of numbers
+  Scalar _sum{};
+  Scalar _correction{};
+
+ public:
+  Scalar value() { return _sum; }
+
+  void operator+=(Scalar increment) {
+    const Scalar correctedIncrement = increment + _correction;
+    const Scalar previousSum = _sum;
+    _sum += correctedIncrement;
+    _correction = correctedIncrement - (_sum - previousSum);
+  }
+};
+template <typename Scalar, Index Width = 16>
+class FABSum {
+  // https://epubs.siam.org/doi/pdf/10.1137/19M1257780
+  // Fast and Accurate Blocked Summation
+  // Uses naive summation for the fast sum, and Kahan summation for the accurate sum
+  // Theoretically SIMD sum could be changed to a tree sum which would improve accuracy
+  // over naive summation
+  KahanSum<Scalar> _totalSum;
+  Matrix<Scalar, Width, 1> _block;
+  Index _blockUsed{};
+
+ public:
+  Scalar value() { return _block.topRows(_blockUsed).sum() + _totalSum.value(); }
+
+  void operator+=(Scalar increment) {
+    _block(_blockUsed++, 0) = increment;
+    if (_blockUsed == Width) {
+      _totalSum += _block.sum();
+      _blockUsed = 0;
+    }
+  }
+};
+
+/**
+ * @brief computes an accurate dot product on two sparse vectors
+ *
+ * Uses an accurate summation algorithm for the accumulator in order to
+ * compute an accurate dot product for two sparse vectors.
+ *
+ */
+template <typename Derived, typename OtherDerived>
+typename Derived::Scalar accurateDot(const SparseMatrixBase<Derived>& A, const SparseMatrixBase<OtherDerived>& other) {
+  typedef typename Derived::Scalar Scalar;
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
+  EIGEN_STATIC_ASSERT_VECTOR_ONLY(OtherDerived)
+  EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Derived, OtherDerived)
+  static_assert(internal::is_same<Scalar, typename OtherDerived::Scalar>::value, "mismatched types");
+
+  internal::evaluator<Derived> thisEval(A.derived());
+  typename Derived::ReverseInnerIterator i(thisEval, 0);
+
+  internal::evaluator<OtherDerived> otherEval(other.derived());
+  typename OtherDerived::ReverseInnerIterator j(otherEval, 0);
+
+  FABSum<Scalar> res;
+  while (i && j) {
+    if (i.index() == j.index()) {
+      res += numext::conj(i.value()) * j.value();
+      --i;
+      --j;
+    } else if (i.index() > j.index())
+      --i;
+    else
+      --j;
+  }
+  return res.value();
+}
+
+/**
+ * @brief calculate sparse subset of inverse of sparse matrix
+ *
+ * This class returns a sparse subset of the inverse of the input matrix.
+ * The nonzeros correspond to the nonzeros of the input, plus any additional
+ * elements required due to fill-in of the internal LU factorization. This is
+ * is minimized via a applying a fill-reducing permutation as part of the LU
+ * factorization.
+ *
+ * If there are specific entries of the input matrix which you need inverse
+ * values for, which are zero for the input, you need to insert entries into
+ * the input sparse matrix for them to be calculated.
+ *
+ * Due to the sensitive nature of matrix inversion, particularly on large
+ * matrices which are made possible via sparsity, high accuracy dot products
+ * based on Kahan summation are used to reduce numerical error. If you still
+ * encounter numerical errors you may with to equilibrate your matrix before
+ * calculating the inverse, as well as making sure it is actually full rank.
+ */
+template <typename Scalar>
+class SparseInverse {
+ public:
+  typedef SparseMatrix<Scalar, ColMajor> MatrixType;
+  typedef SparseMatrix<Scalar, RowMajor> RowMatrixType;
+
+  SparseInverse() {}
+
+  /**
+   * @brief This Constructor is for if you already have a factored SparseLU and would like to use it to calculate a
+   * sparse inverse.
+   *
+   * Just call this constructor with your already factored SparseLU class and you can directly call the .inverse()
+   * method to get the result.
+   */
+  SparseInverse(const SparseLU<MatrixType>& slu) { _result = computeInverse(slu); }
+
+  /**
+   * @brief Calculate the sparse inverse from a given sparse input
+   */
+  SparseInverse& compute(const SparseMatrix<Scalar>& A) {
+    SparseLU<MatrixType> slu;
+    slu.compute(A);
+    _result = computeInverse(slu);
+    return *this;
+  }
+
+  /**
+   * @brief return the already-calculated sparse inverse, or a 0x0 matrix if it could not be computed
+   */
+  const MatrixType& inverse() const { return _result; }
+
+  /**
+   * @brief Internal function to calculate the sparse inverse in a functional way
+   * @return A sparse inverse representation, or, if the decomposition didn't complete, a 0x0 matrix.
+   */
+  static MatrixType computeInverse(const SparseLU<MatrixType>& slu) {
+    if (slu.info() != Success) {
+      return MatrixType(0, 0);
+    }
+
+    // Extract from SparseLU and decompose into L, inverse D and U terms
+    Matrix<Scalar, Dynamic, 1> invD;
+    RowMatrixType Upper;
+    {
+      RowMatrixType DU = slu.matrixU().toSparse();
+      invD = DU.diagonal().cwiseInverse();
+      Upper = (invD.asDiagonal() * DU).template triangularView<StrictlyUpper>();
+    }
+    MatrixType Lower = slu.matrixL().toSparse().template triangularView<StrictlyLower>();
+
+    // Compute the inverse and reapply the permutation matrix from the LU decomposition
+    return slu.colsPermutation().transpose() * computeInverse(Upper, invD, Lower) * slu.rowsPermutation();
+  }
+
+  /**
+   * @brief Internal function to calculate the inverse from strictly upper, diagonal and strictly lower components
+   */
+  static MatrixType computeInverse(const RowMatrixType& Upper, const Matrix<Scalar, Dynamic, 1>& inverseDiagonal,
+                                   const MatrixType& Lower) {
+    // Calculate the 'minimal set', which is the nonzeros of (L+U).transpose()
+    // It could be zeroed, but we will overwrite all non-zeros anyways.
+    MatrixType colInv = Lower.transpose().template triangularView<UnitUpper>();
+    colInv += Upper.transpose();
+
+    // We also need rowmajor representation in order to do efficient row-wise dot products
+    RowMatrixType rowInv = Upper.transpose().template triangularView<UnitLower>();
+    rowInv += Lower.transpose();
+
+    // Use the Takahashi algorithm to build the supporting elements of the inverse
+    // upwards and to the left, from the bottom right element, 1 col/row at a time
+    for (Index recurseLevel = Upper.cols() - 1; recurseLevel >= 0; recurseLevel--) {
+      const auto& col = Lower.col(recurseLevel);
+      const auto& row = Upper.row(recurseLevel);
+
+      // Calculate the inverse values for the nonzeros in this column
+      typename MatrixType::ReverseInnerIterator colIter(colInv, recurseLevel);
+      for (; recurseLevel < colIter.index(); --colIter) {
+        const Scalar element = -accurateDot(col, rowInv.row(colIter.index()));
+        colIter.valueRef() = element;
+        rowInv.coeffRef(colIter.index(), recurseLevel) = element;
+      }
+
+      // Calculate the inverse values for the nonzeros in this row
+      typename RowMatrixType::ReverseInnerIterator rowIter(rowInv, recurseLevel);
+      for (; recurseLevel < rowIter.index(); --rowIter) {
+        const Scalar element = -accurateDot(row, colInv.col(rowIter.index()));
+        rowIter.valueRef() = element;
+        colInv.coeffRef(recurseLevel, rowIter.index()) = element;
+      }
+
+      // And finally the diagonal, which corresponds to both row and col iterator now
+      const Scalar diag = inverseDiagonal(recurseLevel) - accurateDot(row, colInv.col(recurseLevel));
+      rowIter.valueRef() = diag;
+      colIter.valueRef() = diag;
+    }
+
+    return colInv;
+  }
+
+ private:
+  MatrixType _result;
+};
+
+}  // namespace Eigen
+#endif
diff --git a/unsupported/test/sparse_extra.cpp b/unsupported/test/sparse_extra.cpp
index 744545be6..7a07bf5a3 100644
--- a/unsupported/test/sparse_extra.cpp
+++ b/unsupported/test/sparse_extra.cpp
@@ -164,6 +164,49 @@ void check_marketio_dense()
   VERIFY_IS_EQUAL(m1,m2);
 }
 
+template <typename Scalar>
+void check_sparse_inverse() {
+  typedef SparseMatrix<Scalar> MatrixType;
+  typedef SparseMatrix<Scalar, RowMajor> RowMatrixType;
+
+  Matrix<Scalar, -1, -1> A;
+  A.resize(1000, 1000);
+  A.fill(0);
+  A.setIdentity();
+  A.col(0).array() += 1;
+  A.row(0).array() += 2;
+  A.col(2).array() += 3;
+  A.row(7).array() += 3;
+  A.col(9).array() += 3;
+  A.block(3, 4, 4, 2).array() += 9;
+  A.middleRows(10, 50).array() += 3;
+  A.middleCols(50, 50).array() += 40;
+  A.block(500, 300, 40, 20).array() += 10;
+  A.transposeInPlace();
+
+  Eigen::SparseLU<MatrixType> slu;
+  slu.compute(A.sparseView());
+  Matrix<Scalar, -1, -1> Id(A.rows(), A.cols());
+  Id.setIdentity();
+  Matrix<Scalar, -1, -1> inv = slu.solve(Id);
+
+  const MatrixType sparseInv = Eigen::SparseInverse<Scalar>().compute(A.sparseView()).inverse();
+
+  Scalar sumdiff = 0;  // Check the diff only of the non-zero elements
+  for (Eigen::Index j = 0; j < A.cols(); j++) {
+    for (typename MatrixType::InnerIterator iter(sparseInv, j); iter; ++iter) {
+      const Scalar diff = std::abs(inv(iter.row(), iter.col()) - iter.value());
+      VERIFY_IS_APPROX_OR_LESS_THAN(diff, 1e-11);
+
+      if (iter.value() != 0) {
+        sumdiff += diff;
+      }
+    }
+  }
+
+  VERIFY_IS_APPROX_OR_LESS_THAN(sumdiff, 1e-10);
+}
+
 EIGEN_DECLARE_TEST(sparse_extra)
 {
   for(int i = 0; i < g_repeat; i++) {
@@ -200,6 +243,8 @@ EIGEN_DECLARE_TEST(sparse_extra)
     CALL_SUBTEST_5( (check_marketio_vector<Matrix<std::complex<float>,Dynamic,1> >()) );
     CALL_SUBTEST_5( (check_marketio_vector<Matrix<std::complex<double>,Dynamic,1> >()) );
 
+    CALL_SUBTEST_6((check_sparse_inverse<double>()));
+
     TEST_SET_BUT_UNUSED_VARIABLE(s);
   }
 }