improve Simplicial Cholesky analyzePattern

2025-07-25 14:24:31 +08:00 · 2025-01-28 17:53:43 +00:00 · 2025-01-28 17:53:43 +00:00 · a056b93114
commit a056b93114
parent 5d866a7a78
2 changed files with 264 additions and 38 deletions
--- a/Eigen/src/SparseCholesky/SimplicialCholesky.h
+++ b/Eigen/src/SparseCholesky/SimplicialCholesky.h
@ -134,7 +134,7 @@ class SimplicialCholeskyBase : public SparseSolverBase<Derived> {
      << "\n";
    s << "  tree:     " << ((total += m_parent.size() * sizeof(int)) >> 20) << "Mb"
      << "\n";
-    s << "  nonzeros: " << ((total += m_nonZerosPerCol.size() * sizeof(int)) >> 20) << "Mb"
+    s << "  nonzeros: " << ((total += m_workSpace.size() * sizeof(int)) >> 20) << "Mb"
      << "\n";
    s << "  perm:     " << ((total += m_P.size() * sizeof(int)) >> 20) << "Mb"
      << "\n";
@ -240,7 +240,7 @@ class SimplicialCholeskyBase : public SparseSolverBase<Derived> {
  CholMatrixType m_matrix;
  VectorType m_diag;  // the diagonal coefficients (LDLT mode)
  VectorI m_parent;   // elimination tree
-  VectorI m_nonZerosPerCol;
+  VectorI m_workSpace;
  PermutationMatrix<Dynamic, Dynamic, StorageIndex> m_P;     // the permutation
  PermutationMatrix<Dynamic, Dynamic, StorageIndex> m_Pinv;  // the inverse permutation
@ -830,7 +830,7 @@ void SimplicialCholeskyBase<Derived>::ordering(const MatrixType& a, ConstCholMat
  const Index size = a.rows();
  pmat = &ap;
  // Note that ordering methods compute the inverse permutation
-  if (!internal::is_same<OrderingType, NaturalOrdering<Index> >::value) {
+  if (!internal::is_same<OrderingType, NaturalOrdering<StorageIndex> >::value) {
    {
      CholMatrixType C;
      internal::permute_symm_to_fullsymm<UpLo, NonHermitian>(a, C, NULL);
--- a/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h
+++ b/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h
@ -25,40 +25,265 @@ the Mozilla Public License v. 2.0, as stated at the top of this file.
 namespace Eigen {
-template <typename Derived>
+namespace internal {
 void SimplicialCholeskyBase<Derived>::analyzePattern_preordered(const CholMatrixType& ap, bool doLDLT) {
  const StorageIndex size = StorageIndex(ap.rows());
  m_matrix.resize(size, size);
  m_parent.resize(size);
  m_nonZerosPerCol.resize(size);
-  ei_declare_aligned_stack_constructed_variable(StorageIndex, tags, size, 0);
+template <typename Scalar, typename StorageIndex>
 struct simpl_chol_helper {
  using CholMatrixType = SparseMatrix<Scalar, ColMajor, StorageIndex>;
  using InnerIterator = typename CholMatrixType::InnerIterator;
  using VectorI = Matrix<StorageIndex, Dynamic, 1>;
  static constexpr StorageIndex kEmpty = -1;
-  for (StorageIndex k = 0; k < size; ++k) {
+  // Implementation of a stack or last-in first-out structure with some debugging machinery.
-    /* L(k,:) pattern: all nodes reachable in etree from nz in A(0:k-1,k) */
+  struct Stack {
-    m_parent[k] = -1;        /* parent of k is not yet known */
+    StorageIndex* m_data;
-    tags[k] = k;             /* mark node k as visited */
+    Index m_size;
-    m_nonZerosPerCol[k] = 0; /* count of nonzeros in column k of L */
+#ifndef EIGEN_NO_DEBUG
-    for (typename CholMatrixType::InnerIterator it(ap, k); it; ++it) {
+    const Index m_maxSize;
-      StorageIndex i = it.index();
+    Stack(StorageIndex* data, StorageIndex size, StorageIndex maxSize)
-      if (i < k) {
+        : m_data(data), m_size(size), m_maxSize(maxSize) {
-        /* follow path from i to root of etree, stop at flagged node */
+      eigen_assert(size >= 0);
-        for (; tags[i] != k; i = m_parent[i]) {
+      eigen_assert(maxSize >= size);
-          /* find parent of i if not yet determined */
+    }
-          if (m_parent[i] == -1) m_parent[i] = k;
+#else
-          m_nonZerosPerCol[i]++; /* L (k,i) is nonzero */
+    Stack(StorageIndex* data, StorageIndex size, StorageIndex /*maxSize*/) : m_data(data), m_size(size) {}
-          tags[i] = k;           /* mark i as visited */
+#endif
    bool empty() const { return m_size == 0; }
    Index size() const { return m_size; }
    StorageIndex back() const {
      eigen_assert(m_size > 0);
      return m_data[m_size - 1];
    }
    void push(const StorageIndex& value) {
 #ifndef EIGEN_NO_DEBUG
      eigen_assert(m_size < m_maxSize);
 #endif
      m_data[m_size] = value;
      m_size++;
    }
    void pop() {
      eigen_assert(m_size > 0);
      m_size--;
    }
  };
  // Implementation of a disjoint-set or union-find structure with path compression.
  struct DisjointSet {
    StorageIndex* m_set;
    DisjointSet(StorageIndex* set, StorageIndex size) : m_set(set) { std::iota(set, set + size, 0); }
    // Find the set representative or root of `u`.
    StorageIndex find(StorageIndex u) const {
      eigen_assert(u != kEmpty);
      while (m_set[u] != u) {
        // manually unroll the loop by a factor of 2 to improve performance
        u = m_set[m_set[u]];
      }
      return u;
    }
    // Perform full path compression such that each node from `u` to `v` points to `v`.
    void compress(StorageIndex u, StorageIndex v) {
      eigen_assert(u != kEmpty);
      eigen_assert(v != kEmpty);
      while (m_set[u] != v) {
        StorageIndex next = m_set[u];
        m_set[u] = v;
        u = next;
      }
    };
  };
  // Computes the higher adjacency pattern by transposing the input lower adjacency matrix.
  // Only the index arrays are calculated, as the values are not needed for the symbolic factorization.
  // The outer index array provides the size requirements of the inner index array.
  // Computes the outer index array of the higher adjacency matrix.
  static void calc_hadj_outer(const StorageIndex size, const CholMatrixType& ap, StorageIndex* outerIndex) {
    for (StorageIndex j = 1; j < size; j++) {
      for (InnerIterator it(ap, j); it; ++it) {
        StorageIndex i = it.index();
        if (i < j) outerIndex[i + 1]++;
      }
    }
    std::partial_sum(outerIndex, outerIndex + size + 1, outerIndex);
  }
  // inner index array
  static void calc_hadj_inner(const StorageIndex size, const CholMatrixType& ap, const StorageIndex* outerIndex,
                              StorageIndex* innerIndex, StorageIndex* tmp) {
    std::fill_n(tmp, size, 0);
    for (StorageIndex j = 1; j < size; j++) {
      for (InnerIterator it(ap, j); it; ++it) {
        StorageIndex i = it.index();
        if (i < j) {
          StorageIndex b = outerIndex[i] + tmp[i];
          innerIndex[b] = j;
          tmp[i]++;
        }
      }
    }
  }
-  /* construct Lp index array from m_nonZerosPerCol column counts */
+  // Adapted from:
-  StorageIndex* Lp = m_matrix.outerIndexPtr();
+  // Joseph W. Liu. (1986).
-  Lp[0] = 0;
+  // A compact row storage scheme for Cholesky factors using elimination trees.
-  for (StorageIndex k = 0; k < size; ++k) Lp[k + 1] = Lp[k] + m_nonZerosPerCol[k] + (doLDLT ? 0 : 1);
+  // ACM Trans. Math. Softw. 12, 2 (June 1986), 127–148. https://doi.org/10.1145/6497.6499
-  m_matrix.resizeNonZeros(Lp[size]);
+  // Computes the elimination forest of the lower adjacency matrix, a compact representation of the sparse L factor.
  // The L factor may contain multiple elimination trees if a column contains only its diagonal element.
  // Each elimination tree is an n-ary tree in which each node points to its parent.
  static void calc_etree(const StorageIndex size, const CholMatrixType& ap, StorageIndex* parent, StorageIndex* tmp) {
    std::fill_n(parent, size, kEmpty);
    DisjointSet ancestor(tmp, size);
    for (StorageIndex j = 1; j < size; j++) {
      for (InnerIterator it(ap, j); it; ++it) {
        StorageIndex i = it.index();
        if (i < j) {
          StorageIndex r = ancestor.find(i);
          if (r != j) parent[r] = j;
          ancestor.compress(i, j);
        }
      }
    }
  }
  // Computes the child pointers of the parent tree to facilitate a depth-first search traversal.
  static void calc_lineage(const StorageIndex size, const StorageIndex* parent, StorageIndex* firstChild,
                           StorageIndex* firstSibling) {
    std::fill_n(firstChild, size, kEmpty);
    std::fill_n(firstSibling, size, kEmpty);
    for (StorageIndex j = 0; j < size; j++) {
      StorageIndex p = parent[j];
      if (p == kEmpty) continue;
      StorageIndex c = firstChild[p];
      if (c == kEmpty)
        firstChild[p] = j;
      else {
        while (firstSibling[c] != kEmpty) c = firstSibling[c];
        firstSibling[c] = j;
      }
    }
  }
  // Computes a post-ordered traversal of the elimination tree.
  static void calc_post(const StorageIndex size, const StorageIndex* parent, StorageIndex* firstChild,
                        const StorageIndex* firstSibling, StorageIndex* post, StorageIndex* dfs) {
    Stack post_stack(post, 0, size);
    for (StorageIndex j = 0; j < size; j++) {
      if (parent[j] != kEmpty) continue;
      // Begin at a root
      Stack dfs_stack(dfs, 0, size);
      dfs_stack.push(j);
      while (!dfs_stack.empty()) {
        StorageIndex i = dfs_stack.back();
        StorageIndex c = firstChild[i];
        if (c == kEmpty) {
          post_stack.push(i);
          dfs_stack.pop();
        } else {
          dfs_stack.push(c);
          // Remove the path from `i` to `c` for future traversals.
          firstChild[i] = firstSibling[c];
        }
      }
    }
    eigen_assert(post_stack.size() == size);
    eigen_assert(std::all_of(firstChild, firstChild + size, [](StorageIndex a) { return a == kEmpty; }));
  }
  // Adapted from:
  // Gilbert, J. R., Ng, E., & Peyton, B. W. (1994).
  // An efficient algorithm to compute row and column counts for sparse Cholesky factorization.
  // SIAM Journal on Matrix Analysis and Applications, 15(4), 1075–1091.
  // Computes the non-zero pattern of the L factor.
  static void calc_colcount(const StorageIndex size, const StorageIndex* hadjOuter, const StorageIndex* hadjInner,
                            const StorageIndex* parent, StorageIndex* prevLeaf, StorageIndex* tmp,
                            const StorageIndex* post, StorageIndex* nonZerosPerCol, bool doLDLT) {
    // initialize nonZerosPerCol with 1 for leaves, 0 for non-leaves
    std::fill_n(nonZerosPerCol, size, 1);
    for (StorageIndex j = 0; j < size; j++) {
      StorageIndex p = parent[j];
      // p is not a leaf
      if (p != kEmpty) nonZerosPerCol[p] = 0;
    }
    DisjointSet parentSet(tmp, size);
    // prevLeaf is already initialized
    eigen_assert(std::all_of(prevLeaf, prevLeaf + size, [](StorageIndex a) { return a == kEmpty; }));
    for (StorageIndex j_ = 0; j_ < size; j_++) {
      StorageIndex j = post[j_];
      nonZerosPerCol[j] += hadjOuter[j + 1] - hadjOuter[j];
      for (StorageIndex k = hadjOuter[j]; k < hadjOuter[j + 1]; k++) {
        StorageIndex i = hadjInner[k];
        eigen_assert(i > j);
        StorageIndex prev = prevLeaf[i];
        if (prev != kEmpty) {
          StorageIndex q = parentSet.find(prev);
          parentSet.compress(prev, q);
          nonZerosPerCol[q]--;
        }
        prevLeaf[i] = j;
      }
      StorageIndex p = parent[j];
      if (p != kEmpty) parentSet.compress(j, p);
    }
    for (StorageIndex j = 0; j < size; j++) {
      StorageIndex p = parent[j];
      if (p != kEmpty) nonZerosPerCol[p] += nonZerosPerCol[j] - 1;
      if (doLDLT) nonZerosPerCol[j]--;
    }
  }
  // Finalizes the non zero pattern of the L factor and allocates the memory for the factorization.
  static void init_matrix(const StorageIndex size, const StorageIndex* nonZerosPerCol, CholMatrixType& L) {
    eigen_assert(L.outerIndexPtr()[0] == 0);
    std::partial_sum(nonZerosPerCol, nonZerosPerCol + size, L.outerIndexPtr() + 1);
    L.resizeNonZeros(L.outerIndexPtr()[size]);
  }
  // Driver routine for the symbolic sparse Cholesky factorization.
  static void run(const StorageIndex size, const CholMatrixType& ap, CholMatrixType& L, VectorI& parent,
                  VectorI& workSpace, bool doLDLT) {
    parent.resize(size);
    workSpace.resize(4 * size);
    L.resize(size, size);
    StorageIndex* tmp1 = workSpace.data();
    StorageIndex* tmp2 = workSpace.data() + size;
    StorageIndex* tmp3 = workSpace.data() + 2 * size;
    StorageIndex* tmp4 = workSpace.data() + 3 * size;
    // Borrow L's outer index array for the higher adjacency pattern.
    StorageIndex* hadj_outer = L.outerIndexPtr();
    calc_hadj_outer(size, ap, hadj_outer);
    // Request additional temporary storage for the inner indices of the higher adjacency pattern.
    ei_declare_aligned_stack_constructed_variable(StorageIndex, hadj_inner, hadj_outer[size], nullptr);
    calc_hadj_inner(size, ap, hadj_outer, hadj_inner, tmp1);
    calc_etree(size, ap, parent.data(), tmp1);
    calc_lineage(size, parent.data(), tmp1, tmp2);
    calc_post(size, parent.data(), tmp1, tmp2, tmp3, tmp4);
    calc_colcount(size, hadj_outer, hadj_inner, parent.data(), tmp1, tmp2, tmp3, tmp4, doLDLT);
    init_matrix(size, tmp4, L);
  }
 };
 }  // namespace internal
 template <typename Derived>
 void SimplicialCholeskyBase<Derived>::analyzePattern_preordered(const CholMatrixType& ap, bool doLDLT) {
  using Helper = internal::simpl_chol_helper<Scalar, StorageIndex>;
  eigen_assert(ap.innerSize() == ap.outerSize());
  const StorageIndex size = internal::convert_index<StorageIndex>(ap.outerSize());
  Helper::run(size, ap, m_matrix, m_parent, m_workSpace, doLDLT);
  m_isInitialized = true;
  m_info = Success;
@ -70,20 +295,21 @@ template <typename Derived>
 template <bool DoLDLT, bool NonHermitian>
 void SimplicialCholeskyBase<Derived>::factorize_preordered(const CholMatrixType& ap) {
  using std::sqrt;
  const StorageIndex size = StorageIndex(ap.rows());
  eigen_assert(m_analysisIsOk && "You must first call analyzePattern()");
  eigen_assert(ap.rows() == ap.cols());
-  eigen_assert(m_parent.size() == ap.rows());
+  eigen_assert(m_parent.size() == size);
-  eigen_assert(m_nonZerosPerCol.size() == ap.rows());
+  eigen_assert(m_workSpace.size() >= 3 * size);
  const StorageIndex size = StorageIndex(ap.rows());
  const StorageIndex* Lp = m_matrix.outerIndexPtr();
  StorageIndex* Li = m_matrix.innerIndexPtr();
  Scalar* Lx = m_matrix.valuePtr();
  ei_declare_aligned_stack_constructed_variable(Scalar, y, size, 0);
-  ei_declare_aligned_stack_constructed_variable(StorageIndex, pattern, size, 0);
+  StorageIndex* nonZerosPerCol = m_workSpace.data();
-  ei_declare_aligned_stack_constructed_variable(StorageIndex, tags, size, 0);
+  StorageIndex* pattern = m_workSpace.data() + size;
  StorageIndex* tags = m_workSpace.data() + 2 * size;
  bool ok = true;
  m_diag.resize(DoLDLT ? size : 0);
@ -93,7 +319,7 @@ void SimplicialCholeskyBase<Derived>::factorize_preordered(const CholMatrixType&
    y[k] = Scalar(0);         // Y(0:k) is now all zero
    StorageIndex top = size;  // stack for pattern is empty
    tags[k] = k;              // mark node k as visited
-    m_nonZerosPerCol[k] = 0;  // count of nonzeros in column k of L
+    nonZerosPerCol[k] = 0;    // count of nonzeros in column k of L
    for (typename CholMatrixType::InnerIterator it(ap, k); it; ++it) {
      StorageIndex i = it.index();
      if (i <= k) {
@ -124,13 +350,13 @@ void SimplicialCholeskyBase<Derived>::factorize_preordered(const CholMatrixType&
      else
        yi = l_ki = yi / Lx[Lp[i]];
-      Index p2 = Lp[i] + m_nonZerosPerCol[i];
+      Index p2 = Lp[i] + nonZerosPerCol[i];
      Index p;
      for (p = Lp[i] + (DoLDLT ? 0 : 1); p < p2; ++p) y[Li[p]] -= getSymm(Lx[p]) * yi;
      d -= getDiag(l_ki * getSymm(yi));
      Li[p] = k; /* store L(k,i) in column form of L */
      Lx[p] = l_ki;
-      ++m_nonZerosPerCol[i]; /* increment count of nonzeros in col i */
+      ++nonZerosPerCol[i]; /* increment count of nonzeros in col i */
    }
    if (DoLDLT) {
      m_diag[k] = d;
@ -139,7 +365,7 @@ void SimplicialCholeskyBase<Derived>::factorize_preordered(const CholMatrixType&
        break;
      }
    } else {
-      Index p = Lp[k] + m_nonZerosPerCol[k]++;
+      Index p = Lp[k] + nonZerosPerCol[k]++;
      Li[p] = k; /* store L(k,k) = sqrt (d) in column k */
      if (NonHermitian ? d == RealScalar(0) : numext::real(d) <= RealScalar(0)) {
        ok = false; /* failure, matrix is not positive definite */