improve Simplicial Cholesky analyzePattern

This commit is contained in:
Charles Schlosser 2025-01-28 17:53:43 +00:00 committed by Rasmus Munk Larsen
parent 5d866a7a78
commit a056b93114
2 changed files with 264 additions and 38 deletions

View File

@ -134,7 +134,7 @@ class SimplicialCholeskyBase : public SparseSolverBase<Derived> {
<< "\n"; << "\n";
s << " tree: " << ((total += m_parent.size() * sizeof(int)) >> 20) << "Mb" s << " tree: " << ((total += m_parent.size() * sizeof(int)) >> 20) << "Mb"
<< "\n"; << "\n";
s << " nonzeros: " << ((total += m_nonZerosPerCol.size() * sizeof(int)) >> 20) << "Mb" s << " nonzeros: " << ((total += m_workSpace.size() * sizeof(int)) >> 20) << "Mb"
<< "\n"; << "\n";
s << " perm: " << ((total += m_P.size() * sizeof(int)) >> 20) << "Mb" s << " perm: " << ((total += m_P.size() * sizeof(int)) >> 20) << "Mb"
<< "\n"; << "\n";
@ -240,7 +240,7 @@ class SimplicialCholeskyBase : public SparseSolverBase<Derived> {
CholMatrixType m_matrix; CholMatrixType m_matrix;
VectorType m_diag; // the diagonal coefficients (LDLT mode) VectorType m_diag; // the diagonal coefficients (LDLT mode)
VectorI m_parent; // elimination tree VectorI m_parent; // elimination tree
VectorI m_nonZerosPerCol; VectorI m_workSpace;
PermutationMatrix<Dynamic, Dynamic, StorageIndex> m_P; // the permutation PermutationMatrix<Dynamic, Dynamic, StorageIndex> m_P; // the permutation
PermutationMatrix<Dynamic, Dynamic, StorageIndex> m_Pinv; // the inverse permutation PermutationMatrix<Dynamic, Dynamic, StorageIndex> m_Pinv; // the inverse permutation
@ -830,7 +830,7 @@ void SimplicialCholeskyBase<Derived>::ordering(const MatrixType& a, ConstCholMat
const Index size = a.rows(); const Index size = a.rows();
pmat = &ap; pmat = &ap;
// Note that ordering methods compute the inverse permutation // Note that ordering methods compute the inverse permutation
if (!internal::is_same<OrderingType, NaturalOrdering<Index> >::value) { if (!internal::is_same<OrderingType, NaturalOrdering<StorageIndex> >::value) {
{ {
CholMatrixType C; CholMatrixType C;
internal::permute_symm_to_fullsymm<UpLo, NonHermitian>(a, C, NULL); internal::permute_symm_to_fullsymm<UpLo, NonHermitian>(a, C, NULL);

View File

@ -25,40 +25,265 @@ the Mozilla Public License v. 2.0, as stated at the top of this file.
namespace Eigen { namespace Eigen {
template <typename Derived> namespace internal {
void SimplicialCholeskyBase<Derived>::analyzePattern_preordered(const CholMatrixType& ap, bool doLDLT) {
const StorageIndex size = StorageIndex(ap.rows());
m_matrix.resize(size, size);
m_parent.resize(size);
m_nonZerosPerCol.resize(size);
ei_declare_aligned_stack_constructed_variable(StorageIndex, tags, size, 0); template <typename Scalar, typename StorageIndex>
struct simpl_chol_helper {
using CholMatrixType = SparseMatrix<Scalar, ColMajor, StorageIndex>;
using InnerIterator = typename CholMatrixType::InnerIterator;
using VectorI = Matrix<StorageIndex, Dynamic, 1>;
static constexpr StorageIndex kEmpty = -1;
for (StorageIndex k = 0; k < size; ++k) { // Implementation of a stack or last-in first-out structure with some debugging machinery.
/* L(k,:) pattern: all nodes reachable in etree from nz in A(0:k-1,k) */ struct Stack {
m_parent[k] = -1; /* parent of k is not yet known */ StorageIndex* m_data;
tags[k] = k; /* mark node k as visited */ Index m_size;
m_nonZerosPerCol[k] = 0; /* count of nonzeros in column k of L */ #ifndef EIGEN_NO_DEBUG
for (typename CholMatrixType::InnerIterator it(ap, k); it; ++it) { const Index m_maxSize;
StorageIndex i = it.index(); Stack(StorageIndex* data, StorageIndex size, StorageIndex maxSize)
if (i < k) { : m_data(data), m_size(size), m_maxSize(maxSize) {
/* follow path from i to root of etree, stop at flagged node */ eigen_assert(size >= 0);
for (; tags[i] != k; i = m_parent[i]) { eigen_assert(maxSize >= size);
/* find parent of i if not yet determined */ }
if (m_parent[i] == -1) m_parent[i] = k; #else
m_nonZerosPerCol[i]++; /* L (k,i) is nonzero */ Stack(StorageIndex* data, StorageIndex size, StorageIndex /*maxSize*/) : m_data(data), m_size(size) {}
tags[i] = k; /* mark i as visited */ #endif
bool empty() const { return m_size == 0; }
Index size() const { return m_size; }
StorageIndex back() const {
eigen_assert(m_size > 0);
return m_data[m_size - 1];
}
void push(const StorageIndex& value) {
#ifndef EIGEN_NO_DEBUG
eigen_assert(m_size < m_maxSize);
#endif
m_data[m_size] = value;
m_size++;
}
void pop() {
eigen_assert(m_size > 0);
m_size--;
}
};
// Implementation of a disjoint-set or union-find structure with path compression.
struct DisjointSet {
StorageIndex* m_set;
DisjointSet(StorageIndex* set, StorageIndex size) : m_set(set) { std::iota(set, set + size, 0); }
// Find the set representative or root of `u`.
StorageIndex find(StorageIndex u) const {
eigen_assert(u != kEmpty);
while (m_set[u] != u) {
// manually unroll the loop by a factor of 2 to improve performance
u = m_set[m_set[u]];
}
return u;
}
// Perform full path compression such that each node from `u` to `v` points to `v`.
void compress(StorageIndex u, StorageIndex v) {
eigen_assert(u != kEmpty);
eigen_assert(v != kEmpty);
while (m_set[u] != v) {
StorageIndex next = m_set[u];
m_set[u] = v;
u = next;
}
};
};
// Computes the higher adjacency pattern by transposing the input lower adjacency matrix.
// Only the index arrays are calculated, as the values are not needed for the symbolic factorization.
// The outer index array provides the size requirements of the inner index array.
// Computes the outer index array of the higher adjacency matrix.
static void calc_hadj_outer(const StorageIndex size, const CholMatrixType& ap, StorageIndex* outerIndex) {
for (StorageIndex j = 1; j < size; j++) {
for (InnerIterator it(ap, j); it; ++it) {
StorageIndex i = it.index();
if (i < j) outerIndex[i + 1]++;
}
}
std::partial_sum(outerIndex, outerIndex + size + 1, outerIndex);
}
// inner index array
static void calc_hadj_inner(const StorageIndex size, const CholMatrixType& ap, const StorageIndex* outerIndex,
StorageIndex* innerIndex, StorageIndex* tmp) {
std::fill_n(tmp, size, 0);
for (StorageIndex j = 1; j < size; j++) {
for (InnerIterator it(ap, j); it; ++it) {
StorageIndex i = it.index();
if (i < j) {
StorageIndex b = outerIndex[i] + tmp[i];
innerIndex[b] = j;
tmp[i]++;
} }
} }
} }
} }
/* construct Lp index array from m_nonZerosPerCol column counts */ // Adapted from:
StorageIndex* Lp = m_matrix.outerIndexPtr(); // Joseph W. Liu. (1986).
Lp[0] = 0; // A compact row storage scheme for Cholesky factors using elimination trees.
for (StorageIndex k = 0; k < size; ++k) Lp[k + 1] = Lp[k] + m_nonZerosPerCol[k] + (doLDLT ? 0 : 1); // ACM Trans. Math. Softw. 12, 2 (June 1986), 127148. https://doi.org/10.1145/6497.6499
m_matrix.resizeNonZeros(Lp[size]); // Computes the elimination forest of the lower adjacency matrix, a compact representation of the sparse L factor.
// The L factor may contain multiple elimination trees if a column contains only its diagonal element.
// Each elimination tree is an n-ary tree in which each node points to its parent.
static void calc_etree(const StorageIndex size, const CholMatrixType& ap, StorageIndex* parent, StorageIndex* tmp) {
std::fill_n(parent, size, kEmpty);
DisjointSet ancestor(tmp, size);
for (StorageIndex j = 1; j < size; j++) {
for (InnerIterator it(ap, j); it; ++it) {
StorageIndex i = it.index();
if (i < j) {
StorageIndex r = ancestor.find(i);
if (r != j) parent[r] = j;
ancestor.compress(i, j);
}
}
}
}
// Computes the child pointers of the parent tree to facilitate a depth-first search traversal.
static void calc_lineage(const StorageIndex size, const StorageIndex* parent, StorageIndex* firstChild,
StorageIndex* firstSibling) {
std::fill_n(firstChild, size, kEmpty);
std::fill_n(firstSibling, size, kEmpty);
for (StorageIndex j = 0; j < size; j++) {
StorageIndex p = parent[j];
if (p == kEmpty) continue;
StorageIndex c = firstChild[p];
if (c == kEmpty)
firstChild[p] = j;
else {
while (firstSibling[c] != kEmpty) c = firstSibling[c];
firstSibling[c] = j;
}
}
}
// Computes a post-ordered traversal of the elimination tree.
static void calc_post(const StorageIndex size, const StorageIndex* parent, StorageIndex* firstChild,
const StorageIndex* firstSibling, StorageIndex* post, StorageIndex* dfs) {
Stack post_stack(post, 0, size);
for (StorageIndex j = 0; j < size; j++) {
if (parent[j] != kEmpty) continue;
// Begin at a root
Stack dfs_stack(dfs, 0, size);
dfs_stack.push(j);
while (!dfs_stack.empty()) {
StorageIndex i = dfs_stack.back();
StorageIndex c = firstChild[i];
if (c == kEmpty) {
post_stack.push(i);
dfs_stack.pop();
} else {
dfs_stack.push(c);
// Remove the path from `i` to `c` for future traversals.
firstChild[i] = firstSibling[c];
}
}
}
eigen_assert(post_stack.size() == size);
eigen_assert(std::all_of(firstChild, firstChild + size, [](StorageIndex a) { return a == kEmpty; }));
}
// Adapted from:
// Gilbert, J. R., Ng, E., & Peyton, B. W. (1994).
// An efficient algorithm to compute row and column counts for sparse Cholesky factorization.
// SIAM Journal on Matrix Analysis and Applications, 15(4), 10751091.
// Computes the non-zero pattern of the L factor.
static void calc_colcount(const StorageIndex size, const StorageIndex* hadjOuter, const StorageIndex* hadjInner,
const StorageIndex* parent, StorageIndex* prevLeaf, StorageIndex* tmp,
const StorageIndex* post, StorageIndex* nonZerosPerCol, bool doLDLT) {
// initialize nonZerosPerCol with 1 for leaves, 0 for non-leaves
std::fill_n(nonZerosPerCol, size, 1);
for (StorageIndex j = 0; j < size; j++) {
StorageIndex p = parent[j];
// p is not a leaf
if (p != kEmpty) nonZerosPerCol[p] = 0;
}
DisjointSet parentSet(tmp, size);
// prevLeaf is already initialized
eigen_assert(std::all_of(prevLeaf, prevLeaf + size, [](StorageIndex a) { return a == kEmpty; }));
for (StorageIndex j_ = 0; j_ < size; j_++) {
StorageIndex j = post[j_];
nonZerosPerCol[j] += hadjOuter[j + 1] - hadjOuter[j];
for (StorageIndex k = hadjOuter[j]; k < hadjOuter[j + 1]; k++) {
StorageIndex i = hadjInner[k];
eigen_assert(i > j);
StorageIndex prev = prevLeaf[i];
if (prev != kEmpty) {
StorageIndex q = parentSet.find(prev);
parentSet.compress(prev, q);
nonZerosPerCol[q]--;
}
prevLeaf[i] = j;
}
StorageIndex p = parent[j];
if (p != kEmpty) parentSet.compress(j, p);
}
for (StorageIndex j = 0; j < size; j++) {
StorageIndex p = parent[j];
if (p != kEmpty) nonZerosPerCol[p] += nonZerosPerCol[j] - 1;
if (doLDLT) nonZerosPerCol[j]--;
}
}
// Finalizes the non zero pattern of the L factor and allocates the memory for the factorization.
static void init_matrix(const StorageIndex size, const StorageIndex* nonZerosPerCol, CholMatrixType& L) {
eigen_assert(L.outerIndexPtr()[0] == 0);
std::partial_sum(nonZerosPerCol, nonZerosPerCol + size, L.outerIndexPtr() + 1);
L.resizeNonZeros(L.outerIndexPtr()[size]);
}
// Driver routine for the symbolic sparse Cholesky factorization.
static void run(const StorageIndex size, const CholMatrixType& ap, CholMatrixType& L, VectorI& parent,
VectorI& workSpace, bool doLDLT) {
parent.resize(size);
workSpace.resize(4 * size);
L.resize(size, size);
StorageIndex* tmp1 = workSpace.data();
StorageIndex* tmp2 = workSpace.data() + size;
StorageIndex* tmp3 = workSpace.data() + 2 * size;
StorageIndex* tmp4 = workSpace.data() + 3 * size;
// Borrow L's outer index array for the higher adjacency pattern.
StorageIndex* hadj_outer = L.outerIndexPtr();
calc_hadj_outer(size, ap, hadj_outer);
// Request additional temporary storage for the inner indices of the higher adjacency pattern.
ei_declare_aligned_stack_constructed_variable(StorageIndex, hadj_inner, hadj_outer[size], nullptr);
calc_hadj_inner(size, ap, hadj_outer, hadj_inner, tmp1);
calc_etree(size, ap, parent.data(), tmp1);
calc_lineage(size, parent.data(), tmp1, tmp2);
calc_post(size, parent.data(), tmp1, tmp2, tmp3, tmp4);
calc_colcount(size, hadj_outer, hadj_inner, parent.data(), tmp1, tmp2, tmp3, tmp4, doLDLT);
init_matrix(size, tmp4, L);
}
};
} // namespace internal
template <typename Derived>
void SimplicialCholeskyBase<Derived>::analyzePattern_preordered(const CholMatrixType& ap, bool doLDLT) {
using Helper = internal::simpl_chol_helper<Scalar, StorageIndex>;
eigen_assert(ap.innerSize() == ap.outerSize());
const StorageIndex size = internal::convert_index<StorageIndex>(ap.outerSize());
Helper::run(size, ap, m_matrix, m_parent, m_workSpace, doLDLT);
m_isInitialized = true; m_isInitialized = true;
m_info = Success; m_info = Success;
@ -70,20 +295,21 @@ template <typename Derived>
template <bool DoLDLT, bool NonHermitian> template <bool DoLDLT, bool NonHermitian>
void SimplicialCholeskyBase<Derived>::factorize_preordered(const CholMatrixType& ap) { void SimplicialCholeskyBase<Derived>::factorize_preordered(const CholMatrixType& ap) {
using std::sqrt; using std::sqrt;
const StorageIndex size = StorageIndex(ap.rows());
eigen_assert(m_analysisIsOk && "You must first call analyzePattern()"); eigen_assert(m_analysisIsOk && "You must first call analyzePattern()");
eigen_assert(ap.rows() == ap.cols()); eigen_assert(ap.rows() == ap.cols());
eigen_assert(m_parent.size() == ap.rows()); eigen_assert(m_parent.size() == size);
eigen_assert(m_nonZerosPerCol.size() == ap.rows()); eigen_assert(m_workSpace.size() >= 3 * size);
const StorageIndex size = StorageIndex(ap.rows());
const StorageIndex* Lp = m_matrix.outerIndexPtr(); const StorageIndex* Lp = m_matrix.outerIndexPtr();
StorageIndex* Li = m_matrix.innerIndexPtr(); StorageIndex* Li = m_matrix.innerIndexPtr();
Scalar* Lx = m_matrix.valuePtr(); Scalar* Lx = m_matrix.valuePtr();
ei_declare_aligned_stack_constructed_variable(Scalar, y, size, 0); ei_declare_aligned_stack_constructed_variable(Scalar, y, size, 0);
ei_declare_aligned_stack_constructed_variable(StorageIndex, pattern, size, 0); StorageIndex* nonZerosPerCol = m_workSpace.data();
ei_declare_aligned_stack_constructed_variable(StorageIndex, tags, size, 0); StorageIndex* pattern = m_workSpace.data() + size;
StorageIndex* tags = m_workSpace.data() + 2 * size;
bool ok = true; bool ok = true;
m_diag.resize(DoLDLT ? size : 0); m_diag.resize(DoLDLT ? size : 0);
@ -93,7 +319,7 @@ void SimplicialCholeskyBase<Derived>::factorize_preordered(const CholMatrixType&
y[k] = Scalar(0); // Y(0:k) is now all zero y[k] = Scalar(0); // Y(0:k) is now all zero
StorageIndex top = size; // stack for pattern is empty StorageIndex top = size; // stack for pattern is empty
tags[k] = k; // mark node k as visited tags[k] = k; // mark node k as visited
m_nonZerosPerCol[k] = 0; // count of nonzeros in column k of L nonZerosPerCol[k] = 0; // count of nonzeros in column k of L
for (typename CholMatrixType::InnerIterator it(ap, k); it; ++it) { for (typename CholMatrixType::InnerIterator it(ap, k); it; ++it) {
StorageIndex i = it.index(); StorageIndex i = it.index();
if (i <= k) { if (i <= k) {
@ -124,13 +350,13 @@ void SimplicialCholeskyBase<Derived>::factorize_preordered(const CholMatrixType&
else else
yi = l_ki = yi / Lx[Lp[i]]; yi = l_ki = yi / Lx[Lp[i]];
Index p2 = Lp[i] + m_nonZerosPerCol[i]; Index p2 = Lp[i] + nonZerosPerCol[i];
Index p; Index p;
for (p = Lp[i] + (DoLDLT ? 0 : 1); p < p2; ++p) y[Li[p]] -= getSymm(Lx[p]) * yi; for (p = Lp[i] + (DoLDLT ? 0 : 1); p < p2; ++p) y[Li[p]] -= getSymm(Lx[p]) * yi;
d -= getDiag(l_ki * getSymm(yi)); d -= getDiag(l_ki * getSymm(yi));
Li[p] = k; /* store L(k,i) in column form of L */ Li[p] = k; /* store L(k,i) in column form of L */
Lx[p] = l_ki; Lx[p] = l_ki;
++m_nonZerosPerCol[i]; /* increment count of nonzeros in col i */ ++nonZerosPerCol[i]; /* increment count of nonzeros in col i */
} }
if (DoLDLT) { if (DoLDLT) {
m_diag[k] = d; m_diag[k] = d;
@ -139,7 +365,7 @@ void SimplicialCholeskyBase<Derived>::factorize_preordered(const CholMatrixType&
break; break;
} }
} else { } else {
Index p = Lp[k] + m_nonZerosPerCol[k]++; Index p = Lp[k] + nonZerosPerCol[k]++;
Li[p] = k; /* store L(k,k) = sqrt (d) in column k */ Li[p] = k; /* store L(k,k) = sqrt (d) in column k */
if (NonHermitian ? d == RealScalar(0) : numext::real(d) <= RealScalar(0)) { if (NonHermitian ? d == RealScalar(0) : numext::real(d) <= RealScalar(0)) {
ok = false; /* failure, matrix is not positive definite */ ok = false; /* failure, matrix is not positive definite */