From b6267507ea08bf572666bf634bc3a6fabe6aba11 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Fri, 25 May 2012 18:17:57 +0200 Subject: [PATCH 01/73] Add preliminary files for SparseLU --- Eigen/src/SparseLU/SparseLU.h | 341 ++++++++++++++++++ Eigen/src/SparseLU/SparseLU_Coletree.h | 188 ++++++++++ Eigen/src/SparseLU/SparseLU_Matrix.h | 74 ++++ Eigen/src/SparseLU/SparseLU_Memory.h | 242 +++++++++++++ Eigen/src/SparseLU/SparseLU_Structs.h | 122 +++++++ Eigen/src/SparseLU/SparseLU_Utils.h | 32 ++ .../src/SparseLU/SparseLU_heap_relax_snode.h | 133 +++++++ Eigen/src/SparseLU/SparseLU_panel_dfs.h | 221 ++++++++++++ Eigen/src/SparseLU/SparseLU_pivotL.h | 132 +++++++ Eigen/src/SparseLU/SparseLU_relax_snode.h | 89 +++++ Eigen/src/SparseLU/SparseLU_snode_bmod.h | 88 +++++ Eigen/src/SparseLU/SparseLU_snode_dfs.h | 119 ++++++ 12 files changed, 1781 insertions(+) create mode 100644 Eigen/src/SparseLU/SparseLU.h create mode 100644 Eigen/src/SparseLU/SparseLU_Coletree.h create mode 100644 Eigen/src/SparseLU/SparseLU_Matrix.h create mode 100644 Eigen/src/SparseLU/SparseLU_Memory.h create mode 100644 Eigen/src/SparseLU/SparseLU_Structs.h create mode 100644 Eigen/src/SparseLU/SparseLU_Utils.h create mode 100644 Eigen/src/SparseLU/SparseLU_heap_relax_snode.h create mode 100644 Eigen/src/SparseLU/SparseLU_panel_dfs.h create mode 100644 Eigen/src/SparseLU/SparseLU_pivotL.h create mode 100644 Eigen/src/SparseLU/SparseLU_relax_snode.h create mode 100644 Eigen/src/SparseLU/SparseLU_snode_bmod.h create mode 100644 Eigen/src/SparseLU/SparseLU_snode_dfs.h diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h new file mode 100644 index 000000000..f5a1c787e --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU.h @@ -0,0 +1,341 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see <http://www.gnu.org/licenses/>. 
+ + +#ifndef EIGEN_SPARSE_LU +#define EIGEN_SPARSE_LU + +#include +#include +#include +#include +#include +#include +namespace Eigen { + +template +class SparseLU +{ + public: + typedef _MatrixType MatrixType; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::Index Index; + typedef SparseMatrix NCMatrix; + typedef SuperNodalMatrix SCMatrix; + typedef GlobalLU_t Eigen_GlobalLU_t; + typedef Matrix VectorType; + typedef PermutationMatrix PermutationType; + public: + SparseLU():m_isInitialized(true),m_symmetricmode(false),m_fact(DOFACT),m_diagpivotthresh(1.0) + { + initperfvalues(); + } + SparseLU(const MatrixType& matrix):SparseLU() + { + + compute(matrix); + } + + ~SparseLU() + { + + } + + void analyzePattern (const MatrixType& matrix); + void factorize (const MatrixType& matrix); + void compute (const MatrixType& matrix); + + /** Indicate that the pattern of the input matrix is symmetric */ + void isSymmetric(bool sym) + { + m_symmetricmode = sym; + } + + /** Set the threshold used for a diagonal entry to be an acceptable pivot. 
*/ + void diagPivotThresh(RealScalar thresh) + { + m_diagpivotthresh = thresh; + } + protected: + // Functions + void initperfvalues(); + + // Variables + mutable ComputationInfo m_info; + bool m_isInitialized; + bool m_factorizationIsOk; + bool m_analysisIsOk; + fact_t m_fact; + NCMatrix m_mat; // The input (permuted ) matrix + SCMatrix m_Lstore; // The lower triangular matrix (supernodal) + NCMatrix m_Ustore; //The upper triangular matrix + PermutationType m_perm_c; // Column permutation + PermutationType m_iperm_c; // Column permutation + PermutationType m_perm_r ; // Row permutation + PermutationType m_iperm_r ; // Inverse row permutation + VectorXi m_etree; // Column elimination tree + + Scalar *m_work; // + Index *m_iwork; // + static Eigen_GlobalLU_t m_Glu; // persistent data to facilitate multiple factors + // should be defined as a class member + // SuperLU/SparseLU options + bool m_symmetricmode; + + // values for performance + int m_panel_size; // a panel consists of at most consecutive columns + int m_relax; // To control degree of relaxing supernodes. 
If the number of nodes (columns) + // in a subtree of the elimination tree is less than relax, this subtree is considered + // as one supernode regardless of the row structures of those columns + int m_maxsuper; // The maximum size for a supernode in complete LU + int m_rowblk; // The minimum row dimension for 2-D blocking to be used; + int m_colblk; // The minimum column dimension for 2-D blocking to be used; + int m_fillfactor; // The estimated fills factors for L and U, compared with A + RealScalar m_diagpivotthresh; // Specifies the threshold used for a diagonal entry to be an acceptable pivot + + private: + // Copy constructor + SparseLU (SparseLU& ) {} + +}; // End class SparseLU + +/* Set the default values for performance */ +void SparseLU::initperfvalues() +{ + m_panel_size = 12; + m_relax = 1; + m_maxsuper = 100; + m_rowblk = 200; + m_colblk = 60; + m_fillfactor = 20; +} + + +/** + * Compute the column permutation to minimize the fill-in (file amd.c ) + * - Apply this permutation to the input matrix - + * - Compute the column elimination tree on the permuted matrix (file Eigen_Coletree.h) + * - Postorder the elimination tree and the column permutation (file Eigen_Coletree.h) + * - + */ +template +void SparseLU::analyzePattern(const MatrixType& mat) +{ + // Compute the column permutation + AMDordering amd(mat); + m_perm_c = amd.get_perm_c(); + // Apply the permutation to the column of the input matrix + m_mat = mat * m_perm_c; //how is the permutation represented ??? 
+ + // Compute the column elimination tree of the permuted matrix + if (m_etree.size() == 0) m_etree.resize(m_mat.cols()); + internal::sp_coletree(m_mat, m_etree); + + // In symmetric mode, do not do postorder here + if (m_symmetricmode == false) { + VectorXi post, iwork; + // Post order etree + post = internal::TreePostorder(m_mat.cols(), m_etree); + + // Renumber etree in postorder + iwork.resize(n+1); + for (i = 0; i < n; ++i) iwork(post(i)) = post(m_etree(i)); + m_etree = iwork; + + // Postmultiply A*Pc by post, + // i.e reorder the matrix according to the postorder of the etree + // FIXME Check if this is available : constructor from a vector + PermutationType post_perm(post); + m_mat = m_mat * post_perm; + + // Product of m_perm_c and post + for (i = 0; i < n; ++i) iwork(i) = m_perm_c(post_perm.indices()(i)); + m_perm_c = iwork; + } // end postordering +} + +/** + * - Numerical factorization + * - Interleaved with the symbolic factorization + * \tparam MatrixType The type of the matrix, it should be a column-major sparse matrix + * \return info where + * : successful exit + * = 0: successful exit + * > 0: if info = i, and i is + * <= A->ncol: U(i,i) is exactly zero. The factorization has + * been completed, but the factor U is exactly singular, + * and division by zero will occur if it is used to solve a + * system of equations. + * > A->ncol: number of bytes allocated when memory allocation + * failure occurred, plus A->ncol. If lwork = -1, it is + * the estimated amount of space needed, plus A->ncol. 
+ */ +template +int SparseLU::factorize(const MatrixType& matrix) +{ + + // Allocate storage common to the factor routines + int lwork = 0; + int info = LUMemInit(lwork); + eigen_assert ( (info == 0) && "Unable to allocate memory for the factors"); + + int m = m_mat.rows(); + int n = m_mat.cols(); + int maxpanel = m_panel_size * m; + + // Set up pointers for integer working arrays + Map segrep(m_iwork, m); // + Map parent(&segrep(0) + m, m); // + Map xplore(&parent(0) + m, m); // + Map repfnz(&xplore(0) + m, maxpanel); // + Map panel_lsub(&repfnz(0) + maxpanel, maxpanel);// + Map xprune(&panel_lsub(0) + maxpanel, n); // + Map marker(&xprune(0)+n, m * LU_NO_MARKER); // + repfnz.setConstant(-1); + panel_lsub.setConstant(-1); + + // Set up pointers for scalar working arrays + VectorType dense(maxpanel); + dense.setZero(); + VectorType tempv(LU_NUM_TEMPV(m,m_panel_size,m_maxsuper,m_rowblk); + tempv.setZero(); + + // Setup Permutation vectors + PermutationType iperm_r; // inverse of perm_r + if (m_fact = SamePattern_SameRowPerm) + iperm_r = m_perm_r.inverse(); + // Compute the inverse of perm_c + PermutationType iperm_c; + iperm_c = m_perm_c.inverse(); + + // Identify initial relaxed snodes + VectorXi relax_end(n); + if ( m_symmetricmode = true ) + LU_heap_relax_snode(n, m_etree, m_relax, marker, relax_end); + else + LU_relax_snode(n, m_etree, m_relax, marker, relax_end); + + m_perm_r.setConstant(-1); + marker.setConstant(-1); + + VectorXi& xsup = m_Glu.xsup; + VectorXi& supno = m_GLu.supno; + VectorXi& xlsub = m_Glu.xlsub; + VectorXi& xlusup = m_GLu.xlusup; + VectorXi& xusub = m_Glu.xusub; + + supno(0) = -1; + xsup(0) = xlsub(0) = xusub(0) = xlusup(0); + int panel_size = m_panel_size; + int wdef = panel_size; // upper bound on panel width + + // Work on one 'panel' at a time. 
A panel is one of the following : + // (a) a relaxed supernode at the bottom of the etree, or + // (b) panel_size contiguous columns, defined by the user + register int jcol,kcol; + int min_mn = std::min(m,n); + VectorXi panel_histo(n); + bool ok = true; + Index nextu, nextlu, jsupno, fsupc, new_next; + int pivrow; // Pivotal row number in the original row matrix + int nseg1; // Number of segments in U-column above panel row jcol + int nseg; // Number of segments in each U-column + for (jcol = 0; jcol < min_mn; ) + { + if (relax_end(jcol) != -1) + { // Starting a relaxed node from jcol + kcol = relax_end(jcol); // End index of the relaxed snode + + // Factorize the relaxed supernode(jcol:kcol) + // First, determine the union of the row structure of the snode + info = LU_snode_dfs(jcol, kcol, m_mat.innerIndexPtr(), m_mat.outerIndexPtr(), xprune, marker); + if ( !info ) + { + ok = false; + break; + } + nextu = xusub(jcol); //starting location of column jcol in ucol + nextlu = xlusup(jcol); //Starting location of column jcol in lusup (rectangular supernodes) + jsupno = supno(jcol); // Supernode number which column jcol belongs to + fsupc = xsup(jsupno); //First column number of the current supernode + new_next = nextlu + (xlsub(fsupc+1)-xlsub(fsupc)) * (kcol - jcol + 1); + nzlumax = m_Glu.nzlumax; + while (new_next > nzlumax ) + { + m_Glu.lusup = LUMemXpand(jcol, nextlu, LUSUP, nzlumax); + m_GLu.nzlumax = nzlumax; + } + // Now, left-looking factorize each column within the snode + for (icol = jcol; icol<=kcol; icol++){ + xusub(icol+1) = nextu; + // Scatter into SPA dense(*) + for (typename MatrixType::InnerIterator it(m_mat, icol); it; ++it) + dense(it.row()) = it.val(); + + // Numeric update within the snode + LU_snode_bmod(icol, jsupno, fsupc, dense, tempv); + + // Eliminate the current column + info = LU_pivotL(icol, pivrow); + eigen_assert(info == 0 && "The matrix is structurally singular"); + } + jcol = icol; // The last column te be eliminated + } + else + { // 
Work on one panel of panel_size columns + + // Adjust panel size so that a panel won't overlap with the next relaxed snode. + panel_size = w_def; + for (k = jcol + 1; k < std::min(jcol+panel_size, min_mn); k++) + { + if (relax_end(k) != -1) + { + panel_size = k - jcol; + break; + } + } + if (k == min_mn) + panel_size = min_mn - jcol; + + // Symbolic outer factorization on a panel of columns + LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r, nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_Glu); + + // Numeric sup-panel updates in topological order + LU_panel_bmod(m, panel_size, jcol); + + // Sparse LU within the panel, and below the panel diagonal + for ( jj = jcol, j< jcol + panel_size; jj++) + { + k = (jj - jcol) * m; // Column index for w-wide arrays + } // end for + jcol += panel_size; // Move to the next panel + } // end else + } // end for -- end elimination + m_info = ok ? Success : NumericalIssue; + m_factorizationIsOk = ok; +} + + +} // End namespace Eigen +#endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_Coletree.h b/Eigen/src/SparseLU/SparseLU_Coletree.h new file mode 100644 index 000000000..d57048883 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_Coletree.h @@ -0,0 +1,188 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. 
+// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see <http://www.gnu.org/licenses/>. + +/* + + * NOTE: This file is the modified version of sp_coletree.c file in SuperLU + + * -- SuperLU routine (version 3.1) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * August 1, 2008 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_COLETREE_H +#define SPARSELU_COLETREE_H + +/** Compute the column elimination tree of a sparse matrix + * NOTE : The matrix is supposed to be in column-major format. 
+ * + */ +template +int LU_sp_coletree(const MatrixType& mat, VectorXi& parent) +{ + int nc = mat.cols(); // Number of columns + int nr = mat.rows(); // Number of rows + + VectorXi root(nc); // root of subtree of etree + root.setZero(); + VectorXi pp(nc); // disjoint sets + pp.setZero(); // Initialize disjoint sets + VectorXi firstcol(nr); // First nonzero column in each row + firstcol.setZero(); + + //Compute firstcol[row] + int row,col; + firstcol.setConstant(nc); //for (row = 0; row < nr; firstcol(row++) = nc); + for (col = 0; col < nc; col++) + { + for (typename MatrixType::InnerIterator it(mat, col); it; ++it) + { // Is it necessary to brows the whole matrix, the lower part should do the job ?? + row = it.row(); + firstcol(row) = std::min(firstcol(row), col); + } + } + /* Compute etree by Liu's algorithm for symmetric matrices, + except use (firstcol[r],c) in place of an edge (r,c) of A. + Thus each row clique in A'*A is replaced by a star + centered at its first vertex, which has the same fill. 
*/ + int rset, cset, rroot; + for (col = 0; col < nc; col++) + { + pp(col) = cset = col; // Initially, each element is in its own set + root(cset) = col; + parent(col) = nc; + for (typename MatrixType::InnerIterator it(mat, col); it; ++it) + { // A sequence of interleaved find and union is performed + row = firstcol(it.row()); + if (row >= col) continue; + rset = internal::etree_find(row, pp); // Find the name of the set containing row + rroot = root(rset); + if (rroot != col) + { + parent(rroot) = col; + pp(cset) = cset = rset; // Get the union of cset and rset + root(cset) = col; + } + } + } + return 0; +} + +/** Find the root of the tree/set containing the vertex i : Use Path halving */ +int etree_find (int i, VectorXi& pp) +{ + int p = pp(i); // Parent + int gp = pp(p); // Grand parent + while (gp != p) + { + pp(i) = gp; // Parent pointer on find path is changed to former grand parent + i = gp; + p = pp(i); + gp = pp(p); + } + return p; +} + +/** + * Post order a tree + */ +VectorXi TreePostorder(int n, VectorXi& parent) +{ + VectorXi first_kid, next_kid; // Linked list of children + VectorXi post; // postordered etree + int postnum; + // Allocate storage for working arrays and results + first_kid.resize(n+1); + next_kid.setZero(n+1); + post.setZero(n+1); + + // Set up structure describing children + int v, dad; + first_kid.setConstant(-1); + for (v = n-1, v >= 0; v--) + { + dad = parent(v); + next_kid(v) = first_kid(dad); + first_kid(dad) = v; + } + + // Depth-first search from dummy root vertex #n + postnum = 0; + internal::nr_etdfs(n, parent, first_kid, next_kid, post, postnum); + return post; +} +/** + * Depth-first search from vertex n. No recursion. + * This routine was contributed by Cédric Doucet, CEDRAT Group, Meylan, France. 
+*/ +void nr_etdfs (int n, int *parent, int* first_kid, int *next_kid, int *post, int postnum) +{ + int current = n, first, next; + while (postnum != n) + { + // No kid for the current node + first = first_kid(current); + + // no first kid for the current node + if (first == -1) + { + // Numbering this node because it has no kid + post(current) = postnum++; + + // looking for the next kid + next = next_kid(current); + while (next == -1) + { + // No more kids : back to the parent node + current = parent(current); + // numbering the parent node + post(current) = postnum++; + // Get the next kid + next = next_kid(current); + } + // stopping criterion + if (postnum==n+1) return; + + // Updating current node + current = next; + } + else + { + current = first; + } + } +} + +#endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_Matrix.h b/Eigen/src/SparseLU/SparseLU_Matrix.h new file mode 100644 index 000000000..c4d56ee0a --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_Matrix.h @@ -0,0 +1,74 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// Copyright (C) 2012 Gael Guennebaud +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#ifndef EIGEN_SPARSELU_MATRIX_H +#define EIGEN_SPARSELU_MATRIX_H + +/** \ingroup SparseLU_Module + * \brief a class to manipulate the supernodal matrices in the SparseLU factorization + * + * This class extends the class SparseMatrix and should contain the data to easily store + * and manipulate the supernodes during the factorization and solution phase of Sparse LU. + * Only the lower triangular matrix has supernodes. + * + * NOTE : This class corresponds to the SCformat structure in SuperLU + * + */ + +template +class SuperNodalMatrix +{ + public: + SCMatrix() + { + + } + + ~SCMatrix() + { + + } + operator SparseMatrix(); + + protected: + Index nnz; // Number of nonzero values + Index nsupper; // Index of the last supernode + Scalar *nzval; //array of nonzero values packed by (supernode ??) column + Index *nzval_colptr; //nzval_colptr[j] Stores the location in nzval[] which starts column j + Index *rowind; // Array of compressed row indices of rectangular supernodes + Index rowind_colptr; //rowind_colptr[j] stores the location in rowind[] which starts column j + Index *col_to_sup; // col_to_sup[j] is the supernode number to which column j belongs + Index *sup_to_col; //sup_to_col[s] points to the starting column of the s-th supernode + // Index *nzval_colptr corresponds to m_outerIndex in SparseMatrix + + private : + SuperNodalMatrix(SparseMatrix& ) {} +}; + +SuperNodalMatrix::operator SparseMatrix() +{ + +} +#endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h new file mode 100644 index 000000000..6e0fc658d --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_Memory.h @@ -0,0 +1,242 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see <http://www.gnu.org/licenses/>. + +/* + + * NOTE: This file is the modified version of [s,d,c,z]memory.c files in SuperLU + + * -- SuperLU routine (version 3.1) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * August 1, 2008 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. 
+ */ + +#ifndef EIGEN_SPARSELU_MEMORY +#define EIGEN_SPARSELU_MEMORY + +#define LU_Reduce(alpha) ((alpha + 1) / 2) // i.e (alpha-1)/2 + 1 +#define LU_GluIntArray(n) (5* (n) + 5) +#define LU_TempSpace(m, w) ( (2*w + 4 + LU_NO_MARKER) * m * sizeof(Index) \ + + (w + 1) * m * sizeof(Scalar) +namespace internal { + +/* Allocate various working space needed in the numerical factorization phase. + * m_work : space fot the output data structures (lwork is the size) + * m_Glu: persistent data to facilitate multiple factors : is it really necessary ?? + * NOTE Unlike SuperLU, this routine does not allow the user to provide the size to allocate + * nor it return an estimated amount of space required. + * + * Useful variables : + * - m_fillratio : Ratio of fill expected + * - lwork = -1 : return an estimated size of the required memory + * = 0 : Estimate and allocate the memory + */ +template +int SparseLU::LUMemInit(int lwork) +{ + int iword = sizeof(Index); + int dword = sizeof(Scalar); + int n = m_Glu.n = m_mat.cols(); + int m = m_mat.rows(); + m_Glu.num_expansions = 0; // No memory expansions so far ?? + int estimated_size; + + + if (!m_Glu.expanders) + m_Glu.expanders = new ExpHeader(NO_MEMTYPE); + + if (m_fact_t != SamePattern_SameRowPerm) // Create space for a new factorization + { + // Guess the size for L\U factors + int annz = m_mat.nonZeros(); + int nzlmax, nzumax, nzlumax; + nzumax = nzlumax = m_fillratio * annz; // ??? + nzlmax = std::max(1, m_fill_ratio/4.) * annz; //??? + + // Return the estimated size to the user if necessary + if (lwork = -1) + { + estimated_size = LU_GluIntArray(n) * iword + LU_TempSpace(m, m_panel_size) + + (nzlmax + nzumax) * iword + (nzlumax+nzumax) * dword + n); + return estimated_size; + } + + // Setup the required space + // NOTE: In SuperLU, there is an option to let the user provide its own space. 
+ + // Allocate Integer pointers for L\U factors.resize(n+1); + m_Glu.supno.resize(n+1); + m_Glu.xlsub.resize(n+1); + m_Glu.xlusup.resize(n+1); + m_Glu.xusub.resize(n+1); + + // Reserve memory for L/U factors + m_Glu.lusup = internal::expand(nzlumax, LUSUP, 0, 0, m_Glu); + m_Glu.ucol = internal::expand(nzumax, UCOL, 0, 0, m_Glu); + m_Glu.lsub = internal::expand(nzlmax, LSUB, 0, 0, m_Glu); + m_Glu.usub = internal::expand(nzumax, USUB, 0, 1, m_Glu); + + // Check if the memory is correctly allocated, + while ( !m_Glu.lusup || !m_Glu.ucol || !m_Glu.lsub || !m_Glu.usub) + { + //otherwise reduce the estimated size and retry + delete [] m_Glu.lusup; + delete [] m_Glu.ucol; + delete [] m_Glu.lsub; + delete [] m_Glu.usub; + + nzlumax /= 2; + nzumax /= 2; + nzlmax /= 2; + eigen_assert (nzlumax > annz && "Not enough memory to perform factorization"); + + m_Glu.lusup = internal::expand(nzlumax, LUSUP, 0, 0, m_Glu); + m_Glu.ucol = internal::expand(nzumax, UCOL, 0, 0, m_Glu); + m_Glu.lsub = internal::expand(nzlmax, LSUB, 0, 0, m_Glu); + m_Glu.usub = internal::expand(nzumax, USUB, 0, 1, m_Glu); + } + } + else // m_fact == SamePattern_SameRowPerm; + { + if (lwork = -1) + { + estimated_size = LU_GluIntArray(n) * iword + LU_TempSpace(m, m_panel_size) + + (Glu.nzlmax + Glu.nzumax) * iword + (Glu.nzlumax+Glu.nzumax) * dword + n); + return estimated_size; + } + // Use existing space from previous factorization + // Unlike in SuperLU, this should not be necessary here since m_Glu is persistent as a member of the class + m_Glu.xsup = m_Lstore.sup_to_col; + m_Glu.supno = m_Lstore.col_to_sup; + m_Glu.xlsub = m_Lstore.rowind_colptr; + m_Glu.xlusup = m_Lstore.nzval_colptr; + xusub = m_Ustore.outerIndexPtr(); + + m_Glu.expanders[LSUB].size = m_Glu.nzlmax; // Maximum value from previous factorization + m_Glu.expanders[LUSUP].size = m_Glu.nzlumax; + m_Glu.expanders[USUB].size = GLu.nzumax; + m_Glu.expanders[UCOL].size = m_Glu.nzumax; + m_Glu.lsub = GLu.expanders[LSUB].mem = m_Lstore.rowind; + 
m_Glu.lusup = GLu.expanders[LUSUP].mem = m_Lstore.nzval; + GLu.usub = m_Glu.expanders[USUB].mem = m_Ustore.InnerIndexPtr(); + m_Glu.ucol = m_Glu.expanders[UCOL].mem = m_Ustore.valuePtr(); + } + + // LUWorkInit : Now, allocate known working storage + int isize = (2 * m_panel_size + 3 + LU_NO_MARKER) * m + n; + int dsize = m * m_panel_size + LU_NUM_TEMPV(m, m_panel_size, m_maxsuper, m_rowblk); + m_iwork = new Index(isize); + eigen_assert( (m_iwork != 0) && "Malloc fails for iwork"); + m_work = new Scalar(dsize); + eigen_assert( (m_work != 0) && "Malloc fails for dwork"); + + ++m_Glu.num_expansions; + return 0; +} // end LuMemInit + +/** + * Expand the existing storage to accomodate more fill-ins + */ +template +DestType* SparseLU::expand(int& prev_len, // Length from previous call + MemType type, // Which part of the memory to expand + int len_to_copy, // Size of the memory to be copied to new store + int keep_prev) // = 1: use prev_len; Do not expand this vector + // = 0: compute new_len to expand) +{ + + float alpha = 1.5; // Ratio of the memory increase + int new_len; // New size of the allocated memory + if(m_Glu.num_expansions == 0 || keep_prev) + new_len = prev_len; + else + new_len = alpha * prev_len; + + // Allocate new space + DestType *new_mem, *old_mem; + new_mem = new DestType(new_len); + if ( m_Glu.num_expansions != 0 ) // The memory has been expanded before + { + int tries = 0; + if (keep_prev) + { + if (!new_mem) return 0; + } + else + { + while ( !new_mem) + { + // Reduce the size and allocate again + if ( ++tries > 10) return 0; + alpha = LU_Reduce(alpha); + new_len = alpha * prev_len; + new_mem = new DestType(new_len); + } + } // keep_prev + //Copy the previous values to the newly allocated space + ExpHeader* expanders = m_Glu.expanders; + std::memcpy(new_mem, expanders[type].mem, len_to_copy); + delete [] expanders[type].mem; + } + expanders[type].mem = new_mem; + expanders[type].size = new_len; + prev_len = new_len; + if(m_Glu.num_expansions) 
++m_Glu.num_expansions; + return expanders[type].mem; +} + +/** + * \brief Expand the existing storage + * + * NOTE: The calling sequence of this function is different from that of SuperLU + * + * \return a pointer to the newly allocated space + */ +template +DestType* SparseLU::LUMemXpand(int jcol, int next, MemType mem_type, int& maxlen) +{ + DestType *newmem; + if (memtype == USUB) + new_mem = expand(maxlen, mem_type, next, 1); + else + new_mem = expand(maxlen, mem_type, next, 0); + eigen_assert(new_mem && "Can't expand memory"); + + return new_mem; + +} + +}// Namespace Internal +#endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_Structs.h b/Eigen/src/SparseLU/SparseLU_Structs.h new file mode 100644 index 000000000..72e1db343 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_Structs.h @@ -0,0 +1,122 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . 
+ +/* + * NOTE: Part of this file is the modified version of files slu_[s,d,c,z]defs.h + * -- SuperLU routine (version 4.1) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * November, 2010 + * + * Global data structures used in LU factorization - + * + * nsuper: #supernodes = nsuper + 1, numbered [0, nsuper]. + * (xsup,supno): supno[i] is the supernode no to which i belongs; + * xsup(s) points to the beginning of the s-th supernode. + * e.g. supno 0 1 2 2 3 3 3 4 4 4 4 4 (n=12) + * xsup 0 1 2 4 7 12 + * Note: dfs will be performed on supernode rep. relative to the new + * row pivoting ordering + * + * (xlsub,lsub): lsub[*] contains the compressed subscript of + * rectangular supernodes; xlsub[j] points to the starting + * location of the j-th column in lsub[*]. Note that xlsub + * is indexed by column. + * Storage: original row subscripts + * + * During the course of sparse LU factorization, we also use + * (xlsub,lsub) for the purpose of symmetric pruning. For each + * supernode {s,s+1,...,t=s+r} with first column s and last + * column t, the subscript set + * lsub[j], j=xlsub[s], .., xlsub[s+1]-1 + * is the structure of column s (i.e. structure of this supernode). + * It is used for the storage of numerical values. + * Furthermore, + * lsub[j], j=xlsub[t], .., xlsub[t+1]-1 + * is the structure of the last column t of this supernode. + * It is for the purpose of symmetric pruning. Therefore, the + * structural subscripts can be rearranged without making physical + * interchanges among the numerical values. + * + * However, if the supernode has only one column, then we + * only keep one set of subscripts. For any subscript interchange + * performed, similar interchange must be done on the numerical + * values. + * + * The last column structures (for pruning) will be removed + * after the numerical LU factorization phase. 
+ * + * (xlusup,lusup): lusup[*] contains the numerical values of the + * rectangular supernodes; xlusup[j] points to the starting + * location of the j-th column in storage vector lusup[*] + * Note: xlusup is indexed by column. + * Each rectangular supernode is stored by column-major + * scheme, consistent with Fortran 2-dim array storage. + * + * (xusub,ucol,usub): ucol[*] stores the numerical values of + * U-columns outside the rectangular supernodes. The row + * subscript of nonzero ucol[k] is stored in usub[k]. + * xusub[i] points to the starting location of column i in ucol. + * Storage: new row subscripts; that is subscripts of PA. + */ +#ifndef EIGEN_LU_STRUCTS +#define EIGEN_LU_STRUCTS +namespace Eigen { + +#define NO_MEMTYPE 4 /* 0: lusup + 1: ucol + 2: lsub + 3: usub */ +typedef enum {NATURAL, MMD_ATA, MMD_AT_PLUS_A, COLAMD, MY_PREMC} colperm_t; +typedef enum {DOFACT, SamePattern, SamePattern_SameRowPerm, Factored} fact_t; +typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} MemType; + +/** Headers for dynamically managed memory + \tparam BaseType can be int, real scalar or complex scalar*/ +template +struct ExpHeader { + int size; // Length of the memory that has been used */ + BaseType *mem; +} ExpHeader; + +template +struct { + VectorXi xsup; // supernode and column mapping + VectorXi supno; // Supernode number corresponding to this column + VectorXi lsub; // Compressed L subscripts of rectangular supernodes + VectorXi xlsub; // xlsub(j) points to the starting location of the j-th column in lsub + VectorXi xlusup; + VectorXi xusub; + VectorType lusup; // L supernodes + VectorType ucol; // U columns + Index nzlmax; // Current max size of lsub + Index nzumax; // Current max size of ucol + Index nzlumax; // Current max size of lusup + Index n; // Number of columns in the matrix + int num_expansions; + ExpHeader *expanders; // Array of pointers to 4 types of memory +} GlobalLU_t; + +}// End namespace Eigen +#endif \ No newline at end of file diff --git 
a/Eigen/src/SparseLU/SparseLU_Utils.h b/Eigen/src/SparseLU/SparseLU_Utils.h new file mode 100644 index 000000000..3c3b24a15 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_Utils.h @@ -0,0 +1,32 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#ifdef EIGEN_SPARSELU_UTILS_H +#define EIGEN_SPARSELU_UTILS_H + +// Number of marker arrays used in the symbolic factorization each of size n +#define LU_NO_MARKER 3 +#define LU_NUM_TEMPV(m,w,t,b) (std::max(m, (t+b)*w) ) +#define LU_EMPTY (-1) +#endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h new file mode 100644 index 000000000..908f4d4cb --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h @@ -0,0 +1,133 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +/* This file is a modified version of heap_relax_snode.c file in SuperLU + * -- SuperLU routine (version 3.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ + +#ifndef EIGEN_HEAP_RELAX_SNODE_H +#define EIGEN_HEAP_RELAX_SNODE_H +#include +/** + * \brief Identify the initial relaxed supernodes + * + * This routine applied to a symmetric elimination tree. 
+ * It assumes that the matrix has been reordered according to the postorder of the etree + * \param et elimination tree + * \param relax_columns Maximum number of columns allowed in a relaxed snode + * \param descendants Number of descendants of each node in the etree + * \param relax_end last column in a supernode + */ +void internal::LU_heap_relax_snode (const int n, VectorXi& et, const int relax_columns, VectorXi& descendants, VectorXi& relax_end) +{ + + // The etree may not be postordered, but its heap ordered + // Post order etree + VectorXi post = internal::TreePostorder(n, et); + VectorXi inv_post(n+1); + register int i; + for (i = 0; i < n+1; ++i) inv_post(post(i)) = i; + + // Renumber etree in postorder + VectorXi iwork(n); + VectorXi et_save(n+1); + for (i = 0; i < n; ++i) + { + iwork(post(i)) = post(et(i)); + } + et_save = et; // Save the original etree + et = iwork; + + // compute the number of descendants of each node in the etree + relax_end.setConstant(-1); + register int j, parent; + descendants.setZero(); + for (j = 0; j < n; j++) + { + parent = et(j); + if (parent != n) // not the dummy root + descendants(parent) += descendants(j) + 1; + } + + // Identify the relaxed supernodes by postorder traversal of the etree + register int snode_start; // beginning of a snode + register int k; + int nsuper_et_post = 0; // Number of relaxed snodes in postordered etree + int nsuper_et = 0; // Number of relaxed snodes in the original etree + for (j = 0; j < n; ) + { + parent = et(j); + snode_start = j; + while ( parent != n && descendants(parent) < relax_columns ) + { + j = parent; + parent = et(j); + } + // Found a supernode in postordered etree, j is the last column + ++nsuper_et_post; + k = n; + for (i = snode_start; i <= j; ++i) + k = std::min(k, inv_post(i)); + l = inv_post(j); + if ( (l - k) == (j - snode_start) ) // Same number of columns in the snode + { + // This is also a supernode in the original etree + relax_end(k) = l; // Record last column + 
++nsuper_et; + } + else + { + for (i = snode_start; i <= j; ++i) + { + l = inv_post(i); + if (descendants(i) == 0) + { + relax_end(l) = l; + ++nsuper_et; + } + } + } + j++; + // Search for a new leaf + while (descendants(j) != 0 && j < n) j++; + } // End postorder traversal of the etree + + // Recover the original etree + et = et_save; +} +#endif diff --git a/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/Eigen/src/SparseLU/SparseLU_panel_dfs.h new file mode 100644 index 000000000..550544d05 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_panel_dfs.h @@ -0,0 +1,221 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +/* + + * NOTE: This file is the modified version of xpanel_dfs.c file in SuperLU + + * -- SuperLU routine (version 2.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * November 15, 1997 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. 
+ * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_PANEL_DFS_H +#define SPARSELU_PANEL_DFS_H +/** + * \brief Performs a symbolic factorization on a panel of columns [jcol, jcol+w) + * + * A supernode representative is the last column of a supernode. + * The nonzeros in U[*,j] are segments that end at supernodes representatives + * + * The routine returns a list of the supernodal representatives + * in topological order of the dfs that generates them. This list is + * a superset of the topological order of each individual column within + * the panel. + * The location of the first nonzero in each supernodal segment + * (supernodal entry location) is also returned. Each column has + * a separate list for this purpose. 
+ * + * Two markers arrays are used for dfs : + * marker[i] == jj, if i was visited during dfs of current column jj; + * marker1[i] >= jcol, if i was visited by earlier columns in this panel; + * + * \param m number of rows in the matrix + * \param w Panel size + * \param jcol Starting column of the panel + * \param A Input matrix in column-major storage + * \param perm_r Row permutation + * \param nseg + * + */ +template +int SparseLU::LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, VectorXi& perm_r, VectorXi& nseg, int& nseg, VectorType& dense, VectorXi& panel_lsub, VectorXi& segrep, VectorXi& repfnz, VectorXi& xprune, VectorXi& marker, VectorXi& parent, VectorXi& xplore, LU_GlobalLu_t& Glu) +{ + + int jj; // Index through each column in the panel + int nextl_col; // Next available position in panel_lsub[*,jj] + int krow; // Row index of the current element + int kperm; // permuted row index + int krep; // Supernode reprentative of the current row + int kmark; + int chperm, chmark, chrep, oldrep, kchild; + int myfnz; // First nonzero element in the current column + int xdfs, maxdfs, kpar; + + // Initialize pointers +// VectorXi& marker1 = marker.block(m, m); + VectorBlock marker1(marker, m, m); + nseg = 0; + VectorXi& xsup = Glu.xsup; + VectorXi& supno = Glu.supno; + VectorXi& lsub = Glu.lsub; + VectorXi& xlsub = Glu.xlsub; + // For each column in the panel + for (jj = jcol; jj < jcol + w; jj++) + { + nextl_col = (jj - jcol) * m; + + //FIXME + VectorBlock repfnz_col(repfnz.segment(nextl_col, m)); // First nonzero location in each row + VectorBlock dense_col(dense.segment(nextl_col, m)); // Accumulate a column vector here + + + // For each nnz in A[*, jj] do depth first search + for (MatrixType::InnerIterator it(A, jj); it; ++it) + { + krow = it.row(); + dense_col(krow) = it.val(); + kmark = marker(krow); + if (kmark == jj) + continue; // krow visited before, go to the next nonzero + + // For each unmarked krow of jj + marker(krow) = jj; + 
kperm = perm_r(krow); + if (kperm == -1 ) { + // krow is in L : place it in structure of L(*, jj) + panel_lsub(nextl_col++) = krow; // krow is indexed into A + } + else + { + // krow is in U : if its supernode-representative krep + // has been explored, update repfnz(*) + krep = xsup(supno(kperm)+1) - 1; + myfnz = repfnz_col(krep); + + if (myfnz != -1 ) + { + // Representative visited before + if (myfnz > kperm ) repfnz_col(krep) = kperm; + + } + else + { + // Otherwise, perform dfs starting at krep + oldrep = -1; + parent(krep) = oldrep; + repfnz_col(krep) = kperm; + xdfs = xlsub(krep); + maxdfs = xprune(krep); + + do + { + // For each unmarked kchild of krep + while (xdfs < maxdfs) + { + kchild = lsub(xdfs); + xdfs++; + chmark = marker(kchild); + + if (chmark != jj ) + { + marker(kchild) = jj; + chperm = perm_r(kchild); + + if (chperm == -1) + { + // case kchild is in L: place it in L(*, j) + panel_lsub(nextl_col++) = kchild; + } + else + { + // case kchild is in U : + // chrep = its supernode-rep. If its rep has been explored, + // update its repfnz(*) + chrep = xsup(supno(chperm)+1) - 1; + myfnz = repfnz_col(chrep); + + if (myfnz != -1) + { // Visited before + if (myfnz > chperm) + repfnz_col(chrep) = chperm; + } + else + { // Cont. dfs at snode-rep of kchild + xplore(krep) = xdfs; + oldrep = krep; + krep = chrep; // Go deeper down G(L) + parent(krep) = oldrep; + repfnz_col(krep) = chperm; + xdfs = xlsub(krep); + maxdfs = xprune(krep); + + } // end if myfnz != -1 + } // end if chperm == -1 + + } // end if chmark !=jj + } // end while xdfs < maxdfs + + // krow has no more unexplored nbrs : + // Place snode-rep krep in postorder DFS, if this + // segment is seen for the first time. (Note that + // "repfnz(krep)" may change later.) 
+ // Baktrack dfs to its parent + if (marker1(krep) < jcol ) + { + segrep(nseg) = krep; + ++nseg; + marker1(krep) = jj; + } + + kpar = parent(krep); // Pop recursion, mimic recursion + if (kpar == -1) + break; // dfs done + krep = kpar; + xdfs = xplore(krep); + maxdfs = xprune(krep); + + } while (kpar != -1); // Do until empty stack + + } // end if (myfnz = -1) + + } // end if (kperm == -1) + + }// end for nonzeros in column jj + + } // end for column jj + +} +#endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_pivotL.h b/Eigen/src/SparseLU/SparseLU_pivotL.h new file mode 100644 index 000000000..f939ef939 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_pivotL.h @@ -0,0 +1,132 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +/* + + * NOTE: This file is the modified version of dpivotL.c file in SuperLU + + * -- SuperLU routine (version 3.0) -- + * Univ. 
of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_PIVOTL_H +#define SPARSELU_PIVOTL_H +/** + * \brief Performs the numerical pivotin on the current column of L, and the CDIV operation. + * + * Here is the pivot policy : + * (1) + * + * \param jcol The current column of L + * \param pivrow [out] The pivot row + * + * + */ +int SparseLU::LU_pivotL(const int jcol, Index& pivrow) +{ + // Initialize pointers + VectorXi& lsub = m_Glu.lsub; // Compressed row subscripts of ( rectangular supernodes ??) 
+ VectorXi& xlsub = m_Glu.xlsub; // xlsub[j] is the starting location of the j-th column in lsub(*) + Scalar* lusup = m_Glu.lusup.data(); // Numerical values of the rectangular supernodes + VectorXi& xlusup = m_Glu.xlusup; // xlusup[j] is the starting location of the j-th column in lusup(*) + + Index fsupc = (m_Glu.xsup)((m_Glu.supno)(jcol)); // First column in the supernode containing the column jcol + Index nsupc = jcol - fsupc; // Number of columns in the supernode portion, excluding jcol; nsupc >=0 + Index lptr = xlsub(fsupc); // pointer to the starting location of the row subscripts for this supernode portion + Index nsupr = xlsub(fsupc+1) - lptr; // Number of rows in the supernode + Scalar* lu_sup_ptr = &(lusup[xlusup(fsupc)]); // Start of the current supernode + Scalar* lu_col_ptr = &(lusup[xlusup(jcol)]); // Start of jcol in the supernode + Index* lsub_ptr = &(lsub.data()[lptr]); // Start of row indices of the supernode + + // Determine the largest abs numerical value for partial pivoting + Index diagind = m_iperm_c(jcol); // diagonal index + Scalar pivmax = 0.0; + Index pivptr = nsupc; + Index diag = -1; + Index old_pivptr = nsupc; + Scalar rtemp; + for (isub = nsupc; isub < nsupr; ++isub) { + rtemp = std::abs(lu_col_ptr[isub]); + if (rtemp > pivmax) { + pivmax = rtemp; + pivptr = isub; + } + if (lsub_ptr[isub] == diagind) diag = isub; + } + + // Test for singularity + if ( pivmax == 0.0 ) { + pivrow = lsub_ptr[pivptr]; + m_perm_r(pivrow) = jcol; + return (jcol+1); + } + + Scalar thresh = m_diagpivotthresh * pivmax; + + // Choose appropriate pivotal element + + { + // Test if the diagonal element can be used as a pivot (given the threshold value) + if (diag >= 0 ) + { + // Diagonal element exists + rtemp = std::abs(lu_col_ptr[diag]); + if (rtemp != Scalar(0.0) && rtemp >= thresh) pivptr = diag; + } + pivrow = lsub_ptr[pivptr]; + } + + // Record pivot row + perm_r(pivrow) = jcol; + // Interchange row subscripts + if (pivptr != nsupc ) + { + std::swap( 
lsub_ptr(pivptr), lsub_ptr(nsupc) ); + // Interchange numerical values as well, for the two rows in the whole snode + // such that L is indexed the same way as A + for (icol = 0; icol <= nsupc; icol++) + { + itemp = pivptr + icol * nsupr; + std::swap(lu_sup_ptr[itemp], lu_sup_ptr[nsupc + icol * nsupr]); + } + } + // cdiv operations + Scalar temp = Scalar(1.0) / lu_col_ptr[nsupc]; + for (k = nsupc+1; k < nsupr; k++) + lu_col_ptr[k] *= temp; + return 0; +} +#endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_relax_snode.h b/Eigen/src/SparseLU/SparseLU_relax_snode.h new file mode 100644 index 000000000..61b8e74bb --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_relax_snode.h @@ -0,0 +1,89 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +/* This file is a modified version of heap_relax_snode.c file in SuperLU + * -- SuperLU routine (version 3.0) -- + * Univ. 
of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ + +#ifndef EIGEN_HEAP_RELAX_SNODE_H +#define EIGEN_HEAP_RELAX_SNODE_H +#include +/** + * \brief Identify the initial relaxed supernodes + * + * This routine applied to a column elimination tree. + * It assumes that the matrix has been reordered according to the postorder of the etree + * \param et elimination tree + * \param relax_columns Maximum number of columns allowed in a relaxed snode + * \param descendants Number of descendants of each node in the etree + * \param relax_end last column in a supernode + */ +void internal::LU_relax_snode (const int n, VectorXi& et, const int relax_columns, VectorXi& descendants, VectorXi& relax_end) +{ + + // compute the number of descendants of each node in the etree + register int j, parent; + relax_end.setConstant(-1); + descendants.setZero(); + for (j = 0; j < n; j++) + { + parent = et(j); + if (parent != n) // not the dummy root + descendants(parent) += descendants(j) + 1; + } + + // Identify the relaxed supernodes by postorder traversal of the etree + register int snode_start; // beginning of a snode + for (j = 0; j < n; ) + { + parent = et(j); + snode_start = j; + while ( parent != n && descendants(parent) < relax_columns ) + { + j = parent; + parent = et(j); + } + // Found a supernode in postordered etree, j is the last column + relax_end(snode_start) = j; // 
Record last column + j++; + // Search for a new leaf + while (descendants(j) != 0 && j < n) j++; + } // End postorder traversal of the etree + +} +#endif diff --git a/Eigen/src/SparseLU/SparseLU_snode_bmod.h b/Eigen/src/SparseLU/SparseLU_snode_bmod.h new file mode 100644 index 000000000..fc6ffc320 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_snode_bmod.h @@ -0,0 +1,88 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +/* + + * NOTE: This file is the modified version of dsnode_bmod.c file in SuperLU + + * -- SuperLU routine (version 3.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. 
+ * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_SNODE_BMOD_H +#define SPARSELU_SNODE_BMOD_H +template +int SparseLU::LU_dsnode_bmod (const int jcol, const int jsupno, const int fsupc, + VectorType& dense, VectorType& tempv) +{ + VectorXi& lsub = m_Glu.lsub; // Compressed row subscripts of ( rectangular supernodes ??) + VectorXi& xlsub = m_Glu.xlsub; // xlsub[j] is the starting location of the j-th column in lsub(*) + Scalar* lusup = m_Glu.lusup.data(); // Numerical values of the rectangular supernodes + VectorXi& xlusup = m_Glu.xlusup; // xlusup[j] is the starting location of the j-th column in lusup(*) + + int nextlu = xlusup(jcol); // Starting location of the next column to add + int irow; + // Process the supernodal portion of L\U[*,jcol] + for (int isub = xlsub(fsupc); isub < xlsub(fsupc+1); isub++) + { + irow = lsub(isub); + lusup(nextlu) = dense(irow); + dense(irow) = 0; + ++nextlu; + } + xlusup(jcol + 1) = nextlu; // Initialize xlusup for next column ( jcol+1 ) + + if (fsupc < jcol ){ + int luptr = xlusup(fsupc); // points to the first column of the supernode + int nsupr = xlsub(fsupc + 1) -xlsub(fsupc); //Number of rows in the supernode + int nsupc = jcol - fsupc; // Number of columns in the supernodal portion of L\U[*,jcol] + int ufirst = xlusup(jcol); // points to the beginning of column jcol in supernode L\U(jsupno) + + int nrow = nsupr - nsupc; // Number of rows in the off-diagonal blocks + int incx = 1, incy = 1; + Scalar alpha = Scalar(-1.0); + Scalar beta = Scalar(1.0); + // Solve the triangular system for U(fsupc:jcol, jcol) with L(fspuc..., fsupc:jcol) + //BLASFUNC(trsv)("L", "N", "U", &nsupc, &(lusup[luptr]), &nsupr, 
&(lusup[ufirst]), &incx); + Map, 0, OuterStride<> > A( &(lusup[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); + Map > l(&(lusup[ufirst]), nsupc); + l = A.triangularView().solve(l); + // Update the trailing part of the column jcol U(jcol:jcol+nrow, jcol) using L(jcol:jcol+nrow, fsupc:jcol) and U(fsupc:jcol) + BLASFUNC(gemv)("N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy); + + return 0; +} +#endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_snode_dfs.h b/Eigen/src/SparseLU/SparseLU_snode_dfs.h new file mode 100644 index 000000000..c3048be54 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_snode_dfs.h @@ -0,0 +1,119 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +/* + + * NOTE: This file is the modified version of dsnode_dfs.c file in SuperLU + + * -- SuperLU routine (version 2.0) -- + * Univ. 
of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * November 15, 1997 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifdef EIGEN_SNODE_DFS_H +#define EIGEN_SNODE_DFS_H + /** + * \brief Determine the union of the row structures of those columns within the relaxed snode. + * NOTE: The relaxed snodes are leaves of the supernodal etree, therefore, + * the portion outside the rectangular supernode must be zero. + * + * \param jcol start of the supernode + * \param kcol end of the supernode + * \param asub Row indices + * \param colptr Pointer to the beginning of each column + * \param xprune (out) The pruned tree ?? 
+ * \param marker (in/out) working vector + */ + template + int SparseLU::LU_snode_dfs(const int jcol, const int kcol, const VectorXi* asub, const VectorXi* colptr, + VectorXi& xprune, VectorXi& marker, LU_GlobalLu_t *m_Glu) + { + VectorXi& xsup = m_Glu->xsup; + VectorXi& supno = m_Glu->supno; // Supernode number corresponding to this column + VectorXi& lsub = m_Glu->lsub; + VectorXi& xlsub = m_Glu->xlsub; + + int nsuper = ++supno(jcol); // Next available supernode number + register int nextl = xlsub(jcol); //Index of the starting location of the jcol-th column in lsub + register int i,k; + int krow,kmark; + for (i = jcol; i <=kcol; i++) + { + // For each nonzero in A(*,i) + for (k = colptr(i); k < colptr(i+1); k++) + { + krow = asub(k); + kmark = marker(krow); + if ( kmark != kcol ) + { + // First time to visit krow + marker(krow) = kcol; + lsub(nextl++) = krow; + if( nextl >= nzlmax ) + { + m_Glu->lsub = LUMemXpand(jcol, nextl, LSUB, nzlmax); + m_Glu->nzlmax = nzlmax; + lsub = m_Glu->lsub; + } + } + } + supno(i) = nsuper; + } + + // If supernode > 1, then make a copy of the subscripts for pruning + if (jcol < kcol) + { + int new_next = nextl + (nextl - xlsub(jcol)); + while (new_next > nzlmax) + { + m_Glu->lsub = LUMemXpand(jcol, nextl, LSUB, &nzlmax); + m_Glu->nzlmax= nzlmax; + lsub = m_Glu->lsub; + } + int ifrom, ito = nextl; + for (ifrom = xlsub(jcol); ifrom < nextl;) + lsub(ito++) = lsub(ifrom++); + for (i = jcol+1; i <=kcol; i++)xlsub(i) = nextl; + nextl = ito; + } + xsup(nsuper+1) = kcol + 1; // Start of next available supernode + supno(kcol+1) = nsuper; + xprune(kcol) = nextl; + xlsub(kcol+1) = nextl; + return 0; + } + + +#endif \ No newline at end of file From 8ab820b5b8d7555ffcd95513b9ac6cd7fd7b6438 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Tue, 29 May 2012 17:55:38 +0200 Subject: [PATCH 02/73] Symbolic and numeric update on a whole panel --- Eigen/src/SparseLU/SparseLU.h | 31 +++- Eigen/src/SparseLU/SparseLU_panel_bmod.h | 178 
+++++++++++++++++++++++ Eigen/src/SparseLU/SparseLU_panel_dfs.h | 5 +- Eigen/src/SparseLU/SparseLU_snode_bmod.h | 26 ++-- 4 files changed, 221 insertions(+), 19 deletions(-) create mode 100644 Eigen/src/SparseLU/SparseLU_panel_bmod.h diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index f5a1c787e..f1c530b55 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -192,7 +192,7 @@ void SparseLU::analyzePattern(const MatrixType& mat) * the estimated amount of space needed, plus A->ncol. */ template -int SparseLU::factorize(const MatrixType& matrix) +void SparseLU::factorize(const MatrixType& matrix) { // Allocate storage common to the factor routines @@ -256,7 +256,6 @@ int SparseLU::factorize(const MatrixType& matrix) register int jcol,kcol; int min_mn = std::min(m,n); VectorXi panel_histo(n); - bool ok = true; Index nextu, nextlu, jsupno, fsupc, new_next; int pivrow; // Pivotal row number in the original row matrix int nseg1; // Number of segments in U-column above panel row jcol @@ -272,8 +271,9 @@ int SparseLU::factorize(const MatrixType& matrix) info = LU_snode_dfs(jcol, kcol, m_mat.innerIndexPtr(), m_mat.outerIndexPtr(), xprune, marker); if ( !info ) { - ok = false; - break; + m_info = NumericalIssue; + m_factorizationIsOk = false; + return; } nextu = xusub(jcol); //starting location of column jcol in ucol nextlu = xlusup(jcol); //Starting location of column jcol in lusup (rectangular supernodes) @@ -322,17 +322,36 @@ int SparseLU::factorize(const MatrixType& matrix) LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r, nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_Glu); // Numeric sup-panel updates in topological order - LU_panel_bmod(m, panel_size, jcol); + LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_Glu); // Sparse LU within the panel, and below the panel diagonal for ( jj = jcol, j< jcol + panel_size; jj++) { k = (jj - jcol) * m; // Column index for 
w-wide arrays + + nseg = nseg1; // begin after all the panel segments + //Depth-first-search for the current column + info = LU_column_dfs(m, jj, ... ); + if ( !info ) + { + m_info = NumericalIssue; + m_factorizationIsOk = false; + return; + } + // Numeric updates to this column + info = LU_column_bmod(jj, ... ); + if ( !info ) + { + m_info = NumericalIssue; + m_factorizationIsOk = false; + return; + } + } // end for jcol += panel_size; // Move to the next panel } // end else } // end for -- end elimination - m_info = ok ? Success : NumericalIssue; + m_info = Success; m_factorizationIsOk = ok; } diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h new file mode 100644 index 000000000..29cc6d0f0 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -0,0 +1,178 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . 
+ +/* + + * NOTE: This file is the modified version of xpanel_dfs.c file in SuperLU + + * -- SuperLU routine (version 3.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_PANEL_BMOD_H +#define SPARSELU_PANEL_BMOD_H +/** + * \brief Performs numeric block updates (sup-panel) in topological order. + * + * Before entering this routine, the original nonzeros in the panel + * were already copied i nto the spa[m,w] ... FIXME to be checked + * + * \param m number of rows in the matrix + * \param w Panel size + * \param jcol Starting column of the panel + * \param nseg Number of segments in the U part + * \param dense Store the full representation of the panel + * \param tempv working array + * \param segrep in ... + * \param repfnz in ... + * \param Glu Global LU data. + * + * + */ +template +void SparseLU::LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, VectorType& dense, VectorType& tempv, VectorXi& segrep, VectorXi& repfnz, LU_GlobalLu_t& Glu) +{ + VectorXi& xsup = Glu.xsup; + VectorXi& supno = Glu.supno; + VectorXi& lsub = Glu.lsub; + VectorXi& xlsub = Glu.xlsub; + VectorXi& xlusup = Glu.xlusup; + VectorType& lusup = Glu.lusup; + + int i,ksub,jj,nextl_col,irow; + int fsupc, nsupc, nsupr, nrow; + int krep, krep_ind; + int nrow; + int lptr; // points to the row subscripts of a supernode + int luptr; // ... 
+ int segsze,no_zeros,irow ; + // For each nonz supernode segment of U[*,j] in topological order + int k = nseg - 1; + for (ksub = 0; ksub < nseg; ksub++) + { // For each updating supernode + + /* krep = representative of current k-th supernode + * fsupc = first supernodal column + * nsupc = number of columns in a supernode + * nsupr = number of rows in a supernode + */ + krep = segrep(k); k--; + fsupc = xsup(supno(krep)); + nsupc = krep - fsupc + 1; + nsupr = xlsub(fsupc+1) - xlsub(fsupc); + nrow = nsupr - nsupc; + lptr = xlsub(fsupc); + krep_ind = lptr + nsupc - 1; + + repfnz_col = repfnz; + dense_col = dense; + + // NOTE : Unlike the original implementation in SuperLU, the present implementation + // does not include a 2-D block update. + + // Sequence through each column in the panel + for (jj = jcol; jj < jcol + w; jj++) + { + nextl_col = (jj-jcol) * m; + VectorBlock repfnz_col(repfnz.segment(nextl_col, m)); // First nonzero column index for each row + VectorBLock dense_col(dense.segment(nextl_col, m)); // Scatter/gather entire matrix column from/to here + + kfnz = repfnz_col(krep); + if ( kfnz == -1 ) + continue; // skip any zero segment + + segsize = krep - kfnz + 1; + luptr = xlusup(fsupc); + + // NOTE : Unlike the original implementation in SuperLU, + // there is no update feature for col-col, 2col-col ... 
+ + // Perform a trianglar solve and block update, + // then scatter the result of sup-col update to dense[] + no_zeros = kfnz - fsupc; + + // Copy U[*,j] segment from dense[*] to tempv[*] : + // The result of triangular solve is in tempv[*]; + // The result of matric-vector update is in dense_col[*] + isub = lptr + no_zeros; + for (i = 0; i < segsize; ++i) + { + irow = lsub(isub); + tempv(i) = dense_col(irow); // Gather to a compact vector + ++isub; + } + // Start effective triangle + luptr += nsupr * no_zeros + no_zeros; + // triangular solve with Eigen + Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); + Map > u( tempv.data(), segsize); + u = A.triangularView().solve(u); + + luptr += segsize; + // Dense Matrix vector product y <-- A*x; + new (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); + Map l( &(tempv.data()[segsize]), segsize); + l= A * u; + + // Scatter tempv(*) into SPA dense(*) such that tempv(*) + // can be used for the triangular solve of the next + // column of the panel. The y will be copied into ucol(*) + // after the whole panel has been finished. 
+ + isub = lptr + no_zeros; + for (i = 0; i < segsize; i++) + { + irow = lsub(isub); + dense_col(irow) = tempv(i); + tempv(i) = zero; + isub++; + } + + // Scatter the update from &tempv[segsize] into SPA dense(*) + // Start dense rectangular L + for (i = 0; i < nrow; i++) + { + irow = lsub(isub); + dense_col(irow) -= tempv(segsize + i); + tempv(segsize + i) = 0; + ++isub; + } + + } // End for each column in the panel + + } // End for each updating supernode +} +#endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/Eigen/src/SparseLU/SparseLU_panel_dfs.h index 550544d05..7b85b6d7c 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_panel_dfs.h @@ -67,11 +67,12 @@ * \param jcol Starting column of the panel * \param A Input matrix in column-major storage * \param perm_r Row permutation - * \param nseg + * \param nseg Number of U segments + * ... * */ template -int SparseLU::LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, VectorXi& perm_r, VectorXi& nseg, int& nseg, VectorType& dense, VectorXi& panel_lsub, VectorXi& segrep, VectorXi& repfnz, VectorXi& xprune, VectorXi& marker, VectorXi& parent, VectorXi& xplore, LU_GlobalLu_t& Glu) +void SparseLU::LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, VectorXi& perm_r, VectorXi& nseg, int& nseg, VectorType& dense, VectorXi& panel_lsub, VectorXi& segrep, VectorXi& repfnz, VectorXi& xprune, VectorXi& marker, VectorXi& parent, VectorXi& xplore, LU_GlobalLu_t& Glu) { int jj; // Index through each column in the panel diff --git a/Eigen/src/SparseLU/SparseLU_snode_bmod.h b/Eigen/src/SparseLU/SparseLU_snode_bmod.h index fc6ffc320..e7146a262 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_snode_bmod.h @@ -46,12 +46,12 @@ #define SPARSELU_SNODE_BMOD_H template int SparseLU::LU_dsnode_bmod (const int jcol, const int jsupno, const int fsupc, - VectorType& dense, VectorType& tempv) + 
VectorType& dense, VectorType& tempv, LU_GlobalLu_t& Glu) { - VectorXi& lsub = m_Glu.lsub; // Compressed row subscripts of ( rectangular supernodes ??) - VectorXi& xlsub = m_Glu.xlsub; // xlsub[j] is the starting location of the j-th column in lsub(*) - Scalar* lusup = m_Glu.lusup.data(); // Numerical values of the rectangular supernodes - VectorXi& xlusup = m_Glu.xlusup; // xlusup[j] is the starting location of the j-th column in lusup(*) + VectorXi& lsub = Glu.lsub; // Compressed row subscripts of ( rectangular supernodes ??) + VectorXi& xlsub = Glu.xlsub; // xlsub[j] is the starting location of the j-th column in lsub(*) + VectorType& lusup = Glu.lusup; // Numerical values of the rectangular supernodes + VectorXi& xlusup = Glu.xlusup; // xlusup[j] is the starting location of the j-th column in lusup(*) int nextlu = xlusup(jcol); // Starting location of the next column to add int irow; @@ -72,16 +72,20 @@ int SparseLU::LU_dsnode_bmod (const int jcol, const int jsupno, const int fsupc, int ufirst = xlusup(jcol); // points to the beginning of column jcol in supernode L\U(jsupno) int nrow = nsupr - nsupc; // Number of rows in the off-diagonal blocks - int incx = 1, incy = 1; - Scalar alpha = Scalar(-1.0); - Scalar beta = Scalar(1.0); +// int incx = 1, incy = 1; +// Scalar alpha = Scalar(-1.0); +// Scalar beta = Scalar(1.0); + // Solve the triangular system for U(fsupc:jcol, jcol) with L(fspuc..., fsupc:jcol) //BLASFUNC(trsv)("L", "N", "U", &nsupc, &(lusup[luptr]), &nsupr, &(lusup[ufirst]), &incx); - Map, 0, OuterStride<> > A( &(lusup[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); - Map > l(&(lusup[ufirst]), nsupc); + Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); + Map > l(&(lusup.data()[ufirst]), nsupc); l = A.triangularView().solve(l); + // Update the trailing part of the column jcol U(jcol:jcol+nrow, jcol) using L(jcol:jcol+nrow, fsupc:jcol) and U(fsupc:jcol) - BLASFUNC(gemv)("N", &nrow, &nsupc, &alpha, 
&lusup[luptr+nsupc], &nsupr, &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy); + Map > u(&(lusup.data()[ufirst+nsupc], nsupc); + u = A * l; +// BLASFUNC(gemv)("N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy); return 0; } From 8608d08d658b09bfd92057d752eb80d59462cdc8 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Wed, 30 May 2012 18:09:26 +0200 Subject: [PATCH 03/73] Symbolic and numeric updates within the panel --- Eigen/src/SparseLU/SparseLU.h | 35 ++- Eigen/src/SparseLU/SparseLU_Memory.h | 6 +- Eigen/src/SparseLU/SparseLU_Utils.h | 2 +- Eigen/src/SparseLU/SparseLU_column_bmod.h | 216 +++++++++++++++++ Eigen/src/SparseLU/SparseLU_column_dfs.h | 269 ++++++++++++++++++++++ Eigen/src/SparseLU/SparseLU_panel_bmod.h | 8 +- Eigen/src/SparseLU/SparseLU_panel_dfs.h | 16 +- Eigen/src/SparseLU/SparseLU_snode_bmod.h | 20 +- 8 files changed, 538 insertions(+), 34 deletions(-) create mode 100644 Eigen/src/SparseLU/SparseLU_column_bmod.h create mode 100644 Eigen/src/SparseLU/SparseLU_column_dfs.h diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index f1c530b55..5b45dd6d0 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -245,7 +245,7 @@ void SparseLU::factorize(const MatrixType& matrix) VectorXi& xlusup = m_GLu.xlusup; VectorXi& xusub = m_Glu.xusub; - supno(0) = -1; + supno(0) = IND_EMPTY; xsup(0) = xlsub(0) = xusub(0) = xlusup(0); int panel_size = m_panel_size; int wdef = panel_size; // upper bound on panel width @@ -262,7 +262,7 @@ void SparseLU::factorize(const MatrixType& matrix) int nseg; // Number of segments in each U-column for (jcol = 0; jcol < min_mn; ) { - if (relax_end(jcol) != -1) + if (relax_end(jcol) != IND_EMPTY) { // Starting a relaxed node from jcol kcol = relax_end(jcol); // End index of the relaxed snode @@ -298,7 +298,12 @@ void SparseLU::factorize(const MatrixType& matrix) // Eliminate the current column info = 
LU_pivotL(icol, pivrow); - eigen_assert(info == 0 && "The matrix is structurally singular"); + if ( !info ) + { + m_info = NumericalIssue; + m_factorizationIsOk = false; + return; + } } jcol = icol; // The last column te be eliminated } @@ -309,7 +314,7 @@ void SparseLU::factorize(const MatrixType& matrix) panel_size = w_def; for (k = jcol + 1; k < std::min(jcol+panel_size, min_mn); k++) { - if (relax_end(k) != -1) + if (relax_end(k) != IND_EMPTY) { panel_size = k - jcol; break; @@ -331,7 +336,9 @@ void SparseLU::factorize(const MatrixType& matrix) nseg = nseg1; // begin after all the panel segments //Depth-first-search for the current column - info = LU_column_dfs(m, jj, ... ); + VectorBlock panel_lsubk(panel_lsub, k, m); //FIXME + VectorBlock repfnz_k(repfnz, k, m); //FIXME + info = LU_column_dfs(m, jj, perm_r, nseg, panel_lsub(k), segrep, repfnz_k, xprune, marker, parent, xplore, m_Glu); if ( !info ) { m_info = NumericalIssue; @@ -339,7 +346,9 @@ void SparseLU::factorize(const MatrixType& matrix) return; } // Numeric updates to this column - info = LU_column_bmod(jj, ... 
); + VectorBlock dense_k(dense, k, m); //FIXME + VectorBlock segrep_k(segrep, nseg1, m) // FIXME Check the length + info = LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_Glu); if ( !info ) { m_info = NumericalIssue; @@ -347,6 +356,20 @@ void SparseLU::factorize(const MatrixType& matrix) return; } + // Copy the U-segments to ucol(*) + + + // Form the L-segment + info = LU_pivotL(...); + if ( !info ) + { + m_info = NumericalIssue; + m_factorizationIsOk = false; + return; + } + + // Prune columns (0:jj-1) using column jj + } // end for jcol += panel_size; // Move to the next panel } // end else diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h index 6e0fc658d..91b24fa67 100644 --- a/Eigen/src/SparseLU/SparseLU_Memory.h +++ b/Eigen/src/SparseLU/SparseLU_Memory.h @@ -86,7 +86,7 @@ int SparseLU::LUMemInit(int lwork) nzlmax = std::max(1, m_fill_ratio/4.) * annz; //??? // Return the estimated size to the user if necessary - if (lwork = -1) + if (lwork == IND_EMPTY) { estimated_size = LU_GluIntArray(n) * iword + LU_TempSpace(m, m_panel_size) + (nzlmax + nzumax) * iword + (nzlumax+nzumax) * dword + n); @@ -130,7 +130,7 @@ int SparseLU::LUMemInit(int lwork) } else // m_fact == SamePattern_SameRowPerm; { - if (lwork = -1) + if (lwork == IND_EMPTY) { estimated_size = LU_GluIntArray(n) * iword + LU_TempSpace(m, m_panel_size) + (Glu.nzlmax + Glu.nzumax) * iword + (Glu.nzlumax+Glu.nzumax) * dword + n); @@ -232,7 +232,7 @@ DestType* SparseLU::LUMemXpand(int jcol, int next, MemType mem_type, int& maxlen new_mem = expand(maxlen, mem_type, next, 1); else new_mem = expand(maxlen, mem_type, next, 0); - eigen_assert(new_mem && "Can't expand memory"); + eigen_assert(new_mem && "Can't expand memory"); // FIXME Should be an exception return new_mem; diff --git a/Eigen/src/SparseLU/SparseLU_Utils.h b/Eigen/src/SparseLU/SparseLU_Utils.h index 3c3b24a15..27eaed25c 100644 --- a/Eigen/src/SparseLU/SparseLU_Utils.h +++ 
b/Eigen/src/SparseLU/SparseLU_Utils.h @@ -28,5 +28,5 @@ // Number of marker arrays used in the symbolic factorization each of size n #define LU_NO_MARKER 3 #define LU_NUM_TEMPV(m,w,t,b) (std::max(m, (t+b)*w) ) -#define LU_EMPTY (-1) +#define IND_EMPTY (-1) #endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h new file mode 100644 index 000000000..58755363d --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -0,0 +1,216 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +/* + + * NOTE: This file is the modified version of xcolumn_bmod.c file in SuperLU + + * -- SuperLU routine (version 3.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * October 15, 2003 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. 
+ * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_COLUMN_BMOD_H +#define SPARSELU_COLUMN_BMOD_H +/** + * \brief Performs numeric block updates (sup-col) in topological order + * + * \param jcol current column to update + * \param nseg Number of segments in the U part + * \param dense Store the full representation of the column + * \param tempv working array + * \param segrep segment representative ... + * \param repfnz ??? First nonzero column in each row ??? ... + * \param fpanelc First column in the current panel + * \param Glu Global LU data. 
+ * \return 0 - successful return + * > 0 - number of bytes allocated when run out of space + * + */ +template +int SparseLU::LU_column_bmod(const int jcol, const int nseg, VectorType& dense, VectorType& tempv, VectorXi& segrep, VectorXi& repfnz, int fpanelc, LU_GlobalLu_t& Glu) +{ + + int jsupno, k, ksub, krep, krep_ind, ksupno; + /* krep = representative of current k-th supernode + * fsupc = first supernodal column + * nsupc = number of columns in a supernode + * nsupr = number of rows in a supernode + * luptr = location of supernodal LU-block in storage + * kfnz = first nonz in the k-th supernodal segment + * no-zeros = no lf leading zeros in a supernodal U-segment + */ + VectorXi& xsup = Glu.xsup; + VectorXi& supno = Glu.supno; + VectorXi& lsub = Glu.lsub; + VectorXi& xlsub = Glu.xlsub; + VectorXi& xlusup = Glu.xlusup; + VectorType& lusup = Glu.lusup; + int nzlumax = GLu.nzlumax; + int jsupno = supno(jcol); + // For each nonzero supernode segment of U[*,j] in topological order + k = nseg - 1; + for (ksub = 0; ksub < nseg; ksub++) + { + krep = segrep(k); k--; + ksupno = supno(krep); + if (jsupno != ksupno ) + { + // outside the rectangular supernode + fsupc = xsup(ksupno); + fst_col = std::max(fsupc, fpanelc); + + // Distance from the current supernode to the current panel; + // d_fsupc = 0 if fsupc > fpanelc + d_fsupc = fst_col - fsupc; + + luptr = xlusup(fst_col) + d_fsupc; + lptr = xlsub(fsupc) + d_fsupc; + + kfnz = repfnz(krep); + kfnz = std::max(kfnz, fpanelc); + + segsize = krep - kfnz + 1; + nsupc = krep - fst_col + 1; + nsupr = xlsub(fsupc+1) - xlsub(fsupc); + nrow = nsupr - d_fsupc - nsupc; + krep_ind = lptr + nsupc - 1; + + // NOTE Unlike the original implementation in SuperLU, the only feature + // here is a sup-col update. 
+ + // Perform a triangular solver and block update, + // then scatter the result of sup-col update to dense + no_zeros = kfnz - fst_col; + // First, copy U[*,j] segment from dense(*) to tempv(*) + isub = lptr + no_zeros; + for (i = 0; i ww segsize; i++) + { + irow = lsub(isub); + tempv(i) = densee(irow); + ++isub; + } + // Dense triangular solve -- start effective triangle + luptr += nsupr * no_zeros + no_zeros; + // Form Eigen matrix and vector + Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); + Map u(tempv.data(), segsize); + u = A.triangularView().solve(u); + + // Dense matrix-vector product y <-- A*x + luptr += segsize; + new (&A) (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(nsupr) ); + Map l( &(tempv.data()[segsize]), segsize); + l= A * u; + + // Scatter tempv[] into SPA dense[] as a temporary storage + isub = lptr + no_zeros; + for (i = 0; i w segsize; i++) + { + irow = lsub(isub); + dense(irow) = tempv(i); + tempv(i) = Scalar(0.0); + ++isub; + } + + // Scatter l into SPA dense[] + for (i = 0; i < nrow; i++) + { + irow = lsub(isub); + dense(irow) -= tempv(segsize + i); + tempv(segsize + i) = Scalar(0.0); + ++isub; + } + } // end if jsupno + } // end for each segment + + // Process the supernodal portion of L\U[*,j] + nextlu = xlusup(jcol); + fsupc = xsup(jsupno); + + // copy the SPA dense into L\U[*,j] + new_next = nextlu + xlsub(fsupc + 1) - xlsub(fsupc); + while (new_next > nzlumax ) + { + Glu.lusup = LUmemXpand(jcol, nextlu, LUSUP, &nzlumax); + Glu.nzlumax = nzlumax; + lusup = Glu.lusup; + lsub = Glu.lsub; + } + + for (isub = xlsub(fsupc); isub < xlsub(fsupc+1); isub++) + { + irow = lsub(isub); + lusub(nextlu) = dense(irow); + dense(irow) = Scalar(0.0); + ++nextlu; + } + + xlusup(jcol + 1) = nextlu; // close L\U(*,jcol); + + /* For more updates within the panel (also within the current supernode), + * should start from the first column of the panel, or the first column + * 
of the supernode, whichever is bigger. There are two cases: + * 1) fsupc < fpanelc, then fst_col <- fpanelc + * 2) fsupc >= fpanelc, then fst_col <-fsupc + */ + fst_col = std::max(fsupc, fpanelc); + + if (fst_col < jcol) + { + // Distance between the current supernode and the current panel + // d_fsupc = 0 if fsupc >= fpanelc + d_fsupc = fst_col - fsupc; + + lptr = xlsub(fsupc) + d_fsupc; + luptr = xlusup(fst_col) + d_fsupc; + nsupr = xlsub(fsupc+1) - xlsub(fsupc); // leading dimension + nsupc = jcol - fst_col; // excluding jcol + nrow = nsupr - d_fsupc - nsupc; + + // points to the beginning of jcol in snode L\U(jsupno) + ufirst = xlusup(jcol) + d_fsupc; + Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); + Map l( &(lusup.data()[ufirst]), nsupc ); + u = A.triangularView().solve(u); + + new (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); + Map l( &(lusup.data()[ufirst+nsupc]), nsupr ); + l = l - A * u; + + } // End if fst_col + return 0; +} +#endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_column_dfs.h b/Eigen/src/SparseLU/SparseLU_column_dfs.h new file mode 100644 index 000000000..15ddcf7c0 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_column_dfs.h @@ -0,0 +1,269 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. 
+// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +/* + + * NOTE: This file is the modified version of xcolumn_dfs.c file in SuperLU + + * -- SuperLU routine (version 2.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * November 15, 1997 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_COLUMN_DFS_H +#define SPARSELU_COLUMN_DFS_H +/** + * \brief Performs a symbolic factorization on column jcol and decide the supernode boundary + * + * A supernode representative is the last column of a supernode. + * The nonzeros in U[*,j] are segments that end at supernodes representatives. + * The routine returns a list of the supernodal representatives + * in topological order of the dfs that generates them. + * The location of the first nonzero in each supernodal segment + * (supernodal entry location) is also returned. 
+ * + * \param m number of rows in the matrix + * \param jcol Current column + * \param perm_r Row permutation + * \param [in,out] nseg Number of segments in current U[*,j] - new segments appended + * \param lsub_col defines the rhs vector to start the dfs + * \param [in,out] segrep Segment representatives - new segments appended + * \param repfnz + * \param xprune + * \param marker + * \param parent + * \param xplore + * \param Glu global LU data + * \return 0 success + * > 0 number of bytes allocated when run out of space + * + */ +int SparseLU::LU_column_dfs(const int m, const int jcol, VectorXi& perm_r, VectorXi& nseg VectorXi& lsub_col, VectorXi& segrep, VectorXi& repfnz, VectorXi& xprune, VectorXi& marker, VectorXi& parent, VectorXi& xplore, LU_GlobalLu_t& Glu) +{ + typedef typename VectorXi::Index; + + int jcolp1, jcolm1, jsuper, nsuper, nextl; + int krow; // Row index of the current element + int kperm; // permuted row index + int krep; // Supernode reprentative of the current row + int k, kmark; + int chperm, chmark, chrep, oldrep, kchild; + int myfnz; // First nonzero element in the current column + int xdfs, maxdfs, kpar; + + // Initialize pointers + VectorXi& xsup = Glu.xsup; + VectorXi& supno = Glu.supno; + VectorXi& lsub = Glu.lsub; + VectorXi& xlsub = Glu.xlsub; + + nsuper = supno(jcol); + jsuper = nsuper; + nextl = xlsup(jcol); + VectorBlock marker2(marker, 2*m, m); + // For each nonzero in A(*,jcol) do dfs + for (k = 0; lsub_col[k] != IND_EMPTY; k++) + { + krow = lsub_col(k); + lsub_col(k) = IND_EMPTY; + kmark = marker2(krow); + + // krow was visited before, go to the next nonz; + if (kmark == jcol) continue; + + // For each unmarker nbr krow of jcol + // krow is in L: place it in structure of L(*,jcol) + marker2(krow) = jcol; + kperm = perm_r(krow); + + if (kperm == IND_EMPTY ) + { + lsub(nextl++) = krow; // krow is indexed into A + if ( nextl >= nzlmax ) + { + Glu.lsub = LUMemXpand(jcol, nextl, LSUB, nzlmax); + //FIXME try... 
catch out of space + Glu.nzlmax = nzlmax; + lsub = Glu.lsub; + } + if (kmark != jcolm1) jsuper = IND_EMPTY; // Row index subset testing + } + else + { + // krow is in U : if its supernode-rep krep + // has been explored, update repfnz(*) + krep = xsup(supno(kperm)+1) - 1; + myfnz = repfnz(krep); + + if (myfnz != IND_EMPTY ) + { + // visited before + if (myfnz > kperm) repfnz(krep) = kperm; + // continue; + } + else + { + // otherwise, perform dfs starting at krep + oldrep = IND_EMPTY; + parent(krep) = oldrep; + repfnz(krep) = kperm; + xdfs = xlsub(krep); + maxdfs = xprune(krep); + + do + { + // For each unmarked kchild of krep + while (xdfs < maxdfs) + { + kchild = lsub(xdfs); + xdfs++; + chmark = marker2(kchild); + + if (chmark != jcol) + { + // Not reached yet + marker2(kchild) = jcol; + chperm = perm_r(kchild); + + // if kchild is in L: place it in L(*,k) + if (chperm == IND_EMPTY) + { + lsub(nextl++) = kchild; + if (nextl >= nzlmax) + { + Glu.lsub = LUMemXpand(jcol, nextl, LSUB, nzlmax); + //FIXME Catch out of space errors + GLu.nzlmax = nzlmax; + lsub = Glu.lsub; + } + if (chmark != jcolm1) jsuper = IND_EMPTY; + } + else + { + // if kchild is in U : + // chrep = its supernode-rep. If its rep has been explored, + // update its repfnz + chrep = xsup(supno(chperm)+1) - 1; + myfnz = repfnz(chrep); + if (myfnz != IND_EMPTY) + { + // Visited before + if ( myfnz > chperm) repfnz(chrep) = chperm; + } + else + { + // continue dfs at super-rep of kchild + xplore(krep) = xdfs; + oldrep = krep; + krep = chrep; // Go deeped down G(L^t) + parent(krep) = olddrep; + repfnz(krep) = chperm; + xdfs = xlsub(krep); + maxdfs = xprune(krep); + } // else myfnz + } // else for chperm + + } // if chmark + + } // end while + + // krow has no more unexplored nbrs; + // place supernode-rep krep in postorder DFS. 
+ // backtrack dfs to its parent + + segrep(nseg) = ;krep; + ++nseg; + kpar = parent(krep); // Pop from stack, mimic recursion + if (kpar == IND_EMPTY) break; // dfs done + krep = kpar; + xdfs = xplore(krep); + maxdfs = xprune(krep); + + } while ( kpar != IND_EMPTY); + + } // else myfnz + + } // else kperm + + } // for each nonzero ... + + // check to see if j belongs in the same supeprnode as j-1 + if ( jcol == 0 ) + { // Do nothing for column 0 + nsuper = supno(0) = 0 ; + } + else + { + fsupc = xsup(nsuper); + jptr = xlsub(jcol); // Not yet compressed + jm1ptr = xlsub(jcolm1); + + // Make sure the number of columns in a supernode doesn't + // exceed threshold + if ( (jcol - fsupc) >= m_maxsuper) jsuper = IND_EMPTY; + + /* If jcol starts a new supernode, reclaim storage space in + * lsub from previous supernode. Note we only store + * the subscript set of the first and last columns of + * a supernode. (first for num values, last for pruning) + */ + if (jsuper == IND_EMPTY) + { // starts a new supernode + if ( (fsupc < jcolm1-1) ) + { // >= 3 columns in nsuper + ito = xlsub(fsupcc+1) + xlsub(jcolm1) = ito; + istop = ito + jptr - jm1ptr; + xprune(jcolm1) = istop; // intialize xprune(jcol-1) + xlsub(jcol) = istop; + + for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito) + lsub(ito) = lsub(ifrom); + nextl = ito; // = istop + length(jcol) + } + nsuper++; + supno(jcol) = nsuper; + } // if a new supernode + } // end else: jcol > 0 + + // Tidy up the pointers before exit + xsup(nsuper+1) = jcolp1; + supno(jcolp1) = nsuper; + xprune(jcol) = nextl; // Intialize upper bound for pruning + xlsub(jcolp1) = nextl; + + return 0; +} +#endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h index 29cc6d0f0..93daa938c 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -24,7 +24,7 @@ /* - * NOTE: This file is the modified version of xpanel_dfs.c file in 
SuperLU + * NOTE: This file is the modified version of xpanel_bmod.c file in SuperLU * -- SuperLU routine (version 3.0) -- * Univ. of California Berkeley, Xerox Palo Alto Research Center, @@ -111,7 +111,7 @@ void SparseLU::LU_panel_bmod(const int m, const int w, const int jcol, const int VectorBLock dense_col(dense.segment(nextl_col, m)); // Scatter/gather entire matrix column from/to here kfnz = repfnz_col(krep); - if ( kfnz == -1 ) + if ( kfnz == IND_EMPTY ) continue; // skip any zero segment segsize = krep - kfnz + 1; @@ -143,7 +143,7 @@ void SparseLU::LU_panel_bmod(const int m, const int w, const int jcol, const int luptr += segsize; // Dense Matrix vector product y <-- A*x; - new (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); + new (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(nsupr) ); Map l( &(tempv.data()[segsize]), segsize); l= A * u; @@ -157,7 +157,7 @@ void SparseLU::LU_panel_bmod(const int m, const int w, const int jcol, const int { irow = lsub(isub); dense_col(irow) = tempv(i); - tempv(i) = zero; + tempv(i) = Scalar(0.0); isub++; } diff --git a/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/Eigen/src/SparseLU/SparseLU_panel_dfs.h index 7b85b6d7c..97e5121db 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_panel_dfs.h @@ -72,7 +72,7 @@ * */ template -void SparseLU::LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, VectorXi& perm_r, VectorXi& nseg, int& nseg, VectorType& dense, VectorXi& panel_lsub, VectorXi& segrep, VectorXi& repfnz, VectorXi& xprune, VectorXi& marker, VectorXi& parent, VectorXi& xplore, LU_GlobalLu_t& Glu) +void SparseLU::LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, VectorXi& perm_r, int& nseg, VectorType& dense, VectorXi& panel_lsub, VectorXi& segrep, VectorXi& repfnz, VectorXi& xprune, VectorXi& marker, VectorXi& parent, VectorXi& xplore, LU_GlobalLu_t& Glu) { int jj; // 
Index through each column in the panel @@ -115,7 +115,7 @@ void SparseLU::LU_panel_dfs(const int m, const int w, const int jcol, MatrixType // For each unmarked krow of jj marker(krow) = jj; kperm = perm_r(krow); - if (kperm == -1 ) { + if (kperm == IND_EMPTY ) { // krow is in L : place it in structure of L(*, jj) panel_lsub(nextl_col++) = krow; // krow is indexed into A } @@ -126,7 +126,7 @@ void SparseLU::LU_panel_dfs(const int m, const int w, const int jcol, MatrixType krep = xsup(supno(kperm)+1) - 1; myfnz = repfnz_col(krep); - if (myfnz != -1 ) + if (myfnz != IND_EMPTY ) { // Representative visited before if (myfnz > kperm ) repfnz_col(krep) = kperm; @@ -135,7 +135,7 @@ void SparseLU::LU_panel_dfs(const int m, const int w, const int jcol, MatrixType else { // Otherwise, perform dfs starting at krep - oldrep = -1; + oldrep = IND_EMPTY; parent(krep) = oldrep; repfnz_col(krep) = kperm; xdfs = xlsub(krep); @@ -155,7 +155,7 @@ void SparseLU::LU_panel_dfs(const int m, const int w, const int jcol, MatrixType marker(kchild) = jj; chperm = perm_r(kchild); - if (chperm == -1) + if (chperm == IND_EMPTY) { // case kchild is in L: place it in L(*, j) panel_lsub(nextl_col++) = kchild; @@ -168,7 +168,7 @@ void SparseLU::LU_panel_dfs(const int m, const int w, const int jcol, MatrixType chrep = xsup(supno(chperm)+1) - 1; myfnz = repfnz_col(chrep); - if (myfnz != -1) + if (myfnz != IND_EMPTY) { // Visited before if (myfnz > chperm) repfnz_col(chrep) = chperm; @@ -202,13 +202,13 @@ void SparseLU::LU_panel_dfs(const int m, const int w, const int jcol, MatrixType } kpar = parent(krep); // Pop recursion, mimic recursion - if (kpar == -1) + if (kpar == IND_EMPTY) break; // dfs done krep = kpar; xdfs = xplore(krep); maxdfs = xprune(krep); - } while (kpar != -1); // Do until empty stack + } while (kpar != IND_EMPTY); // Do until empty stack } // end if (myfnz = -1) diff --git a/Eigen/src/SparseLU/SparseLU_snode_bmod.h b/Eigen/src/SparseLU/SparseLU_snode_bmod.h index 
e7146a262..9da986497 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_snode_bmod.h @@ -54,9 +54,9 @@ int SparseLU::LU_dsnode_bmod (const int jcol, const int jsupno, const int fsupc, VectorXi& xlusup = Glu.xlusup; // xlusup[j] is the starting location of the j-th column in lusup(*) int nextlu = xlusup(jcol); // Starting location of the next column to add - int irow; + int irow, isub; // Process the supernodal portion of L\U[*,jcol] - for (int isub = xlsub(fsupc); isub < xlsub(fsupc+1); isub++) + for (isub = xlsub(fsupc); isub < xlsub(fsupc+1); isub++) { irow = lsub(isub); lusup(nextlu) = dense(irow); @@ -72,20 +72,16 @@ int SparseLU::LU_dsnode_bmod (const int jcol, const int jsupno, const int fsupc, int ufirst = xlusup(jcol); // points to the beginning of column jcol in supernode L\U(jsupno) int nrow = nsupr - nsupc; // Number of rows in the off-diagonal blocks -// int incx = 1, incy = 1; -// Scalar alpha = Scalar(-1.0); -// Scalar beta = Scalar(1.0); // Solve the triangular system for U(fsupc:jcol, jcol) with L(fspuc..., fsupc:jcol) - //BLASFUNC(trsv)("L", "N", "U", &nsupc, &(lusup[luptr]), &nsupr, &(lusup[ufirst]), &incx); - Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); - Map > l(&(lusup.data()[ufirst]), nsupc); - l = A.triangularView().solve(l); + Map,0,OuterStride<> > A( &(lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); + Map > u(&(lusup.data()[ufirst]), nsupc); + u = A.triangularView().solve(u); // Update the trailing part of the column jcol U(jcol:jcol+nrow, jcol) using L(jcol:jcol+nrow, fsupc:jcol) and U(fsupc:jcol) - Map > u(&(lusup.data()[ufirst+nsupc], nsupc); - u = A * l; -// BLASFUNC(gemv)("N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy); + new (&A) Map,0,OuterStride<> > ( &(lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); + Map > l(&(lusup.data()[ufirst+nsupc], nsupc); + l = l - 
A * u; return 0; } From b26d6b02de24f2c96f4bdfb6bf1c42afc80693c6 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Thu, 31 May 2012 17:10:29 +0200 Subject: [PATCH 04/73] Eliminate and prune columns in a panel --- Eigen/src/SparseLU/SparseLU.h | 51 ++++++- Eigen/src/SparseLU/SparseLU_Utils.h | 65 ++++++++- Eigen/src/SparseLU/SparseLU_copy_to_ucol.h | 123 +++++++++++++++++ Eigen/src/SparseLU/SparseLU_pivotL.h | 41 ++++-- Eigen/src/SparseLU/SparseLU_pruneL.h | 152 +++++++++++++++++++++ 5 files changed, 412 insertions(+), 20 deletions(-) create mode 100644 Eigen/src/SparseLU/SparseLU_copy_to_ucol.h create mode 100644 Eigen/src/SparseLU/SparseLU_pruneL.h diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 5b45dd6d0..833832f3b 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -113,7 +113,7 @@ class SparseLU int m_colblk; // The minimum column dimension for 2-D blocking to be used; int m_fillfactor; // The estimated fills factors for L and U, compared with A RealScalar m_diagpivotthresh; // Specifies the threshold used for a diagonal entry to be an acceptable pivot - + int nnzL, nnzU; // Nonzeros in L and U factors private: // Copy constructor SparseLU (SparseLU& ) {} @@ -260,6 +260,7 @@ void SparseLU::factorize(const MatrixType& matrix) int pivrow; // Pivotal row number in the original row matrix int nseg1; // Number of segments in U-column above panel row jcol int nseg; // Number of segments in each U-column + int irep,ir; for (jcol = 0; jcol < min_mn; ) { if (relax_end(jcol) != IND_EMPTY) @@ -297,7 +298,7 @@ void SparseLU::factorize(const MatrixType& matrix) LU_snode_bmod(icol, jsupno, fsupc, dense, tempv); // Eliminate the current column - info = LU_pivotL(icol, pivrow); + info = LU_pivotL(icol, m_diagpivotthresh, m_perm_r, m_iperm_c, pivrow, m_Glu); if ( !info ) { m_info = NumericalIssue; @@ -357,10 +358,17 @@ void SparseLU::factorize(const MatrixType& matrix) } // Copy the U-segments to ucol(*) - + 
//FIXME Check that repfnz_k, dense_k... have stored references to modified columns + info = LU_copy_to_col(jj, nseg, segrep, repfnz_k, perm_r, dense_k, m_Glu); + if ( !info ) + { + m_info = NumericalIssue; + m_factorizationIsOk = false; + return; + } // Form the L-segment - info = LU_pivotL(...); + info = LU_pivotL(jj, m_diagpivotthresh, m_perm_r, iperm_c, pivrow, m_Glu); if ( !info ) { m_info = NumericalIssue; @@ -369,11 +377,44 @@ void SparseLU::factorize(const MatrixType& matrix) } // Prune columns (0:jj-1) using column jj + LU_pruneL(jj, m_perm_r, pivrow, nseg, segrep, repfnz_k, xprune, m_Glu); - } // end for + // Reset repfnz for this column + for (i = 0; i < nseg; i++) + { + irep = segrep(i); + repfnz(irep) = IND_EMPTY; + } + } // end SparseLU within the panel jcol += panel_size; // Move to the next panel } // end else } // end for -- end elimination + + // Adjust row permutation in the case of rectangular matrices + if (m > n ) + { + k = 0; + for (i = 0; i < m; ++i) + { + if ( perm_r(i) == IND_EMPTY ) + { + perm_r(i) = n + k; + ++k; + } + } + } + // Count the number of nonzeros in factors + LU_countnz(min_mn, xprune, m_nnzL, m_nnzU, m_Glu); + // Apply permutation to the L subscripts + LU_fixupL(min_mn, m_perm_r, m_Glu); + + // Free work space and compress storage iwork and work + // ?? Should it be done automatically by C++ + //... + + // Create supernode matrix L and the column major matrix U + // ... 
+ m_info = Success; m_factorizationIsOk = ok; } diff --git a/Eigen/src/SparseLU/SparseLU_Utils.h b/Eigen/src/SparseLU/SparseLU_Utils.h index 27eaed25c..88d1c8b80 100644 --- a/Eigen/src/SparseLU/SparseLU_Utils.h +++ b/Eigen/src/SparseLU/SparseLU_Utils.h @@ -29,4 +29,67 @@ #define LU_NO_MARKER 3 #define LU_NUM_TEMPV(m,w,t,b) (std::max(m, (t+b)*w) ) #define IND_EMPTY (-1) -#endif \ No newline at end of file + +void SparseLU::LU_countnz(const int n, VectorXi& xprune, int& nnzL, int& nnzU, GlobalLU_t& Glu) +{ + VectorXi& xsup = Glu.xsup; + VectorXi& xlsub = Glu.xlsub; + nnzL = 0; + nnzU = (Glu.xusub)(n); + int nnzL0 = 0; + int nsuper = (Glu.supno)(n); + int jlen, irep; + + if (n <= 0 ) return; + // For each supernode + for (i = 0; i <= nsuper; i++) + { + fsupc = xsup(i); + jlen = xlsub(fsupc+1) - xlsub(fsupc); + + for (j = fsupc; j < xsup(i+1); j++) + { + nnzL += jlen; + nnzLU += j - fsupc + 1; + jlen--; + } + irep = xsup(i+1) - 1; + nnzL0 += xprune(irep) - xlsub(irep); + } + +} +/** + * \brief Fix up the data storage lsub for L-subscripts. 
+ * + * It removes the subscripts sets for structural pruning, + * and applies permutation to the remaining subscripts + * + */ +void SparseLU::LU_fixupL(const int n, const VectorXi& perm_r, GlobalLU_t& Glu) +{ + int nsuper, fsupc, i, j, k, jstart; + VectorXi& xsup = GLu.xsup; + VectorXi& lsub = Glu.lsub; + VectorXi& xlsub = Glu.xlsub; + + int nextl = 0; + int nsuper = (Glu.supno)(n); + + // For each supernode + for (i = 0; i <= nsuper; i++) + { + fsupc = xsup(i); + jstart = xlsub(fsupc); + xlsub(fsupc) = nextl; + for (j = jstart; j < xlsub(fsupc + 1); j++) + { + lsub(nextl) = perm_r(lsub(j)); // Now indexed into P*A + nextl++ + } + for (k = fsupc+1; k < xsup(i+1); k++) + xlsub(k) = nextl; // other columns in supernode i + } + + xlsub(n) = nextl; +} +#endif diff --git a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h new file mode 100644 index 000000000..3f8d8abe2 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h @@ -0,0 +1,123 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +/* + + * NOTE: This file is the modified version of xcopy_to_ucol.c file in SuperLU + + * -- SuperLU routine (version 2.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * November 15, 1997 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_COPY_TO_UCOL_H +#define SPARSELU_COPY_TO_UCOL_H +/** + * \brief Performs numeric block updates (sup-col) in topological order + * + * \param jcol current column to update + * \param nseg Number of segments in the U part + * \param segrep segment representative ... + * \param repfnz ??? First nonzero column in each row ??? ... + * \param perm_r Row permutation + * \param dense Store the full representation of the column + * \param Glu Global LU data. 
+ * \return 0 - successful return + * > 0 - number of bytes allocated when run out of space + * + */ +template +int SparseLU::LU_copy_to_ucol(const int jcol, const int nseg, VectorXi& segrep, VectorXi& repfnz, VectorXi& perm_r, VectorType& dense, LU_GlobalLu_t& Glu) +{ + int ksupno, k, ksub, krep, ksupno; + + VectorXi& xsup = Glu.xsup; + VectorXi& supno = Glu.supno; + VectorXi& lsub = Glu.lsub; + VectorXi& xlsub = Glu.xlsub; + VectorType& ucol = GLu.ucol; + VectorXi& usub = Glu.usub; + VectorXi& xusub = Glu.xusub; + int nzumax = GLu.nzumax; + int jsupno = supno(jcol); + + // For each nonzero supernode segment of U[*,j] in topological order + k = nseg - 1; + int nextu = xusub(jcol); + int kfnz, isub, segsize; + int new_next,irow; + for (ksub = 0; ksub < nseg; ksub++) + { + krep = segrep(k); k--; + ksupno = supno(krep); + if (jsupno != ksupno ) // should go into ucol(); + { + kfnz = repfnz(krep); + if (kfnz != IND_EMPTY) + { // Nonzero U-segment + fsupc = xsup(ksupno); + isub = xlsub(fsupc) + kfnz - fsupc; + segsize = krep - kfnz + 1; + new_next = nextu + segsize; + while (new_next > nzumax) + { + Glu.ucol = LU_MemXpand(jcol, nextu, UCOL, nzumax); //FIXME try and catch errors + ucol = Glu.ucol; + Glu.nzumax = nzumax; + Glu.usub = LU_MemXpand(jcol, nextu, USUB, nzumax); //FIXME try and catch errors + Glu.nzumax = nzumax; + usub = Glu.usub; + lsub = Glu.lsub; + } + + for (i = 0; i < segsize; i++) + { + irow = lsub(isub); + usub(nextu) = perm_r(irow); // Unlike teh L part, the U part is stored in its final order + ucol(nextu) = dense(irow); + dense(irow) = Scalar(0.0); + nextu++; + isub++; + } + + } // end nonzero U-segment + + } // end if jsupno + + } // end for each segment + xusub(jcol + 1) = nextu; // close U(*,jcol) + return 0; +} +#endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_pivotL.h b/Eigen/src/SparseLU/SparseLU_pivotL.h index f939ef939..3bfe14e7e 100644 --- a/Eigen/src/SparseLU/SparseLU_pivotL.h +++ 
b/Eigen/src/SparseLU/SparseLU_pivotL.h @@ -24,7 +24,7 @@ /* - * NOTE: This file is the modified version of dpivotL.c file in SuperLU + * NOTE: This file is the modified version of xpivotL.c file in SuperLU * -- SuperLU routine (version 3.0) -- * Univ. of California Berkeley, Xerox Palo Alto Research Center, @@ -47,23 +47,36 @@ /** * \brief Performs the numerical pivotin on the current column of L, and the CDIV operation. * - * Here is the pivot policy : - * (1) + * Pivot policy : + * (1) Compute thresh = u * max_(i>=j) abs(A_ij); + * (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN + * pivot row = k; + * ELSE IF abs(A_jj) >= thresh THEN + * pivot row = j; + * ELSE + * pivot row = m; + * + * Note: If you absolutely want to use a given pivot order, then set u=0.0. * * \param jcol The current column of L - * \param pivrow [out] The pivot row - * + * \param u diagonal pivoting threshold + * \param [in,out]perm_r Row permutation (threshold pivoting) + * \param [in] iperm_c column permutation - used to finf diagonal of Pc*A*Pc' + * \param [out]pivrow The pivot row + * \param Glu Global LU data + * \return 0 if success, i > 0 if U(i,i) is exactly zero * */ -int SparseLU::LU_pivotL(const int jcol, Index& pivrow) +template +int SparseLU::LU_pivotL(const int jcol, const Scalar u, VectorXi& perm_r, VectorXi& iperm_c, int& pivrow, GlobalLU_t& Glu) { // Initialize pointers - VectorXi& lsub = m_Glu.lsub; // Compressed row subscripts of ( rectangular supernodes ??) - VectorXi& xlsub = m_Glu.xlsub; // xlsub[j] is the starting location of the j-th column in lsub(*) - Scalar* lusup = m_Glu.lusup.data(); // Numerical values of the rectangular supernodes - VectorXi& xlusup = m_Glu.xlusup; // xlusup[j] is the starting location of the j-th column in lusup(*) + VectorXi& lsub = Glu.lsub; // Compressed row subscripts of ( rectangular supernodes ??) 
+ VectorXi& xlsub = Glu.xlsub; // xlsub[j] is the starting location of the j-th column in lsub(*) + Scalar* lusup = Glu.lusup.data(); // Numerical values of the rectangular supernodes + VectorXi& xlusup = Glu.xlusup; // xlusup[j] is the starting location of the j-th column in lusup(*) - Index fsupc = (m_Glu.xsup)((m_Glu.supno)(jcol)); // First column in the supernode containing the column jcol + Index fsupc = (Glu.xsup)((Glu.supno)(jcol)); // First column in the supernode containing the column jcol Index nsupc = jcol - fsupc; // Number of columns in the supernode portion, excluding jcol; nsupc >=0 Index lptr = xlsub(fsupc); // pointer to the starting location of the row subscripts for this supernode portion Index nsupr = xlsub(fsupc+1) - lptr; // Number of rows in the supernode @@ -72,7 +85,7 @@ int SparseLU::LU_pivotL(const int jcol, Index& pivrow) Index* lsub_ptr = &(lsub.data()[lptr]); // Start of row indices of the supernode // Determine the largest abs numerical value for partial pivoting - Index diagind = m_iperm_c(jcol); // diagonal index + Index diagind = iperm_c(jcol); // diagonal index Scalar pivmax = 0.0; Index pivptr = nsupc; Index diag = -1; @@ -90,11 +103,11 @@ int SparseLU::LU_pivotL(const int jcol, Index& pivrow) // Test for singularity if ( pivmax == 0.0 ) { pivrow = lsub_ptr[pivptr]; - m_perm_r(pivrow) = jcol; + perm_r(pivrow) = jcol; return (jcol+1); } - Scalar thresh = m_diagpivotthresh * pivmax; + Scalar thresh = diagpivotthresh * pivmax; // Choose appropriate pivotal element diff --git a/Eigen/src/SparseLU/SparseLU_pruneL.h b/Eigen/src/SparseLU/SparseLU_pruneL.h new file mode 100644 index 000000000..687717d52 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_pruneL.h @@ -0,0 +1,152 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +/* + + * NOTE: This file is the modified version of xpruneL.c file in SuperLU + + * -- SuperLU routine (version 2.0) -- + * Univ. of California Berkeley, Xerox Palo Alto Research Center, + * and Lawrence Berkeley National Lab. + * November 15, 1997 + * + * Copyright (c) 1994 by Xerox Corporation. All rights reserved. + * + * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY + * EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. + * + * Permission is hereby granted to use or copy this program for any + * purpose, provided the above notices are retained on all copies. + * Permission to modify the code and to distribute modified code is + * granted, provided the above notices are retained, and a notice that + * the code was modified is included with the above copyright notice. + */ +#ifndef SPARSELU_PRUNEL_H +#define SPARSELU_PRUNEL_H +/** + * \brief Prunes the L-structure. 
+ * + * It prunes the L-structure of supernodes whose L-structure constains the current pivot row "pivrow" + * + * + * \param jcol The current column of L + * \param [in]perm_r Row permutation + * \param [out]pivrow The pivot row + * \param nseg Number of segments ??? + * \param segrep + * \param repfnz + * \param [out]xprune + * \param Glu Global LU data + * + */ +template +void SparseLU::LU_pruneL(const int jcol, const VectorXi& perm_r, const int pivrow, const int nseg, const VectorXi& segrep, VectorXi& repfnz, VectorXi& xprune, GlobalLU_t& Glu) +{ + // Initialize pointers + VectorXi& xsup = Glu.xsup; + VectorXi& supno = Glu.supno; + VectorXi& lsub = Glu.lsub; + VectorXi& xlsub = Glu.xlsub; + VectorType& lusup = Glu.lusup; + VectorXi& xlusup = Glu.xlusup; + + // For each supernode-rep irep in U(*,j] + int jsupno = supno(jcol); + int i,irep,irep1; + bool movnum, do_prune = false; + int kmin, kmax, ktemp, minloc, maxloc; + for (i = 0; i < nseg; i++) + { + irep = segrep(i); + irep1 = irep + 1; + do_prune = false; + + // Don't prune with a zero U-segment + if (repfnz(irep) == IND_EMPTY) continue; + + // If a snode overlaps with the next panel, then the U-segment + // is fragmented into two parts -- irep and irep1. We should let + // pruning occur at the rep-column in irep1s snode. 
+ if (supno(irep) == supno(irep1) continue; // don't prune + + // If it has not been pruned & it has a nonz in row L(pivrow,i) + if (supno(irep) != jsupno ) + { + if ( xprune (irep) >= xlsub(irep1) + { + kmin = xlsub(irep); + kmax = xlsub(irep1) - 1; + for (krow = kmin; krow <= kmax; krow++) + { + if (lsub(krow) == pivrow) + { + do_prune = true; + break; + } + } + } + + if (do_prune) + { + // do a quicksort-type partition + // movnum=true means that the num values have to be exchanged + movnum = false; + if (irep == xsup(supno(irep)) ) // Snode of size 1 + movnum = true; + + while (kmin <= kmax) + { + if (perm_r(lsub(kmax)) == IND_EMPTY) + kmax--; + else if ( perm_r(lsub(kmin)) != IND_EMPTY) + kmin--; + else + { + // kmin below pivrow (not yet pivoted), and kmax + // above pivrow: interchange the two suscripts + ktemp = lsub(kmin); + lsub(kmin) = lsub(kmax); + lsub(kmax) = ktemp; + + // If the supernode has only one column, then we + // only keep one set of subscripts. For any subscript + // intercnahge performed, similar interchange must be + // done on the numerical values. 
+ if (movnum) + { + minloc = xlusup(irep) + ( kmin - xlsub(irep) ); + maxloc = xlusup(irep) + ( kmax - xlsub(irep) ); + std::swap(lusup(minloc), lusup(maxloc)); + } + kmin++; + kmax--; + } + } // end while + + xprune(irep) = kmin; + } // end if do_prune + } // end pruning + } // End for each U-segment +} +#endif \ No newline at end of file From 4e5655cc037de587d01d9fc59c006d7feb9a6251 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Fri, 1 Jun 2012 18:44:51 +0200 Subject: [PATCH 05/73] Supernodal Matrix --- Eigen/src/SparseLU/SparseLU.h | 14 ++++-- Eigen/src/SparseLU/SparseLU_Matrix.h | 68 +++++++++++++++++++++------- Eigen/src/SparseLU/SparseLU_Utils.h | 1 + 3 files changed, 63 insertions(+), 20 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 833832f3b..593ec7e25 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -26,13 +26,17 @@ #ifndef EIGEN_SPARSE_LU #define EIGEN_SPARSE_LU +namespace Eigen { + +template +class SparseLU; + #include #include #include #include #include #include -namespace Eigen { template class SparseLU @@ -412,9 +416,11 @@ void SparseLU::factorize(const MatrixType& matrix) // ?? Should it be done automatically by C++ //... - // Create supernode matrix L and the column major matrix U - // ... 
- + // Create supernode matrix L + m_Lstore.setInfos(m, min_mn, nnzL, Glu.lusup, Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno; Glu.xsup); + // Create the column major upper sparse matrix U + // Could be great to have the SparseMatrix constructor accepting the CSC matrix pointers + // The Map class can do the job somehow m_info = Success; m_factorizationIsOk = ok; } diff --git a/Eigen/src/SparseLU/SparseLU_Matrix.h b/Eigen/src/SparseLU/SparseLU_Matrix.h index c4d56ee0a..1fe991d1c 100644 --- a/Eigen/src/SparseLU/SparseLU_Matrix.h +++ b/Eigen/src/SparseLU/SparseLU_Matrix.h @@ -27,47 +27,83 @@ #define EIGEN_SPARSELU_MATRIX_H /** \ingroup SparseLU_Module - * \brief a class to manipulate the supernodal matrices in the SparseLU factorization + * \brief a class to manipulate the L supernodal factor from the SparseLU factorization * - * This class extends the class SparseMatrix and should contain the data to easily store + * This class contain the data to easily store * and manipulate the supernodes during the factorization and solution phase of Sparse LU. * Only the lower triangular matrix has supernodes. 
* * NOTE : This class corresponds to the SCformat structure in SuperLU * */ - +/* TO DO + * InnerIterator as for sparsematrix + * SuperInnerIterator to iterate through all supernodes + * Function for triangular solve + */ template class SuperNodalMatrix { public: - SCMatrix() + typedef typename _Scalar Scalar; + typedef typename _Index Index; + public: + SuperNodalMatrix() + { + + } + SuperNodalMatrix(Index m, Index n, Index nnz, Scalar *nzval, Index* nzval_colptr, Index* rowind, + Index* rowind_colptr, Index* col_to_sup, Index* sup_to_col ):m_row(m),m_col(n),m_nnz(nnz), + m_nzval(nzval),m_nzval_colptr(nzval_colptr),m_rowind(rowind), + m_rowind_colptr(rowind_colptr),m_col_to_sup(col_to_sup),m_sup_to_col(sup_to_col) { } - ~SCMatrix() + ~SuperNodalMatrix() { } - operator SparseMatrix(); + void setInfos(Index m, Index n, Index nnz, Scalar *nzval, Index* nzval_colptr, Index* rowind, + Index* rowind_colptr, Index* col_to_sup, Index* sup_to_col ) + { + m_row = m; + m_col = n; + m_nnz = nnz; + m_nzval = nzval; + m_nzval_colptr = nzval_colptr; + m_rowind = rowind; + m_rowind_colptr = rowind_colptr; + m_col_to_sup = col_to_sup; + m_sup_to_col = sup_to_col; + + } + SuperNodalMatrix(SparseMatrix& mat); + class InnerIterator + { + public: + + protected: + + }: protected: - Index nnz; // Number of nonzero values - Index nsupper; // Index of the last supernode - Scalar *nzval; //array of nonzero values packed by (supernode ??) 
column - Index *nzval_colptr; //nzval_colptr[j] Stores the location in nzval[] which starts column j - Index *rowind; // Array of compressed row indices of rectangular supernodes - Index rowind_colptr; //rowind_colptr[j] stores the location in rowind[] which starts column j - Index *col_to_sup; // col_to_sup[j] is the supernode number to which column j belongs - Index *sup_to_col; //sup_to_col[s] points to the starting column of the s-th supernode - // Index *nzval_colptr corresponds to m_outerIndex in SparseMatrix + Index m_row; // Number of rows + Index m_col; // Number of columns + Index m_nnz; // Number of nonzero values + Index m_nsupper; // Index of the last supernode + Scalar* m_nzval; //array of nonzero values packed by (supernode ??) column + Index* m_nzval_colptr; //nzval_colptr[j] Stores the location in nzval[] which starts column j + Index* m_rowind; // Array of compressed row indices of rectangular supernodes + Index* m_rowind_colptr; //rowind_colptr[j] stores the location in rowind[] which starts column j + Index *m_col_to_sup; // col_to_sup[j] is the supernode number to which column j belongs + Index *m_sup_to_col; //sup_to_col[s] points to the starting column of the s-th supernode private : SuperNodalMatrix(SparseMatrix& ) {} }; -SuperNodalMatrix::operator SparseMatrix() +SuperNodalMatrix::SuperNodalMatrix(SparseMatrix& mat) { } diff --git a/Eigen/src/SparseLU/SparseLU_Utils.h b/Eigen/src/SparseLU/SparseLU_Utils.h index 88d1c8b80..8d3d5efee 100644 --- a/Eigen/src/SparseLU/SparseLU_Utils.h +++ b/Eigen/src/SparseLU/SparseLU_Utils.h @@ -92,4 +92,5 @@ void SparseLU::LU_fixupL(const int n, const VectorXi& perm_r, GlobalLU_t& Glu) xlsub(n) = nextl; } + #endif From 268ba3b52132d14e3005031a140252724f4bf605 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Wed, 6 Jun 2012 18:23:39 +0200 Subject: [PATCH 06/73] Memory expansion and few bugs --- Eigen/src/SparseLU/SparseLU.h | 7 +- Eigen/src/SparseLU/SparseLU_Matrix.h | 174 ++++++++++++++++-- 
Eigen/src/SparseLU/SparseLU_Memory.h | 243 ++++++++++++-------------- Eigen/src/SparseLU/SparseLU_Structs.h | 37 ++-- 4 files changed, 296 insertions(+), 165 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 593ec7e25..996dbf078 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -412,15 +412,14 @@ void SparseLU::factorize(const MatrixType& matrix) // Apply permutation to the L subscripts LU_fixupL(min_mn, m_perm_r, m_Glu); - // Free work space and compress storage iwork and work - // ?? Should it be done automatically by C++ + // Free work space iwork and work //... // Create supernode matrix L m_Lstore.setInfos(m, min_mn, nnzL, Glu.lusup, Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno; Glu.xsup); // Create the column major upper sparse matrix U - // Could be great to have the SparseMatrix constructor accepting the CSC matrix pointers - // The Map class can do the job somehow + // ?? Use the MappedSparseMatrix class ?? 
+ new (&m_Ustore) Map > ( m, min_mn, nnzU, Glu.xusub.data(), Glu.usub.data(), Glu.ucol.data() ); m_info = Success; m_factorizationIsOk = ok; } diff --git a/Eigen/src/SparseLU/SparseLU_Matrix.h b/Eigen/src/SparseLU/SparseLU_Matrix.h index 1fe991d1c..01f8784da 100644 --- a/Eigen/src/SparseLU/SparseLU_Matrix.h +++ b/Eigen/src/SparseLU/SparseLU_Matrix.h @@ -53,17 +53,21 @@ class SuperNodalMatrix } SuperNodalMatrix(Index m, Index n, Index nnz, Scalar *nzval, Index* nzval_colptr, Index* rowind, - Index* rowind_colptr, Index* col_to_sup, Index* sup_to_col ):m_row(m),m_col(n),m_nnz(nnz), - m_nzval(nzval),m_nzval_colptr(nzval_colptr),m_rowind(rowind), - m_rowind_colptr(rowind_colptr),m_col_to_sup(col_to_sup),m_sup_to_col(sup_to_col) + Index* rowind_colptr, Index* col_to_sup, Index* sup_to_col ) { - + setInfos(m, n, nnz, nzval, nzval_colptr, rowind, rowind_colptr, col_to_sup, sup_to_col); } ~SuperNodalMatrix() { } + /** + * Set appropriate pointers for the lower triangular supernodal matrix + * These infos are available at the end of the numerical factorization + * FIXME This class will be modified such that it can be use in the course + * of the factorization. 
+ */ void setInfos(Index m, Index n, Index nnz, Scalar *nzval, Index* nzval_colptr, Index* rowind, Index* rowind_colptr, Index* col_to_sup, Index* sup_to_col ) { @@ -78,21 +82,80 @@ class SuperNodalMatrix m_sup_to_col = sup_to_col; } - SuperNodalMatrix(SparseMatrix& mat); - class InnerIterator + /** + * Number of rows + */ + int rows() { - public: - - protected: - - }: + return m_row; + } + + /** + * Number of columns + */ + int cols() + { + return m_col; + } + + /** + * Return the array of nonzero values packed by column + * + * The size is nnz + */ + Scalar* valuePtr() + { + return m_nzval; + } + + /** + * Return the pointers to the beginning of each column in \ref outerIndexPtr() + */ + Index* colIndexPtr() + { + return m_nzval_colptr; + } + + /** + * Return the array of compressed row indices of all supernodes + */ + Index* rowIndex() + { + return m_rowind; + } + /** + * Return the location in \em rowvaluePtr() which starts each column + */ + Index* rowIndexPtr() + { + return m_rowind_colptr; + } + /** + * Return the array of column-to-supernode mapping + */ + Index colToSup() + { + return m_col_to_sup; + } + /** + * Return the array of supernode-to-column mapping + */ + Index supToCol() + { + return m_sup_to_col; + } + + + class InnerIterator; + class SuperNodeIterator; + protected: Index m_row; // Number of rows Index m_col; // Number of columns Index m_nnz; // Number of nonzero values - Index m_nsupper; // Index of the last supernode - Scalar* m_nzval; //array of nonzero values packed by (supernode ??) 
column + Index m_nsuper; // Number of supernodes + Scalar* m_nzval; //array of nonzero values packed by column Index* m_nzval_colptr; //nzval_colptr[j] Stores the location in nzval[] which starts column j Index* m_rowind; // Array of compressed row indices of rectangular supernodes Index* m_rowind_colptr; //rowind_colptr[j] stores the location in rowind[] which starts column j @@ -100,11 +163,90 @@ class SuperNodalMatrix Index *m_sup_to_col; //sup_to_col[s] points to the starting column of the s-th supernode private : - SuperNodalMatrix(SparseMatrix& ) {} }; -SuperNodalMatrix::SuperNodalMatrix(SparseMatrix& mat) +/** + * \brief InnerIterator class to iterate over nonzero values in the triangular supernodal matrix + * + */ +template +class SuperNodalMatrix::InnerIterator { + public: + InnerIterator(const SuperNodalMatrix& mat, Index outer) + : m_matrix(mat), + m_outer(outer), + m_idval(mat.colIndexPtr()[outer]), + m_startval(m_idval), + m_endval(mat.colIndexPtr()[outer+1]) + m_idrow(mat.rowIndexPtr()[outer]), + m_startidrow(m_idrow), + m_endidrow(mat.rowIndexPtr()[outer+1]) + {} + inline InnerIterator& operator++() + { + m_idval++; + m_idrow++ ; + return *this; + } + inline Scalar value() const { return m_matrix.valuePtr()[m_idval]; } + + inline Scalar& valueRef() { return const_cast(m_matrix.valuePtr()[m_idval]; } + + inline Index index() const { return m_matrix.rowIndex()[m_idrow]; } + inline Index row() const { return index(); } + inline Index col() const { return m_outer; } + + inline Index supIndex() const { return m_matrix.colToSup()[m_outer]; } + + inline operator bool() const + { + return ( (m_idval < m_endval) && (m_idval > m_startval) && + (m_idrow < m_endidrow) && (m_idrow > m_startidrow) ); + } + + protected: + const SuperNodalMatrix& m_matrix; // Supernodal lower triangular matrix + const Index m_outer; // Current column + Index m_idval; //Index to browse the values in the current column + const Index m_startval; // Start of the column value + const 
Index m_endval; // End of the column value + Index m_idrow; //Index to browse the row indices + const Index m_startidrow; // Start of the row indices of the current column value + const Index m_endidrow; // End of the row indices of the current column value +}; +/** + * \brief Iterator class to iterate over nonzeros Supernodes in the triangular supernodal matrix + * + * The final goal is to use this class when dealing with supernodes during numerical factorization + */ +template +class SuperNodalMatrix::SuperNodeIterator +{ + public: + SuperNodeIterator(const SuperNodalMatrix& mat) + { + + } + SuperNodeIterator(const SuperNodalMatrix& mat, Index supno) + { + + } + + /* + * Available Methods : + * Browse all supernodes (operator ++ ) + * Number of supernodes + * Columns of the current supernode + * triangular matrix of the current supernode + * rectangular part of the current supernode + */ + protected: + const SuperNodalMatrix& m_matrix; // Supernodal lower triangular matrix + Index m_idsup; // Index to browse all supernodes + const Index m_nsuper; // Number of all supernodes + Index m_startidsup; + Index m_endidsup; -} +}; #endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h index 91b24fa67..730557b63 100644 --- a/Eigen/src/SparseLU/SparseLU_Memory.h +++ b/Eigen/src/SparseLU/SparseLU_Memory.h @@ -52,169 +52,155 @@ + (w + 1) * m * sizeof(Scalar) namespace internal { -/* Allocate various working space needed in the numerical factorization phase. - * m_work : space fot the output data structures (lwork is the size) - * m_Glu: persistent data to facilitate multiple factors : is it really necessary ?? +/** + * \brief Allocate various working space needed in the numerical factorization phase. 
+ * \param m number of rows of the input matrix + * \param n number of columns + * \param annz number of initial nonzeros in the matrix + * \param work scalar working space needed by all factor routines + * \param iwork Integer working space + * \param lwork if lwork=-1, this routine returns an estimated size of the required memory + * \param Glu persistent data to facilitate multiple factors : will be deleted later ?? + * \return an estimated size of the required memory if lwork = -1; + * FIXME should also return the size of actually allocated when memory allocation failed * NOTE Unlike SuperLU, this routine does not allow the user to provide the size to allocate - * nor it return an estimated amount of space required. - * - * Useful variables : - * - m_fillratio : Ratio of fill expected - * - lwork = -1 : return an estimated size of the required memory - * = 0 : Estimate and allocate the memory */ -template -int SparseLU::LUMemInit(int lwork) +template +int SparseLU::LUMemInit(int m, int n, int annz, Scalar *work, Index *iwork, int lwork, int fillratio, GlobalLU_t& Glu) { - int iword = sizeof(Index); - int dword = sizeof(Scalar); - int n = m_Glu.n = m_mat.cols(); - int m = m_mat.rows(); - m_Glu.num_expansions = 0; // No memory expansions so far ?? + typedef typename ScalarVector::Scalar; + typedef typename IndexVector::Index; + + Glu.num_expansions = 0; //No memory expansions so far + if (!Glu.expanders) + Glu.expanders = new ExpHeader(LU_NBR_MEMTYPE); + + // Guess the size for L\U factors + int nzlmax, nzumax, nzlumax; + nzumax = nzlumax = m_fillratio * annz; // estimated number of nonzeros in U + nzlmax = std::max(1, m_fill_ratio/4.) 
* annz; // estimated nnz in L factor + + // Return the estimated size to the user if necessary int estimated_size; - - - if (!m_Glu.expanders) - m_Glu.expanders = new ExpHeader(NO_MEMTYPE); - - if (m_fact_t != SamePattern_SameRowPerm) // Create space for a new factorization - { - // Guess the size for L\U factors - int annz = m_mat.nonZeros(); - int nzlmax, nzumax, nzlumax; - nzumax = nzlumax = m_fillratio * annz; // ??? - nzlmax = std::max(1, m_fill_ratio/4.) * annz; //??? - - // Return the estimated size to the user if necessary - if (lwork == IND_EMPTY) - { - estimated_size = LU_GluIntArray(n) * iword + LU_TempSpace(m, m_panel_size) - + (nzlmax + nzumax) * iword + (nzlumax+nzumax) * dword + n); - return estimated_size; - } - - // Setup the required space - // NOTE: In SuperLU, there is an option to let the user provide its own space. - - // Allocate Integer pointers for L\U factors.resize(n+1); - m_Glu.supno.resize(n+1); - m_Glu.xlsub.resize(n+1); - m_Glu.xlusup.resize(n+1); - m_Glu.xusub.resize(n+1); - - // Reserve memory for L/U factors - m_Glu.lusup = internal::expand(nzlumax, LUSUP, 0, 0, m_Glu); - m_Glu.ucol = internal::expand(nzumax, UCOL, 0, 0, m_Glu); - m_Glu.lsub = internal::expand(nzlmax, LSUB, 0, 0, m_Glu); - m_Glu.usub = internal::expand(nzumax, USUB, 0, 1, m_Glu); - - // Check if the memory is correctly allocated, - while ( !m_Glu.lusup || !m_Glu.ucol || !m_Glu.lsub || !m_Glu.usub) - { - //otherwise reduce the estimated size and retry - delete [] m_Glu.lusup; - delete [] m_Glu.ucol; - delete [] m_Glu.lsub; - delete [] m_Glu.usub; - - nzlumax /= 2; - nzumax /= 2; - nzlmax /= 2; - eigen_assert (nzlumax > annz && "Not enough memory to perform factorization"); - - m_Glu.lusup = internal::expand(nzlumax, LUSUP, 0, 0, m_Glu); - m_Glu.ucol = internal::expand(nzumax, UCOL, 0, 0, m_Glu); - m_Glu.lsub = internal::expand(nzlmax, LSUB, 0, 0, m_Glu); - m_Glu.usub = internal::expand(nzumax, USUB, 0, 1, m_Glu); - } - } - else // m_fact == SamePattern_SameRowPerm; 
+ if (lwork == IND_EMPTY) { - if (lwork == IND_EMPTY) - { - estimated_size = LU_GluIntArray(n) * iword + LU_TempSpace(m, m_panel_size) - + (Glu.nzlmax + Glu.nzumax) * iword + (Glu.nzlumax+Glu.nzumax) * dword + n); - return estimated_size; - } - // Use existing space from previous factorization - // Unlike in SuperLU, this should not be necessary here since m_Glu is persistent as a member of the class - m_Glu.xsup = m_Lstore.sup_to_col; - m_Glu.supno = m_Lstore.col_to_sup; - m_Glu.xlsub = m_Lstore.rowind_colptr; - m_Glu.xlusup = m_Lstore.nzval_colptr; - xusub = m_Ustore.outerIndexPtr(); + estimated_size = LU_GluIntArray(n) * sizeof(Index) + LU_TempSpace(m, m_panel_size) + + (nzlmax + nzumax) * sizeof(Index) + (nzlumax+nzumax) * sizeof(Scalar) + n); + return estimated_size; + } + + // Setup the required space + // NOTE: In SuperLU, there is an option to let the user provide its own space, unlike here. + + // Allocate Integer pointers for L\U factors + Glu.supno = new IndexVector; + Glu.supno->resize(n+1); + + Glu.xlsub = new IndexVector; + Glu.xlsub->resize(n+1); + + Glu.xlusup = new IndexVector; + Glu.xlusup->resize(n+1); + + Glu.xusub = new IndexVector; + Glu.xusub->resize(n+1); + + // Reserve memory for L/U factors + Glu.lusup = new ScalarVector; + Glu.ucol = new ScalarVector; + Glu.lsub = new IndexVector; + Glu.usub = new IndexVector; + + expand(Glu.lusup,nzlumax, LUSUP, 0, 0, Glu); + expand(Glu.ucol,nzumax, UCOL, 0, 0, Glu); + expand(Glu.lsub,nzlmax, LSUB, 0, 0, Glu); + expand(Glu.usub,nzumax, USUB, 0, 1, Glu); + + // Check if the memory is correctly allocated, + // Should be a try... 
catch section here + while ( !Glu.lusup.size() || !Glu.ucol.size() || !Glu.lsub.size() || !Glu.usub.size()) + { + //otherwise reduce the estimated size and retry +// delete [] Glu.lusup; +// delete [] Glu.ucol; +// delete [] Glu.lsub; +// delete [] Glu.usub; +// + nzlumax /= 2; + nzumax /= 2; + nzlmax /= 2; + //FIXME Should be an excpetion here + eigen_assert (nzlumax > annz && "Not enough memory to perform factorization"); - m_Glu.expanders[LSUB].size = m_Glu.nzlmax; // Maximum value from previous factorization - m_Glu.expanders[LUSUP].size = m_Glu.nzlumax; - m_Glu.expanders[USUB].size = GLu.nzumax; - m_Glu.expanders[UCOL].size = m_Glu.nzumax; - m_Glu.lsub = GLu.expanders[LSUB].mem = m_Lstore.rowind; - m_Glu.lusup = GLu.expanders[LUSUP].mem = m_Lstore.nzval; - GLu.usub = m_Glu.expanders[USUB].mem = m_Ustore.InnerIndexPtr(); - m_Glu.ucol = m_Glu.expanders[UCOL].mem = m_Ustore.valuePtr(); + expand(Glu.lsup, nzlumax, LUSUP, 0, 0, Glu); + expand(Glu.ucol, nzumax, UCOL, 0, 0, Glu); + expand(Glu.lsub, nzlmax, LSUB, 0, 0, Glu); + expand(Glu.usub, nzumax, USUB, 0, 1, Glu); } // LUWorkInit : Now, allocate known working storage int isize = (2 * m_panel_size + 3 + LU_NO_MARKER) * m + n; int dsize = m * m_panel_size + LU_NUM_TEMPV(m, m_panel_size, m_maxsuper, m_rowblk); - m_iwork = new Index(isize); + iwork = new Index(isize); eigen_assert( (m_iwork != 0) && "Malloc fails for iwork"); - m_work = new Scalar(dsize); + work = new Scalar(dsize); eigen_assert( (m_work != 0) && "Malloc fails for dwork"); - ++m_Glu.num_expansions; + ++Glu.num_expansions; return 0; } // end LuMemInit /** * Expand the existing storage to accomodate more fill-ins + * \param vec Valid pointer to a vector to allocate or expand + * \param [in,out]prev_len At input, length from previous call. 
At output, length of the newly allocated vector + * \param type Which part of the memory to expand + * \param len_to_copy Size of the memory to be copied to new store + * \param keep_prev true: use prev_len; Do not expand this vector; false: compute new_len and expand */ -template -DestType* SparseLU::expand(int& prev_len, // Length from previous call - MemType type, // Which part of the memory to expand - int len_to_copy, // Size of the memory to be copied to new store - int keep_prev) // = 1: use prev_len; Do not expand this vector - // = 0: compute new_len to expand) +template +int SparseLU::expand(VectorType& vec, int& prev_len, MemType type, int len_to_copy, bool keep_prev, GlobalLU_t& Glu) { float alpha = 1.5; // Ratio of the memory increase int new_len; // New size of the allocated memory - if(m_Glu.num_expansions == 0 || keep_prev) - new_len = prev_len; + + if(Glu.num_expansions == 0 || keep_prev) + new_len = prev_len; // First time allocate requested else new_len = alpha * prev_len; - // Allocate new space - DestType *new_mem, *old_mem; - new_mem = new DestType(new_len); - if ( m_Glu.num_expansions != 0 ) // The memory has been expanded before + // Allocate new space +// vec = new VectorType(new_len); + VectorType old_vec(vec); + if ( Glu.num_expansions != 0 ) // The memory has been expanded before { int tries = 0; + vec.resize(new_len); //expand the current vector if (keep_prev) { - if (!new_mem) return 0; + if (!vec.size()) return -1 ; // FIXME could throw an exception somehow } else { - while ( !new_mem) + while (!vec.size()) { // Reduce the size and allocate again - if ( ++tries > 10) return 0; + if ( ++tries > 10) return -1 alpha = LU_Reduce(alpha); new_len = alpha * prev_len; - new_mem = new DestType(new_len); + vec->resize(new_len); } - } // keep_prev + } // end allocation //Copy the previous values to the newly allocated space - ExpHeader* expanders = m_Glu.expanders; - std::memcpy(new_mem, expanders[type].mem, len_to_copy); - delete [] 
expanders[type].mem; - } - expanders[type].mem = new_mem; - expanders[type].size = new_len; + for (int i = 0; i < old_vec.size(); i++) + vec(i) = old_vec(i); + } // end expansion +// expanders[type].mem = vec; +// expanders[type].size = new_len; prev_len = new_len; - if(m_Glu.num_expansions) ++m_Glu.num_expansions; - return expanders[type].mem; + if(Glu.num_expansions) ++Glu.num_expansions; + return 0; } /** @@ -224,15 +210,16 @@ DestType* SparseLU::expand(int& prev_len, // Length from previous call * * \return a pointer to the newly allocated space */ -template -DestType* SparseLU::LUMemXpand(int jcol, int next, MemType mem_type, int& maxlen) +template +VectorType* SparseLU::LUMemXpand(int jcol, int next, MemType mem_type, int& maxlen) { - DestType *newmem; + VectorType *newmem; if (memtype == USUB) - new_mem = expand(maxlen, mem_type, next, 1); + vec = expand(vec, maxlen, mem_type, next, 1); else - new_mem = expand(maxlen, mem_type, next, 0); - eigen_assert(new_mem && "Can't expand memory"); // FIXME Should be an exception + vec = expand(vec, maxlen, mem_type, next, 0); + // FIXME Should be an exception instead of an assert + eigen_assert(new_mem.size() && "Can't expand memory"); return new_mem; diff --git a/Eigen/src/SparseLU/SparseLU_Structs.h b/Eigen/src/SparseLU/SparseLU_Structs.h index 72e1db343..e680eaa21 100644 --- a/Eigen/src/SparseLU/SparseLU_Structs.h +++ b/Eigen/src/SparseLU/SparseLU_Structs.h @@ -23,7 +23,7 @@ // Eigen. If not, see . /* - * NOTE: Part of this file is the modified version of files slu_[s,d,c,z]defs.h + * NOTE: This file comes from a partly modified version of files slu_[s,d,c,z]defs.h * -- SuperLU routine (version 4.1) -- * Univ. of California Berkeley, Xerox Palo Alto Research Center, * and Lawrence Berkeley National Lab. 
@@ -84,36 +84,39 @@ #define EIGEN_LU_STRUCTS namespace Eigen { -#define NO_MEMTYPE 4 /* 0: lusup +#define LU_NBR_MEMTYPE 4 /* 0: lusup 1: ucol 2: lsub 3: usub */ -typedef enum {NATURAL, MMD_ATA, MMD_AT_PLUS_A, COLAMD, MY_PREMC} colperm_t; -typedef enum {DOFACT, SamePattern, SamePattern_SameRowPerm, Factored} fact_t; +typedef enum {NATURAL, MMD_ATA, MMD_AT_PLUS_A, COLAMD, MY_PERMC} colperm_t; +typedef enum {DOFACT, SamePattern, Factored} fact_t; typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} MemType; /** Headers for dynamically managed memory - \tparam BaseType can be int, real scalar or complex scalar*/ -template + \tparam IndexVectorType can be int, real scalar or complex scalar*/ +template struct ExpHeader { int size; // Length of the memory that has been used */ - BaseType *mem; + VectorType *mem; // Save the current pointer of the newly allocated memory } ExpHeader; -template +template struct { - VectorXi xsup; // supernode and column mapping - VectorXi supno; // Supernode number corresponding to this column - VectorXi lsub; // Compressed L subscripts of rectangular supernodes - VectorXi xlsub; // xlsub(j) points to the starting location of the j-th column in lsub - VectorXi xlusup; - VectorXi xusub; - VectorType lusup; // L supernodes - VectorType ucol; // U columns + IndexVector* xsup; //First supernode column ... xsup(s) points to the beginning of the s-th supernode + IndexVector* supno; // Supernode number corresponding to this column (column to supernode mapping) + ScalarVector* lusup; // nonzero values of L ordered by columns + IndexVector* lsub; // Compressed row indices of L rectangular supernodes. 
+ IndexVector* xlusup; // pointers to the beginning of each column in lusup + IndexVector* xlsub; // pointers to the beginning of each column in lsub Index nzlmax; // Current max size of lsub - Index nzumax; // Current max size of ucol Index nzlumax; // Current max size of lusup + + ScalarVector* ucol; // nonzero values of U ordered by columns + IndexVector* usub; // row indices of U columns in ucol + IndexVector* xusub; // Pointers to the beginning of each column of U in ucol + Index nzumax; // Current max size of ucol Index n; // Number of columns in the matrix + int num_expansions; ExpHeader *expanders; // Array of pointers to 4 types of memory } GlobalLU_t; From f091879d776965588d8fe631b70e902a6bae3e59 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Thu, 7 Jun 2012 19:06:22 +0200 Subject: [PATCH 07/73] Memory management --- Eigen/src/SparseLU/SparseLU.h | 72 +++++---- Eigen/src/SparseLU/SparseLU_Memory.h | 176 +++++++++++---------- Eigen/src/SparseLU/SparseLU_Structs.h | 7 +- Eigen/src/SparseLU/SparseLU_column_bmod.h | 34 ++-- Eigen/src/SparseLU/SparseLU_column_dfs.h | 30 ++-- Eigen/src/SparseLU/SparseLU_copy_to_ucol.h | 45 +++--- Eigen/src/SparseLU/SparseLU_snode_dfs.h | 36 ++--- 7 files changed, 206 insertions(+), 194 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 996dbf078..7f0fb1b0b 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -48,7 +48,8 @@ class SparseLU typedef SparseMatrix NCMatrix; typedef SuperNodalMatrix SCMatrix; typedef GlobalLU_t Eigen_GlobalLU_t; - typedef Matrix VectorType; + typedef Matrix ScalarVector; + typedef Matrix IndexVector; typedef PermutationMatrix PermutationType; public: SparseLU():m_isInitialized(true),m_symmetricmode(false),m_fact(DOFACT),m_diagpivotthresh(1.0) @@ -93,15 +94,15 @@ class SparseLU fact_t m_fact; NCMatrix m_mat; // The input (permuted ) matrix SCMatrix m_Lstore; // The lower triangular matrix (supernodal) - NCMatrix m_Ustore; 
//The upper triangular matrix + NCMatrix m_Ustore; // The upper triangular matrix PermutationType m_perm_c; // Column permutation PermutationType m_iperm_c; // Column permutation PermutationType m_perm_r ; // Row permutation PermutationType m_iperm_r ; // Inverse row permutation - VectorXi m_etree; // Column elimination tree + IndexVector m_etree; // Column elimination tree - Scalar *m_work; // - Index *m_iwork; // + ScalarVector m_work; // + IndexVector m_iwork; // static Eigen_GlobalLU_t m_Glu; // persistent data to facilitate multiple factors // should be defined as a class member // SuperLU/SparseLU options @@ -158,7 +159,7 @@ void SparseLU::analyzePattern(const MatrixType& mat) // In symmetric mode, do not do postorder here if (m_symmetricmode == false) { - VectorXi post, iwork; + IndexVector post, iwork; // Post order etree post = internal::TreePostorder(m_mat.cols(), m_etree); @@ -209,20 +210,20 @@ void SparseLU::factorize(const MatrixType& matrix) int maxpanel = m_panel_size * m; // Set up pointers for integer working arrays - Map segrep(m_iwork, m); // - Map parent(&segrep(0) + m, m); // - Map xplore(&parent(0) + m, m); // - Map repfnz(&xplore(0) + m, maxpanel); // - Map panel_lsub(&repfnz(0) + maxpanel, maxpanel);// - Map xprune(&panel_lsub(0) + maxpanel, n); // - Map marker(&xprune(0)+n, m * LU_NO_MARKER); // + Map segrep(&m_iwork(0), m); // + Map parent(&segrep(0) + m, m); // + Map xplore(&parent(0) + m, m); // + Map repfnz(&xplore(0) + m, maxpanel); // + Map panel_lsub(&repfnz(0) + maxpanel, maxpanel);// + Map xprune(&panel_lsub(0) + maxpanel, n); // + Map marker(&xprune(0)+n, m * LU_NO_MARKER); // repfnz.setConstant(-1); panel_lsub.setConstant(-1); // Set up pointers for scalar working arrays - VectorType dense(maxpanel); + ScalarVector dense(maxpanel); dense.setZero(); - VectorType tempv(LU_NUM_TEMPV(m,m_panel_size,m_maxsuper,m_rowblk); + ScalarVector tempv(LU_NUM_TEMPV(m,m_panel_size,m_maxsuper,m_rowblk); tempv.setZero(); // Setup Permutation 
vectors @@ -234,7 +235,7 @@ void SparseLU::factorize(const MatrixType& matrix) iperm_c = m_perm_c.inverse(); // Identify initial relaxed snodes - VectorXi relax_end(n); + IndexVector relax_end(n); if ( m_symmetricmode = true ) LU_heap_relax_snode(n, m_etree, m_relax, marker, relax_end); else @@ -243,11 +244,12 @@ void SparseLU::factorize(const MatrixType& matrix) m_perm_r.setConstant(-1); marker.setConstant(-1); - VectorXi& xsup = m_Glu.xsup; - VectorXi& supno = m_GLu.supno; - VectorXi& xlsub = m_Glu.xlsub; - VectorXi& xlusup = m_GLu.xlusup; - VectorXi& xusub = m_Glu.xusub; + IndexVector& xsup = m_Glu.xsup; + IndexVector& supno = m_GLu.supno; + IndexVector& xlsub = m_Glu.xlsub; + IndexVector& xlusup = m_GLu.xlusup; + IndexVector& xusub = m_Glu.xusub; + Index& nzlumax = m_Glu.nzlumax; supno(0) = IND_EMPTY; xsup(0) = xlsub(0) = xusub(0) = xlusup(0); @@ -259,7 +261,7 @@ void SparseLU::factorize(const MatrixType& matrix) // (b) panel_size contiguous columns, defined by the user register int jcol,kcol; int min_mn = std::min(m,n); - VectorXi panel_histo(n); + IndexVector panel_histo(n); Index nextu, nextlu, jsupno, fsupc, new_next; int pivrow; // Pivotal row number in the original row matrix int nseg1; // Number of segments in U-column above panel row jcol @@ -274,7 +276,7 @@ void SparseLU::factorize(const MatrixType& matrix) // Factorize the relaxed supernode(jcol:kcol) // First, determine the union of the row structure of the snode info = LU_snode_dfs(jcol, kcol, m_mat.innerIndexPtr(), m_mat.outerIndexPtr(), xprune, marker); - if ( !info ) + if ( info ) { m_info = NumericalIssue; m_factorizationIsOk = false; @@ -288,8 +290,12 @@ void SparseLU::factorize(const MatrixType& matrix) nzlumax = m_Glu.nzlumax; while (new_next > nzlumax ) { - m_Glu.lusup = LUMemXpand(jcol, nextlu, LUSUP, nzlumax); - m_GLu.nzlumax = nzlumax; + mem = LUMemXpand(lusup, nzlumax, nextlu, LUSUP, m_Glu); + if (mem) + { + m_factorizationIsOk = false; + return; + } } // Now, left-looking factorize each 
column within the snode for (icol = jcol; icol<=kcol; icol++){ @@ -303,7 +309,7 @@ void SparseLU::factorize(const MatrixType& matrix) // Eliminate the current column info = LU_pivotL(icol, m_diagpivotthresh, m_perm_r, m_iperm_c, pivrow, m_Glu); - if ( !info ) + if ( info ) { m_info = NumericalIssue; m_factorizationIsOk = false; @@ -341,8 +347,8 @@ void SparseLU::factorize(const MatrixType& matrix) nseg = nseg1; // begin after all the panel segments //Depth-first-search for the current column - VectorBlock panel_lsubk(panel_lsub, k, m); //FIXME - VectorBlock repfnz_k(repfnz, k, m); //FIXME + VectorBlock panel_lsubk(panel_lsub, k, m); //FIXME + VectorBlock repfnz_k(repfnz, k, m); //FIXME info = LU_column_dfs(m, jj, perm_r, nseg, panel_lsub(k), segrep, repfnz_k, xprune, marker, parent, xplore, m_Glu); if ( !info ) { @@ -351,10 +357,10 @@ void SparseLU::factorize(const MatrixType& matrix) return; } // Numeric updates to this column - VectorBlock dense_k(dense, k, m); //FIXME - VectorBlock segrep_k(segrep, nseg1, m) // FIXME Check the length + VectorBlock dense_k(dense, k, m); //FIXME + VectorBlock segrep_k(segrep, nseg1, m) // FIXME Check the length info = LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_Glu); - if ( !info ) + if ( info ) { m_info = NumericalIssue; m_factorizationIsOk = false; @@ -364,7 +370,7 @@ void SparseLU::factorize(const MatrixType& matrix) // Copy the U-segments to ucol(*) //FIXME Check that repfnz_k, dense_k... 
have stored references to modified columns info = LU_copy_to_col(jj, nseg, segrep, repfnz_k, perm_r, dense_k, m_Glu); - if ( !info ) + if ( info ) { m_info = NumericalIssue; m_factorizationIsOk = false; @@ -373,7 +379,7 @@ void SparseLU::factorize(const MatrixType& matrix) // Form the L-segment info = LU_pivotL(jj, m_diagpivotthresh, m_perm_r, iperm_c, pivrow, m_Glu); - if ( !info ) + if ( info ) { m_info = NumericalIssue; m_factorizationIsOk = false; diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h index 730557b63..a92c3bcc4 100644 --- a/Eigen/src/SparseLU/SparseLU_Memory.h +++ b/Eigen/src/SparseLU/SparseLU_Memory.h @@ -53,7 +53,7 @@ namespace internal { /** - * \brief Allocate various working space needed in the numerical factorization phase. + * \brief Allocate various working space failed in the numerical factorization phase. * \param m number of rows of the input matrix * \param n number of columns * \param annz number of initial nonzeros in the matrix @@ -61,22 +61,21 @@ namespace internal { * \param iwork Integer working space * \param lwork if lwork=-1, this routine returns an estimated size of the required memory * \param Glu persistent data to facilitate multiple factors : will be deleted later ?? 
- * \return an estimated size of the required memory if lwork = -1; - * FIXME should also return the size of actually allocated when memory allocation failed - * NOTE Unlike SuperLU, this routine does not allow the user to provide the size to allocate + * \return an estimated size of the required memory if lwork = -1; otherwise, return the size of actually allocated when memory allocation failed + * NOTE Unlike SuperLU, this routine does not allow the user to provide its own user space */ template -int SparseLU::LUMemInit(int m, int n, int annz, Scalar *work, Index *iwork, int lwork, int fillratio, GlobalLU_t& Glu) +int SparseLU::LUMemInit(int m, int n, int annz, ScalarVector& work, IndexVector& iwork, int lwork, int fillratio, GlobalLU_t& Glu) { typedef typename ScalarVector::Scalar; typedef typename IndexVector::Index; - Glu.num_expansions = 0; //No memory expansions so far - if (!Glu.expanders) - Glu.expanders = new ExpHeader(LU_NBR_MEMTYPE); - + int& num_expansions = Glu.num_expansions; //No memory expansions so far + num_expansions = 0; // Guess the size for L\U factors - int nzlmax, nzumax, nzlumax; + Index& nzlmax = Glu.nzlmax; + Index& nzumax = Glu.nzumax; + Index& nzlumax = Glu.nzlumax; nzumax = nzlumax = m_fillratio * annz; // estimated number of nonzeros in U nzlmax = std::max(1, m_fill_ratio/4.) * annz; // estimated nnz in L factor @@ -90,138 +89,145 @@ int SparseLU::LUMemInit(int m, int n, int annz, Scalar *work, Index *iwork, int } // Setup the required space - // NOTE: In SuperLU, there is an option to let the user provide its own space, unlike here. 
- // Allocate Integer pointers for L\U factors - Glu.supno = new IndexVector; - Glu.supno->resize(n+1); - - Glu.xlsub = new IndexVector; - Glu.xlsub->resize(n+1); - - Glu.xlusup = new IndexVector; - Glu.xlusup->resize(n+1); - - Glu.xusub = new IndexVector; - Glu.xusub->resize(n+1); + // First allocate Integer pointers for L\U factors + Glu.supno.resize(n+1); + Glu.xlsub.resize(n+1); + Glu.xlusup.resize(n+1); + Glu.xusub.resize(n+1); // Reserve memory for L/U factors - Glu.lusup = new ScalarVector; - Glu.ucol = new ScalarVector; - Glu.lsub = new IndexVector; - Glu.usub = new IndexVector; - - expand(Glu.lusup,nzlumax, LUSUP, 0, 0, Glu); - expand(Glu.ucol,nzumax, UCOL, 0, 0, Glu); - expand(Glu.lsub,nzlmax, LSUB, 0, 0, Glu); - expand(Glu.usub,nzumax, USUB, 0, 1, Glu); + expand(Glu.lusup, nzlumax, 0, 0, num_expansions); + expand(Glu.ucol,nzumax, 0, 0, num_expansions); + expand(Glu.lsub,nzlmax, 0, 0, num_expansions); + expand(Glu.usub,nzumax, 0, 1, num_expansions); // Check if the memory is correctly allocated, // Should be a try... 
catch section here while ( !Glu.lusup.size() || !Glu.ucol.size() || !Glu.lsub.size() || !Glu.usub.size()) { //otherwise reduce the estimated size and retry -// delete [] Glu.lusup; -// delete [] Glu.ucol; -// delete [] Glu.lsub; -// delete [] Glu.usub; -// nzlumax /= 2; nzumax /= 2; nzlmax /= 2; - //FIXME Should be an excpetion here - eigen_assert (nzlumax > annz && "Not enough memory to perform factorization"); + //FIXME Should be an exception here + if (nzlumax < annz ) return nzlumax; - expand(Glu.lsup, nzlumax, LUSUP, 0, 0, Glu); - expand(Glu.ucol, nzumax, UCOL, 0, 0, Glu); - expand(Glu.lsub, nzlmax, LSUB, 0, 0, Glu); - expand(Glu.usub, nzumax, USUB, 0, 1, Glu); + expand(Glu.lsup, nzlumax, 0, 0, Glu); + expand(Glu.ucol, nzumax, 0, 0, Glu); + expand(Glu.lsub, nzlmax, 0, 0, Glu); + expand(Glu.usub, nzumax, 0, 1, Glu); } // LUWorkInit : Now, allocate known working storage int isize = (2 * m_panel_size + 3 + LU_NO_MARKER) * m + n; int dsize = m * m_panel_size + LU_NUM_TEMPV(m, m_panel_size, m_maxsuper, m_rowblk); - iwork = new Index(isize); - eigen_assert( (m_iwork != 0) && "Malloc fails for iwork"); - work = new Scalar(dsize); - eigen_assert( (m_work != 0) && "Malloc fails for dwork"); + iwork.resize(isize); + work.resize(isize); - ++Glu.num_expansions; + ++num_expansions; return 0; + } // end LuMemInit /** * Expand the existing storage to accomodate more fill-ins - * \param vec Valid pointer to a vector to allocate or expand - * \param [in,out]prev_len At input, length from previous call. At output, length of the newly allocated vector - * \param type Which part of the memory to expand - * \param len_to_copy Size of the memory to be copied to new store - * \param keep_prev true: use prev_len; Do not expand this vector; false: compute new_len and expand + * \param vec Valid pointer to the vector to allocate or expand + * \param [in,out]length At input, contain the current length of the vector that is to be increased. 
At output, length of the newly allocated vector + * \param [in]len_to_copy Current number of elements in the factors + * \param keep_prev true: use length and do not expand the vector; false: compute new_len and expand + * \param [in,out]num_expansions Number of times the memory has been expanded */ template -int SparseLU::expand(VectorType& vec, int& prev_len, MemType type, int len_to_copy, bool keep_prev, GlobalLU_t& Glu) +int SparseLU::expand(VectorType& vec, int& length, int len_to_copy, bool keep_prev, int& num_expansions) { float alpha = 1.5; // Ratio of the memory increase int new_len; // New size of the allocated memory - if(Glu.num_expansions == 0 || keep_prev) - new_len = prev_len; // First time allocate requested + if(num_expansions == 0 || keep_prev) + new_len = length ; // First time allocate requested else - new_len = alpha * prev_len; + new_len = alpha * length ; - // Allocate new space -// vec = new VectorType(new_len); - VectorType old_vec(vec); - if ( Glu.num_expansions != 0 ) // The memory has been expanded before + VectorType old_vec; // Temporary vector to hold the previous values + if (len_to_copy > 0 ) + old_vec = vec; // old_vec should be of size len_to_copy... to be checked + + //expand the current vector //FIXME Should be in a try ... 
catch region + vec.resize(new_len); + /* + * Test if the memory has been well allocated + * otherwise reduce the size and try to reallocate + * copy data from previous vector (if exists) to the newly allocated vector + */ + if ( num_expansions != 0 ) // The memory has been expanded before { int tries = 0; - vec.resize(new_len); //expand the current vector if (keep_prev) { - if (!vec.size()) return -1 ; // FIXME could throw an exception somehow + if (!vec.size()) return new_len ; } else { while (!vec.size()) { - // Reduce the size and allocate again - if ( ++tries > 10) return -1 + // Reduce the size and allocate again + if ( ++tries > 10) return new_len; alpha = LU_Reduce(alpha); - new_len = alpha * prev_len; - vec->resize(new_len); + new_len = alpha * length ; + vec.resize(new_len); //FIXME Should be in a try catch section } } // end allocation + //Copy the previous values to the newly allocated space - for (int i = 0; i < old_vec.size(); i++) - vec(i) = old_vec(i); + if (len_to_copy > 0) + vec.segment(0, len_to_copy) = old_vec; } // end expansion -// expanders[type].mem = vec; -// expanders[type].size = new_len; - prev_len = new_len; - if(Glu.num_expansions) ++Glu.num_expansions; + length = new_len; + if(num_expansions) ++num_expansions; return 0; } /** * \brief Expand the existing storage - * - * NOTE: The calling sequence of this function is different from that of SuperLU - * - * \return a pointer to the newly allocated space + * \param vec vector to expand + * \param [in,out]maxlen On input, previous size of vec (Number of elements to copy ). on output, new size + * \param next current number of elements in the vector. 
+ * \param Glu Global data structure + * \return 0 on success, > 0 size of the memory allocated so far */ -template -VectorType* SparseLU::LUMemXpand(int jcol, int next, MemType mem_type, int& maxlen) +template +int SparseLU::LUMemXpand(VectorType& vec, int& maxlen, int next, LU_MemType memtype, LU_GlobalLu_t& Glu) { - VectorType *newmem; + int failed_size; + int& num_expansions = Glu.num_expansions; if (memtype == USUB) - vec = expand(vec, maxlen, mem_type, next, 1); + failed_size = expand(vec, maxlen, next, 1, num_expansions); else - vec = expand(vec, maxlen, mem_type, next, 0); - // FIXME Should be an exception instead of an assert - eigen_assert(new_mem.size() && "Can't expand memory"); + failed_size = expand(vec, maxlen, next, 0, num_expansions); + + if (failed_size) + return faileld_size; - return new_mem; + // The following code is not really needed since maxlen is passed by reference + // and correspond to the appropriate field in Glu +// switch ( mem_type ) { +// case LUSUP: +// Glu.nzlumax = maxlen; +// break; +// case UCOL: +// Glu.nzumax = maxlen; +// break; +// case LSUB: +// Glu.nzlmax = maxlen; +// break; +// case USUB: +// Glu.nzumax = maxlen; +// break; +// } + + return 0 ; } diff --git a/Eigen/src/SparseLU/SparseLU_Structs.h b/Eigen/src/SparseLU/SparseLU_Structs.h index e680eaa21..48fde1ada 100644 --- a/Eigen/src/SparseLU/SparseLU_Structs.h +++ b/Eigen/src/SparseLU/SparseLU_Structs.h @@ -92,8 +92,9 @@ typedef enum {NATURAL, MMD_ATA, MMD_AT_PLUS_A, COLAMD, MY_PERMC} colperm_t; typedef enum {DOFACT, SamePattern, Factored} fact_t; typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} MemType; -/** Headers for dynamically managed memory - \tparam IndexVectorType can be int, real scalar or complex scalar*/ + +/* Obsolete, headers for dynamically managed memory + \tparam VectorType can be int, real scalar or complex scalar*/ template struct ExpHeader { int size; // Length of the memory that has been used */ @@ -118,7 +119,7 @@ struct { Index n; // Number 
of columns in the matrix int num_expansions; - ExpHeader *expanders; // Array of pointers to 4 types of memory + ExpHeader *expanders; // Deprecated... Array of pointers to 4 types of memory } GlobalLU_t; }// End namespace Eigen diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h index 58755363d..bed4f9519 100644 --- a/Eigen/src/SparseLU/SparseLU_column_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -59,8 +59,8 @@ * > 0 - number of bytes allocated when run out of space * */ -template -int SparseLU::LU_column_bmod(const int jcol, const int nseg, VectorType& dense, VectorType& tempv, VectorXi& segrep, VectorXi& repfnz, int fpanelc, LU_GlobalLu_t& Glu) +template +int SparseLU::LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, int fpanelc, LU_GlobalLu_t& Glu) { int jsupno, k, ksub, krep, krep_ind, ksupno; @@ -72,13 +72,14 @@ int SparseLU::LU_column_bmod(const int jcol, const int nseg, VectorType& dense, * kfnz = first nonz in the k-th supernodal segment * no-zeros = no lf leading zeros in a supernodal U-segment */ - VectorXi& xsup = Glu.xsup; - VectorXi& supno = Glu.supno; - VectorXi& lsub = Glu.lsub; - VectorXi& xlsub = Glu.xlsub; - VectorXi& xlusup = Glu.xlusup; - VectorType& lusup = Glu.lusup; - int nzlumax = GLu.nzlumax; + IndexVector& xsup = Glu.xsup; + IndexVector& supno = Glu.supno; + IndexVector& lsub = Glu.lsub; + IndexVector& xlsub = Glu.xlsub; + IndexVector& xlusup = Glu.xlusup; + ScalarVector& lusup = Glu.lusup; + Index& nzlumax = Glu.nzlumax; + int jsupno = supno(jcol); // For each nonzero supernode segment of U[*,j] in topological order k = nseg - 1; @@ -126,13 +127,13 @@ int SparseLU::LU_column_bmod(const int jcol, const int nseg, VectorType& dense, luptr += nsupr * no_zeros + no_zeros; // Form Eigen matrix and vector Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); - Map 
u(tempv.data(), segsize); + Map u(tempv.data(), segsize); u = A.triangularView().solve(u); // Dense matrix-vector product y <-- A*x luptr += segsize; new (&A) (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(nsupr) ); - Map l( &(tempv.data()[segsize]), segsize); + Map l( &(tempv.data()[segsize]), segsize); l= A * u; // Scatter tempv[] into SPA dense[] as a temporary storage @@ -164,10 +165,9 @@ int SparseLU::LU_column_bmod(const int jcol, const int nseg, VectorType& dense, new_next = nextlu + xlsub(fsupc + 1) - xlsub(fsupc); while (new_next > nzlumax ) { - Glu.lusup = LUmemXpand(jcol, nextlu, LUSUP, &nzlumax); - Glu.nzlumax = nzlumax; - lusup = Glu.lusup; - lsub = Glu.lsub; + mem = LUmemXpand(Glu.lusup, nzlumax, nextlu, LUSUP, Glu); + if (mem) return mem; + lsub = Glu.lsub; //FIXME Why is it updated here. } for (isub = xlsub(fsupc); isub < xlsub(fsupc+1); isub++) @@ -203,11 +203,11 @@ int SparseLU::LU_column_bmod(const int jcol, const int nseg, VectorType& dense, // points to the beginning of jcol in snode L\U(jsupno) ufirst = xlusup(jcol) + d_fsupc; Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); - Map l( &(lusup.data()[ufirst]), nsupc ); + Map l( &(lusup.data()[ufirst]), nsupc ); u = A.triangularView().solve(u); new (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); - Map l( &(lusup.data()[ufirst+nsupc]), nsupr ); + Map l( &(lusup.data()[ufirst+nsupc]), nsupr ); l = l - A * u; } // End if fst_col diff --git a/Eigen/src/SparseLU/SparseLU_column_dfs.h b/Eigen/src/SparseLU/SparseLU_column_dfs.h index 15ddcf7c0..1c832d60e 100644 --- a/Eigen/src/SparseLU/SparseLU_column_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_column_dfs.h @@ -70,9 +70,10 @@ * > 0 number of bytes allocated when run out of space * */ -int SparseLU::LU_column_dfs(const int m, const int jcol, VectorXi& perm_r, VectorXi& nseg VectorXi& lsub_col, VectorXi& segrep, VectorXi& repfnz, 
VectorXi& xprune, VectorXi& marker, VectorXi& parent, VectorXi& xplore, LU_GlobalLu_t& Glu) +template +int SparseLU::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, IndexVector& nseg IndexVector& lsub_col, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLu_t& Glu) { - typedef typename VectorXi::Index; + typedef typename IndexVector::IndexVector; int jcolp1, jcolm1, jsuper, nsuper, nextl; int krow; // Row index of the current element @@ -82,17 +83,18 @@ int SparseLU::LU_column_dfs(const int m, const int jcol, VectorXi& perm_r, Vecto int chperm, chmark, chrep, oldrep, kchild; int myfnz; // First nonzero element in the current column int xdfs, maxdfs, kpar; - + int mem; // Initialize pointers - VectorXi& xsup = Glu.xsup; - VectorXi& supno = Glu.supno; - VectorXi& lsub = Glu.lsub; - VectorXi& xlsub = Glu.xlsub; + IndexVector& xsup = Glu.xsup; + IndexVector& supno = Glu.supno; + IndexVector& lsub = Glu.lsub; + IndexVector& xlsub = Glu.xlsub; + IndexVector& nzlmax = Glu.nzlmax; nsuper = supno(jcol); jsuper = nsuper; nextl = xlsup(jcol); - VectorBlock marker2(marker, 2*m, m); + VectorBlock marker2(marker, 2*m, m); // For each nonzero in A(*,jcol) do dfs for (k = 0; lsub_col[k] != IND_EMPTY; k++) { @@ -113,10 +115,8 @@ int SparseLU::LU_column_dfs(const int m, const int jcol, VectorXi& perm_r, Vecto lsub(nextl++) = krow; // krow is indexed into A if ( nextl >= nzlmax ) { - Glu.lsub = LUMemXpand(jcol, nextl, LSUB, nzlmax); - //FIXME try... 
catch out of space - Glu.nzlmax = nzlmax; - lsub = Glu.lsub; + mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, Glu); + if ( mem ) return mem; } if (kmark != jcolm1) jsuper = IND_EMPTY; // Row index subset testing } @@ -163,10 +163,8 @@ int SparseLU::LU_column_dfs(const int m, const int jcol, VectorXi& perm_r, Vecto lsub(nextl++) = kchild; if (nextl >= nzlmax) { - Glu.lsub = LUMemXpand(jcol, nextl, LSUB, nzlmax); - //FIXME Catch out of space errors - GLu.nzlmax = nzlmax; - lsub = Glu.lsub; + mem = LUMemXpand(lsub, nzlmax, nextl, LSUB); + if (mem) return mem; } if (chmark != jcolm1) jsuper = IND_EMPTY; } diff --git a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h index 3f8d8abe2..dc53edcfb 100644 --- a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +++ b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h @@ -58,26 +58,28 @@ * > 0 - number of bytes allocated when run out of space * */ -template -int SparseLU::LU_copy_to_ucol(const int jcol, const int nseg, VectorXi& segrep, VectorXi& repfnz, VectorXi& perm_r, VectorType& dense, LU_GlobalLu_t& Glu) +template +int SparseLU::LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segrep, IndexVector& repfnz, IndexVector& perm_r, ScalarVector& dense, LU_GlobalLu_t& Glu) { - int ksupno, k, ksub, krep, ksupno; + Index ksupno, k, ksub, krep, ksupno; + typedef typename IndexVector::Index; - VectorXi& xsup = Glu.xsup; - VectorXi& supno = Glu.supno; - VectorXi& lsub = Glu.lsub; - VectorXi& xlsub = Glu.xlsub; - VectorType& ucol = GLu.ucol; - VectorXi& usub = Glu.usub; - VectorXi& xusub = Glu.xusub; - int nzumax = GLu.nzumax; - int jsupno = supno(jcol); + IndexVector& xsup = Glu.xsup; + IndexVector& supno = Glu.supno; + IndexVector& lsub = Glu.lsub; + IndexVector& xlsub = Glu.xlsub; + ScalarVector& ucol = GLu.ucol; + IndexVector& usub = Glu.usub; + IndexVector& xusub = Glu.xusub; + Index& nzumax = Glu.nzumax; + + Index jsupno = supno(jcol); // For each nonzero supernode segment of U[*,j] in 
topological order k = nseg - 1; - int nextu = xusub(jcol); - int kfnz, isub, segsize; - int new_next,irow; + Index nextu = xusub(jcol); + Index kfnz, isub, segsize; + Index new_next,irow; for (ksub = 0; ksub < nseg; ksub++) { krep = segrep(k); k--; @@ -93,13 +95,12 @@ int SparseLU::LU_copy_to_ucol(const int jcol, const int nseg, VectorXi& segrep, new_next = nextu + segsize; while (new_next > nzumax) { - Glu.ucol = LU_MemXpand(jcol, nextu, UCOL, nzumax); //FIXME try and catch errors - ucol = Glu.ucol; - Glu.nzumax = nzumax; - Glu.usub = LU_MemXpand(jcol, nextu, USUB, nzumax); //FIXME try and catch errors - Glu.nzumax = nzumax; - usub = Glu.usub; - lsub = Glu.lsub; + mem = LU_MemXpand(ucol, nzumax, nextu, UCOL, Glu); + if (mem) return mem; + mem = LU_MemXpand(usub, nzumax, nextu, USUB, Glu); + if (mem) return mem; + + lsub = Glu.lsub; //FIXME Why setting this as well ?? } for (i = 0; i < segsize; i++) diff --git a/Eigen/src/SparseLU/SparseLU_snode_dfs.h b/Eigen/src/SparseLU/SparseLU_snode_dfs.h index c3048be54..cf64eb747 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_snode_dfs.h @@ -55,17 +55,19 @@ * \param colptr Pointer to the beginning of each column * \param xprune (out) The pruned tree ?? 
* \param marker (in/out) working vector + * \return 0 on success, > 0 size of the memory when memory allocation failed */ - template - int SparseLU::LU_snode_dfs(const int jcol, const int kcol, const VectorXi* asub, const VectorXi* colptr, - VectorXi& xprune, VectorXi& marker, LU_GlobalLu_t *m_Glu) + template + int SparseLU::LU_snode_dfs(const int jcol, const int kcol, const IndexVector* asub, const IndexVector* colptr, IndexVector& xprune, IndexVector& marker, LU_GlobalLu_t& Glu) { - VectorXi& xsup = m_Glu->xsup; - VectorXi& supno = m_Glu->supno; // Supernode number corresponding to this column - VectorXi& lsub = m_Glu->lsub; - VectorXi& xlsub = m_Glu->xlsub; - - int nsuper = ++supno(jcol); // Next available supernode number + typedef typename IndexVector::Index; + IndexVector& xsup = Glu.xsup; + IndexVector& supno = Glu.supno; // Supernode number corresponding to this column + IndexVector& lsub = Glu.lsub; + IndexVector& xlsub = Glu.xlsub; + Index& nzlmax = Glu.nzlmax; + int mem; + Index nsuper = ++supno(jcol); // Next available supernode number register int nextl = xlsub(jcol); //Index of the starting location of the jcol-th column in lsub register int i,k; int krow,kmark; @@ -83,26 +85,24 @@ lsub(nextl++) = krow; if( nextl >= nzlmax ) { - m_Glu->lsub = LUMemXpand(jcol, nextl, LSUB, nzlmax); - m_Glu->nzlmax = nzlmax; - lsub = m_Glu->lsub; + mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, Glu); + if (mem) return mem; } } } - supno(i) = nsuper; + supno(i) = nsuper; } // If supernode > 1, then make a copy of the subscripts for pruning if (jcol < kcol) { - int new_next = nextl + (nextl - xlsub(jcol)); + Index new_next = nextl + (nextl - xlsub(jcol)); while (new_next > nzlmax) { - m_Glu->lsub = LUMemXpand(jcol, nextl, LSUB, &nzlmax); - m_Glu->nzlmax= nzlmax; - lsub = m_Glu->lsub; + mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, Glu); + if (mem) return mem; } - int ifrom, ito = nextl; + Index ifrom, ito = nextl; for (ifrom = xlsub(jcol); ifrom < nextl;) lsub(ito++) = 
lsub(ifrom++); for (i = jcol+1; i <=kcol; i++)xlsub(i) = nextl; From 7bdaa60f6c9ea6e86e87639597811c546479bb93 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Fri, 8 Jun 2012 17:23:38 +0200 Subject: [PATCH 08/73] triangular solve... almost finished --- Eigen/src/SparseLU/SparseLU.h | 160 ++++++++++++++++++++++++--- Eigen/src/SparseLU/SparseLU_Matrix.h | 10 +- Eigen/src/SparseLU/SparseLU_Memory.h | 2 +- 3 files changed, 153 insertions(+), 19 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 7f0fb1b0b..38a587594 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -70,6 +70,8 @@ class SparseLU void analyzePattern (const MatrixType& matrix); void factorize (const MatrixType& matrix); void compute (const MatrixType& matrix); + template + bool SparseLU::_solve(const MatrixBase &b, MatrixBase &dest) const /** Indicate that the pattern of the input matrix is symmetric */ void isSymmetric(bool sym) @@ -82,6 +84,21 @@ class SparseLU { m_diagpivotthresh = thresh; } + + + /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. 
+ * + * \sa compute() + */ + template + inline const internal::solve_retval solve(const MatrixBase& b) const + { + eigen_assert(m_factorizationIsOk && "SparseLU is not initialized."); + eigen_assert(rows()==b.rows() + && "SparseLU::solve(): invalid number of rows of the right hand side matrix b"); + return internal::solve_retval(*this, b.derived()); + } + protected: // Functions void initperfvalues(); @@ -101,8 +118,8 @@ class SparseLU PermutationType m_iperm_r ; // Inverse row permutation IndexVector m_etree; // Column elimination tree - ScalarVector m_work; // - IndexVector m_iwork; // + ScalarVector m_work; // Scalar work vector + IndexVector m_iwork; //Index work vector static Eigen_GlobalLU_t m_Glu; // persistent data to facilitate multiple factors // should be defined as a class member // SuperLU/SparseLU options @@ -210,26 +227,37 @@ void SparseLU::factorize(const MatrixType& matrix) int maxpanel = m_panel_size * m; // Set up pointers for integer working arrays - Map segrep(&m_iwork(0), m); // - Map parent(&segrep(0) + m, m); // - Map xplore(&parent(0) + m, m); // - Map repfnz(&xplore(0) + m, maxpanel); // - Map panel_lsub(&repfnz(0) + maxpanel, maxpanel);// - Map xprune(&panel_lsub(0) + maxpanel, n); // - Map marker(&xprune(0)+n, m * LU_NO_MARKER); // + VectorBlock segrep(m_iwork, 0, m); +// Map segrep(&m_iwork(0), m); // + + VectorBlock parent(segrep, m, m); +// Map parent(&segrep(0) + m, m); // + + VectorBlock xplore(parent, m, m); +// Map xplore(&parent(0) + m, m); // + + VectorBlock repnfnz(xplore, m, maxpanel); +// Map repfnz(&xplore(0) + m, maxpanel); // + + VectorBlock panel_lsub(repfnz, maxpanel, maxpanel) +// Map panel_lsub(&repfnz(0) + maxpanel, maxpanel);// + + VectorBlock xprune(panel_lsub, maxpanel, n); +// Map xprune(&panel_lsub(0) + maxpanel, n); // + + VectorBlock marker(xprune, n, m * LU_NO_MARKER); +// Map marker(&xprune(0)+n, m * LU_NO_MARKER); // + repfnz.setConstant(-1); panel_lsub.setConstant(-1); // Set up pointers for scalar working 
arrays - ScalarVector dense(maxpanel); + VectorBlock dense(m_work, 0, maxpanel); dense.setZero(); - ScalarVector tempv(LU_NUM_TEMPV(m,m_panel_size,m_maxsuper,m_rowblk); + VectorBlock tempv(m_work, maxpanel, LU_NUM_TEMPV(m, m_panel_size, m_maxsuper, m_rowblk) ); tempv.setZero(); // Setup Permutation vectors - PermutationType iperm_r; // inverse of perm_r - if (m_fact = SamePattern_SameRowPerm) - iperm_r = m_perm_r.inverse(); // Compute the inverse of perm_c PermutationType iperm_c; iperm_c = m_perm_c.inverse(); @@ -424,12 +452,112 @@ void SparseLU::factorize(const MatrixType& matrix) // Create supernode matrix L m_Lstore.setInfos(m, min_mn, nnzL, Glu.lusup, Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno; Glu.xsup); // Create the column major upper sparse matrix U - // ?? Use the MappedSparseMatrix class ?? - new (&m_Ustore) Map > ( m, min_mn, nnzU, Glu.xusub.data(), Glu.usub.data(), Glu.ucol.data() ); + new (&m_Ustore) Map > ( m, min_mn, nnzU, Glu.xusub.data(), Glu.usub.data(), Glu.ucol.data() ); //FIXME + this.m_Ustore = m_Ustore; + m_info = Success; m_factorizationIsOk = ok; } +template +bool SparseLU::_solve(const MatrixBase &b, MatrixBase &x) const +{ + eigen_assert(m_isInitialized && "The matrix should be factorized first"); + EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0, + THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); + + x = b; /* on return, x is overwritten by the computed solution */ + + int nrhs = b.cols(); + + // Permute the right hand side to form Pr*B + x = m_perm_r * x; + + // Forward solve PLy = Pb; + Index fsupc; // First column of the current supernode + Index istart; // Pointer index to the subscript of the current column + Index nsupr; // Number of rows in the current supernode + Index nsupc; // Number of columns in the current supernode + Index nrow; // Number of rows in the non-diagonal part of the supernode + Index luptr; // Pointer index to the current nonzero value + Index iptr; // row index pointer iterator + Index irow; //Current index 
row + Scalar * Lval = m_Lstore.valuePtr(); // Nonzero values + Matrix work(n,nrhs); // working vector + work.setZero(); + int j; + for (k = 0; k <= m_Lstore.nsuper(); k ++) + { + fsupc = m_Lstore.sup_to_col()[k]; + istart = m_Lstore.rowIndexPtr()[fsupc]; + nsupr = m_Lstore..rowIndexPtr()[fsupc+1] - istart; + nsupc = m_Lstore.sup_to_col()[k+1] - fsupc; + nrow = nsupr - nsupc; + + if (nsupc == 1 ) + { + for (j = 0; j < nrhs; j++) + { + luptr = m_Lstore.colIndexPtr()[fsupc]; + for (iptr = istart+1; iptr < m_Lstore.rowIndexPtr()[fsupc+1]; iptr++) + { + irow = m_Lstore.rowIndex()[iptr]; + ++luptr; + x(irow, j) -= x(fsupc, j) * Lval[luptr]; + } + } + } + else + { + // The supernode has more than one column + + // Triangular solve + luptr = m_Lstore.colIndexPtr()[fsupc]; + Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); +// Map, 0, OuterStride > u( &(x(fsupc,0)), nsupc, nrhs, OuterStride<>(x.rows()) ); + Matrix& u = x.block(fsupc, 0, nsupc, nrhs); + u = A.triangularView().solve(u); + + // Matrix-vector product + new (&A) Map, 0, OuterStride<> > ( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); + work.block(0, 0, nrow, nrhs) = A * u; + + //Begin Scatter + for (j = 0; j < nrhs; j++) + { + iptr = istart + nsupc; + for (i = 0; i < nrow; i++) + { + irow = m_Lstore.rowIndex()[iptr]; + x(irow, j) -= work(i, j); // Scatter operation + work(i, j) = Scalar(0); + iptr++; + } + } + } + } // end for all supernodes + + // Back solve Ux = y +} + +namespace internal { + +template +struct solve_retval, Rhs> + : solve_retval_base, Rhs> +{ + typedef SparseLU<_MatrixType,Derived> Dec; + EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) + + template void evalTo(Dest& dst) const + { + dec().derived()._solve(rhs(),dst); + } +}; + +} // end namespace internal + + } // End namespace Eigen #endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_Matrix.h b/Eigen/src/SparseLU/SparseLU_Matrix.h index 01f8784da..e4bf7eda8 100644 --- 
a/Eigen/src/SparseLU/SparseLU_Matrix.h +++ b/Eigen/src/SparseLU/SparseLU_Matrix.h @@ -110,7 +110,7 @@ class SuperNodalMatrix } /** - * Return the pointers to the beginning of each column in \ref outerIndexPtr() + * Return the pointers to the beginning of each column in \ref valuePtr() */ Index* colIndexPtr() { @@ -146,7 +146,13 @@ class SuperNodalMatrix return m_sup_to_col; } - + /** + * Return the number of supernodes + */ + int nsuper() + { + return m_nsuper; + } class InnerIterator; class SuperNodeIterator; diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h index a92c3bcc4..a981b5436 100644 --- a/Eigen/src/SparseLU/SparseLU_Memory.h +++ b/Eigen/src/SparseLU/SparseLU_Memory.h @@ -76,7 +76,7 @@ int SparseLU::LUMemInit(int m, int n, int annz, ScalarVector& work, IndexVector& Index& nzlmax = Glu.nzlmax; Index& nzumax = Glu.nzumax; Index& nzlumax = Glu.nzlumax; - nzumax = nzlumax = m_fillratio * annz; // estimated number of nonzeros in U + nzumax = nzlumax = fillratio * annz; // estimated number of nonzeros in U nzlmax = std::max(1, m_fill_ratio/4.) * annz; // estimated nnz in L factor // Return the estimated size to the user if necessary From 0591011d5cedccf62feb86bee70cd658192ea3df Mon Sep 17 00:00:00 2001 From: "Desire NUENTSA W." Date: Sun, 10 Jun 2012 23:36:38 +0200 Subject: [PATCH 09/73] Sparse LU - End Triangular solve... 
start debugging --- Eigen/src/SparseLU/SparseLU.h | 45 ++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 38a587594..e3838fbb7 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -498,7 +498,7 @@ bool SparseLU::_solve(const MatrixBase &b, MatrixBase &x) const { for (j = 0; j < nrhs; j++) { - luptr = m_Lstore.colIndexPtr()[fsupc]; + luptr = m_Lstore.colIndexPtr()[fsupc]; //FIXME Should be outside the for loop for (iptr = istart+1; iptr < m_Lstore.rowIndexPtr()[fsupc+1]; iptr++) { irow = m_Lstore.rowIndex()[iptr]; @@ -512,10 +512,10 @@ bool SparseLU::_solve(const MatrixBase &b, MatrixBase &x) const // The supernode has more than one column // Triangular solve - luptr = m_Lstore.colIndexPtr()[fsupc]; + luptr = m_Lstore.colIndexPtr()[fsupc]; //FIXME Should be outside the loop Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); // Map, 0, OuterStride > u( &(x(fsupc,0)), nsupc, nrhs, OuterStride<>(x.rows()) ); - Matrix& u = x.block(fsupc, 0, nsupc, nrhs); + Matrix& u = x.block(fsupc, 0, nsupc, nrhs); //FIXME Check this u = A.triangularView().solve(u); // Matrix-vector product @@ -538,6 +538,45 @@ bool SparseLU::_solve(const MatrixBase &b, MatrixBase &x) const } // end for all supernodes // Back solve Ux = y + for (k = m_Lstore.nsuper(); k >= 0; k--) + { + fsupc = m_Lstore.sup_to_col()[k]; + istart = m_Lstore.rowIndexPtr()[fsupc]; + nsupr = m_Lstore..rowIndexPtr()[fsupc+1] - istart; + nsupc = m_Lstore.sup_to_col()[k+1] - fsupc; + luptr = m_Lstore.colIndexPtr()[fsupc]; + + if (nsupc == 1) + { + for (j = 0; j < nrhs; j++) + { + x(fsupc, j) /= Lval[luptr]; + } + } + else + { + Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); + Matrix& u = x.block(fsupc, 0, nsupc, nrhs); + u = A.triangularView().solve(u); + } + + for (j = 0; j < nrhs; ++j) + { + for (jcol = fsupc; jcol < 
fsupc + nsupc; jcol++) + { + for (i = m_Ustore.outerIndexPtr()[jcol]; i < m_Ustore.outerIndexPtr()[jcol]; i++) + { + irow = m_Ustore.InnerIndices()[i]; + x(irow, j) -= x(irow, jcol) * m_Ustore.Values()[i]; + } + } + } + } // End For U-solve + + // Permute back the solution + x = x * m_perm_c; + + return true; } namespace internal { From bccf64d34281066da48cf2da29fd61f7ed703025 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Mon, 11 Jun 2012 18:52:26 +0200 Subject: [PATCH 10/73] Checking Syntax... --- Eigen/src/OrderingMethods/Eigen_Colamd.h | 5 + Eigen/src/OrderingMethods/Ordering.h | 214 +++++++++++++++++ Eigen/src/SparseLU/SparseLU.h | 216 +++++++++++------- Eigen/src/SparseLU/SparseLU_Coletree.h | 41 ++-- Eigen/src/SparseLU/SparseLU_Memory.h | 74 +++--- Eigen/src/SparseLU/SparseLU_Structs.h | 27 +-- Eigen/src/SparseLU/SparseLU_Utils.h | 21 +- .../src/SparseLU/SparseLU_heap_relax_snode.h | 25 +- Eigen/src/SparseLU/SparseLU_relax_snode.h | 15 +- Eigen/src/SparseLU/SparseLU_snode_bmod.h | 17 +- Eigen/src/SparseLU/SparseLU_snode_dfs.h | 27 +-- 11 files changed, 478 insertions(+), 204 deletions(-) create mode 100644 Eigen/src/OrderingMethods/Eigen_Colamd.h create mode 100644 Eigen/src/OrderingMethods/Ordering.h diff --git a/Eigen/src/OrderingMethods/Eigen_Colamd.h b/Eigen/src/OrderingMethods/Eigen_Colamd.h new file mode 100644 index 000000000..8caee7740 --- /dev/null +++ b/Eigen/src/OrderingMethods/Eigen_Colamd.h @@ -0,0 +1,5 @@ + +#ifndef EIGEN_COLAMD_H +#define EIGEN_COLAMD_H + +#endif \ No newline at end of file diff --git a/Eigen/src/OrderingMethods/Ordering.h b/Eigen/src/OrderingMethods/Ordering.h new file mode 100644 index 000000000..c43c381a4 --- /dev/null +++ b/Eigen/src/OrderingMethods/Ordering.h @@ -0,0 +1,214 @@ + +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . 
+ +#ifndef EIGEN_ORDERING_H +#define EIGEN_ORDERING_H + +#include +#include + +namespace Eigen { +template +class OrderingBase +{ + public: + typedef typename internal::traits::MatrixType MatrixType; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::Index Index; + typedef PermutationMatrix PermutationType; + + public: + OrderingBase():m_isInitialized(false) + { + + } + OrderingBase(const MatrixType& mat):OrderingBase() + { + compute(mat); + } + Derived& compute(const MatrixType& mat) + { + return derived().compute(mat); + } + Derived& derived() + { + return *static_cast(this); + } + const Derived& derived() const + { + return *static_cast(this); + } + /** + * Get the permutation vector + */ + PermutationType& get_perm(const MatrixType& mat) + { + if (m_isInitialized = true) return m_P; + else abort(); // FIXME Should find a smoother way to exit with error code + } + template + void at_plus_a(const MatrixType& mat); + + /** keeps off-diagonal entries; drops diagonal entries */ + struct keep_diag { + inline bool operator() (const Index& row, const Index& col, const Scalar&) const + { + return row!=col; + } + }; + + protected: + void init() + { + m_isInitialized = false; + } + PermutationType m_P; // The computed permutation + mutable bool m_isInitialized; + SparseMatrix m_mat; // Stores the (symmetrized) matrix to permute +} +/** + * Get the symmetric pattern A^T+A from the input matrix A. 
+ * NOTE: The values should not be considered here + */ +template +void OrderingBase::at_plus_a(const MatrixType& mat) +{ + MatrixType C; + C = mat.transpose(); // NOTE: Could be costly + for (int i = 0; i < C.rows(); i++) + { + for (typename MatrixType::InnerIterator it(C, i); it; ++it) + it.valueRef() = 0.0; + } + m_mat = C + mat; + +/** + * Get the column approximate minimum degree ordering + * The matrix should be in column-major format + */ +template +class COLAMDOrdering: public OrderingBase< ColamdOrdering > +{ + public: + typedef OrderingBase< ColamdOrdering > Base; + typedef SparseMatrix MatrixType; + + public: + COLAMDOrdering():Base() {} + + COLAMDOrdering(const MatrixType& matrix):Base() + { + compute(matrix); + } + COLAMDOrdering(const MatrixType& mat, PermutationType& perm_c):Base() + { + compute(matrix); + perm_c = this.get_perm(); + } + void compute(const MatrixType& mat) + { + // Test if the matrix is column major... + + int m = mat.rows(); + int n = mat.cols(); + int nnz = mat.nonZeros(); + // Get the recommended value of Alen to be used by colamd + int Alen = colamd_recommended(nnz, m, n); + // Set the default parameters + double knobs[COLAMD_KNOBS]; + colamd_set_defaults(knobs); + + int info; + VectorXi p(n), A(nnz); + for(int i=0; i < n; i++) p(i) = mat.outerIndexPtr()(i); + for(int i=0; i < nnz; i++) A(i) = mat.innerIndexPtr()(i); + // Call Colamd routine to compute the ordering + info = colamd(m, n, Alen, A,p , knobs, stats) + eigen_assert( (info != FALSE)&& "COLAMD failed " ); + + m_P.resize(n); + for (int i = 0; i < n; i++) m_P(p(i)) = i; + m_isInitialized = true; + } + protected: + using Base::m_isInitialized; + using Base m_P; +} + +/** + * Get the approximate minimum degree ordering + * If the matrix is not structurally symmetric, an ordering of A^T+A is computed + * \tparam Scalar The type of the scalar of the matrix for which the ordering is applied + * \tparam Index The type of indices of the matrix + * \tparam _UpLo If the matrix is 
symmetric, indicates which part to use + */ +template +class AMDordering : public OrderingBase > +{ + public: + enum { UpLo = _UpLo }; + typedef OrderingBase< AMDOrdering > Base; + typedef SparseMatrix MatrixType; + public: + AMDOrdering():Base(){} + AMDOrdering(const MatrixType& mat):Base() + { + compute(matrix); + } + AMDOrdering(const MatrixType& mat, PermutationType& perm_c):Base() + { + compute(matrix); + perm_c = this.get_perm(); + } + /** Compute the permutation vector from a column-major sparse matrix */ + void compute(const MatrixType& mat) + { + // Compute the symmetric pattern + at_plus_a(mat); + + // Call the AMD routine + m_mat.prune(keep_diag()); + internal::minimum_degree_ordering(m_mat, m_P); + if (m_P.size()>0) m_isInitialized = true; + } + /** Compute the permutation with a self adjoint matrix */ + template + void compute(const SparseSelfAdjointView& mat) + { + m_mat = mat; + + // Call the AMD routine + m_mat.prune(keep_diag()); + internal::minimum_degree_ordering(m_mat, m_P); + if (m_P.size()>0) m_isInitialized = true; + } + protected: + using Base::m_isInitialized; + using Base::m_P; + using Base::m_mat; +} + +} // end namespace Eigen +#endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index e3838fbb7..a4b4fa98b 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -32,12 +32,22 @@ template class SparseLU; #include -#include -#include #include #include -#include +#include +#include +#include +#include +#include +/** + * \ingroup SparseLU_Module + * \brief Sparse supernodal LU factorization for general matrices + * + * This class implements the supernodal LU factorization for general matrices. + * + * \tparam _MatrixType The type of the sparse matrix. 
It must be a column-major SparseMatrix<> + */ template class SparseLU { @@ -47,7 +57,7 @@ class SparseLU typedef typename MatrixType::Index Index; typedef SparseMatrix NCMatrix; typedef SuperNodalMatrix SCMatrix; - typedef GlobalLU_t Eigen_GlobalLU_t; + typedef GlobalLU_t LU_GlobalLU_t; typedef Matrix ScalarVector; typedef Matrix IndexVector; typedef PermutationMatrix PermutationType; @@ -58,18 +68,28 @@ class SparseLU } SparseLU(const MatrixType& matrix):SparseLU() { - compute(matrix); } ~SparseLU() { - + // Free all explicit dynamic pointers } void analyzePattern (const MatrixType& matrix); void factorize (const MatrixType& matrix); - void compute (const MatrixType& matrix); + + /** + * Compute the symbolic and numeric factorization of the input sparse matrix. + * The input matrix should be in column-major storage. + */ + void compute (const MatrixType& matrix) + { + // Analyze + analyzePattern(matrix); + //Factorize + factorize(matrix); + } template bool SparseLU::_solve(const MatrixBase &b, MatrixBase &dest) const @@ -102,6 +122,13 @@ class SparseLU protected: // Functions void initperfvalues(); + template + int LU_snode_dfs(const int jcol, const int kcol, const IndexVector* asub, + const IndexVector* colptr, IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t& glu); + + template + int LU_dsnode_bmod (const Index jcol, const Index jsupno, const Index fsupc, + ScalarVector& dense, ScalarVector& tempv, LU_GlobalLu_t& Glu); // Variables mutable ComputationInfo m_info; @@ -113,14 +140,12 @@ class SparseLU SCMatrix m_Lstore; // The lower triangular matrix (supernodal) NCMatrix m_Ustore; // The upper triangular matrix PermutationType m_perm_c; // Column permutation - PermutationType m_iperm_c; // Column permutation PermutationType m_perm_r ; // Row permutation - PermutationType m_iperm_r ; // Inverse row permutation IndexVector m_etree; // Column elimination tree ScalarVector m_work; // Scalar work vector IndexVector m_iwork; //Index work vector - static 
Eigen_GlobalLU_t m_Glu; // persistent data to facilitate multiple factors + static LU_GlobalLU_t m_glu; // persistent data to facilitate multiple factors // should be defined as a class member // SuperLU/SparseLU options bool m_symmetricmode; @@ -135,7 +160,8 @@ class SparseLU int m_colblk; // The minimum column dimension for 2-D blocking to be used; int m_fillfactor; // The estimated fills factors for L and U, compared with A RealScalar m_diagpivotthresh; // Specifies the threshold used for a diagonal entry to be an acceptable pivot - int nnzL, nnzU; // Nonzeros in L and U factors + int m_nnzL, m_nnzU; // Nonzeros in L and U factors + private: // Copy constructor SparseLU (SparseLU& ) {} @@ -156,45 +182,56 @@ void SparseLU::initperfvalues() /** * Compute the column permutation to minimize the fill-in (file amd.c ) + * * - Apply this permutation to the input matrix - + * * - Compute the column elimination tree on the permuted matrix (file Eigen_Coletree.h) + * * - Postorder the elimination tree and the column permutation (file Eigen_Coletree.h) - * - + * */ -template +template void SparseLU::analyzePattern(const MatrixType& mat) { - // Compute the column permutation - AMDordering amd(mat); - m_perm_c = amd.get_perm_c(); + + //TODO It is possible as in SuperLU to compute row and columns scaling vectors to equilibrate the matrix mat. + + // Compute the fill-reducing ordering + // TODO Currently, the only available ordering method is AMD. + + OrderingType ord(mat); + m_perm_c = ord.get_perm(); + //FIXME Check the right semantic behind m_perm_c + // that is, column j of mat goes to column m_perm_c(j) of mat * m_perm_c; + + // Apply the permutation to the column of the input matrix - m_mat = mat * m_perm_c; //how is the permutation represented ??? 
+ m_mat = mat * m_perm_c; //FIXME Check if this is valid, check as well how to permute only the index // Compute the column elimination tree of the permuted matrix if (m_etree.size() == 0) m_etree.resize(m_mat.cols()); - internal::sp_coletree(m_mat, m_etree); + LU_sp_coletree(m_mat, m_etree); // In symmetric mode, do not do postorder here - if (m_symmetricmode == false) { + if (!m_symmetricmode) { IndexVector post, iwork; // Post order etree - post = internal::TreePostorder(m_mat.cols(), m_etree); + LU_TreePostorder(m_mat.cols(), m_etree, post); // Renumber etree in postorder iwork.resize(n+1); for (i = 0; i < n; ++i) iwork(post(i)) = post(m_etree(i)); - m_etree = iwork; - - // Postmultiply A*Pc by post, - // i.e reorder the matrix according to the postorder of the etree - // FIXME Check if this is available : constructor from a vector - PermutationType post_perm(post); - m_mat = m_mat * post_perm; + m_etree = iwork; + + // Postmultiply A*Pc by post, i.e reorder the matrix according to the postorder of the etree + PermutationType post_perm(post); + //m_mat = m_mat * post_perm; // FIXME This should surely be in factorize() - // Product of m_perm_c and post - for (i = 0; i < n; ++i) iwork(i) = m_perm_c(post_perm.indices()(i)); - m_perm_c = iwork; + // Composition of the two permutations + m_perm_c = m_perm_c * post_perm; } // end postordering + + m_analysisIsok = true; } /** @@ -217,36 +254,43 @@ template void SparseLU::factorize(const MatrixType& matrix) { - // Allocate storage common to the factor routines - int lwork = 0; - int info = LUMemInit(lwork); - eigen_assert ( (info == 0) && "Unable to allocate memory for the factors"); + eigen_assert(m_analysisIsok && "analyzePattern() should be called first"); + eigen_assert((matrix.rows() == matrix.cols()) && "Only for squared matrices"); + + // Apply the column permutation computed in analyzepattern() + m_mat = matrix * m_perm_c; + m_mat.makeCompressed(); int m = m_mat.rows(); int n = m_mat.cols(); + int nnz = 
m_mat.nonZeros(); int maxpanel = m_panel_size * m; + // Allocate storage common to the factor routines + int lwork = 0; + int info = LUMemInit(m, n, nnz, m_work, m_iwork, lwork, m_fillratio, m_panel_size, m_maxsuper, m_rowblk, m_glu); + if (info) + { + std::cerr << "UNABLE TO ALLOCATE WORKING MEMORY\n\n" ; + m_factorizationIsOk = false; + return ; + } + // Set up pointers for integer working arrays - VectorBlock segrep(m_iwork, 0, m); -// Map segrep(&m_iwork(0), m); // - - VectorBlock parent(segrep, m, m); -// Map parent(&segrep(0) + m, m); // - - VectorBlock xplore(parent, m, m); -// Map xplore(&parent(0) + m, m); // - - VectorBlock repnfnz(xplore, m, maxpanel); -// Map repfnz(&xplore(0) + m, maxpanel); // - - VectorBlock panel_lsub(repfnz, maxpanel, maxpanel) -// Map panel_lsub(&repfnz(0) + maxpanel, maxpanel);// - - VectorBlock xprune(panel_lsub, maxpanel, n); -// Map xprune(&panel_lsub(0) + maxpanel, n); // - - VectorBlock marker(xprune, n, m * LU_NO_MARKER); -// Map marker(&xprune(0)+n, m * LU_NO_MARKER); // + int idx = 0; + VectorBlock segrep(m_iwork, idx, m); + idx += m; + VectorBlock parent(m_iwork, idx, m); + idx += m; + VectorBlock xplore(m_iwork, idx, m); + idx += m; + VectorBlock repnfnz(m_iwork, idx, maxpanel); + idx += maxpanel; + VectorBlock panel_lsub(m_iwork, idx, maxpanel) + idx += maxpanel; + VectorBlock xprune(m_iwork, idx, n); + idx += n; + VectorBlock marker(m_iwork, idx, m * LU_NO_MARKER); repfnz.setConstant(-1); panel_lsub.setConstant(-1); @@ -259,43 +303,41 @@ void SparseLU::factorize(const MatrixType& matrix) // Setup Permutation vectors // Compute the inverse of perm_c - PermutationType iperm_c; - iperm_c = m_perm_c.inverse(); + PermutationType iperm_c (m_perm_c.inverse() ); // Identify initial relaxed snodes IndexVector relax_end(n); if ( m_symmetricmode = true ) - LU_heap_relax_snode(n, m_etree, m_relax, marker, relax_end); + internal::LU_heap_relax_snode(n, m_etree, m_relax, marker, relax_end); else - LU_relax_snode(n, m_etree, 
m_relax, marker, relax_end); + internal::LU_relax_snode(n, m_etree, m_relax, marker, relax_end); m_perm_r.setConstant(-1); marker.setConstant(-1); - IndexVector& xsup = m_Glu.xsup; - IndexVector& supno = m_GLu.supno; - IndexVector& xlsub = m_Glu.xlsub; - IndexVector& xlusup = m_GLu.xlusup; - IndexVector& xusub = m_Glu.xusub; - Index& nzlumax = m_Glu.nzlumax; + IndexVector& xsup = m_glu.xsup; + IndexVector& supno = m_glu.supno; + IndexVector& xlsub = m_glu.xlsub; + IndexVector& xlusup = m_glu.xlusup; + IndexVector& xusub = m_glu.xusub; + Index& nzlumax = m_glu.nzlumax; supno(0) = IND_EMPTY; - xsup(0) = xlsub(0) = xusub(0) = xlusup(0); + xsup(0) = xlsub(0) = xusub(0) = xlusup(0) = 0; int panel_size = m_panel_size; - int wdef = panel_size; // upper bound on panel width + int wdef = m_panel_size; // upper bound on panel width // Work on one 'panel' at a time. A panel is one of the following : // (a) a relaxed supernode at the bottom of the etree, or // (b) panel_size contiguous columns, defined by the user register int jcol,kcol; - int min_mn = std::min(m,n); IndexVector panel_histo(n); Index nextu, nextlu, jsupno, fsupc, new_next; - int pivrow; // Pivotal row number in the original row matrix + Index pivrow; // Pivotal row number in the original row matrix int nseg1; // Number of segments in U-column above panel row jcol int nseg; // Number of segments in each U-column int irep,ir; - for (jcol = 0; jcol < min_mn; ) + for (jcol = 0; jcol < n; ) { if (relax_end(jcol) != IND_EMPTY) { // Starting a relaxed node from jcol @@ -308,6 +350,7 @@ void SparseLU::factorize(const MatrixType& matrix) { m_info = NumericalIssue; m_factorizationIsOk = false; + std::cerr << "MEMORY ALLOCATION FAILED IN SNODE_DFS() \n"; return; } nextu = xusub(jcol); //starting location of column jcol in ucol @@ -315,16 +358,17 @@ void SparseLU::factorize(const MatrixType& matrix) jsupno = supno(jcol); // Supernode number which column jcol belongs to fsupc = xsup(jsupno); //First column number of the 
current supernode new_next = nextlu + (xlsub(fsupc+1)-xlsub(fsupc)) * (kcol - jcol + 1); - nzlumax = m_Glu.nzlumax; while (new_next > nzlumax ) { - mem = LUMemXpand(lusup, nzlumax, nextlu, LUSUP, m_Glu); + mem = LUMemXpand(lusup, nzlumax, nextlu, LUSUP, m_glu); if (mem) { + std::cerr << "MEMORY ALLOCATION FAILED FOR L FACTOR \n"; m_factorizationIsOk = false; return; } } + // Now, left-looking factorize each column within the snode for (icol = jcol; icol<=kcol; icol++){ xusub(icol+1) = nextu; @@ -336,7 +380,7 @@ void SparseLU::factorize(const MatrixType& matrix) LU_snode_bmod(icol, jsupno, fsupc, dense, tempv); // Eliminate the current column - info = LU_pivotL(icol, m_diagpivotthresh, m_perm_r, m_iperm_c, pivrow, m_Glu); + info = LU_pivotL(icol, m_diagpivotthresh, m_perm_r, m_iperm_c, pivrow, m_glu); if ( info ) { m_info = NumericalIssue; @@ -351,7 +395,7 @@ void SparseLU::factorize(const MatrixType& matrix) // Adjust panel size so that a panel won't overlap with the next relaxed snode. 
panel_size = w_def; - for (k = jcol + 1; k < std::min(jcol+panel_size, min_mn); k++) + for (k = jcol + 1; k < std::min(jcol+panel_size, n); k++) { if (relax_end(k) != IND_EMPTY) { @@ -359,14 +403,14 @@ void SparseLU::factorize(const MatrixType& matrix) break; } } - if (k == min_mn) - panel_size = min_mn - jcol; + if (k == n) + panel_size = n - jcol; // Symbolic outer factorization on a panel of columns - LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r, nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_Glu); + LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r, nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu); // Numeric sup-panel updates in topological order - LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_Glu); + LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_glu); // Sparse LU within the panel, and below the panel diagonal for ( jj = jcol, j< jcol + panel_size; jj++) @@ -377,7 +421,7 @@ void SparseLU::factorize(const MatrixType& matrix) //Depth-first-search for the current column VectorBlock panel_lsubk(panel_lsub, k, m); //FIXME VectorBlock repfnz_k(repfnz, k, m); //FIXME - info = LU_column_dfs(m, jj, perm_r, nseg, panel_lsub(k), segrep, repfnz_k, xprune, marker, parent, xplore, m_Glu); + info = LU_column_dfs(m, jj, perm_r, nseg, panel_lsub(k), segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); if ( !info ) { m_info = NumericalIssue; @@ -387,7 +431,7 @@ void SparseLU::factorize(const MatrixType& matrix) // Numeric updates to this column VectorBlock dense_k(dense, k, m); //FIXME VectorBlock segrep_k(segrep, nseg1, m) // FIXME Check the length - info = LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_Glu); + info = LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu); if ( info ) { m_info = NumericalIssue; @@ -397,7 +441,7 @@ void SparseLU::factorize(const MatrixType& matrix) // Copy 
the U-segments to ucol(*) //FIXME Check that repfnz_k, dense_k... have stored references to modified columns - info = LU_copy_to_col(jj, nseg, segrep, repfnz_k, perm_r, dense_k, m_Glu); + info = LU_copy_to_col(jj, nseg, segrep, repfnz_k, perm_r, dense_k, m_glu); if ( info ) { m_info = NumericalIssue; @@ -406,7 +450,7 @@ void SparseLU::factorize(const MatrixType& matrix) } // Form the L-segment - info = LU_pivotL(jj, m_diagpivotthresh, m_perm_r, iperm_c, pivrow, m_Glu); + info = LU_pivotL(jj, m_diagpivotthresh, m_perm_r, iperm_c, pivrow, m_glu); if ( info ) { m_info = NumericalIssue; @@ -415,7 +459,7 @@ void SparseLU::factorize(const MatrixType& matrix) } // Prune columns (0:jj-1) using column jj - LU_pruneL(jj, m_perm_r, pivrow, nseg, segrep, repfnz_k, xprune, m_Glu); + LU_pruneL(jj, m_perm_r, pivrow, nseg, segrep, repfnz_k, xprune, m_glu); // Reset repfnz for this column for (i = 0; i < nseg; i++) @@ -442,17 +486,17 @@ void SparseLU::factorize(const MatrixType& matrix) } } // Count the number of nonzeros in factors - LU_countnz(min_mn, xprune, m_nnzL, m_nnzU, m_Glu); + LU_countnz(n, xprune, m_nnzL, m_nnzU, m_glu); // Apply permutation to the L subscripts - LU_fixupL(min_mn, m_perm_r, m_Glu); + LU_fixupL(n, m_perm_r, m_glu); // Free work space iwork and work //... 
// Create supernode matrix L - m_Lstore.setInfos(m, min_mn, nnzL, Glu.lusup, Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno; Glu.xsup); + m_Lstore.setInfos(m, n, m_nnzL, Glu.lusup, Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno; Glu.xsup); // Create the column major upper sparse matrix U - new (&m_Ustore) Map > ( m, min_mn, nnzU, Glu.xusub.data(), Glu.usub.data(), Glu.ucol.data() ); //FIXME + new (&m_Ustore) Map > ( m, n, m_nnzU, Glu.xusub.data(), Glu.usub.data(), Glu.ucol.data() ); //FIXME this.m_Ustore = m_Ustore; m_info = Success; diff --git a/Eigen/src/SparseLU/SparseLU_Coletree.h b/Eigen/src/SparseLU/SparseLU_Coletree.h index d57048883..4c42387be 100644 --- a/Eigen/src/SparseLU/SparseLU_Coletree.h +++ b/Eigen/src/SparseLU/SparseLU_Coletree.h @@ -49,26 +49,26 @@ * NOTE : The matrix is supposed to be in column-major format. * */ -template -int LU_sp_coletree(const MatrixType& mat, VectorXi& parent) +template +int SparseLU::LU_sp_coletree(const MatrixType& mat, IndexVector& parent) { int nc = mat.cols(); // Number of columns int nr = mat.rows(); // Number of rows - VectorXi root(nc); // root of subtree of etree + IndexVector root(nc); // root of subtree of etree root.setZero(); - VectorXi pp(nc); // disjoint sets + IndexVector pp(nc); // disjoint sets pp.setZero(); // Initialize disjoint sets - VectorXi firstcol(nr); // First nonzero column in each row + IndexVector firstcol(nr); // First nonzero column in each row firstcol.setZero(); - //Compute firstcol[row] + //Compute first nonzero column in each row int row,col; firstcol.setConstant(nc); //for (row = 0; row < nr; firstcol(row++) = nc); for (col = 0; col < nc; col++) { for (typename MatrixType::InnerIterator it(mat, col); it; ++it) - { // Is it necessary to brows the whole matrix, the lower part should do the job ?? + { // Is it necessary to browse the whole matrix, the lower part should do the job ?? 
row = it.row(); firstcol(row) = std::min(firstcol(row), col); } @@ -80,7 +80,7 @@ int LU_sp_coletree(const MatrixType& mat, VectorXi& parent) int rset, cset, rroot; for (col = 0; col < nc; col++) { - pp(col) = cset = col; // Initially, each element is in its own set + cset = pp(col) = col; // Initially, each element is in its own set //FIXME root(cset) = col; parent(col) = nc; for (typename MatrixType::InnerIterator it(mat, col); it; ++it) @@ -92,7 +92,7 @@ int LU_sp_coletree(const MatrixType& mat, VectorXi& parent) if (rroot != col) { parent(rroot) = col; - pp(cset) = cset = rset; // Get the union of cset and rset + cset = pp(cset) = rset; // Get the union of cset and rset //FIXME root(cset) = col; } } @@ -101,7 +101,8 @@ int LU_sp_coletree(const MatrixType& mat, VectorXi& parent) } /** Find the root of the tree/set containing the vertex i : Use Path halving */ -int etree_find (int i, VectorXi& pp) +template +int etree_find (int i, IndexVector& pp) { int p = pp(i); // Parent int gp = pp(p); // Grand parent @@ -116,12 +117,14 @@ int etree_find (int i, VectorXi& pp) } /** - * Post order a tree + * Post order a tree + * \param parent Input tree + * \param post postordered tree */ -VectorXi TreePostorder(int n, VectorXi& parent) +template +void SparseLU::LU_TreePostorder(int n, IndexVector& parent, IndexVector& post) { - VectorXi first_kid, next_kid; // Linked list of children - VectorXi post; // postordered etree + IndexVector first_kid, next_kid; // Linked list of children int postnum; // Allocate storage for working arrays and results first_kid.resize(n+1); @@ -140,14 +143,15 @@ VectorXi TreePostorder(int n, VectorXi& parent) // Depth-first search from dummy root vertex #n postnum = 0; - internal::nr_etdfs(n, parent, first_kid, next_kid, post, postnum); + internal::LU_nr_etdfs(n, parent, first_kid, next_kid, post, postnum); return post; } /** * Depth-first search from vertex n. No recursion. 
* This routine was contributed by Cédric Doucet, CEDRAT Group, Meylan, France. */ -void nr_etdfs (int n, int *parent, int* first_kid, int *next_kid, int *post, int postnum) +template +void LU_nr_etdfs (int n, IndexVector& parent, IndexVector& first_kid, IndexVector& next_kid, IndexVector& post, int postnum) { int current = n, first, next; while (postnum != n) @@ -155,7 +159,7 @@ void nr_etdfs (int n, int *parent, int* first_kid, int *next_kid, int *post, int // No kid for the current node first = first_kid(current); - // no first kid for the current node + // no kid for the current node if (first == -1) { // Numbering this node because it has no kid @@ -169,11 +173,12 @@ void nr_etdfs (int n, int *parent, int* first_kid, int *next_kid, int *post, int current = parent(current); // numbering the parent node post(current) = postnum++; + // Get the next kid next = next_kid(current); } // stopping criterion - if (postnum==n+1) return; + if (postnum == n+1) return; // Updating current node current = next; diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h index a981b5436..b2888e9a0 100644 --- a/Eigen/src/SparseLU/SparseLU_Memory.h +++ b/Eigen/src/SparseLU/SparseLU_Memory.h @@ -46,43 +46,48 @@ #ifndef EIGEN_SPARSELU_MEMORY #define EIGEN_SPARSELU_MEMORY +#define LU_NO_MARKER 3 +#define LU_NUM_TEMPV(m,w,t,b) (std::max(m, (t+b)*w) ) +#define IND_EMPTY (-1) + #define LU_Reduce(alpha) ((alpha + 1) / 2) // i.e (alpha-1)/2 + 1 #define LU_GluIntArray(n) (5* (n) + 5) #define LU_TempSpace(m, w) ( (2*w + 4 + LU_NO_MARKER) * m * sizeof(Index) \ - + (w + 1) * m * sizeof(Scalar) + + (w + 1) * m * sizeof(Scalar) ) + namespace internal { /** - * \brief Allocate various working space failed in the numerical factorization phase. + * \brief Allocate various working space for the numerical factorization phase. 
* \param m number of rows of the input matrix * \param n number of columns * \param annz number of initial nonzeros in the matrix * \param work scalar working space needed by all factor routines * \param iwork Integer working space * \param lwork if lwork=-1, this routine returns an estimated size of the required memory - * \param Glu persistent data to facilitate multiple factors : will be deleted later ?? + * \param glu persistent data to facilitate multiple factors : will be deleted later ?? * \return an estimated size of the required memory if lwork = -1; otherwise, return the size of actually allocated when memory allocation failed - * NOTE Unlike SuperLU, this routine does not allow the user to provide its own user space + * NOTE Unlike SuperLU, this routine does not support successive factorization with the same pattern and the row permutation */ template -int SparseLU::LUMemInit(int m, int n, int annz, ScalarVector& work, IndexVector& iwork, int lwork, int fillratio, GlobalLU_t& Glu) +int LUMemInit(int m, int n, int annz, ScalarVector& work, IndexVector& iwork, int lwork, int fillratio, int panel_size, int maxsuper, int rowblk, GlobalLU_t& glu) { typedef typename ScalarVector::Scalar; typedef typename IndexVector::Index; - int& num_expansions = Glu.num_expansions; //No memory expansions so far + int& num_expansions = glu.num_expansions; //No memory expansions so far num_expansions = 0; // Guess the size for L\U factors - Index& nzlmax = Glu.nzlmax; - Index& nzumax = Glu.nzumax; - Index& nzlumax = Glu.nzlumax; + Index& nzlmax = glu.nzlmax; + Index& nzumax = glu.nzumax; + Index& nzlumax = glu.nzlumax; nzumax = nzlumax = fillratio * annz; // estimated number of nonzeros in U nzlmax = std::max(1, m_fill_ratio/4.) 
* annz; // estimated nnz in L factor // Return the estimated size to the user if necessary - int estimated_size; if (lwork == IND_EMPTY) { + int estimated_size; estimated_size = LU_GluIntArray(n) * sizeof(Index) + LU_TempSpace(m, m_panel_size) + (nzlmax + nzumax) * sizeof(Index) + (nzlumax+nzumax) * sizeof(Scalar) + n); return estimated_size; @@ -91,32 +96,33 @@ int SparseLU::LUMemInit(int m, int n, int annz, ScalarVector& work, IndexVector& // Setup the required space // First allocate Integer pointers for L\U factors - Glu.supno.resize(n+1); - Glu.xlsub.resize(n+1); - Glu.xlusup.resize(n+1); - Glu.xusub.resize(n+1); + glu.xsup.resize(n+1); + glu.supno.resize(n+1); + glu.xlsub.resize(n+1); + glu.xlusup.resize(n+1); + glu.xusub.resize(n+1); // Reserve memory for L/U factors - expand(Glu.lusup, nzlumax, 0, 0, num_expansions); - expand(Glu.ucol,nzumax, 0, 0, num_expansions); - expand(Glu.lsub,nzlmax, 0, 0, num_expansions); - expand(Glu.usub,nzumax, 0, 1, num_expansions); + expand(glu.lusup, nzlumax, 0, 0, num_expansions); + expand(glu.ucol,nzumax, 0, 0, num_expansions); + expand(glu.lsub,nzlmax, 0, 0, num_expansions); + expand(glu.usub,nzumax, 0, 1, num_expansions); // Check if the memory is correctly allocated, - // Should be a try... catch section here - while ( !Glu.lusup.size() || !Glu.ucol.size() || !Glu.lsub.size() || !Glu.usub.size()) + // FIXME Should be a try... 
catch section here + while ( !glu.lusup.size() || !glu.ucol.size() || !glu.lsub.size() || !glu.usub.size()) { - //otherwise reduce the estimated size and retry + //Reduce the estimated size and retry nzlumax /= 2; nzumax /= 2; nzlmax /= 2; - //FIXME Should be an exception here + if (nzlumax < annz ) return nzlumax; - expand(Glu.lsup, nzlumax, 0, 0, Glu); - expand(Glu.ucol, nzumax, 0, 0, Glu); - expand(Glu.lsub, nzlmax, 0, 0, Glu); - expand(Glu.usub, nzumax, 0, 1, Glu); + expand(glu.lsup, nzlumax, 0, 0, num_expansions); + expand(glu.ucol, nzumax, 0, 0, num_expansions); + expand(glu.lsub, nzlmax, 0, 0, num_expansions); + expand(glu.usub, nzumax, 0, 1, num_expansions); } // LUWorkInit : Now, allocate known working storage @@ -194,14 +200,14 @@ int SparseLU::expand(VectorType& vec, int& length, int len_to_copy, bool keep_p * \param vec vector to expand * \param [in,out]maxlen On input, previous size of vec (Number of elements to copy ). on output, new size * \param next current number of elements in the vector. 
- * \param Glu Global data structure + * \param glu Global data structure * \return 0 on success, > 0 size of the memory allocated so far */ template -int SparseLU::LUMemXpand(VectorType& vec, int& maxlen, int next, LU_MemType memtype, LU_GlobalLu_t& Glu) +int SparseLU::LUMemXpand(VectorType& vec, int& maxlen, int next, LU_MemType memtype, LU_GlobalLu_t& glu) { int failed_size; - int& num_expansions = Glu.num_expansions; + int& num_expansions = glu.num_expansions; if (memtype == USUB) failed_size = expand(vec, maxlen, next, 1, num_expansions); else @@ -211,19 +217,19 @@ int SparseLU::LUMemXpand(VectorType& vec, int& maxlen, int next, LU_MemType memt return faileld_size; // The following code is not really needed since maxlen is passed by reference - // and correspond to the appropriate field in Glu + // and correspond to the appropriate field in glu // switch ( mem_type ) { // case LUSUP: -// Glu.nzlumax = maxlen; +// glu.nzlumax = maxlen; // break; // case UCOL: -// Glu.nzumax = maxlen; +// glu.nzumax = maxlen; // break; // case LSUB: -// Glu.nzlmax = maxlen; +// glu.nzlmax = maxlen; // break; // case USUB: -// Glu.nzumax = maxlen; +// glu.nzumax = maxlen; // break; // } diff --git a/Eigen/src/SparseLU/SparseLU_Structs.h b/Eigen/src/SparseLU/SparseLU_Structs.h index 48fde1ada..1394eccdf 100644 --- a/Eigen/src/SparseLU/SparseLU_Structs.h +++ b/Eigen/src/SparseLU/SparseLU_Structs.h @@ -93,33 +93,24 @@ typedef enum {DOFACT, SamePattern, Factored} fact_t; typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} MemType; -/* Obsolete, headers for dynamically managed memory - \tparam VectorType can be int, real scalar or complex scalar*/ -template -struct ExpHeader { - int size; // Length of the memory that has been used */ - VectorType *mem; // Save the current pointer of the newly allocated memory -} ExpHeader; - template struct { - IndexVector* xsup; //First supernode column ... 
xsup(s) points to the beginning of the s-th supernode - IndexVector* supno; // Supernode number corresponding to this column (column to supernode mapping) - ScalarVector* lusup; // nonzero values of L ordered by columns - IndexVector* lsub; // Compressed row indices of L rectangular supernodes. - IndexVector* xlusup; // pointers to the beginning of each column in lusup - IndexVector* xlsub; // pointers to the beginning of each column in lsub + IndexVector xsup; //First supernode column ... xsup(s) points to the beginning of the s-th supernode + IndexVector supno; // Supernode number corresponding to this column (column to supernode mapping) + ScalarVector lusup; // nonzero values of L ordered by columns + IndexVector lsub; // Compressed row indices of L rectangular supernodes. + IndexVector xlusup; // pointers to the beginning of each column in lusup + IndexVector xlsub; // pointers to the beginning of each column in lsub Index nzlmax; // Current max size of lsub Index nzlumax; // Current max size of lusup - ScalarVector* ucol; // nonzero values of U ordered by columns - IndexVector* usub; // row indices of U columns in ucol - IndexVector* xusub; // Pointers to the beginning of each column of U in ucol + ScalarVector ucol; // nonzero values of U ordered by columns + IndexVector usub; // row indices of U columns in ucol + IndexVector xusub; // Pointers to the beginning of each column of U in ucol Index nzumax; // Current max size of ucol Index n; // Number of columns in the matrix int num_expansions; - ExpHeader *expanders; // Deprecated... 
Array of pointers to 4 types of memory } GlobalLU_t; }// End namespace Eigen diff --git a/Eigen/src/SparseLU/SparseLU_Utils.h b/Eigen/src/SparseLU/SparseLU_Utils.h index 8d3d5efee..5c12b6243 100644 --- a/Eigen/src/SparseLU/SparseLU_Utils.h +++ b/Eigen/src/SparseLU/SparseLU_Utils.h @@ -25,15 +25,13 @@ #ifdef EIGEN_SPARSELU_UTILS_H #define EIGEN_SPARSELU_UTILS_H -// Number of marker arrays used in the symbolic factorization each of size n -#define LU_NO_MARKER 3 -#define LU_NUM_TEMPV(m,w,t,b) (std::max(m, (t+b)*w) ) -#define IND_EMPTY (-1) +// Number of marker arrays used in the factorization each of size n -void SparseLU::LU_countnz(const int n, VectorXi& xprune, int& nnzL, int& nnzU, GlobalLU_t& Glu) +template +void SparseLU::LU_countnz(const int n, IndexVector& xprune, int& nnzL, int& nnzU, GlobalLU_t& Glu) { - VectorXi& xsup = Glu.xsup; - VectorXi& xlsub = Glu.xlsub; + IndexVector& xsup = Glu.xsup; + IndexVector& xlsub = Glu.xlsub; nnzL = 0; nnzU = (Glu.xusub)(n); int nnzL0 = 0; @@ -65,12 +63,13 @@ void SparseLU::LU_countnz(const int n, VectorXi& xprune, int& nnzL, int& nnzU, G * and applies permutation to the remaining subscripts * */ -void SparseLU::LU_fixupL(const int n, const VectorXi& perm_r, GlobalLU_t& Glu) +template +void SparseLU::LU_fixupL(const int n, const IndexVector& perm_r, GlobalLU_t& Glu) { int nsuper, fsupc, i, j, k, jstart; - VectorXi& xsup = GLu.xsup; - VectorXi& lsub = Glu.lsub; - VectorXi& xlsub = Glu.xlsub; + IndexVector& xsup = GLu.xsup; + IndexVector& lsub = Glu.lsub; + IndexVector& xlsub = Glu.xlsub; int nextl = 0; int nsuper = (Glu.supno)(n); diff --git a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h index 908f4d4cb..4190e0462 100644 --- a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +++ b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h @@ -40,9 +40,10 @@ * the code was modified is included with the above copyright notice. 
*/ -#ifndef EIGEN_HEAP_RELAX_SNODE_H -#define EIGEN_HEAP_RELAX_SNODE_H -#include +#ifndef SPARSELU_HEAP_RELAX_SNODE_H +#define SPARSELU_HEAP_RELAX_SNODE_H +#include +namespace internal { /** * \brief Identify the initial relaxed supernodes * @@ -53,19 +54,20 @@ * \param descendants Number of descendants of each node in the etree * \param relax_end last column in a supernode */ -void internal::LU_heap_relax_snode (const int n, VectorXi& et, const int relax_columns, VectorXi& descendants, VectorXi& relax_end) +template +void LU_heap_relax_snode (const int n, IndexVector& et, const int relax_columns, IndexVector& descendants, IndexVector& relax_end) { // The etree may not be postordered, but its heap ordered - // Post order etree - VectorXi post = internal::TreePostorder(n, et); - VectorXi inv_post(n+1); + IndexVector post; + TreePostorder(n, et, post); // Post order etree + IndexVector inv_post(n+1); register int i; - for (i = 0; i < n+1; ++i) inv_post(post(i)) = i; + for (i = 0; i < n+1; ++i) inv_post(post(i)) = i; // inv_post = post.inverse()??? 
// Renumber etree in postorder - VectorXi iwork(n); - VectorXi et_save(n+1); + IndexVector iwork(n); + IndexVector et_save(n+1); for (i = 0; i < n; ++i) { iwork(post(i)) = post(et(i)); @@ -74,7 +76,7 @@ void internal::LU_heap_relax_snode (const int n, VectorXi& et, const int relax_c et = iwork; // compute the number of descendants of each node in the etree - relax_end.setConstant(-1); + relax_end.setConstant(IND_EMPTY); register int j, parent; descendants.setZero(); for (j = 0; j < n; j++) @@ -130,4 +132,5 @@ void internal::LU_heap_relax_snode (const int n, VectorXi& et, const int relax_c // Recover the original etree et = et_save; } +} // end namespace internal #endif diff --git a/Eigen/src/SparseLU/SparseLU_relax_snode.h b/Eigen/src/SparseLU/SparseLU_relax_snode.h index 61b8e74bb..f7b478560 100644 --- a/Eigen/src/SparseLU/SparseLU_relax_snode.h +++ b/Eigen/src/SparseLU/SparseLU_relax_snode.h @@ -40,25 +40,26 @@ * the code was modified is included with the above copyright notice. */ -#ifndef EIGEN_HEAP_RELAX_SNODE_H -#define EIGEN_HEAP_RELAX_SNODE_H -#include +#ifndef SPARSELU_RELAX_SNODE_H +#define SPARSELU_RELAX_SNODE_H +namespace internal { /** * \brief Identify the initial relaxed supernodes * - * This routine applied to a column elimination tree. + * This routine is applied to a column elimination tree. 
* It assumes that the matrix has been reordered according to the postorder of the etree * \param et elimination tree * \param relax_columns Maximum number of columns allowed in a relaxed snode * \param descendants Number of descendants of each node in the etree * \param relax_end last column in a supernode */ -void internal::LU_relax_snode (const int n, VectorXi& et, const int relax_columns, VectorXi& descendants, VectorXi& relax_end) +template +void LU_relax_snode (const int n, IndexVector& et, const int relax_columns, IndexVector& descendants, IndexVector& relax_end) { // compute the number of descendants of each node in the etree register int j, parent; - relax_end.setConstant(-1); + relax_end.setConstant(IND_EMPTY); descendants.setZero(); for (j = 0; j < n; j++) { @@ -86,4 +87,6 @@ void internal::LU_relax_snode (const int n, VectorXi& et, const int relax_column } // End postorder traversal of the etree } + +} // end namespace internal #endif diff --git a/Eigen/src/SparseLU/SparseLU_snode_bmod.h b/Eigen/src/SparseLU/SparseLU_snode_bmod.h index 9da986497..6130a5622 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_snode_bmod.h @@ -42,16 +42,18 @@ * granted, provided the above notices are retained, and a notice that * the code was modified is included with the above copyright notice. */ +namespace internal { #ifndef SPARSELU_SNODE_BMOD_H #define SPARSELU_SNODE_BMOD_H -template -int SparseLU::LU_dsnode_bmod (const int jcol, const int jsupno, const int fsupc, - VectorType& dense, VectorType& tempv, LU_GlobalLu_t& Glu) +template +int SparseLU::LU_dsnode_bmod (const Index jcol, const Index jsupno, const Index fsupc, + ScalarVector& dense, ScalarVector& tempv, LU_GlobalLu_t& Glu) { - VectorXi& lsub = Glu.lsub; // Compressed row subscripts of ( rectangular supernodes ??) 
- VectorXi& xlsub = Glu.xlsub; // xlsub[j] is the starting location of the j-th column in lsub(*) - VectorType& lusup = Glu.lusup; // Numerical values of the rectangular supernodes - VectorXi& xlusup = Glu.xlusup; // xlusup[j] is the starting location of the j-th column in lusup(*) + typedef typename Matrix IndexVector; + IndexVector& lsub = Glu.lsub; // Compressed row subscripts of ( rectangular supernodes ??) + IndexVector& xlsub = Glu.xlsub; // xlsub[j] is the starting location of the j-th column in lsub(*) + ScalarVector& lusup = Glu.lusup; // Numerical values of the rectangular supernodes + IndexVector& xlusup = Glu.xlusup; // xlusup[j] is the starting location of the j-th column in lusup(*) int nextlu = xlusup(jcol); // Starting location of the next column to add int irow, isub; @@ -85,4 +87,5 @@ int SparseLU::LU_dsnode_bmod (const int jcol, const int jsupno, const int fsupc, return 0; } +} // End namespace internal #endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_snode_dfs.h b/Eigen/src/SparseLU/SparseLU_snode_dfs.h index cf64eb747..669f172f5 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_snode_dfs.h @@ -42,8 +42,9 @@ * granted, provided the above notices are retained, and a notice that * the code was modified is included with the above copyright notice. */ -#ifdef EIGEN_SNODE_DFS_H -#define EIGEN_SNODE_DFS_H +#ifdef SPARSELU_SNODE_DFS_H +#define SPARSELU_SNODE_DFS_H +namespace eigen { /** * \brief Determine the union of the row structures of those columns within the relaxed snode. 
* NOTE: The relaxed snodes are leaves of the supernodal etree, therefore, @@ -57,15 +58,15 @@ * \param marker (in/out) working vector * \return 0 on success, > 0 size of the memory when memory allocation failed */ - template - int SparseLU::LU_snode_dfs(const int jcol, const int kcol, const IndexVector* asub, const IndexVector* colptr, IndexVector& xprune, IndexVector& marker, LU_GlobalLu_t& Glu) + template + int SparseLU::LU_snode_dfs(const int jcol, const int kcol, const IndexVector* asub, const IndexVector* colptr, IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t& glu) { typedef typename IndexVector::Index; - IndexVector& xsup = Glu.xsup; - IndexVector& supno = Glu.supno; // Supernode number corresponding to this column - IndexVector& lsub = Glu.lsub; - IndexVector& xlsub = Glu.xlsub; - Index& nzlmax = Glu.nzlmax; + IndexVector& xsup = glu.xsup; + IndexVector& supno = glu.supno; // Supernode number corresponding to this column + IndexVector& lsub = glu.lsub; + IndexVector& xlsub = glu.xlsub; + Index& nzlmax = glu.nzlmax; int mem; Index nsuper = ++supno(jcol); // Next available supernode number register int nextl = xlsub(jcol); //Index of the starting location of the jcol-th column in lsub @@ -85,7 +86,7 @@ lsub(nextl++) = krow; if( nextl >= nzlmax ) { - mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, Glu); + mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, glu); if (mem) return mem; } } @@ -99,13 +100,13 @@ Index new_next = nextl + (nextl - xlsub(jcol)); while (new_next > nzlmax) { - mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, Glu); + mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, glu); if (mem) return mem; } Index ifrom, ito = nextl; for (ifrom = xlsub(jcol); ifrom < nextl;) lsub(ito++) = lsub(ifrom++); - for (i = jcol+1; i <=kcol; i++)xlsub(i) = nextl; + for (i = jcol+1; i <=kcol; i++) xlsub(i) = nextl; nextl = ito; } xsup(nsuper+1) = kcol + 1; // Start of next available supernode @@ -115,5 +116,5 @@ return 0; } - +} // end namespace eigen #endif \ No newline at 
end of file From c0ad1094995e28a2d564e83a2ca1c6b76cfbd536 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Tue, 12 Jun 2012 18:19:59 +0200 Subject: [PATCH 11/73] Checking Data structures and function prototypes --- Eigen/src/SparseLU/SparseLU.h | 126 +++++++++++++-------- Eigen/src/SparseLU/SparseLU_Structs.h | 10 +- Eigen/src/SparseLU/SparseLU_column_bmod.h | 58 +++++----- Eigen/src/SparseLU/SparseLU_column_dfs.h | 30 ++--- Eigen/src/SparseLU/SparseLU_copy_to_ucol.h | 25 ++-- Eigen/src/SparseLU/SparseLU_panel_bmod.h | 46 ++++---- Eigen/src/SparseLU/SparseLU_panel_dfs.h | 43 +++---- Eigen/src/SparseLU/SparseLU_pivotL.h | 22 ++-- Eigen/src/SparseLU/SparseLU_pruneL.h | 28 +++-- Eigen/src/SparseLU/SparseLU_snode_bmod.h | 20 ++-- 10 files changed, 219 insertions(+), 189 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index a4b4fa98b..36b1ce570 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -27,19 +27,12 @@ #define EIGEN_SPARSE_LU namespace Eigen { - -template -class SparseLU; -#include + +// Data structure needed by all routines #include -#include -#include -#include +#include -#include -#include -#include /** * \ingroup SparseLU_Module * \brief Sparse supernodal LU factorization for general matrices @@ -62,7 +55,7 @@ class SparseLU typedef Matrix IndexVector; typedef PermutationMatrix PermutationType; public: - SparseLU():m_isInitialized(true),m_symmetricmode(false),m_fact(DOFACT),m_diagpivotthresh(1.0) + SparseLU():m_isInitialized(true),m_symmetricmode(false),m_diagpivotthresh(1.0) { initperfvalues(); } @@ -106,7 +99,7 @@ class SparseLU } - /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. + /** \returns the solution X of \f$ A X = b \f$ using the current decomposition of A. 
* * \sa compute() */ @@ -122,20 +115,34 @@ class SparseLU protected: // Functions void initperfvalues(); - template - int LU_snode_dfs(const int jcol, const int kcol, const IndexVector* asub, - const IndexVector* colptr, IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t& glu); - - template + int LU_snode_dfs(const int jcol, const int kcol, const IndexVector* asub, + const IndexVector* colptr, IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t& glu); int LU_dsnode_bmod (const Index jcol, const Index jsupno, const Index fsupc, - ScalarVector& dense, ScalarVector& tempv, LU_GlobalLu_t& Glu); + ScalarVector& dense, LU_GlobalLU_t& Glu); + int LU_pivotL(const int jcol, const RealScalar diagpivotthresh, IndexVector& perm_r, + IndexVector& iperm_c, int& pivrow, GlobalLU_t& Glu); + void LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, + IndexVector& perm_r, int& nseg, ScalarVector& dense, IndexVector& panel_lsub, + IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, + IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& Glu); + void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, + ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, + IndexVector& repfnz, LU_GlobalLU_t& glu); + int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, IndexVector& nseg, + IndexVector& lsub_col, IndexVector& segrep, IndexVector& repfnz, + IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& glu); + int LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, + IndexVector& segrep, IndexVector& repfnz, int fpanelc, LU_GlobalLU_t& Glu); + int LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segrep, IndexVector& repfnz, + IndexVector& perm_r, ScalarVector& dense, LU_GlobalLU_t& glu); + void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, const int nseg, + const 
IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, GlobalLU_t& Glu) // Variables mutable ComputationInfo m_info; bool m_isInitialized; bool m_factorizationIsOk; bool m_analysisIsOk; - fact_t m_fact; NCMatrix m_mat; // The input (permuted ) matrix SCMatrix m_Lstore; // The lower triangular matrix (supernodal) NCMatrix m_Ustore; // The upper triangular matrix @@ -146,7 +153,8 @@ class SparseLU ScalarVector m_work; // Scalar work vector IndexVector m_iwork; //Index work vector static LU_GlobalLU_t m_glu; // persistent data to facilitate multiple factors - // should be defined as a class member + // FIXME All fields of this struct can be defined separately as class members + // SuperLU/SparseLU options bool m_symmetricmode; @@ -179,7 +187,10 @@ void SparseLU::initperfvalues() m_fillfactor = 20; } - +// Functions needed by the anaysis phase +#include +// Ordering interface +#include /** * Compute the column permutation to minimize the fill-in (file amd.c ) * @@ -206,7 +217,7 @@ void SparseLU::analyzePattern(const MatrixType& mat) // Apply the permutation to the column of the input matrix - m_mat = mat * m_perm_c; //FIXME Check if this is valid, check as well how to permute only the index + m_mat = mat * m_perm_c; // Compute the column elimination tree of the permuted matrix if (m_etree.size() == 0) m_etree.resize(m_mat.cols()); @@ -234,6 +245,21 @@ void SparseLU::analyzePattern(const MatrixType& mat) m_analysisIsok = true; } +// Functions needed by the numerical factorization phase +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + /** * - Numerical factorization * - Interleaved with the symbolic factorization @@ -284,7 +310,7 @@ void SparseLU::factorize(const MatrixType& matrix) idx += m; VectorBlock xplore(m_iwork, idx, m); idx += m; - VectorBlock repnfnz(m_iwork, idx, maxpanel); + VectorBlock repfnz(m_iwork, idx, maxpanel); idx += maxpanel; VectorBlock panel_lsub(m_iwork, 
idx, maxpanel) idx += maxpanel; @@ -324,8 +350,6 @@ void SparseLU::factorize(const MatrixType& matrix) supno(0) = IND_EMPTY; xsup(0) = xlsub(0) = xusub(0) = xlusup(0) = 0; - int panel_size = m_panel_size; - int wdef = m_panel_size; // upper bound on panel width // Work on one 'panel' at a time. A panel is one of the following : // (a) a relaxed supernode at the bottom of the etree, or @@ -348,9 +372,9 @@ void SparseLU::factorize(const MatrixType& matrix) info = LU_snode_dfs(jcol, kcol, m_mat.innerIndexPtr(), m_mat.outerIndexPtr(), xprune, marker); if ( info ) { + std::cerr << "MEMORY ALLOCATION FAILED IN SNODE_DFS() \n"; m_info = NumericalIssue; m_factorizationIsOk = false; - std::cerr << "MEMORY ALLOCATION FAILED IN SNODE_DFS() \n"; return; } nextu = xusub(jcol); //starting location of column jcol in ucol @@ -377,13 +401,14 @@ void SparseLU::factorize(const MatrixType& matrix) dense(it.row()) = it.val(); // Numeric update within the snode - LU_snode_bmod(icol, jsupno, fsupc, dense, tempv); + LU_snode_bmod(icol, jsupno, fsupc, dense, glu); // Eliminate the current column info = LU_pivotL(icol, m_diagpivotthresh, m_perm_r, m_iperm_c, pivrow, m_glu); if ( info ) { m_info = NumericalIssue; + std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... 
ZERO COLUMN AT " << info < panel_lsubk(panel_lsub, k, m); //FIXME - VectorBlock repfnz_k(repfnz, k, m); //FIXME + VectorBlock panel_lsubk(panel_lsub, k, m); + VectorBlock repfnz_k(repfnz, k, m); info = LU_column_dfs(m, jj, perm_r, nseg, panel_lsub(k), segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); if ( !info ) { + std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() \n"; m_info = NumericalIssue; m_factorizationIsOk = false; return; } // Numeric updates to this column - VectorBlock dense_k(dense, k, m); //FIXME - VectorBlock segrep_k(segrep, nseg1, m) // FIXME Check the length + VectorBlock dense_k(dense, k, m); + VectorBlock segrep_k(segrep, nseg1, m); info = LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu); if ( info ) { + std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() \n"; m_info = NumericalIssue; m_factorizationIsOk = false; return; } // Copy the U-segments to ucol(*) - //FIXME Check that repfnz_k, dense_k... have stored references to modified columns info = LU_copy_to_col(jj, nseg, segrep, repfnz_k, perm_r, dense_k, m_glu); if ( info ) { + std::cerr << "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() \n"; m_info = NumericalIssue; m_factorizationIsOk = false; return; @@ -453,6 +480,7 @@ void SparseLU::factorize(const MatrixType& matrix) info = LU_pivotL(jj, m_diagpivotthresh, m_perm_r, iperm_c, pivrow, m_glu); if ( info ) { + std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... 
ZERO COLUMN AT " << info < n ) { k = 0; @@ -504,18 +532,18 @@ void SparseLU::factorize(const MatrixType& matrix) } template -bool SparseLU::_solve(const MatrixBase &b, MatrixBase &x) const +bool SparseLU::_solve(const MatrixBase &b, MatrixBase &X) const { eigen_assert(m_isInitialized && "The matrix should be factorized first"); EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0, THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); - x = b; /* on return, x is overwritten by the computed solution */ + X = b; /* on return, X is overwritten by the computed solution */ int nrhs = b.cols(); // Permute the right hand side to form Pr*B - x = m_perm_r * x; + X = m_perm_r * X; // Forward solve PLy = Pb; Index fsupc; // First column of the current supernode @@ -547,7 +575,7 @@ bool SparseLU::_solve(const MatrixBase &b, MatrixBase &x) const { irow = m_Lstore.rowIndex()[iptr]; ++luptr; - x(irow, j) -= x(fsupc, j) * Lval[luptr]; + X(irow, j) -= X(fsupc, j) * Lval[luptr]; } } } @@ -558,8 +586,8 @@ bool SparseLU::_solve(const MatrixBase &b, MatrixBase &x) const // Triangular solve luptr = m_Lstore.colIndexPtr()[fsupc]; //FIXME Should be outside the loop Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); -// Map, 0, OuterStride > u( &(x(fsupc,0)), nsupc, nrhs, OuterStride<>(x.rows()) ); - Matrix& u = x.block(fsupc, 0, nsupc, nrhs); //FIXME Check this +// Map, 0, OuterStride > u( &(X(fsupc,0)), nsupc, nrhs, OuterStride<>(X.rows()) ); + Matrix& u = X.block(fsupc, 0, nsupc, nrhs); //FIXME Check this u = A.triangularView().solve(u); // Matrix-vector product @@ -573,7 +601,7 @@ bool SparseLU::_solve(const MatrixBase &b, MatrixBase &x) const for (i = 0; i < nrow; i++) { irow = m_Lstore.rowIndex()[iptr]; - x(irow, j) -= work(i, j); // Scatter operation + X(irow, j) -= work(i, j); // Scatter operation work(i, j) = Scalar(0); iptr++; } @@ -594,13 +622,13 @@ bool SparseLU::_solve(const MatrixBase &b, MatrixBase &x) const { for (j = 0; j < nrhs; j++) { - x(fsupc, j) /= 
Lval[luptr]; + X(fsupc, j) /= Lval[luptr]; } } else { Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); - Matrix& u = x.block(fsupc, 0, nsupc, nrhs); + Matrix& u = X.block(fsupc, 0, nsupc, nrhs); u = A.triangularView().solve(u); } @@ -608,17 +636,17 @@ bool SparseLU::_solve(const MatrixBase &b, MatrixBase &x) const { for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) { - for (i = m_Ustore.outerIndexPtr()[jcol]; i < m_Ustore.outerIndexPtr()[jcol]; i++) - { - irow = m_Ustore.InnerIndices()[i]; - x(irow, j) -= x(irow, jcol) * m_Ustore.Values()[i]; - } + for (i = m_Ustore.outerIndexPtr()[jcol]; i < m_Ustore.outerIndexPtr()[jcol]; i++) + { + irow = m_Ustore.InnerIndices()[i]; + X(irow, j) -= X(irow, jcol) * m_Ustore.Values()[i]; + } } } } // End For U-solve // Permute back the solution - x = x * m_perm_c; + X = m_perm_c * X; return true; } diff --git a/Eigen/src/SparseLU/SparseLU_Structs.h b/Eigen/src/SparseLU/SparseLU_Structs.h index 1394eccdf..618d05eac 100644 --- a/Eigen/src/SparseLU/SparseLU_Structs.h +++ b/Eigen/src/SparseLU/SparseLU_Structs.h @@ -82,19 +82,12 @@ */ #ifndef EIGEN_LU_STRUCTS #define EIGEN_LU_STRUCTS -namespace Eigen { - -#define LU_NBR_MEMTYPE 4 /* 0: lusup - 1: ucol - 2: lsub - 3: usub */ -typedef enum {NATURAL, MMD_ATA, MMD_AT_PLUS_A, COLAMD, MY_PERMC} colperm_t; -typedef enum {DOFACT, SamePattern, Factored} fact_t; typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} MemType; template struct { + typedef typename IndexVector::Index Index; IndexVector xsup; //First supernode column ... 
xsup(s) points to the beginning of the s-th supernode IndexVector supno; // Supernode number corresponding to this column (column to supernode mapping) ScalarVector lusup; // nonzero values of L ordered by columns @@ -113,5 +106,4 @@ struct { int num_expansions; } GlobalLU_t; -}// End namespace Eigen #endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h index bed4f9519..965a0c0ad 100644 --- a/Eigen/src/SparseLU/SparseLU_column_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -54,35 +54,40 @@ * \param segrep segment representative ... * \param repfnz ??? First nonzero column in each row ??? ... * \param fpanelc First column in the current panel - * \param Glu Global LU data. + * \param glu Global LU data. * \return 0 - successful return * > 0 - number of bytes allocated when run out of space * */ template -int SparseLU::LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, int fpanelc, LU_GlobalLu_t& Glu) +int SparseLU::LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, int fpanelc, LU_GlobalLU_t& glu) { - int jsupno, k, ksub, krep, krep_ind, ksupno; + int jsupno, k, ksub, krep, krep_ind, ksupno; + int fsupc, nsupc, nsupr, luptr, kfnz, no_zeros; /* krep = representative of current k-th supernode * fsupc = first supernodal column * nsupc = number of columns in a supernode * nsupr = number of rows in a supernode * luptr = location of supernodal LU-block in storage * kfnz = first nonz in the k-th supernodal segment - * no-zeros = no lf leading zeros in a supernodal U-segment + * no_zeros = no lf leading zeros in a supernodal U-segment */ - IndexVector& xsup = Glu.xsup; - IndexVector& supno = Glu.supno; - IndexVector& lsub = Glu.lsub; - IndexVector& xlsub = Glu.xlsub; - IndexVector& xlusup = Glu.xlusup; - ScalarVector& lusup = 
Glu.lusup; - Index& nzlumax = Glu.nzlumax; + IndexVector& xsup = glu.xsup; + IndexVector& supno = glu.supno; + IndexVector& lsub = glu.lsub; + IndexVector& xlsub = glu.xlsub; + IndexVector& xlusup = glu.xlusup; + ScalarVector& lusup = glu.lusup; + Index& nzlumax = glu.nzlumax; int jsupno = supno(jcol); // For each nonzero supernode segment of U[*,j] in topological order k = nseg - 1; + int d_fsupc; // distance between the first column of the current panel and the + // first column of the current snode + int fst_col; // First column within small LU update + int segsize; for (ksub = 0; ksub < nseg; ksub++) { krep = segrep(k); k--; @@ -110,35 +115,36 @@ int SparseLU::LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense krep_ind = lptr + nsupc - 1; // NOTE Unlike the original implementation in SuperLU, the only feature - // here is a sup-col update. + // available here is a sup-col update. // Perform a triangular solver and block update, // then scatter the result of sup-col update to dense no_zeros = kfnz - fst_col; // First, copy U[*,j] segment from dense(*) to tempv(*) isub = lptr + no_zeros; - for (i = 0; i ww segsize; i++) + for (i = 0; i < segsize; i++) { irow = lsub(isub); - tempv(i) = densee(irow); + tempv(i) = dense(irow); ++isub; } // Dense triangular solve -- start effective triangle luptr += nsupr * no_zeros + no_zeros; // Form Eigen matrix and vector Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); - Map u(tempv.data(), segsize); + VectorBlock u(tempv, 0, segsize); + u = A.triangularView().solve(u); // Dense matrix-vector product y <-- A*x luptr += segsize; - new (&A) (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(nsupr) ); - Map l( &(tempv.data()[segsize]), segsize); + new (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(nsupr) ); + VectorBlock l(tempv, segsize, nrow); l= A * u; // Scatter tempv[] into SPA dense[] as a 
temporary storage isub = lptr + no_zeros; - for (i = 0; i w segsize; i++) + for (i = 0; i < segsize; i++) { irow = lsub(isub); dense(irow) = tempv(i); @@ -150,8 +156,8 @@ int SparseLU::LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense for (i = 0; i < nrow; i++) { irow = lsub(isub); - dense(irow) -= tempv(segsize + i); - tempv(segsize + i) = Scalar(0.0); + dense(irow) -= l(i); + l(i) = Scalar(0.0); ++isub; } } // end if jsupno @@ -165,9 +171,9 @@ int SparseLU::LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense new_next = nextlu + xlsub(fsupc + 1) - xlsub(fsupc); while (new_next > nzlumax ) { - mem = LUmemXpand(Glu.lusup, nzlumax, nextlu, LUSUP, Glu); + mem = LUmemXpand(glu.lusup, nzlumax, nextlu, LUSUP, glu); if (mem) return mem; - lsub = Glu.lsub; //FIXME Why is it updated here. + //lsub = glu.lsub; // Should not be updated here } for (isub = xlsub(fsupc); isub < xlsub(fsupc+1); isub++) @@ -183,8 +189,8 @@ int SparseLU::LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense /* For more updates within the panel (also within the current supernode), * should start from the first column of the panel, or the first column * of the supernode, whichever is bigger. 
There are two cases: - * 1) fsupc < fpanelc, then fst_col <- fpanelc - * 2) fsupc >= fpanelc, then fst_col <-fsupc + * 1) fsupc < fpanelc, then fst_col <-- fpanelc + * 2) fsupc >= fpanelc, then fst_col <-- fsupc */ fst_col = std::max(fsupc, fpanelc); @@ -203,11 +209,11 @@ int SparseLU::LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense // points to the beginning of jcol in snode L\U(jsupno) ufirst = xlusup(jcol) + d_fsupc; Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); - Map l( &(lusup.data()[ufirst]), nsupc ); + VectorBlock u(lusup, ufirst, nsupc); u = A.triangularView().solve(u); new (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); - Map l( &(lusup.data()[ufirst+nsupc]), nsupr ); + VectorBlock l(lusup, ufirst+nsupc, nrow); l = l - A * u; } // End if fst_col diff --git a/Eigen/src/SparseLU/SparseLU_column_dfs.h b/Eigen/src/SparseLU/SparseLU_column_dfs.h index 1c832d60e..7fda536a9 100644 --- a/Eigen/src/SparseLU/SparseLU_column_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_column_dfs.h @@ -65,13 +65,13 @@ * \param marker * \param parent * \param xplore - * \param Glu global LU data + * \param glu global LU data * \return 0 success * > 0 number of bytes allocated when run out of space * */ -template -int SparseLU::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, IndexVector& nseg IndexVector& lsub_col, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLu_t& Glu) +template +int SparseLU::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, IndexVector& nseg IndexVector& lsub_col, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& glu) { typedef typename IndexVector::IndexVector; @@ -85,15 +85,15 @@ int SparseLU::LU_column_dfs(const int m, const int jcol, IndexVector& 
perm_r, In int xdfs, maxdfs, kpar; int mem; // Initialize pointers - IndexVector& xsup = Glu.xsup; - IndexVector& supno = Glu.supno; - IndexVector& lsub = Glu.lsub; - IndexVector& xlsub = Glu.xlsub; - IndexVector& nzlmax = Glu.nzlmax; + IndexVector& xsup = glu.xsup; + IndexVector& supno = glu.supno; + IndexVector& lsub = glu.lsub; + IndexVector& xlsub = glu.xlsub; + IndexVector& nzlmax = glu.nzlmax; nsuper = supno(jcol); jsuper = nsuper; - nextl = xlsup(jcol); + nextl = xlsub(jcol); VectorBlock marker2(marker, 2*m, m); // For each nonzero in A(*,jcol) do dfs for (k = 0; lsub_col[k] != IND_EMPTY; k++) @@ -106,16 +106,16 @@ int SparseLU::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, In if (kmark == jcol) continue; // For each unmarker nbr krow of jcol - // krow is in L: place it in structure of L(*,jcol) marker2(krow) = jcol; kperm = perm_r(krow); if (kperm == IND_EMPTY ) { + // krow is in L: place it in structure of L(*,jcol) lsub(nextl++) = krow; // krow is indexed into A if ( nextl >= nzlmax ) { - mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, Glu); + mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, glu); if ( mem ) return mem; } if (kmark != jcolm1) jsuper = IND_EMPTY; // Row index subset testing @@ -157,13 +157,13 @@ int SparseLU::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, In marker2(kchild) = jcol; chperm = perm_r(kchild); - // if kchild is in L: place it in L(*,k) if (chperm == IND_EMPTY) { + // if kchild is in L: place it in L(*,k) lsub(nextl++) = kchild; if (nextl >= nzlmax) { - mem = LUMemXpand(lsub, nzlmax, nextl, LSUB); + mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, glu); if (mem) return mem; } if (chmark != jcolm1) jsuper = IND_EMPTY; @@ -201,7 +201,7 @@ int SparseLU::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, In // place supernode-rep krep in postorder DFS. 
// backtrack dfs to its parent - segrep(nseg) = ;krep; + segrep(nseg) = krep; ++nseg; kpar = parent(krep); // Pop from stack, mimic recursion if (kpar == IND_EMPTY) break; // dfs done @@ -217,7 +217,7 @@ int SparseLU::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, In } // for each nonzero ... - // check to see if j belongs in the same supeprnode as j-1 + // check to see if j belongs in the same supernode as j-1 if ( jcol == 0 ) { // Do nothing for column 0 nsuper = supno(0) = 0 ; diff --git a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h index dc53edcfb..c97bc6aa4 100644 --- a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +++ b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h @@ -50,28 +50,28 @@ * \param jcol current column to update * \param nseg Number of segments in the U part * \param segrep segment representative ... - * \param repfnz ??? First nonzero column in each row ??? ... + * \param repfnz First nonzero column in each row ... * \param perm_r Row permutation * \param dense Store the full representation of the column - * \param Glu Global LU data. + * \param glu Global LU data. 
* \return 0 - successful return * > 0 - number of bytes allocated when run out of space * */ template -int SparseLU::LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segrep, IndexVector& repfnz, IndexVector& perm_r, ScalarVector& dense, LU_GlobalLu_t& Glu) +int SparseLU::LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segrep, IndexVector& repfnz, IndexVector& perm_r, ScalarVector& dense, LU_GlobalLU_t& glu) { Index ksupno, k, ksub, krep, ksupno; typedef typename IndexVector::Index; - IndexVector& xsup = Glu.xsup; - IndexVector& supno = Glu.supno; - IndexVector& lsub = Glu.lsub; - IndexVector& xlsub = Glu.xlsub; + IndexVector& xsup = glu.xsup; + IndexVector& supno = glu.supno; + IndexVector& lsub = glu.lsub; + IndexVector& xlsub = glu.xlsub; ScalarVector& ucol = GLu.ucol; - IndexVector& usub = Glu.usub; - IndexVector& xusub = Glu.xusub; - Index& nzumax = Glu.nzumax; + IndexVector& usub = glu.usub; + IndexVector& xusub = glu.xusub; + Index& nzumax = glu.nzumax; Index jsupno = supno(jcol); @@ -95,12 +95,11 @@ int SparseLU::LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segre new_next = nextu + segsize; while (new_next > nzumax) { - mem = LU_MemXpand(ucol, nzumax, nextu, UCOL, Glu); + mem = LU_MemXpand(ucol, nzumax, nextu, UCOL, glu); if (mem) return mem; - mem = LU_MemXpand(usub, nzumax, nextu, USUB, Glu); + mem = LU_MemXpand(usub, nzumax, nextu, USUB, glu); if (mem) return mem; - lsub = Glu.lsub; //FIXME Why setting this as well ?? } for (i = 0; i < segsize; i++) diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h index 93daa938c..212ecfa6a 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -56,21 +56,21 @@ * \param nseg Number of segments in the U part * \param dense Store the full representation of the panel * \param tempv working array - * \param segrep in ... - * \param repfnz in ... - * \param Glu Global LU data. 
+ * \param segrep segment representative... first row in the segment + * \param repfnz First nonzero rows + * \param glu Global LU data. * * */ -template -void SparseLU::LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, VectorType& dense, VectorType& tempv, VectorXi& segrep, VectorXi& repfnz, LU_GlobalLu_t& Glu) +template +void SparseLU::LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, LU_GlobalLU_t& glu) { - VectorXi& xsup = Glu.xsup; - VectorXi& supno = Glu.supno; - VectorXi& lsub = Glu.lsub; - VectorXi& xlsub = Glu.xlsub; - VectorXi& xlusup = Glu.xlusup; - VectorType& lusup = Glu.lusup; + IndexVector& xsup = glu.xsup; + IndexVector& supno = glu.supno; + IndexVector& lsub = glu.lsub; + IndexVector& xlsub = glu.xlsub; + IndexVector& xlusup = glu.xlusup; + ScalarVector& lusup = glu.lusup; int i,ksub,jj,nextl_col,irow; int fsupc, nsupc, nsupr, nrow; @@ -96,10 +96,7 @@ void SparseLU::LU_panel_bmod(const int m, const int w, const int jcol, const int nrow = nsupr - nsupc; lptr = xlsub(fsupc); krep_ind = lptr + nsupc - 1; - - repfnz_col = repfnz; - dense_col = dense; - + // NOTE : Unlike the original implementation in SuperLU, the present implementation // does not include a 2-D block update. 
@@ -107,8 +104,8 @@ void SparseLU::LU_panel_bmod(const int m, const int w, const int jcol, const int for (jj = jcol; jj < jcol + w; jj++) { nextl_col = (jj-jcol) * m; - VectorBlock repfnz_col(repfnz.segment(nextl_col, m)); // First nonzero column index for each row - VectorBLock dense_col(dense.segment(nextl_col, m)); // Scatter/gather entire matrix column from/to here + VectorBlock repfnz_col(repfnz.segment(nextl_col, m)); // First nonzero column index for each row + VectorBLock dense_col(dense.segment(nextl_col, m)); // Scatter/gather entire matrix column from/to here kfnz = repfnz_col(krep); if ( kfnz == IND_EMPTY ) @@ -123,8 +120,7 @@ void SparseLU::LU_panel_bmod(const int m, const int w, const int jcol, const int // Perform a trianglar solve and block update, // then scatter the result of sup-col update to dense[] no_zeros = kfnz - fsupc; - - // Copy U[*,j] segment from dense[*] to tempv[*] : + // First Copy U[*,j] segment from dense[*] to tempv[*] : // The result of triangular solve is in tempv[*]; // The result of matric-vector update is in dense_col[*] isub = lptr + no_zeros; @@ -138,19 +134,21 @@ void SparseLU::LU_panel_bmod(const int m, const int w, const int jcol, const int luptr += nsupr * no_zeros + no_zeros; // triangular solve with Eigen Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); - Map > u( tempv.data(), segsize); +// Map > u( tempv.data(), segsize); + VectorBlock u(tempv, 0, segsize); u = A.triangularView().solve(u); luptr += segsize; // Dense Matrix vector product y <-- A*x; new (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(nsupr) ); - Map l( &(tempv.data()[segsize]), segsize); +// Map l( &(tempv.data()[segsize]), nrow); + VectorBlock l(tempv, segsize, nrow); l= A * u; // Scatter tempv(*) into SPA dense(*) such that tempv(*) // can be used for the triangular solve of the next // column of the panel. 
The y will be copied into ucol(*) - // after the whole panel has been finished. + // after the whole panel has been finished... after column_dfs() and column_bmod() isub = lptr + no_zeros; for (i = 0; i < segsize; i++) @@ -166,8 +164,8 @@ void SparseLU::LU_panel_bmod(const int m, const int w, const int jcol, const int for (i = 0; i < nrow; i++) { irow = lsub(isub); - dense_col(irow) -= tempv(segsize + i); - tempv(segsize + i) = 0; + dense_col(irow) -= l(i); + l(i) = Scalar(0); ++isub; } diff --git a/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/Eigen/src/SparseLU/SparseLU_panel_dfs.h index 97e5121db..d3c2906b2 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_panel_dfs.h @@ -62,45 +62,50 @@ * marker[i] == jj, if i was visited during dfs of current column jj; * marker1[i] >= jcol, if i was visited by earlier columns in this panel; * - * \param m number of rows in the matrix - * \param w Panel size - * \param jcol Starting column of the panel - * \param A Input matrix in column-major storage - * \param perm_r Row permutation - * \param nseg Number of U segments - * ... 
+ * \param [in]m number of rows in the matrix + * \param [in]w Panel size + * \param [in]jcol Starting column of the panel + * \param [in]A Input matrix in column-major storage + * \param [in]perm_r Row permutation + * \param [out]nseg Number of U segments + * \param [out]dense Accumulate the column vectors of the panel + * \param [out]panel_lsub Subscripts of the row in the panel + * \param [out]segrep Segment representative i.e first nonzero row of each segment + * \param [out]repfnz First nonzero location in each row + * \param [out]xprune + * \param [out]marker + * * */ -template -void SparseLU::LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, VectorXi& perm_r, int& nseg, VectorType& dense, VectorXi& panel_lsub, VectorXi& segrep, VectorXi& repfnz, VectorXi& xprune, VectorXi& marker, VectorXi& parent, VectorXi& xplore, LU_GlobalLu_t& Glu) +template +void SparseLU::LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, IndexVector& perm_r, int& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& Glu) { int jj; // Index through each column in the panel int nextl_col; // Next available position in panel_lsub[*,jj] int krow; // Row index of the current element int kperm; // permuted row index - int krep; // Supernode reprentative of the current row + int krep; // Supernode representative of the current row int kmark; int chperm, chmark, chrep, oldrep, kchild; int myfnz; // First nonzero element in the current column int xdfs, maxdfs, kpar; // Initialize pointers -// VectorXi& marker1 = marker.block(m, m); - VectorBlock marker1(marker, m, m); +// IndexVector& marker1 = marker.block(m, m); + VectorBlock marker1(marker, m, m); nseg = 0; - VectorXi& xsup = Glu.xsup; - VectorXi& supno = Glu.supno; - VectorXi& lsub = Glu.lsub; - VectorXi& xlsub = Glu.xlsub; + IndexVector& xsup = Glu.xsup; + 
IndexVector& supno = Glu.supno; + IndexVector& lsub = Glu.lsub; + IndexVector& xlsub = Glu.xlsub; // For each column in the panel for (jj = jcol; jj < jcol + w; jj++) { nextl_col = (jj - jcol) * m; - //FIXME - VectorBlock repfnz_col(repfnz.segment(nextl_col, m)); // First nonzero location in each row - VectorBlock dense_col(dense.segment(nextl_col, m)); // Accumulate a column vector here + VectorBlock repfnz_col(repfnz, nextl_col, m); // First nonzero location in each row + VectorBlock dense_col(dense,nextl_col, m); // Accumulate a column vector here // For each nnz in A[*, jj] do depth first search diff --git a/Eigen/src/SparseLU/SparseLU_pivotL.h b/Eigen/src/SparseLU/SparseLU_pivotL.h index 3bfe14e7e..32da92481 100644 --- a/Eigen/src/SparseLU/SparseLU_pivotL.h +++ b/Eigen/src/SparseLU/SparseLU_pivotL.h @@ -67,28 +67,30 @@ * \return 0 if success, i > 0 if U(i,i) is exactly zero * */ -template -int SparseLU::LU_pivotL(const int jcol, const Scalar u, VectorXi& perm_r, VectorXi& iperm_c, int& pivrow, GlobalLU_t& Glu) +template +int SparseLU::LU_pivotL(const int jcol, const RealScalar diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, int& pivrow, GlobalLU_t& Glu) { + typedef typename IndexVector::Index Index; + typedef typename ScalarVector::Scalar Scalar; // Initialize pointers - VectorXi& lsub = Glu.lsub; // Compressed row subscripts of ( rectangular supernodes ??) - VectorXi& xlsub = Glu.xlsub; // xlsub[j] is the starting location of the j-th column in lsub(*) - Scalar* lusup = Glu.lusup.data(); // Numerical values of the rectangular supernodes - VectorXi& xlusup = Glu.xlusup; // xlusup[j] is the starting location of the j-th column in lusup(*) + IndexVector& lsub = Glu.lsub; // Compressed row subscripts of L rectangular supernodes. 
+ IndexVector& xlsub = Glu.xlsub; // pointers to the beginning of each column subscript in lsub + ScalarVector& lusup = Glu.lusup; // Numerical values of L ordered by columns + IndexVector& xlusup = Glu.xlusup; // pointers to the beginning of each colum in lusup Index fsupc = (Glu.xsup)((Glu.supno)(jcol)); // First column in the supernode containing the column jcol Index nsupc = jcol - fsupc; // Number of columns in the supernode portion, excluding jcol; nsupc >=0 Index lptr = xlsub(fsupc); // pointer to the starting location of the row subscripts for this supernode portion Index nsupr = xlsub(fsupc+1) - lptr; // Number of rows in the supernode - Scalar* lu_sup_ptr = &(lusup[xlusup(fsupc)]); // Start of the current supernode - Scalar* lu_col_ptr = &(lusup[xlusup(jcol)]); // Start of jcol in the supernode + Scalar* lu_sup_ptr = &(lusup.data()[xlusup(fsupc)]); // Start of the current supernode + Scalar* lu_col_ptr = &(lusup.data()[xlusup(jcol)]); // Start of jcol in the supernode Index* lsub_ptr = &(lsub.data()[lptr]); // Start of row indices of the supernode // Determine the largest abs numerical value for partial pivoting Index diagind = iperm_c(jcol); // diagonal index Scalar pivmax = 0.0; Index pivptr = nsupc; - Index diag = -1; + Index diag = IND_EMPTY; Index old_pivptr = nsupc; Scalar rtemp; for (isub = nsupc; isub < nsupr; ++isub) { @@ -127,7 +129,7 @@ int SparseLU::LU_pivotL(const int jcol, const Scalar u, VectorXi& perm_r, Vector // Interchange row subscripts if (pivptr != nsupc ) { - std::swap( lsub_ptr(pivptr), lsub_ptr(nsupc) ); + std::swap( lsub_ptr[pivptr], lsub_ptr[nsupc] ); // Interchange numerical values as well, for the two rows in the whole snode // such that L is indexed the same way as A for (icol = 0; icol <= nsupc; icol++) diff --git a/Eigen/src/SparseLU/SparseLU_pruneL.h b/Eigen/src/SparseLU/SparseLU_pruneL.h index 687717d52..dd092b778 100644 --- a/Eigen/src/SparseLU/SparseLU_pruneL.h +++ b/Eigen/src/SparseLU/SparseLU_pruneL.h @@ -47,35 +47,35 
@@ /** * \brief Prunes the L-structure. * - * It prunes the L-structure of supernodes whose L-structure constains the current pivot row "pivrow" + * It prunes the L-structure of supernodes whose L-structure contains the current pivot row "pivrow" * * * \param jcol The current column of L * \param [in]perm_r Row permutation * \param [out]pivrow The pivot row - * \param nseg Number of segments ??? + * \param nseg Number of segments * \param segrep * \param repfnz * \param [out]xprune * \param Glu Global LU data * */ -template -void SparseLU::LU_pruneL(const int jcol, const VectorXi& perm_r, const int pivrow, const int nseg, const VectorXi& segrep, VectorXi& repfnz, VectorXi& xprune, GlobalLU_t& Glu) +template +void SparseLU::LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, const int nseg, const IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, GlobalLU_t& Glu) { // Initialize pointers - VectorXi& xsup = Glu.xsup; - VectorXi& supno = Glu.supno; - VectorXi& lsub = Glu.lsub; - VectorXi& xlsub = Glu.xlsub; - VectorType& lusup = Glu.lusup; - VectorXi& xlusup = Glu.xlusup; + IndexVector& xsup = Glu.xsup; + IndexVector& supno = Glu.supno; + IndexVector& lsub = Glu.lsub; + IndexVector& xlsub = Glu.xlsub; + ScalarVector& lusup = Glu.lusup; + IndexVector& xlusup = Glu.xlusup; // For each supernode-rep irep in U(*,j] int jsupno = supno(jcol); int i,irep,irep1; bool movnum, do_prune = false; - int kmin, kmax, ktemp, minloc, maxloc; + Index kmin, kmax, ktemp, minloc, maxloc; for (i = 0; i < nseg; i++) { irep = segrep(i); @@ -125,9 +125,7 @@ void SparseLU::LU_pruneL(const int jcol, const VectorXi& perm_r, const int pivro { // kmin below pivrow (not yet pivoted), and kmax // above pivrow: interchange the two suscripts - ktemp = lsub(kmin); - lsub(kmin) = lsub(kmax); - lsub(kmax) = ktemp; + std::swap(lsub(kmin), lsub(kmax)); // If the supernode has only one column, then we // only keep one set of subscripts. 
For any subscript @@ -144,7 +142,7 @@ void SparseLU::LU_pruneL(const int jcol, const VectorXi& perm_r, const int pivro } } // end while - xprune(irep) = kmin; + xprune(irep) = kmin; //Pruning } // end if do_prune } // end pruning } // End for each U-segment diff --git a/Eigen/src/SparseLU/SparseLU_snode_bmod.h b/Eigen/src/SparseLU/SparseLU_snode_bmod.h index 6130a5622..1d6bed8bb 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_snode_bmod.h @@ -47,13 +47,13 @@ namespace internal { #define SPARSELU_SNODE_BMOD_H template int SparseLU::LU_dsnode_bmod (const Index jcol, const Index jsupno, const Index fsupc, - ScalarVector& dense, ScalarVector& tempv, LU_GlobalLu_t& Glu) + ScalarVector& dense, LU_GlobalLU_t& glu) { typedef typename Matrix IndexVector; - IndexVector& lsub = Glu.lsub; // Compressed row subscripts of ( rectangular supernodes ??) - IndexVector& xlsub = Glu.xlsub; // xlsub[j] is the starting location of the j-th column in lsub(*) - ScalarVector& lusup = Glu.lusup; // Numerical values of the rectangular supernodes - IndexVector& xlusup = Glu.xlusup; // xlusup[j] is the starting location of the j-th column in lusup(*) + IndexVector& lsub = glu.lsub; // Compressed row subscripts of ( rectangular supernodes ??) 
+ IndexVector& xlsub = glu.xlsub; // xlsub[j] is the starting location of the j-th column in lsub(*) + ScalarVector& lusup = glu.lusup; // Numerical values of the rectangular supernodes + IndexVector& xlusup = glu.xlusup; // xlusup[j] is the starting location of the j-th column in lusup(*) int nextlu = xlusup(jcol); // Starting location of the next column to add int irow, isub; @@ -75,14 +75,16 @@ int SparseLU::LU_dsnode_bmod (const Index jcol, const Index jsupno, const Index int nrow = nsupr - nsupc; // Number of rows in the off-diagonal blocks - // Solve the triangular system for U(fsupc:jcol, jcol) with L(fspuc..., fsupc:jcol) + // Solve the triangular system for U(fsupc:jcol, jcol) with L(fspuc:jcol, fsupc:jcol) Map,0,OuterStride<> > A( &(lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); - Map > u(&(lusup.data()[ufirst]), nsupc); - u = A.triangularView().solve(u); +// Map > u(&(lusup.data()[ufirst]), nsupc); + VectorBlock u(lusup, ufirst, nsupc); + u = A.triangularView().solve(u); // Call the Eigen dense triangular solve interface // Update the trailing part of the column jcol U(jcol:jcol+nrow, jcol) using L(jcol:jcol+nrow, fsupc:jcol) and U(fsupc:jcol) new (&A) Map,0,OuterStride<> > ( &(lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); - Map > l(&(lusup.data()[ufirst+nsupc], nsupc); +// Map > l(&(lusup.data()[ufirst+nsupc], nrow); + VectorBlock l(lusup, ufirst+nsupc, nrow); l = l - A * u; return 0; From f8a0745cb0426eb3095dbea24288a64eddab04f0 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Wed, 13 Jun 2012 18:26:05 +0200 Subject: [PATCH 12/73] Build process... 
--- Eigen/OrderingMethods | 2 +- Eigen/SparseLU | 17 + Eigen/src/OrderingMethods/Ordering.h | 166 +++---- Eigen/src/SparseLU/CMakeLists.txt | 6 + Eigen/src/SparseLU/SparseLU.h | 435 +++++++++--------- Eigen/src/SparseLU/SparseLU_Coletree.h | 97 ++-- Eigen/src/SparseLU/SparseLU_Matrix.h | 28 +- Eigen/src/SparseLU/SparseLU_Memory.h | 174 ++++--- Eigen/src/SparseLU/SparseLU_Structs.h | 13 +- Eigen/src/SparseLU/SparseLU_Utils.h | 3 - Eigen/src/SparseLU/SparseLU_column_bmod.h | 21 +- Eigen/src/SparseLU/SparseLU_column_dfs.h | 18 +- Eigen/src/SparseLU/SparseLU_copy_to_ucol.h | 20 +- .../src/SparseLU/SparseLU_heap_relax_snode.h | 6 +- Eigen/src/SparseLU/SparseLU_panel_bmod.h | 14 +- Eigen/src/SparseLU/SparseLU_panel_dfs.h | 12 +- Eigen/src/SparseLU/SparseLU_pivotL.h | 15 +- Eigen/src/SparseLU/SparseLU_pruneL.h | 26 +- Eigen/src/SparseLU/SparseLU_relax_snode.h | 3 - Eigen/src/SparseLU/SparseLU_snode_bmod.h | 17 +- Eigen/src/SparseLU/SparseLU_snode_dfs.h | 9 +- bench/spbench/CMakeLists.txt | 5 + 22 files changed, 559 insertions(+), 548 deletions(-) create mode 100644 Eigen/SparseLU create mode 100644 Eigen/src/SparseLU/CMakeLists.txt diff --git a/Eigen/OrderingMethods b/Eigen/OrderingMethods index 1e2d87452..bb43220e8 100644 --- a/Eigen/OrderingMethods +++ b/Eigen/OrderingMethods @@ -17,7 +17,7 @@ */ #include "src/OrderingMethods/Amd.h" - +#include "src/OrderingMethods/Ordering.h" #include "src/Core/util/ReenableStupidWarnings.h" #endif // EIGEN_ORDERINGMETHODS_MODULE_H diff --git a/Eigen/SparseLU b/Eigen/SparseLU new file mode 100644 index 000000000..452bc9f83 --- /dev/null +++ b/Eigen/SparseLU @@ -0,0 +1,17 @@ +#ifndef EIGEN_SPARSELU_MODULE_H +#define EIGEN_SPARSELU_MODULE_H + +#include "SparseCore" + + +/** \ingroup Sparse_modules + * \defgroup SparseLU_Module SparseLU module + * + */ + +// Ordering interface +#include "OrderingMethods" + +#include "src/SparseLU/SparseLU.h" + +#endif // EIGEN_SPARSELU_MODULE_H diff --git a/Eigen/src/OrderingMethods/Ordering.h 
b/Eigen/src/OrderingMethods/Ordering.h index c43c381a4..3a3e3f6fc 100644 --- a/Eigen/src/OrderingMethods/Ordering.h +++ b/Eigen/src/OrderingMethods/Ordering.h @@ -26,9 +26,7 @@ #ifndef EIGEN_ORDERING_H #define EIGEN_ORDERING_H -#include -#include - +#include "Amd.h" namespace Eigen { template class OrderingBase @@ -68,8 +66,23 @@ class OrderingBase if (m_isInitialized = true) return m_P; else abort(); // FIXME Should find a smoother way to exit with error code } + + /** + * Get the symmetric pattern A^T+A from the input matrix A. + * FIXME: The values should not be considered here + */ template - void at_plus_a(const MatrixType& mat); + void at_plus_a(const MatrixType& mat) + { + MatrixType C; + C = mat.transpose(); // NOTE: Could be costly + for (int i = 0; i < C.rows(); i++) + { + for (typename MatrixType::InnerIterator it(C, i); it; ++it) + it.valueRef() = 0.0; + } + m_mat = C + mat; + } /** keeps off-diagonal entries; drops diagonal entries */ struct keep_diag { @@ -87,99 +100,30 @@ class OrderingBase PermutationType m_P; // The computed permutation mutable bool m_isInitialized; SparseMatrix m_mat; // Stores the (symmetrized) matrix to permute -} -/** - * Get the symmetric pattern A^T+A from the input matrix A. 
- * NOTE: The values should not be considered here - */ -template -void OrderingBase::at_plus_a(const MatrixType& mat) -{ - MatrixType C; - C = mat.transpose(); // NOTE: Could be costly - for (int i = 0; i < C.rows(); i++) - { - for (typename MatrixType::InnerIterator it(C, i); it; ++it) - it.valueRef() = 0.0; - } - m_mat = C + mat; - -/** - * Get the column approximate minimum degree ordering - * The matrix should be in column-major format - */ -template -class COLAMDOrdering: public OrderingBase< ColamdOrdering > -{ - public: - typedef OrderingBase< ColamdOrdering > Base; - typedef SparseMatrix MatrixType; - - public: - COLAMDOrdering():Base() {} - - COLAMDOrdering(const MatrixType& matrix):Base() - { - compute(matrix); - } - COLAMDOrdering(const MatrixType& mat, PermutationType& perm_c):Base() - { - compute(matrix); - perm_c = this.get_perm(); - } - void compute(const MatrixType& mat) - { - // Test if the matrix is column major... - - int m = mat.rows(); - int n = mat.cols(); - int nnz = mat.nonZeros(); - // Get the recommended value of Alen to be used by colamd - int Alen = colamd_recommended(nnz, m, n); - // Set the default parameters - double knobs[COLAMD_KNOBS]; - colamd_set_defaults(knobs); - - int info; - VectorXi p(n), A(nnz); - for(int i=0; i < n; i++) p(i) = mat.outerIndexPtr()(i); - for(int i=0; i < nnz; i++) A(i) = mat.innerIndexPtr()(i); - // Call Colamd routine to compute the ordering - info = colamd(m, n, Alen, A,p , knobs, stats) - eigen_assert( (info != FALSE)&& "COLAMD failed " ); - - m_P.resize(n); - for (int i = 0; i < n; i++) m_P(p(i)) = i; - m_isInitialized = true; - } - protected: - using Base::m_isInitialized; - using Base m_P; -} +}; /** * Get the approximate minimum degree ordering * If the matrix is not structurally symmetric, an ordering of A^T+A is computed * \tparam Scalar The type of the scalar of the matrix for which the ordering is applied * \tparam Index The type of indices of the matrix - * \tparam _UpLo If the matrix is 
symmetric, indicates which part to use */ -template -class AMDordering : public OrderingBase > +template +class AMDOrdering : public OrderingBase > { public: - enum { UpLo = _UpLo }; typedef OrderingBase< AMDOrdering > Base; typedef SparseMatrix MatrixType; + typedef PermutationMatrix PermutationType; public: AMDOrdering():Base(){} AMDOrdering(const MatrixType& mat):Base() { - compute(matrix); + compute(mat); } AMDOrdering(const MatrixType& mat, PermutationType& perm_c):Base() { - compute(matrix); + compute(mat); perm_c = this.get_perm(); } /** Compute the permutation vector from a column-major sparse matrix */ @@ -200,15 +144,75 @@ class AMDordering : public OrderingBase > m_mat = mat; // Call the AMD routine - m_mat.prune(keep_diag()); + m_mat.prune(keep_diag()); //Remove the diagonal elements internal::minimum_degree_ordering(m_mat, m_P); if (m_P.size()>0) m_isInitialized = true; } protected: + struct keep_diag{ + inline bool operator() (const Index& row, const Index& col, const Scalar&) const + { + return row!=col; + } + }; using Base::m_isInitialized; using Base::m_P; using Base::m_mat; -} +}; + + +/** + * Get the column approximate minimum degree ordering + * The matrix should be in column-major format + */ +// template +// class COLAMDOrdering: public OrderingBase< ColamdOrdering > +// { +// public: +// typedef OrderingBase< ColamdOrdering > Base; +// typedef SparseMatrix MatrixType; +// +// public: +// COLAMDOrdering():Base() {} +// +// COLAMDOrdering(const MatrixType& matrix):Base() +// { +// compute(matrix); +// } +// COLAMDOrdering(const MatrixType& mat, PermutationType& perm_c):Base() +// { +// compute(matrix); +// perm_c = this.get_perm(); +// } +// void compute(const MatrixType& mat) +// { +// // Test if the matrix is column major... 
+// +// int m = mat.rows(); +// int n = mat.cols(); +// int nnz = mat.nonZeros(); +// // Get the recommended value of Alen to be used by colamd +// int Alen = colamd_recommended(nnz, m, n); +// // Set the default parameters +// double knobs[COLAMD_KNOBS]; +// colamd_set_defaults(knobs); +// +// int info; +// VectorXi p(n), A(nnz); +// for(int i=0; i < n; i++) p(i) = mat.outerIndexPtr()(i); +// for(int i=0; i < nnz; i++) A(i) = mat.innerIndexPtr()(i); +// // Call Colamd routine to compute the ordering +// info = colamd(m, n, Alen, A,p , knobs, stats) +// eigen_assert( (info != FALSE)&& "COLAMD failed " ); +// +// m_P.resize(n); +// for (int i = 0; i < n; i++) m_P(p(i)) = i; +// m_isInitialized = true; +// } +// protected: +// using Base::m_isInitialized; +// using Base m_P; +// }; } // end namespace Eigen #endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/CMakeLists.txt b/Eigen/src/SparseLU/CMakeLists.txt new file mode 100644 index 000000000..69729ee89 --- /dev/null +++ b/Eigen/src/SparseLU/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_SparseLU_SRCS "*.h") + +INSTALL(FILES + ${Eigen_SparseLU_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/SparseLU COMPONENT Devel + ) diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 36b1ce570..293dcd0b3 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -30,8 +30,8 @@ namespace Eigen { // Data structure needed by all routines -#include -#include +#include "SparseLU_Structs.h" +#include "SparseLU_Matrix.h" /** * \ingroup SparseLU_Module @@ -41,18 +41,20 @@ namespace Eigen { * * \tparam _MatrixType The type of the sparse matrix. 
It must be a column-major SparseMatrix<> */ -template +template class SparseLU { public: typedef _MatrixType MatrixType; + typedef _OrderingType OrderingType; typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; typedef typename MatrixType::Index Index; typedef SparseMatrix NCMatrix; typedef SuperNodalMatrix SCMatrix; - typedef GlobalLU_t LU_GlobalLU_t; typedef Matrix ScalarVector; typedef Matrix IndexVector; +// typedef GlobalLU_t LU_GlobalLU_t; typedef PermutationMatrix PermutationType; public: SparseLU():m_isInitialized(true),m_symmetricmode(false),m_diagpivotthresh(1.0) @@ -82,10 +84,10 @@ class SparseLU analyzePattern(matrix); //Factorize factorize(matrix); - } - template - bool SparseLU::_solve(const MatrixBase &b, MatrixBase &dest) const + } + inline Index rows() const { return m_mat.rows(); } + inline Index cols() const { return m_mat.cols(); } /** Indicate that the pattern of the input matrix is symmetric */ void isSymmetric(bool sym) { @@ -99,45 +101,152 @@ class SparseLU } - /** \returns the solution X of \f$ A X = b \f$ using the current decomposition of A. + /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A. 
* * \sa compute() */ - template - inline const internal::solve_retval solve(const MatrixBase& b) const - { - eigen_assert(m_factorizationIsOk && "SparseLU is not initialized."); - eigen_assert(rows()==b.rows() - && "SparseLU::solve(): invalid number of rows of the right hand side matrix b"); - return internal::solve_retval(*this, b.derived()); +// template +// inline const solve_retval solve(const MatrixBase& B) const +// { +// eigen_assert(m_factorizationIsOk && "SparseLU is not initialized."); +// eigen_assert(rows()==B.rows() +// && "SparseLU::solve(): invalid number of rows of the right hand side matrix B"); +// return solve_retval(*this, B.derived()); +// } + + template + bool _solve(const MatrixBase &B, MatrixBase &X) const + { + eigen_assert(m_isInitialized && "The matrix should be factorized first"); + EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0, + THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); + + X = B; /* on return, X is overwritten by the computed solution */ + + int nrhs = B.cols(); + + // Permute the right hand side to form Pr*B + X = m_perm_r * X; + + // Forward solve PLy = Pb; + Index n = B.rows(); + Index fsupc; // First column of the current supernode + Index istart; // Pointer index to the subscript of the current column + Index nsupr; // Number of rows in the current supernode + Index nsupc; // Number of columns in the current supernode + Index nrow; // Number of rows in the non-diagonal part of the supernode + Index luptr; // Pointer index to the current nonzero value + Index iptr; // row index pointer iterator + Index irow; //Current index row + const Scalar * Lval = m_Lstore.valuePtr(); // Nonzero values + Matrix work(n, nrhs); // working vector + work.setZero(); + int j, k, i, icol,jcol; + for (k = 0; k <= m_Lstore.nsuper(); k ++) + { + fsupc = m_Lstore.supToCol()[k]; + istart = m_Lstore.rowIndexPtr()[fsupc]; + nsupr = m_Lstore.rowIndexPtr()[fsupc+1] - istart; + nsupc = m_Lstore.supToCol()[k+1] - fsupc; + nrow = nsupr - nsupc; + luptr 
= m_Lstore.colIndexPtr()[fsupc]; + + if (nsupc == 1 ) + { + for (j = 0; j < nrhs; j++) + { + for (iptr = istart+1; iptr < m_Lstore.rowIndexPtr()[fsupc+1]; iptr++) + { + irow = m_Lstore.rowIndex()[iptr]; + ++luptr; + X(irow, j) -= X(fsupc, j) * Lval[luptr]; + } + } + } + else + { + // The supernode has more than one column + + // Triangular solve + Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); + // Map, 0, OuterStride > u( &(X(fsupc,0)), nsupc, nrhs, OuterStride<>(X.rows()) ); + Matrix& U = X.block(fsupc, 0, nsupc, nrhs); //FIXME Check this + U = A.template triangularView().solve(U); + + // Matrix-vector product + new (&A) Map, 0, OuterStride<> > ( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); + work.block(0, 0, nrow, nrhs) = A * U; + + //Begin Scatter + for (j = 0; j < nrhs; j++) + { + iptr = istart + nsupc; + for (i = 0; i < nrow; i++) + { + irow = m_Lstore.rowIndex()[iptr]; + X(irow, j) -= work(i, j); // Scatter operation + work(i, j) = Scalar(0); + iptr++; + } + } + } + } // end for all supernodes + + // Back solve Ux = y + for (k = m_Lstore.nsuper(); k >= 0; k--) + { + fsupc = m_Lstore.supToCol()[k]; + istart = m_Lstore.rowIndexPtr()[fsupc]; + nsupr = m_Lstore.rowIndexPtr()[fsupc+1] - istart; + nsupc = m_Lstore.supToCol()[k+1] - fsupc; + luptr = m_Lstore.colIndexPtr()[fsupc]; + + if (nsupc == 1) + { + for (j = 0; j < nrhs; j++) + { + X(fsupc, j) /= Lval[luptr]; + } + } + else + { + Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); + Matrix& U = X.block(fsupc, 0, nsupc, nrhs); + U = A.template triangularView().solve(U); + } + + for (j = 0; j < nrhs; ++j) + { + for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) + { + for (i = m_Ustore.outerIndexPtr()[jcol]; i < m_Ustore.outerIndexPtr()[jcol]; i++) + { + irow = m_Ustore.InnerIndices()[i]; + X(irow, j) -= X(jcol, j) * m_Ustore.Values()[i]; + } + } + } + } // End For U-solve + + // Permute back the solution + X = m_perm_c * X; + + return 
true; } + protected: // Functions - void initperfvalues(); - int LU_snode_dfs(const int jcol, const int kcol, const IndexVector* asub, - const IndexVector* colptr, IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t& glu); - int LU_dsnode_bmod (const Index jcol, const Index jsupno, const Index fsupc, - ScalarVector& dense, LU_GlobalLU_t& Glu); - int LU_pivotL(const int jcol, const RealScalar diagpivotthresh, IndexVector& perm_r, - IndexVector& iperm_c, int& pivrow, GlobalLU_t& Glu); - void LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, - IndexVector& perm_r, int& nseg, ScalarVector& dense, IndexVector& panel_lsub, - IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, - IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& Glu); - void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, - ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, - IndexVector& repfnz, LU_GlobalLU_t& glu); - int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, IndexVector& nseg, - IndexVector& lsub_col, IndexVector& segrep, IndexVector& repfnz, - IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& glu); - int LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, - IndexVector& segrep, IndexVector& repfnz, int fpanelc, LU_GlobalLU_t& Glu); - int LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segrep, IndexVector& repfnz, - IndexVector& perm_r, ScalarVector& dense, LU_GlobalLU_t& glu); - void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, const int nseg, - const IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, GlobalLU_t& Glu) - + void initperfvalues() + { + m_panel_size = 12; + m_relax = 1; + m_maxsuper = 100; + m_rowblk = 200; + m_colblk = 60; + m_fillfactor = 20; + } + // Variables mutable ComputationInfo m_info; bool m_isInitialized; @@ -150,9 +259,7 
@@ class SparseLU PermutationType m_perm_r ; // Row permutation IndexVector m_etree; // Column elimination tree - ScalarVector m_work; // Scalar work vector - IndexVector m_iwork; //Index work vector - static LU_GlobalLU_t m_glu; // persistent data to facilitate multiple factors + static LU_GlobalLU_t m_glu; // persistent data to facilitate multiple factors // FIXME All fields of this struct can be defined separately as class members // SuperLU/SparseLU options @@ -176,21 +283,9 @@ class SparseLU }; // End class SparseLU -/* Set the default values for performance */ -void SparseLU::initperfvalues() -{ - m_panel_size = 12; - m_relax = 1; - m_maxsuper = 100; - m_rowblk = 200; - m_colblk = 60; - m_fillfactor = 20; -} // Functions needed by the anaysis phase -#include -// Ordering interface -#include +#include "SparseLU_Coletree.h" /** * Compute the column permutation to minimize the fill-in (file amd.c ) * @@ -202,7 +297,7 @@ void SparseLU::initperfvalues() * */ template -void SparseLU::analyzePattern(const MatrixType& mat) +void SparseLU::analyzePattern(const MatrixType& mat) { //TODO It is possible as in SuperLU to compute row and columns scaling vectors to equilibrate the matrix mat. 
@@ -218,6 +313,7 @@ void SparseLU::analyzePattern(const MatrixType& mat) // Apply the permutation to the column of the input matrix m_mat = mat * m_perm_c; + // Compute the column elimination tree of the permuted matrix if (m_etree.size() == 0) m_etree.resize(m_mat.cols()); @@ -230,8 +326,9 @@ void SparseLU::analyzePattern(const MatrixType& mat) LU_TreePostorder(m_mat.cols(), m_etree, post); // Renumber etree in postorder - iwork.resize(n+1); - for (i = 0; i < n; ++i) iwork(post(i)) = post(m_etree(i)); + int m = m_mat.cols(); + iwork.resize(m+1); + for (int i = 0; i < m; ++i) iwork(post(i)) = post(m_etree(i)); m_etree = iwork; // Postmultiply A*Pc by post, i.e reorder the matrix according to the postorder of the etree @@ -242,23 +339,23 @@ void SparseLU::analyzePattern(const MatrixType& mat) m_perm_c = m_perm_c * post_perm; } // end postordering - m_analysisIsok = true; + m_analysisIsOk = true; } // Functions needed by the numerical factorization phase -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "SparseLU_Memory.h" +#include "SparseLU_heap_relax_snode.h" +#include "SparseLU_relax_snode.h" +#include "SparseLU_snode_dfs.h" +#include "SparseLU_snode_bmod.h" +#include "SparseLU_pivotL.h" +#include "SparseLU_panel_dfs.h" +#include "SparseLU_panel_bmod.h" +#include "SparseLU_column_dfs.h" +#include "SparseLU_column_bmod.h" +#include "SparseLU_copy_to_ucol.h" +#include "SparseLU_pruneL.h" +#include "SparseLU_Utils.h" /** * - Numerical factorization @@ -276,13 +373,17 @@ void SparseLU::analyzePattern(const MatrixType& mat) * failure occurred, plus A->ncol. If lwork = -1, it is * the estimated amount of space needed, plus A->ncol. 
*/ -template -void SparseLU::factorize(const MatrixType& matrix) +template +void SparseLU::factorize(const MatrixType& matrix) { - eigen_assert(m_analysisIsok && "analyzePattern() should be called first"); + eigen_assert(m_analysisIsOk && "analyzePattern() should be called first"); eigen_assert((matrix.rows() == matrix.cols()) && "Only for squared matrices"); + + ScalarVector work; // Scalar work vector + IndexVector iwork; //Index work vector + // Apply the column permutation computed in analyzepattern() m_mat = matrix * m_perm_c; m_mat.makeCompressed(); @@ -293,7 +394,7 @@ void SparseLU::factorize(const MatrixType& matrix) int maxpanel = m_panel_size * m; // Allocate storage common to the factor routines int lwork = 0; - int info = LUMemInit(m, n, nnz, m_work, m_iwork, lwork, m_fillratio, m_panel_size, m_maxsuper, m_rowblk, m_glu); + int info = LUMemInit(m, n, nnz, work, iwork, lwork, m_fillfactor, m_panel_size, m_maxsuper, m_rowblk, m_glu); if (info) { std::cerr << "UNABLE TO ALLOCATE WORKING MEMORY\n\n" ; @@ -304,27 +405,27 @@ void SparseLU::factorize(const MatrixType& matrix) // Set up pointers for integer working arrays int idx = 0; - VectorBlock segrep(m_iwork, idx, m); + VectorBlock segrep(iwork, idx, m); idx += m; - VectorBlock parent(m_iwork, idx, m); + VectorBlock parent(iwork, idx, m); idx += m; - VectorBlock xplore(m_iwork, idx, m); + VectorBlock xplore(iwork, idx, m); idx += m; - VectorBlock repfnz(m_iwork, idx, maxpanel); + VectorBlock repfnz(iwork, idx, maxpanel); idx += maxpanel; - VectorBlock panel_lsub(m_iwork, idx, maxpanel) + VectorBlock panel_lsub(iwork, idx, maxpanel); idx += maxpanel; - VectorBlock xprune(m_iwork, idx, n); + VectorBlock xprune(iwork, idx, n); idx += n; - VectorBlock marker(m_iwork, idx, m * LU_NO_MARKER); + VectorBlock marker(iwork, idx, m * LU_NO_MARKER); repfnz.setConstant(-1); panel_lsub.setConstant(-1); // Set up pointers for scalar working arrays - VectorBlock dense(m_work, 0, maxpanel); + VectorBlock dense(work, 0, 
maxpanel); dense.setZero(); - VectorBlock tempv(m_work, maxpanel, LU_NUM_TEMPV(m, m_panel_size, m_maxsuper, m_rowblk) ); + VectorBlock tempv(work, maxpanel, LU_NUM_TEMPV(m, m_panel_size, m_maxsuper, m_rowblk) ); tempv.setZero(); // Setup Permutation vectors @@ -334,9 +435,9 @@ void SparseLU::factorize(const MatrixType& matrix) // Identify initial relaxed snodes IndexVector relax_end(n); if ( m_symmetricmode = true ) - internal::LU_heap_relax_snode(n, m_etree, m_relax, marker, relax_end); + LU_heap_relax_snode(n, m_etree, m_relax, marker, relax_end); else - internal::LU_relax_snode(n, m_etree, m_relax, marker, relax_end); + LU_relax_snode(n, m_etree, m_relax, marker, relax_end); m_perm_r.setConstant(-1); marker.setConstant(-1); @@ -346,6 +447,7 @@ void SparseLU::factorize(const MatrixType& matrix) IndexVector& xlsub = m_glu.xlsub; IndexVector& xlusup = m_glu.xlusup; IndexVector& xusub = m_glu.xusub; + ScalarVector& lusup = m_glu.lusup; Index& nzlumax = m_glu.nzlumax; supno(0) = IND_EMPTY; @@ -360,7 +462,8 @@ void SparseLU::factorize(const MatrixType& matrix) Index pivrow; // Pivotal row number in the original row matrix int nseg1; // Number of segments in U-column above panel row jcol int nseg; // Number of segments in each U-column - int irep,ir; + int irep,ir, icol; + int i, k, jj,j; for (jcol = 0; jcol < n; ) { if (relax_end(jcol) != IND_EMPTY) @@ -382,9 +485,10 @@ void SparseLU::factorize(const MatrixType& matrix) jsupno = supno(jcol); // Supernode number which column jcol belongs to fsupc = xsup(jsupno); //First column number of the current supernode new_next = nextlu + (xlsub(fsupc+1)-xlsub(fsupc)) * (kcol - jcol + 1); + int mem; while (new_next > nzlumax ) { - mem = LUMemXpand(lusup, nzlumax, nextlu, LUSUP, m_glu); + mem = LUMemXpand(lusup, nzlumax, nextlu, LUSUP, m_glu.num_expansions); if (mem) { std::cerr << "MEMORY ALLOCATION FAILED FOR L FACTOR \n"; @@ -401,10 +505,10 @@ void SparseLU::factorize(const MatrixType& matrix) dense(it.row()) = it.val(); // 
Numeric update within the snode - LU_snode_bmod(icol, jsupno, fsupc, dense, glu); + LU_snode_bmod(icol, jsupno, fsupc, dense, m_glu); // Eliminate the current column - info = LU_pivotL(icol, m_diagpivotthresh, m_perm_r, m_iperm_c, pivrow, m_glu); + info = LU_pivotL(icol, m_diagpivotthresh, m_perm_r, iperm_c, pivrow, m_glu); if ( info ) { m_info = NumericalIssue; @@ -419,7 +523,7 @@ void SparseLU::factorize(const MatrixType& matrix) { // Work on one panel of panel_size columns // Adjust panel size so that a panel won't overlap with the next relaxed snode. - int panel_size = wdef; // upper bound on panel width + int panel_size = m_panel_size; // upper bound on panel width for (k = jcol + 1; k < std::min(jcol+panel_size, n); k++) { if (relax_end(k) != IND_EMPTY) @@ -438,7 +542,7 @@ void SparseLU::factorize(const MatrixType& matrix) LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_glu); // Sparse LU within the panel, and below the panel diagonal - for ( jj = jcol, j< jcol + panel_size; jj++) + for ( jj = jcol; j< jcol + panel_size; jj++) { k = (jj - jcol) * m; // Column index for w-wide arrays @@ -446,7 +550,7 @@ void SparseLU::factorize(const MatrixType& matrix) //Depth-first-search for the current column VectorBlock panel_lsubk(panel_lsub, k, m); VectorBlock repfnz_k(repfnz, k, m); - info = LU_column_dfs(m, jj, perm_r, nseg, panel_lsub(k), segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); + info = LU_column_dfs(m, jj, m_perm_r, m_maxsuper, nseg, panel_lsub(k), segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); if ( !info ) { std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() \n"; @@ -467,7 +571,7 @@ void SparseLU::factorize(const MatrixType& matrix) } // Copy the U-segments to ucol(*) - info = LU_copy_to_col(jj, nseg, segrep, repfnz_k, perm_r, dense_k, m_glu); + info = LU_copy_to_col(jj, nseg, segrep, repfnz_k, m_perm_r, dense_k, m_glu); if ( info ) { std::cerr << "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() \n"; @@ -506,9 
+610,9 @@ void SparseLU::factorize(const MatrixType& matrix) k = 0; for (i = 0; i < m; ++i) { - if ( perm_r(i) == IND_EMPTY ) + if ( m_perm_r(i) == IND_EMPTY ) { - perm_r(i) = n + k; + m_perm_r(i) = n + k; ++k; } } @@ -518,140 +622,21 @@ void SparseLU::factorize(const MatrixType& matrix) // Apply permutation to the L subscripts LU_fixupL(n, m_perm_r, m_glu); - // Free work space iwork and work - //... + // Create supernode matrix L - m_Lstore.setInfos(m, n, m_nnzL, Glu.lusup, Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno; Glu.xsup); - // Create the column major upper sparse matrix U - new (&m_Ustore) Map > ( m, n, m_nnzU, Glu.xusub.data(), Glu.usub.data(), Glu.ucol.data() ); //FIXME - this.m_Ustore = m_Ustore; + m_Lstore.setInfos(m, n, m_glu.lusup, m_glu.xlusup, m_glu.lsub, m_glu.xlsub, m_glu.supno, m_glu.xsup); + // Create the column major upper sparse matrix U; + // it is assumed here that MatrixType = SparseMatrix + new (&m_Ustore) Map ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() ); + this.m_Ustore = m_Ustore; //FIXME Is it necessary m_info = Success; - m_factorizationIsOk = ok; + m_factorizationIsOk = true; } -template -bool SparseLU::_solve(const MatrixBase &b, MatrixBase &X) const -{ - eigen_assert(m_isInitialized && "The matrix should be factorized first"); - EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0, - THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); - - X = b; /* on return, X is overwritten by the computed solution */ - - int nrhs = b.cols(); - - // Permute the right hand side to form Pr*B - X = m_perm_r * X; - - // Forward solve PLy = Pb; - Index fsupc; // First column of the current supernode - Index istart; // Pointer index to the subscript of the current column - Index nsupr; // Number of rows in the current supernode - Index nsupc; // Number of columns in the current supernode - Index nrow; // Number of rows in the non-diagonal part of the supernode - Index luptr; // Pointer index to the current nonzero value - Index 
iptr; // row index pointer iterator - Index irow; //Current index row - Scalar * Lval = m_Lstore.valuePtr(); // Nonzero values - Matrix work(n,nrhs); // working vector - work.setZero(); - int j; - for (k = 0; k <= m_Lstore.nsuper(); k ++) - { - fsupc = m_Lstore.sup_to_col()[k]; - istart = m_Lstore.rowIndexPtr()[fsupc]; - nsupr = m_Lstore..rowIndexPtr()[fsupc+1] - istart; - nsupc = m_Lstore.sup_to_col()[k+1] - fsupc; - nrow = nsupr - nsupc; - - if (nsupc == 1 ) - { - for (j = 0; j < nrhs; j++) - { - luptr = m_Lstore.colIndexPtr()[fsupc]; //FIXME Should be outside the for loop - for (iptr = istart+1; iptr < m_Lstore.rowIndexPtr()[fsupc+1]; iptr++) - { - irow = m_Lstore.rowIndex()[iptr]; - ++luptr; - X(irow, j) -= X(fsupc, j) * Lval[luptr]; - } - } - } - else - { - // The supernode has more than one column - - // Triangular solve - luptr = m_Lstore.colIndexPtr()[fsupc]; //FIXME Should be outside the loop - Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); -// Map, 0, OuterStride > u( &(X(fsupc,0)), nsupc, nrhs, OuterStride<>(X.rows()) ); - Matrix& u = X.block(fsupc, 0, nsupc, nrhs); //FIXME Check this - u = A.triangularView().solve(u); - - // Matrix-vector product - new (&A) Map, 0, OuterStride<> > ( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); - work.block(0, 0, nrow, nrhs) = A * u; - - //Begin Scatter - for (j = 0; j < nrhs; j++) - { - iptr = istart + nsupc; - for (i = 0; i < nrow; i++) - { - irow = m_Lstore.rowIndex()[iptr]; - X(irow, j) -= work(i, j); // Scatter operation - work(i, j) = Scalar(0); - iptr++; - } - } - } - } // end for all supernodes - - // Back solve Ux = y - for (k = m_Lstore.nsuper(); k >= 0; k--) - { - fsupc = m_Lstore.sup_to_col()[k]; - istart = m_Lstore.rowIndexPtr()[fsupc]; - nsupr = m_Lstore..rowIndexPtr()[fsupc+1] - istart; - nsupc = m_Lstore.sup_to_col()[k+1] - fsupc; - luptr = m_Lstore.colIndexPtr()[fsupc]; - - if (nsupc == 1) - { - for (j = 0; j < nrhs; j++) - { - X(fsupc, j) /= Lval[luptr]; - 
} - } - else - { - Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); - Matrix& u = X.block(fsupc, 0, nsupc, nrhs); - u = A.triangularView().solve(u); - } - - for (j = 0; j < nrhs; ++j) - { - for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) - { - for (i = m_Ustore.outerIndexPtr()[jcol]; i < m_Ustore.outerIndexPtr()[jcol]; i++) - { - irow = m_Ustore.InnerIndices()[i]; - X(irow, j) -= X(irow, jcol) * m_Ustore.Values()[i]; - } - } - } - } // End For U-solve - - // Permute back the solution - X = m_perm_c * X; - - return true; -} -namespace internal { +/*namespace internal { template struct solve_retval, Rhs> @@ -666,7 +651,7 @@ struct solve_retval, Rhs> } }; -} // end namespace internal +}*/ // end namespace internal diff --git a/Eigen/src/SparseLU/SparseLU_Coletree.h b/Eigen/src/SparseLU/SparseLU_Coletree.h index 4c42387be..00bb97796 100644 --- a/Eigen/src/SparseLU/SparseLU_Coletree.h +++ b/Eigen/src/SparseLU/SparseLU_Coletree.h @@ -44,13 +44,28 @@ */ #ifndef SPARSELU_COLETREE_H #define SPARSELU_COLETREE_H +/** Find the root of the tree/set containing the vertex i : Use Path halving */ +template +int etree_find (int i, IndexVector& pp) +{ + int p = pp(i); // Parent + int gp = pp(p); // Grand parent + while (gp != p) + { + pp(i) = gp; // Parent pointer on find path is changed to former grand parent + i = gp; + p = pp(i); + gp = pp(p); + } + return p; +} /** Compute the column elimination tree of a sparse matrix * NOTE : The matrix is supposed to be in column-major format. 
* */ template -int SparseLU::LU_sp_coletree(const MatrixType& mat, IndexVector& parent) +int LU_sp_coletree(const MatrixType& mat, IndexVector& parent) { int nc = mat.cols(); // Number of columns int nr = mat.rows(); // Number of rows @@ -87,7 +102,7 @@ int SparseLU::LU_sp_coletree(const MatrixType& mat, IndexVector& parent) { // A sequence of interleaved find and union is performed row = firstcol(it.row()); if (row >= col) continue; - rset = internal::etree_find(row, pp); // Find the name of the set containing row + rset = etree_find(row, pp); // Find the name of the set containing row rroot = root(rset); if (rroot != col) { @@ -100,52 +115,6 @@ int SparseLU::LU_sp_coletree(const MatrixType& mat, IndexVector& parent) return 0; } -/** Find the root of the tree/set containing the vertex i : Use Path halving */ -template -int etree_find (int i, IndexVector& pp) -{ - int p = pp(i); // Parent - int gp = pp(p); // Grand parent - while (gp != p) - { - pp(i) = gp; // Parent pointer on find path is changed to former grand parent - i = gp; - p = pp(i); - gp = pp(p); - } - return p; -} - -/** - * Post order a tree - * \param parent Input tree - * \param post postordered tree - */ -template -void SparseLU::LU_TreePostorder(int n, IndexVector& parent, IndexVector& post) -{ - IndexVector first_kid, next_kid; // Linked list of children - int postnum; - // Allocate storage for working arrays and results - first_kid.resize(n+1); - next_kid.setZero(n+1); - post.setZero(n+1); - - // Set up structure describing children - int v, dad; - first_kid.setConstant(-1); - for (v = n-1, v >= 0; v--) - { - dad = parent(v); - next_kid(v) = first_kid(dad); - first_kid(dad) = v; - } - - // Depth-first search from dummy root vertex #n - postnum = 0; - internal::LU_nr_etdfs(n, parent, first_kid, next_kid, post, postnum); - return post; -} /** * Depth-first search from vertex n. No recursion. * This routine was contributed by Cédric Doucet, CEDRAT Group, Meylan, France. 
@@ -190,4 +159,36 @@ void LU_nr_etdfs (int n, IndexVector& parent, IndexVector& first_kid, IndexVecto } } + +/** + * Post order a tree + * \param parent Input tree + * \param post postordered tree + */ +template +void LU_TreePostorder(int n, IndexVector& parent, IndexVector& post) +{ + IndexVector first_kid, next_kid; // Linked list of children + int postnum; + // Allocate storage for working arrays and results + first_kid.resize(n+1); + next_kid.setZero(n+1); + post.setZero(n+1); + + // Set up structure describing children + int v, dad; + first_kid.setConstant(-1); + for (v = n-1; v >= 0; v--) + { + dad = parent(v); + next_kid(v) = first_kid(dad); + first_kid(dad) = v; + } + + // Depth-first search from dummy root vertex #n + postnum = 0; + LU_nr_etdfs(n, parent, first_kid, next_kid, post, postnum); + return post; +} + #endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_Matrix.h b/Eigen/src/SparseLU/SparseLU_Matrix.h index e4bf7eda8..70570ab9c 100644 --- a/Eigen/src/SparseLU/SparseLU_Matrix.h +++ b/Eigen/src/SparseLU/SparseLU_Matrix.h @@ -45,17 +45,17 @@ template class SuperNodalMatrix { public: - typedef typename _Scalar Scalar; - typedef typename _Index Index; + typedef _Scalar Scalar; + typedef _Index Index; public: SuperNodalMatrix() { } - SuperNodalMatrix(Index m, Index n, Index nnz, Scalar *nzval, Index* nzval_colptr, Index* rowind, + SuperNodalMatrix(Index m, Index n, Scalar *nzval, Index* nzval_colptr, Index* rowind, Index* rowind_colptr, Index* col_to_sup, Index* sup_to_col ) { - setInfos(m, n, nnz, nzval, nzval_colptr, rowind, rowind_colptr, col_to_sup, sup_to_col); + setInfos(m, n, nzval, nzval_colptr, rowind, rowind_colptr, col_to_sup, sup_to_col); } ~SuperNodalMatrix() @@ -68,12 +68,11 @@ class SuperNodalMatrix * FIXME This class will be modified such that it can be use in the course * of the factorization. 
*/ - void setInfos(Index m, Index n, Index nnz, Scalar *nzval, Index* nzval_colptr, Index* rowind, + void setInfos(Index m, Index n, Scalar *nzval, Index* nzval_colptr, Index* rowind, Index* rowind_colptr, Index* col_to_sup, Index* sup_to_col ) { m_row = m; m_col = n; - m_nnz = nnz; m_nzval = nzval; m_nzval_colptr = nzval_colptr; m_rowind = rowind; @@ -159,14 +158,14 @@ class SuperNodalMatrix protected: Index m_row; // Number of rows Index m_col; // Number of columns - Index m_nnz; // Number of nonzero values +// Index m_nnz; // Number of nonzero values Index m_nsuper; // Number of supernodes Scalar* m_nzval; //array of nonzero values packed by column Index* m_nzval_colptr; //nzval_colptr[j] Stores the location in nzval[] which starts column j Index* m_rowind; // Array of compressed row indices of rectangular supernodes Index* m_rowind_colptr; //rowind_colptr[j] stores the location in rowind[] which starts column j - Index *m_col_to_sup; // col_to_sup[j] is the supernode number to which column j belongs - Index *m_sup_to_col; //sup_to_col[s] points to the starting column of the s-th supernode + Index* m_col_to_sup; // col_to_sup[j] is the supernode number to which column j belongs + Index* m_sup_to_col; //sup_to_col[s] points to the starting column of the s-th supernode private : }; @@ -176,7 +175,7 @@ class SuperNodalMatrix * */ template -class SuperNodalMatrix::InnerIterator +class SuperNodalMatrix::InnerIterator { public: InnerIterator(const SuperNodalMatrix& mat, Index outer) @@ -184,7 +183,7 @@ class SuperNodalMatrix::InnerIterator m_outer(outer), m_idval(mat.colIndexPtr()[outer]), m_startval(m_idval), - m_endval(mat.colIndexPtr()[outer+1]) + m_endval(mat.colIndexPtr()[outer+1]), m_idrow(mat.rowIndexPtr()[outer]), m_startidrow(m_idrow), m_endidrow(mat.rowIndexPtr()[outer+1]) @@ -197,7 +196,7 @@ class SuperNodalMatrix::InnerIterator } inline Scalar value() const { return m_matrix.valuePtr()[m_idval]; } - inline Scalar& valueRef() { return 
const_cast(m_matrix.valuePtr()[m_idval]; } + inline Scalar& valueRef() { return const_cast(m_matrix.valuePtr()[m_idval]); } inline Index index() const { return m_matrix.rowIndex()[m_idrow]; } inline Index row() const { return index(); } @@ -221,13 +220,14 @@ class SuperNodalMatrix::InnerIterator const Index m_startidrow; // Start of the row indices of the current column value const Index m_endidrow; // End of the row indices of the current column value }; + /** - * \brief Iterator class to iterate over nonzeros Supernodes in the triangular supernodal matrix + * \brief Iterator class to iterate over Supernodes in the triangular supernodal matrix * * The final goal is to use this class when dealing with supernodes during numerical factorization */ template -class SuperNodalMatrix::SuperNodeIterator +class SuperNodalMatrix::SuperNodeIterator { public: SuperNodeIterator(const SuperNodalMatrix& mat) diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h index b2888e9a0..ea9ef6d89 100644 --- a/Eigen/src/SparseLU/SparseLU_Memory.h +++ b/Eigen/src/SparseLU/SparseLU_Memory.h @@ -54,87 +54,7 @@ #define LU_GluIntArray(n) (5* (n) + 5) #define LU_TempSpace(m, w) ( (2*w + 4 + LU_NO_MARKER) * m * sizeof(Index) \ + (w + 1) * m * sizeof(Scalar) ) - -namespace internal { - -/** - * \brief Allocate various working space for the numerical factorization phase. - * \param m number of rows of the input matrix - * \param n number of columns - * \param annz number of initial nonzeros in the matrix - * \param work scalar working space needed by all factor routines - * \param iwork Integer working space - * \param lwork if lwork=-1, this routine returns an estimated size of the required memory - * \param glu persistent data to facilitate multiple factors : will be deleted later ?? 
- * \return an estimated size of the required memory if lwork = -1; otherwise, return the size of actually allocated when memory allocation failed - * NOTE Unlike SuperLU, this routine does not support successive factorization with the same pattern and the row permutation - */ -template -int LUMemInit(int m, int n, int annz, ScalarVector& work, IndexVector& iwork, int lwork, int fillratio, int panel_size, int maxsuper, int rowblk, GlobalLU_t& glu) -{ - typedef typename ScalarVector::Scalar; - typedef typename IndexVector::Index; - - int& num_expansions = glu.num_expansions; //No memory expansions so far - num_expansions = 0; - // Guess the size for L\U factors - Index& nzlmax = glu.nzlmax; - Index& nzumax = glu.nzumax; - Index& nzlumax = glu.nzlumax; - nzumax = nzlumax = fillratio * annz; // estimated number of nonzeros in U - nzlmax = std::max(1, m_fill_ratio/4.) * annz; // estimated nnz in L factor - // Return the estimated size to the user if necessary - if (lwork == IND_EMPTY) - { - int estimated_size; - estimated_size = LU_GluIntArray(n) * sizeof(Index) + LU_TempSpace(m, m_panel_size) - + (nzlmax + nzumax) * sizeof(Index) + (nzlumax+nzumax) * sizeof(Scalar) + n); - return estimated_size; - } - - // Setup the required space - - // First allocate Integer pointers for L\U factors - glu.xsup.resize(n+1); - glu.supno.resize(n+1); - glu.xlsub.resize(n+1); - glu.xlusup.resize(n+1); - glu.xusub.resize(n+1); - - // Reserve memory for L/U factors - expand(glu.lusup, nzlumax, 0, 0, num_expansions); - expand(glu.ucol,nzumax, 0, 0, num_expansions); - expand(glu.lsub,nzlmax, 0, 0, num_expansions); - expand(glu.usub,nzumax, 0, 1, num_expansions); - - // Check if the memory is correctly allocated, - // FIXME Should be a try... 
catch section here - while ( !glu.lusup.size() || !glu.ucol.size() || !glu.lsub.size() || !glu.usub.size()) - { - //Reduce the estimated size and retry - nzlumax /= 2; - nzumax /= 2; - nzlmax /= 2; - - if (nzlumax < annz ) return nzlumax; - - expand(glu.lsup, nzlumax, 0, 0, num_expansions); - expand(glu.ucol, nzumax, 0, 0, num_expansions); - expand(glu.lsub, nzlmax, 0, 0, num_expansions); - expand(glu.usub, nzumax, 0, 1, num_expansions); - } - - // LUWorkInit : Now, allocate known working storage - int isize = (2 * m_panel_size + 3 + LU_NO_MARKER) * m + n; - int dsize = m * m_panel_size + LU_NUM_TEMPV(m, m_panel_size, m_maxsuper, m_rowblk); - iwork.resize(isize); - work.resize(isize); - - ++num_expansions; - return 0; - -} // end LuMemInit /** * Expand the existing storage to accomodate more fill-ins @@ -145,7 +65,7 @@ int LUMemInit(int m, int n, int annz, ScalarVector& work, IndexVector& iwork, in * \param [in,out]num_expansions Number of times the memory has been expanded */ template -int SparseLU::expand(VectorType& vec, int& length, int len_to_copy, bool keep_prev, int& num_expansions) +int expand(VectorType& vec, int& length, int len_to_copy, bool keep_prev, int& num_expansions) { float alpha = 1.5; // Ratio of the memory increase @@ -195,6 +115,85 @@ int SparseLU::expand(VectorType& vec, int& length, int len_to_copy, bool keep_p return 0; } +/** + * \brief Allocate various working space for the numerical factorization phase. + * \param m number of rows of the input matrix + * \param n number of columns + * \param annz number of initial nonzeros in the matrix + * \param work scalar working space needed by all factor routines + * \param iwork Integer working space + * \param lwork if lwork=-1, this routine returns an estimated size of the required memory + * \param glu persistent data to facilitate multiple factors : will be deleted later ?? 
+ * \return an estimated size of the required memory if lwork = -1; otherwise, return the size of actually allocated when memory allocation failed + * NOTE Unlike SuperLU, this routine does not support successive factorization with the same pattern and the row permutation + */ +template +int LUMemInit(int m, int n, int annz, ScalarVector& work, IndexVector& iwork, int lwork, int fillratio, int panel_size, int maxsuper, int rowblk, LU_GlobalLU_t& glu) +{ + typedef typename ScalarVector::Scalar Scalar; + typedef typename IndexVector::Index Index; + + int& num_expansions = glu.num_expansions; //No memory expansions so far + num_expansions = 0; + // Guess the size for L\U factors + Index& nzlmax = glu.nzlmax; + Index& nzumax = glu.nzumax; + Index& nzlumax = glu.nzlumax; + nzumax = nzlumax = fillratio * annz; // estimated number of nonzeros in U + nzlmax = std::max(1., fillratio/4.) * annz; // estimated nnz in L factor + + // Return the estimated size to the user if necessary + if (lwork == IND_EMPTY) + { + int estimated_size; + estimated_size = LU_GluIntArray(n) * sizeof(Index) + LU_TempSpace(m, panel_size) + + (nzlmax + nzumax) * sizeof(Index) + (nzlumax+nzumax) * sizeof(Scalar) + n; + return estimated_size; + } + + // Setup the required space + + // First allocate Integer pointers for L\U factors + glu.xsup.resize(n+1); + glu.supno.resize(n+1); + glu.xlsub.resize(n+1); + glu.xlusup.resize(n+1); + glu.xusub.resize(n+1); + + // Reserve memory for L/U factors + expand(glu.lusup, nzlumax, 0, 0, num_expansions); + expand(glu.ucol,nzumax, 0, 0, num_expansions); + expand(glu.lsub,nzlmax, 0, 0, num_expansions); + expand(glu.usub,nzumax, 0, 1, num_expansions); + + // Check if the memory is correctly allocated, + // FIXME Should be a try... 
catch section here + while ( !glu.lusup.size() || !glu.ucol.size() || !glu.lsub.size() || !glu.usub.size()) + { + //Reduce the estimated size and retry + nzlumax /= 2; + nzumax /= 2; + nzlmax /= 2; + + if (nzlumax < annz ) return nzlumax; + + expand(glu.lsup, nzlumax, 0, 0, num_expansions); + expand(glu.ucol, nzumax, 0, 0, num_expansions); + expand(glu.lsub, nzlmax, 0, 0, num_expansions); + expand(glu.usub, nzumax, 0, 1, num_expansions); + } + + // LUWorkInit : Now, allocate known working storage + int isize = (2 * panel_size + 3 + LU_NO_MARKER) * m + n; + int dsize = m * panel_size + LU_NUM_TEMPV(m, panel_size, maxsuper, rowblk); + iwork.resize(isize); + work.resize(isize); + + ++num_expansions; + return 0; + +} // end LuMemInit + /** * \brief Expand the existing storage * \param vec vector to expand @@ -203,18 +202,17 @@ int SparseLU::expand(VectorType& vec, int& length, int len_to_copy, bool keep_p * \param glu Global data structure * \return 0 on success, > 0 size of the memory allocated so far */ -template -int SparseLU::LUMemXpand(VectorType& vec, int& maxlen, int next, LU_MemType memtype, LU_GlobalLu_t& glu) +template +int LUMemXpand(VectorType& vec, int& maxlen, int next, LU_MemType memtype, int& num_expansions) { int failed_size; - int& num_expansions = glu.num_expansions; if (memtype == USUB) - failed_size = expand(vec, maxlen, next, 1, num_expansions); + failed_size = expand(vec, maxlen, next, 1, num_expansions); else - failed_size = expand(vec, maxlen, next, 0, num_expansions); + failed_size = expand(vec, maxlen, next, 0, num_expansions); if (failed_size) - return faileld_size; + return failed_size; // The following code is not really needed since maxlen is passed by reference // and correspond to the appropriate field in glu @@ -236,6 +234,4 @@ int SparseLU::LUMemXpand(VectorType& vec, int& maxlen, int next, LU_MemType memt return 0 ; } - -}// Namespace Internal #endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_Structs.h 
b/Eigen/src/SparseLU/SparseLU_Structs.h index 618d05eac..fd2a59a41 100644 --- a/Eigen/src/SparseLU/SparseLU_Structs.h +++ b/Eigen/src/SparseLU/SparseLU_Structs.h @@ -82,11 +82,11 @@ */ #ifndef EIGEN_LU_STRUCTS #define EIGEN_LU_STRUCTS -typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} MemType; +typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} LU_MemType; -template -struct { +template +struct LU_GlobalLU_t { typedef typename IndexVector::Index Index; IndexVector xsup; //First supernode column ... xsup(s) points to the beginning of the s-th supernode IndexVector supno; // Supernode number corresponding to this column (column to supernode mapping) @@ -96,14 +96,11 @@ struct { IndexVector xlsub; // pointers to the beginning of each column in lsub Index nzlmax; // Current max size of lsub Index nzlumax; // Current max size of lusup - ScalarVector ucol; // nonzero values of U ordered by columns IndexVector usub; // row indices of U columns in ucol IndexVector xusub; // Pointers to the beginning of each column of U in ucol Index nzumax; // Current max size of ucol - Index n; // Number of columns in the matrix - + Index n; // Number of columns in the matrix int num_expansions; -} GlobalLU_t; - +}; #endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_Utils.h b/Eigen/src/SparseLU/SparseLU_Utils.h index 5c12b6243..9e63bf7e4 100644 --- a/Eigen/src/SparseLU/SparseLU_Utils.h +++ b/Eigen/src/SparseLU/SparseLU_Utils.h @@ -25,7 +25,6 @@ #ifdef EIGEN_SPARSELU_UTILS_H #define EIGEN_SPARSELU_UTILS_H -// Number of marker arrays used in the factorization each of size n template void SparseLU::LU_countnz(const int n, IndexVector& xprune, int& nnzL, int& nnzU, GlobalLU_t& Glu) @@ -34,7 +33,6 @@ void SparseLU::LU_countnz(const int n, IndexVector& xprune, int& nnzL, int& nnzU IndexVector& xlsub = Glu.xlsub; nnzL = 0; nnzU = (Glu.xusub)(n); - int nnzL0 = 0; int nsuper = (Glu.supno)(n); int jlen, irep; @@ -52,7 +50,6 @@ void SparseLU::LU_countnz(const int n, 
IndexVector& xprune, int& nnzL, int& nnzU jlen--; } irep = xsup(i+1) - 1; - nnzL0 += xprune(irep) - xlsub(irep); } } diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h index 965a0c0ad..da464cbfc 100644 --- a/Eigen/src/SparseLU/SparseLU_column_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -44,6 +44,7 @@ */ #ifndef SPARSELU_COLUMN_BMOD_H #define SPARSELU_COLUMN_BMOD_H + /** * \brief Performs numeric block updates (sup-col) in topological order * @@ -59,11 +60,13 @@ * > 0 - number of bytes allocated when run out of space * */ -template -int SparseLU::LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, int fpanelc, LU_GlobalLU_t& glu) +template +int LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, int fpanelc, LU_GlobalLU_t& glu) { - + typedef typename IndexVector::Index Index; + typedef typename ScalarVector::Scalar Scalar; int jsupno, k, ksub, krep, krep_ind, ksupno; + int lptr, nrow, isub, i, irow, nextlu, new_next, ufirst; int fsupc, nsupc, nsupr, luptr, kfnz, no_zeros; /* krep = representative of current k-th supernode * fsupc = first supernodal column @@ -81,7 +84,7 @@ int SparseLU::LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense ScalarVector& lusup = glu.lusup; Index& nzlumax = glu.nzlumax; - int jsupno = supno(jcol); + jsupno = supno(jcol); // For each nonzero supernode segment of U[*,j] in topological order k = nseg - 1; int d_fsupc; // distance between the first column of the current panel and the @@ -134,7 +137,7 @@ int SparseLU::LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); VectorBlock u(tempv, 0, segsize); - u = A.triangularView().solve(u); + u = A.template triangularView().solve(u); // Dense matrix-vector 
product y <-- A*x luptr += segsize; @@ -168,18 +171,18 @@ int SparseLU::LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense fsupc = xsup(jsupno); // copy the SPA dense into L\U[*,j] + int mem; new_next = nextlu + xlsub(fsupc + 1) - xlsub(fsupc); while (new_next > nzlumax ) { - mem = LUmemXpand(glu.lusup, nzlumax, nextlu, LUSUP, glu); + mem = LUMemXpand(glu.lusup, nzlumax, nextlu, LUSUP, glu.num_expansions); if (mem) return mem; - //lsub = glu.lsub; // Should not be updated here } for (isub = xlsub(fsupc); isub < xlsub(fsupc+1); isub++) { irow = lsub(isub); - lusub(nextlu) = dense(irow); + lusup(nextlu) = dense(irow); dense(irow) = Scalar(0.0); ++nextlu; } @@ -210,7 +213,7 @@ int SparseLU::LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense ufirst = xlusup(jcol) + d_fsupc; Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); VectorBlock u(lusup, ufirst, nsupc); - u = A.triangularView().solve(u); + u = A.template triangularView().solve(u); new (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); VectorBlock l(lusup, ufirst+nsupc, nrow); diff --git a/Eigen/src/SparseLU/SparseLU_column_dfs.h b/Eigen/src/SparseLU/SparseLU_column_dfs.h index 7fda536a9..8c6202d67 100644 --- a/Eigen/src/SparseLU/SparseLU_column_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_column_dfs.h @@ -44,6 +44,7 @@ */ #ifndef SPARSELU_COLUMN_DFS_H #define SPARSELU_COLUMN_DFS_H + /** * \brief Performs a symbolic factorization on column jcol and decide the supernode boundary * @@ -57,6 +58,7 @@ * \param m number of rows in the matrix * \param jcol Current column * \param perm_r Row permutation + * \param maxsuper * \param [in,out] nseg Number of segments in current U[*,j] - new segments appended * \param lsub_col defines the rhs vector to start the dfs * \param [in,out] segrep Segment representatives - new segments appended @@ -71,9 +73,10 @@ * */ template -int SparseLU::LU_column_dfs(const int m, 
const int jcol, IndexVector& perm_r, IndexVector& nseg IndexVector& lsub_col, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& glu) +int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper, IndexVector& nseg, IndexVector& lsub_col, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& glu) { - typedef typename IndexVector::IndexVector; + typedef typename IndexVector::Index Index; + typedef typename ScalarVector::Scalar Scalar; int jcolp1, jcolm1, jsuper, nsuper, nextl; int krow; // Row index of the current element @@ -95,6 +98,7 @@ int SparseLU::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, In jsuper = nsuper; nextl = xlsub(jcol); VectorBlock marker2(marker, 2*m, m); + int fsupc, jptr, jm1ptr, ito, ifrom, istop; // For each nonzero in A(*,jcol) do dfs for (k = 0; lsub_col[k] != IND_EMPTY; k++) { @@ -115,7 +119,7 @@ int SparseLU::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, In lsub(nextl++) = krow; // krow is indexed into A if ( nextl >= nzlmax ) { - mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, glu); + mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, glu.num_expansions); if ( mem ) return mem; } if (kmark != jcolm1) jsuper = IND_EMPTY; // Row index subset testing @@ -163,7 +167,7 @@ int SparseLU::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, In lsub(nextl++) = kchild; if (nextl >= nzlmax) { - mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, glu); + mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, glu.num_expansions); if (mem) return mem; } if (chmark != jcolm1) jsuper = IND_EMPTY; @@ -186,7 +190,7 @@ int SparseLU::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, In xplore(krep) = xdfs; oldrep = krep; krep = chrep; // Go deeped down G(L^t) - parent(krep) = olddrep; + parent(krep) = oldrep; repfnz(krep) = chperm; 
xdfs = xlsub(krep); maxdfs = xprune(krep); @@ -230,7 +234,7 @@ int SparseLU::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, In // Make sure the number of columns in a supernode doesn't // exceed threshold - if ( (jcol - fsupc) >= m_maxsuper) jsuper = IND_EMPTY; + if ( (jcol - fsupc) >= maxsuper) jsuper = IND_EMPTY; /* If jcol starts a new supernode, reclaim storage space in * lsub from previous supernode. Note we only store @@ -241,7 +245,7 @@ int SparseLU::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, In { // starts a new supernode if ( (fsupc < jcolm1-1) ) { // >= 3 columns in nsuper - ito = xlsub(fsupcc+1) + ito = xlsub(fsupc+1); xlsub(jcolm1) = ito; istop = ito + jptr - jm1ptr; xprune(jcolm1) = istop; // intialize xprune(jcol-1) diff --git a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h index c97bc6aa4..31411175c 100644 --- a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +++ b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h @@ -44,6 +44,7 @@ */ #ifndef SPARSELU_COPY_TO_UCOL_H #define SPARSELU_COPY_TO_UCOL_H + /** * \brief Performs numeric block updates (sup-col) in topological order * @@ -58,17 +59,18 @@ * > 0 - number of bytes allocated when run out of space * */ -template -int SparseLU::LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segrep, IndexVector& repfnz, IndexVector& perm_r, ScalarVector& dense, LU_GlobalLU_t& glu) +template < typename IndexVector, typename ScalarVector> +int LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segrep, IndexVector& repfnz, IndexVector& perm_r, ScalarVector& dense, LU_GlobalLU_t& glu) { - Index ksupno, k, ksub, krep, ksupno; - typedef typename IndexVector::Index; + typedef typename IndexVector::Index Index; + typedef typename ScalarVector::Scalar Scalar; + Index ksub, krep, ksupno; IndexVector& xsup = glu.xsup; IndexVector& supno = glu.supno; IndexVector& lsub = glu.lsub; IndexVector& xlsub = glu.xlsub; - ScalarVector& ucol = 
GLu.ucol; + ScalarVector& ucol = glu.ucol; IndexVector& usub = glu.usub; IndexVector& xusub = glu.xusub; Index& nzumax = glu.nzumax; @@ -76,10 +78,11 @@ int SparseLU::LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segre Index jsupno = supno(jcol); // For each nonzero supernode segment of U[*,j] in topological order - k = nseg - 1; + int k = nseg - 1, i; Index nextu = xusub(jcol); Index kfnz, isub, segsize; Index new_next,irow; + Index fsupc, mem; for (ksub = 0; ksub < nseg; ksub++) { krep = segrep(k); k--; @@ -95,9 +98,9 @@ int SparseLU::LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segre new_next = nextu + segsize; while (new_next > nzumax) { - mem = LU_MemXpand(ucol, nzumax, nextu, UCOL, glu); + mem = LUMemXpand(ucol, nzumax, nextu, UCOL, glu.num_expansions); if (mem) return mem; - mem = LU_MemXpand(usub, nzumax, nextu, USUB, glu); + mem = LUMemXpand(usub, nzumax, nextu, USUB, glu.num_expansions); if (mem) return mem; } @@ -120,4 +123,5 @@ int SparseLU::LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segre xusub(jcol + 1) = nextu; // close U(*,jcol) return 0; } + #endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h index 4190e0462..1766c3c2b 100644 --- a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +++ b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h @@ -42,8 +42,7 @@ #ifndef SPARSELU_HEAP_RELAX_SNODE_H #define SPARSELU_HEAP_RELAX_SNODE_H -#include -namespace internal { +#include "SparseLU_Coletree.h" /** * \brief Identify the initial relaxed supernodes * @@ -85,12 +84,12 @@ void LU_heap_relax_snode (const int n, IndexVector& et, const int relax_columns, if (parent != n) // not the dummy root descendants(parent) += descendants(j) + 1; } - // Identify the relaxed supernodes by postorder traversal of the etree register int snode_start; // beginning of a snode register int k; int nsuper_et_post = 0; // Number of relaxed snodes in 
postordered etree int nsuper_et = 0; // Number of relaxed snodes in the original etree + int l; for (j = 0; j < n; ) { parent = et(j); @@ -132,5 +131,4 @@ void LU_heap_relax_snode (const int n, IndexVector& et, const int relax_columns, // Recover the original etree et = et_save; } -} // end namespace internal #endif diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h index 212ecfa6a..4f19b5ac8 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -63,8 +63,9 @@ * */ template -void SparseLU::LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, LU_GlobalLU_t& glu) +void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, LU_GlobalLU_t& glu) { + typedef typename ScalarVector::Scalar Scalar; IndexVector& xsup = glu.xsup; IndexVector& supno = glu.supno; IndexVector& lsub = glu.lsub; @@ -74,11 +75,10 @@ void SparseLU::LU_panel_bmod(const int m, const int w, const int jcol, const int int i,ksub,jj,nextl_col,irow; int fsupc, nsupc, nsupr, nrow; - int krep, krep_ind; - int nrow; + int krep, krep_ind, kfnz; int lptr; // points to the row subscripts of a supernode int luptr; // ... 
- int segsze,no_zeros,irow ; + int segsize,no_zeros,isub ; // For each nonz supernode segment of U[*,j] in topological order int k = nseg - 1; for (ksub = 0; ksub < nseg; ksub++) @@ -105,7 +105,7 @@ void SparseLU::LU_panel_bmod(const int m, const int w, const int jcol, const int { nextl_col = (jj-jcol) * m; VectorBlock repfnz_col(repfnz.segment(nextl_col, m)); // First nonzero column index for each row - VectorBLock dense_col(dense.segment(nextl_col, m)); // Scatter/gather entire matrix column from/to here + VectorBlock dense_col(dense.segment(nextl_col, m)); // Scatter/gather entire matrix column from/to here kfnz = repfnz_col(krep); if ( kfnz == IND_EMPTY ) @@ -134,14 +134,12 @@ void SparseLU::LU_panel_bmod(const int m, const int w, const int jcol, const int luptr += nsupr * no_zeros + no_zeros; // triangular solve with Eigen Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); -// Map > u( tempv.data(), segsize); VectorBlock u(tempv, 0, segsize); - u = A.triangularView().solve(u); + u = A.template triangularView().solve(u); luptr += segsize; // Dense Matrix vector product y <-- A*x; new (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(nsupr) ); -// Map l( &(tempv.data()[segsize]), nrow); VectorBlock l(tempv, segsize, nrow); l= A * u; diff --git a/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/Eigen/src/SparseLU/SparseLU_panel_dfs.h index d3c2906b2..6f6922ee0 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_panel_dfs.h @@ -78,7 +78,7 @@ * */ template -void SparseLU::LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, IndexVector& perm_r, int& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& Glu) +void LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, IndexVector& perm_r, int& 
nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& glu) { int jj; // Index through each column in the panel @@ -95,10 +95,10 @@ void SparseLU::LU_panel_dfs(const int m, const int w, const int jcol, MatrixType // IndexVector& marker1 = marker.block(m, m); VectorBlock marker1(marker, m, m); nseg = 0; - IndexVector& xsup = Glu.xsup; - IndexVector& supno = Glu.supno; - IndexVector& lsub = Glu.lsub; - IndexVector& xlsub = Glu.xlsub; + IndexVector& xsup = glu.xsup; + IndexVector& supno = glu.supno; + IndexVector& lsub = glu.lsub; + IndexVector& xlsub = glu.xlsub; // For each column in the panel for (jj = jcol; jj < jcol + w; jj++) { @@ -109,7 +109,7 @@ void SparseLU::LU_panel_dfs(const int m, const int w, const int jcol, MatrixType // For each nnz in A[*, jj] do depth first search - for (MatrixType::InnerIterator it(A, jj); it; ++it) + for (typename MatrixType::InnerIterator it(A, jj); it; ++it) { krow = it.row(); dense_col(krow) = it.val(); diff --git a/Eigen/src/SparseLU/SparseLU_pivotL.h b/Eigen/src/SparseLU/SparseLU_pivotL.h index 32da92481..4a50b2cca 100644 --- a/Eigen/src/SparseLU/SparseLU_pivotL.h +++ b/Eigen/src/SparseLU/SparseLU_pivotL.h @@ -63,22 +63,22 @@ * \param [in,out]perm_r Row permutation (threshold pivoting) * \param [in] iperm_c column permutation - used to finf diagonal of Pc*A*Pc' * \param [out]pivrow The pivot row - * \param Glu Global LU data + * \param glu Global LU data * \return 0 if success, i > 0 if U(i,i) is exactly zero * */ template -int SparseLU::LU_pivotL(const int jcol, const RealScalar diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, int& pivrow, GlobalLU_t& Glu) +int LU_pivotL(const int jcol, const typename ScalarVector::RealScalar diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, int& pivrow, LU_GlobalLU_t& glu) { typedef typename IndexVector::Index Index; typedef typename 
ScalarVector::Scalar Scalar; // Initialize pointers - IndexVector& lsub = Glu.lsub; // Compressed row subscripts of L rectangular supernodes. - IndexVector& xlsub = Glu.xlsub; // pointers to the beginning of each column subscript in lsub - ScalarVector& lusup = Glu.lusup; // Numerical values of L ordered by columns - IndexVector& xlusup = Glu.xlusup; // pointers to the beginning of each colum in lusup + IndexVector& lsub = glu.lsub; // Compressed row subscripts of L rectangular supernodes. + IndexVector& xlsub = glu.xlsub; // pointers to the beginning of each column subscript in lsub + ScalarVector& lusup = glu.lusup; // Numerical values of L ordered by columns + IndexVector& xlusup = glu.xlusup; // pointers to the beginning of each colum in lusup - Index fsupc = (Glu.xsup)((Glu.supno)(jcol)); // First column in the supernode containing the column jcol + Index fsupc = (glu.xsup)((glu.supno)(jcol)); // First column in the supernode containing the column jcol Index nsupc = jcol - fsupc; // Number of columns in the supernode portion, excluding jcol; nsupc >=0 Index lptr = xlsub(fsupc); // pointer to the starting location of the row subscripts for this supernode portion Index nsupr = xlsub(fsupc+1) - lptr; // Number of rows in the supernode @@ -93,6 +93,7 @@ int SparseLU::LU_pivotL(const int jcol, const RealScalar diagpivotthresh, IndexV Index diag = IND_EMPTY; Index old_pivptr = nsupc; Scalar rtemp; + Index isub, icol, itemp, k; for (isub = nsupc; isub < nsupr; ++isub) { rtemp = std::abs(lu_col_ptr[isub]); if (rtemp > pivmax) { diff --git a/Eigen/src/SparseLU/SparseLU_pruneL.h b/Eigen/src/SparseLU/SparseLU_pruneL.h index dd092b778..c006f6707 100644 --- a/Eigen/src/SparseLU/SparseLU_pruneL.h +++ b/Eigen/src/SparseLU/SparseLU_pruneL.h @@ -44,6 +44,7 @@ */ #ifndef SPARSELU_PRUNEL_H #define SPARSELU_PRUNEL_H + /** * \brief Prunes the L-structure. 
* @@ -57,25 +58,27 @@ * \param segrep * \param repfnz * \param [out]xprune - * \param Glu Global LU data + * \param glu Global LU data * */ template -void SparseLU::LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, const int nseg, const IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, GlobalLU_t& Glu) +void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, const int nseg, const IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, LU_GlobalLU_t& glu) { + typedef typename IndexVector::Index Index; + typedef typename ScalarVector::Scalar Scalar; // Initialize pointers - IndexVector& xsup = Glu.xsup; - IndexVector& supno = Glu.supno; - IndexVector& lsub = Glu.lsub; - IndexVector& xlsub = Glu.xlsub; - ScalarVector& lusup = Glu.lusup; - IndexVector& xlusup = Glu.xlusup; + IndexVector& xsup = glu.xsup; + IndexVector& supno = glu.supno; + IndexVector& lsub = glu.lsub; + IndexVector& xlsub = glu.xlsub; + ScalarVector& lusup = glu.lusup; + IndexVector& xlusup = glu.xlusup; // For each supernode-rep irep in U(*,j] int jsupno = supno(jcol); int i,irep,irep1; bool movnum, do_prune = false; - Index kmin, kmax, ktemp, minloc, maxloc; + Index kmin, kmax, ktemp, minloc, maxloc,krow; for (i = 0; i < nseg; i++) { irep = segrep(i); @@ -88,12 +91,12 @@ void SparseLU::LU_pruneL(const int jcol, const IndexVector& perm_r, const int pi // If a snode overlaps with the next panel, then the U-segment // is fragmented into two parts -- irep and irep1. We should let // pruning occur at the rep-column in irep1s snode. 
- if (supno(irep) == supno(irep1) continue; // don't prune + if (supno(irep) == supno(irep1) ) continue; // don't prune // If it has not been pruned & it has a nonz in row L(pivrow,i) if (supno(irep) != jsupno ) { - if ( xprune (irep) >= xlsub(irep1) + if ( xprune (irep) >= xlsub(irep1) ) { kmin = xlsub(irep); kmax = xlsub(irep1) - 1; @@ -147,4 +150,5 @@ void SparseLU::LU_pruneL(const int jcol, const IndexVector& perm_r, const int pi } // end pruning } // End for each U-segment } + #endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_relax_snode.h b/Eigen/src/SparseLU/SparseLU_relax_snode.h index f7b478560..0006dde33 100644 --- a/Eigen/src/SparseLU/SparseLU_relax_snode.h +++ b/Eigen/src/SparseLU/SparseLU_relax_snode.h @@ -42,7 +42,6 @@ #ifndef SPARSELU_RELAX_SNODE_H #define SPARSELU_RELAX_SNODE_H -namespace internal { /** * \brief Identify the initial relaxed supernodes * @@ -87,6 +86,4 @@ void LU_relax_snode (const int n, IndexVector& et, const int relax_columns, Inde } // End postorder traversal of the etree } - -} // end namespace internal #endif diff --git a/Eigen/src/SparseLU/SparseLU_snode_bmod.h b/Eigen/src/SparseLU/SparseLU_snode_bmod.h index 1d6bed8bb..a7034e607 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_snode_bmod.h @@ -42,14 +42,13 @@ * granted, provided the above notices are retained, and a notice that * the code was modified is included with the above copyright notice. 
*/ -namespace internal { #ifndef SPARSELU_SNODE_BMOD_H #define SPARSELU_SNODE_BMOD_H -template -int SparseLU::LU_dsnode_bmod (const Index jcol, const Index jsupno, const Index fsupc, - ScalarVector& dense, LU_GlobalLU_t& glu) +template +int LU_snode_bmod (const int jcol, const int jsupno, const int fsupc, + ScalarVector& dense, LU_GlobalLU_t& glu) { - typedef typename Matrix IndexVector; + typedef typename ScalarVector::Scalar Scalar; IndexVector& lsub = glu.lsub; // Compressed row subscripts of ( rectangular supernodes ??) IndexVector& xlsub = glu.xlsub; // xlsub[j] is the starting location of the j-th column in lsub(*) ScalarVector& lusup = glu.lusup; // Numerical values of the rectangular supernodes @@ -77,17 +76,15 @@ int SparseLU::LU_dsnode_bmod (const Index jcol, const Index jsupno, const Index // Solve the triangular system for U(fsupc:jcol, jcol) with L(fspuc:jcol, fsupc:jcol) Map,0,OuterStride<> > A( &(lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); -// Map > u(&(lusup.data()[ufirst]), nsupc); VectorBlock u(lusup, ufirst, nsupc); - u = A.triangularView().solve(u); // Call the Eigen dense triangular solve interface + u = A.template triangularView().solve(u); // Call the Eigen dense triangular solve interface // Update the trailing part of the column jcol U(jcol:jcol+nrow, jcol) using L(jcol:jcol+nrow, fsupc:jcol) and U(fsupc:jcol) new (&A) Map,0,OuterStride<> > ( &(lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); // Map > l(&(lusup.data()[ufirst+nsupc], nrow); VectorBlock l(lusup, ufirst+nsupc, nrow); l = l - A * u; - - return 0; + } + return 0; } -} // End namespace internal #endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_snode_dfs.h b/Eigen/src/SparseLU/SparseLU_snode_dfs.h index 669f172f5..c49fc1461 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_snode_dfs.h @@ -44,7 +44,6 @@ */ #ifdef SPARSELU_SNODE_DFS_H #define SPARSELU_SNODE_DFS_H -namespace eigen { /** * 
\brief Determine the union of the row structures of those columns within the relaxed snode. * NOTE: The relaxed snodes are leaves of the supernodal etree, therefore, @@ -59,7 +58,7 @@ namespace eigen { * \return 0 on success, > 0 size of the memory when memory allocation failed */ template - int SparseLU::LU_snode_dfs(const int jcol, const int kcol, const IndexVector* asub, const IndexVector* colptr, IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t& glu) + int LU_snode_dfs(const int jcol, const int kcol, const IndexVector* asub, const IndexVector* colptr, IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t& glu) { typedef typename IndexVector::Index; IndexVector& xsup = glu.xsup; @@ -86,7 +85,7 @@ namespace eigen { lsub(nextl++) = krow; if( nextl >= nzlmax ) { - mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, glu); + mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, glu.num_expansions); if (mem) return mem; } } @@ -100,7 +99,7 @@ namespace eigen { Index new_next = nextl + (nextl - xlsub(jcol)); while (new_next > nzlmax) { - mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, glu); + mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, glu.num_expansions); if (mem) return mem; } Index ifrom, ito = nextl; @@ -115,6 +114,4 @@ namespace eigen { xlsub(kcol+1) = nextl; return 0; } - -} // end namespace eigen #endif \ No newline at end of file diff --git a/bench/spbench/CMakeLists.txt b/bench/spbench/CMakeLists.txt index 079912266..4b3c6f8e3 100644 --- a/bench/spbench/CMakeLists.txt +++ b/bench/spbench/CMakeLists.txt @@ -63,3 +63,8 @@ endif(RT_LIBRARY) add_executable(spbenchsolver spbenchsolver.cpp) target_link_libraries (spbenchsolver ${SPARSE_LIBS}) +add_executable(spsolver sp_solver.cpp) +target_link_libraries (spsolver ${SPARSE_LIBS}) + +add_executable(test_sparseLU test_sparseLU.cpp) +target_link_libraries (test_sparseLU ${SPARSE_LIBS}) \ No newline at end of file From 0c9b08e46e7507d9f13200f0702bc57ed6aae52c Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Thu, 14 Jun 2012 
18:45:04 +0200 Subject: [PATCH 13/73] build complete... almost --- Eigen/src/OrderingMethods/Ordering.h | 21 ++- Eigen/src/SparseLU/SparseLU.h | 162 ++++++++++-------- Eigen/src/SparseLU/SparseLU_Coletree.h | 1 - Eigen/src/SparseLU/SparseLU_Matrix.h | 61 +++++-- Eigen/src/SparseLU/SparseLU_Memory.h | 19 +- Eigen/src/SparseLU/SparseLU_Structs.h | 2 +- Eigen/src/SparseLU/SparseLU_Utils.h | 38 ++-- Eigen/src/SparseLU/SparseLU_column_bmod.h | 11 +- Eigen/src/SparseLU/SparseLU_column_dfs.h | 12 +- Eigen/src/SparseLU/SparseLU_copy_to_ucol.h | 6 +- .../src/SparseLU/SparseLU_heap_relax_snode.h | 10 +- Eigen/src/SparseLU/SparseLU_panel_bmod.h | 12 +- Eigen/src/SparseLU/SparseLU_panel_dfs.h | 10 +- Eigen/src/SparseLU/SparseLU_pivotL.h | 3 +- Eigen/src/SparseLU/SparseLU_pruneL.h | 8 +- Eigen/src/SparseLU/SparseLU_snode_bmod.h | 3 +- Eigen/src/SparseLU/SparseLU_snode_dfs.h | 10 +- bench/spbench/test_sparseLU.cpp | 64 +++++++ 18 files changed, 280 insertions(+), 173 deletions(-) create mode 100644 bench/spbench/test_sparseLU.cpp diff --git a/Eigen/src/OrderingMethods/Ordering.h b/Eigen/src/OrderingMethods/Ordering.h index 3a3e3f6fc..eedaed144 100644 --- a/Eigen/src/OrderingMethods/Ordering.h +++ b/Eigen/src/OrderingMethods/Ordering.h @@ -32,9 +32,8 @@ template class OrderingBase { public: - typedef typename internal::traits::MatrixType MatrixType; - typedef typename MatrixType::Scalar Scalar; - typedef typename MatrixType::Index Index; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::traits::Index Index; typedef PermutationMatrix PermutationType; public: @@ -42,10 +41,12 @@ class OrderingBase { } + template OrderingBase(const MatrixType& mat):OrderingBase() { compute(mat); } + template Derived& compute(const MatrixType& mat) { return derived().compute(mat); @@ -61,9 +62,9 @@ class OrderingBase /** * Get the permutation vector */ - PermutationType& get_perm(const MatrixType& mat) + PermutationType& get_perm() { - if (m_isInitialized = true) return 
m_P; + if (m_isInitialized == true) return m_P; else abort(); // FIXME Should find a smoother way to exit with error code } @@ -101,7 +102,6 @@ class OrderingBase mutable bool m_isInitialized; SparseMatrix m_mat; // Stores the (symmetrized) matrix to permute }; - /** * Get the approximate minimum degree ordering * If the matrix is not structurally symmetric, an ordering of A^T+A is computed @@ -161,6 +161,15 @@ class AMDOrdering : public OrderingBase > }; +namespace internal { + template + struct traits > + { + typedef _Scalar Scalar; + typedef _Index Index; + }; +} + /** * Get the column approximate minimum degree ordering * The matrix should be in column-major format diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 293dcd0b3..682cd465c 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -54,15 +54,15 @@ class SparseLU typedef SuperNodalMatrix SCMatrix; typedef Matrix ScalarVector; typedef Matrix IndexVector; -// typedef GlobalLU_t LU_GlobalLU_t; typedef PermutationMatrix PermutationType; public: - SparseLU():m_isInitialized(true),m_symmetricmode(false),m_diagpivotthresh(1.0) + SparseLU():m_isInitialized(true),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0) { initperfvalues(); } - SparseLU(const MatrixType& matrix):SparseLU() + SparseLU(const MatrixType& matrix):m_isInitialized(true),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0) { + initperfvalues(); compute(matrix); } @@ -114,8 +114,23 @@ class SparseLU // return solve_retval(*this, B.derived()); // } + + /** \brief Reports whether previous computation was successful. 
+ * + * \returns \c Success if computation was succesful, + * \c NumericalIssue if the PaStiX reports a problem + * \c InvalidInput if the input matrix is invalid + * + * \sa iparm() + */ + ComputationInfo info() const + { + eigen_assert(m_isInitialized && "Decomposition is not initialized."); + return m_info; + } + template - bool _solve(const MatrixBase &B, MatrixBase &X) const + bool _solve(const MatrixBase &B, MatrixBase &X) const { eigen_assert(m_isInitialized && "The matrix should be factorized first"); EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0, @@ -141,7 +156,7 @@ class SparseLU const Scalar * Lval = m_Lstore.valuePtr(); // Nonzero values Matrix work(n, nrhs); // working vector work.setZero(); - int j, k, i, icol,jcol; + int j, k, i,jcol; for (k = 0; k <= m_Lstore.nsuper(); k ++) { fsupc = m_Lstore.supToCol()[k]; @@ -168,13 +183,12 @@ class SparseLU // The supernode has more than one column // Triangular solve - Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); - // Map, 0, OuterStride > u( &(X(fsupc,0)), nsupc, nrhs, OuterStride<>(X.rows()) ); - Matrix& U = X.block(fsupc, 0, nsupc, nrhs); //FIXME Check this + Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); + Block U(X, fsupc, 0, nsupc, nrhs); //FIXME TODO Consider more RHS U = A.template triangularView().solve(U); // Matrix-vector product - new (&A) Map, 0, OuterStride<> > ( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); + new (&A) Map, 0, OuterStride<> > ( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); work.block(0, 0, nrow, nrhs) = A * U; //Begin Scatter @@ -210,8 +224,8 @@ class SparseLU } else { - Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); - Matrix& U = X.block(fsupc, 0, nsupc, nrhs); + Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); + Block U(X, fsupc, 0, nsupc, nrhs); U = A.template triangularView().solve(U); } @@ -221,8 +235,8 @@ class 
SparseLU { for (i = m_Ustore.outerIndexPtr()[jcol]; i < m_Ustore.outerIndexPtr()[jcol]; i++) { - irow = m_Ustore.InnerIndices()[i]; - X(irow, j) -= X(jcol, j) * m_Ustore.Values()[i]; + irow = m_Ustore.innerIndexPtr()[i]; + X(irow, j) -= X(jcol, j) * m_Ustore.valuePtr()[i]; } } } @@ -254,12 +268,12 @@ class SparseLU bool m_analysisIsOk; NCMatrix m_mat; // The input (permuted ) matrix SCMatrix m_Lstore; // The lower triangular matrix (supernodal) - NCMatrix m_Ustore; // The upper triangular matrix + MappedSparseMatrix m_Ustore; // The upper triangular matrix PermutationType m_perm_c; // Column permutation PermutationType m_perm_r ; // Row permutation IndexVector m_etree; // Column elimination tree - static LU_GlobalLU_t m_glu; // persistent data to facilitate multiple factors + LU_GlobalLU_t m_glu; // persistent data to facilitate multiple factors // FIXME All fields of this struct can be defined separately as class members // SuperLU/SparseLU options @@ -332,9 +346,11 @@ void SparseLU::analyzePattern(const MatrixType& mat) m_etree = iwork; // Postmultiply A*Pc by post, i.e reorder the matrix according to the postorder of the etree - PermutationType post_perm(post); + + PermutationType post_perm(m);; + for (int i = 0; i < m; i++) + post_perm.indices()(i) = post(i); //m_mat = m_mat * post_perm; // FIXME This should surely be in factorize() - // Composition of the two permutations m_perm_c = m_perm_c * post_perm; } // end postordering @@ -357,6 +373,7 @@ void SparseLU::analyzePattern(const MatrixType& mat) #include "SparseLU_pruneL.h" #include "SparseLU_Utils.h" + /** * - Numerical factorization * - Interleaved with the symbolic factorization @@ -380,9 +397,8 @@ void SparseLU::factorize(const MatrixType& matrix) eigen_assert(m_analysisIsOk && "analyzePattern() should be called first"); eigen_assert((matrix.rows() == matrix.cols()) && "Only for squared matrices"); + typedef typename IndexVector::Scalar Index; - ScalarVector work; // Scalar work vector - IndexVector 
iwork; //Index work vector // Apply the column permutation computed in analyzepattern() m_mat = matrix * m_perm_c; @@ -394,7 +410,7 @@ void SparseLU::factorize(const MatrixType& matrix) int maxpanel = m_panel_size * m; // Allocate storage common to the factor routines int lwork = 0; - int info = LUMemInit(m, n, nnz, work, iwork, lwork, m_fillfactor, m_panel_size, m_maxsuper, m_rowblk, m_glu); + int info = LUMemInit(m, n, nnz, lwork, m_fillfactor, m_panel_size, m_glu); if (info) { std::cerr << "UNABLE TO ALLOCATE WORKING MEMORY\n\n" ; @@ -404,29 +420,37 @@ void SparseLU::factorize(const MatrixType& matrix) // Set up pointers for integer working arrays - int idx = 0; - VectorBlock segrep(iwork, idx, m); - idx += m; - VectorBlock parent(iwork, idx, m); - idx += m; - VectorBlock xplore(iwork, idx, m); - idx += m; - VectorBlock repfnz(iwork, idx, maxpanel); - idx += maxpanel; - VectorBlock panel_lsub(iwork, idx, maxpanel); - idx += maxpanel; - VectorBlock xprune(iwork, idx, n); - idx += n; - VectorBlock marker(iwork, idx, m * LU_NO_MARKER); +// int idx = 0; +// VectorBlock segrep(iwork, idx, m); +// idx += m; +// VectorBlock parent(iwork, idx, m); +// idx += m; +// VectorBlock xplore(iwork, idx, m); +// idx += m; +// VectorBlock repfnz(iwork, idx, maxpanel); +// idx += maxpanel; +// VectorBlock panel_lsub(iwork, idx, maxpanel); +// idx += maxpanel; +// VectorBlock xprune(iwork, idx, n); +// idx += n; +// VectorBlock marker(iwork, idx, m * LU_NO_MARKER); + // Set up pointers for integer working arrays + IndexVector segrep(m); + IndexVector parent(m); + IndexVector xplore(m); + IndexVector repfnz(maxpanel); + IndexVector panel_lsub(maxpanel); + IndexVector xprune(n); + IndexVector marker(m*LU_NO_MARKER); repfnz.setConstant(-1); panel_lsub.setConstant(-1); // Set up pointers for scalar working arrays - VectorBlock dense(work, 0, maxpanel); - dense.setZero(); - VectorBlock tempv(work, maxpanel, LU_NUM_TEMPV(m, m_panel_size, m_maxsuper, m_rowblk) ); - tempv.setZero(); + 
ScalarVector dense; + dense.setZero(maxpanel); + ScalarVector tempv; + tempv.setZero(LU_NUM_TEMPV(m, m_panel_size, m_maxsuper, m_rowblk) ); // Setup Permutation vectors // Compute the inverse of perm_c @@ -434,12 +458,13 @@ void SparseLU::factorize(const MatrixType& matrix) // Identify initial relaxed snodes IndexVector relax_end(n); - if ( m_symmetricmode = true ) - LU_heap_relax_snode(n, m_etree, m_relax, marker, relax_end); + if ( m_symmetricmode == true ) + LU_heap_relax_snode(n, m_etree, m_relax, marker, relax_end); else - LU_relax_snode(n, m_etree, m_relax, marker, relax_end); + LU_relax_snode(n, m_etree, m_relax, marker, relax_end); - m_perm_r.setConstant(-1); + m_perm_r.resize(m); + m_perm_r.indices().setConstant(-1); //FIXME marker.setConstant(-1); IndexVector& xsup = m_glu.xsup; @@ -451,19 +476,19 @@ void SparseLU::factorize(const MatrixType& matrix) Index& nzlumax = m_glu.nzlumax; supno(0) = IND_EMPTY; - xsup(0) = xlsub(0) = xusub(0) = xlusup(0) = 0; + xsup(0) = xlsub(0) = xusub(0) = xlusup(0) = Index(0); // Work on one 'panel' at a time. 
A panel is one of the following : // (a) a relaxed supernode at the bottom of the etree, or // (b) panel_size contiguous columns, defined by the user - register int jcol,kcol; + int jcol,kcol; IndexVector panel_histo(n); Index nextu, nextlu, jsupno, fsupc, new_next; Index pivrow; // Pivotal row number in the original row matrix int nseg1; // Number of segments in U-column above panel row jcol int nseg; // Number of segments in each U-column - int irep,ir, icol; - int i, k, jj,j; + int irep, icol; + int i, k, jj; for (jcol = 0; jcol < n; ) { if (relax_end(jcol) != IND_EMPTY) @@ -472,7 +497,7 @@ void SparseLU::factorize(const MatrixType& matrix) // Factorize the relaxed supernode(jcol:kcol) // First, determine the union of the row structure of the snode - info = LU_snode_dfs(jcol, kcol, m_mat.innerIndexPtr(), m_mat.outerIndexPtr(), xprune, marker); + info = LU_snode_dfs(jcol, kcol, m_mat.innerIndexPtr(), m_mat.outerIndexPtr(), xprune, marker, m_glu); if ( info ) { std::cerr << "MEMORY ALLOCATION FAILED IN SNODE_DFS() \n"; @@ -488,7 +513,7 @@ void SparseLU::factorize(const MatrixType& matrix) int mem; while (new_next > nzlumax ) { - mem = LUMemXpand(lusup, nzlumax, nextlu, LUSUP, m_glu.num_expansions); + mem = LUMemXpand(lusup, nzlumax, nextlu, LUSUP, m_glu.num_expansions); if (mem) { std::cerr << "MEMORY ALLOCATION FAILED FOR L FACTOR \n"; @@ -502,13 +527,13 @@ void SparseLU::factorize(const MatrixType& matrix) xusub(icol+1) = nextu; // Scatter into SPA dense(*) for (typename MatrixType::InnerIterator it(m_mat, icol); it; ++it) - dense(it.row()) = it.val(); + dense(it.row()) = it.value(); // Numeric update within the snode - LU_snode_bmod(icol, jsupno, fsupc, dense, m_glu); + LU_snode_bmod(icol, fsupc, dense, m_glu); // Eliminate the current column - info = LU_pivotL(icol, m_diagpivotthresh, m_perm_r, iperm_c, pivrow, m_glu); + info = LU_pivotL(icol, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); if ( info ) { m_info = NumericalIssue; @@ 
-536,13 +561,13 @@ void SparseLU::factorize(const MatrixType& matrix) panel_size = n - jcol; // Symbolic outer factorization on a panel of columns - LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r, nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu); + LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu); // Numeric sup-panel updates in topological order LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_glu); // Sparse LU within the panel, and below the panel diagonal - for ( jj = jcol; j< jcol + panel_size; jj++) + for ( jj = jcol; jj< jcol + panel_size; jj++) { k = (jj - jcol) * m; // Column index for w-wide arrays @@ -550,7 +575,7 @@ void SparseLU::factorize(const MatrixType& matrix) //Depth-first-search for the current column VectorBlock panel_lsubk(panel_lsub, k, m); VectorBlock repfnz_k(repfnz, k, m); - info = LU_column_dfs(m, jj, m_perm_r, m_maxsuper, nseg, panel_lsub(k), segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); + info = LU_column_dfs(m, jj, m_perm_r.indices(), m_maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); if ( !info ) { std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() \n"; @@ -559,7 +584,7 @@ void SparseLU::factorize(const MatrixType& matrix) return; } // Numeric updates to this column - VectorBlock dense_k(dense, k, m); + VectorBlock dense_k(dense, k, m); VectorBlock segrep_k(segrep, nseg1, m); info = LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu); if ( info ) @@ -571,7 +596,7 @@ void SparseLU::factorize(const MatrixType& matrix) } // Copy the U-segments to ucol(*) - info = LU_copy_to_col(jj, nseg, segrep, repfnz_k, m_perm_r, dense_k, m_glu); + info = LU_copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu); if ( info ) { std::cerr << "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() \n"; @@ -581,7 
+606,7 @@ void SparseLU::factorize(const MatrixType& matrix) } // Form the L-segment - info = LU_pivotL(jj, m_diagpivotthresh, m_perm_r, iperm_c, pivrow, m_glu); + info = LU_pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); if ( info ) { std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT " << info <::factorize(const MatrixType& matrix) } // Prune columns (0:jj-1) using column jj - LU_pruneL(jj, m_perm_r, pivrow, nseg, segrep, repfnz_k, xprune, m_glu); + LU_pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu); // Reset repfnz for this column for (i = 0; i < nseg; i++) @@ -604,23 +629,10 @@ void SparseLU::factorize(const MatrixType& matrix) } // end else } // end for -- end elimination - // Adjust row permutation in the case of rectangular matrices... Deprecated - if (m > n ) - { - k = 0; - for (i = 0; i < m; ++i) - { - if ( m_perm_r(i) == IND_EMPTY ) - { - m_perm_r(i) = n + k; - ++k; - } - } - } // Count the number of nonzeros in factors - LU_countnz(n, xprune, m_nnzL, m_nnzU, m_glu); + LU_countnz(n, m_nnzL, m_nnzU, m_glu); // Apply permutation to the L subscripts - LU_fixupL(n, m_perm_r, m_glu); + LU_fixupL/**/(n, m_perm_r.indices(), m_glu); @@ -628,8 +640,8 @@ void SparseLU::factorize(const MatrixType& matrix) m_Lstore.setInfos(m, n, m_glu.lusup, m_glu.xlusup, m_glu.lsub, m_glu.xlsub, m_glu.supno, m_glu.xsup); // Create the column major upper sparse matrix U; // it is assumed here that MatrixType = SparseMatrix - new (&m_Ustore) Map ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() ); - this.m_Ustore = m_Ustore; //FIXME Is it necessary + new (&m_Ustore) MappedSparseMatrix ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() ); + //this.m_Ustore = m_Ustore; //FIXME Is it necessary m_info = Success; m_factorizationIsOk = true; diff --git a/Eigen/src/SparseLU/SparseLU_Coletree.h b/Eigen/src/SparseLU/SparseLU_Coletree.h index 00bb97796..585b02fdf 
100644 --- a/Eigen/src/SparseLU/SparseLU_Coletree.h +++ b/Eigen/src/SparseLU/SparseLU_Coletree.h @@ -188,7 +188,6 @@ void LU_TreePostorder(int n, IndexVector& parent, IndexVector& post) // Depth-first search from dummy root vertex #n postnum = 0; LU_nr_etdfs(n, parent, first_kid, next_kid, post, postnum); - return post; } #endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_Matrix.h b/Eigen/src/SparseLU/SparseLU_Matrix.h index 70570ab9c..5b2c64154 100644 --- a/Eigen/src/SparseLU/SparseLU_Matrix.h +++ b/Eigen/src/SparseLU/SparseLU_Matrix.h @@ -46,14 +46,16 @@ class SuperNodalMatrix { public: typedef _Scalar Scalar; - typedef _Index Index; + typedef _Index Index; + typedef Matrix IndexVector; + typedef Matrix ScalarVector; public: SuperNodalMatrix() { } - SuperNodalMatrix(Index m, Index n, Scalar *nzval, Index* nzval_colptr, Index* rowind, - Index* rowind_colptr, Index* col_to_sup, Index* sup_to_col ) + SuperNodalMatrix(int m, int n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind, + IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col ) { setInfos(m, n, nzval, nzval_colptr, rowind, rowind_colptr, col_to_sup, sup_to_col); } @@ -68,17 +70,17 @@ class SuperNodalMatrix * FIXME This class will be modified such that it can be use in the course * of the factorization. 
*/ - void setInfos(Index m, Index n, Scalar *nzval, Index* nzval_colptr, Index* rowind, - Index* rowind_colptr, Index* col_to_sup, Index* sup_to_col ) + void setInfos(int m, int n, ScalarVector& nzval, IndexVector& nzval_colptr, IndexVector& rowind, + IndexVector& rowind_colptr, IndexVector& col_to_sup, IndexVector& sup_to_col ) { m_row = m; m_col = n; - m_nzval = nzval; - m_nzval_colptr = nzval_colptr; - m_rowind = rowind; - m_rowind_colptr = rowind_colptr; - m_col_to_sup = col_to_sup; - m_sup_to_col = sup_to_col; + m_nzval = nzval.data(); + m_nzval_colptr = nzval_colptr.data(); + m_rowind = rowind.data(); + m_rowind_colptr = rowind_colptr.data(); + m_col_to_sup = col_to_sup.data(); + m_sup_to_col = sup_to_col.data(); } @@ -108,6 +110,10 @@ class SuperNodalMatrix return m_nzval; } + const Scalar* valuePtr() const + { + return m_nzval; + } /** * Return the pointers to the beginning of each column in \ref valuePtr() */ @@ -116,6 +122,11 @@ class SuperNodalMatrix return m_nzval_colptr; } + const Index* colIndexPtr() const + { + return m_nzval_colptr; + } + /** * Return the array of compressed row indices of all supernodes */ @@ -123,6 +134,12 @@ class SuperNodalMatrix { return m_rowind; } + + const Index* rowIndex() const + { + return m_rowind; + } + /** * Return the location in \em rowvaluePtr() which starts each column */ @@ -130,17 +147,33 @@ class SuperNodalMatrix { return m_rowind_colptr; } + + const Index* rowIndexPtr() const + { + return m_rowind_colptr; + } + /** * Return the array of column-to-supernode mapping */ - Index colToSup() + Index* colToSup() + { + return m_col_to_sup; + } + + const Index* colToSup() const { return m_col_to_sup; } /** * Return the array of supernode-to-column mapping */ - Index supToCol() + Index* supToCol() + { + return m_sup_to_col; + } + + const Index* supToCol() const { return m_sup_to_col; } @@ -148,7 +181,7 @@ class SuperNodalMatrix /** * Return the number of supernodes */ - int nsuper() + int nsuper() const { return 
m_nsuper; } diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h index ea9ef6d89..60ebfcaa1 100644 --- a/Eigen/src/SparseLU/SparseLU_Memory.h +++ b/Eigen/src/SparseLU/SparseLU_Memory.h @@ -61,11 +61,11 @@ * \param vec Valid pointer to the vector to allocate or expand * \param [in,out]length At input, contain the current length of the vector that is to be increased. At output, length of the newly allocated vector * \param [in]len_to_copy Current number of elements in the factors - * \param keep_prev true: use length and do not expand the vector; false: compute new_len and expand + * \param keep_prev 1: use length and do not expand the vector; 0: compute new_len and expand * \param [in,out]num_expansions Number of times the memory has been expanded */ template -int expand(VectorType& vec, int& length, int len_to_copy, bool keep_prev, int& num_expansions) +int expand(VectorType& vec, int& length, int len_to_copy, int keep_prev, int& num_expansions) { float alpha = 1.5; // Ratio of the memory increase @@ -120,18 +120,16 @@ int expand(VectorType& vec, int& length, int len_to_copy, bool keep_prev, int& * \param m number of rows of the input matrix * \param n number of columns * \param annz number of initial nonzeros in the matrix - * \param work scalar working space needed by all factor routines - * \param iwork Integer working space * \param lwork if lwork=-1, this routine returns an estimated size of the required memory * \param glu persistent data to facilitate multiple factors : will be deleted later ?? 
* \return an estimated size of the required memory if lwork = -1; otherwise, return the size of actually allocated when memory allocation failed * NOTE Unlike SuperLU, this routine does not support successive factorization with the same pattern and the row permutation */ -template -int LUMemInit(int m, int n, int annz, ScalarVector& work, IndexVector& iwork, int lwork, int fillratio, int panel_size, int maxsuper, int rowblk, LU_GlobalLU_t& glu) +template +int LUMemInit(int m, int n, int annz, int lwork, int fillratio, int panel_size, LU_GlobalLU_t& glu) { typedef typename ScalarVector::Scalar Scalar; - typedef typename IndexVector::Index Index; + typedef typename IndexVector::Scalar Index; int& num_expansions = glu.num_expansions; //No memory expansions so far num_expansions = 0; @@ -177,17 +175,12 @@ int LUMemInit(int m, int n, int annz, ScalarVector& work, IndexVector& iwork, in if (nzlumax < annz ) return nzlumax; - expand(glu.lsup, nzlumax, 0, 0, num_expansions); + expand(glu.lusup, nzlumax, 0, 0, num_expansions); expand(glu.ucol, nzumax, 0, 0, num_expansions); expand(glu.lsub, nzlmax, 0, 0, num_expansions); expand(glu.usub, nzumax, 0, 1, num_expansions); } - // LUWorkInit : Now, allocate known working storage - int isize = (2 * panel_size + 3 + LU_NO_MARKER) * m + n; - int dsize = m * panel_size + LU_NUM_TEMPV(m, panel_size, maxsuper, rowblk); - iwork.resize(isize); - work.resize(isize); ++num_expansions; return 0; diff --git a/Eigen/src/SparseLU/SparseLU_Structs.h b/Eigen/src/SparseLU/SparseLU_Structs.h index fd2a59a41..e05eabe2a 100644 --- a/Eigen/src/SparseLU/SparseLU_Structs.h +++ b/Eigen/src/SparseLU/SparseLU_Structs.h @@ -87,7 +87,7 @@ typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} LU_MemType; template struct LU_GlobalLU_t { - typedef typename IndexVector::Index Index; + typedef typename IndexVector::Scalar Index; IndexVector xsup; //First supernode column ... 
xsup(s) points to the beginning of the s-th supernode IndexVector supno; // Supernode number corresponding to this column (column to supernode mapping) ScalarVector lusup; // nonzero values of L ordered by columns diff --git a/Eigen/src/SparseLU/SparseLU_Utils.h b/Eigen/src/SparseLU/SparseLU_Utils.h index 9e63bf7e4..0352c7872 100644 --- a/Eigen/src/SparseLU/SparseLU_Utils.h +++ b/Eigen/src/SparseLU/SparseLU_Utils.h @@ -22,20 +22,21 @@ // License and a copy of the GNU General Public License along with // Eigen. If not, see . -#ifdef EIGEN_SPARSELU_UTILS_H +#ifndef EIGEN_SPARSELU_UTILS_H #define EIGEN_SPARSELU_UTILS_H -template -void SparseLU::LU_countnz(const int n, IndexVector& xprune, int& nnzL, int& nnzU, GlobalLU_t& Glu) + +template +void LU_countnz(const int n, int& nnzL, int& nnzU, LU_GlobalLU_t& glu) { - IndexVector& xsup = Glu.xsup; - IndexVector& xlsub = Glu.xlsub; + IndexVector& xsup = glu.xsup; + IndexVector& xlsub = glu.xlsub; nnzL = 0; - nnzU = (Glu.xusub)(n); - int nsuper = (Glu.supno)(n); - int jlen, irep; - + nnzU = (glu.xusub)(n); + int nsuper = (glu.supno)(n); + int jlen; + int i, j, fsupc; if (n <= 0 ) return; // For each supernode for (i = 0; i <= nsuper; i++) @@ -46,10 +47,9 @@ void SparseLU::LU_countnz(const int n, IndexVector& xprune, int& nnzL, int& nnzU for (j = fsupc; j < xsup(i+1); j++) { nnzL += jlen; - nnzLU += j - fsupc + 1; + nnzU += j - fsupc + 1; jlen--; } - irep = xsup(i+1) - 1; } } @@ -60,16 +60,16 @@ void SparseLU::LU_countnz(const int n, IndexVector& xprune, int& nnzL, int& nnzU * and applies permutation to the remaining subscripts * */ -template -void SparseLU::LU_fixupL(const int n, const IndexVector& perm_r, GlobalLU_t& Glu) +template +void LU_fixupL(const int n, const IndexVector& perm_r, LU_GlobalLU_t& glu) { - int nsuper, fsupc, i, j, k, jstart; - IndexVector& xsup = GLu.xsup; - IndexVector& lsub = Glu.lsub; - IndexVector& xlsub = Glu.xlsub; + int fsupc, i, j, k, jstart; + IndexVector& xsup = glu.xsup; + IndexVector& lsub = 
glu.lsub; + IndexVector& xlsub = glu.xlsub; int nextl = 0; - int nsuper = (Glu.supno)(n); + int nsuper = (glu.supno)(n); // For each supernode for (i = 0; i <= nsuper; i++) @@ -80,7 +80,7 @@ void SparseLU::LU_fixupL(const int n, const IndexVector& perm_r, GlobalLU_t& Glu for (j = jstart; j < xlsub(fsupc + 1); j++) { lsub(nextl) = perm_r(lsub(j)); // Now indexed into P*A - nextl++ + nextl++; } for (k = fsupc+1; k < xsup(i+1); k++) xlsub(k) = nextl; // other columns in supernode i diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h index da464cbfc..8dadeaa93 100644 --- a/Eigen/src/SparseLU/SparseLU_column_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -60,12 +60,12 @@ * > 0 - number of bytes allocated when run out of space * */ -template -int LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, int fpanelc, LU_GlobalLU_t& glu) +template +int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, ScalarVector& tempv, BlockIndexVector& segrep, BlockIndexVector& repfnz, int fpanelc, LU_GlobalLU_t& glu) { - typedef typename IndexVector::Index Index; + typedef typename IndexVector::Scalar Index; typedef typename ScalarVector::Scalar Scalar; - int jsupno, k, ksub, krep, krep_ind, ksupno; + int jsupno, k, ksub, krep, ksupno; int lptr, nrow, isub, i, irow, nextlu, new_next, ufirst; int fsupc, nsupc, nsupr, luptr, kfnz, no_zeros; /* krep = representative of current k-th supernode @@ -115,7 +115,6 @@ int LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense, ScalarVe nsupc = krep - fst_col + 1; nsupr = xlsub(fsupc+1) - xlsub(fsupc); nrow = nsupr - d_fsupc - nsupc; - krep_ind = lptr + nsupc - 1; // NOTE Unlike the original implementation in SuperLU, the only feature // available here is a sup-col update. 
@@ -213,7 +212,7 @@ int LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense, ScalarVe ufirst = xlusup(jcol) + d_fsupc; Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); VectorBlock u(lusup, ufirst, nsupc); - u = A.template triangularView().solve(u); + u = A.template triangularView().solve(u); new (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); VectorBlock l(lusup, ufirst+nsupc, nrow); diff --git a/Eigen/src/SparseLU/SparseLU_column_dfs.h b/Eigen/src/SparseLU/SparseLU_column_dfs.h index 8c6202d67..7d9e8be79 100644 --- a/Eigen/src/SparseLU/SparseLU_column_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_column_dfs.h @@ -72,13 +72,13 @@ * > 0 number of bytes allocated when run out of space * */ -template -int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper, IndexVector& nseg, IndexVector& lsub_col, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& glu) +template +int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper, int& nseg, BlockIndexVector& lsub_col, IndexVector& segrep, BlockIndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& glu) { - typedef typename IndexVector::Index Index; + typedef typename IndexVector::Scalar Index; typedef typename ScalarVector::Scalar Scalar; - int jcolp1, jcolm1, jsuper, nsuper, nextl; + int jsuper, nsuper, nextl; int krow; // Row index of the current element int kperm; // permuted row index int krep; // Supernode reprentative of the current row @@ -92,8 +92,10 @@ int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper IndexVector& supno = glu.supno; IndexVector& lsub = glu.lsub; IndexVector& xlsub = glu.xlsub; - IndexVector& nzlmax = glu.nzlmax; + Index& nzlmax = glu.nzlmax; + int jcolm1 = jcol - 1; + int 
jcolp1 = jcol + 1; nsuper = supno(jcol); jsuper = nsuper; nextl = xlsub(jcol); diff --git a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h index 31411175c..a0cab563d 100644 --- a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +++ b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h @@ -59,10 +59,10 @@ * > 0 - number of bytes allocated when run out of space * */ -template < typename IndexVector, typename ScalarVector> -int LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segrep, IndexVector& repfnz, IndexVector& perm_r, ScalarVector& dense, LU_GlobalLU_t& glu) +template +int LU_copy_to_ucol(const int jcol, const int nseg, SegRepType& segrep, RepfnzType& repfnz ,IndexVector& perm_r, DenseType& dense, LU_GlobalLU_t& glu) { - typedef typename IndexVector::Index Index; + typedef typename IndexVector::Scalar Index; typedef typename ScalarVector::Scalar Scalar; Index ksub, krep, ksupno; diff --git a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h index 1766c3c2b..791538729 100644 --- a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +++ b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h @@ -59,9 +59,9 @@ void LU_heap_relax_snode (const int n, IndexVector& et, const int relax_columns, // The etree may not be postordered, but its heap ordered IndexVector post; - TreePostorder(n, et, post); // Post order etree + LU_TreePostorder(n, et, post); // Post order etree IndexVector inv_post(n+1); - register int i; + int i; for (i = 0; i < n+1; ++i) inv_post(post(i)) = i; // inv_post = post.inverse()??? 
// Renumber etree in postorder @@ -76,7 +76,7 @@ void LU_heap_relax_snode (const int n, IndexVector& et, const int relax_columns, // compute the number of descendants of each node in the etree relax_end.setConstant(IND_EMPTY); - register int j, parent; + int j, parent; descendants.setZero(); for (j = 0; j < n; j++) { @@ -85,8 +85,8 @@ void LU_heap_relax_snode (const int n, IndexVector& et, const int relax_columns, descendants(parent) += descendants(j) + 1; } // Identify the relaxed supernodes by postorder traversal of the etree - register int snode_start; // beginning of a snode - register int k; + int snode_start; // beginning of a snode + int k; int nsuper_et_post = 0; // Number of relaxed snodes in postordered etree int nsuper_et = 0; // Number of relaxed snodes in the original etree int l; diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h index 4f19b5ac8..ffd085357 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -62,8 +62,8 @@ * * */ -template -void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, LU_GlobalLU_t& glu) +template +void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, DenseIndexBlock& segrep, DenseIndexBlock& repfnz, LU_GlobalLU_t& glu) { typedef typename ScalarVector::Scalar Scalar; IndexVector& xsup = glu.xsup; @@ -75,7 +75,7 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca int i,ksub,jj,nextl_col,irow; int fsupc, nsupc, nsupr, nrow; - int krep, krep_ind, kfnz; + int krep, kfnz; int lptr; // points to the row subscripts of a supernode int luptr; // ... 
int segsize,no_zeros,isub ; @@ -95,8 +95,6 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca nsupr = xlsub(fsupc+1) - xlsub(fsupc); nrow = nsupr - nsupc; lptr = xlsub(fsupc); - krep_ind = lptr + nsupc - 1; - // NOTE : Unlike the original implementation in SuperLU, the present implementation // does not include a 2-D block update. @@ -104,8 +102,8 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca for (jj = jcol; jj < jcol + w; jj++) { nextl_col = (jj-jcol) * m; - VectorBlock repfnz_col(repfnz.segment(nextl_col, m)); // First nonzero column index for each row - VectorBlock dense_col(dense.segment(nextl_col, m)); // Scatter/gather entire matrix column from/to here + VectorBlock repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row + VectorBlock dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here kfnz = repfnz_col(krep); if ( kfnz == IND_EMPTY ) diff --git a/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/Eigen/src/SparseLU/SparseLU_panel_dfs.h index 6f6922ee0..f7a93ab48 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_panel_dfs.h @@ -77,8 +77,8 @@ * * */ -template -void LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, IndexVector& perm_r, int& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& glu) +template +void LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, IndexVector& perm_r, int& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& glu) { int jj; // Index through each column in the panel @@ -105,14 +105,14 @@ void LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, Index 
nextl_col = (jj - jcol) * m; VectorBlock repfnz_col(repfnz, nextl_col, m); // First nonzero location in each row - VectorBlock dense_col(dense,nextl_col, m); // Accumulate a column vector here + VectorBlock dense_col(dense,nextl_col, m); // Accumulate a column vector here // For each nnz in A[*, jj] do depth first search for (typename MatrixType::InnerIterator it(A, jj); it; ++it) { krow = it.row(); - dense_col(krow) = it.val(); + dense_col(krow) = it.value(); kmark = marker(krow); if (kmark == jj) continue; // krow visited before, go to the next nonzero @@ -126,7 +126,7 @@ void LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, Index } else { - // krow is in U : if its supernode-representative krep + // krow is in U : if its sup²ernode-representative krep // has been explored, update repfnz(*) krep = xsup(supno(kperm)+1) - 1; myfnz = repfnz_col(krep); diff --git a/Eigen/src/SparseLU/SparseLU_pivotL.h b/Eigen/src/SparseLU/SparseLU_pivotL.h index 4a50b2cca..39151f1e0 100644 --- a/Eigen/src/SparseLU/SparseLU_pivotL.h +++ b/Eigen/src/SparseLU/SparseLU_pivotL.h @@ -70,7 +70,7 @@ template int LU_pivotL(const int jcol, const typename ScalarVector::RealScalar diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, int& pivrow, LU_GlobalLU_t& glu) { - typedef typename IndexVector::Index Index; + typedef typename IndexVector::Scalar Index; typedef typename ScalarVector::Scalar Scalar; // Initialize pointers IndexVector& lsub = glu.lsub; // Compressed row subscripts of L rectangular supernodes. 
@@ -91,7 +91,6 @@ int LU_pivotL(const int jcol, const typename ScalarVector::RealScalar diagpivott Scalar pivmax = 0.0; Index pivptr = nsupc; Index diag = IND_EMPTY; - Index old_pivptr = nsupc; Scalar rtemp; Index isub, icol, itemp, k; for (isub = nsupc; isub < nsupr; ++isub) { diff --git a/Eigen/src/SparseLU/SparseLU_pruneL.h b/Eigen/src/SparseLU/SparseLU_pruneL.h index c006f6707..42218ba4a 100644 --- a/Eigen/src/SparseLU/SparseLU_pruneL.h +++ b/Eigen/src/SparseLU/SparseLU_pruneL.h @@ -61,10 +61,10 @@ * \param glu Global LU data * */ -template -void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, const int nseg, const IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, LU_GlobalLU_t& glu) +template +void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, const int nseg, const IndexVector& segrep, BlockIndexVector& repfnz, IndexVector& xprune, LU_GlobalLU_t& glu) { - typedef typename IndexVector::Index Index; + typedef typename IndexVector::Scalar Index; typedef typename ScalarVector::Scalar Scalar; // Initialize pointers IndexVector& xsup = glu.xsup; @@ -78,7 +78,7 @@ void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, cons int jsupno = supno(jcol); int i,irep,irep1; bool movnum, do_prune = false; - Index kmin, kmax, ktemp, minloc, maxloc,krow; + Index kmin, kmax, minloc, maxloc,krow; for (i = 0; i < nseg; i++) { irep = segrep(i); diff --git a/Eigen/src/SparseLU/SparseLU_snode_bmod.h b/Eigen/src/SparseLU/SparseLU_snode_bmod.h index a7034e607..47145bc0c 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_snode_bmod.h @@ -45,8 +45,7 @@ #ifndef SPARSELU_SNODE_BMOD_H #define SPARSELU_SNODE_BMOD_H template -int LU_snode_bmod (const int jcol, const int jsupno, const int fsupc, - ScalarVector& dense, LU_GlobalLU_t& glu) +int LU_snode_bmod (const int jcol, const int fsupc, ScalarVector& dense, LU_GlobalLU_t& glu) { typedef typename ScalarVector::Scalar Scalar; 
IndexVector& lsub = glu.lsub; // Compressed row subscripts of ( rectangular supernodes ??) diff --git a/Eigen/src/SparseLU/SparseLU_snode_dfs.h b/Eigen/src/SparseLU/SparseLU_snode_dfs.h index c49fc1461..3e7033c67 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_snode_dfs.h @@ -42,7 +42,7 @@ * granted, provided the above notices are retained, and a notice that * the code was modified is included with the above copyright notice. */ -#ifdef SPARSELU_SNODE_DFS_H +#ifndef SPARSELU_SNODE_DFS_H #define SPARSELU_SNODE_DFS_H /** * \brief Determine the union of the row structures of those columns within the relaxed snode. @@ -58,9 +58,9 @@ * \return 0 on success, > 0 size of the memory when memory allocation failed */ template - int LU_snode_dfs(const int jcol, const int kcol, const IndexVector* asub, const IndexVector* colptr, IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t& glu) + int LU_snode_dfs(const int jcol, const int kcol, const typename IndexVector::Scalar* asub, const typename IndexVector::Scalar* colptr, IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t& glu) { - typedef typename IndexVector::Index; + typedef typename IndexVector::Scalar Index; IndexVector& xsup = glu.xsup; IndexVector& supno = glu.supno; // Supernode number corresponding to this column IndexVector& lsub = glu.lsub; @@ -74,9 +74,9 @@ for (i = jcol; i <=kcol; i++) { // For each nonzero in A(*,i) - for (k = colptr(i); k < colptr(i+1); k++) + for (k = colptr[i]; k < colptr[i+1]; k++) { - krow = asub(k); + krow = asub[k]; kmark = marker(krow); if ( kmark != kcol ) { diff --git a/bench/spbench/test_sparseLU.cpp b/bench/spbench/test_sparseLU.cpp new file mode 100644 index 000000000..0bbbb0627 --- /dev/null +++ b/bench/spbench/test_sparseLU.cpp @@ -0,0 +1,64 @@ +// Small bench routine for Eigen available in Eigen +// (C) Desire NUENTSA WAKAM, INRIA + +#include +#include +#include +#include +#include + +using namespace std; +using namespace Eigen; + +int 
main(int argc, char **args) +{ + SparseMatrix A; + typedef SparseMatrix::Index Index; + typedef Matrix DenseMatrix; + typedef Matrix DenseRhs; + VectorXd b, x, tmp; + SparseLU, AMDOrdering > solver; + ifstream matrix_file; + string line; + int n; + + // Set parameters + /* Fill the matrix with sparse matrix stored in Matrix-Market coordinate column-oriented format */ + if (argc < 2) assert(false && "please, give the matrix market file "); + loadMarket(A, args[1]); + cout << "End charging matrix " << endl; + bool iscomplex=false, isvector=false; + int sym; + getMarketHeader(args[1], sym, iscomplex, isvector); + if (iscomplex) { cout<< " Not for complex matrices \n"; return -1; } + if (isvector) { cout << "The provided file is not a matrix file\n"; return -1;} + if (sym != 0) { // symmetric matrices, only the lower part is stored + SparseMatrix temp; + temp = A; + A = temp.selfadjointView(); + } + n = A.cols(); + /* Fill the right hand side */ + + if (argc > 2) + loadMarketVector(b, args[2]); + else + { + b.resize(n); + tmp.resize(n); +// tmp.setRandom(); + for (int i = 0; i < n; i++) tmp(i) = i; + b = A * tmp ; + } + + /* Compute the factorization */ + solver.compute(A); + + solver._solve(b, x); + /* Check the accuracy */ + VectorXd tmp2 = b - A*x; + double tempNorm = tmp2.norm()/b.norm(); + cout << "Relative norm of the computed solution : " << tempNorm <<"\n"; + + return 0; +} \ No newline at end of file From f0c34c6822855bbb91d788cfd0983fd3ceb6345f Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Fri, 15 Jun 2012 17:23:54 +0200 Subject: [PATCH 14/73] Build finished... 
start debugging --- Eigen/src/SparseLU/SparseLU.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 682cd465c..a07b5a9f3 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -130,9 +130,10 @@ class SparseLU } template - bool _solve(const MatrixBase &B, MatrixBase &X) const - { - eigen_assert(m_isInitialized && "The matrix should be factorized first"); + bool _solve(const MatrixBase &B, MatrixBase &_X) const + { + Dest& X(_X.derived()); + eigen_assert(m_factorizationIsOk && "The matrix should be factorized first"); EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0, THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); @@ -184,7 +185,8 @@ class SparseLU // Triangular solve Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); - Block U(X, fsupc, 0, nsupc, nrhs); //FIXME TODO Consider more RHS + Map< Matrix, 0, OuterStride<> > U (&(X.data()[fsupc]), nsupc, nrhs, OuterStride<>(X.rows()) ); +// Block > U(X, fsupc, 0, nsupc, nrhs); //FIXME TODO Consider more RHS U = A.template triangularView().solve(U); // Matrix-vector product @@ -225,7 +227,7 @@ class SparseLU else { Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); - Block U(X, fsupc, 0, nsupc, nrhs); + Map< Matrix, 0, OuterStride<> > U (&(X.data()[fsupc]), nsupc, nrhs, OuterStride<>(X.rows()) ); U = A.template triangularView().solve(U); } @@ -576,7 +578,7 @@ void SparseLU::factorize(const MatrixType& matrix) VectorBlock panel_lsubk(panel_lsub, k, m); VectorBlock repfnz_k(repfnz, k, m); info = LU_column_dfs(m, jj, m_perm_r.indices(), m_maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); - if ( !info ) + if ( info ) { std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() \n"; m_info = NumericalIssue; @@ -585,7 +587,7 @@ void SparseLU::factorize(const MatrixType& matrix) } // Numeric updates to this column 
VectorBlock dense_k(dense, k, m); - VectorBlock segrep_k(segrep, nseg1, m); + VectorBlock segrep_k(segrep, nseg1, m-nseg1); info = LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu); if ( info ) { From 15f15635335d459e9515aa89f0e5a9618e7f3924 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Fri, 29 Jun 2012 17:45:10 +0200 Subject: [PATCH 15/73] Before moving to the new building --- Eigen/src/SparseLU/SparseLU.h | 3 ++- Eigen/src/SparseLU/SparseLU_Matrix.h | 1 + Eigen/src/SparseLU/SparseLU_column_bmod.h | 1 + Eigen/src/SparseLU/SparseLU_pruneL.h | 2 +- bench/spbench/test_sparseLU.cpp | 1 + 5 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index a07b5a9f3..e4a4c3a7b 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -186,7 +186,6 @@ class SparseLU // Triangular solve Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); Map< Matrix, 0, OuterStride<> > U (&(X.data()[fsupc]), nsupc, nrhs, OuterStride<>(X.rows()) ); -// Block > U(X, fsupc, 0, nsupc, nrhs); //FIXME TODO Consider more RHS U = A.template triangularView().solve(U); // Matrix-vector product @@ -536,6 +535,7 @@ void SparseLU::factorize(const MatrixType& matrix) // Eliminate the current column info = LU_pivotL(icol, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); + eigen_assert(info==0 && " SINGULAR MATRIX"); if ( info ) { m_info = NumericalIssue; @@ -609,6 +609,7 @@ void SparseLU::factorize(const MatrixType& matrix) // Form the L-segment info = LU_pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); + eigen_assert(info==0 && " SINGULAR MATRIX"); if ( info ) { std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... 
ZERO COLUMN AT " << info <, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); VectorBlock u(tempv, 0, segsize); diff --git a/Eigen/src/SparseLU/SparseLU_pruneL.h b/Eigen/src/SparseLU/SparseLU_pruneL.h index 42218ba4a..91c795fac 100644 --- a/Eigen/src/SparseLU/SparseLU_pruneL.h +++ b/Eigen/src/SparseLU/SparseLU_pruneL.h @@ -123,7 +123,7 @@ void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, cons if (perm_r(lsub(kmax)) == IND_EMPTY) kmax--; else if ( perm_r(lsub(kmin)) != IND_EMPTY) - kmin--; + kmin++; else { // kmin below pivrow (not yet pivoted), and kmax diff --git a/bench/spbench/test_sparseLU.cpp b/bench/spbench/test_sparseLU.cpp index 0bbbb0627..4727cc12b 100644 --- a/bench/spbench/test_sparseLU.cpp +++ b/bench/spbench/test_sparseLU.cpp @@ -52,6 +52,7 @@ int main(int argc, char **args) } /* Compute the factorization */ + solver.isSymmetric(true); solver.compute(A); solver._solve(b, x); From 203a0343fdfb83919ffdb486d5375d239a1b2a59 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Fri, 6 Jul 2012 13:34:06 +0200 Subject: [PATCH 16/73] Update Ordering interface --- Eigen/src/OrderingMethods/Ordering.h | 124 +++++---------------------- 1 file changed, 20 insertions(+), 104 deletions(-) diff --git a/Eigen/src/OrderingMethods/Ordering.h b/Eigen/src/OrderingMethods/Ordering.h index eedaed144..3751f9bee 100644 --- a/Eigen/src/OrderingMethods/Ordering.h +++ b/Eigen/src/OrderingMethods/Ordering.h @@ -28,52 +28,14 @@ #include "Amd.h" namespace Eigen { -template -class OrderingBase -{ - public: - typedef typename internal::traits::Scalar Scalar; - typedef typename internal::traits::Index Index; - typedef PermutationMatrix PermutationType; - - public: - OrderingBase():m_isInitialized(false) - { - - } - template - OrderingBase(const MatrixType& mat):OrderingBase() - { - compute(mat); - } - template - Derived& compute(const MatrixType& mat) - { - return derived().compute(mat); - } - Derived& derived() - { - 
return *static_cast(this); - } - const Derived& derived() const - { - return *static_cast(this); - } - /** - * Get the permutation vector - */ - PermutationType& get_perm() - { - if (m_isInitialized == true) return m_P; - else abort(); // FIXME Should find a smoother way to exit with error code - } +namespace internal { /** * Get the symmetric pattern A^T+A from the input matrix A. * FIXME: The values should not be considered here */ template - void at_plus_a(const MatrixType& mat) + void ordering_helper_at_plus_a(const MatrixType& mat, MatrixType& symmat) { MatrixType C; C = mat.transpose(); // NOTE: Could be costly @@ -82,94 +44,48 @@ class OrderingBase for (typename MatrixType::InnerIterator it(C, i); it; ++it) it.valueRef() = 0.0; } - m_mat = C + mat; + symmat = C + mat; } - /** keeps off-diagonal entries; drops diagonal entries */ - struct keep_diag { - inline bool operator() (const Index& row, const Index& col, const Scalar&) const - { - return row!=col; - } - }; - - protected: - void init() - { - m_isInitialized = false; - } - PermutationType m_P; // The computed permutation - mutable bool m_isInitialized; - SparseMatrix m_mat; // Stores the (symmetrized) matrix to permute -}; +} + /** * Get the approximate minimum degree ordering * If the matrix is not structurally symmetric, an ordering of A^T+A is computed - * \tparam Scalar The type of the scalar of the matrix for which the ordering is applied * \tparam Index The type of indices of the matrix */ -template -class AMDOrdering : public OrderingBase > +template +class AMDOrdering { public: - typedef OrderingBase< AMDOrdering > Base; - typedef SparseMatrix MatrixType; typedef PermutationMatrix PermutationType; - public: - AMDOrdering():Base(){} - AMDOrdering(const MatrixType& mat):Base() - { - compute(mat); - } - AMDOrdering(const MatrixType& mat, PermutationType& perm_c):Base() - { - compute(mat); - perm_c = this.get_perm(); - } + /** Compute the permutation vector from a column-major sparse matrix */ - void 
compute(const MatrixType& mat) + template + void operator()(const MatrixType& mat, PermutationType& perm) { // Compute the symmetric pattern - at_plus_a(mat); + SparseMatrix symm; + internal::ordering_helper_at_plus_a(mat,symm); // Call the AMD routine - m_mat.prune(keep_diag()); - internal::minimum_degree_ordering(m_mat, m_P); - if (m_P.size()>0) m_isInitialized = true; + //m_mat.prune(keep_diag()); + internal::minimum_degree_ordering(symm, perm); } + /** Compute the permutation with a self adjoint matrix */ template - void compute(const SparseSelfAdjointView& mat) - { - m_mat = mat; + void operator()(const SparseSelfAdjointView& mat, PermutationType& perm) + { + SparseMatrix C = mat; // Call the AMD routine - m_mat.prune(keep_diag()); //Remove the diagonal elements - internal::minimum_degree_ordering(m_mat, m_P); - if (m_P.size()>0) m_isInitialized = true; + // m_mat.prune(keep_diag()); //Remove the diagonal elements + internal::minimum_degree_ordering(C, perm); } - protected: - struct keep_diag{ - inline bool operator() (const Index& row, const Index& col, const Scalar&) const - { - return row!=col; - } - }; - using Base::m_isInitialized; - using Base::m_P; - using Base::m_mat; }; -namespace internal { - template - struct traits > - { - typedef _Scalar Scalar; - typedef _Index Index; - }; -} - /** * Get the column approximate minimum degree ordering * The matrix should be in column-major format From b5a83867cac92a539b3a4d1cf8fcdcf8e9e9f5b2 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Fri, 6 Jul 2012 20:18:16 +0200 Subject: [PATCH 17/73] Update Ordering interface --- Eigen/src/OrderingMethods/Ordering.h | 26 +++++++++++++++-- Eigen/src/SparseLU/SparseLU.h | 35 +++++++++++++++++------ Eigen/src/SparseLU/SparseLU_Coletree.h | 9 ++++-- Eigen/src/SparseLU/SparseLU_relax_snode.h | 5 ++-- Eigen/src/SparseLU/SparseLU_snode_dfs.h | 8 +++--- Eigen/src/SuperLUSupport/SuperLUSupport.h | 3 ++ bench/spbench/test_sparseLU.cpp | 4 +-- 7 files changed, 67 insertions(+), 
23 deletions(-) diff --git a/Eigen/src/OrderingMethods/Ordering.h b/Eigen/src/OrderingMethods/Ordering.h index 3751f9bee..670cca9c4 100644 --- a/Eigen/src/OrderingMethods/Ordering.h +++ b/Eigen/src/OrderingMethods/Ordering.h @@ -60,7 +60,9 @@ class AMDOrdering public: typedef PermutationMatrix PermutationType; - /** Compute the permutation vector from a column-major sparse matrix */ + /** Compute the permutation vector from a sparse matrix + * This routine is much faster if the input matrix is column-major + */ template void operator()(const MatrixType& mat, PermutationType& perm) { @@ -73,7 +75,7 @@ class AMDOrdering internal::minimum_degree_ordering(symm, perm); } - /** Compute the permutation with a self adjoint matrix */ + /** Compute the permutation with a selfadjoint matrix */ template void operator()(const SparseSelfAdjointView& mat, PermutationType& perm) { @@ -85,6 +87,26 @@ class AMDOrdering } }; +/** + * Get the natural ordering + * + *NOTE Returns an empty permutation matrix + * \tparam Index The type of indices of the matrix + */ +template +class NaturalOrdering +{ + public: + typedef PermutationMatrix PermutationType; + + /** Compute the permutation vector from a column-major sparse matrix */ + template + void operator()(const MatrixType& mat, PermutationType& perm) + { + perm.resize(0); + } + +}; /** * Get the column approximate minimum degree ordering diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index e4a4c3a7b..74f710563 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -255,7 +255,7 @@ class SparseLU void initperfvalues() { m_panel_size = 12; - m_relax = 1; + m_relax = 6; m_maxsuper = 100; m_rowblk = 200; m_colblk = 60; @@ -320,26 +320,31 @@ void SparseLU::analyzePattern(const MatrixType& mat) // Compute the fill-reducing ordering // TODO Currently, the only available ordering method is AMD. 
- OrderingType ord(mat); - m_perm_c = ord.get_perm(); + OrderingType ord; + ord(mat,m_perm_c); //FIXME Check the right semantic behind m_perm_c // that is, column j of mat goes to column m_perm_c(j) of mat * m_perm_c; + //DEBUG : Set the natural ordering + for (int i = 0; i < mat.cols(); i++) + m_perm_c.indices()(i) = i; // Apply the permutation to the column of the input matrix - m_mat = mat * m_perm_c; + m_mat = mat * m_perm_c.inverse(); // Compute the column elimination tree of the permuted matrix if (m_etree.size() == 0) m_etree.resize(m_mat.cols()); + LU_sp_coletree(m_mat, m_etree); - + // In symmetric mode, do not do postorder here if (!m_symmetricmode) { IndexVector post, iwork; // Post order etree LU_TreePostorder(m_mat.cols(), m_etree, post); + // Renumber etree in postorder int m = m_mat.cols(); iwork.resize(m+1); @@ -348,12 +353,15 @@ void SparseLU::analyzePattern(const MatrixType& mat) // Postmultiply A*Pc by post, i.e reorder the matrix according to the postorder of the etree - PermutationType post_perm(m);; + PermutationType post_perm(m); //FIXME Use vector constructor for (int i = 0; i < m; i++) post_perm.indices()(i) = post(i); - //m_mat = m_mat * post_perm; // FIXME This should surely be in factorize() + +// m_mat = m_mat * post_perm.inverse(); // FIXME This should surely be in factorize() + // Composition of the two permutations m_perm_c = m_perm_c * post_perm; + } // end postordering m_analysisIsOk = true; @@ -402,9 +410,14 @@ void SparseLU::factorize(const MatrixType& matrix) // Apply the column permutation computed in analyzepattern() - m_mat = matrix * m_perm_c; + m_mat = matrix * m_perm_c.inverse(); m_mat.makeCompressed(); + // DEBUG ... 
Watch matrix permutation + const int *asub_in = matrix.innerIndexPtr(); + const int *colptr_in = matrix.outerIndexPtr(); + int * asub = m_mat.innerIndexPtr(); + int * colptr = m_mat.outerIndexPtr(); int m = m_mat.rows(); int n = m_mat.cols(); int nnz = m_mat.nonZeros(); @@ -455,7 +468,8 @@ void SparseLU::factorize(const MatrixType& matrix) // Setup Permutation vectors // Compute the inverse of perm_c - PermutationType iperm_c (m_perm_c.inverse() ); +// PermutationType iperm_c (m_perm_c.inverse() ); + PermutationType iperm_c (m_perm_c); // Identify initial relaxed snodes IndexVector relax_end(n); @@ -464,6 +478,9 @@ void SparseLU::factorize(const MatrixType& matrix) else LU_relax_snode(n, m_etree, m_relax, marker, relax_end); + //DEBUG +// std::cout<< "relax_end " <= nzlmax ) { mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, glu.num_expansions); - if (mem) return mem; + if (mem) return mem; // Memory expansion failed... Return the memory allocated so far } } } @@ -100,7 +100,7 @@ while (new_next > nzlmax) { mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, glu.num_expansions); - if (mem) return mem; + if (mem) return mem; // Memory expansion failed... 
Return the memory allocated so far } Index ifrom, ito = nextl; for (ifrom = xlsub(jcol); ifrom < nextl;) diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h index 60a3eb09a..9c2e6e17e 100644 --- a/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -627,6 +627,9 @@ void SuperLU::factorize(const MatrixType& a) this->initFactorization(a); + //DEBUG + m_sluOptions.ColPerm = NATURAL; + m_sluOptions.Equil = NO; int info = 0; RealScalar recip_pivot_growth, rcond; RealScalar ferr, berr; diff --git a/bench/spbench/test_sparseLU.cpp b/bench/spbench/test_sparseLU.cpp index 4727cc12b..841011f30 100644 --- a/bench/spbench/test_sparseLU.cpp +++ b/bench/spbench/test_sparseLU.cpp @@ -17,7 +17,7 @@ int main(int argc, char **args) typedef Matrix DenseMatrix; typedef Matrix DenseRhs; VectorXd b, x, tmp; - SparseLU, AMDOrdering > solver; + SparseLU, AMDOrdering > solver; ifstream matrix_file; string line; int n; @@ -52,7 +52,7 @@ int main(int argc, char **args) } /* Compute the factorization */ - solver.isSymmetric(true); +// solver.isSymmetric(true); solver.compute(A); solver._solve(b, x); From 3095e4a5f92d41c0b60b2c01b0a73903c70c1218 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Mon, 9 Jul 2012 19:09:48 +0200 Subject: [PATCH 18/73] Correct bug for triangular solve within supernodes --- Eigen/src/SparseLU/SparseLU.h | 4 ++-- Eigen/src/SparseLU/SparseLU_column_bmod.h | 4 ++-- Eigen/src/SparseLU/SparseLU_panel_bmod.h | 5 +++-- Eigen/src/SparseLU/SparseLU_snode_bmod.h | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 74f710563..8fbb794ff 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -186,7 +186,7 @@ class SparseLU // Triangular solve Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); Map< Matrix, 0, OuterStride<> > U (&(X.data()[fsupc]), 
nsupc, nrhs, OuterStride<>(X.rows()) ); - U = A.template triangularView().solve(U); + U = A.template triangularView().solve(U); // Matrix-vector product new (&A) Map, 0, OuterStride<> > ( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); @@ -593,7 +593,7 @@ void SparseLU::factorize(const MatrixType& matrix) nseg = nseg1; // begin after all the panel segments //Depth-first-search for the current column VectorBlock panel_lsubk(panel_lsub, k, m); - VectorBlock repfnz_k(repfnz, k, m); + VectorBlock repfnz_k(repfnz, k, m); info = LU_column_dfs(m, jj, m_perm_r.indices(), m_maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); if ( info ) { diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h index 02ea7c360..3042eb5f8 100644 --- a/Eigen/src/SparseLU/SparseLU_column_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -137,7 +137,7 @@ int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, Sca Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); VectorBlock u(tempv, 0, segsize); - u = A.template triangularView().solve(u); + u = A.template triangularView().solve(u); // Dense matrix-vector product y <-- A*x luptr += segsize; @@ -213,7 +213,7 @@ int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, Sca ufirst = xlusup(jcol) + d_fsupc; Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); VectorBlock u(lusup, ufirst, nsupc); - u = A.template triangularView().solve(u); + u = A.template triangularView().solve(u); new (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); VectorBlock l(lusup, ufirst+nsupc, nrow); diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h index ffd085357..9861efa2c 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h +++ 
b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -132,8 +132,9 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca luptr += nsupr * no_zeros + no_zeros; // triangular solve with Eigen Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); + std::cout<< " Matrix \n" << A << std::endl; VectorBlock u(tempv, 0, segsize); - u = A.template triangularView().solve(u); + u = A.template triangularView().solve(u); luptr += segsize; // Dense Matrix vector product y <-- A*x; @@ -164,7 +165,7 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca l(i) = Scalar(0); ++isub; } - + std::cout<< jj << " : " << dense_col.transpose() << std::endl; } // End for each column in the panel } // End for each updating supernode diff --git a/Eigen/src/SparseLU/SparseLU_snode_bmod.h b/Eigen/src/SparseLU/SparseLU_snode_bmod.h index 47145bc0c..44438d037 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_snode_bmod.h @@ -76,7 +76,7 @@ int LU_snode_bmod (const int jcol, const int fsupc, ScalarVector& dense, LU_Glob // Solve the triangular system for U(fsupc:jcol, jcol) with L(fspuc:jcol, fsupc:jcol) Map,0,OuterStride<> > A( &(lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); VectorBlock u(lusup, ufirst, nsupc); - u = A.template triangularView().solve(u); // Call the Eigen dense triangular solve interface + u = A.template triangularView().solve(u); // Call the Eigen dense triangular solve interface // Update the trailing part of the column jcol U(jcol:jcol+nrow, jcol) using L(jcol:jcol+nrow, fsupc:jcol) and U(fsupc:jcol) new (&A) Map,0,OuterStride<> > ( &(lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); From de2544cc9bb9955d1ca9da50ed7c83a8a0856adf Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Tue, 10 Jul 2012 19:16:57 +0200 Subject: [PATCH 19/73] working version of sparse LU without fill-reducing permutation --- 
Eigen/src/SparseLU/SparseLU.h | 6 +++--- Eigen/src/SparseLU/SparseLU_panel_bmod.h | 2 -- Eigen/src/SparseLU/SparseLU_panel_dfs.h | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 8fbb794ff..3bd85930c 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -234,7 +234,7 @@ class SparseLU { for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) { - for (i = m_Ustore.outerIndexPtr()[jcol]; i < m_Ustore.outerIndexPtr()[jcol]; i++) + for (i = m_Ustore.outerIndexPtr()[jcol]; i < m_Ustore.outerIndexPtr()[jcol+1]; i++) { irow = m_Ustore.innerIndexPtr()[i]; X(irow, j) -= X(jcol, j) * m_Ustore.valuePtr()[i]; @@ -454,7 +454,7 @@ void SparseLU::factorize(const MatrixType& matrix) IndexVector xplore(m); IndexVector repfnz(maxpanel); IndexVector panel_lsub(maxpanel); - IndexVector xprune(n); + IndexVector xprune(n); xprune.setZero(); IndexVector marker(m*LU_NO_MARKER); repfnz.setConstant(-1); @@ -642,7 +642,7 @@ void SparseLU::factorize(const MatrixType& matrix) for (i = 0; i < nseg; i++) { irep = segrep(i); - repfnz(irep) = IND_EMPTY; + repfnz_k(irep) = IND_EMPTY; } } // end SparseLU within the panel jcol += panel_size; // Move to the next panel diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h index 9861efa2c..59ec69ec8 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -132,7 +132,6 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca luptr += nsupr * no_zeros + no_zeros; // triangular solve with Eigen Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); - std::cout<< " Matrix \n" << A << std::endl; VectorBlock u(tempv, 0, segsize); u = A.template triangularView().solve(u); @@ -165,7 +164,6 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca l(i) = Scalar(0); ++isub; } - 
std::cout<< jj << " : " << dense_col.transpose() << std::endl; } // End for each column in the panel } // End for each updating supernode diff --git a/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/Eigen/src/SparseLU/SparseLU_panel_dfs.h index f7a93ab48..908ee67ac 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_panel_dfs.h @@ -126,7 +126,7 @@ void LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, Index } else { - // krow is in U : if its sup²ernode-representative krep + // krow is in U : if its supernode-representative krep // has been explored, update repfnz(*) krep = xsup(supno(kperm)+1) - 1; myfnz = repfnz_col(krep); From e529bc9cc1e23a748fb345bc25428001db6adb53 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Tue, 10 Jul 2012 19:18:50 +0200 Subject: [PATCH 20/73] correct bug when applying column permutation --- Eigen/src/SparseLU/SparseLU.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 3bd85930c..db1b8a5bb 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -244,7 +244,7 @@ class SparseLU } // End For U-solve // Permute back the solution - X = m_perm_c * X; + X = m_perm_c.inverse() * X; return true; } From 773804691a9203af41c06109f79372a048a584df Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Fri, 13 Jul 2012 17:32:25 +0200 Subject: [PATCH 21/73] working version of sparse LU with unsymmetric supernodes and fill-reducing permutation --- Eigen/src/SparseLU/SparseLU.h | 75 ++++++---------------- Eigen/src/SparseLU/SparseLU_Memory.h | 22 +++---- Eigen/src/SparseLU/SparseLU_column_dfs.h | 4 +- Eigen/src/SparseLU/SparseLU_copy_to_ucol.h | 2 +- 4 files changed, 35 insertions(+), 68 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index db1b8a5bb..3d8c8532f 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -137,15 +137,16 
@@ class SparseLU EIGEN_STATIC_ASSERT((Dest::Flags&RowMajorBit)==0, THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); - X = B; /* on return, X is overwritten by the computed solution */ int nrhs = B.cols(); + Index n = B.rows(); - // Permute the right hand side to form Pr*B - X = m_perm_r * X; + // Permute the right hand side to form X = Pr*B + // on return, X is overwritten by the computed solution + X.resize(n,nrhs); + X = m_perm_r * B; // Forward solve PLy = Pb; - Index n = B.rows(); Index fsupc; // First column of the current supernode Index istart; // Pointer index to the subscript of the current column Index nsupr; // Number of rows in the current supernode @@ -324,13 +325,9 @@ void SparseLU::analyzePattern(const MatrixType& mat) ord(mat,m_perm_c); //FIXME Check the right semantic behind m_perm_c // that is, column j of mat goes to column m_perm_c(j) of mat * m_perm_c; - - //DEBUG : Set the natural ordering - for (int i = 0; i < mat.cols(); i++) - m_perm_c.indices()(i) = i; - + // Apply the permutation to the column of the input matrix - m_mat = mat * m_perm_c.inverse(); + m_mat = mat * m_perm_c.inverse(); //FIXME It should be less expensive here to permute only the structural pattern of the matrix // Compute the column elimination tree of the permuted matrix @@ -352,15 +349,12 @@ void SparseLU::analyzePattern(const MatrixType& mat) m_etree = iwork; // Postmultiply A*Pc by post, i.e reorder the matrix according to the postorder of the etree - - PermutationType post_perm(m); //FIXME Use vector constructor + PermutationType post_perm(m); //FIXME Use directly a constructor with post for (int i = 0; i < m; i++) post_perm.indices()(i) = post(i); - -// m_mat = m_mat * post_perm.inverse(); // FIXME This should surely be in factorize() - - // Composition of the two permutations - m_perm_c = m_perm_c * post_perm; + + // Combine the two permutations : postorder the permutation for future use + m_perm_c = post_perm * m_perm_c; } // end postordering @@ -413,16 +407,11 @@ 
void SparseLU::factorize(const MatrixType& matrix) m_mat = matrix * m_perm_c.inverse(); m_mat.makeCompressed(); - // DEBUG ... Watch matrix permutation - const int *asub_in = matrix.innerIndexPtr(); - const int *colptr_in = matrix.outerIndexPtr(); - int * asub = m_mat.innerIndexPtr(); - int * colptr = m_mat.outerIndexPtr(); int m = m_mat.rows(); int n = m_mat.cols(); int nnz = m_mat.nonZeros(); int maxpanel = m_panel_size * m; - // Allocate storage common to the factor routines + // Allocate working storage common to the factor routines int lwork = 0; int info = LUMemInit(m, n, nnz, lwork, m_fillfactor, m_panel_size, m_glu); if (info) @@ -432,30 +421,14 @@ void SparseLU::factorize(const MatrixType& matrix) return ; } - // Set up pointers for integer working arrays -// int idx = 0; -// VectorBlock segrep(iwork, idx, m); -// idx += m; -// VectorBlock parent(iwork, idx, m); -// idx += m; -// VectorBlock xplore(iwork, idx, m); -// idx += m; -// VectorBlock repfnz(iwork, idx, maxpanel); -// idx += maxpanel; -// VectorBlock panel_lsub(iwork, idx, maxpanel); -// idx += maxpanel; -// VectorBlock xprune(iwork, idx, n); -// idx += n; -// VectorBlock marker(iwork, idx, m * LU_NO_MARKER); - // Set up pointers for integer working arrays - IndexVector segrep(m); - IndexVector parent(m); - IndexVector xplore(m); + IndexVector segrep(m); segrep.setZero(); + IndexVector parent(m); parent.setZero(); + IndexVector xplore(m); xplore.setZero(); IndexVector repfnz(maxpanel); IndexVector panel_lsub(maxpanel); IndexVector xprune(n); xprune.setZero(); - IndexVector marker(m*LU_NO_MARKER); + IndexVector marker(m*LU_NO_MARKER); marker.setZero(); repfnz.setConstant(-1); panel_lsub.setConstant(-1); @@ -466,10 +439,8 @@ void SparseLU::factorize(const MatrixType& matrix) ScalarVector tempv; tempv.setZero(LU_NUM_TEMPV(m, m_panel_size, m_maxsuper, m_rowblk) ); - // Setup Permutation vectors // Compute the inverse of perm_c -// PermutationType iperm_c (m_perm_c.inverse() ); - PermutationType 
iperm_c (m_perm_c); + PermutationType iperm_c(m_perm_c.inverse()); // Identify initial relaxed snodes IndexVector relax_end(n); @@ -478,11 +449,9 @@ void SparseLU::factorize(const MatrixType& matrix) else LU_relax_snode(n, m_etree, m_relax, marker, relax_end); - //DEBUG -// std::cout<< "relax_end " <::factorize(const MatrixType& matrix) ScalarVector& lusup = m_glu.lusup; Index& nzlumax = m_glu.nzlumax; - supno(0) = IND_EMPTY; + supno(0) = IND_EMPTY; xsup.setConstant(0); xsup(0) = xlsub(0) = xusub(0) = xlusup(0) = Index(0); // Work on one 'panel' at a time. A panel is one of the following : @@ -552,7 +521,6 @@ void SparseLU::factorize(const MatrixType& matrix) // Eliminate the current column info = LU_pivotL(icol, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); - eigen_assert(info==0 && " SINGULAR MATRIX"); if ( info ) { m_info = NumericalIssue; @@ -626,7 +594,6 @@ void SparseLU::factorize(const MatrixType& matrix) // Form the L-segment info = LU_pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); - eigen_assert(info==0 && " SINGULAR MATRIX"); if ( info ) { std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... 
ZERO COLUMN AT " << info <::factorize(const MatrixType& matrix) // Count the number of nonzeros in factors LU_countnz(n, m_nnzL, m_nnzU, m_glu); // Apply permutation to the L subscripts - LU_fixupL/**/(n, m_perm_r.indices(), m_glu); + LU_fixupL(n, m_perm_r.indices(), m_glu); // Create supernode matrix L m_Lstore.setInfos(m, n, m_glu.lusup, m_glu.xlusup, m_glu.lsub, m_glu.xlsub, m_glu.supno, m_glu.xsup); // Create the column major upper sparse matrix U; - // it is assumed here that MatrixType = SparseMatrix new (&m_Ustore) MappedSparseMatrix ( m, n, m_nnzU, m_glu.xusub.data(), m_glu.usub.data(), m_glu.ucol.data() ); - //this.m_Ustore = m_Ustore; //FIXME Is it necessary m_info = Success; m_factorizationIsOk = true; diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h index 60ebfcaa1..a17079199 100644 --- a/Eigen/src/SparseLU/SparseLU_Memory.h +++ b/Eigen/src/SparseLU/SparseLU_Memory.h @@ -60,12 +60,12 @@ * Expand the existing storage to accomodate more fill-ins * \param vec Valid pointer to the vector to allocate or expand * \param [in,out]length At input, contain the current length of the vector that is to be increased. 
At output, length of the newly allocated vector - * \param [in]len_to_copy Current number of elements in the factors + * \param [in]nbElts Current number of elements in the factors * \param keep_prev 1: use length and do not expand the vector; 0: compute new_len and expand * \param [in,out]num_expansions Number of times the memory has been expanded */ template -int expand(VectorType& vec, int& length, int len_to_copy, int keep_prev, int& num_expansions) +int expand(VectorType& vec, int& length, int nbElts, int keep_prev, int& num_expansions) { float alpha = 1.5; // Ratio of the memory increase @@ -77,8 +77,8 @@ int expand(VectorType& vec, int& length, int len_to_copy, int keep_prev, int& n new_len = alpha * length ; VectorType old_vec; // Temporary vector to hold the previous values - if (len_to_copy > 0 ) - old_vec = vec; // old_vec should be of size len_to_copy... to be checked + if (nbElts > 0 ) + old_vec = vec.segment(0,nbElts); // old_vec should be of size nbElts... to be checked //expand the current vector //FIXME Should be in a try ... catch region vec.resize(new_len); @@ -107,8 +107,8 @@ int expand(VectorType& vec, int& length, int len_to_copy, int keep_prev, int& n } // end allocation //Copy the previous values to the newly allocated space - if (len_to_copy > 0) - vec.segment(0, len_to_copy) = old_vec; + if (nbElts > 0) + vec.segment(0, nbElts) = old_vec; } // end expansion length = new_len; if(num_expansions) ++num_expansions; @@ -137,7 +137,7 @@ int LUMemInit(int m, int n, int annz, int lwork, int fillratio, int panel_size, Index& nzlmax = glu.nzlmax; Index& nzumax = glu.nzumax; Index& nzlumax = glu.nzlumax; - nzumax = nzlumax = fillratio * annz; // estimated number of nonzeros in U + nzumax = nzlumax = std::max(fillratio * annz, m*n); // estimated number of nonzeros in U nzlmax = std::max(1., fillratio/4.) 
* annz; // estimated nnz in L factor // Return the estimated size to the user if necessary @@ -191,18 +191,18 @@ int LUMemInit(int m, int n, int annz, int lwork, int fillratio, int panel_size, * \brief Expand the existing storage * \param vec vector to expand * \param [in,out]maxlen On input, previous size of vec (Number of elements to copy ). on output, new size - * \param next current number of elements in the vector. + * \param nbElts current number of elements in the vector. * \param glu Global data structure * \return 0 on success, > 0 size of the memory allocated so far */ template -int LUMemXpand(VectorType& vec, int& maxlen, int next, LU_MemType memtype, int& num_expansions) +int LUMemXpand(VectorType& vec, int& maxlen, int nbElts, LU_MemType memtype, int& num_expansions) { int failed_size; if (memtype == USUB) - failed_size = expand(vec, maxlen, next, 1, num_expansions); + failed_size = expand(vec, maxlen, nbElts, 1, num_expansions); else - failed_size = expand(vec, maxlen, next, 0, num_expansions); + failed_size = expand(vec, maxlen, nbElts, 0, num_expansions); if (failed_size) return failed_size; diff --git a/Eigen/src/SparseLU/SparseLU_column_dfs.h b/Eigen/src/SparseLU/SparseLU_column_dfs.h index 7d9e8be79..70cfe40ea 100644 --- a/Eigen/src/SparseLU/SparseLU_column_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_column_dfs.h @@ -44,7 +44,6 @@ */ #ifndef SPARSELU_COLUMN_DFS_H #define SPARSELU_COLUMN_DFS_H - /** * \brief Performs a symbolic factorization on column jcol and decide the supernode boundary * @@ -234,6 +233,9 @@ int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper jptr = xlsub(jcol); // Not yet compressed jm1ptr = xlsub(jcolm1); + // Use supernodes of type T2 : see SuperLU paper + if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = IND_EMPTY; + // Make sure the number of columns in a supernode doesn't // exceed threshold if ( (jcol - fsupc) >= maxsuper) jsuper = IND_EMPTY; diff --git a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h 
b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h index a0cab563d..9e1708da1 100644 --- a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +++ b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h @@ -108,7 +108,7 @@ int LU_copy_to_ucol(const int jcol, const int nseg, SegRepType& segrep, RepfnzTy for (i = 0; i < segsize; i++) { irow = lsub(isub); - usub(nextu) = perm_r(irow); // Unlike teh L part, the U part is stored in its final order + usub(nextu) = perm_r(irow); // Unlike the L part, the U part is stored in its final order ucol(nextu) = dense(irow); dense(irow) = Scalar(0.0); nextu++; From b0cba2d988de3f4535e0b7ac9799b19700e09b7c Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Wed, 18 Jul 2012 16:59:00 +0200 Subject: [PATCH 22/73] Add a draft (not clean ) version of the COLAMD ordering implementation --- Eigen/src/OrderingMethods/Eigen_Colamd.h | 2515 ++++++++++++++++++++- Eigen/src/OrderingMethods/Ordering.h | 93 +- Eigen/src/SparseLU/SparseLU.h | 28 +- Eigen/src/SuperLUSupport/SuperLUSupport.h | 2 +- bench/spbench/test_sparseLU.cpp | 22 +- 5 files changed, 2604 insertions(+), 56 deletions(-) diff --git a/Eigen/src/OrderingMethods/Eigen_Colamd.h b/Eigen/src/OrderingMethods/Eigen_Colamd.h index 8caee7740..39701d0af 100644 --- a/Eigen/src/OrderingMethods/Eigen_Colamd.h +++ b/Eigen/src/OrderingMethods/Eigen_Colamd.h @@ -1,5 +1,2518 @@ +// // This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Desire Nuentsa Wakam +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +// This file is modified from the eigen_colamd/symamd library. The copyright is below + +// The authors of the code itself are Stefan I. Larimore and Timothy A. +// Davis (davis@cise.ufl.edu), University of Florida. 
The algorithm was +// developed in collaboration with John Gilbert, Xerox PARC, and Esmond +// Ng, Oak Ridge National Laboratory. +// +// Date: +// +// September 8, 2003. Version 2.3. +// +// Acknowledgements: +// +// This work was supported by the National Science Foundation, under +// grants DMS-9504974 and DMS-9803599. +// +// Notice: +// +// Copyright (c) 1998-2003 by the University of Florida. +// All Rights Reserved. +// +// THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY +// EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. +// +// Permission is hereby granted to use, copy, modify, and/or distribute +// this program, provided that the Copyright, this License, and the +// Availability of the original version is retained on all copies and made +// accessible to the end-user of any code or package that includes COLAMD +// or any modified version of COLAMD. +// +// Availability: +// +// The eigen_colamd/symamd library is available at +// +// http://www.cise.ufl.edu/research/sparse/eigen_colamd/ + +// This is the http://www.cise.ufl.edu/research/sparse/eigen_colamd/eigen_colamd.h +// file. It is required by the eigen_colamd.c, colamdmex.c, and symamdmex.c +// files, and by any C code that calls the routines whose prototypes are +// listed below, or that uses the eigen_colamd/symamd definitions listed below. + #ifndef EIGEN_COLAMD_H #define EIGEN_COLAMD_H -#endif \ No newline at end of file +/* Ensure that debugging is turned off: */ +#ifndef COLAMD_NDEBUG +#define COLAMD_NDEBUG +#endif /* NDEBUG */ + +/* ========================================================================== */ +/* === Knob and statistics definitions ====================================== */ +/* ========================================================================== */ + +/* size of the knobs [ ] array. Only knobs [0..1] are currently used. */ +#define EIGEN_COLAMD_KNOBS 20 + +/* number of output statistics. Only stats [0..6] are currently used. 
*/
+#define EIGEN_COLAMD_STATS 20
+
+/* knobs [0] and stats [0]: dense row knob and output statistic. */
+#define EIGEN_COLAMD_DENSE_ROW 0
+
+/* knobs [1] and stats [1]: dense column knob and output statistic. */
+#define EIGEN_COLAMD_DENSE_COL 1
+
+/* stats [2]: memory defragmentation count output statistic */
+#define EIGEN_COLAMD_DEFRAG_COUNT 2
+
+/* stats [3]: eigen_colamd status: zero OK, > 0 warning or notice, < 0 error */
+#define EIGEN_COLAMD_STATUS 3
+
+/* stats [4..6]: error info, or info on jumbled columns */
+#define EIGEN_COLAMD_INFO1 4
+#define EIGEN_COLAMD_INFO2 5
+#define EIGEN_COLAMD_INFO3 6
+
+/* error codes returned in stats [3]: */
+/* (zero means success, a positive value is a notice, a negative value is an error) */
+#define EIGEN_COLAMD_OK (0)
+#define EIGEN_COLAMD_OK_BUT_JUMBLED (1)
+#define EIGEN_COLAMD_ERROR_A_not_present (-1)
+#define EIGEN_COLAMD_ERROR_p_not_present (-2)
+#define EIGEN_COLAMD_ERROR_nrow_negative (-3)
+#define EIGEN_COLAMD_ERROR_ncol_negative (-4)
+#define EIGEN_COLAMD_ERROR_nnz_negative (-5)
+#define EIGEN_COLAMD_ERROR_p0_nonzero (-6)
+#define EIGEN_COLAMD_ERROR_A_too_small (-7)
+#define EIGEN_COLAMD_ERROR_col_length_negative (-8)
+#define EIGEN_COLAMD_ERROR_row_index_out_of_bounds (-9)
+#define EIGEN_COLAMD_ERROR_out_of_memory (-10)
+#define EIGEN_COLAMD_ERROR_internal_error (-999)
+
+/* ========================================================================== */
+/* === Definitions ========================================================== */
+/* ========================================================================== */
+
+/* Function-like max/min helpers. Each argument appears twice in the */
+/* expansion, so do not pass expressions with side effects. */
+#define COLAMD_MAX(a,b) (((a) > (b)) ? (a) : (b))
+#define COLAMD_MIN(a,b) (((a) < (b)) ? (a) : (b))
+
+/* Maps r to -(r)-1. Applying the macro twice gives back r. */
+#define EIGEN_ONES_COMPLEMENT(r) (-(r)-1)
+
+/* -------------------------------------------------------------------------- */
+
+/* Sentinel for "no column": used for empty degree-list links and empty */
+/* hash-bucket heads. */
+#define EIGEN_COLAMD_EMPTY (-1)
+
+/* Row and column status */
+#define EIGEN_ALIVE (0)
+#define EIGEN_DEAD (-1)
+
+/* Column status */
+#define EIGEN_DEAD_PRINCIPAL (-1)
+#define EIGEN_DEAD_NON_PRINCIPAL (-2)
+
+/* Macros for row and column status update and checking. */
+/* These expand to accesses on identifiers literally named Row and Col, so */
+/* they can only be used where arrays with those names are in scope. */
+/* A row is dead when its shared2.mark is negative; a column is dead when */
+/* its start field is negative. */
+/* Note: EIGEN_ROW_IS_MARKED_DEAD does not parenthesize its argument in the */
+/* expansion; pass a plain variable or a parenthesized expression. */
+/* NOTE(review): EIGEN_EIGEN_COL_IS_DEAD_PRINCIPAL carries a doubled EIGEN_ */
+/* prefix, presumably a search-and-replace artifact; the name is kept */
+/* unchanged here because code outside this section may refer to it. */
+#define EIGEN_ROW_IS_DEAD(r) EIGEN_ROW_IS_MARKED_DEAD (Row[r].shared2.mark)
+#define EIGEN_ROW_IS_MARKED_DEAD(row_mark) (row_mark < EIGEN_ALIVE)
+#define EIGEN_ROW_IS_ALIVE(r) (Row [r].shared2.mark >= EIGEN_ALIVE)
+#define EIGEN_COL_IS_DEAD(c) (Col [c].start < EIGEN_ALIVE)
+#define EIGEN_COL_IS_ALIVE(c) (Col [c].start >= EIGEN_ALIVE)
+#define EIGEN_EIGEN_COL_IS_DEAD_PRINCIPAL(c) (Col [c].start == EIGEN_DEAD_PRINCIPAL)
+#define EIGEN_KILL_ROW(r) { Row [r].shared2.mark = EIGEN_DEAD ; }
+#define EIGEN_KILL_PRINCIPAL_COL(c) { Col [c].start = EIGEN_DEAD_PRINCIPAL ; }
+#define EIGEN_KILL_NON_PRINCIPAL_COL(c) { Col [c].start = EIGEN_DEAD_NON_PRINCIPAL ; }
+
+/* ========================================================================== */
+/* === Colamd reporting mechanism =========================================== */
+/* ========================================================================== */
+
+#ifdef MATLAB_MEX_FILE
+
+/* use mexPrintf in a MATLAB mexFunction, for debugging and statistics output */
+#define PRINTF mexPrintf
+
+/* In MATLAB, matrices are 1-based to the user, but 0-based internally */
+#define INDEX(i) ((i)+1)
+
+#else
+
+/* Use printf in standard C environment, for debugging and statistics output. */
+/* Output is generated only if debugging is enabled at compile time, or if */
+/* the caller explicitly calls eigen_colamd_report or symamd_report. 
*/
+#define PRINTF printf
+
+/* In C, matrices are 0-based and indices are reported as such in *_report */
+#define INDEX(i) (i)
+
+#endif /* MATLAB_MEX_FILE */
+
+ // == Row and Column structures ==
+ //
+ // Both structures consist only of int members. eigen_colamd stores the
+ // Col and Row arrays inside the tail of the integer workspace A, and
+ // EIGEN_COLAMD_C / EIGEN_COLAMD_R measure their size in units of int,
+ // so the member layout must not be changed casually.
+ // Each union overlays fields whose validity depends on the current state
+ // of the column or row (alive/dead, in a degree list or in a hash list).
+
+/* Per-column bookkeeping record. */
+typedef struct EIGEN_Colamd_Col_struct
+{
+    int start ;             /* index for A of first row in this column, or EIGEN_DEAD */
+                            /* if column is dead */
+    int length ;            /* number of rows in this column */
+    union
+    {
+        int thickness ;     /* number of original columns represented by this */
+                            /* col, if the column is alive */
+        int parent ;        /* parent in parent tree super-column structure, if */
+                            /* the column is dead */
+    } shared1 ;
+    union
+    {
+        int score ;         /* the score used to maintain heap, if col is alive */
+        int order ;         /* pivot ordering of this column, if col is dead */
+    } shared2 ;
+    union
+    {
+        int headhash ;      /* head of a hash bucket, if col is at the head of */
+                            /* a degree list */
+        int hash ;          /* hash value, if col is not in a degree list */
+        int prev ;          /* previous column in degree list, if col is in a */
+                            /* degree list (but not at the head of a degree list) */
+    } shared3 ;
+    union
+    {
+        int degree_next ;   /* next column, if col is in a degree list */
+        int hash_next ;     /* next column, if col is in a hash list */
+    } shared4 ;
+
+} EIGEN_Colamd_Col ;
+
+/* Per-row bookkeeping record. */
+typedef struct EIGEN_Colamd_Row_struct
+{
+    int start ;             /* index for A of first col in this row */
+    int length ;            /* number of principal columns in this row */
+    union
+    {
+        int degree ;        /* number of principal & non-principal columns in row */
+        int p ;             /* used as a row pointer in eigen_init_rows_cols () */
+    } shared1 ;
+    union
+    {
+        int mark ;          /* for computing set differences and marking dead rows */
+        int first_column ;  /* first column in row (used in garbage collection) */
+    } shared2 ;
+
+} EIGEN_Colamd_Row ;
+
+/* ========================================================================== */
+/* === Colamd recommended memory size ======================================= */
+/* 
========================================================================== */
+
+/*
+ The recommended length Alen of the array A passed to eigen_colamd is given by
+ the EIGEN_COLAMD_RECOMMENDED (nnz, n_row, n_col) macro. It returns -1 if any
+ argument is negative. 2*nnz space is required for the row and column
+ indices of the matrix. EIGEN_COLAMD_C (n_col) + EIGEN_COLAMD_R (n_row) space is
+ required for the Col and Row arrays, respectively, which are internal to
+ eigen_colamd. An additional n_col space is the minimal amount of "elbow room",
+ and nnz/5 more space is recommended for run time efficiency.
+
+ This macro is not needed when using symamd.
+
+ Explicit typecast to int added Sept. 23, 2002, COLAMD version 2.2, to avoid
+ gcc -pedantic warning messages.
+*/
+
+/* Size, in units of int, of a Col array with n_col+1 entries and of a Row */
+/* array with n_row+1 entries. */
+#define EIGEN_COLAMD_C(n_col) ((int) (((n_col) + 1) * sizeof (EIGEN_Colamd_Col) / sizeof (int)))
+#define EIGEN_COLAMD_R(n_row) ((int) (((n_row) + 1) * sizeof (EIGEN_Colamd_Row) / sizeof (int)))
+
+/* NOTE(review): the sum below is evaluated in the type of its arguments */
+/* (int in the callers visible in this file), so very large inputs could */
+/* overflow -- confirm that callers bound nnz accordingly. */
+#define EIGEN_COLAMD_RECOMMENDED(nnz, n_row, n_col) \
+( \
+((nnz) < 0 || (n_row) < 0 || (n_col) < 0) \
+? \
+    (-1) \
+: \
+    (2 * (nnz) + EIGEN_COLAMD_C (n_col) + EIGEN_COLAMD_R (n_row) + (n_col) + ((nnz) / 5)) \
+)
+
+ // Various routines
+ // Forward declarations for the routines of this file.
+int eigen_colamd_recommended (int nnz, int n_row, int n_col) ;
+
+void eigen_colamd_set_defaults (double knobs [EIGEN_COLAMD_KNOBS]) ;
+
+bool eigen_colamd (int n_row, int n_col, int Alen, int A [], int p [], double knobs[EIGEN_COLAMD_KNOBS], int stats [EIGEN_COLAMD_STATS]) ;
+
+void eigen_colamd_report (int stats [EIGEN_COLAMD_STATS]);
+
+int eigen_init_rows_cols (int n_row, int n_col, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col col [], int A [], int p [], int stats[EIGEN_COLAMD_STATS] );
+
+void eigen_init_scoring (int n_row, int n_col, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col Col [], int A [], int head [],
+                         double knobs[EIGEN_COLAMD_KNOBS], int *p_n_row2, int *p_n_col2, int *p_max_deg);
+
+int eigen_find_ordering (int n_row, int n_col, int Alen, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col Col [], int A [], int head [],
+                         int n_col2, int max_deg, int pfree);
+
+void eigen_order_children (int n_col, EIGEN_Colamd_Col Col [], int p []);
+
+/* The first two parameters exist only in debug builds. */
+void eigen_detect_super_cols (
+#ifndef COLAMD_NDEBUG
+    int n_col,
+    EIGEN_Colamd_Row Row [],
+#endif /* COLAMD_NDEBUG */
+    EIGEN_Colamd_Col Col [],
+    int A [],
+    int head [],
+    int row_start,
+    int row_length ) ;
+
+ int eigen_garbage_collection (int n_row, int n_col, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col Col [], int A [], int *pfree) ;
+
+ int eigen_clear_mark (int n_row, EIGEN_Colamd_Row Row [] ) ;
+
+ /* NOTE(review): method is a non-const char *, yet eigen_colamd_report */
+ /* passes a string literal; that conversion is ill-formed since C++11. */
+ /* Consider const char * once the definition can be changed with it. */
+ void eigen_print_report (char *method, int stats [EIGEN_COLAMD_STATS]) ;
+
+/* ========================================================================== */
+/* === Debugging prototypes and definitions ================================= */
+/* ========================================================================== */
+
+#ifndef COLAMD_NDEBUG
+
+/* colamd_debug is the *ONLY* global variable, and is only */
+/* present when debugging */
+/* NOTE(review): this is a definition inside a header; including the header */
+/* from two translation units in a debug build would define it twice. */
+
+ int colamd_debug ; /* debug print level */
+
+/* Debug print helpers: params must be a parenthesized printf-style argument */
+/* list, e.g. COLAMD_DEBUG1 (("x = %d\n", x)). Level N prints only when */
+/* colamd_debug >= N; level 0 always prints. */
+#define COLAMD_DEBUG0(params) { (void) PRINTF params ; }
+#define COLAMD_DEBUG1(params) { if (colamd_debug >= 1) (void) PRINTF params ; }
+#define COLAMD_DEBUG2(params) { if (colamd_debug >= 2) (void) PRINTF params ; }
+#define COLAMD_DEBUG3(params) { if (colamd_debug >= 3) (void) PRINTF params ; }
+#define COLAMD_DEBUG4(params) { if (colamd_debug >= 4) (void) PRINTF params ; }
+
+#ifdef MATLAB_MEX_FILE
+#define COLAMD_ASSERT(expression) (mxAssert ((expression), ""))
+#else
+#define COLAMD_ASSERT(expression) (assert (expression))
+#endif /* MATLAB_MEX_FILE */
+
+ void eigen_colamd_get_debug /* gets the debug print level from getenv */
+(
+    char *method
+) ;
+
+ void eigen_debug_deg_lists
+(
+    int n_row,
+    int n_col,
+    EIGEN_Colamd_Row Row [],
+    EIGEN_Colamd_Col Col [],
+    int head [],
+    int min_score,
+    int should,
+    int max_deg
+) ;
+
+ void eigen_debug_mark
+(
+    int n_row,
+    EIGEN_Colamd_Row Row [],
+    int tag_mark,
+    int max_mark
+) ;
+
+ void eigen_debug_matrix
+(
+    int n_row,
+    int n_col,
+    EIGEN_Colamd_Row Row [],
+    EIGEN_Colamd_Col Col [],
+    int A []
+) ;
+
+ void eigen_debug_structures
+(
+    int n_row,
+    int n_col,
+    EIGEN_Colamd_Row Row [],
+    EIGEN_Colamd_Col Col [],
+    int A [],
+    int n_col2
+) ;
+
+#else /* COLAMD_NDEBUG */
+
+/* === No debugging ========================================================= */
+
+/* With debugging disabled the print helpers expand to an empty statement */
+/* and the assertion expands to a no-op expression. */
+#define COLAMD_DEBUG0(params) ;
+#define COLAMD_DEBUG1(params) ;
+#define COLAMD_DEBUG2(params) ;
+#define COLAMD_DEBUG3(params) ;
+#define COLAMD_DEBUG4(params) ;
+
+#define COLAMD_ASSERT(expression) ((void) 0)
+
+#endif /* COLAMD_NDEBUG */
+
+
+
+/**
+ * \brief Returns the recommended value of Alen
+ *
+ * Returns recommended value of Alen for use by eigen_colamd.
+ * Returns -1 if any input argument is negative.
+ * The use of this routine or macro is optional.
+ * Note that the macro uses its arguments more than once,
+ * so be careful for side effects, if you pass expressions as arguments to EIGEN_COLAMD_RECOMMENDED.
+ *
+ * \param nnz nonzeros in A
+ * \param n_row number of rows in A
+ * \param n_col number of columns in A
+ * \return recommended value of Alen for use by eigen_colamd
+ *
+ * \note Declared inline because this definition lives in a header:
+ * without it, every translation unit that includes the header would emit
+ * its own external definition and the program would fail to link.
+ */
+inline int eigen_colamd_recommended ( int nnz, int n_row, int n_col)
+{
+    /* The macro evaluates to -1 when any argument is negative. */
+    return (EIGEN_COLAMD_RECOMMENDED (nnz, n_row, n_col)) ;
+}
+
+/**
+ * \brief Sets the default parameters. The use of this routine is optional.
+ *
+ * Colamd: rows with more than (knobs [EIGEN_COLAMD_DENSE_ROW] * n_col)
+ * entries are removed prior to ordering. Columns with more than
+ * (knobs [EIGEN_COLAMD_DENSE_COL] * n_row) entries are removed prior to
+ * ordering, and placed last in the output column ordering.
+ *
+ * EIGEN_COLAMD_DENSE_ROW and EIGEN_COLAMD_DENSE_COL are defined as 0 and 1,
+ * respectively, in eigen_colamd.h. Default values of these two knobs
+ * are both 0.5. Currently, only knobs [0] and knobs [1] are
+ * used, but future versions may use more knobs. If so, they will
+ * be properly set to their defaults by the future version of
+ * eigen_colamd_set_defaults, so that the code that calls eigen_colamd will
+ * not need to change, assuming that you either use
+ * eigen_colamd_set_defaults, or pass a (double *) NULL pointer as the
+ * knobs array to eigen_colamd or symamd.
+ * + * \param knobs parameter settings for eigen_colamd + */ +void eigen_colamd_set_defaults(double knobs[EIGEN_COLAMD_KNOBS]) +{ + /* === Local variables ================================================== */ + + int i ; + + if (!knobs) + { + return ; /* no knobs to initialize */ + } + for (i = 0 ; i < EIGEN_COLAMD_KNOBS ; i++) + { + knobs [i] = 0 ; + } + knobs [EIGEN_COLAMD_DENSE_ROW] = 0.5 ; /* ignore rows over 50% dense */ + knobs [EIGEN_COLAMD_DENSE_COL] = 0.5 ; /* ignore columns over 50% dense */ +} + +/** + * \brief Computes a column ordering using the column approximate minimum degree ordering + * + * Computes a column ordering (Q) of A such that P(AQ)=LU or + * (AQ)'AQ=LL' have less fill-in and require fewer floating point + * operations than factorizing the unpermuted matrix A or A'A, + * respectively. + * + * + * \param n_row number of rows in A + * \param n_col number of columns in A + * \param Alen, size of the array A + * \param A row indices of the matrix, of size ALen + * \param p column pointers of A, of size n_col+1 + * \param knobs parameter settings for eigen_colamd + * \param stats eigen_colamd output statistics and error codes + */ +bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[EIGEN_COLAMD_KNOBS], int stats[EIGEN_COLAMD_STATS]) +{ + /* === Local variables ================================================== */ + + int i ; /* loop index */ + int nnz ; /* nonzeros in A */ + int Row_size ; /* size of Row [], in integers */ + int Col_size ; /* size of Col [], in integers */ + int need ; /* minimum required length of A */ + EIGEN_Colamd_Row *Row ; /* pointer into A of Row [0..n_row] array */ + EIGEN_Colamd_Col *Col ; /* pointer into A of Col [0..n_col] array */ + int n_col2 ; /* number of non-dense, non-empty columns */ + int n_row2 ; /* number of non-dense, non-empty rows */ + int ngarbage ; /* number of garbage collections performed */ + int max_deg ; /* maximum row degree */ + double default_knobs 
[EIGEN_COLAMD_KNOBS] ; /* default knobs array */ + +#ifndef COLAMD_NDEBUG + eigen_colamd_get_debug ("eigen_colamd") ; +#endif /* COLAMD_NDEBUG */ + + /* === Check the input arguments ======================================== */ + + if (!stats) + { + COLAMD_DEBUG0 (("eigen_colamd: stats not present\n")) ; + return (false) ; + } + for (i = 0 ; i < EIGEN_COLAMD_STATS ; i++) + { + stats [i] = 0 ; + } + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_OK ; + stats [EIGEN_COLAMD_INFO1] = -1 ; + stats [EIGEN_COLAMD_INFO2] = -1 ; + + if (!A) /* A is not present */ + { + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_A_not_present ; + COLAMD_DEBUG0 (("eigen_colamd: A not present\n")) ; + return (false) ; + } + + if (!p) /* p is not present */ + { + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_p_not_present ; + COLAMD_DEBUG0 (("eigen_colamd: p not present\n")) ; + return (false) ; + } + + if (n_row < 0) /* n_row must be >= 0 */ + { + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_nrow_negative ; + stats [EIGEN_COLAMD_INFO1] = n_row ; + COLAMD_DEBUG0 (("eigen_colamd: nrow negative %d\n", n_row)) ; + return (false) ; + } + + if (n_col < 0) /* n_col must be >= 0 */ + { + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_ncol_negative ; + stats [EIGEN_COLAMD_INFO1] = n_col ; + COLAMD_DEBUG0 (("eigen_colamd: ncol negative %d\n", n_col)) ; + return (false) ; + } + + nnz = p [n_col] ; + if (nnz < 0) /* nnz must be >= 0 */ + { + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_nnz_negative ; + stats [EIGEN_COLAMD_INFO1] = nnz ; + COLAMD_DEBUG0 (("eigen_colamd: number of entries negative %d\n", nnz)) ; + return (false) ; + } + + if (p [0] != 0) + { + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_p0_nonzero ; + stats [EIGEN_COLAMD_INFO1] = p [0] ; + COLAMD_DEBUG0 (("eigen_colamd: p[0] not zero %d\n", p [0])) ; + return (false) ; + } + + /* === If no knobs, set default knobs =================================== */ + + if (!knobs) + { + eigen_colamd_set_defaults (default_knobs) ; + 
knobs = default_knobs ; + } + + /* === Allocate the Row and Col arrays from array A ===================== */ + + Col_size = EIGEN_COLAMD_C (n_col) ; + Row_size = EIGEN_COLAMD_R (n_row) ; + need = 2*nnz + n_col + Col_size + Row_size ; + + if (need > Alen) + { + /* not enough space in array A to perform the ordering */ + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_A_too_small ; + stats [EIGEN_COLAMD_INFO1] = need ; + stats [EIGEN_COLAMD_INFO2] = Alen ; + COLAMD_DEBUG0 (("eigen_colamd: Need Alen >= %d, given only Alen = %d\n", need,Alen)); + return (false) ; + } + + Alen -= Col_size + Row_size ; + Col = (EIGEN_Colamd_Col *) &A [Alen] ; + Row = (EIGEN_Colamd_Row *) &A [Alen + Col_size] ; + + /* === Construct the row and column data structures ===================== */ + + if (!eigen_init_rows_cols (n_row, n_col, Row, Col, A, p, stats)) + { + /* input matrix is invalid */ + COLAMD_DEBUG0 (("eigen_colamd: Matrix invalid\n")) ; + return (false) ; + } + + /* === Initialize scores, kill dense rows/columns ======================= */ + + eigen_init_scoring (n_row, n_col, Row, Col, A, p, knobs, + &n_row2, &n_col2, &max_deg) ; + + /* === Order the supercolumns =========================================== */ + + ngarbage = eigen_find_ordering (n_row, n_col, Alen, Row, Col, A, p, + n_col2, max_deg, 2*nnz) ; + + /* === Order the non-principal columns ================================== */ + + eigen_order_children (n_col, Col, p) ; + + /* === Return statistics in stats ======================================= */ + + stats [EIGEN_COLAMD_DENSE_ROW] = n_row - n_row2 ; + stats [EIGEN_COLAMD_DENSE_COL] = n_col - n_col2 ; + stats [EIGEN_COLAMD_DEFRAG_COUNT] = ngarbage ; + COLAMD_DEBUG0 (("eigen_colamd: done.\n")) ; + return (true) ; +} + +/* ========================================================================== */ +/* === eigen_colamd_report ======================================================== */ +/* ========================================================================== */ + 
+/*
+    Prints the statistics gathered by eigen_colamd by forwarding stats to
+    eigen_print_report under the method name "eigen_colamd".
+    Declared inline because this definition lives in a header.
+*/
+inline void eigen_colamd_report
+(
+    int stats [EIGEN_COLAMD_STATS]
+)
+{
+    /* eigen_print_report takes a non-const char *, and converting a string */
+    /* literal to char * is ill-formed since C++11; pass a writable copy. */
+    char method [] = "eigen_colamd" ;
+    eigen_print_report (method, stats) ;
+}
+
+
+/* ========================================================================== */
+/* === NON-USER-CALLABLE ROUTINES: ========================================== */
+/* ========================================================================== */
+
+/* There are no user-callable routines beyond this point in the file */
+
+
+/* ========================================================================== */
+/* === eigen_init_rows_cols ======================================================= */
+/* ========================================================================== */
+
+/*
+    Takes the column form of the matrix in A and creates the row form of the
+    matrix. Also, row and column attributes are stored in the Col and Row
+    structs. If the columns are un-sorted or contain duplicate row indices,
+    this routine will also sort and remove duplicate row indices from the
+    column form of the matrix. Returns false if the matrix is invalid,
+    true otherwise. Not user-callable.
+*/ + + int eigen_init_rows_cols /* returns true if OK, or false otherwise */ +( + /* === Parameters ======================================================= */ + + int n_row, /* number of rows of A */ + int n_col, /* number of columns of A */ + EIGEN_Colamd_Row Row [], /* of size n_row+1 */ + EIGEN_Colamd_Col Col [], /* of size n_col+1 */ + int A [], /* row indices of A, of size Alen */ + int p [], /* pointers to columns in A, of size n_col+1 */ + int stats [EIGEN_COLAMD_STATS] /* eigen_colamd statistics */ +) +{ + /* === Local variables ================================================== */ + + int col ; /* a column index */ + int row ; /* a row index */ + int *cp ; /* a column pointer */ + int *cp_end ; /* a pointer to the end of a column */ + int *rp ; /* a row pointer */ + int *rp_end ; /* a pointer to the end of a row */ + int last_row ; /* previous row */ + + /* === Initialize columns, and check column pointers ==================== */ + + for (col = 0 ; col < n_col ; col++) + { + Col [col].start = p [col] ; + Col [col].length = p [col+1] - p [col] ; + + if (Col [col].length < 0) + { + /* column pointers must be non-decreasing */ + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_col_length_negative ; + stats [EIGEN_COLAMD_INFO1] = col ; + stats [EIGEN_COLAMD_INFO2] = Col [col].length ; + COLAMD_DEBUG0 (("eigen_colamd: col %d length %d < 0\n", col, Col [col].length)) ; + return (false) ; + } + + Col [col].shared1.thickness = 1 ; + Col [col].shared2.score = 0 ; + Col [col].shared3.prev = EIGEN_COLAMD_EMPTY ; + Col [col].shared4.degree_next = EIGEN_COLAMD_EMPTY ; + } + + /* p [0..n_col] no longer needed, used as "head" in subsequent routines */ + + /* === Scan columns, compute row degrees, and check row indices ========= */ + + stats [EIGEN_COLAMD_INFO3] = 0 ; /* number of duplicate or unsorted row indices*/ + + for (row = 0 ; row < n_row ; row++) + { + Row [row].length = 0 ; + Row [row].shared2.mark = -1 ; + } + + for (col = 0 ; col < n_col ; col++) + { + 
last_row = -1 ; + + cp = &A [p [col]] ; + cp_end = &A [p [col+1]] ; + + while (cp < cp_end) + { + row = *cp++ ; + + /* make sure row indices within range */ + if (row < 0 || row >= n_row) + { + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_row_index_out_of_bounds ; + stats [EIGEN_COLAMD_INFO1] = col ; + stats [EIGEN_COLAMD_INFO2] = row ; + stats [EIGEN_COLAMD_INFO3] = n_row ; + COLAMD_DEBUG0 (("eigen_colamd: row %d col %d out of bounds\n", row, col)) ; + return (false) ; + } + + if (row <= last_row || Row [row].shared2.mark == col) + { + /* row index are unsorted or repeated (or both), thus col */ + /* is jumbled. This is a notice, not an error condition. */ + stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_OK_BUT_JUMBLED ; + stats [EIGEN_COLAMD_INFO1] = col ; + stats [EIGEN_COLAMD_INFO2] = row ; + (stats [EIGEN_COLAMD_INFO3]) ++ ; + COLAMD_DEBUG1 (("eigen_colamd: row %d col %d unsorted/duplicate\n",row,col)); + } + + if (Row [row].shared2.mark != col) + { + Row [row].length++ ; + } + else + { + /* this is a repeated entry in the column, */ + /* it will be removed */ + Col [col].length-- ; + } + + /* mark the row as having been seen in this column */ + Row [row].shared2.mark = col ; + + last_row = row ; + } + } + + /* === Compute row pointers ============================================= */ + + /* row form of the matrix starts directly after the column */ + /* form of matrix in A */ + Row [0].start = p [n_col] ; + Row [0].shared1.p = Row [0].start ; + Row [0].shared2.mark = -1 ; + for (row = 1 ; row < n_row ; row++) + { + Row [row].start = Row [row-1].start + Row [row-1].length ; + Row [row].shared1.p = Row [row].start ; + Row [row].shared2.mark = -1 ; + } + + /* === Create row form ================================================== */ + + if (stats [EIGEN_COLAMD_STATUS] == EIGEN_COLAMD_OK_BUT_JUMBLED) + { + /* if cols jumbled, watch for repeated row indices */ + for (col = 0 ; col < n_col ; col++) + { + cp = &A [p [col]] ; + cp_end = &A [p [col+1]] ; + while (cp < 
cp_end) + { + row = *cp++ ; + if (Row [row].shared2.mark != col) + { + A [(Row [row].shared1.p)++] = col ; + Row [row].shared2.mark = col ; + } + } + } + } + else + { + /* if cols not jumbled, we don't need the mark (this is faster) */ + for (col = 0 ; col < n_col ; col++) + { + cp = &A [p [col]] ; + cp_end = &A [p [col+1]] ; + while (cp < cp_end) + { + A [(Row [*cp++].shared1.p)++] = col ; + } + } + } + + /* === Clear the row marks and set row degrees ========================== */ + + for (row = 0 ; row < n_row ; row++) + { + Row [row].shared2.mark = 0 ; + Row [row].shared1.degree = Row [row].length ; + } + + /* === See if we need to re-create columns ============================== */ + + if (stats [EIGEN_COLAMD_STATUS] == EIGEN_COLAMD_OK_BUT_JUMBLED) + { + COLAMD_DEBUG0 (("eigen_colamd: reconstructing column form, matrix jumbled\n")) ; + +#ifndef COLAMD_NDEBUG + /* make sure column lengths are correct */ + for (col = 0 ; col < n_col ; col++) + { + p [col] = Col [col].length ; + } + for (row = 0 ; row < n_row ; row++) + { + rp = &A [Row [row].start] ; + rp_end = rp + Row [row].length ; + while (rp < rp_end) + { + p [*rp++]-- ; + } + } + for (col = 0 ; col < n_col ; col++) + { + COLAMD_ASSERT (p [col] == 0) ; + } + /* now p is all zero (different than when debugging is turned off) */ +#endif /* COLAMD_NDEBUG */ + + /* === Compute col pointers ========================================= */ + + /* col form of the matrix starts at A [0]. */ + /* Note, we may have a gap between the col form and the row */ + /* form if there were duplicate entries, if so, it will be */ + /* removed upon the first garbage collection */ + Col [0].start = 0 ; + p [0] = Col [0].start ; + for (col = 1 ; col < n_col ; col++) + { + /* note that the lengths here are for pruned columns, i.e. 
*/ + /* no duplicate row indices will exist for these columns */ + Col [col].start = Col [col-1].start + Col [col-1].length ; + p [col] = Col [col].start ; + } + + /* === Re-create col form =========================================== */ + + for (row = 0 ; row < n_row ; row++) + { + rp = &A [Row [row].start] ; + rp_end = rp + Row [row].length ; + while (rp < rp_end) + { + A [(p [*rp++])++] = row ; + } + } + } + + /* === Done. Matrix is not (or no longer) jumbled ====================== */ + + return (true) ; +} + + +/* ========================================================================== */ +/* === eigen_init_scoring ========================================================= */ +/* ========================================================================== */ + +/* + Kills dense or empty columns and rows, calculates an initial score for + each column, and places all columns in the degree lists. Not user-callable. +*/ + + void eigen_init_scoring +( + /* === Parameters ======================================================= */ + + int n_row, /* number of rows of A */ + int n_col, /* number of columns of A */ + EIGEN_Colamd_Row Row [], /* of size n_row+1 */ + EIGEN_Colamd_Col Col [], /* of size n_col+1 */ + int A [], /* column form and row form of A */ + int head [], /* of size n_col+1 */ + double knobs [EIGEN_COLAMD_KNOBS],/* parameters */ + int *p_n_row2, /* number of non-dense, non-empty rows */ + int *p_n_col2, /* number of non-dense, non-empty columns */ + int *p_max_deg /* maximum row degree */ +) +{ + /* === Local variables ================================================== */ + + int c ; /* a column index */ + int r, row ; /* a row index */ + int *cp ; /* a column pointer */ + int deg ; /* degree of a row or column */ + int *cp_end ; /* a pointer to the end of a column */ + int *new_cp ; /* new column pointer */ + int col_length ; /* length of pruned column */ + int score ; /* current column score */ + int n_col2 ; /* number of non-dense, non-empty columns */ 
+ int n_row2 ; /* number of non-dense, non-empty rows */ + int dense_row_count ; /* remove rows with more entries than this */ + int dense_col_count ; /* remove cols with more entries than this */ + int min_score ; /* smallest column score */ + int max_deg ; /* maximum row degree */ + int next_col ; /* Used to add to degree list.*/ + +#ifndef COLAMD_NDEBUG + int debug_count ; /* debug only. */ +#endif /* COLAMD_NDEBUG */ + + /* === Extract knobs ==================================================== */ + + dense_row_count = COLAMD_MAX (0, COLAMD_MIN (knobs [EIGEN_COLAMD_DENSE_ROW] * n_col, n_col)) ; + dense_col_count = COLAMD_MAX (0, COLAMD_MIN (knobs [EIGEN_COLAMD_DENSE_COL] * n_row, n_row)) ; + COLAMD_DEBUG1 (("eigen_colamd: densecount: %d %d\n", dense_row_count, dense_col_count)) ; + max_deg = 0 ; + n_col2 = n_col ; + n_row2 = n_row ; + + /* === Kill empty columns =============================================== */ + + /* Put the empty columns at the end in their natural order, so that LU */ + /* factorization can proceed as far as possible. 
*/ + for (c = n_col-1 ; c >= 0 ; c--) + { + deg = Col [c].length ; + if (deg == 0) + { + /* this is a empty column, kill and order it last */ + Col [c].shared2.order = --n_col2 ; + EIGEN_KILL_PRINCIPAL_COL (c) ; + } + } + COLAMD_DEBUG1 (("eigen_colamd: null columns killed: %d\n", n_col - n_col2)) ; + + /* === Kill dense columns =============================================== */ + + /* Put the dense columns at the end, in their natural order */ + for (c = n_col-1 ; c >= 0 ; c--) + { + /* skip any dead columns */ + if (EIGEN_COL_IS_DEAD (c)) + { + continue ; + } + deg = Col [c].length ; + if (deg > dense_col_count) + { + /* this is a dense column, kill and order it last */ + Col [c].shared2.order = --n_col2 ; + /* decrement the row degrees */ + cp = &A [Col [c].start] ; + cp_end = cp + Col [c].length ; + while (cp < cp_end) + { + Row [*cp++].shared1.degree-- ; + } + EIGEN_KILL_PRINCIPAL_COL (c) ; + } + } + COLAMD_DEBUG1 (("eigen_colamd: Dense and null columns killed: %d\n", n_col - n_col2)) ; + + /* === Kill dense and empty rows ======================================== */ + + for (r = 0 ; r < n_row ; r++) + { + deg = Row [r].shared1.degree ; + COLAMD_ASSERT (deg >= 0 && deg <= n_col) ; + if (deg > dense_row_count || deg == 0) + { + /* kill a dense or empty row */ + EIGEN_KILL_ROW (r) ; + --n_row2 ; + } + else + { + /* keep track of max degree of remaining rows */ + max_deg = COLAMD_MAX (max_deg, deg) ; + } + } + COLAMD_DEBUG1 (("eigen_colamd: Dense and null rows killed: %d\n", n_row - n_row2)) ; + + /* === Compute initial column scores ==================================== */ + + /* At this point the row degrees are accurate. They reflect the number */ + /* of "live" (non-dense) columns in each row. No empty rows exist. */ + /* Some "live" columns may contain only dead rows, however. These are */ + /* pruned in the code below. 
*/ + + /* now find the initial matlab score for each column */ + for (c = n_col-1 ; c >= 0 ; c--) + { + /* skip dead column */ + if (EIGEN_COL_IS_DEAD (c)) + { + continue ; + } + score = 0 ; + cp = &A [Col [c].start] ; + new_cp = cp ; + cp_end = cp + Col [c].length ; + while (cp < cp_end) + { + /* get a row */ + row = *cp++ ; + /* skip if dead */ + if (EIGEN_ROW_IS_DEAD (row)) + { + continue ; + } + /* compact the column */ + *new_cp++ = row ; + /* add row's external degree */ + score += Row [row].shared1.degree - 1 ; + /* guard against integer overflow */ + score = COLAMD_MIN (score, n_col) ; + } + /* determine pruned column length */ + col_length = (int) (new_cp - &A [Col [c].start]) ; + if (col_length == 0) + { + /* a newly-made null column (all rows in this col are "dense" */ + /* and have already been killed) */ + COLAMD_DEBUG2 (("Newly null killed: %d\n", c)) ; + Col [c].shared2.order = --n_col2 ; + EIGEN_KILL_PRINCIPAL_COL (c) ; + } + else + { + /* set column length and set score */ + COLAMD_ASSERT (score >= 0) ; + COLAMD_ASSERT (score <= n_col) ; + Col [c].length = col_length ; + Col [c].shared2.score = score ; + } + } + COLAMD_DEBUG1 (("eigen_colamd: Dense, null, and newly-null columns killed: %d\n", + n_col-n_col2)) ; + + /* At this point, all empty rows and columns are dead. All live columns */ + /* are "clean" (containing no dead rows) and simplicial (no supercolumns */ + /* yet). Rows may contain dead columns, but all live rows contain at */ + /* least one live column. */ + +#ifndef COLAMD_NDEBUG + eigen_debug_structures (n_row, n_col, Row, Col, A, n_col2) ; +#endif /* COLAMD_NDEBUG */ + + /* === Initialize degree lists ========================================== */ + +#ifndef COLAMD_NDEBUG + debug_count = 0 ; +#endif /* COLAMD_NDEBUG */ + + /* clear the hash buckets */ + for (c = 0 ; c <= n_col ; c++) + { + head [c] = EIGEN_COLAMD_EMPTY ; + } + min_score = n_col ; + /* place in reverse order, so low column indices are at the front */ + /* of the lists. 
This is to encourage natural tie-breaking */ + for (c = n_col-1 ; c >= 0 ; c--) + { + /* only add principal columns to degree lists */ + if (EIGEN_COL_IS_ALIVE (c)) + { + COLAMD_DEBUG4 (("place %d score %d minscore %d ncol %d\n", + c, Col [c].shared2.score, min_score, n_col)) ; + + /* === Add columns score to DList =============================== */ + + score = Col [c].shared2.score ; + + COLAMD_ASSERT (min_score >= 0) ; + COLAMD_ASSERT (min_score <= n_col) ; + COLAMD_ASSERT (score >= 0) ; + COLAMD_ASSERT (score <= n_col) ; + COLAMD_ASSERT (head [score] >= EIGEN_COLAMD_EMPTY) ; + + /* now add this column to dList at proper score location */ + next_col = head [score] ; + Col [c].shared3.prev = EIGEN_COLAMD_EMPTY ; + Col [c].shared4.degree_next = next_col ; + + /* if there already was a column with the same score, set its */ + /* previous pointer to this new column */ + if (next_col != EIGEN_COLAMD_EMPTY) + { + Col [next_col].shared3.prev = c ; + } + head [score] = c ; + + /* see if this score is less than current min */ + min_score = COLAMD_MIN (min_score, score) ; + +#ifndef COLAMD_NDEBUG + debug_count++ ; +#endif /* COLAMD_NDEBUG */ + + } + } + +#ifndef COLAMD_NDEBUG + COLAMD_DEBUG1 (("eigen_colamd: Live cols %d out of %d, non-princ: %d\n", + debug_count, n_col, n_col-debug_count)) ; + COLAMD_ASSERT (debug_count == n_col2) ; + eigen_debug_deg_lists (n_row, n_col, Row, Col, head, min_score, n_col2, max_deg) ; +#endif /* COLAMD_NDEBUG */ + + /* === Return number of remaining columns, and max row degree =========== */ + + *p_n_col2 = n_col2 ; + *p_n_row2 = n_row2 ; + *p_max_deg = max_deg ; +} + + +/* ========================================================================== */ +/* === eigen_find_ordering ======================================================== */ +/* ========================================================================== */ + +/* + Order the principal columns of the supercolumn form of the matrix + (no supercolumns on input). 
Uses a minimum approximate column minimum + degree ordering method. Not user-callable. +*/ + + int eigen_find_ordering /* return the number of garbage collections */ +( + /* === Parameters ======================================================= */ + + int n_row, /* number of rows of A */ + int n_col, /* number of columns of A */ + int Alen, /* size of A, 2*nnz + n_col or larger */ + EIGEN_Colamd_Row Row [], /* of size n_row+1 */ + EIGEN_Colamd_Col Col [], /* of size n_col+1 */ + int A [], /* column form and row form of A */ + int head [], /* of size n_col+1 */ + int n_col2, /* Remaining columns to order */ + int max_deg, /* Maximum row degree */ + int pfree /* index of first free slot (2*nnz on entry) */ +) +{ + /* === Local variables ================================================== */ + + int k ; /* current pivot ordering step */ + int pivot_col ; /* current pivot column */ + int *cp ; /* a column pointer */ + int *rp ; /* a row pointer */ + int pivot_row ; /* current pivot row */ + int *new_cp ; /* modified column pointer */ + int *new_rp ; /* modified row pointer */ + int pivot_row_start ; /* pointer to start of pivot row */ + int pivot_row_degree ; /* number of columns in pivot row */ + int pivot_row_length ; /* number of supercolumns in pivot row */ + int pivot_col_score ; /* score of pivot column */ + int needed_memory ; /* free space needed for pivot row */ + int *cp_end ; /* pointer to the end of a column */ + int *rp_end ; /* pointer to the end of a row */ + int row ; /* a row index */ + int col ; /* a column index */ + int max_score ; /* maximum possible score */ + int cur_score ; /* score of current column */ + unsigned int hash ; /* hash value for supernode detection */ + int head_column ; /* head of hash bucket */ + int first_col ; /* first column in hash bucket */ + int tag_mark ; /* marker value for mark array */ + int row_mark ; /* Row [row].shared2.mark */ + int set_difference ; /* set difference size of row with pivot row */ + int min_score ; /* 
smallest column score */ + int col_thickness ; /* "thickness" (no. of columns in a supercol) */ + int max_mark ; /* maximum value of tag_mark */ + int pivot_col_thickness ; /* number of columns represented by pivot col */ + int prev_col ; /* Used by Dlist operations. */ + int next_col ; /* Used by Dlist operations. */ + int ngarbage ; /* number of garbage collections performed */ + +#ifndef COLAMD_NDEBUG + int debug_d ; /* debug loop counter */ + int debug_step = 0 ; /* debug loop counter */ +#endif /* COLAMD_NDEBUG */ + + /* === Initialization and clear mark ==================================== */ + + max_mark = INT_MAX - n_col ; /* INT_MAX defined in */ + tag_mark = eigen_clear_mark (n_row, Row) ; + min_score = 0 ; + ngarbage = 0 ; + COLAMD_DEBUG1 (("eigen_colamd: Ordering, n_col2=%d\n", n_col2)) ; + + /* === Order the columns ================================================ */ + + for (k = 0 ; k < n_col2 ; /* 'k' is incremented below */) + { + +#ifndef COLAMD_NDEBUG + if (debug_step % 100 == 0) + { + COLAMD_DEBUG2 (("\n... 
Step k: %d out of n_col2: %d\n", k, n_col2)) ; + } + else + { + COLAMD_DEBUG3 (("\n----------Step k: %d out of n_col2: %d\n", k, n_col2)) ; + } + debug_step++ ; + eigen_debug_deg_lists (n_row, n_col, Row, Col, head, + min_score, n_col2-k, max_deg) ; + eigen_debug_matrix (n_row, n_col, Row, Col, A) ; +#endif /* COLAMD_NDEBUG */ + + /* === Select pivot column, and order it ============================ */ + + /* make sure degree list isn't empty */ + COLAMD_ASSERT (min_score >= 0) ; + COLAMD_ASSERT (min_score <= n_col) ; + COLAMD_ASSERT (head [min_score] >= EIGEN_COLAMD_EMPTY) ; + +#ifndef COLAMD_NDEBUG + for (debug_d = 0 ; debug_d < min_score ; debug_d++) + { + COLAMD_ASSERT (head [debug_d] == EIGEN_COLAMD_EMPTY) ; + } +#endif /* COLAMD_NDEBUG */ + + /* get pivot column from head of minimum degree list */ + while (head [min_score] == EIGEN_COLAMD_EMPTY && min_score < n_col) + { + min_score++ ; + } + pivot_col = head [min_score] ; + COLAMD_ASSERT (pivot_col >= 0 && pivot_col <= n_col) ; + next_col = Col [pivot_col].shared4.degree_next ; + head [min_score] = next_col ; + if (next_col != EIGEN_COLAMD_EMPTY) + { + Col [next_col].shared3.prev = EIGEN_COLAMD_EMPTY ; + } + + COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (pivot_col)) ; + COLAMD_DEBUG3 (("Pivot col: %d\n", pivot_col)) ; + + /* remember score for defrag check */ + pivot_col_score = Col [pivot_col].shared2.score ; + + /* the pivot column is the kth column in the pivot order */ + Col [pivot_col].shared2.order = k ; + + /* increment order count by column thickness */ + pivot_col_thickness = Col [pivot_col].shared1.thickness ; + k += pivot_col_thickness ; + COLAMD_ASSERT (pivot_col_thickness > 0) ; + + /* === Garbage_collection, if necessary ============================= */ + + needed_memory = COLAMD_MIN (pivot_col_score, n_col - k) ; + if (pfree + needed_memory >= Alen) + { + pfree = eigen_garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ; + ngarbage++ ; + /* after garbage collection we will have enough */ + 
COLAMD_ASSERT (pfree + needed_memory < Alen) ; + /* garbage collection has wiped out the Row[].shared2.mark array */ + tag_mark = eigen_clear_mark (n_row, Row) ; + +#ifndef COLAMD_NDEBUG + eigen_debug_matrix (n_row, n_col, Row, Col, A) ; +#endif /* COLAMD_NDEBUG */ + } + + /* === Compute pivot row pattern ==================================== */ + + /* get starting location for this new merged row */ + pivot_row_start = pfree ; + + /* initialize new row counts to zero */ + pivot_row_degree = 0 ; + + /* tag pivot column as having been visited so it isn't included */ + /* in merged pivot row */ + Col [pivot_col].shared1.thickness = -pivot_col_thickness ; + + /* pivot row is the union of all rows in the pivot column pattern */ + cp = &A [Col [pivot_col].start] ; + cp_end = cp + Col [pivot_col].length ; + while (cp < cp_end) + { + /* get a row */ + row = *cp++ ; + COLAMD_DEBUG4 (("Pivot col pattern %d %d\n", EIGEN_ROW_IS_ALIVE (row), row)) ; + /* skip if row is dead */ + if (EIGEN_ROW_IS_DEAD (row)) + { + continue ; + } + rp = &A [Row [row].start] ; + rp_end = rp + Row [row].length ; + while (rp < rp_end) + { + /* get a column */ + col = *rp++ ; + /* add the column, if alive and untagged */ + col_thickness = Col [col].shared1.thickness ; + if (col_thickness > 0 && EIGEN_COL_IS_ALIVE (col)) + { + /* tag column in pivot row */ + Col [col].shared1.thickness = -col_thickness ; + COLAMD_ASSERT (pfree < Alen) ; + /* place column in pivot row */ + A [pfree++] = col ; + pivot_row_degree += col_thickness ; + } + } + } + + /* clear tag on pivot column */ + Col [pivot_col].shared1.thickness = pivot_col_thickness ; + max_deg = COLAMD_MAX (max_deg, pivot_row_degree) ; + +#ifndef COLAMD_NDEBUG + COLAMD_DEBUG3 (("check2\n")) ; + eigen_debug_mark (n_row, Row, tag_mark, max_mark) ; +#endif /* COLAMD_NDEBUG */ + + /* === Kill all rows used to construct pivot row ==================== */ + + /* also kill pivot row, temporarily */ + cp = &A [Col [pivot_col].start] ; + cp_end = cp + Col 
[pivot_col].length ; + while (cp < cp_end) + { + /* may be killing an already dead row */ + row = *cp++ ; + COLAMD_DEBUG3 (("Kill row in pivot col: %d\n", row)) ; + EIGEN_KILL_ROW (row) ; + } + + /* === Select a row index to use as the new pivot row =============== */ + + pivot_row_length = pfree - pivot_row_start ; + if (pivot_row_length > 0) + { + /* pick the "pivot" row arbitrarily (first row in col) */ + pivot_row = A [Col [pivot_col].start] ; + COLAMD_DEBUG3 (("Pivotal row is %d\n", pivot_row)) ; + } + else + { + /* there is no pivot row, since it is of zero length */ + pivot_row = EIGEN_COLAMD_EMPTY ; + COLAMD_ASSERT (pivot_row_length == 0) ; + } + COLAMD_ASSERT (Col [pivot_col].length > 0 || pivot_row_length == 0) ; + + /* === Approximate degree computation =============================== */ + + /* Here begins the computation of the approximate degree. The column */ + /* score is the sum of the pivot row "length", plus the size of the */ + /* set differences of each row in the column minus the pattern of the */ + /* pivot row itself. The column ("thickness") itself is also */ + /* excluded from the column score (we thus use an approximate */ + /* external degree). */ + + /* The time taken by the following code (compute set differences, and */ + /* add them up) is proportional to the size of the data structure */ + /* being scanned - that is, the sum of the sizes of each column in */ + /* the pivot row. Thus, the amortized time to compute a column score */ + /* is proportional to the size of that column (where size, in this */ + /* context, is the column "length", or the number of row indices */ + /* in that column). The number of row indices in a column is */ + /* monotonically non-decreasing, from the length of the original */ + /* column on input to eigen_colamd. */ + + /* === Compute set differences ====================================== */ + + COLAMD_DEBUG3 (("** Computing set differences phase. 
**\n")) ; + + /* pivot row is currently dead - it will be revived later. */ + + COLAMD_DEBUG3 (("Pivot row: ")) ; + /* for each column in pivot row */ + rp = &A [pivot_row_start] ; + rp_end = rp + pivot_row_length ; + while (rp < rp_end) + { + col = *rp++ ; + COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (col) && col != pivot_col) ; + COLAMD_DEBUG3 (("Col: %d\n", col)) ; + + /* clear tags used to construct pivot row pattern */ + col_thickness = -Col [col].shared1.thickness ; + COLAMD_ASSERT (col_thickness > 0) ; + Col [col].shared1.thickness = col_thickness ; + + /* === Remove column from degree list =========================== */ + + cur_score = Col [col].shared2.score ; + prev_col = Col [col].shared3.prev ; + next_col = Col [col].shared4.degree_next ; + COLAMD_ASSERT (cur_score >= 0) ; + COLAMD_ASSERT (cur_score <= n_col) ; + COLAMD_ASSERT (cur_score >= EIGEN_COLAMD_EMPTY) ; + if (prev_col == EIGEN_COLAMD_EMPTY) + { + head [cur_score] = next_col ; + } + else + { + Col [prev_col].shared4.degree_next = next_col ; + } + if (next_col != EIGEN_COLAMD_EMPTY) + { + Col [next_col].shared3.prev = prev_col ; + } + + /* === Scan the column ========================================== */ + + cp = &A [Col [col].start] ; + cp_end = cp + Col [col].length ; + while (cp < cp_end) + { + /* get a row */ + row = *cp++ ; + row_mark = Row [row].shared2.mark ; + /* skip if dead */ + if (EIGEN_ROW_IS_MARKED_DEAD (row_mark)) + { + continue ; + } + COLAMD_ASSERT (row != pivot_row) ; + set_difference = row_mark - tag_mark ; + /* check if the row has been seen yet */ + if (set_difference < 0) + { + COLAMD_ASSERT (Row [row].shared1.degree <= max_deg) ; + set_difference = Row [row].shared1.degree ; + } + /* subtract column thickness from this row's set difference */ + set_difference -= col_thickness ; + COLAMD_ASSERT (set_difference >= 0) ; + /* absorb this row if the set difference becomes zero */ + if (set_difference == 0) + { + COLAMD_DEBUG3 (("aggressive absorption. 
Row: %d\n", row)) ; + EIGEN_KILL_ROW (row) ; + } + else + { + /* save the new mark */ + Row [row].shared2.mark = set_difference + tag_mark ; + } + } + } + +#ifndef COLAMD_NDEBUG + eigen_debug_deg_lists (n_row, n_col, Row, Col, head, + min_score, n_col2-k-pivot_row_degree, max_deg) ; +#endif /* COLAMD_NDEBUG */ + + /* === Add up set differences for each column ======================= */ + + COLAMD_DEBUG3 (("** Adding set differences phase. **\n")) ; + + /* for each column in pivot row */ + rp = &A [pivot_row_start] ; + rp_end = rp + pivot_row_length ; + while (rp < rp_end) + { + /* get a column */ + col = *rp++ ; + COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (col) && col != pivot_col) ; + hash = 0 ; + cur_score = 0 ; + cp = &A [Col [col].start] ; + /* compact the column */ + new_cp = cp ; + cp_end = cp + Col [col].length ; + + COLAMD_DEBUG4 (("Adding set diffs for Col: %d.\n", col)) ; + + while (cp < cp_end) + { + /* get a row */ + row = *cp++ ; + COLAMD_ASSERT(row >= 0 && row < n_row) ; + row_mark = Row [row].shared2.mark ; + /* skip if dead */ + if (EIGEN_ROW_IS_MARKED_DEAD (row_mark)) + { + continue ; + } + COLAMD_ASSERT (row_mark > tag_mark) ; + /* compact the column */ + *new_cp++ = row ; + /* compute hash function */ + hash += row ; + /* add set difference */ + cur_score += row_mark - tag_mark ; + /* integer overflow... */ + cur_score = COLAMD_MIN (cur_score, n_col) ; + } + + /* recompute the column's length */ + Col [col].length = (int) (new_cp - &A [Col [col].start]) ; + + /* === Further mass elimination ================================= */ + + if (Col [col].length == 0) + { + COLAMD_DEBUG4 (("further mass elimination. 
Col: %d\n", col)) ; + /* nothing left but the pivot row in this column */ + EIGEN_KILL_PRINCIPAL_COL (col) ; + pivot_row_degree -= Col [col].shared1.thickness ; + COLAMD_ASSERT (pivot_row_degree >= 0) ; + /* order it */ + Col [col].shared2.order = k ; + /* increment order count by column thickness */ + k += Col [col].shared1.thickness ; + } + else + { + /* === Prepare for supercolumn detection ==================== */ + + COLAMD_DEBUG4 (("Preparing supercol detection for Col: %d.\n", col)) ; + + /* save score so far */ + Col [col].shared2.score = cur_score ; + + /* add column to hash table, for supercolumn detection */ + hash %= n_col + 1 ; + + COLAMD_DEBUG4 ((" Hash = %d, n_col = %d.\n", hash, n_col)) ; + COLAMD_ASSERT (hash <= n_col) ; + + head_column = head [hash] ; + if (head_column > EIGEN_COLAMD_EMPTY) + { + /* degree list "hash" is non-empty, use prev (shared3) of */ + /* first column in degree list as head of hash bucket */ + first_col = Col [head_column].shared3.headhash ; + Col [head_column].shared3.headhash = col ; + } + else + { + /* degree list "hash" is empty, use head as hash bucket */ + first_col = - (head_column + 2) ; + head [hash] = - (col + 2) ; + } + Col [col].shared4.hash_next = first_col ; + + /* save hash function in Col [col].shared3.hash */ + Col [col].shared3.hash = (int) hash ; + COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (col)) ; + } + } + + /* The approximate external column degree is now computed. */ + + /* === Supercolumn detection ======================================== */ + + COLAMD_DEBUG3 (("** Supercolumn detection phase. 
**\n")) ; + + eigen_detect_super_cols ( + +#ifndef COLAMD_NDEBUG + n_col, Row, +#endif /* COLAMD_NDEBUG */ + + Col, A, head, pivot_row_start, pivot_row_length) ; + + /* === Kill the pivotal column ====================================== */ + + EIGEN_KILL_PRINCIPAL_COL (pivot_col) ; + + /* === Clear mark =================================================== */ + + tag_mark += (max_deg + 1) ; + if (tag_mark >= max_mark) + { + COLAMD_DEBUG2 (("clearing tag_mark\n")) ; + tag_mark = eigen_clear_mark (n_row, Row) ; + } + +#ifndef COLAMD_NDEBUG + COLAMD_DEBUG3 (("check3\n")) ; + eigen_debug_mark (n_row, Row, tag_mark, max_mark) ; +#endif /* COLAMD_NDEBUG */ + + /* === Finalize the new pivot row, and column scores ================ */ + + COLAMD_DEBUG3 (("** Finalize scores phase. **\n")) ; + + /* for each column in pivot row */ + rp = &A [pivot_row_start] ; + /* compact the pivot row */ + new_rp = rp ; + rp_end = rp + pivot_row_length ; + while (rp < rp_end) + { + col = *rp++ ; + /* skip dead columns */ + if (EIGEN_COL_IS_DEAD (col)) + { + continue ; + } + *new_rp++ = col ; + /* add new pivot row to column */ + A [Col [col].start + (Col [col].length++)] = pivot_row ; + + /* retrieve score so far and add on pivot row's degree. */ + /* (we wait until here for this in case the pivot */ + /* row's degree was reduced due to mass elimination). 
*/ + cur_score = Col [col].shared2.score + pivot_row_degree ; + + /* calculate the max possible score as the number of */ + /* external columns minus the 'k' value minus the */ + /* columns thickness */ + max_score = n_col - k - Col [col].shared1.thickness ; + + /* make the score the external degree of the union-of-rows */ + cur_score -= Col [col].shared1.thickness ; + + /* make sure score is less or equal than the max score */ + cur_score = COLAMD_MIN (cur_score, max_score) ; + COLAMD_ASSERT (cur_score >= 0) ; + + /* store updated score */ + Col [col].shared2.score = cur_score ; + + /* === Place column back in degree list ========================= */ + + COLAMD_ASSERT (min_score >= 0) ; + COLAMD_ASSERT (min_score <= n_col) ; + COLAMD_ASSERT (cur_score >= 0) ; + COLAMD_ASSERT (cur_score <= n_col) ; + COLAMD_ASSERT (head [cur_score] >= EIGEN_COLAMD_EMPTY) ; + next_col = head [cur_score] ; + Col [col].shared4.degree_next = next_col ; + Col [col].shared3.prev = EIGEN_COLAMD_EMPTY ; + if (next_col != EIGEN_COLAMD_EMPTY) + { + Col [next_col].shared3.prev = col ; + } + head [cur_score] = col ; + + /* see if this score is less than current min */ + min_score = COLAMD_MIN (min_score, cur_score) ; + + } + +#ifndef COLAMD_NDEBUG + eigen_debug_deg_lists (n_row, n_col, Row, Col, head, + min_score, n_col2-k, max_deg) ; +#endif /* COLAMD_NDEBUG */ + + /* === Resurrect the new pivot row ================================== */ + + if (pivot_row_degree > 0) + { + /* update pivot row length to reflect any cols that were killed */ + /* during super-col detection and mass elimination */ + Row [pivot_row].start = pivot_row_start ; + Row [pivot_row].length = (int) (new_rp - &A[pivot_row_start]) ; + Row [pivot_row].shared1.degree = pivot_row_degree ; + Row [pivot_row].shared2.mark = 0 ; + /* pivot row is no longer dead */ + } + } + + /* === All principal columns have now been ordered ====================== */ + + return (ngarbage) ; +} + + +/* 
========================================================================== */
/* === eigen_order_children ======================================================= */
/* ========================================================================== */

/*
    eigen_order_children

    eigen_find_ordering has given an order to the principal columns only
    (the representatives of the supercolumns).  This routine gives an order
    to every non-principal column whose order is still EIGEN_COLAMD_EMPTY by
    following Col [].shared1.parent up to its principal column (a column is a
    child of the column that absorbed it), and then writes the permutation:
    on return p [Col [c].shared2.order] == c for every c in 0 ... n_col-1.

    The original authors state that the running time is O (n_col), i.e.
    linear in the number of columns, even if that is not obvious at first
    glance.  Not user-callable.
*/

void eigen_order_children
(
    int n_col,                  /* number of columns of A */
    EIGEN_Colamd_Col Col [],    /* of size n_col+1 */
    int p []                    /* p [0 ... n_col-1] is the column permutation */
)
{
    int i ;         /* column being examined */
    int c ;         /* column being ordered / permutation loop index */
    int parent ;    /* principal column at the top of i's parent chain */
    int order ;     /* next order value handed out for this group */

    /* --- order the non-principal columns --------------------------------- */

    for (i = 0 ; i < n_col ; i++)
    {
        COLAMD_ASSERT (EIGEN_COL_IS_DEAD (i)) ;

        /* nothing to do for principal columns or columns already ordered */
        if (EIGEN_EIGEN_COL_IS_DEAD_PRINCIPAL (i) || Col [i].shared2.order != EIGEN_COLAMD_EMPTY)
        {
            continue ;
        }

        /* climb the parent links until a principal column is reached */
        for (parent = Col [i].shared1.parent ;
            !EIGEN_EIGEN_COL_IS_DEAD_PRINCIPAL (parent) ;
            parent = Col [parent].shared1.parent)
        {
            /* the loop header does all the work */
        }

        /* numbering for this group continues from the parent's order */
        order = Col [parent].shared2.order ;

        /* order the unordered columns starting at i, pointing each one */
        /* straight at the principal parent (tree collapse) */
        c = i ;
        do
        {
            COLAMD_ASSERT (Col [c].shared2.order == EIGEN_COLAMD_EMPTY) ;

            Col [c].shared2.order = order++ ;
            Col [c].shared1.parent = parent ;

            /* NOTE(review): the link read here is the one written on the */
            /* previous line, so c becomes 'parent' right away; kept */
            /* exactly as in the original code */
            c = Col [c].shared1.parent ;

            /* stop at the first column that already has an order */
        } while (Col [c].shared2.order == EIGEN_COLAMD_EMPTY) ;

        /* the principal column moves behind the columns just ordered */
        Col [parent].shared2.order = order ;
    }

    /* --- write out the permutation ---------------------------------------- */

    for (c = 0 ; c < n_col ; c++)
    {
        p [Col [c].shared2.order] = c ;
    }
}


/* ========================================================================== */
/* === eigen_detect_super_cols ==================================================== */
/* ========================================================================== */

/*
    Detects supercolumns by finding matches between columns in the hash buckets.
    Check amongst columns in the set A [row_start ... row_start + row_length-1].
    The columns under consideration are currently *not* in the degree lists,
    and have already been placed in the hash buckets.

    The hash bucket for columns whose hash function is equal to h is stored
    as follows:

        if head [h] is >= 0, then head [h] contains a degree list, so:

                head [h] is the first column in degree bucket h.
                Col [head [h]].headhash gives the first column in hash bucket h.

        otherwise, the degree list is empty, and:

                -(head [h] + 2) is the first column in hash bucket h.

    For a column c in a hash bucket, Col [c].shared3.prev is NOT a "previous
    column" pointer. Col [c].shared3.hash is used instead as the hash number
    for that column. The value of Col [c].shared4.hash_next is the next column
    in the same hash bucket.

    Assuming no, or "few" hash collisions, the time taken by this routine is
    linear in the sum of the sizes (lengths) of each column whose score has
    just been computed in the approximate degree computation.
    Not user-callable.
+*/ + + void eigen_detect_super_cols +( + /* === Parameters ======================================================= */ + +#ifndef COLAMD_NDEBUG + /* these two parameters are only needed when debugging is enabled: */ + int n_col, /* number of columns of A */ + EIGEN_Colamd_Row Row [], /* of size n_row+1 */ +#endif /* COLAMD_NDEBUG */ + + EIGEN_Colamd_Col Col [], /* of size n_col+1 */ + int A [], /* row indices of A */ + int head [], /* head of degree lists and hash buckets */ + int row_start, /* pointer to set of columns to check */ + int row_length /* number of columns to check */ +) +{ + /* === Local variables ================================================== */ + + int hash ; /* hash value for a column */ + int *rp ; /* pointer to a row */ + int c ; /* a column index */ + int super_c ; /* column index of the column to absorb into */ + int *cp1 ; /* column pointer for column super_c */ + int *cp2 ; /* column pointer for column c */ + int length ; /* length of column super_c */ + int prev_c ; /* column preceding c in hash bucket */ + int i ; /* loop counter */ + int *rp_end ; /* pointer to the end of the row */ + int col ; /* a column index in the row to check */ + int head_column ; /* first column in hash bucket or degree list */ + int first_col ; /* first column in hash bucket */ + + /* === Consider each column in the row ================================== */ + + rp = &A [row_start] ; + rp_end = rp + row_length ; + while (rp < rp_end) + { + col = *rp++ ; + if (EIGEN_COL_IS_DEAD (col)) + { + continue ; + } + + /* get hash number for this column */ + hash = Col [col].shared3.hash ; + COLAMD_ASSERT (hash <= n_col) ; + + /* === Get the first column in this hash bucket ===================== */ + + head_column = head [hash] ; + if (head_column > EIGEN_COLAMD_EMPTY) + { + first_col = Col [head_column].shared3.headhash ; + } + else + { + first_col = - (head_column + 2) ; + } + + /* === Consider each column in the hash bucket ====================== */ + + for (super_c = 
first_col ; super_c != EIGEN_COLAMD_EMPTY ; + super_c = Col [super_c].shared4.hash_next) + { + COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (super_c)) ; + COLAMD_ASSERT (Col [super_c].shared3.hash == hash) ; + length = Col [super_c].length ; + + /* prev_c is the column preceding column c in the hash bucket */ + prev_c = super_c ; + + /* === Compare super_c with all columns after it ================ */ + + for (c = Col [super_c].shared4.hash_next ; + c != EIGEN_COLAMD_EMPTY ; c = Col [c].shared4.hash_next) + { + COLAMD_ASSERT (c != super_c) ; + COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (c)) ; + COLAMD_ASSERT (Col [c].shared3.hash == hash) ; + + /* not identical if lengths or scores are different */ + if (Col [c].length != length || + Col [c].shared2.score != Col [super_c].shared2.score) + { + prev_c = c ; + continue ; + } + + /* compare the two columns */ + cp1 = &A [Col [super_c].start] ; + cp2 = &A [Col [c].start] ; + + for (i = 0 ; i < length ; i++) + { + /* the columns are "clean" (no dead rows) */ + COLAMD_ASSERT (EIGEN_ROW_IS_ALIVE (*cp1)) ; + COLAMD_ASSERT (EIGEN_ROW_IS_ALIVE (*cp2)) ; + /* row indices will same order for both supercols, */ + /* no gather scatter nessasary */ + if (*cp1++ != *cp2++) + { + break ; + } + } + + /* the two columns are different if the for-loop "broke" */ + if (i != length) + { + prev_c = c ; + continue ; + } + + /* === Got it! 
two columns are identical =================== */ + + COLAMD_ASSERT (Col [c].shared2.score == Col [super_c].shared2.score) ; + + Col [super_c].shared1.thickness += Col [c].shared1.thickness ; + Col [c].shared1.parent = super_c ; + EIGEN_KILL_NON_PRINCIPAL_COL (c) ; + /* order c later, in eigen_order_children() */ + Col [c].shared2.order = EIGEN_COLAMD_EMPTY ; + /* remove c from hash bucket */ + Col [prev_c].shared4.hash_next = Col [c].shared4.hash_next ; + } + } + + /* === Empty this hash bucket ======================================= */ + + if (head_column > EIGEN_COLAMD_EMPTY) + { + /* corresponding degree list "hash" is not empty */ + Col [head_column].shared3.headhash = EIGEN_COLAMD_EMPTY ; + } + else + { + /* corresponding degree list "hash" is empty */ + head [hash] = EIGEN_COLAMD_EMPTY ; + } + } +} + + +/* ========================================================================== */ +/* === eigen_garbage_collection =================================================== */ +/* ========================================================================== */ + +/* + Defragments and compacts columns and rows in the workspace A. Used when + all avaliable memory has been used while performing row merging. Returns + the index of the first free position in A, after garbage collection. The + time taken by this routine is linear is the size of the array A, which is + itself linear in the number of nonzeros in the input matrix. + Not user-callable. +*/ + + int eigen_garbage_collection /* returns the new value of pfree */ +( + /* === Parameters ======================================================= */ + + int n_row, /* number of rows */ + int n_col, /* number of columns */ + EIGEN_Colamd_Row Row [], /* row info */ + EIGEN_Colamd_Col Col [], /* column info */ + int A [], /* A [0 ... Alen-1] holds the matrix */ + int *pfree /* &A [0] ... 
pfree is in use */ +) +{ + /* === Local variables ================================================== */ + + int *psrc ; /* source pointer */ + int *pdest ; /* destination pointer */ + int j ; /* counter */ + int r ; /* a row index */ + int c ; /* a column index */ + int length ; /* length of a row or column */ + +#ifndef COLAMD_NDEBUG + int debug_rows ; + COLAMD_DEBUG2 (("Defrag..\n")) ; + for (psrc = &A[0] ; psrc < pfree ; psrc++) COLAMD_ASSERT (*psrc >= 0) ; + debug_rows = 0 ; +#endif /* COLAMD_NDEBUG */ + + /* === Defragment the columns =========================================== */ + + pdest = &A[0] ; + for (c = 0 ; c < n_col ; c++) + { + if (EIGEN_COL_IS_ALIVE (c)) + { + psrc = &A [Col [c].start] ; + + /* move and compact the column */ + COLAMD_ASSERT (pdest <= psrc) ; + Col [c].start = (int) (pdest - &A [0]) ; + length = Col [c].length ; + for (j = 0 ; j < length ; j++) + { + r = *psrc++ ; + if (EIGEN_ROW_IS_ALIVE (r)) + { + *pdest++ = r ; + } + } + Col [c].length = (int) (pdest - &A [Col [c].start]) ; + } + } + + /* === Prepare to defragment the rows =================================== */ + + for (r = 0 ; r < n_row ; r++) + { + if (EIGEN_ROW_IS_ALIVE (r)) + { + if (Row [r].length == 0) + { + /* this row is of zero length. cannot compact it, so kill it */ + COLAMD_DEBUG3 (("Defrag row kill\n")) ; + EIGEN_KILL_ROW (r) ; + } + else + { + /* save first column index in Row [r].shared2.first_column */ + psrc = &A [Row [r].start] ; + Row [r].shared2.first_column = *psrc ; + COLAMD_ASSERT (EIGEN_ROW_IS_ALIVE (r)) ; + /* flag the start of the row with the one's complement of row */ + *psrc = EIGEN_ONES_COMPLEMENT (r) ; + +#ifndef COLAMD_NDEBUG + debug_rows++ ; +#endif /* COLAMD_NDEBUG */ + + } + } + } + + /* === Defragment the rows ============================================== */ + + psrc = pdest ; + while (psrc < pfree) + { + /* find a negative number ... 
the start of a row */ + if (*psrc++ < 0) + { + psrc-- ; + /* get the row index */ + r = EIGEN_ONES_COMPLEMENT (*psrc) ; + COLAMD_ASSERT (r >= 0 && r < n_row) ; + /* restore first column index */ + *psrc = Row [r].shared2.first_column ; + COLAMD_ASSERT (EIGEN_ROW_IS_ALIVE (r)) ; + + /* move and compact the row */ + COLAMD_ASSERT (pdest <= psrc) ; + Row [r].start = (int) (pdest - &A [0]) ; + length = Row [r].length ; + for (j = 0 ; j < length ; j++) + { + c = *psrc++ ; + if (EIGEN_COL_IS_ALIVE (c)) + { + *pdest++ = c ; + } + } + Row [r].length = (int) (pdest - &A [Row [r].start]) ; + +#ifndef COLAMD_NDEBUG + debug_rows-- ; +#endif /* COLAMD_NDEBUG */ + + } + } + /* ensure we found all the rows */ + COLAMD_ASSERT (debug_rows == 0) ; + + /* === Return the new value of pfree ==================================== */ + + return ((int) (pdest - &A [0])) ; +} + + +/* ========================================================================== */ +/* === eigen_clear_mark =========================================================== */ +/* ========================================================================== */ + +/* + Clears the Row [].shared2.mark array, and returns the new tag_mark. + Return value is the new tag_mark. Not user-callable. +*/ + + int eigen_clear_mark /* return the new value for tag_mark */ +( + /* === Parameters ======================================================= */ + + int n_row, /* number of rows in A */ + EIGEN_Colamd_Row Row [] /* Row [0 ... 
n_row-1].shared2.mark is set to zero */ +) +{ + /* === Local variables ================================================== */ + + int r ; + + for (r = 0 ; r < n_row ; r++) + { + if (EIGEN_ROW_IS_ALIVE (r)) + { + Row [r].shared2.mark = 0 ; + } + } + return (1) ; +} + + + +/* ========================================================================== */ +/* === eigen_print_report ========================================================= */ +/* ========================================================================== */ + + void eigen_print_report +( + char *method, + int stats [EIGEN_COLAMD_STATS] +) +{ + + int i1, i2, i3 ; + + if (!stats) + { + PRINTF ("%s: No statistics available.\n", method) ; + return ; + } + + i1 = stats [EIGEN_COLAMD_INFO1] ; + i2 = stats [EIGEN_COLAMD_INFO2] ; + i3 = stats [EIGEN_COLAMD_INFO3] ; + + if (stats [EIGEN_COLAMD_STATUS] >= 0) + { + PRINTF ("%s: OK. ", method) ; + } + else + { + PRINTF ("%s: ERROR. ", method) ; + } + + switch (stats [EIGEN_COLAMD_STATUS]) + { + + case EIGEN_COLAMD_OK_BUT_JUMBLED: + + PRINTF ("Matrix has unsorted or duplicate row indices.\n") ; + + PRINTF ("%s: number of duplicate or out-of-order row indices: %d\n", + method, i3) ; + + PRINTF ("%s: last seen duplicate or out-of-order row index: %d\n", + method, INDEX (i2)) ; + + PRINTF ("%s: last seen in column: %d", + method, INDEX (i1)) ; + + /* no break - fall through to next case instead */ + + case EIGEN_COLAMD_OK: + + PRINTF ("\n") ; + + PRINTF ("%s: number of dense or empty rows ignored: %d\n", + method, stats [EIGEN_COLAMD_DENSE_ROW]) ; + + PRINTF ("%s: number of dense or empty columns ignored: %d\n", + method, stats [EIGEN_COLAMD_DENSE_COL]) ; + + PRINTF ("%s: number of garbage collections performed: %d\n", + method, stats [EIGEN_COLAMD_DEFRAG_COUNT]) ; + break ; + + case EIGEN_COLAMD_ERROR_A_not_present: + + PRINTF ("Array A (row indices of matrix) not present.\n") ; + break ; + + case EIGEN_COLAMD_ERROR_p_not_present: + + PRINTF ("Array p (column pointers 
for matrix) not present.\n") ; + break ; + + case EIGEN_COLAMD_ERROR_nrow_negative: + + PRINTF ("Invalid number of rows (%d).\n", i1) ; + break ; + + case EIGEN_COLAMD_ERROR_ncol_negative: + + PRINTF ("Invalid number of columns (%d).\n", i1) ; + break ; + + case EIGEN_COLAMD_ERROR_nnz_negative: + + PRINTF ("Invalid number of nonzero entries (%d).\n", i1) ; + break ; + + case EIGEN_COLAMD_ERROR_p0_nonzero: + + PRINTF ("Invalid column pointer, p [0] = %d, must be zero.\n", i1) ; + break ; + + case EIGEN_COLAMD_ERROR_A_too_small: + + PRINTF ("Array A too small.\n") ; + PRINTF (" Need Alen >= %d, but given only Alen = %d.\n", + i1, i2) ; + break ; + + case EIGEN_COLAMD_ERROR_col_length_negative: + + PRINTF + ("Column %d has a negative number of nonzero entries (%d).\n", + INDEX (i1), i2) ; + break ; + + case EIGEN_COLAMD_ERROR_row_index_out_of_bounds: + + PRINTF + ("Row index (row %d) out of bounds (%d to %d) in column %d.\n", + INDEX (i2), INDEX (0), INDEX (i3-1), INDEX (i1)) ; + break ; + + case EIGEN_COLAMD_ERROR_out_of_memory: + + PRINTF ("Out of memory.\n") ; + break ; + + case EIGEN_COLAMD_ERROR_internal_error: + + /* if this happens, there is a bug in the code */ + PRINTF + ("Internal error! Please contact authors (davis@cise.ufl.edu).\n") ; + break ; + } +} + + + + +/* ========================================================================== */ +/* === eigen_colamd debugging routines ============================================ */ +/* ========================================================================== */ + +/* When debugging is disabled, the remainder of this file is ignored. */ + +#ifndef COLAMD_NDEBUG + + +/* ========================================================================== */ +/* === eigen_debug_structures ===================================================== */ +/* ========================================================================== */ + +/* + At this point, all empty rows and columns are dead. 
All live columns + are "clean" (containing no dead rows) and simplicial (no supercolumns + yet). Rows may contain dead columns, but all live rows contain at + least one live column. +*/ + + void eigen_debug_structures +( + /* === Parameters ======================================================= */ + + int n_row, + int n_col, + EIGEN_Colamd_Row Row [], + EIGEN_Colamd_Col Col [], + int A [], + int n_col2 +) +{ + /* === Local variables ================================================== */ + + int i ; + int c ; + int *cp ; + int *cp_end ; + int len ; + int score ; + int r ; + int *rp ; + int *rp_end ; + int deg ; + + /* === Check A, Row, and Col ============================================ */ + + for (c = 0 ; c < n_col ; c++) + { + if (EIGEN_COL_IS_ALIVE (c)) + { + len = Col [c].length ; + score = Col [c].shared2.score ; + COLAMD_DEBUG4 (("initial live col %5d %5d %5d\n", c, len, score)) ; + COLAMD_ASSERT (len > 0) ; + COLAMD_ASSERT (score >= 0) ; + COLAMD_ASSERT (Col [c].shared1.thickness == 1) ; + cp = &A [Col [c].start] ; + cp_end = cp + len ; + while (cp < cp_end) + { + r = *cp++ ; + COLAMD_ASSERT (EIGEN_ROW_IS_ALIVE (r)) ; + } + } + else + { + i = Col [c].shared2.order ; + COLAMD_ASSERT (i >= n_col2 && i < n_col) ; + } + } + + for (r = 0 ; r < n_row ; r++) + { + if (EIGEN_ROW_IS_ALIVE (r)) + { + i = 0 ; + len = Row [r].length ; + deg = Row [r].shared1.degree ; + COLAMD_ASSERT (len > 0) ; + COLAMD_ASSERT (deg > 0) ; + rp = &A [Row [r].start] ; + rp_end = rp + len ; + while (rp < rp_end) + { + c = *rp++ ; + if (EIGEN_COL_IS_ALIVE (c)) + { + i++ ; + } + } + COLAMD_ASSERT (i > 0) ; + } + } +} + + +/* ========================================================================== */ +/* === eigen_debug_deg_lists ====================================================== */ +/* ========================================================================== */ + +/* + Prints the contents of the degree lists. 
Counts the number of columns + in the degree list and compares it to the total it should have. Also + checks the row degrees. +*/ + + void eigen_debug_deg_lists +( + /* === Parameters ======================================================= */ + + int n_row, + int n_col, + EIGEN_Colamd_Row Row [], + EIGEN_Colamd_Col Col [], + int head [], + int min_score, + int should, + int max_deg +) +{ + /* === Local variables ================================================== */ + + int deg ; + int col ; + int have ; + int row ; + + /* === Check the degree lists =========================================== */ + + if (n_col > 10000 && colamd_debug <= 0) + { + return ; + } + have = 0 ; + COLAMD_DEBUG4 (("Degree lists: %d\n", min_score)) ; + for (deg = 0 ; deg <= n_col ; deg++) + { + col = head [deg] ; + if (col == EIGEN_COLAMD_EMPTY) + { + continue ; + } + COLAMD_DEBUG4 (("%d:", deg)) ; + while (col != EIGEN_COLAMD_EMPTY) + { + COLAMD_DEBUG4 ((" %d", col)) ; + have += Col [col].shared1.thickness ; + COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (col)) ; + col = Col [col].shared4.degree_next ; + } + COLAMD_DEBUG4 (("\n")) ; + } + COLAMD_DEBUG4 (("should %d have %d\n", should, have)) ; + COLAMD_ASSERT (should == have) ; + + /* === Check the row degrees ============================================ */ + + if (n_row > 10000 && colamd_debug <= 0) + { + return ; + } + for (row = 0 ; row < n_row ; row++) + { + if (EIGEN_ROW_IS_ALIVE (row)) + { + COLAMD_ASSERT (Row [row].shared1.degree <= max_deg) ; + } + } +} + + +/* ========================================================================== */ +/* === eigen_debug_mark =========================================================== */ +/* ========================================================================== */ + +/* + Ensures that the tag_mark is less that the maximum and also ensures that + each entry in the mark array is less than the tag mark. 
+*/ + + void eigen_debug_mark +( + /* === Parameters ======================================================= */ + + int n_row, + EIGEN_Colamd_Row Row [], + int tag_mark, + int max_mark +) +{ + /* === Local variables ================================================== */ + + int r ; + + /* === Check the Row marks ============================================== */ + + COLAMD_ASSERT (tag_mark > 0 && tag_mark <= max_mark) ; + if (n_row > 10000 && colamd_debug <= 0) + { + return ; + } + for (r = 0 ; r < n_row ; r++) + { + COLAMD_ASSERT (Row [r].shared2.mark < tag_mark) ; + } +} + + +/* ========================================================================== */ +/* === eigen_debug_matrix ========================================================= */ +/* ========================================================================== */ + +/* + Prints out the contents of the columns and the rows. +*/ + + void eigen_debug_matrix +( + /* === Parameters ======================================================= */ + + int n_row, + int n_col, + EIGEN_Colamd_Row Row [], + EIGEN_Colamd_Col Col [], + int A [] +) +{ + /* === Local variables ================================================== */ + + int r ; + int c ; + int *rp ; + int *rp_end ; + int *cp ; + int *cp_end ; + + /* === Dump the rows and columns of the matrix ========================== */ + + if (colamd_debug < 3) + { + return ; + } + COLAMD_DEBUG3 (("DUMP MATRIX:\n")) ; + for (r = 0 ; r < n_row ; r++) + { + COLAMD_DEBUG3 (("Row %d alive? %d\n", r, EIGEN_ROW_IS_ALIVE (r))) ; + if (EIGEN_ROW_IS_DEAD (r)) + { + continue ; + } + COLAMD_DEBUG3 (("start %d length %d degree %d\n", + Row [r].start, Row [r].length, Row [r].shared1.degree)) ; + rp = &A [Row [r].start] ; + rp_end = rp + Row [r].length ; + while (rp < rp_end) + { + c = *rp++ ; + COLAMD_DEBUG4 ((" %d col %d\n", EIGEN_COL_IS_ALIVE (c), c)) ; + } + } + + for (c = 0 ; c < n_col ; c++) + { + COLAMD_DEBUG3 (("Col %d alive? 
%d\n", c, EIGEN_COL_IS_ALIVE (c))) ; + if (EIGEN_COL_IS_DEAD (c)) + { + continue ; + } + COLAMD_DEBUG3 (("start %d length %d shared1 %d shared2 %d\n", + Col [c].start, Col [c].length, + Col [c].shared1.thickness, Col [c].shared2.score)) ; + cp = &A [Col [c].start] ; + cp_end = cp + Col [c].length ; + while (cp < cp_end) + { + r = *cp++ ; + COLAMD_DEBUG4 ((" %d row %d\n", EIGEN_ROW_IS_ALIVE (r), r)) ; + } + } +} + + void eigen_colamd_get_debug +( + char *method +) +{ + colamd_debug = 0 ; /* no debug printing */ + + /* get "D" environment variable, which gives the debug printing level */ + if (getenv ("D")) + { + colamd_debug = atoi (getenv ("D")) ; + } + + COLAMD_DEBUG0 (("%s: debug version, D = %d (THIS WILL BE SLOW!)\n", + method, colamd_debug)) ; +} + +#endif /* NDEBUG */ + +#endif diff --git a/Eigen/src/OrderingMethods/Ordering.h b/Eigen/src/OrderingMethods/Ordering.h index 670cca9c4..cbd2e5d34 100644 --- a/Eigen/src/OrderingMethods/Ordering.h +++ b/Eigen/src/OrderingMethods/Ordering.h @@ -27,6 +27,7 @@ #define EIGEN_ORDERING_H #include "Amd.h" +#include "Eigen_Colamd.h" namespace Eigen { namespace internal { @@ -112,54 +113,50 @@ class NaturalOrdering * Get the column approximate minimum degree ordering * The matrix should be in column-major format */ -// template -// class COLAMDOrdering: public OrderingBase< ColamdOrdering > -// { -// public: -// typedef OrderingBase< ColamdOrdering > Base; -// typedef SparseMatrix MatrixType; -// -// public: -// COLAMDOrdering():Base() {} -// -// COLAMDOrdering(const MatrixType& matrix):Base() -// { -// compute(matrix); -// } -// COLAMDOrdering(const MatrixType& mat, PermutationType& perm_c):Base() -// { -// compute(matrix); -// perm_c = this.get_perm(); -// } -// void compute(const MatrixType& mat) -// { -// // Test if the matrix is column major... 
-// -// int m = mat.rows(); -// int n = mat.cols(); -// int nnz = mat.nonZeros(); -// // Get the recommended value of Alen to be used by colamd -// int Alen = colamd_recommended(nnz, m, n); -// // Set the default parameters -// double knobs[COLAMD_KNOBS]; -// colamd_set_defaults(knobs); -// -// int info; -// VectorXi p(n), A(nnz); -// for(int i=0; i < n; i++) p(i) = mat.outerIndexPtr()(i); -// for(int i=0; i < nnz; i++) A(i) = mat.innerIndexPtr()(i); -// // Call Colamd routine to compute the ordering -// info = colamd(m, n, Alen, A,p , knobs, stats) -// eigen_assert( (info != FALSE)&& "COLAMD failed " ); -// -// m_P.resize(n); -// for (int i = 0; i < n; i++) m_P(p(i)) = i; -// m_isInitialized = true; -// } -// protected: -// using Base::m_isInitialized; -// using Base m_P; -// }; +template +class COLAMDOrdering; +#include "Eigen_Colamd.h" + +template +class COLAMDOrdering +{ + public: + typedef PermutationMatrix PermutationType; + typedef Matrix IndexVector; + /** Compute the permutation vector form a sparse matrix */ + + + + template + void operator() (const MatrixType& mat, PermutationType& perm) + { + int m = mat.rows(); + int n = mat.cols(); + int nnz = mat.nonZeros(); + // Get the recommended value of Alen to be used by colamd + int Alen = eigen_colamd_recommended(nnz, m, n); + // Set the default parameters + double knobs [EIGEN_COLAMD_KNOBS]; + int stats [EIGEN_COLAMD_STATS]; + eigen_colamd_set_defaults(knobs); + + int info; + IndexVector p(n+1), A(Alen); + for(int i=0; i <= n; i++) p(i) = mat.outerIndexPtr()[i]; + for(int i=0; i < nnz; i++) A(i) = mat.innerIndexPtr()[i]; + // Call Colamd routine to compute the ordering + info = eigen_colamd(m, n, Alen, A.data(), p.data(), knobs, stats); + eigen_assert( info && "COLAMD failed " ); + + perm.resize(n); + for (int i = 0; i < n; i++) perm.indices()(p(i)) = i; + + } + + private: + + +}; } // end namespace Eigen #endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU.h 
b/Eigen/src/SparseLU/SparseLU.h index 3d8c8532f..bb1decc4c 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -99,8 +99,29 @@ class SparseLU { m_diagpivotthresh = thresh; } - - + + /** Return the number of nonzero elements in the L factor */ + int nnzL() + { + if (m_factorizationIsOk) + return m_nnzL; + else + { + std::cerr<<"Numerical factorization should be done before\n"; + return 0; + } + } + /** Return the number of nonzero elements in the U factor */ + int nnzU() + { + if (m_factorizationIsOk) + return m_nnzU; + else + { + std::cerr<<"Numerical factorization should be done before\n"; + return 0; + } + } /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A. * * \sa compute() @@ -325,7 +346,8 @@ void SparseLU::analyzePattern(const MatrixType& mat) ord(mat,m_perm_c); //FIXME Check the right semantic behind m_perm_c // that is, column j of mat goes to column m_perm_c(j) of mat * m_perm_c; - + + // Apply the permutation to the column of the input matrix m_mat = mat * m_perm_c.inverse(); //FIXME It should be less expensive here to permute only the structural pattern of the matrix diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h index 9c2e6e17e..e3fae4a36 100644 --- a/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -628,7 +628,7 @@ void SuperLU::factorize(const MatrixType& a) this->initFactorization(a); //DEBUG - m_sluOptions.ColPerm = NATURAL; +// m_sluOptions.ColPerm = COLAMD; m_sluOptions.Equil = NO; int info = 0; RealScalar recip_pivot_growth, rcond; diff --git a/bench/spbench/test_sparseLU.cpp b/bench/spbench/test_sparseLU.cpp index 841011f30..6fbf03454 100644 --- a/bench/spbench/test_sparseLU.cpp +++ b/bench/spbench/test_sparseLU.cpp @@ -6,6 +6,7 @@ #include #include #include +#include using namespace std; using namespace Eigen; @@ -17,10 +18,12 @@ int main(int argc, char **args) typedef Matrix 
DenseMatrix; typedef Matrix DenseRhs; VectorXd b, x, tmp; - SparseLU, AMDOrdering > solver; +// SparseLU, AMDOrdering > solver; + SparseLU, COLAMDOrdering > solver; ifstream matrix_file; string line; int n; + BenchTimer timer; // Set parameters /* Fill the matrix with sparse matrix stored in Matrix-Market coordinate column-oriented format */ @@ -53,13 +56,26 @@ int main(int argc, char **args) /* Compute the factorization */ // solver.isSymmetric(true); - solver.compute(A); - + timer.start(); +// solver.compute(A); + solver.analyzePattern(A); + timer.stop(); + cout << "Time to analyze " << timer.value() << std::endl; + timer.reset(); + timer.start(); + solver.factorize(A); + timer.stop(); + cout << "Factorize Time " << timer.value() << std::endl; + timer.reset(); + timer.start(); solver._solve(b, x); + timer.stop(); + cout << "solve time " << timer.value() << std::endl; /* Check the accuracy */ VectorXd tmp2 = b - A*x; double tempNorm = tmp2.norm()/b.norm(); cout << "Relative norm of the computed solution : " << tempNorm <<"\n"; + cout << "Number of nonzeros in the factor : " << solver.nnzL() + solver.nnzU() << std::endl; return 0; } \ No newline at end of file From 59642da88bf83709e918667680e4ed63af4c31e5 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Thu, 19 Jul 2012 18:03:44 +0200 Subject: [PATCH 23/73] Add exception handler to memory allocation --- Eigen/src/OrderingMethods/Eigen_Colamd.h | 8 +- Eigen/src/OrderingMethods/Ordering.h | 6 - Eigen/src/SparseLU/SparseLU.h | 3 - Eigen/src/SparseLU/SparseLU_Coletree.h | 2 - Eigen/src/SparseLU/SparseLU_Matrix.h | 1 - Eigen/src/SparseLU/SparseLU_Memory.h | 157 +++++++++++++--------- Eigen/src/SparseLU/SparseLU_column_bmod.h | 1 - Eigen/src/SparseLU/SparseLU_panel_dfs.h | 1 - Eigen/src/SparseLU/SparseLU_snode_bmod.h | 1 - bench/spbench/CMakeLists.txt | 2 +- bench/spbench/test_sparseLU.cpp | 21 +-- 11 files changed, 104 insertions(+), 99 deletions(-) diff --git a/Eigen/src/OrderingMethods/Eigen_Colamd.h 
b/Eigen/src/OrderingMethods/Eigen_Colamd.h index 39701d0af..0af137d54 100644 --- a/Eigen/src/OrderingMethods/Eigen_Colamd.h +++ b/Eigen/src/OrderingMethods/Eigen_Colamd.h @@ -155,7 +155,6 @@ #endif /* MATLAB_MEX_FILE */ // == Row and Column structures == - typedef struct EIGEN_Colamd_Col_struct { int start ; /* index for A of first row in this column, or EIGEN_DEAD */ @@ -248,11 +247,9 @@ void eigen_colamd_report (int stats [EIGEN_COLAMD_STATS]); int eigen_init_rows_cols (int n_row, int n_col, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col col [], int A [], int p [], int stats[EIGEN_COLAMD_STATS] ); -void eigen_init_scoring (int n_row, int n_col, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col Col [], int A [], int head [], - double knobs[EIGEN_COLAMD_KNOBS], int *p_n_row2, int *p_n_col2, int *p_max_deg); +void eigen_init_scoring (int n_row, int n_col, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col Col [], int A [], int head [], double knobs[EIGEN_COLAMD_KNOBS], int *p_n_row2, int *p_n_col2, int *p_max_deg); -int eigen_find_ordering (int n_row, int n_col, int Alen, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col Col [], int A [], int head [], - int n_col2, int max_deg, int pfree); +int eigen_find_ordering (int n_row, int n_col, int Alen, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col Col [], int A [], int head [], int n_col2, int max_deg, int pfree); void eigen_order_children (int n_col, EIGEN_Colamd_Col Col [], int p []); @@ -2514,5 +2511,4 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E } #endif /* NDEBUG */ - #endif diff --git a/Eigen/src/OrderingMethods/Ordering.h b/Eigen/src/OrderingMethods/Ordering.h index cbd2e5d34..47cd6f169 100644 --- a/Eigen/src/OrderingMethods/Ordering.h +++ b/Eigen/src/OrderingMethods/Ordering.h @@ -124,9 +124,6 @@ class COLAMDOrdering typedef PermutationMatrix PermutationType; typedef Matrix IndexVector; /** Compute the permutation vector form a sparse matrix */ - - - template void operator() (const MatrixType& mat, 
PermutationType& perm) { @@ -152,9 +149,6 @@ class COLAMDOrdering for (int i = 0; i < n; i++) perm.indices()(p(i)) = i; } - - private: - }; diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index bb1decc4c..25fad0f29 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -339,9 +339,6 @@ void SparseLU::analyzePattern(const MatrixType& mat) //TODO It is possible as in SuperLU to compute row and columns scaling vectors to equilibrate the matrix mat. - // Compute the fill-reducing ordering - // TODO Currently, the only available ordering method is AMD. - OrderingType ord; ord(mat,m_perm_c); //FIXME Check the right semantic behind m_perm_c diff --git a/Eigen/src/SparseLU/SparseLU_Coletree.h b/Eigen/src/SparseLU/SparseLU_Coletree.h index 1329d383f..142f4995e 100644 --- a/Eigen/src/SparseLU/SparseLU_Coletree.h +++ b/Eigen/src/SparseLU/SparseLU_Coletree.h @@ -94,7 +94,6 @@ int LU_sp_coletree(const MatrixType& mat, IndexVector& parent) int rset, cset, rroot; for (col = 0; col < nc; col++) { -// cset = pp(col) = col; // Initially, each element is in its own set //FIXME pp(col) = col; cset = col; root(cset) = col; @@ -108,7 +107,6 @@ int LU_sp_coletree(const MatrixType& mat, IndexVector& parent) if (rroot != col) { parent(rroot) = col; -// cset = pp(cset) = rset; // Get the union of cset and rset //FIXME pp(cset) = rset; cset = rset; root(cset) = col; diff --git a/Eigen/src/SparseLU/SparseLU_Matrix.h b/Eigen/src/SparseLU/SparseLU_Matrix.h index 90a0f2740..9f2dcaa56 100644 --- a/Eigen/src/SparseLU/SparseLU_Matrix.h +++ b/Eigen/src/SparseLU/SparseLU_Matrix.h @@ -192,7 +192,6 @@ class SuperNodalMatrix protected: Index m_row; // Number of rows Index m_col; // Number of columns -// Index m_nnz; // Number of nonzero values Index m_nsuper; // Number of supernodes Scalar* m_nzval; //array of nonzero values packed by column Index* m_nzval_colptr; //nzval_colptr[j] Stores the location in nzval[] which starts column j diff --git 
a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h index a17079199..7a2ab93df 100644 --- a/Eigen/src/SparseLU/SparseLU_Memory.h +++ b/Eigen/src/SparseLU/SparseLU_Memory.h @@ -78,41 +78,82 @@ int expand(VectorType& vec, int& length, int nbElts, int keep_prev, int& num_ex VectorType old_vec; // Temporary vector to hold the previous values if (nbElts > 0 ) - old_vec = vec.segment(0,nbElts); // old_vec should be of size nbElts... to be checked + old_vec = vec.segment(0,nbElts); + + //Allocate or expand the current vector + try + { + vec.resize(new_len); + } + catch(std::bad_alloc& ) + { + if ( !num_expansions ) + { + // First time to allocate from LUMemInit() + throw; // Pass the exception to LUMemInit() which has a try... catch block + } + if (keep_prev) + { + // In this case, the memory length should not not be reduced + return new_len; + } + else + { + // Reduce the size and increase again + int tries = 0; // Number of attempts + do + { + alpha = LU_Reduce(alpha); + new_len = alpha * length ; + try + { + vec.resize(new_len); + } + catch(std::bad_alloc& ) + { + tries += 1; + if ( tries > 10) return new_len; + } + } while (!vec.size()); + } + } + //Copy the previous values to the newly allocated space + if (nbElts > 0) + vec.segment(0, nbElts) = old_vec; + + + length = new_len; + if(num_expansions) ++num_expansions; + return 0; - //expand the current vector //FIXME Should be in a try ... 
catch region - vec.resize(new_len); /* * Test if the memory has been well allocated * otherwise reduce the size and try to reallocate * copy data from previous vector (if exists) to the newly allocated vector */ - if ( num_expansions != 0 ) // The memory has been expanded before - { - int tries = 0; - if (keep_prev) - { - if (!vec.size()) return new_len ; - } - else - { - while (!vec.size()) - { - // Reduce the size and allocate again - if ( ++tries > 10) return new_len; - alpha = LU_Reduce(alpha); - new_len = alpha * length ; - vec.resize(new_len); //FIXME Should be in a try catch section - } - } // end allocation - - //Copy the previous values to the newly allocated space - if (nbElts > 0) - vec.segment(0, nbElts) = old_vec; - } // end expansion - length = new_len; - if(num_expansions) ++num_expansions; - return 0; +// if ( num_expansions != 0 ) // The memory has been expanded before +// { +// int tries = 0; +// if (keep_prev) +// { +// if (!vec.size()) return new_len ; +// } +// else +// { +// while (!vec.size()) +// { +// // Reduce the size and allocate again +// if ( ++tries > 10) return new_len; +// alpha = LU_Reduce(alpha); +// new_len = alpha * length ; +// vec.resize(new_len); //FIXME Should be in a try catch section +// } +// } // end allocation +// +// //Copy the previous values to the newly allocated space +// if (nbElts > 0) +// vec.segment(0, nbElts) = old_vec; +// } // end expansion } /** @@ -122,8 +163,8 @@ int expand(VectorType& vec, int& length, int nbElts, int keep_prev, int& num_ex * \param annz number of initial nonzeros in the matrix * \param lwork if lwork=-1, this routine returns an estimated size of the required memory * \param glu persistent data to facilitate multiple factors : will be deleted later ?? 
- * \return an estimated size of the required memory if lwork = -1; otherwise, return the size of actually allocated when memory allocation failed - * NOTE Unlike SuperLU, this routine does not support successive factorization with the same pattern and the row permutation + * \return an estimated size of the required memory if lwork = -1; otherwise, return the size of actually allocated memory when allocation failed, and 0 on success + * NOTE Unlike SuperLU, this routine does not support successive factorization with the same pattern and the same row permutation */ template int LUMemInit(int m, int n, int annz, int lwork, int fillratio, int panel_size, LU_GlobalLU_t& glu) @@ -159,27 +200,26 @@ int LUMemInit(int m, int n, int annz, int lwork, int fillratio, int panel_size, glu.xusub.resize(n+1); // Reserve memory for L/U factors - expand(glu.lusup, nzlumax, 0, 0, num_expansions); - expand(glu.ucol,nzumax, 0, 0, num_expansions); - expand(glu.lsub,nzlmax, 0, 0, num_expansions); - expand(glu.usub,nzumax, 0, 1, num_expansions); - - // Check if the memory is correctly allocated, - // FIXME Should be a try... 
catch section here - while ( !glu.lusup.size() || !glu.ucol.size() || !glu.lsub.size() || !glu.usub.size()) + do { - //Reduce the estimated size and retry - nzlumax /= 2; - nzumax /= 2; - nzlmax /= 2; + try + { + expand(glu.lusup, nzlumax, 0, 0, num_expansions); + expand(glu.ucol,nzumax, 0, 0, num_expansions); + expand(glu.lsub,nzlmax, 0, 0, num_expansions); + expand(glu.usub,nzumax, 0, 1, num_expansions); + } + catch(std::bad_alloc& ) + { + //Reduce the estimated size and retry + nzlumax /= 2; + nzumax /= 2; + nzlmax /= 2; + if (nzlumax < annz ) return nzlumax; + } - if (nzlumax < annz ) return nzlumax; - - expand(glu.lusup, nzlumax, 0, 0, num_expansions); - expand(glu.ucol, nzumax, 0, 0, num_expansions); - expand(glu.lsub, nzlmax, 0, 0, num_expansions); - expand(glu.usub, nzumax, 0, 1, num_expansions); - } + } while (!glu.lusup.size() || !glu.ucol.size() || !glu.lsub.size() || !glu.usub.size()); + ++num_expansions; @@ -207,23 +247,6 @@ int LUMemXpand(VectorType& vec, int& maxlen, int nbElts, LU_MemType memtype, int if (failed_size) return failed_size; - // The following code is not really needed since maxlen is passed by reference - // and correspond to the appropriate field in glu -// switch ( mem_type ) { -// case LUSUP: -// glu.nzlumax = maxlen; -// break; -// case UCOL: -// glu.nzumax = maxlen; -// break; -// case LSUB: -// glu.nzlmax = maxlen; -// break; -// case USUB: -// glu.nzumax = maxlen; -// break; -// } - return 0 ; } diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h index 3042eb5f8..00787721b 100644 --- a/Eigen/src/SparseLU/SparseLU_column_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -133,7 +133,6 @@ int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, Sca // Dense triangular solve -- start effective triangle luptr += nsupr * no_zeros + no_zeros; // Form Eigen matrix and vector -// std::cout<< "jcol " << jcol << " rows " << segsize << std::endl; Map, 0, 
OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); VectorBlock u(tempv, 0, segsize); diff --git a/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/Eigen/src/SparseLU/SparseLU_panel_dfs.h index 908ee67ac..70ea0f51f 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_panel_dfs.h @@ -92,7 +92,6 @@ void LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, Index int xdfs, maxdfs, kpar; // Initialize pointers -// IndexVector& marker1 = marker.block(m, m); VectorBlock marker1(marker, m, m); nseg = 0; IndexVector& xsup = glu.xsup; diff --git a/Eigen/src/SparseLU/SparseLU_snode_bmod.h b/Eigen/src/SparseLU/SparseLU_snode_bmod.h index 44438d037..fc8042f52 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_snode_bmod.h @@ -80,7 +80,6 @@ int LU_snode_bmod (const int jcol, const int fsupc, ScalarVector& dense, LU_Glob // Update the trailing part of the column jcol U(jcol:jcol+nrow, jcol) using L(jcol:jcol+nrow, fsupc:jcol) and U(fsupc:jcol) new (&A) Map,0,OuterStride<> > ( &(lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); -// Map > l(&(lusup.data()[ufirst+nsupc], nrow); VectorBlock l(lusup, ufirst+nsupc, nrow); l = l - A * u; } diff --git a/bench/spbench/CMakeLists.txt b/bench/spbench/CMakeLists.txt index 4b3c6f8e3..a093cc5d9 100644 --- a/bench/spbench/CMakeLists.txt +++ b/bench/spbench/CMakeLists.txt @@ -67,4 +67,4 @@ add_executable(spsolver sp_solver.cpp) target_link_libraries (spsolver ${SPARSE_LIBS}) add_executable(test_sparseLU test_sparseLU.cpp) -target_link_libraries (test_sparseLU ${SPARSE_LIBS}) \ No newline at end of file +target_link_libraries (test_sparseLU ${SPARSE_LIBS}) diff --git a/bench/spbench/test_sparseLU.cpp b/bench/spbench/test_sparseLU.cpp index 6fbf03454..31273add5 100644 --- a/bench/spbench/test_sparseLU.cpp +++ b/bench/spbench/test_sparseLU.cpp @@ -13,13 +13,14 @@ using namespace Eigen; int main(int argc, char **args) { - 
SparseMatrix A; - typedef SparseMatrix::Index Index; - typedef Matrix DenseMatrix; - typedef Matrix DenseRhs; - VectorXd b, x, tmp; -// SparseLU, AMDOrdering > solver; - SparseLU, COLAMDOrdering > solver; + typedef complex scalar; + SparseMatrix A; + typedef SparseMatrix::Index Index; + typedef Matrix DenseMatrix; + typedef Matrix DenseRhs; + Matrix b, x, tmp; +// SparseLU, AMDOrdering > solver; + SparseLU, COLAMDOrdering > solver; ifstream matrix_file; string line; int n; @@ -36,7 +37,7 @@ int main(int argc, char **args) if (iscomplex) { cout<< " Not for complex matrices \n"; return -1; } if (isvector) { cout << "The provided file is not a matrix file\n"; return -1;} if (sym != 0) { // symmetric matrices, only the lower part is stored - SparseMatrix temp; + SparseMatrix temp; temp = A; A = temp.selfadjointView(); } @@ -72,8 +73,8 @@ int main(int argc, char **args) timer.stop(); cout << "solve time " << timer.value() << std::endl; /* Check the accuracy */ - VectorXd tmp2 = b - A*x; - double tempNorm = tmp2.norm()/b.norm(); + Matrix tmp2 = b - A*x; + scalar tempNorm = tmp2.norm()/b.norm(); cout << "Relative norm of the computed solution : " << tempNorm <<"\n"; cout << "Number of nonzeros in the factor : " << solver.nnzL() + solver.nnzU() << std::endl; From 925ace196c182759026d3eb3edc06565ab5f01ee Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Thu, 19 Jul 2012 18:15:23 +0200 Subject: [PATCH 24/73] correct bug in the complex version --- Eigen/src/SparseLU/SparseLU_pivotL.h | 11 ++++++----- bench/spbench/test_sparseLU.cpp | 3 ++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU_pivotL.h b/Eigen/src/SparseLU/SparseLU_pivotL.h index 39151f1e0..0c767c23a 100644 --- a/Eigen/src/SparseLU/SparseLU_pivotL.h +++ b/Eigen/src/SparseLU/SparseLU_pivotL.h @@ -71,7 +71,8 @@ template int LU_pivotL(const int jcol, const typename ScalarVector::RealScalar diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, int& pivrow, LU_GlobalLU_t& 
glu) { typedef typename IndexVector::Scalar Index; - typedef typename ScalarVector::Scalar Scalar; + typedef typename ScalarVector::Scalar Scalar; + typedef typename ScalarVector::RealScalar RealScalar; // Initialize pointers IndexVector& lsub = glu.lsub; // Compressed row subscripts of L rectangular supernodes. IndexVector& xlsub = glu.xlsub; // pointers to the beginning of each column subscript in lsub @@ -88,10 +89,10 @@ int LU_pivotL(const int jcol, const typename ScalarVector::RealScalar diagpivott // Determine the largest abs numerical value for partial pivoting Index diagind = iperm_c(jcol); // diagonal index - Scalar pivmax = 0.0; + RealScalar pivmax = 0.0; Index pivptr = nsupc; Index diag = IND_EMPTY; - Scalar rtemp; + RealScalar rtemp; Index isub, icol, itemp, k; for (isub = nsupc; isub < nsupr; ++isub) { rtemp = std::abs(lu_col_ptr[isub]); @@ -109,7 +110,7 @@ int LU_pivotL(const int jcol, const typename ScalarVector::RealScalar diagpivott return (jcol+1); } - Scalar thresh = diagpivotthresh * pivmax; + RealScalar thresh = diagpivotthresh * pivmax; // Choose appropriate pivotal element @@ -119,7 +120,7 @@ int LU_pivotL(const int jcol, const typename ScalarVector::RealScalar diagpivott { // Diagonal element exists rtemp = std::abs(lu_col_ptr[diag]); - if (rtemp != Scalar(0.0) && rtemp >= thresh) pivptr = diag; + if (rtemp != 0.0 && rtemp >= thresh) pivptr = diag; } pivrow = lsub_ptr[pivptr]; } diff --git a/bench/spbench/test_sparseLU.cpp b/bench/spbench/test_sparseLU.cpp index 31273add5..08b6c926e 100644 --- a/bench/spbench/test_sparseLU.cpp +++ b/bench/spbench/test_sparseLU.cpp @@ -14,6 +14,7 @@ using namespace Eigen; int main(int argc, char **args) { typedef complex scalar; +// typedef double scalar; SparseMatrix A; typedef SparseMatrix::Index Index; typedef Matrix DenseMatrix; @@ -34,7 +35,7 @@ int main(int argc, char **args) bool iscomplex=false, isvector=false; int sym; getMarketHeader(args[1], sym, iscomplex, isvector); - if (iscomplex) { cout<< " 
Not for complex matrices \n"; return -1; } +// if (iscomplex) { cout<< " Not for complex matrices \n"; return -1; } if (isvector) { cout << "The provided file is not a matrix file\n"; return -1;} if (sym != 0) { // symmetric matrices, only the lower part is stored SparseMatrix temp; From c0fa5811ec233a5a3065cce78b1bca155a9b4fc8 Mon Sep 17 00:00:00 2001 From: "Desire NUENTSA W." Date: Fri, 27 Jul 2012 11:36:58 +0200 Subject: [PATCH 25/73] Refactoring codes for numeric updates --- Eigen/src/SparseLU/SparseLU.h | 1 + Eigen/src/SparseLU/SparseLU_column_bmod.h | 43 +------- Eigen/src/SparseLU/SparseLU_kernel_bmod.h | 92 ++++++++++++++++ Eigen/src/SparseLU/SparseLU_panel_bmod.h | 51 +-------- bench/spbench/sp_solver.cpp | 124 ++++++++++++++++++++++ bench/spbench/test_sparseLU.cpp | 4 +- 6 files changed, 224 insertions(+), 91 deletions(-) create mode 100644 Eigen/src/SparseLU/SparseLU_kernel_bmod.h create mode 100644 bench/spbench/sp_solver.cpp diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 25fad0f29..474dfdedc 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -388,6 +388,7 @@ void SparseLU::analyzePattern(const MatrixType& mat) #include "SparseLU_snode_bmod.h" #include "SparseLU_pivotL.h" #include "SparseLU_panel_dfs.h" +#include "SparseLU_kernel_bmod.h" #include "SparseLU_panel_bmod.h" #include "SparseLU_column_dfs.h" #include "SparseLU_column_bmod.h" diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h index 00787721b..1457b6f35 100644 --- a/Eigen/src/SparseLU/SparseLU_column_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -66,7 +66,7 @@ int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, Sca typedef typename IndexVector::Scalar Index; typedef typename ScalarVector::Scalar Scalar; int jsupno, k, ksub, krep, ksupno; - int lptr, nrow, isub, i, irow, nextlu, new_next, ufirst; + int lptr, nrow, isub, irow, nextlu, new_next, 
ufirst; int fsupc, nsupc, nsupr, luptr, kfnz, no_zeros; /* krep = representative of current k-th supernode * fsupc = first supernodal column @@ -122,46 +122,7 @@ int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, Sca // Perform a triangular solver and block update, // then scatter the result of sup-col update to dense no_zeros = kfnz - fst_col; - // First, copy U[*,j] segment from dense(*) to tempv(*) - isub = lptr + no_zeros; - for (i = 0; i < segsize; i++) - { - irow = lsub(isub); - tempv(i) = dense(irow); - ++isub; - } - // Dense triangular solve -- start effective triangle - luptr += nsupr * no_zeros + no_zeros; - // Form Eigen matrix and vector - Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); - VectorBlock u(tempv, 0, segsize); - - u = A.template triangularView().solve(u); - - // Dense matrix-vector product y <-- A*x - luptr += segsize; - new (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(nsupr) ); - VectorBlock l(tempv, segsize, nrow); - l= A * u; - - // Scatter tempv[] into SPA dense[] as a temporary storage - isub = lptr + no_zeros; - for (i = 0; i < segsize; i++) - { - irow = lsub(isub); - dense(irow) = tempv(i); - tempv(i) = Scalar(0.0); - ++isub; - } - - // Scatter l into SPA dense[] - for (i = 0; i < nrow; i++) - { - irow = lsub(isub); - dense(irow) -= l(i); - l(i) = Scalar(0.0); - ++isub; - } + LU_kernel_bmod(segsize, dense, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); } // end if jsupno } // end for each segment diff --git a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h new file mode 100644 index 000000000..d5df70fd2 --- /dev/null +++ b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h @@ -0,0 +1,92 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . 
+ +#ifndef SPARSELU_KERNEL_BMOD_H +#define SPARSELU_KERNEL_BMOD_H + +/** + * \brief Performs numeric block updates from a given supernode to a single column + * + * \param segsize Size of the segment (and blocks ) to use for updates + * \param [in,out]dense Packed values of the original matrix + * \param tempv temporary vector to use for updates + * \param lusup array containing the supernodes + * \param nsupr Number of rows in the supernode + * \param nrow Number of rows in the rectangular part of the supernode + * \param lsub compressed row subscripts of supernodes + * \param lptr pointer to the first column of the current supernode in lsub + * \param no_zeros Number of nonzeros elements before the diagonal part of the supernode + * \return 0 on success + */ +template +int LU_kernel_bmod(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, int& luptr, const int nsupr, const int nrow, IndexVector& lsub, const int lptr, const int no_zeros) +{ + typedef typename ScalarVector::Scalar Scalar; + // First, copy U[*,j] segment from dense(*) to tempv(*) + // The result of triangular solve is in tempv[*]; + // The result of matric-vector update is in dense[*] + int isub = lptr + no_zeros; + int i, irow; + for (i = 0; i < segsize; i++) + { + irow = lsub(isub); + tempv(i) = dense(irow); + ++isub; + } + // Dense triangular solve -- start effective triangle + luptr += nsupr * no_zeros + no_zeros; + // Form Eigen matrix and vector + Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); + VectorBlock u(tempv, 0, segsize); + + u = A.template triangularView().solve(u); + + // Dense matrix-vector product y <-- A*x + luptr += segsize; + new (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(nsupr) ); + VectorBlock l(tempv, segsize, nrow); + l= A * u; + + // Scatter tempv[] into SPA dense[] as a temporary storage + isub = lptr + no_zeros; + for (i = 0; i < segsize; i++) + { + 
irow = lsub(isub); + dense(irow) = tempv(i); + tempv(i) = Scalar(0.0); + ++isub; + } + + // Scatter l into SPA dense[] + for (i = 0; i < nrow; i++) + { + irow = lsub(isub); + dense(irow) -= l(i); + l(i) = Scalar(0.0); + ++isub; + } + + return 0; +} +#endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h index 59ec69ec8..ebff787ee 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -73,12 +73,12 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca IndexVector& xlusup = glu.xlusup; ScalarVector& lusup = glu.lusup; - int i,ksub,jj,nextl_col,irow; + int ksub,jj,nextl_col; int fsupc, nsupc, nsupr, nrow; int krep, kfnz; int lptr; // points to the row subscripts of a supernode int luptr; // ... - int segsize,no_zeros,isub ; + int segsize,no_zeros ; // For each nonz supernode segment of U[*,j] in topological order int k = nseg - 1; for (ksub = 0; ksub < nseg; ksub++) @@ -118,52 +118,7 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca // Perform a trianglar solve and block update, // then scatter the result of sup-col update to dense[] no_zeros = kfnz - fsupc; - // First Copy U[*,j] segment from dense[*] to tempv[*] : - // The result of triangular solve is in tempv[*]; - // The result of matric-vector update is in dense_col[*] - isub = lptr + no_zeros; - for (i = 0; i < segsize; ++i) - { - irow = lsub(isub); - tempv(i) = dense_col(irow); // Gather to a compact vector - ++isub; - } - // Start effective triangle - luptr += nsupr * no_zeros + no_zeros; - // triangular solve with Eigen - Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); - VectorBlock u(tempv, 0, segsize); - u = A.template triangularView().solve(u); - - luptr += segsize; - // Dense Matrix vector product y <-- A*x; - new (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr]), nrow, 
segsize, OuterStride<>(nsupr) ); - VectorBlock l(tempv, segsize, nrow); - l= A * u; - - // Scatter tempv(*) into SPA dense(*) such that tempv(*) - // can be used for the triangular solve of the next - // column of the panel. The y will be copied into ucol(*) - // after the whole panel has been finished... after column_dfs() and column_bmod() - - isub = lptr + no_zeros; - for (i = 0; i < segsize; i++) - { - irow = lsub(isub); - dense_col(irow) = tempv(i); - tempv(i) = Scalar(0.0); - isub++; - } - - // Scatter the update from &tempv[segsize] into SPA dense(*) - // Start dense rectangular L - for (i = 0; i < nrow; i++) - { - irow = lsub(isub); - dense_col(irow) -= l(i); - l(i) = Scalar(0); - ++isub; - } + LU_kernel_bmod(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); } // End for each column in the panel } // End for each updating supernode diff --git a/bench/spbench/sp_solver.cpp b/bench/spbench/sp_solver.cpp new file mode 100644 index 000000000..e18f2d1c3 --- /dev/null +++ b/bench/spbench/sp_solver.cpp @@ -0,0 +1,124 @@ +// Small bench routine for Eigen available in Eigen +// (C) Desire NUENTSA WAKAM, INRIA + +#include +#include +#include +#include +#include +#include +#include +#include +//#include +#include +// #include +#include + +using namespace std; +using namespace Eigen; + +int main(int argc, char **args) +{ + SparseMatrix A; + typedef SparseMatrix::Index Index; + typedef Matrix DenseMatrix; + typedef Matrix DenseRhs; + VectorXd b, x, tmp; + BenchTimer timer,totaltime; + //SparseLU > solver; + SuperLU > solver; + ifstream matrix_file; + string line; + int n; + // Set parameters +// solver.iparm(IPARM_THREAD_NBR) = 4; + /* Fill the matrix with sparse matrix stored in Matrix-Market coordinate column-oriented format */ + if (argc < 2) assert(false && "please, give the matrix market file "); + + timer.start(); + totaltime.start(); + loadMarket(A, args[1]); + cout << "End charging matrix " << endl; + bool iscomplex=false, 
isvector=false; + int sym; + getMarketHeader(args[1], sym, iscomplex, isvector); + if (iscomplex) { cout<< " Not for complex matrices \n"; return -1; } + if (isvector) { cout << "The provided file is not a matrix file\n"; return -1;} + if (sym != 0) { // symmetric matrices, only the lower part is stored + SparseMatrix temp; + temp = A; + A = temp.selfadjointView(); + } + timer.stop(); + + n = A.cols(); + // ====== TESTS FOR SPARSE TUTORIAL ====== +// cout<< "OuterSize " << A.outerSize() << " inner " << A.innerSize() << endl; +// SparseMatrix mat1(A); +// SparseMatrix mat2; +// cout << " norm of A " << mat1.norm() << endl; ; +// PermutationMatrix perm(n); +// perm.resize(n,1); +// perm.indices().setLinSpaced(n, 0, n-1); +// mat2 = perm * mat1; +// mat.subrows(); +// mat2.resize(n,n); +// mat2.reserve(10); +// mat2.setConstant(); +// std::cout<< "NORM " << mat1.squaredNorm()<< endl; + + cout<< "Time to load the matrix " << timer.value() < 2) + loadMarketVector(b, args[2]); + else + { + b.resize(n); + tmp.resize(n); +// tmp.setRandom(); + for (int i = 0; i < n; i++) tmp(i) = i; + b = A * tmp ; + } +// Scaling > scal; +// scal.computeRef(A); +// b = scal.LeftScaling().cwiseProduct(b); + + /* Compute the factorization */ + cout<< "Starting the factorization "<< endl; + timer.reset(); + timer.start(); + cout<< "Size of Input Matrix "<< b.size()<<"\n\n"; + cout<< "Rows and columns "<< A.rows() <<" " < scalar; -// typedef double scalar; +// typedef complex scalar; + typedef double scalar; SparseMatrix A; typedef SparseMatrix::Index Index; typedef Matrix DenseMatrix; From ce30d50e3ed9723ed3ecd38e7c99661730c12813 Mon Sep 17 00:00:00 2001 From: "Desire NUENTSA W." 
Date: Fri, 27 Jul 2012 16:38:20 +0200 Subject: [PATCH 26/73] Improve the permutation --- Eigen/src/SparseCore/SparseMatrix.h | 12 ++++++++++++ Eigen/src/SparseLU/SparseLU.h | 26 ++++++++++++++++++++----- Eigen/src/SparseLU/SparseLU_snode_dfs.h | 13 ++++++------- 3 files changed, 39 insertions(+), 12 deletions(-) diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 214f130f5..52a9dab70 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -477,6 +477,18 @@ class SparseMatrix m_data.squeeze(); } + /** Turns the matrix into the uncompressed mode */ + void Uncompress() + { + if(m_innerNonZeros != 0) + return; + m_innerNonZeros = new Index[m_outerSize]; + for (int i = 0; i < m_outerSize; i++) + { + m_innerNonZeros[i] = m_outerIndex[i+1] - m_outerIndex[i]; + } + } + /** Suppresses all nonzeros which are \b much \b smaller \b than \a reference under the tolerence \a epsilon */ void prune(Scalar reference, RealScalar epsilon = NumTraits::dummy_precision()) { diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 474dfdedc..70898958b 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -346,8 +346,17 @@ void SparseLU::analyzePattern(const MatrixType& mat) // Apply the permutation to the column of the input matrix - m_mat = mat * m_perm_c.inverse(); //FIXME It should be less expensive here to permute only the structural pattern of the matrix - +// m_mat = mat * m_perm_c.inverse(); //FIXME It should be less expensive here to permute only the structural pattern of the matrix + + //First copy the whole input matrix. + m_mat = mat; + m_mat.Uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. FIXME : This vector is filled but not subsequently used. 
+ //Then, permute only the column pointers + for (int i = 0; i < mat.cols(); i++) + { + m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = mat.outerIndexPtr()[i]; + m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = mat.outerIndexPtr()[i+1] - mat.outerIndexPtr()[i]; + } // Compute the column elimination tree of the permuted matrix if (m_etree.size() == 0) m_etree.resize(m_mat.cols()); @@ -424,8 +433,15 @@ void SparseLU::factorize(const MatrixType& matrix) // Apply the column permutation computed in analyzepattern() - m_mat = matrix * m_perm_c.inverse(); - m_mat.makeCompressed(); + // m_mat = matrix * m_perm_c.inverse(); + m_mat = matrix; + m_mat.Uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. + //Then, permute only the column pointers + for (int i = 0; i < matrix.cols(); i++) + { + m_mat.outerIndexPtr()[m_perm_c.indices()(i)] = matrix.outerIndexPtr()[i]; + m_mat.innerNonZeroPtr()[m_perm_c.indices()(i)] = matrix.outerIndexPtr()[i+1] - matrix.outerIndexPtr()[i]; + } int m = m_mat.rows(); int n = m_mat.cols(); @@ -504,7 +520,7 @@ void SparseLU::factorize(const MatrixType& matrix) // Factorize the relaxed supernode(jcol:kcol) // First, determine the union of the row structure of the snode - info = LU_snode_dfs(jcol, kcol, m_mat.innerIndexPtr(), m_mat.outerIndexPtr(), xprune, marker, m_glu); + info = LU_snode_dfs(jcol, kcol, m_mat, xprune, marker, m_glu); if ( info ) { std::cerr << "MEMORY ALLOCATION FAILED IN SNODE_DFS() \n"; diff --git a/Eigen/src/SparseLU/SparseLU_snode_dfs.h b/Eigen/src/SparseLU/SparseLU_snode_dfs.h index 6b2817262..150d9d0ef 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_snode_dfs.h @@ -57,8 +57,8 @@ * \param marker (in/out) working vector * \return 0 on success, > 0 size of the memory when memory allocation failed */ - template - int LU_snode_dfs(const int jcol, const int kcol, const typename IndexVector::Scalar* asub, const typename IndexVector::Scalar* colptr, 
IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t& glu) + template + int LU_snode_dfs(const int jcol, const int kcol,const MatrixType& mat, IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t& glu) { typedef typename IndexVector::Scalar Index; IndexVector& xsup = glu.xsup; @@ -69,14 +69,13 @@ int mem; Index nsuper = ++supno(jcol); // Next available supernode number int nextl = xlsub(jcol); //Index of the starting location of the jcol-th column in lsub - int i,k; int krow,kmark; - for (i = jcol; i <=kcol; i++) + for (int i = jcol; i <=kcol; i++) { // For each nonzero in A(*,i) - for (k = colptr[i]; k < colptr[i+1]; k++) + for (typename MatrixType::InnerIterator it(mat, i); it; ++it) { - krow = asub[k]; + krow = it.row(); kmark = marker(krow); if ( kmark != kcol ) { @@ -105,7 +104,7 @@ Index ifrom, ito = nextl; for (ifrom = xlsub(jcol); ifrom < nextl;) lsub(ito++) = lsub(ifrom++); - for (i = jcol+1; i <=kcol; i++) xlsub(i) = nextl; + for (int i = jcol+1; i <=kcol; i++) xlsub(i) = nextl; nextl = ito; } xsup(nsuper+1) = kcol + 1; // Start of next available supernode From 8f6d5eacb4af8fc31301625652a5017e6c2e50eb Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sun, 29 Jul 2012 22:26:00 +0200 Subject: [PATCH 27/73] optimize LU_kernel_bmod for small cases, and add an important .noalias() --- Eigen/src/SparseLU/SparseLU_column_bmod.h | 5 +- Eigen/src/SparseLU/SparseLU_kernel_bmod.h | 123 ++++++++++++++-------- Eigen/src/SparseLU/SparseLU_panel_bmod.h | 5 +- 3 files changed, 85 insertions(+), 48 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h index 1457b6f35..5c26bd70e 100644 --- a/Eigen/src/SparseLU/SparseLU_column_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -122,7 +122,10 @@ int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, Sca // Perform a triangular solver and block update, // then scatter the result of sup-col update to dense no_zeros = kfnz - fst_col; 
- LU_kernel_bmod(segsize, dense, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); + if(segsize==1) + LU_kernel_bmod<1>::run(segsize, dense, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); + else + LU_kernel_bmod::run(segsize, dense, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); } // end if jsupno } // end for each segment diff --git a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h index d5df70fd2..0d4b20f59 100644 --- a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h @@ -39,54 +39,85 @@ * \param no_zeros Number of nonzeros elements before the diagonal part of the supernode * \return 0 on success */ -template -int LU_kernel_bmod(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, int& luptr, const int nsupr, const int nrow, IndexVector& lsub, const int lptr, const int no_zeros) +template struct LU_kernel_bmod { - typedef typename ScalarVector::Scalar Scalar; - // First, copy U[*,j] segment from dense(*) to tempv(*) - // The result of triangular solve is in tempv[*]; - // The result of matric-vector update is in dense[*] - int isub = lptr + no_zeros; - int i, irow; - for (i = 0; i < segsize; i++) + template + EIGEN_DONT_INLINE static void run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, int& luptr, const int nsupr, const int nrow, IndexVector& lsub, const int lptr, const int no_zeros) { - irow = lsub(isub); - tempv(i) = dense(irow); - ++isub; + typedef typename ScalarVector::Scalar Scalar; + // First, copy U[*,j] segment from dense(*) to tempv(*) + // The result of triangular solve is in tempv[*]; + // The result of matric-vector update is in dense[*] + int isub = lptr + no_zeros; + int i, irow; + for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++) + { + irow = lsub(isub); + tempv(i) = dense(irow); + ++isub; + } + // Dense triangular solve -- start 
effective triangle + luptr += nsupr * no_zeros + no_zeros; + // Form Eigen matrix and vector + Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); + Map > u(tempv.data(), segsize); + + u = A.template triangularView().solve(u); + + // Dense matrix-vector product y <-- B*x + luptr += segsize; + Map, 0, OuterStride<> > B( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(nsupr) ); + Map > l(tempv.data()+segsize, nrow); + if(SegSizeAtCompileTime==2) + l = u(0) * B.col(0) + u(1) * B.col(1); + else if(SegSizeAtCompileTime==3) + l = u(0) * B.col(0) + u(1) * B.col(1) + u(2) * B.col(2); + else + l.noalias() = B * u; + + // Scatter tempv[] into SPA dense[] as a temporary storage + isub = lptr + no_zeros; + for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++) + { + irow = lsub(isub++); + dense(irow) = tempv(i); + } + + // Scatter l into SPA dense[] + for (i = 0; i < nrow; i++) + { + irow = lsub(isub++); + dense(irow) -= l(i); + } } - // Dense triangular solve -- start effective triangle - luptr += nsupr * no_zeros + no_zeros; - // Form Eigen matrix and vector - Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); - VectorBlock u(tempv, 0, segsize); - - u = A.template triangularView().solve(u); - - // Dense matrix-vector product y <-- A*x - luptr += segsize; - new (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(nsupr) ); - VectorBlock l(tempv, segsize, nrow); - l= A * u; - - // Scatter tempv[] into SPA dense[] as a temporary storage - isub = lptr + no_zeros; - for (i = 0; i < segsize; i++) +}; + +template <> struct LU_kernel_bmod<1> +{ + template + EIGEN_DONT_INLINE static void run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, int& luptr, const int nsupr, const int nrow, IndexVector& lsub, const int lptr, const int no_zeros) { - irow = lsub(isub); - dense(irow) = tempv(i); - 
tempv(i) = Scalar(0.0); - ++isub; + typedef typename ScalarVector::Scalar Scalar; + Scalar f = dense(lsub(lptr + no_zeros)); + luptr += nsupr * no_zeros + no_zeros + 1; + const Scalar* a(lusup.data() + luptr); + const typename IndexVector::Scalar* irow(lsub.data()+lptr + no_zeros + 1); + int i = 0; + for (; i+1 < nrow; i+=2) + { + int i0 = *(irow++); + int i1 = *(irow++); + Scalar a0 = *(a++); + Scalar a1 = *(a++); + Scalar d0 = dense.coeff(i0); + Scalar d1 = dense.coeff(i1); + d0 -= f*a0; + d1 -= f*a1; + dense.coeffRef(i0) = d0; + dense.coeffRef(i1) = d1; + } + if(i::run(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); + else if(segsize==2) LU_kernel_bmod<2>::run(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); + else if(segsize==3) LU_kernel_bmod<3>::run(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); + else LU_kernel_bmod::run(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); } // End for each column in the panel } // End for each updating supernode From e88817cc513278c82f7b8ca2b4752e43876ed29d Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 30 Jul 2012 19:28:31 +0200 Subject: [PATCH 28/73] add another missing .noalias() --- Eigen/src/SparseLU/SparseLU_column_bmod.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h index 5c26bd70e..6bc6d80ea 100644 --- a/Eigen/src/SparseLU/SparseLU_column_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -180,7 +180,7 @@ int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, Sca new (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); VectorBlock l(lusup, ufirst+nsupc, nrow); - l = l - A * u; + l.noalias() -= A * u; } // End if fst_col return 0; From 390d6599baf0c3da1cbf924d8852ee8ade16ec67 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA 
Date: Wed, 1 Aug 2012 11:35:23 +0200 Subject: [PATCH 29/73] Add missing .noalias() --- Eigen/src/SparseLU/SparseLU_snode_bmod.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU_snode_bmod.h b/Eigen/src/SparseLU/SparseLU_snode_bmod.h index fc8042f52..d087d1073 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_snode_bmod.h @@ -24,7 +24,7 @@ /* - * NOTE: This file is the modified version of dsnode_bmod.c file in SuperLU + * NOTE: This file is the modified version of [s,d,c,z]snode_bmod.c file in SuperLU * -- SuperLU routine (version 3.0) -- * Univ. of California Berkeley, Xerox Palo Alto Research Center, @@ -81,7 +81,7 @@ int LU_snode_bmod (const int jcol, const int fsupc, ScalarVector& dense, LU_Glob // Update the trailing part of the column jcol U(jcol:jcol+nrow, jcol) using L(jcol:jcol+nrow, fsupc:jcol) and U(fsupc:jcol) new (&A) Map,0,OuterStride<> > ( &(lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); VectorBlock l(lusup, ufirst+nsupc, nrow); - l = l - A * u; + l.noalias() -= A * u; } return 0; } From 02935b424911e7cde21180d0611f4994c1b41207 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Wed, 1 Aug 2012 11:38:32 +0200 Subject: [PATCH 30/73] switch to MPL license --- Eigen/src/SparseLU/SparseLU.h | 21 ++------ Eigen/src/SparseLU/SparseLU_Coletree.h | 22 ++------ Eigen/src/SparseLU/SparseLU_Matrix.h | 21 ++------ Eigen/src/SparseLU/SparseLU_Memory.h | 50 ++----------------- Eigen/src/SparseLU/SparseLU_Structs.h | 21 ++------ Eigen/src/SparseLU/SparseLU_Utils.h | 22 ++------ Eigen/src/SparseLU/SparseLU_column_bmod.h | 21 ++------ Eigen/src/SparseLU/SparseLU_column_dfs.h | 23 ++------- Eigen/src/SparseLU/SparseLU_copy_to_ucol.h | 24 ++------- .../src/SparseLU/SparseLU_heap_relax_snode.h | 21 ++------ Eigen/src/SparseLU/SparseLU_kernel_bmod.h | 21 ++------ Eigen/src/SparseLU/SparseLU_panel_bmod.h | 23 ++------- Eigen/src/SparseLU/SparseLU_panel_dfs.h | 23 
++------- Eigen/src/SparseLU/SparseLU_pivotL.h | 21 ++------ Eigen/src/SparseLU/SparseLU_pruneL.h | 23 ++------- Eigen/src/SparseLU/SparseLU_relax_snode.h | 21 ++------ Eigen/src/SparseLU/SparseLU_snode_bmod.h | 21 ++------ Eigen/src/SparseLU/SparseLU_snode_dfs.h | 23 ++------- 18 files changed, 62 insertions(+), 360 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 70898958b..0e4bd7c69 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -3,24 +3,9 @@ // // Copyright (C) 2012 Désiré Nuentsa-Wakam // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
#ifndef EIGEN_SPARSE_LU diff --git a/Eigen/src/SparseLU/SparseLU_Coletree.h b/Eigen/src/SparseLU/SparseLU_Coletree.h index 142f4995e..964f5e433 100644 --- a/Eigen/src/SparseLU/SparseLU_Coletree.h +++ b/Eigen/src/SparseLU/SparseLU_Coletree.h @@ -3,24 +3,10 @@ // // Copyright (C) 2012 Désiré Nuentsa-Wakam // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ /* diff --git a/Eigen/src/SparseLU/SparseLU_Matrix.h b/Eigen/src/SparseLU/SparseLU_Matrix.h index 9f2dcaa56..9381189c8 100644 --- a/Eigen/src/SparseLU/SparseLU_Matrix.h +++ b/Eigen/src/SparseLU/SparseLU_Matrix.h @@ -4,24 +4,9 @@ // Copyright (C) 2012 Désiré Nuentsa-Wakam // Copyright (C) 2012 Gael Guennebaud // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
#ifndef EIGEN_SPARSELU_MATRIX_H #define EIGEN_SPARSELU_MATRIX_H diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h index 7a2ab93df..531c2dba6 100644 --- a/Eigen/src/SparseLU/SparseLU_Memory.h +++ b/Eigen/src/SparseLU/SparseLU_Memory.h @@ -3,24 +3,9 @@ // // Copyright (C) 2012 Désiré Nuentsa-Wakam // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
/* @@ -125,35 +110,6 @@ int expand(VectorType& vec, int& length, int nbElts, int keep_prev, int& num_ex length = new_len; if(num_expansions) ++num_expansions; return 0; - - /* - * Test if the memory has been well allocated - * otherwise reduce the size and try to reallocate - * copy data from previous vector (if exists) to the newly allocated vector - */ -// if ( num_expansions != 0 ) // The memory has been expanded before -// { -// int tries = 0; -// if (keep_prev) -// { -// if (!vec.size()) return new_len ; -// } -// else -// { -// while (!vec.size()) -// { -// // Reduce the size and allocate again -// if ( ++tries > 10) return new_len; -// alpha = LU_Reduce(alpha); -// new_len = alpha * length ; -// vec.resize(new_len); //FIXME Should be in a try catch section -// } -// } // end allocation -// -// //Copy the previous values to the newly allocated space -// if (nbElts > 0) -// vec.segment(0, nbElts) = old_vec; -// } // end expansion } /** diff --git a/Eigen/src/SparseLU/SparseLU_Structs.h b/Eigen/src/SparseLU/SparseLU_Structs.h index e05eabe2a..4b4dfdc77 100644 --- a/Eigen/src/SparseLU/SparseLU_Structs.h +++ b/Eigen/src/SparseLU/SparseLU_Structs.h @@ -3,24 +3,9 @@ // // Copyright (C) 2012 Désiré Nuentsa-Wakam // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. /* * NOTE: This file comes from a partly modified version of files slu_[s,d,c,z]defs.h diff --git a/Eigen/src/SparseLU/SparseLU_Utils.h b/Eigen/src/SparseLU/SparseLU_Utils.h index 0352c7872..9719820fd 100644 --- a/Eigen/src/SparseLU/SparseLU_Utils.h +++ b/Eigen/src/SparseLU/SparseLU_Utils.h @@ -3,24 +3,10 @@ // // Copyright (C) 2012 Désiré Nuentsa-Wakam // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ #ifndef EIGEN_SPARSELU_UTILS_H #define EIGEN_SPARSELU_UTILS_H diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h index 6bc6d80ea..2f0c0e7a0 100644 --- a/Eigen/src/SparseLU/SparseLU_column_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -3,24 +3,9 @@ // // Copyright (C) 2012 Désiré Nuentsa-Wakam // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
/* diff --git a/Eigen/src/SparseLU/SparseLU_column_dfs.h b/Eigen/src/SparseLU/SparseLU_column_dfs.h index 70cfe40ea..36c97f947 100644 --- a/Eigen/src/SparseLU/SparseLU_column_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_column_dfs.h @@ -3,28 +3,13 @@ // // Copyright (C) 2012 Désiré Nuentsa-Wakam // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. /* - * NOTE: This file is the modified version of xcolumn_dfs.c file in SuperLU + * NOTE: This file is the modified version of [s,d,c,z]column_dfs.c file in SuperLU * -- SuperLU routine (version 2.0) -- * Univ. 
of California Berkeley, Xerox Palo Alto Research Center, diff --git a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h index 9e1708da1..68d8563fa 100644 --- a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +++ b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h @@ -3,28 +3,12 @@ // // Copyright (C) 2012 Désiré Nuentsa-Wakam // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . - +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. /* - * NOTE: This file is the modified version of xcopy_to_ucol.c file in SuperLU + * NOTE: This file is the modified version of [s,d,c,z]copy_to_ucol.c file in SuperLU * -- SuperLU routine (version 2.0) -- * Univ. 
of California Berkeley, Xerox Palo Alto Research Center, diff --git a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h index 791538729..1bda70aaf 100644 --- a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +++ b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h @@ -3,24 +3,9 @@ // // Copyright (C) 2012 Désiré Nuentsa-Wakam // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
/* This file is a modified version of heap_relax_snode.c file in SuperLU * -- SuperLU routine (version 3.0) -- diff --git a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h index 0d4b20f59..5a7774480 100644 --- a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h @@ -3,24 +3,9 @@ // // Copyright (C) 2012 Désiré Nuentsa-Wakam // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
#ifndef SPARSELU_KERNEL_BMOD_H #define SPARSELU_KERNEL_BMOD_H diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h index f01db3e37..b0d234c15 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -3,28 +3,13 @@ // // Copyright (C) 2012 Désiré Nuentsa-Wakam // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. /* - * NOTE: This file is the modified version of xpanel_bmod.c file in SuperLU + * NOTE: This file is the modified version of [s,d,c,z]panel_bmod.c file in SuperLU * -- SuperLU routine (version 3.0) -- * Univ. 
of California Berkeley, Xerox Palo Alto Research Center, diff --git a/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/Eigen/src/SparseLU/SparseLU_panel_dfs.h index 70ea0f51f..79dd4da40 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_panel_dfs.h @@ -3,28 +3,13 @@ // // Copyright (C) 2012 Désiré Nuentsa-Wakam // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. /* - * NOTE: This file is the modified version of xpanel_dfs.c file in SuperLU + * NOTE: This file is the modified version of [s,d,c,z]panel_dfs.c file in SuperLU * -- SuperLU routine (version 2.0) -- * Univ. 
of California Berkeley, Xerox Palo Alto Research Center, diff --git a/Eigen/src/SparseLU/SparseLU_pivotL.h b/Eigen/src/SparseLU/SparseLU_pivotL.h index 0c767c23a..6e2ce87a1 100644 --- a/Eigen/src/SparseLU/SparseLU_pivotL.h +++ b/Eigen/src/SparseLU/SparseLU_pivotL.h @@ -3,24 +3,9 @@ // // Copyright (C) 2012 Désiré Nuentsa-Wakam // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
/* diff --git a/Eigen/src/SparseLU/SparseLU_pruneL.h b/Eigen/src/SparseLU/SparseLU_pruneL.h index 91c795fac..6f935896e 100644 --- a/Eigen/src/SparseLU/SparseLU_pruneL.h +++ b/Eigen/src/SparseLU/SparseLU_pruneL.h @@ -3,28 +3,13 @@ // // Copyright (C) 2012 Désiré Nuentsa-Wakam // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. /* - * NOTE: This file is the modified version of xpruneL.c file in SuperLU + * NOTE: This file is the modified version of [s,d,c,z]pruneL.c file in SuperLU * -- SuperLU routine (version 2.0) -- * Univ. 
of California Berkeley, Xerox Palo Alto Research Center, diff --git a/Eigen/src/SparseLU/SparseLU_relax_snode.h b/Eigen/src/SparseLU/SparseLU_relax_snode.h index 5123e94bf..a9a0a00c1 100644 --- a/Eigen/src/SparseLU/SparseLU_relax_snode.h +++ b/Eigen/src/SparseLU/SparseLU_relax_snode.h @@ -3,24 +3,9 @@ // // Copyright (C) 2012 Désiré Nuentsa-Wakam // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
/* This file is a modified version of heap_relax_snode.c file in SuperLU * -- SuperLU routine (version 3.0) -- diff --git a/Eigen/src/SparseLU/SparseLU_snode_bmod.h b/Eigen/src/SparseLU/SparseLU_snode_bmod.h index d087d1073..6b82b0727 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_snode_bmod.h @@ -3,24 +3,9 @@ // // Copyright (C) 2012 Désiré Nuentsa-Wakam // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
/* diff --git a/Eigen/src/SparseLU/SparseLU_snode_dfs.h b/Eigen/src/SparseLU/SparseLU_snode_dfs.h index 150d9d0ef..c202c8f48 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_snode_dfs.h @@ -3,28 +3,13 @@ // // Copyright (C) 2012 Désiré Nuentsa-Wakam // -// Eigen is free software; you can redistribute it and/or -// modify it under the terms of the GNU Lesser General Public -// License as published by the Free Software Foundation; either -// version 3 of the License, or (at your option) any later version. -// -// Alternatively, you can redistribute it and/or -// modify it under the terms of the GNU General Public License as -// published by the Free Software Foundation; either version 2 of -// the License, or (at your option) any later version. -// -// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public -// License and a copy of the GNU General Public License along with -// Eigen. If not, see . +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. /* - * NOTE: This file is the modified version of dsnode_dfs.c file in SuperLU + * NOTE: This file is the modified version of [s,d,c,z]snode_dfs.c file in SuperLU * -- SuperLU routine (version 2.0) -- * Univ. 
of California Berkeley, Xerox Palo Alto Research Center, From 3a0f5a2a7f5c9f6c53f22379c6e4ac055887ab96 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Wed, 1 Aug 2012 11:40:56 +0200 Subject: [PATCH 31/73] Update copyrights sections --- Eigen/src/SparseLU/SparseLU_column_bmod.h | 1 + Eigen/src/SparseLU/SparseLU_kernel_bmod.h | 1 + Eigen/src/SparseLU/SparseLU_panel_bmod.h | 1 + 3 files changed, 3 insertions(+) diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h index 2f0c0e7a0..c83c8732f 100644 --- a/Eigen/src/SparseLU/SparseLU_column_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -2,6 +2,7 @@ // for linear algebra. // // Copyright (C) 2012 Désiré Nuentsa-Wakam +// Copyright (C) 2012 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed diff --git a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h index 5a7774480..d5cad49b1 100644 --- a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h @@ -2,6 +2,7 @@ // for linear algebra. // // Copyright (C) 2012 Désiré Nuentsa-Wakam +// Copyright (C) 2012 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h index b0d234c15..8ad7eefff 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -2,6 +2,7 @@ // for linear algebra. // // Copyright (C) 2012 Désiré Nuentsa-Wakam +// Copyright (C) 2012 Gael Guennebaud // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed From 6e8aa96e0f0cace4517b8cff28f3a659ea734507 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Fri, 3 Aug 2012 13:05:27 +0200 Subject: [PATCH 32/73] correct bug when solving with multiple Rhs --- Eigen/src/SparseLU/SparseLU.h | 42 ++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 0e4bd7c69..0b1347f87 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -8,8 +8,8 @@ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#ifndef EIGEN_SPARSE_LU -#define EIGEN_SPARSE_LU +#ifndef EIGEN_SPARSE_LU_H +#define EIGEN_SPARSE_LU_H namespace Eigen { @@ -111,14 +111,14 @@ class SparseLU * * \sa compute() */ -// template<typename Rhs> -// inline const solve_retval<SparseLU, Rhs> solve(const MatrixBase<Rhs>& B) const -// { -// eigen_assert(m_factorizationIsOk && "SparseLU is not initialized."); -// eigen_assert(rows()==B.rows() -// && "SparseLU::solve(): invalid number of rows of the right hand side matrix B"); -// return solve_retval<SparseLU, Rhs>(*this, B.derived()); -// } + template<typename Rhs> + inline const internal::solve_retval<SparseLU, Rhs> solve(const MatrixBase<Rhs>& B) const + { + eigen_assert(m_factorizationIsOk && "SparseLU is not initialized."); + eigen_assert(rows()==B.rows() + && "SparseLU::solve(): invalid number of rows of the right hand side matrix B"); + return internal::solve_retval<SparseLU, Rhs>(*this, B.derived()); + } /** \brief Reports whether previous computation was successful. 
@@ -150,7 +150,8 @@ class SparseLU // Permute the right hand side to form X = Pr*B // on return, X is overwritten by the computed solution X.resize(n,nrhs); - X = m_perm_r * B; + for(int j = 0; j < nrhs; ++j) + X.col(j) = m_perm_r * B.col(j); // Forward solve PLy = Pb; Index fsupc; // First column of the current supernode @@ -172,12 +173,12 @@ class SparseLU nsupr = m_Lstore.rowIndexPtr()[fsupc+1] - istart; nsupc = m_Lstore.supToCol()[k+1] - fsupc; nrow = nsupr - nsupc; - luptr = m_Lstore.colIndexPtr()[fsupc]; if (nsupc == 1 ) { for (j = 0; j < nrhs; j++) { + luptr = m_Lstore.colIndexPtr()[fsupc]; for (iptr = istart+1; iptr < m_Lstore.rowIndexPtr()[fsupc+1]; iptr++) { irow = m_Lstore.rowIndex()[iptr]; @@ -189,10 +190,11 @@ class SparseLU else { // The supernode has more than one column + luptr = m_Lstore.colIndexPtr()[fsupc]; // Triangular solve Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); - Map< Matrix, 0, OuterStride<> > U (&(X.data()[fsupc]), nsupc, nrhs, OuterStride<>(X.rows()) ); + Map< Matrix, 0, OuterStride<> > U (&(X.data()[fsupc]), nsupc, nrhs, OuterStride<>(n) ); U = A.template triangularView().solve(U); // Matrix-vector product @@ -233,7 +235,7 @@ class SparseLU else { Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); - Map< Matrix, 0, OuterStride<> > U (&(X.data()[fsupc]), nsupc, nrhs, OuterStride<>(X.rows()) ); + Map< Matrix, 0, OuterStride<> > U (&(X.data()[fsupc]), nsupc, nrhs, OuterStride<>(n) ); U = A.template triangularView().solve(U); } @@ -251,12 +253,12 @@ class SparseLU } // End For U-solve // Permute back the solution - X = m_perm_c.inverse() * X; + for (j = 0; j < nrhs; ++j) + X.col(j) = m_perm_c.inverse() * X.col(j); return true; } - protected: // Functions void initperfvalues() @@ -344,7 +346,7 @@ void SparseLU::analyzePattern(const MatrixType& mat) } // Compute the column elimination tree of the permuted matrix - if (m_etree.size() == 0) m_etree.resize(m_mat.cols()); + 
/*if (m_etree.size() == 0) */m_etree.resize(m_mat.cols()); LU_sp_coletree(m_mat, m_etree); @@ -654,7 +656,7 @@ void SparseLU::factorize(const MatrixType& matrix) } -/*namespace internal { +namespace internal { template struct solve_retval, Rhs> @@ -665,11 +667,11 @@ struct solve_retval, Rhs> template void evalTo(Dest& dst) const { - dec().derived()._solve(rhs(),dst); + dec()._solve(rhs(),dst); } }; -}*/ // end namespace internal +} // end namespace internal From 7dc39b703706b56a4a46255dabfeeddf50e76581 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Fri, 3 Aug 2012 13:05:45 +0200 Subject: [PATCH 33/73] Add unit tests --- bench/spbench/test_sparseLU.cpp | 2 +- test/CMakeLists.txt | 2 +- test/sparse_solver.h | 4 +-- test/sparselu.cpp | 43 +++++++++++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 4 deletions(-) create mode 100644 test/sparselu.cpp diff --git a/bench/spbench/test_sparseLU.cpp b/bench/spbench/test_sparseLU.cpp index ecf254b3d..59f8252d0 100644 --- a/bench/spbench/test_sparseLU.cpp +++ b/bench/spbench/test_sparseLU.cpp @@ -70,7 +70,7 @@ int main(int argc, char **args) cout << "Factorize Time " << timer.value() << std::endl; timer.reset(); timer.start(); - solver._solve(b, x); + x = solver.solve(b); timer.stop(); cout << "solve time " << timer.value() << std::endl; /* Check the accuracy */ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8de795baa..0d5ab61af 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -200,7 +200,7 @@ ei_add_test(vectorwiseop) ei_add_test(simplicial_cholesky) ei_add_test(conjugate_gradient) ei_add_test(bicgstab) - +ei_add_test(sparselu) if(UMFPACK_FOUND) ei_add_test(umfpack_support "" "${UMFPACK_ALL_LIBS}") diff --git a/test/sparse_solver.h b/test/sparse_solver.h index 1bbaeb2dd..f529ad7e9 100644 --- a/test/sparse_solver.h +++ b/test/sparse_solver.h @@ -163,9 +163,9 @@ inline std::string get_matrixfolder() { std::string mat_folder = TEST_REAL_CASES; if( internal::is_same >::value || 
internal::is_same >::value ) - mat_folder = mat_folder + static_cast("/complex/"); + mat_folder = mat_folder + static_cast("/complex/"); else - mat_folder = mat_folder + static_cast("/real/"); + mat_folder = mat_folder + static_cast("/real/"); return mat_folder; } #endif diff --git a/test/sparselu.cpp b/test/sparselu.cpp new file mode 100644 index 000000000..e960f9c93 --- /dev/null +++ b/test/sparselu.cpp @@ -0,0 +1,43 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . 
+#include "sparse_solver.h" +#include +#include + +template void test_sparselu_T() +{ + SparseLU, COLAMDOrdering > sparselu_colamd; + SparseLU, AMDOrdering > sparselu_amd; + + check_sparse_square_solving(sparselu_colamd); + check_sparse_square_solving(sparselu_amd); +} + +void test_sparselu() +{ + CALL_SUBTEST_1(test_sparselu_T()); + CALL_SUBTEST_2(test_sparselu_T()); + CALL_SUBTEST_3(test_sparselu_T >()); + CALL_SUBTEST_4(test_sparselu_T >()); +} \ No newline at end of file From 48dc95f1dac25a49bb8168fd5d7d9f49fd7d1a11 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 2 Aug 2012 18:28:16 +0200 Subject: [PATCH 34/73] factorize column_dfs and panel_dfs --- Eigen/src/SparseLU/SparseLU_column_dfs.h | 173 ++++---------- Eigen/src/SparseLU/SparseLU_panel_dfs.h | 278 +++++++++++++---------- 2 files changed, 201 insertions(+), 250 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU_column_dfs.h b/Eigen/src/SparseLU/SparseLU_column_dfs.h index 36c97f947..a4562af9c 100644 --- a/Eigen/src/SparseLU/SparseLU_column_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_column_dfs.h @@ -56,157 +56,68 @@ * > 0 number of bytes allocated when run out of space * */ +template +struct LU_column_dfs_traits +{ + typedef typename IndexVector::Scalar Index; + LU_column_dfs_traits(Index jcol, Index& jsuper, LU_GlobalLU_t& glu) + : m_jcol(jcol), m_jsuper_ref(jsuper), m_glu(glu) + {} + bool update_segrep(Index /*krep*/, Index /*jj*/) + { + return true; + } + void mem_expand(IndexVector& lsub, int& nextl, int chmark) + { + if (nextl >= m_glu.nzlmax) + LUMemXpand(lsub, m_glu.nzlmax, nextl, LSUB, m_glu.num_expansions); + if (chmark != (m_jcol-1)) m_jsuper_ref = IND_EMPTY; + } + enum { ExpandMem = true }; + + int m_jcol; + int& m_jsuper_ref; + LU_GlobalLU_t& m_glu; +}; + template int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper, int& nseg, BlockIndexVector& lsub_col, IndexVector& segrep, BlockIndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& 
parent, IndexVector& xplore, LU_GlobalLU_t& glu) { typedef typename IndexVector::Scalar Index; typedef typename ScalarVector::Scalar Scalar; - int jsuper, nsuper, nextl; - int krow; // Row index of the current element - int kperm; // permuted row index - int krep; // Supernode reprentative of the current row - int k, kmark; - int chperm, chmark, chrep, oldrep, kchild; - int myfnz; // First nonzero element in the current column - int xdfs, maxdfs, kpar; - int mem; // Initialize pointers IndexVector& xsup = glu.xsup; IndexVector& supno = glu.supno; IndexVector& lsub = glu.lsub; - IndexVector& xlsub = glu.xlsub; - Index& nzlmax = glu.nzlmax; + IndexVector& xlsub = glu.xlsub; - int jcolm1 = jcol - 1; - int jcolp1 = jcol + 1; - nsuper = supno(jcol); - jsuper = nsuper; - nextl = xlsub(jcol); + int jsuper = supno(jcol); + int nextl = xlsub(jcol); VectorBlock marker2(marker, 2*m, m); - int fsupc, jptr, jm1ptr, ito, ifrom, istop; + + + LU_column_dfs_traits traits(jcol, jsuper, glu); + // For each nonzero in A(*,jcol) do dfs - for (k = 0; lsub_col[k] != IND_EMPTY; k++) + for (int k = 0; lsub_col[k] != IND_EMPTY; k++) { - krow = lsub_col(k); + int krow = lsub_col(k); lsub_col(k) = IND_EMPTY; - kmark = marker2(krow); + int kmark = marker2(krow); // krow was visited before, go to the next nonz; - if (kmark == jcol) continue; - - // For each unmarker nbr krow of jcol - marker2(krow) = jcol; - kperm = perm_r(krow); - - if (kperm == IND_EMPTY ) - { - // krow is in L: place it in structure of L(*,jcol) - lsub(nextl++) = krow; // krow is indexed into A - if ( nextl >= nzlmax ) - { - mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, glu.num_expansions); - if ( mem ) return mem; - } - if (kmark != jcolm1) jsuper = IND_EMPTY; // Row index subset testing - } - else - { - // krow is in U : if its supernode-rep krep - // has been explored, update repfnz(*) - krep = xsup(supno(kperm)+1) - 1; - myfnz = repfnz(krep); - - if (myfnz != IND_EMPTY ) - { - // visited before - if (myfnz > kperm) 
repfnz(krep) = kperm; - // continue; - } - else - { - // otherwise, perform dfs starting at krep - oldrep = IND_EMPTY; - parent(krep) = oldrep; - repfnz(krep) = kperm; - xdfs = xlsub(krep); - maxdfs = xprune(krep); - - do - { - // For each unmarked kchild of krep - while (xdfs < maxdfs) - { - kchild = lsub(xdfs); - xdfs++; - chmark = marker2(kchild); - - if (chmark != jcol) - { - // Not reached yet - marker2(kchild) = jcol; - chperm = perm_r(kchild); - - if (chperm == IND_EMPTY) - { - // if kchild is in L: place it in L(*,k) - lsub(nextl++) = kchild; - if (nextl >= nzlmax) - { - mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, glu.num_expansions); - if (mem) return mem; - } - if (chmark != jcolm1) jsuper = IND_EMPTY; - } - else - { - // if kchild is in U : - // chrep = its supernode-rep. If its rep has been explored, - // update its repfnz - chrep = xsup(supno(chperm)+1) - 1; - myfnz = repfnz(chrep); - if (myfnz != IND_EMPTY) - { - // Visited before - if ( myfnz > chperm) repfnz(chrep) = chperm; - } - else - { - // continue dfs at super-rep of kchild - xplore(krep) = xdfs; - oldrep = krep; - krep = chrep; // Go deeped down G(L^t) - parent(krep) = oldrep; - repfnz(krep) = chperm; - xdfs = xlsub(krep); - maxdfs = xprune(krep); - } // else myfnz - } // else for chperm - - } // if chmark - - } // end while - - // krow has no more unexplored nbrs; - // place supernode-rep krep in postorder DFS. - // backtrack dfs to its parent - - segrep(nseg) = krep; - ++nseg; - kpar = parent(krep); // Pop from stack, mimic recursion - if (kpar == IND_EMPTY) break; // dfs done - krep = kpar; - xdfs = xplore(krep); - maxdfs = xprune(krep); - - } while ( kpar != IND_EMPTY); - - } // else myfnz - - } // else kperm + if (kmark == jcol) continue; + LU_dfs_kernel(jcol, perm_r, nseg, lsub, segrep, repfnz, xprune, marker2, parent, + xplore, glu, nextl, krow, traits); } // for each nonzero ... 
+ int fsupc, jptr, jm1ptr, ito, ifrom, istop; + int nsuper = supno(jcol); + int jcolp1 = jcol + 1; + int jcolm1 = jcol - 1; + // check to see if j belongs in the same supernode as j-1 if ( jcol == 0 ) { // Do nothing for column 0 diff --git a/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/Eigen/src/SparseLU/SparseLU_panel_dfs.h index 79dd4da40..75fbd0b0e 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_panel_dfs.h @@ -29,6 +29,132 @@ */ #ifndef SPARSELU_PANEL_DFS_H #define SPARSELU_PANEL_DFS_H + +template +void LU_dfs_kernel(const int jj, IndexVector& perm_r, + int& nseg, IndexVector& panel_lsub, IndexVector& segrep, + VectorBlock& repfnz_col, IndexVector& xprune, MarkerType& marker, IndexVector& parent, + IndexVector& xplore, LU_GlobalLU_t& glu, + int& nextl_col, int krow, Traits& traits + ) +{ + IndexVector& xsup = glu.xsup; + IndexVector& supno = glu.supno; + IndexVector& lsub = glu.lsub; + IndexVector& xlsub = glu.xlsub; + + int kmark = marker(krow); + + // For each unmarked krow of jj + marker(krow) = jj; + int kperm = perm_r(krow); + if (kperm == IND_EMPTY ) { + // krow is in L : place it in structure of L(*, jj) + panel_lsub(nextl_col++) = krow; // krow is indexed into A + + traits.mem_expand(panel_lsub, nextl_col, kmark); + } + else + { + // krow is in U : if its supernode-representative krep + // has been explored, update repfnz(*) + // krep = supernode representative of the current row + int krep = xsup(supno(kperm)+1) - 1; + // First nonzero element in the current column: + int myfnz = repfnz_col(krep); + + if (myfnz != IND_EMPTY ) + { + // Representative visited before + if (myfnz > kperm ) repfnz_col(krep) = kperm; + + } + else + { + // Otherwise, perform dfs starting at krep + int oldrep = IND_EMPTY; + parent(krep) = oldrep; + repfnz_col(krep) = kperm; + int xdfs = xlsub(krep); + int maxdfs = xprune(krep); + + int kpar; + do + { + // For each unmarked kchild of krep + while (xdfs < maxdfs) + { + int kchild = 
lsub(xdfs); + xdfs++; + int chmark = marker(kchild); + + if (chmark != jj ) + { + marker(kchild) = jj; + int chperm = perm_r(kchild); + + if (chperm == IND_EMPTY) + { + // case kchild is in L: place it in L(*, j) + panel_lsub(nextl_col++) = kchild; + traits.mem_expand(panel_lsub, nextl_col, chmark); + } + else + { + // case kchild is in U : + // chrep = its supernode-rep. If its rep has been explored, + // update its repfnz(*) + int chrep = xsup(supno(chperm)+1) - 1; + myfnz = repfnz_col(chrep); + + if (myfnz != IND_EMPTY) + { // Visited before + if (myfnz > chperm) + repfnz_col(chrep) = chperm; + } + else + { // Cont. dfs at snode-rep of kchild + xplore(krep) = xdfs; + oldrep = krep; + krep = chrep; // Go deeper down G(L) + parent(krep) = oldrep; + repfnz_col(krep) = chperm; + xdfs = xlsub(krep); + maxdfs = xprune(krep); + + } // end if myfnz != -1 + } // end if chperm == -1 + + } // end if chmark !=jj + } // end while xdfs < maxdfs + + // krow has no more unexplored nbrs : + // Place snode-rep krep in postorder DFS, if this + // segment is seen for the first time. (Note that + // "repfnz(krep)" may change later.) 
+ // Baktrack dfs to its parent + if(traits.update_segrep(krep,jj)) + //if (marker1(krep) < jcol ) + { + segrep(nseg) = krep; + ++nseg; + //marker1(krep) = jj; + } + + kpar = parent(krep); // Pop recursion, mimic recursion + if (kpar == IND_EMPTY) + break; // dfs done + krep = kpar; + xdfs = xplore(krep); + maxdfs = xprune(krep); + + } while (kpar != IND_EMPTY); // Do until empty stack + + } // end if (myfnz = -1) + + } // end if (kperm == -1) +} + /** * \brief Performs a symbolic factorization on a panel of columns [jcol, jcol+w) * @@ -62,29 +188,42 @@ * * */ + +template +struct LU_panel_dfs_traits +{ + typedef typename IndexVector::Scalar Index; + LU_panel_dfs_traits(Index jcol, Index* marker) + : m_jcol(jcol), m_marker(marker) + {} + bool update_segrep(Index krep, Index jj) + { + if(m_marker[krep] void LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, IndexVector& perm_r, int& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& glu) { - - int jj; // Index through each column in the panel int nextl_col; // Next available position in panel_lsub[*,jj] - int krow; // Row index of the current element - int kperm; // permuted row index - int krep; // Supernode representative of the current row - int kmark; - int chperm, chmark, chrep, oldrep, kchild; - int myfnz; // First nonzero element in the current column - int xdfs, maxdfs, kpar; // Initialize pointers VectorBlock marker1(marker, m, m); nseg = 0; - IndexVector& xsup = glu.xsup; - IndexVector& supno = glu.supno; - IndexVector& lsub = glu.lsub; - IndexVector& xlsub = glu.xlsub; + + LU_panel_dfs_traits traits(jcol, marker1.data()); + // For each column in the panel - for (jj = jcol; jj < jcol + w; jj++) + for (int jj = jcol; jj < jcol + w; jj++) { nextl_col = (jj - jcol) * m; @@ -95,114 +234,15 @@ void LU_panel_dfs(const int m, const int w, const int jcol, 
MatrixType& A, Index // For each nnz in A[*, jj] do depth first search for (typename MatrixType::InnerIterator it(A, jj); it; ++it) { - krow = it.row(); - dense_col(krow) = it.value(); - kmark = marker(krow); + int krow = it.row(); + dense_col(krow) = it.value(); + + int kmark = marker(krow); if (kmark == jj) continue; // krow visited before, go to the next nonzero - // For each unmarked krow of jj - marker(krow) = jj; - kperm = perm_r(krow); - if (kperm == IND_EMPTY ) { - // krow is in L : place it in structure of L(*, jj) - panel_lsub(nextl_col++) = krow; // krow is indexed into A - } - else - { - // krow is in U : if its supernode-representative krep - // has been explored, update repfnz(*) - krep = xsup(supno(kperm)+1) - 1; - myfnz = repfnz_col(krep); - - if (myfnz != IND_EMPTY ) - { - // Representative visited before - if (myfnz > kperm ) repfnz_col(krep) = kperm; - - } - else - { - // Otherwise, perform dfs starting at krep - oldrep = IND_EMPTY; - parent(krep) = oldrep; - repfnz_col(krep) = kperm; - xdfs = xlsub(krep); - maxdfs = xprune(krep); - - do - { - // For each unmarked kchild of krep - while (xdfs < maxdfs) - { - kchild = lsub(xdfs); - xdfs++; - chmark = marker(kchild); - - if (chmark != jj ) - { - marker(kchild) = jj; - chperm = perm_r(kchild); - - if (chperm == IND_EMPTY) - { - // case kchild is in L: place it in L(*, j) - panel_lsub(nextl_col++) = kchild; - } - else - { - // case kchild is in U : - // chrep = its supernode-rep. If its rep has been explored, - // update its repfnz(*) - chrep = xsup(supno(chperm)+1) - 1; - myfnz = repfnz_col(chrep); - - if (myfnz != IND_EMPTY) - { // Visited before - if (myfnz > chperm) - repfnz_col(chrep) = chperm; - } - else - { // Cont. 
dfs at snode-rep of kchild - xplore(krep) = xdfs; - oldrep = krep; - krep = chrep; // Go deeper down G(L) - parent(krep) = oldrep; - repfnz_col(krep) = chperm; - xdfs = xlsub(krep); - maxdfs = xprune(krep); - - } // end if myfnz != -1 - } // end if chperm == -1 - - } // end if chmark !=jj - } // end while xdfs < maxdfs - - // krow has no more unexplored nbrs : - // Place snode-rep krep in postorder DFS, if this - // segment is seen for the first time. (Note that - // "repfnz(krep)" may change later.) - // Baktrack dfs to its parent - if (marker1(krep) < jcol ) - { - segrep(nseg) = krep; - ++nseg; - marker1(krep) = jj; - } - - kpar = parent(krep); // Pop recursion, mimic recursion - if (kpar == IND_EMPTY) - break; // dfs done - krep = kpar; - xdfs = xplore(krep); - maxdfs = xprune(krep); - - } while (kpar != IND_EMPTY); // Do until empty stack - - } // end if (myfnz = -1) - - } // end if (kperm == -1) - + LU_dfs_kernel(jj, perm_r, nseg, panel_lsub, segrep, repfnz_col, xprune, marker, parent, + xplore, glu, nextl_col, krow, traits); }// end for nonzeros in column jj } // end for column jj From 03509d1387dc2634667cf9039e58b9caa3bcbffa Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 3 Aug 2012 15:37:44 +0200 Subject: [PATCH 35/73] SparseLU: add leverage level3 ops --- Eigen/src/SparseLU/SparseLU_panel_bmod.h | 130 ++++++++++++++++++++--- 1 file changed, 115 insertions(+), 15 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h index 8ad7eefff..62c677a93 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -81,34 +81,134 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca nsupr = xlsub(fsupc+1) - xlsub(fsupc); nrow = nsupr - nsupc; lptr = xlsub(fsupc); - // NOTE : Unlike the original implementation in SuperLU, the present implementation - // does not include a 2-D block update. 
- // Sequence through each column in the panel + // loop over the panel columns to detect the actual number of columns and rows + int u_rows = 0; + int u_cols = 0; for (jj = jcol; jj < jcol + w; jj++) { nextl_col = (jj-jcol) * m; VectorBlock repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row - VectorBlock dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here kfnz = repfnz_col(krep); if ( kfnz == IND_EMPTY ) continue; // skip any zero segment segsize = krep - kfnz + 1; - luptr = xlusup(fsupc); + u_cols++; + u_rows = std::max(segsize,u_rows); + } + + // if the blocks are large enough, use level 3 + // TODO find better heuristics! + if(nsupc >= 50 && nrow > 50 && u_cols>6) + { + Map > U(tempv.data(), u_rows, u_cols); - // NOTE : Unlike the original implementation in SuperLU, - // there is no update feature for col-col, 2col-col ... + // gather U + int u_col = 0; + for (jj = jcol; jj < jcol + w; jj++) + { + nextl_col = (jj-jcol) * m; + VectorBlock repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row + VectorBlock dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here + + kfnz = repfnz_col(krep); + if ( kfnz == IND_EMPTY ) + continue; // skip any zero segment + + segsize = krep - kfnz + 1; + luptr = xlusup(fsupc); + no_zeros = kfnz - fsupc; + + int isub = lptr + no_zeros; + int off = u_rows-segsize; + for (int i = 0; i < segsize; i++) + { + int irow = lsub(isub); + U(i+off,u_col) = dense_col(irow); + ++isub; + } + + u_col++; + } - // Perform a trianglar solve and block update, - // then scatter the result of sup-col update to dense[] - no_zeros = kfnz - fsupc; - if(segsize==1) LU_kernel_bmod<1>::run(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); - else if(segsize==2) LU_kernel_bmod<2>::run(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); - else if(segsize==3) LU_kernel_bmod<3>::run(segsize, dense_col, 
tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); - else LU_kernel_bmod::run(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); - } // End for each column in the panel + // solve U = A^-1 U + luptr = xlusup(fsupc); + no_zeros = (krep - u_rows + 1) - fsupc; + luptr += nsupr * no_zeros + no_zeros; + Map, 0, OuterStride<> > A(lusup.data()+luptr, u_rows, u_rows, OuterStride<>(nsupr) ); + U = A.template triangularView().solve(U); + + // update + luptr += u_rows; + Map, 0, OuterStride<> > B(lusup.data()+luptr, nrow, u_rows, OuterStride<>(nsupr) ); + assert(tempv.size()>w*u_rows + nrow*w); + Map > L(tempv.data()+w*u_rows, nrow, u_cols); + L.noalias() = B * U; + + // scatter U and L + u_col = 0; + for (jj = jcol; jj < jcol + w; jj++) + { + nextl_col = (jj-jcol) * m; + VectorBlock repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row + VectorBlock dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here + + kfnz = repfnz_col(krep); + if ( kfnz == IND_EMPTY ) + continue; // skip any zero segment + + segsize = krep - kfnz + 1; + no_zeros = kfnz - fsupc; + int isub = lptr + no_zeros; + + int off = u_rows-segsize; + for (int i = 0; i < segsize; i++) + { + int irow = lsub(isub++); + dense_col(irow) = U.coeff(i+off,u_col); + U.coeffRef(i,u_col) = 0; + } + + // Scatter l into SPA dense[] + for (int i = 0; i < nrow; i++) + { + int irow = lsub(isub++); + dense_col(irow) -= L.coeff(i+off,u_col); + L.coeffRef(i,u_col) = 0; + } + u_col++; + } + } + else // level 2 only + { + // Sequence through each column in the panel + for (jj = jcol; jj < jcol + w; jj++) + { + nextl_col = (jj-jcol) * m; + VectorBlock repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row + VectorBlock dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here + + kfnz = repfnz_col(krep); + if ( kfnz == IND_EMPTY ) + continue; // skip any zero segment + + segsize = krep - kfnz + 1; + luptr 
= xlusup(fsupc); + + // NOTE : Unlike the original implementation in SuperLU, + // there is no update feature for col-col, 2col-col ... + + // Perform a trianglar solve and block update, + // then scatter the result of sup-col update to dense[] + no_zeros = kfnz - fsupc; + if(segsize==1) LU_kernel_bmod<1>::run(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); + else if(segsize==2) LU_kernel_bmod<2>::run(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); + else if(segsize==3) LU_kernel_bmod<3>::run(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); + else LU_kernel_bmod::run(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); + } // End for each column in the panel + } } // End for each updating supernode } From 70db61c269ae14dfd1e07af07b2b54c3aa068fd6 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Fri, 3 Aug 2012 16:36:00 +0200 Subject: [PATCH 36/73] Prefix with glu, the global structure --- Eigen/src/SparseLU/SparseLU_Memory.h | 26 +++++----- Eigen/src/SparseLU/SparseLU_Utils.h | 31 ++++++------ Eigen/src/SparseLU/SparseLU_column_bmod.h | 57 ++++++++++------------ Eigen/src/SparseLU/SparseLU_column_dfs.h | 46 ++++++++--------- Eigen/src/SparseLU/SparseLU_copy_to_ucol.h | 35 +++++-------- Eigen/src/SparseLU/SparseLU_panel_bmod.h | 36 ++++++-------- Eigen/src/SparseLU/SparseLU_panel_dfs.h | 16 +++--- Eigen/src/SparseLU/SparseLU_pivotL.h | 15 ++---- Eigen/src/SparseLU/SparseLU_pruneL.h | 37 ++++++-------- Eigen/src/SparseLU/SparseLU_snode_bmod.h | 33 +++++++------ Eigen/src/SparseLU/SparseLU_snode_dfs.h | 35 ++++++------- 11 files changed, 156 insertions(+), 211 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h index 531c2dba6..48b36f5b4 100644 --- a/Eigen/src/SparseLU/SparseLU_Memory.h +++ b/Eigen/src/SparseLU/SparseLU_Memory.h @@ -130,19 +130,15 @@ int LUMemInit(int m, int n, int annz, int lwork, int 
fillratio, int panel_size, int& num_expansions = glu.num_expansions; //No memory expansions so far num_expansions = 0; - // Guess the size for L\U factors - Index& nzlmax = glu.nzlmax; - Index& nzumax = glu.nzumax; - Index& nzlumax = glu.nzlumax; - nzumax = nzlumax = std::max(fillratio * annz, m*n); // estimated number of nonzeros in U - nzlmax = std::max(1., fillratio/4.) * annz; // estimated nnz in L factor + glu.nzumax = glu.nzlumax = std::max(fillratio * annz, m*n); // estimated number of nonzeros in U + glu.nzlmax = std::max(1., fillratio/4.) * annz; // estimated nnz in L factor // Return the estimated size to the user if necessary if (lwork == IND_EMPTY) { int estimated_size; estimated_size = LU_GluIntArray(n) * sizeof(Index) + LU_TempSpace(m, panel_size) - + (nzlmax + nzumax) * sizeof(Index) + (nzlumax+nzumax) * sizeof(Scalar) + n; + + (glu.nzlmax + glu.nzumax) * sizeof(Index) + (glu.nzlumax+glu.nzumax) * sizeof(Scalar) + n; return estimated_size; } @@ -160,18 +156,18 @@ int LUMemInit(int m, int n, int annz, int lwork, int fillratio, int panel_size, { try { - expand(glu.lusup, nzlumax, 0, 0, num_expansions); - expand(glu.ucol,nzumax, 0, 0, num_expansions); - expand(glu.lsub,nzlmax, 0, 0, num_expansions); - expand(glu.usub,nzumax, 0, 1, num_expansions); + expand(glu.lusup, glu.nzlumax, 0, 0, num_expansions); + expand(glu.ucol,glu.nzumax, 0, 0, num_expansions); + expand(glu.lsub,glu.nzlmax, 0, 0, num_expansions); + expand(glu.usub,glu.nzumax, 0, 1, num_expansions); } catch(std::bad_alloc& ) { //Reduce the estimated size and retry - nzlumax /= 2; - nzumax /= 2; - nzlmax /= 2; - if (nzlumax < annz ) return nzlumax; + glu.nzlumax /= 2; + glu.nzumax /= 2; + glu.nzlmax /= 2; + if (glu.nzlumax < annz ) return glu.nzlumax; } } while (!glu.lusup.size() || !glu.ucol.size() || !glu.lsub.size() || !glu.usub.size()); diff --git a/Eigen/src/SparseLU/SparseLU_Utils.h b/Eigen/src/SparseLU/SparseLU_Utils.h index 9719820fd..316b09ab0 100644 --- 
a/Eigen/src/SparseLU/SparseLU_Utils.h +++ b/Eigen/src/SparseLU/SparseLU_Utils.h @@ -12,12 +12,12 @@ #define EIGEN_SPARSELU_UTILS_H - +/** + * \brief Count Nonzero elements in the factors + */ template void LU_countnz(const int n, int& nnzL, int& nnzU, LU_GlobalLU_t& glu) { - IndexVector& xsup = glu.xsup; - IndexVector& xlsub = glu.xlsub; nnzL = 0; nnzU = (glu.xusub)(n); int nsuper = (glu.supno)(n); @@ -27,10 +27,10 @@ void LU_countnz(const int n, int& nnzL, int& nnzU, LU_GlobalLU_t void LU_fixupL(const int n, const IndexVector& perm_r, LU_GlobalLU_t& glu) { int fsupc, i, j, k, jstart; - IndexVector& xsup = glu.xsup; - IndexVector& lsub = glu.lsub; - IndexVector& xlsub = glu.xlsub; int nextl = 0; int nsuper = (glu.supno)(n); @@ -60,19 +57,19 @@ void LU_fixupL(const int n, const IndexVector& perm_r, LU_GlobalLU_t fpanelc d_fsupc = fst_col - fsupc; - luptr = xlusup(fst_col) + d_fsupc; - lptr = xlsub(fsupc) + d_fsupc; + luptr = glu.xlusup(fst_col) + d_fsupc; + lptr = glu.xlsub(fsupc) + d_fsupc; kfnz = repfnz(krep); kfnz = std::max(kfnz, fpanelc); segsize = krep - kfnz + 1; nsupc = krep - fst_col + 1; - nsupr = xlsub(fsupc+1) - xlsub(fsupc); + nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); nrow = nsupr - d_fsupc - nsupc; // NOTE Unlike the original implementation in SuperLU, the only feature @@ -109,34 +102,34 @@ int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, Sca // then scatter the result of sup-col update to dense no_zeros = kfnz - fst_col; if(segsize==1) - LU_kernel_bmod<1>::run(segsize, dense, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); + LU_kernel_bmod<1>::run(segsize, dense, tempv, glu.lusup, luptr, nsupr, nrow, glu.lsub, lptr, no_zeros); else - LU_kernel_bmod::run(segsize, dense, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); + LU_kernel_bmod::run(segsize, dense, tempv, glu.lusup, luptr, nsupr, nrow, glu.lsub, lptr, no_zeros); } // end if jsupno } // end for each segment // Process the supernodal portion of 
L\U[*,j] - nextlu = xlusup(jcol); - fsupc = xsup(jsupno); + nextlu = glu.xlusup(jcol); + fsupc = glu.xsup(jsupno); // copy the SPA dense into L\U[*,j] int mem; - new_next = nextlu + xlsub(fsupc + 1) - xlsub(fsupc); - while (new_next > nzlumax ) + new_next = nextlu + glu.xlsub(fsupc + 1) - glu.xlsub(fsupc); + while (new_next > glu.nzlumax ) { - mem = LUMemXpand(glu.lusup, nzlumax, nextlu, LUSUP, glu.num_expansions); + mem = LUMemXpand(glu.glu.lusup, glu.nzlumax, nextlu, LUSUP, glu.num_expansions); if (mem) return mem; } - for (isub = xlsub(fsupc); isub < xlsub(fsupc+1); isub++) + for (isub = glu.xlsub(fsupc); isub < glu.xlsub(fsupc+1); isub++) { - irow = lsub(isub); - lusup(nextlu) = dense(irow); + irow = glu.lsub(isub); + glu.lusup(nextlu) = dense(irow); dense(irow) = Scalar(0.0); ++nextlu; } - xlusup(jcol + 1) = nextlu; // close L\U(*,jcol); + glu.xlusup(jcol + 1) = nextlu; // close L\U(*,jcol); /* For more updates within the panel (also within the current supernode), * should start from the first column of the panel, or the first column @@ -152,20 +145,20 @@ int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, Sca // d_fsupc = 0 if fsupc >= fpanelc d_fsupc = fst_col - fsupc; - lptr = xlsub(fsupc) + d_fsupc; - luptr = xlusup(fst_col) + d_fsupc; - nsupr = xlsub(fsupc+1) - xlsub(fsupc); // leading dimension + lptr = glu.xlsub(fsupc) + d_fsupc; + luptr = glu.xlusup(fst_col) + d_fsupc; + nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); // leading dimension nsupc = jcol - fst_col; // excluding jcol nrow = nsupr - d_fsupc - nsupc; // points to the beginning of jcol in snode L\U(jsupno) - ufirst = xlusup(jcol) + d_fsupc; - Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); - VectorBlock u(lusup, ufirst, nsupc); + ufirst = glu.xlusup(jcol) + d_fsupc; + Map, 0, OuterStride<> > A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); + VectorBlock u(glu.lusup, ufirst, nsupc); u = A.template 
triangularView().solve(u); - new (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); - VectorBlock l(lusup, ufirst+nsupc, nrow); + new (&A) Map, 0, OuterStride<> > ( &(glu.lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); + VectorBlock l(glu.lusup, ufirst+nsupc, nrow); l.noalias() -= A * u; } // End if fst_col diff --git a/Eigen/src/SparseLU/SparseLU_column_dfs.h b/Eigen/src/SparseLU/SparseLU_column_dfs.h index a4562af9c..d01b84dc4 100644 --- a/Eigen/src/SparseLU/SparseLU_column_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_column_dfs.h @@ -67,10 +67,10 @@ struct LU_column_dfs_traits { return true; } - void mem_expand(IndexVector& lsub, int& nextl, int chmark) + void mem_expand(IndexVector& glu.lsub, int& nextl, int chmark) { if (nextl >= m_glu.nzlmax) - LUMemXpand(lsub, m_glu.nzlmax, nextl, LSUB, m_glu.num_expansions); + LUMemXpand(glu.lsub, m_glu.nzlmax, nextl, LSUB, m_glu.num_expansions); if (chmark != (m_jcol-1)) m_jsuper_ref = IND_EMPTY; } enum { ExpandMem = true }; @@ -84,16 +84,10 @@ template & glu) { typedef typename IndexVector::Scalar Index; - typedef typename ScalarVector::Scalar Scalar; + typedef typename ScalarVector - // Initialize pointers - IndexVector& xsup = glu.xsup; - IndexVector& supno = glu.supno; - IndexVector& lsub = glu.lsub; - IndexVector& xlsub = glu.xlsub; - - int jsuper = supno(jcol); - int nextl = xlsub(jcol); + int jsuper = glu.supno(jcol); + int nextl = glu.xlsub(jcol); VectorBlock marker2(marker, 2*m, m); @@ -109,25 +103,25 @@ int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper // krow was visited before, go to the next nonz; if (kmark == jcol) continue; - LU_dfs_kernel(jcol, perm_r, nseg, lsub, segrep, repfnz, xprune, marker2, parent, + LU_dfs_kernel(jcol, perm_r, nseg, glu.lsub, segrep, repfnz, xprune, marker2, parent, xplore, glu, nextl, krow, traits); } // for each nonzero ... 
int fsupc, jptr, jm1ptr, ito, ifrom, istop; - int nsuper = supno(jcol); + int nsuper = glu.supno(jcol); int jcolp1 = jcol + 1; int jcolm1 = jcol - 1; // check to see if j belongs in the same supernode as j-1 if ( jcol == 0 ) { // Do nothing for column 0 - nsuper = supno(0) = 0 ; + nsuper = glu.supno(0) = 0 ; } else { - fsupc = xsup(nsuper); - jptr = xlsub(jcol); // Not yet compressed - jm1ptr = xlsub(jcolm1); + fsupc = glu.xsup(nsuper); + jptr = glu.xlsub(jcol); // Not yet compressed + jm1ptr = glu.xlsub(jcolm1); // Use supernodes of type T2 : see SuperLU paper if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = IND_EMPTY; @@ -137,7 +131,7 @@ int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper if ( (jcol - fsupc) >= maxsuper) jsuper = IND_EMPTY; /* If jcol starts a new supernode, reclaim storage space in - * lsub from previous supernode. Note we only store + * glu.lsub from previous supernode. Note we only store * the subscript set of the first and last columns of * a supernode. 
(first for num values, last for pruning) */ @@ -145,26 +139,26 @@ int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper { // starts a new supernode if ( (fsupc < jcolm1-1) ) { // >= 3 columns in nsuper - ito = xlsub(fsupc+1); - xlsub(jcolm1) = ito; + ito = glu.xlsub(fsupc+1); + glu.xlsub(jcolm1) = ito; istop = ito + jptr - jm1ptr; xprune(jcolm1) = istop; // intialize xprune(jcol-1) - xlsub(jcol) = istop; + glu.xlsub(jcol) = istop; for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito) - lsub(ito) = lsub(ifrom); + glu.lsub(ito) = glu.lsub(ifrom); nextl = ito; // = istop + length(jcol) } nsuper++; - supno(jcol) = nsuper; + glu.supno(jcol) = nsuper; } // if a new supernode } // end else: jcol > 0 // Tidy up the pointers before exit - xsup(nsuper+1) = jcolp1; - supno(jcolp1) = nsuper; + glu.xsup(nsuper+1) = jcolp1; + glu.supno(jcolp1) = nsuper; xprune(jcol) = nextl; // Intialize upper bound for pruning - xlsub(jcolp1) = nextl; + glu.xlsub(jcolp1) = nextl; return 0; } diff --git a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h index 68d8563fa..541785881 100644 --- a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +++ b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h @@ -49,51 +49,42 @@ int LU_copy_to_ucol(const int jcol, const int nseg, SegRepType& segrep, RepfnzTy typedef typename IndexVector::Scalar Index; typedef typename ScalarVector::Scalar Scalar; Index ksub, krep, ksupno; - - IndexVector& xsup = glu.xsup; - IndexVector& supno = glu.supno; - IndexVector& lsub = glu.lsub; - IndexVector& xlsub = glu.xlsub; - ScalarVector& ucol = glu.ucol; - IndexVector& usub = glu.usub; - IndexVector& xusub = glu.xusub; - Index& nzumax = glu.nzumax; - - Index jsupno = supno(jcol); + + Index jsupno = glu.supno(jcol); // For each nonzero supernode segment of U[*,j] in topological order int k = nseg - 1, i; - Index nextu = xusub(jcol); + Index nextu = glu.xusub(jcol); Index kfnz, isub, segsize; Index new_next,irow; Index fsupc, 
mem; for (ksub = 0; ksub < nseg; ksub++) { krep = segrep(k); k--; - ksupno = supno(krep); + ksupno = glu.supno(krep); if (jsupno != ksupno ) // should go into ucol(); { kfnz = repfnz(krep); if (kfnz != IND_EMPTY) { // Nonzero U-segment - fsupc = xsup(ksupno); - isub = xlsub(fsupc) + kfnz - fsupc; + fsupc = glu.xsup(ksupno); + isub = glu.xlsub(fsupc) + kfnz - fsupc; segsize = krep - kfnz + 1; new_next = nextu + segsize; - while (new_next > nzumax) + while (new_next > glu.nzumax) { - mem = LUMemXpand(ucol, nzumax, nextu, UCOL, glu.num_expansions); + mem = LUMemXpand(glu.ucol, glu.nzumax, nextu, UCOL, glu.num_expansions); if (mem) return mem; - mem = LUMemXpand(usub, nzumax, nextu, USUB, glu.num_expansions); + mem = LUMemXpand(glu.usub, glu.nzumax, nextu, USUB, glu.num_expansions); if (mem) return mem; } for (i = 0; i < segsize; i++) { - irow = lsub(isub); - usub(nextu) = perm_r(irow); // Unlike the L part, the U part is stored in its final order - ucol(nextu) = dense(irow); + irow = glu.lsub(isub); + glu.usub(nextu) = perm_r(irow); // Unlike the L part, the U part is stored in its final order + glu.ucol(nextu) = dense(irow); dense(irow) = Scalar(0.0); nextu++; isub++; @@ -104,7 +95,7 @@ int LU_copy_to_ucol(const int jcol, const int nseg, SegRepType& segrep, RepfnzTy } // end if jsupno } // end for each segment - xusub(jcol + 1) = nextu; // close U(*,jcol) + glu.xusub(jcol + 1) = nextu; // close U(*,jcol) return 0; } diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h index 62c677a93..50da8123e 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -52,12 +52,6 @@ template void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, DenseIndexBlock& segrep, DenseIndexBlock& repfnz, LU_GlobalLU_t& glu) { typedef typename ScalarVector::Scalar Scalar; - IndexVector& xsup = glu.xsup; - IndexVector& supno = glu.supno; - 
IndexVector& lsub = glu.lsub; - IndexVector& xlsub = glu.xlsub; - IndexVector& xlusup = glu.xlusup; - ScalarVector& lusup = glu.lusup; int ksub,jj,nextl_col; int fsupc, nsupc, nsupr, nrow; @@ -76,11 +70,11 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca * nsupr = number of rows in a supernode */ krep = segrep(k); k--; - fsupc = xsup(supno(krep)); + fsupc = glu.xsup(glu.supno(krep)); nsupc = krep - fsupc + 1; - nsupr = xlsub(fsupc+1) - xlsub(fsupc); + nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); nrow = nsupr - nsupc; - lptr = xlsub(fsupc); + lptr = glu.xlsub(fsupc); // loop over the panel columns to detect the actual number of columns and rows int u_rows = 0; @@ -118,14 +112,14 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca continue; // skip any zero segment segsize = krep - kfnz + 1; - luptr = xlusup(fsupc); + luptr = glu.xlusup(fsupc); no_zeros = kfnz - fsupc; int isub = lptr + no_zeros; int off = u_rows-segsize; for (int i = 0; i < segsize; i++) { - int irow = lsub(isub); + int irow = glu.lsub(isub); U(i+off,u_col) = dense_col(irow); ++isub; } @@ -134,15 +128,15 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca } // solve U = A^-1 U - luptr = xlusup(fsupc); + luptr = glu.xlusup(fsupc); no_zeros = (krep - u_rows + 1) - fsupc; luptr += nsupr * no_zeros + no_zeros; - Map, 0, OuterStride<> > A(lusup.data()+luptr, u_rows, u_rows, OuterStride<>(nsupr) ); + Map, 0, OuterStride<> > A(glu.lusup.data()+luptr, u_rows, u_rows, OuterStride<>(nsupr) ); U = A.template triangularView().solve(U); // update luptr += u_rows; - Map, 0, OuterStride<> > B(lusup.data()+luptr, nrow, u_rows, OuterStride<>(nsupr) ); + Map, 0, OuterStride<> > B(glu.lusup.data()+luptr, nrow, u_rows, OuterStride<>(nsupr) ); assert(tempv.size()>w*u_rows + nrow*w); Map > L(tempv.data()+w*u_rows, nrow, u_cols); L.noalias() = B * U; @@ -166,7 +160,7 @@ void LU_panel_bmod(const int m, const int w, const 
int jcol, const int nseg, Sca int off = u_rows-segsize; for (int i = 0; i < segsize; i++) { - int irow = lsub(isub++); + int irow = glu.lsub(isub++); dense_col(irow) = U.coeff(i+off,u_col); U.coeffRef(i,u_col) = 0; } @@ -174,7 +168,7 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca // Scatter l into SPA dense[] for (int i = 0; i < nrow; i++) { - int irow = lsub(isub++); + int irow = glu.lsub(isub++); dense_col(irow) -= L.coeff(i+off,u_col); L.coeffRef(i,u_col) = 0; } @@ -195,7 +189,7 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca continue; // skip any zero segment segsize = krep - kfnz + 1; - luptr = xlusup(fsupc); + luptr = glu.xlusup(fsupc); // NOTE : Unlike the original implementation in SuperLU, // there is no update feature for col-col, 2col-col ... @@ -203,10 +197,10 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca // Perform a trianglar solve and block update, // then scatter the result of sup-col update to dense[] no_zeros = kfnz - fsupc; - if(segsize==1) LU_kernel_bmod<1>::run(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); - else if(segsize==2) LU_kernel_bmod<2>::run(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); - else if(segsize==3) LU_kernel_bmod<3>::run(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); - else LU_kernel_bmod::run(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); + if(segsize==1) LU_kernel_bmod<1>::run(segsize, dense_col, tempv, glu.lusup, luptr, nsupr, nrow, glu.lsub, lptr, no_zeros); + else if(segsize==2) LU_kernel_bmod<2>::run(segsize, dense_col, tempv, glu.lusup, luptr, nsupr, nrow, glu.lsub, lptr, no_zeros); + else if(segsize==3) LU_kernel_bmod<3>::run(segsize, dense_col, tempv, glu.lusup, luptr, nsupr, nrow, glu.lsub, lptr, no_zeros); + else LU_kernel_bmod::run(segsize, dense_col, tempv, glu.lusup, luptr, nsupr, 
nrow, glu.lsub, lptr, no_zeros); } // End for each column in the panel } diff --git a/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/Eigen/src/SparseLU/SparseLU_panel_dfs.h index 75fbd0b0e..3581f6d9c 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_panel_dfs.h @@ -38,10 +38,6 @@ void LU_dfs_kernel(const int jj, IndexVector& perm_r, int& nextl_col, int krow, Traits& traits ) { - IndexVector& xsup = glu.xsup; - IndexVector& supno = glu.supno; - IndexVector& lsub = glu.lsub; - IndexVector& xlsub = glu.xlsub; int kmark = marker(krow); @@ -59,7 +55,7 @@ void LU_dfs_kernel(const int jj, IndexVector& perm_r, // krow is in U : if its supernode-representative krep // has been explored, update repfnz(*) // krep = supernode representative of the current row - int krep = xsup(supno(kperm)+1) - 1; + int krep = glu.xsup(glu.supno(kperm)+1) - 1; // First nonzero element in the current column: int myfnz = repfnz_col(krep); @@ -75,7 +71,7 @@ void LU_dfs_kernel(const int jj, IndexVector& perm_r, int oldrep = IND_EMPTY; parent(krep) = oldrep; repfnz_col(krep) = kperm; - int xdfs = xlsub(krep); + int xdfs = glu.xlsub(krep); int maxdfs = xprune(krep); int kpar; @@ -84,7 +80,7 @@ void LU_dfs_kernel(const int jj, IndexVector& perm_r, // For each unmarked kchild of krep while (xdfs < maxdfs) { - int kchild = lsub(xdfs); + int kchild = glu.lsub(xdfs); xdfs++; int chmark = marker(kchild); @@ -104,7 +100,7 @@ void LU_dfs_kernel(const int jj, IndexVector& perm_r, // case kchild is in U : // chrep = its supernode-rep. 
If its rep has been explored, // update its repfnz(*) - int chrep = xsup(supno(chperm)+1) - 1; + int chrep = glu.xsup(glu.supno(chperm)+1) - 1; myfnz = repfnz_col(chrep); if (myfnz != IND_EMPTY) @@ -119,7 +115,7 @@ void LU_dfs_kernel(const int jj, IndexVector& perm_r, krep = chrep; // Go deeper down G(L) parent(krep) = oldrep; repfnz_col(krep) = chperm; - xdfs = xlsub(krep); + xdfs = glu.xlsub(krep); maxdfs = xprune(krep); } // end if myfnz != -1 @@ -205,7 +201,7 @@ struct LU_panel_dfs_traits } return false; } - void mem_expand(IndexVector& /*lsub*/, int /*nextl*/, int /*chmark*/) {} + void mem_expand(IndexVector& /*glu.lsub*/, int /*nextl*/, int /*chmark*/) {} enum { ExpandMem = false }; Index m_jcol; Index* m_marker; diff --git a/Eigen/src/SparseLU/SparseLU_pivotL.h b/Eigen/src/SparseLU/SparseLU_pivotL.h index 6e2ce87a1..4ad49adee 100644 --- a/Eigen/src/SparseLU/SparseLU_pivotL.h +++ b/Eigen/src/SparseLU/SparseLU_pivotL.h @@ -58,19 +58,14 @@ int LU_pivotL(const int jcol, const typename ScalarVector::RealScalar diagpivott typedef typename IndexVector::Scalar Index; typedef typename ScalarVector::Scalar Scalar; typedef typename ScalarVector::RealScalar RealScalar; - // Initialize pointers - IndexVector& lsub = glu.lsub; // Compressed row subscripts of L rectangular supernodes. 
- IndexVector& xlsub = glu.xlsub; // pointers to the beginning of each column subscript in lsub - ScalarVector& lusup = glu.lusup; // Numerical values of L ordered by columns - IndexVector& xlusup = glu.xlusup; // pointers to the beginning of each colum in lusup Index fsupc = (glu.xsup)((glu.supno)(jcol)); // First column in the supernode containing the column jcol Index nsupc = jcol - fsupc; // Number of columns in the supernode portion, excluding jcol; nsupc >=0 - Index lptr = xlsub(fsupc); // pointer to the starting location of the row subscripts for this supernode portion - Index nsupr = xlsub(fsupc+1) - lptr; // Number of rows in the supernode - Scalar* lu_sup_ptr = &(lusup.data()[xlusup(fsupc)]); // Start of the current supernode - Scalar* lu_col_ptr = &(lusup.data()[xlusup(jcol)]); // Start of jcol in the supernode - Index* lsub_ptr = &(lsub.data()[lptr]); // Start of row indices of the supernode + Index lptr = glu.xlsub(fsupc); // pointer to the starting location of the row subscripts for this supernode portion + Index nsupr = glu.xlsub(fsupc+1) - lptr; // Number of rows in the supernode + Scalar* lu_sup_ptr = &(glu.lusup.data()[glu.xlusup(fsupc)]); // Start of the current supernode + Scalar* lu_col_ptr = &(glu.lusup.data()[glu.xlusup(jcol)]); // Start of jcol in the supernode + Index* lsub_ptr = &(glu.lsub.data()[lptr]); // Start of row indices of the supernode // Determine the largest abs numerical value for partial pivoting Index diagind = iperm_c(jcol); // diagonal index diff --git a/Eigen/src/SparseLU/SparseLU_pruneL.h b/Eigen/src/SparseLU/SparseLU_pruneL.h index 6f935896e..f29285bd4 100644 --- a/Eigen/src/SparseLU/SparseLU_pruneL.h +++ b/Eigen/src/SparseLU/SparseLU_pruneL.h @@ -51,16 +51,9 @@ void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, cons { typedef typename IndexVector::Scalar Index; typedef typename ScalarVector::Scalar Scalar; - // Initialize pointers - IndexVector& xsup = glu.xsup; - IndexVector& supno = glu.supno; 
- IndexVector& lsub = glu.lsub; - IndexVector& xlsub = glu.xlsub; - ScalarVector& lusup = glu.lusup; - IndexVector& xlusup = glu.xlusup; - + // For each supernode-rep irep in U(*,j] - int jsupno = supno(jcol); + int jsupno = glu.supno(jcol); int i,irep,irep1; bool movnum, do_prune = false; Index kmin, kmax, minloc, maxloc,krow; @@ -76,18 +69,18 @@ void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, cons // If a snode overlaps with the next panel, then the U-segment // is fragmented into two parts -- irep and irep1. We should let // pruning occur at the rep-column in irep1s snode. - if (supno(irep) == supno(irep1) ) continue; // don't prune + if (glu.supno(irep) == glu.supno(irep1) ) continue; // don't prune // If it has not been pruned & it has a nonz in row L(pivrow,i) - if (supno(irep) != jsupno ) + if (glu.supno(irep) != jsupno ) { - if ( xprune (irep) >= xlsub(irep1) ) + if ( xprune (irep) >= glu.xlsub(irep1) ) { - kmin = xlsub(irep); - kmax = xlsub(irep1) - 1; + kmin = glu.xlsub(irep); + kmax = glu.xlsub(irep1) - 1; for (krow = kmin; krow <= kmax; krow++) { - if (lsub(krow) == pivrow) + if (glu.lsub(krow) == pivrow) { do_prune = true; break; @@ -100,20 +93,20 @@ void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, cons // do a quicksort-type partition // movnum=true means that the num values have to be exchanged movnum = false; - if (irep == xsup(supno(irep)) ) // Snode of size 1 + if (irep == glu.xsup(glu.supno(irep)) ) // Snode of size 1 movnum = true; while (kmin <= kmax) { - if (perm_r(lsub(kmax)) == IND_EMPTY) + if (perm_r(glu.lsub(kmax)) == IND_EMPTY) kmax--; - else if ( perm_r(lsub(kmin)) != IND_EMPTY) + else if ( perm_r(glu.lsub(kmin)) != IND_EMPTY) kmin++; else { // kmin below pivrow (not yet pivoted), and kmax // above pivrow: interchange the two suscripts - std::swap(lsub(kmin), lsub(kmax)); + std::swap(glu.lsub(kmin), glu.lsub(kmax)); // If the supernode has only one column, then we // only keep one 
set of subscripts. For any subscript @@ -121,9 +114,9 @@ void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, cons // done on the numerical values. if (movnum) { - minloc = xlusup(irep) + ( kmin - xlsub(irep) ); - maxloc = xlusup(irep) + ( kmax - xlsub(irep) ); - std::swap(lusup(minloc), lusup(maxloc)); + minloc = glu.xlusup(irep) + ( kmin - glu.xlsub(irep) ); + maxloc = glu.xlusup(irep) + ( kmax - glu.xlsub(irep) ); + std::swap(glu.lusup(minloc), glu.lusup(maxloc)); } kmin++; kmax--; diff --git a/Eigen/src/SparseLU/SparseLU_snode_bmod.h b/Eigen/src/SparseLU/SparseLU_snode_bmod.h index 6b82b0727..18e6a93d2 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_snode_bmod.h @@ -33,39 +33,40 @@ template int LU_snode_bmod (const int jcol, const int fsupc, ScalarVector& dense, LU_GlobalLU_t& glu) { typedef typename ScalarVector::Scalar Scalar; - IndexVector& lsub = glu.lsub; // Compressed row subscripts of ( rectangular supernodes ??) - IndexVector& xlsub = glu.xlsub; // xlsub[j] is the starting location of the j-th column in lsub(*) - ScalarVector& lusup = glu.lusup; // Numerical values of the rectangular supernodes - IndexVector& xlusup = glu.xlusup; // xlusup[j] is the starting location of the j-th column in lusup(*) - int nextlu = xlusup(jcol); // Starting location of the next column to add + /* lsub : Compressed row subscripts of ( rectangular supernodes ) + * xlsub : xlsub[j] is the starting location of the j-th column in lsub(*) + * lusup : Numerical values of the rectangular supernodes + * xlusup[j] is the starting location of the j-th column in lusup(*) + */ + int nextlu = glu.xlusup(jcol); // Starting location of the next column to add int irow, isub; // Process the supernodal portion of L\U[*,jcol] - for (isub = xlsub(fsupc); isub < xlsub(fsupc+1); isub++) + for (isub = glu.xlsub(fsupc); isub < glu.xlsub(fsupc+1); isub++) { - irow = lsub(isub); - lusup(nextlu) = dense(irow); + irow = glu.lsub(isub); + 
glu.lusup(nextlu) = dense(irow); dense(irow) = 0; ++nextlu; } - xlusup(jcol + 1) = nextlu; // Initialize xlusup for next column ( jcol+1 ) + glu.xlusup(jcol + 1) = nextlu; // Initialize xlusup for next column ( jcol+1 ) if (fsupc < jcol ){ - int luptr = xlusup(fsupc); // points to the first column of the supernode - int nsupr = xlsub(fsupc + 1) -xlsub(fsupc); //Number of rows in the supernode + int luptr = glu.xlusup(fsupc); // points to the first column of the supernode + int nsupr = glu.xlsub(fsupc + 1) -glu.xlsub(fsupc); //Number of rows in the supernode int nsupc = jcol - fsupc; // Number of columns in the supernodal portion of L\U[*,jcol] - int ufirst = xlusup(jcol); // points to the beginning of column jcol in supernode L\U(jsupno) + int ufirst = glu.xlusup(jcol); // points to the beginning of column jcol in supernode L\U(jsupno) int nrow = nsupr - nsupc; // Number of rows in the off-diagonal blocks // Solve the triangular system for U(fsupc:jcol, jcol) with L(fspuc:jcol, fsupc:jcol) - Map,0,OuterStride<> > A( &(lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); - VectorBlock u(lusup, ufirst, nsupc); + Map,0,OuterStride<> > A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); + VectorBlock u(glu.lusup, ufirst, nsupc); u = A.template triangularView().solve(u); // Call the Eigen dense triangular solve interface // Update the trailing part of the column jcol U(jcol:jcol+nrow, jcol) using L(jcol:jcol+nrow, fsupc:jcol) and U(fsupc:jcol) - new (&A) Map,0,OuterStride<> > ( &(lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); - VectorBlock l(lusup, ufirst+nsupc, nrow); + new (&A) Map,0,OuterStride<> > ( &(glu.lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); + VectorBlock l(glu.lusup, ufirst+nsupc, nrow); l.noalias() -= A * u; } return 0; diff --git a/Eigen/src/SparseLU/SparseLU_snode_dfs.h b/Eigen/src/SparseLU/SparseLU_snode_dfs.h index c202c8f48..edb927cdc 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_dfs.h +++ 
b/Eigen/src/SparseLU/SparseLU_snode_dfs.h @@ -46,14 +46,9 @@ int LU_snode_dfs(const int jcol, const int kcol,const MatrixType& mat, IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t& glu) { typedef typename IndexVector::Scalar Index; - IndexVector& xsup = glu.xsup; - IndexVector& supno = glu.supno; // Supernode number corresponding to this column - IndexVector& lsub = glu.lsub; - IndexVector& xlsub = glu.xlsub; - Index& nzlmax = glu.nzlmax; int mem; - Index nsuper = ++supno(jcol); // Next available supernode number - int nextl = xlsub(jcol); //Index of the starting location of the jcol-th column in lsub + Index nsuper = ++glu.supno(jcol); // Next available supernode number + int nextl = glu.xlsub(jcol); //Index of the starting location of the jcol-th column in lsub int krow,kmark; for (int i = jcol; i <=kcol; i++) { @@ -66,36 +61,36 @@ { // First time to visit krow marker(krow) = kcol; - lsub(nextl++) = krow; - if( nextl >= nzlmax ) + glu.lsub(nextl++) = krow; + if( nextl >= glu.nzlmax ) { - mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, glu.num_expansions); + mem = LUMemXpand(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions); if (mem) return mem; // Memory expansion failed... Return the memory allocated so far } } } - supno(i) = nsuper; + glu.supno(i) = nsuper; } // If supernode > 1, then make a copy of the subscripts for pruning if (jcol < kcol) { - Index new_next = nextl + (nextl - xlsub(jcol)); - while (new_next > nzlmax) + Index new_next = nextl + (nextl - glu.xlsub(jcol)); + while (new_next > glu.nzlmax) { - mem = LUMemXpand(lsub, nzlmax, nextl, LSUB, glu.num_expansions); + mem = LUMemXpand(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions); if (mem) return mem; // Memory expansion failed... 
Return the memory allocated so far } Index ifrom, ito = nextl; - for (ifrom = xlsub(jcol); ifrom < nextl;) - lsub(ito++) = lsub(ifrom++); - for (int i = jcol+1; i <=kcol; i++) xlsub(i) = nextl; + for (ifrom = glu.xlsub(jcol); ifrom < nextl;) + glu.lsub(ito++) = glu.lsub(ifrom++); + for (int i = jcol+1; i <=kcol; i++) glu.xlsub(i) = nextl; nextl = ito; } - xsup(nsuper+1) = kcol + 1; // Start of next available supernode - supno(kcol+1) = nsuper; + glu.xsup(nsuper+1) = kcol + 1; // Start of next available supernode + glu.supno(kcol+1) = nsuper; xprune(kcol) = nextl; - xlsub(kcol+1) = nextl; + glu.xlsub(kcol+1) = nextl; return 0; } #endif \ No newline at end of file From a51806993b1a437af308db9c6893cab71e7ca814 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Fri, 3 Aug 2012 16:43:12 +0200 Subject: [PATCH 37/73] Prefix with glu, the global structure --- Eigen/src/SparseLU/SparseLU_column_bmod.h | 2 +- Eigen/src/SparseLU/SparseLU_column_dfs.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h index d4488e57f..bf25a33fc 100644 --- a/Eigen/src/SparseLU/SparseLU_column_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -117,7 +117,7 @@ int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, Sca new_next = nextlu + glu.xlsub(fsupc + 1) - glu.xlsub(fsupc); while (new_next > glu.nzlumax ) { - mem = LUMemXpand(glu.glu.lusup, glu.nzlumax, nextlu, LUSUP, glu.num_expansions); + mem = LUMemXpand(glu.lusup, glu.nzlumax, nextlu, LUSUP, glu.num_expansions); if (mem) return mem; } diff --git a/Eigen/src/SparseLU/SparseLU_column_dfs.h b/Eigen/src/SparseLU/SparseLU_column_dfs.h index d01b84dc4..568e0686c 100644 --- a/Eigen/src/SparseLU/SparseLU_column_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_column_dfs.h @@ -67,10 +67,10 @@ struct LU_column_dfs_traits { return true; } - void mem_expand(IndexVector& glu.lsub, int& nextl, int chmark) + void 
mem_expand(IndexVector& lsub, int& nextl, int chmark) { if (nextl >= m_glu.nzlmax) - LUMemXpand(glu.lsub, m_glu.nzlmax, nextl, LSUB, m_glu.num_expansions); + LUMemXpand(lsub, m_glu.nzlmax, nextl, LSUB, m_glu.num_expansions); if (chmark != (m_jcol-1)) m_jsuper_ref = IND_EMPTY; } enum { ExpandMem = true }; @@ -84,7 +84,7 @@ template & glu) { typedef typename IndexVector::Scalar Index; - typedef typename ScalarVector + typedef typename ScalarVector::Scalar Scalar; int jsuper = glu.supno(jcol); int nextl = glu.xlsub(jcol); From 4d3b7e2a1351d60b9ee26d0fe3442cd5b3a1f8a9 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Mon, 6 Aug 2012 14:55:02 +0200 Subject: [PATCH 38/73] Add support for Metis fill-reducing ordering ; it is generally more efficient than COLAMD ordering --- Eigen/MetisSupport | 26 +++++ Eigen/src/MetisSupport/CMakeLists.txt | 6 ++ Eigen/src/MetisSupport/MetisSupport.h | 138 ++++++++++++++++++++++++++ bench/spbench/CMakeLists.txt | 6 ++ bench/spbench/test_sparseLU.cpp | 8 ++ cmake/FindMetis.cmake | 3 +- 6 files changed, 186 insertions(+), 1 deletion(-) create mode 100644 Eigen/MetisSupport create mode 100644 Eigen/src/MetisSupport/CMakeLists.txt create mode 100644 Eigen/src/MetisSupport/MetisSupport.h diff --git a/Eigen/MetisSupport b/Eigen/MetisSupport new file mode 100644 index 000000000..a44086ad9 --- /dev/null +++ b/Eigen/MetisSupport @@ -0,0 +1,26 @@ +#ifndef EIGEN_METISSUPPORT_MODULE_H +#define EIGEN_METISSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +extern "C" { +#include +} + + +/** \ingroup Support_modules + * \defgroup MetisSupport_Module MetisSupport module + * + * \code + * #include + * \endcode + */ + + +#include "src/MetisSupport/MetisSupport.h" + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_METISSUPPORT_MODULE_H diff --git a/Eigen/src/MetisSupport/CMakeLists.txt b/Eigen/src/MetisSupport/CMakeLists.txt new file mode 100644 index 000000000..2bad31416 --- /dev/null 
+++ b/Eigen/src/MetisSupport/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_MetisSupport_SRCS "*.h") + +INSTALL(FILES + ${Eigen_MetisSupport_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/src/MetisSupport COMPONENT Devel + ) diff --git a/Eigen/src/MetisSupport/MetisSupport.h b/Eigen/src/MetisSupport/MetisSupport.h new file mode 100644 index 000000000..a762d96f6 --- /dev/null +++ b/Eigen/src/MetisSupport/MetisSupport.h @@ -0,0 +1,138 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#ifndef METIS_SUPPORT_H +#define METIS_SUPPORT_H + +namespace Eigen { +/** + * Get the fill-reducing ordering from the METIS package + * + * If A is the original matrix and Ap is the permuted matrix, + * the fill-reducing permutation is defined as follows : + * Row (column) i of A is the matperm(i) row (column) of Ap. 
+ * WARNING: As computed by METIS, this corresponds to the vector iperm (instead of perm) + */ +template +class MetisOrdering +{ +public: + typedef PermutationMatrix PermutationType; + typedef Matrix IndexVector; + + template + void get_symmetrized_graph(const MatrixType& A) + { + Index m = A.cols(); + + // Get the transpose of the input matrix + MatrixType At = A.transpose(); + // Get the number of nonzeros elements in each row/col of At+A + Index TotNz = 0; + IndexVector visited(m); + visited.setConstant(-1); + for (int j = 0; j < m; j++) + { + // Compute the union structure of of A(j,:) and At(j,:) + visited(j) = j; // Do not include the diagonal element + // Get the nonzeros in row/column j of A + for (typename MatrixType::InnerIterator it(A, j); it; ++it) + { + Index idx = it.index(); // Get the row index (for column major) or column index (for row major) + if (visited(idx) != j ) + { + visited(idx) = j; + ++TotNz; + } + } + //Get the nonzeros in row/column j of At + for (typename MatrixType::InnerIterator it(At, j); it; ++it) + { + Index idx = it.index(); + if(visited(idx) != j) + { + visited(idx) = j; + ++TotNz; + } + } + } + // Reserve place for A + At + m_indexPtr.resize(m+1); + m_innerIndices.resize(TotNz); + + // Now compute the real adjacency list of each column/row + visited.setConstant(-1); + Index CurNz = 0; + for (int j = 0; j < m; j++) + { + m_indexPtr(j) = CurNz; + + visited(j) = j; // Do not include the diagonal element + // Add the pattern of row/column j of A to A+At + for (typename MatrixType::InnerIterator it(A,j); it; ++it) + { + Index idx = it.index(); // Get the row index (for column major) or column index (for row major) + if (visited(idx) != j ) + { + visited(idx) = j; + m_innerIndices(CurNz) = idx; + CurNz++; + } + } + //Add the pattern of row/column j of At to A+At + for (typename MatrixType::InnerIterator it(At, j); it; ++it) + { + Index idx = it.index(); + if(visited(idx) != j) + { + visited(idx) = j; + m_innerIndices(CurNz) = idx; + 
++CurNz; + } + } + } + m_indexPtr(m) = CurNz; + } + + template + void operator() (const MatrixType& A, PermutationType& matperm) + { + Index m = A.cols(); + IndexVector perm(m),iperm(m); + // First, symmetrize the matrix graph. + get_symmetrized_graph(A); + int output_error; + + // Call the fill-reducing routine from METIS + output_error = METIS_NodeND(&m, m_indexPtr.data(), m_innerIndices.data(), NULL, NULL, perm.data(), iperm.data()); + + if(output_error != METIS_OK) + { + //FIXME The ordering interface should define a class of possible errors + std::cerr << "ERROR WHILE CALLING THE METIS PACKAGE \n"; + return; + } + + // Get the fill-reducing permutation + //NOTE: If Ap is the permuted matrix then perm and iperm vectors are defined as follows + // Row (column) i of Ap is the perm(i) row(column) of A, and row (column) i of A is the iperm(i) row(column) of Ap + + // To be consistent with the use of the permutation in SparseLU module, we thus keep the iperm vector + matperm.resize(m); + for (int j = 0; j < m; j++) + matperm.indices()(j) = iperm(j); + + } + + protected: + IndexVector m_indexPtr; // Pointer to the adjacenccy list of each row/column + IndexVector m_innerIndices; // Adjacency list +}; + +}// end namespace eigen +#endif \ No newline at end of file diff --git a/bench/spbench/CMakeLists.txt b/bench/spbench/CMakeLists.txt index a093cc5d9..2eb0befa9 100644 --- a/bench/spbench/CMakeLists.txt +++ b/bench/spbench/CMakeLists.txt @@ -66,5 +66,11 @@ target_link_libraries (spbenchsolver ${SPARSE_LIBS}) add_executable(spsolver sp_solver.cpp) target_link_libraries (spsolver ${SPARSE_LIBS}) +if(METIS_FOUND) + include_directories(${METIS_INCLUDES}) + set (SPARSE_LIBS ${SPARSE_LIBS} ${METIS_LIBRARIES}) + add_definitions("-DEIGEN_METIS_SUPPORT") +endif(METIS_FOUND) + add_executable(test_sparseLU test_sparseLU.cpp) target_link_libraries (test_sparseLU ${SPARSE_LIBS}) diff --git a/bench/spbench/test_sparseLU.cpp b/bench/spbench/test_sparseLU.cpp index 59f8252d0..8c78b0c9b 
100644 --- a/bench/spbench/test_sparseLU.cpp +++ b/bench/spbench/test_sparseLU.cpp @@ -7,6 +7,9 @@ #include #include #include +#ifdef EIGEN_METIS_SUPPORT +#include +#endif using namespace std; using namespace Eigen; @@ -21,7 +24,12 @@ int main(int argc, char **args) typedef Matrix DenseRhs; Matrix b, x, tmp; // SparseLU, AMDOrdering > solver; +#ifdef EIGEN_METIS_SUPPORT + SparseLU, MetisOrdering > solver; +#else SparseLU, COLAMDOrdering > solver; +#endif + ifstream matrix_file; string line; int n; diff --git a/cmake/FindMetis.cmake b/cmake/FindMetis.cmake index e4d6ef258..627c3e9ae 100644 --- a/cmake/FindMetis.cmake +++ b/cmake/FindMetis.cmake @@ -12,10 +12,11 @@ find_path(METIS_INCLUDES ${INCLUDE_INSTALL_DIR} PATH_SUFFIXES metis + include ) -find_library(METIS_LIBRARIES metis PATHS $ENV{METISDIR} ${LIB_INSTALL_DIR}) +find_library(METIS_LIBRARIES metis PATHS $ENV{METISDIR} ${LIB_INSTALL_DIR} PATH_SUFFIXES lib) include(FindPackageHandleStandardArgs) find_package_handle_standard_args(METIS DEFAULT_MSG From 43f74cb5b1c19e8eb0d6f9f5bf0635eeb0447c85 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Tue, 7 Aug 2012 13:55:50 +0200 Subject: [PATCH 39/73] Bug in 2D block update, disable it for now --- Eigen/src/SparseLU/SparseLU_panel_bmod.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h index 50da8123e..bfe13b38a 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -95,7 +95,7 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca // if the blocks are large enough, use level 3 // TODO find better heuristics! 
- if(nsupc >= 50 && nrow > 50 && u_cols>6) + if(false && nsupc >= 50 && nrow > 50 && u_cols>6) { Map > U(tempv.data(), u_rows, u_cols); From 63d2dcfb7045ce4346b67e09c5c836e49fb75c6e Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Tue, 7 Aug 2012 17:10:42 +0200 Subject: [PATCH 40/73] Clean the supernodal matrix class --- Eigen/src/SparseLU/SparseLU.h | 99 +++++------------------ Eigen/src/SparseLU/SparseLU_Matrix.h | 113 ++++++++++++++++++--------- 2 files changed, 95 insertions(+), 117 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 0b1347f87..997f4e352 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -153,107 +153,48 @@ class SparseLU for(int j = 0; j < nrhs; ++j) X.col(j) = m_perm_r * B.col(j); - // Forward solve PLy = Pb; - Index fsupc; // First column of the current supernode - Index istart; // Pointer index to the subscript of the current column - Index nsupr; // Number of rows in the current supernode - Index nsupc; // Number of columns in the current supernode - Index nrow; // Number of rows in the non-diagonal part of the supernode - Index luptr; // Pointer index to the current nonzero value - Index iptr; // row index pointer iterator - Index irow; //Current index row - const Scalar * Lval = m_Lstore.valuePtr(); // Nonzero values - Matrix work(n, nrhs); // working vector - work.setZero(); - int j, k, i,jcol; - for (k = 0; k <= m_Lstore.nsuper(); k ++) - { - fsupc = m_Lstore.supToCol()[k]; - istart = m_Lstore.rowIndexPtr()[fsupc]; - nsupr = m_Lstore.rowIndexPtr()[fsupc+1] - istart; - nsupc = m_Lstore.supToCol()[k+1] - fsupc; - nrow = nsupr - nsupc; - - if (nsupc == 1 ) - { - for (j = 0; j < nrhs; j++) - { - luptr = m_Lstore.colIndexPtr()[fsupc]; - for (iptr = istart+1; iptr < m_Lstore.rowIndexPtr()[fsupc+1]; iptr++) - { - irow = m_Lstore.rowIndex()[iptr]; - ++luptr; - X(irow, j) -= X(fsupc, j) * Lval[luptr]; - } - } - } - else - { - // The supernode has more than one column - 
luptr = m_Lstore.colIndexPtr()[fsupc]; - - // Triangular solve - Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); - Map< Matrix, 0, OuterStride<> > U (&(X.data()[fsupc]), nsupc, nrhs, OuterStride<>(n) ); - U = A.template triangularView().solve(U); - - // Matrix-vector product - new (&A) Map, 0, OuterStride<> > ( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); - work.block(0, 0, nrow, nrhs) = A * U; - - //Begin Scatter - for (j = 0; j < nrhs; j++) - { - iptr = istart + nsupc; - for (i = 0; i < nrow; i++) - { - irow = m_Lstore.rowIndex()[iptr]; - X(irow, j) -= work(i, j); // Scatter operation - work(i, j) = Scalar(0); - iptr++; - } - } - } - } // end for all supernodes + //Forward substitution with L + m_Lstore.solveInPlace(X); - // Back solve Ux = y - for (k = m_Lstore.nsuper(); k >= 0; k--) + // Backward solve with U + for (int k = m_Lstore.nsuper(); k >= 0; k--) { - fsupc = m_Lstore.supToCol()[k]; - istart = m_Lstore.rowIndexPtr()[fsupc]; - nsupr = m_Lstore.rowIndexPtr()[fsupc+1] - istart; - nsupc = m_Lstore.supToCol()[k+1] - fsupc; - luptr = m_Lstore.colIndexPtr()[fsupc]; + Index fsupc = m_Lstore.supToCol()[k]; + Index istart = m_Lstore.rowIndexPtr()[fsupc]; + Index nsupr = m_Lstore.rowIndexPtr()[fsupc+1] - istart; + Index nsupc = m_Lstore.supToCol()[k+1] - fsupc; + Index luptr = m_Lstore.colIndexPtr()[fsupc]; if (nsupc == 1) { - for (j = 0; j < nrhs; j++) + for (int j = 0; j < nrhs; j++) { - X(fsupc, j) /= Lval[luptr]; + X(fsupc, j) /= m_Lstore.valuePtr()[luptr]; } } else { - Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); - Map< Matrix, 0, OuterStride<> > U (&(X.data()[fsupc]), nsupc, nrhs, OuterStride<>(n) ); + Map, 0, OuterStride<> > A( &(m_Lstore.valuePtr()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); + Map< Matrix, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) ); U = A.template triangularView().solve(U); } - for (j = 0; j < nrhs; ++j) + for (int j = 0; j < 
nrhs; ++j) { - for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) + for (int jcol = fsupc; jcol < fsupc + nsupc; jcol++) { - for (i = m_Ustore.outerIndexPtr()[jcol]; i < m_Ustore.outerIndexPtr()[jcol+1]; i++) + typename MappedSparseMatrix::InnerIterator it(m_Ustore, jcol); + for ( ; it; ++it) { - irow = m_Ustore.innerIndexPtr()[i]; - X(irow, j) -= X(jcol, j) * m_Ustore.valuePtr()[i]; + Index irow = it.index(); + X(irow, j) -= X(jcol, j) * it.value(); } } } } // End For U-solve // Permute back the solution - for (j = 0; j < nrhs; ++j) + for (int j = 0; j < nrhs; ++j) X.col(j) = m_perm_c.inverse() * X.col(j); return true; diff --git a/Eigen/src/SparseLU/SparseLU_Matrix.h b/Eigen/src/SparseLU/SparseLU_Matrix.h index 9381189c8..31aeee64d 100644 --- a/Eigen/src/SparseLU/SparseLU_Matrix.h +++ b/Eigen/src/SparseLU/SparseLU_Matrix.h @@ -171,8 +171,13 @@ class SuperNodalMatrix { return m_nsuper; } + class InnerIterator; - class SuperNodeIterator; + template + void solveInPlace( MatrixBase&X) const; + + + protected: Index m_row; // Number of rows @@ -189,7 +194,7 @@ class SuperNodalMatrix }; /** - * \brief InnerIterator class to iterate over nonzero values in the triangular supernodal matrix + * \brief InnerIterator class to iterate over nonzero values of the current column in the supernode * */ template @@ -209,7 +214,7 @@ class SuperNodalMatrix::InnerIterator inline InnerIterator& operator++() { m_idval++; - m_idrow++ ; + m_idrow++; return *this; } inline Scalar value() const { return m_matrix.valuePtr()[m_idval]; } @@ -229,48 +234,80 @@ class SuperNodalMatrix::InnerIterator } protected: - const SuperNodalMatrix& m_matrix; // Supernodal lower triangular matrix - const Index m_outer; // Current column - Index m_idval; //Index to browse the values in the current column - const Index m_startval; // Start of the column value - const Index m_endval; // End of the column value - Index m_idrow; //Index to browse the row indices - const Index m_startidrow; // Start of the row 
indices of the current column value - const Index m_endidrow; // End of the row indices of the current column value + const SuperNodalMatrix& m_matrix; // Supernodal lower triangular matrix + const Index m_outer; // Current column + Index m_idval; //Index to browse the values in the current column + const Index m_startval; // Start of the column value + const Index m_endval; // End of the column value + Index m_idrow; //Index to browse the row indices + const Index m_startidrow; // Start of the row indices of the current column value + const Index m_endidrow; // End of the row indices of the current column value }; /** - * \brief Iterator class to iterate over Supernodes in the triangular supernodal matrix + * \brief Solve with the supernode triangular matrix * - * The final goal is to use this class when dealing with supernodes during numerical factorization */ template -class SuperNodalMatrix::SuperNodeIterator +template +void SuperNodalMatrix::solveInPlace( MatrixBase&X) const { - public: - SuperNodeIterator(const SuperNodalMatrix& mat) + Index n = X.rows(); + int nrhs = X.cols(); + const Scalar * Lval = valuePtr(); // Nonzero values + Matrix work(n, nrhs); // working vector + work.setZero(); + for (int k = 0; k <= nsuper(); k ++) { + Index fsupc = supToCol()[k]; // First column of the current supernode + Index istart = rowIndexPtr()[fsupc]; // Pointer index to the subscript of the current column + Index nsupr = rowIndexPtr()[fsupc+1] - istart; // Number of rows in the current supernode + Index nsupc = supToCol()[k+1] - fsupc; // Number of columns in the current supernode + Index nrow = nsupr - nsupc; // Number of rows in the non-diagonal part of the supernode + Index irow; //Current index row - } - SuperNodeIterator(const SuperNodalMatrix& mat, Index supno) - { - - } - - /* - * Available Methods : - * Browse all supernodes (operator ++ ) - * Number of supernodes - * Columns of the current supernode - * triangular matrix of the current supernode - * rectangular 
part of the current supernode - */ - protected: - const SuperNodalMatrix& m_matrix; // Supernodal lower triangular matrix - Index m_idsup; // Index to browse all supernodes - const Index m_nsuper; // Number of all supernodes - Index m_startidsup; - Index m_endidsup; - -}; + if (nsupc == 1 ) + { + for (int j = 0; j < nrhs; j++) + { + InnerIterator it(*this, fsupc); + ++it; // Skip the diagonal element + for (; it; ++it) + { + irow = it.row(); + X(irow, j) -= X(fsupc, j) * it.value(); + } + } + } + else + { + // The supernode has more than one column + Index luptr = colIndexPtr()[fsupc]; + + // Triangular solve + Map, 0, OuterStride<> > A( &(Lval[luptr]), nsupc, nsupc, OuterStride<>(nsupr) ); + Map< Matrix, 0, OuterStride<> > U (&(X(fsupc,0)), nsupc, nrhs, OuterStride<>(n) ); + U = A.template triangularView().solve(U); + + // Matrix-vector product + new (&A) Map, 0, OuterStride<> > ( &(Lval[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) ); + work.block(0, 0, nrow, nrhs) = A * U; + + //Begin Scatter + for (int j = 0; j < nrhs; j++) + { + Index iptr = istart + nsupc; + for (int i = 0; i < nrow; i++) + { + irow = rowIndex()[iptr]; + X(irow, j) -= work(i, j); // Scatter operation + work(i, j) = Scalar(0); + iptr++; + } + } + } + } +} + + #endif \ No newline at end of file From 288e6aab14cc12e604bd1a12f0cba20d88edf54f Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Mon, 3 Sep 2012 10:33:39 +0200 Subject: [PATCH 41/73] Insert XSL styles into output XML files --- bench/spbench/spbench.xsl | 83 ----------------------------- bench/spbench/spbenchsolver.cpp | 2 +- bench/spbench/spbenchsolver.h | 62 +++++++++++----------- bench/spbench/spbenchstyle.h | 94 +++++++++++++++++++++++++++++++++ 4 files changed, 126 insertions(+), 115 deletions(-) delete mode 100644 bench/spbench/spbench.xsl create mode 100644 bench/spbench/spbenchstyle.h diff --git a/bench/spbench/spbench.xsl b/bench/spbench/spbench.xsl deleted file mode 100644 index 7727542f8..000000000 --- 
a/bench/spbench/spbench.xsl +++ /dev/null @@ -1,83 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Matrix N NNZ Sym SPD - - - - -
Compute Time
Solve Time
Total Time
Error - - ( - - ) -
- - -
- -
\ No newline at end of file diff --git a/bench/spbench/spbenchsolver.cpp b/bench/spbench/spbenchsolver.cpp index ff0ce4c7d..4acd0039c 100644 --- a/bench/spbench/spbenchsolver.cpp +++ b/bench/spbench/spbenchsolver.cpp @@ -14,7 +14,7 @@ void bench_printhelp() cout<< " OPTIONS : \n"; cout<< " -h or --help \n print this help and return\n\n"; cout<< " -d matrixdir \n Use matrixdir as the matrix folder instead of the one specified in the environment variable EIGEN_MATRIXDIR\n\n"; - cout<< " -o outputfile.html \n Output the statistics to a html file \n\n"; + cout<< " -o outputfile.xml \n Output the statistics to a xml file \n\n"; cout<< " --eps Sets the relative tolerance for iterative solvers (default 1e-08) \n\n"; cout<< " --maxits Sets the maximum number of iterations (default 1000) \n\n"; diff --git a/bench/spbench/spbenchsolver.h b/bench/spbench/spbenchsolver.h index 99f05aabc..c48ed7aa7 100644 --- a/bench/spbench/spbenchsolver.h +++ b/bench/spbench/spbenchsolver.h @@ -10,7 +10,7 @@ #include #include -#include "Eigen/SparseCore" +#include #include #include #include @@ -22,6 +22,8 @@ #include #include +#include "spbenchstyle.h" + #ifdef EIGEN_CHOLMOD_SUPPORT #include #endif @@ -85,103 +87,103 @@ void printStatheader(std::ofstream& out) // Print XML header // NOTE It would have been much easier to write these XML documents using external libraries like tinyXML or Xerces-C++. 
- out << " \n"; - out << " \n"; - out << " \n"; - out << "\n\n"; + out << " \n"; + out << " \n"; + out << "\n]>"; + out << "\n\n\n"; - - // Write the root XML element - out << "\n \n" ; + out << "\n \n" ; //root XML element + // Print the xsl style section + printBenchStyle(out); // List all available solvers out << " \n"; #ifdef EIGEN_UMFPACK_SUPPORT - out <<" \n"; + out <<" \n"; out << " LU \n"; out << " UMFPACK \n"; out << " \n"; #endif #ifdef EIGEN_SUPERLU_SUPPORT - out <<" \n"; + out <<" \n"; out << " LU \n"; out << " SUPERLU \n"; out << " \n"; #endif #ifdef EIGEN_CHOLMOD_SUPPORT - out <<" \n"; + out <<" \n"; out << " LLT SP \n"; out << " CHOLMOD \n"; out << " \n"; - out <<" \n"; + out <<" \n"; out << " LLT \n"; out << " CHOLMOD \n"; out << " \n"; - out <<" \n"; + out <<" \n"; out << " LDLT \n"; out << " CHOLMOD \n"; out << " \n"; #endif #ifdef EIGEN_PARDISO_SUPPORT - out <<" \n"; + out <<" \n"; out << " LU \n"; out << " PARDISO \n"; out << " \n"; - out <<" \n"; + out <<" \n"; out << " LLT \n"; out << " PARDISO \n"; out << " \n"; - out <<" \n"; + out <<" \n"; out << " LDLT \n"; out << " PARDISO \n"; out << " \n"; #endif #ifdef EIGEN_PASTIX_SUPPORT - out <<" \n"; + out <<" \n"; out << " LU \n"; out << " PASTIX \n"; out << " \n"; - out <<" \n"; + out <<" \n"; out << " LLT \n"; out << " PASTIX \n"; out << " \n"; - out <<" \n"; + out <<" \n"; out << " LDLT \n"; out << " PASTIX \n"; out << " \n"; #endif - out <<" \n"; + out <<" \n"; out << " BICGSTAB \n"; out << " EIGEN \n"; out << " \n"; - out <<" \n"; + out <<" \n"; out << " BICGSTAB_ILUT \n"; out << " EIGEN \n"; out << " \n"; - out <<" \n"; + out <<" \n"; out << " GMRES_ILUT \n"; out << " EIGEN \n"; out << " \n"; - out <<" \n"; + out <<" \n"; out << " LDLT \n"; out << " EIGEN \n"; out << " \n"; - out <<" \n"; + out <<" \n"; out << " LLT \n"; out << " EIGEN \n"; out << " \n"; - out <<" \n"; + out <<" \n"; out << " CG \n"; out << " EIGEN \n"; out << " \n"; @@ -260,7 +262,7 @@ template void 
call_directsolver(Solver& solver, const int solver_id, const typename Solver::MatrixType& A, const Matrix& b, const Matrix& refX, std::string& statFile) { std::ofstream statbuf(statFile.c_str(), std::ios::app); - statbuf << " \n"; + statbuf << " \n"; call_solver(solver, solver_id, A, b, refX,statbuf); statbuf << " \n"; statbuf.close(); @@ -273,7 +275,7 @@ void call_itersolver(Solver &solver, const int solver_id, const typename Solver: solver.setMaxIterations(MaximumIters); std::ofstream statbuf(statFile.c_str(), std::ios::app); - statbuf << " \n"; + statbuf << " \n"; call_solver(solver, solver_id, A, b, refX,statbuf); statbuf << " "<< solver.iterations() << "\n"; statbuf << " \n"; @@ -303,7 +305,6 @@ void SelectSolvers(const SparseMatrix&A, unsigned int sym, Matrix solver; call_directsolver(solver, EIGEN_SUPERLU, A, b, refX,statFile); - printStatItem(stat, best_time_id, best_time_val); } #endif @@ -448,7 +449,6 @@ void SelectSolvers(const SparseMatrix&A, unsigned int sym, Matrix solver; // call_itersolver(solver,EIGEN_CG_PRECOND, A, b, refX,statFile); -// printStatItem(stat, best_time_id, best_time_val); // } } // End SPD matrices } @@ -504,8 +504,8 @@ void Browse_Matrices(const string folder, bool statFileExists, std::string& stat if(statFileExists) { std::ofstream statbuf(statFile.c_str(), std::ios::app); - statbuf << " \n"; + statbuf << " \n"; statbuf << " \n"; statbuf.close(); } diff --git a/bench/spbench/spbenchstyle.h b/bench/spbench/spbenchstyle.h new file mode 100644 index 000000000..17a05ce71 --- /dev/null +++ b/bench/spbench/spbenchstyle.h @@ -0,0 +1,94 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef SPBENCHSTYLE_H +#define SPBENCHSTYLE_H + +void printBenchStyle(std::ofstream& out) +{ + out << "\n \ + \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + "; + out<<"\n \ + \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + "; + + out<<" \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + "; + out<<" \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + "; + out<<" \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + \n \ + \n \ +
Matrix N NNZ Sym SPD \n \ + \n \ + \n \ + \n \ + \n \ +
Compute Time
Solve Time
Total Time
Error \n \ + \n \ + (\n \ + \n \ + )\n \ +
\n \ + \n \ + \n \ +
\n \ +
\n\n"; + +} +#endif \ No newline at end of file From 3a22c47fb5ab1d34498fdaa5b1263c1426d97a37 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Mon, 3 Sep 2012 14:49:03 +0200 Subject: [PATCH 42/73] Bug in blas 3 2D block update --- Eigen/src/SparseLU/SparseLU_panel_bmod.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h index bfe13b38a..90d3297a1 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -95,7 +95,7 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca // if the blocks are large enough, use level 3 // TODO find better heuristics! - if(false && nsupc >= 50 && nrow > 50 && u_cols>6) + if(true /*nsupc >= 50 && nrow > 50 && u_cols>6*/) { Map > U(tempv.data(), u_rows, u_cols); @@ -126,7 +126,6 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca u_col++; } - // solve U = A^-1 U luptr = glu.xlusup(fsupc); no_zeros = (krep - u_rows + 1) - fsupc; @@ -162,14 +161,14 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca { int irow = glu.lsub(isub++); dense_col(irow) = U.coeff(i+off,u_col); - U.coeffRef(i,u_col) = 0; + U.coeffRef(i+off,u_col) = 0; } // Scatter l into SPA dense[] for (int i = 0; i < nrow; i++) { int irow = glu.lsub(isub++); - dense_col(irow) -= L.coeff(i+off,u_col); + dense_col(irow) -= L.coeff(i,u_col); L.coeffRef(i,u_col) = 0; } u_col++; From 2e38666d01e395ecb4ffb41e3d031aff1f984308 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Tue, 4 Sep 2012 11:36:57 +0200 Subject: [PATCH 43/73] correct bug in Blas 3 2D block update --- Eigen/src/SparseLU/SparseLU_panel_bmod.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h index 90d3297a1..36b4f74df 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h 
+++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -49,7 +49,7 @@ * */ template -void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, DenseIndexBlock& segrep, DenseIndexBlock& repfnz, LU_GlobalLU_t& glu) +void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, DenseIndexBlock& segrep, DenseIndexBlock& repfnz, LU_GlobalLU_t& glu) { typedef typename ScalarVector::Scalar Scalar; @@ -95,7 +95,7 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca // if the blocks are large enough, use level 3 // TODO find better heuristics! - if(true /*nsupc >= 50 && nrow > 50 && u_cols>6*/) + if( nsupc >= 50 && nrow > 50 && u_cols>6) { Map > U(tempv.data(), u_rows, u_cols); @@ -117,13 +117,13 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca int isub = lptr + no_zeros; int off = u_rows-segsize; + for (int i = 0; i < off; i++) U(i,u_col) = 0; for (int i = 0; i < segsize; i++) { int irow = glu.lsub(isub); U(i+off,u_col) = dense_col(irow); ++isub; } - u_col++; } // solve U = A^-1 U From 2280f2490e802320838f8e51884abe8667a6112d Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Tue, 4 Sep 2012 12:21:07 +0200 Subject: [PATCH 44/73] Init perf values --- Eigen/src/SparseLU/SparseLU.h | 37 ++++++++++-------------- Eigen/src/SparseLU/SparseLU_Structs.h | 12 ++++++++ Eigen/src/SparseLU/SparseLU_panel_bmod.h | 4 +-- bench/spbench/test_sparseLU.cpp | 10 ++++--- 4 files changed, 35 insertions(+), 28 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 997f4e352..6a6579493 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -204,12 +204,12 @@ class SparseLU // Functions void initperfvalues() { - m_panel_size = 12; - m_relax = 6; - m_maxsuper = 100; - m_rowblk = 200; - m_colblk = 60; - m_fillfactor = 20; + m_perfv.panel_size = 12; + 
m_perfv.relax = 6; + m_perfv.maxsuper = 100; + m_perfv.rowblk = 200; + m_perfv.colblk = 60; + m_perfv.fillfactor = 20; } // Variables @@ -231,14 +231,7 @@ class SparseLU bool m_symmetricmode; // values for performance - int m_panel_size; // a panel consists of at most consecutive columns - int m_relax; // To control degree of relaxing supernodes. If the number of nodes (columns) - // in a subtree of the elimination tree is less than relax, this subtree is considered - // as one supernode regardless of the row structures of those columns - int m_maxsuper; // The maximum size for a supernode in complete LU - int m_rowblk; // The minimum row dimension for 2-D blocking to be used; - int m_colblk; // The minimum column dimension for 2-D blocking to be used; - int m_fillfactor; // The estimated fills factors for L and U, compared with A + LU_perfvalues m_perfv; RealScalar m_diagpivotthresh; // Specifies the threshold used for a diagonal entry to be an acceptable pivot int m_nnzL, m_nnzU; // Nonzeros in L and U factors @@ -374,10 +367,10 @@ void SparseLU::factorize(const MatrixType& matrix) int m = m_mat.rows(); int n = m_mat.cols(); int nnz = m_mat.nonZeros(); - int maxpanel = m_panel_size * m; + int maxpanel = m_perfv.panel_size * m; // Allocate working storage common to the factor routines int lwork = 0; - int info = LUMemInit(m, n, nnz, lwork, m_fillfactor, m_panel_size, m_glu); + int info = LUMemInit(m, n, nnz, lwork, m_perfv.fillfactor, m_perfv.panel_size, m_glu); if (info) { std::cerr << "UNABLE TO ALLOCATE WORKING MEMORY\n\n" ; @@ -401,7 +394,7 @@ void SparseLU::factorize(const MatrixType& matrix) ScalarVector dense; dense.setZero(maxpanel); ScalarVector tempv; - tempv.setZero(LU_NUM_TEMPV(m, m_panel_size, m_maxsuper, m_rowblk) ); + tempv.setZero(LU_NUM_TEMPV(m, m_perfv.panel_size, m_perfv.maxsuper, m_perfv.rowblk) ); // Compute the inverse of perm_c PermutationType iperm_c(m_perm_c.inverse()); @@ -409,9 +402,9 @@ void SparseLU::factorize(const MatrixType& matrix) 
// Identify initial relaxed snodes IndexVector relax_end(n); if ( m_symmetricmode == true ) - LU_heap_relax_snode(n, m_etree, m_relax, marker, relax_end); + LU_heap_relax_snode(n, m_etree, m_perfv.relax, marker, relax_end); else - LU_relax_snode(n, m_etree, m_relax, marker, relax_end); + LU_relax_snode(n, m_etree, m_perfv.relax, marker, relax_end); m_perm_r.resize(m); @@ -499,7 +492,7 @@ void SparseLU::factorize(const MatrixType& matrix) { // Work on one panel of panel_size columns // Adjust panel size so that a panel won't overlap with the next relaxed snode. - int panel_size = m_panel_size; // upper bound on panel width + int panel_size = m_perfv.panel_size; // upper bound on panel width for (k = jcol + 1; k < std::min(jcol+panel_size, n); k++) { if (relax_end(k) != IND_EMPTY) @@ -515,7 +508,7 @@ void SparseLU::factorize(const MatrixType& matrix) LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu); // Numeric sup-panel updates in topological order - LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_glu); + LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_perfv, m_glu); // Sparse LU within the panel, and below the panel diagonal for ( jj = jcol; jj< jcol + panel_size; jj++) @@ -526,7 +519,7 @@ void SparseLU::factorize(const MatrixType& matrix) //Depth-first-search for the current column VectorBlock panel_lsubk(panel_lsub, k, m); VectorBlock repfnz_k(repfnz, k, m); - info = LU_column_dfs(m, jj, m_perm_r.indices(), m_maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); + info = LU_column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); if ( info ) { std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() \n"; diff --git a/Eigen/src/SparseLU/SparseLU_Structs.h b/Eigen/src/SparseLU/SparseLU_Structs.h index 
4b4dfdc77..7b3aa250c 100644 --- a/Eigen/src/SparseLU/SparseLU_Structs.h +++ b/Eigen/src/SparseLU/SparseLU_Structs.h @@ -88,4 +88,16 @@ struct LU_GlobalLU_t { Index n; // Number of columns in the matrix int num_expansions; }; + +// Values to set for performance +struct LU_perfvalues { + int panel_size; // a panel consists of at most consecutive columns + int relax; // To control degree of relaxing supernodes. If the number of nodes (columns) + // in a subtree of the elimination tree is less than relax, this subtree is considered + // as one supernode regardless of the row structures of those columns + int maxsuper; // The maximum size for a supernode in complete LU + int rowblk; // The minimum row dimension for 2-D blocking to be used; + int colblk; // The minimum column dimension for 2-D blocking to be used; + int fillfactor; // The estimated fills factors for L and U, compared with A +}; #endif \ No newline at end of file diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h index 36b4f74df..1b31cc31a 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -49,7 +49,7 @@ * */ template -void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, DenseIndexBlock& segrep, DenseIndexBlock& repfnz, LU_GlobalLU_t& glu) +void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, DenseIndexBlock& segrep, DenseIndexBlock& repfnz, LU_perfvalues& perfv, LU_GlobalLU_t& glu) { typedef typename ScalarVector::Scalar Scalar; @@ -95,7 +95,7 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca // if the blocks are large enough, use level 3 // TODO find better heuristics! 
- if( nsupc >= 50 && nrow > 50 && u_cols>6) + if( nsupc >= perfv.colblk && nrow > perfv.rowblk && u_cols>perfv.relax) { Map > U(tempv.data(), u_rows, u_cols); diff --git a/bench/spbench/test_sparseLU.cpp b/bench/spbench/test_sparseLU.cpp index 8c78b0c9b..c6511a9bc 100644 --- a/bench/spbench/test_sparseLU.cpp +++ b/bench/spbench/test_sparseLU.cpp @@ -24,11 +24,13 @@ int main(int argc, char **args) typedef Matrix DenseRhs; Matrix b, x, tmp; // SparseLU, AMDOrdering > solver; -#ifdef EIGEN_METIS_SUPPORT - SparseLU, MetisOrdering > solver; -#else +// #ifdef EIGEN_METIS_SUPPORT +// SparseLU, MetisOrdering > solver; +// std::cout<< "ORDERING : METIS\n"; +// #else SparseLU, COLAMDOrdering > solver; -#endif + std::cout<< "ORDERING : COLAMD\n"; +// #endif ifstream matrix_file; string line; From 063705b5be5a41e324773887d3d5ae065321a719 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Fri, 7 Sep 2012 13:14:57 +0200 Subject: [PATCH 45/73] Add tutorial for sparse solvers --- Eigen/src/SparseCore/SparseMatrix.h | 2 +- Eigen/src/SparseLU/SparseLU.h | 4 +- Eigen/src/SuperLUSupport/SuperLUSupport.h | 4 +- bench/spbench/CMakeLists.txt | 1 + doc/I17_SparseLinearSystems.dox | 110 ++++++++++++++++++++++ 5 files changed, 115 insertions(+), 6 deletions(-) create mode 100644 doc/I17_SparseLinearSystems.dox diff --git a/Eigen/src/SparseCore/SparseMatrix.h b/Eigen/src/SparseCore/SparseMatrix.h index 52a9dab70..87f3fb873 100644 --- a/Eigen/src/SparseCore/SparseMatrix.h +++ b/Eigen/src/SparseCore/SparseMatrix.h @@ -478,7 +478,7 @@ class SparseMatrix } /** Turns the matrix into the uncompressed mode */ - void Uncompress() + void uncompress() { if(m_innerNonZeros != 0) return; diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 6a6579493..e2076138a 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -271,7 +271,7 @@ void SparseLU::analyzePattern(const MatrixType& mat) //First copy the whole input matrix. 
m_mat = mat; - m_mat.Uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. FIXME : This vector is filled but not subsequently used. + m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. FIXME : This vector is filled but not subsequently used. //Then, permute only the column pointers for (int i = 0; i < mat.cols(); i++) { @@ -356,7 +356,7 @@ void SparseLU::factorize(const MatrixType& matrix) // Apply the column permutation computed in analyzepattern() // m_mat = matrix * m_perm_c.inverse(); m_mat = matrix; - m_mat.Uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. + m_mat.uncompress(); //NOTE: The effect of this command is only to create the InnerNonzeros pointers. //Then, permute only the column pointers for (int i = 0; i < matrix.cols(); i++) { diff --git a/Eigen/src/SuperLUSupport/SuperLUSupport.h b/Eigen/src/SuperLUSupport/SuperLUSupport.h index e3fae4a36..faefd8169 100644 --- a/Eigen/src/SuperLUSupport/SuperLUSupport.h +++ b/Eigen/src/SuperLUSupport/SuperLUSupport.h @@ -627,9 +627,7 @@ void SuperLU::factorize(const MatrixType& a) this->initFactorization(a); - //DEBUG -// m_sluOptions.ColPerm = COLAMD; - m_sluOptions.Equil = NO; + m_sluOptions.ColPerm = COLAMD; int info = 0; RealScalar recip_pivot_growth, rcond; RealScalar ferr, berr; diff --git a/bench/spbench/CMakeLists.txt b/bench/spbench/CMakeLists.txt index 2eb0befa9..5451843b9 100644 --- a/bench/spbench/CMakeLists.txt +++ b/bench/spbench/CMakeLists.txt @@ -74,3 +74,4 @@ endif(METIS_FOUND) add_executable(test_sparseLU test_sparseLU.cpp) target_link_libraries (test_sparseLU ${SPARSE_LIBS}) + diff --git a/doc/I17_SparseLinearSystems.dox b/doc/I17_SparseLinearSystems.dox new file mode 100644 index 000000000..740bee18e --- /dev/null +++ b/doc/I17_SparseLinearSystems.dox @@ -0,0 +1,110 @@ +namespace Eigen { +/** \page TopicSparseSystems Solving Sparse Linear Systems +In Eigen, 
there are several methods available to solve linear systems when the coefficient matrix is sparse. Because of the special representation of this class of matrices, special care should be taken in order to get good performance. See \ref TutorialSparse for a detailed introduction about sparse matrices in Eigen. In this page, we briefly present the main steps that are common to all the linear solvers in Eigen together with the main concepts behind them. Depending on the properties of the matrix and the desired accuracy, the end-user is able to tune these steps in order to improve the performance of their code. However, an impatient user does not need to know in depth what is hidden behind these steps: the last section presents a benchmark routine that can be easily used to get an insight into the performance of all the available solvers. + +\b Table \b of \b contents \n + - \ref TheSparseCompute + - \ref TheSparseSolve + - \ref BenchmarkRoutine + + As summarized in \ref TutorialSparseDirectSolvers, there are many built-in solvers in Eigen as well as interfaces to external solver libraries. All these solvers follow the same calling sequence. The basic steps are as follows: +\code +#include <Eigen/RequiredModuleName> +// ... +SparseMatrix<double> A; +// fill A +VectorXd b, x; +// fill b +// solve Ax = b +SolverClassName<SparseMatrix<double> > solver; +solver.compute(A); +if(solver.info()!=Succeeded) { + // decomposition failed + return; +} +x = solver.solve(b); +if(solver.info()!=Succeeded) { + // solving failed + return; +} +\endcode + +\section TheSparseCompute The Compute Step +In the compute() function, the matrix is generally factorized: LLT for self-adjoint matrices, LDLT for general hermitian matrices and LU for non hermitian matrices. These are the results of using direct solvers. For this class of solvers precisely, the compute step is further subdivided into analyzePattern() and factorize(). 
+ +The goal of analyzePattern() is to reorder the nonzero elements of the matrix, such that the factorization step creates less fill-in. This step exploits only the structure of the matrix. Hence, the results of this step can be used for other linear systems where the matrix has the same structure. Note however that sometimes, some external solvers (like SuperLU) require that the values of the matrix are set in this step, for instance to equilibrate the rows and columns of the matrix. In this situation, the results of this step cannot be used with other matrices. + +Eigen provides a limited set of methods to reorder the matrix in this step, either built-in (COLAMD, AMD) or external (METIS). These methods are set in the template parameter list of the solver: +\code +DirectSolverClassName<SparseMatrix<double>, OrderingMethod<IndexType> > solver; +\endcode + +See \link Ordering_Modules the Ordering module \endlink for the list of available methods and the associated options. + +In factorize(), the factors of the coefficient matrix are computed. This step should be called each time the values of the matrix change. However, the structural pattern of the matrix should not change between multiple calls. + +For iterative solvers, the compute step is used to set up the preconditioner, if any. Remember that, basically, the goal of the preconditioner is to speed up the convergence of an iterative method by solving a modified linear system where the coefficient matrix has more clustered eigenvalues. For real problems, an iterative solver should always be used with a preconditioner. In Eigen, a preconditioner is selected by simply adding it as a template parameter to the iterative solver object. +\code +IterativeSolverClassName<SparseMatrix<double>, PreconditionerName<SparseMatrix<double> > > solver; +\endcode + +FIXME How to get a reference to the preconditioner, in order to set the parameters + +For instance, with the ILUT preconditioner, the incomplete factors L and U are computed in this step. 
+See \link Sparse_modules the Sparse module \endlink for the list of available preconditioners in Eigen. +\section TheSparseSolve The Solve step +The solve() function computes the solution of the linear systems with one or many right hand sides. +\code +X = solver.solve(B); +\endcode +Here, B can be a vector or a matrix where the columns form the different right hand sides. The solve() function can be called several times as well, for instance when all the right hand sides are not available at once. +\code +x1 = solver.solve(b1); +// Get the second right hand side b2 +x2 = solver.solve(b2); +// ... +\endcode +For direct methods, the solutions are computed to machine precision. Sometimes, the solution need not be too accurate. In this case, the iterative methods are more suitable and the desired accuracy can be set before the solve step using setTolerance(). For all the available functions, please, refer to the documentation of the \link IterativeLinearSolvers_module Iterative solvers module \endlink. + +\section BenchmarkRoutine The Benchmark Routine +Most of the time, all you need is to know how much time it will take to solve your system, and hopefully, what is the most suitable solver. In Eigen, we provide a benchmark routine that can be used for this purpose. It is very easy to use. First, it should be activated at the configuration step with the flag TEST_REAL_CASES. Then, in bench/spbench, you can compile the routine by typing \b make \e spbenchsolver. You can then run it with the --help option to get the list of all available options. Basically, the matrices to test should be in \link http://math.nist.gov/MatrixMarket/formats.html MatrixMarket Coordinate format \endlink, and the routine returns the statistics from all available solvers in Eigen. + +The following table gives an example of XHTML statistics from several Eigen built-in and external solvers. + + +
Matrix N NNZ UMFPACK SUPERLU PASTIX LU BiCGSTAB BiCGSTAB+ILUT GMRES+ILUT LDLT CHOLMOD LDLT PASTIX LDLT LLT CHOLMOD SP LLT CHOLMOD LLT PASTIX LLT CG
vector_graphics 12855 72069 Compute Time 0.02545490.02156770.07018270.0001533880.01401070.01537090.01016010.009305020.0649689 +
Solve Time 0.003378350.0009518260.004843730.03748860.00464450.008477540.0005418130.0002936960.00485376 +
Total Time 0.02883330.02251950.07502650.0376420.01865520.02384840.01070190.009598710.0698227 +
Error(Iter) 1.299e-16 2.04207e-16 4.83393e-15 3.94856e-11 (80) 1.03861e-12 (3) 5.81088e-14 (6) 1.97578e-16 1.83927e-16 4.24115e-15 +
poisson_SPD 19788 308232 Compute Time 0.4250261.823780.6173670.0004789211.340011.334710.7964190.8575730.4730070.8148260.1847190.8615550.4705590.000458188 +
Solve Time 0.02800530.01944020.02687470.2494370.05484440.09269910.008502040.00531710.02589320.008746030.005781550.005303610.02489420.239093 +
Total Time 0.4530311.843220.6442410.2499161.394861.427410.8049210.8628910.49890.8235720.1905010.8668590.4954530.239551 +
Error(Iter) 4.67146e-16 1.068e-15 1.3397e-15 6.29233e-11 (201) 3.68527e-11 (6) 3.3168e-15 (16) 1.86376e-15 1.31518e-16 1.42593e-15 3.45361e-15 3.14575e-16 2.21723e-15 7.21058e-16 9.06435e-12 (261) +
sherman2 1080 23094 Compute Time 0.006317540.0150520.0247514 -0.02144250.0217988 +
Solve Time 0.0004784240.0003379980.0010291 -0.002431520.00246152 +
Total Time 0.006795970.015390.0257805 -0.0238740.0242603 +
Error(Iter) 1.83099e-15 8.19351e-15 2.625e-14 1.3678e+69 (1080) 4.1911e-12 (7) 5.0299e-13 (12) +
bcsstk01_SPD 48 400 Compute Time 0.0001690790.000107890.0005725381.425e-069.1612e-058.3985e-055.6489e-057.0913e-050.0004682515.7389e-058.0212e-055.8394e-050.0004630171.333e-06 +
Solve Time 1.2288e-051.1124e-050.0002863878.5896e-051.6381e-051.6984e-053.095e-064.115e-060.0003254383.504e-067.369e-063.454e-060.0002940956.0516e-05 +
Total Time 0.0001813670.0001190140.0008589258.7321e-050.0001079930.0001009695.9584e-057.5028e-050.0007936896.0893e-058.7581e-056.1848e-050.0007571126.1849e-05 +
Error(Iter) 1.03474e-16 2.23046e-16 2.01273e-16 4.87455e-07 (48) 1.03553e-16 (2) 3.55965e-16 (2) 2.48189e-16 1.88808e-16 1.97976e-16 2.37248e-16 1.82701e-16 2.71474e-16 2.11322e-16 3.547e-09 (48) +
sherman1 1000 3750 Compute Time 0.002288050.002092310.005282689.846e-060.001635220.001621550.0007892590.0008044950.00438269 +
Solve Time 0.0002137889.7983e-050.0009388310.006298350.0003617640.000787944.3989e-052.5331e-050.000917166 +
Total Time 0.002501840.002190290.006221510.00630820.001996980.002409490.0008332480.0008298260.00529986 +
Error(Iter) 1.16839e-16 2.25968e-16 2.59116e-16 3.76779e-11 (248) 4.13343e-11 (4) 2.22347e-14 (10) 2.05861e-16 1.83555e-16 1.02917e-15 +
young1c 841 4089 Compute Time 0.002358430.002172280.005680751.2735e-050.002648660.00258236 +
Solve Time 0.0003295990.0001686340.000801180.05347380.001871930.00450211 +
Total Time 0.002688030.002340910.006481930.05348650.004520590.00708447 +
Error(Iter) 1.27029e-16 2.81321e-16 5.0492e-15 8.0507e-11 (706) 3.00447e-12 (8) 1.46532e-12 (16) +
mhd1280b 1280 22778 Compute Time 0.002348980.002070790.005709182.5976e-050.003025630.002980360.001445250.0009199220.00426444 +
Solve Time 0.001033920.0002119110.001050.01104320.0006282870.003920890.0001383036.2446e-050.00097564 +
Total Time 0.00338290.00228270.006759180.01106920.003653920.006901240.001583550.0009823680.00524008 +
Error(Iter) 1.32953e-16 3.08646e-16 6.734e-16 8.83132e-11 (40) 1.51153e-16 (1) 6.08556e-16 (8) 1.89264e-16 1.97477e-16 6.68126e-09 +
crashbasis 160000 1750416 Compute Time 3.20195.789215.75730.003835153.10063.09921 +
Solve Time 0.2619150.1062250.4021411.490890.248880.443673 +
Total Time 3.463815.8954216.15941.494733.349483.54288 +
Error(Iter) 1.76348e-16 4.58395e-16 1.67982e-14 8.64144e-11 (61) 8.5996e-12 (2) 6.04042e-14 (5) + +
+*/ +} \ No newline at end of file From 5433986f5ae8cd1c07f7c77d655a54c0e775b292 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Fri, 7 Sep 2012 14:01:51 +0200 Subject: [PATCH 46/73] multiple warnings for unused variable --- Eigen/src/Core/products/GeneralMatrixVector.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/products/GeneralMatrixVector.h b/Eigen/src/Core/products/GeneralMatrixVector.h index 639af8ed4..8895d3ab2 100644 --- a/Eigen/src/Core/products/GeneralMatrixVector.h +++ b/Eigen/src/Core/products/GeneralMatrixVector.h @@ -81,7 +81,7 @@ EIGEN_DONT_INLINE static void run( const Index peels = 2; const Index LhsPacketAlignedMask = LhsPacketSize-1; const Index ResPacketAlignedMask = ResPacketSize-1; - const Index PeelAlignedMask = ResPacketSize*peels-1; +// const Index PeelAlignedMask = ResPacketSize*peels-1; const Index size = rows; // How many coeffs of the result do we have to skip to be aligned. @@ -335,7 +335,7 @@ EIGEN_DONT_INLINE static void run( const Index peels = 2; const Index RhsPacketAlignedMask = RhsPacketSize-1; const Index LhsPacketAlignedMask = LhsPacketSize-1; - const Index PeelAlignedMask = RhsPacketSize*peels-1; +// const Index PeelAlignedMask = RhsPacketSize*peels-1; const Index depth = cols; // How many coeffs of the result do we have to skip to be aligned. From 06d2fe453d5dfbdc8a69dcd02f17801fc3f75b86 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 7 Sep 2012 23:19:24 +0200 Subject: [PATCH 47/73] remove stupid assert in blue norm. 
--- Eigen/src/Core/StableNorm.h | 1 - 1 file changed, 1 deletion(-) diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h index d8bf7db70..7499b195e 100644 --- a/Eigen/src/Core/StableNorm.h +++ b/Eigen/src/Core/StableNorm.h @@ -131,7 +131,6 @@ MatrixBase::blueNorm() const abig = internal::sqrt(abig); if(abig > overfl) { - eigen_assert(false && "overflow"); return rbig; } if(amed > RealScalar(0)) From 721671cc4e5950e8cb1c905be720d4318bf9fcdb Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 8 Sep 2012 11:52:03 +0200 Subject: [PATCH 48/73] fix bug #501: remove aggressive mat/scalar optimization (was replaced by mat*(1/scalar) for non integer types) --- Eigen/src/Core/Functors.h | 44 ++++++++++----------------------------- 1 file changed, 11 insertions(+), 33 deletions(-) diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h index c9e8ab150..09388972a 100644 --- a/Eigen/src/Core/Functors.h +++ b/Eigen/src/Core/Functors.h @@ -454,7 +454,7 @@ struct functor_traits > * indeed it seems better to declare m_other as a Packet and do the pset1() once * in the constructor. 
However, in practice: * - GCC does not like m_other as a Packet and generate a load every time it needs it - * - on the other hand GCC is able to moves the pset1() away the loop :) + * - on the other hand GCC is able to moves the pset1() outside the loop :) * - simpler code ;) * (ICC and gcc 4.4 seems to perform well in both cases, the issue is visible with y = a*x + b*y) */ @@ -485,33 +485,6 @@ template struct functor_traits > { enum { Cost = NumTraits::MulCost, PacketAccess = false }; }; -template -struct scalar_quotient1_impl { - typedef typename packet_traits::type Packet; - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_STRONG_INLINE scalar_quotient1_impl(const scalar_quotient1_impl& other) : m_other(other.m_other) { } - EIGEN_STRONG_INLINE scalar_quotient1_impl(const Scalar& other) : m_other(static_cast(1) / other) {} - EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; } - EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const - { return internal::pmul(a, pset1(m_other)); } - const Scalar m_other; -}; -template -struct functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = packet_traits::HasMul }; }; - -template -struct scalar_quotient1_impl { - // FIXME default copy constructors seems bugged with std::complex<> - EIGEN_STRONG_INLINE scalar_quotient1_impl(const scalar_quotient1_impl& other) : m_other(other.m_other) { } - EIGEN_STRONG_INLINE scalar_quotient1_impl(const Scalar& other) : m_other(other) {} - EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; } - typename add_const_on_value_type::Nested>::type m_other; -}; -template -struct functor_traits > -{ enum { Cost = 2 * NumTraits::MulCost, PacketAccess = false }; }; - /** \internal * \brief Template functor to divide a scalar by a fixed other one * @@ -521,14 +494,19 @@ struct functor_traits > * \sa class CwiseUnaryOp, MatrixBase::operator/ */ template -struct scalar_quotient1_op : 
scalar_quotient1_impl::IsInteger > { - EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) - : scalar_quotient1_impl::IsInteger >(other) {} +struct scalar_quotient1_op { + typedef typename packet_traits::type Packet; + // FIXME default copy constructors seems bugged with std::complex<> + EIGEN_STRONG_INLINE scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { } + EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {} + EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; } + EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const + { return internal::pdiv(a, pset1(m_other)); } + typename add_const_on_value_type::Nested>::type m_other; }; template struct functor_traits > -: functor_traits::IsInteger> > -{}; +{ enum { Cost = 2 * NumTraits::MulCost, PacketAccess = packet_traits::HasDiv }; }; // nullary functors From 2c99d8413316c97e771a37c7ff04ab38d7cd158a Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Mon, 10 Sep 2012 12:41:26 +0200 Subject: [PATCH 49/73] add SparseLU in sparse bench --- Eigen/src/OrderingMethods/Eigen_Colamd.h | 1091 ++++++---------------- Eigen/src/OrderingMethods/Ordering.h | 14 +- Eigen/src/SparseLU/SparseLU.h | 2 +- bench/spbench/CMakeLists.txt | 11 +- bench/spbench/spbenchsolver.h | 72 +- 5 files changed, 363 insertions(+), 827 deletions(-) diff --git a/Eigen/src/OrderingMethods/Eigen_Colamd.h b/Eigen/src/OrderingMethods/Eigen_Colamd.h index 0af137d54..686c0f9f9 100644 --- a/Eigen/src/OrderingMethods/Eigen_Colamd.h +++ b/Eigen/src/OrderingMethods/Eigen_Colamd.h @@ -7,7 +7,7 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -// This file is modified from the eigen_colamd/symamd library. The copyright is below +// This file is modified from the colamd/symamd library. The copyright is below // The authors of the code itself are Stefan I. 
Larimore and Timothy A. // Davis (davis@cise.ufl.edu), University of Florida. The algorithm was @@ -39,18 +39,19 @@ // // Availability: // -// The eigen_colamd/symamd library is available at +// The colamd/symamd library is available at // -// http://www.cise.ufl.edu/research/sparse/eigen_colamd/ +// http://www.cise.ufl.edu/research/sparse/colamd/ -// This is the http://www.cise.ufl.edu/research/sparse/eigen_colamd/eigen_colamd.h -// file. It is required by the eigen_colamd.c, colamdmex.c, and symamdmex.c +// This is the http://www.cise.ufl.edu/research/sparse/colamd/colamd.h +// file. It is required by the colamd.c, colamdmex.c, and symamdmex.c // files, and by any C code that calls the routines whose prototypes are -// listed below, or that uses the eigen_colamd/symamd definitions listed below. +// listed below, or that uses the colamd/symamd definitions listed below. #ifndef EIGEN_COLAMD_H #define EIGEN_COLAMD_H +namespace internal { /* Ensure that debugging is turned off: */ #ifndef COLAMD_NDEBUG #define COLAMD_NDEBUG @@ -61,42 +62,42 @@ /* ========================================================================== */ /* size of the knobs [ ] array. Only knobs [0..1] are currently used. */ -#define EIGEN_COLAMD_KNOBS 20 +#define COLAMD_KNOBS 20 /* number of output statistics. Only stats [0..6] are currently used. */ -#define EIGEN_COLAMD_STATS 20 +#define COLAMD_STATS 20 /* knobs [0] and stats [0]: dense row knob and output statistic. */ -#define EIGEN_COLAMD_DENSE_ROW 0 +#define COLAMD_DENSE_ROW 0 /* knobs [1] and stats [1]: dense column knob and output statistic. 
*/ -#define EIGEN_COLAMD_DENSE_COL 1 +#define COLAMD_DENSE_COL 1 /* stats [2]: memory defragmentation count output statistic */ -#define EIGEN_COLAMD_DEFRAG_COUNT 2 +#define COLAMD_DEFRAG_COUNT 2 -/* stats [3]: eigen_colamd status: zero OK, > 0 warning or notice, < 0 error */ -#define EIGEN_COLAMD_STATUS 3 +/* stats [3]: colamd status: zero OK, > 0 warning or notice, < 0 error */ +#define COLAMD_STATUS 3 /* stats [4..6]: error info, or info on jumbled columns */ -#define EIGEN_COLAMD_INFO1 4 -#define EIGEN_COLAMD_INFO2 5 -#define EIGEN_COLAMD_INFO3 6 +#define COLAMD_INFO1 4 +#define COLAMD_INFO2 5 +#define COLAMD_INFO3 6 /* error codes returned in stats [3]: */ -#define EIGEN_COLAMD_OK (0) -#define EIGEN_COLAMD_OK_BUT_JUMBLED (1) -#define EIGEN_COLAMD_ERROR_A_not_present (-1) -#define EIGEN_COLAMD_ERROR_p_not_present (-2) -#define EIGEN_COLAMD_ERROR_nrow_negative (-3) -#define EIGEN_COLAMD_ERROR_ncol_negative (-4) -#define EIGEN_COLAMD_ERROR_nnz_negative (-5) -#define EIGEN_COLAMD_ERROR_p0_nonzero (-6) -#define EIGEN_COLAMD_ERROR_A_too_small (-7) -#define EIGEN_COLAMD_ERROR_col_length_negative (-8) -#define EIGEN_COLAMD_ERROR_row_index_out_of_bounds (-9) -#define EIGEN_COLAMD_ERROR_out_of_memory (-10) -#define EIGEN_COLAMD_ERROR_internal_error (-999) +#define COLAMD_OK (0) +#define COLAMD_OK_BUT_JUMBLED (1) +#define COLAMD_ERROR_A_not_present (-1) +#define COLAMD_ERROR_p_not_present (-2) +#define COLAMD_ERROR_nrow_negative (-3) +#define COLAMD_ERROR_ncol_negative (-4) +#define COLAMD_ERROR_nnz_negative (-5) +#define COLAMD_ERROR_p0_nonzero (-6) +#define COLAMD_ERROR_A_too_small (-7) +#define COLAMD_ERROR_col_length_negative (-8) +#define COLAMD_ERROR_row_index_out_of_bounds (-9) +#define COLAMD_ERROR_out_of_memory (-10) +#define COLAMD_ERROR_internal_error (-999) /* ========================================================================== */ /* === Definitions ========================================================== */ @@ -105,30 +106,30 @@ #define 
COLAMD_MAX(a,b) (((a) > (b)) ? (a) : (b)) #define COLAMD_MIN(a,b) (((a) < (b)) ? (a) : (b)) -#define EIGEN_ONES_COMPLEMENT(r) (-(r)-1) +#define ONES_COMPLEMENT(r) (-(r)-1) /* -------------------------------------------------------------------------- */ -#define EIGEN_COLAMD_EMPTY (-1) +#define COLAMD_EMPTY (-1) /* Row and column status */ -#define EIGEN_ALIVE (0) -#define EIGEN_DEAD (-1) +#define ALIVE (0) +#define DEAD (-1) /* Column status */ -#define EIGEN_DEAD_PRINCIPAL (-1) -#define EIGEN_DEAD_NON_PRINCIPAL (-2) +#define DEAD_PRINCIPAL (-1) +#define DEAD_NON_PRINCIPAL (-2) /* Macros for row and column status update and checking. */ -#define EIGEN_ROW_IS_DEAD(r) EIGEN_ROW_IS_MARKED_DEAD (Row[r].shared2.mark) -#define EIGEN_ROW_IS_MARKED_DEAD(row_mark) (row_mark < EIGEN_ALIVE) -#define EIGEN_ROW_IS_ALIVE(r) (Row [r].shared2.mark >= EIGEN_ALIVE) -#define EIGEN_COL_IS_DEAD(c) (Col [c].start < EIGEN_ALIVE) -#define EIGEN_COL_IS_ALIVE(c) (Col [c].start >= EIGEN_ALIVE) -#define EIGEN_EIGEN_COL_IS_DEAD_PRINCIPAL(c) (Col [c].start == EIGEN_DEAD_PRINCIPAL) -#define EIGEN_KILL_ROW(r) { Row [r].shared2.mark = EIGEN_DEAD ; } -#define EIGEN_KILL_PRINCIPAL_COL(c) { Col [c].start = EIGEN_DEAD_PRINCIPAL ; } -#define EIGEN_KILL_NON_PRINCIPAL_COL(c) { Col [c].start = EIGEN_DEAD_NON_PRINCIPAL ; } +#define ROW_IS_DEAD(r) ROW_IS_MARKED_DEAD (Row[r].shared2.mark) +#define ROW_IS_MARKED_DEAD(row_mark) (row_mark < ALIVE) +#define ROW_IS_ALIVE(r) (Row [r].shared2.mark >= ALIVE) +#define COL_IS_DEAD(c) (Col [c].start < ALIVE) +#define COL_IS_ALIVE(c) (Col [c].start >= ALIVE) +#define COL_IS_DEAD_PRINCIPAL(c) (Col [c].start == DEAD_PRINCIPAL) +#define KILL_ROW(r) { Row [r].shared2.mark = DEAD ; } +#define KILL_PRINCIPAL_COL(c) { Col [c].start = DEAD_PRINCIPAL ; } +#define KILL_NON_PRINCIPAL_COL(c) { Col [c].start = DEAD_NON_PRINCIPAL ; } /* ========================================================================== */ /* === Colamd reporting mechanism 
=========================================== */ @@ -146,7 +147,7 @@ /* Use printf in standard C environment, for debugging and statistics output. */ /* Output is generated only if debugging is enabled at compile time, or if */ -/* the caller explicitly calls eigen_colamd_report or symamd_report. */ +/* the caller explicitly calls colamd_report or symamd_report. */ #define PRINTF printf /* In C, matrices are 0-based and indices are reported as such in *_report */ @@ -155,9 +156,9 @@ #endif /* MATLAB_MEX_FILE */ // == Row and Column structures == -typedef struct EIGEN_Colamd_Col_struct +typedef struct colamd_col_struct { - int start ; /* index for A of first row in this column, or EIGEN_DEAD */ + int start ; /* index for A of first row in this column, or DEAD */ /* if column is dead */ int length ; /* number of rows in this column */ union @@ -186,16 +187,16 @@ typedef struct EIGEN_Colamd_Col_struct int hash_next ; /* next column, if col is in a hash list */ } shared4 ; -} EIGEN_Colamd_Col ; +} colamd_col ; -typedef struct EIGEN_Colamd_Row_struct +typedef struct Colamd_Row_struct { int start ; /* index for A of first col in this row */ int length ; /* number of principal columns in this row */ union { int degree ; /* number of principal & non-principal columns in row */ - int p ; /* used as a row pointer in eigen_init_rows_cols () */ + int p ; /* used as a row pointer in init_rows_cols () */ } shared1 ; union { @@ -203,19 +204,19 @@ typedef struct EIGEN_Colamd_Row_struct int first_column ;/* first column in row (used in garbage collection) */ } shared2 ; -} EIGEN_Colamd_Row ; +} Colamd_Row ; /* ========================================================================== */ /* === Colamd recommended memory size ======================================= */ /* ========================================================================== */ /* - The recommended length Alen of the array A passed to eigen_colamd is given by - the EIGEN_COLAMD_RECOMMENDED (nnz, n_row, n_col) macro. 
It returns -1 if any + The recommended length Alen of the array A passed to colamd is given by + the COLAMD_RECOMMENDED (nnz, n_row, n_col) macro. It returns -1 if any argument is negative. 2*nnz space is required for the row and column - indices of the matrix. EIGEN_COLAMD_C (n_col) + EIGEN_COLAMD_R (n_row) space is + indices of the matrix. colamd_c (n_col) + colamd_r (n_row) space is required for the Col and Row arrays, respectively, which are internal to - eigen_colamd. An additional n_col space is the minimal amount of "elbow room", + colamd. An additional n_col space is the minimal amount of "elbow room", and nnz/5 more space is recommended for run time efficiency. This macro is not needed when using symamd. @@ -224,120 +225,41 @@ typedef struct EIGEN_Colamd_Row_struct gcc -pedantic warning messages. */ -#define EIGEN_COLAMD_C(n_col) ((int) (((n_col) + 1) * sizeof (EIGEN_Colamd_Col) / sizeof (int))) -#define EIGEN_COLAMD_R(n_row) ((int) (((n_row) + 1) * sizeof (EIGEN_Colamd_Row) / sizeof (int))) +inline int colamd_c(int n_col) +{ return int( ((n_col) + 1) * sizeof (colamd_col) / sizeof (int) ) ; } -#define EIGEN_COLAMD_RECOMMENDED(nnz, n_row, n_col) \ -( \ -((nnz) < 0 || (n_row) < 0 || (n_col) < 0) \ -? 
\ - (-1) \ -: \ - (2 * (nnz) + EIGEN_COLAMD_C (n_col) + EIGEN_COLAMD_R (n_row) + (n_col) + ((nnz) / 5)) \ -) +inline int colamd_r(int n_row) +{ return int(((n_row) + 1) * sizeof (Colamd_Row) / sizeof (int)); } // Various routines -int eigen_colamd_recommended (int nnz, int n_row, int n_col) ; +inline int colamd_recommended (int nnz, int n_row, int n_col) ; -void eigen_colamd_set_defaults (double knobs [EIGEN_COLAMD_KNOBS]) ; +static inline void colamd_set_defaults (double knobs [COLAMD_KNOBS]) ; -bool eigen_colamd (int n_row, int n_col, int Alen, int A [], int p [], double knobs[EIGEN_COLAMD_KNOBS], int stats [EIGEN_COLAMD_STATS]) ; +static bool colamd (int n_row, int n_col, int Alen, int A [], int p [], double knobs[COLAMD_KNOBS], int stats [COLAMD_STATS]) ; -void eigen_colamd_report (int stats [EIGEN_COLAMD_STATS]); +static inline void colamd_report (int stats [COLAMD_STATS]); -int eigen_init_rows_cols (int n_row, int n_col, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col col [], int A [], int p [], int stats[EIGEN_COLAMD_STATS] ); +static int init_rows_cols (int n_row, int n_col, Colamd_Row Row [], colamd_col col [], int A [], int p [], int stats[COLAMD_STATS] ); -void eigen_init_scoring (int n_row, int n_col, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col Col [], int A [], int head [], double knobs[EIGEN_COLAMD_KNOBS], int *p_n_row2, int *p_n_col2, int *p_max_deg); +static void init_scoring (int n_row, int n_col, Colamd_Row Row [], colamd_col Col [], int A [], int head [], double knobs[COLAMD_KNOBS], int *p_n_row2, int *p_n_col2, int *p_max_deg); -int eigen_find_ordering (int n_row, int n_col, int Alen, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col Col [], int A [], int head [], int n_col2, int max_deg, int pfree); +static int find_ordering (int n_row, int n_col, int Alen, Colamd_Row Row [], colamd_col Col [], int A [], int head [], int n_col2, int max_deg, int pfree); -void eigen_order_children (int n_col, EIGEN_Colamd_Col Col [], int p []); +static void order_children (int 
n_col, colamd_col Col [], int p []); -void eigen_detect_super_cols ( -#ifndef COLAMD_NDEBUG - int n_col, - EIGEN_Colamd_Row Row [], -#endif /* COLAMD_NDEBUG */ - EIGEN_Colamd_Col Col [], +static void detect_super_cols ( + colamd_col Col [], int A [], int head [], int row_start, int row_length ) ; - int eigen_garbage_collection (int n_row, int n_col, EIGEN_Colamd_Row Row [], EIGEN_Colamd_Col Col [], int A [], int *pfree) ; +static int garbage_collection (int n_row, int n_col, Colamd_Row Row [], colamd_col Col [], int A [], int *pfree) ; - int eigen_clear_mark (int n_row, EIGEN_Colamd_Row Row [] ) ; +static inline int clear_mark (int n_row, Colamd_Row Row [] ) ; - void eigen_print_report (char *method, int stats [EIGEN_COLAMD_STATS]) ; - -/* ========================================================================== */ -/* === Debugging prototypes and definitions ================================= */ -/* ========================================================================== */ - -#ifndef COLAMD_NDEBUG - -/* colamd_debug is the *ONLY* global variable, and is only */ -/* present when debugging */ - - int colamd_debug ; /* debug print level */ - -#define COLAMD_DEBUG0(params) { (void) PRINTF params ; } -#define COLAMD_DEBUG1(params) { if (colamd_debug >= 1) (void) PRINTF params ; } -#define COLAMD_DEBUG2(params) { if (colamd_debug >= 2) (void) PRINTF params ; } -#define COLAMD_DEBUG3(params) { if (colamd_debug >= 3) (void) PRINTF params ; } -#define COLAMD_DEBUG4(params) { if (colamd_debug >= 4) (void) PRINTF params ; } - -#ifdef MATLAB_MEX_FILE -#define COLAMD_ASSERT(expression) (mxAssert ((expression), "")) -#else -#define COLAMD_ASSERT(expression) (assert (expression)) -#endif /* MATLAB_MEX_FILE */ - - void eigen_colamd_get_debug /* gets the debug print level from getenv */ -( - char *method -) ; - - void eigen_debug_deg_lists -( - int n_row, - int n_col, - EIGEN_Colamd_Row Row [], - EIGEN_Colamd_Col Col [], - int head [], - int min_score, - int should, - int 
max_deg -) ; - - void eigen_debug_mark -( - int n_row, - EIGEN_Colamd_Row Row [], - int tag_mark, - int max_mark -) ; - - void eigen_debug_matrix -( - int n_row, - int n_col, - EIGEN_Colamd_Row Row [], - EIGEN_Colamd_Col Col [], - int A [] -) ; - - void eigen_debug_structures -( - int n_row, - int n_col, - EIGEN_Colamd_Row Row [], - EIGEN_Colamd_Col Col [], - int A [], - int n_col2 -) ; - -#else /* COLAMD_NDEBUG */ +static void print_report (const char *method, int stats [COLAMD_STATS]) ; /* === No debugging ========================================================= */ @@ -349,51 +271,50 @@ void eigen_detect_super_cols ( #define COLAMD_ASSERT(expression) ((void) 0) -#endif /* COLAMD_NDEBUG */ - - /** * \brief Returns the recommended value of Alen * - * Returns recommended value of Alen for use by eigen_colamd. + * Returns recommended value of Alen for use by colamd. * Returns -1 if any input argument is negative. * The use of this routine or macro is optional. * Note that the macro uses its arguments more than once, - * so be careful for side effects, if you pass expressions as arguments to EIGEN_COLAMD_RECOMMENDED. + * so be careful for side effects, if you pass expressions as arguments to COLAMD_RECOMMENDED. * * \param nnz nonzeros in A * \param n_row number of rows in A * \param n_col number of columns in A - * \return recommended value of Alen for use by eigen_colamd + * \return recommended value of Alen for use by colamd */ -int eigen_colamd_recommended ( int nnz, int n_row, int n_col) +inline int colamd_recommended ( int nnz, int n_row, int n_col) { - - return (EIGEN_COLAMD_RECOMMENDED (nnz, n_row, n_col)) ; + if ((nnz) < 0 || (n_row) < 0 || (n_col) < 0) + return (-1); + else + return (2 * (nnz) + colamd_c (n_col) + colamd_r (n_row) + (n_col) + ((nnz) / 5)); } /** * \brief set default parameters The use of this routine is optional. 
* - * Colamd: rows with more than (knobs [EIGEN_COLAMD_DENSE_ROW] * n_col) + * Colamd: rows with more than (knobs [COLAMD_DENSE_ROW] * n_col) * entries are removed prior to ordering. Columns with more than - * (knobs [EIGEN_COLAMD_DENSE_COL] * n_row) entries are removed prior to + * (knobs [COLAMD_DENSE_COL] * n_row) entries are removed prior to * ordering, and placed last in the output column ordering. * - * EIGEN_COLAMD_DENSE_ROW and EIGEN_COLAMD_DENSE_COL are defined as 0 and 1, - * respectively, in eigen_colamd.h. Default values of these two knobs + * COLAMD_DENSE_ROW and COLAMD_DENSE_COL are defined as 0 and 1, + * respectively, in colamd.h. Default values of these two knobs * are both 0.5. Currently, only knobs [0] and knobs [1] are * used, but future versions may use more knobs. If so, they will * be properly set to their defaults by the future version of - * eigen_colamd_set_defaults, so that the code that calls eigen_colamd will + * colamd_set_defaults, so that the code that calls colamd will * not need to change, assuming that you either use - * eigen_colamd_set_defaults, or pass a (double *) NULL pointer as the - * knobs array to eigen_colamd or symamd. + * colamd_set_defaults, or pass a (double *) NULL pointer as the + * knobs array to colamd or symamd. 
* - * \param knobs parameter settings for eigen_colamd + * \param knobs parameter settings for colamd */ -void eigen_colamd_set_defaults(double knobs[EIGEN_COLAMD_KNOBS]) +static inline void colamd_set_defaults(double knobs[COLAMD_KNOBS]) { /* === Local variables ================================================== */ @@ -403,12 +324,12 @@ void eigen_colamd_set_defaults(double knobs[EIGEN_COLAMD_KNOBS]) { return ; /* no knobs to initialize */ } - for (i = 0 ; i < EIGEN_COLAMD_KNOBS ; i++) + for (i = 0 ; i < COLAMD_KNOBS ; i++) { knobs [i] = 0 ; } - knobs [EIGEN_COLAMD_DENSE_ROW] = 0.5 ; /* ignore rows over 50% dense */ - knobs [EIGEN_COLAMD_DENSE_COL] = 0.5 ; /* ignore columns over 50% dense */ + knobs [COLAMD_DENSE_ROW] = 0.5 ; /* ignore rows over 50% dense */ + knobs [COLAMD_DENSE_COL] = 0.5 ; /* ignore columns over 50% dense */ } /** @@ -425,10 +346,10 @@ void eigen_colamd_set_defaults(double knobs[EIGEN_COLAMD_KNOBS]) * \param Alen, size of the array A * \param A row indices of the matrix, of size ALen * \param p column pointers of A, of size n_col+1 - * \param knobs parameter settings for eigen_colamd - * \param stats eigen_colamd output statistics and error codes + * \param knobs parameter settings for colamd + * \param stats colamd output statistics and error codes */ -bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[EIGEN_COLAMD_KNOBS], int stats[EIGEN_COLAMD_STATS]) +static bool colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[COLAMD_KNOBS], int stats[COLAMD_STATS]) { /* === Local variables ================================================== */ @@ -437,77 +358,74 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E int Row_size ; /* size of Row [], in integers */ int Col_size ; /* size of Col [], in integers */ int need ; /* minimum required length of A */ - EIGEN_Colamd_Row *Row ; /* pointer into A of Row [0..n_row] array */ - EIGEN_Colamd_Col *Col ; /* pointer into A of Col 
[0..n_col] array */ + Colamd_Row *Row ; /* pointer into A of Row [0..n_row] array */ + colamd_col *Col ; /* pointer into A of Col [0..n_col] array */ int n_col2 ; /* number of non-dense, non-empty columns */ int n_row2 ; /* number of non-dense, non-empty rows */ int ngarbage ; /* number of garbage collections performed */ int max_deg ; /* maximum row degree */ - double default_knobs [EIGEN_COLAMD_KNOBS] ; /* default knobs array */ + double default_knobs [COLAMD_KNOBS] ; /* default knobs array */ -#ifndef COLAMD_NDEBUG - eigen_colamd_get_debug ("eigen_colamd") ; -#endif /* COLAMD_NDEBUG */ /* === Check the input arguments ======================================== */ if (!stats) { - COLAMD_DEBUG0 (("eigen_colamd: stats not present\n")) ; + COLAMD_DEBUG0 (("colamd: stats not present\n")) ; return (false) ; } - for (i = 0 ; i < EIGEN_COLAMD_STATS ; i++) + for (i = 0 ; i < COLAMD_STATS ; i++) { stats [i] = 0 ; } - stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_OK ; - stats [EIGEN_COLAMD_INFO1] = -1 ; - stats [EIGEN_COLAMD_INFO2] = -1 ; + stats [COLAMD_STATUS] = COLAMD_OK ; + stats [COLAMD_INFO1] = -1 ; + stats [COLAMD_INFO2] = -1 ; if (!A) /* A is not present */ { - stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_A_not_present ; - COLAMD_DEBUG0 (("eigen_colamd: A not present\n")) ; + stats [COLAMD_STATUS] = COLAMD_ERROR_A_not_present ; + COLAMD_DEBUG0 (("colamd: A not present\n")) ; return (false) ; } if (!p) /* p is not present */ { - stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_p_not_present ; - COLAMD_DEBUG0 (("eigen_colamd: p not present\n")) ; + stats [COLAMD_STATUS] = COLAMD_ERROR_p_not_present ; + COLAMD_DEBUG0 (("colamd: p not present\n")) ; return (false) ; } if (n_row < 0) /* n_row must be >= 0 */ { - stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_nrow_negative ; - stats [EIGEN_COLAMD_INFO1] = n_row ; - COLAMD_DEBUG0 (("eigen_colamd: nrow negative %d\n", n_row)) ; + stats [COLAMD_STATUS] = COLAMD_ERROR_nrow_negative ; + stats [COLAMD_INFO1] = n_row ; + 
COLAMD_DEBUG0 (("colamd: nrow negative %d\n", n_row)) ; return (false) ; } if (n_col < 0) /* n_col must be >= 0 */ { - stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_ncol_negative ; - stats [EIGEN_COLAMD_INFO1] = n_col ; - COLAMD_DEBUG0 (("eigen_colamd: ncol negative %d\n", n_col)) ; + stats [COLAMD_STATUS] = COLAMD_ERROR_ncol_negative ; + stats [COLAMD_INFO1] = n_col ; + COLAMD_DEBUG0 (("colamd: ncol negative %d\n", n_col)) ; return (false) ; } nnz = p [n_col] ; if (nnz < 0) /* nnz must be >= 0 */ { - stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_nnz_negative ; - stats [EIGEN_COLAMD_INFO1] = nnz ; - COLAMD_DEBUG0 (("eigen_colamd: number of entries negative %d\n", nnz)) ; + stats [COLAMD_STATUS] = COLAMD_ERROR_nnz_negative ; + stats [COLAMD_INFO1] = nnz ; + COLAMD_DEBUG0 (("colamd: number of entries negative %d\n", nnz)) ; return (false) ; } if (p [0] != 0) { - stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_p0_nonzero ; - stats [EIGEN_COLAMD_INFO1] = p [0] ; - COLAMD_DEBUG0 (("eigen_colamd: p[0] not zero %d\n", p [0])) ; + stats [COLAMD_STATUS] = COLAMD_ERROR_p0_nonzero ; + stats [COLAMD_INFO1] = p [0] ; + COLAMD_DEBUG0 (("colamd: p[0] not zero %d\n", p [0])) ; return (false) ; } @@ -515,72 +433,73 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E if (!knobs) { - eigen_colamd_set_defaults (default_knobs) ; + colamd_set_defaults (default_knobs) ; knobs = default_knobs ; } /* === Allocate the Row and Col arrays from array A ===================== */ - Col_size = EIGEN_COLAMD_C (n_col) ; - Row_size = EIGEN_COLAMD_R (n_row) ; + Col_size = colamd_c (n_col) ; + Row_size = colamd_r (n_row) ; need = 2*nnz + n_col + Col_size + Row_size ; if (need > Alen) { /* not enough space in array A to perform the ordering */ - stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_A_too_small ; - stats [EIGEN_COLAMD_INFO1] = need ; - stats [EIGEN_COLAMD_INFO2] = Alen ; - COLAMD_DEBUG0 (("eigen_colamd: Need Alen >= %d, given only Alen = %d\n", 
need,Alen)); + stats [COLAMD_STATUS] = COLAMD_ERROR_A_too_small ; + stats [COLAMD_INFO1] = need ; + stats [COLAMD_INFO2] = Alen ; + COLAMD_DEBUG0 (("colamd: Need Alen >= %d, given only Alen = %d\n", need,Alen)); return (false) ; } Alen -= Col_size + Row_size ; - Col = (EIGEN_Colamd_Col *) &A [Alen] ; - Row = (EIGEN_Colamd_Row *) &A [Alen + Col_size] ; + Col = (colamd_col *) &A [Alen] ; + Row = (Colamd_Row *) &A [Alen + Col_size] ; /* === Construct the row and column data structures ===================== */ - if (!eigen_init_rows_cols (n_row, n_col, Row, Col, A, p, stats)) + if (!init_rows_cols (n_row, n_col, Row, Col, A, p, stats)) { /* input matrix is invalid */ - COLAMD_DEBUG0 (("eigen_colamd: Matrix invalid\n")) ; + COLAMD_DEBUG0 (("colamd: Matrix invalid\n")) ; return (false) ; } /* === Initialize scores, kill dense rows/columns ======================= */ - eigen_init_scoring (n_row, n_col, Row, Col, A, p, knobs, + init_scoring (n_row, n_col, Row, Col, A, p, knobs, &n_row2, &n_col2, &max_deg) ; /* === Order the supercolumns =========================================== */ - ngarbage = eigen_find_ordering (n_row, n_col, Alen, Row, Col, A, p, + ngarbage = find_ordering (n_row, n_col, Alen, Row, Col, A, p, n_col2, max_deg, 2*nnz) ; /* === Order the non-principal columns ================================== */ - eigen_order_children (n_col, Col, p) ; + order_children (n_col, Col, p) ; /* === Return statistics in stats ======================================= */ - stats [EIGEN_COLAMD_DENSE_ROW] = n_row - n_row2 ; - stats [EIGEN_COLAMD_DENSE_COL] = n_col - n_col2 ; - stats [EIGEN_COLAMD_DEFRAG_COUNT] = ngarbage ; - COLAMD_DEBUG0 (("eigen_colamd: done.\n")) ; + stats [COLAMD_DENSE_ROW] = n_row - n_row2 ; + stats [COLAMD_DENSE_COL] = n_col - n_col2 ; + stats [COLAMD_DEFRAG_COUNT] = ngarbage ; + COLAMD_DEBUG0 (("colamd: done.\n")) ; return (true) ; } /* ========================================================================== */ -/* === eigen_colamd_report 
======================================================== */ +/* === colamd_report ======================================================== */ /* ========================================================================== */ - void eigen_colamd_report + static inline void colamd_report ( - int stats [EIGEN_COLAMD_STATS] + int stats [COLAMD_STATS] ) { - eigen_print_report ("eigen_colamd", stats) ; + const char *method = "colamd"; + print_report (method, stats) ; } @@ -592,7 +511,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E /* ========================================================================== */ -/* === eigen_init_rows_cols ======================================================= */ +/* === init_rows_cols ======================================================= */ /* ========================================================================== */ /* @@ -604,17 +523,17 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E true otherwise. Not user-callable. 
*/ - int eigen_init_rows_cols /* returns true if OK, or false otherwise */ + static int init_rows_cols /* returns true if OK, or false otherwise */ ( /* === Parameters ======================================================= */ int n_row, /* number of rows of A */ int n_col, /* number of columns of A */ - EIGEN_Colamd_Row Row [], /* of size n_row+1 */ - EIGEN_Colamd_Col Col [], /* of size n_col+1 */ + Colamd_Row Row [], /* of size n_row+1 */ + colamd_col Col [], /* of size n_col+1 */ int A [], /* row indices of A, of size Alen */ int p [], /* pointers to columns in A, of size n_col+1 */ - int stats [EIGEN_COLAMD_STATS] /* eigen_colamd statistics */ + int stats [COLAMD_STATS] /* colamd statistics */ ) { /* === Local variables ================================================== */ @@ -637,24 +556,24 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E if (Col [col].length < 0) { /* column pointers must be non-decreasing */ - stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_col_length_negative ; - stats [EIGEN_COLAMD_INFO1] = col ; - stats [EIGEN_COLAMD_INFO2] = Col [col].length ; - COLAMD_DEBUG0 (("eigen_colamd: col %d length %d < 0\n", col, Col [col].length)) ; + stats [COLAMD_STATUS] = COLAMD_ERROR_col_length_negative ; + stats [COLAMD_INFO1] = col ; + stats [COLAMD_INFO2] = Col [col].length ; + COLAMD_DEBUG0 (("colamd: col %d length %d < 0\n", col, Col [col].length)) ; return (false) ; } Col [col].shared1.thickness = 1 ; Col [col].shared2.score = 0 ; - Col [col].shared3.prev = EIGEN_COLAMD_EMPTY ; - Col [col].shared4.degree_next = EIGEN_COLAMD_EMPTY ; + Col [col].shared3.prev = COLAMD_EMPTY ; + Col [col].shared4.degree_next = COLAMD_EMPTY ; } /* p [0..n_col] no longer needed, used as "head" in subsequent routines */ /* === Scan columns, compute row degrees, and check row indices ========= */ - stats [EIGEN_COLAMD_INFO3] = 0 ; /* number of duplicate or unsorted row indices*/ + stats [COLAMD_INFO3] = 0 ; /* number of duplicate or 
unsorted row indices*/ for (row = 0 ; row < n_row ; row++) { @@ -676,11 +595,11 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E /* make sure row indices within range */ if (row < 0 || row >= n_row) { - stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_ERROR_row_index_out_of_bounds ; - stats [EIGEN_COLAMD_INFO1] = col ; - stats [EIGEN_COLAMD_INFO2] = row ; - stats [EIGEN_COLAMD_INFO3] = n_row ; - COLAMD_DEBUG0 (("eigen_colamd: row %d col %d out of bounds\n", row, col)) ; + stats [COLAMD_STATUS] = COLAMD_ERROR_row_index_out_of_bounds ; + stats [COLAMD_INFO1] = col ; + stats [COLAMD_INFO2] = row ; + stats [COLAMD_INFO3] = n_row ; + COLAMD_DEBUG0 (("colamd: row %d col %d out of bounds\n", row, col)) ; return (false) ; } @@ -688,11 +607,11 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E { /* row index are unsorted or repeated (or both), thus col */ /* is jumbled. This is a notice, not an error condition. */ - stats [EIGEN_COLAMD_STATUS] = EIGEN_COLAMD_OK_BUT_JUMBLED ; - stats [EIGEN_COLAMD_INFO1] = col ; - stats [EIGEN_COLAMD_INFO2] = row ; - (stats [EIGEN_COLAMD_INFO3]) ++ ; - COLAMD_DEBUG1 (("eigen_colamd: row %d col %d unsorted/duplicate\n",row,col)); + stats [COLAMD_STATUS] = COLAMD_OK_BUT_JUMBLED ; + stats [COLAMD_INFO1] = col ; + stats [COLAMD_INFO2] = row ; + (stats [COLAMD_INFO3]) ++ ; + COLAMD_DEBUG1 (("colamd: row %d col %d unsorted/duplicate\n",row,col)); } if (Row [row].shared2.mark != col) @@ -729,7 +648,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E /* === Create row form ================================================== */ - if (stats [EIGEN_COLAMD_STATUS] == EIGEN_COLAMD_OK_BUT_JUMBLED) + if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED) { /* if cols jumbled, watch for repeated row indices */ for (col = 0 ; col < n_col ; col++) @@ -771,31 +690,10 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E /* === See if we 
need to re-create columns ============================== */ - if (stats [EIGEN_COLAMD_STATUS] == EIGEN_COLAMD_OK_BUT_JUMBLED) + if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED) { - COLAMD_DEBUG0 (("eigen_colamd: reconstructing column form, matrix jumbled\n")) ; + COLAMD_DEBUG0 (("colamd: reconstructing column form, matrix jumbled\n")) ; -#ifndef COLAMD_NDEBUG - /* make sure column lengths are correct */ - for (col = 0 ; col < n_col ; col++) - { - p [col] = Col [col].length ; - } - for (row = 0 ; row < n_row ; row++) - { - rp = &A [Row [row].start] ; - rp_end = rp + Row [row].length ; - while (rp < rp_end) - { - p [*rp++]-- ; - } - } - for (col = 0 ; col < n_col ; col++) - { - COLAMD_ASSERT (p [col] == 0) ; - } - /* now p is all zero (different than when debugging is turned off) */ -#endif /* COLAMD_NDEBUG */ /* === Compute col pointers ========================================= */ @@ -833,7 +731,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E /* ========================================================================== */ -/* === eigen_init_scoring ========================================================= */ +/* === init_scoring ========================================================= */ /* ========================================================================== */ /* @@ -841,17 +739,17 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E each column, and places all columns in the degree lists. Not user-callable. 
*/ - void eigen_init_scoring +static void init_scoring ( /* === Parameters ======================================================= */ int n_row, /* number of rows of A */ int n_col, /* number of columns of A */ - EIGEN_Colamd_Row Row [], /* of size n_row+1 */ - EIGEN_Colamd_Col Col [], /* of size n_col+1 */ + Colamd_Row Row [], /* of size n_row+1 */ + colamd_col Col [], /* of size n_col+1 */ int A [], /* column form and row form of A */ int head [], /* of size n_col+1 */ - double knobs [EIGEN_COLAMD_KNOBS],/* parameters */ + double knobs [COLAMD_KNOBS],/* parameters */ int *p_n_row2, /* number of non-dense, non-empty rows */ int *p_n_col2, /* number of non-dense, non-empty columns */ int *p_max_deg /* maximum row degree */ @@ -875,15 +773,12 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E int max_deg ; /* maximum row degree */ int next_col ; /* Used to add to degree list.*/ -#ifndef COLAMD_NDEBUG - int debug_count ; /* debug only. */ -#endif /* COLAMD_NDEBUG */ /* === Extract knobs ==================================================== */ - dense_row_count = COLAMD_MAX (0, COLAMD_MIN (knobs [EIGEN_COLAMD_DENSE_ROW] * n_col, n_col)) ; - dense_col_count = COLAMD_MAX (0, COLAMD_MIN (knobs [EIGEN_COLAMD_DENSE_COL] * n_row, n_row)) ; - COLAMD_DEBUG1 (("eigen_colamd: densecount: %d %d\n", dense_row_count, dense_col_count)) ; + dense_row_count = COLAMD_MAX (0, COLAMD_MIN (knobs [COLAMD_DENSE_ROW] * n_col, n_col)) ; + dense_col_count = COLAMD_MAX (0, COLAMD_MIN (knobs [COLAMD_DENSE_COL] * n_row, n_row)) ; + COLAMD_DEBUG1 (("colamd: densecount: %d %d\n", dense_row_count, dense_col_count)) ; max_deg = 0 ; n_col2 = n_col ; n_row2 = n_row ; @@ -899,10 +794,10 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E { /* this is a empty column, kill and order it last */ Col [c].shared2.order = --n_col2 ; - EIGEN_KILL_PRINCIPAL_COL (c) ; + KILL_PRINCIPAL_COL (c) ; } } - COLAMD_DEBUG1 (("eigen_colamd: null columns 
killed: %d\n", n_col - n_col2)) ; + COLAMD_DEBUG1 (("colamd: null columns killed: %d\n", n_col - n_col2)) ; /* === Kill dense columns =============================================== */ @@ -910,7 +805,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E for (c = n_col-1 ; c >= 0 ; c--) { /* skip any dead columns */ - if (EIGEN_COL_IS_DEAD (c)) + if (COL_IS_DEAD (c)) { continue ; } @@ -926,10 +821,10 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E { Row [*cp++].shared1.degree-- ; } - EIGEN_KILL_PRINCIPAL_COL (c) ; + KILL_PRINCIPAL_COL (c) ; } } - COLAMD_DEBUG1 (("eigen_colamd: Dense and null columns killed: %d\n", n_col - n_col2)) ; + COLAMD_DEBUG1 (("colamd: Dense and null columns killed: %d\n", n_col - n_col2)) ; /* === Kill dense and empty rows ======================================== */ @@ -940,7 +835,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E if (deg > dense_row_count || deg == 0) { /* kill a dense or empty row */ - EIGEN_KILL_ROW (r) ; + KILL_ROW (r) ; --n_row2 ; } else @@ -949,7 +844,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E max_deg = COLAMD_MAX (max_deg, deg) ; } } - COLAMD_DEBUG1 (("eigen_colamd: Dense and null rows killed: %d\n", n_row - n_row2)) ; + COLAMD_DEBUG1 (("colamd: Dense and null rows killed: %d\n", n_row - n_row2)) ; /* === Compute initial column scores ==================================== */ @@ -962,7 +857,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E for (c = n_col-1 ; c >= 0 ; c--) { /* skip dead column */ - if (EIGEN_COL_IS_DEAD (c)) + if (COL_IS_DEAD (c)) { continue ; } @@ -975,7 +870,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E /* get a row */ row = *cp++ ; /* skip if dead */ - if (EIGEN_ROW_IS_DEAD (row)) + if (ROW_IS_DEAD (row)) { continue ; } @@ -994,7 +889,7 @@ bool eigen_colamd(int n_row, int n_col, int 
Alen, int *A, int *p, double knobs[E /* and have already been killed) */ COLAMD_DEBUG2 (("Newly null killed: %d\n", c)) ; Col [c].shared2.order = --n_col2 ; - EIGEN_KILL_PRINCIPAL_COL (c) ; + KILL_PRINCIPAL_COL (c) ; } else { @@ -1005,7 +900,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E Col [c].shared2.score = score ; } } - COLAMD_DEBUG1 (("eigen_colamd: Dense, null, and newly-null columns killed: %d\n", + COLAMD_DEBUG1 (("colamd: Dense, null, and newly-null columns killed: %d\n", n_col-n_col2)) ; /* At this point, all empty rows and columns are dead. All live columns */ @@ -1013,20 +908,13 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E /* yet). Rows may contain dead columns, but all live rows contain at */ /* least one live column. */ -#ifndef COLAMD_NDEBUG - eigen_debug_structures (n_row, n_col, Row, Col, A, n_col2) ; -#endif /* COLAMD_NDEBUG */ - /* === Initialize degree lists ========================================== */ -#ifndef COLAMD_NDEBUG - debug_count = 0 ; -#endif /* COLAMD_NDEBUG */ /* clear the hash buckets */ for (c = 0 ; c <= n_col ; c++) { - head [c] = EIGEN_COLAMD_EMPTY ; + head [c] = COLAMD_EMPTY ; } min_score = n_col ; /* place in reverse order, so low column indices are at the front */ @@ -1034,7 +922,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E for (c = n_col-1 ; c >= 0 ; c--) { /* only add principal columns to degree lists */ - if (EIGEN_COL_IS_ALIVE (c)) + if (COL_IS_ALIVE (c)) { COLAMD_DEBUG4 (("place %d score %d minscore %d ncol %d\n", c, Col [c].shared2.score, min_score, n_col)) ; @@ -1047,16 +935,16 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E COLAMD_ASSERT (min_score <= n_col) ; COLAMD_ASSERT (score >= 0) ; COLAMD_ASSERT (score <= n_col) ; - COLAMD_ASSERT (head [score] >= EIGEN_COLAMD_EMPTY) ; + COLAMD_ASSERT (head [score] >= COLAMD_EMPTY) ; /* now add this column to dList at proper 
score location */ next_col = head [score] ; - Col [c].shared3.prev = EIGEN_COLAMD_EMPTY ; + Col [c].shared3.prev = COLAMD_EMPTY ; Col [c].shared4.degree_next = next_col ; /* if there already was a column with the same score, set its */ /* previous pointer to this new column */ - if (next_col != EIGEN_COLAMD_EMPTY) + if (next_col != COLAMD_EMPTY) { Col [next_col].shared3.prev = c ; } @@ -1065,19 +953,10 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E /* see if this score is less than current min */ min_score = COLAMD_MIN (min_score, score) ; -#ifndef COLAMD_NDEBUG - debug_count++ ; -#endif /* COLAMD_NDEBUG */ } } -#ifndef COLAMD_NDEBUG - COLAMD_DEBUG1 (("eigen_colamd: Live cols %d out of %d, non-princ: %d\n", - debug_count, n_col, n_col-debug_count)) ; - COLAMD_ASSERT (debug_count == n_col2) ; - eigen_debug_deg_lists (n_row, n_col, Row, Col, head, min_score, n_col2, max_deg) ; -#endif /* COLAMD_NDEBUG */ /* === Return number of remaining columns, and max row degree =========== */ @@ -1088,7 +967,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E /* ========================================================================== */ -/* === eigen_find_ordering ======================================================== */ +/* === find_ordering ======================================================== */ /* ========================================================================== */ /* @@ -1097,15 +976,15 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E degree ordering method. Not user-callable. 
*/ - int eigen_find_ordering /* return the number of garbage collections */ +static int find_ordering /* return the number of garbage collections */ ( /* === Parameters ======================================================= */ int n_row, /* number of rows of A */ int n_col, /* number of columns of A */ int Alen, /* size of A, 2*nnz + n_col or larger */ - EIGEN_Colamd_Row Row [], /* of size n_row+1 */ - EIGEN_Colamd_Col Col [], /* of size n_col+1 */ + Colamd_Row Row [], /* of size n_row+1 */ + colamd_col Col [], /* of size n_col+1 */ int A [], /* column form and row form of A */ int head [], /* of size n_col+1 */ int n_col2, /* Remaining columns to order */ @@ -1147,55 +1026,29 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E int next_col ; /* Used by Dlist operations. */ int ngarbage ; /* number of garbage collections performed */ -#ifndef COLAMD_NDEBUG - int debug_d ; /* debug loop counter */ - int debug_step = 0 ; /* debug loop counter */ -#endif /* COLAMD_NDEBUG */ /* === Initialization and clear mark ==================================== */ max_mark = INT_MAX - n_col ; /* INT_MAX defined in */ - tag_mark = eigen_clear_mark (n_row, Row) ; + tag_mark = clear_mark (n_row, Row) ; min_score = 0 ; ngarbage = 0 ; - COLAMD_DEBUG1 (("eigen_colamd: Ordering, n_col2=%d\n", n_col2)) ; + COLAMD_DEBUG1 (("colamd: Ordering, n_col2=%d\n", n_col2)) ; /* === Order the columns ================================================ */ for (k = 0 ; k < n_col2 ; /* 'k' is incremented below */) { -#ifndef COLAMD_NDEBUG - if (debug_step % 100 == 0) - { - COLAMD_DEBUG2 (("\n... 
Step k: %d out of n_col2: %d\n", k, n_col2)) ; - } - else - { - COLAMD_DEBUG3 (("\n----------Step k: %d out of n_col2: %d\n", k, n_col2)) ; - } - debug_step++ ; - eigen_debug_deg_lists (n_row, n_col, Row, Col, head, - min_score, n_col2-k, max_deg) ; - eigen_debug_matrix (n_row, n_col, Row, Col, A) ; -#endif /* COLAMD_NDEBUG */ - /* === Select pivot column, and order it ============================ */ /* make sure degree list isn't empty */ COLAMD_ASSERT (min_score >= 0) ; COLAMD_ASSERT (min_score <= n_col) ; - COLAMD_ASSERT (head [min_score] >= EIGEN_COLAMD_EMPTY) ; - -#ifndef COLAMD_NDEBUG - for (debug_d = 0 ; debug_d < min_score ; debug_d++) - { - COLAMD_ASSERT (head [debug_d] == EIGEN_COLAMD_EMPTY) ; - } -#endif /* COLAMD_NDEBUG */ + COLAMD_ASSERT (head [min_score] >= COLAMD_EMPTY) ; /* get pivot column from head of minimum degree list */ - while (head [min_score] == EIGEN_COLAMD_EMPTY && min_score < n_col) + while (head [min_score] == COLAMD_EMPTY && min_score < n_col) { min_score++ ; } @@ -1203,12 +1056,12 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E COLAMD_ASSERT (pivot_col >= 0 && pivot_col <= n_col) ; next_col = Col [pivot_col].shared4.degree_next ; head [min_score] = next_col ; - if (next_col != EIGEN_COLAMD_EMPTY) + if (next_col != COLAMD_EMPTY) { - Col [next_col].shared3.prev = EIGEN_COLAMD_EMPTY ; + Col [next_col].shared3.prev = COLAMD_EMPTY ; } - COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (pivot_col)) ; + COLAMD_ASSERT (COL_IS_ALIVE (pivot_col)) ; COLAMD_DEBUG3 (("Pivot col: %d\n", pivot_col)) ; /* remember score for defrag check */ @@ -1227,16 +1080,13 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E needed_memory = COLAMD_MIN (pivot_col_score, n_col - k) ; if (pfree + needed_memory >= Alen) { - pfree = eigen_garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ; + pfree = garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ; ngarbage++ ; /* after garbage collection we will 
have enough */ COLAMD_ASSERT (pfree + needed_memory < Alen) ; /* garbage collection has wiped out the Row[].shared2.mark array */ - tag_mark = eigen_clear_mark (n_row, Row) ; + tag_mark = clear_mark (n_row, Row) ; -#ifndef COLAMD_NDEBUG - eigen_debug_matrix (n_row, n_col, Row, Col, A) ; -#endif /* COLAMD_NDEBUG */ } /* === Compute pivot row pattern ==================================== */ @@ -1258,9 +1108,9 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E { /* get a row */ row = *cp++ ; - COLAMD_DEBUG4 (("Pivot col pattern %d %d\n", EIGEN_ROW_IS_ALIVE (row), row)) ; + COLAMD_DEBUG4 (("Pivot col pattern %d %d\n", ROW_IS_ALIVE (row), row)) ; /* skip if row is dead */ - if (EIGEN_ROW_IS_DEAD (row)) + if (ROW_IS_DEAD (row)) { continue ; } @@ -1272,7 +1122,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E col = *rp++ ; /* add the column, if alive and untagged */ col_thickness = Col [col].shared1.thickness ; - if (col_thickness > 0 && EIGEN_COL_IS_ALIVE (col)) + if (col_thickness > 0 && COL_IS_ALIVE (col)) { /* tag column in pivot row */ Col [col].shared1.thickness = -col_thickness ; @@ -1288,10 +1138,6 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E Col [pivot_col].shared1.thickness = pivot_col_thickness ; max_deg = COLAMD_MAX (max_deg, pivot_row_degree) ; -#ifndef COLAMD_NDEBUG - COLAMD_DEBUG3 (("check2\n")) ; - eigen_debug_mark (n_row, Row, tag_mark, max_mark) ; -#endif /* COLAMD_NDEBUG */ /* === Kill all rows used to construct pivot row ==================== */ @@ -1303,7 +1149,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E /* may be killing an already dead row */ row = *cp++ ; COLAMD_DEBUG3 (("Kill row in pivot col: %d\n", row)) ; - EIGEN_KILL_ROW (row) ; + KILL_ROW (row) ; } /* === Select a row index to use as the new pivot row =============== */ @@ -1318,7 +1164,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int 
*A, int *p, double knobs[E else { /* there is no pivot row, since it is of zero length */ - pivot_row = EIGEN_COLAMD_EMPTY ; + pivot_row = COLAMD_EMPTY ; COLAMD_ASSERT (pivot_row_length == 0) ; } COLAMD_ASSERT (Col [pivot_col].length > 0 || pivot_row_length == 0) ; @@ -1340,7 +1186,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E /* context, is the column "length", or the number of row indices */ /* in that column). The number of row indices in a column is */ /* monotonically non-decreasing, from the length of the original */ - /* column on input to eigen_colamd. */ + /* column on input to colamd. */ /* === Compute set differences ====================================== */ @@ -1355,7 +1201,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E while (rp < rp_end) { col = *rp++ ; - COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (col) && col != pivot_col) ; + COLAMD_ASSERT (COL_IS_ALIVE (col) && col != pivot_col) ; COLAMD_DEBUG3 (("Col: %d\n", col)) ; /* clear tags used to construct pivot row pattern */ @@ -1370,8 +1216,8 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E next_col = Col [col].shared4.degree_next ; COLAMD_ASSERT (cur_score >= 0) ; COLAMD_ASSERT (cur_score <= n_col) ; - COLAMD_ASSERT (cur_score >= EIGEN_COLAMD_EMPTY) ; - if (prev_col == EIGEN_COLAMD_EMPTY) + COLAMD_ASSERT (cur_score >= COLAMD_EMPTY) ; + if (prev_col == COLAMD_EMPTY) { head [cur_score] = next_col ; } @@ -1379,7 +1225,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E { Col [prev_col].shared4.degree_next = next_col ; } - if (next_col != EIGEN_COLAMD_EMPTY) + if (next_col != COLAMD_EMPTY) { Col [next_col].shared3.prev = prev_col ; } @@ -1394,7 +1240,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E row = *cp++ ; row_mark = Row [row].shared2.mark ; /* skip if dead */ - if (EIGEN_ROW_IS_MARKED_DEAD (row_mark)) + if (ROW_IS_MARKED_DEAD 
(row_mark)) { continue ; } @@ -1413,7 +1259,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E if (set_difference == 0) { COLAMD_DEBUG3 (("aggressive absorption. Row: %d\n", row)) ; - EIGEN_KILL_ROW (row) ; + KILL_ROW (row) ; } else { @@ -1423,10 +1269,6 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E } } -#ifndef COLAMD_NDEBUG - eigen_debug_deg_lists (n_row, n_col, Row, Col, head, - min_score, n_col2-k-pivot_row_degree, max_deg) ; -#endif /* COLAMD_NDEBUG */ /* === Add up set differences for each column ======================= */ @@ -1439,7 +1281,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E { /* get a column */ col = *rp++ ; - COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (col) && col != pivot_col) ; + COLAMD_ASSERT (COL_IS_ALIVE (col) && col != pivot_col) ; hash = 0 ; cur_score = 0 ; cp = &A [Col [col].start] ; @@ -1456,7 +1298,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E COLAMD_ASSERT(row >= 0 && row < n_row) ; row_mark = Row [row].shared2.mark ; /* skip if dead */ - if (EIGEN_ROW_IS_MARKED_DEAD (row_mark)) + if (ROW_IS_MARKED_DEAD (row_mark)) { continue ; } @@ -1480,7 +1322,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E { COLAMD_DEBUG4 (("further mass elimination. 
Col: %d\n", col)) ; /* nothing left but the pivot row in this column */ - EIGEN_KILL_PRINCIPAL_COL (col) ; + KILL_PRINCIPAL_COL (col) ; pivot_row_degree -= Col [col].shared1.thickness ; COLAMD_ASSERT (pivot_row_degree >= 0) ; /* order it */ @@ -1504,7 +1346,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E COLAMD_ASSERT (hash <= n_col) ; head_column = head [hash] ; - if (head_column > EIGEN_COLAMD_EMPTY) + if (head_column > COLAMD_EMPTY) { /* degree list "hash" is non-empty, use prev (shared3) of */ /* first column in degree list as head of hash bucket */ @@ -1521,7 +1363,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E /* save hash function in Col [col].shared3.hash */ Col [col].shared3.hash = (int) hash ; - COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (col)) ; + COLAMD_ASSERT (COL_IS_ALIVE (col)) ; } } @@ -1531,17 +1373,13 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E COLAMD_DEBUG3 (("** Supercolumn detection phase. **\n")) ; - eigen_detect_super_cols ( - -#ifndef COLAMD_NDEBUG - n_col, Row, -#endif /* COLAMD_NDEBUG */ + detect_super_cols ( Col, A, head, pivot_row_start, pivot_row_length) ; /* === Kill the pivotal column ====================================== */ - EIGEN_KILL_PRINCIPAL_COL (pivot_col) ; + KILL_PRINCIPAL_COL (pivot_col) ; /* === Clear mark =================================================== */ @@ -1549,14 +1387,9 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E if (tag_mark >= max_mark) { COLAMD_DEBUG2 (("clearing tag_mark\n")) ; - tag_mark = eigen_clear_mark (n_row, Row) ; + tag_mark = clear_mark (n_row, Row) ; } -#ifndef COLAMD_NDEBUG - COLAMD_DEBUG3 (("check3\n")) ; - eigen_debug_mark (n_row, Row, tag_mark, max_mark) ; -#endif /* COLAMD_NDEBUG */ - /* === Finalize the new pivot row, and column scores ================ */ COLAMD_DEBUG3 (("** Finalize scores phase. 
**\n")) ; @@ -1570,7 +1403,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E { col = *rp++ ; /* skip dead columns */ - if (EIGEN_COL_IS_DEAD (col)) + if (COL_IS_DEAD (col)) { continue ; } @@ -1604,11 +1437,11 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E COLAMD_ASSERT (min_score <= n_col) ; COLAMD_ASSERT (cur_score >= 0) ; COLAMD_ASSERT (cur_score <= n_col) ; - COLAMD_ASSERT (head [cur_score] >= EIGEN_COLAMD_EMPTY) ; + COLAMD_ASSERT (head [cur_score] >= COLAMD_EMPTY) ; next_col = head [cur_score] ; Col [col].shared4.degree_next = next_col ; - Col [col].shared3.prev = EIGEN_COLAMD_EMPTY ; - if (next_col != EIGEN_COLAMD_EMPTY) + Col [col].shared3.prev = COLAMD_EMPTY ; + if (next_col != COLAMD_EMPTY) { Col [next_col].shared3.prev = col ; } @@ -1619,11 +1452,6 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E } -#ifndef COLAMD_NDEBUG - eigen_debug_deg_lists (n_row, n_col, Row, Col, head, - min_score, n_col2-k, max_deg) ; -#endif /* COLAMD_NDEBUG */ - /* === Resurrect the new pivot row ================================== */ if (pivot_row_degree > 0) @@ -1645,11 +1473,11 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E /* ========================================================================== */ -/* === eigen_order_children ======================================================= */ +/* === order_children ======================================================= */ /* ========================================================================== */ /* - The eigen_find_ordering routine has ordered all of the principal columns (the + The find_ordering routine has ordered all of the principal columns (the representatives of the supercolumns). The non-principal columns have not yet been ordered. This routine orders those columns by walking up the parent tree (a column is a child of the column which absorbed it). 
The @@ -1661,12 +1489,12 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E columns. Not user-callable. */ - void eigen_order_children +static inline void order_children ( /* === Parameters ======================================================= */ int n_col, /* number of columns of A */ - EIGEN_Colamd_Col Col [], /* of size n_col+1 */ + colamd_col Col [], /* of size n_col+1 */ int p [] /* p [0 ... n_col-1] is the column permutation*/ ) { @@ -1682,15 +1510,15 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E for (i = 0 ; i < n_col ; i++) { /* find an un-ordered non-principal column */ - COLAMD_ASSERT (EIGEN_COL_IS_DEAD (i)) ; - if (!EIGEN_EIGEN_COL_IS_DEAD_PRINCIPAL (i) && Col [i].shared2.order == EIGEN_COLAMD_EMPTY) + COLAMD_ASSERT (COL_IS_DEAD (i)) ; + if (!COL_IS_DEAD_PRINCIPAL (i) && Col [i].shared2.order == COLAMD_EMPTY) { parent = i ; /* once found, find its principal parent */ do { parent = Col [parent].shared1.parent ; - } while (!EIGEN_EIGEN_COL_IS_DEAD_PRINCIPAL (parent)) ; + } while (!COL_IS_DEAD_PRINCIPAL (parent)) ; /* now, order all un-ordered non-principal columns along path */ /* to this parent. collapse tree at the same time */ @@ -1700,7 +1528,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E do { - COLAMD_ASSERT (Col [c].shared2.order == EIGEN_COLAMD_EMPTY) ; + COLAMD_ASSERT (Col [c].shared2.order == COLAMD_EMPTY) ; /* order this column */ Col [c].shared2.order = order++ ; @@ -1713,7 +1541,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E /* continue until we hit an ordered column. 
There are */ /* guarranteed not to be anymore unordered columns */ /* above an ordered column */ - } while (Col [c].shared2.order == EIGEN_COLAMD_EMPTY) ; + } while (Col [c].shared2.order == COLAMD_EMPTY) ; /* re-order the super_col parent to largest order for this group */ Col [parent].shared2.order = order ; @@ -1730,7 +1558,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E /* ========================================================================== */ -/* === eigen_detect_super_cols ==================================================== */ +/* === detect_super_cols ==================================================== */ /* ========================================================================== */ /* @@ -1762,17 +1590,11 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E Not user-callable. */ - void eigen_detect_super_cols +static void detect_super_cols ( /* === Parameters ======================================================= */ -#ifndef COLAMD_NDEBUG - /* these two parameters are only needed when debugging is enabled: */ - int n_col, /* number of columns of A */ - EIGEN_Colamd_Row Row [], /* of size n_row+1 */ -#endif /* COLAMD_NDEBUG */ - - EIGEN_Colamd_Col Col [], /* of size n_col+1 */ + colamd_col Col [], /* of size n_col+1 */ int A [], /* row indices of A */ int head [], /* head of degree lists and hash buckets */ int row_start, /* pointer to set of columns to check */ @@ -1802,7 +1624,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E while (rp < rp_end) { col = *rp++ ; - if (EIGEN_COL_IS_DEAD (col)) + if (COL_IS_DEAD (col)) { continue ; } @@ -1814,7 +1636,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E /* === Get the first column in this hash bucket ===================== */ head_column = head [hash] ; - if (head_column > EIGEN_COLAMD_EMPTY) + if (head_column > COLAMD_EMPTY) { first_col = Col 
[head_column].shared3.headhash ; } @@ -1825,10 +1647,10 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E /* === Consider each column in the hash bucket ====================== */ - for (super_c = first_col ; super_c != EIGEN_COLAMD_EMPTY ; + for (super_c = first_col ; super_c != COLAMD_EMPTY ; super_c = Col [super_c].shared4.hash_next) { - COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (super_c)) ; + COLAMD_ASSERT (COL_IS_ALIVE (super_c)) ; COLAMD_ASSERT (Col [super_c].shared3.hash == hash) ; length = Col [super_c].length ; @@ -1838,10 +1660,10 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E /* === Compare super_c with all columns after it ================ */ for (c = Col [super_c].shared4.hash_next ; - c != EIGEN_COLAMD_EMPTY ; c = Col [c].shared4.hash_next) + c != COLAMD_EMPTY ; c = Col [c].shared4.hash_next) { COLAMD_ASSERT (c != super_c) ; - COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (c)) ; + COLAMD_ASSERT (COL_IS_ALIVE (c)) ; COLAMD_ASSERT (Col [c].shared3.hash == hash) ; /* not identical if lengths or scores are different */ @@ -1859,8 +1681,8 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E for (i = 0 ; i < length ; i++) { /* the columns are "clean" (no dead rows) */ - COLAMD_ASSERT (EIGEN_ROW_IS_ALIVE (*cp1)) ; - COLAMD_ASSERT (EIGEN_ROW_IS_ALIVE (*cp2)) ; + COLAMD_ASSERT (ROW_IS_ALIVE (*cp1)) ; + COLAMD_ASSERT (ROW_IS_ALIVE (*cp2)) ; /* row indices will same order for both supercols, */ /* no gather scatter nessasary */ if (*cp1++ != *cp2++) @@ -1882,9 +1704,9 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E Col [super_c].shared1.thickness += Col [c].shared1.thickness ; Col [c].shared1.parent = super_c ; - EIGEN_KILL_NON_PRINCIPAL_COL (c) ; - /* order c later, in eigen_order_children() */ - Col [c].shared2.order = EIGEN_COLAMD_EMPTY ; + KILL_NON_PRINCIPAL_COL (c) ; + /* order c later, in order_children() */ + Col [c].shared2.order = 
COLAMD_EMPTY ; /* remove c from hash bucket */ Col [prev_c].shared4.hash_next = Col [c].shared4.hash_next ; } @@ -1892,22 +1714,22 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E /* === Empty this hash bucket ======================================= */ - if (head_column > EIGEN_COLAMD_EMPTY) + if (head_column > COLAMD_EMPTY) { /* corresponding degree list "hash" is not empty */ - Col [head_column].shared3.headhash = EIGEN_COLAMD_EMPTY ; + Col [head_column].shared3.headhash = COLAMD_EMPTY ; } else { /* corresponding degree list "hash" is empty */ - head [hash] = EIGEN_COLAMD_EMPTY ; + head [hash] = COLAMD_EMPTY ; } } } /* ========================================================================== */ -/* === eigen_garbage_collection =================================================== */ +/* === garbage_collection =================================================== */ /* ========================================================================== */ /* @@ -1919,14 +1741,14 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E Not user-callable. */ - int eigen_garbage_collection /* returns the new value of pfree */ +static int garbage_collection /* returns the new value of pfree */ ( /* === Parameters ======================================================= */ int n_row, /* number of rows */ int n_col, /* number of columns */ - EIGEN_Colamd_Row Row [], /* row info */ - EIGEN_Colamd_Col Col [], /* column info */ + Colamd_Row Row [], /* row info */ + colamd_col Col [], /* column info */ int A [], /* A [0 ... Alen-1] holds the matrix */ int *pfree /* &A [0] ... 
pfree is in use */ ) @@ -1940,19 +1762,12 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E int c ; /* a column index */ int length ; /* length of a row or column */ -#ifndef COLAMD_NDEBUG - int debug_rows ; - COLAMD_DEBUG2 (("Defrag..\n")) ; - for (psrc = &A[0] ; psrc < pfree ; psrc++) COLAMD_ASSERT (*psrc >= 0) ; - debug_rows = 0 ; -#endif /* COLAMD_NDEBUG */ - /* === Defragment the columns =========================================== */ pdest = &A[0] ; for (c = 0 ; c < n_col ; c++) { - if (EIGEN_COL_IS_ALIVE (c)) + if (COL_IS_ALIVE (c)) { psrc = &A [Col [c].start] ; @@ -1963,7 +1778,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E for (j = 0 ; j < length ; j++) { r = *psrc++ ; - if (EIGEN_ROW_IS_ALIVE (r)) + if (ROW_IS_ALIVE (r)) { *pdest++ = r ; } @@ -1976,26 +1791,22 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E for (r = 0 ; r < n_row ; r++) { - if (EIGEN_ROW_IS_ALIVE (r)) + if (ROW_IS_ALIVE (r)) { if (Row [r].length == 0) { /* this row is of zero length. 
cannot compact it, so kill it */ COLAMD_DEBUG3 (("Defrag row kill\n")) ; - EIGEN_KILL_ROW (r) ; + KILL_ROW (r) ; } else { /* save first column index in Row [r].shared2.first_column */ psrc = &A [Row [r].start] ; Row [r].shared2.first_column = *psrc ; - COLAMD_ASSERT (EIGEN_ROW_IS_ALIVE (r)) ; + COLAMD_ASSERT (ROW_IS_ALIVE (r)) ; /* flag the start of the row with the one's complement of row */ - *psrc = EIGEN_ONES_COMPLEMENT (r) ; - -#ifndef COLAMD_NDEBUG - debug_rows++ ; -#endif /* COLAMD_NDEBUG */ + *psrc = ONES_COMPLEMENT (r) ; } } @@ -2011,11 +1822,11 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E { psrc-- ; /* get the row index */ - r = EIGEN_ONES_COMPLEMENT (*psrc) ; + r = ONES_COMPLEMENT (*psrc) ; COLAMD_ASSERT (r >= 0 && r < n_row) ; /* restore first column index */ *psrc = Row [r].shared2.first_column ; - COLAMD_ASSERT (EIGEN_ROW_IS_ALIVE (r)) ; + COLAMD_ASSERT (ROW_IS_ALIVE (r)) ; /* move and compact the row */ COLAMD_ASSERT (pdest <= psrc) ; @@ -2024,17 +1835,13 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E for (j = 0 ; j < length ; j++) { c = *psrc++ ; - if (EIGEN_COL_IS_ALIVE (c)) + if (COL_IS_ALIVE (c)) { *pdest++ = c ; } } Row [r].length = (int) (pdest - &A [Row [r].start]) ; -#ifndef COLAMD_NDEBUG - debug_rows-- ; -#endif /* COLAMD_NDEBUG */ - } } /* ensure we found all the rows */ @@ -2047,7 +1854,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E /* ========================================================================== */ -/* === eigen_clear_mark =========================================================== */ +/* === clear_mark =========================================================== */ /* ========================================================================== */ /* @@ -2055,12 +1862,12 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E Return value is the new tag_mark. Not user-callable. 
*/ - int eigen_clear_mark /* return the new value for tag_mark */ +static inline int clear_mark /* return the new value for tag_mark */ ( /* === Parameters ======================================================= */ int n_row, /* number of rows in A */ - EIGEN_Colamd_Row Row [] /* Row [0 ... n_row-1].shared2.mark is set to zero */ + Colamd_Row Row [] /* Row [0 ... n_row-1].shared2.mark is set to zero */ ) { /* === Local variables ================================================== */ @@ -2069,7 +1876,7 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E for (r = 0 ; r < n_row ; r++) { - if (EIGEN_ROW_IS_ALIVE (r)) + if (ROW_IS_ALIVE (r)) { Row [r].shared2.mark = 0 ; } @@ -2080,13 +1887,13 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E /* ========================================================================== */ -/* === eigen_print_report ========================================================= */ +/* === print_report ========================================================= */ /* ========================================================================== */ - void eigen_print_report +static void print_report ( - char *method, - int stats [EIGEN_COLAMD_STATS] + const char *method, + int stats [COLAMD_STATS] ) { @@ -2098,11 +1905,11 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E return ; } - i1 = stats [EIGEN_COLAMD_INFO1] ; - i2 = stats [EIGEN_COLAMD_INFO2] ; - i3 = stats [EIGEN_COLAMD_INFO3] ; + i1 = stats [COLAMD_INFO1] ; + i2 = stats [COLAMD_INFO2] ; + i3 = stats [COLAMD_INFO3] ; - if (stats [EIGEN_COLAMD_STATUS] >= 0) + if (stats [COLAMD_STATUS] >= 0) { PRINTF ("%s: OK. ", method) ; } @@ -2111,10 +1918,10 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E PRINTF ("%s: ERROR. 
", method) ; } - switch (stats [EIGEN_COLAMD_STATUS]) + switch (stats [COLAMD_STATUS]) { - case EIGEN_COLAMD_OK_BUT_JUMBLED: + case COLAMD_OK_BUT_JUMBLED: PRINTF ("Matrix has unsorted or duplicate row indices.\n") ; @@ -2129,77 +1936,77 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E /* no break - fall through to next case instead */ - case EIGEN_COLAMD_OK: + case COLAMD_OK: PRINTF ("\n") ; PRINTF ("%s: number of dense or empty rows ignored: %d\n", - method, stats [EIGEN_COLAMD_DENSE_ROW]) ; + method, stats [COLAMD_DENSE_ROW]) ; PRINTF ("%s: number of dense or empty columns ignored: %d\n", - method, stats [EIGEN_COLAMD_DENSE_COL]) ; + method, stats [COLAMD_DENSE_COL]) ; PRINTF ("%s: number of garbage collections performed: %d\n", - method, stats [EIGEN_COLAMD_DEFRAG_COUNT]) ; + method, stats [COLAMD_DEFRAG_COUNT]) ; break ; - case EIGEN_COLAMD_ERROR_A_not_present: + case COLAMD_ERROR_A_not_present: PRINTF ("Array A (row indices of matrix) not present.\n") ; break ; - case EIGEN_COLAMD_ERROR_p_not_present: + case COLAMD_ERROR_p_not_present: PRINTF ("Array p (column pointers for matrix) not present.\n") ; break ; - case EIGEN_COLAMD_ERROR_nrow_negative: + case COLAMD_ERROR_nrow_negative: PRINTF ("Invalid number of rows (%d).\n", i1) ; break ; - case EIGEN_COLAMD_ERROR_ncol_negative: + case COLAMD_ERROR_ncol_negative: PRINTF ("Invalid number of columns (%d).\n", i1) ; break ; - case EIGEN_COLAMD_ERROR_nnz_negative: + case COLAMD_ERROR_nnz_negative: PRINTF ("Invalid number of nonzero entries (%d).\n", i1) ; break ; - case EIGEN_COLAMD_ERROR_p0_nonzero: + case COLAMD_ERROR_p0_nonzero: PRINTF ("Invalid column pointer, p [0] = %d, must be zero.\n", i1) ; break ; - case EIGEN_COLAMD_ERROR_A_too_small: + case COLAMD_ERROR_A_too_small: PRINTF ("Array A too small.\n") ; PRINTF (" Need Alen >= %d, but given only Alen = %d.\n", i1, i2) ; break ; - case EIGEN_COLAMD_ERROR_col_length_negative: + case COLAMD_ERROR_col_length_negative: PRINTF 
("Column %d has a negative number of nonzero entries (%d).\n", INDEX (i1), i2) ; break ; - case EIGEN_COLAMD_ERROR_row_index_out_of_bounds: + case COLAMD_ERROR_row_index_out_of_bounds: PRINTF ("Row index (row %d) out of bounds (%d to %d) in column %d.\n", INDEX (i2), INDEX (0), INDEX (i3-1), INDEX (i1)) ; break ; - case EIGEN_COLAMD_ERROR_out_of_memory: + case COLAMD_ERROR_out_of_memory: PRINTF ("Out of memory.\n") ; break ; - case EIGEN_COLAMD_ERROR_internal_error: + case COLAMD_ERROR_internal_error: /* if this happens, there is a bug in the code */ PRINTF @@ -2208,307 +2015,5 @@ bool eigen_colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[E } } - - - -/* ========================================================================== */ -/* === eigen_colamd debugging routines ============================================ */ -/* ========================================================================== */ - -/* When debugging is disabled, the remainder of this file is ignored. */ - -#ifndef COLAMD_NDEBUG - - -/* ========================================================================== */ -/* === eigen_debug_structures ===================================================== */ -/* ========================================================================== */ - -/* - At this point, all empty rows and columns are dead. All live columns - are "clean" (containing no dead rows) and simplicial (no supercolumns - yet). Rows may contain dead columns, but all live rows contain at - least one live column. 
-*/ - - void eigen_debug_structures -( - /* === Parameters ======================================================= */ - - int n_row, - int n_col, - EIGEN_Colamd_Row Row [], - EIGEN_Colamd_Col Col [], - int A [], - int n_col2 -) -{ - /* === Local variables ================================================== */ - - int i ; - int c ; - int *cp ; - int *cp_end ; - int len ; - int score ; - int r ; - int *rp ; - int *rp_end ; - int deg ; - - /* === Check A, Row, and Col ============================================ */ - - for (c = 0 ; c < n_col ; c++) - { - if (EIGEN_COL_IS_ALIVE (c)) - { - len = Col [c].length ; - score = Col [c].shared2.score ; - COLAMD_DEBUG4 (("initial live col %5d %5d %5d\n", c, len, score)) ; - COLAMD_ASSERT (len > 0) ; - COLAMD_ASSERT (score >= 0) ; - COLAMD_ASSERT (Col [c].shared1.thickness == 1) ; - cp = &A [Col [c].start] ; - cp_end = cp + len ; - while (cp < cp_end) - { - r = *cp++ ; - COLAMD_ASSERT (EIGEN_ROW_IS_ALIVE (r)) ; - } - } - else - { - i = Col [c].shared2.order ; - COLAMD_ASSERT (i >= n_col2 && i < n_col) ; - } - } - - for (r = 0 ; r < n_row ; r++) - { - if (EIGEN_ROW_IS_ALIVE (r)) - { - i = 0 ; - len = Row [r].length ; - deg = Row [r].shared1.degree ; - COLAMD_ASSERT (len > 0) ; - COLAMD_ASSERT (deg > 0) ; - rp = &A [Row [r].start] ; - rp_end = rp + len ; - while (rp < rp_end) - { - c = *rp++ ; - if (EIGEN_COL_IS_ALIVE (c)) - { - i++ ; - } - } - COLAMD_ASSERT (i > 0) ; - } - } -} - - -/* ========================================================================== */ -/* === eigen_debug_deg_lists ====================================================== */ -/* ========================================================================== */ - -/* - Prints the contents of the degree lists. Counts the number of columns - in the degree list and compares it to the total it should have. Also - checks the row degrees. 
-*/ - - void eigen_debug_deg_lists -( - /* === Parameters ======================================================= */ - - int n_row, - int n_col, - EIGEN_Colamd_Row Row [], - EIGEN_Colamd_Col Col [], - int head [], - int min_score, - int should, - int max_deg -) -{ - /* === Local variables ================================================== */ - - int deg ; - int col ; - int have ; - int row ; - - /* === Check the degree lists =========================================== */ - - if (n_col > 10000 && colamd_debug <= 0) - { - return ; - } - have = 0 ; - COLAMD_DEBUG4 (("Degree lists: %d\n", min_score)) ; - for (deg = 0 ; deg <= n_col ; deg++) - { - col = head [deg] ; - if (col == EIGEN_COLAMD_EMPTY) - { - continue ; - } - COLAMD_DEBUG4 (("%d:", deg)) ; - while (col != EIGEN_COLAMD_EMPTY) - { - COLAMD_DEBUG4 ((" %d", col)) ; - have += Col [col].shared1.thickness ; - COLAMD_ASSERT (EIGEN_COL_IS_ALIVE (col)) ; - col = Col [col].shared4.degree_next ; - } - COLAMD_DEBUG4 (("\n")) ; - } - COLAMD_DEBUG4 (("should %d have %d\n", should, have)) ; - COLAMD_ASSERT (should == have) ; - - /* === Check the row degrees ============================================ */ - - if (n_row > 10000 && colamd_debug <= 0) - { - return ; - } - for (row = 0 ; row < n_row ; row++) - { - if (EIGEN_ROW_IS_ALIVE (row)) - { - COLAMD_ASSERT (Row [row].shared1.degree <= max_deg) ; - } - } -} - - -/* ========================================================================== */ -/* === eigen_debug_mark =========================================================== */ -/* ========================================================================== */ - -/* - Ensures that the tag_mark is less that the maximum and also ensures that - each entry in the mark array is less than the tag mark. 
-*/ - - void eigen_debug_mark -( - /* === Parameters ======================================================= */ - - int n_row, - EIGEN_Colamd_Row Row [], - int tag_mark, - int max_mark -) -{ - /* === Local variables ================================================== */ - - int r ; - - /* === Check the Row marks ============================================== */ - - COLAMD_ASSERT (tag_mark > 0 && tag_mark <= max_mark) ; - if (n_row > 10000 && colamd_debug <= 0) - { - return ; - } - for (r = 0 ; r < n_row ; r++) - { - COLAMD_ASSERT (Row [r].shared2.mark < tag_mark) ; - } -} - - -/* ========================================================================== */ -/* === eigen_debug_matrix ========================================================= */ -/* ========================================================================== */ - -/* - Prints out the contents of the columns and the rows. -*/ - - void eigen_debug_matrix -( - /* === Parameters ======================================================= */ - - int n_row, - int n_col, - EIGEN_Colamd_Row Row [], - EIGEN_Colamd_Col Col [], - int A [] -) -{ - /* === Local variables ================================================== */ - - int r ; - int c ; - int *rp ; - int *rp_end ; - int *cp ; - int *cp_end ; - - /* === Dump the rows and columns of the matrix ========================== */ - - if (colamd_debug < 3) - { - return ; - } - COLAMD_DEBUG3 (("DUMP MATRIX:\n")) ; - for (r = 0 ; r < n_row ; r++) - { - COLAMD_DEBUG3 (("Row %d alive? %d\n", r, EIGEN_ROW_IS_ALIVE (r))) ; - if (EIGEN_ROW_IS_DEAD (r)) - { - continue ; - } - COLAMD_DEBUG3 (("start %d length %d degree %d\n", - Row [r].start, Row [r].length, Row [r].shared1.degree)) ; - rp = &A [Row [r].start] ; - rp_end = rp + Row [r].length ; - while (rp < rp_end) - { - c = *rp++ ; - COLAMD_DEBUG4 ((" %d col %d\n", EIGEN_COL_IS_ALIVE (c), c)) ; - } - } - - for (c = 0 ; c < n_col ; c++) - { - COLAMD_DEBUG3 (("Col %d alive? 
%d\n", c, EIGEN_COL_IS_ALIVE (c))) ; - if (EIGEN_COL_IS_DEAD (c)) - { - continue ; - } - COLAMD_DEBUG3 (("start %d length %d shared1 %d shared2 %d\n", - Col [c].start, Col [c].length, - Col [c].shared1.thickness, Col [c].shared2.score)) ; - cp = &A [Col [c].start] ; - cp_end = cp + Col [c].length ; - while (cp < cp_end) - { - r = *cp++ ; - COLAMD_DEBUG4 ((" %d row %d\n", EIGEN_ROW_IS_ALIVE (r), r)) ; - } - } -} - - void eigen_colamd_get_debug -( - char *method -) -{ - colamd_debug = 0 ; /* no debug printing */ - - /* get "D" environment variable, which gives the debug printing level */ - if (getenv ("D")) - { - colamd_debug = atoi (getenv ("D")) ; - } - - COLAMD_DEBUG0 (("%s: debug version, D = %d (THIS WILL BE SLOW!)\n", - method, colamd_debug)) ; -} - -#endif /* NDEBUG */ +} // namespace internal #endif diff --git a/Eigen/src/OrderingMethods/Ordering.h b/Eigen/src/OrderingMethods/Ordering.h index 47cd6f169..f5757b319 100644 --- a/Eigen/src/OrderingMethods/Ordering.h +++ b/Eigen/src/OrderingMethods/Ordering.h @@ -27,8 +27,10 @@ #define EIGEN_ORDERING_H #include "Amd.h" -#include "Eigen_Colamd.h" namespace Eigen { + +#include "Eigen_Colamd.h" + namespace internal { /** @@ -131,18 +133,18 @@ class COLAMDOrdering int n = mat.cols(); int nnz = mat.nonZeros(); // Get the recommended value of Alen to be used by colamd - int Alen = eigen_colamd_recommended(nnz, m, n); + int Alen = internal::colamd_recommended(nnz, m, n); // Set the default parameters - double knobs [EIGEN_COLAMD_KNOBS]; - int stats [EIGEN_COLAMD_STATS]; - eigen_colamd_set_defaults(knobs); + double knobs [COLAMD_KNOBS]; + int stats [COLAMD_STATS]; + internal::colamd_set_defaults(knobs); int info; IndexVector p(n+1), A(Alen); for(int i=0; i <= n; i++) p(i) = mat.outerIndexPtr()[i]; for(int i=0; i < nnz; i++) A(i) = mat.innerIndexPtr()[i]; // Call Colamd routine to compute the ordering - info = eigen_colamd(m, n, Alen, A.data(), p.data(), knobs, stats); + info = internal::colamd(m, n, Alen, A.data(), 
p.data(), knobs, stats); eigen_assert( info && "COLAMD failed " ); perm.resize(n); diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index e2076138a..77df091c3 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -205,7 +205,7 @@ class SparseLU void initperfvalues() { m_perfv.panel_size = 12; - m_perfv.relax = 6; + m_perfv.relax = 1; m_perfv.maxsuper = 100; m_perfv.rowblk = 200; m_perfv.colblk = 60; diff --git a/bench/spbench/CMakeLists.txt b/bench/spbench/CMakeLists.txt index 5451843b9..6e0e1b103 100644 --- a/bench/spbench/CMakeLists.txt +++ b/bench/spbench/CMakeLists.txt @@ -55,6 +55,12 @@ if(PASTIX_FOUND AND BLAS_FOUND) set(PASTIX_ALL_LIBS ${PASTIX_LIBRARIES} ${BLAS_LIBRARIES}) endif(PASTIX_FOUND AND BLAS_FOUND) +if(METIS_FOUND) + include_directories(${METIS_INCLUDES}) + set (SPARSE_LIBS ${SPARSE_LIBS} ${METIS_LIBRARIES}) + add_definitions("-DEIGEN_METIS_SUPPORT") +endif(METIS_FOUND) + find_library(RT_LIBRARY rt) if(RT_LIBRARY) set(SPARSE_LIBS ${SPARSE_LIBS} ${RT_LIBRARY}) @@ -66,11 +72,6 @@ target_link_libraries (spbenchsolver ${SPARSE_LIBS}) add_executable(spsolver sp_solver.cpp) target_link_libraries (spsolver ${SPARSE_LIBS}) -if(METIS_FOUND) - include_directories(${METIS_INCLUDES}) - set (SPARSE_LIBS ${SPARSE_LIBS} ${METIS_LIBRARIES}) - add_definitions("-DEIGEN_METIS_SUPPORT") -endif(METIS_FOUND) add_executable(test_sparseLU test_sparseLU.cpp) target_link_libraries (test_sparseLU ${SPARSE_LIBS}) diff --git a/bench/spbench/spbenchsolver.h b/bench/spbench/spbenchsolver.h index c48ed7aa7..19c719c04 100644 --- a/bench/spbench/spbenchsolver.h +++ b/bench/spbench/spbenchsolver.h @@ -21,9 +21,14 @@ #include #include #include +#include #include "spbenchstyle.h" +#ifdef EIGEN_METIS_SUPPORT +#include +#endif + #ifdef EIGEN_CHOLMOD_SUPPORT #include #endif @@ -45,26 +50,27 @@ #endif // CONSTANTS -#define EIGEN_UMFPACK 0 -#define EIGEN_SUPERLU 1 -#define EIGEN_PASTIX 2 -#define EIGEN_PARDISO 3 -#define EIGEN_BICGSTAB 
4 -#define EIGEN_BICGSTAB_ILUT 5 -#define EIGEN_GMRES 6 -#define EIGEN_GMRES_ILUT 7 -#define EIGEN_SIMPLICIAL_LDLT 8 -#define EIGEN_CHOLMOD_LDLT 9 -#define EIGEN_PASTIX_LDLT 10 -#define EIGEN_PARDISO_LDLT 11 -#define EIGEN_SIMPLICIAL_LLT 12 -#define EIGEN_CHOLMOD_SUPERNODAL_LLT 13 -#define EIGEN_CHOLMOD_SIMPLICIAL_LLT 14 -#define EIGEN_PASTIX_LLT 15 -#define EIGEN_PARDISO_LLT 16 -#define EIGEN_CG 17 -#define EIGEN_CG_PRECOND 18 -#define EIGEN_ALL_SOLVERS 19 +#define EIGEN_UMFPACK 10 +#define EIGEN_SUPERLU 20 +#define EIGEN_PASTIX 30 +#define EIGEN_PARDISO 40 +#define EIGEN_SPARSELU_COLAMD 50 +#define EIGEN_SPARSELU_METIS 51 +#define EIGEN_BICGSTAB 60 +#define EIGEN_BICGSTAB_ILUT 61 +#define EIGEN_GMRES 70 +#define EIGEN_GMRES_ILUT 71 +#define EIGEN_SIMPLICIAL_LDLT 80 +#define EIGEN_CHOLMOD_LDLT 90 +#define EIGEN_PASTIX_LDLT 100 +#define EIGEN_PARDISO_LDLT 110 +#define EIGEN_SIMPLICIAL_LLT 120 +#define EIGEN_CHOLMOD_SUPERNODAL_LLT 130 +#define EIGEN_CHOLMOD_SIMPLICIAL_LLT 140 +#define EIGEN_PASTIX_LLT 150 +#define EIGEN_PARDISO_LLT 160 +#define EIGEN_CG 170 +#define EIGEN_CG_PRECOND 180 using namespace Eigen; using namespace std; @@ -188,6 +194,17 @@ void printStatheader(std::ofstream& out) out << " EIGEN \n"; out << "
\n"; + out <<" \n"; + out << " LU_COLAMD \n"; + out << " EIGEN \n"; + out << " \n"; + +#ifdef EIGEN_METIS_SUPPORT + out <<" \n"; + out << " LU_METIS \n"; + out << " EIGEN \n"; + out << " \n"; +#endif out << "
\n"; } @@ -325,8 +342,19 @@ void SelectSolvers(const SparseMatrix&A, unsigned int sym, Matrix > solver; + call_directsolver(solver, EIGEN_SPARSELU_COLAMD, A, b, refX, statFile); + // Eigen SparseLU METIS + #ifdef EIGEN_METIS_SUPPORT + { + cout << "\n Solving with Sparse LU AND METIS ... \n"; + SparseLU > solver; + call_directsolver(solver, EIGEN_SPARSELU_METIS, A, b, refX, statFile); + } + #endif //BiCGSTAB { From 761fe49f37c7905df4aca97ff3d391d233e7fad6 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Mon, 10 Sep 2012 14:28:28 +0200 Subject: [PATCH 50/73] Clean the Colamd routine --- Eigen/src/OrderingMethods/Eigen_Colamd.h | 170 ----------------------- 1 file changed, 170 deletions(-) diff --git a/Eigen/src/OrderingMethods/Eigen_Colamd.h b/Eigen/src/OrderingMethods/Eigen_Colamd.h index 686c0f9f9..6dc1f280d 100644 --- a/Eigen/src/OrderingMethods/Eigen_Colamd.h +++ b/Eigen/src/OrderingMethods/Eigen_Colamd.h @@ -50,13 +50,11 @@ #ifndef EIGEN_COLAMD_H #define EIGEN_COLAMD_H - namespace internal { /* Ensure that debugging is turned off: */ #ifndef COLAMD_NDEBUG #define COLAMD_NDEBUG #endif /* NDEBUG */ - /* ========================================================================== */ /* === Knob and statistics definitions ====================================== */ /* ========================================================================== */ @@ -135,26 +133,6 @@ namespace internal { /* === Colamd reporting mechanism =========================================== */ /* ========================================================================== */ -#ifdef MATLAB_MEX_FILE - -/* use mexPrintf in a MATLAB mexFunction, for debugging and statistics output */ -#define PRINTF mexPrintf - -/* In MATLAB, matrices are 1-based to the user, but 0-based internally */ -#define INDEX(i) ((i)+1) - -#else - -/* Use printf in standard C environment, for debugging and statistics output. 
*/ -/* Output is generated only if debugging is enabled at compile time, or if */ -/* the caller explicitly calls colamd_report or symamd_report. */ -#define PRINTF printf - -/* In C, matrices are 0-based and indices are reported as such in *_report */ -#define INDEX(i) (i) - -#endif /* MATLAB_MEX_FILE */ - // == Row and Column structures == typedef struct colamd_col_struct { @@ -238,8 +216,6 @@ static inline void colamd_set_defaults (double knobs [COLAMD_KNOBS]) ; static bool colamd (int n_row, int n_col, int Alen, int A [], int p [], double knobs[COLAMD_KNOBS], int stats [COLAMD_STATS]) ; -static inline void colamd_report (int stats [COLAMD_STATS]); - static int init_rows_cols (int n_row, int n_col, Colamd_Row Row [], colamd_col col [], int A [], int p [], int stats[COLAMD_STATS] ); static void init_scoring (int n_row, int n_col, Colamd_Row Row [], colamd_col Col [], int A [], int head [], double knobs[COLAMD_KNOBS], int *p_n_row2, int *p_n_col2, int *p_max_deg); @@ -259,8 +235,6 @@ static int garbage_collection (int n_row, int n_col, Colamd_Row Row [], colamd_c static inline int clear_mark (int n_row, Colamd_Row Row [] ) ; -static void print_report (const char *method, int stats [COLAMD_STATS]) ; - /* === No debugging ========================================================= */ #define COLAMD_DEBUG0(params) ; @@ -489,20 +463,6 @@ static bool colamd(int n_row, int n_col, int Alen, int *A, int *p, double knobs[ return (true) ; } -/* ========================================================================== */ -/* === colamd_report ======================================================== */ -/* ========================================================================== */ - - static inline void colamd_report -( - int stats [COLAMD_STATS] -) -{ - const char *method = "colamd"; - print_report (method, stats) ; -} - - /* ========================================================================== */ /* === NON-USER-CALLABLE ROUTINES: 
========================================== */ /* ========================================================================== */ @@ -1885,135 +1845,5 @@ static inline int clear_mark /* return the new value for tag_mark */ } - -/* ========================================================================== */ -/* === print_report ========================================================= */ -/* ========================================================================== */ - -static void print_report -( - const char *method, - int stats [COLAMD_STATS] -) -{ - - int i1, i2, i3 ; - - if (!stats) - { - PRINTF ("%s: No statistics available.\n", method) ; - return ; - } - - i1 = stats [COLAMD_INFO1] ; - i2 = stats [COLAMD_INFO2] ; - i3 = stats [COLAMD_INFO3] ; - - if (stats [COLAMD_STATUS] >= 0) - { - PRINTF ("%s: OK. ", method) ; - } - else - { - PRINTF ("%s: ERROR. ", method) ; - } - - switch (stats [COLAMD_STATUS]) - { - - case COLAMD_OK_BUT_JUMBLED: - - PRINTF ("Matrix has unsorted or duplicate row indices.\n") ; - - PRINTF ("%s: number of duplicate or out-of-order row indices: %d\n", - method, i3) ; - - PRINTF ("%s: last seen duplicate or out-of-order row index: %d\n", - method, INDEX (i2)) ; - - PRINTF ("%s: last seen in column: %d", - method, INDEX (i1)) ; - - /* no break - fall through to next case instead */ - - case COLAMD_OK: - - PRINTF ("\n") ; - - PRINTF ("%s: number of dense or empty rows ignored: %d\n", - method, stats [COLAMD_DENSE_ROW]) ; - - PRINTF ("%s: number of dense or empty columns ignored: %d\n", - method, stats [COLAMD_DENSE_COL]) ; - - PRINTF ("%s: number of garbage collections performed: %d\n", - method, stats [COLAMD_DEFRAG_COUNT]) ; - break ; - - case COLAMD_ERROR_A_not_present: - - PRINTF ("Array A (row indices of matrix) not present.\n") ; - break ; - - case COLAMD_ERROR_p_not_present: - - PRINTF ("Array p (column pointers for matrix) not present.\n") ; - break ; - - case COLAMD_ERROR_nrow_negative: - - PRINTF ("Invalid number of rows (%d).\n", 
i1) ; - break ; - - case COLAMD_ERROR_ncol_negative: - - PRINTF ("Invalid number of columns (%d).\n", i1) ; - break ; - - case COLAMD_ERROR_nnz_negative: - - PRINTF ("Invalid number of nonzero entries (%d).\n", i1) ; - break ; - - case COLAMD_ERROR_p0_nonzero: - - PRINTF ("Invalid column pointer, p [0] = %d, must be zero.\n", i1) ; - break ; - - case COLAMD_ERROR_A_too_small: - - PRINTF ("Array A too small.\n") ; - PRINTF (" Need Alen >= %d, but given only Alen = %d.\n", - i1, i2) ; - break ; - - case COLAMD_ERROR_col_length_negative: - - PRINTF - ("Column %d has a negative number of nonzero entries (%d).\n", - INDEX (i1), i2) ; - break ; - - case COLAMD_ERROR_row_index_out_of_bounds: - - PRINTF - ("Row index (row %d) out of bounds (%d to %d) in column %d.\n", - INDEX (i2), INDEX (0), INDEX (i3-1), INDEX (i1)) ; - break ; - - case COLAMD_ERROR_out_of_memory: - - PRINTF ("Out of memory.\n") ; - break ; - - case COLAMD_ERROR_internal_error: - - /* if this happens, there is a bug in the code */ - PRINTF - ("Internal error! Please contact authors (davis@cise.ufl.edu).\n") ; - break ; - } -} - } // namespace internal #endif From 504edbddb185aec03e11578c059aa489a1af8fb3 Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Mon, 10 Sep 2012 13:27:44 -0400 Subject: [PATCH 51/73] Replace COPYING.LGPL by a copy of the LGPL 2.1 (instead of LGPL 3). Indeed, all the LGPL code we use, is licensed under LGPL 2.1 (with some files being "2.1 or later"). --- COPYING.LGPL | 613 ++++++++++++++++++++++++++++++++++++++----------- COPYING.README | 3 + 2 files changed, 478 insertions(+), 138 deletions(-) diff --git a/COPYING.LGPL b/COPYING.LGPL index 0e4fa8aaf..4362b4915 100644 --- a/COPYING.LGPL +++ b/COPYING.LGPL @@ -1,165 +1,502 @@ GNU LESSER GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 + Version 2.1, February 1999 - Copyright (C) 2007 Free Software Foundation, Inc. + Copyright (C) 1991, 1999 Free Software Foundation, Inc. 
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] - This version of the GNU Lesser General Public License incorporates -the terms and conditions of version 3 of the GNU General Public -License, supplemented by the additional permissions listed below. + Preamble - 0. Additional Definitions. + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. - As used herein, "this License" refers to version 3 of the GNU Lesser -General Public License, and the "GNU GPL" refers to version 3 of the GNU -General Public License. + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. - "The Library" refers to a covered work governed by this License, -other than an Application or a Combined Work as defined below. + When we speak of free software, we are referring to freedom of use, +not price. 
Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. - An "Application" is any work that makes use of an interface provided -by the Library, but which is not otherwise based on the Library. -Defining a subclass of a class defined by the Library is deemed a mode -of using an interface provided by the Library. + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. - A "Combined Work" is a work produced by combining or linking an -Application with the Library. The particular version of the Library -with which the Combined Work was made is also called the "Linked -Version". + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. - The "Minimal Corresponding Source" for a Combined Work means the -Corresponding Source for the Combined Work, excluding any source code -for portions of the Combined Work that, considered in isolation, are -based on the Application, and not on the Linked Version. 
+ We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. - The "Corresponding Application Code" for a Combined Work means the -object code and/or source code for the Application, including any data -and utility programs needed for reproducing the Combined Work from the -Application, but excluding the System Libraries of the Combined Work. + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. - 1. Exception to Section 3 of the GNU GPL. + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. - You may convey a covered work under sections 3 and 4 of this License -without being bound by section 3 of the GNU GPL. + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. 
The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. - 2. Conveying Modified Versions. + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. - If you modify a copy of the Library, and, in your modifications, a -facility refers to a function or data to be supplied by an Application -that uses the facility (other than as an argument passed when the -facility is invoked), then you may convey a copy of the modified -version: + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. - a) under this License, provided that you make a good faith effort to - ensure that, in the event an Application does not supply the - function or data, the facility still operates, and performs - whatever part of its purpose remains meaningful, or + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. 
For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. - b) under the GNU GPL, with none of the additional permissions of - this License applicable to that copy. + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. - 3. Object Code Incorporating Material from Library Header Files. + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - The object code form of an Application may incorporate material from -a header file that is part of the Library. You may convey such object -code under terms of your choice, provided that, if the incorporated -material is not limited to numerical parameters, data structure -layouts and accessors, or small macros, inline functions and templates -(ten or fewer lines in length), you do both of the following: + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". - a) Give prominent notice with each copy of the object code that the - Library is used in it and that the Library and its use are - covered by this License. 
+ A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. - b) Accompany the object code with a copy of the GNU GPL and this license - document. + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) - 4. Combined Works. + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. - You may convey a Combined Work under terms of your choice that, -taken together, effectively do not restrict modification of the -portions of the Library contained in the Combined Work and reverse -engineering for debugging such modifications, if you also do each of -the following: + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. 
- a) Give prominent notice with each copy of the Combined Work that - the Library is used in it and that the Library and its use are - covered by this License. - - b) Accompany the Combined Work with a copy of the GNU GPL and this license - document. - - c) For a Combined Work that displays copyright notices during - execution, include the copyright notice for the Library among - these notices, as well as a reference directing the user to the - copies of the GNU GPL and this license document. - - d) Do one of the following: - - 0) Convey the Minimal Corresponding Source under the terms of this - License, and the Corresponding Application Code in a form - suitable for, and under terms that permit, the user to - recombine or relink the Application with a modified version of - the Linked Version to produce a modified Combined Work, in the - manner specified by section 6 of the GNU GPL for conveying - Corresponding Source. - - 1) Use a suitable shared library mechanism for linking with the - Library. A suitable mechanism is one that (a) uses at run time - a copy of the Library already present on the user's computer - system, and (b) will operate properly with a modified version - of the Library that is interface-compatible with the Linked - Version. - - e) Provide Installation Information, but only if you would otherwise - be required to provide such information under section 6 of the - GNU GPL, and only to the extent that such information is - necessary to install and execute a modified version of the - Combined Work produced by recombining or relinking the - Application with a modified version of the Linked Version. (If - you use option 4d0, the Installation Information must accompany - the Minimal Corresponding Source and Corresponding Application - Code. If you use option 4d1, you must provide the Installation - Information in the manner specified by section 6 of the GNU GPL - for conveying Corresponding Source.) - - 5. Combined Libraries. 
- - You may place library facilities that are a work based on the -Library side by side in a single library together with other library -facilities that are not Applications and are not covered by this -License, and convey such a combined library under terms of your -choice, if you do both of the following: - - a) Accompany the combined library with a copy of the same work based - on the Library, uncombined with any other library facilities, - conveyed under the terms of this License. - - b) Give prominent notice with the combined library that part of it - is a work based on the Library, and explaining where to find the - accompanying uncombined form of the same work. - - 6. Revised Versions of the GNU Lesser General Public License. - - The Free Software Foundation may publish revised and/or new versions -of the GNU Lesser General Public License from time to time. Such new -versions will be similar in spirit to the present version, but may -differ in detail to address new problems or concerns. - - Each version is given a distinguishing version number. If the -Library as you received it specifies that a certain numbered version -of the GNU Lesser General Public License "or any later version" -applies to it, you have the option of following the terms and -conditions either of that published version or of any later version -published by the Free Software Foundation. If the Library as you -received it does not specify a version number of the GNU Lesser -General Public License, you may choose any version of the GNU Lesser -General Public License ever published by the Free Software Foundation. - - If the Library as you received it specifies that a proxy can decide -whether future versions of the GNU Lesser General Public License shall -apply, that proxy's public statement of acceptance of any version is -permanent authorization for you to choose that version for the + 1. 
You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) 
+ +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. 
(It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. 
You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. 
It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! diff --git a/COPYING.README b/COPYING.README index 1d706784d..de5b63215 100644 --- a/COPYING.README +++ b/COPYING.README @@ -5,6 +5,9 @@ Eigen is primarily MPL2 licensed. See COPYING.MPL2 and these links: Some files contain third-party code under BSD or LGPL licenses, whence the other COPYING.* files here. +All the LGPL code is either LGPL 2.1-only, or LGPL 2.1-or-later. +For this reason, the COPYING.LGPL file contains the LGPL 2.1 text.
+ If you want to guarantee that the Eigen code that you are #including is licensed under the MPL2 and possibly more permissive licenses (like BSD), #define this preprocessor symbol: From 45672e724e80ef7b5c9a6837296c8e55ae6a62a1 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Tue, 11 Sep 2012 12:12:19 +0200 Subject: [PATCH 52/73] Incomplete Cholesky preconditioner... not yet stable --- .../IterativeLinearSolvers/IncompleteLUT.h | 101 ++++---- bench/spbench/sp_solver.cpp | 5 +- unsupported/Eigen/IterativeSolvers | 1 + .../src/IterativeSolvers/IncompleteCholesky.h | 221 ++++++++++++++++++ 4 files changed, 275 insertions(+), 53 deletions(-) create mode 100644 unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h diff --git a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h index 224304f0e..5a71531cd 100644 --- a/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +++ b/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h @@ -10,8 +10,56 @@ #ifndef EIGEN_INCOMPLETE_LUT_H #define EIGEN_INCOMPLETE_LUT_H + namespace Eigen { +namespace internal { + +/** + * Compute a quick-sort split of a vector + * On output, the vector row is permuted such that its elements satisfy + * abs(row(i)) >= abs(row(ncut)) if i<ncut + * abs(row(i)) <= abs(row(ncut)) if i>ncut + * \param row The vector of values + * \param ind The array of index for the elements in @p row + * \param ncut The number of largest elements to keep + **/ +template +int QuickSplit(VectorV &row, VectorI &ind, int ncut) +{ + typedef typename VectorV::RealScalar RealScalar; + using std::swap; + int mid; + int n = row.size(); /* length of the vector */ + int first, last ; + + ncut--; /* to fit the zero-based indices */ + first = 0; + last = n-1; + if (ncut < first || ncut > last ) return 0; + + do { + mid = first; + RealScalar abskey = std::abs(row(mid)); + for (int j = first + 1; j <= last; j++) { + if ( std::abs(row(j)) > abskey) { + ++mid; + swap(row(mid), row(j)); + swap(ind(mid), ind(j)); + } + } + /* 
Interchange for the pivot element */ + swap(row(mid), row(first)); + swap(ind(mid), ind(first)); + + if (mid > ncut) last = mid - 1; + else if (mid < ncut ) first = mid + 1; + } while (mid != ncut ); + + return 0; /* mid is equal to ncut */ +} + +}// end namespace internal /** * \brief Incomplete LU factorization with dual-threshold strategy * During the numerical factorization, two dropping rules are used : @@ -126,10 +174,6 @@ class IncompleteLUT : internal::noncopyable protected: - template - int QuickSplit(VectorV &row, VectorI &ind, int ncut); - - /** keeps off-diagonal entries; drops diagonal entries */ struct keep_diag { inline bool operator() (const Index& row, const Index& col, const Scalar&) const @@ -171,51 +215,6 @@ void IncompleteLUT::setFillfactor(int fillfactor) this->m_fillfactor = fillfactor; } - -/** - * Compute a quick-sort split of a vector - * On output, the vector row is permuted such that its elements satisfy - * abs(row(i)) >= abs(row(ncut)) if i<ncut - * abs(row(i)) <= abs(row(ncut)) if i>ncut - * \param row The vector of values - * \param ind The array of index for the elements in @p row - * \param ncut The number of largest elements to keep - **/ -template -template -int IncompleteLUT::QuickSplit(VectorV &row, VectorI &ind, int ncut) -{ - using std::swap; - int mid; - int n = row.size(); /* length of the vector */ - int first, last ; - - ncut--; /* to fit the zero-based indices */ - first = 0; - last = n-1; - if (ncut < first || ncut > last ) return 0; - - do { - mid = first; - RealScalar abskey = std::abs(row(mid)); - for (int j = first + 1; j <= last; j++) { - if ( std::abs(row(j)) > abskey) { - ++mid; - swap(row(mid), row(j)); - swap(ind(mid), ind(j)); - } - } - /* Interchange for the pivot element */ - swap(row(mid), row(first)); - swap(ind(mid), ind(first)); - - if (mid > ncut) last = mid - 1; - else if (mid < ncut ) first = mid + 1; - } while (mid != ncut ); - - return 0; /* mid is equal to ncut */ -} - template template void IncompleteLUT::analyzePattern(const _MatrixType& 
amat) @@ -400,7 +399,7 @@ void IncompleteLUT::factorize(const _MatrixType& amat) len = (std::min)(sizel, nnzL); typename Vector::SegmentReturnType ul(u.segment(0, sizel)); typename VectorXi::SegmentReturnType jul(ju.segment(0, sizel)); - QuickSplit(ul, jul, len); + internal::QuickSplit(ul, jul, len); // store the largest m_fill elements of the L part m_lu.startVec(ii); @@ -429,7 +428,7 @@ void IncompleteLUT::factorize(const _MatrixType& amat) len = (std::min)(sizeu, nnzU); typename Vector::SegmentReturnType uu(u.segment(ii+1, sizeu-1)); typename VectorXi::SegmentReturnType juu(ju.segment(ii+1, sizeu-1)); - QuickSplit(uu, juu, len); + internal::QuickSplit(uu, juu, len); // store the largest elements of the U part for(int k = ii + 1; k < ii + len; k++) diff --git a/bench/spbench/sp_solver.cpp b/bench/spbench/sp_solver.cpp index e18f2d1c3..a1f4bac8a 100644 --- a/bench/spbench/sp_solver.cpp +++ b/bench/spbench/sp_solver.cpp @@ -13,7 +13,7 @@ #include // #include #include - +#include using namespace std; using namespace Eigen; @@ -26,7 +26,8 @@ int main(int argc, char **args) VectorXd b, x, tmp; BenchTimer timer,totaltime; //SparseLU > solver; - SuperLU > solver; +// SuperLU > solver; + ConjugateGradient, Lower,IncompleteCholesky > solver; ifstream matrix_file; string line; int n; diff --git a/unsupported/Eigen/IterativeSolvers b/unsupported/Eigen/IterativeSolvers index 6c6946d91..c3cc97cd2 100644 --- a/unsupported/Eigen/IterativeSolvers +++ b/unsupported/Eigen/IterativeSolvers @@ -33,6 +33,7 @@ #include "../../Eigen/Jacobi" #include "../../Eigen/Householder" #include "src/IterativeSolvers/GMRES.h" +#include "src/IterativeSolvers/IncompleteCholesky.h" //#include "src/IterativeSolvers/SSORPreconditioner.h" //@} diff --git a/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h b/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h new file mode 100644 index 000000000..bdd494f26 --- /dev/null +++ b/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h 
@@ -0,0 +1,221 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_INCOMPLETE_CHOlESKY_H +#define EIGEN_INCOMPLETE_CHOlESKY_H +#include "Eigen/src/IterativeLinearSolvers/IncompleteLUT.h" +#include +#include + +namespace Eigen { +/** + * \brief Modified Incomplete Cholesky with dual threshold + * + * References : C-J. Lin and J. J. Moré, Incomplete Cholesky Factorizations with + * Limited memory, SIAM J. Sci. Comput. 21(1), pp. 24-45, 1999 + * + * \tparam _MatrixType The type of the sparse matrix. It should be a symmetric + * matrix. It is advised to give a row-oriented sparse matrix + * \tparam _UpLo The triangular part of the matrix to reference. + * \tparam _OrderingType + */ + +template > +class IncompleteCholesky : internal::noncopyable +{ + public: + typedef SparseMatrix MatrixType; + typedef _OrderingType OrderingType; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename MatrixType::Index Index; + typedef PermutationMatrix PermutationType; + typedef Matrix VectorType; + typedef Matrix IndexType; + + public: + IncompleteCholesky() {} + IncompleteCholesky(const MatrixType& matrix) + { + compute(matrix); + } + + Index rows() const { return m_L.rows(); } + + Index cols() const { return m_L.cols(); } + + + /** \brief Reports whether previous computation was successful. + * + * \returns \c Success if computation was succesful, + * \c NumericalIssue if the matrix appears to be negative. + */ + ComputationInfo info() const + { + eigen_assert(m_isInitialized && "IncompleteLLT is not initialized."); + return m_info; + } + /** + * \brief Computes the fill reducing permutation vector. 
+ */ + template + void analyzePattern(const MatrixType& mat) + { + OrderingType ord; + ord(mat, m_perm); + m_analysisIsOk = true; + } + + template + void factorize(const MatrixType& amat); + + template + void compute (const MatrixType& matrix) + { + analyzePattern(matrix); + factorize(matrix); + } + + template + void _solve(const Rhs& b, Dest& x) const + { + eigen_assert(m_factorizationIsOk && "factorize() should be called first"); + if (m_perm.rows() == b.rows()) + x = m_perm.inverse() * b; + else + x = b; + x = m_L.template triangularView().solve(x); + x = m_L.adjoint().template triangularView().solve(x); + if (m_perm.rows() == b.rows()) + x = m_perm * x; + } + template inline const internal::solve_retval + solve(const MatrixBase& b) const + { + eigen_assert(m_isInitialized && "IncompleteLLT is not initialized."); + eigen_assert(cols()==b.rows() + && "IncompleteLLT::solve(): invalid number of rows of the right hand side matrix b"); + return internal::solve_retval(*this, b.derived()); + } + protected: + SparseMatrix m_L; // The lower part stored in CSC + bool m_analysisIsOk; + bool m_factorizationIsOk; + bool m_isInitialized; + ComputationInfo m_info; + PermutationType m_perm; + +}; + +template +template +void IncompleteCholesky::factorize(const _MatrixType& mat) +{ + eigen_assert(m_analysisIsOk && "analyzePattern() should be called first"); + + // FIXME Stability: We should probably compute the scaling factors and the shifts that are needed to ensure an efficient LLT preconditioner. + + // Dropping strategies : Keep only the p largest elements per column, where p is the number of elements in the column of the original matrix. 
Other strategies will be added + + // Apply the fill-reducing permutation computed in analyzePattern() + if (m_perm.rows() == mat.rows() ) + m_L.template selfadjointView() = mat.template selfadjointView<_UpLo>().twistedBy(m_perm); + else + m_L.template selfadjointView() = mat.template selfadjointView<_UpLo>(); + + int n = mat.cols(); + + Scalar *vals = m_L.valuePtr(); //Values + Index *rowIdx = m_L.innerIndexPtr(); //Row indices + Index *colPtr = m_L.outerIndexPtr(); // Pointer to the beginning of each row + VectorType firstElt(n-1); // for each j, points to the next entry in vals that will be used in the factorization + // Initialize firstElt; + for (int j = 0; j < n-1; j++) firstElt(j) = colPtr[j]+1; + std::vector > listCol(n); // listCol(j) is a linked list of columns to update column j + VectorType curCol(n); // Store a nonzero values in each column + VectorType irow(n); // Row indices of nonzero elements in each column + // jki version of the Cholesky factorization + for (int j=0; j < n; j++) + { + //Left-looking factorize the column j + // First, load the jth column into curCol + Scalar diag = vals[colPtr[j]]; // Lower diagonal matrix with + curCol.setZero(); + irow.setLinSpaced(n,0,n-1); + for (int i = colPtr[j] + 1; i < colPtr[j+1]; i++) + { + curCol(rowIdx[i]) = vals[i]; + irow(rowIdx[i]) = rowIdx[i]; + } + + std::list::iterator k; + // Browse all previous columns that will update column j + for(k = listCol[j].begin(); k != listCol[j].end(); k++) + { + int jk = firstElt(*k); // First element to use in the column + Scalar a_jk = vals[jk]; + diag -= a_jk * a_jk; + jk += 1; + for (int i = jk; i < colPtr[*k]; i++) + { + curCol(rowIdx[i]) -= vals[i] * a_jk ; + } + firstElt(*k) = jk; + if (jk < colPtr[*k+1]) + { + // Add this column to the updating columns list for column *k+1 + listCol[rowIdx[jk]].push_back(*k); + } + } + + // Select the largest p elements + // p is the original number of elements in the column (without the diagonal) + int p = colPtr[j+1] - 
colPtr[j] - 2 ; + internal::QuickSplit(curCol, irow, p); + if(RealScalar(diag) <= 0) + { + m_info = NumericalIssue; + return; + } + RealScalar rdiag = internal::sqrt(RealScalar(diag)); + Scalar scal = Scalar(1)/rdiag; + vals[colPtr[j]] = rdiag; + // Insert the largest p elements in the matrix and scale them meanwhile + int cpt = 0; + for (int i = colPtr[j]+1; i < colPtr[j+1]; i++) + { + vals[i] = curCol(cpt) * scal; + rowIdx[i] = irow(cpt); + cpt ++; + } + } + m_factorizationIsOk = true; + m_isInitialized = true; + m_info = Success; +} + +namespace internal { + +template +struct solve_retval, Rhs> + : solve_retval_base, Rhs> +{ + typedef IncompleteCholesky<_MatrixType> Dec; + EIGEN_MAKE_SOLVE_HELPERS(Dec,Rhs) + + template void evalTo(Dest& dst) const + { + dec()._solve(rhs(),dst); + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif \ No newline at end of file From 9e80822fc974c7883d23b197a1a1063d34602420 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 11 Sep 2012 13:32:56 +0200 Subject: [PATCH 53/73] fix compilation on freebsd --- blas/common.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/blas/common.h b/blas/common.h index 26b4ed5a3..e6398e952 100644 --- a/blas/common.h +++ b/blas/common.h @@ -10,6 +10,9 @@ #ifndef EIGEN_BLAS_COMMON_H #define EIGEN_BLAS_COMMON_H +#include +#include + #include #include @@ -68,9 +71,6 @@ inline bool check_uplo(const char* uplo) return UPLO(*uplo)!=0xff; } -#include -#include - namespace Eigen { #include "BandTriangularSolver.h" From 0c584dcf4d840effa622045b91486dbed0777213 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 12 Sep 2012 17:50:07 +0200 Subject: [PATCH 54/73] fix compilation with m.array().min/max(scalar) --- Eigen/src/plugins/ArrayCwiseBinaryOps.h | 6 ++++-- Eigen/src/plugins/ArrayCwiseUnaryOps.h | 1 + test/array_for_matrix.cpp | 6 ++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h 
b/Eigen/src/plugins/ArrayCwiseBinaryOps.h index 5b979ebf8..1e751ad62 100644 --- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h @@ -33,7 +33,8 @@ EIGEN_MAKE_CWISE_BINARY_OP(min,internal::scalar_min_op) * * \sa max() */ -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const ConstantReturnType> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, + const CwiseNullaryOp, PlainObject> > (min)(const Scalar &other) const { return (min)(Derived::PlainObject::Constant(rows(), cols(), other)); @@ -52,7 +53,8 @@ EIGEN_MAKE_CWISE_BINARY_OP(max,internal::scalar_max_op) * * \sa min() */ -EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, const ConstantReturnType> +EIGEN_STRONG_INLINE const CwiseBinaryOp, const Derived, + const CwiseNullaryOp, PlainObject> > (max)(const Scalar &other) const { return (max)(Derived::PlainObject::Constant(rows(), cols(), other)); diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h index 0dffaf413..a59636790 100644 --- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h +++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h @@ -200,3 +200,4 @@ EIGEN_MAKE_SCALAR_CWISE_UNARY_OP(operator<=, std::less_equal) EIGEN_MAKE_SCALAR_CWISE_UNARY_OP(operator>, std::greater) EIGEN_MAKE_SCALAR_CWISE_UNARY_OP(operator>=, std::greater_equal) + diff --git a/test/array_for_matrix.cpp b/test/array_for_matrix.cpp index a9cd54294..5a599c321 100644 --- a/test/array_for_matrix.cpp +++ b/test/array_for_matrix.cpp @@ -168,6 +168,12 @@ template void cwise_min_max(const MatrixType& m) VERIFY_IS_APPROX(MatrixType::Constant(rows,cols, maxM1), m1.cwiseMax( maxM1)); VERIFY_IS_APPROX(m1, m1.cwiseMax( minM1)); + VERIFY_IS_APPROX(MatrixType::Constant(rows,cols, minM1).array(), (m1.array().min)( minM1)); + VERIFY_IS_APPROX(m1.array(), (m1.array().min)( maxM1)); + + VERIFY_IS_APPROX(MatrixType::Constant(rows,cols, maxM1).array(), (m1.array().max)( maxM1)); + VERIFY_IS_APPROX(m1.array(), (m1.array().max)( 
minM1)); + } template void resize(const MatrixTraits& t) From 48c4d48aec7b8aac992aabe265e7153e8a36c0bd Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 14 Sep 2012 09:54:56 +0200 Subject: [PATCH 55/73] workaround weird compilation error with MSVC --- Eigen/src/Core/MatrixBase.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index c00c1488c..31ebde8ab 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -237,7 +237,7 @@ template class MatrixBase // huuuge hack. make Eigen2's matrix.part() work in eigen3. Problem: Diagonal is now a class template instead // of an integer constant. Solution: overload the part() method template wrt template parameters list. - template class U> + template class U> const DiagonalWrapper part() const { return diagonal().asDiagonal(); } #endif // EIGEN2_SUPPORT From 7e0dd173120434e7c7cbb45b018e98d477baaa33 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Wed, 19 Sep 2012 18:32:02 +0200 Subject: [PATCH 56/73] Improve BiCGSTAB : With exact preconditioner, the solution should be found in one iteration --- Eigen/src/IterativeLinearSolvers/BiCGSTAB.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h index 126341be8..5a822e0ea 100644 --- a/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +++ b/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h @@ -39,10 +39,11 @@ bool bicgstab(const MatrixType& mat, const Rhs& rhs, Dest& x, int maxIters = iters; int n = mat.cols(); + x = precond.solve(x); VectorType r = rhs - mat * x; VectorType r0 = r; - RealScalar r0_sqnorm = r0.squaredNorm(); + RealScalar r0_sqnorm = rhs.squaredNorm(); Scalar rho = 1; Scalar alpha = 1; Scalar w = 1; @@ -223,7 +224,8 @@ public: template void _solve(const Rhs& b, Dest& x) const { - x.setZero(); +// x.setZero(); + x = b; _solveWithGuess(b,x); } From 
7740127e3da88512d409bf0b2a045f373d067af1 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 22 Sep 2012 11:11:26 +0200 Subject: [PATCH 57/73] Make Ref<> suitable for both Matrix and Array kinds. Note that Matrix kind objects can be implicitely converted to an Array kind Ref<> and vice versa --- Eigen/src/Core/Ref.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h index 38a838cf1..9c409eecf 100644 --- a/Eigen/src/Core/Ref.h +++ b/Eigen/src/Core/Ref.h @@ -195,12 +195,12 @@ template class Ref Base::construct(expr); } template - inline Ref(const MatrixBase& expr, + inline Ref(const DenseBase& expr, typename internal::enable_if::value&&bool(Traits::template match::MatchAtCompileTime)),Derived>::type* = 0, int = Derived::ThisConstantIsPrivateInPlainObjectBase) #else template - inline Ref(MatrixBase& expr) + inline Ref(DenseBase& expr) #endif { Base::construct(expr.const_cast_derived()); @@ -221,7 +221,7 @@ template class Ref - inline Ref(const MatrixBase& expr) + inline Ref(const DenseBase& expr) { // std::cout << match_helper::HasDirectAccess << "," << match_helper::OuterStrideMatch << "," << match_helper::InnerStrideMatch << "\n"; // std::cout << int(StrideType::OuterStrideAtCompileTime) << " - " << int(Derived::OuterStrideAtCompileTime) << "\n"; From a01371548dc66ee8cbfac8effd5f560bf5d5697a Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Tue, 25 Sep 2012 09:53:40 +0200 Subject: [PATCH 58/73] Define sparseLU functions as static --- Eigen/src/SparseLU/SparseLU.h | 97 +++++++------------ Eigen/src/SparseLU/SparseLU_Coletree.h | 16 +-- Eigen/src/SparseLU/SparseLU_Memory.h | 15 ++- Eigen/src/SparseLU/SparseLU_Utils.h | 8 +- Eigen/src/SparseLU/SparseLU_column_bmod.h | 9 +- Eigen/src/SparseLU/SparseLU_column_dfs.h | 17 ++-- Eigen/src/SparseLU/SparseLU_copy_to_ucol.h | 8 +- .../src/SparseLU/SparseLU_heap_relax_snode.h | 4 +- Eigen/src/SparseLU/SparseLU_kernel_bmod.h | 2 +- 
Eigen/src/SparseLU/SparseLU_panel_bmod.h | 12 +-- Eigen/src/SparseLU/SparseLU_panel_dfs.h | 14 +-- Eigen/src/SparseLU/SparseLU_pivotL.h | 7 +- Eigen/src/SparseLU/SparseLU_pruneL.h | 7 +- Eigen/src/SparseLU/SparseLU_relax_snode.h | 4 +- Eigen/src/SparseLU/SparseLU_snode_bmod.h | 8 +- Eigen/src/SparseLU/SparseLU_snode_dfs.h | 85 ++++++++-------- .../src/IterativeSolvers/IncompleteCholesky.h | 6 +- 17 files changed, 135 insertions(+), 184 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 77df091c3..f5d15ec6b 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -18,6 +18,8 @@ namespace Eigen { #include "SparseLU_Structs.h" #include "SparseLU_Matrix.h" +// Base structure containing all the factorization routines +#include "SparseLUBase.h" /** * \ingroup SparseLU_Module * \brief Sparse supernodal LU factorization for general matrices @@ -40,6 +42,7 @@ class SparseLU typedef Matrix ScalarVector; typedef Matrix IndexVector; typedef PermutationMatrix PermutationType; + public: SparseLU():m_isInitialized(true),m_Ustore(0,0,0,0,0,0),m_symmetricmode(false),m_diagpivotthresh(1.0) { @@ -58,6 +61,7 @@ class SparseLU void analyzePattern (const MatrixType& matrix); void factorize (const MatrixType& matrix); + void simplicialfactorize(const MatrixType& matrix); /** * Compute the symbolic and numeric factorization of the input sparse matrix. 
@@ -224,8 +228,7 @@ class SparseLU PermutationType m_perm_r ; // Row permutation IndexVector m_etree; // Column elimination tree - LU_GlobalLU_t m_glu; // persistent data to facilitate multiple factors - // FIXME All fields of this struct can be defined separately as class members + LU_GlobalLU_t m_glu; // SuperLU/SparseLU options bool m_symmetricmode; @@ -243,7 +246,6 @@ class SparseLU // Functions needed by the anaysis phase -#include "SparseLU_Coletree.h" /** * Compute the column permutation to minimize the fill-in (file amd.c ) * @@ -262,9 +264,6 @@ void SparseLU::analyzePattern(const MatrixType& mat) OrderingType ord; ord(mat,m_perm_c); - //FIXME Check the right semantic behind m_perm_c - // that is, column j of mat goes to column m_perm_c(j) of mat * m_perm_c; - // Apply the permutation to the column of the input matrix // m_mat = mat * m_perm_c.inverse(); //FIXME It should be less expensive here to permute only the structural pattern of the matrix @@ -282,13 +281,13 @@ void SparseLU::analyzePattern(const MatrixType& mat) // Compute the column elimination tree of the permuted matrix /*if (m_etree.size() == 0) */m_etree.resize(m_mat.cols()); - LU_sp_coletree(m_mat, m_etree); + SparseLUBase::LU_sp_coletree(m_mat, m_etree); // In symmetric mode, do not do postorder here if (!m_symmetricmode) { IndexVector post, iwork; // Post order etree - LU_TreePostorder(m_mat.cols(), m_etree, post); + SparseLUBase::LU_TreePostorder(m_mat.cols(), m_etree, post); // Renumber etree in postorder @@ -310,21 +309,7 @@ void SparseLU::analyzePattern(const MatrixType& mat) m_analysisIsOk = true; } -// Functions needed by the numerical factorization phase -#include "SparseLU_Memory.h" -#include "SparseLU_heap_relax_snode.h" -#include "SparseLU_relax_snode.h" -#include "SparseLU_snode_dfs.h" -#include "SparseLU_snode_bmod.h" -#include "SparseLU_pivotL.h" -#include "SparseLU_panel_dfs.h" -#include "SparseLU_kernel_bmod.h" -#include "SparseLU_panel_bmod.h" -#include 
"SparseLU_column_dfs.h" -#include "SparseLU_column_bmod.h" -#include "SparseLU_copy_to_ucol.h" -#include "SparseLU_pruneL.h" -#include "SparseLU_Utils.h" +// Functions needed by the numerical factorization phase /** @@ -370,7 +355,7 @@ void SparseLU::factorize(const MatrixType& matrix) int maxpanel = m_perfv.panel_size * m; // Allocate working storage common to the factor routines int lwork = 0; - int info = LUMemInit(m, n, nnz, lwork, m_perfv.fillfactor, m_perfv.panel_size, m_glu); + int info = SparseLUBase::LUMemInit(m, n, nnz, lwork, m_perfv.fillfactor, m_perfv.panel_size, m_glu); if (info) { std::cerr << "UNABLE TO ALLOCATE WORKING MEMORY\n\n" ; @@ -402,25 +387,17 @@ void SparseLU::factorize(const MatrixType& matrix) // Identify initial relaxed snodes IndexVector relax_end(n); if ( m_symmetricmode == true ) - LU_heap_relax_snode(n, m_etree, m_perfv.relax, marker, relax_end); + SparseLUBase::LU_heap_relax_snode(n, m_etree, m_perfv.relax, marker, relax_end); else - LU_relax_snode(n, m_etree, m_perfv.relax, marker, relax_end); + SparseLUBase::LU_relax_snode(n, m_etree, m_perfv.relax, marker, relax_end); m_perm_r.resize(m); m_perm_r.indices().setConstant(-1); marker.setConstant(-1); - IndexVector& xsup = m_glu.xsup; - IndexVector& supno = m_glu.supno; - IndexVector& xlsub = m_glu.xlsub; - IndexVector& xlusup = m_glu.xlusup; - IndexVector& xusub = m_glu.xusub; - ScalarVector& lusup = m_glu.lusup; - Index& nzlumax = m_glu.nzlumax; - - supno(0) = IND_EMPTY; xsup.setConstant(0); - xsup(0) = xlsub(0) = xusub(0) = xlusup(0) = Index(0); + m_glu.supno(0) = IND_EMPTY; m_glu.xsup.setConstant(0); + m_glu.xsup(0) = m_glu.xlsub(0) = m_glu.xusub(0) = m_glu.xlusup(0) = Index(0); // Work on one 'panel' at a time. 
A panel is one of the following : // (a) a relaxed supernode at the bottom of the etree, or @@ -441,7 +418,7 @@ void SparseLU::factorize(const MatrixType& matrix) // Factorize the relaxed supernode(jcol:kcol) // First, determine the union of the row structure of the snode - info = LU_snode_dfs(jcol, kcol, m_mat, xprune, marker, m_glu); + info = SparseLUBase::LU_snode_dfs(jcol, kcol, m_mat, xprune, marker, m_glu); if ( info ) { std::cerr << "MEMORY ALLOCATION FAILED IN SNODE_DFS() \n"; @@ -449,15 +426,15 @@ void SparseLU::factorize(const MatrixType& matrix) m_factorizationIsOk = false; return; } - nextu = xusub(jcol); //starting location of column jcol in ucol - nextlu = xlusup(jcol); //Starting location of column jcol in lusup (rectangular supernodes) - jsupno = supno(jcol); // Supernode number which column jcol belongs to - fsupc = xsup(jsupno); //First column number of the current supernode - new_next = nextlu + (xlsub(fsupc+1)-xlsub(fsupc)) * (kcol - jcol + 1); + nextu = m_glu.xusub(jcol); //starting location of column jcol in ucol + nextlu = m_glu.xlusup(jcol); //Starting location of column jcol in lusup (rectangular supernodes) + jsupno = m_glu.supno(jcol); // Supernode number which column jcol belongs to + fsupc = m_glu.xsup(jsupno); //First column number of the current supernode + new_next = nextlu + (m_glu.xlsub(fsupc+1)-m_glu.xlsub(fsupc)) * (kcol - jcol + 1); int mem; - while (new_next > nzlumax ) + while (new_next > m_glu.nzlumax ) { - mem = LUMemXpand(lusup, nzlumax, nextlu, LUSUP, m_glu.num_expansions); + mem = SparseLUBase::LUMemXpand(m_glu.lusup, m_glu.nzlumax, nextlu, LUSUP, m_glu.num_expansions); if (mem) { std::cerr << "MEMORY ALLOCATION FAILED FOR L FACTOR \n"; @@ -468,16 +445,16 @@ void SparseLU::factorize(const MatrixType& matrix) // Now, left-looking factorize each column within the snode for (icol = jcol; icol<=kcol; icol++){ - xusub(icol+1) = nextu; + m_glu.xusub(icol+1) = nextu; // Scatter into SPA dense(*) for (typename 
MatrixType::InnerIterator it(m_mat, icol); it; ++it) dense(it.row()) = it.value(); // Numeric update within the snode - LU_snode_bmod(icol, fsupc, dense, m_glu); + SparseLUBase::LU_snode_bmod(icol, fsupc, dense, m_glu); // Eliminate the current column - info = LU_pivotL(icol, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); + info = SparseLUBase::LU_pivotL(icol, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); if ( info ) { m_info = NumericalIssue; @@ -505,10 +482,10 @@ void SparseLU::factorize(const MatrixType& matrix) panel_size = n - jcol; // Symbolic outer factorization on a panel of columns - LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu); + SparseLUBase::LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu); // Numeric sup-panel updates in topological order - LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_perfv, m_glu); + SparseLUBase::LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_perfv, m_glu); // Sparse LU within the panel, and below the panel diagonal for ( jj = jcol; jj< jcol + panel_size; jj++) @@ -519,7 +496,7 @@ void SparseLU::factorize(const MatrixType& matrix) //Depth-first-search for the current column VectorBlock panel_lsubk(panel_lsub, k, m); VectorBlock repfnz_k(repfnz, k, m); - info = LU_column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); + info = SparseLUBase::LU_column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); if ( info ) { std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() \n"; @@ -530,7 +507,7 @@ void SparseLU::factorize(const MatrixType& matrix) // Numeric updates to this column VectorBlock 
dense_k(dense, k, m); VectorBlock segrep_k(segrep, nseg1, m-nseg1); - info = LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu); + info = SparseLUBase::LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu); if ( info ) { std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() \n"; @@ -540,7 +517,7 @@ void SparseLU::factorize(const MatrixType& matrix) } // Copy the U-segments to ucol(*) - info = LU_copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu); + info = SparseLUBase::LU_copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu); if ( info ) { std::cerr << "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() \n"; @@ -550,7 +527,7 @@ void SparseLU::factorize(const MatrixType& matrix) } // Form the L-segment - info = LU_pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); + info = SparseLUBase::LU_pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); if ( info ) { std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... 
ZERO COLUMN AT " << info <::factorize(const MatrixType& matrix) } // Prune columns (0:jj-1) using column jj - LU_pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu); + SparseLUBase::LU_pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu); // Reset repfnz for this column for (i = 0; i < nseg; i++) @@ -574,11 +551,9 @@ void SparseLU::factorize(const MatrixType& matrix) } // end for -- end elimination // Count the number of nonzeros in factors - LU_countnz(n, m_nnzL, m_nnzU, m_glu); + SparseLUBase::LU_countnz(n, m_nnzL, m_nnzU, m_glu); // Apply permutation to the L subscripts - LU_fixupL(n, m_perm_r.indices(), m_glu); - - + SparseLUBase::LU_fixupL(n, m_perm_r.indices(), m_glu); // Create supernode matrix L m_Lstore.setInfos(m, n, m_glu.lusup, m_glu.xlusup, m_glu.lsub, m_glu.xlsub, m_glu.supno, m_glu.xsup); @@ -589,7 +564,7 @@ void SparseLU::factorize(const MatrixType& matrix) m_factorizationIsOk = true; } - +// #include "SparseLU_simplicialfactorize.h" namespace internal { template @@ -607,7 +582,5 @@ struct solve_retval, Rhs> } // end namespace internal - - } // End namespace Eigen -#endif \ No newline at end of file +#endif diff --git a/Eigen/src/SparseLU/SparseLU_Coletree.h b/Eigen/src/SparseLU/SparseLU_Coletree.h index 964f5e433..bb4067a45 100644 --- a/Eigen/src/SparseLU/SparseLU_Coletree.h +++ b/Eigen/src/SparseLU/SparseLU_Coletree.h @@ -31,8 +31,8 @@ #ifndef SPARSELU_COLETREE_H #define SPARSELU_COLETREE_H /** Find the root of the tree/set containing the vertex i : Use Path halving */ -template -int etree_find (int i, IndexVector& pp) +template< typename Scalar,typename Index> +int SparseLUBase::etree_find (int i, IndexVector& pp) { int p = pp(i); // Parent int gp = pp(p); // Grand parent @@ -50,8 +50,8 @@ int etree_find (int i, IndexVector& pp) * NOTE : The matrix is supposed to be in column-major format. 
* */ -template -int LU_sp_coletree(const MatrixType& mat, IndexVector& parent) +template +int SparseLUBase::LU_sp_coletree(const MatrixType& mat, IndexVector& parent) { int nc = mat.cols(); // Number of columns int nr = mat.rows(); // Number of rows @@ -106,8 +106,8 @@ int LU_sp_coletree(const MatrixType& mat, IndexVector& parent) * Depth-first search from vertex n. No recursion. * This routine was contributed by Cédric Doucet, CEDRAT Group, Meylan, France. */ -template -void LU_nr_etdfs (int n, IndexVector& parent, IndexVector& first_kid, IndexVector& next_kid, IndexVector& post, int postnum) +template +void SparseLUBase::LU_nr_etdfs (int n, IndexVector& parent, IndexVector& first_kid, IndexVector& next_kid, IndexVector& post, int postnum) { int current = n, first, next; while (postnum != n) @@ -152,8 +152,8 @@ void LU_nr_etdfs (int n, IndexVector& parent, IndexVector& first_kid, IndexVecto * \param parent Input tree * \param post postordered tree */ -template -void LU_TreePostorder(int n, IndexVector& parent, IndexVector& post) +template +void SparseLUBase::LU_TreePostorder(int n, IndexVector& parent, IndexVector& post) { IndexVector first_kid, next_kid; // Linked list of children int postnum; diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h index 48b36f5b4..0396ab61f 100644 --- a/Eigen/src/SparseLU/SparseLU_Memory.h +++ b/Eigen/src/SparseLU/SparseLU_Memory.h @@ -49,8 +49,9 @@ * \param keep_prev 1: use length and do not expand the vector; 0: compute new_len and expand * \param [in,out]num_expansions Number of times the memory has been expanded */ -template -int expand(VectorType& vec, int& length, int nbElts, int keep_prev, int& num_expansions) +template +template +int SparseLUBase::expand(VectorType& vec, int& length, int nbElts, int keep_prev, int& num_expansions) { float alpha = 1.5; // Ratio of the memory increase @@ -122,12 +123,9 @@ int expand(VectorType& vec, int& length, int nbElts, int keep_prev, int& num_ex * 
\return an estimated size of the required memory if lwork = -1; otherwise, return the size of actually allocated memory when allocation failed, and 0 on success * NOTE Unlike SuperLU, this routine does not support successive factorization with the same pattern and the same row permutation */ -template -int LUMemInit(int m, int n, int annz, int lwork, int fillratio, int panel_size, LU_GlobalLU_t& glu) +template +int SparseLUBase::LUMemInit(int m, int n, int annz, int lwork, int fillratio, int panel_size, GlobalLU_t& glu) { - typedef typename ScalarVector::Scalar Scalar; - typedef typename IndexVector::Scalar Index; - int& num_expansions = glu.num_expansions; //No memory expansions so far num_expansions = 0; glu.nzumax = glu.nzlumax = std::max(fillratio * annz, m*n); // estimated number of nonzeros in U @@ -187,8 +185,9 @@ int LUMemInit(int m, int n, int annz, int lwork, int fillratio, int panel_size, * \param glu Global data structure * \return 0 on success, > 0 size of the memory allocated so far */ +template template -int LUMemXpand(VectorType& vec, int& maxlen, int nbElts, LU_MemType memtype, int& num_expansions) +int SparseLUBase::LUMemXpand(VectorType& vec, int& maxlen, int nbElts, LU_MemType memtype, int& num_expansions) { int failed_size; if (memtype == USUB) diff --git a/Eigen/src/SparseLU/SparseLU_Utils.h b/Eigen/src/SparseLU/SparseLU_Utils.h index 316b09ab0..b13930dbb 100644 --- a/Eigen/src/SparseLU/SparseLU_Utils.h +++ b/Eigen/src/SparseLU/SparseLU_Utils.h @@ -15,8 +15,8 @@ /** * \brief Count Nonzero elements in the factors */ -template -void LU_countnz(const int n, int& nnzL, int& nnzU, LU_GlobalLU_t& glu) +template +void SparseLUBase::LU_countnz(const int n, int& nnzL, int& nnzU, GlobalLU_t& glu) { nnzL = 0; nnzU = (glu.xusub)(n); @@ -46,8 +46,8 @@ void LU_countnz(const int n, int& nnzL, int& nnzU, LU_GlobalLU_t -void LU_fixupL(const int n, const IndexVector& perm_r, LU_GlobalLU_t& glu) +template +void SparseLUBase::LU_fixupL(const int n, const 
IndexVector& perm_r, GlobalLU_t& glu) { int fsupc, i, j, k, jstart; diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h index bf25a33fc..b268c4348 100644 --- a/Eigen/src/SparseLU/SparseLU_column_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -46,11 +46,9 @@ * > 0 - number of bytes allocated when run out of space * */ -template -int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, ScalarVector& tempv, BlockIndexVector& segrep, BlockIndexVector& repfnz, int fpanelc, LU_GlobalLU_t& glu) +template +int SparseLUBase::LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, ScalarVector& tempv, BlockIndexVector& segrep, BlockIndexVector& repfnz, int fpanelc, GlobalLU_t& glu) { - typedef typename IndexVector::Scalar Index; - typedef typename ScalarVector::Scalar Scalar; int jsupno, k, ksub, krep, ksupno; int lptr, nrow, isub, irow, nextlu, new_next, ufirst; int fsupc, nsupc, nsupr, luptr, kfnz, no_zeros; @@ -95,9 +93,6 @@ int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, Sca nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); nrow = nsupr - d_fsupc - nsupc; - // NOTE Unlike the original implementation in SuperLU, the only feature - // available here is a sup-col update. 
- // Perform a triangular solver and block update, // then scatter the result of sup-col update to dense no_zeros = kfnz - fst_col; diff --git a/Eigen/src/SparseLU/SparseLU_column_dfs.h b/Eigen/src/SparseLU/SparseLU_column_dfs.h index 568e0686c..fa8dcf18d 100644 --- a/Eigen/src/SparseLU/SparseLU_column_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_column_dfs.h @@ -42,15 +42,15 @@ * \param m number of rows in the matrix * \param jcol Current column * \param perm_r Row permutation - * \param maxsuper + * \param maxsuper Maximum number of column allowed in a supernode * \param [in,out] nseg Number of segments in current U[*,j] - new segments appended * \param lsub_col defines the rhs vector to start the dfs * \param [in,out] segrep Segment representatives - new segments appended - * \param repfnz + * \param repfnz First nonzero location in each row * \param xprune - * \param marker + * \param marker marker[i] == jj, if i was visited during dfs of current column jj; * \param parent - * \param xplore + * \param xplore working array * \param glu global LU data * \return 0 success * > 0 number of bytes allocated when run out of space @@ -60,6 +60,7 @@ template struct LU_column_dfs_traits { typedef typename IndexVector::Scalar Index; + typedef typename ScalarVector::Scalar Scalar; LU_column_dfs_traits(Index jcol, Index& jsuper, LU_GlobalLU_t& glu) : m_jcol(jcol), m_jsuper_ref(jsuper), m_glu(glu) {} @@ -70,7 +71,7 @@ struct LU_column_dfs_traits void mem_expand(IndexVector& lsub, int& nextl, int chmark) { if (nextl >= m_glu.nzlmax) - LUMemXpand(lsub, m_glu.nzlmax, nextl, LSUB, m_glu.num_expansions); + SparseLUBase::LUMemXpand(lsub, m_glu.nzlmax, nextl, LSUB, m_glu.num_expansions); if (chmark != (m_jcol-1)) m_jsuper_ref = IND_EMPTY; } enum { ExpandMem = true }; @@ -80,11 +81,9 @@ struct LU_column_dfs_traits LU_GlobalLU_t& m_glu; }; -template -int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper, int& nseg, BlockIndexVector& lsub_col, IndexVector& segrep, 
BlockIndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& glu) +template +int SparseLUBase::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper, int& nseg, BlockIndexVector& lsub_col, IndexVector& segrep, BlockIndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu) { - typedef typename IndexVector::Scalar Index; - typedef typename ScalarVector::Scalar Scalar; int jsuper = glu.supno(jcol); int nextl = glu.xlsub(jcol); diff --git a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h index 541785881..d3227469d 100644 --- a/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +++ b/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h @@ -43,11 +43,9 @@ * > 0 - number of bytes allocated when run out of space * */ -template -int LU_copy_to_ucol(const int jcol, const int nseg, SegRepType& segrep, RepfnzType& repfnz ,IndexVector& perm_r, DenseType& dense, LU_GlobalLU_t& glu) -{ - typedef typename IndexVector::Scalar Index; - typedef typename ScalarVector::Scalar Scalar; +template +int SparseLUBase::LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segrep, BlockIndexVector& repfnz ,IndexVector& perm_r, BlockScalarVector& dense, GlobalLU_t& glu) +{ Index ksub, krep, ksupno; Index jsupno = glu.supno(jcol); diff --git a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h index 1bda70aaf..6d3271aff 100644 --- a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +++ b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h @@ -38,8 +38,8 @@ * \param descendants Number of descendants of each node in the etree * \param relax_end last column in a supernode */ -template -void LU_heap_relax_snode (const int n, IndexVector& et, const int relax_columns, IndexVector& descendants, IndexVector& relax_end) +template +void SparseLUBase::LU_heap_relax_snode (const int n, 
IndexVector& et, const int relax_columns, IndexVector& descendants, IndexVector& relax_end) { // The etree may not be postordered, but its heap ordered diff --git a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h index d5cad49b1..b15ff9c50 100644 --- a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h @@ -30,7 +30,7 @@ template struct LU_kernel_bmod template EIGEN_DONT_INLINE static void run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, int& luptr, const int nsupr, const int nrow, IndexVector& lsub, const int lptr, const int no_zeros) { - typedef typename ScalarVector::Scalar Scalar; + typedef typename ScalarVector::Scalar Scalar; // First, copy U[*,j] segment from dense(*) to tempv(*) // The result of triangular solve is in tempv[*]; // The result of matric-vector update is in dense[*] diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h index 1b31cc31a..ceb6c5938 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ -34,7 +34,7 @@ * \brief Performs numeric block updates (sup-panel) in topological order. * * Before entering this routine, the original nonzeros in the panel - * were already copied i nto the spa[m,w] ... 
FIXME to be checked + * were already copied i nto the spa[m,w] * * \param m number of rows in the matrix * \param w Panel size @@ -48,10 +48,9 @@ * * */ -template -void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, DenseIndexBlock& segrep, DenseIndexBlock& repfnz, LU_perfvalues& perfv, LU_GlobalLU_t& glu) +template +void SparseLUBase::LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, LU_perfvalues& perfv, GlobalLU_t& glu) { - typedef typename ScalarVector::Scalar Scalar; int ksub,jj,nextl_col; int fsupc, nsupc, nsupr, nrow; @@ -190,9 +189,6 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca segsize = krep - kfnz + 1; luptr = glu.xlusup(fsupc); - // NOTE : Unlike the original implementation in SuperLU, - // there is no update feature for col-col, 2col-col ... - // Perform a trianglar solve and block update, // then scatter the result of sup-col update to dense[] no_zeros = kfnz - fsupc; @@ -205,4 +201,4 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca } // End for each updating supernode } -#endif \ No newline at end of file +#endif diff --git a/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/Eigen/src/SparseLU/SparseLU_panel_dfs.h index 3581f6d9c..164417897 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_panel_dfs.h @@ -29,12 +29,12 @@ */ #ifndef SPARSELU_PANEL_DFS_H #define SPARSELU_PANEL_DFS_H - -template -void LU_dfs_kernel(const int jj, IndexVector& perm_r, +template +template +void SparseLUBase::LU_dfs_kernel(const int jj, IndexVector& perm_r, int& nseg, IndexVector& panel_lsub, IndexVector& segrep, - VectorBlock& repfnz_col, IndexVector& xprune, MarkerType& marker, IndexVector& parent, - IndexVector& xplore, LU_GlobalLU_t& glu, + RepfnzType& repfnz_col, IndexVector& xprune, MarkerType& 
marker, IndexVector& parent, + IndexVector& xplore, GlobalLU_t& glu, int& nextl_col, int krow, Traits& traits ) { @@ -207,8 +207,8 @@ struct LU_panel_dfs_traits Index* m_marker; }; -template -void LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, IndexVector& perm_r, int& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t& glu) +template +void SparseLUBase::LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, IndexVector& perm_r, int& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu) { int nextl_col; // Next available position in panel_lsub[*,jj] diff --git a/Eigen/src/SparseLU/SparseLU_pivotL.h b/Eigen/src/SparseLU/SparseLU_pivotL.h index 4ad49adee..c4a9f1c74 100644 --- a/Eigen/src/SparseLU/SparseLU_pivotL.h +++ b/Eigen/src/SparseLU/SparseLU_pivotL.h @@ -52,12 +52,9 @@ * \return 0 if success, i > 0 if U(i,i) is exactly zero * */ -template -int LU_pivotL(const int jcol, const typename ScalarVector::RealScalar diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, int& pivrow, LU_GlobalLU_t& glu) +template +int SparseLUBase::LU_pivotL(const int jcol, const RealScalar diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, int& pivrow, GlobalLU_t& glu) { - typedef typename IndexVector::Scalar Index; - typedef typename ScalarVector::Scalar Scalar; - typedef typename ScalarVector::RealScalar RealScalar; Index fsupc = (glu.xsup)((glu.supno)(jcol)); // First column in the supernode containing the column jcol Index nsupc = jcol - fsupc; // Number of columns in the supernode portion, excluding jcol; nsupc >=0 diff --git a/Eigen/src/SparseLU/SparseLU_pruneL.h b/Eigen/src/SparseLU/SparseLU_pruneL.h index f29285bd4..d8c58e039 100644 --- 
a/Eigen/src/SparseLU/SparseLU_pruneL.h +++ b/Eigen/src/SparseLU/SparseLU_pruneL.h @@ -46,12 +46,9 @@ * \param glu Global LU data * */ -template -void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, const int nseg, const IndexVector& segrep, BlockIndexVector& repfnz, IndexVector& xprune, LU_GlobalLU_t& glu) +template +void SparseLUBase::LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, const int nseg, const IndexVector& segrep, BlockIndexVector& repfnz, IndexVector& xprune, GlobalLU_t& glu) { - typedef typename IndexVector::Scalar Index; - typedef typename ScalarVector::Scalar Scalar; - // For each supernode-rep irep in U(*,j] int jsupno = glu.supno(jcol); int i,irep,irep1; diff --git a/Eigen/src/SparseLU/SparseLU_relax_snode.h b/Eigen/src/SparseLU/SparseLU_relax_snode.h index a9a0a00c1..8db8619c1 100644 --- a/Eigen/src/SparseLU/SparseLU_relax_snode.h +++ b/Eigen/src/SparseLU/SparseLU_relax_snode.h @@ -37,8 +37,8 @@ * \param descendants Number of descendants of each node in the etree * \param relax_end last column in a supernode */ -template -void LU_relax_snode (const int n, IndexVector& et, const int relax_columns, IndexVector& descendants, IndexVector& relax_end) +template +void SparseLUBase::LU_relax_snode (const int n, IndexVector& et, const int relax_columns, IndexVector& descendants, IndexVector& relax_end) { // compute the number of descendants of each node in the etree diff --git a/Eigen/src/SparseLU/SparseLU_snode_bmod.h b/Eigen/src/SparseLU/SparseLU_snode_bmod.h index 18e6a93d2..beea71e31 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_snode_bmod.h @@ -29,11 +29,9 @@ */ #ifndef SPARSELU_SNODE_BMOD_H #define SPARSELU_SNODE_BMOD_H -template -int LU_snode_bmod (const int jcol, const int fsupc, ScalarVector& dense, LU_GlobalLU_t& glu) -{ - typedef typename ScalarVector::Scalar Scalar; - +template +int SparseLUBase::LU_snode_bmod (const int jcol, const int fsupc, ScalarVector& 
dense, GlobalLU_t& glu) +{ /* lsub : Compressed row subscripts of ( rectangular supernodes ) * xlsub : xlsub[j] is the starting location of the j-th column in lsub(*) * lusup : Numerical values of the rectangular supernodes diff --git a/Eigen/src/SparseLU/SparseLU_snode_dfs.h b/Eigen/src/SparseLU/SparseLU_snode_dfs.h index edb927cdc..199436cd7 100644 --- a/Eigen/src/SparseLU/SparseLU_snode_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_snode_dfs.h @@ -42,55 +42,54 @@ * \param marker (in/out) working vector * \return 0 on success, > 0 size of the memory when memory allocation failed */ - template - int LU_snode_dfs(const int jcol, const int kcol,const MatrixType& mat, IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t& glu) +template +int SparseLUBase::LU_snode_dfs(const int jcol, const int kcol,const MatrixType& mat, IndexVector& xprune, IndexVector& marker, GlobalLU_t& glu) +{ + int mem; + Index nsuper = ++glu.supno(jcol); // Next available supernode number + int nextl = glu.xlsub(jcol); //Index of the starting location of the jcol-th column in lsub + int krow,kmark; + for (int i = jcol; i <=kcol; i++) { - typedef typename IndexVector::Scalar Index; - int mem; - Index nsuper = ++glu.supno(jcol); // Next available supernode number - int nextl = glu.xlsub(jcol); //Index of the starting location of the jcol-th column in lsub - int krow,kmark; - for (int i = jcol; i <=kcol; i++) + // For each nonzero in A(*,i) + for (typename MatrixType::InnerIterator it(mat, i); it; ++it) { - // For each nonzero in A(*,i) - for (typename MatrixType::InnerIterator it(mat, i); it; ++it) + krow = it.row(); + kmark = marker(krow); + if ( kmark != kcol ) { - krow = it.row(); - kmark = marker(krow); - if ( kmark != kcol ) + // First time to visit krow + marker(krow) = kcol; + glu.lsub(nextl++) = krow; + if( nextl >= glu.nzlmax ) { - // First time to visit krow - marker(krow) = kcol; - glu.lsub(nextl++) = krow; - if( nextl >= glu.nzlmax ) - { - mem = LUMemXpand(glu.lsub, glu.nzlmax, nextl, 
LSUB, glu.num_expansions); - if (mem) return mem; // Memory expansion failed... Return the memory allocated so far - } + mem = LUMemXpand(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions); + if (mem) return mem; // Memory expansion failed... Return the memory allocated so far } } - glu.supno(i) = nsuper; } - - // If supernode > 1, then make a copy of the subscripts for pruning - if (jcol < kcol) - { - Index new_next = nextl + (nextl - glu.xlsub(jcol)); - while (new_next > glu.nzlmax) - { - mem = LUMemXpand(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions); - if (mem) return mem; // Memory expansion failed... Return the memory allocated so far - } - Index ifrom, ito = nextl; - for (ifrom = glu.xlsub(jcol); ifrom < nextl;) - glu.lsub(ito++) = glu.lsub(ifrom++); - for (int i = jcol+1; i <=kcol; i++) glu.xlsub(i) = nextl; - nextl = ito; - } - glu.xsup(nsuper+1) = kcol + 1; // Start of next available supernode - glu.supno(kcol+1) = nsuper; - xprune(kcol) = nextl; - glu.xlsub(kcol+1) = nextl; - return 0; + glu.supno(i) = nsuper; } + + // If supernode > 1, then make a copy of the subscripts for pruning + if (jcol < kcol) + { + Index new_next = nextl + (nextl - glu.xlsub(jcol)); + while (new_next > glu.nzlmax) + { + mem = LUMemXpand(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions); + if (mem) return mem; // Memory expansion failed... 
Return the memory allocated so far + } + Index ifrom, ito = nextl; + for (ifrom = glu.xlsub(jcol); ifrom < nextl;) + glu.lsub(ito++) = glu.lsub(ifrom++); + for (int i = jcol+1; i <=kcol; i++) glu.xlsub(i) = nextl; + nextl = ito; + } + glu.xsup(nsuper+1) = kcol + 1; // Start of next available supernode + glu.supno(kcol+1) = nsuper; + xprune(kcol) = nextl; + glu.xlsub(kcol+1) = nextl; + return 0; +} #endif \ No newline at end of file diff --git a/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h b/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h index bdd494f26..5bc41c0f8 100644 --- a/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h +++ b/unsupported/Eigen/src/IterativeSolvers/IncompleteCholesky.h @@ -118,7 +118,7 @@ void IncompleteCholesky::factorize(const _MatrixType { eigen_assert(m_analysisIsOk && "analyzePattern() should be called first"); - // FIXME Stability: We should probably compute the scaling factors and the shifts that are needed to ensure an efficient LLT preconditioner. + // FIXME Stability: We should probably compute the scaling factors and the shifts that are needed to ensure a succesful LLT factorization and an efficient preconditioner. // Dropping strategies : Keep only the p largest elements per column, where p is the number of elements in the column of the original matrix. 
Other strategies will be added @@ -177,8 +177,8 @@ void IncompleteCholesky::factorize(const _MatrixType // p is the original number of elements in the column (without the diagonal) int p = colPtr[j+1] - colPtr[j] - 2 ; internal::QuickSplit(curCol, irow, p); - if(RealScalar(diag) <= 0) - { + if(RealScalar(diag) <= 0) + { //FIXME We can use heuristics (Kershaw, 1978 or above reference ) to get a dynamic shift m_info = NumericalIssue; return; } From 088379ac2fcad3d73ef50f2763b34d5fa1197a5f Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Tue, 25 Sep 2012 09:58:29 +0200 Subject: [PATCH 59/73] Fix MSVC compile error in SparseLU --- Eigen/src/SparseLU/SparseLU.h | 2 +- Eigen/src/SparseLU/SparseLU_Memory.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index f5d15ec6b..6d0698a3d 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -470,7 +470,7 @@ void SparseLU::factorize(const MatrixType& matrix) // Adjust panel size so that a panel won't overlap with the next relaxed snode. 
int panel_size = m_perfv.panel_size; // upper bound on panel width - for (k = jcol + 1; k < std::min(jcol+panel_size, n); k++) + for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++) { if (relax_end(k) != IND_EMPTY) { diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h index 0396ab61f..62adcb4a2 100644 --- a/Eigen/src/SparseLU/SparseLU_Memory.h +++ b/Eigen/src/SparseLU/SparseLU_Memory.h @@ -32,7 +32,7 @@ #define EIGEN_SPARSELU_MEMORY #define LU_NO_MARKER 3 -#define LU_NUM_TEMPV(m,w,t,b) (std::max(m, (t+b)*w) ) +#define LU_NUM_TEMPV(m,w,t,b) ((std::max)(m, (t+b)*w) ) #define IND_EMPTY (-1) #define LU_Reduce(alpha) ((alpha + 1) / 2) // i.e (alpha-1)/2 + 1 From 5a3f49036b8ba786e018150b462a226b183290b0 Mon Sep 17 00:00:00 2001 From: Hauke Heibel Date: Tue, 25 Sep 2012 11:39:40 +0200 Subject: [PATCH 60/73] Removed scaling from the umeyama when it is not requested. --- Eigen/src/Geometry/Umeyama.h | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/Eigen/src/Geometry/Umeyama.h b/Eigen/src/Geometry/Umeyama.h index ac0939cde..345b47e0c 100644 --- a/Eigen/src/Geometry/Umeyama.h +++ b/Eigen/src/Geometry/Umeyama.h @@ -153,16 +153,21 @@ umeyama(const MatrixBase& src, const MatrixBase& dst, boo Rt.block(0,0,m,m).noalias() = svd.matrixU() * S.asDiagonal() * svd.matrixV().transpose(); } - // Eq. (42) - const Scalar c = 1/src_var * svd.singularValues().dot(S); + if (with_scaling) + { + // Eq. (42) + const Scalar c = 1/src_var * svd.singularValues().dot(S); - // Eq. (41) - // Note that we first assign dst_mean to the destination so that there no need - // for a temporary. - Rt.col(m).head(m) = dst_mean; - Rt.col(m).head(m).noalias() -= c*Rt.topLeftCorner(m,m)*src_mean; - - if (with_scaling) Rt.block(0,0,m,m) *= c; + // Eq. 
(41) + Rt.col(m).head(m) = dst_mean; + Rt.col(m).head(m).noalias() -= c*Rt.topLeftCorner(m,m)*src_mean; + Rt.block(0,0,m,m) *= c; + } + else + { + Rt.col(m).head(m) = dst_mean; + Rt.col(m).head(m).noalias() -= Rt.topLeftCorner(m,m)*src_mean; + } return Rt; } From 15a9f6b9c1d2673f5c319c826794c4bce0282696 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Tue, 25 Sep 2012 11:48:18 +0200 Subject: [PATCH 61/73] Doc for sparseLU --- Eigen/src/SparseLU/SparseLU.h | 59 ++++++++++++++++++++++++++++----- bench/spbench/test_sparseLU.cpp | 2 +- 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 6d0698a3d..6f4458a26 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -24,9 +24,50 @@ namespace Eigen { * \ingroup SparseLU_Module * \brief Sparse supernodal LU factorization for general matrices * - * This class implements the supernodal LU factorization for general matrices. + * This class implements the supernodal LU factorization for general matrices. + * It uses the main techniques from the sequential SuperLU package + * (http://crd-legacy.lbl.gov/~xiaoye/SuperLU/). It handles transparently real + * and complex arithmetics with single and double precision, depending on the + * scalar type of your input matrix. + * The code has been optimized to provide BLAS-3 operations during supernode-panel updates. + * It benefits directly from the built-in high-performant Eigen BLAS routines. + * Moreover, when the size of a supernode is very small, the BLAS calls are avoided to + * enable a better optimization from the compiler. For best performance, + * you should compile it with NDEBUG flag to avoid the numerous bounds checking on vectors. + * + * An important parameter of this class is the ordering method. It is used to reorder the columns + * (and eventually the rows) of the matrix to reduce the number of new elements that are created during + * numerical factorization. 
The cheapest method available is COLAMD. + * See \link Ordering_Modules the Ordering module \endlink for the list of + * built-in and external ordering methods. + * + * Simple example with key steps + * \code + * VectorXd x(n), b(n); + * SparseMatrix A; + * SparseLU, COLAMDOrdering > solver; + * // fill A and b; + * // Compute the ordering permutation vector from the structural pattern of A + * solver.analyzePattern(A); + * // Compute the numerical factorization + * solver.factorize(A); + * //Use the factors to solve the linear system + * x = solver.solve(b); + * \endcode + * + * \WARNING The input matrix A should be in a \b compressed and \b column-major form. + * Otherwise an expensive copy will be made. You can call the inexpensive makeCompressed() to get a compressed matrix. + * + * \NOTE Unlike the initial SuperLU implementation, there is no step to equilibrate the matrix. + * For badly scaled matrices, this step can be useful to reduce the pivoting during factorization. + * If this is the case for your matrices, you can try the basic scaling method in \ref Scaling. * * \tparam _MatrixType The type of the sparse matrix. 
It must be a column-major SparseMatrix<> + * \tparam _OrderingType The ordering method to use, either AMD, COLAMD or METIS + * + * + * \sa \ref TutorialSparseDirectSolvers + * \sa \ref Ordering_Modules */ template class SparseLU @@ -247,13 +288,13 @@ class SparseLU // Functions needed by the anaysis phase /** - * Compute the column permutation to minimize the fill-in (file amd.c ) + * Compute the column permutation to minimize the fill-in * * - Apply this permutation to the input matrix - * - * - Compute the column elimination tree on the permuted matrix (file Eigen_Coletree.h) + * - Compute the column elimination tree on the permuted matrix * - * - Postorder the elimination tree and the column permutation (file Eigen_Coletree.h) + * - Postorder the elimination tree and the column permutation * */ template @@ -315,15 +356,17 @@ void SparseLU::analyzePattern(const MatrixType& mat) /** * - Numerical factorization * - Interleaved with the symbolic factorization - * \tparam MatrixType The type of the matrix, it should be a column-major sparse matrix - * \return info where - * : successful exit - * = 0: successful exit + * On exit, info is + * + * = 0: successful factorization + * * > 0: if info = i, and i is + * * <= A->ncol: U(i,i) is exactly zero. The factorization has * been completed, but the factor U is exactly singular, * and division by zero will occur if it is used to solve a * system of equations. + * * > A->ncol: number of bytes allocated when memory allocation * failure occurred, plus A->ncol. If lwork = -1, it is * the estimated amount of space needed, plus A->ncol. 
diff --git a/bench/spbench/test_sparseLU.cpp b/bench/spbench/test_sparseLU.cpp index c6511a9bc..f8ecbe69b 100644 --- a/bench/spbench/test_sparseLU.cpp +++ b/bench/spbench/test_sparseLU.cpp @@ -28,7 +28,7 @@ int main(int argc, char **args) // SparseLU, MetisOrdering > solver; // std::cout<< "ORDERING : METIS\n"; // #else - SparseLU, COLAMDOrdering > solver; + SparseLU, COLAMDOrdering > solver; std::cout<< "ORDERING : COLAMD\n"; // #endif From 357fe3641d696b0d7b878ca2d4e1b45639dff5f8 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Tue, 25 Sep 2012 11:55:33 +0200 Subject: [PATCH 62/73] Correct reference to iterative scaling method --- Eigen/src/SparseLU/SparseLU.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Eigen/src/SparseLU/SparseLU.h b/Eigen/src/SparseLU/SparseLU.h index 6f4458a26..9ea121ce5 100644 --- a/Eigen/src/SparseLU/SparseLU.h +++ b/Eigen/src/SparseLU/SparseLU.h @@ -60,7 +60,8 @@ namespace Eigen { * * \NOTE Unlike the initial SuperLU implementation, there is no step to equilibrate the matrix. * For badly scaled matrices, this step can be useful to reduce the pivoting during factorization. - * If this is the case for your matrices, you can try the basic scaling method in \ref Scaling. + * If this is the case for your matrices, you can try the basic scaling method at + * "unsupported/Eigen/src/IterativeSolvers/Scaling.h" * * \tparam _MatrixType The type of the sparse matrix. 
It must be a column-major SparseMatrix<> * \tparam _OrderingType The ordering method to use, either AMD, COLAMD or METIS From 1edb396542e7fc2275e8e2acd80b85cfad8cf64e Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 26 Sep 2012 19:24:41 +0200 Subject: [PATCH 63/73] fix minor typo in doc --- doc/C09_TutorialSparse.dox | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/C09_TutorialSparse.dox b/doc/C09_TutorialSparse.dox index 20671f57b..6a16c3ae2 100644 --- a/doc/C09_TutorialSparse.dox +++ b/doc/C09_TutorialSparse.dox @@ -211,7 +211,7 @@ Here is a typical usage example: \code typedef Eigen::Triplet T; std::vector tripletList; -triplets.reserve(estimation_of_entries); +tripletList.reserve(estimation_of_entries); for(...) { // ... From 7e97dd5bd87ac4edfc4244039e55cd04e6f77568 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 26 Sep 2012 19:28:57 +0200 Subject: [PATCH 64/73] we should not directly include the *mmintrin.h headers but include immintrin.h only --- Eigen/Core | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/Eigen/Core b/Eigen/Core index 88337e47e..366465888 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -87,20 +87,7 @@ // so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too. // notice that since these are C headers, the extern "C" is theoretically needed anyways. 
extern "C" { - #include - #include - #ifdef EIGEN_VECTORIZE_SSE3 - #include - #endif - #ifdef EIGEN_VECTORIZE_SSSE3 - #include - #endif - #ifdef EIGEN_VECTORIZE_SSE4_1 - #include - #endif - #ifdef EIGEN_VECTORIZE_SSE4_2 - #include - #endif + #include } // end extern "C" #elif defined __ALTIVEC__ #define EIGEN_VECTORIZE From 7c4b55fda9939dda66e927c158a21bcc117ae7f4 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 26 Sep 2012 23:32:22 +0200 Subject: [PATCH 65/73] fix bug #509: warning with gcc 4.7 --- Eigen/src/Core/util/XprHelper.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h index 4fd6a23d5..3d1290cd2 100644 --- a/Eigen/src/Core/util/XprHelper.h +++ b/Eigen/src/Core/util/XprHelper.h @@ -322,9 +322,9 @@ template::type> str // it's important that this value can still be squared without integer overflowing. DynamicAsInteger = 10000, ScalarReadCost = NumTraits::Scalar>::ReadCost, - ScalarReadCostAsInteger = ScalarReadCost == Dynamic ? DynamicAsInteger : ScalarReadCost, + ScalarReadCostAsInteger = ScalarReadCost == Dynamic ? int(DynamicAsInteger) : int(ScalarReadCost), CoeffReadCost = traits::CoeffReadCost, - CoeffReadCostAsInteger = CoeffReadCost == Dynamic ? DynamicAsInteger : CoeffReadCost, + CoeffReadCostAsInteger = CoeffReadCost == Dynamic ? int(DynamicAsInteger) : int(CoeffReadCost), NAsInteger = n == Dynamic ? 
int(DynamicAsInteger) : n, CostEvalAsInteger = (NAsInteger+1) * ScalarReadCostAsInteger + CoeffReadCostAsInteger, CostNoEvalAsInteger = NAsInteger * CoeffReadCostAsInteger From 44374788b5aacd1fbd130ebb86b0b37eb969ba61 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 26 Sep 2012 23:48:48 +0200 Subject: [PATCH 66/73] fix bug #511: pretty printers on windows --- debug/gdb/printers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debug/gdb/printers.py b/debug/gdb/printers.py index 9187acb33..86996a4f9 100644 --- a/debug/gdb/printers.py +++ b/debug/gdb/printers.py @@ -51,12 +51,12 @@ class EigenMatrixPrinter: template_params = m.split(',') template_params = map(lambda x:x.replace(" ", ""), template_params) - if template_params[1] == '-0x00000000000000001' or template_params[1] == '-0x000000001': + if template_params[1] == '-0x00000000000000001' or template_params[1] == '-0x000000001' or template_params[1] == '-1': self.rows = val['m_storage']['m_rows'] else: self.rows = int(template_params[1]) - if template_params[2] == '-0x00000000000000001' or template_params[2] == '-0x000000001': + if template_params[2] == '-0x00000000000000001' or template_params[2] == '-0x000000001' or template_params[2] == '-1': self.cols = val['m_storage']['m_cols'] else: self.cols = int(template_params[2]) From b648484dbaa53c4651760e87657bf5113d19370b Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 27 Sep 2012 00:23:19 +0200 Subject: [PATCH 67/73] fix bug #515: missing explicit scalar conversion (transplanted from b0862dcb2f9260e006e67e2b0610afdc3f58ee62 ) --- Eigen/src/Core/TriangularMatrix.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index 3bf2a257d..fcd40e32f 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -511,6 +511,7 @@ template struct triangular_assignment_selector { typedef typename Derived1::Index Index; + 
typedef typename Derived1::Scalar Scalar; static inline void run(Derived1 &dst, const Derived2 &src) { for(Index j = 0; j < dst.cols(); ++j) @@ -520,7 +521,7 @@ struct triangular_assignment_selector Date: Thu, 27 Sep 2012 09:22:10 +0200 Subject: [PATCH 68/73] fix SparseMatrix option bit flag in eval<> helper --- Eigen/src/SparseCore/SparseUtil.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Eigen/src/SparseCore/SparseUtil.h b/Eigen/src/SparseCore/SparseUtil.h index 6062a086f..a686e08da 100644 --- a/Eigen/src/SparseCore/SparseUtil.h +++ b/Eigen/src/SparseCore/SparseUtil.h @@ -113,9 +113,10 @@ template struct sparse_eval { template struct sparse_eval { typedef typename traits::Scalar _Scalar; - enum { _Flags = traits::Flags }; + typedef typename traits::Index _Index; + enum { _Options = ((traits::Flags&RowMajorBit)==RowMajorBit) ? RowMajor : ColMajor }; public: - typedef SparseMatrix<_Scalar, _Flags> type; + typedef SparseMatrix<_Scalar, _Options, _Index> type; }; template struct sparse_eval { From 8b83e66906d9e52047137c0e45fc81076f915e13 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 27 Sep 2012 09:37:05 +0200 Subject: [PATCH 69/73] add scalar multiple to diagonal matrices (transplanted from dc5b335f9fbc25b15a408d8b1458a0140c0a8248 ) --- Eigen/src/Core/DiagonalMatrix.h | 12 ++++++++++++ test/diagonalmatrices.cpp | 9 +++++++++ 2 files changed, 21 insertions(+) diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h index f27ab798a..da0264b0e 100644 --- a/Eigen/src/Core/DiagonalMatrix.h +++ b/Eigen/src/Core/DiagonalMatrix.h @@ -20,6 +20,7 @@ class DiagonalBase : public EigenBase public: typedef typename internal::traits::DiagonalVectorType DiagonalVectorType; typedef typename DiagonalVectorType::Scalar Scalar; + typedef typename DiagonalVectorType::RealScalar RealScalar; typedef typename internal::traits::StorageKind StorageKind; typedef typename internal::traits::Index Index; @@ -65,6 +66,17 @@ class 
DiagonalBase : public EigenBase return diagonal().cwiseInverse(); } + inline const DiagonalWrapper, const DiagonalVectorType> > + operator*(const Scalar& scalar) const + { + return diagonal() * scalar; + } + friend inline const DiagonalWrapper, const DiagonalVectorType> > + operator*(const Scalar& scalar, const DiagonalBase& other) + { + return other.diagonal() * scalar; + } + #ifdef EIGEN2_SUPPORT template bool isApprox(const DiagonalBase& other, typename NumTraits::Real precision = NumTraits::dummy_precision()) const diff --git a/test/diagonalmatrices.cpp b/test/diagonalmatrices.cpp index 3f5776dfc..7e9c80d7b 100644 --- a/test/diagonalmatrices.cpp +++ b/test/diagonalmatrices.cpp @@ -32,6 +32,8 @@ template void diagonalmatrices(const MatrixType& m) rv2 = RowVectorType::Random(cols); LeftDiagonalMatrix ldm1(v1), ldm2(v2); RightDiagonalMatrix rdm1(rv1), rdm2(rv2); + + Scalar s1 = internal::random(); SquareMatrixType sq_m1 (v1.asDiagonal()); VERIFY_IS_APPROX(sq_m1, v1.asDiagonal().toDenseMatrix()); @@ -76,6 +78,13 @@ template void diagonalmatrices(const MatrixType& m) big.block(i,j,rows,cols) = big.block(i,j,rows,cols) * rv1.asDiagonal(); VERIFY_IS_APPROX((big.block(i,j,rows,cols)) , m1 * rv1.asDiagonal() ); + + // scalar multiple + VERIFY_IS_APPROX(LeftDiagonalMatrix(ldm1*s1).diagonal(), ldm1.diagonal() * s1); + VERIFY_IS_APPROX(LeftDiagonalMatrix(s1*ldm1).diagonal(), s1 * ldm1.diagonal()); + + VERIFY_IS_APPROX(m1 * (rdm1 * s1), (m1 * rdm1) * s1); + VERIFY_IS_APPROX(m1 * (s1 * rdm1), (m1 * rdm1) * s1); } void test_diagonalmatrices() From 72bfed5e203e797b1fd82ec65ef7fa5d04be6e46 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Thu, 27 Sep 2012 11:34:56 +0200 Subject: [PATCH 70/73] Add forgotten SparseLUBase --- Eigen/src/SparseLU/SparseLUBase.h | 74 +++++++++++++++++++++++++++++++ doc/I17_SparseLinearSystems.dox | 4 +- 2 files changed, 76 insertions(+), 2 deletions(-) create mode 100644 Eigen/src/SparseLU/SparseLUBase.h diff --git 
a/Eigen/src/SparseLU/SparseLUBase.h b/Eigen/src/SparseLU/SparseLUBase.h new file mode 100644 index 000000000..94668fcff --- /dev/null +++ b/Eigen/src/SparseLU/SparseLUBase.h @@ -0,0 +1,74 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#ifndef SPARSELUBASE_H +#define SPARSELUBASE_H +/** + * Base class for sparseLU + */ +template +struct SparseLUBase +{ + typedef Matrix ScalarVector; + typedef Matrix IndexVector; + typedef typename ScalarVector::RealScalar RealScalar; + typedef VectorBlock > BlockScalarVector; + typedef VectorBlock > BlockIndexVector; +// typedef Ref > BlockScalarVector; +// typedef Ref > BlockIndexVector; + typedef LU_GlobalLU_t GlobalLU_t; + typedef SparseMatrix MatrixType; + + static int etree_find (int i, IndexVector& pp); + static int LU_sp_coletree(const MatrixType& mat, IndexVector& parent); + static void LU_nr_etdfs (int n, IndexVector& parent, IndexVector& first_kid, IndexVector& next_kid, IndexVector& post, int postnum); + static void LU_TreePostorder(int n, IndexVector& parent, IndexVector& post); + template + static int expand(VectorType& vec, int& length, int nbElts, int keep_prev, int& num_expansions); + static int LUMemInit(int m, int n, int annz, int lwork, int fillratio, int panel_size, GlobalLU_t& glu); + template + static int LUMemXpand(VectorType& vec, int& maxlen, int nbElts, LU_MemType memtype, int& num_expansions); + static void LU_heap_relax_snode (const int n, IndexVector& et, const int relax_columns, IndexVector& descendants, IndexVector& relax_end); + static void LU_relax_snode (const int n, IndexVector& et, const int relax_columns, IndexVector& descendants, IndexVector& relax_end); + static int LU_snode_dfs(const int jcol, 
const int kcol,const MatrixType& mat, IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t& glu); + static int LU_snode_bmod (const int jcol, const int fsupc, ScalarVector& dense, GlobalLU_t& glu); + static int LU_pivotL(const int jcol, const RealScalar diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, int& pivrow, GlobalLU_t& glu); + template + static void LU_dfs_kernel(const int jj, IndexVector& perm_r, + int& nseg, IndexVector& panel_lsub, IndexVector& segrep, + RepfnzType& repfnz_col, IndexVector& xprune, MarkerType& marker, IndexVector& parent, + IndexVector& xplore, GlobalLU_t& glu, int& nextl_col, int krow, Traits& traits); + static void LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, IndexVector& perm_r, int& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu); + + static void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, LU_perfvalues& perfv, GlobalLU_t& glu); + static int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper, int& nseg, BlockIndexVector& lsub_col, IndexVector& segrep, BlockIndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu); + static int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, ScalarVector& tempv, BlockIndexVector& segrep, BlockIndexVector& repfnz, int fpanelc, GlobalLU_t& glu); + static int LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segrep, BlockIndexVector& repfnz ,IndexVector& perm_r, BlockScalarVector& dense, GlobalLU_t& glu); + static void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, const int nseg, const IndexVector& segrep, BlockIndexVector& repfnz, IndexVector& xprune, GlobalLU_t& 
glu); + static void LU_countnz(const int n, int& nnzL, int& nnzU, GlobalLU_t& glu); + static void LU_fixupL(const int n, const IndexVector& perm_r, GlobalLU_t& glu); + +}; + +#include "SparseLU_Coletree.h" +#include "SparseLU_Memory.h" +#include "SparseLU_heap_relax_snode.h" +#include "SparseLU_relax_snode.h" +#include "SparseLU_snode_dfs.h" +#include "SparseLU_snode_bmod.h" +#include "SparseLU_pivotL.h" +#include "SparseLU_panel_dfs.h" +#include "SparseLU_kernel_bmod.h" +#include "SparseLU_panel_bmod.h" +#include "SparseLU_column_dfs.h" +#include "SparseLU_column_bmod.h" +#include "SparseLU_copy_to_ucol.h" +#include "SparseLU_pruneL.h" +#include "SparseLU_Utils.h" + +#endif diff --git a/doc/I17_SparseLinearSystems.dox b/doc/I17_SparseLinearSystems.dox index 740bee18e..cc8987d8a 100644 --- a/doc/I17_SparseLinearSystems.dox +++ b/doc/I17_SparseLinearSystems.dox @@ -47,8 +47,8 @@ For iterative solvers, the compute step is used to eventually setup a preconditi \code IterativeSolverClassName, PreconditionerName > solver; \endcode - -FIXME How to get a reference to the preconditioner, in order to set the parameters +The member function preconditioner() returns a read-write reference to the preconditioner + to directly interact with it. For instance, with the ILUT preconditioner, the incomplete factors L and U are computed in this step. See \link Sparse_modules the Sparse module \endlink for the list of available preconditioners in Eigen. 
From 82c3ff378423227a026ca710416c5f9cef1229d6 Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Thu, 27 Sep 2012 12:04:59 +0200 Subject: [PATCH 71/73] Fix Build error on MSVC --- unsupported/test/matrix_power.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsupported/test/matrix_power.cpp b/unsupported/test/matrix_power.cpp index d891641f4..3a3f01464 100644 --- a/unsupported/test/matrix_power.cpp +++ b/unsupported/test/matrix_power.cpp @@ -32,8 +32,8 @@ template void test2dHyperbolicRotation(double tol) { Matrix,2,2> A, B, C; - T angle, ch = std::cosh(1); - std::complex ish(0, std::sinh(1)); + T angle, ch = std::cosh((T)1); + std::complex ish(0, std::sinh((T)1)); A << ch, ish, -ish, ch; for (int i = 0; i <= 20; i++) { From 87074d97e5a0f709f45dcb17493d2151295b6ce2 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 27 Sep 2012 23:35:54 +0200 Subject: [PATCH 72/73] old gcc versions do not have immintrin.h file... --- Eigen/Core | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/Eigen/Core b/Eigen/Core index 366465888..502a4fc55 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -87,7 +87,26 @@ // so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too. // notice that since these are C headers, the extern "C" is theoretically needed anyways. extern "C" { - #include + // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly. + // Doing so triggers some issues with ICC. 
However old gcc versions seems to not have this file, thus: + #ifdef __INTEL_COMPILER + #include + #else + #include + #include + #ifdef EIGEN_VECTORIZE_SSE3 + #include + #endif + #ifdef EIGEN_VECTORIZE_SSSE3 + #include + #endif + #ifdef EIGEN_VECTORIZE_SSE4_1 + #include + #endif + #ifdef EIGEN_VECTORIZE_SSE4_2 + #include + #endif + #endif } // end extern "C" #elif defined __ALTIVEC__ #define EIGEN_VECTORIZE From b68102d9a29ac2f631dead3d861f9e84c5897e9c Mon Sep 17 00:00:00 2001 From: Desire NUENTSA Date: Fri, 28 Sep 2012 10:44:25 +0200 Subject: [PATCH 73/73] MSVC needs parentheses around min and max --- Eigen/src/SparseLU/SparseLUBase.h | 4 ++-- Eigen/src/SparseLU/SparseLU_Coletree.h | 2 +- Eigen/src/SparseLU/SparseLU_Memory.h | 4 ++-- Eigen/src/SparseLU/SparseLU_column_bmod.h | 6 +++--- Eigen/src/SparseLU/SparseLU_heap_relax_snode.h | 2 +- Eigen/src/SparseLU/SparseLU_panel_bmod.h | 2 +- Eigen/src/SparseLU/SparseLU_panel_dfs.h | 4 ++-- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLUBase.h b/Eigen/src/SparseLU/SparseLUBase.h index 94668fcff..c00bc0532 100644 --- a/Eigen/src/SparseLU/SparseLUBase.h +++ b/Eigen/src/SparseLU/SparseLUBase.h @@ -38,10 +38,10 @@ struct SparseLUBase static int LU_snode_dfs(const int jcol, const int kcol,const MatrixType& mat, IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t& glu); static int LU_snode_bmod (const int jcol, const int fsupc, ScalarVector& dense, GlobalLU_t& glu); static int LU_pivotL(const int jcol, const RealScalar diagpivotthresh, IndexVector& perm_r, IndexVector& iperm_c, int& pivrow, GlobalLU_t& glu); - template + template static void LU_dfs_kernel(const int jj, IndexVector& perm_r, int& nseg, IndexVector& panel_lsub, IndexVector& segrep, - RepfnzType& repfnz_col, IndexVector& xprune, MarkerType& marker, IndexVector& parent, + Ref repfnz_col, IndexVector& xprune, Ref marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu, int& nextl_col, int krow, Traits& 
traits); static void LU_panel_dfs(const int m, const int w, const int jcol, MatrixType& A, IndexVector& perm_r, int& nseg, ScalarVector& dense, IndexVector& panel_lsub, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu); diff --git a/Eigen/src/SparseLU/SparseLU_Coletree.h b/Eigen/src/SparseLU/SparseLU_Coletree.h index bb4067a45..d3bc36ea4 100644 --- a/Eigen/src/SparseLU/SparseLU_Coletree.h +++ b/Eigen/src/SparseLU/SparseLU_Coletree.h @@ -70,7 +70,7 @@ int SparseLUBase::LU_sp_coletree(const MatrixType& mat, IndexVecto for (typename MatrixType::InnerIterator it(mat, col); it; ++it) { // Is it necessary to browse the whole matrix, the lower part should do the job ?? row = it.row(); - firstcol(row) = std::min(firstcol(row), col); + firstcol(row) = (std::min)(firstcol(row), col); } } /* Compute etree by Liu's algorithm for symmetric matrices, diff --git a/Eigen/src/SparseLU/SparseLU_Memory.h b/Eigen/src/SparseLU/SparseLU_Memory.h index 62adcb4a2..7b9f01355 100644 --- a/Eigen/src/SparseLU/SparseLU_Memory.h +++ b/Eigen/src/SparseLU/SparseLU_Memory.h @@ -128,8 +128,8 @@ int SparseLUBase::LUMemInit(int m, int n, int annz, int lwork, int { int& num_expansions = glu.num_expansions; //No memory expansions so far num_expansions = 0; - glu.nzumax = glu.nzlumax = std::max(fillratio * annz, m*n); // estimated number of nonzeros in U - glu.nzlmax = std::max(1., fillratio/4.) * annz; // estimated nnz in L factor + glu.nzumax = glu.nzlumax = (std::max)(fillratio * annz, m*n); // estimated number of nonzeros in U + glu.nzlmax = (std::max)(1., fillratio/4.) 
* annz; // estimated nnz in L factor // Return the estimated size to the user if necessary if (lwork == IND_EMPTY) diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h index b268c4348..94f18fb73 100644 --- a/Eigen/src/SparseLU/SparseLU_column_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -76,7 +76,7 @@ int SparseLUBase::LU_column_bmod(const int jcol, const int nseg, B { // outside the rectangular supernode fsupc = glu.xsup(ksupno); - fst_col = std::max(fsupc, fpanelc); + fst_col = (std::max)(fsupc, fpanelc); // Distance from the current supernode to the current panel; // d_fsupc = 0 if fsupc > fpanelc @@ -86,7 +86,7 @@ int SparseLUBase::LU_column_bmod(const int jcol, const int nseg, B lptr = glu.xlsub(fsupc) + d_fsupc; kfnz = repfnz(krep); - kfnz = std::max(kfnz, fpanelc); + kfnz = (std::max)(kfnz, fpanelc); segsize = krep - kfnz + 1; nsupc = krep - fst_col + 1; @@ -132,7 +132,7 @@ int SparseLUBase::LU_column_bmod(const int jcol, const int nseg, B * 1) fsupc < fpanelc, then fst_col <-- fpanelc * 2) fsupc >= fpanelc, then fst_col <-- fsupc */ - fst_col = std::max(fsupc, fpanelc); + fst_col = (std::max)(fsupc, fpanelc); if (fst_col < jcol) { diff --git a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h index 6d3271aff..69e1d4da9 100644 --- a/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +++ b/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h @@ -88,7 +88,7 @@ void SparseLUBase::LU_heap_relax_snode (const int n, IndexVector& ++nsuper_et_post; k = n; for (i = snode_start; i <= j; ++i) - k = std::min(k, inv_post(i)); + k = (std::min)(k, inv_post(i)); l = inv_post(j); if ( (l - k) == (j - snode_start) ) // Same number of columns in the snode { diff --git a/Eigen/src/SparseLU/SparseLU_panel_bmod.h b/Eigen/src/SparseLU/SparseLU_panel_bmod.h index ceb6c5938..6688b4e3e 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_panel_bmod.h @@ 
-89,7 +89,7 @@ void SparseLUBase::LU_panel_bmod(const int m, const int w, const i segsize = krep - kfnz + 1; u_cols++; - u_rows = std::max(segsize,u_rows); + u_rows = (std::max)(segsize,u_rows); } // if the blocks are large enough, use level 3 diff --git a/Eigen/src/SparseLU/SparseLU_panel_dfs.h b/Eigen/src/SparseLU/SparseLU_panel_dfs.h index 164417897..5d3025388 100644 --- a/Eigen/src/SparseLU/SparseLU_panel_dfs.h +++ b/Eigen/src/SparseLU/SparseLU_panel_dfs.h @@ -30,10 +30,10 @@ #ifndef SPARSELU_PANEL_DFS_H #define SPARSELU_PANEL_DFS_H template -template +template void SparseLUBase::LU_dfs_kernel(const int jj, IndexVector& perm_r, int& nseg, IndexVector& panel_lsub, IndexVector& segrep, - RepfnzType& repfnz_col, IndexVector& xprune, MarkerType& marker, IndexVector& parent, + Ref repfnz_col, IndexVector& xprune, Ref marker, IndexVector& parent, IndexVector& xplore, GlobalLU_t& glu, int& nextl_col, int krow, Traits& traits )