SparseLU: remove the "snode" path which appears to bring nearly zero speedup

2025-09-25 15:53:19 +08:00 · 2012-10-30 15:17:58 +01:00 · 2012-10-30 15:17:58 +01:00 · 90fcaf11cf
commit 90fcaf11cf
parent ac8c694f3e
4 changed files with 66 additions and 305 deletions
--- a/Eigen/src/SparseLU/SparseLU.h
+++ b/Eigen/src/SparseLU/SparseLU.h
@ -455,144 +455,85 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
  int i, k, jj; 
  for (jcol = 0; jcol < n; )
  {
-    if (relax_end(jcol) != IND_EMPTY) 
-    { // Starting a relaxed node from jcol
-      kcol = relax_end(jcol); // End index of the relaxed snode 
+    // Adjust panel size so that a panel won't overlap with the next relaxed snode. 
+    int panel_size = m_perfv.panel_size; // upper bound on panel width
+    for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++)
+    {
+      if (relax_end(k) != IND_EMPTY) 
+      {
+        panel_size = k - jcol; 
+        break; 
+      }
+    }
+    if (k == n) 
+      panel_size = n - jcol; 
      
-      // Factorize the relaxed supernode(jcol:kcol)
-      // First, determine the union of the row structure of the snode 
-      info = SparseLUBase<Scalar,Index>::LU_snode_dfs(jcol, kcol, m_mat, xprune, marker, m_glu); 
+    // Symbolic outer factorization on a panel of columns 
+    SparseLUBase<Scalar,Index>::LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu); 
+    
+    // Numeric sup-panel updates in topological order 
+    SparseLUBase<Scalar,Index>::LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_perfv, m_glu); 
+    
+    // Sparse LU within the panel, and below the panel diagonal 
+    for ( jj = jcol; jj< jcol + panel_size; jj++) 
+    {
+      k = (jj - jcol) * m; // Column index for w-wide arrays 
+      
+      nseg = nseg1; // begin after all the panel segments
+      //Depth-first-search for the current column
+      VectorBlock<IndexVector> panel_lsubk(panel_lsub, k, m);
+      VectorBlock<IndexVector> repfnz_k(repfnz, k, m); 
+      info = SparseLUBase<Scalar,Index>::LU_column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); 
      if ( info ) 
      {
-        std::cerr << "MEMORY ALLOCATION FAILED IN SNODE_DFS() \n";
+        std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() \n";
        m_info = NumericalIssue; 
        m_factorizationIsOk = false; 
        return; 
      }
-      nextu = m_glu.xusub(jcol); //starting location of column jcol in ucol
-      nextlu = m_glu.xlusup(jcol); //Starting location of column jcol in lusup (rectangular supernodes)
-      jsupno = m_glu.supno(jcol); // Supernode number which column jcol belongs to 
-      fsupc = m_glu.xsup(jsupno); //First column number of the current supernode
-      int lda = m_glu.xusub(fsupc+1) - m_glu.xusub(fsupc);
-      lda = m_glu.xlsub(fsupc+1)-m_glu.xlsub(fsupc);
-      new_next = nextlu + lda * (kcol - jcol + 1);
-      int mem; 
-      while (new_next > m_glu.nzlumax ) 
+      // Numeric updates to this column 
+      VectorBlock<ScalarVector> dense_k(dense, k, m); 
+      VectorBlock<IndexVector> segrep_k(segrep, nseg1, m-nseg1); 
+      info = SparseLUBase<Scalar,Index>::LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu); 
+      if ( info ) 
      {
-        mem = SparseLUBase<Scalar,Index>::LUMemXpand(m_glu.lusup, m_glu.nzlumax, nextlu, LUSUP, m_glu.num_expansions);
-        if (mem) 
-        {
-          std::cerr << "MEMORY ALLOCATION FAILED FOR L FACTOR \n"; 
-          m_factorizationIsOk = false; 
-          return; 
-        }
+        std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() \n";
+        m_info = NumericalIssue; 
+        m_factorizationIsOk = false; 
+        return; 
      }
      
-      // Now, left-looking factorize each column within the snode
-      for (icol = jcol; icol<=kcol; icol++){
-        m_glu.xusub(icol+1) = nextu;
-        // Scatter into SPA dense(*)
-        for (typename MatrixType::InnerIterator it(m_mat, icol); it; ++it)
-          dense(it.row()) = it.value();
-        
-        // Numeric update within the snode 
-        SparseLUBase<Scalar,Index>::LU_snode_bmod(icol, fsupc, dense, m_glu); 
-        
-        // Eliminate the current column 
-        info = SparseLUBase<Scalar,Index>::LU_pivotL(icol, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); 
-        if ( info ) 
-        {
-          m_info = NumericalIssue; 
-          std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT " << info <<std::endl; 
-          m_factorizationIsOk = false; 
-          return; 
-        }
-      }
-      jcol = icol; // The last column te be eliminated
-    }
-    else 
-    { // Work on one panel of panel_size columns
-      
-      // Adjust panel size so that a panel won't overlap with the next relaxed snode. 
-      int panel_size = m_perfv.panel_size; // upper bound on panel width
-      for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++)
+      // Copy the U-segments to ucol(*)
+      info = SparseLUBase<Scalar,Index>::LU_copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu); 
+      if ( info ) 
      {
-        if (relax_end(k) != IND_EMPTY) 
-        {
-          panel_size = k - jcol; 
-          break; 
-        }
+        std::cerr << "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() \n";
+        m_info = NumericalIssue; 
+        m_factorizationIsOk = false; 
+        return; 
      }
-      if (k == n) 
-        panel_size = n - jcol; 
-        
-      // Symbolic outer factorization on a panel of columns 
-      SparseLUBase<Scalar,Index>::LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu); 
      
-      // Numeric sup-panel updates in topological order 
-      SparseLUBase<Scalar,Index>::LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_perfv, m_glu); 
-      
-      // Sparse LU within the panel, and below the panel diagonal 
-      for ( jj = jcol; jj< jcol + panel_size; jj++) 
+      // Form the L-segment 
+      info = SparseLUBase<Scalar,Index>::LU_pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu);
+      if ( info ) 
      {
-        k = (jj - jcol) * m; // Column index for w-wide arrays 
-        
-        nseg = nseg1; // begin after all the panel segments
-        //Depth-first-search for the current column
-        VectorBlock<IndexVector> panel_lsubk(panel_lsub, k, m);
-        VectorBlock<IndexVector> repfnz_k(repfnz, k, m); 
-        info = SparseLUBase<Scalar,Index>::LU_column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu); 
-        if ( info ) 
-        {
-          std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() \n";
-          m_info = NumericalIssue; 
-          m_factorizationIsOk = false; 
-          return; 
-        }
-        // Numeric updates to this column 
-        VectorBlock<ScalarVector> dense_k(dense, k, m); 
-        VectorBlock<IndexVector> segrep_k(segrep, nseg1, m-nseg1); 
-        info = SparseLUBase<Scalar,Index>::LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu); 
-        if ( info ) 
-        {
-          std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() \n";
-          m_info = NumericalIssue; 
-          m_factorizationIsOk = false; 
-          return; 
-        }
-        
-        // Copy the U-segments to ucol(*)
-        info = SparseLUBase<Scalar,Index>::LU_copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu); 
-        if ( info ) 
-        {
-          std::cerr << "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() \n";
-          m_info = NumericalIssue; 
-          m_factorizationIsOk = false; 
-          return; 
-        }
-        
-        // Form the L-segment 
-        info = SparseLUBase<Scalar,Index>::LU_pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu);
-        if ( info ) 
-        {
-          std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT " << info <<std::endl; 
-          m_info = NumericalIssue; 
-          m_factorizationIsOk = false; 
-          return; 
-        }
-        
-        // Prune columns (0:jj-1) using column jj
-        SparseLUBase<Scalar,Index>::LU_pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu); 
-        
-        // Reset repfnz for this column 
-        for (i = 0; i < nseg; i++)
-        {
-          irep = segrep(i); 
-          repfnz_k(irep) = IND_EMPTY; 
-        }
-      } // end SparseLU within the panel  
-      jcol += panel_size;  // Move to the next panel
-    } // end else 
+        std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT " << info <<std::endl; 
+        m_info = NumericalIssue; 
+        m_factorizationIsOk = false; 
+        return; 
+      }
+      
+      // Prune columns (0:jj-1) using column jj
+      SparseLUBase<Scalar,Index>::LU_pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu); 
+      
+      // Reset repfnz for this column 
+      for (i = 0; i < nseg; i++)
+      {
+        irep = segrep(i); 
+        repfnz_k(irep) = IND_EMPTY; 
+      }
+    } // end SparseLU within the panel  
+    jcol += panel_size;  // Move to the next panel
  } // end for -- end elimination 
  
  // Count the number of nonzeros in factors 
@ -628,4 +569,5 @@ struct solve_retval<SparseLU<_MatrixType,Derived>, Rhs>
 } // end namespace internal

 } // End namespace Eigen 
+
 #endif
--- a/Eigen/src/SparseLU/SparseLUBase.h
+++ b/Eigen/src/SparseLU/SparseLUBase.h
@ -57,8 +57,6 @@ struct SparseLUBase
 #include "SparseLU_Memory.h"
 #include "SparseLU_heap_relax_snode.h"
 #include "SparseLU_relax_snode.h"
-#include "SparseLU_snode_dfs.h"
-#include "SparseLU_snode_bmod.h"
 #include "SparseLU_pivotL.h"
 #include "SparseLU_panel_dfs.h"
 #include "SparseLU_kernel_bmod.h"
--- a/Eigen/src/SparseLU/SparseLU_snode_bmod.h
+++ b/Eigen/src/SparseLU/SparseLU_snode_bmod.h
@ -1,84 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-/* 
- 
- * NOTE: This file is the modified version of [s,d,c,z]snode_bmod.c file in SuperLU 
- 
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
- *
- * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
- * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- *
- * Permission is hereby granted to use or copy this program for any
- * purpose, provided the above notices are retained on all copies.
- * Permission to modify the code and to distribute modified code is
- * granted, provided the above notices are retained, and a notice that
- * the code was modified is included with the above copyright notice.
- */
-#ifndef SPARSELU_SNODE_BMOD_H
-#define SPARSELU_SNODE_BMOD_H
-template <typename Scalar, typename Index>
-int SparseLUBase<Scalar,Index>::LU_snode_bmod (const int jcol, const int fsupc, ScalarVector& dense, GlobalLU_t& glu)
-{  
-  /* lsub : Compressed row subscripts of ( rectangular supernodes )
-   * xlsub :  xlsub[j] is the starting location of the j-th column in lsub(*)
-   * lusup : Numerical values of the rectangular supernodes
-   * xlusup[j] is the starting location of the j-th column in lusup(*)
-   */
-  int nextlu = glu.xlusup(jcol); // Starting location of the next column to add 
-  int irow, isub; 
-  // Process the supernodal portion of L\U[*,jcol]
-  for (isub = glu.xlsub(fsupc); isub < glu.xlsub(fsupc+1); isub++)
-  {
-    irow = glu.lsub(isub); 
-    glu.lusup(nextlu) = dense(irow);
-    dense(irow) = 0;
-    ++nextlu;
-  }
-  // Make sure the leading dimension is a multiple of the underlying packet size
-  // so that fast fully aligned kernels can be enabled:
-  {
-    Index lda = nextlu-glu.xlusup(jcol);
-    int offset = internal::first_multiple<Index>(lda, internal::packet_traits<Scalar>::size) - lda;
-    if(offset)
-    {
-      glu.lusup.segment(nextlu,offset).setZero();
-      nextlu += offset;
-    }
-  }
-  glu.xlusup(jcol + 1) = nextlu; // Initialize xlusup for next column ( jcol+1 )
-  
-  if (fsupc < jcol ){
-    int luptr = glu.xlusup(fsupc); // points to the first column of the supernode
-    int nsupr = glu.xlsub(fsupc + 1) -glu.xlsub(fsupc); //Number of rows in the supernode
-    int nsupc = jcol - fsupc; // Number of columns in the supernodal portion of L\U[*,jcol]
-    int ufirst = glu.xlusup(jcol); // points to the beginning of column jcol in supernode L\U(jsupno)
-    int lda = glu.xlusup(jcol+1) - ufirst;
-    
-    int nrow = nsupr - nsupc; // Number of rows in the off-diagonal blocks
-    
-    // Solve the triangular system for U(fsupc:jcol, jcol) with L(fspuc:jcol, fsupc:jcol)
-    Map<Matrix<Scalar,Dynamic,Dynamic>,0,OuterStride<> > A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(lda) );
-    VectorBlock<ScalarVector> u(glu.lusup, ufirst, nsupc);
-    u = A.template triangularView<UnitLower>().solve(u); // Call the Eigen dense triangular solve interface
-    
-    // Update the trailing part of the column jcol U(jcol:jcol+nrow, jcol) using L(jcol:jcol+nrow, fsupc:jcol) and U(fsupc:jcol)
-    new (&A) Map<Matrix<Scalar,Dynamic,Dynamic>,0,OuterStride<> > ( &(glu.lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) ); 
-    VectorBlock<ScalarVector> l(glu.lusup, ufirst+nsupc, nrow); 
-    l.noalias() -= A * u; 
-  }
-  return 0;
-}
-#endif
--- a/Eigen/src/SparseLU/SparseLU_snode_dfs.h
+++ b/Eigen/src/SparseLU/SparseLU_snode_dfs.h
@ -1,95 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-/* 
- 
- * NOTE: This file is the modified version of [s,d,c,z]snode_dfs.c file in SuperLU 
- 
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
- *
- * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
- * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- *
- * Permission is hereby granted to use or copy this program for any
- * purpose, provided the above notices are retained on all copies.
- * Permission to modify the code and to distribute modified code is
- * granted, provided the above notices are retained, and a notice that
- * the code was modified is included with the above copyright notice.
- */
-#ifndef SPARSELU_SNODE_DFS_H
-#define SPARSELU_SNODE_DFS_H
-  /**
-   * \brief Determine the union of the row structures of those columns within the relaxed snode.
- *    NOTE: The relaxed snodes are leaves of the supernodal etree, therefore, 
- *    the portion outside the rectangular supernode must be zero.
- * 
- * \param jcol start of the supernode
- * \param kcol end of the supernode
- * \param asub Row indices
- * \param colptr Pointer to the beginning of each column
- * \param xprune (out) The pruned tree ??
- * \param marker (in/out) working vector
- * \return 0 on success, > 0 size of the memory when memory allocation failed
- */
-template <typename Scalar, typename Index>
-int SparseLUBase<Scalar,Index>::LU_snode_dfs(const int jcol, const int kcol,const MatrixType& mat,  IndexVector& xprune, IndexVector& marker, GlobalLU_t& glu)
-{
-  int mem; 
-  Index nsuper = ++glu.supno(jcol); // Next available supernode number
-  int nextl = glu.xlsub(jcol); //Index of the starting location of the jcol-th column in lsub
-  int krow,kmark; 
-  for (int i = jcol; i <=kcol; i++)
-  {
-    // For each nonzero in A(*,i)
-    for (typename MatrixType::InnerIterator it(mat, i); it; ++it)
-    {
-      krow = it.row(); 
-      kmark = marker(krow);
-      if ( kmark != kcol )
-      {
-        // First time to visit krow
-        marker(krow) = kcol; 
-        glu.lsub(nextl++) = krow; 
-        if( nextl >= glu.nzlmax )
-        {
-          mem = LUMemXpand<IndexVector>(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions);
-          if (mem) return mem; // Memory expansion failed... Return the memory allocated so far
-        }
-      }
-    }
-    glu.supno(i) = nsuper;
-  }
-  
-  // If supernode > 1, then make a copy of the subscripts for pruning
-  if (jcol < kcol)
-  {
-    Index new_next = nextl + (nextl - glu.xlsub(jcol));
-    while (new_next > glu.nzlmax)
-    {
-      mem = LUMemXpand<IndexVector>(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions);
-      if (mem) return mem; // Memory expansion failed... Return the memory allocated so far
-    }
-    Index ifrom, ito = nextl; 
-    for (ifrom = glu.xlsub(jcol); ifrom < nextl;)
-      glu.lsub(ito++) = glu.lsub(ifrom++);
-    for (int i = jcol+1; i <=kcol; i++) glu.xlsub(i) = nextl;
-    nextl = ito;
-  }
-  glu.xsup(nsuper+1) = kcol + 1; // Start of next available supernode
-  glu.supno(kcol+1) = nsuper;
-  xprune(kcol) = nextl;
-  glu.xlsub(kcol+1) = nextl;
-  return 0;
-}
-#endif