SparseLU: remove the "snode" path which appears to bring nearly zero speedup

2025-08-12 19:59:05 +08:00 · 2012-10-30 15:17:58 +01:00 · 2012-10-30 15:17:58 +01:00 · 90fcaf11cf
commit 90fcaf11cf
parent ac8c694f3e
4 changed files with 66 additions and 305 deletions
--- a/Eigen/src/SparseLU/SparseLU.h
+++ b/Eigen/src/SparseLU/SparseLU.h
@ -455,64 +455,6 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
  int i, k, jj; 
  for (jcol = 0; jcol < n; )
  {
    if (relax_end(jcol) != IND_EMPTY) 
    { // Starting a relaxed node from jcol
      kcol = relax_end(jcol); // End index of the relaxed snode 
      // Factorize the relaxed supernode(jcol:kcol)
      // First, determine the union of the row structure of the snode 
      info = SparseLUBase<Scalar,Index>::LU_snode_dfs(jcol, kcol, m_mat, xprune, marker, m_glu); 
      if ( info ) 
      {
        std::cerr << "MEMORY ALLOCATION FAILED IN SNODE_DFS() \n";
        m_info = NumericalIssue; 
        m_factorizationIsOk = false; 
        return; 
      }
      nextu = m_glu.xusub(jcol); //starting location of column jcol in ucol
      nextlu = m_glu.xlusup(jcol); //Starting location of column jcol in lusup (rectangular supernodes)
      jsupno = m_glu.supno(jcol); // Supernode number which column jcol belongs to 
      fsupc = m_glu.xsup(jsupno); //First column number of the current supernode
      int lda = m_glu.xusub(fsupc+1) - m_glu.xusub(fsupc);
      lda = m_glu.xlsub(fsupc+1)-m_glu.xlsub(fsupc);
      new_next = nextlu + lda * (kcol - jcol + 1);
      int mem; 
      while (new_next > m_glu.nzlumax ) 
      {
        mem = SparseLUBase<Scalar,Index>::LUMemXpand(m_glu.lusup, m_glu.nzlumax, nextlu, LUSUP, m_glu.num_expansions);
        if (mem) 
        {
          std::cerr << "MEMORY ALLOCATION FAILED FOR L FACTOR \n"; 
          m_factorizationIsOk = false; 
          return; 
        }
      }
      // Now, left-looking factorize each column within the snode
      for (icol = jcol; icol<=kcol; icol++){
        m_glu.xusub(icol+1) = nextu;
        // Scatter into SPA dense(*)
        for (typename MatrixType::InnerIterator it(m_mat, icol); it; ++it)
          dense(it.row()) = it.value();
        // Numeric update within the snode 
        SparseLUBase<Scalar,Index>::LU_snode_bmod(icol, fsupc, dense, m_glu); 
        // Eliminate the current column 
        info = SparseLUBase<Scalar,Index>::LU_pivotL(icol, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu); 
        if ( info ) 
        {
          m_info = NumericalIssue; 
          std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT " << info <<std::endl; 
          m_factorizationIsOk = false; 
          return; 
        }
      }
      jcol = icol; // The last column te be eliminated
    }
    else 
    { // Work on one panel of panel_size columns
    // Adjust panel size so that a panel won't overlap with the next relaxed snode. 
    int panel_size = m_perfv.panel_size; // upper bound on panel width
    for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++)
@ -592,7 +534,6 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
      }
    } // end SparseLU within the panel  
    jcol += panel_size;  // Move to the next panel
    } // end else 
  } // end for -- end elimination 
  // Count the number of nonzeros in factors 
@ -628,4 +569,5 @@ struct solve_retval<SparseLU<_MatrixType,Derived>, Rhs>
 } // end namespace internal
 } // End namespace Eigen 
 #endif
--- a/Eigen/src/SparseLU/SparseLUBase.h
+++ b/Eigen/src/SparseLU/SparseLUBase.h
@ -57,8 +57,6 @@ struct SparseLUBase
 #include "SparseLU_Memory.h"
 #include "SparseLU_heap_relax_snode.h"
 #include "SparseLU_relax_snode.h"
 #include "SparseLU_snode_dfs.h"
 #include "SparseLU_snode_bmod.h"
 #include "SparseLU_pivotL.h"
 #include "SparseLU_panel_dfs.h"
 #include "SparseLU_kernel_bmod.h"
--- a/Eigen/src/SparseLU/SparseLU_snode_bmod.h
+++ b/Eigen/src/SparseLU/SparseLU_snode_bmod.h
@ -1,84 +0,0 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 /* 
 * NOTE: This file is the modified version of [s,d,c,z]snode_bmod.c file in SuperLU 
 * -- SuperLU routine (version 3.0) --
 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
 * and Lawrence Berkeley National Lab.
 * October 15, 2003
 *
 * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
 * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program for any
 * purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is
 * granted, provided the above notices are retained, and a notice that
 * the code was modified is included with the above copyright notice.
 */
 #ifndef SPARSELU_SNODE_BMOD_H
 #define SPARSELU_SNODE_BMOD_H
 template <typename Scalar, typename Index>
 int SparseLUBase<Scalar,Index>::LU_snode_bmod (const int jcol, const int fsupc, ScalarVector& dense, GlobalLU_t& glu)
 {  
  /* lsub : Compressed row subscripts of ( rectangular supernodes )
   * xlsub :  xlsub[j] is the starting location of the j-th column in lsub(*)
   * lusup : Numerical values of the rectangular supernodes
   * xlusup[j] is the starting location of the j-th column in lusup(*)
   */
  int nextlu = glu.xlusup(jcol); // Starting location of the next column to add 
  int irow, isub; 
  // Process the supernodal portion of L\U[*,jcol]
  for (isub = glu.xlsub(fsupc); isub < glu.xlsub(fsupc+1); isub++)
  {
    irow = glu.lsub(isub); 
    glu.lusup(nextlu) = dense(irow);
    dense(irow) = 0;
    ++nextlu;
  }
  // Make sure the leading dimension is a multiple of the underlying packet size
  // so that fast fully aligned kernels can be enabled:
  {
    Index lda = nextlu-glu.xlusup(jcol);
    int offset = internal::first_multiple<Index>(lda, internal::packet_traits<Scalar>::size) - lda;
    if(offset)
    {
      glu.lusup.segment(nextlu,offset).setZero();
      nextlu += offset;
    }
  }
  glu.xlusup(jcol + 1) = nextlu; // Initialize xlusup for next column ( jcol+1 )
  if (fsupc < jcol ){
    int luptr = glu.xlusup(fsupc); // points to the first column of the supernode
    int nsupr = glu.xlsub(fsupc + 1) -glu.xlsub(fsupc); //Number of rows in the supernode
    int nsupc = jcol - fsupc; // Number of columns in the supernodal portion of L\U[*,jcol]
    int ufirst = glu.xlusup(jcol); // points to the beginning of column jcol in supernode L\U(jsupno)
    int lda = glu.xlusup(jcol+1) - ufirst;
    int nrow = nsupr - nsupc; // Number of rows in the off-diagonal blocks
    // Solve the triangular system for U(fsupc:jcol, jcol) with L(fspuc:jcol, fsupc:jcol)
    Map<Matrix<Scalar,Dynamic,Dynamic>,0,OuterStride<> > A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(lda) );
    VectorBlock<ScalarVector> u(glu.lusup, ufirst, nsupc);
    u = A.template triangularView<UnitLower>().solve(u); // Call the Eigen dense triangular solve interface
    // Update the trailing part of the column jcol U(jcol:jcol+nrow, jcol) using L(jcol:jcol+nrow, fsupc:jcol) and U(fsupc:jcol)
    new (&A) Map<Matrix<Scalar,Dynamic,Dynamic>,0,OuterStride<> > ( &(glu.lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) ); 
    VectorBlock<ScalarVector> l(glu.lusup, ufirst+nsupc, nrow); 
    l.noalias() -= A * u; 
  }
  return 0;
 }
 #endif
--- a/Eigen/src/SparseLU/SparseLU_snode_dfs.h
+++ b/Eigen/src/SparseLU/SparseLU_snode_dfs.h
@ -1,95 +0,0 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
 /* 
 * NOTE: This file is the modified version of [s,d,c,z]snode_dfs.c file in SuperLU 
 * -- SuperLU routine (version 2.0) --
 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
 * and Lawrence Berkeley National Lab.
 * November 15, 1997
 *
 * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
 * EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program for any
 * purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is
 * granted, provided the above notices are retained, and a notice that
 * the code was modified is included with the above copyright notice.
 */
 #ifndef SPARSELU_SNODE_DFS_H
 #define SPARSELU_SNODE_DFS_H
  /**
   * \brief Determine the union of the row structures of those columns within the relaxed snode.
 *    NOTE: The relaxed snodes are leaves of the supernodal etree, therefore, 
 *    the portion outside the rectangular supernode must be zero.
 * 
 * \param jcol start of the supernode
 * \param kcol end of the supernode
 * \param asub Row indices
 * \param colptr Pointer to the beginning of each column
 * \param xprune (out) The pruned tree ??
 * \param marker (in/out) working vector
 * \return 0 on success, > 0 size of the memory when memory allocation failed
 */
 template <typename Scalar, typename Index>
 int SparseLUBase<Scalar,Index>::LU_snode_dfs(const int jcol, const int kcol,const MatrixType& mat,  IndexVector& xprune, IndexVector& marker, GlobalLU_t& glu)
 {
  int mem; 
  Index nsuper = ++glu.supno(jcol); // Next available supernode number
  int nextl = glu.xlsub(jcol); //Index of the starting location of the jcol-th column in lsub
  int krow,kmark; 
  for (int i = jcol; i <=kcol; i++)
  {
    // For each nonzero in A(*,i)
    for (typename MatrixType::InnerIterator it(mat, i); it; ++it)
    {
      krow = it.row(); 
      kmark = marker(krow);
      if ( kmark != kcol )
      {
        // First time to visit krow
        marker(krow) = kcol; 
        glu.lsub(nextl++) = krow; 
        if( nextl >= glu.nzlmax )
        {
          mem = LUMemXpand<IndexVector>(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions);
          if (mem) return mem; // Memory expansion failed... Return the memory allocated so far
        }
      }
    }
    glu.supno(i) = nsuper;
  }
  // If supernode > 1, then make a copy of the subscripts for pruning
  if (jcol < kcol)
  {
    Index new_next = nextl + (nextl - glu.xlsub(jcol));
    while (new_next > glu.nzlmax)
    {
      mem = LUMemXpand<IndexVector>(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions);
      if (mem) return mem; // Memory expansion failed... Return the memory allocated so far
    }
    Index ifrom, ito = nextl; 
    for (ifrom = glu.xlsub(jcol); ifrom < nextl;)
      glu.lsub(ito++) = glu.lsub(ifrom++);
    for (int i = jcol+1; i <=kcol; i++) glu.xlsub(i) = nextl;
    nextl = ito;
  }
  glu.xsup(nsuper+1) = kcol + 1; // Start of next available supernode
  glu.supno(kcol+1) = nsuper;
  xprune(kcol) = nextl;
  glu.xlsub(kcol+1) = nextl;
  return 0;
 }
 #endif