mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-11 11:19:02 +08:00
SparseLU: remove the "snode" path which appears to bring nearly zero speedup
This commit is contained in:
parent
ac8c694f3e
commit
90fcaf11cf
@ -455,144 +455,85 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
|
||||
int i, k, jj;
|
||||
for (jcol = 0; jcol < n; )
|
||||
{
|
||||
if (relax_end(jcol) != IND_EMPTY)
|
||||
{ // Starting a relaxed node from jcol
|
||||
kcol = relax_end(jcol); // End index of the relaxed snode
|
||||
// Adjust panel size so that a panel won't overlap with the next relaxed snode.
|
||||
int panel_size = m_perfv.panel_size; // upper bound on panel width
|
||||
for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++)
|
||||
{
|
||||
if (relax_end(k) != IND_EMPTY)
|
||||
{
|
||||
panel_size = k - jcol;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (k == n)
|
||||
panel_size = n - jcol;
|
||||
|
||||
// Factorize the relaxed supernode(jcol:kcol)
|
||||
// First, determine the union of the row structure of the snode
|
||||
info = SparseLUBase<Scalar,Index>::LU_snode_dfs(jcol, kcol, m_mat, xprune, marker, m_glu);
|
||||
// Symbolic outer factorization on a panel of columns
|
||||
SparseLUBase<Scalar,Index>::LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu);
|
||||
|
||||
// Numeric sup-panel updates in topological order
|
||||
SparseLUBase<Scalar,Index>::LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_perfv, m_glu);
|
||||
|
||||
// Sparse LU within the panel, and below the panel diagonal
|
||||
for ( jj = jcol; jj< jcol + panel_size; jj++)
|
||||
{
|
||||
k = (jj - jcol) * m; // Column index for w-wide arrays
|
||||
|
||||
nseg = nseg1; // begin after all the panel segments
|
||||
//Depth-first-search for the current column
|
||||
VectorBlock<IndexVector> panel_lsubk(panel_lsub, k, m);
|
||||
VectorBlock<IndexVector> repfnz_k(repfnz, k, m);
|
||||
info = SparseLUBase<Scalar,Index>::LU_column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu);
|
||||
if ( info )
|
||||
{
|
||||
std::cerr << "MEMORY ALLOCATION FAILED IN SNODE_DFS() \n";
|
||||
std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() \n";
|
||||
m_info = NumericalIssue;
|
||||
m_factorizationIsOk = false;
|
||||
return;
|
||||
}
|
||||
nextu = m_glu.xusub(jcol); //starting location of column jcol in ucol
|
||||
nextlu = m_glu.xlusup(jcol); //Starting location of column jcol in lusup (rectangular supernodes)
|
||||
jsupno = m_glu.supno(jcol); // Supernode number which column jcol belongs to
|
||||
fsupc = m_glu.xsup(jsupno); //First column number of the current supernode
|
||||
int lda = m_glu.xusub(fsupc+1) - m_glu.xusub(fsupc);
|
||||
lda = m_glu.xlsub(fsupc+1)-m_glu.xlsub(fsupc);
|
||||
new_next = nextlu + lda * (kcol - jcol + 1);
|
||||
int mem;
|
||||
while (new_next > m_glu.nzlumax )
|
||||
// Numeric updates to this column
|
||||
VectorBlock<ScalarVector> dense_k(dense, k, m);
|
||||
VectorBlock<IndexVector> segrep_k(segrep, nseg1, m-nseg1);
|
||||
info = SparseLUBase<Scalar,Index>::LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu);
|
||||
if ( info )
|
||||
{
|
||||
mem = SparseLUBase<Scalar,Index>::LUMemXpand(m_glu.lusup, m_glu.nzlumax, nextlu, LUSUP, m_glu.num_expansions);
|
||||
if (mem)
|
||||
{
|
||||
std::cerr << "MEMORY ALLOCATION FAILED FOR L FACTOR \n";
|
||||
m_factorizationIsOk = false;
|
||||
return;
|
||||
}
|
||||
std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() \n";
|
||||
m_info = NumericalIssue;
|
||||
m_factorizationIsOk = false;
|
||||
return;
|
||||
}
|
||||
|
||||
// Now, left-looking factorize each column within the snode
|
||||
for (icol = jcol; icol<=kcol; icol++){
|
||||
m_glu.xusub(icol+1) = nextu;
|
||||
// Scatter into SPA dense(*)
|
||||
for (typename MatrixType::InnerIterator it(m_mat, icol); it; ++it)
|
||||
dense(it.row()) = it.value();
|
||||
|
||||
// Numeric update within the snode
|
||||
SparseLUBase<Scalar,Index>::LU_snode_bmod(icol, fsupc, dense, m_glu);
|
||||
|
||||
// Eliminate the current column
|
||||
info = SparseLUBase<Scalar,Index>::LU_pivotL(icol, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu);
|
||||
if ( info )
|
||||
{
|
||||
m_info = NumericalIssue;
|
||||
std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT " << info <<std::endl;
|
||||
m_factorizationIsOk = false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
jcol = icol; // The last column te be eliminated
|
||||
}
|
||||
else
|
||||
{ // Work on one panel of panel_size columns
|
||||
|
||||
// Adjust panel size so that a panel won't overlap with the next relaxed snode.
|
||||
int panel_size = m_perfv.panel_size; // upper bound on panel width
|
||||
for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++)
|
||||
// Copy the U-segments to ucol(*)
|
||||
info = SparseLUBase<Scalar,Index>::LU_copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu);
|
||||
if ( info )
|
||||
{
|
||||
if (relax_end(k) != IND_EMPTY)
|
||||
{
|
||||
panel_size = k - jcol;
|
||||
break;
|
||||
}
|
||||
std::cerr << "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() \n";
|
||||
m_info = NumericalIssue;
|
||||
m_factorizationIsOk = false;
|
||||
return;
|
||||
}
|
||||
if (k == n)
|
||||
panel_size = n - jcol;
|
||||
|
||||
// Symbolic outer factorization on a panel of columns
|
||||
SparseLUBase<Scalar,Index>::LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu);
|
||||
|
||||
// Numeric sup-panel updates in topological order
|
||||
SparseLUBase<Scalar,Index>::LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_perfv, m_glu);
|
||||
|
||||
// Sparse LU within the panel, and below the panel diagonal
|
||||
for ( jj = jcol; jj< jcol + panel_size; jj++)
|
||||
// Form the L-segment
|
||||
info = SparseLUBase<Scalar,Index>::LU_pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu);
|
||||
if ( info )
|
||||
{
|
||||
k = (jj - jcol) * m; // Column index for w-wide arrays
|
||||
|
||||
nseg = nseg1; // begin after all the panel segments
|
||||
//Depth-first-search for the current column
|
||||
VectorBlock<IndexVector> panel_lsubk(panel_lsub, k, m);
|
||||
VectorBlock<IndexVector> repfnz_k(repfnz, k, m);
|
||||
info = SparseLUBase<Scalar,Index>::LU_column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu);
|
||||
if ( info )
|
||||
{
|
||||
std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() \n";
|
||||
m_info = NumericalIssue;
|
||||
m_factorizationIsOk = false;
|
||||
return;
|
||||
}
|
||||
// Numeric updates to this column
|
||||
VectorBlock<ScalarVector> dense_k(dense, k, m);
|
||||
VectorBlock<IndexVector> segrep_k(segrep, nseg1, m-nseg1);
|
||||
info = SparseLUBase<Scalar,Index>::LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu);
|
||||
if ( info )
|
||||
{
|
||||
std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() \n";
|
||||
m_info = NumericalIssue;
|
||||
m_factorizationIsOk = false;
|
||||
return;
|
||||
}
|
||||
|
||||
// Copy the U-segments to ucol(*)
|
||||
info = SparseLUBase<Scalar,Index>::LU_copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu);
|
||||
if ( info )
|
||||
{
|
||||
std::cerr << "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() \n";
|
||||
m_info = NumericalIssue;
|
||||
m_factorizationIsOk = false;
|
||||
return;
|
||||
}
|
||||
|
||||
// Form the L-segment
|
||||
info = SparseLUBase<Scalar,Index>::LU_pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu);
|
||||
if ( info )
|
||||
{
|
||||
std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT " << info <<std::endl;
|
||||
m_info = NumericalIssue;
|
||||
m_factorizationIsOk = false;
|
||||
return;
|
||||
}
|
||||
|
||||
// Prune columns (0:jj-1) using column jj
|
||||
SparseLUBase<Scalar,Index>::LU_pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu);
|
||||
|
||||
// Reset repfnz for this column
|
||||
for (i = 0; i < nseg; i++)
|
||||
{
|
||||
irep = segrep(i);
|
||||
repfnz_k(irep) = IND_EMPTY;
|
||||
}
|
||||
} // end SparseLU within the panel
|
||||
jcol += panel_size; // Move to the next panel
|
||||
} // end else
|
||||
std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT " << info <<std::endl;
|
||||
m_info = NumericalIssue;
|
||||
m_factorizationIsOk = false;
|
||||
return;
|
||||
}
|
||||
|
||||
// Prune columns (0:jj-1) using column jj
|
||||
SparseLUBase<Scalar,Index>::LU_pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu);
|
||||
|
||||
// Reset repfnz for this column
|
||||
for (i = 0; i < nseg; i++)
|
||||
{
|
||||
irep = segrep(i);
|
||||
repfnz_k(irep) = IND_EMPTY;
|
||||
}
|
||||
} // end SparseLU within the panel
|
||||
jcol += panel_size; // Move to the next panel
|
||||
} // end for -- end elimination
|
||||
|
||||
// Count the number of nonzeros in factors
|
||||
@ -628,4 +569,5 @@ struct solve_retval<SparseLU<_MatrixType,Derived>, Rhs>
|
||||
} // end namespace internal
|
||||
|
||||
} // End namespace Eigen
|
||||
|
||||
#endif
|
||||
|
@ -57,8 +57,6 @@ struct SparseLUBase
|
||||
#include "SparseLU_Memory.h"
|
||||
#include "SparseLU_heap_relax_snode.h"
|
||||
#include "SparseLU_relax_snode.h"
|
||||
#include "SparseLU_snode_dfs.h"
|
||||
#include "SparseLU_snode_bmod.h"
|
||||
#include "SparseLU_pivotL.h"
|
||||
#include "SparseLU_panel_dfs.h"
|
||||
#include "SparseLU_kernel_bmod.h"
|
||||
|
@ -1,84 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
/*
|
||||
|
||||
* NOTE: This file is the modified version of [s,d,c,z]snode_bmod.c file in SuperLU
|
||||
|
||||
* -- SuperLU routine (version 3.0) --
|
||||
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
||||
* and Lawrence Berkeley National Lab.
|
||||
* October 15, 2003
|
||||
*
|
||||
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
|
||||
*
|
||||
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
|
||||
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
|
||||
*
|
||||
* Permission is hereby granted to use or copy this program for any
|
||||
* purpose, provided the above notices are retained on all copies.
|
||||
* Permission to modify the code and to distribute modified code is
|
||||
* granted, provided the above notices are retained, and a notice that
|
||||
* the code was modified is included with the above copyright notice.
|
||||
*/
|
||||
#ifndef SPARSELU_SNODE_BMOD_H
|
||||
#define SPARSELU_SNODE_BMOD_H
|
||||
template <typename Scalar, typename Index>
|
||||
int SparseLUBase<Scalar,Index>::LU_snode_bmod (const int jcol, const int fsupc, ScalarVector& dense, GlobalLU_t& glu)
|
||||
{
|
||||
/* lsub : Compressed row subscripts of ( rectangular supernodes )
|
||||
* xlsub : xlsub[j] is the starting location of the j-th column in lsub(*)
|
||||
* lusup : Numerical values of the rectangular supernodes
|
||||
* xlusup[j] is the starting location of the j-th column in lusup(*)
|
||||
*/
|
||||
int nextlu = glu.xlusup(jcol); // Starting location of the next column to add
|
||||
int irow, isub;
|
||||
// Process the supernodal portion of L\U[*,jcol]
|
||||
for (isub = glu.xlsub(fsupc); isub < glu.xlsub(fsupc+1); isub++)
|
||||
{
|
||||
irow = glu.lsub(isub);
|
||||
glu.lusup(nextlu) = dense(irow);
|
||||
dense(irow) = 0;
|
||||
++nextlu;
|
||||
}
|
||||
// Make sure the leading dimension is a multiple of the underlying packet size
|
||||
// so that fast fully aligned kernels can be enabled:
|
||||
{
|
||||
Index lda = nextlu-glu.xlusup(jcol);
|
||||
int offset = internal::first_multiple<Index>(lda, internal::packet_traits<Scalar>::size) - lda;
|
||||
if(offset)
|
||||
{
|
||||
glu.lusup.segment(nextlu,offset).setZero();
|
||||
nextlu += offset;
|
||||
}
|
||||
}
|
||||
glu.xlusup(jcol + 1) = nextlu; // Initialize xlusup for next column ( jcol+1 )
|
||||
|
||||
if (fsupc < jcol ){
|
||||
int luptr = glu.xlusup(fsupc); // points to the first column of the supernode
|
||||
int nsupr = glu.xlsub(fsupc + 1) -glu.xlsub(fsupc); //Number of rows in the supernode
|
||||
int nsupc = jcol - fsupc; // Number of columns in the supernodal portion of L\U[*,jcol]
|
||||
int ufirst = glu.xlusup(jcol); // points to the beginning of column jcol in supernode L\U(jsupno)
|
||||
int lda = glu.xlusup(jcol+1) - ufirst;
|
||||
|
||||
int nrow = nsupr - nsupc; // Number of rows in the off-diagonal blocks
|
||||
|
||||
// Solve the triangular system for U(fsupc:jcol, jcol) with L(fspuc:jcol, fsupc:jcol)
|
||||
Map<Matrix<Scalar,Dynamic,Dynamic>,0,OuterStride<> > A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(lda) );
|
||||
VectorBlock<ScalarVector> u(glu.lusup, ufirst, nsupc);
|
||||
u = A.template triangularView<UnitLower>().solve(u); // Call the Eigen dense triangular solve interface
|
||||
|
||||
// Update the trailing part of the column jcol U(jcol:jcol+nrow, jcol) using L(jcol:jcol+nrow, fsupc:jcol) and U(fsupc:jcol)
|
||||
new (&A) Map<Matrix<Scalar,Dynamic,Dynamic>,0,OuterStride<> > ( &(glu.lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) );
|
||||
VectorBlock<ScalarVector> l(glu.lusup, ufirst+nsupc, nrow);
|
||||
l.noalias() -= A * u;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
@ -1,95 +0,0 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
|
||||
//
|
||||
// This Source Code Form is subject to the terms of the Mozilla
|
||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
/*
|
||||
|
||||
* NOTE: This file is the modified version of [s,d,c,z]snode_dfs.c file in SuperLU
|
||||
|
||||
* -- SuperLU routine (version 2.0) --
|
||||
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
||||
* and Lawrence Berkeley National Lab.
|
||||
* November 15, 1997
|
||||
*
|
||||
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
|
||||
*
|
||||
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
|
||||
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
|
||||
*
|
||||
* Permission is hereby granted to use or copy this program for any
|
||||
* purpose, provided the above notices are retained on all copies.
|
||||
* Permission to modify the code and to distribute modified code is
|
||||
* granted, provided the above notices are retained, and a notice that
|
||||
* the code was modified is included with the above copyright notice.
|
||||
*/
|
||||
#ifndef SPARSELU_SNODE_DFS_H
|
||||
#define SPARSELU_SNODE_DFS_H
|
||||
/**
|
||||
* \brief Determine the union of the row structures of those columns within the relaxed snode.
|
||||
* NOTE: The relaxed snodes are leaves of the supernodal etree, therefore,
|
||||
* the portion outside the rectangular supernode must be zero.
|
||||
*
|
||||
* \param jcol start of the supernode
|
||||
* \param kcol end of the supernode
|
||||
* \param asub Row indices
|
||||
* \param colptr Pointer to the beginning of each column
|
||||
* \param xprune (out) The pruned tree ??
|
||||
* \param marker (in/out) working vector
|
||||
* \return 0 on success, > 0 size of the memory when memory allocation failed
|
||||
*/
|
||||
template <typename Scalar, typename Index>
|
||||
int SparseLUBase<Scalar,Index>::LU_snode_dfs(const int jcol, const int kcol,const MatrixType& mat, IndexVector& xprune, IndexVector& marker, GlobalLU_t& glu)
|
||||
{
|
||||
int mem;
|
||||
Index nsuper = ++glu.supno(jcol); // Next available supernode number
|
||||
int nextl = glu.xlsub(jcol); //Index of the starting location of the jcol-th column in lsub
|
||||
int krow,kmark;
|
||||
for (int i = jcol; i <=kcol; i++)
|
||||
{
|
||||
// For each nonzero in A(*,i)
|
||||
for (typename MatrixType::InnerIterator it(mat, i); it; ++it)
|
||||
{
|
||||
krow = it.row();
|
||||
kmark = marker(krow);
|
||||
if ( kmark != kcol )
|
||||
{
|
||||
// First time to visit krow
|
||||
marker(krow) = kcol;
|
||||
glu.lsub(nextl++) = krow;
|
||||
if( nextl >= glu.nzlmax )
|
||||
{
|
||||
mem = LUMemXpand<IndexVector>(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions);
|
||||
if (mem) return mem; // Memory expansion failed... Return the memory allocated so far
|
||||
}
|
||||
}
|
||||
}
|
||||
glu.supno(i) = nsuper;
|
||||
}
|
||||
|
||||
// If supernode > 1, then make a copy of the subscripts for pruning
|
||||
if (jcol < kcol)
|
||||
{
|
||||
Index new_next = nextl + (nextl - glu.xlsub(jcol));
|
||||
while (new_next > glu.nzlmax)
|
||||
{
|
||||
mem = LUMemXpand<IndexVector>(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions);
|
||||
if (mem) return mem; // Memory expansion failed... Return the memory allocated so far
|
||||
}
|
||||
Index ifrom, ito = nextl;
|
||||
for (ifrom = glu.xlsub(jcol); ifrom < nextl;)
|
||||
glu.lsub(ito++) = glu.lsub(ifrom++);
|
||||
for (int i = jcol+1; i <=kcol; i++) glu.xlsub(i) = nextl;
|
||||
nextl = ito;
|
||||
}
|
||||
glu.xsup(nsuper+1) = kcol + 1; // Start of next available supernode
|
||||
glu.supno(kcol+1) = nsuper;
|
||||
xprune(kcol) = nextl;
|
||||
glu.xlsub(kcol+1) = nextl;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
Loading…
x
Reference in New Issue
Block a user