SparseLU: remove the "snode" path which appears to bring nearly zero speedup

This commit is contained in:
Gael Guennebaud 2012-10-30 15:17:58 +01:00
parent ac8c694f3e
commit 90fcaf11cf
4 changed files with 66 additions and 305 deletions

View File

@ -455,144 +455,85 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
int i, k, jj;
for (jcol = 0; jcol < n; )
{
if (relax_end(jcol) != IND_EMPTY)
{ // Starting a relaxed node from jcol
kcol = relax_end(jcol); // End index of the relaxed snode
// Adjust panel size so that a panel won't overlap with the next relaxed snode.
int panel_size = m_perfv.panel_size; // upper bound on panel width
for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++)
{
if (relax_end(k) != IND_EMPTY)
{
panel_size = k - jcol;
break;
}
}
if (k == n)
panel_size = n - jcol;
// Factorize the relaxed supernode(jcol:kcol)
// First, determine the union of the row structure of the snode
info = SparseLUBase<Scalar,Index>::LU_snode_dfs(jcol, kcol, m_mat, xprune, marker, m_glu);
// Symbolic outer factorization on a panel of columns
SparseLUBase<Scalar,Index>::LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu);
// Numeric sup-panel updates in topological order
SparseLUBase<Scalar,Index>::LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_perfv, m_glu);
// Sparse LU within the panel, and below the panel diagonal
for ( jj = jcol; jj< jcol + panel_size; jj++)
{
k = (jj - jcol) * m; // Column index for w-wide arrays
nseg = nseg1; // begin after all the panel segments
//Depth-first-search for the current column
VectorBlock<IndexVector> panel_lsubk(panel_lsub, k, m);
VectorBlock<IndexVector> repfnz_k(repfnz, k, m);
info = SparseLUBase<Scalar,Index>::LU_column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu);
if ( info )
{
std::cerr << "MEMORY ALLOCATION FAILED IN SNODE_DFS() \n";
std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() \n";
m_info = NumericalIssue;
m_factorizationIsOk = false;
return;
}
nextu = m_glu.xusub(jcol); //starting location of column jcol in ucol
nextlu = m_glu.xlusup(jcol); //Starting location of column jcol in lusup (rectangular supernodes)
jsupno = m_glu.supno(jcol); // Supernode number which column jcol belongs to
fsupc = m_glu.xsup(jsupno); //First column number of the current supernode
int lda = m_glu.xusub(fsupc+1) - m_glu.xusub(fsupc);
lda = m_glu.xlsub(fsupc+1)-m_glu.xlsub(fsupc);
new_next = nextlu + lda * (kcol - jcol + 1);
int mem;
while (new_next > m_glu.nzlumax )
// Numeric updates to this column
VectorBlock<ScalarVector> dense_k(dense, k, m);
VectorBlock<IndexVector> segrep_k(segrep, nseg1, m-nseg1);
info = SparseLUBase<Scalar,Index>::LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu);
if ( info )
{
mem = SparseLUBase<Scalar,Index>::LUMemXpand(m_glu.lusup, m_glu.nzlumax, nextlu, LUSUP, m_glu.num_expansions);
if (mem)
{
std::cerr << "MEMORY ALLOCATION FAILED FOR L FACTOR \n";
m_factorizationIsOk = false;
return;
}
std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() \n";
m_info = NumericalIssue;
m_factorizationIsOk = false;
return;
}
// Now, left-looking factorize each column within the snode
for (icol = jcol; icol<=kcol; icol++){
m_glu.xusub(icol+1) = nextu;
// Scatter into SPA dense(*)
for (typename MatrixType::InnerIterator it(m_mat, icol); it; ++it)
dense(it.row()) = it.value();
// Numeric update within the snode
SparseLUBase<Scalar,Index>::LU_snode_bmod(icol, fsupc, dense, m_glu);
// Eliminate the current column
info = SparseLUBase<Scalar,Index>::LU_pivotL(icol, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu);
if ( info )
{
m_info = NumericalIssue;
std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT " << info <<std::endl;
m_factorizationIsOk = false;
return;
}
}
jcol = icol; // The last column te be eliminated
}
else
{ // Work on one panel of panel_size columns
// Adjust panel size so that a panel won't overlap with the next relaxed snode.
int panel_size = m_perfv.panel_size; // upper bound on panel width
for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++)
// Copy the U-segments to ucol(*)
info = SparseLUBase<Scalar,Index>::LU_copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu);
if ( info )
{
if (relax_end(k) != IND_EMPTY)
{
panel_size = k - jcol;
break;
}
std::cerr << "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() \n";
m_info = NumericalIssue;
m_factorizationIsOk = false;
return;
}
if (k == n)
panel_size = n - jcol;
// Symbolic outer factorization on a panel of columns
SparseLUBase<Scalar,Index>::LU_panel_dfs(m, panel_size, jcol, m_mat, m_perm_r.indices(), nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, m_glu);
// Numeric sup-panel updates in topological order
SparseLUBase<Scalar,Index>::LU_panel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, m_perfv, m_glu);
// Sparse LU within the panel, and below the panel diagonal
for ( jj = jcol; jj< jcol + panel_size; jj++)
// Form the L-segment
info = SparseLUBase<Scalar,Index>::LU_pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu);
if ( info )
{
k = (jj - jcol) * m; // Column index for w-wide arrays
nseg = nseg1; // begin after all the panel segments
//Depth-first-search for the current column
VectorBlock<IndexVector> panel_lsubk(panel_lsub, k, m);
VectorBlock<IndexVector> repfnz_k(repfnz, k, m);
info = SparseLUBase<Scalar,Index>::LU_column_dfs(m, jj, m_perm_r.indices(), m_perfv.maxsuper, nseg, panel_lsubk, segrep, repfnz_k, xprune, marker, parent, xplore, m_glu);
if ( info )
{
std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_DFS() \n";
m_info = NumericalIssue;
m_factorizationIsOk = false;
return;
}
// Numeric updates to this column
VectorBlock<ScalarVector> dense_k(dense, k, m);
VectorBlock<IndexVector> segrep_k(segrep, nseg1, m-nseg1);
info = SparseLUBase<Scalar,Index>::LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_glu);
if ( info )
{
std::cerr << "UNABLE TO EXPAND MEMORY IN COLUMN_BMOD() \n";
m_info = NumericalIssue;
m_factorizationIsOk = false;
return;
}
// Copy the U-segments to ucol(*)
info = SparseLUBase<Scalar,Index>::LU_copy_to_ucol(jj, nseg, segrep, repfnz_k ,m_perm_r.indices(), dense_k, m_glu);
if ( info )
{
std::cerr << "UNABLE TO EXPAND MEMORY IN COPY_TO_UCOL() \n";
m_info = NumericalIssue;
m_factorizationIsOk = false;
return;
}
// Form the L-segment
info = SparseLUBase<Scalar,Index>::LU_pivotL(jj, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu);
if ( info )
{
std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT " << info <<std::endl;
m_info = NumericalIssue;
m_factorizationIsOk = false;
return;
}
// Prune columns (0:jj-1) using column jj
SparseLUBase<Scalar,Index>::LU_pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu);
// Reset repfnz for this column
for (i = 0; i < nseg; i++)
{
irep = segrep(i);
repfnz_k(irep) = IND_EMPTY;
}
} // end SparseLU within the panel
jcol += panel_size; // Move to the next panel
} // end else
std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT " << info <<std::endl;
m_info = NumericalIssue;
m_factorizationIsOk = false;
return;
}
// Prune columns (0:jj-1) using column jj
SparseLUBase<Scalar,Index>::LU_pruneL(jj, m_perm_r.indices(), pivrow, nseg, segrep, repfnz_k, xprune, m_glu);
// Reset repfnz for this column
for (i = 0; i < nseg; i++)
{
irep = segrep(i);
repfnz_k(irep) = IND_EMPTY;
}
} // end SparseLU within the panel
jcol += panel_size; // Move to the next panel
} // end for -- end elimination
// Count the number of nonzeros in factors
@ -628,4 +569,5 @@ struct solve_retval<SparseLU<_MatrixType,Derived>, Rhs>
} // end namespace internal
} // End namespace Eigen
#endif

View File

@ -57,8 +57,6 @@ struct SparseLUBase
#include "SparseLU_Memory.h"
#include "SparseLU_heap_relax_snode.h"
#include "SparseLU_relax_snode.h"
#include "SparseLU_snode_dfs.h"
#include "SparseLU_snode_bmod.h"
#include "SparseLU_pivotL.h"
#include "SparseLU_panel_dfs.h"
#include "SparseLU_kernel_bmod.h"

View File

@ -1,84 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/*
* NOTE: This file is the modified version of [s,d,c,z]snode_bmod.c file in SuperLU
* -- SuperLU routine (version 3.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* October 15, 2003
*
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
*
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
*
* Permission is hereby granted to use or copy this program for any
* purpose, provided the above notices are retained on all copies.
* Permission to modify the code and to distribute modified code is
* granted, provided the above notices are retained, and a notice that
* the code was modified is included with the above copyright notice.
*/
#ifndef SPARSELU_SNODE_BMOD_H
#define SPARSELU_SNODE_BMOD_H
template <typename Scalar, typename Index>
int SparseLUBase<Scalar,Index>::LU_snode_bmod (const int jcol, const int fsupc, ScalarVector& dense, GlobalLU_t& glu)
{
/* lsub : Compressed row subscripts of ( rectangular supernodes )
* xlsub : xlsub[j] is the starting location of the j-th column in lsub(*)
* lusup : Numerical values of the rectangular supernodes
* xlusup[j] is the starting location of the j-th column in lusup(*)
*/
int nextlu = glu.xlusup(jcol); // Starting location of the next column to add
int irow, isub;
// Process the supernodal portion of L\U[*,jcol]
for (isub = glu.xlsub(fsupc); isub < glu.xlsub(fsupc+1); isub++)
{
irow = glu.lsub(isub);
glu.lusup(nextlu) = dense(irow);
dense(irow) = 0;
++nextlu;
}
// Make sure the leading dimension is a multiple of the underlying packet size
// so that fast fully aligned kernels can be enabled:
{
Index lda = nextlu-glu.xlusup(jcol);
int offset = internal::first_multiple<Index>(lda, internal::packet_traits<Scalar>::size) - lda;
if(offset)
{
glu.lusup.segment(nextlu,offset).setZero();
nextlu += offset;
}
}
glu.xlusup(jcol + 1) = nextlu; // Initialize xlusup for next column ( jcol+1 )
if (fsupc < jcol ){
int luptr = glu.xlusup(fsupc); // points to the first column of the supernode
int nsupr = glu.xlsub(fsupc + 1) -glu.xlsub(fsupc); //Number of rows in the supernode
int nsupc = jcol - fsupc; // Number of columns in the supernodal portion of L\U[*,jcol]
int ufirst = glu.xlusup(jcol); // points to the beginning of column jcol in supernode L\U(jsupno)
int lda = glu.xlusup(jcol+1) - ufirst;
int nrow = nsupr - nsupc; // Number of rows in the off-diagonal blocks
// Solve the triangular system for U(fsupc:jcol, jcol) with L(fspuc:jcol, fsupc:jcol)
Map<Matrix<Scalar,Dynamic,Dynamic>,0,OuterStride<> > A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(lda) );
VectorBlock<ScalarVector> u(glu.lusup, ufirst, nsupc);
u = A.template triangularView<UnitLower>().solve(u); // Call the Eigen dense triangular solve interface
// Update the trailing part of the column jcol U(jcol:jcol+nrow, jcol) using L(jcol:jcol+nrow, fsupc:jcol) and U(fsupc:jcol)
new (&A) Map<Matrix<Scalar,Dynamic,Dynamic>,0,OuterStride<> > ( &(glu.lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) );
VectorBlock<ScalarVector> l(glu.lusup, ufirst+nsupc, nrow);
l.noalias() -= A * u;
}
return 0;
}
#endif

View File

@ -1,95 +0,0 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/*
* NOTE: This file is the modified version of [s,d,c,z]snode_dfs.c file in SuperLU
* -- SuperLU routine (version 2.0) --
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
* and Lawrence Berkeley National Lab.
* November 15, 1997
*
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
*
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
*
* Permission is hereby granted to use or copy this program for any
* purpose, provided the above notices are retained on all copies.
* Permission to modify the code and to distribute modified code is
* granted, provided the above notices are retained, and a notice that
* the code was modified is included with the above copyright notice.
*/
#ifndef SPARSELU_SNODE_DFS_H
#define SPARSELU_SNODE_DFS_H
/**
* \brief Determine the union of the row structures of those columns within the relaxed snode.
* NOTE: The relaxed snodes are leaves of the supernodal etree, therefore,
* the portion outside the rectangular supernode must be zero.
*
* \param jcol start of the supernode
* \param kcol end of the supernode
* \param asub Row indices
* \param colptr Pointer to the beginning of each column
* \param xprune (out) The pruned tree ??
* \param marker (in/out) working vector
* \return 0 on success, > 0 size of the memory when memory allocation failed
*/
template <typename Scalar, typename Index>
int SparseLUBase<Scalar,Index>::LU_snode_dfs(const int jcol, const int kcol,const MatrixType& mat, IndexVector& xprune, IndexVector& marker, GlobalLU_t& glu)
{
int mem;
Index nsuper = ++glu.supno(jcol); // Next available supernode number
int nextl = glu.xlsub(jcol); //Index of the starting location of the jcol-th column in lsub
int krow,kmark;
for (int i = jcol; i <=kcol; i++)
{
// For each nonzero in A(*,i)
for (typename MatrixType::InnerIterator it(mat, i); it; ++it)
{
krow = it.row();
kmark = marker(krow);
if ( kmark != kcol )
{
// First time to visit krow
marker(krow) = kcol;
glu.lsub(nextl++) = krow;
if( nextl >= glu.nzlmax )
{
mem = LUMemXpand<IndexVector>(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions);
if (mem) return mem; // Memory expansion failed... Return the memory allocated so far
}
}
}
glu.supno(i) = nsuper;
}
// If supernode > 1, then make a copy of the subscripts for pruning
if (jcol < kcol)
{
Index new_next = nextl + (nextl - glu.xlsub(jcol));
while (new_next > glu.nzlmax)
{
mem = LUMemXpand<IndexVector>(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions);
if (mem) return mem; // Memory expansion failed... Return the memory allocated so far
}
Index ifrom, ito = nextl;
for (ifrom = glu.xlsub(jcol); ifrom < nextl;)
glu.lsub(ito++) = glu.lsub(ifrom++);
for (int i = jcol+1; i <=kcol; i++) glu.xlsub(i) = nextl;
nextl = ito;
}
glu.xsup(nsuper+1) = kcol + 1; // Start of next available supernode
glu.supno(kcol+1) = nsuper;
xprune(kcol) = nextl;
glu.xlsub(kcol+1) = nextl;
return 0;
}
#endif