mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 19:59:05 +08:00
SparseLU: remove the "snode" path which appears to bring nearly zero speedup
This commit is contained in:
parent
ac8c694f3e
commit
90fcaf11cf
@ -455,64 +455,6 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
|
|||||||
int i, k, jj;
|
int i, k, jj;
|
||||||
for (jcol = 0; jcol < n; )
|
for (jcol = 0; jcol < n; )
|
||||||
{
|
{
|
||||||
if (relax_end(jcol) != IND_EMPTY)
|
|
||||||
{ // Starting a relaxed node from jcol
|
|
||||||
kcol = relax_end(jcol); // End index of the relaxed snode
|
|
||||||
|
|
||||||
// Factorize the relaxed supernode(jcol:kcol)
|
|
||||||
// First, determine the union of the row structure of the snode
|
|
||||||
info = SparseLUBase<Scalar,Index>::LU_snode_dfs(jcol, kcol, m_mat, xprune, marker, m_glu);
|
|
||||||
if ( info )
|
|
||||||
{
|
|
||||||
std::cerr << "MEMORY ALLOCATION FAILED IN SNODE_DFS() \n";
|
|
||||||
m_info = NumericalIssue;
|
|
||||||
m_factorizationIsOk = false;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
nextu = m_glu.xusub(jcol); //starting location of column jcol in ucol
|
|
||||||
nextlu = m_glu.xlusup(jcol); //Starting location of column jcol in lusup (rectangular supernodes)
|
|
||||||
jsupno = m_glu.supno(jcol); // Supernode number which column jcol belongs to
|
|
||||||
fsupc = m_glu.xsup(jsupno); //First column number of the current supernode
|
|
||||||
int lda = m_glu.xusub(fsupc+1) - m_glu.xusub(fsupc);
|
|
||||||
lda = m_glu.xlsub(fsupc+1)-m_glu.xlsub(fsupc);
|
|
||||||
new_next = nextlu + lda * (kcol - jcol + 1);
|
|
||||||
int mem;
|
|
||||||
while (new_next > m_glu.nzlumax )
|
|
||||||
{
|
|
||||||
mem = SparseLUBase<Scalar,Index>::LUMemXpand(m_glu.lusup, m_glu.nzlumax, nextlu, LUSUP, m_glu.num_expansions);
|
|
||||||
if (mem)
|
|
||||||
{
|
|
||||||
std::cerr << "MEMORY ALLOCATION FAILED FOR L FACTOR \n";
|
|
||||||
m_factorizationIsOk = false;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Now, left-looking factorize each column within the snode
|
|
||||||
for (icol = jcol; icol<=kcol; icol++){
|
|
||||||
m_glu.xusub(icol+1) = nextu;
|
|
||||||
// Scatter into SPA dense(*)
|
|
||||||
for (typename MatrixType::InnerIterator it(m_mat, icol); it; ++it)
|
|
||||||
dense(it.row()) = it.value();
|
|
||||||
|
|
||||||
// Numeric update within the snode
|
|
||||||
SparseLUBase<Scalar,Index>::LU_snode_bmod(icol, fsupc, dense, m_glu);
|
|
||||||
|
|
||||||
// Eliminate the current column
|
|
||||||
info = SparseLUBase<Scalar,Index>::LU_pivotL(icol, m_diagpivotthresh, m_perm_r.indices(), iperm_c.indices(), pivrow, m_glu);
|
|
||||||
if ( info )
|
|
||||||
{
|
|
||||||
m_info = NumericalIssue;
|
|
||||||
std::cerr<< "THE MATRIX IS STRUCTURALLY SINGULAR ... ZERO COLUMN AT " << info <<std::endl;
|
|
||||||
m_factorizationIsOk = false;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
jcol = icol; // The last column te be eliminated
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{ // Work on one panel of panel_size columns
|
|
||||||
|
|
||||||
// Adjust panel size so that a panel won't overlap with the next relaxed snode.
|
// Adjust panel size so that a panel won't overlap with the next relaxed snode.
|
||||||
int panel_size = m_perfv.panel_size; // upper bound on panel width
|
int panel_size = m_perfv.panel_size; // upper bound on panel width
|
||||||
for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++)
|
for (k = jcol + 1; k < (std::min)(jcol+panel_size, n); k++)
|
||||||
@ -592,7 +534,6 @@ void SparseLU<MatrixType, OrderingType>::factorize(const MatrixType& matrix)
|
|||||||
}
|
}
|
||||||
} // end SparseLU within the panel
|
} // end SparseLU within the panel
|
||||||
jcol += panel_size; // Move to the next panel
|
jcol += panel_size; // Move to the next panel
|
||||||
} // end else
|
|
||||||
} // end for -- end elimination
|
} // end for -- end elimination
|
||||||
|
|
||||||
// Count the number of nonzeros in factors
|
// Count the number of nonzeros in factors
|
||||||
@ -628,4 +569,5 @@ struct solve_retval<SparseLU<_MatrixType,Derived>, Rhs>
|
|||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // End namespace Eigen
|
} // End namespace Eigen
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -57,8 +57,6 @@ struct SparseLUBase
|
|||||||
#include "SparseLU_Memory.h"
|
#include "SparseLU_Memory.h"
|
||||||
#include "SparseLU_heap_relax_snode.h"
|
#include "SparseLU_heap_relax_snode.h"
|
||||||
#include "SparseLU_relax_snode.h"
|
#include "SparseLU_relax_snode.h"
|
||||||
#include "SparseLU_snode_dfs.h"
|
|
||||||
#include "SparseLU_snode_bmod.h"
|
|
||||||
#include "SparseLU_pivotL.h"
|
#include "SparseLU_pivotL.h"
|
||||||
#include "SparseLU_panel_dfs.h"
|
#include "SparseLU_panel_dfs.h"
|
||||||
#include "SparseLU_kernel_bmod.h"
|
#include "SparseLU_kernel_bmod.h"
|
||||||
|
@ -1,84 +0,0 @@
|
|||||||
// This file is part of Eigen, a lightweight C++ template library
|
|
||||||
// for linear algebra.
|
|
||||||
//
|
|
||||||
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
|
|
||||||
//
|
|
||||||
// This Source Code Form is subject to the terms of the Mozilla
|
|
||||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
||||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
||||||
|
|
||||||
/*
|
|
||||||
|
|
||||||
* NOTE: This file is the modified version of [s,d,c,z]snode_bmod.c file in SuperLU
|
|
||||||
|
|
||||||
* -- SuperLU routine (version 3.0) --
|
|
||||||
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
|
||||||
* and Lawrence Berkeley National Lab.
|
|
||||||
* October 15, 2003
|
|
||||||
*
|
|
||||||
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
|
|
||||||
*
|
|
||||||
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
|
|
||||||
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted to use or copy this program for any
|
|
||||||
* purpose, provided the above notices are retained on all copies.
|
|
||||||
* Permission to modify the code and to distribute modified code is
|
|
||||||
* granted, provided the above notices are retained, and a notice that
|
|
||||||
* the code was modified is included with the above copyright notice.
|
|
||||||
*/
|
|
||||||
#ifndef SPARSELU_SNODE_BMOD_H
|
|
||||||
#define SPARSELU_SNODE_BMOD_H
|
|
||||||
template <typename Scalar, typename Index>
|
|
||||||
int SparseLUBase<Scalar,Index>::LU_snode_bmod (const int jcol, const int fsupc, ScalarVector& dense, GlobalLU_t& glu)
|
|
||||||
{
|
|
||||||
/* lsub : Compressed row subscripts of ( rectangular supernodes )
|
|
||||||
* xlsub : xlsub[j] is the starting location of the j-th column in lsub(*)
|
|
||||||
* lusup : Numerical values of the rectangular supernodes
|
|
||||||
* xlusup[j] is the starting location of the j-th column in lusup(*)
|
|
||||||
*/
|
|
||||||
int nextlu = glu.xlusup(jcol); // Starting location of the next column to add
|
|
||||||
int irow, isub;
|
|
||||||
// Process the supernodal portion of L\U[*,jcol]
|
|
||||||
for (isub = glu.xlsub(fsupc); isub < glu.xlsub(fsupc+1); isub++)
|
|
||||||
{
|
|
||||||
irow = glu.lsub(isub);
|
|
||||||
glu.lusup(nextlu) = dense(irow);
|
|
||||||
dense(irow) = 0;
|
|
||||||
++nextlu;
|
|
||||||
}
|
|
||||||
// Make sure the leading dimension is a multiple of the underlying packet size
|
|
||||||
// so that fast fully aligned kernels can be enabled:
|
|
||||||
{
|
|
||||||
Index lda = nextlu-glu.xlusup(jcol);
|
|
||||||
int offset = internal::first_multiple<Index>(lda, internal::packet_traits<Scalar>::size) - lda;
|
|
||||||
if(offset)
|
|
||||||
{
|
|
||||||
glu.lusup.segment(nextlu,offset).setZero();
|
|
||||||
nextlu += offset;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
glu.xlusup(jcol + 1) = nextlu; // Initialize xlusup for next column ( jcol+1 )
|
|
||||||
|
|
||||||
if (fsupc < jcol ){
|
|
||||||
int luptr = glu.xlusup(fsupc); // points to the first column of the supernode
|
|
||||||
int nsupr = glu.xlsub(fsupc + 1) -glu.xlsub(fsupc); //Number of rows in the supernode
|
|
||||||
int nsupc = jcol - fsupc; // Number of columns in the supernodal portion of L\U[*,jcol]
|
|
||||||
int ufirst = glu.xlusup(jcol); // points to the beginning of column jcol in supernode L\U(jsupno)
|
|
||||||
int lda = glu.xlusup(jcol+1) - ufirst;
|
|
||||||
|
|
||||||
int nrow = nsupr - nsupc; // Number of rows in the off-diagonal blocks
|
|
||||||
|
|
||||||
// Solve the triangular system for U(fsupc:jcol, jcol) with L(fspuc:jcol, fsupc:jcol)
|
|
||||||
Map<Matrix<Scalar,Dynamic,Dynamic>,0,OuterStride<> > A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(lda) );
|
|
||||||
VectorBlock<ScalarVector> u(glu.lusup, ufirst, nsupc);
|
|
||||||
u = A.template triangularView<UnitLower>().solve(u); // Call the Eigen dense triangular solve interface
|
|
||||||
|
|
||||||
// Update the trailing part of the column jcol U(jcol:jcol+nrow, jcol) using L(jcol:jcol+nrow, fsupc:jcol) and U(fsupc:jcol)
|
|
||||||
new (&A) Map<Matrix<Scalar,Dynamic,Dynamic>,0,OuterStride<> > ( &(glu.lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) );
|
|
||||||
VectorBlock<ScalarVector> l(glu.lusup, ufirst+nsupc, nrow);
|
|
||||||
l.noalias() -= A * u;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
#endif
|
|
@ -1,95 +0,0 @@
|
|||||||
// This file is part of Eigen, a lightweight C++ template library
|
|
||||||
// for linear algebra.
|
|
||||||
//
|
|
||||||
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
|
|
||||||
//
|
|
||||||
// This Source Code Form is subject to the terms of the Mozilla
|
|
||||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
||||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
||||||
|
|
||||||
/*
|
|
||||||
|
|
||||||
* NOTE: This file is the modified version of [s,d,c,z]snode_dfs.c file in SuperLU
|
|
||||||
|
|
||||||
* -- SuperLU routine (version 2.0) --
|
|
||||||
* Univ. of California Berkeley, Xerox Palo Alto Research Center,
|
|
||||||
* and Lawrence Berkeley National Lab.
|
|
||||||
* November 15, 1997
|
|
||||||
*
|
|
||||||
* Copyright (c) 1994 by Xerox Corporation. All rights reserved.
|
|
||||||
*
|
|
||||||
* THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
|
|
||||||
* EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted to use or copy this program for any
|
|
||||||
* purpose, provided the above notices are retained on all copies.
|
|
||||||
* Permission to modify the code and to distribute modified code is
|
|
||||||
* granted, provided the above notices are retained, and a notice that
|
|
||||||
* the code was modified is included with the above copyright notice.
|
|
||||||
*/
|
|
||||||
#ifndef SPARSELU_SNODE_DFS_H
|
|
||||||
#define SPARSELU_SNODE_DFS_H
|
|
||||||
/**
|
|
||||||
* \brief Determine the union of the row structures of those columns within the relaxed snode.
|
|
||||||
* NOTE: The relaxed snodes are leaves of the supernodal etree, therefore,
|
|
||||||
* the portion outside the rectangular supernode must be zero.
|
|
||||||
*
|
|
||||||
* \param jcol start of the supernode
|
|
||||||
* \param kcol end of the supernode
|
|
||||||
* \param asub Row indices
|
|
||||||
* \param colptr Pointer to the beginning of each column
|
|
||||||
* \param xprune (out) The pruned tree ??
|
|
||||||
* \param marker (in/out) working vector
|
|
||||||
* \return 0 on success, > 0 size of the memory when memory allocation failed
|
|
||||||
*/
|
|
||||||
template <typename Scalar, typename Index>
|
|
||||||
int SparseLUBase<Scalar,Index>::LU_snode_dfs(const int jcol, const int kcol,const MatrixType& mat, IndexVector& xprune, IndexVector& marker, GlobalLU_t& glu)
|
|
||||||
{
|
|
||||||
int mem;
|
|
||||||
Index nsuper = ++glu.supno(jcol); // Next available supernode number
|
|
||||||
int nextl = glu.xlsub(jcol); //Index of the starting location of the jcol-th column in lsub
|
|
||||||
int krow,kmark;
|
|
||||||
for (int i = jcol; i <=kcol; i++)
|
|
||||||
{
|
|
||||||
// For each nonzero in A(*,i)
|
|
||||||
for (typename MatrixType::InnerIterator it(mat, i); it; ++it)
|
|
||||||
{
|
|
||||||
krow = it.row();
|
|
||||||
kmark = marker(krow);
|
|
||||||
if ( kmark != kcol )
|
|
||||||
{
|
|
||||||
// First time to visit krow
|
|
||||||
marker(krow) = kcol;
|
|
||||||
glu.lsub(nextl++) = krow;
|
|
||||||
if( nextl >= glu.nzlmax )
|
|
||||||
{
|
|
||||||
mem = LUMemXpand<IndexVector>(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions);
|
|
||||||
if (mem) return mem; // Memory expansion failed... Return the memory allocated so far
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
glu.supno(i) = nsuper;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If supernode > 1, then make a copy of the subscripts for pruning
|
|
||||||
if (jcol < kcol)
|
|
||||||
{
|
|
||||||
Index new_next = nextl + (nextl - glu.xlsub(jcol));
|
|
||||||
while (new_next > glu.nzlmax)
|
|
||||||
{
|
|
||||||
mem = LUMemXpand<IndexVector>(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions);
|
|
||||||
if (mem) return mem; // Memory expansion failed... Return the memory allocated so far
|
|
||||||
}
|
|
||||||
Index ifrom, ito = nextl;
|
|
||||||
for (ifrom = glu.xlsub(jcol); ifrom < nextl;)
|
|
||||||
glu.lsub(ito++) = glu.lsub(ifrom++);
|
|
||||||
for (int i = jcol+1; i <=kcol; i++) glu.xlsub(i) = nextl;
|
|
||||||
nextl = ito;
|
|
||||||
}
|
|
||||||
glu.xsup(nsuper+1) = kcol + 1; // Start of next available supernode
|
|
||||||
glu.supno(kcol+1) = nsuper;
|
|
||||||
xprune(kcol) = nextl;
|
|
||||||
glu.xlsub(kcol+1) = nextl;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
#endif
|
|
Loading…
x
Reference in New Issue
Block a user