Memory management

This commit is contained in:
Desire NUENTSA 2012-06-07 19:06:22 +02:00
parent 268ba3b521
commit f091879d77
7 changed files with 206 additions and 194 deletions

View File

@ -48,7 +48,8 @@ class SparseLU
typedef SparseMatrix<Scalar,ColMajor,Index> NCMatrix;
typedef SuperNodalMatrix<Scalar, Index> SCMatrix;
typedef GlobalLU_t<Scalar, Index> Eigen_GlobalLU_t;
typedef Matrix<Scalar,Dynamic,1> VectorType;
typedef Matrix<Scalar,Dynamic,1> ScalarVector;
typedef Matrix<Index,Dynamic,1> IndexVector;
typedef PermutationMatrix<Dynamic, Dynamic, Index> PermutationType;
public:
SparseLU():m_isInitialized(true),m_symmetricmode(false),m_fact(DOFACT),m_diagpivotthresh(1.0)
@ -93,15 +94,15 @@ class SparseLU
fact_t m_fact;
NCMatrix m_mat; // The input (permuted ) matrix
SCMatrix m_Lstore; // The lower triangular matrix (supernodal)
NCMatrix m_Ustore; //The upper triangular matrix
NCMatrix m_Ustore; // The upper triangular matrix
PermutationType m_perm_c; // Column permutation
PermutationType m_iperm_c; // Column permutation
PermutationType m_perm_r ; // Row permutation
PermutationType m_iperm_r ; // Inverse row permutation
VectorXi m_etree; // Column elimination tree
IndexVector m_etree; // Column elimination tree
Scalar *m_work; //
Index *m_iwork; //
ScalarVector m_work; //
IndexVector m_iwork; //
static Eigen_GlobalLU_t m_Glu; // persistent data to facilitate multiple factors
// should be defined as a class member
// SuperLU/SparseLU options
@ -158,7 +159,7 @@ void SparseLU::analyzePattern(const MatrixType& mat)
// In symmetric mode, do not do postorder here
if (m_symmetricmode == false) {
VectorXi post, iwork;
IndexVector post, iwork;
// Post order etree
post = internal::TreePostorder(m_mat.cols(), m_etree);
@ -209,20 +210,20 @@ void SparseLU::factorize(const MatrixType& matrix)
int maxpanel = m_panel_size * m;
// Set up pointers for integer working arrays
Map<VectorXi> segrep(m_iwork, m); //
Map<VectorXi> parent(&segrep(0) + m, m); //
Map<VectorXi> xplore(&parent(0) + m, m); //
Map<VectorXi> repfnz(&xplore(0) + m, maxpanel); //
Map<VectorXi> panel_lsub(&repfnz(0) + maxpanel, maxpanel);//
Map<VectorXi> xprune(&panel_lsub(0) + maxpanel, n); //
Map<VectorXi> marker(&xprune(0)+n, m * LU_NO_MARKER); //
Map<IndexVector> segrep(&m_iwork(0), m); //
Map<IndexVector> parent(&segrep(0) + m, m); //
Map<IndexVector> xplore(&parent(0) + m, m); //
Map<IndexVector> repfnz(&xplore(0) + m, maxpanel); //
Map<IndexVector> panel_lsub(&repfnz(0) + maxpanel, maxpanel);//
Map<IndexVector> xprune(&panel_lsub(0) + maxpanel, n); //
Map<IndexVector> marker(&xprune(0)+n, m * LU_NO_MARKER); //
repfnz.setConstant(-1);
panel_lsub.setConstant(-1);
// Set up pointers for scalar working arrays
VectorType dense(maxpanel);
ScalarVector dense(maxpanel);
dense.setZero();
VectorType tempv(LU_NUM_TEMPV(m,m_panel_size,m_maxsuper,m_rowblk);
ScalarVector tempv(LU_NUM_TEMPV(m,m_panel_size,m_maxsuper,m_rowblk);
tempv.setZero();
// Setup Permutation vectors
@ -234,7 +235,7 @@ void SparseLU::factorize(const MatrixType& matrix)
iperm_c = m_perm_c.inverse();
// Identify initial relaxed snodes
VectorXi relax_end(n);
IndexVector relax_end(n);
if ( m_symmetricmode = true )
LU_heap_relax_snode(n, m_etree, m_relax, marker, relax_end);
else
@ -243,11 +244,12 @@ void SparseLU::factorize(const MatrixType& matrix)
m_perm_r.setConstant(-1);
marker.setConstant(-1);
VectorXi& xsup = m_Glu.xsup;
VectorXi& supno = m_GLu.supno;
VectorXi& xlsub = m_Glu.xlsub;
VectorXi& xlusup = m_GLu.xlusup;
VectorXi& xusub = m_Glu.xusub;
IndexVector& xsup = m_Glu.xsup;
IndexVector& supno = m_GLu.supno;
IndexVector& xlsub = m_Glu.xlsub;
IndexVector& xlusup = m_GLu.xlusup;
IndexVector& xusub = m_Glu.xusub;
Index& nzlumax = m_Glu.nzlumax;
supno(0) = IND_EMPTY;
xsup(0) = xlsub(0) = xusub(0) = xlusup(0);
@ -259,7 +261,7 @@ void SparseLU::factorize(const MatrixType& matrix)
// (b) panel_size contiguous columns, <panel_size> defined by the user
register int jcol,kcol;
int min_mn = std::min(m,n);
VectorXi panel_histo(n);
IndexVector panel_histo(n);
Index nextu, nextlu, jsupno, fsupc, new_next;
int pivrow; // Pivotal row number in the original row matrix
int nseg1; // Number of segments in U-column above panel row jcol
@ -274,7 +276,7 @@ void SparseLU::factorize(const MatrixType& matrix)
// Factorize the relaxed supernode(jcol:kcol)
// First, determine the union of the row structure of the snode
info = LU_snode_dfs(jcol, kcol, m_mat.innerIndexPtr(), m_mat.outerIndexPtr(), xprune, marker);
if ( !info )
if ( info )
{
m_info = NumericalIssue;
m_factorizationIsOk = false;
@ -288,8 +290,12 @@ void SparseLU::factorize(const MatrixType& matrix)
nzlumax = m_Glu.nzlumax;
while (new_next > nzlumax )
{
m_Glu.lusup = LUMemXpand<Scalar>(jcol, nextlu, LUSUP, nzlumax);
m_GLu.nzlumax = nzlumax;
mem = LUMemXpand<Scalar>(lusup, nzlumax, nextlu, LUSUP, m_Glu);
if (mem)
{
m_factorizationIsOk = false;
return;
}
}
// Now, left-looking factorize each column within the snode
for (icol = jcol; icol<=kcol; icol++){
@ -303,7 +309,7 @@ void SparseLU::factorize(const MatrixType& matrix)
// Eliminate the current column
info = LU_pivotL(icol, m_diagpivotthresh, m_perm_r, m_iperm_c, pivrow, m_Glu);
if ( !info )
if ( info )
{
m_info = NumericalIssue;
m_factorizationIsOk = false;
@ -341,8 +347,8 @@ void SparseLU::factorize(const MatrixType& matrix)
nseg = nseg1; // begin after all the panel segments
//Depth-first-search for the current column
VectorBlock<VectorXi> panel_lsubk(panel_lsub, k, m); //FIXME
VectorBlock<VectorXi> repfnz_k(repfnz, k, m); //FIXME
VectorBlock<IndexVector> panel_lsubk(panel_lsub, k, m); //FIXME
VectorBlock<IndexVector> repfnz_k(repfnz, k, m); //FIXME
info = LU_column_dfs(m, jj, perm_r, nseg, panel_lsub(k), segrep, repfnz_k, xprune, marker, parent, xplore, m_Glu);
if ( !info )
{
@ -351,10 +357,10 @@ void SparseLU::factorize(const MatrixType& matrix)
return;
}
// Numeric updates to this column
VectorBlock<VectorXi> dense_k(dense, k, m); //FIXME
VectorBlock<VectorXi> segrep_k(segrep, nseg1, m) // FIXME Check the length
VectorBlock<IndexVector> dense_k(dense, k, m); //FIXME
VectorBlock<IndexVector> segrep_k(segrep, nseg1, m) // FIXME Check the length
info = LU_column_bmod(jj, (nseg - nseg1), dense_k, tempv, segrep_k, repfnz_k, jcol, m_Glu);
if ( !info )
if ( info )
{
m_info = NumericalIssue;
m_factorizationIsOk = false;
@ -364,7 +370,7 @@ void SparseLU::factorize(const MatrixType& matrix)
// Copy the U-segments to ucol(*)
//FIXME Check that repfnz_k, dense_k... have stored references to modified columns
info = LU_copy_to_col(jj, nseg, segrep, repfnz_k, perm_r, dense_k, m_Glu);
if ( !info )
if ( info )
{
m_info = NumericalIssue;
m_factorizationIsOk = false;
@ -373,7 +379,7 @@ void SparseLU::factorize(const MatrixType& matrix)
// Form the L-segment
info = LU_pivotL(jj, m_diagpivotthresh, m_perm_r, iperm_c, pivrow, m_Glu);
if ( !info )
if ( info )
{
m_info = NumericalIssue;
m_factorizationIsOk = false;

View File

@ -53,7 +53,7 @@
namespace internal {
/**
* \brief Allocate various working space needed in the numerical factorization phase.
* \brief Allocate various working space failed in the numerical factorization phase.
* \param m number of rows of the input matrix
* \param n number of columns
* \param annz number of initial nonzeros in the matrix
@ -61,22 +61,21 @@ namespace internal {
* \param iwork Integer working space
* \param lwork if lwork=-1, this routine returns an estimated size of the required memory
* \param Glu persistent data to facilitate multiple factors : will be deleted later ??
* \return an estimated size of the required memory if lwork = -1;
* FIXME should also return the size of actually allocated when memory allocation failed
* NOTE Unlike SuperLU, this routine does not allow the user to provide the size to allocate
* \return an estimated size of the required memory if lwork = -1; otherwise, return the size of actually allocated when memory allocation failed
* NOTE Unlike SuperLU, this routine does not allow the user to provide its own user space
*/
template <typename ScalarVector,typename IndexVector>
int SparseLU::LUMemInit(int m, int n, int annz, Scalar *work, Index *iwork, int lwork, int fillratio, GlobalLU_t& Glu)
int SparseLU::LUMemInit(int m, int n, int annz, ScalarVector& work, IndexVector& iwork, int lwork, int fillratio, GlobalLU_t& Glu)
{
typedef typename ScalarVector::Scalar;
typedef typename IndexVector::Index;
Glu.num_expansions = 0; //No memory expansions so far
if (!Glu.expanders)
Glu.expanders = new ExpHeader(LU_NBR_MEMTYPE);
int& num_expansions = Glu.num_expansions; //No memory expansions so far
num_expansions = 0;
// Guess the size for L\U factors
int nzlmax, nzumax, nzlumax;
Index& nzlmax = Glu.nzlmax;
Index& nzumax = Glu.nzumax;
Index& nzlumax = Glu.nzlumax;
nzumax = nzlumax = m_fillratio * annz; // estimated number of nonzeros in U
nzlmax = std::max(1, m_fill_ratio/4.) * annz; // estimated nnz in L factor
@ -90,138 +89,145 @@ int SparseLU::LUMemInit(int m, int n, int annz, Scalar *work, Index *iwork, int
}
// Setup the required space
// NOTE: In SuperLU, there is an option to let the user provide its own space, unlike here.
// Allocate Integer pointers for L\U factors
Glu.supno = new IndexVector;
Glu.supno->resize(n+1);
Glu.xlsub = new IndexVector;
Glu.xlsub->resize(n+1);
Glu.xlusup = new IndexVector;
Glu.xlusup->resize(n+1);
Glu.xusub = new IndexVector;
Glu.xusub->resize(n+1);
// First allocate Integer pointers for L\U factors
Glu.supno.resize(n+1);
Glu.xlsub.resize(n+1);
Glu.xlusup.resize(n+1);
Glu.xusub.resize(n+1);
// Reserve memory for L/U factors
Glu.lusup = new ScalarVector;
Glu.ucol = new ScalarVector;
Glu.lsub = new IndexVector;
Glu.usub = new IndexVector;
expand<ScalarVector>(Glu.lusup,nzlumax, LUSUP, 0, 0, Glu);
expand<ScalarVector>(Glu.ucol,nzumax, UCOL, 0, 0, Glu);
expand<IndexVector>(Glu.lsub,nzlmax, LSUB, 0, 0, Glu);
expand<IndexVector>(Glu.usub,nzumax, USUB, 0, 1, Glu);
expand<ScalarVector>(Glu.lusup, nzlumax, 0, 0, num_expansions);
expand<ScalarVector>(Glu.ucol,nzumax, 0, 0, num_expansions);
expand<IndexVector>(Glu.lsub,nzlmax, 0, 0, num_expansions);
expand<IndexVector>(Glu.usub,nzumax, 0, 1, num_expansions);
// Check if the memory is correctly allocated,
// Should be a try... catch section here
while ( !Glu.lusup.size() || !Glu.ucol.size() || !Glu.lsub.size() || !Glu.usub.size())
{
//otherwise reduce the estimated size and retry
// delete [] Glu.lusup;
// delete [] Glu.ucol;
// delete [] Glu.lsub;
// delete [] Glu.usub;
//
nzlumax /= 2;
nzumax /= 2;
nzlmax /= 2;
//FIXME Should be an excpetion here
eigen_assert (nzlumax > annz && "Not enough memory to perform factorization");
//FIXME Should be an exception here
if (nzlumax < annz ) return nzlumax;
expand<ScalarVector>(Glu.lsup, nzlumax, LUSUP, 0, 0, Glu);
expand<ScalarVector>(Glu.ucol, nzumax, UCOL, 0, 0, Glu);
expand<IndexVector>(Glu.lsub, nzlmax, LSUB, 0, 0, Glu);
expand<IndexVector>(Glu.usub, nzumax, USUB, 0, 1, Glu);
expand<ScalarVector>(Glu.lsup, nzlumax, 0, 0, Glu);
expand<ScalarVector>(Glu.ucol, nzumax, 0, 0, Glu);
expand<IndexVector>(Glu.lsub, nzlmax, 0, 0, Glu);
expand<IndexVector>(Glu.usub, nzumax, 0, 1, Glu);
}
// LUWorkInit : Now, allocate known working storage
int isize = (2 * m_panel_size + 3 + LU_NO_MARKER) * m + n;
int dsize = m * m_panel_size + LU_NUM_TEMPV(m, m_panel_size, m_maxsuper, m_rowblk);
iwork = new Index(isize);
eigen_assert( (m_iwork != 0) && "Malloc fails for iwork");
work = new Scalar(dsize);
eigen_assert( (m_work != 0) && "Malloc fails for dwork");
iwork.resize(isize);
work.resize(isize);
++Glu.num_expansions;
++num_expansions;
return 0;
} // end LuMemInit
/**
* Expand the existing storage to accomodate more fill-ins
* \param vec Valid pointer to a vector to allocate or expand
* \param [in,out]prev_len At input, length from previous call. At output, length of the newly allocated vector
* \param type Which part of the memory to expand
* \param len_to_copy Size of the memory to be copied to new store
* \param keep_prev true: use prev_len; Do not expand this vector; false: compute new_len and expand
* \param vec Valid pointer to the vector to allocate or expand
* \param [in,out]length At input, contain the current length of the vector that is to be increased. At output, length of the newly allocated vector
* \param [in]len_to_copy Current number of elements in the factors
* \param keep_prev true: use length and do not expand the vector; false: compute new_len and expand
* \param [in,out]num_expansions Number of times the memory has been expanded
*/
template <typename VectorType >
int SparseLU::expand(VectorType& vec, int& prev_len, MemType type, int len_to_copy, bool keep_prev, GlobalLU_t& Glu)
int SparseLU::expand(VectorType& vec, int& length, int len_to_copy, bool keep_prev, int& num_expansions)
{
float alpha = 1.5; // Ratio of the memory increase
int new_len; // New size of the allocated memory
if(Glu.num_expansions == 0 || keep_prev)
new_len = prev_len; // First time allocate requested
if(num_expansions == 0 || keep_prev)
new_len = length ; // First time allocate requested
else
new_len = alpha * prev_len;
new_len = alpha * length ;
// Allocate new space
// vec = new VectorType(new_len);
VectorType old_vec(vec);
if ( Glu.num_expansions != 0 ) // The memory has been expanded before
VectorType old_vec; // Temporary vector to hold the previous values
if (len_to_copy > 0 )
old_vec = vec; // old_vec should be of size len_to_copy... to be checked
//expand the current vector //FIXME Should be in a try ... catch region
vec.resize(new_len);
/*
* Test if the memory has been well allocated
* otherwise reduce the size and try to reallocate
* copy data from previous vector (if exists) to the newly allocated vector
*/
if ( num_expansions != 0 ) // The memory has been expanded before
{
int tries = 0;
vec.resize(new_len); //expand the current vector
if (keep_prev)
{
if (!vec.size()) return -1 ; // FIXME could throw an exception somehow
if (!vec.size()) return new_len ;
}
else
{
while (!vec.size())
{
// Reduce the size and allocate again
if ( ++tries > 10) return -1
// Reduce the size and allocate again
if ( ++tries > 10) return new_len;
alpha = LU_Reduce(alpha);
new_len = alpha * prev_len;
vec->resize(new_len);
new_len = alpha * length ;
vec.resize(new_len); //FIXME Should be in a try catch section
}
} // end allocation
//Copy the previous values to the newly allocated space
for (int i = 0; i < old_vec.size(); i++)
vec(i) = old_vec(i);
if (len_to_copy > 0)
vec.segment(0, len_to_copy) = old_vec;
} // end expansion
// expanders[type].mem = vec;
// expanders[type].size = new_len;
prev_len = new_len;
if(Glu.num_expansions) ++Glu.num_expansions;
length = new_len;
if(num_expansions) ++num_expansions;
return 0;
}
/**
* \brief Expand the existing storage
*
* NOTE: The calling sequence of this function is different from that of SuperLU
*
* \return a pointer to the newly allocated space
* \param vec vector to expand
* \param [in,out]maxlen On input, previous size of vec (Number of elements to copy ). on output, new size
* \param next current number of elements in the vector.
* \param Glu Global data structure
* \return 0 on success, > 0 size of the memory allocated so far
*/
template <typename VectorType>
VectorType* SparseLU::LUMemXpand(int jcol, int next, MemType mem_type, int& maxlen)
template <typename IndexVector>
int SparseLU::LUMemXpand(VectorType& vec, int& maxlen, int next, LU_MemType memtype, LU_GlobalLu_t& Glu)
{
VectorType *newmem;
int failed_size;
int& num_expansions = Glu.num_expansions;
if (memtype == USUB)
vec = expand<VectorType>(vec, maxlen, mem_type, next, 1);
failed_size = expand<IndexVector>(vec, maxlen, next, 1, num_expansions);
else
vec = expand<VectorType>(vec, maxlen, mem_type, next, 0);
// FIXME Should be an exception instead of an assert
eigen_assert(new_mem.size() && "Can't expand memory");
failed_size = expand<IndexVector>(vec, maxlen, next, 0, num_expansions);
if (failed_size)
return faileld_size;
return new_mem;
// The following code is not really needed since maxlen is passed by reference
// and correspond to the appropriate field in Glu
// switch ( mem_type ) {
// case LUSUP:
// Glu.nzlumax = maxlen;
// break;
// case UCOL:
// Glu.nzumax = maxlen;
// break;
// case LSUB:
// Glu.nzlmax = maxlen;
// break;
// case USUB:
// Glu.nzumax = maxlen;
// break;
// }
return 0 ;
}

View File

@ -92,8 +92,9 @@ typedef enum {NATURAL, MMD_ATA, MMD_AT_PLUS_A, COLAMD, MY_PERMC} colperm_t;
typedef enum {DOFACT, SamePattern, Factored} fact_t;
typedef enum {LUSUP, UCOL, LSUB, USUB, LLVL, ULVL} MemType;
/** Headers for dynamically managed memory
\tparam IndexVectorType can be int, real scalar or complex scalar*/
/* Obsolete, headers for dynamically managed memory
\tparam VectorType can be int, real scalar or complex scalar*/
template <typename VectorType>
struct ExpHeader {
int size; // Length of the memory that has been used */
@ -118,7 +119,7 @@ struct {
Index n; // Number of columns in the matrix
int num_expansions;
ExpHeader *expanders; // Array of pointers to 4 types of memory
ExpHeader *expanders; // Deprecated... Array of pointers to 4 types of memory
} GlobalLU_t;
}// End namespace Eigen

View File

@ -59,8 +59,8 @@
* > 0 - number of bytes allocated when run out of space
*
*/
template <typename VectorType>
int SparseLU::LU_column_bmod(const int jcol, const int nseg, VectorType& dense, VectorType& tempv, VectorXi& segrep, VectorXi& repfnz, int fpanelc, LU_GlobalLu_t& Glu)
template <typename ScalarVector, typename IndexVector>
int SparseLU::LU_column_bmod(const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, IndexVector& segrep, IndexVector& repfnz, int fpanelc, LU_GlobalLu_t& Glu)
{
int jsupno, k, ksub, krep, krep_ind, ksupno;
@ -72,13 +72,14 @@ int SparseLU::LU_column_bmod(const int jcol, const int nseg, VectorType& dense,
* kfnz = first nonz in the k-th supernodal segment
* no-zeros = no lf leading zeros in a supernodal U-segment
*/
VectorXi& xsup = Glu.xsup;
VectorXi& supno = Glu.supno;
VectorXi& lsub = Glu.lsub;
VectorXi& xlsub = Glu.xlsub;
VectorXi& xlusup = Glu.xlusup;
VectorType& lusup = Glu.lusup;
int nzlumax = GLu.nzlumax;
IndexVector& xsup = Glu.xsup;
IndexVector& supno = Glu.supno;
IndexVector& lsub = Glu.lsub;
IndexVector& xlsub = Glu.xlsub;
IndexVector& xlusup = Glu.xlusup;
ScalarVector& lusup = Glu.lusup;
Index& nzlumax = Glu.nzlumax;
int jsupno = supno(jcol);
// For each nonzero supernode segment of U[*,j] in topological order
k = nseg - 1;
@ -126,13 +127,13 @@ int SparseLU::LU_column_bmod(const int jcol, const int nseg, VectorType& dense,
luptr += nsupr * no_zeros + no_zeros;
// Form Eigen matrix and vector
Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) );
Map<VectorType> u(tempv.data(), segsize);
Map<ScalarVector> u(tempv.data(), segsize);
u = A.triangularView<Lower>().solve(u);
// Dense matrix-vector product y <-- A*x
luptr += segsize;
new (&A) (&A) Map<Matrix<Scalar,Dynamic, Dynamic>, 0, OuterStride<> > ( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(nsupr) );
Map<VectorType> l( &(tempv.data()[segsize]), segsize);
Map<ScalarVector> l( &(tempv.data()[segsize]), segsize);
l= A * u;
// Scatter tempv[] into SPA dense[] as a temporary storage
@ -164,10 +165,9 @@ int SparseLU::LU_column_bmod(const int jcol, const int nseg, VectorType& dense,
new_next = nextlu + xlsub(fsupc + 1) - xlsub(fsupc);
while (new_next > nzlumax )
{
Glu.lusup = LUmemXpand<Scalar>(jcol, nextlu, LUSUP, &nzlumax);
Glu.nzlumax = nzlumax;
lusup = Glu.lusup;
lsub = Glu.lsub;
mem = LUmemXpand<Scalar>(Glu.lusup, nzlumax, nextlu, LUSUP, Glu);
if (mem) return mem;
lsub = Glu.lsub; //FIXME Why is it updated here.
}
for (isub = xlsub(fsupc); isub < xlsub(fsupc+1); isub++)
@ -203,11 +203,11 @@ int SparseLU::LU_column_bmod(const int jcol, const int nseg, VectorType& dense,
// points to the beginning of jcol in snode L\U(jsupno)
ufirst = xlusup(jcol) + d_fsupc;
Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A( &(lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) );
Map<VectorType> l( &(lusup.data()[ufirst]), nsupc );
Map<ScalarVector> l( &(lusup.data()[ufirst]), nsupc );
u = A.triangularView().solve(u);
new (&A) Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > ( &(lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) );
Map<VectorType> l( &(lusup.data()[ufirst+nsupc]), nsupr );
Map<ScalarVector> l( &(lusup.data()[ufirst+nsupc]), nsupr );
l = l - A * u;
} // End if fst_col

View File

@ -70,9 +70,10 @@
* > 0 number of bytes allocated when run out of space
*
*/
int SparseLU::LU_column_dfs(const int m, const int jcol, VectorXi& perm_r, VectorXi& nseg VectorXi& lsub_col, VectorXi& segrep, VectorXi& repfnz, VectorXi& xprune, VectorXi& marker, VectorXi& parent, VectorXi& xplore, LU_GlobalLu_t& Glu)
template <typename IndexVector>
int SparseLU::LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, IndexVector& nseg IndexVector& lsub_col, IndexVector& segrep, IndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLu_t& Glu)
{
typedef typename VectorXi::Index;
typedef typename IndexVector::IndexVector;
int jcolp1, jcolm1, jsuper, nsuper, nextl;
int krow; // Row index of the current element
@ -82,17 +83,18 @@ int SparseLU::LU_column_dfs(const int m, const int jcol, VectorXi& perm_r, Vecto
int chperm, chmark, chrep, oldrep, kchild;
int myfnz; // First nonzero element in the current column
int xdfs, maxdfs, kpar;
int mem;
// Initialize pointers
VectorXi& xsup = Glu.xsup;
VectorXi& supno = Glu.supno;
VectorXi& lsub = Glu.lsub;
VectorXi& xlsub = Glu.xlsub;
IndexVector& xsup = Glu.xsup;
IndexVector& supno = Glu.supno;
IndexVector& lsub = Glu.lsub;
IndexVector& xlsub = Glu.xlsub;
IndexVector& nzlmax = Glu.nzlmax;
nsuper = supno(jcol);
jsuper = nsuper;
nextl = xlsup(jcol);
VectorBlock<VectorXi> marker2(marker, 2*m, m);
VectorBlock<IndexVector> marker2(marker, 2*m, m);
// For each nonzero in A(*,jcol) do dfs
for (k = 0; lsub_col[k] != IND_EMPTY; k++)
{
@ -113,10 +115,8 @@ int SparseLU::LU_column_dfs(const int m, const int jcol, VectorXi& perm_r, Vecto
lsub(nextl++) = krow; // krow is indexed into A
if ( nextl >= nzlmax )
{
Glu.lsub = LUMemXpand<Index>(jcol, nextl, LSUB, nzlmax);
//FIXME try... catch out of space
Glu.nzlmax = nzlmax;
lsub = Glu.lsub;
mem = LUMemXpand<IndexVector>(lsub, nzlmax, nextl, LSUB, Glu);
if ( mem ) return mem;
}
if (kmark != jcolm1) jsuper = IND_EMPTY; // Row index subset testing
}
@ -163,10 +163,8 @@ int SparseLU::LU_column_dfs(const int m, const int jcol, VectorXi& perm_r, Vecto
lsub(nextl++) = kchild;
if (nextl >= nzlmax)
{
Glu.lsub = LUMemXpand<Index>(jcol, nextl, LSUB, nzlmax);
//FIXME Catch out of space errors
GLu.nzlmax = nzlmax;
lsub = Glu.lsub;
mem = LUMemXpand<IndexVector>(lsub, nzlmax, nextl, LSUB);
if (mem) return mem;
}
if (chmark != jcolm1) jsuper = IND_EMPTY;
}

View File

@ -58,26 +58,28 @@
* > 0 - number of bytes allocated when run out of space
*
*/
template <typename VectorType>
int SparseLU::LU_copy_to_ucol(const int jcol, const int nseg, VectorXi& segrep, VectorXi& repfnz, VectorXi& perm_r, VectorType& dense, LU_GlobalLu_t& Glu)
template <typename ScalarVector, typename IndexVector>
int SparseLU::LU_copy_to_ucol(const int jcol, const int nseg, IndexVector& segrep, IndexVector& repfnz, IndexVector& perm_r, ScalarVector& dense, LU_GlobalLu_t& Glu)
{
int ksupno, k, ksub, krep, ksupno;
Index ksupno, k, ksub, krep, ksupno;
typedef typename IndexVector::Index;
VectorXi& xsup = Glu.xsup;
VectorXi& supno = Glu.supno;
VectorXi& lsub = Glu.lsub;
VectorXi& xlsub = Glu.xlsub;
VectorType& ucol = GLu.ucol;
VectorXi& usub = Glu.usub;
VectorXi& xusub = Glu.xusub;
int nzumax = GLu.nzumax;
int jsupno = supno(jcol);
IndexVector& xsup = Glu.xsup;
IndexVector& supno = Glu.supno;
IndexVector& lsub = Glu.lsub;
IndexVector& xlsub = Glu.xlsub;
ScalarVector& ucol = GLu.ucol;
IndexVector& usub = Glu.usub;
IndexVector& xusub = Glu.xusub;
Index& nzumax = Glu.nzumax;
Index jsupno = supno(jcol);
// For each nonzero supernode segment of U[*,j] in topological order
k = nseg - 1;
int nextu = xusub(jcol);
int kfnz, isub, segsize;
int new_next,irow;
Index nextu = xusub(jcol);
Index kfnz, isub, segsize;
Index new_next,irow;
for (ksub = 0; ksub < nseg; ksub++)
{
krep = segrep(k); k--;
@ -93,13 +95,12 @@ int SparseLU::LU_copy_to_ucol(const int jcol, const int nseg, VectorXi& segrep,
new_next = nextu + segsize;
while (new_next > nzumax)
{
Glu.ucol = LU_MemXpand<Scalar>(jcol, nextu, UCOL, nzumax); //FIXME try and catch errors
ucol = Glu.ucol;
Glu.nzumax = nzumax;
Glu.usub = LU_MemXpand<Index>(jcol, nextu, USUB, nzumax); //FIXME try and catch errors
Glu.nzumax = nzumax;
usub = Glu.usub;
lsub = Glu.lsub;
mem = LU_MemXpand<ScalarVector>(ucol, nzumax, nextu, UCOL, Glu);
if (mem) return mem;
mem = LU_MemXpand<Index>(usub, nzumax, nextu, USUB, Glu);
if (mem) return mem;
lsub = Glu.lsub; //FIXME Why setting this as well ??
}
for (i = 0; i < segsize; i++)

View File

@ -55,17 +55,19 @@
* \param colptr Pointer to the beginning of each column
* \param xprune (out) The pruned tree ??
* \param marker (in/out) working vector
* \return 0 on success, > 0 size of the memory when memory allocation failed
*/
template <typename Index>
int SparseLU::LU_snode_dfs(const int jcol, const int kcol, const VectorXi* asub, const VectorXi* colptr,
VectorXi& xprune, VectorXi& marker, LU_GlobalLu_t *m_Glu)
template <typename IndexVector>
int SparseLU::LU_snode_dfs(const int jcol, const int kcol, const IndexVector* asub, const IndexVector* colptr, IndexVector& xprune, IndexVector& marker, LU_GlobalLu_t& Glu)
{
VectorXi& xsup = m_Glu->xsup;
VectorXi& supno = m_Glu->supno; // Supernode number corresponding to this column
VectorXi& lsub = m_Glu->lsub;
VectorXi& xlsub = m_Glu->xlsub;
int nsuper = ++supno(jcol); // Next available supernode number
typedef typename IndexVector::Index;
IndexVector& xsup = Glu.xsup;
IndexVector& supno = Glu.supno; // Supernode number corresponding to this column
IndexVector& lsub = Glu.lsub;
IndexVector& xlsub = Glu.xlsub;
Index& nzlmax = Glu.nzlmax;
int mem;
Index nsuper = ++supno(jcol); // Next available supernode number
register int nextl = xlsub(jcol); //Index of the starting location of the jcol-th column in lsub
register int i,k;
int krow,kmark;
@ -83,26 +85,24 @@
lsub(nextl++) = krow;
if( nextl >= nzlmax )
{
m_Glu->lsub = LUMemXpand<Index>(jcol, nextl, LSUB, nzlmax);
m_Glu->nzlmax = nzlmax;
lsub = m_Glu->lsub;
mem = LUMemXpand<IndexVector>(lsub, nzlmax, nextl, LSUB, Glu);
if (mem) return mem;
}
}
}
supno(i) = nsuper;
supno(i) = nsuper;
}
// If supernode > 1, then make a copy of the subscripts for pruning
if (jcol < kcol)
{
int new_next = nextl + (nextl - xlsub(jcol));
Index new_next = nextl + (nextl - xlsub(jcol));
while (new_next > nzlmax)
{
m_Glu->lsub = LUMemXpand<Index>(jcol, nextl, LSUB, &nzlmax);
m_Glu->nzlmax= nzlmax;
lsub = m_Glu->lsub;
mem = LUMemXpand<IndexVector>(lsub, nzlmax, nextl, LSUB, Glu);
if (mem) return mem;
}
int ifrom, ito = nextl;
Index ifrom, ito = nextl;
for (ifrom = xlsub(jcol); ifrom < nextl;)
lsub(ito++) = lsub(ifrom++);
for (i = jcol+1; i <=kcol; i++)xlsub(i) = nextl;