Prefix with glu, the global structure

This commit is contained in:
Desire NUENTSA 2012-08-03 16:36:00 +02:00
parent 03509d1387
commit 70db61c269
11 changed files with 156 additions and 211 deletions

View File

@ -130,19 +130,15 @@ int LUMemInit(int m, int n, int annz, int lwork, int fillratio, int panel_size,
int& num_expansions = glu.num_expansions; //No memory expansions so far
num_expansions = 0;
// Guess the size for L\U factors
Index& nzlmax = glu.nzlmax;
Index& nzumax = glu.nzumax;
Index& nzlumax = glu.nzlumax;
nzumax = nzlumax = std::max(fillratio * annz, m*n); // estimated number of nonzeros in U
nzlmax = std::max(1., fillratio/4.) * annz; // estimated nnz in L factor
glu.nzumax = glu.nzlumax = std::max(fillratio * annz, m*n); // estimated number of nonzeros in U
glu.nzlmax = std::max(1., fillratio/4.) * annz; // estimated nnz in L factor
// Return the estimated size to the user if necessary
if (lwork == IND_EMPTY)
{
int estimated_size;
estimated_size = LU_GluIntArray(n) * sizeof(Index) + LU_TempSpace(m, panel_size)
+ (nzlmax + nzumax) * sizeof(Index) + (nzlumax+nzumax) * sizeof(Scalar) + n;
+ (glu.nzlmax + glu.nzumax) * sizeof(Index) + (glu.nzlumax+glu.nzumax) * sizeof(Scalar) + n;
return estimated_size;
}
@ -160,18 +156,18 @@ int LUMemInit(int m, int n, int annz, int lwork, int fillratio, int panel_size,
{
try
{
expand<ScalarVector>(glu.lusup, nzlumax, 0, 0, num_expansions);
expand<ScalarVector>(glu.ucol,nzumax, 0, 0, num_expansions);
expand<IndexVector>(glu.lsub,nzlmax, 0, 0, num_expansions);
expand<IndexVector>(glu.usub,nzumax, 0, 1, num_expansions);
expand<ScalarVector>(glu.lusup, glu.nzlumax, 0, 0, num_expansions);
expand<ScalarVector>(glu.ucol,glu.nzumax, 0, 0, num_expansions);
expand<IndexVector>(glu.lsub,glu.nzlmax, 0, 0, num_expansions);
expand<IndexVector>(glu.usub,glu.nzumax, 0, 1, num_expansions);
}
catch(std::bad_alloc& )
{
//Reduce the estimated size and retry
nzlumax /= 2;
nzumax /= 2;
nzlmax /= 2;
if (nzlumax < annz ) return nzlumax;
glu.nzlumax /= 2;
glu.nzumax /= 2;
glu.nzlmax /= 2;
if (glu.nzlumax < annz ) return glu.nzlumax;
}
} while (!glu.lusup.size() || !glu.ucol.size() || !glu.lsub.size() || !glu.usub.size());

View File

@ -12,12 +12,12 @@
#define EIGEN_SPARSELU_UTILS_H
/**
* \brief Count Nonzero elements in the factors
*/
template <typename IndexVector, typename ScalarVector>
void LU_countnz(const int n, int& nnzL, int& nnzU, LU_GlobalLU_t<IndexVector, ScalarVector>& glu)
{
IndexVector& xsup = glu.xsup;
IndexVector& xlsub = glu.xlsub;
nnzL = 0;
nnzU = (glu.xusub)(n);
int nsuper = (glu.supno)(n);
@ -27,10 +27,10 @@ void LU_countnz(const int n, int& nnzL, int& nnzU, LU_GlobalLU_t<IndexVector, Sc
// For each supernode
for (i = 0; i <= nsuper; i++)
{
fsupc = xsup(i);
jlen = xlsub(fsupc+1) - xlsub(fsupc);
fsupc = glu.xsup(i);
jlen = glu.xlsub(fsupc+1) - glu.xlsub(fsupc);
for (j = fsupc; j < xsup(i+1); j++)
for (j = fsupc; j < glu.xsup(i+1); j++)
{
nnzL += jlen;
nnzU += j - fsupc + 1;
@ -50,9 +50,6 @@ template <typename IndexVector, typename ScalarVector>
void LU_fixupL(const int n, const IndexVector& perm_r, LU_GlobalLU_t<IndexVector, ScalarVector>& glu)
{
int fsupc, i, j, k, jstart;
IndexVector& xsup = glu.xsup;
IndexVector& lsub = glu.lsub;
IndexVector& xlsub = glu.xlsub;
int nextl = 0;
int nsuper = (glu.supno)(n);
@ -60,19 +57,19 @@ void LU_fixupL(const int n, const IndexVector& perm_r, LU_GlobalLU_t<IndexVector
// For each supernode
for (i = 0; i <= nsuper; i++)
{
fsupc = xsup(i);
jstart = xlsub(fsupc);
xlsub(fsupc) = nextl;
for (j = jstart; j < xlsub(fsupc + 1); j++)
fsupc = glu.xsup(i);
jstart = glu.xlsub(fsupc);
glu.xlsub(fsupc) = nextl;
for (j = jstart; j < glu.xlsub(fsupc + 1); j++)
{
lsub(nextl) = perm_r(lsub(j)); // Now indexed into P*A
glu.lsub(nextl) = perm_r(glu.lsub(j)); // Now indexed into P*A
nextl++;
}
for (k = fsupc+1; k < xsup(i+1); k++)
xlsub(k) = nextl; // other columns in supernode i
for (k = fsupc+1; k < glu.xsup(i+1); k++)
glu.xlsub(k) = nextl; // other columns in supernode i
}
xlsub(n) = nextl;
glu.xlsub(n) = nextl;
}
#endif

View File

@ -62,15 +62,8 @@ int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, Sca
* kfnz = first nonz in the k-th supernodal segment
* no_zeros = no lf leading zeros in a supernodal U-segment
*/
IndexVector& xsup = glu.xsup;
IndexVector& supno = glu.supno;
IndexVector& lsub = glu.lsub;
IndexVector& xlsub = glu.xlsub;
IndexVector& xlusup = glu.xlusup;
ScalarVector& lusup = glu.lusup;
Index& nzlumax = glu.nzlumax;
jsupno = supno(jcol);
jsupno = glu.supno(jcol);
// For each nonzero supernode segment of U[*,j] in topological order
k = nseg - 1;
int d_fsupc; // distance between the first column of the current panel and the
@ -80,26 +73,26 @@ int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, Sca
for (ksub = 0; ksub < nseg; ksub++)
{
krep = segrep(k); k--;
ksupno = supno(krep);
ksupno = glu.supno(krep);
if (jsupno != ksupno )
{
// outside the rectangular supernode
fsupc = xsup(ksupno);
fsupc = glu.xsup(ksupno);
fst_col = std::max(fsupc, fpanelc);
// Distance from the current supernode to the current panel;
// d_fsupc = 0 if fsupc > fpanelc
d_fsupc = fst_col - fsupc;
luptr = xlusup(fst_col) + d_fsupc;
lptr = xlsub(fsupc) + d_fsupc;
luptr = glu.xlusup(fst_col) + d_fsupc;
lptr = glu.xlsub(fsupc) + d_fsupc;
kfnz = repfnz(krep);
kfnz = std::max(kfnz, fpanelc);
segsize = krep - kfnz + 1;
nsupc = krep - fst_col + 1;
nsupr = xlsub(fsupc+1) - xlsub(fsupc);
nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc);
nrow = nsupr - d_fsupc - nsupc;
// NOTE Unlike the original implementation in SuperLU, the only feature
@ -109,34 +102,34 @@ int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, Sca
// then scatter the result of sup-col update to dense
no_zeros = kfnz - fst_col;
if(segsize==1)
LU_kernel_bmod<1>::run(segsize, dense, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros);
LU_kernel_bmod<1>::run(segsize, dense, tempv, glu.lusup, luptr, nsupr, nrow, glu.lsub, lptr, no_zeros);
else
LU_kernel_bmod<Dynamic>::run(segsize, dense, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros);
LU_kernel_bmod<Dynamic>::run(segsize, dense, tempv, glu.lusup, luptr, nsupr, nrow, glu.lsub, lptr, no_zeros);
} // end if jsupno
} // end for each segment
// Process the supernodal portion of L\U[*,j]
nextlu = xlusup(jcol);
fsupc = xsup(jsupno);
nextlu = glu.xlusup(jcol);
fsupc = glu.xsup(jsupno);
// copy the SPA dense into L\U[*,j]
int mem;
new_next = nextlu + xlsub(fsupc + 1) - xlsub(fsupc);
while (new_next > nzlumax )
new_next = nextlu + glu.xlsub(fsupc + 1) - glu.xlsub(fsupc);
while (new_next > glu.nzlumax )
{
mem = LUMemXpand<ScalarVector>(glu.lusup, nzlumax, nextlu, LUSUP, glu.num_expansions);
mem = LUMemXpand<ScalarVector>(glu.glu.lusup, glu.nzlumax, nextlu, LUSUP, glu.num_expansions);
if (mem) return mem;
}
for (isub = xlsub(fsupc); isub < xlsub(fsupc+1); isub++)
for (isub = glu.xlsub(fsupc); isub < glu.xlsub(fsupc+1); isub++)
{
irow = lsub(isub);
lusup(nextlu) = dense(irow);
irow = glu.lsub(isub);
glu.lusup(nextlu) = dense(irow);
dense(irow) = Scalar(0.0);
++nextlu;
}
xlusup(jcol + 1) = nextlu; // close L\U(*,jcol);
glu.xlusup(jcol + 1) = nextlu; // close L\U(*,jcol);
/* For more updates within the panel (also within the current supernode),
* should start from the first column of the panel, or the first column
@ -152,20 +145,20 @@ int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, Sca
// d_fsupc = 0 if fsupc >= fpanelc
d_fsupc = fst_col - fsupc;
lptr = xlsub(fsupc) + d_fsupc;
luptr = xlusup(fst_col) + d_fsupc;
nsupr = xlsub(fsupc+1) - xlsub(fsupc); // leading dimension
lptr = glu.xlsub(fsupc) + d_fsupc;
luptr = glu.xlusup(fst_col) + d_fsupc;
nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); // leading dimension
nsupc = jcol - fst_col; // excluding jcol
nrow = nsupr - d_fsupc - nsupc;
// points to the beginning of jcol in snode L\U(jsupno)
ufirst = xlusup(jcol) + d_fsupc;
Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A( &(lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) );
VectorBlock<ScalarVector> u(lusup, ufirst, nsupc);
ufirst = glu.xlusup(jcol) + d_fsupc;
Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) );
VectorBlock<ScalarVector> u(glu.lusup, ufirst, nsupc);
u = A.template triangularView<UnitLower>().solve(u);
new (&A) Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > ( &(lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) );
VectorBlock<ScalarVector> l(lusup, ufirst+nsupc, nrow);
new (&A) Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > ( &(glu.lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) );
VectorBlock<ScalarVector> l(glu.lusup, ufirst+nsupc, nrow);
l.noalias() -= A * u;
} // End if fst_col

View File

@ -67,10 +67,10 @@ struct LU_column_dfs_traits
{
return true;
}
void mem_expand(IndexVector& lsub, int& nextl, int chmark)
void mem_expand(IndexVector& glu.lsub, int& nextl, int chmark)
{
if (nextl >= m_glu.nzlmax)
LUMemXpand<IndexVector>(lsub, m_glu.nzlmax, nextl, LSUB, m_glu.num_expansions);
LUMemXpand<IndexVector>(glu.lsub, m_glu.nzlmax, nextl, LSUB, m_glu.num_expansions);
if (chmark != (m_jcol-1)) m_jsuper_ref = IND_EMPTY;
}
enum { ExpandMem = true };
@ -84,16 +84,10 @@ template <typename IndexVector, typename ScalarVector, typename BlockIndexVector
int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper, int& nseg, BlockIndexVector& lsub_col, IndexVector& segrep, BlockIndexVector& repfnz, IndexVector& xprune, IndexVector& marker, IndexVector& parent, IndexVector& xplore, LU_GlobalLU_t<IndexVector, ScalarVector>& glu)
{
typedef typename IndexVector::Scalar Index;
typedef typename ScalarVector::Scalar Scalar;
typedef typename ScalarVector
// Initialize pointers
IndexVector& xsup = glu.xsup;
IndexVector& supno = glu.supno;
IndexVector& lsub = glu.lsub;
IndexVector& xlsub = glu.xlsub;
int jsuper = supno(jcol);
int nextl = xlsub(jcol);
int jsuper = glu.supno(jcol);
int nextl = glu.xlsub(jcol);
VectorBlock<IndexVector> marker2(marker, 2*m, m);
@ -109,25 +103,25 @@ int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper
// krow was visited before, go to the next nonz;
if (kmark == jcol) continue;
LU_dfs_kernel(jcol, perm_r, nseg, lsub, segrep, repfnz, xprune, marker2, parent,
LU_dfs_kernel(jcol, perm_r, nseg, glu.lsub, segrep, repfnz, xprune, marker2, parent,
xplore, glu, nextl, krow, traits);
} // for each nonzero ...
int fsupc, jptr, jm1ptr, ito, ifrom, istop;
int nsuper = supno(jcol);
int nsuper = glu.supno(jcol);
int jcolp1 = jcol + 1;
int jcolm1 = jcol - 1;
// check to see if j belongs in the same supernode as j-1
if ( jcol == 0 )
{ // Do nothing for column 0
nsuper = supno(0) = 0 ;
nsuper = glu.supno(0) = 0 ;
}
else
{
fsupc = xsup(nsuper);
jptr = xlsub(jcol); // Not yet compressed
jm1ptr = xlsub(jcolm1);
fsupc = glu.xsup(nsuper);
jptr = glu.xlsub(jcol); // Not yet compressed
jm1ptr = glu.xlsub(jcolm1);
// Use supernodes of type T2 : see SuperLU paper
if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = IND_EMPTY;
@ -137,7 +131,7 @@ int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper
if ( (jcol - fsupc) >= maxsuper) jsuper = IND_EMPTY;
/* If jcol starts a new supernode, reclaim storage space in
* lsub from previous supernode. Note we only store
* glu.lsub from previous supernode. Note we only store
* the subscript set of the first and last columns of
* a supernode. (first for num values, last for pruning)
*/
@ -145,26 +139,26 @@ int LU_column_dfs(const int m, const int jcol, IndexVector& perm_r, int maxsuper
{ // starts a new supernode
if ( (fsupc < jcolm1-1) )
{ // >= 3 columns in nsuper
ito = xlsub(fsupc+1);
xlsub(jcolm1) = ito;
ito = glu.xlsub(fsupc+1);
glu.xlsub(jcolm1) = ito;
istop = ito + jptr - jm1ptr;
xprune(jcolm1) = istop; // intialize xprune(jcol-1)
xlsub(jcol) = istop;
glu.xlsub(jcol) = istop;
for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito)
lsub(ito) = lsub(ifrom);
glu.lsub(ito) = glu.lsub(ifrom);
nextl = ito; // = istop + length(jcol)
}
nsuper++;
supno(jcol) = nsuper;
glu.supno(jcol) = nsuper;
} // if a new supernode
} // end else: jcol > 0
// Tidy up the pointers before exit
xsup(nsuper+1) = jcolp1;
supno(jcolp1) = nsuper;
glu.xsup(nsuper+1) = jcolp1;
glu.supno(jcolp1) = nsuper;
xprune(jcol) = nextl; // Intialize upper bound for pruning
xlsub(jcolp1) = nextl;
glu.xlsub(jcolp1) = nextl;
return 0;
}

View File

@ -49,51 +49,42 @@ int LU_copy_to_ucol(const int jcol, const int nseg, SegRepType& segrep, RepfnzTy
typedef typename IndexVector::Scalar Index;
typedef typename ScalarVector::Scalar Scalar;
Index ksub, krep, ksupno;
IndexVector& xsup = glu.xsup;
IndexVector& supno = glu.supno;
IndexVector& lsub = glu.lsub;
IndexVector& xlsub = glu.xlsub;
ScalarVector& ucol = glu.ucol;
IndexVector& usub = glu.usub;
IndexVector& xusub = glu.xusub;
Index& nzumax = glu.nzumax;
Index jsupno = supno(jcol);
Index jsupno = glu.supno(jcol);
// For each nonzero supernode segment of U[*,j] in topological order
int k = nseg - 1, i;
Index nextu = xusub(jcol);
Index nextu = glu.xusub(jcol);
Index kfnz, isub, segsize;
Index new_next,irow;
Index fsupc, mem;
for (ksub = 0; ksub < nseg; ksub++)
{
krep = segrep(k); k--;
ksupno = supno(krep);
ksupno = glu.supno(krep);
if (jsupno != ksupno ) // should go into ucol();
{
kfnz = repfnz(krep);
if (kfnz != IND_EMPTY)
{ // Nonzero U-segment
fsupc = xsup(ksupno);
isub = xlsub(fsupc) + kfnz - fsupc;
fsupc = glu.xsup(ksupno);
isub = glu.xlsub(fsupc) + kfnz - fsupc;
segsize = krep - kfnz + 1;
new_next = nextu + segsize;
while (new_next > nzumax)
while (new_next > glu.nzumax)
{
mem = LUMemXpand<ScalarVector>(ucol, nzumax, nextu, UCOL, glu.num_expansions);
mem = LUMemXpand<ScalarVector>(glu.ucol, glu.nzumax, nextu, UCOL, glu.num_expansions);
if (mem) return mem;
mem = LUMemXpand<IndexVector>(usub, nzumax, nextu, USUB, glu.num_expansions);
mem = LUMemXpand<IndexVector>(glu.usub, glu.nzumax, nextu, USUB, glu.num_expansions);
if (mem) return mem;
}
for (i = 0; i < segsize; i++)
{
irow = lsub(isub);
usub(nextu) = perm_r(irow); // Unlike the L part, the U part is stored in its final order
ucol(nextu) = dense(irow);
irow = glu.lsub(isub);
glu.usub(nextu) = perm_r(irow); // Unlike the L part, the U part is stored in its final order
glu.ucol(nextu) = dense(irow);
dense(irow) = Scalar(0.0);
nextu++;
isub++;
@ -104,7 +95,7 @@ int LU_copy_to_ucol(const int jcol, const int nseg, SegRepType& segrep, RepfnzTy
} // end if jsupno
} // end for each segment
xusub(jcol + 1) = nextu; // close U(*,jcol)
glu.xusub(jcol + 1) = nextu; // close U(*,jcol)
return 0;
}

View File

@ -52,12 +52,6 @@ template <typename DenseIndexBlock, typename IndexVector, typename ScalarVector>
void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, ScalarVector& dense, ScalarVector& tempv, DenseIndexBlock& segrep, DenseIndexBlock& repfnz, LU_GlobalLU_t<IndexVector,ScalarVector>& glu)
{
typedef typename ScalarVector::Scalar Scalar;
IndexVector& xsup = glu.xsup;
IndexVector& supno = glu.supno;
IndexVector& lsub = glu.lsub;
IndexVector& xlsub = glu.xlsub;
IndexVector& xlusup = glu.xlusup;
ScalarVector& lusup = glu.lusup;
int ksub,jj,nextl_col;
int fsupc, nsupc, nsupr, nrow;
@ -76,11 +70,11 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca
* nsupr = number of rows in a supernode
*/
krep = segrep(k); k--;
fsupc = xsup(supno(krep));
fsupc = glu.xsup(glu.supno(krep));
nsupc = krep - fsupc + 1;
nsupr = xlsub(fsupc+1) - xlsub(fsupc);
nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc);
nrow = nsupr - nsupc;
lptr = xlsub(fsupc);
lptr = glu.xlsub(fsupc);
// loop over the panel columns to detect the actual number of columns and rows
int u_rows = 0;
@ -118,14 +112,14 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca
continue; // skip any zero segment
segsize = krep - kfnz + 1;
luptr = xlusup(fsupc);
luptr = glu.xlusup(fsupc);
no_zeros = kfnz - fsupc;
int isub = lptr + no_zeros;
int off = u_rows-segsize;
for (int i = 0; i < segsize; i++)
{
int irow = lsub(isub);
int irow = glu.lsub(isub);
U(i+off,u_col) = dense_col(irow);
++isub;
}
@ -134,15 +128,15 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca
}
// solve U = A^-1 U
luptr = xlusup(fsupc);
luptr = glu.xlusup(fsupc);
no_zeros = (krep - u_rows + 1) - fsupc;
luptr += nsupr * no_zeros + no_zeros;
Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A(lusup.data()+luptr, u_rows, u_rows, OuterStride<>(nsupr) );
Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A(glu.lusup.data()+luptr, u_rows, u_rows, OuterStride<>(nsupr) );
U = A.template triangularView<UnitLower>().solve(U);
// update
luptr += u_rows;
Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > B(lusup.data()+luptr, nrow, u_rows, OuterStride<>(nsupr) );
Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > B(glu.lusup.data()+luptr, nrow, u_rows, OuterStride<>(nsupr) );
assert(tempv.size()>w*u_rows + nrow*w);
Map<Matrix<Scalar,Dynamic,Dynamic> > L(tempv.data()+w*u_rows, nrow, u_cols);
L.noalias() = B * U;
@ -166,7 +160,7 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca
int off = u_rows-segsize;
for (int i = 0; i < segsize; i++)
{
int irow = lsub(isub++);
int irow = glu.lsub(isub++);
dense_col(irow) = U.coeff(i+off,u_col);
U.coeffRef(i,u_col) = 0;
}
@ -174,7 +168,7 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca
// Scatter l into SPA dense[]
for (int i = 0; i < nrow; i++)
{
int irow = lsub(isub++);
int irow = glu.lsub(isub++);
dense_col(irow) -= L.coeff(i+off,u_col);
L.coeffRef(i,u_col) = 0;
}
@ -195,7 +189,7 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca
continue; // skip any zero segment
segsize = krep - kfnz + 1;
luptr = xlusup(fsupc);
luptr = glu.xlusup(fsupc);
// NOTE : Unlike the original implementation in SuperLU,
// there is no update feature for col-col, 2col-col ...
@ -203,10 +197,10 @@ void LU_panel_bmod(const int m, const int w, const int jcol, const int nseg, Sca
// Perform a trianglar solve and block update,
// then scatter the result of sup-col update to dense[]
no_zeros = kfnz - fsupc;
if(segsize==1) LU_kernel_bmod<1>::run(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros);
else if(segsize==2) LU_kernel_bmod<2>::run(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros);
else if(segsize==3) LU_kernel_bmod<3>::run(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros);
else LU_kernel_bmod<Dynamic>::run(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros);
if(segsize==1) LU_kernel_bmod<1>::run(segsize, dense_col, tempv, glu.lusup, luptr, nsupr, nrow, glu.lsub, lptr, no_zeros);
else if(segsize==2) LU_kernel_bmod<2>::run(segsize, dense_col, tempv, glu.lusup, luptr, nsupr, nrow, glu.lsub, lptr, no_zeros);
else if(segsize==3) LU_kernel_bmod<3>::run(segsize, dense_col, tempv, glu.lusup, luptr, nsupr, nrow, glu.lsub, lptr, no_zeros);
else LU_kernel_bmod<Dynamic>::run(segsize, dense_col, tempv, glu.lusup, luptr, nsupr, nrow, glu.lsub, lptr, no_zeros);
} // End for each column in the panel
}

View File

@ -38,10 +38,6 @@ void LU_dfs_kernel(const int jj, IndexVector& perm_r,
int& nextl_col, int krow, Traits& traits
)
{
IndexVector& xsup = glu.xsup;
IndexVector& supno = glu.supno;
IndexVector& lsub = glu.lsub;
IndexVector& xlsub = glu.xlsub;
int kmark = marker(krow);
@ -59,7 +55,7 @@ void LU_dfs_kernel(const int jj, IndexVector& perm_r,
// krow is in U : if its supernode-representative krep
// has been explored, update repfnz(*)
// krep = supernode representative of the current row
int krep = xsup(supno(kperm)+1) - 1;
int krep = glu.xsup(glu.supno(kperm)+1) - 1;
// First nonzero element in the current column:
int myfnz = repfnz_col(krep);
@ -75,7 +71,7 @@ void LU_dfs_kernel(const int jj, IndexVector& perm_r,
int oldrep = IND_EMPTY;
parent(krep) = oldrep;
repfnz_col(krep) = kperm;
int xdfs = xlsub(krep);
int xdfs = glu.xlsub(krep);
int maxdfs = xprune(krep);
int kpar;
@ -84,7 +80,7 @@ void LU_dfs_kernel(const int jj, IndexVector& perm_r,
// For each unmarked kchild of krep
while (xdfs < maxdfs)
{
int kchild = lsub(xdfs);
int kchild = glu.lsub(xdfs);
xdfs++;
int chmark = marker(kchild);
@ -104,7 +100,7 @@ void LU_dfs_kernel(const int jj, IndexVector& perm_r,
// case kchild is in U :
// chrep = its supernode-rep. If its rep has been explored,
// update its repfnz(*)
int chrep = xsup(supno(chperm)+1) - 1;
int chrep = glu.xsup(glu.supno(chperm)+1) - 1;
myfnz = repfnz_col(chrep);
if (myfnz != IND_EMPTY)
@ -119,7 +115,7 @@ void LU_dfs_kernel(const int jj, IndexVector& perm_r,
krep = chrep; // Go deeper down G(L)
parent(krep) = oldrep;
repfnz_col(krep) = chperm;
xdfs = xlsub(krep);
xdfs = glu.xlsub(krep);
maxdfs = xprune(krep);
} // end if myfnz != -1
@ -205,7 +201,7 @@ struct LU_panel_dfs_traits
}
return false;
}
void mem_expand(IndexVector& /*lsub*/, int /*nextl*/, int /*chmark*/) {}
void mem_expand(IndexVector& /*glu.lsub*/, int /*nextl*/, int /*chmark*/) {}
enum { ExpandMem = false };
Index m_jcol;
Index* m_marker;

View File

@ -58,19 +58,14 @@ int LU_pivotL(const int jcol, const typename ScalarVector::RealScalar diagpivott
typedef typename IndexVector::Scalar Index;
typedef typename ScalarVector::Scalar Scalar;
typedef typename ScalarVector::RealScalar RealScalar;
// Initialize pointers
IndexVector& lsub = glu.lsub; // Compressed row subscripts of L rectangular supernodes.
IndexVector& xlsub = glu.xlsub; // pointers to the beginning of each column subscript in lsub
ScalarVector& lusup = glu.lusup; // Numerical values of L ordered by columns
IndexVector& xlusup = glu.xlusup; // pointers to the beginning of each colum in lusup
Index fsupc = (glu.xsup)((glu.supno)(jcol)); // First column in the supernode containing the column jcol
Index nsupc = jcol - fsupc; // Number of columns in the supernode portion, excluding jcol; nsupc >=0
Index lptr = xlsub(fsupc); // pointer to the starting location of the row subscripts for this supernode portion
Index nsupr = xlsub(fsupc+1) - lptr; // Number of rows in the supernode
Scalar* lu_sup_ptr = &(lusup.data()[xlusup(fsupc)]); // Start of the current supernode
Scalar* lu_col_ptr = &(lusup.data()[xlusup(jcol)]); // Start of jcol in the supernode
Index* lsub_ptr = &(lsub.data()[lptr]); // Start of row indices of the supernode
Index lptr = glu.xlsub(fsupc); // pointer to the starting location of the row subscripts for this supernode portion
Index nsupr = glu.xlsub(fsupc+1) - lptr; // Number of rows in the supernode
Scalar* lu_sup_ptr = &(glu.lusup.data()[glu.xlusup(fsupc)]); // Start of the current supernode
Scalar* lu_col_ptr = &(glu.lusup.data()[glu.xlusup(jcol)]); // Start of jcol in the supernode
Index* lsub_ptr = &(glu.lsub.data()[lptr]); // Start of row indices of the supernode
// Determine the largest abs numerical value for partial pivoting
Index diagind = iperm_c(jcol); // diagonal index

View File

@ -51,16 +51,9 @@ void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, cons
{
typedef typename IndexVector::Scalar Index;
typedef typename ScalarVector::Scalar Scalar;
// Initialize pointers
IndexVector& xsup = glu.xsup;
IndexVector& supno = glu.supno;
IndexVector& lsub = glu.lsub;
IndexVector& xlsub = glu.xlsub;
ScalarVector& lusup = glu.lusup;
IndexVector& xlusup = glu.xlusup;
// For each supernode-rep irep in U(*,j]
int jsupno = supno(jcol);
int jsupno = glu.supno(jcol);
int i,irep,irep1;
bool movnum, do_prune = false;
Index kmin, kmax, minloc, maxloc,krow;
@ -76,18 +69,18 @@ void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, cons
// If a snode overlaps with the next panel, then the U-segment
// is fragmented into two parts -- irep and irep1. We should let
// pruning occur at the rep-column in irep1s snode.
if (supno(irep) == supno(irep1) ) continue; // don't prune
if (glu.supno(irep) == glu.supno(irep1) ) continue; // don't prune
// If it has not been pruned & it has a nonz in row L(pivrow,i)
if (supno(irep) != jsupno )
if (glu.supno(irep) != jsupno )
{
if ( xprune (irep) >= xlsub(irep1) )
if ( xprune (irep) >= glu.xlsub(irep1) )
{
kmin = xlsub(irep);
kmax = xlsub(irep1) - 1;
kmin = glu.xlsub(irep);
kmax = glu.xlsub(irep1) - 1;
for (krow = kmin; krow <= kmax; krow++)
{
if (lsub(krow) == pivrow)
if (glu.lsub(krow) == pivrow)
{
do_prune = true;
break;
@ -100,20 +93,20 @@ void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, cons
// do a quicksort-type partition
// movnum=true means that the num values have to be exchanged
movnum = false;
if (irep == xsup(supno(irep)) ) // Snode of size 1
if (irep == glu.xsup(glu.supno(irep)) ) // Snode of size 1
movnum = true;
while (kmin <= kmax)
{
if (perm_r(lsub(kmax)) == IND_EMPTY)
if (perm_r(glu.lsub(kmax)) == IND_EMPTY)
kmax--;
else if ( perm_r(lsub(kmin)) != IND_EMPTY)
else if ( perm_r(glu.lsub(kmin)) != IND_EMPTY)
kmin++;
else
{
// kmin below pivrow (not yet pivoted), and kmax
// above pivrow: interchange the two suscripts
std::swap(lsub(kmin), lsub(kmax));
std::swap(glu.lsub(kmin), glu.lsub(kmax));
// If the supernode has only one column, then we
// only keep one set of subscripts. For any subscript
@ -121,9 +114,9 @@ void LU_pruneL(const int jcol, const IndexVector& perm_r, const int pivrow, cons
// done on the numerical values.
if (movnum)
{
minloc = xlusup(irep) + ( kmin - xlsub(irep) );
maxloc = xlusup(irep) + ( kmax - xlsub(irep) );
std::swap(lusup(minloc), lusup(maxloc));
minloc = glu.xlusup(irep) + ( kmin - glu.xlsub(irep) );
maxloc = glu.xlusup(irep) + ( kmax - glu.xlsub(irep) );
std::swap(glu.lusup(minloc), glu.lusup(maxloc));
}
kmin++;
kmax--;

View File

@ -33,39 +33,40 @@ template <typename IndexVector, typename ScalarVector>
int LU_snode_bmod (const int jcol, const int fsupc, ScalarVector& dense, LU_GlobalLU_t<IndexVector,ScalarVector>& glu)
{
typedef typename ScalarVector::Scalar Scalar;
IndexVector& lsub = glu.lsub; // Compressed row subscripts of ( rectangular supernodes ??)
IndexVector& xlsub = glu.xlsub; // xlsub[j] is the starting location of the j-th column in lsub(*)
ScalarVector& lusup = glu.lusup; // Numerical values of the rectangular supernodes
IndexVector& xlusup = glu.xlusup; // xlusup[j] is the starting location of the j-th column in lusup(*)
int nextlu = xlusup(jcol); // Starting location of the next column to add
/* lsub : Compressed row subscripts of ( rectangular supernodes )
* xlsub : xlsub[j] is the starting location of the j-th column in lsub(*)
* lusup : Numerical values of the rectangular supernodes
* xlusup[j] is the starting location of the j-th column in lusup(*)
*/
int nextlu = glu.xlusup(jcol); // Starting location of the next column to add
int irow, isub;
// Process the supernodal portion of L\U[*,jcol]
for (isub = xlsub(fsupc); isub < xlsub(fsupc+1); isub++)
for (isub = glu.xlsub(fsupc); isub < glu.xlsub(fsupc+1); isub++)
{
irow = lsub(isub);
lusup(nextlu) = dense(irow);
irow = glu.lsub(isub);
glu.lusup(nextlu) = dense(irow);
dense(irow) = 0;
++nextlu;
}
xlusup(jcol + 1) = nextlu; // Initialize xlusup for next column ( jcol+1 )
glu.xlusup(jcol + 1) = nextlu; // Initialize xlusup for next column ( jcol+1 )
if (fsupc < jcol ){
int luptr = xlusup(fsupc); // points to the first column of the supernode
int nsupr = xlsub(fsupc + 1) -xlsub(fsupc); //Number of rows in the supernode
int luptr = glu.xlusup(fsupc); // points to the first column of the supernode
int nsupr = glu.xlsub(fsupc + 1) -glu.xlsub(fsupc); //Number of rows in the supernode
int nsupc = jcol - fsupc; // Number of columns in the supernodal portion of L\U[*,jcol]
int ufirst = xlusup(jcol); // points to the beginning of column jcol in supernode L\U(jsupno)
int ufirst = glu.xlusup(jcol); // points to the beginning of column jcol in supernode L\U(jsupno)
int nrow = nsupr - nsupc; // Number of rows in the off-diagonal blocks
// Solve the triangular system for U(fsupc:jcol, jcol) with L(fspuc:jcol, fsupc:jcol)
Map<Matrix<Scalar,Dynamic,Dynamic>,0,OuterStride<> > A( &(lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) );
VectorBlock<ScalarVector> u(lusup, ufirst, nsupc);
Map<Matrix<Scalar,Dynamic,Dynamic>,0,OuterStride<> > A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(nsupr) );
VectorBlock<ScalarVector> u(glu.lusup, ufirst, nsupc);
u = A.template triangularView<UnitLower>().solve(u); // Call the Eigen dense triangular solve interface
// Update the trailing part of the column jcol U(jcol:jcol+nrow, jcol) using L(jcol:jcol+nrow, fsupc:jcol) and U(fsupc:jcol)
new (&A) Map<Matrix<Scalar,Dynamic,Dynamic>,0,OuterStride<> > ( &(lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) );
VectorBlock<ScalarVector> l(lusup, ufirst+nsupc, nrow);
new (&A) Map<Matrix<Scalar,Dynamic,Dynamic>,0,OuterStride<> > ( &(glu.lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(nsupr) );
VectorBlock<ScalarVector> l(glu.lusup, ufirst+nsupc, nrow);
l.noalias() -= A * u;
}
return 0;

View File

@ -46,14 +46,9 @@
int LU_snode_dfs(const int jcol, const int kcol,const MatrixType& mat, IndexVector& xprune, IndexVector& marker, LU_GlobalLU_t<IndexVector, ScalarVector>& glu)
{
typedef typename IndexVector::Scalar Index;
IndexVector& xsup = glu.xsup;
IndexVector& supno = glu.supno; // Supernode number corresponding to this column
IndexVector& lsub = glu.lsub;
IndexVector& xlsub = glu.xlsub;
Index& nzlmax = glu.nzlmax;
int mem;
Index nsuper = ++supno(jcol); // Next available supernode number
int nextl = xlsub(jcol); //Index of the starting location of the jcol-th column in lsub
Index nsuper = ++glu.supno(jcol); // Next available supernode number
int nextl = glu.xlsub(jcol); //Index of the starting location of the jcol-th column in lsub
int krow,kmark;
for (int i = jcol; i <=kcol; i++)
{
@ -66,36 +61,36 @@
{
// First time to visit krow
marker(krow) = kcol;
lsub(nextl++) = krow;
if( nextl >= nzlmax )
glu.lsub(nextl++) = krow;
if( nextl >= glu.nzlmax )
{
mem = LUMemXpand<IndexVector>(lsub, nzlmax, nextl, LSUB, glu.num_expansions);
mem = LUMemXpand<IndexVector>(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions);
if (mem) return mem; // Memory expansion failed... Return the memory allocated so far
}
}
}
supno(i) = nsuper;
glu.supno(i) = nsuper;
}
// If supernode > 1, then make a copy of the subscripts for pruning
if (jcol < kcol)
{
Index new_next = nextl + (nextl - xlsub(jcol));
while (new_next > nzlmax)
Index new_next = nextl + (nextl - glu.xlsub(jcol));
while (new_next > glu.nzlmax)
{
mem = LUMemXpand<IndexVector>(lsub, nzlmax, nextl, LSUB, glu.num_expansions);
mem = LUMemXpand<IndexVector>(glu.lsub, glu.nzlmax, nextl, LSUB, glu.num_expansions);
if (mem) return mem; // Memory expansion failed... Return the memory allocated so far
}
Index ifrom, ito = nextl;
for (ifrom = xlsub(jcol); ifrom < nextl;)
lsub(ito++) = lsub(ifrom++);
for (int i = jcol+1; i <=kcol; i++) xlsub(i) = nextl;
for (ifrom = glu.xlsub(jcol); ifrom < nextl;)
glu.lsub(ito++) = glu.lsub(ifrom++);
for (int i = jcol+1; i <=kcol; i++) glu.xlsub(i) = nextl;
nextl = ito;
}
xsup(nsuper+1) = kcol + 1; // Start of next available supernode
supno(kcol+1) = nsuper;
glu.xsup(nsuper+1) = kcol + 1; // Start of next available supernode
glu.supno(kcol+1) = nsuper;
xprune(kcol) = nextl;
xlsub(kcol+1) = nextl;
glu.xlsub(kcol+1) = nextl;
return 0;
}
#endif