From 8f6d5eacb4af8fc31301625652a5017e6c2e50eb Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sun, 29 Jul 2012 22:26:00 +0200 Subject: [PATCH] optimize LU_kernel_bmod for small cases, and add an important .noalias() --- Eigen/src/SparseLU/SparseLU_column_bmod.h | 5 +- Eigen/src/SparseLU/SparseLU_kernel_bmod.h | 123 ++++++++++++++-------- Eigen/src/SparseLU/SparseLU_panel_bmod.h | 5 +- 3 files changed, 85 insertions(+), 48 deletions(-) diff --git a/Eigen/src/SparseLU/SparseLU_column_bmod.h b/Eigen/src/SparseLU/SparseLU_column_bmod.h index 1457b6f35..5c26bd70e 100644 --- a/Eigen/src/SparseLU/SparseLU_column_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_column_bmod.h @@ -122,7 +122,10 @@ int LU_column_bmod(const int jcol, const int nseg, BlockScalarVector& dense, Sca // Perform a triangular solver and block update, // then scatter the result of sup-col update to dense no_zeros = kfnz - fst_col; - LU_kernel_bmod(segsize, dense, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); + if(segsize==1) + LU_kernel_bmod<1>::run(segsize, dense, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); + else + LU_kernel_bmod::run(segsize, dense, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); } // end if jsupno } // end for each segment diff --git a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h index d5df70fd2..0d4b20f59 100644 --- a/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +++ b/Eigen/src/SparseLU/SparseLU_kernel_bmod.h @@ -39,54 +39,85 @@ * \param no_zeros Number of nonzeros elements before the diagonal part of the supernode * \return 0 on success */ -template -int LU_kernel_bmod(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, int& luptr, const int nsupr, const int nrow, IndexVector& lsub, const int lptr, const int no_zeros) +template struct LU_kernel_bmod { - typedef typename ScalarVector::Scalar Scalar; - // First, copy U[*,j] segment from dense(*) to tempv(*) - // The result of triangular solve is in tempv[*]; - // The result of matric-vector update is in dense[*] - int isub = lptr + no_zeros; - int i, irow; - for (i = 0; i < segsize; i++) + template + EIGEN_DONT_INLINE static void run(const int segsize, BlockScalarVector& dense, ScalarVector& tempv, ScalarVector& lusup, int& luptr, const int nsupr, const int nrow, IndexVector& lsub, const int lptr, const int no_zeros) { - irow = lsub(isub); - tempv(i) = dense(irow); - ++isub; + typedef typename ScalarVector::Scalar Scalar; + // First, copy U[*,j] segment from dense(*) to tempv(*) + // The result of triangular solve is in tempv[*]; + // The result of matric-vector update is in dense[*] + int isub = lptr + no_zeros; + int i, irow; + for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++) + { + irow = lsub(isub); + tempv(i) = dense(irow); + ++isub; + } + // Dense triangular solve -- start effective triangle + luptr += nsupr * no_zeros + no_zeros; + // Form Eigen matrix and vector + Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); + Map > u(tempv.data(), segsize); + + u = A.template triangularView().solve(u); + + // Dense matrix-vector product y <-- B*x + luptr += segsize; + Map, 0, OuterStride<> > B( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(nsupr) ); + Map > l(tempv.data()+segsize, nrow); + if(SegSizeAtCompileTime==2) + l = u(0) * B.col(0) + u(1) * B.col(1); + else if(SegSizeAtCompileTime==3) + l = u(0) * B.col(0) + u(1) * B.col(1) + u(2) * B.col(2); + else + l.noalias() = B * u; + + // Scatter tempv[] into SPA dense[] as a temporary storage + isub = lptr + no_zeros; + for (i = 0; i < ((SegSizeAtCompileTime==Dynamic)?segsize:SegSizeAtCompileTime); i++) + { + irow = lsub(isub++); + dense(irow) = tempv(i); + } + + // Scatter l into SPA dense[] + for (i = 0; i < nrow; i++) + { + irow = lsub(isub++); + dense(irow) -= l(i); + } } - // Dense triangular solve -- start effective triangle - luptr += nsupr * no_zeros + no_zeros; - // Form Eigen matrix and vector - Map, 0, OuterStride<> > A( &(lusup.data()[luptr]), segsize, segsize, OuterStride<>(nsupr) ); - VectorBlock u(tempv, 0, segsize); - - u = A.template triangularView().solve(u); - - // Dense matrix-vector product y <-- A*x - luptr += segsize; - new (&A) Map, 0, OuterStride<> > ( &(lusup.data()[luptr]), nrow, segsize, OuterStride<>(nsupr) ); - VectorBlock l(tempv, segsize, nrow); - l= A * u; - - // Scatter tempv[] into SPA dense[] as a temporary storage - isub = lptr + no_zeros; - for (i = 0; i < segsize; i++) +}; + +template <> struct LU_kernel_bmod<1> +{ + template + EIGEN_DONT_INLINE static void run(const int /*segsize*/, BlockScalarVector& dense, ScalarVector& /*tempv*/, ScalarVector& lusup, int& luptr, const int nsupr, const int nrow, IndexVector& lsub, const int lptr, const int no_zeros) { - irow = lsub(isub); - dense(irow) = tempv(i); - tempv(i) = Scalar(0.0); - ++isub; + typedef typename ScalarVector::Scalar Scalar; + Scalar f = dense(lsub(lptr + no_zeros)); + luptr += nsupr * no_zeros + no_zeros + 1; + const Scalar* a(lusup.data() + luptr); + const typename IndexVector::Scalar* irow(lsub.data()+lptr + no_zeros + 1); + int i = 0; + for (; i+1 < nrow; i+=2) + { + int i0 = *(irow++); + int i1 = *(irow++); + Scalar a0 = *(a++); + Scalar a1 = *(a++); + Scalar d0 = dense.coeff(i0); + Scalar d1 = dense.coeff(i1); + d0 -= f*a0; + d1 -= f*a1; + dense.coeffRef(i0) = d0; + dense.coeffRef(i1) = d1; + } + if(i::run(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); + else if(segsize==2) LU_kernel_bmod<2>::run(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); + else if(segsize==3) LU_kernel_bmod<3>::run(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); + else LU_kernel_bmod::run(segsize, dense_col, tempv, lusup, luptr, nsupr, nrow, lsub, lptr, no_zeros); } // End for each column in the panel } // End for each updating supernode