From 91561cded44d53de0724b89cad9b60cc86c7bd80 Mon Sep 17 00:00:00 2001
From: Thomas Capricelli <orzel@freehackers.org>
Date: Tue, 26 Jan 2010 05:59:58 +0100
Subject: [PATCH] use a plain matrix to store the upper triangular matrix 'R',
 instead of the "compact inside a vector" scheme used by fortran/minpack. The
 most difficult part is to fix all related code. Tests pass.

---
 .../HybridNonLinearSolver.h                   | 61 +++++------------
 .../Eigen/src/NonLinearOptimization/dogleg.h  | 35 +++-------
 .../Eigen/src/NonLinearOptimization/r1updt.h  | 68 ++++++-------------
 3 files changed, 48 insertions(+), 116 deletions(-)
diff --git a/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h b/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h
index 8f7285203..3ad901391 100644
--- a/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h
@@ -74,6 +74,8 @@ public:
     };
     typedef Matrix< Scalar, Dynamic, 1 > FVectorType;
     typedef Matrix< Scalar, Dynamic, Dynamic > JacobianType;
+    /* TODO: if eigen provides a triangular storage, use it here */
+    typedef Matrix< Scalar, Dynamic, Dynamic > UpperTriangularType;
 
     Status hybrj1(
             FVectorType  &x,
@@ -113,8 +115,9 @@ public:
 
     void resetParameters(void) { parameters = Parameters(); }
     Parameters parameters;
-    FVectorType  fvec, R, qtf, diag;
+    FVectorType  fvec, qtf, diag;
     JacobianType fjac;
+    UpperTriangularType R;
     int nfev;
     int njev;
     int iter;
@@ -173,7 +176,6 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveInit(
     wa1.resize(n); wa2.resize(n); wa3.resize(n); wa4.resize(n);
     fvec.resize(n);
     qtf.resize(n);
-    R.resize( (n*(n+1))/2);
     fjac.resize(n, n);
     if (mode != 2)
         diag.resize(n);
@@ -218,7 +220,7 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveOneStep(
         const int mode
         )
 {
-    int i, j, l;
+    int i, j;
     jeval = true;
 
     /* calculate the jacobian matrix. */
@@ -272,17 +274,10 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveOneStep(
 
     /* copy the triangular factor of the qr factorization into r. */
 
+    R = qrfac.matrixQR();
     sing = false;
-    for (j = 0; j < n; ++j) {
-        l = j;
-        for (i = 0; i < j; ++i) {
-            R[l] = fjac(i,j);
-            l = l + n - i -1;
-        }
-        R[l] = wa1[j];
-        if (wa1[j] == 0.)
-            sing = true;
-    }
+    for (j = 0; j < n; ++j)
+        if (wa1[j] == 0.) sing = true;
 
     /* accumulate the orthogonal factor in fjac. */
     ei_qform<Scalar>(n, n, fjac.data(), fjac.rows(), wa1.data());
@@ -328,13 +323,10 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveOneStep(
 
         /* compute the scaled predicted reduction. */
 
-        l = 0;
         for (i = 0; i < n; ++i) {
             sum = 0.;
-            for (j = i; j < n; ++j) {
-                sum += R[l] * wa1[j];
-                ++l;
-            }
+            for (j = i; j < n; ++j)
+                sum += R(i,j) * wa1[j];
             wa3[i] = qtf[i] + sum;
         }
         temp = wa3.stableNorm();
@@ -421,7 +413,7 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveOneStep(
 
         /* compute the qr factorization of the updated jacobian. */
 
-        ei_r1updt<Scalar>(n, n, R.data(), R.size(), wa1.data(), wa2.data(), wa3.data(), &sing);
+        ei_r1updt<Scalar>(n, n, R, wa1.data(), wa2.data(), wa3.data(), &sing);
         ei_r1mpyq<Scalar>(n, n, fjac.data(), fjac.rows(), wa2.data(), wa3.data());
         ei_r1mpyq<Scalar>(1, n, qtf.data(), 1, wa2.data(), wa3.data());
 
@@ -488,7 +480,6 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffInit(
 
     wa1.resize(n); wa2.resize(n); wa3.resize(n); wa4.resize(n);
     qtf.resize(n);
-    R.resize( (n*(n+1))/2);
     fjac.resize(n, n);
     fvec.resize(n);
     if (mode != 2)
@@ -536,7 +527,7 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffOneStep(
         const int mode
         )
 {
-    int i, j, l;
+    int i, j;
     jeval = true;
     if (parameters.nb_of_subdiagonals<0) parameters.nb_of_subdiagonals= n-1;
     if (parameters.nb_of_superdiagonals<0) parameters.nb_of_superdiagonals= n-1;
@@ -591,26 +582,13 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffOneStep(
         }
 
     /* copy the triangular factor of the qr factorization into r. */
-
+    R = qrfac.matrixQR();
     sing = false;
-    for (j = 0; j < n; ++j) {
-        l = j;
-        for (i = 0; i < j; ++i) {
-            R[l] = fjac(i,j);
-            l = l + n - i -1;
-        }
-        R[l] = wa1[j];
-        if (wa1[j] == 0.)
-            sing = true;
-    }
+    for (j = 0; j < n; ++j)
+        if (wa1[j] == 0.) sing = true;
 
     /* accumulate the orthogonal factor in fjac. */
     ei_qform<Scalar>(n, n, fjac.data(), fjac.rows(), wa1.data());
-#if 0
-    std::cout << "ei_qform<Scalar>: " << fjac << std::endl;
-    fjac = qrfac.matrixQ();
-    std::cout << "qrfac.matrixQ():" << fjac << std::endl;
-#endif
 
     /* rescale if necessary. */
 
@@ -653,13 +631,10 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffOneStep(
 
         /* compute the scaled predicted reduction. */
 
-        l = 0;
         for (i = 0; i < n; ++i) {
             sum = 0.;
-            for (j = i; j < n; ++j) {
-                sum += R[l] * wa1[j];
-                ++l;
-            }
+            for (j = i; j < n; ++j)
+                sum += R(i,j) * wa1[j];
             wa3[i] = qtf[i] + sum;
         }
         temp = wa3.stableNorm();
@@ -747,7 +722,7 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffOneStep(
 
         /* compute the qr factorization of the updated jacobian. */
 
-        ei_r1updt<Scalar>(n, n, R.data(), R.size(), wa1.data(), wa2.data(), wa3.data(), &sing);
+        ei_r1updt<Scalar>(n, n, R, wa1.data(), wa2.data(), wa3.data(), &sing);
         ei_r1mpyq<Scalar>(n, n, fjac.data(), fjac.rows(), wa2.data(), wa3.data());
         ei_r1mpyq<Scalar>(1, n, qtf.data(), 1, wa2.data(), wa3.data());
 
diff --git a/unsupported/Eigen/src/NonLinearOptimization/dogleg.h b/unsupported/Eigen/src/NonLinearOptimization/dogleg.h
index bbd0840ae..36ed962f1 100644
--- a/unsupported/Eigen/src/NonLinearOptimization/dogleg.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/dogleg.h
@@ -1,14 +1,14 @@
 
 template <typename Scalar>
 void ei_dogleg(
-        Matrix< Scalar, Dynamic, 1 >  &r,
+        const Matrix< Scalar, Dynamic, Dynamic >  &qrfac,
         const Matrix< Scalar, Dynamic, 1 >  &diag,
         const Matrix< Scalar, Dynamic, 1 >  &qtb,
         Scalar delta,
         Matrix< Scalar, Dynamic, 1 >  &x)
 {
     /* Local variables */
-    int i, j, k, l, jj;
+    int i, j, k;
     Scalar sum, temp, alpha, bnorm;
     Scalar gnorm, qnorm;
     Scalar sgnorm;
@@ -20,26 +20,16 @@ void ei_dogleg(
     assert(n==qtb.size());
     assert(n==x.size());
 
-    /* first, calculate the gauss-newton direction. */
-
-    jj = n * (n + 1) / 2;
     for (k = 0; k < n; ++k) {
         j = n - k - 1;
-        jj -= k+1;
-        l = jj + 1;
         sum = 0.;
         for (i = j+1; i < n; ++i) {
-            sum += r[l] * x[i];
-            ++l;
+            sum += qrfac(j,i) * x[i];
         }
-        temp = r[jj];
+        temp = qrfac(j,j);
         if (temp == 0.) {
-            l = j;
-            for (i = 0; i <= j; ++i) {
-                /* Computing MAX */
-                temp = std::max(temp,ei_abs(r[l]));
-                l = l + n - i - 1;
-            }
+            for (i = 0; i <= j; ++i)
+                temp = std::max(temp,ei_abs(qrfac(i,j)));
             temp = epsmch * temp;
             if (temp == 0.)
                 temp = epsmch;
@@ -58,13 +48,10 @@ void ei_dogleg(
     /* the gauss-newton direction is not acceptable. */
     /* next, calculate the scaled gradient direction. */
 
-    l = 0;
     for (j = 0; j < n; ++j) {
         temp = qtb[j];
-        for (i = j; i < n; ++i) {
-            wa1[i] += r[l] * temp;
-            ++l;
-        }
+        for (i = j; i < n; ++i)
+            wa1[i] += qrfac(j,i) * temp;
         wa1[j] /= diag[j];
     }
 
@@ -81,16 +68,12 @@ void ei_dogleg(
     /* at which the quadratic is minimized. */
 
     wa1.array() /= (diag*gnorm).array();
-    l = 0;
     for (j = 0; j < n; ++j) {
         sum = 0.;
         for (i = j; i < n; ++i) {
-            sum += r[l] * wa1[i];
-            ++l;
-            /* L100: */
+            sum += qrfac(j,i) * wa1[i];
         }
         wa2[j] = sum;
-        /* L110: */
     }
     temp = wa2.stableNorm();
     sgnorm = gnorm / temp / temp;
diff --git a/unsupported/Eigen/src/NonLinearOptimization/r1updt.h b/unsupported/Eigen/src/NonLinearOptimization/r1updt.h
index f2ddb189b..e5e907582 100644
--- a/unsupported/Eigen/src/NonLinearOptimization/r1updt.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/r1updt.h
@@ -1,34 +1,27 @@
 
-    template <typename Scalar>
-void ei_r1updt(int m, int n, Scalar *s, int /* ls */, const Scalar *u, Scalar *v, Scalar *w, bool *sing)
+template <typename Scalar>
+void ei_r1updt(int m, int n, Matrix< Scalar, Dynamic, Dynamic > &s, const Scalar *u, Scalar *v, Scalar *w, bool *sing)
 {
     /* Local variables */
-    int i, j, l, jj, nm1;
+    int i, j, nm1;
     Scalar tan__;
     int nmj;
     Scalar cos__, sin__, tau, temp, cotan;
 
+    // ei_r1updt had a broader usecase, but we dont use it here. And, more
+    // importantly, we can not test it.
+    assert(m==n);
+
     /* Parameter adjustments */
     --w;
     --u;
     --v;
-    --s;
 
     /* Function Body */
     const Scalar giant = std::numeric_limits<Scalar>::max();
 
-    /*     initialize the diagonal element pointer. */
-
-    jj = n * ((m << 1) - n + 1) / 2 - (m - n);
-
     /*     move the nontrivial part of the last column of s into w. */
-
-    l = jj;
-    for (i = n; i <= m; ++i) {
-        w[i] = s[l];
-        ++l;
-        /* L10: */
-    }
+    w[n] = s(n-1,n-1);
 
     /*     rotate the vector v into a multiple of the n-th unit vector */
     /*     in such a way that a spike is introduced into w. */
@@ -39,7 +32,6 @@ void ei_r1updt(int m, int n, Scalar *s, int /* ls */, const Scalar *u, Scalar *v
     }
     for (nmj = 1; nmj <= nm1; ++nmj) {
         j = n - nmj;
-        jj -= m - j + 1;
         w[j] = 0.;
         if (v[j] == 0.) {
             goto L50;
@@ -75,16 +67,12 @@ L30:
 
         /*        apply the transformation to s and extend the spike in w. */
 
-        l = jj;
         for (i = j; i <= m; ++i) {
-            temp = cos__ * s[l] - sin__ * w[i];
-            w[i] = sin__ * s[l] + cos__ * w[i];
-            s[l] = temp;
-            ++l;
-            /* L40: */
+            temp = cos__ * s(j-1,i-1) - sin__ * w[i];
+            w[i] = sin__ * s(j-1,i-1) + cos__ * w[i];
+            s(j-1,i-1) = temp;
         }
 L50:
-        /* L60: */
         ;
     }
 L70:
@@ -110,9 +98,9 @@ L70:
         /*        determine a givens rotation which eliminates the */
         /*        j-th element of the spike. */
 
-        if (ei_abs(s[jj]) >= ei_abs(w[j]))
+        if (ei_abs(s(j-1,j-1)) >= ei_abs(w[j]))
             goto L90;
-        cotan = s[jj] / w[j];
+        cotan = s(j-1,j-1) / w[j];
         /* Computing 2nd power */
         sin__ = Scalar(.5) / ei_sqrt(Scalar(0.25) + Scalar(0.25) * ei_abs2(cotan));
         cos__ = sin__ * cotan;
@@ -122,7 +110,7 @@ L70:
         }
         goto L100;
 L90:
-        tan__ = w[j] / s[jj];
+        tan__ = w[j] / s(j-1,j-1);
         /* Computing 2nd power */
         cos__ = Scalar(.5) / ei_sqrt(Scalar(0.25) + Scalar(0.25) * ei_abs2(tan__));
         sin__ = cos__ * tan__;
@@ -131,13 +119,10 @@ L100:
 
         /*        apply the transformation to s and reduce the spike in w. */
 
-        l = jj;
         for (i = j; i <= m; ++i) {
-            temp = cos__ * s[l] + sin__ * w[i];
-            w[i] = -sin__ * s[l] + cos__ * w[i];
-            s[l] = temp;
-            ++l;
-            /* L110: */
+            temp = cos__ * s(j-1,i-1) + sin__ * w[i];
+            w[i] = -sin__ * s(j-1,i-1) + cos__ * w[i];
+            s(j-1,i-1) = temp;
         }
 
         /*        store the information necessary to recover the */
@@ -148,28 +133,17 @@ L120:
 
         /*        test for zero diagonal elements in the output s. */
 
-        if (s[jj] == 0.) {
+        if (s(j-1,j-1) == 0.) {
             *sing = true;
         }
-        jj += m - j + 1;
-        /* L130: */
     }
 L140:
-
     /*     move w back into the last column of the output s. */
+    s(n-1,n-1) = w[n];
 
-    l = jj;
-    for (i = n; i <= m; ++i) {
-        s[l] = w[i];
-        ++l;
-        /* L150: */
-    }
-    if (s[jj] == 0.) {
+    if (s(j-1,j-1) == 0.) {
         *sing = true;
     }
     return;
-
-    /*     last card of subroutine r1updt. */
-
-} /* r1updt_ */
+}