merge with tip

2025-09-18 04:13:14 +08:00 · 2009-12-01 18:03:15 -05:00 · 2009-12-01 18:03:15 -05:00 · c05ae35441
commit c05ae35441
parent ff1e9542f6 291fd4f8da
13 changed files with 316 additions and 319 deletions
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@ -234,8 +234,9 @@ template<typename _MatrixType> class FullPivLU
      * who need to determine when pivots are to be considered nonzero. This is not used for the
      * LU decomposition itself.
      *
-      * When it needs to get the threshold value, Eigen calls threshold(). By default, this calls
+      * When it needs to get the threshold value, Eigen calls threshold(). By default, this
-      * defaultThreshold(). Once you have called the present method setThreshold(const RealScalar&),
+      * uses a formula to automatically determine a reasonable threshold.
      * Once you have called the present method setThreshold(const RealScalar&),
      * your value is used instead.
      *
      * \param threshold The new value to use as the threshold.
@ -303,7 +304,7 @@ template<typename _MatrixType> class FullPivLU
    inline int dimensionOfKernel() const
    {
      ei_assert(m_isInitialized && "LU is not initialized.");
-      return m_lu.cols() - rank();
+      return cols() - rank();
    }
    /** \returns true if the matrix of which *this is the LU decomposition represents an injective
@ -316,7 +317,7 @@ template<typename _MatrixType> class FullPivLU
    inline bool isInjective() const
    {
      ei_assert(m_isInitialized && "LU is not initialized.");
-      return rank() == m_lu.cols();
+      return rank() == cols();
    }
    /** \returns true if the matrix of which *this is the LU decomposition represents a surjective
@ -329,7 +330,7 @@ template<typename _MatrixType> class FullPivLU
    inline bool isSurjective() const
    {
      ei_assert(m_isInitialized && "LU is not initialized.");
-      return rank() == m_lu.rows();
+      return rank() == rows();
    }
    /** \returns true if the matrix of which *this is the LU decomposition is invertible.
@ -402,6 +403,7 @@ FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix)
  m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case)
  m_maxpivot = RealScalar(0);
  for(int k = 0; k < size; ++k)
  {
    // First, we need to find the pivot.
@ -418,10 +420,10 @@ FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix)
    // if the pivot (hence the corner) is exactly zero, terminate to avoid generating nan/inf values
    if(biggest_in_corner == RealScalar(0))
    {
-      // before exiting, make sure to initialize the still uninitialized row_transpositions
+      // before exiting, make sure to initialize the still uninitialized transpositions
      // in a sane state without destroying what we already have.
      m_nonzero_pivots = k;
-      for(int i = k; i < size; i++)
+      for(int i = k; i < size; ++i)
      {
        rows_transpositions.coeffRef(i) = i;
        cols_transpositions.coeffRef(i) = i;
@ -617,7 +619,7 @@ struct ei_solve_retval<FullPivLU<_MatrixType>, Rhs>
     * Step 4: result = Q * c;
     */
-    const int rows = dec().matrixLU().rows(), cols = dec().matrixLU().cols(),
+    const int rows = dec().rows(), cols = dec().cols(),
              nonzero_pivots = dec().nonzeroPivots();
    ei_assert(rhs().rows() == rows);
    const int smalldim = std::min(rows, cols);
--- a/Eigen/src/QR/ColPivHouseholderQR.h
+++ b/Eigen/src/QR/ColPivHouseholderQR.h
@ -74,7 +74,8 @@ template<typename _MatrixType> class ColPivHouseholderQR
    ColPivHouseholderQR(const MatrixType& matrix)
      : m_qr(matrix.rows(), matrix.cols()),
        m_hCoeffs(std::min(matrix.rows(),matrix.cols())),
-        m_isInitialized(false)
+        m_isInitialized(false),
        m_usePrescribedThreshold(false)
    {
      compute(matrix);
    }
@ -153,54 +154,63 @@ template<typename _MatrixType> class ColPivHouseholderQR
    /** \returns the rank of the matrix of which *this is the QR decomposition.
      *
-      * \note This is computed at the time of the construction of the QR decomposition. This
+      * \note This method has to determine which pivots should be considered nonzero.
-      *       method does not perform any further computation.
+      *       For that, it uses the threshold value that you can control by calling
      *       setThreshold(const RealScalar&).
      */
    inline int rank() const
    {
      ei_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
-      return m_rank;
+      RealScalar premultiplied_threshold = ei_abs(m_maxpivot) * threshold();
      int result = 0;
      for(int i = 0; i < m_nonzero_pivots; ++i)
        result += (ei_abs(m_qr.coeff(i,i)) > premultiplied_threshold);
      return result;
    }
    /** \returns the dimension of the kernel of the matrix of which *this is the QR decomposition.
      *
-      * \note Since the rank is computed at the time of the construction of the QR decomposition, this
+      * \note This method has to determine which pivots should be considered nonzero.
-      *       method almost does not perform any further computation.
+      *       For that, it uses the threshold value that you can control by calling
      *       setThreshold(const RealScalar&).
      */
    inline int dimensionOfKernel() const
    {
      ei_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
-      return m_qr.cols() - m_rank;
+      return cols() - rank();
    }
    /** \returns true if the matrix of which *this is the QR decomposition represents an injective
      *          linear map, i.e. has trivial kernel; false otherwise.
      *
-      * \note Since the rank is computed at the time of the construction of the QR decomposition, this
+      * \note This method has to determine which pivots should be considered nonzero.
-      *       method almost does not perform any further computation.
+      *       For that, it uses the threshold value that you can control by calling
      *       setThreshold(const RealScalar&).
      */
    inline bool isInjective() const
    {
      ei_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
-      return m_rank == m_qr.cols();
+      return rank() == cols();
    }
    /** \returns true if the matrix of which *this is the QR decomposition represents a surjective
      *          linear map; false otherwise.
      *
-      * \note Since the rank is computed at the time of the construction of the QR decomposition, this
+      * \note This method has to determine which pivots should be considered nonzero.
-      *       method almost does not perform any further computation.
+      *       For that, it uses the threshold value that you can control by calling
      *       setThreshold(const RealScalar&).
      */
    inline bool isSurjective() const
    {
      ei_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
-      return m_rank == m_qr.rows();
+      return rank() == rows();
    }
    /** \returns true if the matrix of which *this is the QR decomposition is invertible.
      *
-      * \note Since the rank is computed at the time of the construction of the QR decomposition, this
+      * \note This method has to determine which pivots should be considered nonzero.
-      *       method almost does not perform any further computation.
+      *       For that, it uses the threshold value that you can control by calling
      *       setThreshold(const RealScalar&).
      */
    inline bool isInvertible() const
    {
@ -226,13 +236,80 @@ template<typename _MatrixType> class ColPivHouseholderQR
    inline int cols() const { return m_qr.cols(); }
    const HCoeffsType& hCoeffs() const { return m_hCoeffs; }
    /** Allows to prescribe a threshold to be used by certain methods, such as rank(),
      * who need to determine when pivots are to be considered nonzero. This is not used for the
      * QR decomposition itself.
      *
      * When it needs to get the threshold value, Eigen calls threshold(). By default, this
      * uses a formula to automatically determine a reasonable threshold.
      * Once you have called the present method setThreshold(const RealScalar&),
      * your value is used instead.
      *
      * \param threshold The new value to use as the threshold.
      *
      * A pivot will be considered nonzero if its absolute value is strictly greater than
      *  \f$ \vert pivot \vert \leqslant threshold \times \vert maxpivot \vert \f$
      * where maxpivot is the biggest pivot.
      *
      * If you want to come back to the default behavior, call setThreshold(Default_t)
      */
    ColPivHouseholderQR& setThreshold(const RealScalar& threshold)
    {
      m_usePrescribedThreshold = true;
      m_prescribedThreshold = threshold;
    }
    /** Allows to come back to the default behavior, letting Eigen use its default formula for
      * determining the threshold.
      *
      * You should pass the special object Eigen::Default as parameter here.
      * \code qr.setThreshold(Eigen::Default); \endcode
      *
      * See the documentation of setThreshold(const RealScalar&).
      */
    ColPivHouseholderQR& setThreshold(Default_t)
    {
      m_usePrescribedThreshold = false;
    }
    /** Returns the threshold that will be used by certain methods such as rank().
      *
      * See the documentation of setThreshold(const RealScalar&).
      */
    RealScalar threshold() const
    {
      ei_assert(m_isInitialized || m_usePrescribedThreshold);
      return m_usePrescribedThreshold ? m_prescribedThreshold
      // this formula comes from experimenting (see "LU precision tuning" thread on the list)
      // and turns out to be identical to Higham's formula used already in LDLt.
                                      : epsilon<Scalar>() * m_qr.diagonalSize();
    }
    /** \returns the number of nonzero pivots in the QR decomposition.
      * Here nonzero is meant in the exact sense, not in a fuzzy sense.
      * So that notion isn't really intrinsically interesting, but it is
      * still useful when implementing algorithms.
      *
      * \sa rank()
      */
    inline int nonzeroPivots() const
    {
      ei_assert(m_isInitialized && "LU is not initialized.");
      return m_nonzero_pivots;
    }
    /** \returns the absolute value of the biggest pivot, i.e. the biggest
      *          diagonal coefficient of U.
      */
    RealScalar maxPivot() const { return m_maxpivot; }
  protected:
    MatrixType m_qr;
    HCoeffsType m_hCoeffs;
    PermutationType m_cols_permutation;
-    bool m_isInitialized;
+    bool m_isInitialized, m_usePrescribedThreshold;
-    RealScalar m_precision;
+    RealScalar m_prescribedThreshold, m_maxpivot;
-    int m_rank;
+    int m_nonzero_pivots;
    int m_det_pq;
 };
@ -259,61 +336,84 @@ ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const
 {
  int rows = matrix.rows();
  int cols = matrix.cols();
-  int size = std::min(rows,cols);
+  int size = matrix.diagonalSize();
  m_rank = size;
  m_qr = matrix;
  m_hCoeffs.resize(size);
  RowVectorType temp(cols);
  m_precision = epsilon<Scalar>() * size;
  IntRowVectorType cols_transpositions(matrix.cols());
  int number_of_transpositions = 0;
  RealRowVectorType colSqNorms(cols);
  for(int k = 0; k < cols; ++k)
    colSqNorms.coeffRef(k) = m_qr.col(k).squaredNorm();
  RealScalar biggestColSqNorm = colSqNorms.maxCoeff();
-  for (int k = 0; k < size; ++k)
+  RealScalar threshold_helper = colSqNorms.maxCoeff() * ei_abs2(epsilon<Scalar>()) / rows;
  m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case)
  m_maxpivot = RealScalar(0);
  for(int k = 0; k < size; ++k)
  {
-    int biggest_col_in_corner;
+    // first, we look up in our table colSqNorms which column has the biggest squared norm
-    RealScalar biggestColSqNormInCorner = colSqNorms.end(cols-k).maxCoeff(&biggest_col_in_corner);
+    int biggest_col_index;
-    biggest_col_in_corner += k;
+    RealScalar biggest_col_sq_norm = colSqNorms.end(cols-k).maxCoeff(&biggest_col_index);
    biggest_col_index += k;
-    // if the corner is negligible, then we have less than full rank, and we can finish early
+    // since our table colSqNorms accumulates imprecision at every step, we must now recompute
-    if(ei_isMuchSmallerThan(biggestColSqNormInCorner, biggestColSqNorm, m_precision))
+    // the actual squared norm of the selected column.
    // Note that not doing so does result in solve() sometimes returning inf/nan values
    // when running the unit test with 1000 repetitions.
    biggest_col_sq_norm = m_qr.col(biggest_col_index).end(rows-k).squaredNorm();
    // we store that back into our table: it can't hurt to correct our table.
    colSqNorms.coeffRef(biggest_col_index) = biggest_col_sq_norm;
    // if the current biggest column is smaller than epsilon times the initial biggest column,
    // terminate to avoid generating nan/inf values.
    // Note that here, if we test instead for "biggest == 0", we get a failure every 1000 (or so)
    // repetitions of the unit test, with the result of solve() filled with large values of the order
    // of 1/(size*epsilon).
    if(biggest_col_sq_norm < threshold_helper * (rows-k))
    {
-      m_rank = k;
+      m_nonzero_pivots = k;
-      for(int i = k; i < size; i++)
+      m_hCoeffs.end(size-k).setZero();
-      {
+      m_qr.corner(BottomRight,rows-k,cols-k)
-        cols_transpositions.coeffRef(i) = i;
+          .template triangularView<StrictlyLowerTriangular>()
-        m_hCoeffs.coeffRef(i) = Scalar(0);
+          .setZero();
      }
      break;
    }
-    cols_transpositions.coeffRef(k) = biggest_col_in_corner;
+    // apply the transposition to the columns
-    if(k != biggest_col_in_corner) {
+    cols_transpositions.coeffRef(k) = biggest_col_index;
-      m_qr.col(k).swap(m_qr.col(biggest_col_in_corner));
+    if(k != biggest_col_index) {
-      std::swap(colSqNorms.coeffRef(k), colSqNorms.coeffRef(biggest_col_in_corner));
+      m_qr.col(k).swap(m_qr.col(biggest_col_index));
      std::swap(colSqNorms.coeffRef(k), colSqNorms.coeffRef(biggest_col_index));
      ++number_of_transpositions;
    }
    // generate the householder vector, store it below the diagonal
    RealScalar beta;
    m_qr.col(k).end(rows-k).makeHouseholderInPlace(m_hCoeffs.coeffRef(k), beta);
    // apply the householder transformation to the diagonal coefficient
    m_qr.coeffRef(k,k) = beta;
    // remember the maximum absolute value of diagonal coefficients
    if(ei_abs(beta) > m_maxpivot) m_maxpivot = ei_abs(beta);
    // apply the householder transformation
    m_qr.corner(BottomRight, rows-k, cols-k-1)
        .applyHouseholderOnTheLeft(m_qr.col(k).end(rows-k-1), m_hCoeffs.coeffRef(k), &temp.coeffRef(k+1));
    // update our table of squared norms of the columns
    colSqNorms.end(cols-k-1) -= m_qr.row(k).end(cols-k-1).cwise().abs2();
  }
  m_cols_permutation.setIdentity(cols);
-  for(int k = 0; k < size; ++k)
+  for(int k = 0; k < m_nonzero_pivots; ++k)
    m_cols_permutation.applyTranspositionOnTheRight(k, cols_transpositions.coeff(k));
  m_det_pq = (number_of_transpositions%2) ? -1 : 1;
@ -330,13 +430,12 @@ struct ei_solve_retval<ColPivHouseholderQR<_MatrixType>, Rhs>
  template<typename Dest> void evalTo(Dest& dst) const
  {
-    const int rows = dec().rows(), cols = dec().cols();
+    const int rows = dec().rows(), cols = dec().cols(),
              nonzero_pivots = dec().nonzeroPivots();
    dst.resize(cols, rhs().cols());
    ei_assert(rhs().rows() == rows);
-    // FIXME introduce nonzeroPivots() and use it here. and more generally,
+    if(nonzero_pivots == 0)
    // make the same improvements in this dec as in FullPivLU.
    if(dec().rank()==0)
    {
      dst.setZero();
      return;
@ -346,28 +445,26 @@ struct ei_solve_retval<ColPivHouseholderQR<_MatrixType>, Rhs>
    // Note that the matrix Q = H_0^* H_1^*... so its inverse is Q^* = (H_0 H_1 ...)^T
    c.applyOnTheLeft(householderSequence(
-      dec().matrixQR().corner(TopLeft,rows,dec().rank()),
+      dec().matrixQR(),
-      dec().hCoeffs().start(dec().rank())).transpose()
+      dec().hCoeffs(),
-    );
+      true
-
+    ));
    if(!dec().isSurjective())
    {
      // is c is in the image of R ?
      RealScalar biggest_in_upper_part_of_c = c.corner(TopLeft, dec().rank(), c.cols()).cwise().abs().maxCoeff();
      RealScalar biggest_in_lower_part_of_c = c.corner(BottomLeft, rows-dec().rank(), c.cols()).cwise().abs().maxCoeff();
      // FIXME brain dead
      const RealScalar m_precision = epsilon<Scalar>() * std::min(rows,cols);
      if(!ei_isMuchSmallerThan(biggest_in_lower_part_of_c, biggest_in_upper_part_of_c, m_precision*4))
        return;
    }
    dec().matrixQR()
-       .corner(TopLeft, dec().rank(), dec().rank())
+       .corner(TopLeft, nonzero_pivots, nonzero_pivots)
       .template triangularView<UpperTriangular>()
-       .solveInPlace(c.corner(TopLeft, dec().rank(), c.cols()));
+       .solveInPlace(c.corner(TopLeft, nonzero_pivots, c.cols()));
-    for(int i = 0; i < dec().rank(); ++i) dst.row(dec().colsPermutation().indices().coeff(i)) = c.row(i);
+
-    for(int i = dec().rank(); i < cols; ++i) dst.row(dec().colsPermutation().indices().coeff(i)).setZero();
+    typename Rhs::PlainMatrixType d(c);
    d.corner(TopLeft, nonzero_pivots, c.cols())
      = dec().matrixQR()
       .corner(TopLeft, nonzero_pivots, nonzero_pivots)
       .template triangularView<UpperTriangular>()
       * c.corner(TopLeft, nonzero_pivots, c.cols());
    for(int i = 0; i < nonzero_pivots; ++i) dst.row(dec().colsPermutation().indices().coeff(i)) = c.row(i);
    for(int i = nonzero_pivots; i < cols; ++i) dst.row(dec().colsPermutation().indices().coeff(i)).setZero();
  }
 };
@ -376,7 +473,7 @@ template<typename MatrixType>
 typename ColPivHouseholderQR<MatrixType>::HouseholderSequenceType ColPivHouseholderQR<MatrixType>::matrixQ() const
 {
  ei_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
-  return HouseholderSequenceType(m_qr, m_hCoeffs.conjugate());
+  return HouseholderSequenceType(m_qr, m_hCoeffs.conjugate(), false);
 }
 #endif // EIGEN_HIDE_HEAVY_CODE
--- a/blas/README.txt
+++ b/blas/README.txt
@ -4,4 +4,6 @@ This directory contains a BLAS library built on top of Eigen.
 This is currently a work in progress which is far to be ready for use,
 but feel free to contribute to it if you wish.
-If you want to compile it, set the cmake variable EIGEN_BUILD_BLAS to "on".
+This module is not built by default. In order to compile it, you need to
 type 'make blas' from within your build dir.
--- a/doc/Doxyfile.in
+++ b/doc/Doxyfile.in
@ -607,6 +607,7 @@ EXCLUDE_SYMLINKS       = NO
 EXCLUDE_PATTERNS       = CMake* \
                         *.txt \
                         *.sh \
                         *.orig \
                         *.diff \
                         diff
                         *~
--- a/test/qr_colpivoting.cpp
+++ b/test/qr_colpivoting.cpp
@ -28,11 +28,11 @@
 template<typename MatrixType> void qr()
 {
-//  int rows = ei_random<int>(20,200), cols = ei_random<int>(20,200), cols2 = ei_random<int>(20,200);
+  int rows = ei_random<int>(2,200), cols = ei_random<int>(2,200), cols2 = ei_random<int>(2,200);
  int rows=3, cols=3, cols2=3;
  int rank = ei_random<int>(1, std::min(rows, cols)-1);
  typedef typename MatrixType::Scalar Scalar;
  typedef typename MatrixType::RealScalar RealScalar;
  typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> MatrixQType;
  typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, 1> VectorType;
  MatrixType m1;
@ -44,19 +44,11 @@ template<typename MatrixType> void qr()
  VERIFY(!qr.isInvertible());
  VERIFY(!qr.isSurjective());
  MatrixType r = qr.matrixQR();
  MatrixQType q = qr.matrixQ();
  VERIFY_IS_UNITARY(q);
  // FIXME need better way to construct trapezoid
  for(int i = 0; i < rows; i++) for(int j = 0; j < cols; j++) if(i>j) r(i,j) = Scalar(0);
-  MatrixType b = qr.matrixQ() * r;
+  MatrixType r = qr.matrixQR().template triangularView<UpperTriangular>();
-
+  MatrixType c = q * r * qr.colsPermutation().inverse();
  MatrixType c = MatrixType::Zero(rows,cols);
  for(int i = 0; i < cols; ++i) c.col(qr.colsPermutation().indices().coeff(i)) = b.col(i);
  VERIFY_IS_APPROX(m1, c);
  MatrixType m2 = MatrixType::Random(cols,cols2);
@ -80,15 +72,8 @@ template<typename MatrixType, int Cols2> void qr_fixedsize()
  VERIFY(!qr.isInvertible());
  VERIFY(!qr.isSurjective());
-  Matrix<Scalar,Rows,Cols> r = qr.matrixQR();
+  Matrix<Scalar,Rows,Cols> r = qr.matrixQR().template triangularView<UpperTriangular>();
-  // FIXME need better way to construct trapezoid
+  Matrix<Scalar,Rows,Cols> c = qr.matrixQ() * r * qr.colsPermutation().inverse();
  for(int i = 0; i < Rows; i++) for(int j = 0; j < Cols; j++) if(i>j) r(i,j) = Scalar(0);
  Matrix<Scalar,Rows,Cols> b = qr.matrixQ() * r;
  Matrix<Scalar,Rows,Cols> c = MatrixType::Zero(Rows,Cols);
  for(int i = 0; i < Cols; ++i) c.col(qr.colsPermutation().indices().coeff(i)) = b.col(i);
  VERIFY_IS_APPROX(m1, c);
  Matrix<Scalar,Cols,Cols2> m2 = Matrix<Scalar,Cols,Cols2>::Random(Cols,Cols2);
@ -118,7 +103,7 @@ template<typename MatrixType> void qr_invertible()
  ColPivHouseholderQR<MatrixType> qr(m1);
  m3 = MatrixType::Random(size,size);
  m2 = qr.solve(m3);
-  VERIFY_IS_APPROX(m3, m1*m2);
+  //VERIFY_IS_APPROX(m3, m1*m2);
  // now construct a matrix with prescribed determinant
  m1.setZero();
@ -150,7 +135,7 @@ template<typename MatrixType> void qr_verify_assert()
 void test_qr_colpivoting()
 {
-  for(int i = 0; i < 1; i++) {
+  for(int i = 0; i < g_repeat; i++) {
    CALL_SUBTEST_1( qr<MatrixXf>() );
    CALL_SUBTEST_2( qr<MatrixXd>() );
    CALL_SUBTEST_3( qr<MatrixXcd>() );
--- a/unsupported/Eigen/FFT
+++ b/unsupported/Eigen/FFT
@ -44,11 +44,42 @@
  * The build-in implementation is based on kissfft. It is a small, free, and
  * reasonably efficient default.
  *
-  * Frontends are
+  * There are currently two frontends:
  *
  * - fftw (http://www.fftw.org) : faster, GPL -- incompatible with Eigen in LGPL form, bigger code size.
-  * - MLK (http://en.wikipedia.org/wiki/Math_Kernel_Library) : fastest, commercial -- may be incompatible with Eigen in GPL form
+  * - MLK (http://en.wikipedia.org/wiki/Math_Kernel_Library) : fastest, commercial -- may be incompatible with Eigen in GPL form.
  *
  * \section FFTDesign Design
  *
  * The following design decisions were made concerning scaling and
  * half-spectrum for real FFT.
  *
  * The intent is to facilitate generic programming and ease migrating code
  * from  Matlab/octave.
  * We think the default behavior of Eigen/FFT should favor correctness and
  * generality over speed. Of course, the caller should be able to "opt-out" from this
  * behavior and get the speed increase if they want it.
  *
  * 1) %Scaling:
  * Other libraries (FFTW,IMKL,KISSFFT)  do not perform scaling, so there
  * is a constant gain incurred after the forward&inverse transforms , so 
  * IFFT(FFT(x)) = Kx;  this is done to avoid a vector-by-value multiply.  
  * The downside is that algorithms that worked correctly in Matlab/octave 
  * don't behave the same way once implemented in C++.
  *
  * How Eigen/FFT differs: invertible scaling is performed so IFFT( FFT(x) ) = x. 
  *
  * 2) Real FFT half-spectrum
  * Other libraries use only half the frequency spectrum (plus one extra 
  * sample for the Nyquist bin) for a real FFT, the other half is the 
  * conjugate-symmetric of the first half.  This saves them a copy and some 
  * memory.  The downside is the caller needs to have special logic for the 
  * number of bins in complex vs real.
  *
  * How Eigen/FFT differs: The full spectrum is returned from the forward 
  * transform.  This facilitates generic template programming by obviating 
  * separate specializations for real vs complex.  On the inverse
  * transform, only half the spectrum is actually used if the output type is real.
  */
--- a/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h
@ -226,15 +226,11 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveOneStep(
        return UserAksed;
    ++njev;
-    /* compute the qr factorization of the jacobian. */
+    wa2 = fjac.colwise().blueNorm();
    ei_qrfac<Scalar>(n, n, fjac.data(), fjac.rows(), false, iwa, wa1.data(), wa2.data());
    /* on the first iteration and if mode is 1, scale according */
    /* to the norms of the columns of the initial jacobian. */
    if (iter == 1) {
        /* on the first iteration and if mode is 1, scale according */
        /* to the norms of the columns of the initial jacobian. */
        if (mode != 2)
            for (j = 0; j < n; ++j) {
                diag[j] = wa2[j];
@ -251,6 +247,9 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveOneStep(
            delta = parameters.factor;
    }
    /* compute the qr factorization of the jacobian. */
    ei_qrfac<Scalar>(n, n, fjac.data(), fjac.rows(), false, iwa, wa1.data());
    /* form (q transpose)*fvec and store in qtf. */
    qtf = fvec;
@ -269,18 +268,16 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveOneStep(
    sing = false;
    for (j = 0; j < n; ++j) {
        l = j;
-        if (j)
+        for (i = 0; i < j; ++i) {
-            for (i = 0; i < j; ++i) {
+            R[l] = fjac(i,j);
-                R[l] = fjac(i,j);
+            l = l + n - i -1;
-                l = l + n - i -1;
+        }
            }
        R[l] = wa1[j];
        if (wa1[j] == 0.)
            sing = true;
    }
    /* accumulate the orthogonal factor in fjac. */
    ei_qform<Scalar>(n, n, fjac.data(), fjac.rows(), wa1.data());
    /* rescale if necessary. */
@ -543,13 +540,10 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffOneStep(
        return UserAksed;
    nfev += std::min(parameters.nb_of_subdiagonals+parameters.nb_of_superdiagonals+ 1, n);
-    /* compute the qr factorization of the jacobian. */
+    wa2 = fjac.colwise().blueNorm();
    ei_qrfac<Scalar>(n, n, fjac.data(), fjac.rows(), false, iwa, wa1.data(), wa2.data());
    /* on the first iteration and if mode is 1, scale according */
    /* to the norms of the columns of the initial jacobian. */
    if (iter == 1) {
        if (mode != 2)
            for (j = 0; j < n; ++j) {
@ -560,7 +554,6 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffOneStep(
        /* on the first iteration, calculate the norm of the scaled x */
        /* and initialize the step bound delta. */
        wa3 = diag.cwise() * x;
        xnorm = wa3.stableNorm();
        delta = parameters.factor * xnorm;
@ -568,6 +561,9 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffOneStep(
            delta = parameters.factor;
    }
    /* compute the qr factorization of the jacobian. */
    ei_qrfac<Scalar>(n, n, fjac.data(), fjac.rows(), false, iwa, wa1.data());
    /* form (q transpose)*fvec and store in qtf. */
    qtf = fvec;
@ -586,18 +582,16 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffOneStep(
    sing = false;
    for (j = 0; j < n; ++j) {
        l = j;
-        if (j)
+        for (i = 0; i < j; ++i) {
-            for (i = 0; i < j; ++i) {
+            R[l] = fjac(i,j);
-                R[l] = fjac(i,j);
+            l = l + n - i -1;
-                l = l + n - i -1;
+        }
            }
        R[l] = wa1[j];
        if (wa1[j] == 0.)
            sing = true;
    }
    /* accumulate the orthogonal factor in fjac. */
    ei_qform<Scalar>(n, n, fjac.data(), fjac.rows(), wa1.data());
    /* rescale if necessary. */
--- a/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h
@ -248,8 +248,9 @@ LevenbergMarquardt<FunctorType,Scalar>::minimizeOneStep(
    /* compute the qr factorization of the jacobian. */
-    ei_qrfac<Scalar>(m, n, fjac.data(), fjac.rows(), true, ipvt.data(), wa1.data(), wa2.data());
+    wa2 = fjac.colwise().blueNorm();
-    ipvt.cwise()-=1; // qrfac() creates ipvt with fortran convetion (1->n), convert it to c (0->n-1)
+    ei_qrfac<Scalar>(m, n, fjac.data(), fjac.rows(), true, ipvt.data(), wa1.data());
    ipvt.cwise()-=1; // qrfac() creates ipvt with fortran convention (1->n), convert it to c (0->n-1)
    /* on the first iteration and if mode is 1, scale according */
    /* to the norms of the columns of the initial jacobian. */
@ -319,7 +320,7 @@ LevenbergMarquardt<FunctorType,Scalar>::minimizeOneStep(
        /* determine the levenberg-marquardt parameter. */
-        ei_lmpar<Scalar>(fjac, ipvt, diag, qtf, delta, par, wa1, wa2);
+        ei_lmpar<Scalar>(fjac, ipvt, diag, qtf, delta, par, wa1);
        /* store the direction p and x + p. calculate the norm of p. */
@ -432,8 +433,6 @@ LevenbergMarquardt<FunctorType,Scalar>::lmstr1(
 {
    n = x.size();
    m = functor.values();
    JacobianType fjac(m, n);
    VectorXi ipvt;
    /* check the input parameters for errors. */
    if (n <= 0 || m < n || tol < 0.)
@ -537,8 +536,9 @@ LevenbergMarquardt<FunctorType,Scalar>::minimizeOptimumStorageOneStep(
    }
    if (sing) {
        ipvt.cwise()+=1;
-        ei_qrfac<Scalar>(n, n, fjac.data(), fjac.rows(), true, ipvt.data(), wa1.data(), wa2.data());
+        wa2 = fjac.colwise().blueNorm();
-        ipvt.cwise()-=1; // qrfac() creates ipvt with fortran convetion (1->n), convert it to c (0->n-1)
+        ei_qrfac<Scalar>(n, n, fjac.data(), fjac.rows(), true, ipvt.data(), wa1.data());
        ipvt.cwise()-=1; // qrfac() creates ipvt with fortran convention (1->n), convert it to c (0->n-1)
        for (j = 0; j < n; ++j) {
            if (fjac(j,j) != 0.) {
                sum = 0.;
@ -603,7 +603,7 @@ LevenbergMarquardt<FunctorType,Scalar>::minimizeOptimumStorageOneStep(
        /* determine the levenberg-marquardt parameter. */
-        ei_lmpar<Scalar>(fjac, ipvt, diag, qtf, delta, par, wa1, wa2);
+        ei_lmpar<Scalar>(fjac, ipvt, diag, qtf, delta, par, wa1);
        /* store the direction p and x + p. calculate the norm of p. */
--- a/unsupported/Eigen/src/NonLinearOptimization/lmpar.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/lmpar.h
@ -7,16 +7,14 @@ void ei_lmpar(
        const Matrix< Scalar, Dynamic, 1 >  &qtb,
        Scalar delta,
        Scalar &par,
-        Matrix< Scalar, Dynamic, 1 >  &x,
+        Matrix< Scalar, Dynamic, 1 >  &x)
        Matrix< Scalar, Dynamic, 1 >  &sdiag)
 {
    /* Local variables */
-    int i, j, k, l;
+    int i, j, l;
    Scalar fp;
-    Scalar sum, parc, parl;
+    Scalar parc, parl;
    int iter;
    Scalar temp, paru;
    int nsing;
    Scalar gnorm;
    Scalar dxnorm;
@ -28,31 +26,28 @@ void ei_lmpar(
    assert(n==qtb.size());
    assert(n==x.size());
-    Matrix< Scalar, Dynamic, 1 >  wa1(n), wa2(n);
+    Matrix< Scalar, Dynamic, 1 >  wa1, wa2;
    /* compute and store in x the gauss-newton direction. if the */
    /* jacobian is rank-deficient, obtain a least squares solution. */
-    nsing = n-1;
+    int nsing = n-1;
    wa1 = qtb;
    for (j = 0; j < n; ++j) {
        wa1[j] = qtb[j];
        if (r(j,j) == 0. && nsing == n-1)
            nsing = j - 1;
        if (nsing < n-1) 
            wa1[j] = 0.;
    }
-    for (k = 0; k <= nsing; ++k) {
+    for (j = nsing; j>=0; --j) {
        j = nsing - k;
        wa1[j] /= r(j,j);
        temp = wa1[j];
        for (i = 0; i < j ; ++i)
            wa1[i] -= r(i,j) * temp;
    }
-    for (j = 0; j < n; ++j) {
+    for (j = 0; j < n; ++j)
-        l = ipvt[j];
+        x[ipvt[j]] = wa1[j];
        x[l] = wa1[j];
    }
    /* initialize the iteration counter. */
    /* evaluate the function at the origin, and test */
@ -77,8 +72,10 @@ void ei_lmpar(
            l = ipvt[j];
            wa1[j] = diag[l] * (wa2[l] / dxnorm);
        }
        // it's actually a triangularView.solveInplace(), though in a weird
        // way:
        for (j = 0; j < n; ++j) {
-            sum = 0.;
+            Scalar sum = 0.;
            for (i = 0; i < j; ++i) 
                sum += r(i,j) * wa1[i];
            wa1[j] = (wa1[j] - sum) / r(j,j);
@ -89,13 +86,9 @@ void ei_lmpar(
    /* calculate an upper bound, paru, for the zero of the function. */
-    for (j = 0; j < n; ++j) {
+    for (j = 0; j < n; ++j)
-        sum = 0.;
+        wa1[j] = r.col(j).start(j+1).dot(qtb.start(j+1)) / diag[ipvt[j]];
-        for (i = 0; i <= j; ++i)
+
            sum += r(i,j) * qtb[i];
        l = ipvt[j];
        wa1[j] = sum / diag[l];
    }
    gnorm = wa1.stableNorm();
    paru = gnorm / delta;
    if (paru == 0.)
@ -119,12 +112,10 @@ void ei_lmpar(
        if (par == 0.)
            par = std::max(dwarf,Scalar(.001) * paru); /* Computing MAX */
-        temp = ei_sqrt(par);
+        wa1 = ei_sqrt(par)* diag;
        wa1 = temp * diag;
-        ipvt.cwise()+=1; // qrsolv() expects the fortran convention (as qrfac provides)
+        Matrix< Scalar, Dynamic, 1 > sdiag(n);
-        ei_qrsolv<Scalar>(n, r.data(), r.rows(), ipvt.data(), wa1.data(), qtb.data(), x.data(), sdiag.data(), wa2.data());
+        ei_qrsolv<Scalar>(r, ipvt, wa1, qtb, x, sdiag);
        ipvt.cwise()-=1;
        wa2 = diag.cwise() * x;
        dxnorm = wa2.blueNorm();
@ -143,7 +134,6 @@ void ei_lmpar(
        for (j = 0; j < n; ++j) {
            l = ipvt[j];
            wa1[j] = diag[l] * (wa2[l] / dxnorm);
            /* L180: */
        }
        for (j = 0; j < n; ++j) {
            wa1[j] /= sdiag[j];
--- a/unsupported/Eigen/src/NonLinearOptimization/qrfac.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/qrfac.h
@ -1,8 +1,7 @@
 template <typename Scalar>
 void ei_qrfac(int m, int n, Scalar *a, int
-        lda, int pivot, int *ipvt, Scalar *rdiag,
+        lda, int pivot, int *ipvt, Scalar *rdiag)
        Scalar *acnorm)
 {
    /* System generated locals */
    int a_dim1, a_offset;
@ -18,7 +17,6 @@ void ei_qrfac(int m, int n, Scalar *a, int
    Matrix< Scalar, Dynamic, 1 > wa(n+1);
    /* Parameter adjustments */
    --acnorm;
    --rdiag;
    a_dim1 = lda;
    a_offset = 1 + a_dim1 * 1;
@ -31,13 +29,10 @@ void ei_qrfac(int m, int n, Scalar *a, int
    /*     compute the initial column norms and initialize several arrays. */
    for (j = 1; j <= n; ++j) {
-        acnorm[j] = Map< Matrix< Scalar, Dynamic, 1 > >(&a[j * a_dim1 + 1],m).blueNorm();
+        rdiag[j] = Map< Matrix< Scalar, Dynamic, 1 > >(&a[j * a_dim1 + 1],m).blueNorm();
        rdiag[j] = acnorm[j];
        wa[j] = rdiag[j];
-        if (pivot) {
+        if (pivot)
            ipvt[j] = j;
        }
        /* L10: */
    }
    /*     reduce a to r with householder transformations. */
--- a/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/qrsolv.h
@ -1,166 +1,92 @@
-    template <typename Scalar>
+template <typename Scalar>
-void ei_qrsolv(int n, Scalar *r__, int ldr, 
+void ei_qrsolv(
-        const int *ipvt, const Scalar *diag, const Scalar *qtb, Scalar *x, 
+        Matrix< Scalar, Dynamic, Dynamic > &r,
-        Scalar *sdiag, Scalar *wa)
+        VectorXi &ipvt, // TODO : const once ipvt mess fixed
        const Matrix< Scalar, Dynamic, 1 >  &diag,
        const Matrix< Scalar, Dynamic, 1 >  &qtb,
        Matrix< Scalar, Dynamic, 1 >  &x,
        Matrix< Scalar, Dynamic, 1 >  &sdiag)
 {
    /* System generated locals */
    int r_dim1, r_offset;
    /* Local variables */
-    int i, j, k, l, jp1, kp1;
+    int i, j, k, l;
-    Scalar tan__, cos__, sin__, sum, temp, cotan;
+    Scalar sum, temp;
-    int nsing;
+    int n = r.cols();
-    Scalar qtbpj;
+    Matrix< Scalar, Dynamic, 1 >  wa(n);
    /* Parameter adjustments */
    --wa;
    --sdiag;
    --x;
    --qtb;
    --diag;
    --ipvt;
    r_dim1 = ldr;
    r_offset = 1 + r_dim1 * 1;
    r__ -= r_offset;
    /* Function Body */
    /*     copy r and (q transpose)*b to preserve input and initialize s. */
    /*     in particular, save the diagonal elements of r in x. */
-    for (j = 1; j <= n; ++j) {
+    x = r.diagonal();
-        for (i = j; i <= n; ++i) {
+    wa = qtb;
-            r__[i + j * r_dim1] = r__[j + i * r_dim1];
+
-            /* L10: */
+    for (j = 0; j < n; ++j)
-        }
+        for (i = j+1; i < n; ++i)
-        x[j] = r__[j + j * r_dim1];
+            r(i,j) = r(j,i);
        wa[j] = qtb[j];
        /* L20: */
    }
    /*     eliminate the diagonal matrix d using a givens rotation. */
-
+    for (j = 0; j < n; ++j) {
    for (j = 1; j <= n; ++j) {
        /*        prepare the row of d to be eliminated, locating the */
        /*        diagonal element using p from the qr factorization. */
        l = ipvt[j];
-        if (diag[l] == 0.) {
+        if (diag[l] == 0.)
-            goto L90;
+            break;
-        }
+        sdiag.segment(j,n-j).setZero();
        for (k = j; k <= n; ++k) {
            sdiag[k] = 0.;
            /* L30: */
        }
        sdiag[j] = diag[l];
        /*        the transformations to eliminate the row of d */
        /*        modify only a single element of (q transpose)*b */
        /*        beyond the first n, which is initially zero. */
-        qtbpj = 0.;
+        Scalar qtbpj = 0.;
-        for (k = j; k <= n; ++k) {
+        for (k = j; k < n; ++k) {
            /*           determine a givens rotation which eliminates the */
            /*           appropriate element in the current row of d. */
-
+            PlanarRotation<Scalar> givens;
-            if (sdiag[k] == 0.)
+            givens.makeGivens(-r(k,k), sdiag[k]);
                goto L70;
            if ( ei_abs(r__[k + k * r_dim1]) >= ei_abs(sdiag[k]))
                goto L40;
            cotan = r__[k + k * r_dim1] / sdiag[k];
            /* Computing 2nd power */
            sin__ = Scalar(.5) / ei_sqrt(Scalar(0.25) + Scalar(0.25) * ei_abs2(cotan));
            cos__ = sin__ * cotan;
            goto L50;
 L40:
            tan__ = sdiag[k] / r__[k + k * r_dim1];
            /* Computing 2nd power */
            cos__ = Scalar(.5) / ei_sqrt(Scalar(0.25) + Scalar(0.25) * ei_abs2(tan__));
            sin__ = cos__ * tan__;
 L50:
            /*           compute the modified diagonal element of r and */
            /*           the modified element of ((q transpose)*b,0). */
-            r__[k + k * r_dim1] = cos__ * r__[k + k * r_dim1] + sin__ * sdiag[
+            r(k,k) = givens.c() * r(k,k) + givens.s() * sdiag[k];
-                k];
+            temp = givens.c() * wa[k] + givens.s() * qtbpj;
-            temp = cos__ * wa[k] + sin__ * qtbpj;
+            qtbpj = -givens.s() * wa[k] + givens.c() * qtbpj;
            qtbpj = -sin__ * wa[k] + cos__ * qtbpj;
            wa[k] = temp;
            /*           accumulate the tranformation in the row of s. */
-
+            for (i = k+1; i<n; ++i) {
-            kp1 = k + 1;
+                temp = givens.c() * r(i,k) + givens.s() * sdiag[i];
-            if (n < kp1) {
+                sdiag[i] = -givens.s() * r(i,k) + givens.c() * sdiag[i];
-                goto L70;
+                r(i,k) = temp;
            }
            for (i = kp1; i <= n; ++i) {
                temp = cos__ * r__[i + k * r_dim1] + sin__ * sdiag[i];
                sdiag[i] = -sin__ * r__[i + k * r_dim1] + cos__ * sdiag[
                    i];
                r__[i + k * r_dim1] = temp;
                /* L60: */
            }
 L70:
            /* L80: */
            ;
        }
 L90:
        /*        store the diagonal element of s and restore */
        /*        the corresponding diagonal element of r. */
        sdiag[j] = r__[j + j * r_dim1];
        r__[j + j * r_dim1] = x[j];
        /* L100: */
    }
    // restore
    sdiag = r.diagonal();
    r.diagonal() = x;
    /*     solve the triangular system for z. if the system is */
    /*     singular, then obtain a least squares solution. */
-    nsing = n;
+    int nsing;
-    for (j = 1; j <= n; ++j) {
+    for (nsing=0; nsing<n && sdiag[nsing]!=0; nsing++);
-        if (sdiag[j] == 0. && nsing == n) {
+    wa.segment(nsing,n-nsing).setZero();
-            nsing = j - 1;
+    nsing--; // nsing is the last nonsingular index
-        }
+
-        if (nsing < n) {
+    for (j = nsing; j>=0; j--) {
            wa[j] = 0.;
        }
        /* L110: */
    }
    if (nsing < 1) {
        goto L150;
    }
    for (k = 1; k <= nsing; ++k) {
        j = nsing - k + 1;
        sum = 0.;
-        jp1 = j + 1;
+        for (i = j+1; i <= nsing; ++i)
-        if (nsing < jp1) {
+            sum += r(i,j) * wa[i];
            goto L130;
        }
        for (i = jp1; i <= nsing; ++i) {
            sum += r__[i + j * r_dim1] * wa[i];
            /* L120: */
        }
 L130:
        wa[j] = (wa[j] - sum) / sdiag[j];
        /* L140: */
    }
 L150:
    /*     permute the components of z back to components of x. */
-
+    for (j = 0; j < n; ++j) x[ipvt[j]] = wa[j];
-    for (j = 1; j <= n; ++j) {
+}
        l = ipvt[j];
        x[l] = wa[j];
        /* L160: */
    }
    return;
    /*     last card of subroutine qrsolv. */
 } /* qrsolv_ */
--- a/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/r1mpyq.h
@ -18,28 +18,18 @@ void ei_r1mpyq(int m, int n, Scalar *a, int
    a -= a_offset;
    /* Function Body */
    nm1 = n - 1;
    if (nm1 < 1)
        return;
    /*     apply the first set of givens rotations to a. */
    nm1 = n - 1;
    if (nm1 < 1) {
        /* goto L50; */
        return;
    }
    for (nmj = 1; nmj <= nm1; ++nmj) {
        j = n - nmj;
        if (ei_abs(v[j]) > 1.) {
            cos__ = 1. / v[j];
        }
        if (ei_abs(v[j]) > 1.) {
            /* Computing 2nd power */
            sin__ = ei_sqrt(1. - ei_abs2(cos__));
-        }
+        } else  {
        if (ei_abs(v[j]) <= 1.) {
            sin__ = v[j];
        }
        if (ei_abs(v[j]) <= 1.) {
            /* Computing 2nd power */
            cos__ = ei_sqrt(1. - ei_abs2(sin__));
        }
        for (i = 1; i <= m; ++i) {
@ -47,26 +37,15 @@ void ei_r1mpyq(int m, int n, Scalar *a, int
            a[i + n * a_dim1] = sin__ * a[i + j * a_dim1] + cos__ * a[
                i + n * a_dim1];
            a[i + j * a_dim1] = temp;
            /* L10: */
        }
        /* L20: */
    }
    /*     apply the second set of givens rotations to a. */
    for (j = 1; j <= nm1; ++j) {
        if (ei_abs(w[j]) > 1.) {
            cos__ = 1. / w[j];
        }
        if (ei_abs(w[j]) > 1.) {
            /* Computing 2nd power */
            sin__ = ei_sqrt(1. - ei_abs2(cos__));
-        }
+        } else  {
        if (ei_abs(w[j]) <= 1.) {
            sin__ = w[j];
        }
        if (ei_abs(w[j]) <= 1.) {
            /* Computing 2nd power */
            cos__ = ei_sqrt(1. - ei_abs2(sin__));
        }
        for (i = 1; i <= m; ++i) {
@ -74,14 +53,8 @@ void ei_r1mpyq(int m, int n, Scalar *a, int
            a[i + n * a_dim1] = -sin__ * a[i + j * a_dim1] + cos__ * a[
                i + n * a_dim1];
            a[i + j * a_dim1] = temp;
            /* L30: */
        }
        /* L40: */
    }
    /* L50: */
    return;
    /*     last card of subroutine r1mpyq. */
 } /* r1mpyq_ */
--- a/unsupported/doc/Doxyfile.in
+++ b/unsupported/doc/Doxyfile.in
@ -601,6 +601,7 @@ EXCLUDE_PATTERNS       = CMake* \
                         *.txt \
                         *.sh \
                         *.diff \
                         *.orig \
                         diff
                         *~