Solve the issue found by Timothy in solveTriangular:

=> row-major rhs are now evaluated to a column-major temporary before the computations. Add solveInPlace in Cholesky*
2025-09-11 00:43:13 +08:00 · 2008-10-13 13:14:43 +00:00 · 2008-10-13 13:14:43 +00:00 · e2bd8623f8
commit e2bd8623f8
parent 537a0e0a52
9 changed files with 129 additions and 45 deletions
--- a/Eigen/src/Cholesky/Cholesky.h
+++ b/Eigen/src/Cholesky/Cholesky.h
@ -74,6 +74,9 @@ template<typename MatrixType> class Cholesky
    template<typename Derived>
    typename Derived::Eval solve(const MatrixBase<Derived> &b) const;
    template<typename Derived>
    bool solveInPlace(MatrixBase<Derived> &bAndX) const;
    void compute(const MatrixType& matrix);
  protected:
@ -141,8 +144,37 @@ typename Derived::Eval Cholesky<MatrixType>::solve(const MatrixBase<Derived> &b)
 {
  const int size = m_matrix.rows();
  ei_assert(size==b.rows());
  typename ei_eval_to_column_major<Derived>::type x(b);
  solveInPlace(x);
  return x;
  //return m_matrix.adjoint().template part<Upper>().solveTriangular(matrixL().solveTriangular(b));
 }
-  return m_matrix.adjoint().template part<Upper>().solveTriangular(matrixL().solveTriangular(b));
+/** Computes the solution x of \f$ A x = b \f$ using the current decomposition of A.
  * The result is stored in \a bAndx
  *
  * \returns true in case of success, false otherwise.
  *
  * In other words, it computes \f$ b = A^{-1} b \f$ with
  * \f$ {L^{*}}^{-1} L^{-1} b \f$ from right to left.
  * \param bAndX stores both the matrix \f$ b \f$ and the result \f$ x \f$
  *
  * Example: \include Cholesky_solve.cpp
  * Output: \verbinclude Cholesky_solve.out
  *
  * \sa MatrixBase::cholesky(), Cholesky::solve()
  */
 template<typename MatrixType>
 template<typename Derived>
 bool Cholesky<MatrixType>::solveInPlace(MatrixBase<Derived> &bAndX) const
 {
  const int size = m_matrix.rows();
  ei_assert(size==bAndX.rows());
  if (!m_isPositiveDefinite)
    return false;
  matrixL().solveTriangularInPlace(bAndX);
  m_matrix.adjoint().template part<Upper>().solveTriangularInPlace(bAndX);
  return true;
 }
 /** \cholesky_module
--- a/Eigen/src/Cholesky/CholeskyWithoutSquareRoot.h
+++ b/Eigen/src/Cholesky/CholeskyWithoutSquareRoot.h
@ -71,6 +71,9 @@ template<typename MatrixType> class CholeskyWithoutSquareRoot
    template<typename Derived>
    typename Derived::Eval solve(const MatrixBase<Derived> &b) const;
 		template<typename Derived>
    bool solveInPlace(MatrixBase<Derived> &bAndX) const;
    void compute(const MatrixType& matrix);
  protected:
@ -101,7 +104,7 @@ void CholeskyWithoutSquareRoot<MatrixType>::compute(const MatrixType& a)
    m_matrix = a;
    return;
  }
-  
+
  // Let's preallocate a temporay vector to evaluate the matrix-vector product into it.
  // Unlike the standard Cholesky decomposition, here we cannot evaluate it to the destination
  // matrix because it a sub-row which is not compatible suitable for efficient packet evaluation.
@ -144,8 +147,8 @@ void CholeskyWithoutSquareRoot<MatrixType>::compute(const MatrixType& a)
  * \param b the column vector \f$ b \f$, which can also be a matrix.
  *
  * See Cholesky::solve() for a example.
-  * 
+  *
-  * \sa MatrixBase::choleskyNoSqrt()
+  * \sa CholeskyWithoutSquareRoot::solveInPlace(), MatrixBase::choleskyNoSqrt()
  */
 template<typename MatrixType>
 template<typename Derived>
@ -161,6 +164,34 @@ typename Derived::Eval CholeskyWithoutSquareRoot<MatrixType>::solve(const Matrix
     );
 }
 /** Computes the solution x of \f$ A x = b \f$ using the current decomposition of A.
  * The result is stored in \a bAndx
  *
  * \returns true in case of success, false otherwise.
  *
  * In other words, it computes \f$ b = A^{-1} b \f$ with
  * \f$ {L^{*}}^{-1} D^{-1} L^{-1} b \f$ from right to left.
  * \param bAndX stores both the matrix \f$ b \f$ and the result \f$ x \f$
  *
  * Example: \include Cholesky_solve.cpp
  * Output: \verbinclude Cholesky_solve.out
  *
  * \sa MatrixBase::cholesky(), CholeskyWithoutSquareRoot::solve()
  */
 template<typename MatrixType>
 template<typename Derived>
 bool CholeskyWithoutSquareRoot<MatrixType>::solveInPlace(MatrixBase<Derived> &bAndX) const
 {
  const int size = m_matrix.rows();
  ei_assert(size==bAndX.rows());
  if (!m_isPositiveDefinite)
    return false;
  matrixL().solveTriangularInPlace(bAndX);
 	bAndX *= m_matrix.cwise().inverse().template part<Diagonal>();
  m_matrix.adjoint().template part<UnitUpper>().solveTriangularInPlace(bAndX);
  return true;
 }
 /** \cholesky_module
  * \returns the Cholesky decomposition without square root of \c *this
  */
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@ -320,7 +320,8 @@ template<typename Derived> class MatrixBase
    Derived& operator*=(const MatrixBase<OtherDerived>& other);
    template<typename OtherDerived>
-    typename OtherDerived::Eval solveTriangular(const MatrixBase<OtherDerived>& other) const;
+    typename ei_eval_to_column_major<OtherDerived>::type
 		solveTriangular(const MatrixBase<OtherDerived>& other) const;
    template<typename OtherDerived>
    void solveTriangularInPlace(MatrixBase<OtherDerived>& other) const;
@ -544,11 +545,11 @@ template<typename Derived> class MatrixBase
    const Select<Derived,ThenDerived,ElseDerived>
    select(const MatrixBase<ThenDerived>& thenMatrix,
           const MatrixBase<ElseDerived>& elseMatrix) const;
-   
+
    template<typename ThenDerived>
    inline const Select<Derived,ThenDerived, NestByValue<typename ThenDerived::ConstantReturnType> >
    select(const MatrixBase<ThenDerived>& thenMatrix, typename ThenDerived::Scalar elseScalar) const;
-    
+
    template<typename ElseDerived>
    inline const Select<Derived, NestByValue<typename ElseDerived::ConstantReturnType>, ElseDerived >
    select(typename ElseDerived::Scalar thenScalar, const MatrixBase<ElseDerived>& elseMatrix) const;
@ -581,7 +582,7 @@ template<typename Derived> class MatrixBase
    template<typename OtherDerived>
    EvalType cross(const MatrixBase<OtherDerived>& other) const;
    EvalType unitOrthogonal(void) const;
-    
+
    #ifdef EIGEN_MATRIXBASE_PLUGIN
    #include EIGEN_MATRIXBASE_PLUGIN
    #endif
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@ -36,8 +36,6 @@ struct ei_product_coeff_impl;
 template<int StorageOrder, int Index, typename Lhs, typename Rhs, typename PacketScalar, int LoadMode>
 struct ei_product_packet_impl;
 template<typename T> struct ei_product_eval_to_column_major;
 /** \class ProductReturnType
  *
  * \brief Helper class to get the correct and optimized returned type of operator*
@ -70,7 +68,7 @@ struct ProductReturnType<Lhs,Rhs,CacheFriendlyProduct>
  typedef typename ei_nested<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
  typedef typename ei_nested<Rhs,Lhs::RowsAtCompileTime,
-                             typename ei_product_eval_to_column_major<Rhs>::type
+                             typename ei_eval_to_column_major<Rhs>::type
                   >::type RhsNested;
  typedef Product<LhsNested, RhsNested, CacheFriendlyProduct> Type;
@ -706,23 +704,12 @@ inline Derived& MatrixBase<Derived>::lazyAssign(const Product<Lhs,Rhs,CacheFrien
  return derived();
 }
 template<typename T> struct ei_product_eval_to_column_major
 {
  typedef Matrix<typename ei_traits<T>::Scalar,
                ei_traits<T>::RowsAtCompileTime,
                ei_traits<T>::ColsAtCompileTime,
                ColMajor,
                ei_traits<T>::MaxRowsAtCompileTime,
                ei_traits<T>::MaxColsAtCompileTime
          > type;
 };
 template<typename T> struct ei_product_copy_rhs
 {
  typedef typename ei_meta_if<
         (ei_traits<T>::Flags & RowMajorBit)
      || (!(ei_traits<T>::Flags & DirectAccessBit)),
-      typename ei_product_eval_to_column_major<T>::type,
+      typename ei_eval_to_column_major<T>::type,
      const T&
    >::ret type;
 };
--- a/Eigen/src/Core/SolveTriangular.h
+++ b/Eigen/src/Core/SolveTriangular.h
@ -88,12 +88,12 @@ struct ei_solve_triangular_selector<Lhs,Rhs,UpLo,RowMajor|IsDense>
          other.coeffRef(i,c) = tmp/lhs.coeff(i,i);
      }
-      // now let process the remaining rows 4 at once
+      // now let's process the remaining rows 4 at once
      for(int i=blockyStart; IsLower ? i<size : i>0; )
      {
        int startBlock = i;
        int endBlock = startBlock + (IsLower ? 4 : -4);
-        
+
        /* Process the i cols times 4 rows block, and keep the result in a temporary vector */
        // FIXME use fixed size block but take care to small fixed size matrices...
        Matrix<Scalar,Dynamic,1> btmp(4);
@ -101,7 +101,7 @@ struct ei_solve_triangular_selector<Lhs,Rhs,UpLo,RowMajor|IsDense>
          btmp = lhs.block(startBlock,0,4,i) * other.col(c).start(i);
        else
          btmp = lhs.block(i-3,i+1,4,size-1-i) * other.col(c).end(size-1-i);
-        
+
        /* Let's process the 4x4 sub-matrix as usual.
         * btmp stores the diagonal coefficients used to update the remaining part of the result.
         */
@ -191,6 +191,12 @@ struct ei_solve_triangular_selector<Lhs,Rhs,UpLo,ColMajor|IsDense>
          &(lhs.const_cast_derived().coeffRef(IsLower ? endBlock : 0, IsLower ? startBlock : endBlock+1)),
          lhs.stride(),
          btmp, &(other.coeffRef(IsLower ? endBlock : 0, c)));
 // 				if (IsLower)
 //           other.col(c).end(size-endBlock) += (lhs.block(endBlock, startBlock, size-endBlock, endBlock-startBlock)
 //                                           * other.col(c).block(startBlock,endBlock-startBlock)).lazy();
 // 				else
 //           other.col(c).end(size-endBlock) += (lhs.block(endBlock, startBlock, size-endBlock, endBlock-startBlock)
 //                                           * other.col(c).block(startBlock,endBlock-startBlock)).lazy();
      }
      /* Now we have to process the remaining part as usual */
@ -227,7 +233,15 @@ void MatrixBase<Derived>::solveTriangularInPlace(MatrixBase<OtherDerived>& other
  ei_assert(!(Flags & ZeroDiagBit));
  ei_assert(Flags & (UpperTriangularBit|LowerTriangularBit));
-  ei_solve_triangular_selector<Derived, OtherDerived>::run(derived(), other.derived());
+  const bool copy = ei_traits<OtherDerived>::Flags&RowMajorBit;
  typedef typename ei_meta_if<copy,
    typename ei_eval_to_column_major<OtherDerived>::type, OtherDerived&>::ret OtherCopy;
  OtherCopy otherCopy(other.derived());
  ei_solve_triangular_selector<Derived, typename ei_unref<OtherCopy>::type>::run(derived(), otherCopy);
  if (copy)
    other = otherCopy;
 }
 /** \returns the product of the inverse of \c *this with \a other, \a *this being triangular.
@ -240,17 +254,17 @@ void MatrixBase<Derived>::solveTriangularInPlace(MatrixBase<OtherDerived>& other
  * It is required that \c *this be marked as either an upper or a lower triangular matrix, which
  * can be done by marked(), and that is automatically the case with expressions such as those returned
  * by extract().
-  * 
+  *
  * \addexample SolveTriangular \label How to solve a triangular system (aka. how to multiply the inverse of a triangular matrix by another one)
-  * 
+  *
  * Example: \include MatrixBase_marked.cpp
  * Output: \verbinclude MatrixBase_marked.out
-  * 
+  *
  * This function is essentially a wrapper to the faster solveTriangularInPlace() function creating
  * a temporary copy of \a other, calling solveTriangularInPlace() on the copy and returning it.
  * Therefore, if \a other is not needed anymore, it is quite faster to call solveTriangularInPlace()
  * instead of solveTriangular().
-  * 
+  *
  * For users coming from BLAS, this function (and more specifically solveTriangularInPlace()) offer
  * all the operations supported by the \c *TRSV and \c *TRSM BLAS routines.
  *
@ -258,14 +272,15 @@ void MatrixBase<Derived>::solveTriangularInPlace(MatrixBase<OtherDerived>& other
  * \code
  * M * T^1  <=>  T.transpose().solveTriangularInPlace(M.transpose());
  * \endcode
-  * 
+  *
  * \sa solveTriangularInPlace(), marked(), extract()
  */
 template<typename Derived>
 template<typename OtherDerived>
-typename OtherDerived::Eval MatrixBase<Derived>::solveTriangular(const MatrixBase<OtherDerived>& other) const
+typename ei_eval_to_column_major<OtherDerived>::type
 MatrixBase<Derived>::solveTriangular(const MatrixBase<OtherDerived>& other) const
 {
-  typename OtherDerived::Eval res(other);
+  typename ei_eval_to_column_major<OtherDerived>::type res(other);
  solveTriangularInPlace(res);
  return res;
 }
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@ -121,6 +121,18 @@ template<typename T> struct ei_eval<T,IsDense>
          > type;
 };
 template<typename T> struct ei_eval_to_column_major
 {
  typedef Matrix<typename ei_traits<T>::Scalar,
                ei_traits<T>::RowsAtCompileTime,
                ei_traits<T>::ColsAtCompileTime,
                ColMajor,
                ei_traits<T>::MaxRowsAtCompileTime,
                ei_traits<T>::MaxColsAtCompileTime
          > type;
 };
 template<typename T> struct ei_must_nest_by_value { enum { ret = false }; };
 template<typename T> struct ei_must_nest_by_value<NestByValue<T> > { enum { ret = true }; };
--- a/Eigen/src/SVD/SVD.h
+++ b/Eigen/src/SVD/SVD.h
@ -50,16 +50,16 @@ template<typename MatrixType> class SVD
      AlignmentMask = int(PacketSize)-1,
      MinSize = EIGEN_ENUM_MIN(MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime)
    };
-    
+
    typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> ColVector;
    typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, 1> RowVector;
-    
+
    typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MinSize> MatrixUType;
    typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, MatrixType::ColsAtCompileTime> MatrixVType;
    typedef Matrix<Scalar, MinSize, 1> SingularValuesType;
  public:
-  
+
    SVD(const MatrixType& matrix)
      : m_matU(matrix.rows(), std::min(matrix.rows(), matrix.cols())),
        m_matV(matrix.cols(),matrix.cols()),
@ -69,7 +69,7 @@ template<typename MatrixType> class SVD
    }
    template<typename OtherDerived, typename ResultType>
-    void solve(const MatrixBase<OtherDerived> &b, ResultType* result) const;
+    bool solve(const MatrixBase<OtherDerived> &b, ResultType* result) const;
    const MatrixUType& matrixU() const { return m_matU; }
    const SingularValuesType& singularValues() const { return m_sigma; }
@ -97,7 +97,7 @@ void SVD<MatrixType>::compute(const MatrixType& matrix)
  const int m = matrix.rows();
  const int n = matrix.cols();
  const int nu = std::min(m,n);
-  
+
  m_matU.resize(m, nu);
  m_matU.setZero();
  m_sigma.resize(std::min(m,n));
@ -130,7 +130,7 @@ void SVD<MatrixType>::compute(const MatrixType& matrix)
      }
      m_sigma[k] = -m_sigma[k];
    }
-    
+
    for (j = k+1; j < n; j++)
    {
      if ((k < nct) && (m_sigma[k] != 0.0))
@ -468,18 +468,18 @@ void SVD<MatrixType>::compute(const MatrixType& matrix)
 template<typename MatrixType>
 SVD<MatrixType>& SVD<MatrixType>::sort()
 {
-  int mu = m_matU.rows(); 
+  int mu = m_matU.rows();
-  int mv = m_matV.rows(); 
+  int mv = m_matV.rows();
  int n  = m_matU.cols();
  for (int i=0; i<n; i++)
  {
-    int  k = i; 
+    int  k = i;
    Scalar p = m_sigma.coeff(i);
    for (int j=i+1; j<n; j++)
    {
-      if (m_sigma.coeff(j) > p) 
+      if (m_sigma.coeff(j) > p)
      {
        k = j;
        p = m_sigma.coeff(j);
@ -509,7 +509,7 @@ SVD<MatrixType>& SVD<MatrixType>::sort()
  */
 template<typename MatrixType>
 template<typename OtherDerived, typename ResultType>
-void SVD<MatrixType>::solve(const MatrixBase<OtherDerived> &b, ResultType* result) const
+bool SVD<MatrixType>::solve(const MatrixBase<OtherDerived> &b, ResultType* result) const
 {
  const int rows = m_matU.rows();
  ei_assert(b.rows() == rows);
@ -530,6 +530,7 @@ void SVD<MatrixType>::solve(const MatrixBase<OtherDerived> &b, ResultType* resul
    result->col(j) = m_matV * aux;
  }
  return true;
 }
 /** \svd_module
--- a/test/sparse.cpp
+++ b/test/sparse.cpp
@ -217,6 +217,10 @@ template<typename Scalar> void sparse(int rows, int cols)
    // TODO test row major
  }
  // test Cholesky
  {
  }
 }
 void test_sparse()
--- a/test/triangular.cpp
+++ b/test/triangular.cpp
@ -125,5 +125,6 @@ void test_triangular()
    CALL_SUBTEST( triangular(MatrixXcf(4, 4)) );
    CALL_SUBTEST( triangular(Matrix<std::complex<float>,8, 8>()) );
    CALL_SUBTEST( triangular(MatrixXd(17,17)) );
    CALL_SUBTEST( triangular(Matrix<float,Dynamic,Dynamic,RowMajor>(5, 5)) );
  }
 }