merged incoming udpates

2025-09-27 00:33:24 +08:00 · 2014-03-20 22:11:13 +08:00 · 2014-03-20 22:11:13 +08:00 · e3fb190edf
commit e3fb190edf
parent cfd3d6ce9c c39a3fa7a1
4 changed files with 93 additions and 111 deletions
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@ -110,7 +110,20 @@ template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float&  from) { re
 template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
 template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int&    from) { return _mm_set_epi32(from,from,from,from); }
 #else
-template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float&  from) { return _mm_set1_ps(from); }
+
 // GCC generates a shufps instruction for set1_ps instead of the more efficient pshufd instruction.
 // However, with AVX, we want it to generate a vbroadcastss.
 // Moreover, we cannot use intrinsics here because then gcc generates crappy code in some cases (see bug 203)
 #if (defined __GNUC__) && (!defined __INTEL_COMPILER) && (!defined __clang__) && (!defined __AVX__)
  template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float&  from) {
    Packet4f res;
    asm("pshufd $0, %[a], %[b]" : [b] "=x" (res) : [a] "x" (from));
    return res;
  }
 #else
  template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float&  from) { return _mm_set_ps1(from); }
 #endif
 template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
 template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int&    from) { return _mm_set1_epi32(from); }
 #endif
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@ -700,7 +700,7 @@ template<typename T> class aligned_stack_memory_handler
 * \sa \ref TopicStlContainers.
 */
 template<class T>
-class aligned_allocator
+class aligned_allocator : public std::allocator<T>
 {
 public:
  typedef size_t          size_type;
@ -717,81 +717,25 @@ public:
    typedef aligned_allocator<U> other;
  };
-    pointer address( reference value ) const
+  aligned_allocator() : std::allocator<T>() {}
    {
        return &value;
    }
-    const_pointer address( const_reference value ) const
+  aligned_allocator(const aligned_allocator& other) : std::allocator<T>(other) {}
    {
        return &value;
    }
    aligned_allocator()
    {
    }
    aligned_allocator( const aligned_allocator& )
    {
    }
  template<class U>
-    aligned_allocator( const aligned_allocator<U>& )
+  aligned_allocator(const aligned_allocator<U>& other) : std::allocator<T>(other) {}
    {
    }
-    ~aligned_allocator()
+  ~aligned_allocator() {}
    {
    }
-    size_type max_size() const
+  pointer allocate(size_type num, const void* /*hint*/ = 0)
  {
        return (std::numeric_limits<size_type>::max)();
    }
    pointer allocate( size_type num, const void* hint = 0 )
    {
        EIGEN_UNUSED_VARIABLE(hint);
    internal::check_size_for_overflow<T>(num);
-        return static_cast<pointer>( internal::aligned_malloc( num * sizeof(T) ) );
+    return static_cast<pointer>( internal::aligned_malloc(num * sizeof(T)) );
  }
-    void construct( pointer p, const T& value )
+  void deallocate(pointer p, size_type /*num*/)
  {
-        ::new( p ) T( value );
+    internal::aligned_free(p);
  }
 #if (__cplusplus >= 201103L)
    template <typename U, typename... Args>
    void construct( U* u, Args&&... args)
    {
        ::new( static_cast<void*>(u) ) U( std::forward<Args>( args )... );
    }
 #endif
    void destroy( pointer p )
    {
        p->~T();
    }
 #if (__cplusplus >= 201103L)
    template <typename U>
    void destroy( U* u )
    {
        u->~U();
    }
 #endif
    void deallocate( pointer p, size_type /*num*/ )
    {
        internal::aligned_free( p );
    }
    bool operator!=(const aligned_allocator<T>& ) const
    { return false; }
    bool operator==(const aligned_allocator<T>& ) const
    { return true; }
 };
 //---------- Cache sizes ----------
--- a/unsupported/Eigen/src/IterativeSolvers/MINRES.h
+++ b/unsupported/Eigen/src/IterativeSolvers/MINRES.h
@ -37,22 +37,31 @@ namespace Eigen {
            typedef typename Dest::Scalar Scalar;
            typedef Matrix<Scalar,Dynamic,1> VectorType;
            // Check for zero rhs
            const RealScalar rhsNorm2(rhs.squaredNorm());
            if(rhsNorm2 == 0)
            {
                x.setZero();
                iters = 0;
                tol_error = 0;
                return;
            }
            // initialize
            const int maxIters(iters);  // initialize maxIters to iters
            const int N(mat.cols());    // the size of the matrix
            const RealScalar rhsNorm2(rhs.squaredNorm());
            const RealScalar threshold2(tol_error*tol_error*rhsNorm2); // convergence threshold (compared to residualNorm2)
            // Initialize preconditioned Lanczos
-//            VectorType v_old(N); // will be initialized inside loop
+            VectorType v_old(N); // will be initialized inside loop
            VectorType v( VectorType::Zero(N) ); //initialize v
            VectorType v_new(rhs-mat*x); //initialize v_new
            RealScalar residualNorm2(v_new.squaredNorm());
-//            VectorType w(N); // will be initialized inside loop
+            VectorType w(N); // will be initialized inside loop
            VectorType w_new(precond.solve(v_new)); // initialize w_new
 //            RealScalar beta; // will be initialized inside loop
            RealScalar beta_new2(v_new.dot(w_new));
-            eigen_assert(beta_new2 >= 0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE");
+            eigen_assert(beta_new2 >= 0.0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE");
            RealScalar beta_new(sqrt(beta_new2));
            const RealScalar beta_one(beta_new);
            v_new /= beta_new;
@ -62,14 +71,14 @@ namespace Eigen {
            RealScalar c_old(1.0);
            RealScalar s(0.0); // the sine of the Givens rotation
            RealScalar s_old(0.0); // the sine of the Givens rotation
-//            VectorType p_oold(N); // will be initialized in loop
+            VectorType p_oold(N); // will be initialized in loop
            VectorType p_old(VectorType::Zero(N)); // initialize p_old=0
            VectorType p(p_old); // initialize p=0
            RealScalar eta(1.0);
            iters = 0; // reset iters
-            while ( iters < maxIters ){
+            while ( iters < maxIters )
-                
+            {
                // Preconditioned Lanczos
                /* Note that there are 4 variants on the Lanczos algorithm. These are
                 * described in Paige, C. C. (1972). Computational variants of
@ -81,17 +90,17 @@ namespace Eigen {
                 * A. Greenbaum, Iterative Methods for Solving Linear Systems, SIAM (1987).
                 */
                const RealScalar beta(beta_new);
-//                v_old = v; // update: at first time step, this makes v_old = 0 so value of beta doesn't matter
+                v_old = v; // update: at first time step, this makes v_old = 0 so value of beta doesn't matter
-                const VectorType v_old(v); // NOT SURE IF CREATING v_old EVERY ITERATION IS EFFICIENT
+//                const VectorType v_old(v); // NOT SURE IF CREATING v_old EVERY ITERATION IS EFFICIENT
                v = v_new; // update
-//                w = w_new; // update
+                w = w_new; // update
-                const VectorType w(w_new); // NOT SURE IF CREATING w EVERY ITERATION IS EFFICIENT
+//                const VectorType w(w_new); // NOT SURE IF CREATING w EVERY ITERATION IS EFFICIENT
                v_new.noalias() = mat*w - beta*v_old; // compute v_new
                const RealScalar alpha = v_new.dot(w);
                v_new -= alpha*v; // overwrite v_new
                w_new = precond.solve(v_new); // overwrite w_new
                beta_new2 = v_new.dot(w_new); // compute beta_new
-                eigen_assert(beta_new2 >= 0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE");
+                eigen_assert(beta_new2 >= 0.0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE");
                beta_new = sqrt(beta_new2); // compute beta_new
                v_new /= beta_new; // overwrite v_new for next iteration
                w_new /= beta_new; // overwrite w_new for next iteration
@ -107,28 +116,34 @@ namespace Eigen {
                s=beta_new/r1; // new sine
                // Update solution
-//                p_oold = p_old;
+                p_oold = p_old;
-                const VectorType p_oold(p_old); // NOT SURE IF CREATING p_oold EVERY ITERATION IS EFFICIENT
+//                const VectorType p_oold(p_old); // NOT SURE IF CREATING p_oold EVERY ITERATION IS EFFICIENT
                p_old = p;
                p.noalias()=(w-r2*p_old-r3*p_oold) /r1; // IS NOALIAS REQUIRED?
                x += beta_one*c*eta*p;
                /* Update the squared residual. Note that this is the estimated residual.
                The real residual |Ax-b|^2 may be slightly larger */
                residualNorm2 *= s*s;
-                if ( residualNorm2 < threshold2){
+                if ( residualNorm2 < threshold2)
                {
                    break;
                }
                eta=-s*eta; // update eta
                iters++; // increment iteration number (for output purposes)
            }
-            tol_error = std::sqrt(residualNorm2 / rhsNorm2); // return error. Note that this is the estimated error. The real error |Ax-b|/|b| may be slightly larger 
+            
            /* Compute error. Note that this is the estimated error. The real 
             error |Ax-b|/|b| may be slightly larger */
            tol_error = std::sqrt(residualNorm2 / rhsNorm2);
        }
    }
    template< typename _MatrixType, int _UpLo=Lower,
    typename _Preconditioner = IdentityPreconditioner>
 //    typename _Preconditioner = IdentityPreconditioner<typename _MatrixType::Scalar> > // preconditioner must be positive definite
    class MINRES;
    namespace internal {
--- a/unsupported/test/minres.cpp
+++ b/unsupported/test/minres.cpp
@ -1,8 +1,8 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2011 Gael Guennebaud <g.gael@free.fr>
 // Copyright (C) 2012 Giacomo Po <gpo@ucla.edu>
 // Copyright (C) 2011 Gael Guennebaud <g.gael@free.fr>
 //
 // This Source Code Form is subject to the terms of the Mozilla
 // Public License v. 2.0. If a copy of the MPL was not distributed
@ -14,19 +14,29 @@
 template<typename T> void test_minres_T()
 {
-  MINRES<SparseMatrix<T>, Lower, DiagonalPreconditioner<T> > minres_colmajor_diag;
+  // Identity preconditioner
-  MINRES<SparseMatrix<T>, Lower, IdentityPreconditioner    > minres_colmajor_I;
+  MINRES<SparseMatrix<T>, Lower, IdentityPreconditioner    > minres_colmajor_lower_I;
-//  MINRES<SparseMatrix<T>, Lower, IncompleteLUT<T> >           minres_colmajor_ilut;
+  MINRES<SparseMatrix<T>, Upper, IdentityPreconditioner    > minres_colmajor_upper_I;
-  //minres<SparseMatrix<T>, SSORPreconditioner<T> >     minres_colmajor_ssor;
+
  // Diagonal preconditioner
  MINRES<SparseMatrix<T>, Lower, DiagonalPreconditioner<T> > minres_colmajor_lower_diag;
  MINRES<SparseMatrix<T>, Upper, DiagonalPreconditioner<T> > minres_colmajor_upper_diag;
  // call tests for SPD matrix
  CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_lower_I) );
  CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_upper_I) );
  CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_lower_diag)  );
  CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_upper_diag)  );
  // TO DO: symmetric semi-definite matrix
  // TO DO: symmetric indefinite matrix
  CALL_SUBTEST( check_sparse_square_solving(minres_colmajor_diag)  );
  CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_I) );
 // CALL_SUBTEST( check_sparse_square_solving(minres_colmajor_ilut)     );
  //CALL_SUBTEST( check_sparse_square_solving(minres_colmajor_ssor)     );
 }
 void test_minres()
 {
  CALL_SUBTEST_1(test_minres_T<double>());
-//  CALL_SUBTEST_2(test_minres_T<std::complex<double> >());
+//  CALL_SUBTEST_2(test_minres_T<std::compex<double> >());
 }