mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 19:59:05 +08:00
merged incoming updates
This commit is contained in:
commit
e3fb190edf
@ -110,7 +110,20 @@ template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { re
|
|||||||
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
|
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set_epi32(from,from,from,from); }
|
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set_epi32(from,from,from,from); }
|
||||||
#else
|
#else
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set1_ps(from); }
|
|
||||||
|
// GCC generates a shufps instruction for set1_ps instead of the more efficient pshufd instruction.
|
||||||
|
// However, with AVX, we want it to generate a vbroadcastss.
|
||||||
|
// Moreover, we cannot use intrinsics here because then gcc generates crappy code in some cases (see bug 203)
|
||||||
|
#if (defined __GNUC__) && (!defined __INTEL_COMPILER) && (!defined __clang__) && (!defined __AVX__)
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
|
||||||
|
Packet4f res;
|
||||||
|
asm("pshufd $0, %[a], %[b]" : [b] "=x" (res) : [a] "x" (from));
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps1(from); }
|
||||||
|
#endif
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
|
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
|
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
|
||||||
#endif
|
#endif
|
||||||
|
@ -700,7 +700,7 @@ template<typename T> class aligned_stack_memory_handler
|
|||||||
* \sa \ref TopicStlContainers.
|
* \sa \ref TopicStlContainers.
|
||||||
*/
|
*/
|
||||||
template<class T>
|
template<class T>
|
||||||
class aligned_allocator
|
class aligned_allocator : public std::allocator<T>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
typedef size_t size_type;
|
typedef size_t size_type;
|
||||||
@ -717,81 +717,25 @@ public:
|
|||||||
typedef aligned_allocator<U> other;
|
typedef aligned_allocator<U> other;
|
||||||
};
|
};
|
||||||
|
|
||||||
pointer address( reference value ) const
|
aligned_allocator() : std::allocator<T>() {}
|
||||||
{
|
|
||||||
return &value;
|
|
||||||
}
|
|
||||||
|
|
||||||
const_pointer address( const_reference value ) const
|
aligned_allocator(const aligned_allocator& other) : std::allocator<T>(other) {}
|
||||||
{
|
|
||||||
return &value;
|
|
||||||
}
|
|
||||||
|
|
||||||
aligned_allocator()
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
aligned_allocator( const aligned_allocator& )
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class U>
|
template<class U>
|
||||||
aligned_allocator( const aligned_allocator<U>& )
|
aligned_allocator(const aligned_allocator<U>& other) : std::allocator<T>(other) {}
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
~aligned_allocator()
|
~aligned_allocator() {}
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
size_type max_size() const
|
pointer allocate(size_type num, const void* /*hint*/ = 0)
|
||||||
{
|
{
|
||||||
return (std::numeric_limits<size_type>::max)();
|
|
||||||
}
|
|
||||||
|
|
||||||
pointer allocate( size_type num, const void* hint = 0 )
|
|
||||||
{
|
|
||||||
EIGEN_UNUSED_VARIABLE(hint);
|
|
||||||
internal::check_size_for_overflow<T>(num);
|
internal::check_size_for_overflow<T>(num);
|
||||||
return static_cast<pointer>( internal::aligned_malloc( num * sizeof(T) ) );
|
return static_cast<pointer>( internal::aligned_malloc(num * sizeof(T)) );
|
||||||
}
|
}
|
||||||
|
|
||||||
void construct( pointer p, const T& value )
|
void deallocate(pointer p, size_type /*num*/)
|
||||||
{
|
{
|
||||||
::new( p ) T( value );
|
internal::aligned_free(p);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if (__cplusplus >= 201103L)
|
|
||||||
template <typename U, typename... Args>
|
|
||||||
void construct( U* u, Args&&... args)
|
|
||||||
{
|
|
||||||
::new( static_cast<void*>(u) ) U( std::forward<Args>( args )... );
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void destroy( pointer p )
|
|
||||||
{
|
|
||||||
p->~T();
|
|
||||||
}
|
|
||||||
|
|
||||||
#if (__cplusplus >= 201103L)
|
|
||||||
template <typename U>
|
|
||||||
void destroy( U* u )
|
|
||||||
{
|
|
||||||
u->~U();
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void deallocate( pointer p, size_type /*num*/ )
|
|
||||||
{
|
|
||||||
internal::aligned_free( p );
|
|
||||||
}
|
|
||||||
|
|
||||||
bool operator!=(const aligned_allocator<T>& ) const
|
|
||||||
{ return false; }
|
|
||||||
|
|
||||||
bool operator==(const aligned_allocator<T>& ) const
|
|
||||||
{ return true; }
|
|
||||||
};
|
};
|
||||||
|
|
||||||
//---------- Cache sizes ----------
|
//---------- Cache sizes ----------
|
||||||
|
@ -37,22 +37,31 @@ namespace Eigen {
|
|||||||
typedef typename Dest::Scalar Scalar;
|
typedef typename Dest::Scalar Scalar;
|
||||||
typedef Matrix<Scalar,Dynamic,1> VectorType;
|
typedef Matrix<Scalar,Dynamic,1> VectorType;
|
||||||
|
|
||||||
|
// Check for zero rhs
|
||||||
|
const RealScalar rhsNorm2(rhs.squaredNorm());
|
||||||
|
if(rhsNorm2 == 0)
|
||||||
|
{
|
||||||
|
x.setZero();
|
||||||
|
iters = 0;
|
||||||
|
tol_error = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// initialize
|
// initialize
|
||||||
const int maxIters(iters); // initialize maxIters to iters
|
const int maxIters(iters); // initialize maxIters to iters
|
||||||
const int N(mat.cols()); // the size of the matrix
|
const int N(mat.cols()); // the size of the matrix
|
||||||
const RealScalar rhsNorm2(rhs.squaredNorm());
|
|
||||||
const RealScalar threshold2(tol_error*tol_error*rhsNorm2); // convergence threshold (compared to residualNorm2)
|
const RealScalar threshold2(tol_error*tol_error*rhsNorm2); // convergence threshold (compared to residualNorm2)
|
||||||
|
|
||||||
// Initialize preconditioned Lanczos
|
// Initialize preconditioned Lanczos
|
||||||
// VectorType v_old(N); // will be initialized inside loop
|
VectorType v_old(N); // will be initialized inside loop
|
||||||
VectorType v( VectorType::Zero(N) ); //initialize v
|
VectorType v( VectorType::Zero(N) ); //initialize v
|
||||||
VectorType v_new(rhs-mat*x); //initialize v_new
|
VectorType v_new(rhs-mat*x); //initialize v_new
|
||||||
RealScalar residualNorm2(v_new.squaredNorm());
|
RealScalar residualNorm2(v_new.squaredNorm());
|
||||||
// VectorType w(N); // will be initialized inside loop
|
VectorType w(N); // will be initialized inside loop
|
||||||
VectorType w_new(precond.solve(v_new)); // initialize w_new
|
VectorType w_new(precond.solve(v_new)); // initialize w_new
|
||||||
// RealScalar beta; // will be initialized inside loop
|
// RealScalar beta; // will be initialized inside loop
|
||||||
RealScalar beta_new2(v_new.dot(w_new));
|
RealScalar beta_new2(v_new.dot(w_new));
|
||||||
eigen_assert(beta_new2 >= 0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE");
|
eigen_assert(beta_new2 >= 0.0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE");
|
||||||
RealScalar beta_new(sqrt(beta_new2));
|
RealScalar beta_new(sqrt(beta_new2));
|
||||||
const RealScalar beta_one(beta_new);
|
const RealScalar beta_one(beta_new);
|
||||||
v_new /= beta_new;
|
v_new /= beta_new;
|
||||||
@ -62,14 +71,14 @@ namespace Eigen {
|
|||||||
RealScalar c_old(1.0);
|
RealScalar c_old(1.0);
|
||||||
RealScalar s(0.0); // the sine of the Givens rotation
|
RealScalar s(0.0); // the sine of the Givens rotation
|
||||||
RealScalar s_old(0.0); // the sine of the Givens rotation
|
RealScalar s_old(0.0); // the sine of the Givens rotation
|
||||||
// VectorType p_oold(N); // will be initialized in loop
|
VectorType p_oold(N); // will be initialized in loop
|
||||||
VectorType p_old(VectorType::Zero(N)); // initialize p_old=0
|
VectorType p_old(VectorType::Zero(N)); // initialize p_old=0
|
||||||
VectorType p(p_old); // initialize p=0
|
VectorType p(p_old); // initialize p=0
|
||||||
RealScalar eta(1.0);
|
RealScalar eta(1.0);
|
||||||
|
|
||||||
iters = 0; // reset iters
|
iters = 0; // reset iters
|
||||||
while ( iters < maxIters ){
|
while ( iters < maxIters )
|
||||||
|
{
|
||||||
// Preconditioned Lanczos
|
// Preconditioned Lanczos
|
||||||
/* Note that there are 4 variants on the Lanczos algorithm. These are
|
/* Note that there are 4 variants on the Lanczos algorithm. These are
|
||||||
* described in Paige, C. C. (1972). Computational variants of
|
* described in Paige, C. C. (1972). Computational variants of
|
||||||
@ -81,17 +90,17 @@ namespace Eigen {
|
|||||||
* A. Greenbaum, Iterative Methods for Solving Linear Systems, SIAM (1987).
|
* A. Greenbaum, Iterative Methods for Solving Linear Systems, SIAM (1987).
|
||||||
*/
|
*/
|
||||||
const RealScalar beta(beta_new);
|
const RealScalar beta(beta_new);
|
||||||
// v_old = v; // update: at first time step, this makes v_old = 0 so value of beta doesn't matter
|
v_old = v; // update: at first time step, this makes v_old = 0 so value of beta doesn't matter
|
||||||
const VectorType v_old(v); // NOT SURE IF CREATING v_old EVERY ITERATION IS EFFICIENT
|
// const VectorType v_old(v); // NOT SURE IF CREATING v_old EVERY ITERATION IS EFFICIENT
|
||||||
v = v_new; // update
|
v = v_new; // update
|
||||||
// w = w_new; // update
|
w = w_new; // update
|
||||||
const VectorType w(w_new); // NOT SURE IF CREATING w EVERY ITERATION IS EFFICIENT
|
// const VectorType w(w_new); // NOT SURE IF CREATING w EVERY ITERATION IS EFFICIENT
|
||||||
v_new.noalias() = mat*w - beta*v_old; // compute v_new
|
v_new.noalias() = mat*w - beta*v_old; // compute v_new
|
||||||
const RealScalar alpha = v_new.dot(w);
|
const RealScalar alpha = v_new.dot(w);
|
||||||
v_new -= alpha*v; // overwrite v_new
|
v_new -= alpha*v; // overwrite v_new
|
||||||
w_new = precond.solve(v_new); // overwrite w_new
|
w_new = precond.solve(v_new); // overwrite w_new
|
||||||
beta_new2 = v_new.dot(w_new); // compute beta_new
|
beta_new2 = v_new.dot(w_new); // compute beta_new
|
||||||
eigen_assert(beta_new2 >= 0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE");
|
eigen_assert(beta_new2 >= 0.0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE");
|
||||||
beta_new = sqrt(beta_new2); // compute beta_new
|
beta_new = sqrt(beta_new2); // compute beta_new
|
||||||
v_new /= beta_new; // overwrite v_new for next iteration
|
v_new /= beta_new; // overwrite v_new for next iteration
|
||||||
w_new /= beta_new; // overwrite w_new for next iteration
|
w_new /= beta_new; // overwrite w_new for next iteration
|
||||||
@ -107,28 +116,34 @@ namespace Eigen {
|
|||||||
s=beta_new/r1; // new sine
|
s=beta_new/r1; // new sine
|
||||||
|
|
||||||
// Update solution
|
// Update solution
|
||||||
// p_oold = p_old;
|
p_oold = p_old;
|
||||||
const VectorType p_oold(p_old); // NOT SURE IF CREATING p_oold EVERY ITERATION IS EFFICIENT
|
// const VectorType p_oold(p_old); // NOT SURE IF CREATING p_oold EVERY ITERATION IS EFFICIENT
|
||||||
p_old = p;
|
p_old = p;
|
||||||
p.noalias()=(w-r2*p_old-r3*p_oold) /r1; // IS NOALIAS REQUIRED?
|
p.noalias()=(w-r2*p_old-r3*p_oold) /r1; // IS NOALIAS REQUIRED?
|
||||||
x += beta_one*c*eta*p;
|
x += beta_one*c*eta*p;
|
||||||
|
|
||||||
|
/* Update the squared residual. Note that this is the estimated residual.
|
||||||
|
The real residual |Ax-b|^2 may be slightly larger */
|
||||||
residualNorm2 *= s*s;
|
residualNorm2 *= s*s;
|
||||||
|
|
||||||
if ( residualNorm2 < threshold2){
|
if ( residualNorm2 < threshold2)
|
||||||
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
eta=-s*eta; // update eta
|
eta=-s*eta; // update eta
|
||||||
iters++; // increment iteration number (for output purposes)
|
iters++; // increment iteration number (for output purposes)
|
||||||
}
|
}
|
||||||
tol_error = std::sqrt(residualNorm2 / rhsNorm2); // return error. Note that this is the estimated error. The real error |Ax-b|/|b| may be slightly larger
|
|
||||||
|
/* Compute error. Note that this is the estimated error. The real
|
||||||
|
error |Ax-b|/|b| may be slightly larger */
|
||||||
|
tol_error = std::sqrt(residualNorm2 / rhsNorm2);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template< typename _MatrixType, int _UpLo=Lower,
|
template< typename _MatrixType, int _UpLo=Lower,
|
||||||
typename _Preconditioner = IdentityPreconditioner>
|
typename _Preconditioner = IdentityPreconditioner>
|
||||||
// typename _Preconditioner = IdentityPreconditioner<typename _MatrixType::Scalar> > // preconditioner must be positive definite
|
|
||||||
class MINRES;
|
class MINRES;
|
||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
// This file is part of Eigen, a lightweight C++ template library
|
// This file is part of Eigen, a lightweight C++ template library
|
||||||
// for linear algebra.
|
// for linear algebra.
|
||||||
//
|
//
|
||||||
// Copyright (C) 2011 Gael Guennebaud <g.gael@free.fr>
|
|
||||||
// Copyright (C) 2012 Giacomo Po <gpo@ucla.edu>
|
// Copyright (C) 2012 Giacomo Po <gpo@ucla.edu>
|
||||||
|
// Copyright (C) 2011 Gael Guennebaud <g.gael@free.fr>
|
||||||
//
|
//
|
||||||
// This Source Code Form is subject to the terms of the Mozilla
|
// This Source Code Form is subject to the terms of the Mozilla
|
||||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||||
@ -14,19 +14,29 @@
|
|||||||
|
|
||||||
template<typename T> void test_minres_T()
|
template<typename T> void test_minres_T()
|
||||||
{
|
{
|
||||||
MINRES<SparseMatrix<T>, Lower, DiagonalPreconditioner<T> > minres_colmajor_diag;
|
// Identity preconditioner
|
||||||
MINRES<SparseMatrix<T>, Lower, IdentityPreconditioner > minres_colmajor_I;
|
MINRES<SparseMatrix<T>, Lower, IdentityPreconditioner > minres_colmajor_lower_I;
|
||||||
// MINRES<SparseMatrix<T>, Lower, IncompleteLUT<T> > minres_colmajor_ilut;
|
MINRES<SparseMatrix<T>, Upper, IdentityPreconditioner > minres_colmajor_upper_I;
|
||||||
//minres<SparseMatrix<T>, SSORPreconditioner<T> > minres_colmajor_ssor;
|
|
||||||
|
// Diagonal preconditioner
|
||||||
|
MINRES<SparseMatrix<T>, Lower, DiagonalPreconditioner<T> > minres_colmajor_lower_diag;
|
||||||
|
MINRES<SparseMatrix<T>, Upper, DiagonalPreconditioner<T> > minres_colmajor_upper_diag;
|
||||||
|
|
||||||
|
// call tests for SPD matrix
|
||||||
|
CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_lower_I) );
|
||||||
|
CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_upper_I) );
|
||||||
|
|
||||||
|
CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_lower_diag) );
|
||||||
|
CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_upper_diag) );
|
||||||
|
|
||||||
|
// TO DO: symmetric semi-definite matrix
|
||||||
|
// TO DO: symmetric indefinite matrix
|
||||||
|
|
||||||
CALL_SUBTEST( check_sparse_square_solving(minres_colmajor_diag) );
|
|
||||||
CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_I) );
|
|
||||||
// CALL_SUBTEST( check_sparse_square_solving(minres_colmajor_ilut) );
|
|
||||||
//CALL_SUBTEST( check_sparse_square_solving(minres_colmajor_ssor) );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void test_minres()
|
void test_minres()
|
||||||
{
|
{
|
||||||
CALL_SUBTEST_1(test_minres_T<double>());
|
CALL_SUBTEST_1(test_minres_T<double>());
|
||||||
// CALL_SUBTEST_2(test_minres_T<std::complex<double> >());
|
// CALL_SUBTEST_2(test_minres_T<std::complex<double> >());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user