merged incoming udpates

This commit is contained in:
Bo Li 2014-03-20 22:11:13 +08:00
commit e3fb190edf
4 changed files with 93 additions and 111 deletions

View File

@ -110,7 +110,20 @@ template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { re
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set_epi32(from,from,from,from); }
#else
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set1_ps(from); }
// GCC generates a shufps instruction for set1_ps instead of the more efficient pshufd instruction.
// However, with AVX, we want it to generate a vbroadcastss.
// Moreover, we cannot use intrinsics here because then gcc generates crappy code in some cases (see bug 203)
#if (defined __GNUC__) && (!defined __INTEL_COMPILER) && (!defined __clang__) && (!defined __AVX__)
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
Packet4f res;
asm("pshufd $0, %[a], %[b]" : [b] "=x" (res) : [a] "x" (from));
return res;
}
#else
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps1(from); }
#endif
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
#endif

View File

@ -700,98 +700,42 @@ template<typename T> class aligned_stack_memory_handler
* \sa \ref TopicStlContainers.
*/
template<class T>
class aligned_allocator
class aligned_allocator : public std::allocator<T>
{
public:
typedef size_t size_type;
typedef std::ptrdiff_t difference_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef T& reference;
typedef const T& const_reference;
typedef T value_type;
typedef size_t size_type;
typedef std::ptrdiff_t difference_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef T& reference;
typedef const T& const_reference;
typedef T value_type;
template<class U>
struct rebind
{
typedef aligned_allocator<U> other;
};
template<class U>
struct rebind
{
typedef aligned_allocator<U> other;
};
pointer address( reference value ) const
{
return &value;
}
aligned_allocator() : std::allocator<T>() {}
const_pointer address( const_reference value ) const
{
return &value;
}
aligned_allocator(const aligned_allocator& other) : std::allocator<T>(other) {}
aligned_allocator()
{
}
template<class U>
aligned_allocator(const aligned_allocator<U>& other) : std::allocator<T>(other) {}
aligned_allocator( const aligned_allocator& )
{
}
~aligned_allocator() {}
template<class U>
aligned_allocator( const aligned_allocator<U>& )
{
}
pointer allocate(size_type num, const void* /*hint*/ = 0)
{
internal::check_size_for_overflow<T>(num);
return static_cast<pointer>( internal::aligned_malloc(num * sizeof(T)) );
}
~aligned_allocator()
{
}
size_type max_size() const
{
return (std::numeric_limits<size_type>::max)();
}
pointer allocate( size_type num, const void* hint = 0 )
{
EIGEN_UNUSED_VARIABLE(hint);
internal::check_size_for_overflow<T>(num);
return static_cast<pointer>( internal::aligned_malloc( num * sizeof(T) ) );
}
void construct( pointer p, const T& value )
{
::new( p ) T( value );
}
#if (__cplusplus >= 201103L)
template <typename U, typename... Args>
void construct( U* u, Args&&... args)
{
::new( static_cast<void*>(u) ) U( std::forward<Args>( args )... );
}
#endif
void destroy( pointer p )
{
p->~T();
}
#if (__cplusplus >= 201103L)
template <typename U>
void destroy( U* u )
{
u->~U();
}
#endif
void deallocate( pointer p, size_type /*num*/ )
{
internal::aligned_free( p );
}
bool operator!=(const aligned_allocator<T>& ) const
{ return false; }
bool operator==(const aligned_allocator<T>& ) const
{ return true; }
void deallocate(pointer p, size_type /*num*/)
{
internal::aligned_free(p);
}
};
//---------- Cache sizes ----------

View File

@ -37,22 +37,31 @@ namespace Eigen {
typedef typename Dest::Scalar Scalar;
typedef Matrix<Scalar,Dynamic,1> VectorType;
// Check for zero rhs
const RealScalar rhsNorm2(rhs.squaredNorm());
if(rhsNorm2 == 0)
{
x.setZero();
iters = 0;
tol_error = 0;
return;
}
// initialize
const int maxIters(iters); // initialize maxIters to iters
const int N(mat.cols()); // the size of the matrix
const RealScalar rhsNorm2(rhs.squaredNorm());
const RealScalar threshold2(tol_error*tol_error*rhsNorm2); // convergence threshold (compared to residualNorm2)
// Initialize preconditioned Lanczos
// VectorType v_old(N); // will be initialized inside loop
VectorType v_old(N); // will be initialized inside loop
VectorType v( VectorType::Zero(N) ); //initialize v
VectorType v_new(rhs-mat*x); //initialize v_new
RealScalar residualNorm2(v_new.squaredNorm());
// VectorType w(N); // will be initialized inside loop
VectorType w(N); // will be initialized inside loop
VectorType w_new(precond.solve(v_new)); // initialize w_new
// RealScalar beta; // will be initialized inside loop
RealScalar beta_new2(v_new.dot(w_new));
eigen_assert(beta_new2 >= 0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE");
eigen_assert(beta_new2 >= 0.0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE");
RealScalar beta_new(sqrt(beta_new2));
const RealScalar beta_one(beta_new);
v_new /= beta_new;
@ -62,14 +71,14 @@ namespace Eigen {
RealScalar c_old(1.0);
RealScalar s(0.0); // the sine of the Givens rotation
RealScalar s_old(0.0); // the sine of the Givens rotation
// VectorType p_oold(N); // will be initialized in loop
VectorType p_oold(N); // will be initialized in loop
VectorType p_old(VectorType::Zero(N)); // initialize p_old=0
VectorType p(p_old); // initialize p=0
RealScalar eta(1.0);
iters = 0; // reset iters
while ( iters < maxIters ){
while ( iters < maxIters )
{
// Preconditioned Lanczos
/* Note that there are 4 variants on the Lanczos algorithm. These are
* described in Paige, C. C. (1972). Computational variants of
@ -81,17 +90,17 @@ namespace Eigen {
* A. Greenbaum, Iterative Methods for Solving Linear Systems, SIAM (1987).
*/
const RealScalar beta(beta_new);
// v_old = v; // update: at first time step, this makes v_old = 0 so value of beta doesn't matter
const VectorType v_old(v); // NOT SURE IF CREATING v_old EVERY ITERATION IS EFFICIENT
v_old = v; // update: at first time step, this makes v_old = 0 so value of beta doesn't matter
// const VectorType v_old(v); // NOT SURE IF CREATING v_old EVERY ITERATION IS EFFICIENT
v = v_new; // update
// w = w_new; // update
const VectorType w(w_new); // NOT SURE IF CREATING w EVERY ITERATION IS EFFICIENT
w = w_new; // update
// const VectorType w(w_new); // NOT SURE IF CREATING w EVERY ITERATION IS EFFICIENT
v_new.noalias() = mat*w - beta*v_old; // compute v_new
const RealScalar alpha = v_new.dot(w);
v_new -= alpha*v; // overwrite v_new
w_new = precond.solve(v_new); // overwrite w_new
beta_new2 = v_new.dot(w_new); // compute beta_new
eigen_assert(beta_new2 >= 0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE");
eigen_assert(beta_new2 >= 0.0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE");
beta_new = sqrt(beta_new2); // compute beta_new
v_new /= beta_new; // overwrite v_new for next iteration
w_new /= beta_new; // overwrite w_new for next iteration
@ -107,28 +116,34 @@ namespace Eigen {
s=beta_new/r1; // new sine
// Update solution
// p_oold = p_old;
const VectorType p_oold(p_old); // NOT SURE IF CREATING p_oold EVERY ITERATION IS EFFICIENT
p_oold = p_old;
// const VectorType p_oold(p_old); // NOT SURE IF CREATING p_oold EVERY ITERATION IS EFFICIENT
p_old = p;
p.noalias()=(w-r2*p_old-r3*p_oold) /r1; // IS NOALIAS REQUIRED?
x += beta_one*c*eta*p;
/* Update the squared residual. Note that this is the estimated residual.
The real residual |Ax-b|^2 may be slightly larger */
residualNorm2 *= s*s;
if ( residualNorm2 < threshold2){
if ( residualNorm2 < threshold2)
{
break;
}
eta=-s*eta; // update eta
iters++; // increment iteration number (for output purposes)
}
tol_error = std::sqrt(residualNorm2 / rhsNorm2); // return error. Note that this is the estimated error. The real error |Ax-b|/|b| may be slightly larger
/* Compute error. Note that this is the estimated error. The real
error |Ax-b|/|b| may be slightly larger */
tol_error = std::sqrt(residualNorm2 / rhsNorm2);
}
}
template< typename _MatrixType, int _UpLo=Lower,
typename _Preconditioner = IdentityPreconditioner>
// typename _Preconditioner = IdentityPreconditioner<typename _MatrixType::Scalar> > // preconditioner must be positive definite
class MINRES;
namespace internal {

View File

@ -1,8 +1,8 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2011 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2012 Giacomo Po <gpo@ucla.edu>
// Copyright (C) 2011 Gael Guennebaud <g.gael@free.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@ -14,19 +14,29 @@
template<typename T> void test_minres_T()
{
MINRES<SparseMatrix<T>, Lower, DiagonalPreconditioner<T> > minres_colmajor_diag;
MINRES<SparseMatrix<T>, Lower, IdentityPreconditioner > minres_colmajor_I;
// MINRES<SparseMatrix<T>, Lower, IncompleteLUT<T> > minres_colmajor_ilut;
//minres<SparseMatrix<T>, SSORPreconditioner<T> > minres_colmajor_ssor;
// Identity preconditioner
MINRES<SparseMatrix<T>, Lower, IdentityPreconditioner > minres_colmajor_lower_I;
MINRES<SparseMatrix<T>, Upper, IdentityPreconditioner > minres_colmajor_upper_I;
// Diagonal preconditioner
MINRES<SparseMatrix<T>, Lower, DiagonalPreconditioner<T> > minres_colmajor_lower_diag;
MINRES<SparseMatrix<T>, Upper, DiagonalPreconditioner<T> > minres_colmajor_upper_diag;
// call tests for SPD matrix
CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_lower_I) );
CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_upper_I) );
CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_lower_diag) );
CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_upper_diag) );
// TO DO: symmetric semi-definite matrix
// TO DO: symmetric indefinite matrix
CALL_SUBTEST( check_sparse_square_solving(minres_colmajor_diag) );
CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_I) );
// CALL_SUBTEST( check_sparse_square_solving(minres_colmajor_ilut) );
//CALL_SUBTEST( check_sparse_square_solving(minres_colmajor_ssor) );
}
void test_minres()
{
CALL_SUBTEST_1(test_minres_T<double>());
// CALL_SUBTEST_2(test_minres_T<std::complex<double> >());
// CALL_SUBTEST_2(test_minres_T<std::compex<double> >());
}