merged incoming updates

This commit is contained in:
Bo Li 2014-03-20 22:11:13 +08:00
commit e3fb190edf
4 changed files with 93 additions and 111 deletions

View File

@ -110,7 +110,20 @@ template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { re
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); } template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set_pd(from,from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set_epi32(from,from,from,from); } template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set_epi32(from,from,from,from); }
#else #else
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set1_ps(from); }
// GCC generates a shufps instruction for set1_ps instead of the more efficient pshufd instruction.
// However, with AVX, we want it to generate a vbroadcastss.
// Moreover, we cannot use intrinsics here because then gcc generates crappy code in some cases (see bug 203)
#if (defined __GNUC__) && (!defined __INTEL_COMPILER) && (!defined __clang__) && (!defined __AVX__)
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
Packet4f res;
asm("pshufd $0, %[a], %[b]" : [b] "=x" (res) : [a] "x" (from));
return res;
}
#else
template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) { return _mm_set_ps1(from); }
#endif
template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); } template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { return _mm_set1_pd(from); }
template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); } template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) { return _mm_set1_epi32(from); }
#endif #endif

View File

@ -700,7 +700,7 @@ template<typename T> class aligned_stack_memory_handler
* \sa \ref TopicStlContainers. * \sa \ref TopicStlContainers.
*/ */
template<class T> template<class T>
class aligned_allocator class aligned_allocator : public std::allocator<T>
{ {
public: public:
typedef size_t size_type; typedef size_t size_type;
@ -717,81 +717,25 @@ public:
typedef aligned_allocator<U> other; typedef aligned_allocator<U> other;
}; };
pointer address( reference value ) const aligned_allocator() : std::allocator<T>() {}
{
return &value;
}
const_pointer address( const_reference value ) const aligned_allocator(const aligned_allocator& other) : std::allocator<T>(other) {}
{
return &value;
}
aligned_allocator()
{
}
aligned_allocator( const aligned_allocator& )
{
}
template<class U> template<class U>
aligned_allocator( const aligned_allocator<U>& ) aligned_allocator(const aligned_allocator<U>& other) : std::allocator<T>(other) {}
{
}
~aligned_allocator() ~aligned_allocator() {}
{
}
size_type max_size() const pointer allocate(size_type num, const void* /*hint*/ = 0)
{ {
return (std::numeric_limits<size_type>::max)();
}
pointer allocate( size_type num, const void* hint = 0 )
{
EIGEN_UNUSED_VARIABLE(hint);
internal::check_size_for_overflow<T>(num); internal::check_size_for_overflow<T>(num);
return static_cast<pointer>( internal::aligned_malloc( num * sizeof(T) ) ); return static_cast<pointer>( internal::aligned_malloc(num * sizeof(T)) );
} }
void construct( pointer p, const T& value ) void deallocate(pointer p, size_type /*num*/)
{ {
::new( p ) T( value ); internal::aligned_free(p);
} }
#if (__cplusplus >= 201103L)
template <typename U, typename... Args>
void construct( U* u, Args&&... args)
{
::new( static_cast<void*>(u) ) U( std::forward<Args>( args )... );
}
#endif
void destroy( pointer p )
{
p->~T();
}
#if (__cplusplus >= 201103L)
template <typename U>
void destroy( U* u )
{
u->~U();
}
#endif
void deallocate( pointer p, size_type /*num*/ )
{
internal::aligned_free( p );
}
bool operator!=(const aligned_allocator<T>& ) const
{ return false; }
bool operator==(const aligned_allocator<T>& ) const
{ return true; }
}; };
//---------- Cache sizes ---------- //---------- Cache sizes ----------

View File

@ -37,22 +37,31 @@ namespace Eigen {
typedef typename Dest::Scalar Scalar; typedef typename Dest::Scalar Scalar;
typedef Matrix<Scalar,Dynamic,1> VectorType; typedef Matrix<Scalar,Dynamic,1> VectorType;
// Check for zero rhs
const RealScalar rhsNorm2(rhs.squaredNorm());
if(rhsNorm2 == 0)
{
x.setZero();
iters = 0;
tol_error = 0;
return;
}
// initialize // initialize
const int maxIters(iters); // initialize maxIters to iters const int maxIters(iters); // initialize maxIters to iters
const int N(mat.cols()); // the size of the matrix const int N(mat.cols()); // the size of the matrix
const RealScalar rhsNorm2(rhs.squaredNorm());
const RealScalar threshold2(tol_error*tol_error*rhsNorm2); // convergence threshold (compared to residualNorm2) const RealScalar threshold2(tol_error*tol_error*rhsNorm2); // convergence threshold (compared to residualNorm2)
// Initialize preconditioned Lanczos // Initialize preconditioned Lanczos
// VectorType v_old(N); // will be initialized inside loop VectorType v_old(N); // will be initialized inside loop
VectorType v( VectorType::Zero(N) ); //initialize v VectorType v( VectorType::Zero(N) ); //initialize v
VectorType v_new(rhs-mat*x); //initialize v_new VectorType v_new(rhs-mat*x); //initialize v_new
RealScalar residualNorm2(v_new.squaredNorm()); RealScalar residualNorm2(v_new.squaredNorm());
// VectorType w(N); // will be initialized inside loop VectorType w(N); // will be initialized inside loop
VectorType w_new(precond.solve(v_new)); // initialize w_new VectorType w_new(precond.solve(v_new)); // initialize w_new
// RealScalar beta; // will be initialized inside loop // RealScalar beta; // will be initialized inside loop
RealScalar beta_new2(v_new.dot(w_new)); RealScalar beta_new2(v_new.dot(w_new));
eigen_assert(beta_new2 >= 0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE"); eigen_assert(beta_new2 >= 0.0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE");
RealScalar beta_new(sqrt(beta_new2)); RealScalar beta_new(sqrt(beta_new2));
const RealScalar beta_one(beta_new); const RealScalar beta_one(beta_new);
v_new /= beta_new; v_new /= beta_new;
@ -62,14 +71,14 @@ namespace Eigen {
RealScalar c_old(1.0); RealScalar c_old(1.0);
RealScalar s(0.0); // the sine of the Givens rotation RealScalar s(0.0); // the sine of the Givens rotation
RealScalar s_old(0.0); // the sine of the Givens rotation RealScalar s_old(0.0); // the sine of the Givens rotation
// VectorType p_oold(N); // will be initialized in loop VectorType p_oold(N); // will be initialized in loop
VectorType p_old(VectorType::Zero(N)); // initialize p_old=0 VectorType p_old(VectorType::Zero(N)); // initialize p_old=0
VectorType p(p_old); // initialize p=0 VectorType p(p_old); // initialize p=0
RealScalar eta(1.0); RealScalar eta(1.0);
iters = 0; // reset iters iters = 0; // reset iters
while ( iters < maxIters ){ while ( iters < maxIters )
{
// Preconditioned Lanczos // Preconditioned Lanczos
/* Note that there are 4 variants on the Lanczos algorithm. These are /* Note that there are 4 variants on the Lanczos algorithm. These are
* described in Paige, C. C. (1972). Computational variants of * described in Paige, C. C. (1972). Computational variants of
@ -81,17 +90,17 @@ namespace Eigen {
* A. Greenbaum, Iterative Methods for Solving Linear Systems, SIAM (1987). * A. Greenbaum, Iterative Methods for Solving Linear Systems, SIAM (1987).
*/ */
const RealScalar beta(beta_new); const RealScalar beta(beta_new);
// v_old = v; // update: at first time step, this makes v_old = 0 so value of beta doesn't matter v_old = v; // update: at first time step, this makes v_old = 0 so value of beta doesn't matter
const VectorType v_old(v); // NOT SURE IF CREATING v_old EVERY ITERATION IS EFFICIENT // const VectorType v_old(v); // NOT SURE IF CREATING v_old EVERY ITERATION IS EFFICIENT
v = v_new; // update v = v_new; // update
// w = w_new; // update w = w_new; // update
const VectorType w(w_new); // NOT SURE IF CREATING w EVERY ITERATION IS EFFICIENT // const VectorType w(w_new); // NOT SURE IF CREATING w EVERY ITERATION IS EFFICIENT
v_new.noalias() = mat*w - beta*v_old; // compute v_new v_new.noalias() = mat*w - beta*v_old; // compute v_new
const RealScalar alpha = v_new.dot(w); const RealScalar alpha = v_new.dot(w);
v_new -= alpha*v; // overwrite v_new v_new -= alpha*v; // overwrite v_new
w_new = precond.solve(v_new); // overwrite w_new w_new = precond.solve(v_new); // overwrite w_new
beta_new2 = v_new.dot(w_new); // compute beta_new beta_new2 = v_new.dot(w_new); // compute beta_new
eigen_assert(beta_new2 >= 0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE"); eigen_assert(beta_new2 >= 0.0 && "PRECONDITIONER IS NOT POSITIVE DEFINITE");
beta_new = sqrt(beta_new2); // compute beta_new beta_new = sqrt(beta_new2); // compute beta_new
v_new /= beta_new; // overwrite v_new for next iteration v_new /= beta_new; // overwrite v_new for next iteration
w_new /= beta_new; // overwrite w_new for next iteration w_new /= beta_new; // overwrite w_new for next iteration
@ -107,28 +116,34 @@ namespace Eigen {
s=beta_new/r1; // new sine s=beta_new/r1; // new sine
// Update solution // Update solution
// p_oold = p_old; p_oold = p_old;
const VectorType p_oold(p_old); // NOT SURE IF CREATING p_oold EVERY ITERATION IS EFFICIENT // const VectorType p_oold(p_old); // NOT SURE IF CREATING p_oold EVERY ITERATION IS EFFICIENT
p_old = p; p_old = p;
p.noalias()=(w-r2*p_old-r3*p_oold) /r1; // IS NOALIAS REQUIRED? p.noalias()=(w-r2*p_old-r3*p_oold) /r1; // IS NOALIAS REQUIRED?
x += beta_one*c*eta*p; x += beta_one*c*eta*p;
/* Update the squared residual. Note that this is the estimated residual.
The real residual |Ax-b|^2 may be slightly larger */
residualNorm2 *= s*s; residualNorm2 *= s*s;
if ( residualNorm2 < threshold2){ if ( residualNorm2 < threshold2)
{
break; break;
} }
eta=-s*eta; // update eta eta=-s*eta; // update eta
iters++; // increment iteration number (for output purposes) iters++; // increment iteration number (for output purposes)
} }
tol_error = std::sqrt(residualNorm2 / rhsNorm2); // return error. Note that this is the estimated error. The real error |Ax-b|/|b| may be slightly larger
/* Compute error. Note that this is the estimated error. The real
error |Ax-b|/|b| may be slightly larger */
tol_error = std::sqrt(residualNorm2 / rhsNorm2);
} }
} }
template< typename _MatrixType, int _UpLo=Lower, template< typename _MatrixType, int _UpLo=Lower,
typename _Preconditioner = IdentityPreconditioner> typename _Preconditioner = IdentityPreconditioner>
// typename _Preconditioner = IdentityPreconditioner<typename _MatrixType::Scalar> > // preconditioner must be positive definite
class MINRES; class MINRES;
namespace internal { namespace internal {

View File

@ -1,8 +1,8 @@
// This file is part of Eigen, a lightweight C++ template library // This file is part of Eigen, a lightweight C++ template library
// for linear algebra. // for linear algebra.
// //
// Copyright (C) 2011 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2012 Giacomo Po <gpo@ucla.edu> // Copyright (C) 2012 Giacomo Po <gpo@ucla.edu>
// Copyright (C) 2011 Gael Guennebaud <g.gael@free.fr>
// //
// This Source Code Form is subject to the terms of the Mozilla // This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed // Public License v. 2.0. If a copy of the MPL was not distributed
@ -14,19 +14,29 @@
template<typename T> void test_minres_T() template<typename T> void test_minres_T()
{ {
MINRES<SparseMatrix<T>, Lower, DiagonalPreconditioner<T> > minres_colmajor_diag; // Identity preconditioner
MINRES<SparseMatrix<T>, Lower, IdentityPreconditioner > minres_colmajor_I; MINRES<SparseMatrix<T>, Lower, IdentityPreconditioner > minres_colmajor_lower_I;
// MINRES<SparseMatrix<T>, Lower, IncompleteLUT<T> > minres_colmajor_ilut; MINRES<SparseMatrix<T>, Upper, IdentityPreconditioner > minres_colmajor_upper_I;
//minres<SparseMatrix<T>, SSORPreconditioner<T> > minres_colmajor_ssor;
// Diagonal preconditioner
MINRES<SparseMatrix<T>, Lower, DiagonalPreconditioner<T> > minres_colmajor_lower_diag;
MINRES<SparseMatrix<T>, Upper, DiagonalPreconditioner<T> > minres_colmajor_upper_diag;
// call tests for SPD matrix
CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_lower_I) );
CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_upper_I) );
CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_lower_diag) );
CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_upper_diag) );
// TO DO: symmetric semi-definite matrix
// TO DO: symmetric indefinite matrix
CALL_SUBTEST( check_sparse_square_solving(minres_colmajor_diag) );
CALL_SUBTEST( check_sparse_spd_solving(minres_colmajor_I) );
// CALL_SUBTEST( check_sparse_square_solving(minres_colmajor_ilut) );
//CALL_SUBTEST( check_sparse_square_solving(minres_colmajor_ssor) );
} }
void test_minres() void test_minres()
{ {
CALL_SUBTEST_1(test_minres_T<double>()); CALL_SUBTEST_1(test_minres_T<double>());
// CALL_SUBTEST_2(test_minres_T<std::complex<double> >()); // CALL_SUBTEST_2(test_minres_T<std::complex<double> >());
} }