Mirror of https://gitlab.com/libeigen/eigen.git (synced 2025-04-23 10:09:36 +08:00)

Commit 463554c254: Merge with default branch
@@ -204,7 +204,7 @@ if(NOT MSVC)
 
   option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF)
   if(EIGEN_TEST_NEON)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mcpu=cortex-a"8)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mcpu=cortex-a8")
     message(STATUS "Enabling NEON in tests/examples")
   endif()
 
@@ -4,14 +4,10 @@
 ## # The following are required to uses Dart and the Cdash dashboard
 ## ENABLE_TESTING()
 ## INCLUDE(CTest)
-set(CTEST_PROJECT_NAME "Eigen")
+set(CTEST_PROJECT_NAME "Eigen3.2")
 set(CTEST_NIGHTLY_START_TIME "00:00:00 UTC")
 
 set(CTEST_DROP_METHOD "http")
 set(CTEST_DROP_SITE "manao.inria.fr")
-set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen")
+set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen3.2")
 set(CTEST_DROP_SITE_CDASH TRUE)
-set(CTEST_PROJECT_SUBPROJECTS
-Official
-Unsupported
-)
Eigen/QR
@@ -15,7 +15,9 @@
  *
  * This module provides various QR decompositions
  * This module also provides some MatrixBase methods, including:
- *  - MatrixBase::qr(),
+ *  - MatrixBase::householderQr()
+ *  - MatrixBase::colPivHouseholderQr()
+ *  - MatrixBase::fullPivHouseholderQr()
  *
  * \code
  * #include <Eigen/QR>
@@ -16,7 +16,10 @@
 namespace Eigen {
 
 namespace internal {
-template<typename MatrixType, int UpLo> struct LDLT_Traits;
+  template<typename MatrixType, int UpLo> struct LDLT_Traits;
+
+  // PositiveSemiDef means positive semi-definite and non-zero; same for NegativeSemiDef
+  enum SignMatrix { PositiveSemiDef, NegativeSemiDef, ZeroSign, Indefinite };
 }
 
 /** \ingroup Cholesky_Module
@@ -69,7 +72,12 @@ template<typename _MatrixType, int _UpLo> class LDLT
       * The default constructor is useful in cases in which the user intends to
       * perform decompositions via LDLT::compute(const MatrixType&).
       */
-    LDLT() : m_matrix(), m_transpositions(), m_isInitialized(false) {}
+    LDLT()
+      : m_matrix(),
+        m_transpositions(),
+        m_sign(internal::ZeroSign),
+        m_isInitialized(false)
+    {}
 
     /** \brief Default Constructor with memory preallocation
       *
@@ -81,6 +89,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
       : m_matrix(size, size),
         m_transpositions(size),
         m_temporary(size),
+        m_sign(internal::ZeroSign),
         m_isInitialized(false)
     {}
 
@@ -93,6 +102,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
       : m_matrix(matrix.rows(), matrix.cols()),
         m_transpositions(matrix.rows()),
         m_temporary(matrix.rows()),
+        m_sign(internal::ZeroSign),
         m_isInitialized(false)
     {
       compute(matrix);
@@ -139,7 +149,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
     inline bool isPositive() const
     {
       eigen_assert(m_isInitialized && "LDLT is not initialized.");
-      return m_sign == 1;
+      return m_sign == internal::PositiveSemiDef || m_sign == internal::ZeroSign;
     }
 
     #ifdef EIGEN2_SUPPORT
@@ -153,7 +163,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
     inline bool isNegative(void) const
    {
       eigen_assert(m_isInitialized && "LDLT is not initialized.");
-      return m_sign == -1;
+      return m_sign == internal::NegativeSemiDef || m_sign == internal::ZeroSign;
    }
 
     /** \returns a solution x of \f$ A x = b \f$ using the current decomposition of A.
@@ -235,7 +245,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
     MatrixType m_matrix;
     TranspositionType m_transpositions;
     TmpMatrixType m_temporary;
-    int m_sign;
+    internal::SignMatrix m_sign;
     bool m_isInitialized;
 };
 
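Not part of the patch — a minimal sketch of the user-visible effect of the new internal::SignMatrix bookkeeping above: isPositive()/isNegative() now classify the decomposed matrix instead of comparing an int. The 2x2 matrix here is an arbitrary positive-definite example.

#include <Eigen/Cholesky>
#include <iostream>

int main()
{
  Eigen::Matrix2d A;
  A << 2.0, 1.0,
       1.0, 2.0;                          // symmetric positive definite
  Eigen::LDLT<Eigen::Matrix2d> ldlt(A);   // compute() fills in m_sign
  std::cout << ldlt.isPositive() << " "   // 1: PositiveSemiDef (or ZeroSign)
            << ldlt.isNegative() << "\n"; // 0: neither NegativeSemiDef nor ZeroSign
  return 0;
}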
@@ -246,7 +256,7 @@ template<int UpLo> struct ldlt_inplace;
 template<> struct ldlt_inplace<Lower>
 {
   template<typename MatrixType, typename TranspositionType, typename Workspace>
-  static bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, int* sign=0)
+  static bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign)
   {
     using std::abs;
     typedef typename MatrixType::Scalar Scalar;
@@ -258,8 +268,9 @@ template<> struct ldlt_inplace<Lower>
     if (size <= 1)
     {
       transpositions.setIdentity();
-      if(sign)
-        *sign = numext::real(mat.coeff(0,0))>0 ? 1:-1;
+      if (numext::real(mat.coeff(0,0)) > 0) sign = PositiveSemiDef;
+      else if (numext::real(mat.coeff(0,0)) < 0) sign = NegativeSemiDef;
+      else sign = ZeroSign;
       return true;
     }
 
@@ -284,7 +295,6 @@ template<> struct ldlt_inplace<Lower>
     if(biggest_in_corner < cutoff)
     {
       for(Index i = k; i < size; i++) transpositions.coeffRef(i) = i;
-      if(sign) *sign = 0;
       break;
     }
 
@@ -325,15 +335,15 @@ template<> struct ldlt_inplace<Lower>
     }
     if((rs>0) && (abs(mat.coeffRef(k,k)) > cutoff))
       A21 /= mat.coeffRef(k,k);
 
-    if(sign)
-    {
-      // LDLT is not guaranteed to work for indefinite matrices, but let's try to get the sign right
-      int newSign = numext::real(mat.diagonal().coeff(index_of_biggest_in_corner)) > 0;
-      if(k == 0)
-        *sign = newSign;
-      else if(*sign != newSign)
-        *sign = 0;
-    }
+    RealScalar realAkk = numext::real(mat.coeffRef(k,k));
+    if (sign == PositiveSemiDef) {
+      if (realAkk < 0) sign = Indefinite;
+    } else if (sign == NegativeSemiDef) {
+      if (realAkk > 0) sign = Indefinite;
+    } else if (sign == ZeroSign) {
+      if (realAkk > 0) sign = PositiveSemiDef;
+      else if (realAkk < 0) sign = NegativeSemiDef;
+    }
 
@@ -399,7 +409,7 @@ template<> struct ldlt_inplace<Lower>
 template<> struct ldlt_inplace<Upper>
 {
   template<typename MatrixType, typename TranspositionType, typename Workspace>
-  static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, int* sign=0)
+  static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign)
   {
     Transpose<MatrixType> matt(mat);
     return ldlt_inplace<Lower>::unblocked(matt, transpositions, temp, sign);
@@ -445,7 +455,7 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
   m_isInitialized = false;
   m_temporary.resize(size);
 
-  internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, &m_sign);
+  internal::ldlt_inplace<UpLo>::unblocked(m_matrix, m_transpositions, m_temporary, m_sign);
 
   m_isInitialized = true;
   return *this;
@@ -473,7 +483,7 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Deri
     for (Index i = 0; i < size; i++)
       m_transpositions.coeffRef(i) = i;
     m_temporary.resize(size);
-    m_sign = sigma>=0 ? 1 : -1;
+    m_sign = sigma>=0 ? internal::PositiveSemiDef : internal::NegativeSemiDef;
     m_isInitialized = true;
   }
 
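Also not from the patch — a hedged sketch of the rankUpdate() path touched above, where the sigma-dependent sign is now recorded as a SignMatrix value; the vector and sigma are arbitrary.

#include <Eigen/Cholesky>

int main()
{
  Eigen::Matrix3d A = Eigen::Matrix3d::Identity();
  Eigen::LDLT<Eigen::Matrix3d> ldlt(A);
  Eigen::Vector3d w(1.0, 2.0, 3.0);
  // Updates the factorization of A + sigma * w * w^T in place; with sigma >= 0
  // the stored sign starts out as internal::PositiveSemiDef instead of the old int 1.
  ldlt.rankUpdate(w, 1.0);
  return 0;
}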
@@ -138,6 +138,9 @@ DenseBase<Derived>::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& f
   *
   * The template parameter \a CustomNullaryOp is the type of the functor.
   *
+  * Here is an example with C++11 random generators: \include random_cpp11.cpp
+  * Output: \verbinclude random_cpp11.out
+  *
   * \sa class CwiseNullaryOp
   */
 template<typename Derived>
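The random_cpp11.cpp snippet referenced by the new \include line is not reproduced in this diff; the following is only a plausible sketch of such an example (distribution, seed, and sizes are assumptions), and it relies on NullaryExpr accepting a nullary callable as in recent Eigen versions.

#include <Eigen/Core>
#include <iostream>
#include <random>

int main()
{
  std::mt19937 gen(42);                                   // fixed seed, illustrative only
  std::uniform_real_distribution<double> dis(-1.0, 1.0);
  // The functor is invoked once per coefficient of the 3x3 expression.
  Eigen::MatrixXd m = Eigen::MatrixXd::NullaryExpr(3, 3, [&]() { return dis(gen); });
  std::cout << m << "\n";
  return 0;
}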
@ -77,7 +77,12 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index rows() const
|
||||
{ return m_index.value()<0 ? (std::min<Index>)(m_matrix.cols(),m_matrix.rows()+m_index.value()) : (std::min<Index>)(m_matrix.rows(),m_matrix.cols()-m_index.value()); }
|
||||
{
|
||||
EIGEN_USING_STD_MATH(min);
|
||||
return m_index.value()<0 ? (min)(Index(m_matrix.cols()),Index(m_matrix.rows()+m_index.value()))
|
||||
: (min)(Index(m_matrix.rows()),Index(m_matrix.cols()-m_index.value()));
|
||||
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline Index cols() const { return 1; }
|
||||
|
@ -141,36 +141,6 @@ Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** replaces \c *this by \c *this * \a other.
|
||||
*
|
||||
* \returns a reference to \c *this
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline Derived&
|
||||
MatrixBase<Derived>::operator*=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
other.derived().applyThisOnTheRight(derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** replaces \c *this by \c *this * \a other. It is equivalent to MatrixBase::operator*=().
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline void MatrixBase<Derived>::applyOnTheRight(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
other.derived().applyThisOnTheRight(derived());
|
||||
}
|
||||
|
||||
/** replaces \c *this by \c *this * \a other. */
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline void MatrixBase<Derived>::applyOnTheLeft(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
other.derived().applyThisOnTheLeft(derived());
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_EIGENBASE_H
|
||||
|
@ -575,6 +575,51 @@ template<typename Derived> class MatrixBase
|
||||
{EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar))==-1,YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); return *this;}
|
||||
};
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
* Implementation of matrix base methods
|
||||
***************************************************************************/
|
||||
|
||||
/** replaces \c *this by \c *this * \a other.
|
||||
*
|
||||
* \returns a reference to \c *this
|
||||
*
|
||||
* Example: \include MatrixBase_applyOnTheRight.cpp
|
||||
* Output: \verbinclude MatrixBase_applyOnTheRight.out
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline Derived&
|
||||
MatrixBase<Derived>::operator*=(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
other.derived().applyThisOnTheRight(derived());
|
||||
return derived();
|
||||
}
|
||||
|
||||
/** replaces \c *this by \c *this * \a other. It is equivalent to MatrixBase::operator*=().
|
||||
*
|
||||
* Example: \include MatrixBase_applyOnTheRight.cpp
|
||||
* Output: \verbinclude MatrixBase_applyOnTheRight.out
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline void MatrixBase<Derived>::applyOnTheRight(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
other.derived().applyThisOnTheRight(derived());
|
||||
}
|
||||
|
||||
/** replaces \c *this by \a other * \c *this.
|
||||
*
|
||||
* Example: \include MatrixBase_applyOnTheLeft.cpp
|
||||
* Output: \verbinclude MatrixBase_applyOnTheLeft.out
|
||||
*/
|
||||
template<typename Derived>
|
||||
template<typename OtherDerived>
|
||||
inline void MatrixBase<Derived>::applyOnTheLeft(const EigenBase<OtherDerived> &other)
|
||||
{
|
||||
other.derived().applyThisOnTheLeft(derived());
|
||||
}
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_MATRIXBASE_H
|
||||
|
@ -34,6 +34,8 @@ struct functor_traits<scalar_random_op<Scalar> >
|
||||
* The parameters \a rows and \a cols are the number of rows and of columns of
|
||||
* the returned matrix. Must be compatible with this MatrixBase type.
|
||||
*
|
||||
* \not_reentrant
|
||||
*
|
||||
* This variant is meant to be used for dynamic-size matrix types. For fixed-size types,
|
||||
* it is redundant to pass \a rows and \a cols as arguments, so Random() should be used
|
||||
* instead.
|
||||
@ -45,8 +47,10 @@ struct functor_traits<scalar_random_op<Scalar> >
|
||||
* This expression has the "evaluate before nesting" flag so that it will be evaluated into
|
||||
* a temporary matrix whenever it is nested in a larger expression. This prevents unexpected
|
||||
* behavior with expressions involving random matrices.
|
||||
*
|
||||
* See DenseBase::NullaryExpr(Index, const CustomNullaryOp&) for an example using C++11 random generators.
|
||||
*
|
||||
* \sa MatrixBase::setRandom(), MatrixBase::Random(Index), MatrixBase::Random()
|
||||
* \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
|
||||
@ -64,6 +68,7 @@ DenseBase<Derived>::Random(Index rows, Index cols)
|
||||
* Must be compatible with this MatrixBase type.
|
||||
*
|
||||
* \only_for_vectors
|
||||
* \not_reentrant
|
||||
*
|
||||
* This variant is meant to be used for dynamic-size vector types. For fixed-size types,
|
||||
* it is redundant to pass \a size as argument, so Random() should be used
|
||||
@ -76,7 +81,7 @@ DenseBase<Derived>::Random(Index rows, Index cols)
|
||||
* a temporary vector whenever it is nested in a larger expression. This prevents unexpected
|
||||
* behavior with expressions involving random matrices.
|
||||
*
|
||||
* \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random()
|
||||
* \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random()
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
|
||||
@ -99,8 +104,10 @@ DenseBase<Derived>::Random(Index size)
|
||||
* This expression has the "evaluate before nesting" flag so that it will be evaluated into
|
||||
* a temporary matrix whenever it is nested in a larger expression. This prevents unexpected
|
||||
* behavior with expressions involving random matrices.
|
||||
*
|
||||
* \not_reentrant
|
||||
*
|
||||
* \sa MatrixBase::setRandom(), MatrixBase::Random(Index,Index), MatrixBase::Random(Index)
|
||||
* \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random(Index)
|
||||
*/
|
||||
template<typename Derived>
|
||||
inline const CwiseNullaryOp<internal::scalar_random_op<typename internal::traits<Derived>::Scalar>, Derived>
|
||||
@ -114,6 +121,8 @@ DenseBase<Derived>::Random()
|
||||
* Numbers are uniformly spread through their whole definition range for integer types,
|
||||
* and in the [-1:1] range for floating point scalar types.
|
||||
*
|
||||
* \not_reentrant
|
||||
*
|
||||
* Example: \include MatrixBase_setRandom.cpp
|
||||
* Output: \verbinclude MatrixBase_setRandom.out
|
||||
*
|
||||
@ -131,11 +140,12 @@ inline Derived& DenseBase<Derived>::setRandom()
|
||||
* and in the [-1:1] range for floating point scalar types.
|
||||
*
|
||||
* \only_for_vectors
|
||||
* \not_reentrant
|
||||
*
|
||||
* Example: \include Matrix_setRandom_int.cpp
|
||||
* Output: \verbinclude Matrix_setRandom_int.out
|
||||
*
|
||||
* \sa MatrixBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, MatrixBase::Random()
|
||||
* \sa DenseBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, DenseBase::Random()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived&
|
||||
@ -150,13 +160,15 @@ PlainObjectBase<Derived>::setRandom(Index newSize)
|
||||
* Numbers are uniformly spread through their whole definition range for integer types,
|
||||
* and in the [-1:1] range for floating point scalar types.
|
||||
*
|
||||
* \not_reentrant
|
||||
*
|
||||
* \param nbRows the new number of rows
|
||||
* \param nbCols the new number of columns
|
||||
*
|
||||
* Example: \include Matrix_setRandom_int_int.cpp
|
||||
* Output: \verbinclude Matrix_setRandom_int_int.out
|
||||
*
|
||||
* \sa MatrixBase::setRandom(), setRandom(Index), class CwiseNullaryOp, MatrixBase::Random()
|
||||
* \sa DenseBase::setRandom(), setRandom(Index), class CwiseNullaryOp, DenseBase::Random()
|
||||
*/
|
||||
template<typename Derived>
|
||||
EIGEN_STRONG_INLINE Derived&
|
||||
|
@ -17,16 +17,29 @@ namespace internal {
|
||||
template<typename ExpressionType, typename Scalar>
|
||||
inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale)
|
||||
{
|
||||
Scalar max = bl.cwiseAbs().maxCoeff();
|
||||
if (max>scale)
|
||||
using std::max;
|
||||
Scalar maxCoeff = bl.cwiseAbs().maxCoeff();
|
||||
|
||||
if (maxCoeff>scale)
|
||||
{
|
||||
ssq = ssq * numext::abs2(scale/max);
|
||||
scale = max;
|
||||
invScale = Scalar(1)/scale;
|
||||
ssq = ssq * numext::abs2(scale/maxCoeff);
|
||||
Scalar tmp = Scalar(1)/maxCoeff;
|
||||
if(tmp > NumTraits<Scalar>::highest())
|
||||
{
|
||||
invScale = NumTraits<Scalar>::highest();
|
||||
scale = Scalar(1)/invScale;
|
||||
}
|
||||
else
|
||||
{
|
||||
scale = maxCoeff;
|
||||
invScale = tmp;
|
||||
}
|
||||
}
|
||||
// TODO if the max is much much smaller than the current scale,
|
||||
|
||||
// TODO if the maxCoeff is much much smaller than the current scale,
|
||||
// then we can neglect this sub vector
|
||||
ssq += (bl*invScale).squaredNorm();
|
||||
if(scale>Scalar(0)) // if scale==0, then bl is 0
|
||||
ssq += (bl*invScale).squaredNorm();
|
||||
}
|
||||
|
||||
template<typename Derived>
|
||||
|
@ -504,13 +504,18 @@ template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
Packet4i tmp = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
|
||||
return pfirst(_mm_min_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
|
||||
#else
|
||||
// after some experiments, it is seems this is the fastest way to implement it
|
||||
// for GCC (eg., it does not like using std::min after the pstore !!)
|
||||
EIGEN_ALIGN16 int aux[4];
|
||||
pstore(aux, a);
|
||||
register int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
|
||||
register int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
|
||||
int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
|
||||
int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
|
||||
return aux0<aux2 ? aux0 : aux2;
|
||||
#endif // EIGEN_VECTORIZE_SSE4_1
|
||||
}
|
||||
|
||||
// max
|
||||
@ -525,13 +530,18 @@ template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
|
||||
{
|
||||
#ifdef EIGEN_VECTORIZE_SSE4_1
|
||||
Packet4i tmp = _mm_max_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0,0,3,2)));
|
||||
return pfirst(_mm_max_epi32(tmp,_mm_shuffle_epi32(tmp, 1)));
|
||||
#else
|
||||
// after some experiments, it is seems this is the fastest way to implement it
|
||||
// for GCC (eg., it does not like using std::min after the pstore !!)
|
||||
EIGEN_ALIGN16 int aux[4];
|
||||
pstore(aux, a);
|
||||
register int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
|
||||
register int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
|
||||
int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
|
||||
int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
|
||||
return aux0>aux2 ? aux0 : aux2;
|
||||
#endif // EIGEN_VECTORIZE_SSE4_1
|
||||
}
|
||||
|
||||
#if (defined __GNUC__)
|
||||
|
@ -79,8 +79,8 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
|
||||
for (Index j=FirstTriangular ? bound : 0;
|
||||
j<(FirstTriangular ? size : bound);j+=2)
|
||||
{
|
||||
register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
|
||||
register const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
|
||||
const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
|
||||
const Scalar* EIGEN_RESTRICT A1 = lhs + (j+1)*lhsStride;
|
||||
|
||||
Scalar t0 = cjAlpha * rhs[j];
|
||||
Packet ptmp0 = pset1<Packet>(t0);
|
||||
@ -147,7 +147,7 @@ EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrd
|
||||
}
|
||||
for (Index j=FirstTriangular ? 0 : bound;j<(FirstTriangular ? bound : size);j++)
|
||||
{
|
||||
register const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
|
||||
const Scalar* EIGEN_RESTRICT A0 = lhs + j*lhsStride;
|
||||
|
||||
Scalar t1 = cjAlpha * rhs[j];
|
||||
Scalar t2(0);
|
||||
|
@ -608,7 +608,7 @@ template<typename T> class aligned_stack_memory_handler
|
||||
*/
|
||||
#ifdef EIGEN_ALLOCA
|
||||
|
||||
#ifdef __arm__
|
||||
#if defined(__arm__) || defined(_WIN32)
|
||||
#define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((reinterpret_cast<size_t>(EIGEN_ALLOCA(SIZE+16)) & ~(size_t(15))) + 16)
|
||||
#else
|
||||
#define EIGEN_ALIGNED_ALLOCA EIGEN_ALLOCA
|
||||
@ -664,7 +664,9 @@ template<typename T> class aligned_stack_memory_handler
|
||||
/* memory allocated we can safely let the default implementation handle */ \
|
||||
/* this particular case. */ \
|
||||
static void *operator new(size_t size, void *ptr) { return ::operator new(size,ptr); } \
|
||||
static void *operator new[](size_t size, void* ptr) { return ::operator new[](size,ptr); } \
|
||||
void operator delete(void * memory, void *ptr) throw() { return ::operator delete(memory,ptr); } \
|
||||
void operator delete[](void * memory, void *ptr) throw() { return ::operator delete[](memory,ptr); } \
|
||||
/* nothrow-new (returns zero instead of std::bad_alloc) */ \
|
||||
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
|
||||
void operator delete(void *ptr, const std::nothrow_t&) throw() { \
|
||||
@@ -759,11 +761,27 @@ public:
     ::new( p ) T( value );
   }
 
+#if (__cplusplus >= 201103L)
+  template <typename U, typename... Args>
+  void construct( U* u, Args&&... args)
+  {
+    ::new( static_cast<void*>(u) ) U( std::forward<Args>( args )... );
+  }
+#endif
+
   void destroy( pointer p )
   {
     p->~T();
   }
 
+#if (__cplusplus >= 201103L)
+  template <typename U>
+  void destroy( U* u )
+  {
+    u->~U();
+  }
+#endif
+
   void deallocate( pointer p, size_type /*num*/ )
   {
     internal::aligned_free( p );
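For context only (not from the patch): the new C++11 construct()/destroy() overloads are what std::allocator_traits dispatches to when Eigen::aligned_allocator is used with a standard container and emplace-style insertion; the container and values below are illustrative.

#include <vector>
#include <Eigen/Dense>

int main()
{
  // emplace_back forwards its arguments through allocator_traits, which in
  // C++11 mode ends up calling the variadic construct(U*, Args&&...) overload.
  std::vector<Eigen::Vector4f, Eigen::aligned_allocator<Eigen::Vector4f> > v;
  v.emplace_back(1.f, 2.f, 3.f, 4.f);
  return 0;
}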
@ -756,7 +756,9 @@ struct direct_selfadjoint_eigenvalues<SolverType,2,false>
|
||||
EIGEN_DEVICE_FUNC
|
||||
static inline void run(SolverType& solver, const MatrixType& mat, int options)
|
||||
{
|
||||
using std::sqrt;
|
||||
EIGEN_USING_STD_MATH(max)
|
||||
EIGEN_USING_STD_MATH(sqrt);
|
||||
|
||||
eigen_assert(mat.cols() == 2 && mat.cols() == mat.rows());
|
||||
eigen_assert((options&~(EigVecMask|GenEigMask))==0
|
||||
&& (options&EigVecMask)!=EigVecMask
|
||||
@ -768,7 +770,7 @@ struct direct_selfadjoint_eigenvalues<SolverType,2,false>
|
||||
|
||||
// map the matrix coefficients to [-1:1] to avoid over- and underflow.
|
||||
Scalar scale = mat.cwiseAbs().maxCoeff();
|
||||
scale = (std::max)(scale,Scalar(1));
|
||||
scale = (max)(scale,Scalar(1));
|
||||
MatrixType scaledMat = mat / scale;
|
||||
|
||||
// Compute the eigenvalues
|
||||
|
@ -28,7 +28,7 @@ namespace Eigen {
|
||||
* * AngleAxisf(ea[2], Vector3f::UnitZ()); \endcode
|
||||
* This corresponds to the right-multiply conventions (with right hand side frames).
|
||||
*
|
||||
* The returned angles are in the ranges [0:pi]x[0:pi]x[-pi:pi].
|
||||
* The returned angles are in the ranges [0:pi]x[-pi:pi]x[-pi:pi].
|
||||
*
|
||||
* \sa class AngleAxis
|
||||
*/
|
||||
|
@ -150,10 +150,6 @@ public:
|
||||
/** \returns the conjugated quaternion */
|
||||
Quaternion<Scalar> conjugate() const;
|
||||
|
||||
/** \returns an interpolation for a constant motion between \a other and \c *this
|
||||
* \a t in [0;1]
|
||||
* see http://en.wikipedia.org/wiki/Slerp
|
||||
*/
|
||||
template<class OtherDerived> Quaternion<Scalar> slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const;
|
||||
|
||||
/** \returns \c true if \c *this is approximately equal to \a other, within the precision
|
||||
@ -194,11 +190,11 @@ public:
|
||||
* \brief The quaternion class used to represent 3D orientations and rotations
|
||||
*
|
||||
* \tparam _Scalar the scalar type, i.e., the type of the coefficients
|
||||
* \tparam _Options controls the memory alignement of the coeffecients. Can be \# AutoAlign or \# DontAlign. Default is AutoAlign.
|
||||
* \tparam _Options controls the memory alignment of the coefficients. Can be \# AutoAlign or \# DontAlign. Default is AutoAlign.
|
||||
*
|
||||
* This class represents a quaternion \f$ w+xi+yj+zk \f$ that is a convenient representation of
|
||||
* orientations and rotations of objects in three dimensions. Compared to other representations
|
||||
* like Euler angles or 3x3 matrices, quatertions offer the following advantages:
|
||||
* like Euler angles or 3x3 matrices, quaternions offer the following advantages:
|
||||
* \li \b compact storage (4 scalars)
|
||||
* \li \b efficient to compose (28 flops),
|
||||
* \li \b stable spherical interpolation
|
||||
@ -385,7 +381,7 @@ class Map<Quaternion<_Scalar>, _Options >
|
||||
|
||||
/** Constructs a Mapped Quaternion object from the pointer \a coeffs
|
||||
*
|
||||
* The pointer \a coeffs must reference the four coeffecients of Quaternion in the following order:
|
||||
* The pointer \a coeffs must reference the four coefficients of Quaternion in the following order:
|
||||
* \code *coeffs == {x, y, z, w} \endcode
|
||||
*
|
||||
* If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. */
|
||||
@ -399,16 +395,16 @@ class Map<Quaternion<_Scalar>, _Options >
|
||||
};
|
||||
|
||||
/** \ingroup Geometry_Module
|
||||
* Map an unaligned array of single precision scalar as a quaternion */
|
||||
* Map an unaligned array of single precision scalars as a quaternion */
|
||||
typedef Map<Quaternion<float>, 0> QuaternionMapf;
|
||||
/** \ingroup Geometry_Module
|
||||
* Map an unaligned array of double precision scalar as a quaternion */
|
||||
* Map an unaligned array of double precision scalars as a quaternion */
|
||||
typedef Map<Quaternion<double>, 0> QuaternionMapd;
|
||||
/** \ingroup Geometry_Module
|
||||
* Map a 16-bits aligned array of double precision scalars as a quaternion */
|
||||
* Map a 16-byte aligned array of single precision scalars as a quaternion */
|
||||
typedef Map<Quaternion<float>, Aligned> QuaternionMapAlignedf;
|
||||
/** \ingroup Geometry_Module
|
||||
* Map a 16-bits aligned array of double precision scalars as a quaternion */
|
||||
* Map a 16-byte aligned array of double precision scalars as a quaternion */
|
||||
typedef Map<Quaternion<double>, Aligned> QuaternionMapAlignedd;
|
||||
|
||||
/***************************************************************************
|
||||
@ -579,7 +575,7 @@ inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Deri
|
||||
Scalar c = v1.dot(v0);
|
||||
|
||||
// if dot == -1, vectors are nearly opposites
|
||||
// => accuraletly compute the rotation axis by computing the
|
||||
// => accurately compute the rotation axis by computing the
|
||||
// intersection of the two planes. This is done by solving:
|
||||
// x^T v0 = 0
|
||||
// x^T v1 = 0
|
||||
@@ -677,8 +673,13 @@ QuaternionBase<Derived>::angularDistance(const QuaternionBase<OtherDerived>& oth
   return static_cast<Scalar>(2 * acos(d));
 }
 
+
+
 /** \returns the spherical linear interpolation between the two quaternions
- * \c *this and \a other at the parameter \a t
+ * \c *this and \a other at the parameter \a t in [0;1].
+ *
+ * This represents an interpolation for a constant motion between \c *this and \a other,
+ * see also http://en.wikipedia.org/wiki/Slerp.
  */
 template <class Derived>
 template <class OtherDerived>
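A minimal illustration of the documented slerp behaviour, not part of the patch; the rotations chosen here are arbitrary.

#include <Eigen/Geometry>
#include <iostream>

int main()
{
  Eigen::Quaternionf a(Eigen::AngleAxisf(0.0f,       Eigen::Vector3f::UnitZ()));
  Eigen::Quaternionf b(Eigen::AngleAxisf(1.5707963f, Eigen::Vector3f::UnitZ()));
  // t in [0;1]: 0 yields *this (a), 1 yields other (b); intermediate values
  // interpolate at constant angular velocity along the shortest arc.
  Eigen::Quaternionf q = a.slerp(0.5f, b);
  std::cout << q.coeffs().transpose() << "\n";
  return 0;
}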
@ -530,9 +530,9 @@ public:
|
||||
|
||||
inline Transform& operator=(const UniformScaling<Scalar>& t);
|
||||
inline Transform& operator*=(const UniformScaling<Scalar>& s) { return scale(s.factor()); }
|
||||
inline Transform<Scalar,Dim,(int(Mode)==int(Isometry)?Affine:Isometry)> operator*(const UniformScaling<Scalar>& s) const
|
||||
inline Transform<Scalar,Dim,(int(Mode)==int(Isometry)?Affine:Mode)> operator*(const UniformScaling<Scalar>& s) const
|
||||
{
|
||||
Transform<Scalar,Dim,(int(Mode)==int(Isometry)?Affine:Isometry),Options> res = *this;
|
||||
Transform<Scalar,Dim,(int(Mode)==int(Isometry)?Affine:Mode),Options> res = *this;
|
||||
res.scale(s.factor());
|
||||
return res;
|
||||
}
|
||||
@ -699,9 +699,13 @@ template<typename Scalar, int Dim, int Mode,int Options>
|
||||
Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const QMatrix& other)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT(Dim==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
|
||||
m_matrix << other.m11(), other.m21(), other.dx(),
|
||||
other.m12(), other.m22(), other.dy(),
|
||||
0, 0, 1;
|
||||
if (Mode == int(AffineCompact))
|
||||
m_matrix << other.m11(), other.m21(), other.dx(),
|
||||
other.m12(), other.m22(), other.dy();
|
||||
else
|
||||
m_matrix << other.m11(), other.m21(), other.dx(),
|
||||
other.m12(), other.m22(), other.dy(),
|
||||
0, 0, 1;
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -33,13 +33,13 @@ struct traits<FullPivHouseholderQRMatrixQReturnType<MatrixType> >
|
||||
*
|
||||
* \param MatrixType the type of the matrix of which we are computing the QR decomposition
|
||||
*
|
||||
* This class performs a rank-revealing QR decomposition of a matrix \b A into matrices \b P, \b Q and \b R
|
||||
* This class performs a rank-revealing QR decomposition of a matrix \b A into matrices \b P, \b P', \b Q and \b R
|
||||
* such that
|
||||
* \f[
|
||||
* \mathbf{A} \, \mathbf{P} = \mathbf{Q} \, \mathbf{R}
|
||||
* \mathbf{P} \, \mathbf{A} \, \mathbf{P}' = \mathbf{Q} \, \mathbf{R}
|
||||
* \f]
|
||||
* by using Householder transformations. Here, \b P is a permutation matrix, \b Q a unitary matrix and \b R an
|
||||
* upper triangular matrix.
|
||||
* by using Householder transformations. Here, \b P and \b P' are permutation matrices, \b Q a unitary matrix
|
||||
* and \b R an upper triangular matrix.
|
||||
*
|
||||
* This decomposition performs a very prudent full pivoting in order to be rank-revealing and achieve optimal
|
||||
* numerical stability. The trade-off is that it is slower than HouseholderQR and ColPivHouseholderQR.
|
||||
|
@ -251,56 +251,62 @@ void householder_qr_inplace_unblocked(MatrixQR& mat, HCoeffs& hCoeffs, typename
|
||||
}
|
||||
|
||||
/** \internal */
|
||||
template<typename MatrixQR, typename HCoeffs>
|
||||
void householder_qr_inplace_blocked(MatrixQR& mat, HCoeffs& hCoeffs,
|
||||
typename MatrixQR::Index maxBlockSize=32,
|
||||
typename MatrixQR::Scalar* tempData = 0)
|
||||
template<typename MatrixQR, typename HCoeffs,
|
||||
typename MatrixQRScalar = typename MatrixQR::Scalar,
|
||||
bool InnerStrideIsOne = (MatrixQR::InnerStrideAtCompileTime == 1 && HCoeffs::InnerStrideAtCompileTime == 1)>
|
||||
struct householder_qr_inplace_blocked
|
||||
{
|
||||
typedef typename MatrixQR::Index Index;
|
||||
typedef typename MatrixQR::Scalar Scalar;
|
||||
typedef Block<MatrixQR,Dynamic,Dynamic> BlockType;
|
||||
|
||||
Index rows = mat.rows();
|
||||
Index cols = mat.cols();
|
||||
Index size = (std::min)(rows, cols);
|
||||
|
||||
typedef Matrix<Scalar,Dynamic,1,ColMajor,MatrixQR::MaxColsAtCompileTime,1> TempType;
|
||||
TempType tempVector;
|
||||
if(tempData==0)
|
||||
// This is specialized for MKL-supported Scalar types in HouseholderQR_MKL.h
|
||||
static void run(MatrixQR& mat, HCoeffs& hCoeffs,
|
||||
typename MatrixQR::Index maxBlockSize=32,
|
||||
typename MatrixQR::Scalar* tempData = 0)
|
||||
{
|
||||
tempVector.resize(cols);
|
||||
tempData = tempVector.data();
|
||||
}
|
||||
typedef typename MatrixQR::Index Index;
|
||||
typedef typename MatrixQR::Scalar Scalar;
|
||||
typedef Block<MatrixQR,Dynamic,Dynamic> BlockType;
|
||||
|
||||
Index blockSize = (std::min)(maxBlockSize,size);
|
||||
Index rows = mat.rows();
|
||||
Index cols = mat.cols();
|
||||
Index size = (std::min)(rows, cols);
|
||||
|
||||
Index k = 0;
|
||||
for (k = 0; k < size; k += blockSize)
|
||||
{
|
||||
Index bs = (std::min)(size-k,blockSize); // actual size of the block
|
||||
Index tcols = cols - k - bs; // trailing columns
|
||||
Index brows = rows-k; // rows of the block
|
||||
|
||||
// partition the matrix:
|
||||
// A00 | A01 | A02
|
||||
// mat = A10 | A11 | A12
|
||||
// A20 | A21 | A22
|
||||
// and performs the qr dec of [A11^T A12^T]^T
|
||||
// and update [A21^T A22^T]^T using level 3 operations.
|
||||
// Finally, the algorithm continue on A22
|
||||
|
||||
BlockType A11_21 = mat.block(k,k,brows,bs);
|
||||
Block<HCoeffs,Dynamic,1> hCoeffsSegment = hCoeffs.segment(k,bs);
|
||||
|
||||
householder_qr_inplace_unblocked(A11_21, hCoeffsSegment, tempData);
|
||||
|
||||
if(tcols)
|
||||
typedef Matrix<Scalar,Dynamic,1,ColMajor,MatrixQR::MaxColsAtCompileTime,1> TempType;
|
||||
TempType tempVector;
|
||||
if(tempData==0)
|
||||
{
|
||||
BlockType A21_22 = mat.block(k,k+bs,brows,tcols);
|
||||
apply_block_householder_on_the_left(A21_22,A11_21,hCoeffsSegment.adjoint());
|
||||
tempVector.resize(cols);
|
||||
tempData = tempVector.data();
|
||||
}
|
||||
|
||||
Index blockSize = (std::min)(maxBlockSize,size);
|
||||
|
||||
Index k = 0;
|
||||
for (k = 0; k < size; k += blockSize)
|
||||
{
|
||||
Index bs = (std::min)(size-k,blockSize); // actual size of the block
|
||||
Index tcols = cols - k - bs; // trailing columns
|
||||
Index brows = rows-k; // rows of the block
|
||||
|
||||
// partition the matrix:
|
||||
// A00 | A01 | A02
|
||||
// mat = A10 | A11 | A12
|
||||
// A20 | A21 | A22
|
||||
// and performs the qr dec of [A11^T A12^T]^T
|
||||
// and update [A21^T A22^T]^T using level 3 operations.
|
||||
// Finally, the algorithm continue on A22
|
||||
|
||||
BlockType A11_21 = mat.block(k,k,brows,bs);
|
||||
Block<HCoeffs,Dynamic,1> hCoeffsSegment = hCoeffs.segment(k,bs);
|
||||
|
||||
householder_qr_inplace_unblocked(A11_21, hCoeffsSegment, tempData);
|
||||
|
||||
if(tcols)
|
||||
{
|
||||
BlockType A21_22 = mat.block(k,k+bs,brows,tcols);
|
||||
apply_block_householder_on_the_left(A21_22,A11_21,hCoeffsSegment.adjoint());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<typename _MatrixType, typename Rhs>
|
||||
struct solve_retval<HouseholderQR<_MatrixType>, Rhs>
|
||||
@ -352,7 +358,7 @@ HouseholderQR<MatrixType>& HouseholderQR<MatrixType>::compute(const MatrixType&
|
||||
|
||||
m_temp.resize(cols);
|
||||
|
||||
internal::householder_qr_inplace_blocked(m_qr, m_hCoeffs, 48, m_temp.data());
|
||||
internal::householder_qr_inplace_blocked<MatrixType, HCoeffsType>::run(m_qr, m_hCoeffs, 48, m_temp.data());
|
||||
|
||||
m_isInitialized = true;
|
||||
return *this;
|
||||
|
@ -34,7 +34,7 @@
|
||||
#ifndef EIGEN_QR_MKL_H
|
||||
#define EIGEN_QR_MKL_H
|
||||
|
||||
#include "Eigen/src/Core/util/MKL_support.h"
|
||||
#include "../Core/util/MKL_support.h"
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
@ -44,18 +44,20 @@ namespace internal {
|
||||
|
||||
#define EIGEN_MKL_QR_NOPIV(EIGTYPE, MKLTYPE, MKLPREFIX) \
|
||||
template<typename MatrixQR, typename HCoeffs> \
|
||||
void householder_qr_inplace_blocked(MatrixQR& mat, HCoeffs& hCoeffs, \
|
||||
typename MatrixQR::Index maxBlockSize=32, \
|
||||
EIGTYPE* tempData = 0) \
|
||||
struct householder_qr_inplace_blocked<MatrixQR, HCoeffs, EIGTYPE, true> \
|
||||
{ \
|
||||
lapack_int m = mat.rows(); \
|
||||
lapack_int n = mat.cols(); \
|
||||
lapack_int lda = mat.outerStride(); \
|
||||
lapack_int matrix_order = (MatrixQR::IsRowMajor) ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
|
||||
LAPACKE_##MKLPREFIX##geqrf( matrix_order, m, n, (MKLTYPE*)mat.data(), lda, (MKLTYPE*)hCoeffs.data()); \
|
||||
hCoeffs.adjointInPlace(); \
|
||||
\
|
||||
}
|
||||
static void run(MatrixQR& mat, HCoeffs& hCoeffs, \
|
||||
typename MatrixQR::Index = 32, \
|
||||
typename MatrixQR::Scalar* = 0) \
|
||||
{ \
|
||||
lapack_int m = (lapack_int) mat.rows(); \
|
||||
lapack_int n = (lapack_int) mat.cols(); \
|
||||
lapack_int lda = (lapack_int) mat.outerStride(); \
|
||||
lapack_int matrix_order = (MatrixQR::IsRowMajor) ? LAPACK_ROW_MAJOR : LAPACK_COL_MAJOR; \
|
||||
LAPACKE_##MKLPREFIX##geqrf( matrix_order, m, n, (MKLTYPE*)mat.data(), lda, (MKLTYPE*)hCoeffs.data()); \
|
||||
hCoeffs.adjointInPlace(); \
|
||||
} \
|
||||
};
|
||||
|
||||
EIGEN_MKL_QR_NOPIV(double, double, d)
|
||||
EIGEN_MKL_QR_NOPIV(float, float, s)
|
||||
|
@ -66,9 +66,9 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
|
||||
}
|
||||
|
||||
// unordered insertion
|
||||
for(int k=0; k<nnz; ++k)
|
||||
for(Index k=0; k<nnz; ++k)
|
||||
{
|
||||
int i = indices[k];
|
||||
Index i = indices[k];
|
||||
res.insertBackByOuterInnerUnordered(j,i) = values[i];
|
||||
mask[i] = false;
|
||||
}
|
||||
@ -76,8 +76,8 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
|
||||
#if 0
|
||||
// alternative ordered insertion code:
|
||||
|
||||
int t200 = rows/(log2(200)*1.39);
|
||||
int t = (rows*100)/139;
|
||||
Index t200 = rows/(log2(200)*1.39);
|
||||
Index t = (rows*100)/139;
|
||||
|
||||
// FIXME reserve nnz non zeros
|
||||
// FIXME implement fast sort algorithms for very small nnz
|
||||
@ -90,9 +90,9 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
|
||||
if(true)
|
||||
{
|
||||
if(nnz>1) std::sort(indices.data(),indices.data()+nnz);
|
||||
for(int k=0; k<nnz; ++k)
|
||||
for(Index k=0; k<nnz; ++k)
|
||||
{
|
||||
int i = indices[k];
|
||||
Index i = indices[k];
|
||||
res.insertBackByOuterInner(j,i) = values[i];
|
||||
mask[i] = false;
|
||||
}
|
||||
@ -100,7 +100,7 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
|
||||
else
|
||||
{
|
||||
// dense path
|
||||
for(int i=0; i<rows; ++i)
|
||||
for(Index i=0; i<rows; ++i)
|
||||
{
|
||||
if(mask[i])
|
||||
{
|
||||
@ -134,8 +134,8 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,C
|
||||
|
||||
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
|
||||
{
|
||||
typedef SparseMatrix<typename ResultType::Scalar,RowMajor> RowMajorMatrix;
|
||||
typedef SparseMatrix<typename ResultType::Scalar,ColMajor> ColMajorMatrix;
|
||||
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
|
||||
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
|
||||
ColMajorMatrix resCol(lhs.rows(),rhs.cols());
|
||||
internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol);
|
||||
// sort the non zeros:
|
||||
@ -149,7 +149,7 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,C
|
||||
{
|
||||
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
|
||||
{
|
||||
typedef SparseMatrix<typename ResultType::Scalar,RowMajor> RowMajorMatrix;
|
||||
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
|
||||
RowMajorMatrix rhsRow = rhs;
|
||||
RowMajorMatrix resRow(lhs.rows(), rhs.cols());
|
||||
internal::conservative_sparse_sparse_product_impl<RowMajorMatrix,Lhs,RowMajorMatrix>(rhsRow, lhs, resRow);
|
||||
@ -162,7 +162,7 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,R
|
||||
{
|
||||
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
|
||||
{
|
||||
typedef SparseMatrix<typename ResultType::Scalar,RowMajor> RowMajorMatrix;
|
||||
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
|
||||
RowMajorMatrix lhsRow = lhs;
|
||||
RowMajorMatrix resRow(lhs.rows(), rhs.cols());
|
||||
internal::conservative_sparse_sparse_product_impl<Rhs,RowMajorMatrix,RowMajorMatrix>(rhs, lhsRow, resRow);
|
||||
@ -175,7 +175,7 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,R
|
||||
{
|
||||
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
|
||||
{
|
||||
typedef SparseMatrix<typename ResultType::Scalar,RowMajor> RowMajorMatrix;
|
||||
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
|
||||
RowMajorMatrix resRow(lhs.rows(), rhs.cols());
|
||||
internal::conservative_sparse_sparse_product_impl<Rhs,Lhs,RowMajorMatrix>(rhs, lhs, resRow);
|
||||
res = resRow;
|
||||
@ -190,7 +190,7 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,C
|
||||
|
||||
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
|
||||
{
|
||||
typedef SparseMatrix<typename ResultType::Scalar,ColMajor> ColMajorMatrix;
|
||||
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
|
||||
ColMajorMatrix resCol(lhs.rows(), rhs.cols());
|
||||
internal::conservative_sparse_sparse_product_impl<Lhs,Rhs,ColMajorMatrix>(lhs, rhs, resCol);
|
||||
res = resCol;
|
||||
@ -202,7 +202,7 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,C
|
||||
{
|
||||
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
|
||||
{
|
||||
typedef SparseMatrix<typename ResultType::Scalar,ColMajor> ColMajorMatrix;
|
||||
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
|
||||
ColMajorMatrix lhsCol = lhs;
|
||||
ColMajorMatrix resCol(lhs.rows(), rhs.cols());
|
||||
internal::conservative_sparse_sparse_product_impl<ColMajorMatrix,Rhs,ColMajorMatrix>(lhsCol, rhs, resCol);
|
||||
@ -215,7 +215,7 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,ColMajor,R
|
||||
{
|
||||
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
|
||||
{
|
||||
typedef SparseMatrix<typename ResultType::Scalar,ColMajor> ColMajorMatrix;
|
||||
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
|
||||
ColMajorMatrix rhsCol = rhs;
|
||||
ColMajorMatrix resCol(lhs.rows(), rhs.cols());
|
||||
internal::conservative_sparse_sparse_product_impl<Lhs,ColMajorMatrix,ColMajorMatrix>(lhs, rhsCol, resCol);
|
||||
@ -228,8 +228,8 @@ struct conservative_sparse_sparse_product_selector<Lhs,Rhs,ResultType,RowMajor,R
|
||||
{
|
||||
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res)
|
||||
{
|
||||
typedef SparseMatrix<typename ResultType::Scalar,RowMajor> RowMajorMatrix;
|
||||
typedef SparseMatrix<typename ResultType::Scalar,ColMajor> ColMajorMatrix;
|
||||
typedef SparseMatrix<typename ResultType::Scalar,RowMajor,typename ResultType::Index> RowMajorMatrix;
|
||||
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> ColMajorMatrix;
|
||||
RowMajorMatrix resRow(lhs.rows(),rhs.cols());
|
||||
internal::conservative_sparse_sparse_product_impl<Rhs,Lhs,RowMajorMatrix>(rhs, lhs, resRow);
|
||||
// sort the non zeros:
|
||||
|
@ -335,6 +335,14 @@ const Block<const Derived,Dynamic,Dynamic,true> SparseMatrixBase<Derived>::inner
|
||||
|
||||
}
|
||||
|
||||
namespace internal {
|
||||
|
||||
template< typename XprType, int BlockRows, int BlockCols, bool InnerPanel,
|
||||
bool OuterVector = (BlockCols==1 && XprType::IsRowMajor) || (BlockRows==1 && !XprType::IsRowMajor)>
|
||||
class GenericSparseBlockInnerIteratorImpl;
|
||||
|
||||
}
|
||||
|
||||
/** Generic implementation of sparse Block expression.
|
||||
* Real-only.
|
||||
*/
|
||||
@ -342,11 +350,12 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
|
||||
class BlockImpl<XprType,BlockRows,BlockCols,InnerPanel,Sparse>
|
||||
: public SparseMatrixBase<Block<XprType,BlockRows,BlockCols,InnerPanel> >, internal::no_assignment_operator
|
||||
{
|
||||
typedef typename internal::remove_all<typename XprType::Nested>::type _MatrixTypeNested;
|
||||
typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
|
||||
public:
|
||||
enum { IsRowMajor = internal::traits<BlockType>::IsRowMajor };
|
||||
EIGEN_SPARSE_PUBLIC_INTERFACE(BlockType)
|
||||
|
||||
typedef typename internal::remove_all<typename XprType::Nested>::type _MatrixTypeNested;
|
||||
|
||||
/** Column or Row constructor
|
||||
*/
|
||||
@ -354,8 +363,8 @@ public:
|
||||
: m_matrix(xpr),
|
||||
m_startRow( (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0),
|
||||
m_startCol( (BlockRows==XprType::RowsAtCompileTime) && (BlockCols==1) ? i : 0),
|
||||
m_blockRows(xpr.rows()),
|
||||
m_blockCols(xpr.cols())
|
||||
m_blockRows(BlockRows==1 ? 1 : xpr.rows()),
|
||||
m_blockCols(BlockCols==1 ? 1 : xpr.cols())
|
||||
{}
|
||||
|
||||
/** Dynamic-size constructor
|
||||
@ -394,29 +403,8 @@ public:
|
||||
|
||||
inline const _MatrixTypeNested& nestedExpression() const { return m_matrix; }
|
||||
|
||||
class InnerIterator : public _MatrixTypeNested::InnerIterator
|
||||
{
|
||||
typedef typename _MatrixTypeNested::InnerIterator Base;
|
||||
const BlockType& m_block;
|
||||
Index m_end;
|
||||
public:
|
||||
|
||||
EIGEN_STRONG_INLINE InnerIterator(const BlockType& block, Index outer)
|
||||
: Base(block.derived().nestedExpression(), outer + (IsRowMajor ? block.m_startRow.value() : block.m_startCol.value())),
|
||||
m_block(block),
|
||||
m_end(IsRowMajor ? block.m_startCol.value()+block.m_blockCols.value() : block.m_startRow.value()+block.m_blockRows.value())
|
||||
{
|
||||
while( (Base::operator bool()) && (Base::index() < (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value())) )
|
||||
Base::operator++();
|
||||
}
|
||||
|
||||
inline Index index() const { return Base::index() - (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()); }
|
||||
inline Index outer() const { return Base::outer() - (IsRowMajor ? m_block.m_startRow.value() : m_block.m_startCol.value()); }
|
||||
inline Index row() const { return Base::row() - m_block.m_startRow.value(); }
|
||||
inline Index col() const { return Base::col() - m_block.m_startCol.value(); }
|
||||
|
||||
inline operator bool() const { return Base::operator bool() && Base::index() < m_end; }
|
||||
};
|
||||
typedef internal::GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel> InnerIterator;
|
||||
|
||||
class ReverseInnerIterator : public _MatrixTypeNested::ReverseInnerIterator
|
||||
{
|
||||
typedef typename _MatrixTypeNested::ReverseInnerIterator Base;
|
||||
@ -441,7 +429,7 @@ public:
|
||||
inline operator bool() const { return Base::operator bool() && Base::index() >= m_begin; }
|
||||
};
|
||||
protected:
|
||||
friend class InnerIterator;
|
||||
friend class internal::GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel>;
|
||||
friend class ReverseInnerIterator;
|
||||
|
||||
EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
|
||||
@ -454,6 +442,100 @@ public:
|
||||
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
|
||||
class GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel,false> : public Block<XprType, BlockRows, BlockCols, InnerPanel>::_MatrixTypeNested::InnerIterator
|
||||
{
|
||||
typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
|
||||
enum {
|
||||
IsRowMajor = BlockType::IsRowMajor
|
||||
};
|
||||
typedef typename BlockType::_MatrixTypeNested _MatrixTypeNested;
|
||||
typedef typename BlockType::Index Index;
|
||||
typedef typename _MatrixTypeNested::InnerIterator Base;
|
||||
const BlockType& m_block;
|
||||
Index m_end;
|
||||
public:
|
||||
|
||||
EIGEN_STRONG_INLINE GenericSparseBlockInnerIteratorImpl(const BlockType& block, Index outer)
|
||||
: Base(block.derived().nestedExpression(), outer + (IsRowMajor ? block.m_startRow.value() : block.m_startCol.value())),
|
||||
m_block(block),
|
||||
m_end(IsRowMajor ? block.m_startCol.value()+block.m_blockCols.value() : block.m_startRow.value()+block.m_blockRows.value())
|
||||
{
|
||||
while( (Base::operator bool()) && (Base::index() < (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value())) )
|
||||
Base::operator++();
|
||||
}
|
||||
|
||||
inline Index index() const { return Base::index() - (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()); }
|
||||
inline Index outer() const { return Base::outer() - (IsRowMajor ? m_block.m_startRow.value() : m_block.m_startCol.value()); }
|
||||
inline Index row() const { return Base::row() - m_block.m_startRow.value(); }
|
||||
inline Index col() const { return Base::col() - m_block.m_startCol.value(); }
|
||||
|
||||
inline operator bool() const { return Base::operator bool() && Base::index() < m_end; }
|
||||
};
|
||||
|
||||
// Row vector of a column-major sparse matrix or column of a row-major one.
|
||||
template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel>
|
||||
class GenericSparseBlockInnerIteratorImpl<XprType,BlockRows,BlockCols,InnerPanel,true>
|
||||
{
|
||||
typedef Block<XprType, BlockRows, BlockCols, InnerPanel> BlockType;
|
||||
enum {
|
||||
IsRowMajor = BlockType::IsRowMajor
|
||||
};
|
||||
typedef typename BlockType::_MatrixTypeNested _MatrixTypeNested;
|
||||
typedef typename BlockType::Index Index;
|
||||
typedef typename BlockType::Scalar Scalar;
|
||||
const BlockType& m_block;
|
||||
Index m_outerPos;
|
||||
Index m_innerIndex;
|
||||
Scalar m_value;
|
||||
Index m_end;
|
||||
public:
|
||||
|
||||
EIGEN_STRONG_INLINE GenericSparseBlockInnerIteratorImpl(const BlockType& block, Index outer = 0)
|
||||
:
|
||||
m_block(block),
|
||||
m_outerPos( (IsRowMajor ? block.m_startCol.value() : block.m_startRow.value()) - 1), // -1 so that operator++ finds the first non-zero entry
|
||||
m_innerIndex(IsRowMajor ? block.m_startRow.value() : block.m_startCol.value()),
|
||||
m_end(IsRowMajor ? block.m_startCol.value()+block.m_blockCols.value() : block.m_startRow.value()+block.m_blockRows.value())
|
||||
{
|
||||
EIGEN_UNUSED_VARIABLE(outer);
|
||||
eigen_assert(outer==0);
|
||||
|
||||
++(*this);
|
||||
}
|
||||
|
||||
inline Index index() const { return m_outerPos - (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()); }
|
||||
inline Index outer() const { return 0; }
|
||||
inline Index row() const { return IsRowMajor ? 0 : index(); }
|
||||
inline Index col() const { return IsRowMajor ? index() : 0; }
|
||||
|
||||
inline Scalar value() const { return m_value; }
|
||||
|
||||
inline GenericSparseBlockInnerIteratorImpl& operator++()
|
||||
{
|
||||
// search next non-zero entry
|
||||
while(m_outerPos<m_end)
|
||||
{
|
||||
m_outerPos++;
|
||||
typename XprType::InnerIterator it(m_block.m_matrix, m_outerPos);
|
||||
// search for the key m_innerIndex in the current outer-vector
|
||||
while(it && it.index() < m_innerIndex) ++it;
|
||||
if(it && it.index()==m_innerIndex)
|
||||
{
|
||||
m_value = it.value();
|
||||
break;
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline operator bool() const { return m_outerPos < m_end; }
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_SPARSE_BLOCK_H
|
||||
|
@ -125,7 +125,7 @@ class SparseDenseOuterProduct<Lhs,Rhs,Transpose>::InnerIterator : public _LhsNes
|
||||
inline Scalar value() const { return Base::value() * m_factor; }
|
||||
|
||||
protected:
|
||||
int m_outer;
|
||||
Index m_outer;
|
||||
Scalar m_factor;
|
||||
};
|
||||
|
||||
@ -156,7 +156,7 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, t
|
||||
{
|
||||
for(Index c=0; c<rhs.cols(); ++c)
|
||||
{
|
||||
int n = lhs.outerSize();
|
||||
Index n = lhs.outerSize();
|
||||
for(Index j=0; j<n; ++j)
|
||||
{
|
||||
typename Res::Scalar tmp(0);
|
||||
|
@ -402,7 +402,7 @@ class SparseMatrix
|
||||
* \sa insertBack, insertBackByOuterInner */
|
||||
inline void startVec(Index outer)
|
||||
{
|
||||
eigen_assert(m_outerIndex[outer]==int(m_data.size()) && "You must call startVec for each inner vector sequentially");
|
||||
eigen_assert(m_outerIndex[outer]==Index(m_data.size()) && "You must call startVec for each inner vector sequentially");
|
||||
eigen_assert(m_outerIndex[outer+1]==0 && "You must call startVec for each inner vector sequentially");
|
||||
m_outerIndex[outer+1] = m_outerIndex[outer];
|
||||
}
|
||||
@ -480,7 +480,7 @@ class SparseMatrix
|
||||
if(m_innerNonZeros != 0)
|
||||
return;
|
||||
m_innerNonZeros = static_cast<Index*>(std::malloc(m_outerSize * sizeof(Index)));
|
||||
for (int i = 0; i < m_outerSize; i++)
|
||||
for (Index i = 0; i < m_outerSize; i++)
|
||||
{
|
||||
m_innerNonZeros[i] = m_outerIndex[i+1] - m_outerIndex[i];
|
||||
}
|
||||
@ -752,8 +752,8 @@ class SparseMatrix
|
||||
else
|
||||
for (Index i=0; i<m.outerSize(); ++i)
|
||||
{
|
||||
int p = m.m_outerIndex[i];
|
||||
int pe = m.m_outerIndex[i]+m.m_innerNonZeros[i];
|
||||
Index p = m.m_outerIndex[i];
|
||||
Index pe = m.m_outerIndex[i]+m.m_innerNonZeros[i];
|
||||
Index k=p;
|
||||
for (; k<pe; ++k)
|
||||
s << "(" << m.m_data.value(k) << "," << m.m_data.index(k) << ") ";
|
||||
@ -1022,7 +1022,7 @@ void SparseMatrix<Scalar,_Options,_Index>::sumupDuplicates()
|
||||
wi.fill(-1);
|
||||
Index count = 0;
|
||||
// for each inner-vector, wi[inner_index] will hold the position of first element into the index/value buffers
|
||||
for(int j=0; j<outerSize(); ++j)
|
||||
for(Index j=0; j<outerSize(); ++j)
|
||||
{
|
||||
Index start = count;
|
||||
Index oldEnd = m_outerIndex[j]+m_innerNonZeros[j];
|
||||
|
@ -302,8 +302,8 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
|
||||
}
|
||||
else
|
||||
{
|
||||
SparseMatrix<Scalar, RowMajorBit> trans = m;
|
||||
s << static_cast<const SparseMatrixBase<SparseMatrix<Scalar, RowMajorBit> >&>(trans);
|
||||
SparseMatrix<Scalar, RowMajorBit, Index> trans = m;
|
||||
s << static_cast<const SparseMatrixBase<SparseMatrix<Scalar, RowMajorBit, Index> >&>(trans);
|
||||
}
|
||||
}
|
||||
return s;
|
||||
|
@ -16,6 +16,7 @@ template<typename Lhs, typename Rhs>
|
||||
struct SparseSparseProductReturnType
|
||||
{
|
||||
typedef typename internal::traits<Lhs>::Scalar Scalar;
|
||||
typedef typename internal::traits<Lhs>::Index Index;
|
||||
enum {
|
||||
LhsRowMajor = internal::traits<Lhs>::Flags & RowMajorBit,
|
||||
RhsRowMajor = internal::traits<Rhs>::Flags & RowMajorBit,
|
||||
@ -24,11 +25,11 @@ struct SparseSparseProductReturnType
|
||||
};
|
||||
|
||||
typedef typename internal::conditional<TransposeLhs,
|
||||
SparseMatrix<Scalar,0>,
|
||||
SparseMatrix<Scalar,0,Index>,
|
||||
typename internal::nested<Lhs,Rhs::RowsAtCompileTime>::type>::type LhsNested;
|
||||
|
||||
typedef typename internal::conditional<TransposeRhs,
|
||||
SparseMatrix<Scalar,0>,
|
||||
SparseMatrix<Scalar,0,Index>,
|
||||
typename internal::nested<Rhs,Lhs::RowsAtCompileTime>::type>::type RhsNested;
|
||||
|
||||
typedef SparseSparseProduct<LhsNested, RhsNested> Type;
|
||||
|
@ -27,7 +27,7 @@ static void sparse_sparse_product_with_pruning_impl(const Lhs& lhs, const Rhs& r
|
||||
// make sure to call innerSize/outerSize since we fake the storage order.
|
||||
Index rows = lhs.innerSize();
|
||||
Index cols = rhs.outerSize();
|
||||
//int size = lhs.outerSize();
|
||||
//Index size = lhs.outerSize();
|
||||
eigen_assert(lhs.outerSize() == rhs.innerSize());
|
||||
|
||||
// allocate a temporary buffer
|
||||
@ -100,7 +100,7 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,ColMajor,C
|
||||
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
|
||||
{
|
||||
// we need a col-major matrix to hold the result
|
||||
typedef SparseMatrix<typename ResultType::Scalar> SparseTemporaryType;
|
||||
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename ResultType::Index> SparseTemporaryType;
|
||||
SparseTemporaryType _res(res.rows(), res.cols());
|
||||
internal::sparse_sparse_product_with_pruning_impl<Lhs,Rhs,SparseTemporaryType>(lhs, rhs, _res, tolerance);
|
||||
res = _res;
|
||||
@ -126,10 +126,11 @@ struct sparse_sparse_product_with_pruning_selector<Lhs,Rhs,ResultType,RowMajor,R
|
||||
typedef typename ResultType::RealScalar RealScalar;
|
||||
static void run(const Lhs& lhs, const Rhs& rhs, ResultType& res, const RealScalar& tolerance)
|
||||
{
|
||||
typedef SparseMatrix<typename ResultType::Scalar,ColMajor> ColMajorMatrix;
|
||||
ColMajorMatrix colLhs(lhs);
|
||||
ColMajorMatrix colRhs(rhs);
|
||||
internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrix,ColMajorMatrix,ResultType>(colLhs, colRhs, res, tolerance);
|
||||
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::Index> ColMajorMatrixLhs;
|
||||
typedef SparseMatrix<typename ResultType::Scalar,ColMajor,typename Lhs::Index> ColMajorMatrixRhs;
|
||||
ColMajorMatrixLhs colLhs(lhs);
|
||||
ColMajorMatrixRhs colRhs(rhs);
|
||||
internal::sparse_sparse_product_with_pruning_impl<ColMajorMatrixLhs,ColMajorMatrixRhs,ResultType>(colLhs, colRhs, res, tolerance);
|
||||
|
||||
// let's transpose the product to get a column x column product
|
||||
// typedef SparseMatrix<typename ResultType::Scalar> SparseTemporaryType;
|
||||
|
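As an editorial aside (not part of the patch): the two selector specializations above back the user-facing pruned sparse product, which keeps only the entries of the result that are significant with respect to a reference value and tolerance. A minimal sketch, assuming two compatible sparse matrices and illustrative names:

#include <Eigen/Sparse>

// Sketch only: evaluate A*B and drop entries that are negligible relative to
// the reference value (1.0) and epsilon (1e-12) passed to pruned().
Eigen::SparseMatrix<double> prunedProduct(const Eigen::SparseMatrix<double>& A,
                                          const Eigen::SparseMatrix<double>& B)
{
  Eigen::SparseMatrix<double> C = (A * B).pruned(1.0, 1e-12);
  return C;
}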
@ -70,7 +70,7 @@ Index SparseLUImpl<Scalar,Index>::expand(VectorType& vec, Index& length, Index
|
||||
if(num_expansions == 0 || keep_prev)
|
||||
new_len = length ; // First time allocate requested
|
||||
else
|
||||
new_len = Index(alpha * length);
|
||||
new_len = (std::max)(length+1,Index(alpha * length));
|
||||
|
||||
VectorType old_vec; // Temporary vector to hold the previous values
|
||||
if (nbElts > 0 )
|
||||
@ -107,7 +107,7 @@ Index SparseLUImpl<Scalar,Index>::expand(VectorType& vec, Index& length, Index
|
||||
do
|
||||
{
|
||||
alpha = (alpha + 1)/2;
|
||||
new_len = Index(alpha * length);
|
||||
new_len = (std::max)(length+1,Index(alpha * length));
|
||||
#ifdef EIGEN_EXCEPTIONS
|
||||
try
|
||||
#endif
|
||||
|
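Both expand() hunks above replace the plain alpha * length growth with a guarded variant so that very short vectors still grow. A standalone sketch of the rule (illustrative only, not part of the patch):

#include <algorithm>

// Grow by the factor alpha, but by at least one element, so progress is made
// even when alpha * length rounds down to the current length.
inline int next_length(int length, float alpha)
{
  return (std::max)(length + 1, int(alpha * length));
}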
@ -54,8 +54,10 @@ template<typename _DecompositionType, typename Rhs> struct sparse_solve_retval_b
|
||||
static const int NbColsAtOnce = 4;
|
||||
int rhsCols = m_rhs.cols();
|
||||
int size = m_rhs.rows();
|
||||
Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmp(size,rhsCols);
|
||||
Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmpX(size,rhsCols);
|
||||
// the temporary matrices do not need more columns than NbColsAtOnce:
|
||||
int tmpCols = (std::min)(rhsCols, NbColsAtOnce);
|
||||
Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmp(size,tmpCols);
|
||||
Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmpX(size,tmpCols);
|
||||
for(int k=0; k<rhsCols; k+=NbColsAtOnce)
|
||||
{
|
||||
int actualCols = std::min<int>(rhsCols-k, NbColsAtOnce);
|
||||
|
@ -119,13 +119,13 @@ inline const Block<const Derived, CRows, CCols> topRightCorner() const
|
||||
|
||||
/** \returns an expression of a top-right corner of *this.
|
||||
*
|
||||
* \tparam CRows number of rows in corner as specified at compile time
|
||||
* \tparam CCols number of columns in corner as specified at compile time
|
||||
* \param cRows number of rows in corner as specified at run time
|
||||
* \param cCols number of columns in corner as specified at run time
|
||||
* \tparam CRows number of rows in corner as specified at compile-time
|
||||
* \tparam CCols number of columns in corner as specified at compile-time
|
||||
* \param cRows number of rows in corner as specified at run-time
|
||||
* \param cCols number of columns in corner as specified at run-time
|
||||
*
|
||||
* This function is mainly useful for corners where the number of rows is specified at compile time
|
||||
* and the number of columns is specified at run time, or vice versa. The compile-time and run-time
|
||||
* This function is mainly useful for corners where the number of rows is specified at compile-time
|
||||
* and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
|
||||
* information should not contradict. In other words, \a cRows should equal \a CRows unless
|
||||
* \a CRows is \a Dynamic, and the same for the number of columns.
|
||||
*
|
||||
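The reworded documentation above (and in the three corner hunks that follow) covers the overloads that mix a compile-time size with a run-time size. A hedged usage sketch with illustrative names:

#include <Eigen/Dense>

// Two rows fixed at compile-time, n columns supplied at run-time.
Eigen::MatrixXf cornerDemo(const Eigen::MatrixXf& m, int n)
{
  return m.topRightCorner<2, Eigen::Dynamic>(2, n);
}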
@ -198,13 +198,13 @@ inline const Block<const Derived, CRows, CCols> topLeftCorner() const
|
||||
|
||||
/** \returns an expression of a top-left corner of *this.
|
||||
*
|
||||
* \tparam CRows number of rows in corner as specified at compile time
|
||||
* \tparam CCols number of columns in corner as specified at compile time
|
||||
* \param cRows number of rows in corner as specified at run time
|
||||
* \param cCols number of columns in corner as specified at run time
|
||||
* \tparam CRows number of rows in corner as specified at compile-time
|
||||
* \tparam CCols number of columns in corner as specified at compile-time
|
||||
* \param cRows number of rows in corner as specified at run-time
|
||||
* \param cCols number of columns in corner as specified at run-time
|
||||
*
|
||||
* This function is mainly useful for corners where the number of rows is specified at compile time
|
||||
* and the number of columns is specified at run time, or vice versa. The compile-time and run-time
|
||||
* This function is mainly useful for corners where the number of rows is specified at compile-time
|
||||
* and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
|
||||
* information should not contradict. In other words, \a cRows should equal \a CRows unless
|
||||
* \a CRows is \a Dynamic, and the same for the number of columns.
|
||||
*
|
||||
@ -277,13 +277,13 @@ inline const Block<const Derived, CRows, CCols> bottomRightCorner() const
|
||||
|
||||
/** \returns an expression of a bottom-right corner of *this.
|
||||
*
|
||||
* \tparam CRows number of rows in corner as specified at compile time
|
||||
* \tparam CCols number of columns in corner as specified at compile time
|
||||
* \param cRows number of rows in corner as specified at run time
|
||||
* \param cCols number of columns in corner as specified at run time
|
||||
* \tparam CRows number of rows in corner as specified at compile-time
|
||||
* \tparam CCols number of columns in corner as specified at compile-time
|
||||
* \param cRows number of rows in corner as specified at run-time
|
||||
* \param cCols number of columns in corner as specified at run-time
|
||||
*
|
||||
* This function is mainly useful for corners where the number of rows is specified at compile time
|
||||
* and the number of columns is specified at run time, or vice versa. The compile-time and run-time
|
||||
* This function is mainly useful for corners where the number of rows is specified at compile-time
|
||||
* and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
|
||||
* information should not contradict. In other words, \a cRows should equal \a CRows unless
|
||||
* \a CRows is \a Dynamic, and the same for the number of columns.
|
||||
*
|
||||
@ -356,13 +356,13 @@ inline const Block<const Derived, CRows, CCols> bottomLeftCorner() const
|
||||
|
||||
/** \returns an expression of a bottom-left corner of *this.
|
||||
*
|
||||
* \tparam CRows number of rows in corner as specified at compile time
|
||||
* \tparam CCols number of columns in corner as specified at compile time
|
||||
* \param cRows number of rows in corner as specified at run time
|
||||
* \param cCols number of columns in corner as specified at run time
|
||||
* \tparam CRows number of rows in corner as specified at compile-time
|
||||
* \tparam CCols number of columns in corner as specified at compile-time
|
||||
* \param cRows number of rows in corner as specified at run-time
|
||||
* \param cCols number of columns in corner as specified at run-time
|
||||
*
|
||||
* This function is mainly useful for corners where the number of rows is specified at compile time
|
||||
* and the number of columns is specified at run time, or vice versa. The compile-time and run-time
|
||||
* This function is mainly useful for corners where the number of rows is specified at compile-time
|
||||
* and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
|
||||
* information should not contradict. In other words, \a cRows should equal \a CRows unless
|
||||
* \a CRows is \a Dynamic, and the same for the number of columns.
|
||||
*
|
||||
@ -410,7 +410,11 @@ inline ConstRowsBlockXpr topRows(Index n) const
|
||||
|
||||
/** \returns a block consisting of the top rows of *this.
|
||||
*
|
||||
* \tparam N the number of rows in the block
|
||||
* \tparam N the number of rows in the block as specified at compile-time
|
||||
* \param n the number of rows in the block as specified at run-time
|
||||
*
|
||||
* The compile-time and run-time information should not contradict. In other words,
|
||||
* \a n should equal \a N unless \a N is \a Dynamic.
|
||||
*
|
||||
* Example: \include MatrixBase_template_int_topRows.cpp
|
||||
* Output: \verbinclude MatrixBase_template_int_topRows.out
|
||||
@ -419,17 +423,17 @@ inline ConstRowsBlockXpr topRows(Index n) const
|
||||
*/
|
||||
template<int N>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename NRowsBlockXpr<N>::Type topRows()
|
||||
inline typename NRowsBlockXpr<N>::Type topRows(Index n = N)
|
||||
{
|
||||
return typename NRowsBlockXpr<N>::Type(derived(), 0, 0, N, cols());
|
||||
return typename NRowsBlockXpr<N>::Type(derived(), 0, 0, n, cols());
|
||||
}
|
||||
|
||||
/** This is the const version of topRows<int>().*/
|
||||
template<int N>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename ConstNRowsBlockXpr<N>::Type topRows() const
|
||||
inline typename ConstNRowsBlockXpr<N>::Type topRows(Index n = N) const
|
||||
{
|
||||
return typename ConstNRowsBlockXpr<N>::Type(derived(), 0, 0, N, cols());
|
||||
return typename ConstNRowsBlockXpr<N>::Type(derived(), 0, 0, n, cols());
|
||||
}
|
||||
|
||||
|
||||
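With the defaulted run-time argument introduced above, the fixed-size overloads also cover the Dynamic case. A small sketch (illustrative names, not part of the patch):

#include <Eigen/Dense>

void topRowsDemo(Eigen::MatrixXd& m, int n)
{
  m.topRows<3>().setZero();                // run-time size defaults to N = 3
  m.topRows<Eigen::Dynamic>(n).setOnes();  // size resolved only at run-time
}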
@ -458,7 +462,11 @@ inline ConstRowsBlockXpr bottomRows(Index n) const
|
||||
|
||||
/** \returns a block consisting of the bottom rows of *this.
|
||||
*
|
||||
* \tparam N the number of rows in the block
|
||||
* \tparam N the number of rows in the block as specified at compile-time
|
||||
* \param n the number of rows in the block as specified at run-time
|
||||
*
|
||||
* The compile-time and run-time information should not contradict. In other words,
|
||||
* \a n should equal \a N unless \a N is \a Dynamic.
|
||||
*
|
||||
* Example: \include MatrixBase_template_int_bottomRows.cpp
|
||||
* Output: \verbinclude MatrixBase_template_int_bottomRows.out
|
||||
@ -467,17 +475,17 @@ inline ConstRowsBlockXpr bottomRows(Index n) const
|
||||
*/
|
||||
template<int N>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename NRowsBlockXpr<N>::Type bottomRows()
|
||||
inline typename NRowsBlockXpr<N>::Type bottomRows(Index n = N)
|
||||
{
|
||||
return typename NRowsBlockXpr<N>::Type(derived(), rows() - N, 0, N, cols());
|
||||
return typename NRowsBlockXpr<N>::Type(derived(), rows() - n, 0, n, cols());
|
||||
}
|
||||
|
||||
/** This is the const version of bottomRows<int>().*/
|
||||
template<int N>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename ConstNRowsBlockXpr<N>::Type bottomRows() const
|
||||
inline typename ConstNRowsBlockXpr<N>::Type bottomRows(Index n = N) const
|
||||
{
|
||||
return typename ConstNRowsBlockXpr<N>::Type(derived(), rows() - N, 0, N, cols());
|
||||
return typename ConstNRowsBlockXpr<N>::Type(derived(), rows() - n, 0, n, cols());
|
||||
}
|
||||
|
||||
|
||||
@ -485,7 +493,7 @@ inline typename ConstNRowsBlockXpr<N>::Type bottomRows() const
|
||||
/** \returns a block consisting of a range of rows of *this.
|
||||
*
|
||||
* \param startRow the index of the first row in the block
|
||||
* \param numRows the number of rows in the block
|
||||
* \param n the number of rows in the block
|
||||
*
|
||||
* Example: \include DenseBase_middleRows_int.cpp
|
||||
* Output: \verbinclude DenseBase_middleRows_int.out
|
||||
@ -493,22 +501,26 @@ inline typename ConstNRowsBlockXpr<N>::Type bottomRows() const
|
||||
* \sa class Block, block(Index,Index,Index,Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline RowsBlockXpr middleRows(Index startRow, Index numRows)
|
||||
inline RowsBlockXpr middleRows(Index startRow, Index n)
|
||||
{
|
||||
return RowsBlockXpr(derived(), startRow, 0, numRows, cols());
|
||||
return RowsBlockXpr(derived(), startRow, 0, n, cols());
|
||||
}
|
||||
|
||||
/** This is the const version of middleRows(Index,Index).*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ConstRowsBlockXpr middleRows(Index startRow, Index numRows) const
|
||||
inline ConstRowsBlockXpr middleRows(Index startRow, Index n) const
|
||||
{
|
||||
return ConstRowsBlockXpr(derived(), startRow, 0, numRows, cols());
|
||||
return ConstRowsBlockXpr(derived(), startRow, 0, n, cols());
|
||||
}
|
||||
|
||||
/** \returns a block consisting of a range of rows of *this.
|
||||
*
|
||||
* \tparam N the number of rows in the block
|
||||
* \tparam N the number of rows in the block as specified at compile-time
|
||||
* \param startRow the index of the first row in the block
|
||||
* \param n the number of rows in the block as specified at run-time
|
||||
*
|
||||
* The compile-time and run-time information should not contradict. In other words,
|
||||
* \a n should equal \a N unless \a N is \a Dynamic.
|
||||
*
|
||||
* Example: \include DenseBase_template_int_middleRows.cpp
|
||||
* Output: \verbinclude DenseBase_template_int_middleRows.out
|
||||
@ -517,17 +529,17 @@ inline ConstRowsBlockXpr middleRows(Index startRow, Index numRows) const
|
||||
*/
|
||||
template<int N>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename NRowsBlockXpr<N>::Type middleRows(Index startRow)
|
||||
inline typename NRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = N)
|
||||
{
|
||||
return typename NRowsBlockXpr<N>::Type(derived(), startRow, 0, N, cols());
|
||||
return typename NRowsBlockXpr<N>::Type(derived(), startRow, 0, n, cols());
|
||||
}
|
||||
|
||||
/** This is the const version of middleRows<int>().*/
|
||||
template<int N>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename ConstNRowsBlockXpr<N>::Type middleRows(Index startRow) const
|
||||
inline typename ConstNRowsBlockXpr<N>::Type middleRows(Index startRow, Index n = N) const
|
||||
{
|
||||
return typename ConstNRowsBlockXpr<N>::Type(derived(), startRow, 0, N, cols());
|
||||
return typename ConstNRowsBlockXpr<N>::Type(derived(), startRow, 0, n, cols());
|
||||
}
|
||||
|
||||
|
||||
@ -556,7 +568,11 @@ inline ConstColsBlockXpr leftCols(Index n) const
|
||||
|
||||
/** \returns a block consisting of the left columns of *this.
|
||||
*
|
||||
* \tparam N the number of columns in the block
|
||||
* \tparam N the number of columns in the block as specified at compile-time
|
||||
* \param n the number of columns in the block as specified at run-time
|
||||
*
|
||||
* The compile-time and run-time information should not contradict. In other words,
|
||||
* \a n should equal \a N unless \a N is \a Dynamic.
|
||||
*
|
||||
* Example: \include MatrixBase_template_int_leftCols.cpp
|
||||
* Output: \verbinclude MatrixBase_template_int_leftCols.out
|
||||
@ -565,17 +581,17 @@ inline ConstColsBlockXpr leftCols(Index n) const
|
||||
*/
|
||||
template<int N>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename NColsBlockXpr<N>::Type leftCols()
|
||||
inline typename NColsBlockXpr<N>::Type leftCols(Index n = N)
|
||||
{
|
||||
return typename NColsBlockXpr<N>::Type(derived(), 0, 0, rows(), N);
|
||||
return typename NColsBlockXpr<N>::Type(derived(), 0, 0, rows(), n);
|
||||
}
|
||||
|
||||
/** This is the const version of leftCols<int>().*/
|
||||
template<int N>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename ConstNColsBlockXpr<N>::Type leftCols() const
|
||||
inline typename ConstNColsBlockXpr<N>::Type leftCols(Index n = N) const
|
||||
{
|
||||
return typename ConstNColsBlockXpr<N>::Type(derived(), 0, 0, rows(), N);
|
||||
return typename ConstNColsBlockXpr<N>::Type(derived(), 0, 0, rows(), n);
|
||||
}
|
||||
|
||||
|
||||
@ -604,7 +620,11 @@ inline ConstColsBlockXpr rightCols(Index n) const
|
||||
|
||||
/** \returns a block consisting of the right columns of *this.
|
||||
*
|
||||
* \tparam N the number of columns in the block
|
||||
* \tparam N the number of columns in the block as specified at compile-time
|
||||
* \param n the number of columns in the block as specified at run-time
|
||||
*
|
||||
* The compile-time and run-time information should not contradict. In other words,
|
||||
* \a n should equal \a N unless \a N is \a Dynamic.
|
||||
*
|
||||
* Example: \include MatrixBase_template_int_rightCols.cpp
|
||||
* Output: \verbinclude MatrixBase_template_int_rightCols.out
|
||||
@ -613,17 +633,17 @@ inline ConstColsBlockXpr rightCols(Index n) const
|
||||
*/
|
||||
template<int N>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename NColsBlockXpr<N>::Type rightCols()
|
||||
inline typename NColsBlockXpr<N>::Type rightCols(Index n = N)
|
||||
{
|
||||
return typename NColsBlockXpr<N>::Type(derived(), 0, cols() - N, rows(), N);
|
||||
return typename NColsBlockXpr<N>::Type(derived(), 0, cols() - n, rows(), n);
|
||||
}
|
||||
|
||||
/** This is the const version of rightCols<int>().*/
|
||||
template<int N>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename ConstNColsBlockXpr<N>::Type rightCols() const
|
||||
inline typename ConstNColsBlockXpr<N>::Type rightCols(Index n = N) const
|
||||
{
|
||||
return typename ConstNColsBlockXpr<N>::Type(derived(), 0, cols() - N, rows(), N);
|
||||
return typename ConstNColsBlockXpr<N>::Type(derived(), 0, cols() - n, rows(), n);
|
||||
}
|
||||
|
||||
|
||||
@ -653,8 +673,12 @@ inline ConstColsBlockXpr middleCols(Index startCol, Index numCols) const
|
||||
|
||||
/** \returns a block consisting of a range of columns of *this.
|
||||
*
|
||||
* \tparam N the number of columns in the block
|
||||
* \tparam N the number of columns in the block as specified at compile-time
|
||||
* \param startCol the index of the first column in the block
|
||||
* \param n the number of columns in the block as specified at run-time
|
||||
*
|
||||
* The compile-time and run-time information should not contradict. In other words,
|
||||
* \a n should equal \a N unless \a N is \a Dynamic.
|
||||
*
|
||||
* Example: \include DenseBase_template_int_middleCols.cpp
|
||||
* Output: \verbinclude DenseBase_template_int_middleCols.out
|
||||
@ -663,17 +687,17 @@ inline ConstColsBlockXpr middleCols(Index startCol, Index numCols) const
|
||||
*/
|
||||
template<int N>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename NColsBlockXpr<N>::Type middleCols(Index startCol)
|
||||
inline typename NColsBlockXpr<N>::Type middleCols(Index startCol, Index n = N)
|
||||
{
|
||||
return typename NColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), N);
|
||||
return typename NColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), n);
|
||||
}
|
||||
|
||||
/** This is the const version of middleCols<int>().*/
|
||||
template<int N>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename ConstNColsBlockXpr<N>::Type middleCols(Index startCol) const
|
||||
inline typename ConstNColsBlockXpr<N>::Type middleCols(Index startCol, Index n = N) const
|
||||
{
|
||||
return typename ConstNColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), N);
|
||||
return typename ConstNColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), n);
|
||||
}
|
||||
|
||||
|
||||
@ -711,15 +735,15 @@ inline const Block<const Derived, BlockRows, BlockCols> block(Index startRow, In
|
||||
|
||||
/** \returns an expression of a block in *this.
|
||||
*
|
||||
* \tparam BlockRows number of rows in block as specified at compile time
|
||||
* \tparam BlockCols number of columns in block as specified at compile time
|
||||
* \tparam BlockRows number of rows in block as specified at compile-time
|
||||
* \tparam BlockCols number of columns in block as specified at compile-time
|
||||
* \param startRow the first row in the block
|
||||
* \param startCol the first column in the block
|
||||
* \param blockRows number of rows in block as specified at run time
|
||||
* \param blockCols number of columns in block as specified at run time
|
||||
* \param blockRows number of rows in block as specified at run-time
|
||||
* \param blockCols number of columns in block as specified at run-time
|
||||
*
|
||||
* This function is mainly useful for blocks where the number of rows is specified at compile time
|
||||
* and the number of columns is specified at run time, or vice versa. The compile-time and run-time
|
||||
* This function is mainly useful for blocks where the number of rows is specified at compile-time
|
||||
* and the number of columns is specified at run-time, or vice versa. The compile-time and run-time
|
||||
* information should not contradict. In other words, \a blockRows should equal \a BlockRows unless
|
||||
* \a BlockRows is \a Dynamic, and the same for the number of columns.
|
||||
*
|
||||
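The same convention applies to the general block() overload documented above: the template arguments fix what is known at compile-time, and the run-time arguments must agree with them unless the corresponding template argument is Dynamic. A brief sketch (illustrative only):

#include <Eigen/Dense>

// A 2 x n block starting at (i, j): two rows fixed at compile-time,
// n columns supplied at run-time.
Eigen::MatrixXf blockDemo(const Eigen::MatrixXf& m, int i, int j, int n)
{
  return m.block<2, Eigen::Dynamic>(i, j, 2, n);
}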
@ -786,7 +810,7 @@ inline ConstRowXpr row(Index i) const
|
||||
* \only_for_vectors
|
||||
*
|
||||
* \param start the first coefficient in the segment
|
||||
* \param vecSize the number of coefficients in the segment
|
||||
* \param n the number of coefficients in the segment
|
||||
*
|
||||
* Example: \include MatrixBase_segment_int_int.cpp
|
||||
* Output: \verbinclude MatrixBase_segment_int_int.out
|
||||
@ -798,26 +822,26 @@ inline ConstRowXpr row(Index i) const
|
||||
* \sa class Block, segment(Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline SegmentReturnType segment(Index start, Index vecSize)
|
||||
inline SegmentReturnType segment(Index start, Index n)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return SegmentReturnType(derived(), start, vecSize);
|
||||
return SegmentReturnType(derived(), start, n);
|
||||
}
|
||||
|
||||
|
||||
/** This is the const version of segment(Index,Index).*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ConstSegmentReturnType segment(Index start, Index vecSize) const
|
||||
inline ConstSegmentReturnType segment(Index start, Index n) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return ConstSegmentReturnType(derived(), start, vecSize);
|
||||
return ConstSegmentReturnType(derived(), start, n);
|
||||
}
|
||||
|
||||
/** \returns a dynamic-size expression of the first coefficients of *this.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* \param vecSize the number of coefficients in the block
|
||||
* \param n the number of coefficients in the segment
|
||||
*
|
||||
* Example: \include MatrixBase_start_int.cpp
|
||||
* Output: \verbinclude MatrixBase_start_int.out
|
||||
@ -829,26 +853,25 @@ inline ConstSegmentReturnType segment(Index start, Index vecSize) const
|
||||
* \sa class Block, block(Index,Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline SegmentReturnType head(Index vecSize)
|
||||
inline SegmentReturnType head(Index n)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return SegmentReturnType(derived(), 0, vecSize);
|
||||
return SegmentReturnType(derived(), 0, n);
|
||||
}
|
||||
|
||||
/** This is the const version of head(Index).*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ConstSegmentReturnType
|
||||
head(Index vecSize) const
|
||||
inline ConstSegmentReturnType head(Index n) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return ConstSegmentReturnType(derived(), 0, vecSize);
|
||||
return ConstSegmentReturnType(derived(), 0, n);
|
||||
}
|
||||
|
||||
/** \returns a dynamic-size expression of the last coefficients of *this.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* \param vecSize the number of coefficients in the block
|
||||
* \param n the number of coefficients in the segment
|
||||
*
|
||||
* Example: \include MatrixBase_end_int.cpp
|
||||
* Output: \verbinclude MatrixBase_end_int.out
|
||||
@ -860,102 +883,113 @@ inline ConstSegmentReturnType
|
||||
* \sa class Block, block(Index,Index)
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline SegmentReturnType tail(Index vecSize)
|
||||
inline SegmentReturnType tail(Index n)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return SegmentReturnType(derived(), this->size() - vecSize, vecSize);
|
||||
return SegmentReturnType(derived(), this->size() - n, n);
|
||||
}
|
||||
|
||||
/** This is the const version of tail(Index).*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline ConstSegmentReturnType tail(Index vecSize) const
|
||||
inline ConstSegmentReturnType tail(Index n) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return ConstSegmentReturnType(derived(), this->size() - vecSize, vecSize);
|
||||
return ConstSegmentReturnType(derived(), this->size() - n, n);
|
||||
}
|
||||
|
||||
/** \returns a fixed-size expression of a segment (i.e. a vector block) in \c *this
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* The template parameter \a Size is the number of coefficients in the block
|
||||
* \tparam N the number of coefficients in the segment as specified at compile-time
|
||||
* \param start the index of the first element in the segment
|
||||
* \param n the number of coefficients in the segment as specified at run-time
|
||||
*
|
||||
* \param start the index of the first element of the sub-vector
|
||||
* The compile-time and run-time information should not contradict. In other words,
|
||||
* \a n should equal \a N unless \a N is \a Dynamic.
|
||||
*
|
||||
* Example: \include MatrixBase_template_int_segment.cpp
|
||||
* Output: \verbinclude MatrixBase_template_int_segment.out
|
||||
*
|
||||
* \sa class Block
|
||||
*/
|
||||
template<int Size>
|
||||
template<int N>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename FixedSegmentReturnType<Size>::Type segment(Index start)
|
||||
inline typename FixedSegmentReturnType<N>::Type segment(Index start, Index n = N)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return typename FixedSegmentReturnType<Size>::Type(derived(), start);
|
||||
return typename FixedSegmentReturnType<N>::Type(derived(), start, n);
|
||||
}
|
||||
|
||||
/** This is the const version of segment<int>(Index).*/
|
||||
template<int Size>
|
||||
template<int N>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename ConstFixedSegmentReturnType<Size>::Type segment(Index start) const
|
||||
inline typename ConstFixedSegmentReturnType<N>::Type segment(Index start, Index n = N) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return typename ConstFixedSegmentReturnType<Size>::Type(derived(), start);
|
||||
return typename ConstFixedSegmentReturnType<N>::Type(derived(), start, n);
|
||||
}
|
||||
|
||||
/** \returns a fixed-size expression of the first coefficients of *this.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* The template parameter \a Size is the number of coefficients in the block
|
||||
* \tparam N the number of coefficients in the segment as specified at compile-time
|
||||
* \param n the number of coefficients in the segment as specified at run-time
|
||||
*
|
||||
* The compile-time and run-time information should not contradict. In other words,
|
||||
* \a n should equal \a N unless \a N is \a Dynamic.
|
||||
*
|
||||
* Example: \include MatrixBase_template_int_start.cpp
|
||||
* Output: \verbinclude MatrixBase_template_int_start.out
|
||||
*
|
||||
* \sa class Block
|
||||
*/
|
||||
template<int Size>
|
||||
template<int N>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename FixedSegmentReturnType<Size>::Type head()
|
||||
inline typename FixedSegmentReturnType<N>::Type head(Index n = N)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return typename FixedSegmentReturnType<Size>::Type(derived(), 0);
|
||||
return typename FixedSegmentReturnType<N>::Type(derived(), 0, n);
|
||||
}
|
||||
|
||||
/** This is the const version of head<int>().*/
|
||||
template<int Size>
|
||||
template<int N>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename ConstFixedSegmentReturnType<Size>::Type head() const
|
||||
inline typename ConstFixedSegmentReturnType<N>::Type head(Index n = N) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return typename ConstFixedSegmentReturnType<Size>::Type(derived(), 0);
|
||||
return typename ConstFixedSegmentReturnType<N>::Type(derived(), 0, n);
|
||||
}
|
||||
|
||||
/** \returns a fixed-size expression of the last coefficients of *this.
|
||||
*
|
||||
* \only_for_vectors
|
||||
*
|
||||
* The template parameter \a Size is the number of coefficients in the block
|
||||
* \tparam N the number of coefficients in the segment as specified at compile-time
|
||||
* \param n the number of coefficients in the segment as specified at run-time
|
||||
*
|
||||
* The compile-time and run-time information should not contradict. In other words,
|
||||
* \a n should equal \a N unless \a N is \a Dynamic.
|
||||
*
|
||||
* Example: \include MatrixBase_template_int_end.cpp
|
||||
* Output: \verbinclude MatrixBase_template_int_end.out
|
||||
*
|
||||
* \sa class Block
|
||||
*/
|
||||
template<int Size>
|
||||
template<int N>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename FixedSegmentReturnType<Size>::Type tail()
|
||||
inline typename FixedSegmentReturnType<N>::Type tail(Index n = N)
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return typename FixedSegmentReturnType<Size>::Type(derived(), size() - Size);
|
||||
return typename FixedSegmentReturnType<N>::Type(derived(), size() - n);
|
||||
}
|
||||
|
||||
/** This is the const version of tail<int>.*/
|
||||
template<int Size>
|
||||
template<int N>
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline typename ConstFixedSegmentReturnType<Size>::Type tail() const
|
||||
inline typename ConstFixedSegmentReturnType<N>::Type tail(Index n = N) const
|
||||
{
|
||||
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
|
||||
return typename ConstFixedSegmentReturnType<Size>::Type(derived(), size() - Size);
|
||||
return typename ConstFixedSegmentReturnType<N>::Type(derived(), size() - n);
|
||||
}
|
||||
|
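Putting the renamed template parameter and the new run-time argument together, the fixed-size vector blocks can be used as sketched below (illustrative names, not part of the patch):

#include <Eigen/Dense>
#include <iostream>

void segmentDemo(Eigen::VectorXf& v, int n)
{
  Eigen::Vector4f h = v.head<4>();                      // run-time size defaults to N = 4
  Eigen::VectorXf s = v.segment<Eigen::Dynamic>(1, n);  // length known only at run-time
  v.tail<4>(4).setZero();                               // n must equal N unless N is Dynamic
  std::cout << h.sum() + s.sum() << "\n";
}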
@ -32,25 +32,25 @@ EIGEN_DONT_INLINE typename T::Scalar lapackNorm(T& v)
|
||||
Scalar ssq = 1;
|
||||
for (int i=0;i<n;++i)
|
||||
{
|
||||
Scalar ax = internal::abs(v.coeff(i));
|
||||
Scalar ax = std::abs(v.coeff(i));
|
||||
if (scale >= ax)
|
||||
{
|
||||
ssq += internal::abs2(ax/scale);
|
||||
ssq += numext::abs2(ax/scale);
|
||||
}
|
||||
else
|
||||
{
|
||||
ssq = Scalar(1) + ssq * internal::abs2(scale/ax);
|
||||
ssq = Scalar(1) + ssq * numext::abs2(scale/ax);
|
||||
scale = ax;
|
||||
}
|
||||
}
|
||||
return scale * internal::sqrt(ssq);
|
||||
return scale * std::sqrt(ssq);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DONT_INLINE typename T::Scalar twopassNorm(T& v)
|
||||
{
|
||||
typedef typename T::Scalar Scalar;
|
||||
Scalar s = v.cwise().abs().maxCoeff();
|
||||
Scalar s = v.array().abs().maxCoeff();
|
||||
return s*(v/s).norm();
|
||||
}
|
||||
|
||||
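For context (not part of this benchmark): the algorithms compared here correspond to the robust norm variants that Eigen exposes on dense vectors, so user code rarely needs to implement them by hand:

#include <Eigen/Dense>
#include <iostream>

void normDemo(const Eigen::VectorXd& v)
{
  std::cout << v.norm()       << "\n"   // plain sqrt of the sum of squares
            << v.stableNorm() << "\n"   // rescales to avoid overflow/underflow
            << v.blueNorm()   << "\n"   // Blue's single-pass algorithm
            << v.hypotNorm()  << "\n";  // based on repeated hypot()
}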
@ -73,16 +73,20 @@ EIGEN_DONT_INLINE typename T::Scalar divacNorm(T& v)
|
||||
v(i) = v(2*i) + v(2*i+1);
|
||||
n = n/2;
|
||||
}
|
||||
return internal::sqrt(v(0));
|
||||
return std::sqrt(v(0));
|
||||
}
|
||||
|
||||
namespace Eigen {
|
||||
namespace internal {
|
||||
#ifdef EIGEN_VECTORIZE
|
||||
Packet4f internal::plt(const Packet4f& a, Packet4f& b) { return _mm_cmplt_ps(a,b); }
|
||||
Packet2d internal::plt(const Packet2d& a, Packet2d& b) { return _mm_cmplt_pd(a,b); }
|
||||
Packet4f plt(const Packet4f& a, Packet4f& b) { return _mm_cmplt_ps(a,b); }
|
||||
Packet2d plt(const Packet2d& a, Packet2d& b) { return _mm_cmplt_pd(a,b); }
|
||||
|
||||
Packet4f internal::pandnot(const Packet4f& a, Packet4f& b) { return _mm_andnot_ps(a,b); }
|
||||
Packet2d internal::pandnot(const Packet2d& a, Packet2d& b) { return _mm_andnot_pd(a,b); }
|
||||
Packet4f pandnot(const Packet4f& a, Packet4f& b) { return _mm_andnot_ps(a,b); }
|
||||
Packet2d pandnot(const Packet2d& a, Packet2d& b) { return _mm_andnot_pd(a,b); }
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
|
||||
@ -126,7 +130,7 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
|
||||
|
||||
overfl = rbig*s2m; // overfow boundary for abig
|
||||
eps = std::pow(ibeta, 1-it);
|
||||
relerr = internal::sqrt(eps); // tolerance for neglecting asml
|
||||
relerr = std::sqrt(eps); // tolerance for neglecting asml
|
||||
abig = 1.0/eps - 1.0;
|
||||
if (Scalar(nbig)>abig) nmax = abig; // largest safe n
|
||||
else nmax = nbig;
|
||||
@ -134,13 +138,13 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
|
||||
|
||||
typedef typename internal::packet_traits<Scalar>::type Packet;
|
||||
const int ps = internal::packet_traits<Scalar>::size;
|
||||
Packet pasml = internal::pset1(Scalar(0));
|
||||
Packet pamed = internal::pset1(Scalar(0));
|
||||
Packet pabig = internal::pset1(Scalar(0));
|
||||
Packet ps2m = internal::pset1(s2m);
|
||||
Packet ps1m = internal::pset1(s1m);
|
||||
Packet pb2 = internal::pset1(b2);
|
||||
Packet pb1 = internal::pset1(b1);
|
||||
Packet pasml = internal::pset1<Packet>(Scalar(0));
|
||||
Packet pamed = internal::pset1<Packet>(Scalar(0));
|
||||
Packet pabig = internal::pset1<Packet>(Scalar(0));
|
||||
Packet ps2m = internal::pset1<Packet>(s2m);
|
||||
Packet ps1m = internal::pset1<Packet>(s1m);
|
||||
Packet pb2 = internal::pset1<Packet>(b2);
|
||||
Packet pb1 = internal::pset1<Packet>(b1);
|
||||
for(int j=0; j<v.size(); j+=ps)
|
||||
{
|
||||
Packet ax = internal::pabs(v.template packet<Aligned>(j));
|
||||
@ -170,7 +174,7 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
|
||||
Scalar amed = internal::predux(pamed);
|
||||
if(abig > Scalar(0))
|
||||
{
|
||||
abig = internal::sqrt(abig);
|
||||
abig = std::sqrt(abig);
|
||||
if(abig > overfl)
|
||||
{
|
||||
eigen_assert(false && "overflow");
|
||||
@ -179,7 +183,7 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
|
||||
if(amed > Scalar(0))
|
||||
{
|
||||
abig = abig/s2m;
|
||||
amed = internal::sqrt(amed);
|
||||
amed = std::sqrt(amed);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -191,24 +195,24 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
|
||||
{
|
||||
if (amed > Scalar(0))
|
||||
{
|
||||
abig = internal::sqrt(amed);
|
||||
amed = internal::sqrt(asml) / s1m;
|
||||
abig = std::sqrt(amed);
|
||||
amed = std::sqrt(asml) / s1m;
|
||||
}
|
||||
else
|
||||
{
|
||||
return internal::sqrt(asml)/s1m;
|
||||
return std::sqrt(asml)/s1m;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return internal::sqrt(amed);
|
||||
return std::sqrt(amed);
|
||||
}
|
||||
asml = std::min(abig, amed);
|
||||
abig = std::max(abig, amed);
|
||||
if(asml <= abig*relerr)
|
||||
return abig;
|
||||
else
|
||||
return abig * internal::sqrt(Scalar(1) + internal::abs2(asml/abig));
|
||||
return abig * std::sqrt(Scalar(1) + numext::abs2(asml/abig));
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -224,22 +228,22 @@ EIGEN_DONT_INLINE typename T::Scalar pblueNorm(const T& v)
|
||||
for (int i=0; i<iters; ++i) NRM(vd); \
|
||||
td.stop(); \
|
||||
} \
|
||||
for (int k=0; k<std::max(1,tries/3); ++k) { \
|
||||
/*for (int k=0; k<std::max(1,tries/3); ++k) { \
|
||||
tcf.start(); \
|
||||
for (int i=0; i<iters; ++i) NRM(vcf); \
|
||||
tcf.stop(); \
|
||||
} \
|
||||
} */\
|
||||
std::cout << #NRM << "\t" << tf.value() << " " << td.value() << " " << tcf.value() << "\n"; \
|
||||
}
|
||||
|
||||
void check_accuracy(double basef, double based, int s)
|
||||
{
|
||||
double yf = basef * internal::abs(internal::random<double>());
|
||||
double yd = based * internal::abs(internal::random<double>());
|
||||
double yf = basef * std::abs(internal::random<double>());
|
||||
double yd = based * std::abs(internal::random<double>());
|
||||
VectorXf vf = VectorXf::Ones(s) * yf;
|
||||
VectorXd vd = VectorXd::Ones(s) * yd;
|
||||
|
||||
std::cout << "reference\t" << internal::sqrt(double(s))*yf << "\t" << internal::sqrt(double(s))*yd << "\n";
|
||||
std::cout << "reference\t" << std::sqrt(double(s))*yf << "\t" << std::sqrt(double(s))*yd << "\n";
|
||||
std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\n";
|
||||
std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\n";
|
||||
std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\n";
|
||||
@ -255,8 +259,8 @@ void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s)
|
||||
VectorXd vd(s);
|
||||
for (int i=0; i<s; ++i)
|
||||
{
|
||||
vf[i] = internal::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ef0,ef1));
|
||||
vd[i] = internal::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ed0,ed1));
|
||||
vf[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ef0,ef1));
|
||||
vd[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ed0,ed1));
|
||||
}
|
||||
|
||||
//std::cout << "reference\t" << internal::sqrt(double(s))*yf << "\t" << internal::sqrt(double(s))*yd << "\n";
|
||||
@ -321,10 +325,10 @@ int main(int argc, char** argv)
|
||||
VectorXcf vcf = VectorXcf::Random(1024*1024*32) * y;
|
||||
BENCH_PERF(sqsumNorm);
|
||||
BENCH_PERF(blueNorm);
|
||||
// BENCH_PERF(pblueNorm);
|
||||
// BENCH_PERF(lapackNorm);
|
||||
// BENCH_PERF(hypotNorm);
|
||||
// BENCH_PERF(twopassNorm);
|
||||
BENCH_PERF(pblueNorm);
|
||||
BENCH_PERF(lapackNorm);
|
||||
BENCH_PERF(hypotNorm);
|
||||
BENCH_PERF(twopassNorm);
|
||||
BENCH_PERF(bl2passNorm);
|
||||
}
|
||||
|
||||
@ -336,10 +340,10 @@ int main(int argc, char** argv)
|
||||
VectorXcf vcf = VectorXcf::Random(512) * y;
|
||||
BENCH_PERF(sqsumNorm);
|
||||
BENCH_PERF(blueNorm);
|
||||
// BENCH_PERF(pblueNorm);
|
||||
// BENCH_PERF(lapackNorm);
|
||||
// BENCH_PERF(hypotNorm);
|
||||
// BENCH_PERF(twopassNorm);
|
||||
BENCH_PERF(pblueNorm);
|
||||
BENCH_PERF(lapackNorm);
|
||||
BENCH_PERF(hypotNorm);
|
||||
BENCH_PERF(twopassNorm);
|
||||
BENCH_PERF(bl2passNorm);
|
||||
}
|
||||
}
|
||||
|
@ -7,6 +7,9 @@ workaround_9220(Fortran EIGEN_Fortran_COMPILER_WORKS)
|
||||
|
||||
if(EIGEN_Fortran_COMPILER_WORKS)
|
||||
enable_language(Fortran OPTIONAL)
|
||||
if(NOT CMAKE_Fortran_COMPILER)
|
||||
set(EIGEN_Fortran_COMPILER_WORKS OFF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
add_custom_target(blas)
|
||||
|
@ -23,7 +23,7 @@ function(workaround_9220 language language_works)
|
||||
#message("DEBUG: language = ${language}")
|
||||
set(text
|
||||
"project(test NONE)
|
||||
cmake_minimum_required(VERSION 2.6.0)
|
||||
cmake_minimum_required(VERSION 2.8.0)
|
||||
set (CMAKE_Fortran_FLAGS \"${CMAKE_Fortran_FLAGS}\")
|
||||
set (CMAKE_EXE_LINKER_FLAGS \"${CMAKE_EXE_LINKER_FLAGS}\")
|
||||
enable_language(${language} OPTIONAL)
|
||||
|
@ -16,8 +16,8 @@ double s;
|
||||
// Basic usage
|
||||
// Eigen // Matlab // comments
|
||||
x.size() // length(x) // vector size
|
||||
C.rows() // size(C)(1) // number of rows
|
||||
C.cols() // size(C)(2) // number of columns
|
||||
C.rows() // size(C,1) // number of rows
|
||||
C.cols() // size(C,2) // number of columns
|
||||
x(i) // x(i+1) // Matlab is 1-based
|
||||
C(i,j) // C(i+1,j+1) //
|
||||
|
||||
@ -51,20 +51,34 @@ v.setLinSpace(size,low,high) // v = linspace(low,high,size)'
|
||||
// Eigen // Matlab
|
||||
x.head(n) // x(1:n)
|
||||
x.head<n>() // x(1:n)
|
||||
x.tail(n) // N = rows(x); x(N - n: N)
|
||||
x.tail<n>() // N = rows(x); x(N - n: N)
|
||||
x.tail(n) // x(end - n + 1: end)
|
||||
x.tail<n>() // x(end - n + 1: end)
|
||||
x.segment(i, n) // x(i+1 : i+n)
|
||||
x.segment<n>(i) // x(i+1 : i+n)
|
||||
P.block(i, j, rows, cols) // P(i+1 : i+rows, j+1 : j+cols)
|
||||
P.block<rows, cols>(i, j) // P(i+1 : i+rows, j+1 : j+cols)
|
||||
P.row(i) // P(i+1, :)
|
||||
P.col(j) // P(:, j+1)
|
||||
P.leftCols<cols>() // P(:, 1:cols)
|
||||
P.leftCols(cols) // P(:, 1:cols)
|
||||
P.middleCols<cols>(j) // P(:, j+1:j+cols)
|
||||
P.middleCols(j, cols) // P(:, j+1:j+cols)
|
||||
P.rightCols<cols>() // P(:, end-cols+1:end)
|
||||
P.rightCols(cols) // P(:, end-cols+1:end)
|
||||
P.topRows<rows>() // P(1:rows, :)
|
||||
P.topRows(rows) // P(1:rows, :)
|
||||
P.middleRows<rows>(i) // P(i+1:i+rows, :)
|
||||
P.middleRows(i, rows) // P(i+1:i+rows, :)
|
||||
P.bottomRows<rows>() // P(end-rows+1:end, :)
|
||||
P.bottomRows(rows) // P(end-rows+1:end, :)
|
||||
P.topLeftCorner(rows, cols) // P(1:rows, 1:cols)
|
||||
P.topRightCorner(rows, cols) // [m n]=size(P); P(1:rows, n-cols+1:n)
|
||||
P.bottomLeftCorner(rows, cols) // [m n]=size(P); P(m-rows+1:m, 1:cols)
|
||||
P.bottomRightCorner(rows, cols) // [m n]=size(P); P(m-rows+1:m, n-cols+1:n)
|
||||
P.topRightCorner(rows, cols) // P(1:rows, end-cols+1:end)
|
||||
P.bottomLeftCorner(rows, cols) // P(end-rows+1:end, 1:cols)
|
||||
P.bottomRightCorner(rows, cols) // P(end-rows+1:end, end-cols+1:end)
|
||||
P.topLeftCorner<rows,cols>() // P(1:rows, 1:cols)
|
||||
P.topRightCorner<rows,cols>() // [m n]=size(P); P(1:rows, n-cols+1:n)
|
||||
P.bottomLeftCorner<rows,cols>() // [m n]=size(P); P(m-rows+1:m, 1:cols)
|
||||
P.bottomRightCorner<rows,cols>() // [m n]=size(P); P(m-rows+1:m, n-cols+1:n)
|
||||
P.topRightCorner<rows,cols>() // P(1:rows, end-cols+1:end)
|
||||
P.bottomLeftCorner<rows,cols>() // P(end-rows+1:end, 1:cols)
|
||||
P.bottomRightCorner<rows,cols>() // P(end-rows+1:end, end-cols+1:end)
|
||||
|
||||
// Of particular note is Eigen's swap function which is highly optimized.
|
||||
// Eigen // Matlab
|
||||
@ -126,10 +140,8 @@ int r, c;
|
||||
// Eigen // Matlab
|
||||
R.minCoeff() // min(R(:))
|
||||
R.maxCoeff() // max(R(:))
|
||||
s = R.minCoeff(&r, &c) // [aa, bb] = min(R); [cc, dd] = min(aa);
|
||||
// r = bb(dd); c = dd; s = cc
|
||||
s = R.maxCoeff(&r, &c) // [aa, bb] = max(R); [cc, dd] = max(aa);
|
||||
// row = bb(dd); col = dd; s = cc
|
||||
s = R.minCoeff(&r, &c) // [s, i] = min(R(:)); [r, c] = ind2sub(size(R), i);
|
||||
s = R.maxCoeff(&r, &c) // [s, i] = max(R(:)); [r, c] = ind2sub(size(R), i);
|
||||
R.sum() // sum(R(:))
|
||||
R.colwise().sum() // sum(R)
|
||||
R.rowwise().sum() // sum(R, 2) or sum(R')'
|
||||
@ -151,13 +163,25 @@ x.squaredNorm() // dot(x, x) Note the equivalence is not true for co
|
||||
x.dot(y) // dot(x, y)
|
||||
x.cross(y) // cross(x, y) Requires #include <Eigen/Geometry>
|
||||
|
||||
//// Type conversion
|
||||
// Eigen // Matlab
|
||||
A.cast<double>(); // double(A)
|
||||
A.cast<float>(); // single(A)
|
||||
A.cast<int>(); // int32(A)
|
||||
// if the original type equals destination type, no work is done
|
||||
|
||||
// Note that for most operations Eigen requires all operands to have the same type:
|
||||
MatrixXf F = MatrixXf::Zero(3,3);
|
||||
A += F; // illegal in Eigen. In Matlab A = A+F is allowed
|
||||
A += F.cast<double>(); // F converted to double and then added (generally, conversion happens on-the-fly)
|
||||
|
||||
// Eigen can map existing memory into Eigen matrices.
|
||||
float array[3];
|
||||
Map<Vector3f>(array, 3).fill(10);
|
||||
int data[4] = 1, 2, 3, 4;
|
||||
Matrix2i mat2x2(data);
|
||||
MatrixXi mat2x2 = Map<Matrix2i>(data);
|
||||
MatrixXi mat2x2 = Map<MatrixXi>(data, 2, 2);
|
||||
Vector3f::Map(array).fill(10); // creates a temporary Map over array and sets its entries to 10
|
||||
int data[4] = {1, 2, 3, 4};
|
||||
Matrix2i mat2x2(data); // copies data into mat2x2
|
||||
Matrix2i::Map(data) = 2*mat2x2; // overwrite elements of data with 2*mat2x2
|
||||
MatrixXi::Map(data, 2, 2) += mat2x2; // adds mat2x2 to elements of data (alternative syntax if size is not known at compile time)
|
||||
|
||||
// Solve Ax = b. Result stored in x. Matlab: x = A \ b.
|
||||
x = A.ldlt().solve(b); // A sym. p.s.d. #include <Eigen/Cholesky>
|
||||
|
@ -206,6 +206,7 @@ TAB_SIZE = 8
|
||||
# You can put \n's in the value part of an alias to insert newlines.
|
||||
|
||||
ALIASES = "only_for_vectors=This is only for vectors (either row-vectors or column-vectors), i.e. matrices which are known at compile-time to have either one row or one column." \
|
||||
"not_reentrant=\warning This function is not re-entrant." \
|
||||
"array_module=This is defined in the %Array module. \code #include <Eigen/Array> \endcode" \
|
||||
"cholesky_module=This is defined in the %Cholesky module. \code #include <Eigen/Cholesky> \endcode" \
|
||||
"eigenvalues_module=This is defined in the %Eigenvalues module. \code #include <Eigen/Eigenvalues> \endcode" \
|
||||
@ -223,6 +224,7 @@ ALIASES = "only_for_vectors=This is only for vectors (either row-
|
||||
"note_about_checking_solutions=This method just tries to find as good a solution as possible. If you want to check whether a solution exists or if it is accurate, just call this function to get a result and then compute the error of this result, or use MatrixBase::isApprox() directly, for instance like this: \code bool a_solution_exists = (A*result).isApprox(b, precision); \endcode This method avoids dividing by zero, so that the non-existence of a solution doesn't by itself mean that you'll get \c inf or \c nan values." \
|
||||
"note_try_to_help_rvo=This function returns the result by value. In order to make that efficient, it is implemented as just a return statement using a special constructor, hopefully allowing the compiler to perform a RVO (return value optimization)." \
|
||||
"nonstableyet=\warning This is not considered to be part of the stable public API yet. Changes may happen in future releases. See \ref Experimental \"Experimental parts of Eigen\"
|
||||
|
||||
|
||||
ALIASES += "eigenAutoToc= "
|
||||
ALIASES += "eigenManualPage=\defgroup"
|
||||
|
70
doc/LeastSquares.dox
Normal file
70
doc/LeastSquares.dox
Normal file
@ -0,0 +1,70 @@
|
||||
namespace Eigen {
|
||||
|
||||
/** \eigenManualPage LeastSquares Solving linear least squares systems
|
||||
|
||||
This page describes how to solve linear least squares systems using %Eigen. An overdetermined system
|
||||
of equations, say \a Ax = \a b, generally has no exact solution. In this case, it makes sense to search for the
|
||||
vector \a x which is closest to being a solution, in the sense that the difference \a Ax - \a b is
|
||||
as small as possible. This \a x is called the least-squares solution (when the Euclidean norm is used).
|
||||
|
||||
The three methods discussed on this page are the SVD decomposition, the QR decomposition and normal
|
||||
equations. Of these, the SVD decomposition is generally the most accurate but the slowest, normal
|
||||
equations is the fastest but least accurate, and the QR decomposition is in between.
|
||||
|
||||
\eigenAutoToc
|
||||
|
||||
|
||||
\section LeastSquaresSVD Using the SVD decomposition
|
||||
|
||||
The \link JacobiSVD::solve() solve() \endlink method in the JacobiSVD class can be directly used to
|
||||
solve linear least squares systems. It is not enough to compute only the singular values (the default for
|
||||
this class); you also need the singular vectors but the thin SVD decomposition suffices for
|
||||
computing least squares solutions:
|
||||
|
||||
<table class="example">
|
||||
<tr><th>Example:</th><th>Output:</th></tr>
|
||||
<tr>
|
||||
<td>\include TutorialLinAlgSVDSolve.cpp </td>
|
||||
<td>\verbinclude TutorialLinAlgSVDSolve.out </td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
This is an example from the page \link TutorialLinearAlgebra Linear algebra and decompositions \endlink.
|
||||
|
||||
|
||||
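The included snippet is not reproduced here; in essence it requests a thin SVD and calls solve(), roughly as in this hedged sketch:

#include <Eigen/Dense>
#include <iostream>

void svdLeastSquares()
{
  Eigen::MatrixXf A = Eigen::MatrixXf::Random(3, 2);
  Eigen::VectorXf b = Eigen::VectorXf::Random(3);
  // Thin U and V are sufficient for least squares solving.
  Eigen::VectorXf x = A.jacobiSvd(Eigen::ComputeThinU | Eigen::ComputeThinV).solve(b);
  std::cout << "least squares solution:\n" << x << "\n";
}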
\section LeastSquaresQR Using the QR decomposition
|
||||
|
||||
The solve() method in QR decomposition classes also computes the least squares solution. There are
|
||||
three QR decomposition classes: HouseholderQR (no pivoting, so fast but unstable),
|
||||
ColPivHouseholderQR (column pivoting, thus a bit slower but more accurate) and FullPivHouseholderQR
|
||||
(full pivoting, so slowest and most stable). Here is an example with column pivoting:
|
||||
|
||||
<table class="example">
|
||||
<tr><th>Example:</th><th>Output:</th></tr>
|
||||
<tr>
|
||||
<td>\include LeastSquaresQR.cpp </td>
|
||||
<td>\verbinclude LeastSquaresQR.out </td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
|
||||
\section LeastSquaresNormalEquations Using normal equations
|
||||
|
||||
Finding the least squares solution of \a Ax = \a b is equivalent to solving the normal equation
|
||||
<i>A</i><sup>T</sup><i>Ax</i> = <i>A</i><sup>T</sup><i>b</i>. This leads to the following code
|
||||
|
||||
<table class="example">
|
||||
<tr><th>Example:</th><th>Output:</th></tr>
|
||||
<tr>
|
||||
<td>\include LeastSquaresNormalEquations.cpp </td>
|
||||
<td>\verbinclude LeastSquaresNormalEquations.out </td>
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
If the matrix \a A is ill-conditioned, then this is not a good method, because the condition number
|
||||
of <i>A</i><sup>T</sup><i>A</i> is the square of the condition number of \a A. This means that you
|
||||
lose twice as many digits when using the normal equations as compared to the other methods.
|
||||
|
||||
*/
|
||||
|
||||
}
|
@ -11,6 +11,7 @@ namespace Eigen {
|
||||
- \subpage TopicCustomizingEigen
|
||||
- \subpage TopicMultiThreading
|
||||
- \subpage TopicUsingIntelMKL
|
||||
- \subpage TopicCUDA
|
||||
- \subpage TopicTemplateKeyword
|
||||
- \subpage UserManual_UnderstandingEigen
|
||||
*/
|
||||
@ -96,6 +97,8 @@ namespace Eigen {
|
||||
\ingroup DenseLinearSolvers_chapter */
|
||||
/** \addtogroup TopicLinearAlgebraDecompositions
|
||||
\ingroup DenseLinearSolvers_chapter */
|
||||
/** \addtogroup LeastSquares
|
||||
\ingroup DenseLinearSolvers_chapter */
|
||||
|
||||
/** \addtogroup DenseLinearSolvers_Reference
|
||||
\ingroup DenseLinearSolvers_chapter */
|
||||
|
@ -39,6 +39,8 @@ int main(int argc, char** argv)
|
||||
}
|
||||
\endcode
|
||||
|
||||
\warning Note that all functions generating random matrices are \b not re-entrant nor thread-safe. These include DenseBase::Random() and DenseBase::setRandom(), even after a call to Eigen::initParallel(), because they are based on std::rand, which is not re-entrant. For a thread-safe random generator, we recommend boost::random or the C++11 random facilities.
|
||||
|
||||
In case your application is parallelized with OpenMP, you might want to disable Eigen's own parallelization as detailed in the previous section.
|
||||
|
||||
*/
|
||||
|
@ -167,8 +167,8 @@ Here is an example:
|
||||
|
||||
\section TutorialLinAlgLeastsquares Least squares solving
|
||||
|
||||
The best way to do least squares solving is with a SVD decomposition. Eigen provides one as the JacobiSVD class, and its solve()
|
||||
is doing least-squares solving.
|
||||
The most accurate method to do least squares solving is with an SVD decomposition. Eigen provides one
|
||||
as the JacobiSVD class, whose solve() method does least-squares solving.
|
||||
|
||||
Here is an example:
|
||||
<table class="example">
|
||||
@ -179,9 +179,10 @@ Here is an example:
|
||||
</tr>
|
||||
</table>
|
||||
|
||||
Another way, potentially faster but less reliable, is to use a LDLT decomposition
|
||||
of the normal matrix. In any case, just read any reference text on least squares, and it will be very easy for you
|
||||
to implement any linear least squares computation on top of Eigen.
|
||||
Other methods, potentially faster but less reliable, are to use a Cholesky decomposition of the
|
||||
normal matrix or a QR decomposition. Our page on \link LeastSquares least squares solving \endlink
|
||||
has more details.
|
||||
|
||||
|
||||
\section TutorialLinAlgSeparateComputation Separating the computation from the construction
|
||||
|
||||
|
32
doc/UsingNVCC.dox
Normal file
32
doc/UsingNVCC.dox
Normal file
@ -0,0 +1,32 @@
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
/** \page TopicCUDA Using Eigen in CUDA kernels
|
||||
|
||||
\b Disclaimer: this page is about an \b experimental feature in %Eigen.
|
||||
|
||||
Starting from CUDA 5.0, the CUDA compiler, \c nvcc, is able to properly parse %Eigen's code (almost).
|
||||
A few adaptations of %Eigen's code already allow some parts of %Eigen to be used in your own CUDA kernels.
|
||||
To this end you need the devel branch of %Eigen, and CUDA 5.0 or greater used with GCC.
|
||||
|
||||
Known issues:
|
||||
|
||||
- \c nvcc with MS Visual Studio does not work (patch welcome)
|
||||
|
||||
- \c nvcc with \c clang does not work (patch welcome)
|
||||
|
||||
- \c nvcc 5.5 with gcc-4.7 (or greater) has issues with the standard \c <limits> header file. To workaround this, you can add the following before including any other files:
|
||||
\code
|
||||
// workaround issue between gcc >= 4.7 and cuda 5.5
|
||||
#if (defined __GNUC__) && (__GNUC__>4 || __GNUC_MINOR__>=7)
|
||||
#undef _GLIBCXX_ATOMIC_BUILTINS
|
||||
#undef _GLIBCXX_USE_INT128
|
||||
#endif
|
||||
\endcode
|
||||
|
||||
- On 64-bit systems, Eigen uses \c long \c int as the default type for indexes and sizes. On a CUDA device, it would make sense to default to a 32-bit \c int.
|
||||
However, to keep host and CUDA code compatible, this cannot be done automatically by %Eigen, and the user is thus required to define \c EIGEN_DEFAULT_DENSE_INDEX_TYPE to \c int throughout their code (or only for CUDA code if there is no interaction between host and CUDA code through %Eigen's objects).
|
||||
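A hedged sketch of that advice, assuming a translation unit shared between host and device code:

// Illustration only: select 32-bit indices before any Eigen header is included.
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#include <Eigen/Core>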
|
||||
*/
|
||||
|
||||
}
|
4
doc/snippets/LeastSquaresNormalEquations.cpp
Normal file
4
doc/snippets/LeastSquaresNormalEquations.cpp
Normal file
@ -0,0 +1,4 @@
|
||||
MatrixXf A = MatrixXf::Random(3, 2);
|
||||
VectorXf b = VectorXf::Random(3);
|
||||
cout << "The solution using normal equations is:\n"
|
||||
<< (A.transpose() * A).ldlt().solve(A.transpose() * b) << endl;
|
4
doc/snippets/LeastSquaresQR.cpp
Normal file
4
doc/snippets/LeastSquaresQR.cpp
Normal file
@ -0,0 +1,4 @@
|
||||
MatrixXf A = MatrixXf::Random(3, 2);
|
||||
VectorXf b = VectorXf::Random(3);
|
||||
cout << "The solution using the QR decomposition is:\n"
|
||||
<< A.colPivHouseholderQr().solve(b) << endl;
|
7
doc/snippets/MatrixBase_applyOnTheLeft.cpp
Normal file
7
doc/snippets/MatrixBase_applyOnTheLeft.cpp
Normal file
@ -0,0 +1,7 @@
|
||||
Matrix3f A = Matrix3f::Random(3,3), B;
|
||||
B << 0,1,0,
|
||||
0,0,1,
|
||||
1,0,0;
|
||||
cout << "At start, A = " << endl << A << endl;
|
||||
A.applyOnTheLeft(B);
|
||||
cout << "After applyOnTheLeft, A = " << endl << A << endl;
|
9
doc/snippets/MatrixBase_applyOnTheRight.cpp
Normal file
9
doc/snippets/MatrixBase_applyOnTheRight.cpp
Normal file
@ -0,0 +1,9 @@
|
||||
Matrix3f A = Matrix3f::Random(3,3), B;
|
||||
B << 0,1,0,
|
||||
0,0,1,
|
||||
1,0,0;
|
||||
cout << "At start, A = " << endl << A << endl;
|
||||
A *= B;
|
||||
cout << "After A *= B, A = " << endl << A << endl;
|
||||
A.applyOnTheRight(B); // equivalent to A *= B
|
||||
cout << "After applyOnTheRight, A = " << endl << A << endl;
|
@ -19,3 +19,22 @@ if(QT4_FOUND)
|
||||
|
||||
add_dependencies(all_examples Tutorial_sparse_example)
|
||||
endif(QT4_FOUND)
|
||||
|
||||
check_cxx_compiler_flag("-std=c++11" EIGEN_COMPILER_SUPPORT_CPP11)
|
||||
if(EIGEN_COMPILER_SUPPORT_CPP11)
|
||||
add_executable(random_cpp11 random_cpp11.cpp)
|
||||
target_link_libraries(random_cpp11 ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO})
|
||||
add_dependencies(all_examples random_cpp11)
|
||||
ei_add_target_property(random_cpp11 COMPILE_FLAGS "-std=c++11")
|
||||
|
||||
get_target_property(random_cpp11_exec
|
||||
random_cpp11 LOCATION)
|
||||
add_custom_command(
|
||||
TARGET random_cpp11
|
||||
POST_BUILD
|
||||
COMMAND ${random_cpp11_exec}
|
||||
ARGS >${CMAKE_CURRENT_BINARY_DIR}/random_cpp11.out
|
||||
)
|
||||
|
||||
|
||||
endif()
|
14
doc/special_examples/random_cpp11.cpp
Normal file
14
doc/special_examples/random_cpp11.cpp
Normal file
@ -0,0 +1,14 @@
|
||||
#include <Eigen/Core>
|
||||
#include <iostream>
|
||||
#include <random>
|
||||
|
||||
using namespace Eigen;
|
||||
|
||||
int main() {
|
||||
std::default_random_engine generator;
|
||||
std::poisson_distribution<int> distribution(4.1);
|
||||
auto poisson = [&] (int) {return distribution(generator);};
|
||||
|
||||
RowVectorXi v = RowVectorXi::NullaryExpr(10, poisson );
|
||||
std::cout << v << "\n";
|
||||
}
|
@ -7,6 +7,9 @@ workaround_9220(Fortran EIGEN_Fortran_COMPILER_WORKS)
|
||||
|
||||
if(EIGEN_Fortran_COMPILER_WORKS)
|
||||
enable_language(Fortran OPTIONAL)
|
||||
if(NOT CMAKE_Fortran_COMPILER)
|
||||
set(EIGEN_Fortran_COMPILER_WORKS OFF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
add_custom_target(lapack)
|
||||
|
@ -302,7 +302,6 @@ find_package(CUDA)
|
||||
if(CUDA_FOUND)
|
||||
|
||||
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
|
||||
set(CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
|
||||
cuda_include_directories(${CMAKE_CURRENT_BINARY_DIR})
|
||||
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
|
||||
|
||||
|
@ -263,8 +263,8 @@ template<typename MatrixType> void cholesky_bug241(const MatrixType& m)

// LDLT is not guaranteed to work for indefinite matrices, but happens to work fine if matrix is diagonal.
// This test checks that LDLT reports correctly that matrix is indefinite.
// See http://forum.kde.org/viewtopic.php?f=74&t=106942
template<typename MatrixType> void cholesky_indefinite(const MatrixType& m)
// See http://forum.kde.org/viewtopic.php?f=74&t=106942 and bug 736
template<typename MatrixType> void cholesky_definiteness(const MatrixType& m)
{
  eigen_assert(m.rows() == 2 && m.cols() == 2);
  MatrixType mat;
@ -280,6 +280,24 @@ template<typename MatrixType> void cholesky_indefinite(const MatrixType& m)
    VERIFY(!ldlt.isNegative());
    VERIFY(!ldlt.isPositive());
  }
  {
    mat << 0, 0, 0, 0;
    LDLT<MatrixType> ldlt(mat);
    VERIFY(ldlt.isNegative());
    VERIFY(ldlt.isPositive());
  }
  {
    mat << 0, 0, 0, 1;
    LDLT<MatrixType> ldlt(mat);
    VERIFY(!ldlt.isNegative());
    VERIFY(ldlt.isPositive());
  }
  {
    mat << -1, 0, 0, 0;
    LDLT<MatrixType> ldlt(mat);
    VERIFY(ldlt.isNegative());
    VERIFY(!ldlt.isPositive());
  }
}

template<typename MatrixType> void cholesky_verify_assert()
@ -309,7 +327,7 @@ void test_cholesky()
  CALL_SUBTEST_1( cholesky(Matrix<double,1,1>()) );
  CALL_SUBTEST_3( cholesky(Matrix2d()) );
  CALL_SUBTEST_3( cholesky_bug241(Matrix2d()) );
  CALL_SUBTEST_3( cholesky_indefinite(Matrix2d()) );
  CALL_SUBTEST_3( cholesky_definiteness(Matrix2d()) );
  CALL_SUBTEST_4( cholesky(Matrix3f()) );
  CALL_SUBTEST_5( cholesky(Matrix4d()) );
  s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE);
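The new cholesky_definiteness() test exercises the sign reported by LDLT. A minimal standalone sketch of the same queries (illustrative only, not part of this changeset) could look like:

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
      Eigen::Matrix2d mat;
      mat << 0, 0,
             0, 1;                               // positive semi-definite, rank 1
      Eigen::LDLT<Eigen::Matrix2d> ldlt(mat);
      std::cout << "isPositive: " << ldlt.isPositive()    // expected: 1
                << ", isNegative: " << ldlt.isNegative()  // expected: 0
                << "\n";
    }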
@ -1,8 +1,16 @@

// workaround issue between gcc >= 4.7 and cuda 5.5
#if (defined __GNUC__) && (__GNUC__>4 || __GNUC_MINOR__>=7)
#undef _GLIBCXX_ATOMIC_BUILTINS
#undef _GLIBCXX_USE_INT128
#endif

#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_TEST_FUNC cuda_basic
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int

#include "main.h"
#include "cuda_common.h"

@ -70,6 +78,17 @@ struct prod {
  }
};

template<typename T1, typename T2>
struct diagonal {
  EIGEN_DEVICE_FUNC
  void operator()(int i, const typename T1::Scalar* in, typename T1::Scalar* out) const
  {
    using namespace Eigen;
    T1 x1(in+i);
    Map<T2> res(out+i*T2::MaxSizeAtCompileTime);
    res += x1.diagonal();
  }
};

template<typename T>
struct eigenvalues {
@ -82,12 +101,11 @@ struct eigenvalues {
    Map<Vec> res(out+i*Vec::MaxSizeAtCompileTime);
    T A = M*M.adjoint();
    SelfAdjointEigenSolver<T> eig;
    eig.computeDirect(A);
    res = A.eigenvalues();
    eig.computeDirect(M);
    res = eig.eigenvalues();
  }
};

void test_cuda_basic()
{
  ei_test_init_cuda();
@ -110,7 +128,10 @@ void test_cuda_basic()
  CALL_SUBTEST( run_and_compare_to_cuda(prod<Matrix3f,Matrix3f>(), nthreads, in, out) );
  CALL_SUBTEST( run_and_compare_to_cuda(prod<Matrix4f,Vector4f>(), nthreads, in, out) );

  // CALL_SUBTEST( run_and_compare_to_cuda(eigenvalues<Matrix3f>(), nthreads, in, out) );
  // CALL_SUBTEST( run_and_compare_to_cuda(eigenvalues<Matrix2f>(), nthreads, in, out) );
  CALL_SUBTEST( run_and_compare_to_cuda(diagonal<Matrix3f,Vector3f>(), nthreads, in, out) );
  CALL_SUBTEST( run_and_compare_to_cuda(diagonal<Matrix4f,Vector4f>(), nthreads, in, out) );

  CALL_SUBTEST( run_and_compare_to_cuda(eigenvalues<Matrix3f>(), nthreads, in, out) );
  CALL_SUBTEST( run_and_compare_to_cuda(eigenvalues<Matrix2f>(), nthreads, in, out) );

}
@ -86,12 +86,15 @@ void ei_test_init_cuda()
  cudaDeviceProp deviceProp;
  cudaGetDeviceProperties(&deviceProp, device);
  std::cout << "CUDA device info:\n";
  std::cout << " name: " << deviceProp.name << "\n";
  std::cout << " capability: " << deviceProp.major << "." << deviceProp.minor << "\n";
  std::cout << " multiProcessorCount: " << deviceProp.multiProcessorCount << "\n";
  std::cout << " maxThreadsPerMultiProcessor: " << deviceProp.maxThreadsPerMultiProcessor << "\n";
  std::cout << " warpSize: " << deviceProp.warpSize << "\n";
  std::cout << " regsPerBlock: " << deviceProp.regsPerBlock << "\n";
  std::cout << " concurrentKernels: " << deviceProp.concurrentKernels << "\n";
  std::cout << " clockRate: " << deviceProp.clockRate << "\n";
  std::cout << " canMapHostMemory: " << deviceProp.canMapHostMemory << "\n";
  std::cout << " computeMode: " << deviceProp.computeMode << "\n";
}
@ -12,36 +12,48 @@
#include <Eigen/LU>
#include <Eigen/SVD>

template<typename Scalar> void check_all_var(const Matrix<Scalar,3,1>& ea)

template<typename Scalar>
void verify_euler(const Matrix<Scalar,3,1>& ea, int i, int j, int k)
{
  typedef Matrix<Scalar,3,3> Matrix3;
  typedef Matrix<Scalar,3,1> Vector3;
  typedef AngleAxis<Scalar> AngleAxisx;
  using std::abs;
  Matrix3 m(AngleAxisx(ea[0], Vector3::Unit(i)) * AngleAxisx(ea[1], Vector3::Unit(j)) * AngleAxisx(ea[2], Vector3::Unit(k)));
  Vector3 eabis = m.eulerAngles(i, j, k);
  Matrix3 mbis(AngleAxisx(eabis[0], Vector3::Unit(i)) * AngleAxisx(eabis[1], Vector3::Unit(j)) * AngleAxisx(eabis[2], Vector3::Unit(k)));
  VERIFY_IS_APPROX(m, mbis);
  /* If I==K and ea[1]==0, then there is no unique solution. */
  /* The same remark applies when I!=K and |ea[1]| is close to pi/2. */
  if( (i!=k || ea[1]!=0) && (i==k || !internal::isApprox(abs(ea[1]),Scalar(M_PI/2),test_precision<Scalar>())) )
    VERIFY((ea-eabis).norm() <= test_precision<Scalar>());

#define VERIFY_EULER(I,J,K, X,Y,Z) { \
  Matrix3 m(AngleAxisx(ea[0], Vector3::Unit##X()) * AngleAxisx(ea[1], Vector3::Unit##Y()) * AngleAxisx(ea[2], Vector3::Unit##Z())); \
  Vector3 eabis = m.eulerAngles(I,J,K); \
  Matrix3 mbis(AngleAxisx(eabis[0], Vector3::Unit##X()) * AngleAxisx(eabis[1], Vector3::Unit##Y()) * AngleAxisx(eabis[2], Vector3::Unit##Z())); \
  VERIFY_IS_APPROX(m, mbis); \
  /* If I==K and ea[1]==0, then there is no unique solution. */ \
  /* The same remark applies when I!=K and |ea[1]| is close to pi/2. */ \
  if( (I!=K || ea[1]!=0) && (I==K || !internal::isApprox(abs(ea[1]),Scalar(M_PI/2),test_precision<Scalar>())) ) VERIFY((ea-eabis).norm() <= test_precision<Scalar>()); \
}
  VERIFY_EULER(0,1,2, X,Y,Z);
  VERIFY_EULER(0,1,0, X,Y,X);
  VERIFY_EULER(0,2,1, X,Z,Y);
  VERIFY_EULER(0,2,0, X,Z,X);
  // approx_or_less_than does not work for 0
  VERIFY(0 < eabis[0] || test_isMuchSmallerThan(eabis[0], Scalar(1)));
  VERIFY_IS_APPROX_OR_LESS_THAN(eabis[0], Scalar(M_PI));
  VERIFY_IS_APPROX_OR_LESS_THAN(-Scalar(M_PI), eabis[1]);
  VERIFY_IS_APPROX_OR_LESS_THAN(eabis[1], Scalar(M_PI));
  VERIFY_IS_APPROX_OR_LESS_THAN(-Scalar(M_PI), eabis[2]);
  VERIFY_IS_APPROX_OR_LESS_THAN(eabis[2], Scalar(M_PI));
}

  VERIFY_EULER(1,2,0, Y,Z,X);
  VERIFY_EULER(1,2,1, Y,Z,Y);
  VERIFY_EULER(1,0,2, Y,X,Z);
  VERIFY_EULER(1,0,1, Y,X,Y);
template<typename Scalar> void check_all_var(const Matrix<Scalar,3,1>& ea)
{
  verify_euler(ea, 0,1,2);
  verify_euler(ea, 0,1,0);
  verify_euler(ea, 0,2,1);
  verify_euler(ea, 0,2,0);

  VERIFY_EULER(2,0,1, Z,X,Y);
  VERIFY_EULER(2,0,2, Z,X,Z);
  VERIFY_EULER(2,1,0, Z,Y,X);
  VERIFY_EULER(2,1,2, Z,Y,Z);
  verify_euler(ea, 1,2,0);
  verify_euler(ea, 1,2,1);
  verify_euler(ea, 1,0,2);
  verify_euler(ea, 1,0,1);

  verify_euler(ea, 2,0,1);
  verify_euler(ea, 2,0,2);
  verify_euler(ea, 2,1,0);
  verify_euler(ea, 2,1,2);
}

template<typename Scalar> void eulerangles()
@ -63,7 +75,16 @@ template<typename Scalar> void eulerangles()
  ea = m.eulerAngles(0,1,0);
  check_all_var(ea);

  ea = (Array3::Random() + Array3(1,1,0))*Scalar(M_PI)*Array3(0.5,0.5,1);
  // Check with purely random Quaternion:
  q1.coeffs() = Quaternionx::Coefficients::Random().normalized();
  m = q1;
  ea = m.eulerAngles(0,1,2);
  check_all_var(ea);
  ea = m.eulerAngles(0,1,0);
  check_all_var(ea);

  // Check with random angles in range [0:pi]x[-pi:pi]x[-pi:pi].
  ea = (Array3::Random() + Array3(1,0,0))*Scalar(M_PI)*Array3(0.5,1,1);
  check_all_var(ea);

  ea[2] = ea[0] = internal::random<Scalar>(0,Scalar(M_PI));
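The refactored verify_euler() checks a compose/decompose round trip. A minimal standalone sketch of that round trip (illustrative only, not part of this changeset; the angle values are arbitrary) could look like:

    #include <Eigen/Geometry>
    #include <iostream>

    int main()
    {
      using namespace Eigen;
      Vector3d ea(0.3, -1.1, 0.7);                     // arbitrary input angles
      // Compose a rotation from X, Y, Z elementary rotations.
      Matrix3d m(AngleAxisd(ea[0], Vector3d::UnitX())
               * AngleAxisd(ea[1], Vector3d::UnitY())
               * AngleAxisd(ea[2], Vector3d::UnitZ()));
      // Recover angles for the same axis order, then rebuild the matrix.
      Vector3d eabis = m.eulerAngles(0, 1, 2);
      Matrix3d mbis(AngleAxisd(eabis[0], Vector3d::UnitX())
                  * AngleAxisd(eabis[1], Vector3d::UnitY())
                  * AngleAxisd(eabis[2], Vector3d::UnitZ()));
      std::cout << (m - mbis).norm() << "\n";          // should be ~0
    }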
@ -18,10 +18,12 @@ template<typename HyperplaneType> void hyperplane(const HyperplaneType& _plane)
  /* this test covers the following files:
     Hyperplane.h
  */
  using std::abs;
  typedef typename HyperplaneType::Index Index;
  const Index dim = _plane.dim();
  enum { Options = HyperplaneType::Options };
  typedef typename HyperplaneType::Scalar Scalar;
  typedef typename HyperplaneType::RealScalar RealScalar;
  typedef Matrix<Scalar, HyperplaneType::AmbientDimAtCompileTime, 1> VectorType;
  typedef Matrix<Scalar, HyperplaneType::AmbientDimAtCompileTime,
                         HyperplaneType::AmbientDimAtCompileTime> MatrixType;
@ -42,7 +44,10 @@ template<typename HyperplaneType> void hyperplane(const HyperplaneType& _plane)
  VERIFY_IS_APPROX( n1.dot(n1), Scalar(1) );

  VERIFY_IS_MUCH_SMALLER_THAN( pl0.absDistance(p0), Scalar(1) );
  VERIFY_IS_APPROX( pl1.signedDistance(p1 + n1 * s0), s0 );
  if(numext::abs2(s0)>RealScalar(1e-6))
    VERIFY_IS_APPROX( pl1.signedDistance(p1 + n1 * s0), s0);
  else
    VERIFY_IS_MUCH_SMALLER_THAN( abs(pl1.signedDistance(p1 + n1 * s0) - s0), Scalar(1) );
  VERIFY_IS_MUCH_SMALLER_THAN( pl1.signedDistance(pl1.projection(p0)), Scalar(1) );
  VERIFY_IS_MUCH_SMALLER_THAN( pl1.absDistance(p1 + pl1.normal().unitOrthogonal() * s1), Scalar(1) );
@ -52,6 +57,8 @@ template<typename HyperplaneType> void hyperplane(const HyperplaneType& _plane)
  MatrixType rot = MatrixType::Random(dim,dim).householderQr().householderQ();
  DiagonalMatrix<Scalar,HyperplaneType::AmbientDimAtCompileTime> scaling(VectorType::Random());
  Translation<Scalar,HyperplaneType::AmbientDimAtCompileTime> translation(VectorType::Random());

  while(scaling.diagonal().cwiseAbs().minCoeff()<RealScalar(1e-4)) scaling.diagonal() = VectorType::Random();

  pl2 = pl1;
  VERIFY_IS_MUCH_SMALLER_THAN( pl2.transform(rot).absDistance(rot * p1), Scalar(1) );
@ -61,7 +68,7 @@ template<typename HyperplaneType> void hyperplane(const HyperplaneType& _plane)
  VERIFY_IS_MUCH_SMALLER_THAN( pl2.transform(rot*scaling).absDistance((rot*scaling) * p1), Scalar(1) );
  pl2 = pl1;
  VERIFY_IS_MUCH_SMALLER_THAN( pl2.transform(rot*scaling*translation)
                               .absDistance((rot*scaling*translation) * p1), Scalar(1) );
                                .absDistance((rot*scaling*translation) * p1), Scalar(1) );
  pl2 = pl1;
  VERIFY_IS_MUCH_SMALLER_THAN( pl2.transform(rot*translation,Isometry)
                               .absDistance((rot*translation) * p1), Scalar(1) );
@ -104,12 +111,15 @@ template<typename Scalar> void lines()
  Vector result = line_u.intersection(line_v);

  // the lines should intersect at the point we called "center"
  VERIFY_IS_APPROX(result, center);
  if(abs(a-1) > 1e-2 && abs(v.normalized().dot(u.normalized()))<0.9)
    VERIFY_IS_APPROX(result, center);

  // check conversions between two types of lines
  PLine pl(line_u); // gcc 3.3 will commit suicide if we don't name this variable
  CoeffsType converted_coeffs = HLine(pl).coeffs();
  converted_coeffs *= (line_u.coeffs()[0])/(converted_coeffs[0]);
  HLine line_u2(pl);
  CoeffsType converted_coeffs = line_u2.coeffs();
  if(line_u2.normal().dot(line_u.normal())<0.)
    converted_coeffs = -line_u2.coeffs();
  VERIFY(line_u.coeffs().isApprox(converted_coeffs));
}
}
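The hyperplane test above distinguishes signed and absolute distances. A minimal standalone sketch of those two queries (illustrative only, not part of this changeset; the plane and point are arbitrary) could look like:

    #include <Eigen/Geometry>
    #include <iostream>

    int main()
    {
      using namespace Eigen;
      Vector3d n(0, 0, 1);                     // unit normal
      Vector3d p(1, 2, 3);                     // a point on the plane
      Hyperplane<double,3> plane(n, p);
      Vector3d q = p + 2.5 * n;                // 2.5 units along the normal
      std::cout << plane.signedDistance(q) << " "   // 2.5
                << plane.absDistance(q)    << "\n"; // 2.5
    }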
@ -279,6 +279,13 @@ template<typename Scalar, int Mode, int Options> void transformations()
  t1 = Eigen::Scaling(s0,s0,s0) * t1;
  VERIFY_IS_APPROX(t0.matrix(), t1.matrix());

  t0 = t3;
  t0.scale(s0);
  t1 = t3 * Eigen::Scaling(s0);
  VERIFY_IS_APPROX(t0.matrix(), t1.matrix());
  t0.prescale(s0);
  t1 = Eigen::Scaling(s0) * t1;
  VERIFY_IS_APPROX(t0.matrix(), t1.matrix());

  t0.setIdentity();
  t0.prerotate(q1).prescale(v0).pretranslate(v0);
@ -54,6 +54,8 @@ template<typename MatrixType> void qr_invertible()
{
  using std::log;
  using std::abs;
  using std::pow;
  using std::max;
  typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
  typedef typename MatrixType::Scalar Scalar;

@ -65,7 +67,7 @@ template<typename MatrixType> void qr_invertible()
  if (internal::is_same<RealScalar,float>::value)
  {
    // let's build a matrix more stable to inverse
    MatrixType a = MatrixType::Random(size,size*2);
    MatrixType a = MatrixType::Random(size,size*4);
    m1 += a * a.adjoint();
  }

@ -81,8 +83,11 @@ template<typename MatrixType> void qr_invertible()
  m3 = qr.householderQ(); // get a unitary
  m1 = m3 * m1 * m3;
  qr.compute(m1);
  VERIFY_IS_APPROX(absdet, qr.absDeterminant());
  VERIFY_IS_APPROX(log(absdet), qr.logAbsDeterminant());
  // This test is tricky if the determinant becomes too small.
  // Since we generate random numbers with magnitude in range [0,1], the average determinant is 0.5^size.
  VERIFY_IS_MUCH_SMALLER_THAN( abs(absdet-qr.absDeterminant()), (max)(RealScalar(pow(0.5,size)),(max)(abs(absdet),abs(qr.absDeterminant()))) );

}

template<typename MatrixType> void qr_verify_assert()
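The qr_invertible() hunk compares the determinant of the input against the determinant reported by the QR factorization. A minimal standalone sketch of that comparison (illustrative only, not part of this changeset) could look like:

    #include <Eigen/Dense>
    #include <iostream>
    #include <cmath>

    int main()
    {
      Eigen::MatrixXd m = Eigen::MatrixXd::Random(5, 5);
      Eigen::HouseholderQR<Eigen::MatrixXd> qr(m);
      double absdet = std::abs(m.determinant());
      std::cout << absdet << " vs " << qr.absDeterminant() << "\n";            // should agree
      std::cout << std::log(absdet) << " vs " << qr.logAbsDeterminant() << "\n";
    }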
@ -154,16 +154,16 @@ initSparse(double density,
  sparseMat.finalize();
}

template<typename Scalar> void
template<typename Scalar,int Options,typename Index> void
initSparse(double density,
           Matrix<Scalar,Dynamic,1>& refVec,
           SparseVector<Scalar>& sparseVec,
           SparseVector<Scalar,Options,Index>& sparseVec,
           std::vector<int>* zeroCoords = 0,
           std::vector<int>* nonzeroCoords = 0)
{
  sparseVec.reserve(int(refVec.size()*density));
  sparseVec.setZero();
  for(int i=0; i<refVec.size(); i++)
  for(Index i=0; i<refVec.size(); i++)
  {
    Scalar v = (internal::random<double>(0,1) < density) ? internal::random<Scalar>() : Scalar(0);
    if (v!=Scalar(0))
@ -178,10 +178,10 @@ initSparse(double density,
  }
}

template<typename Scalar> void
template<typename Scalar,int Options,typename Index> void
initSparse(double density,
           Matrix<Scalar,1,Dynamic>& refVec,
           SparseVector<Scalar,RowMajor>& sparseVec,
           SparseVector<Scalar,Options,Index>& sparseVec,
           std::vector<int>* zeroCoords = 0,
           std::vector<int>* nonzeroCoords = 0)
{
@ -270,6 +270,14 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
    VERIFY_IS_APPROX(m1.innerVector(0).dot(refM2.row(0)), refM1.row(0).dot(refM2.row(0)));
  else
    VERIFY_IS_APPROX(m1.innerVector(0).dot(refM2.row(0)), refM1.col(0).dot(refM2.row(0)));

  DenseVector rv = DenseVector::Random(m1.cols());
  DenseVector cv = DenseVector::Random(m1.rows());
  Index r = internal::random<Index>(0,m1.rows()-2);
  Index c = internal::random<Index>(0,m1.cols()-1);
  VERIFY_IS_APPROX(( m1.template block<1,Dynamic>(r,0,1,m1.cols()).dot(rv)) , refM1.row(r).dot(rv));
  VERIFY_IS_APPROX(m1.row(r).dot(rv), refM1.row(r).dot(rv));
  VERIFY_IS_APPROX(m1.col(c).dot(cv), refM1.col(c).dot(cv));

  VERIFY_IS_APPROX(m1.conjugate(), refM1.conjugate());
  VERIFY_IS_APPROX(m1.real(), refM1.real());
@ -13,8 +13,9 @@ template<typename SparseMatrixType, typename DenseMatrix, bool IsRowMajor=Sparse

template<typename SparseMatrixType, typename DenseMatrix> struct test_outer<SparseMatrixType,DenseMatrix,false> {
  static void run(SparseMatrixType& m2, SparseMatrixType& m4, DenseMatrix& refMat2, DenseMatrix& refMat4) {
    int c = internal::random(0,m2.cols()-1);
    int c1 = internal::random(0,m2.cols()-1);
    typedef typename SparseMatrixType::Index Index;
    Index c = internal::random<Index>(0,m2.cols()-1);
    Index c1 = internal::random<Index>(0,m2.cols()-1);
    VERIFY_IS_APPROX(m4=m2.col(c)*refMat2.col(c1).transpose(), refMat4=refMat2.col(c)*refMat2.col(c1).transpose());
    VERIFY_IS_APPROX(m4=refMat2.col(c1)*m2.col(c).transpose(), refMat4=refMat2.col(c1)*refMat2.col(c).transpose());
  }
@ -22,8 +23,9 @@ template<typename SparseMatrixType, typename DenseMatrix> struct test_outer<Spar

template<typename SparseMatrixType, typename DenseMatrix> struct test_outer<SparseMatrixType,DenseMatrix,true> {
  static void run(SparseMatrixType& m2, SparseMatrixType& m4, DenseMatrix& refMat2, DenseMatrix& refMat4) {
    int r = internal::random(0,m2.rows()-1);
    int c1 = internal::random(0,m2.cols()-1);
    typedef typename SparseMatrixType::Index Index;
    Index r = internal::random<Index>(0,m2.rows()-1);
    Index c1 = internal::random<Index>(0,m2.cols()-1);
    VERIFY_IS_APPROX(m4=m2.row(r).transpose()*refMat2.col(c1).transpose(), refMat4=refMat2.row(r).transpose()*refMat2.col(c1).transpose());
    VERIFY_IS_APPROX(m4=refMat2.col(c1)*m2.row(r), refMat4=refMat2.col(c1)*refMat2.row(r));
  }
@ -37,9 +39,9 @@ template<typename SparseMatrixType> void sparse_product()
{
  typedef typename SparseMatrixType::Index Index;
  Index n = 100;
  const Index rows = internal::random<int>(1,n);
  const Index cols = internal::random<int>(1,n);
  const Index depth = internal::random<int>(1,n);
  const Index rows = internal::random<Index>(1,n);
  const Index cols = internal::random<Index>(1,n);
  const Index depth = internal::random<Index>(1,n);
  typedef typename SparseMatrixType::Scalar Scalar;
  enum { Flags = SparseMatrixType::Flags };

@ -244,6 +246,7 @@ void test_sparse_product()
    CALL_SUBTEST_1( (sparse_product<SparseMatrix<double,RowMajor> >()) );
    CALL_SUBTEST_2( (sparse_product<SparseMatrix<std::complex<double>, ColMajor > >()) );
    CALL_SUBTEST_2( (sparse_product<SparseMatrix<std::complex<double>, RowMajor > >()) );
    CALL_SUBTEST_3( (sparse_product<SparseMatrix<float,ColMajor,long int> >()) );
    CALL_SUBTEST_4( (sparse_product_regression_test<SparseMatrix<double,RowMajor>, Matrix<double, Dynamic, Dynamic, RowMajor> >()) );
  }
}
@ -9,14 +9,14 @@

#include "sparse.h"

template<typename Scalar> void sparse_vector(int rows, int cols)
template<typename Scalar,typename Index> void sparse_vector(int rows, int cols)
{
  double densityMat = (std::max)(8./(rows*cols), 0.01);
  double densityVec = (std::max)(8./float(rows), 0.1);
  typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
  typedef Matrix<Scalar,Dynamic,1> DenseVector;
  typedef SparseVector<Scalar> SparseVectorType;
  typedef SparseMatrix<Scalar> SparseMatrixType;
  typedef SparseVector<Scalar,0,Index> SparseVectorType;
  typedef SparseMatrix<Scalar,0,Index> SparseMatrixType;
  Scalar eps = 1e-6;

  SparseMatrixType m1(rows,rows);
@ -101,9 +101,10 @@ template<typename Scalar> void sparse_vector(int rows, int cols)
void test_sparse_vector()
{
  for(int i = 0; i < g_repeat; i++) {
    CALL_SUBTEST_1( sparse_vector<double>(8, 8) );
    CALL_SUBTEST_2( sparse_vector<std::complex<double> >(16, 16) );
    CALL_SUBTEST_1( sparse_vector<double>(299, 535) );
    CALL_SUBTEST_1(( sparse_vector<double,int>(8, 8) ));
    CALL_SUBTEST_2(( sparse_vector<std::complex<double>, int>(16, 16) ));
    CALL_SUBTEST_1(( sparse_vector<double,long int>(299, 535) ));
    CALL_SUBTEST_1(( sparse_vector<double,short>(299, 535) ));
  }
}
@ -55,8 +55,16 @@ template<typename MatrixType> void stable_norm(const MatrixType& m)
  Index rows = m.rows();
  Index cols = m.cols();

  Scalar big = internal::random<Scalar>() * ((std::numeric_limits<RealScalar>::max)() * RealScalar(1e-4));
  Scalar small = internal::random<Scalar>() * ((std::numeric_limits<RealScalar>::min)() * RealScalar(1e4));
  // get a non-zero random factor
  Scalar factor = internal::random<Scalar>();
  while(numext::abs2(factor)<RealScalar(1e-4))
    factor = internal::random<Scalar>();
  Scalar big = factor * ((std::numeric_limits<RealScalar>::max)() * RealScalar(1e-4));

  factor = internal::random<Scalar>();
  while(numext::abs2(factor)<RealScalar(1e-4))
    factor = internal::random<Scalar>();
  Scalar small = factor * ((std::numeric_limits<RealScalar>::min)() * RealScalar(1e4));

  MatrixType vzero = MatrixType::Zero(rows, cols),
             vrand = MatrixType::Random(rows, cols),
@ -91,7 +99,7 @@ template<typename MatrixType> void stable_norm(const MatrixType& m)
  VERIFY_IS_APPROX(vsmall.blueNorm(), sqrt(size)*abs(small));
  VERIFY_IS_APPROX(vsmall.hypotNorm(), sqrt(size)*abs(small));

  // Test compilation of cwise() version
  VERIFY_IS_APPROX(vrand.colwise().stableNorm(), vrand.colwise().norm());
  VERIFY_IS_APPROX(vrand.colwise().blueNorm(), vrand.colwise().norm());
  VERIFY_IS_APPROX(vrand.colwise().hypotNorm(), vrand.colwise().norm());
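The stable_norm() test exercises the overflow/underflow-safe norms. A minimal standalone sketch of the difference they make (illustrative only, not part of this changeset) could look like:

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
      // Entries whose squares underflow to zero in double precision.
      Eigen::VectorXd v = Eigen::VectorXd::Constant(100, 1e-170);
      std::cout << v.norm()       << "\n";   // plain norm underflows to 0
      std::cout << v.stableNorm() << "\n";   // rescaled accumulation, ~1e-169
      std::cout << v.blueNorm()   << "\n";   // Blue's algorithm
      std::cout << v.hypotNorm()  << "\n";   // repeated hypot()
    }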
@ -130,10 +130,11 @@ void run_fixed_size_test(int num_elements)

  // MUST be positive because in any other case det(cR_t) may become negative for
  // odd dimensions!
  const Scalar c = abs(internal::random<Scalar>());
  // Also, if c is too small compared to t.norm() the problem is ill-posed (cf. bug 744)
  const Scalar c = internal::random<Scalar>(0.5, 2.0);

  FixedMatrix R = randMatrixSpecialUnitary<Scalar>(dim);
  FixedVector t = Scalar(50)*FixedVector::Random(dim,1);
  FixedVector t = Scalar(32)*FixedVector::Random(dim,1);

  HomMatrix cR_t = HomMatrix::Identity(dim+1,dim+1);
  cR_t.block(0,0,dim,dim) = c*R;
@ -149,9 +150,9 @@ void run_fixed_size_test(int num_elements)

  HomMatrix cR_t_umeyama = umeyama(src_block, dst_block);

  const Scalar error = ( cR_t_umeyama*src - dst ).array().square().sum();
  const Scalar error = ( cR_t_umeyama*src - dst ).squaredNorm();

  VERIFY(error < Scalar(10)*std::numeric_limits<Scalar>::epsilon());
  VERIFY(error < Scalar(16)*std::numeric_limits<Scalar>::epsilon());
}

void test_umeyama()
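The test above estimates a similarity transform with umeyama(). A minimal standalone sketch of that call (illustrative only, not part of this changeset; the scale, rotation and translation are made up) could look like:

    #include <Eigen/Geometry>
    #include <iostream>

    int main()
    {
      using namespace Eigen;
      const int n = 20;
      Matrix3Xd src = Matrix3Xd::Random(3, n);
      // Apply a known similarity: scale 1.5, a rotation about Z, and a translation.
      Matrix3d R = AngleAxisd(0.4, Vector3d::UnitZ()).toRotationMatrix();
      Vector3d t(1.0, -2.0, 0.5);
      Matrix3Xd dst = (1.5 * R * src).colwise() + t;

      Matrix4d T = umeyama(src, dst, true);   // true: also estimate the scaling
      std::cout << T << "\n";                 // upper-left 3x3 ~ 1.5*R, last column ~ t
    }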
@ -11,7 +11,12 @@
#define EIGEN_OPENGL_MODULE

#include <Eigen/Geometry>
#include <GL/gl.h>

#if defined(__APPLE_CC__)
#include <OpenGL/gl.h>
#else
#include <GL/gl.h>
#endif

namespace Eigen {
@ -58,7 +58,9 @@ void pseudo_inverse(const CMatrix &C, CINVMatrix &CINV)
  Scalar rho, rho_1, alpha;
  d.setZero();

  CINV.startFill(); // FIXME estimate the number of non-zeros
  typedef Triplet<double> T;
  std::vector<T> tripletList;

  for (Index i = 0; i < rows; ++i)
  {
    d[i] = 1.0;
@ -84,11 +86,12 @@ void pseudo_inverse(const CMatrix &C, CINVMatrix &CINV)
    // FIXME add a generic "prune/filter" expression for both dense and sparse object to sparse
    for (Index j=0; j<l.size(); ++j)
      if (l[j]<1e-15)
        CINV.fill(i,j) = l[j];
        tripletList.push_back(T(i,j,l(j)));

    d[i] = 0.0;
  }
  CINV.endFill();
  CINV.setFromTriplets(tripletList.begin(), tripletList.end());
}

@ -103,6 +106,7 @@ template<typename TMatrix, typename CMatrix,
void constrained_cg(const TMatrix& A, const CMatrix& C, VectorX& x,
                    const VectorB& b, const VectorF& f, IterationController &iter)
{
  using std::sqrt;
  typedef typename TMatrix::Scalar Scalar;
  typedef typename TMatrix::Index Index;
  typedef Matrix<Scalar,Dynamic,1> TmpVec;
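The pseudo_inverse() hunk replaces the old startFill()/fill()/endFill() calls with triplet-based assembly. A minimal standalone sketch of that fill pattern (illustrative only, not part of this changeset) could look like:

    #include <Eigen/Sparse>
    #include <vector>
    #include <iostream>

    int main()
    {
      typedef Eigen::Triplet<double> T;
      std::vector<T> tripletList;
      tripletList.reserve(3);
      tripletList.push_back(T(0, 0, 1.0));   // (row, col, value)
      tripletList.push_back(T(1, 2, -2.0));
      tripletList.push_back(T(2, 1, 3.0));

      Eigen::SparseMatrix<double> A(3, 3);
      A.setFromTriplets(tripletList.begin(), tripletList.end());  // duplicates are summed
      std::cout << A << "\n";
    }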
@ -153,7 +153,22 @@ void KroneckerProductSparse<Lhs,Rhs>::evalTo(Dest& dst) const
  Bc = m_B.cols();
  dst.resize(this->rows(), this->cols());
  dst.resizeNonZeros(0);
  dst.reserve(m_A.nonZeros() * m_B.nonZeros());

  // compute number of non-zeros per innervectors of dst
  {
    VectorXi nnzA = VectorXi::Zero(Dest::IsRowMajor ? m_A.rows() : m_A.cols());
    for (Index kA=0; kA < m_A.outerSize(); ++kA)
      for (typename Lhs::InnerIterator itA(m_A,kA); itA; ++itA)
        nnzA(Dest::IsRowMajor ? itA.row() : itA.col())++;

    VectorXi nnzB = VectorXi::Zero(Dest::IsRowMajor ? m_B.rows() : m_B.cols());
    for (Index kB=0; kB < m_B.outerSize(); ++kB)
      for (typename Rhs::InnerIterator itB(m_B,kB); itB; ++itB)
        nnzB(Dest::IsRowMajor ? itB.row() : itB.col())++;

    Matrix<int,Dynamic,Dynamic,ColMajor> nnzAB = nnzB * nnzA.transpose();
    dst.reserve(VectorXi::Map(nnzAB.data(), nnzAB.size()));
  }

  for (Index kA=0; kA < m_A.outerSize(); ++kA)
  {
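The hunk above switches to reserving a separate capacity for every inner vector of the destination. A minimal standalone sketch of that per-column reserve pattern (illustrative only, not part of this changeset; sizes and values are made up) could look like:

    #include <Eigen/Sparse>
    #include <iostream>

    int main()
    {
      const int rows = 4, cols = 3;
      Eigen::SparseMatrix<double> m(rows, cols);   // column-major by default
      Eigen::VectorXi nnzPerCol(cols);
      nnzPerCol << 1, 2, 1;                        // expected non-zeros per column
      m.reserve(nnzPerCol);                        // avoids reallocations during insert()
      m.insert(0, 0) = 1.0;
      m.insert(1, 1) = 2.0;
      m.insert(3, 1) = 3.0;
      m.insert(2, 2) = 4.0;
      m.makeCompressed();
      std::cout << m.nonZeros() << "\n";           // 4
    }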
@ -31,6 +31,8 @@ namespace Eigen

    enum { OrderAtCompileTime = _Degree==Dynamic ? Dynamic : _Degree+1 /*!< The spline curve's order at compile-time. */ };
    enum { NumOfDerivativesAtCompileTime = OrderAtCompileTime /*!< The number of derivatives defined for the current spline. */ };

    enum { DerivativeMemoryLayout = Dimension==1 ? RowMajor : ColMajor /*!< The derivative type's memory layout. */ };

    /** \brief The data type used to store non-zero basis functions. */
    typedef Array<Scalar,1,OrderAtCompileTime> BasisVectorType;
@ -39,7 +41,7 @@ namespace Eigen
    typedef Array<Scalar,Dynamic,Dynamic,RowMajor,NumOfDerivativesAtCompileTime,OrderAtCompileTime> BasisDerivativeType;

    /** \brief The data type used to store the spline's derivative values. */
    typedef Array<Scalar,Dimension,Dynamic,ColMajor,Dimension,NumOfDerivativesAtCompileTime> DerivativeType;
    typedef Array<Scalar,Dimension,Dynamic,DerivativeMemoryLayout,Dimension,NumOfDerivativesAtCompileTime> DerivativeType;

    /** \brief The point type the spline is representing. */
    typedef Array<Scalar,Dimension,1> PointType;
@ -62,12 +64,14 @@ namespace Eigen
  {
    enum { OrderAtCompileTime = _Degree==Dynamic ? Dynamic : _Degree+1 /*!< The spline curve's order at compile-time. */ };
    enum { NumOfDerivativesAtCompileTime = _DerivativeOrder==Dynamic ? Dynamic : _DerivativeOrder+1 /*!< The number of derivatives defined for the current spline. */ };

    enum { DerivativeMemoryLayout = _Dim==1 ? RowMajor : ColMajor /*!< The derivative type's memory layout. */ };

    /** \brief The data type used to store the values of the basis function derivatives. */
    typedef Array<_Scalar,Dynamic,Dynamic,RowMajor,NumOfDerivativesAtCompileTime,OrderAtCompileTime> BasisDerivativeType;

    /** \brief The data type used to store the spline's derivative values. */
    typedef Array<_Scalar,_Dim,Dynamic,ColMajor,_Dim,NumOfDerivativesAtCompileTime> DerivativeType;
    typedef Array<_Scalar,_Dim,Dynamic,DerivativeMemoryLayout,_Dim,NumOfDerivativesAtCompileTime> DerivativeType;
  };

  /** \brief 2D float B-spline with dynamic degree. */
@ -16,9 +16,6 @@ std::complex<T> RandomCpx() { return std::complex<T>( (T)(rand()/(T)RAND_MAX - .
using namespace std;
using namespace Eigen;

float norm(float x) {return x*x;}
double norm(double x) {return x*x;}
long double norm(long double x) {return x*x;}

template < typename T>
complex<long double> promote(complex<T> x) { return complex<long double>(x.real(),x.imag()); }
@ -40,11 +37,11 @@ complex<long double> promote(long double x) { return complex<long double>( x);
    for (size_t k1=0;k1<(size_t)timebuf.size();++k1) {
      acc += promote( timebuf[k1] ) * exp( complex<long double>(0,k1*phinc) );
    }
    totalpower += norm(acc);
    totalpower += numext::abs2(acc);
    complex<long double> x = promote(fftbuf[k0]);
    complex<long double> dif = acc - x;
    difpower += norm(dif);
    //cerr << k0 << "\t" << acc << "\t" << x << "\t" << sqrt(norm(dif)) << endl;
    difpower += numext::abs2(dif);
    //cerr << k0 << "\t" << acc << "\t" << x << "\t" << sqrt(numext::abs2(dif)) << endl;
  }
  cerr << "rmse:" << sqrt(difpower/totalpower) << endl;
  return sqrt(difpower/totalpower);
@ -57,8 +54,8 @@ complex<long double> promote(long double x) { return complex<long double>( x);
  long double difpower=0;
  size_t n = (min)( buf1.size(),buf2.size() );
  for (size_t k=0;k<n;++k) {
    totalpower += (norm( buf1[k] ) + norm(buf2[k]) )/2.;
    difpower += norm(buf1[k] - buf2[k]);
    totalpower += (numext::abs2( buf1[k] ) + numext::abs2(buf2[k]) )/2.;
    difpower += numext::abs2(buf1[k] - buf2[k]);
  }
  return sqrt(difpower/totalpower);
}
@ -183,4 +183,38 @@
  DM_b2.resize(4,8);
  DM_ab2 = kroneckerProduct(DM_a2,DM_b2);
  CALL_SUBTEST(check_dimension(DM_ab2,10*4,9*8));

  for(int i = 0; i < g_repeat; i++)
  {
    double density = Eigen::internal::random<double>(0.01,0.5);
    int ra = Eigen::internal::random<int>(1,50);
    int ca = Eigen::internal::random<int>(1,50);
    int rb = Eigen::internal::random<int>(1,50);
    int cb = Eigen::internal::random<int>(1,50);
    SparseMatrix<float,ColMajor> sA(ra,ca), sB(rb,cb), sC;
    SparseMatrix<float,RowMajor> sC2;
    MatrixXf dA(ra,ca), dB(rb,cb), dC;
    initSparse(density, dA, sA);
    initSparse(density, dB, sB);

    sC = kroneckerProduct(sA,sB);
    dC = kroneckerProduct(dA,dB);
    VERIFY_IS_APPROX(MatrixXf(sC),dC);

    sC = kroneckerProduct(sA.transpose(),sB);
    dC = kroneckerProduct(dA.transpose(),dB);
    VERIFY_IS_APPROX(MatrixXf(sC),dC);

    sC = kroneckerProduct(sA.transpose(),sB.transpose());
    dC = kroneckerProduct(dA.transpose(),dB.transpose());
    VERIFY_IS_APPROX(MatrixXf(sC),dC);

    sC = kroneckerProduct(sA,sB.transpose());
    dC = kroneckerProduct(dA,dB.transpose());
    VERIFY_IS_APPROX(MatrixXf(sC),dC);

    sC2 = kroneckerProduct(sA,sB);
    dC = kroneckerProduct(dA,dB);
    VERIFY_IS_APPROX(MatrixXf(sC2),dC);
  }
}
@ -106,7 +106,6 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const

  typedef typename POLYNOMIAL::Scalar Scalar;
  typedef typename REAL_ROOTS::Scalar Real;
  typedef PolynomialSolver<Scalar, Deg > PolynomialSolverType;

  //Test realRoots
  std::vector< Real > calc_realRoots;
@ -120,7 +119,7 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const
    bool found = false;
    for( size_t j=0; j<calc_realRoots.size()&& !found; ++j )
    {
      if( internal::isApprox( calc_realRoots[i], real_roots[j] ), psPrec ){
      if( internal::isApprox( calc_realRoots[i], real_roots[j], psPrec ) ){
        found = true; }
    }
    VERIFY( found );
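The fixed comparison above operates on roots returned by PolynomialSolver::realRoots(). A minimal standalone sketch of that API (illustrative only, not part of this changeset; the polynomial is made up) could look like:

    #include <unsupported/Eigen/Polynomials>
    #include <iostream>
    #include <vector>

    int main()
    {
      // p(x) = 2 - 3x + x^2 = (x-1)(x-2); coefficients in increasing order of degree.
      Eigen::Vector3d coeffs(2.0, -3.0, 1.0);
      Eigen::PolynomialSolver<double, 2> solver(coeffs);
      std::vector<double> realRoots;
      solver.realRoots(realRoots);              // keeps only the numerically real roots
      for (std::size_t i = 0; i < realRoots.size(); ++i)
        std::cout << realRoots[i] << "\n";      // expect ~1 and ~2
    }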