merge and add start/end to Eigen2Support

2025-09-11 00:43:13 +08:00 · 2010-01-05 13:07:32 +01:00 · 2010-01-05 13:07:32 +01:00 · 9d9e00b608
commit 9d9e00b608
parent 90d2ae7fec 51b8f014f3
78 changed files with 1072 additions and 592 deletions
--- a/Eigen/Core
+++ b/Eigen/Core
@ -59,6 +59,10 @@
  #define EIGEN_DONT_VECTORIZE
 #endif
 #ifdef __clang__
 #define EIGEN_DONT_VECTORIZE
 #endif
 #ifndef EIGEN_DONT_VECTORIZE
  #if defined (EIGEN_SSE2_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
    #define EIGEN_VECTORIZE
--- a/Eigen/src/Array/VectorwiseOp.h
+++ b/Eigen/src/Array/VectorwiseOp.h
@ -384,7 +384,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
    const Reverse<ExpressionType, Direction> reverse() const
    { return Reverse<ExpressionType, Direction>( _expression() ); }
-    const Replicate<ExpressionType,Direction==Vertical?Dynamic:1,Direction==Horizontal?Dynamic:1>
+    const Replicate<ExpressionType,(Direction==Vertical?Dynamic:1),(Direction==Horizontal?Dynamic:1)>
    replicate(int factor) const;
    /** \nonstableyet
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@ -194,7 +194,7 @@ LDLT<MatrixType>& LDLT<MatrixType>::compute(const MatrixType& a)
  {
    // Find largest diagonal element
    int index_of_biggest_in_corner;
-    biggest_in_corner = m_matrix.diagonal().end(size-j).cwiseAbs()
+    biggest_in_corner = m_matrix.diagonal().tail(size-j).cwiseAbs()
                       .maxCoeff(&index_of_biggest_in_corner);
    index_of_biggest_in_corner += j;
@ -227,12 +227,12 @@ LDLT<MatrixType>& LDLT<MatrixType>::compute(const MatrixType& a)
    if (j == 0) {
      m_matrix.row(0) = m_matrix.row(0).conjugate();
-      m_matrix.col(0).end(size-1) = m_matrix.row(0).end(size-1) / m_matrix.coeff(0,0);
+      m_matrix.col(0).tail(size-1) = m_matrix.row(0).tail(size-1) / m_matrix.coeff(0,0);
      continue;
    }
-    RealScalar Djj = ei_real(m_matrix.coeff(j,j) -  m_matrix.row(j).start(j)
+    RealScalar Djj = ei_real(m_matrix.coeff(j,j) -  m_matrix.row(j).head(j)
-                                               .dot(m_matrix.col(j).start(j)));
+                                               .dot(m_matrix.col(j).head(j)));
    m_matrix.coeffRef(j,j) = Djj;
    // Finish early if the matrix is not full rank.
@ -244,13 +244,13 @@ LDLT<MatrixType>& LDLT<MatrixType>::compute(const MatrixType& a)
    int endSize = size - j - 1;
    if (endSize > 0) {
-      _temporary.end(endSize).noalias() = m_matrix.block(j+1,0, endSize, j)
+      _temporary.tail(endSize).noalias() = m_matrix.block(j+1,0, endSize, j)
-                                * m_matrix.col(j).start(j).conjugate();
+                                * m_matrix.col(j).head(j).conjugate();
-      m_matrix.row(j).end(endSize) = m_matrix.row(j).end(endSize).conjugate()
+      m_matrix.row(j).tail(endSize) = m_matrix.row(j).tail(endSize).conjugate()
-                                   - _temporary.end(endSize).transpose();
+                                   - _temporary.tail(endSize).transpose();
-      m_matrix.col(j).end(endSize) = m_matrix.row(j).end(endSize) / Djj;
+      m_matrix.col(j).tail(endSize) = m_matrix.row(j).tail(endSize) / Djj;
    }
  }
--- a/Eigen/src/Cholesky/LLT.h
+++ b/Eigen/src/Cholesky/LLT.h
@ -166,7 +166,7 @@ template<> struct ei_llt_inplace<LowerTriangular>
      Block<MatrixType,Dynamic,Dynamic> A20(mat,k+1,0,rs,k);
      RealScalar x = ei_real(mat.coeff(k,k));
-      if (k>0) x -= mat.row(k).start(k).squaredNorm();
+      if (k>0) x -= mat.row(k).head(k).squaredNorm();
      if (x<=RealScalar(0))
        return false;
      mat.coeffRef(k,k) = x = ei_sqrt(x);
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@ -49,6 +49,7 @@ private:
    InnerMaxSize = int(Derived::Flags)&RowMajorBit
              ? Derived::MaxColsAtCompileTime
              : Derived::MaxRowsAtCompileTime,
    MaxSizeAtCompileTime = ei_size_at_compile_time<Derived::MaxColsAtCompileTime,Derived::MaxRowsAtCompileTime>::ret,
    PacketSize = ei_packet_traits<typename Derived::Scalar>::size
  };
@ -60,9 +61,9 @@ private:
                       && int(DstIsAligned) && int(SrcIsAligned),
    MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
    MayLinearVectorize = MightVectorize && MayLinearize
-                                        && (DstIsAligned || InnerMaxSize == Dynamic),
+                                        && (DstIsAligned || MaxSizeAtCompileTime == Dynamic),
      /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
-         so it's only good for large enough sizes. See remark below about InnerMaxSize. */
+         so it's only good for large enough sizes. */
    MaySliceVectorize  = MightVectorize && int(InnerMaxSize)>=3*PacketSize
      /* slice vectorization can be slow, so we only want it if the slices are big, which is
         indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
@ -385,7 +386,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling
    const int size = dst.size();
    const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
    const int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
-                           : ei_alignmentOffset(&dst.coeffRef(0), size);
+                           : ei_first_aligned(&dst.coeffRef(0), size);
    const int alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
    for(int index = 0; index < alignedStart; ++index)
@ -430,7 +431,7 @@ struct ei_assign_impl<Derived1, Derived2, SliceVectorizedTraversal, NoUnrolling>
    const int outerSize = dst.outerSize();
    const int alignedStep = (packetSize - dst.stride() % packetSize) & packetAlignedMask;
    int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
-                     : ei_alignmentOffset(&dst.coeffRef(0,0), innerSize);
+                     : ei_first_aligned(&dst.coeffRef(0,0), innerSize);
    for(int i = 0; i < outerSize; ++i)
    {
@ -480,11 +481,11 @@ EIGEN_STRONG_INLINE Derived& DenseBase<Derived>
  EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived,OtherDerived)
  EIGEN_STATIC_ASSERT((ei_is_same_type<typename Derived::Scalar, typename OtherDerived::Scalar>::ret),
    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
  ei_assert(rows() == other.rows() && cols() == other.cols());
  ei_assign_impl<Derived, OtherDerived>::run(derived(),other.derived());
 #ifdef EIGEN_DEBUG_ASSIGN
  ei_assign_traits<Derived, OtherDerived>::debug();
 #endif
  ei_assert(rows() == other.rows() && cols() == other.cols());
  ei_assign_impl<Derived, OtherDerived>::run(derived(),other.derived());
 #ifndef EIGEN_NO_DEBUG
  checkTransposeAliasing(other.derived());
 #endif
--- a/Eigen/src/Core/Coeffs.h
+++ b/Eigen/src/Core/Coeffs.h
@ -379,36 +379,33 @@ EIGEN_STRONG_INLINE void DenseBase<Derived>::copyPacket(int index, const DenseBa
    other.derived().template packet<LoadMode>(index));
 }
-
+template<typename Derived, bool JustReturnZero>
-template<typename Derived, typename Integer, bool JustReturnZero>
+struct ei_first_aligned_impl
 struct ei_alignmentOffset_impl
 {
-  inline static Integer run(const DenseBase<Derived>&, Integer)
+  inline static int run(const DenseBase<Derived>&)
  { return 0; }
 };
-template<typename Derived, typename Integer>
+template<typename Derived>
-struct ei_alignmentOffset_impl<Derived, Integer, false>
+struct ei_first_aligned_impl<Derived, false>
 {
-  inline static Integer run(const DenseBase<Derived>& m, Integer maxOffset)
+  inline static int run(const DenseBase<Derived>& m)
  {
-    return ei_alignmentOffset(&m.const_cast_derived().coeffRef(0,0), maxOffset);
+    return ei_first_aligned(&m.const_cast_derived().coeffRef(0,0), m.size());
  }
 };
-/** \internal \returns the number of elements which have to be skipped, starting
+/** \internal \returns the index of the first element of the array that is well aligned for vectorization.
  * from the address of coeffRef(0,0), to find the first 16-byte aligned element.
  *
-  * \note If the expression doesn't have the DirectAccessBit, this function returns 0.
+  * There is also the variant ei_first_aligned(const Scalar*, Integer) defined in Memory.h. See it for more
-  *
+  * documentation.
  * There is also the variant ei_alignmentOffset(const Scalar*, Integer) defined in Memory.h.
  */
-template<typename Derived, typename Integer>
+template<typename Derived>
-inline static Integer ei_alignmentOffset(const DenseBase<Derived>& m, Integer maxOffset)
+inline static int ei_first_aligned(const DenseBase<Derived>& m)
 {
-  return ei_alignmentOffset_impl<Derived, Integer,
+  return ei_first_aligned_impl
-                                 (Derived::Flags & AlignedBit) || !(Derived::Flags & DirectAccessBit)>
+           <Derived, (Derived::Flags & AlignedBit) || !(Derived::Flags & DirectAccessBit)>
-                                 ::run(m, maxOffset);
+         ::run(m);
 }
 #endif
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@ -290,11 +290,11 @@ template<typename Derived> class DenseBase
    VectorBlock<Derived> segment(int start, int size);
    const VectorBlock<Derived> segment(int start, int size) const;
-    VectorBlock<Derived> start(int size);
+    VectorBlock<Derived> head(int size);
-    const VectorBlock<Derived> start(int size) const;
+    const VectorBlock<Derived> head(int size) const;
-    VectorBlock<Derived> end(int size);
+    VectorBlock<Derived> tail(int size);
-    const VectorBlock<Derived> end(int size) const;
+    const VectorBlock<Derived> tail(int size) const;
    typename BlockReturnType<Derived>::Type corner(CornerType type, int cRows, int cCols);
    const typename BlockReturnType<Derived>::Type corner(CornerType type, int cRows, int cCols) const;
@ -309,11 +309,11 @@ template<typename Derived> class DenseBase
    template<int CRows, int CCols>
    const typename BlockReturnType<Derived, CRows, CCols>::Type corner(CornerType type) const;
-    template<int Size> VectorBlock<Derived,Size> start(void);
+    template<int Size> VectorBlock<Derived,Size> head(void);
-    template<int Size> const VectorBlock<Derived,Size> start() const;
+    template<int Size> const VectorBlock<Derived,Size> head() const;
-    template<int Size> VectorBlock<Derived,Size> end();
+    template<int Size> VectorBlock<Derived,Size> tail();
-    template<int Size> const VectorBlock<Derived,Size> end() const;
+    template<int Size> const VectorBlock<Derived,Size> tail() const;
    template<int Size> VectorBlock<Derived,Size> segment(int start);
    template<int Size> const VectorBlock<Derived,Size> segment(int start) const;
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@ -218,7 +218,7 @@ inline float ei_norm1(const std::complex<float> &x) { return(ei_abs(x.real()) +
 inline std::complex<float> ei_exp(std::complex<float> x)  { return std::exp(x); }
 inline std::complex<float> ei_sin(std::complex<float> x)  { return std::sin(x); }
 inline std::complex<float> ei_cos(std::complex<float> x)  { return std::cos(x); }
-inline std::complex<float> ei_atan2(std::complex<float>, std::complex<float> )  { ei_assert(false); return 0; }
+inline std::complex<float> ei_atan2(std::complex<float>, std::complex<float> )  { ei_assert(false); return 0.f; }
 template<> inline std::complex<float> ei_random()
 {
@ -255,7 +255,7 @@ inline double ei_norm1(const std::complex<double> &x) { return(ei_abs(x.real())
 inline std::complex<double> ei_exp(std::complex<double> x)  { return std::exp(x); }
 inline std::complex<double> ei_sin(std::complex<double> x)  { return std::sin(x); }
 inline std::complex<double> ei_cos(std::complex<double> x)  { return std::cos(x); }
-inline std::complex<double> ei_atan2(std::complex<double>, std::complex<double>)  { ei_assert(false); return 0; }
+inline std::complex<double> ei_atan2(std::complex<double>, std::complex<double>)  { ei_assert(false); return 0.; }
 template<> inline std::complex<double> ei_random()
 {
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@ -397,6 +397,15 @@ template<typename Derived> class MatrixBase
    inline const Cwise<Derived> cwise() const;
    inline Cwise<Derived> cwise();
    VectorBlock<Derived> start(int size);
    const VectorBlock<Derived> start(int size) const;
    VectorBlock<Derived> end(int size);
    const VectorBlock<Derived> end(int size) const;
    template<int Size> VectorBlock<Derived,Size> start();
    template<int Size> const VectorBlock<Derived,Size> start() const;
    template<int Size> VectorBlock<Derived,Size> end();
    template<int Size> const VectorBlock<Derived,Size> end() const;
    template<typename OtherDerived>
    typename ei_plain_matrix_type_column_major<OtherDerived>::type
    solveTriangular(const MatrixBase<OtherDerived>& other) const;
--- a/Eigen/src/Core/MatrixStorage.h
+++ b/Eigen/src/Core/MatrixStorage.h
@ -98,7 +98,7 @@ template<typename T, int _Rows, int _Cols, int _Options> class ei_matrix_storage
    inline explicit ei_matrix_storage() {}
    inline ei_matrix_storage(ei_constructor_without_unaligned_array_assert) {}
    inline ei_matrix_storage(int,int,int) {}
-    inline void swap(ei_matrix_storage& other) {}
+    inline void swap(ei_matrix_storage& ) {}
    inline static int rows(void) {return _Rows;}
    inline static int cols(void) {return _Cols;}
    inline void resize(int,int,int) {}
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@ -209,7 +209,7 @@ struct ei_redux_impl<Func, Derived, LinearVectorizedTraversal, NoUnrolling>
  {
    const int size = mat.size();
    const int packetSize = ei_packet_traits<Scalar>::size;
-    const int alignedStart = ei_alignmentOffset(mat,size);
+    const int alignedStart = ei_first_aligned(mat);
    enum {
      alignment = (Derived::Flags & DirectAccessBit) || (Derived::Flags & AlignedBit)
                ? Aligned : Unaligned
--- a/Eigen/src/Core/SolveTriangular.h
+++ b/Eigen/src/Core/SolveTriangular.h
@ -25,13 +25,27 @@
 #ifndef EIGEN_SOLVETRIANGULAR_H
 #define EIGEN_SOLVETRIANGULAR_H
 template<typename Lhs, typename Rhs, int Side>
 class ei_trsolve_traits
 {
  private:
    enum {
      RhsIsVectorAtCompileTime = (Side==OnTheLeft ? Rhs::ColsAtCompileTime : Rhs::RowsAtCompileTime)==1
    };
  public:
    enum {
      Unrolling   = (RhsIsVectorAtCompileTime && Rhs::SizeAtCompileTime <= 8)
                  ? CompleteUnrolling : NoUnrolling,
      RhsVectors  = RhsIsVectorAtCompileTime ? 1 : Dynamic
    };
 };
 template<typename Lhs, typename Rhs,
  int Mode, // can be Upper/Lower | UnitDiag
  int Side, // can be OnTheLeft/OnTheRight
-  int Unrolling = Rhs::IsVectorAtCompileTime && Rhs::SizeAtCompileTime <= 8 // FIXME
+  int Mode, // can be Upper/Lower | UnitDiag
-                ? CompleteUnrolling : NoUnrolling,
+  int Unrolling = ei_trsolve_traits<Lhs,Rhs,Side>::Unrolling,
  int StorageOrder = (int(Lhs::Flags) & RowMajorBit) ? RowMajor : ColMajor,
-  int RhsCols = Rhs::ColsAtCompileTime
+  int RhsVectors = ei_trsolve_traits<Lhs,Rhs,Side>::RhsVectors
  >
 struct ei_triangular_solver_selector;
@ -142,12 +156,24 @@ struct ei_triangular_solver_selector<Lhs,Rhs,OnTheLeft,Mode,NoUnrolling,ColMajor
  }
 };
 // transpose OnTheRight cases for vectors
 template<typename Lhs, typename Rhs, int Mode, int Unrolling, int StorageOrder>
 struct ei_triangular_solver_selector<Lhs,Rhs,OnTheRight,Mode,Unrolling,StorageOrder,1>
 {
  static void run(const Lhs& lhs, Rhs& rhs)
  {
    Transpose<Rhs> rhsTr(rhs);
    Transpose<Lhs> lhsTr(lhs);
    ei_triangular_solver_selector<Transpose<Lhs>,Transpose<Rhs>,OnTheLeft,TriangularView<Lhs,Mode>::TransposeMode>::run(lhsTr,rhsTr);
  }
 };
 template <typename Scalar, int Side, int Mode, bool Conjugate, int TriStorageOrder, int OtherStorageOrder>
 struct ei_triangular_solve_matrix;
 // the rhs is a matrix
-template<typename Lhs, typename Rhs, int Side, int Mode, int StorageOrder, int RhsCols>
+template<typename Lhs, typename Rhs, int Side, int Mode, int StorageOrder>
-struct ei_triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,StorageOrder,RhsCols>
+struct ei_triangular_solver_selector<Lhs,Rhs,Side,Mode,NoUnrolling,StorageOrder,Dynamic>
 {
  typedef typename Rhs::Scalar Scalar;
  typedef ei_blas_traits<Lhs> LhsProductTraits;
--- a/Eigen/src/Core/StableNorm.h
+++ b/Eigen/src/Core/StableNorm.h
@ -65,9 +65,9 @@ MatrixBase<Derived>::stableNorm() const
  int bi=0;
  if ((int(Flags)&DirectAccessBit) && !(int(Flags)&AlignedBit))
  {
-    bi = ei_alignmentOffset(&const_cast_derived().coeffRef(0), n);
+    bi = ei_first_aligned(&const_cast_derived().coeffRef(0), n);
    if (bi>0)
-      ei_stable_norm_kernel(this->start(bi), ssq, scale, invScale);
+      ei_stable_norm_kernel(this->head(bi), ssq, scale, invScale);
  }
  for (; bi<n; bi+=blockSize)
    ei_stable_norm_kernel(this->segment(bi,std::min(blockSize, n - bi)).template forceAlignedAccessIf<Alignment>(), ssq, scale, invScale);
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@ -95,12 +95,14 @@ template<typename Derived> class TriangularBase : public AnyMatrixBase<Derived>
                || ((Mode==StrictlyLowerTriangular || Mode==UnitLowerTriangular) && col<row));
    }
    #ifdef EIGEN_INTERNAL_DEBUGGING
    void check_coordinates_internal(int row, int col)
    {
      #ifdef EIGEN_INTERNAL_DEBUGGING
      check_coordinates(row, col);
      #endif
    }
    #else
    void check_coordinates_internal(int , int ) {}
    #endif
 };
--- a/Eigen/src/Core/VectorBlock.h
+++ b/Eigen/src/Core/VectorBlock.h
@ -154,16 +154,16 @@ DenseBase<Derived>::segment(int start, int size) const
  */
 template<typename Derived>
 inline VectorBlock<Derived>
-DenseBase<Derived>::start(int size)
+DenseBase<Derived>::head(int size)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return VectorBlock<Derived>(derived(), 0, size);
 }
-/** This is the const version of start(int).*/
+/** This is the const version of head(int).*/
 template<typename Derived>
 inline const VectorBlock<Derived>
-DenseBase<Derived>::start(int size) const
+DenseBase<Derived>::head(int size) const
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return VectorBlock<Derived>(derived(), 0, size);
@ -186,16 +186,16 @@ DenseBase<Derived>::start(int size) const
  */
 template<typename Derived>
 inline VectorBlock<Derived>
-DenseBase<Derived>::end(int size)
+DenseBase<Derived>::tail(int size)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return VectorBlock<Derived>(derived(), this->size() - size, size);
 }
-/** This is the const version of end(int).*/
+/** This is the const version of tail(int).*/
 template<typename Derived>
 inline const VectorBlock<Derived>
-DenseBase<Derived>::end(int size) const
+DenseBase<Derived>::tail(int size) const
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return VectorBlock<Derived>(derived(), this->size() - size, size);
@ -247,17 +247,17 @@ DenseBase<Derived>::segment(int start) const
 template<typename Derived>
 template<int Size>
 inline VectorBlock<Derived,Size>
-DenseBase<Derived>::start()
+DenseBase<Derived>::head()
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return VectorBlock<Derived,Size>(derived(), 0);
 }
-/** This is the const version of start<int>().*/
+/** This is the const version of head<int>().*/
 template<typename Derived>
 template<int Size>
 inline const VectorBlock<Derived,Size>
-DenseBase<Derived>::start() const
+DenseBase<Derived>::head() const
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return VectorBlock<Derived,Size>(derived(), 0);
@ -277,17 +277,17 @@ DenseBase<Derived>::start() const
 template<typename Derived>
 template<int Size>
 inline VectorBlock<Derived,Size>
-DenseBase<Derived>::end()
+DenseBase<Derived>::tail()
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return VectorBlock<Derived, Size>(derived(), size() - Size);
 }
-/** This is the const version of end<int>.*/
+/** This is the const version of tail<int>.*/
 template<typename Derived>
 template<int Size>
 inline const VectorBlock<Derived,Size>
-DenseBase<Derived>::end() const
+DenseBase<Derived>::tail() const
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return VectorBlock<Derived, Size>(derived(), size() - Size);
--- a/Eigen/src/Core/products/GeneralMatrixVector.h
+++ b/Eigen/src/Core/products/GeneralMatrixVector.h
@ -69,7 +69,7 @@ void ei_cache_friendly_product_colmajor_times_vector(
  // How many coeffs of the result do we have to skip to be aligned.
  // Here we assume data are at least aligned on the base scalar type.
-  int alignedStart = ei_alignmentOffset(res,size);
+  int alignedStart = ei_first_aligned(res,size);
  int alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0;
  const int peeledSize  = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
@ -79,7 +79,7 @@ void ei_cache_friendly_product_colmajor_times_vector(
                       : FirstAligned;
  // we cannot assume the first element is aligned because of sub-matrices
-  const int lhsAlignmentOffset = ei_alignmentOffset(lhs,size);
+  const int lhsAlignmentOffset = ei_first_aligned(lhs,size);
  // find how many columns do we have to skip to be aligned with the result (if possible)
  int skipColumns = 0;
@ -282,7 +282,7 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
  // How many coeffs of the result do we have to skip to be aligned.
  // Here we assume data are at least aligned on the base scalar type
  // if that's not the case then vectorization is discarded, see below.
-  int alignedStart = ei_alignmentOffset(rhs, size);
+  int alignedStart = ei_first_aligned(rhs, size);
  int alignedSize = PacketSize>1 ? alignedStart + ((size-alignedStart) & ~PacketAlignedMask) : 0;
  const int peeledSize  = peels>1 ? alignedStart + ((alignedSize-alignedStart) & ~PeelAlignedMask) : alignedStart;
@ -292,7 +292,7 @@ static EIGEN_DONT_INLINE void ei_cache_friendly_product_rowmajor_times_vector(
                       : FirstAligned;
  // we cannot assume the first element is aligned because of sub-matrices
-  const int lhsAlignmentOffset = ei_alignmentOffset(lhs,size);
+  const int lhsAlignmentOffset = ei_first_aligned(lhs,size);
  // find how many rows do we have to skip to be aligned with rhs (if possible)
  int skipRows = 0;
--- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h
@ -313,7 +313,6 @@ struct ei_product_selfadjoint_matrix<Scalar,LhsStorageOrder,false,ConjugateLhs,
    int size = cols;
    ei_const_blas_data_mapper<Scalar, LhsStorageOrder> lhs(_lhs,lhsStride);
    ei_const_blas_data_mapper<Scalar, RhsStorageOrder> rhs(_rhs,rhsStride);
    if (ConjugateRhs)
      alpha = ei_conj(alpha);
--- a/Eigen/src/Core/products/SelfadjointMatrixVector.h
+++ b/Eigen/src/Core/products/SelfadjointMatrixVector.h
@ -86,7 +86,7 @@ static EIGEN_DONT_INLINE void ei_product_selfadjoint_vector(
    size_t starti = FirstTriangular ? 0 : j+2;
    size_t endi   = FirstTriangular ? j : size;
    size_t alignedEnd = starti;
-    size_t alignedStart = (starti) + ei_alignmentOffset(&res[starti], endi-starti);
+    size_t alignedStart = (starti) + ei_first_aligned(&res[starti], endi-starti);
    alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
    res[j] += cj0.pmul(A0[j], t0);
--- a/Eigen/src/Core/products/SelfadjointRank2Update.h
+++ b/Eigen/src/Core/products/SelfadjointRank2Update.h
@ -41,8 +41,8 @@ struct ei_selfadjoint_rank2_update_selector<Scalar,UType,VType,LowerTriangular>
    for (int i=0; i<size; ++i)
    {
      Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i+i, size-i) +=
-                        (alpha * ei_conj(u.coeff(i))) * v.end(size-i)
+                        (alpha * ei_conj(u.coeff(i))) * v.tail(size-i)
-                      + (alpha * ei_conj(v.coeff(i))) * u.end(size-i);
+                      + (alpha * ei_conj(v.coeff(i))) * u.tail(size-i);
    }
  }
 };
@ -55,8 +55,8 @@ struct ei_selfadjoint_rank2_update_selector<Scalar,UType,VType,UpperTriangular>
    const int size = u.size();
    for (int i=0; i<size; ++i)
      Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i, i+1) +=
-                        (alpha * ei_conj(u.coeff(i))) * v.start(i+1)
+                        (alpha * ei_conj(u.coeff(i))) * v.head(i+1)
-                      + (alpha * ei_conj(v.coeff(i))) * u.start(i+1);
+                      + (alpha * ei_conj(v.coeff(i))) * u.head(i+1);
  }
 };
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@ -223,7 +223,8 @@ struct ei_blas_traits<Transpose<NestedXpr> >
  typedef typename NestedXpr::Scalar Scalar;
  typedef ei_blas_traits<NestedXpr> Base;
  typedef Transpose<NestedXpr> XprType;
-  typedef Transpose<typename Base::_ExtractType> ExtractType;
+  typedef Transpose<typename Base::_ExtractType>  ExtractType;
  typedef Transpose<typename Base::_ExtractType> _ExtractType;
  typedef typename ei_meta_if<int(Base::ActualAccess)==HasDirectAccess,
    ExtractType,
    typename ExtractType::PlainMatrixType
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@ -209,27 +209,53 @@ template<typename T, bool Align> inline void ei_conditional_aligned_delete(T *pt
  ei_conditional_aligned_free<Align>(ptr);
 }
-/** \internal \returns the number of elements which have to be skipped to
+/** \internal \returns the index of the first element of the array that is well aligned for vectorization.
  * find the first 16-byte aligned element
  *
-  * There is also the variant ei_alignmentOffset(const MatrixBase&, Integer) defined in Coeffs.h.
+  * \param array the address of the start of the array
  * \param size the size of the array
  *
  * \note If no element of the array is well aligned, the size of the array is returned. Typically,
  * for example with SSE, "well aligned" means 16-byte-aligned. If vectorization is disabled or if the
  * packet size for the given scalar type is 1, then everything is considered well-aligned.
  *
  * \note If the scalar type is vectorizable, we rely on the following assumptions: sizeof(Scalar) is a
  * power of 2, the packet size in bytes is also a power of 2, and is a multiple of sizeof(Scalar). On the
  * other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails for
  * example with Scalar=double on certain 32-bit platforms, see bug #79.
  *
  * There is also the variant ei_first_aligned(const MatrixBase&, Integer) defined in Coeffs.h.
  */
 template<typename Scalar, typename Integer>
-inline static Integer ei_alignmentOffset(const Scalar* ptr, Integer maxOffset)
+inline static Integer ei_first_aligned(const Scalar* array, Integer size)
 {
  typedef typename ei_packet_traits<Scalar>::type Packet;
-  const Integer PacketSize = ei_packet_traits<Scalar>::size;
+  enum { PacketSize = ei_packet_traits<Scalar>::size,
-  const Integer PacketAlignedMask = PacketSize-1;
+         PacketAlignedMask = PacketSize-1
-  const bool Vectorized = PacketSize>1;
+  };
-  return Vectorized
+  
-          ? std::min<Integer>( (PacketSize - (Integer((size_t(ptr)/sizeof(Scalar))) & PacketAlignedMask))
+  if(PacketSize==1)
-                           & PacketAlignedMask, maxOffset)
+  {
-          : 0;
+    // Either there is no vectorization, or a packet consists of exactly 1 scalar so that all elements
    // of the array have the same aligment.
    return 0;
  }
  else if(size_t(array) & (sizeof(Scalar)-1))
  {
    // There is vectorization for this scalar type, but the array is not aligned to the size of a single scalar.
    // Consequently, no element of the array is well aligned.
    return size;
  }
  else
  {
    return std::min<Integer>( (PacketSize - (Integer((size_t(array)/sizeof(Scalar))) & PacketAlignedMask))
                           & PacketAlignedMask, size);
  }
 }
 /** \internal
  * ei_aligned_stack_alloc(SIZE) allocates an aligned buffer of SIZE bytes
-  * on the stack if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT.
+  * on the stack if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and
  * if stack allocation is supported by the platform (currently, this is linux only).
  * Otherwise the memory is allocated on the heap.
  * Data allocated with ei_aligned_stack_alloc \b must be freed by calling ei_aligned_stack_free(PTR,SIZE).
  * \code
@ -381,10 +407,10 @@ public:
        ei_aligned_free( p );
    }
-    bool operator!=(const aligned_allocator<T>& other) const
+    bool operator!=(const aligned_allocator<T>& ) const
    { return false; }
-    bool operator==(const aligned_allocator<T>& other) const
+    bool operator==(const aligned_allocator<T>& ) const
    { return true; }
 };
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@ -109,23 +109,6 @@ template<int _Rows, int _Cols> struct ei_size_at_compile_time
 * whereas ei_eval is a const reference in the case of a matrix
 */
 // template<typename Derived> class MatrixBase;
 // template<typename Derived> class ArrayBase;
 // template<typename Object> struct ei_is_matrix_or_array
 // {
 //   struct is_matrix  {int a[1];};
 //   struct is_array   {int a[2];};
 //   struct is_none    {int a[3];};
 //
 //   template<typename T>
 //   static is_matrix testBaseClass(const MatrixBase<T>*);
 //   template<typename T>
 //   static is_array  testBaseClass(const ArrayBase<T>*);
 // //   static is_none   testBaseClass(...);
 //
 //   enum {BaseClassType = sizeof(testBaseClass(static_cast<const Object*>(0)))};
 // };
 template<typename T, typename StorageType = typename ei_traits<T>::StorageType> class ei_plain_matrix_type;
 template<typename T, typename BaseClassType> struct ei_plain_matrix_type_dense;
 template<typename T> struct ei_plain_matrix_type<T,Dense>
--- a/Eigen/src/Eigen2Support/VectorBlock.h
+++ b/Eigen/src/Eigen2Support/VectorBlock.h
@ -0,0 +1,105 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr>
 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
 // License as published by the Free Software Foundation; either
 // version 3 of the License, or (at your option) any later version.
 //
 // Alternatively, you can redistribute it and/or
 // modify it under the terms of the GNU General Public License as
 // published by the Free Software Foundation; either version 2 of
 // the License, or (at your option) any later version.
 //
 // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
 // GNU General Public License for more details.
 //
 // You should have received a copy of the GNU Lesser General Public
 // License and a copy of the GNU General Public License along with
 // Eigen. If not, see <http://www.gnu.org/licenses/>.
 #ifndef EIGEN_VECTORBLOCK2_H
 #define EIGEN_VECTORBLOCK2_H
 /** \deprecated use DenseMase::start(int) */
 template<typename Derived>
 inline VectorBlock<Derived>
 MatrixBase<Derived>::start(int size)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return VectorBlock<Derived>(derived(), 0, size);
 }
 /** \deprecated use DenseMase::start(int) */
 template<typename Derived>
 inline const VectorBlock<Derived>
 MatrixBase<Derived>::start(int size) const
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return VectorBlock<Derived>(derived(), 0, size);
 }
 /** \deprecated use DenseMase::end(int) */
 template<typename Derived>
 inline VectorBlock<Derived>
 MatrixBase<Derived>::end(int size)
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return VectorBlock<Derived>(derived(), this->size() - size, size);
 }
 /** \deprecated use DenseMase::end(int) */
 template<typename Derived>
 inline const VectorBlock<Derived>
 MatrixBase<Derived>::end(int size) const
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return VectorBlock<Derived>(derived(), this->size() - size, size);
 }
 /** \deprecated use DenseMase::start() */
 template<typename Derived>
 template<int Size>
 inline VectorBlock<Derived,Size>
 MatrixBase<Derived>::start()
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return VectorBlock<Derived,Size>(derived(), 0);
 }
 /** \deprecated use DenseMase::start() */
 template<typename Derived>
 template<int Size>
 inline const VectorBlock<Derived,Size>
 MatrixBase<Derived>::start() const
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return VectorBlock<Derived,Size>(derived(), 0);
 }
 /** \deprecated use DenseMase::end() */
 template<typename Derived>
 template<int Size>
 inline VectorBlock<Derived,Size>
 MatrixBase<Derived>::end()
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return VectorBlock<Derived, Size>(derived(), size() - Size);
 }
 /** \deprecated use DenseMase::end() */
 template<typename Derived>
 template<int Size>
 inline const VectorBlock<Derived,Size>
 MatrixBase<Derived>::end() const
 {
  EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
  return VectorBlock<Derived, Size>(derived(), size() - Size);
 }
 #endif // EIGEN_VECTORBLOCK2_H
--- a/Eigen/src/Eigenvalues/ComplexEigenSolver.h
+++ b/Eigen/src/Eigenvalues/ComplexEigenSolver.h
@ -133,7 +133,7 @@ void ComplexEigenSolver<MatrixType>::compute(const MatrixType& matrix)
    for (int i=0; i<n; i++)
    {
      int k;
-      m_eivalues.cwiseAbs().end(n-i).minCoeff(&k);
+      m_eivalues.cwiseAbs().tail(n-i).minCoeff(&k);
      if (k != 0)
      {
        k += i;
--- a/Eigen/src/Eigenvalues/EigenSolver.h
+++ b/Eigen/src/Eigenvalues/EigenSolver.h
@ -620,7 +620,7 @@ void EigenSolver<MatrixType>::hqr2(MatrixType& matH)
          // Overflow control
          t = ei_abs(matH.coeff(i,n));
          if ((eps * t) * t > 1)
-            matH.col(n).end(nn-i) /= t;
+            matH.col(n).tail(nn-i) /= t;
        }
      }
    }
@ -708,7 +708,7 @@ void EigenSolver<MatrixType>::hqr2(MatrixType& matH)
    // in this algo low==0 and high==nn-1 !!
    if (i < low || i > high)
    {
-      m_eivec.row(i).end(nn-i) = matH.row(i).end(nn-i);
+      m_eivec.row(i).tail(nn-i) = matH.row(i).tail(nn-i);
    }
  }
--- a/Eigen/src/Eigenvalues/HessenbergDecomposition.h
+++ b/Eigen/src/Eigenvalues/HessenbergDecomposition.h
@ -1,7 +1,7 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
-// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
+// Copyright (C) 2008-2009 Gael Guennebaud <g.gael@free.fr>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
@ -55,25 +55,23 @@ template<typename _MatrixType> class HessenbergDecomposition
    };
    typedef Matrix<Scalar, SizeMinusOne, 1> CoeffVectorType;
    typedef Matrix<RealScalar, Size, 1> DiagonalType;
    typedef Matrix<RealScalar, SizeMinusOne, 1> SubDiagonalType;
    typedef typename Diagonal<MatrixType,0>::RealReturnType DiagonalReturnType;
    typedef typename Diagonal<
        Block<MatrixType,SizeMinusOne,SizeMinusOne>,0 >::RealReturnType SubDiagonalReturnType;
    /** This constructor initializes a HessenbergDecomposition object for
      * further use with HessenbergDecomposition::compute()
      */
    HessenbergDecomposition(int size = Size==Dynamic ? 2 : Size)
-      : m_matrix(size,size), m_hCoeffs(size-1)
+      : m_matrix(size,size)
-    {}
+    {
      if(size>1)
        m_hCoeffs.resize(size-1);
    }
    HessenbergDecomposition(const MatrixType& matrix)
-      : m_matrix(matrix),
+      : m_matrix(matrix)
        m_hCoeffs(matrix.cols()-1)
    {
      if(matrix.rows()<2)
        return;
      m_hCoeffs.resize(matrix.rows()-1,1);
      _compute(m_matrix, m_hCoeffs);
    }
@ -84,6 +82,8 @@ template<typename _MatrixType> class HessenbergDecomposition
    void compute(const MatrixType& matrix)
    {
      m_matrix = matrix;
      if(matrix.rows()<2)
        return;
      m_hCoeffs.resize(matrix.rows()-1,1);
      _compute(m_matrix, m_hCoeffs);
    }
@ -150,7 +150,7 @@ void HessenbergDecomposition<MatrixType>::_compute(MatrixType& matA, CoeffVector
    int remainingSize = n-i-1;
    RealScalar beta;
    Scalar h;
-    matA.col(i).end(remainingSize).makeHouseholderInPlace(h, beta);
+    matA.col(i).tail(remainingSize).makeHouseholderInPlace(h, beta);
    matA.col(i).coeffRef(i+1) = beta;
    hCoeffs.coeffRef(i) = h;
@ -159,11 +159,11 @@ void HessenbergDecomposition<MatrixType>::_compute(MatrixType& matA, CoeffVector
    // A = H A
    matA.corner(BottomRight, remainingSize, remainingSize)
-        .applyHouseholderOnTheLeft(matA.col(i).end(remainingSize-1), h, &temp.coeffRef(0));
+        .applyHouseholderOnTheLeft(matA.col(i).tail(remainingSize-1), h, &temp.coeffRef(0));
    // A = A H'
    matA.corner(BottomRight, n, remainingSize)
-        .applyHouseholderOnTheRight(matA.col(i).end(remainingSize-1).conjugate(), ei_conj(h), &temp.coeffRef(0));
+        .applyHouseholderOnTheRight(matA.col(i).tail(remainingSize-1).conjugate(), ei_conj(h), &temp.coeffRef(0));
  }
 }
@ -178,7 +178,7 @@ HessenbergDecomposition<MatrixType>::matrixQ() const
  for (int i = n-2; i>=0; i--)
  {
    matQ.corner(BottomRight,n-i-1,n-i-1)
-        .applyHouseholderOnTheLeft(m_matrix.col(i).end(n-i-2), ei_conj(m_hCoeffs.coeff(i)), &temp.coeffRef(0,0));
+        .applyHouseholderOnTheLeft(m_matrix.col(i).tail(n-i-2), ei_conj(m_hCoeffs.coeff(i)), &temp.coeffRef(0,0));
  }
  return matQ;
 }
--- a/Eigen/src/Eigenvalues/Tridiagonalization.h
+++ b/Eigen/src/Eigenvalues/Tridiagonalization.h
@ -197,25 +197,24 @@ void Tridiagonalization<MatrixType>::_compute(MatrixType& matA, CoeffVectorType&
 {
  assert(matA.rows()==matA.cols());
  int n = matA.rows();
  Matrix<Scalar,1,Dynamic> aux(n);
  for (int i = 0; i<n-1; ++i)
  {
    int remainingSize = n-i-1;
    RealScalar beta;
    Scalar h;
-    matA.col(i).end(remainingSize).makeHouseholderInPlace(h, beta);
+    matA.col(i).tail(remainingSize).makeHouseholderInPlace(h, beta);
    // Apply similarity transformation to remaining columns,
-    // i.e., A = H A H' where H = I - h v v' and v = matA.col(i).end(n-i-1)
+    // i.e., A = H A H' where H = I - h v v' and v = matA.col(i).tail(n-i-1)
    matA.col(i).coeffRef(i+1) = 1;
-    hCoeffs.end(n-i-1) = (matA.corner(BottomRight,remainingSize,remainingSize).template selfadjointView<LowerTriangular>()
+    hCoeffs.tail(n-i-1) = (matA.corner(BottomRight,remainingSize,remainingSize).template selfadjointView<LowerTriangular>()
-                        * (ei_conj(h) * matA.col(i).end(remainingSize)));
+                        * (ei_conj(h) * matA.col(i).tail(remainingSize)));
-    hCoeffs.end(n-i-1) += (ei_conj(h)*Scalar(-0.5)*(hCoeffs.end(remainingSize).dot(matA.col(i).end(remainingSize)))) * matA.col(i).end(n-i-1);
+    hCoeffs.tail(n-i-1) += (ei_conj(h)*Scalar(-0.5)*(hCoeffs.tail(remainingSize).dot(matA.col(i).tail(remainingSize)))) * matA.col(i).tail(n-i-1);
    matA.corner(BottomRight, remainingSize, remainingSize).template selfadjointView<LowerTriangular>()
-      .rankUpdate(matA.col(i).end(remainingSize), hCoeffs.end(remainingSize), -1);
+      .rankUpdate(matA.col(i).tail(remainingSize), hCoeffs.tail(remainingSize), -1);
    matA.col(i).coeffRef(i+1) = beta;
    hCoeffs.coeffRef(i) = h;
@ -243,7 +242,7 @@ void Tridiagonalization<MatrixType>::matrixQInPlace(MatrixBase<QDerived>* q) con
  for (int i = n-2; i>=0; i--)
  {
    matQ.corner(BottomRight,n-i-1,n-i-1)
-        .applyHouseholderOnTheLeft(m_matrix.col(i).end(n-i-2), ei_conj(m_hCoeffs.coeff(i)), &aux.coeffRef(0,0));
+        .applyHouseholderOnTheLeft(m_matrix.col(i).tail(n-i-2), ei_conj(m_hCoeffs.coeff(i)), &aux.coeffRef(0,0));
  }
 }
--- a/Eigen/src/Geometry/OrthoMethods.h
+++ b/Eigen/src/Geometry/OrthoMethods.h
@ -173,7 +173,7 @@ struct ei_unitOrthogonal_selector<Derived,3>
    if((!ei_isMuchSmallerThan(src.x(), src.z()))
    || (!ei_isMuchSmallerThan(src.y(), src.z())))
    {
-      RealScalar invnm = RealScalar(1)/src.template start<2>().norm();
+      RealScalar invnm = RealScalar(1)/src.template head<2>().norm();
      perp.coeffRef(0) = -ei_conj(src.y())*invnm;
      perp.coeffRef(1) = ei_conj(src.x())*invnm;
      perp.coeffRef(2) = 0;
@ -184,7 +184,7 @@ struct ei_unitOrthogonal_selector<Derived,3>
     */
    else
    {
-      RealScalar invnm = RealScalar(1)/src.template end<2>().norm();
+      RealScalar invnm = RealScalar(1)/src.template tail<2>().norm();
      perp.coeffRef(0) = 0;
      perp.coeffRef(1) = -ei_conj(src.z())*invnm;
      perp.coeffRef(2) = ei_conj(src.y())*invnm;
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@ -77,10 +77,10 @@ public:
  inline Scalar& w() { return this->derived().coeffs().coeffRef(3); }
  /** \returns a read-only vector expression of the imaginary part (x,y,z) */
-  inline const VectorBlock<Coefficients,3> vec() const { return coeffs().template start<3>(); }
+  inline const VectorBlock<Coefficients,3> vec() const { return coeffs().template head<3>(); }
  /** \returns a vector expression of the imaginary part (x,y,z) */
-  inline VectorBlock<Coefficients,3> vec() { return coeffs().template start<3>(); }
+  inline VectorBlock<Coefficients,3> vec() { return coeffs().template head<3>(); }
  /** \returns a read-only vector expression of the coefficients (x,y,z,w) */
  inline const typename ei_traits<Derived>::Coefficients& coeffs() const { return derived().coeffs(); }
--- a/Eigen/src/Geometry/Transform.h
+++ b/Eigen/src/Geometry/Transform.h
@ -1102,7 +1102,7 @@ struct ei_transform_right_product_impl<Other,AffineCompact, Dim,HDim, HDim,1>
  static ResultType run(const TransformType& tr, const Other& other)
  {
    ResultType res;
-    res.template start<HDim>() = tr.matrix() * other;
+    res.template head<HDim>() = tr.matrix() * other;
    res.coeffRef(Dim) = other.coeff(Dim);
  }
 };
@ -1120,7 +1120,7 @@ struct ei_transform_right_product_impl<Other,Mode, Dim,HDim, Dim,Dim>
    res.matrix().col(Dim) = tr.matrix().col(Dim);
    res.linearExt().noalias() = (tr.linearExt() * other);
    if(Mode==Affine)
-      res.matrix().row(Dim).template start<Dim>() = tr.matrix().row(Dim).template start<Dim>();
+      res.matrix().row(Dim).template head<Dim>() = tr.matrix().row(Dim).template head<Dim>();
    return res;
  }
 };
--- a/Eigen/src/Geometry/Umeyama.h
+++ b/Eigen/src/Geometry/Umeyama.h
@ -170,8 +170,8 @@ umeyama(const MatrixBase<Derived>& src, const MatrixBase<OtherDerived>& dst, boo
  // Eq. (41)
  // Note that we first assign dst_mean to the destination so that there no need
  // for a temporary.
-  Rt.col(m).start(m) = dst_mean;
+  Rt.col(m).head(m) = dst_mean;
-  Rt.col(m).start(m).noalias() -= c*Rt.corner(TopLeft,m,m)*src_mean;
+  Rt.col(m).head(m).noalias() -= c*Rt.corner(TopLeft,m,m)*src_mean;
  if (with_scaling) Rt.block(0,0,m,m) *= c;
--- a/Eigen/src/Householder/HouseholderSequence.h
+++ b/Eigen/src/Householder/HouseholderSequence.h
@ -105,10 +105,10 @@ template<typename VectorsType, typename CoeffsType> class HouseholderSequence
      {
        if(m_trans)
          dst.corner(BottomRight, length-k, length-k)
-          .applyHouseholderOnTheRight(m_vectors.col(k).end(length-k-1), m_coeffs.coeff(k), &temp.coeffRef(0));
+          .applyHouseholderOnTheRight(m_vectors.col(k).tail(length-k-1), m_coeffs.coeff(k), &temp.coeffRef(0));
        else
          dst.corner(BottomRight, length-k, length-k)
-            .applyHouseholderOnTheLeft(m_vectors.col(k).end(length-k-1), m_coeffs.coeff(k), &temp.coeffRef(k));
+            .applyHouseholderOnTheLeft(m_vectors.col(k).tail(length-k-1), m_coeffs.coeff(k), &temp.coeffRef(k));
      }
    }
@ -122,7 +122,7 @@ template<typename VectorsType, typename CoeffsType> class HouseholderSequence
      {
        int actual_k = m_trans ? vecs-k-1 : k;
        dst.corner(BottomRight, dst.rows(), length-actual_k)
-           .applyHouseholderOnTheRight(m_vectors.col(actual_k).end(length-actual_k-1), m_coeffs.coeff(actual_k), &temp.coeffRef(0));
+           .applyHouseholderOnTheRight(m_vectors.col(actual_k).tail(length-actual_k-1), m_coeffs.coeff(actual_k), &temp.coeffRef(0));
      }
    }
@ -136,7 +136,7 @@ template<typename VectorsType, typename CoeffsType> class HouseholderSequence
      {
        int actual_k = m_trans ? k : vecs-k-1;
        dst.corner(BottomRight, length-actual_k, dst.cols())
-           .applyHouseholderOnTheLeft(m_vectors.col(actual_k).end(length-actual_k-1), m_coeffs.coeff(actual_k), &temp.coeffRef(0));
+           .applyHouseholderOnTheLeft(m_vectors.col(actual_k).tail(length-actual_k-1), m_coeffs.coeff(actual_k), &temp.coeffRef(0));
      }
    }
--- a/Eigen/src/Jacobi/Jacobi.h
+++ b/Eigen/src/Jacobi/Jacobi.h
@ -318,7 +318,7 @@ void /*EIGEN_DONT_INLINE*/ ei_apply_rotation_in_the_plane(VectorX& _x, VectorY&
    typedef typename ei_packet_traits<Scalar>::type Packet;
    enum { PacketSize = ei_packet_traits<Scalar>::size, Peeling = 2 };
-    int alignedStart = ei_alignmentOffset(y, size);
+    int alignedStart = ei_first_aligned(y, size);
    int alignedEnd = alignedStart + ((size-alignedStart)/PacketSize)*PacketSize;
    const Packet pc = ei_pset1(Scalar(j.c()));
@ -336,7 +336,7 @@ void /*EIGEN_DONT_INLINE*/ ei_apply_rotation_in_the_plane(VectorX& _x, VectorY&
    Scalar* px = x + alignedStart;
    Scalar* py = y + alignedStart;
-    if(ei_alignmentOffset(x, size)==alignedStart)
+    if(ei_first_aligned(x, size)==alignedStart)
    {
      for(int i=alignedStart; i<alignedEnd; i+=PacketSize)
      {
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@ -451,9 +451,9 @@ FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix)
    // bottom-right corner by Gaussian elimination.
    if(k<rows-1)
-      m_lu.col(k).end(rows-k-1) /= m_lu.coeff(k,k);
+      m_lu.col(k).tail(rows-k-1) /= m_lu.coeff(k,k);
    if(k<size-1)
-      m_lu.block(k+1,k+1,rows-k-1,cols-k-1).noalias() -= m_lu.col(k).end(rows-k-1) * m_lu.row(k).end(cols-k-1);
+      m_lu.block(k+1,k+1,rows-k-1,cols-k-1).noalias() -= m_lu.col(k).tail(rows-k-1) * m_lu.row(k).tail(cols-k-1);
  }
  // the main loop is over, we still have to accumulate the transpositions to find the
@ -537,8 +537,8 @@ struct ei_kernel_retval<FullPivLU<_MatrixType> >
      m(dec().matrixLU().block(0, 0, rank(), cols));
    for(int i = 0; i < rank(); ++i)
    {
-      if(i) m.row(i).start(i).setZero();
+      if(i) m.row(i).head(i).setZero();
-      m.row(i).end(cols-i) = dec().matrixLU().row(pivots.coeff(i)).end(cols-i);
+      m.row(i).tail(cols-i) = dec().matrixLU().row(pivots.coeff(i)).tail(cols-i);
    }
    m.block(0, 0, rank(), rank());
    m.block(0, 0, rank(), rank()).template triangularView<StrictlyLowerTriangular>().setZero();
@ -558,7 +558,7 @@ struct ei_kernel_retval<FullPivLU<_MatrixType> >
      m.col(i).swap(m.col(pivots.coeff(i)));
    // see the negative sign in the next line, that's what we were talking about above.
-    for(int i = 0; i < rank(); ++i) dst.row(dec().permutationQ().indices().coeff(i)) = -m.row(i).end(dimker);
+    for(int i = 0; i < rank(); ++i) dst.row(dec().permutationQ().indices().coeff(i)) = -m.row(i).tail(dimker);
    for(int i = rank(); i < cols; ++i) dst.row(dec().permutationQ().indices().coeff(i)).setZero();
    for(int k = 0; k < dimker; ++k) dst.coeffRef(dec().permutationQ().indices().coeff(rank()+k), k) = Scalar(1);
  }
--- a/Eigen/src/LU/PartialPivLU.h
+++ b/Eigen/src/LU/PartialPivLU.h
@ -229,7 +229,7 @@ struct ei_partial_lu_impl
    {
      int row_of_biggest_in_col;
      RealScalar biggest_in_corner
-        = lu.col(k).end(rows-k).cwiseAbs().maxCoeff(&row_of_biggest_in_col);
+        = lu.col(k).tail(rows-k).cwiseAbs().maxCoeff(&row_of_biggest_in_col);
      row_of_biggest_in_col += k;
      if(biggest_in_corner == 0) // the pivot is exactly zero: the matrix is singular
@ -256,8 +256,8 @@ struct ei_partial_lu_impl
      {
        int rrows = rows-k-1;
        int rsize = size-k-1;
-        lu.col(k).end(rrows) /= lu.coeff(k,k);
+        lu.col(k).tail(rrows) /= lu.coeff(k,k);
-        lu.corner(BottomRight,rrows,rsize).noalias() -= lu.col(k).end(rrows) * lu.row(k).end(rsize);
+        lu.corner(BottomRight,rrows,rsize).noalias() -= lu.col(k).tail(rrows) * lu.row(k).tail(rsize);
      }
    }
    return true;
--- a/Eigen/src/QR/ColPivHouseholderQR.h
+++ b/Eigen/src/QR/ColPivHouseholderQR.h
@ -359,14 +359,14 @@ ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const
  {
    // first, we look up in our table colSqNorms which column has the biggest squared norm
    int biggest_col_index;
-    RealScalar biggest_col_sq_norm = colSqNorms.end(cols-k).maxCoeff(&biggest_col_index);
+    RealScalar biggest_col_sq_norm = colSqNorms.tail(cols-k).maxCoeff(&biggest_col_index);
    biggest_col_index += k;
    // since our table colSqNorms accumulates imprecision at every step, we must now recompute
    // the actual squared norm of the selected column.
    // Note that not doing so does result in solve() sometimes returning inf/nan values
    // when running the unit test with 1000 repetitions.
-    biggest_col_sq_norm = m_qr.col(biggest_col_index).end(rows-k).squaredNorm();
+    biggest_col_sq_norm = m_qr.col(biggest_col_index).tail(rows-k).squaredNorm();
    // we store that back into our table: it can't hurt to correct our table.
    colSqNorms.coeffRef(biggest_col_index) = biggest_col_sq_norm;
@ -379,7 +379,7 @@ ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const
    if(biggest_col_sq_norm < threshold_helper * (rows-k))
    {
      m_nonzero_pivots = k;
-      m_hCoeffs.end(size-k).setZero();
+      m_hCoeffs.tail(size-k).setZero();
      m_qr.corner(BottomRight,rows-k,cols-k)
          .template triangularView<StrictlyLowerTriangular>()
          .setZero();
@ -396,7 +396,7 @@ ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const
    // generate the householder vector, store it below the diagonal
    RealScalar beta;
-    m_qr.col(k).end(rows-k).makeHouseholderInPlace(m_hCoeffs.coeffRef(k), beta);
+    m_qr.col(k).tail(rows-k).makeHouseholderInPlace(m_hCoeffs.coeffRef(k), beta);
    // apply the householder transformation to the diagonal coefficient
    m_qr.coeffRef(k,k) = beta;
@ -406,10 +406,10 @@ ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const
    // apply the householder transformation
    m_qr.corner(BottomRight, rows-k, cols-k-1)
-        .applyHouseholderOnTheLeft(m_qr.col(k).end(rows-k-1), m_hCoeffs.coeffRef(k), &temp.coeffRef(k+1));
+        .applyHouseholderOnTheLeft(m_qr.col(k).tail(rows-k-1), m_hCoeffs.coeffRef(k), &temp.coeffRef(k+1));
    // update our table of squared norms of the columns
-    colSqNorms.end(cols-k-1) -= m_qr.row(k).end(cols-k-1).cwiseAbs2();
+    colSqNorms.tail(cols-k-1) -= m_qr.row(k).tail(cols-k-1).cwiseAbs2();
  }
  m_cols_permutation.setIdentity(cols);
--- a/Eigen/src/QR/FullPivHouseholderQR.h
+++ b/Eigen/src/QR/FullPivHouseholderQR.h
@ -306,7 +306,7 @@ FullPivHouseholderQR<MatrixType>& FullPivHouseholderQR<MatrixType>::compute(cons
    m_rows_transpositions.coeffRef(k) = row_of_biggest_in_corner;
    cols_transpositions.coeffRef(k) = col_of_biggest_in_corner;
    if(k != row_of_biggest_in_corner) {
-      m_qr.row(k).end(cols-k).swap(m_qr.row(row_of_biggest_in_corner).end(cols-k));
+      m_qr.row(k).tail(cols-k).swap(m_qr.row(row_of_biggest_in_corner).tail(cols-k));
      ++number_of_transpositions;
    }
    if(k != col_of_biggest_in_corner) {
@ -315,11 +315,11 @@ FullPivHouseholderQR<MatrixType>& FullPivHouseholderQR<MatrixType>::compute(cons
    }
    RealScalar beta;
-    m_qr.col(k).end(rows-k).makeHouseholderInPlace(m_hCoeffs.coeffRef(k), beta);
+    m_qr.col(k).tail(rows-k).makeHouseholderInPlace(m_hCoeffs.coeffRef(k), beta);
    m_qr.coeffRef(k,k) = beta;
    m_qr.corner(BottomRight, rows-k, cols-k-1)
-        .applyHouseholderOnTheLeft(m_qr.col(k).end(rows-k-1), m_hCoeffs.coeffRef(k), &temp.coeffRef(k+1));
+        .applyHouseholderOnTheLeft(m_qr.col(k).tail(rows-k-1), m_hCoeffs.coeffRef(k), &temp.coeffRef(k+1));
  }
  m_cols_permutation.setIdentity(cols);
@ -360,7 +360,7 @@ struct ei_solve_retval<FullPivHouseholderQR<_MatrixType>, Rhs>
      int remainingSize = rows-k;
      c.row(k).swap(c.row(dec().rowsTranspositions().coeff(k)));
      c.corner(BottomRight, remainingSize, rhs().cols())
-       .applyHouseholderOnTheLeft(dec().matrixQR().col(k).end(remainingSize-1),
+       .applyHouseholderOnTheLeft(dec().matrixQR().col(k).tail(remainingSize-1),
                                  dec().hCoeffs().coeff(k), &temp.coeffRef(0));
    }
@ -400,7 +400,7 @@ typename FullPivHouseholderQR<MatrixType>::MatrixQType FullPivHouseholderQR<Matr
  for (int k = size-1; k >= 0; k--)
  {
    res.block(k, k, rows-k, rows-k)
-       .applyHouseholderOnTheLeft(m_qr.col(k).end(rows-k-1), ei_conj(m_hCoeffs.coeff(k)), &temp.coeffRef(k));
+       .applyHouseholderOnTheLeft(m_qr.col(k).tail(rows-k-1), ei_conj(m_hCoeffs.coeff(k)), &temp.coeffRef(k));
    res.row(k).swap(res.row(m_rows_transpositions.coeff(k)));
  }
  return res;
--- a/Eigen/src/QR/HouseholderQR.h
+++ b/Eigen/src/QR/HouseholderQR.h
@ -197,12 +197,12 @@ HouseholderQR<MatrixType>& HouseholderQR<MatrixType>::compute(const MatrixType&
    int remainingCols = cols - k - 1;
    RealScalar beta;
-    m_qr.col(k).end(remainingRows).makeHouseholderInPlace(m_hCoeffs.coeffRef(k), beta);
+    m_qr.col(k).tail(remainingRows).makeHouseholderInPlace(m_hCoeffs.coeffRef(k), beta);
    m_qr.coeffRef(k,k) = beta;
    // apply H to remaining part of m_qr from the left
    m_qr.corner(BottomRight, remainingRows, remainingCols)
-        .applyHouseholderOnTheLeft(m_qr.col(k).end(remainingRows-1), m_hCoeffs.coeffRef(k), &temp.coeffRef(k+1));
+        .applyHouseholderOnTheLeft(m_qr.col(k).tail(remainingRows-1), m_hCoeffs.coeffRef(k), &temp.coeffRef(k+1));
  }
  m_isInitialized = true;
  return *this;
@ -226,7 +226,7 @@ struct ei_solve_retval<HouseholderQR<_MatrixType>, Rhs>
    // Note that the matrix Q = H_0^* H_1^*... so its inverse is Q^* = (H_0 H_1 ...)^T
    c.applyOnTheLeft(householderSequence(
      dec().matrixQR().corner(TopLeft,rows,rank),
-      dec().hCoeffs().start(rank)).transpose()
+      dec().hCoeffs().head(rank)).transpose()
    );
    dec().matrixQR()
--- a/Eigen/src/SVD/JacobiSVD.h
+++ b/Eigen/src/SVD/JacobiSVD.h
@ -342,7 +342,7 @@ JacobiSVD<MatrixType, Options>& JacobiSVD<MatrixType, Options>::compute(const Ma
  for(int i = 0; i < diagSize; i++)
  {
    int pos;
-    m_singularValues.end(diagSize-i).maxCoeff(&pos);
+    m_singularValues.tail(diagSize-i).maxCoeff(&pos);
    if(pos)
    {
      pos += i;
--- a/Eigen/src/SVD/SVD.h
+++ b/Eigen/src/SVD/SVD.h
@ -205,7 +205,7 @@ SVD<MatrixType>& SVD<MatrixType>::compute(const MatrixType& matrix)
    g = s = scale = 0.0;
    if (i < m)
    {
-      scale = A.col(i).end(m-i).cwiseAbs().sum();
+      scale = A.col(i).tail(m-i).cwiseAbs().sum();
      if (scale != Scalar(0))
      {
        for (k=i; k<m; k++)
@ -219,18 +219,18 @@ SVD<MatrixType>& SVD<MatrixType>::compute(const MatrixType& matrix)
        A(i, i)=f-g;
        for (j=l-1; j<n; j++)
        {
-          s = A.col(j).end(m-i).dot(A.col(i).end(m-i));
+          s = A.col(j).tail(m-i).dot(A.col(i).tail(m-i));
          f = s/h;
-          A.col(j).end(m-i) += f*A.col(i).end(m-i);
+          A.col(j).tail(m-i) += f*A.col(i).tail(m-i);
        }
-        A.col(i).end(m-i) *= scale;
+        A.col(i).tail(m-i) *= scale;
      }
    }
    W[i] = scale * g;
    g = s = scale = 0.0;
    if (i+1 <= m && i+1 != n)
    {
-      scale = A.row(i).end(n-l+1).cwiseAbs().sum();
+      scale = A.row(i).tail(n-l+1).cwiseAbs().sum();
      if (scale != Scalar(0))
      {
        for (k=l-1; k<n; k++)
@ -242,13 +242,13 @@ SVD<MatrixType>& SVD<MatrixType>::compute(const MatrixType& matrix)
        g = -sign(ei_sqrt(s),f);
        h = f*g - s;
        A(i,l-1) = f-g;
-        rv1.end(n-l+1) = A.row(i).end(n-l+1)/h;
+        rv1.tail(n-l+1) = A.row(i).tail(n-l+1)/h;
        for (j=l-1; j<m; j++)
        {
-          s = A.row(i).end(n-l+1).dot(A.row(j).end(n-l+1));
+          s = A.row(i).tail(n-l+1).dot(A.row(j).tail(n-l+1));
-          A.row(j).end(n-l+1) += s*rv1.end(n-l+1).transpose();
+          A.row(j).tail(n-l+1) += s*rv1.tail(n-l+1).transpose();
        }
-        A.row(i).end(n-l+1) *= scale;
+        A.row(i).tail(n-l+1) *= scale;
      }
    }
    anorm = std::max( anorm, (ei_abs(W[i])+ei_abs(rv1[i])) );
@ -265,12 +265,12 @@ SVD<MatrixType>& SVD<MatrixType>::compute(const MatrixType& matrix)
          V(j, i) = (A(i, j)/A(i, l))/g;
        for (j=l; j<n; j++)
        {
-          s = V.col(j).end(n-l).dot(A.row(i).end(n-l));
+          s = V.col(j).tail(n-l).dot(A.row(i).tail(n-l));
-          V.col(j).end(n-l) += s * V.col(i).end(n-l);
+          V.col(j).tail(n-l) += s * V.col(i).tail(n-l);
        }
      }
-      V.row(i).end(n-l).setZero();
+      V.row(i).tail(n-l).setZero();
-      V.col(i).end(n-l).setZero();
+      V.col(i).tail(n-l).setZero();
    }
    V(i, i) = 1.0;
    g = rv1[i];
@ -282,7 +282,7 @@ SVD<MatrixType>& SVD<MatrixType>::compute(const MatrixType& matrix)
    l = i+1;
    g = W[i];
    if (n-l>0)
-      A.row(i).end(n-l).setZero();
+      A.row(i).tail(n-l).setZero();
    if (g != Scalar(0.0))
    {
      g = Scalar(1.0)/g;
@ -290,15 +290,15 @@ SVD<MatrixType>& SVD<MatrixType>::compute(const MatrixType& matrix)
      {
        for (j=l; j<n; j++)
        {
-          s = A.col(j).end(m-l).dot(A.col(i).end(m-l));
+          s = A.col(j).tail(m-l).dot(A.col(i).tail(m-l));
          f = (s/A(i,i))*g;
-          A.col(j).end(m-i) += f * A.col(i).end(m-i);
+          A.col(j).tail(m-i) += f * A.col(i).tail(m-i);
        }
      }
-      A.col(i).end(m-i) *= g;
+      A.col(i).tail(m-i) *= g;
    }
    else
-      A.col(i).end(m-i).setZero();
+      A.col(i).tail(m-i).setZero();
    ++A(i,i);
  }
  // Diagonalization of the bidiagonal form: Loop over
@ -408,7 +408,7 @@ SVD<MatrixType>& SVD<MatrixType>::compute(const MatrixType& matrix)
    for (int i=0; i<n; i++)
    {
      int k;
-      W.end(n-i).maxCoeff(&k);
+      W.tail(n-i).maxCoeff(&k);
      if (k != 0)
      {
        k += i;
@ -451,8 +451,8 @@ struct ei_solve_retval<SVD<_MatrixType>, Rhs>
          aux.coeffRef(i) /= si;
      }
      const int minsize = std::min(dec().rows(),dec().cols());
-      dst.col(j).start(minsize) = aux.start(minsize);
+      dst.col(j).head(minsize) = aux.head(minsize);
-      if(dec().cols()>dec().rows()) dst.col(j).end(cols()-minsize).setZero();
+      if(dec().cols()>dec().rows()) dst.col(j).tail(cols()-minsize).setZero();
      dst.col(j) = dec().matrixV() * dst.col(j);
    }
  }
--- a/bench/btl/libs/eigen2/eigen2_interface.hh
+++ b/bench/btl/libs/eigen2/eigen2_interface.hh
@ -124,7 +124,7 @@ public :
      Scalar* A0 = dst.data() + j*dst.stride();
      int starti = j;
      int alignedEnd = starti;
-      int alignedStart = (starti) + ei_alignmentOffset(&A0[starti], size-starti);
+      int alignedStart = (starti) + ei_first_aligned(&A0[starti], size-starti);
      alignedEnd = alignedStart + ((size-alignedStart)/(2*PacketSize))*(PacketSize*2);
      // do the non-vectorizable part of the assignment
@ -153,14 +153,14 @@ public :
        else
          dst.copyCoeff(index, j, src);
      }
-      //dst.col(j).end(N-j) = src.col(j).end(N-j);
+      //dst.col(j).tail(N-j) = src.col(j).tail(N-j);
    }
  }
  static EIGEN_DONT_INLINE void syr2(gene_matrix & A,  gene_vector & X, gene_vector & Y, int N){
    // ei_product_selfadjoint_rank2_update<real,0,LowerTriangularBit>(N,A.data(),N, X.data(), 1, Y.data(), 1, -1);
    for(int j=0; j<N; ++j)
-      A.col(j).end(N-j) += X[j] * Y.end(N-j) + Y[j] * X.end(N-j);
+      A.col(j).tail(N-j) += X[j] * Y.tail(N-j) + Y[j] * X.tail(N-j);
  }
  static EIGEN_DONT_INLINE void ger(gene_matrix & A,  gene_vector & X, gene_vector & Y, int N){
--- a/bench/btl/libs/hand_vec/hand_vec_interface.hh
+++ b/bench/btl/libs/hand_vec/hand_vec_interface.hh
@ -265,7 +265,7 @@ public :
      int starti = j+2;
      int alignedEnd = starti;
-      int alignedStart = (starti) + ei_alignmentOffset(&X[starti], N-starti);
+      int alignedStart = (starti) + ei_first_aligned(&X[starti], N-starti);
      alignedEnd = alignedStart + ((N-alignedStart)/(PacketSize))*(PacketSize);
      X[j]   += t0 * A0[j];
--- a/disabled/Householder.h
+++ b/disabled/Householder.h
@ -16,8 +16,8 @@ void ei_compute_householder(const InputVector& x, OutputVector *v, typename Outp
  typedef typename OutputVector::RealScalar RealScalar;
  ei_assert(x.size() == v->size()+1);
  int n = x.size();
-  RealScalar sigma = x.end(n-1).squaredNorm();
+  RealScalar sigma = x.tail(n-1).squaredNorm();
-  *v = x.end(n-1);
+  *v = x.tail(n-1);
  // the big assumption in this code is that ei_abs2(x->coeff(0)) is not much smaller than sigma.
  if(ei_isMuchSmallerThan(sigma, ei_abs2(x.coeff(0))))
  {
--- a/doc/AsciiQuickReference.txt
+++ b/doc/AsciiQuickReference.txt
@ -41,8 +41,8 @@ A.setIdentity();  // Fill A with the identity.
 // Eigen                           // Matlab
 x.start(n)                         // x(1:n)
 x.start<n>()                       // x(1:n)
-x.end(n)                           // N = rows(x); x(N - n: N)
+x.tail(n)                           // N = rows(x); x(N - n: N)
-x.end<n>()                         // N = rows(x); x(N - n: N)
+x.tail<n>()                         // N = rows(x); x(N - n: N)
 x.segment(i, n)                    // x(i+1 : i+n)
 x.segment<n>(i)                    // x(i+1 : i+n)
 P.block(i, j, rows, cols)          // P(i+1 : i+rows, j+1 : j+cols)
--- a/doc/C01_QuickStartGuide.dox
+++ b/doc/C01_QuickStartGuide.dox
@ -493,7 +493,7 @@ Read-write access to sub-vectors:
 <td></td>
 <tr><td>\code vec1.start(n)\endcode</td><td>\code vec1.start<n>()\endcode</td><td>the first \c n coeffs </td></tr>
-<tr><td>\code vec1.end(n)\endcode</td><td>\code vec1.end<n>()\endcode</td><td>the last \c n coeffs </td></tr>
+<tr><td>\code vec1.tail(n)\endcode</td><td>\code vec1.tail<n>()\endcode</td><td>the last \c n coeffs </td></tr>
 <tr><td>\code vec1.segment(pos,n)\endcode</td><td>\code vec1.segment<n>(pos)\endcode</td>
    <td>the \c size coeffs in \n the range [\c pos : \c pos + \c n [</td></tr>
 <tr style="border-style: dashed none dashed none;"><td>
--- a/doc/D01_StlContainers.dox
+++ b/doc/D01_StlContainers.dox
@ -9,7 +9,7 @@ namespace Eigen {
 \section summary Executive summary
-Using STL containers on \ref FixedSizeVectorizable "fixed-size vectorizable Eigen types" requires taking the following two steps:
+Using STL containers on \ref FixedSizeVectorizable "fixed-size vectorizable Eigen types", or classes having members of such types, requires taking the following two steps:
 \li A 16-byte-aligned allocator must be used. Eigen does provide one ready for use: aligned_allocator.
 \li If you want to use the std::vector container, you need to \#include <Eigen/StdVector>.
--- a/doc/D11_UnalignedArrayAssert.dox
+++ b/doc/D11_UnalignedArrayAssert.dox
@ -55,11 +55,12 @@ Note that here, Eigen::Vector2d is only used as an example, more generally the i
 \section c2 Cause 2: STL Containers
-If you use STL Containers such as std::vector, std::map, ..., with Eigen objects, like this,
+If you use STL Containers such as std::vector, std::map, ..., with Eigen objects, or with classes containing Eigen objects, like this,
 \code
 std::vector<Eigen::Matrix2f> my_vector;
-std::map<int, Eigen::Matrix2f> my_map;
+struct my_class { ... Eigen::Matrix2f m; ... };
 std::map<int, my_class> my_map;
 \endcode
 then you need to read this separate page: \ref StlContainers "Using STL Containers with Eigen".
--- a/doc/I03_InsideEigenExample.dox
+++ b/doc/I03_InsideEigenExample.dox
@ -343,7 +343,7 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorization, NoUnrolling>
    const int size = dst.size();
    const int packetSize = ei_packet_traits<typename Derived1::Scalar>::size;
    const int alignedStart = ei_assign_traits<Derived1,Derived2>::DstIsAligned ? 0
-                           : ei_alignmentOffset(&dst.coeffRef(0), size);
+                           : ei_first_aligned(&dst.coeffRef(0), size);
    const int alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
    for(int index = 0; index < alignedStart; index++)
--- a/doc/echelon.cpp
+++ b/doc/echelon.cpp
@ -27,8 +27,8 @@ struct unroll_echelon
    m.row(k).swap(m.row(k+rowOfBiggest));
    m.col(k).swap(m.col(k+colOfBiggest));
    m.template corner<CornerRows-1, CornerCols>(BottomRight)
-      -= m.col(k).template end<CornerRows-1>()
+      -= m.col(k).template tail<CornerRows-1>()
-       * (m.row(k).template end<CornerCols>() / m(k,k));
+       * (m.row(k).template tail<CornerCols>() / m(k,k));
  }
 };
@ -59,7 +59,7 @@ struct unroll_echelon<Derived, Dynamic>
      m.row(k).swap(m.row(k+rowOfBiggest));
      m.col(k).swap(m.col(k+colOfBiggest));
      m.corner(BottomRight, cornerRows-1, cornerCols)
-        -= m.col(k).end(cornerRows-1) * (m.row(k).end(cornerCols) / m(k,k));
+        -= m.col(k).tail(cornerRows-1) * (m.row(k).tail(cornerCols) / m(k,k));
    }
  }
 };
--- a/doc/snippets/MatrixBase_end_int.cpp
+++ b/doc/snippets/MatrixBase_end_int.cpp
@ -1,5 +1,5 @@
 RowVector4i v = RowVector4i::Random();
 cout << "Here is the vector v:" << endl << v << endl;
-cout << "Here is v.end(2):" << endl << v.end(2) << endl;
+cout << "Here is v.tail(2):" << endl << v.tail(2) << endl;
-v.end(2).setZero();
+v.tail(2).setZero();
 cout << "Now the vector v is:" << endl << v << endl;
--- a/doc/snippets/MatrixBase_eval.cpp
+++ b/doc/snippets/MatrixBase_eval.cpp
@ -2,11 +2,11 @@ Matrix2f M = Matrix2f::Random();
 Matrix2f m;
 m = M;
 cout << "Here is the matrix m:" << endl << m << endl;
-cout << "Now we want to replace m by its own transpose." << endl;
+cout << "Now we want to copy a column into a row." << endl;
-cout << "If we do m = m.transpose(), then m becomes:" << endl;
+cout << "If we do m.col(1) = m.row(0), then m becomes:" << endl;
-m = m.transpose() * 1;
+m.col(1) = m.row(0);
 cout << m << endl << "which is wrong!" << endl;
-cout << "Now let us instead do m = m.transpose().eval(). Then m becomes" << endl;
+cout << "Now let us instead do m.col(1) = m.row(0).eval(). Then m becomes" << endl;
 m = M;
-m = m.transpose().eval();
+m.col(1) = m.row(0).eval();
 cout << m << endl << "which is right." << endl;
--- a/doc/snippets/MatrixBase_start_int.cpp
+++ b/doc/snippets/MatrixBase_start_int.cpp
@ -1,5 +1,5 @@
 RowVector4i v = RowVector4i::Random();
 cout << "Here is the vector v:" << endl << v << endl;
-cout << "Here is v.start(2):" << endl << v.start(2) << endl;
+cout << "Here is v.head(2):" << endl << v.head(2) << endl;
-v.start(2).setZero();
+v.head(2).setZero();
 cout << "Now the vector v is:" << endl << v << endl;
--- a/doc/snippets/MatrixBase_template_int_end.cpp
+++ b/doc/snippets/MatrixBase_template_int_end.cpp
@ -1,5 +1,5 @@
 RowVector4i v = RowVector4i::Random();
 cout << "Here is the vector v:" << endl << v << endl;
-cout << "Here is v.end(2):" << endl << v.end<2>() << endl;
+cout << "Here is v.tail(2):" << endl << v.tail<2>() << endl;
-v.end<2>().setZero();
+v.tail<2>().setZero();
 cout << "Now the vector v is:" << endl << v << endl;
--- a/doc/snippets/MatrixBase_template_int_start.cpp
+++ b/doc/snippets/MatrixBase_template_int_start.cpp
@ -1,5 +1,5 @@
 RowVector4i v = RowVector4i::Random();
 cout << "Here is the vector v:" << endl << v << endl;
-cout << "Here is v.start(2):" << endl << v.start<2>() << endl;
+cout << "Here is v.head(2):" << endl << v.head<2>() << endl;
-v.start<2>().setZero();
+v.head<2>().setZero();
 cout << "Now the vector v is:" << endl << v << endl;
--- a/scripts/CMakeLists.txt
+++ b/scripts/CMakeLists.txt
@ -1,6 +1,6 @@
 get_property(EIGEN_TESTS_LIST GLOBAL PROPERTY EIGEN_TESTS_LIST)
-configure_file(buildtests.in ${CMAKE_BINARY_DIR}/buildtests)
+configure_file(buildtests.in ${CMAKE_BINARY_DIR}/buildtests @ONLY)
-configure_file(check.in ${CMAKE_BINARY_DIR}/check)
+configure_file(check.in ${CMAKE_BINARY_DIR}/check COPYONLY)
-configure_file(debug.in ${CMAKE_BINARY_DIR}/debug)
+configure_file(debug.in ${CMAKE_BINARY_DIR}/debug COPYONLY)
-configure_file(release.in ${CMAKE_BINARY_DIR}/release)
+configure_file(release.in ${CMAKE_BINARY_DIR}/release COPYONLY)
--- a/scripts/buildtests.in
+++ b/scripts/buildtests.in
@ -1,24 +1,22 @@
 #!/bin/bash
-if [ $# == 0 -o $# -ge 3 ]
+if [[ $# != 1 || $1 == *help ]]
 then
-  echo "usage: ./buildtests regexp [jobs]"
+  echo "usage: ./check regexp"
-  echo "  makes tests matching the regexp, with [jobs] concurrent make jobs"
+  echo "  Builds tests matching the regexp."
  echo "  The EIGEN_MAKE_ARGS environment variable allows to pass args to 'make'."
  echo "    For example, to launch 5 concurrent builds, use EIGEN_MAKE_ARGS='-j5'"
  exit 0
 fi
-TESTSLIST="${EIGEN_TESTS_LIST}"
+TESTSLIST="@EIGEN_TESTS_LIST@"
 targets_to_make=`echo "$TESTSLIST" | egrep "$1" | xargs echo`
-if [ $# == 1 ]
+if [ -n "${EIGEN_MAKE_ARGS:+x}" ]
 then
  make $targets_to_make ${EIGEN_MAKE_ARGS}
 else
  make $targets_to_make
  exit $?
 fi
 exit $?
 if [ $# == 2 ]
 then
  make -j $2 $targets_to_make
  exit $?
 fi
--- a/scripts/check.in
+++ b/scripts/check.in
@ -1,12 +1,21 @@
 #!/bin/bash
 # check : shorthand for make and ctest -R
-if [ $# == 0 -o $# -ge 3 ]
+if [[ $# != 1 || $1 == *help ]]
 then
-  echo "usage: ./check regexp [jobs]"
+  echo "usage: ./check regexp"
-  echo "  makes and runs tests matching the regexp, with [jobs] concurrent make jobs"
+  echo "  Builds and runs tests matching the regexp."
  echo "  The EIGEN_MAKE_ARGS environment variable allows to pass args to 'make'."
  echo "    For example, to launch 5 concurrent builds, use EIGEN_MAKE_ARGS='-j5'"
  echo "  The EIGEN_CTEST_ARGS environment variable allows to pass args to 'ctest'."
  echo "    For example, with CTest 2.8, you can use EIGEN_CTEST_ARGS='-j5'."
  exit 0
 fi
-# TODO when ctest 2.8 comes out, honor the jobs parameter
+if [ -n "${EIGEN_CTEST_ARGS:+x}" ]
-./buildtests "$1" "${2:-1}" && ctest -R "$1"
+then
  ./buildtests "$1" && ctest -R "$1" ${EIGEN_CTEST_ARGS}
 else
  ./buildtests "$1" && ctest -R "$1"
 fi
 exit $?
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@ -88,6 +88,7 @@ ei_add_test(meta)
 ei_add_test(sizeof)
 ei_add_test(dynalloc)
 ei_add_test(nomalloc)
 ei_add_test(first_aligned)
 ei_add_test(mixingtypes)
 ei_add_test(packetmath)
 ei_add_test(unalignedassert)
@ -117,7 +118,7 @@ ei_add_test(product_symm)
 ei_add_test(product_syrk)
 ei_add_test(product_trmv)
 ei_add_test(product_trmm)
-ei_add_test(product_trsm)
+ei_add_test(product_trsolve)
 ei_add_test(product_notemporary)
 ei_add_test(stable_norm)
 ei_add_test(bandmatrix)
@ -128,6 +129,7 @@ ei_add_test(inverse)
 ei_add_test(qr)
 ei_add_test(qr_colpivoting)
 ei_add_test(qr_fullpivoting)
 ei_add_test(hessenberg " " "${GSL_LIBRARIES}")
 ei_add_test(eigensolver_selfadjoint " " "${GSL_LIBRARIES}")
 ei_add_test(eigensolver_generic " " "${GSL_LIBRARIES}")
 ei_add_test(eigensolver_complex)
--- a/test/first_aligned.cpp
+++ b/test/first_aligned.cpp
@ -0,0 +1,64 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2009 Benoit Jacob <jacob.benoit.1@gmail.com>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
 // License as published by the Free Software Foundation; either
 // version 3 of the License, or (at your option) any later version.
 //
 // Alternatively, you can redistribute it and/or
 // modify it under the terms of the GNU General Public License as
 // published by the Free Software Foundation; either version 2 of
 // the License, or (at your option) any later version.
 //
 // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
 // GNU General Public License for more details.
 //
 // You should have received a copy of the GNU Lesser General Public
 // License and a copy of the GNU General Public License along with
 // Eigen. If not, see <http://www.gnu.org/licenses/>.
 #include "main.h"
 template<typename Scalar>
 void test_first_aligned_helper(Scalar *array, int size)
 {
  const int packet_size = sizeof(Scalar) * ei_packet_traits<Scalar>::size;
  VERIFY(((size_t(array) + sizeof(Scalar) * ei_first_aligned(array, size)) % packet_size) == 0);
 }
 template<typename Scalar>
 void test_none_aligned_helper(Scalar *array, int size)
 {
  VERIFY(ei_packet_traits<Scalar>::size == 1 || ei_first_aligned(array, size) == size);
 }
 struct some_non_vectorizable_type { float x; };
 void test_first_aligned()
 {
  EIGEN_ALIGN16 float array_float[100];
  test_first_aligned_helper(array_float, 50);
  test_first_aligned_helper(array_float+1, 50);
  test_first_aligned_helper(array_float+2, 50);
  test_first_aligned_helper(array_float+3, 50);
  test_first_aligned_helper(array_float+4, 50);
  test_first_aligned_helper(array_float+5, 50);
  EIGEN_ALIGN16 double array_double[100];
  test_first_aligned_helper(array_float, 50);
  test_first_aligned_helper(array_float+1, 50);
  test_first_aligned_helper(array_float+2, 50);
  double *array_double_plus_4_bytes = (double*)(size_t(array_double)+4);
  test_none_aligned_helper(array_double_plus_4_bytes, 50);
  test_none_aligned_helper(array_double_plus_4_bytes+1, 50);
  some_non_vectorizable_type array_nonvec[100];
  test_first_aligned_helper(array_nonvec, 100);
  test_none_aligned_helper(array_nonvec, 100);
 }
--- a/test/geo_homogeneous.cpp
+++ b/test/geo_homogeneous.cpp
@ -67,7 +67,7 @@ template<typename Scalar,int Size> void homogeneous(void)
  VERIFY_IS_APPROX(m0, hm0.colwise().hnormalized());
  hm0.row(Size-1).setRandom();
  for(int j=0; j<Size; ++j)
-    m0.col(j) = hm0.col(j).start(Size) / hm0(Size,j);
+    m0.col(j) = hm0.col(j).head(Size) / hm0(Size,j);
  VERIFY_IS_APPROX(m0, hm0.colwise().hnormalized());
  T1MatrixType t1 = T1MatrixType::Random();
--- a/test/geo_orthomethods.cpp
+++ b/test/geo_orthomethods.cpp
@ -66,7 +66,7 @@ template<typename Scalar> void orthomethods_3()
          v41 = Vector4::Random(),
          v42 = Vector4::Random();
  v40.w() = v41.w() = v42.w() = 0;
-  v42.template start<3>() = v40.template start<3>().cross(v41.template start<3>());
+  v42.template head<3>() = v40.template head<3>().cross(v41.template head<3>());
  VERIFY_IS_APPROX(v40.cross3(v41), v42);
 }
@ -88,8 +88,8 @@ template<typename Scalar, int Size> void orthomethods(int size=Size)
  if (size>=3)
  {
-    v0.template start<2>().setZero();
+    v0.template head<2>().setZero();
-    v0.end(size-2).setRandom();
+    v0.tail(size-2).setRandom();
    VERIFY_IS_MUCH_SMALLER_THAN(v0.unitOrthogonal().dot(v0), Scalar(1));
    VERIFY_IS_APPROX(v0.unitOrthogonal().norm(), RealScalar(1));
--- a/test/geo_transformations.cpp
+++ b/test/geo_transformations.cpp
@ -118,7 +118,7 @@ template<typename Scalar, int Mode> void transformations(void)
  t0.scale(v0);
  t1.prescale(v0);
-  VERIFY_IS_APPROX( (t0 * Vector3(1,0,0)).template start<3>().norm(), v0.x());
+  VERIFY_IS_APPROX( (t0 * Vector3(1,0,0)).template head<3>().norm(), v0.x());
  //VERIFY(!ei_isApprox((t1 * Vector3(1,0,0)).norm(), v0.x()));
  t0.setIdentity();
@ -290,12 +290,12 @@ template<typename Scalar, int Mode> void transformations(void)
  // translation * vector
  t0.setIdentity();
  t0.translate(v0);
-  VERIFY_IS_APPROX((t0 * v1).template start<3>(), Translation3(v0) * v1);
+  VERIFY_IS_APPROX((t0 * v1).template head<3>(), Translation3(v0) * v1);
  // AlignedScaling * vector
  t0.setIdentity();
  t0.scale(v0);
-  VERIFY_IS_APPROX((t0 * v1).template start<3>(), AlignedScaling3(v0) * v1);
+  VERIFY_IS_APPROX((t0 * v1).template head<3>(), AlignedScaling3(v0) * v1);
  // test transform inversion
  t0.setIdentity();
--- a/test/hessenberg.cpp
+++ b/test/hessenberg.cpp
@ -0,0 +1,46 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2009 Gael Guennebaud <g.gael@free.fr>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
 // License as published by the Free Software Foundation; either
 // version 3 of the License, or (at your option) any later version.
 //
 // Alternatively, you can redistribute it and/or
 // modify it under the terms of the GNU General Public License as
 // published by the Free Software Foundation; either version 2 of
 // the License, or (at your option) any later version.
 //
 // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
 // GNU General Public License for more details.
 //
 // You should have received a copy of the GNU Lesser General Public
 // License and a copy of the GNU General Public License along with
 // Eigen. If not, see <http://www.gnu.org/licenses/>.
 #include "main.h"
 #include <Eigen/Eigenvalues>
 template<typename Scalar,int Size> void hessenberg(int size = Size)
 {
  typedef Matrix<Scalar,Size,Size> MatrixType;
  MatrixType m = MatrixType::Random(size,size);
  HessenbergDecomposition<MatrixType> hess(m);
  VERIFY_IS_APPROX(m, hess.matrixQ() * hess.matrixH() * hess.matrixQ().adjoint());
 }
 void test_hessenberg()
 {
  for(int i = 0; i < g_repeat; i++) {
    CALL_SUBTEST_1(( hessenberg<std::complex<double>,1>() ));
    CALL_SUBTEST_2(( hessenberg<std::complex<double>,2>() ));
    CALL_SUBTEST_3(( hessenberg<std::complex<float>,4>() ));
    CALL_SUBTEST_4(( hessenberg<float,Dynamic>(ei_random<int>(1,320)) ));
    CALL_SUBTEST_5(( hessenberg<std::complex<double>,Dynamic>(ei_random<int>(1,320)) ));
  }
 }
--- a/test/householder.cpp
+++ b/test/householder.cpp
@ -53,7 +53,7 @@ template<typename MatrixType> void householder(const MatrixType& m)
  v1.makeHouseholder(essential, beta, alpha);
  v1.applyHouseholderOnTheLeft(essential,beta,tmp);
  VERIFY_IS_APPROX(v1.norm(), v2.norm());
-  VERIFY_IS_MUCH_SMALLER_THAN(v1.end(rows-1).norm(), v1.norm());
+  VERIFY_IS_MUCH_SMALLER_THAN(v1.tail(rows-1).norm(), v1.norm());
  v1 = VectorType::Random(rows);
  v2 = v1;
  v1.applyHouseholderOnTheLeft(essential,beta,tmp);
@ -63,7 +63,7 @@ template<typename MatrixType> void householder(const MatrixType& m)
             m2(rows, cols);
  v1 = VectorType::Random(rows);
-  if(even) v1.end(rows-1).setZero();
+  if(even) v1.tail(rows-1).setZero();
  m1.colwise() = v1;
  m2 = m1;
  m1.col(0).makeHouseholder(essential, beta, alpha);
@ -74,7 +74,7 @@ template<typename MatrixType> void householder(const MatrixType& m)
  VERIFY_IS_APPROX(ei_real(m1(0,0)), alpha);
  v1 = VectorType::Random(rows);
-  if(even) v1.end(rows-1).setZero();
+  if(even) v1.tail(rows-1).setZero();
  SquareMatrixType m3(rows,rows), m4(rows,rows);
  m3.rowwise() = v1.transpose();
  m4 = m3;
--- a/test/product_selfadjoint.cpp
+++ b/test/product_selfadjoint.cpp
@ -68,9 +68,9 @@ template<typename MatrixType> void product_selfadjoint(const MatrixType& m)
  if (rows>1)
  {
    m2 = m1.template triangularView<LowerTriangular>();
-    m2.block(1,1,rows-1,cols-1).template selfadjointView<LowerTriangular>().rankUpdate(v1.end(rows-1),v2.start(cols-1));
+    m2.block(1,1,rows-1,cols-1).template selfadjointView<LowerTriangular>().rankUpdate(v1.tail(rows-1),v2.head(cols-1));
    m3 = m1;
-    m3.block(1,1,rows-1,cols-1) += v1.end(rows-1) * v2.start(cols-1).adjoint()+ v2.start(cols-1) * v1.end(rows-1).adjoint();
+    m3.block(1,1,rows-1,cols-1) += v1.tail(rows-1) * v2.head(cols-1).adjoint()+ v2.head(cols-1) * v1.tail(rows-1).adjoint();
    VERIFY_IS_APPROX(m2, m3.template triangularView<LowerTriangular>().toDenseMatrix());
  }
 }
--- a/test/product_trsolve.cpp
+++ b/test/product_trsolve.cpp
@ -30,15 +30,21 @@
    VERIFY_IS_APPROX((TRI).toDenseMatrix() * (XB), ref); \
  }
-template<typename Scalar> void trsm(int size,int cols)
+#define VERIFY_TRSM_ONTHERIGHT(TRI,XB) { \
    (XB).setRandom(); ref = (XB); \
    (TRI).transpose().template solveInPlace<OnTheRight>(XB.transpose()); \
    VERIFY_IS_APPROX((XB).transpose() * (TRI).transpose().toDenseMatrix(), ref.transpose()); \
  }
 template<typename Scalar,int Size, int Cols> void trsolve(int size=Size,int cols=Cols)
 {
  typedef typename NumTraits<Scalar>::Real RealScalar;
-  Matrix<Scalar,Dynamic,Dynamic,ColMajor> cmLhs(size,size);
+  Matrix<Scalar,Size,Size,ColMajor> cmLhs(size,size);
-  Matrix<Scalar,Dynamic,Dynamic,RowMajor> rmLhs(size,size);
+  Matrix<Scalar,Size,Size,RowMajor> rmLhs(size,size);
-  Matrix<Scalar,Dynamic,Dynamic,ColMajor> cmRhs(size,cols), ref(size,cols);
+  Matrix<Scalar,Size,Cols,ColMajor> cmRhs(size,cols), ref(size,cols);
-  Matrix<Scalar,Dynamic,Dynamic,RowMajor> rmRhs(size,cols);
+  Matrix<Scalar,Size,Cols,RowMajor> rmRhs(size,cols);
  cmLhs.setRandom(); cmLhs *= static_cast<RealScalar>(0.1); cmLhs.diagonal().array() += static_cast<RealScalar>(1);
  rmLhs.setRandom(); rmLhs *= static_cast<RealScalar>(0.1); rmLhs.diagonal().array() += static_cast<RealScalar>(1);
@ -53,13 +59,32 @@ template<typename Scalar> void trsm(int size,int cols)
  VERIFY_TRSM(rmLhs            .template triangularView<LowerTriangular>(), cmRhs);
  VERIFY_TRSM(rmLhs.conjugate().template triangularView<UnitUpperTriangular>(), rmRhs);
  VERIFY_TRSM_ONTHERIGHT(cmLhs.conjugate().template triangularView<LowerTriangular>(), cmRhs);
  VERIFY_TRSM_ONTHERIGHT(cmLhs            .template triangularView<UpperTriangular>(), cmRhs);
  VERIFY_TRSM_ONTHERIGHT(cmLhs            .template triangularView<LowerTriangular>(), rmRhs);
  VERIFY_TRSM_ONTHERIGHT(cmLhs.conjugate().template triangularView<UpperTriangular>(), rmRhs);
  VERIFY_TRSM_ONTHERIGHT(cmLhs.conjugate().template triangularView<UnitLowerTriangular>(), cmRhs);
  VERIFY_TRSM_ONTHERIGHT(cmLhs            .template triangularView<UnitUpperTriangular>(), rmRhs);
  VERIFY_TRSM_ONTHERIGHT(rmLhs            .template triangularView<LowerTriangular>(), cmRhs);
  VERIFY_TRSM_ONTHERIGHT(rmLhs.conjugate().template triangularView<UnitUpperTriangular>(), rmRhs);
 }
-void test_product_trsm()
+void test_product_trsolve()
 {
  for(int i = 0; i < g_repeat ; i++)
  {
-    CALL_SUBTEST_1((trsm<float>(ei_random<int>(1,320),ei_random<int>(1,320))));
+    // matrices
-    CALL_SUBTEST_2((trsm<std::complex<double> >(ei_random<int>(1,320),ei_random<int>(1,320))));
+    CALL_SUBTEST_1((trsolve<float,Dynamic,Dynamic>(ei_random<int>(1,320),ei_random<int>(1,320))));
    CALL_SUBTEST_2((trsolve<std::complex<double>,Dynamic,Dynamic>(ei_random<int>(1,320),ei_random<int>(1,320))));
    // vectors
    CALL_SUBTEST_3((trsolve<std::complex<double>,Dynamic,1>(ei_random<int>(1,320))));
    CALL_SUBTEST_4((trsolve<float,1,1>()));
    CALL_SUBTEST_5((trsolve<float,1,2>()));
    CALL_SUBTEST_6((trsolve<std::complex<float>,4,1>()));
  }
 }
--- a/test/redux.cpp
+++ b/test/redux.cpp
@ -68,10 +68,10 @@ template<typename VectorType> void vectorRedux(const VectorType& w)
      minc = std::min(minc, ei_real(v[j]));
      maxc = std::max(maxc, ei_real(v[j]));
    }
-    VERIFY_IS_APPROX(s, v.start(i).sum());
+    VERIFY_IS_APPROX(s, v.head(i).sum());
-    VERIFY_IS_APPROX(p, v.start(i).prod());
+    VERIFY_IS_APPROX(p, v.head(i).prod());
-    VERIFY_IS_APPROX(minc, v.real().start(i).minCoeff());
+    VERIFY_IS_APPROX(minc, v.real().head(i).minCoeff());
-    VERIFY_IS_APPROX(maxc, v.real().start(i).maxCoeff());
+    VERIFY_IS_APPROX(maxc, v.real().head(i).maxCoeff());
  }
  for(int i = 0; i < size-1; i++)
@ -85,10 +85,10 @@ template<typename VectorType> void vectorRedux(const VectorType& w)
      minc = std::min(minc, ei_real(v[j]));
      maxc = std::max(maxc, ei_real(v[j]));
    }
-    VERIFY_IS_APPROX(s, v.end(size-i).sum());
+    VERIFY_IS_APPROX(s, v.tail(size-i).sum());
-    VERIFY_IS_APPROX(p, v.end(size-i).prod());
+    VERIFY_IS_APPROX(p, v.tail(size-i).prod());
-    VERIFY_IS_APPROX(minc, v.real().end(size-i).minCoeff());
+    VERIFY_IS_APPROX(minc, v.real().tail(size-i).minCoeff());
-    VERIFY_IS_APPROX(maxc, v.real().end(size-i).maxCoeff());
+    VERIFY_IS_APPROX(maxc, v.real().tail(size-i).maxCoeff());
  }
  for(int i = 0; i < size/2; i++)
--- a/test/regression.cpp
+++ b/test/regression.cpp
@ -51,7 +51,7 @@ void makeNoisyCohyperplanarPoints(int numPoints,
    {
      cur_point = VectorType::Random(size)/*.normalized()*/;
      // project cur_point onto the hyperplane
-      Scalar x = - (hyperplane->coeffs().start(size).cwiseProduct(cur_point)).sum();
+      Scalar x = - (hyperplane->coeffs().head(size).cwiseProduct(cur_point)).sum();
      cur_point *= hyperplane->coeffs().coeff(size) / x;
    } while( cur_point.norm() < 0.5
          || cur_point.norm() > 2.0 );
--- a/test/submatrices.cpp
+++ b/test/submatrices.cpp
@ -127,15 +127,15 @@ template<typename MatrixType> void submatrices(const MatrixType& m)
  if (rows>2)
  {
    // test sub vectors
-    VERIFY_IS_APPROX(v1.template start<2>(), v1.block(0,0,2,1));
+    VERIFY_IS_APPROX(v1.template head<2>(), v1.block(0,0,2,1));
-    VERIFY_IS_APPROX(v1.template start<2>(), v1.start(2));
+    VERIFY_IS_APPROX(v1.template head<2>(), v1.head(2));
-    VERIFY_IS_APPROX(v1.template start<2>(), v1.segment(0,2));
+    VERIFY_IS_APPROX(v1.template head<2>(), v1.segment(0,2));
-    VERIFY_IS_APPROX(v1.template start<2>(), v1.template segment<2>(0));
+    VERIFY_IS_APPROX(v1.template head<2>(), v1.template segment<2>(0));
    int i = rows-2;
-    VERIFY_IS_APPROX(v1.template end<2>(), v1.block(i,0,2,1));
+    VERIFY_IS_APPROX(v1.template tail<2>(), v1.block(i,0,2,1));
-    VERIFY_IS_APPROX(v1.template end<2>(), v1.end(2));
+    VERIFY_IS_APPROX(v1.template tail<2>(), v1.tail(2));
-    VERIFY_IS_APPROX(v1.template end<2>(), v1.segment(i,2));
+    VERIFY_IS_APPROX(v1.template tail<2>(), v1.segment(i,2));
-    VERIFY_IS_APPROX(v1.template end<2>(), v1.template segment<2>(i));
+    VERIFY_IS_APPROX(v1.template tail<2>(), v1.template segment<2>(i));
    i = ei_random(0,rows-2);
    VERIFY_IS_APPROX(v1.segment(i,2), v1.template segment<2>(i));
--- a/unsupported/Eigen/AlignedVector3
+++ b/unsupported/Eigen/AlignedVector3
@ -106,7 +106,7 @@ template<typename _Scalar> class AlignedVector3
    {
      inline static void run(AlignedVector3& dest, const XprType& src)
      {
-        dest.m_coeffs.template start<3>() = src;
+        dest.m_coeffs.template head<3>() = src;
        dest.m_coeffs.w() = Scalar(0);
      }
    };
@ -190,7 +190,7 @@ template<typename _Scalar> class AlignedVector3
    template<typename Derived>
    inline bool isApprox(const MatrixBase<Derived>& other, RealScalar eps=dummy_precision<Scalar>()) const
    {
-      return m_coeffs.template start<3>().isApprox(other,eps);
+      return m_coeffs.template head<3>().isApprox(other,eps);
    }
 };
--- a/unsupported/Eigen/FFT
+++ b/unsupported/Eigen/FFT
@ -213,7 +213,7 @@ class FFT
        int nfft = src.size();
        int nout = HasFlag(HalfSpectrum) ? ((nfft>>1)+1) : nfft;
        dst.derived().resize( nout );
-        inv( &dst[0],&src[0],src.size() );
+        inv( &dst[0],&src[0], nfft);
    }
    template <typename _Output>
--- a/unsupported/Eigen/NonLinearOptimization
+++ b/unsupported/Eigen/NonLinearOptimization
@ -27,6 +27,7 @@
 #include <Eigen/Core>
 #include <Eigen/Jacobi>
 #include <Eigen/QR>
 #include <unsupported/Eigen/NumericalDiff>
 namespace Eigen {
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
@ -40,9 +40,13 @@ struct ei_stem_function
  * \param[in]  f      an entire function; \c f(x,n) should compute the n-th derivative of f at x.
  * \param[out] result pointer to the matrix in which to store the result, \f$ f(M) \f$.
  *
-  * Suppose that \f$ f \f$ is an entire function (that is, a function
+  * This function computes \f$ f(A) \f$ and stores the result in the
-  * on the complex plane that is everywhere complex differentiable).
+  * matrix pointed to by \p result.
-  * Then its Taylor series
+  *
  * %Matrix functions are defined as follows.  Suppose that \f$ f \f$
  * is an entire function (that is, a function on the complex plane
  * that is everywhere complex differentiable).  Then its Taylor
  * series
  * \f[ f(0) + f'(0) x + \frac{f''(0)}{2} x^2 + \frac{f'''(0)}{3!} x^3 + \cdots \f]
  * converges to \f$ f(x) \f$. In this case, we can define the matrix
  * function by the same series:
@ -53,6 +57,8 @@ struct ei_stem_function
  * "A Schur-Parlett algorithm for computing matrix functions", 
  * <em>SIAM J. %Matrix Anal. Applic.</em>, <b>25</b>:464&ndash;485, 2003.
  *
  * The actual work is done by the MatrixFunction class.
  *
  * Example: The following program checks that
  * \f[ \exp \left[ \begin{array}{ccc} 
  *       0 & \frac14\pi & 0 \\ 
@ -78,145 +84,279 @@ EIGEN_STRONG_INLINE void ei_matrix_function(const MatrixBase<Derived>& M,
 					    typename ei_stem_function<typename ei_traits<Derived>::Scalar>::type f,
 					    typename MatrixBase<Derived>::PlainMatrixType* result);
 #include "MatrixFunctionAtomic.h"
 /** \ingroup MatrixFunctions_Module 
  * \class MatrixFunction
  * \brief Helper class for computing matrix functions. 
  */
-template <typename MatrixType, 
+template <typename MatrixType, int IsComplex = NumTraits<typename ei_traits<MatrixType>::Scalar>::IsComplex>
-  int IsComplex = NumTraits<typename ei_traits<MatrixType>::Scalar>::IsComplex,
+class MatrixFunction
  int IsDynamic = ( (ei_traits<MatrixType>::RowsAtCompileTime == Dynamic) 
 		    && (ei_traits<MatrixType>::RowsAtCompileTime == Dynamic) ) >
 class MatrixFunction;
 /* Partial specialization of MatrixFunction for real matrices */
 template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols, int IsDynamic>
 class MatrixFunction<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols>, 0, IsDynamic>
 {  
  private:
    typedef typename ei_traits<MatrixType>::Scalar Scalar;
    typedef typename ei_stem_function<Scalar>::type StemFunction;
  public:
    /** \brief Constructor. Computes matrix function. 
      *
      * \param[in]  A      argument of matrix function, should be a square matrix.
      * \param[in]  f      an entire function; \c f(x,n) should compute the n-th derivative of f at x.
      * \param[out] result pointer to the matrix in which to store the result, \f$ f(A) \f$.
      *
      * This function computes \f$ f(A) \f$ and stores the result in
      * the matrix pointed to by \p result.
      *
      * See ei_matrix_function() for details.
      */
    MatrixFunction(const MatrixType& A, StemFunction f, MatrixType* result);
 };
 /** \ingroup MatrixFunctions_Module 
  * \brief Partial specialization of MatrixFunction for real matrices \internal 
  */
 template <typename MatrixType>
 class MatrixFunction<MatrixType, 0>
 {  
  private:
    typedef ei_traits<MatrixType> Traits;
    typedef typename Traits::Scalar Scalar;
    static const int Rows = Traits::RowsAtCompileTime;
    static const int Cols = Traits::ColsAtCompileTime;
    static const int Options = MatrixType::Options;
    static const int MaxRows = Traits::MaxRowsAtCompileTime;
    static const int MaxCols = Traits::MaxColsAtCompileTime;
    typedef std::complex<Scalar> ComplexScalar;
    typedef Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> MatrixType;
    typedef Matrix<ComplexScalar, Rows, Cols, Options, MaxRows, MaxCols> ComplexMatrix;
    typedef typename ei_stem_function<Scalar>::type StemFunction;
  public:
    /** \brief Constructor. Computes matrix function. 
      *
      * \param[in]  A      argument of matrix function, should be a square matrix.
      * \param[in]  f      an entire function; \c f(x,n) should compute the n-th derivative of f at x.
      * \param[out] result pointer to the matrix in which to store the result, \f$ f(A) \f$.
      *
      * This function converts the real matrix \c A to a complex matrix,
      * uses MatrixFunction<MatrixType,1> and then converts the result back to
      * a real matrix.
      */
    MatrixFunction(const MatrixType& A, StemFunction f, MatrixType* result) 
    {
      ComplexMatrix CA = A.template cast<ComplexScalar>();
      ComplexMatrix Cresult;
      MatrixFunction<ComplexMatrix>(CA, f, &Cresult);
-      result->resize(A.cols(), A.rows());
+      *result = Cresult.real();
      for (int j = 0; j < A.cols(); j++)
 	for (int i = 0; i < A.rows(); i++)
 	  (*result)(i,j) = std::real(Cresult(i,j));
    }
 };
 /* Partial specialization of MatrixFunction for complex static-size matrices */
 template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
 class MatrixFunction<Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols>, 1, 0>
 {  
  public:
    typedef Matrix<Scalar, Rows, Cols, Options, MaxRows, MaxCols> MatrixType;
    typedef Matrix<Scalar, Dynamic, Dynamic, Options, MaxRows, MaxCols> DynamicMatrix;
    typedef typename ei_stem_function<Scalar>::type StemFunction;
    MatrixFunction(const MatrixType& A, StemFunction f, MatrixType* result) 
    {
      DynamicMatrix DA = A;
      DynamicMatrix Dresult;
      MatrixFunction<DynamicMatrix>(DA, f, &Dresult);
      *result = Dresult;
    }
 };
 /* Partial specialization of MatrixFunction for complex dynamic-size matrices */
 /** \ingroup MatrixFunctions_Module 
  * \brief Partial specialization of MatrixFunction for complex matrices \internal 
  */
 template <typename MatrixType>
-class MatrixFunction<MatrixType, 1, 1>
+class MatrixFunction<MatrixType, 1>
 {
-  public:
+  private:
    typedef ei_traits<MatrixType> Traits;
    typedef typename Traits::Scalar Scalar;
    static const int RowsAtCompileTime = Traits::RowsAtCompileTime;
    static const int ColsAtCompileTime = Traits::ColsAtCompileTime;
    static const int Options = MatrixType::Options;
    typedef typename NumTraits<Scalar>::Real RealScalar;
    typedef typename ei_stem_function<Scalar>::type StemFunction;
    typedef Matrix<Scalar, Traits::RowsAtCompileTime, 1> VectorType;
    typedef Matrix<int, Traits::RowsAtCompileTime, 1> IntVectorType;
-    typedef std::list<Scalar> listOfScalars;
+    typedef std::list<Scalar> Cluster;
-    typedef std::list<listOfScalars> listOfLists;
+    typedef std::list<Cluster> ListOfClusters;
    typedef Matrix<Scalar, Dynamic, Dynamic, Options, RowsAtCompileTime, ColsAtCompileTime> DynMatrixType;
-    /** \brief Compute matrix function. 
+  public:
-     *
+
-     * \param A      argument of matrix function.
+    /** \brief Constructor. Computes matrix function. 
-     * \param f      function to compute.
+      *
-     * \param result pointer to the matrix in which to store the result.
+      * \param[in]  A      argument of matrix function, should be a square matrix.
-     */
+      * \param[in]  f      an entire function; \c f(x,n) should compute the n-th derivative of f at x.
      * \param[out] result pointer to the matrix in which to store the result, \f$ f(A) \f$.
      */
    MatrixFunction(const MatrixType& A, StemFunction f, MatrixType* result);
  private:
-    // Prevent copying
+    void computeSchurDecomposition(const MatrixType& A);
-    MatrixFunction(const MatrixFunction&);
+    void partitionEigenvalues();
-    MatrixFunction& operator=(const MatrixFunction&);
+    typename ListOfClusters::iterator findCluster(Scalar key);
    void computeClusterSize();
    void computeBlockStart();
    void constructPermutation();
    void permuteSchur();
    void swapEntriesInSchur(int index);
    void computeBlockAtomic();
    Block<MatrixType> block(const MatrixType& A, int i, int j);
    void computeOffDiagonal();
    DynMatrixType solveTriangularSylvester(const DynMatrixType& A, const DynMatrixType& B, const DynMatrixType& C);
-    void separateBlocksInSchur(MatrixType& T, MatrixType& U, IntVectorType& blockSize);
+    StemFunction *m_f; /**< \brief Stem function for matrix function under consideration */
-    void permuteSchur(const IntVectorType& permutation, MatrixType& T, MatrixType& U);
+    MatrixType m_T; /**< \brief Triangular part of Schur decomposition */
-    void swapEntriesInSchur(int index, MatrixType& T, MatrixType& U);
+    MatrixType m_U; /**< \brief Unitary part of Schur decomposition */
-    void computeTriangular(const MatrixType& T, MatrixType& result, const IntVectorType& blockSize);
+    MatrixType m_fT; /**< \brief %Matrix function applied to #m_T */
-    void computeBlockAtomic(const MatrixType& T, MatrixType& result, const IntVectorType& blockSize);
+    ListOfClusters m_clusters; /**< \brief Partition of eigenvalues into clusters of ei'vals "close" to each other */
-    MatrixType solveSylvester(const MatrixType& A, const MatrixType& B, const MatrixType& C);
+    VectorXi m_eivalToCluster; /**< \brief m_eivalToCluster[i] = j means i-th ei'val is in j-th cluster */
-    MatrixType computeAtomic(const MatrixType& T);
+    VectorXi m_clusterSize; /**< \brief Number of eigenvalues in each clusters  */
-    void divideInBlocks(const VectorType& v, listOfLists* result);
+    VectorXi m_blockStart; /**< \brief Row index at which block corresponding to i-th cluster starts */
-    void constructPermutation(const VectorType& diag, const listOfLists& blocks, 
+    IntVectorType m_permutation; /**< \brief Permutation which groups ei'vals in the same cluster together */
 			      IntVectorType& blockSize, IntVectorType& permutation);
    RealScalar computeMu(const MatrixType& M);
    bool taylorConverged(const MatrixType& T, int s, const MatrixType& F, 
 			 const MatrixType& Fincr, const MatrixType& P, RealScalar mu);
    /** \brief Maximum distance allowed between eigenvalues to be considered "close".
      *
      * This is morally a \c static \c const \c Scalar, but only
      * integers can be static constant class members in C++. The
      * separation constant is set to 0.01, a value taken from the
      * paper by Davies and Higham. */
    static const RealScalar separation() { return static_cast<RealScalar>(0.01); }
    StemFunction *m_f;
 };
 template <typename MatrixType>
-MatrixFunction<MatrixType,1,1>::MatrixFunction(const MatrixType& A, StemFunction f, MatrixType* result) :
+MatrixFunction<MatrixType,1>::MatrixFunction(const MatrixType& A, StemFunction f, MatrixType* result) :
  m_f(f)
 {
-  if (A.rows() == 1) {
+  computeSchurDecomposition(A);
-    result->resize(1,1);
+  partitionEigenvalues();
-    (*result)(0,0) = f(A(0,0), 0);
+  computeClusterSize();
-  } else {
+  computeBlockStart();
-    const ComplexSchur<MatrixType> schurOfA(A);  
+  constructPermutation();
-    MatrixType T = schurOfA.matrixT();
+  permuteSchur();
-    MatrixType U = schurOfA.matrixU();
+  computeBlockAtomic();
-    IntVectorType blockSize, permutation;
+  computeOffDiagonal();
-    separateBlocksInSchur(T, U, blockSize);
+  *result = m_U * m_fT * m_U.adjoint();
-    MatrixType fT;
+}
-    computeTriangular(T, fT, blockSize);
+
-    *result = U * fT * U.adjoint();
+/** \brief Store the Schur decomposition of \p A in #m_T and #m_U */
 template <typename MatrixType>
 void MatrixFunction<MatrixType,1>::computeSchurDecomposition(const MatrixType& A)
 {
  const ComplexSchur<MatrixType> schurOfA(A);  
  m_T = schurOfA.matrixT();
  m_U = schurOfA.matrixU();
 }
 /** \brief Partition eigenvalues in clusters of ei'vals close to each other
  * 
  * This function computes #m_clusters. This is a partition of the
  * eigenvalues of #m_T in clusters, such that
  * # Any eigenvalue in a certain cluster is at most separation() away
  *   from another eigenvalue in the same cluster.
  * # The distance between two eigenvalues in different clusters is
  *   more than separation().
  * The implementation follows Algorithm 4.1 in the paper of Davies
  * and Higham. 
  */
 template <typename MatrixType>
 void MatrixFunction<MatrixType,1>::partitionEigenvalues()
 {
  const int rows = m_T.rows();
  VectorType diag = m_T.diagonal(); // contains eigenvalues of A
  for (int i=0; i<rows; ++i) {
    // Find set containing diag(i), adding a new set if necessary
    typename ListOfClusters::iterator qi = findCluster(diag(i));
    if (qi == m_clusters.end()) {
      Cluster l;
      l.push_back(diag(i));
      m_clusters.push_back(l);
      qi = m_clusters.end();
      --qi;
    }
    // Look for other element to add to the set
    for (int j=i+1; j<rows; ++j) {
      if (ei_abs(diag(j) - diag(i)) <= separation() && std::find(qi->begin(), qi->end(), diag(j)) == qi->end()) {
 	typename ListOfClusters::iterator qj = findCluster(diag(j));
 	if (qj == m_clusters.end()) {
 	  qi->push_back(diag(j));
 	} else {
 	  qi->insert(qi->end(), qj->begin(), qj->end());
 	  m_clusters.erase(qj);
 	}
      }
    }
  }
 }
 /** \brief Find cluster in #m_clusters containing some value 
  * \param[in] key Value to find
  * \returns Iterator to cluster containing \c key, or
  * \c m_clusters.end() if no cluster in m_clusters contains \c key.
  */
 template <typename MatrixType>
-void MatrixFunction<MatrixType,1,1>::separateBlocksInSchur(MatrixType& T, MatrixType& U, IntVectorType& blockSize)
+typename MatrixFunction<MatrixType,1>::ListOfClusters::iterator MatrixFunction<MatrixType,1>::findCluster(Scalar key)
 {
-  const VectorType d = T.diagonal();
+  typename Cluster::iterator j;
-  listOfLists blocks;
+  for (typename ListOfClusters::iterator i = m_clusters.begin(); i != m_clusters.end(); ++i) {
-  divideInBlocks(d, &blocks);
+    j = std::find(i->begin(), i->end(), key);
-
+    if (j != i->end())
-  IntVectorType permutation;
+      return i;
-  constructPermutation(d, blocks, blockSize, permutation);
+  }
-  permuteSchur(permutation, T, U);
+  return m_clusters.end();
 }
 /** \brief Compute #m_clusterSize and #m_eivalToCluster using #m_clusters */
 template <typename MatrixType>
-void MatrixFunction<MatrixType,1,1>::permuteSchur(const IntVectorType& permutation, MatrixType& T, MatrixType& U)
+void MatrixFunction<MatrixType,1>::computeClusterSize()
 {
-  IntVectorType p = permutation;
+  const int rows = m_T.rows();
  VectorType diag = m_T.diagonal(); 
  const int numClusters = m_clusters.size();
  m_clusterSize.setZero(numClusters);
  m_eivalToCluster.resize(rows);
  int clusterIndex = 0;
  for (typename ListOfClusters::const_iterator cluster = m_clusters.begin(); cluster != m_clusters.end(); ++cluster) {
    for (int i = 0; i < diag.rows(); ++i) {
      if (std::find(cluster->begin(), cluster->end(), diag(i)) != cluster->end()) {
 	++m_clusterSize[clusterIndex];
 	m_eivalToCluster[i] = clusterIndex;
      }
    }
    ++clusterIndex;
  }
 }
 /** \brief Compute #m_blockStart using #m_clusterSize */
 template <typename MatrixType>
 void MatrixFunction<MatrixType,1>::computeBlockStart()
 {
  m_blockStart.resize(m_clusterSize.rows());
  m_blockStart(0) = 0;
  for (int i = 1; i < m_clusterSize.rows(); i++) {
    m_blockStart(i) = m_blockStart(i-1) + m_clusterSize(i-1);
  }
 }
 /** \brief Compute #m_permutation using #m_eivalToCluster and #m_blockStart */
 template <typename MatrixType>
 void MatrixFunction<MatrixType,1>::constructPermutation()
 {
  VectorXi indexNextEntry = m_blockStart;
  m_permutation.resize(m_T.rows());
  for (int i = 0; i < m_T.rows(); i++) {
    int cluster = m_eivalToCluster[i];
    m_permutation[i] = indexNextEntry[cluster];
    ++indexNextEntry[cluster];
  }
 }  
 /** \brief Permute Schur decomposition in #m_U and #m_T according to #m_permutation */
 template <typename MatrixType>
 void MatrixFunction<MatrixType,1>::permuteSchur()
 {
  IntVectorType p = m_permutation;
  for (int i = 0; i < p.rows() - 1; i++) {
    int j;
    for (j = i; j < p.rows(); j++) {
@ -224,252 +364,150 @@ void MatrixFunction<MatrixType,1,1>::permuteSchur(const IntVectorType& permutati
    }
    ei_assert(p(j) == i);
    for (int k = j-1; k >= i; k--) {
-      swapEntriesInSchur(k, T, U);
+      swapEntriesInSchur(k);
      std::swap(p.coeffRef(k), p.coeffRef(k+1));
    }
  }
 }
-// swap T(index, index) and T(index+1, index+1)
+/** \brief Swap rows \a index and \a index+1 in Schur decomposition in #m_U and #m_T */
 template <typename MatrixType>
-void MatrixFunction<MatrixType,1,1>::swapEntriesInSchur(int index, MatrixType& T, MatrixType& U)
+void MatrixFunction<MatrixType,1>::swapEntriesInSchur(int index)
 {
  PlanarRotation<Scalar> rotation;
-  rotation.makeGivens(T(index, index+1), T(index+1, index+1) - T(index, index));
+  rotation.makeGivens(m_T(index, index+1), m_T(index+1, index+1) - m_T(index, index));
-  T.applyOnTheLeft(index, index+1, rotation.adjoint());
+  m_T.applyOnTheLeft(index, index+1, rotation.adjoint());
-  T.applyOnTheRight(index, index+1, rotation);
+  m_T.applyOnTheRight(index, index+1, rotation);
-  U.applyOnTheRight(index, index+1, rotation);
+  m_U.applyOnTheRight(index, index+1, rotation);
 }  
 /** \brief Compute block diagonal part of #m_fT.
  *
  * This routine computes the matrix function #m_f applied to the block
  * diagonal part of #m_T, with the blocking given by #m_blockStart. The
  * result is stored in #m_fT. The off-diagonal parts of #m_fT are set
  * to zero.
  */
 template <typename MatrixType>
-void MatrixFunction<MatrixType,1,1>::computeTriangular(const MatrixType& T, MatrixType& result, 
+void MatrixFunction<MatrixType,1>::computeBlockAtomic()
 						       const IntVectorType& blockSize)
 { 
-  MatrixType expT;
+  m_fT.resize(m_T.rows(), m_T.cols());
-  ei_matrix_exponential(T, &expT);
+  m_fT.setZero();
-  computeBlockAtomic(T, result, blockSize);
+  MatrixFunctionAtomic<DynMatrixType> mfa(m_f);
-  IntVectorType blockStart(blockSize.rows());
+  for (int i = 0; i < m_clusterSize.rows(); ++i) {
-  blockStart(0) = 0;
+    block(m_fT, i, i) = mfa.compute(block(m_T, i, i));
  for (int i = 1; i < blockSize.rows(); i++) {
    blockStart(i) = blockStart(i-1) + blockSize(i-1);
  }
-  for (int diagIndex = 1; diagIndex < blockSize.rows(); diagIndex++) {
+}
-    for (int blockIndex = 0; blockIndex < blockSize.rows() - diagIndex; blockIndex++) {
+
 /** \brief Return block of matrix according to blocking given by #m_blockStart */
 template <typename MatrixType>
 Block<MatrixType> MatrixFunction<MatrixType,1>::block(const MatrixType& A, int i, int j)
 {
  return A.block(m_blockStart(i), m_blockStart(j), m_clusterSize(i), m_clusterSize(j));
 }
 /** \brief Compute part of #m_fT above block diagonal.
  *
  * This routine assumes that the block diagonal part of #m_fT (which
  * equals #m_f applied to #m_T) has already been computed and computes
  * the part above the block diagonal. The part below the diagonal is
  * zero, because #m_T is upper triangular.
  */
 template <typename MatrixType>
 void MatrixFunction<MatrixType,1>::computeOffDiagonal()
 { 
  for (int diagIndex = 1; diagIndex < m_clusterSize.rows(); diagIndex++) {
    for (int blockIndex = 0; blockIndex < m_clusterSize.rows() - diagIndex; blockIndex++) {
      // compute (blockIndex, blockIndex+diagIndex) block
-      MatrixType A = T.block(blockStart(blockIndex), blockStart(blockIndex), blockSize(blockIndex), blockSize(blockIndex));
+      DynMatrixType A = block(m_T, blockIndex, blockIndex);
-      MatrixType B = -T.block(blockStart(blockIndex+diagIndex), blockStart(blockIndex+diagIndex), blockSize(blockIndex+diagIndex), blockSize(blockIndex+diagIndex));
+      DynMatrixType B = -block(m_T, blockIndex+diagIndex, blockIndex+diagIndex);
-      MatrixType C = result.block(blockStart(blockIndex), blockStart(blockIndex), blockSize(blockIndex), blockSize(blockIndex)) * T.block(blockStart(blockIndex), blockStart(blockIndex+diagIndex), blockSize(blockIndex), blockSize(blockIndex+diagIndex));
+      DynMatrixType C = block(m_fT, blockIndex, blockIndex) * block(m_T, blockIndex, blockIndex+diagIndex);
-      C -= T.block(blockStart(blockIndex), blockStart(blockIndex+diagIndex), blockSize(blockIndex), blockSize(blockIndex+diagIndex)) * result.block(blockStart(blockIndex+diagIndex), blockStart(blockIndex+diagIndex), blockSize(blockIndex+diagIndex), blockSize(blockIndex+diagIndex));
+      C -= block(m_T, blockIndex, blockIndex+diagIndex) * block(m_fT, blockIndex+diagIndex, blockIndex+diagIndex);
      for (int k = blockIndex + 1; k < blockIndex + diagIndex; k++) {
-	C += result.block(blockStart(blockIndex), blockStart(k), blockSize(blockIndex), blockSize(k)) * T.block(blockStart(k), blockStart(blockIndex+diagIndex), blockSize(k), blockSize(blockIndex+diagIndex));
+	C += block(m_fT, blockIndex, k) * block(m_T, k, blockIndex+diagIndex);
-	C -= T.block(blockStart(blockIndex), blockStart(k), blockSize(blockIndex), blockSize(k)) * result.block(blockStart(k), blockStart(blockIndex+diagIndex), blockSize(k), blockSize(blockIndex+diagIndex));
+	C -= block(m_T, blockIndex, k) * block(m_fT, k, blockIndex+diagIndex);
      }
-      result.block(blockStart(blockIndex), blockStart(blockIndex+diagIndex), blockSize(blockIndex), blockSize(blockIndex+diagIndex)) = solveSylvester(A, B, C);
+      block(m_fT, blockIndex, blockIndex+diagIndex) = solveTriangularSylvester(A, B, C);
    }
  }
 }
-// solve AX + XB = C  <=>  U* A' U X V V* + U* U X V B' V* = U* U C V V*  <=>  A' U X V + U X V B' = U C V 
+/** \brief Solve a triangular Sylvester equation AX + XB = C 
-// Schur: A* = U A'* U* (so A = U* A' U), B = V B' V*, define: X' = U X V, C' = U C V, to get: A' X' + X' B' = C'
+  *
-// A is m-by-m, B is n-by-n, X is m-by-n, C is m-by-n, U is m-by-m, V is n-by-n
+  * \param[in]  A  the matrix A; should be square and upper triangular
  * \param[in]  B  the matrix B; should be square and upper triangular
  * \param[in]  C  the matrix C; should have correct size.
  *
  * \returns the solution X.
  *
  * If A is m-by-m and B is n-by-n, then both C and X are m-by-n. 
  * The (i,j)-th component of the Sylvester equation is
  * \f[ 
  *     \sum_{k=i}^m A_{ik} X_{kj} + \sum_{k=1}^j X_{ik} B_{kj} = C_{ij}. 
  * \f]
  * This can be re-arranged to yield:
  * \f[ 
  *     X_{ij} = \frac{1}{A_{ii} + B_{jj}} \Bigl( C_{ij}
  *     - \sum_{k=i+1}^m A_{ik} X_{kj} - \sum_{k=1}^{j-1} X_{ik} B_{kj} \Bigr).
  * \f]
  * It is assumed that A and B are such that the numerator is never
  * zero (otherwise the Sylvester equation does not have a unique
  * solution). In that case, these equations can be evaluated in the
  * order \f$ i=m,\ldots,1 \f$ and \f$ j=1,\ldots,n \f$.
  */
 template <typename MatrixType>
-MatrixType MatrixFunction<MatrixType,1,1>::solveSylvester(const MatrixType& A, const MatrixType& B, const MatrixType& C)
+typename MatrixFunction<MatrixType,1>::DynMatrixType MatrixFunction<MatrixType,1>::solveTriangularSylvester(
  const DynMatrixType& A, 
  const DynMatrixType& B, 
  const DynMatrixType& C)
 {
-  MatrixType U = MatrixType::Zero(A.rows(), A.rows());
+  ei_assert(A.rows() == A.cols());
-  for (int i = 0; i < A.rows(); i++) {
+  ei_assert(A.isUpperTriangular());
-    U(i, A.rows() - 1 - i) = static_cast<Scalar>(1);
+  ei_assert(B.rows() == B.cols());
-  }
+  ei_assert(B.isUpperTriangular());
-  MatrixType Aprime = U * A * U;
+  ei_assert(C.rows() == A.rows());
  ei_assert(C.cols() == B.rows());
-  MatrixType Bprime = B;
+  int m = A.rows();
-  MatrixType V = MatrixType::Identity(B.rows(), B.rows());
+  int n = B.rows();
  DynMatrixType X(m, n);
-  MatrixType Cprime = U * C * V;
+  for (int i = m - 1; i >= 0; --i) {
-  MatrixType Xprime(A.rows(), B.rows());
+    for (int j = 0; j < n; ++j) {
-  for (int l = 0; l < B.rows(); l++) {
+
-    for (int k = 0; k < A.rows(); k++) {
+      // Compute AX = \sum_{k=i+1}^m A_{ik} X_{kj}
-      Scalar tmp1, tmp2;
+      Scalar AX;
-      if (k == 0) {
+      if (i == m - 1) {
-	tmp1 = 0; 
+	AX = 0; 
      } else {
-	Matrix<Scalar,1,1> tmp1matrix = Aprime.row(k).start(k) * Xprime.col(l).start(k);
+	Matrix<Scalar,1,1> AXmatrix = A.row(i).tail(m-1-i) * X.col(j).tail(m-1-i);
-	tmp1 = tmp1matrix(0,0);
+	AX = AXmatrix(0,0);
      }
-      if (l == 0) {
+
-	tmp2 = 0; 
+      // Compute XB = \sum_{k=1}^{j-1} X_{ik} B_{kj}
      Scalar XB;
      if (j == 0) {
 	XB = 0; 
      } else {
-	Matrix<Scalar,1,1> tmp2matrix = Xprime.row(k).start(l) * Bprime.col(l).start(l);
+	Matrix<Scalar,1,1> XBmatrix = X.row(i).head(j) * B.col(j).head(j);
-	tmp2 = tmp2matrix(0,0);
+	XB = XBmatrix(0,0);
      }
-      Xprime(k,l) = (Cprime(k,l) - tmp1 - tmp2) / (Aprime(k,k) + Bprime(l,l));
+
      X(i,j) = (C(i,j) - AX - XB) / (A(i,i) + B(j,j));
    }
  }
-  return U.adjoint() * Xprime * V.adjoint();
+  return X;
 }
 // does not touch irrelevant parts of T
 template <typename MatrixType>
 void MatrixFunction<MatrixType,1,1>::computeBlockAtomic(const MatrixType& T, MatrixType& result, 
 							const IntVectorType& blockSize)
 { 
  int blockStart = 0;
  result.resize(T.rows(), T.cols());
  result.setZero();
  for (int i = 0; i < blockSize.rows(); i++) {
    result.block(blockStart, blockStart, blockSize(i), blockSize(i))
      = computeAtomic(T.block(blockStart, blockStart, blockSize(i), blockSize(i)));
    blockStart += blockSize(i);
  }
 }
 template <typename Scalar>
 typename std::list<std::list<Scalar> >::iterator ei_find_in_list_of_lists(typename std::list<std::list<Scalar> >& ll, Scalar x)
 {
  typename std::list<Scalar>::iterator j;
  for (typename std::list<std::list<Scalar> >::iterator i = ll.begin(); i != ll.end(); i++) {
    j = std::find(i->begin(), i->end(), x);
    if (j != i->end())
      return i;
  }
  return ll.end();
 }
 // Alg 4.1
 template <typename MatrixType>
 void MatrixFunction<MatrixType,1,1>::divideInBlocks(const VectorType& v, listOfLists* result)
 {
  const int n = v.rows();
  for (int i=0; i<n; i++) {
    // Find set containing v(i), adding a new set if necessary
    typename listOfLists::iterator qi = ei_find_in_list_of_lists(*result, v(i));
    if (qi == result->end()) {
      listOfScalars l;
      l.push_back(v(i));
      result->push_back(l);
      qi = result->end();
      qi--;
    }
    // Look for other element to add to the set
    for (int j=i+1; j<n; j++) {
      if (ei_abs(v(j) - v(i)) <= separation() && std::find(qi->begin(), qi->end(), v(j)) == qi->end()) {
 	typename listOfLists::iterator qj = ei_find_in_list_of_lists(*result, v(j));
 	if (qj == result->end()) {
 	  qi->push_back(v(j));
 	} else {
 	  qi->insert(qi->end(), qj->begin(), qj->end());
 	  result->erase(qj);
 	}
      }
    }
  }
 }
 // Construct permutation P, such that P(D) has eigenvalues clustered together
 template <typename MatrixType>
 void MatrixFunction<MatrixType,1,1>::constructPermutation(const VectorType& diag, const listOfLists& blocks, 
 							  IntVectorType& blockSize, IntVectorType& permutation)
 {
  const int n = diag.rows();
  const int numBlocks = blocks.size();
  // For every block in blocks, mark and count the entries in diag that
  // appear in that block
  blockSize.setZero(numBlocks);
  IntVectorType entryToBlock(n);
  int blockIndex = 0;
  for (typename listOfLists::const_iterator block = blocks.begin(); block != blocks.end(); block++) {
    for (int i = 0; i < diag.rows(); i++) {
      if (std::find(block->begin(), block->end(), diag(i)) != block->end()) {
 	blockSize[blockIndex]++;
 	entryToBlock[i] = blockIndex;
      }
    }
    blockIndex++;
  }
  // Compute index of first entry in every block as the sum of sizes
  // of all the preceding blocks
  IntVectorType indexNextEntry(numBlocks);
  indexNextEntry[0] = 0;
  for (blockIndex = 1; blockIndex < numBlocks; blockIndex++) {
    indexNextEntry[blockIndex] = indexNextEntry[blockIndex-1] + blockSize[blockIndex-1];
  }
  // Construct permutation 
  permutation.resize(n);
  for (int i = 0; i < n; i++) {
    int block = entryToBlock[i];
    permutation[i] = indexNextEntry[block];
    indexNextEntry[block]++;
  }
 }  
 template <typename MatrixType>
 MatrixType MatrixFunction<MatrixType,1,1>::computeAtomic(const MatrixType& T)
 {
  // TODO: Use that T is upper triangular
  const int n = T.rows();
  const Scalar sigma = T.trace() / Scalar(n);
  const MatrixType M = T - sigma * MatrixType::Identity(n, n);
  const RealScalar mu = computeMu(M);
  MatrixType F = m_f(sigma, 0) * MatrixType::Identity(n, n);
  MatrixType P = M;
  MatrixType Fincr;
  for (int s = 1; s < 1.1*n + 10; s++) { // upper limit is fairly arbitrary
    Fincr = m_f(sigma, s) * P;
    F += Fincr;
    P = (1/(s + 1.0)) * P * M;
    if (taylorConverged(T, s, F, Fincr, P, mu)) {
      return F;
    }
  }
  ei_assert("Taylor series does not converge" && 0);
  return F;
 }
 template <typename MatrixType>
 typename MatrixFunction<MatrixType,1,1>::RealScalar MatrixFunction<MatrixType,1,1>::computeMu(const MatrixType& M)
 {
  const int n = M.rows();
  const MatrixType N = MatrixType::Identity(n, n) - M;
  VectorType e = VectorType::Ones(n);
  N.template triangularView<UpperTriangular>().solveInPlace(e);
  return e.cwise().abs().maxCoeff();
 }
 template <typename MatrixType>
 bool MatrixFunction<MatrixType,1,1>::taylorConverged(const MatrixType& T, int s, const MatrixType& F, 
 						   const MatrixType& Fincr, const MatrixType& P, RealScalar mu)
 {
  const int n = F.rows();
  const RealScalar F_norm = F.cwise().abs().rowwise().sum().maxCoeff();
  const RealScalar Fincr_norm = Fincr.cwise().abs().rowwise().sum().maxCoeff();
  if (Fincr_norm < epsilon<Scalar>() * F_norm) {
    RealScalar delta = 0;
    RealScalar rfactorial = 1;
    for (int r = 0; r < n; r++) {
      RealScalar mx = 0;
      for (int i = 0; i < n; i++) 
 	mx = std::max(mx, std::abs(m_f(T(i, i), s+r)));
       if (r != 0)
 	rfactorial *= r;
      delta = std::max(delta, mx / rfactorial);
    }
    const RealScalar P_norm = P.cwise().abs().rowwise().sum().maxCoeff();
    if (mu * delta * P_norm < epsilon<Scalar>() * F_norm) 
      return true;
  }
  return false;
 }
 template <typename Derived>
 EIGEN_STRONG_INLINE void ei_matrix_function(const MatrixBase<Derived>& M, 
 					    typename ei_stem_function<typename ei_traits<Derived>::Scalar>::type f,
 					    typename MatrixBase<Derived>::PlainMatrixType* result)
 {
  ei_assert(M.rows() == M.cols());
-  MatrixFunction<typename MatrixBase<Derived>::PlainMatrixType>(M, f, result);
+  typedef typename MatrixBase<Derived>::PlainMatrixType PlainMatrixType;
  MatrixFunction<PlainMatrixType>(M, f, result);
 }
 #endif // EIGEN_MATRIX_FUNCTION
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixFunctionAtomic.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixFunctionAtomic.h
@ -0,0 +1,142 @@
 // This file is part of Eigen, a lightweight C++ template library
 // for linear algebra.
 //
 // Copyright (C) 2009 Jitse Niesen <jitse@maths.leeds.ac.uk>
 //
 // Eigen is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
 // License as published by the Free Software Foundation; either
 // version 3 of the License, or (at your option) any later version.
 //
 // Alternatively, you can redistribute it and/or
 // modify it under the terms of the GNU General Public License as
 // published by the Free Software Foundation; either version 2 of
 // the License, or (at your option) any later version.
 //
 // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
 // GNU General Public License for more details.
 //
 // You should have received a copy of the GNU Lesser General Public
 // License and a copy of the GNU General Public License along with
 // Eigen. If not, see <http://www.gnu.org/licenses/>.
 #ifndef EIGEN_MATRIX_FUNCTION_ATOMIC
 #define EIGEN_MATRIX_FUNCTION_ATOMIC
 /** \ingroup MatrixFunctions_Module 
  * \class MatrixFunctionAtomic
  * \brief Helper class for computing matrix functions of atomic matrices.
  *
  * \internal
  * Here, an atomic matrix is a triangular matrix whose diagonal
  * entries are close to each other.
  */
 template <typename MatrixType>
 class MatrixFunctionAtomic
 {
  public:
    typedef ei_traits<MatrixType> Traits;
    typedef typename Traits::Scalar Scalar;
    typedef typename NumTraits<Scalar>::Real RealScalar;
    typedef typename ei_stem_function<Scalar>::type StemFunction;
    typedef Matrix<Scalar, Traits::RowsAtCompileTime, 1> VectorType;
    /** \brief Constructor
      * \param[in]  f  matrix function to compute.
      */
    MatrixFunctionAtomic(StemFunction f) : m_f(f) { }
    /** \brief Compute matrix function of atomic matrix
      * \param[in]  A  argument of matrix function, should be upper triangular and atomic
      * \returns  f(A), the matrix function evaluated at the given matrix
      */
    MatrixType compute(const MatrixType& A);
  private:
    // Prevent copying
    MatrixFunctionAtomic(const MatrixFunctionAtomic&);
    MatrixFunctionAtomic& operator=(const MatrixFunctionAtomic&);
    void computeMu();
    bool taylorConverged(int s, const MatrixType& F, const MatrixType& Fincr, const MatrixType& P);
    /** \brief Pointer to scalar function */
    StemFunction* m_f;
    /** \brief Size of matrix function */
    int m_Arows;
    /** \brief Mean of eigenvalues */
    Scalar m_avgEival;
    /** \brief Argument shifted by mean of eigenvalues */
    MatrixType m_Ashifted;
    /** \brief Constant used to determine whether Taylor series has converged */
    RealScalar m_mu;
 };
 template <typename MatrixType>
 MatrixType MatrixFunctionAtomic<MatrixType>::compute(const MatrixType& A)
 {
  // TODO: Use that A is upper triangular
  m_Arows = A.rows();
  m_avgEival = A.trace() / Scalar(m_Arows);
  m_Ashifted = A - m_avgEival * MatrixType::Identity(m_Arows, m_Arows);
  computeMu();
  MatrixType F = m_f(m_avgEival, 0) * MatrixType::Identity(m_Arows, m_Arows);
  MatrixType P = m_Ashifted;
  MatrixType Fincr;
  for (int s = 1; s < 1.1 * m_Arows + 10; s++) { // upper limit is fairly arbitrary
    Fincr = m_f(m_avgEival, s) * P;
    F += Fincr;
    P = (1/(s + 1.0)) * P * m_Ashifted;
    if (taylorConverged(s, F, Fincr, P)) {
      return F;
    }
  }
  ei_assert("Taylor series does not converge" && 0);
  return F;
 }
 /** \brief Compute \c m_mu. */
 template <typename MatrixType>
 void MatrixFunctionAtomic<MatrixType>::computeMu()
 {
  const MatrixType N = MatrixType::Identity(m_Arows, m_Arows) - m_Ashifted;
  VectorType e = VectorType::Ones(m_Arows);
  N.template triangularView<UpperTriangular>().solveInPlace(e);
  m_mu = e.cwise().abs().maxCoeff();
 }
 /** \brief Determine whether Taylor series has converged */
 template <typename MatrixType>
 bool MatrixFunctionAtomic<MatrixType>::taylorConverged(int s, const MatrixType& F, 
 						       const MatrixType& Fincr, const MatrixType& P)
 {
  const int n = F.rows();
  const RealScalar F_norm = F.cwise().abs().rowwise().sum().maxCoeff();
  const RealScalar Fincr_norm = Fincr.cwise().abs().rowwise().sum().maxCoeff();
  if (Fincr_norm < epsilon<Scalar>() * F_norm) {
    RealScalar delta = 0;
    RealScalar rfactorial = 1;
    for (int r = 0; r < n; r++) {
      RealScalar mx = 0;
      for (int i = 0; i < n; i++) 
 	mx = std::max(mx, std::abs(m_f(m_Ashifted(i, i) + m_avgEival, s+r)));
       if (r != 0)
 	rfactorial *= r;
      delta = std::max(delta, mx / rfactorial);
    }
    const RealScalar P_norm = P.cwise().abs().rowwise().sum().maxCoeff();
    if (m_mu * delta * P_norm < epsilon<Scalar>() * F_norm) 
      return true;
  }
  return false;
 }
 #endif // EIGEN_MATRIX_FUNCTION_ATOMIC
--- a/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h
@ -129,6 +129,8 @@ public:
    int njev;
    int iter;
    Scalar fnorm, gnorm;
    Scalar lm_param(void) { return par; }
 private:
    FunctorType &functor;
    int n;
@ -533,7 +535,7 @@ LevenbergMarquardt<FunctorType,Scalar>::minimizeOptimumStorageOneStep(
            sing = true;
        }
        ipvt[j] = j;
-        wa2[j] = fjac.col(j).start(j).stableNorm();
+        wa2[j] = fjac.col(j).head(j).stableNorm();
    }
    if (sing) {
        ipvt.cwise()+=1;
--- a/unsupported/Eigen/src/NonLinearOptimization/lmpar.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/lmpar.h
@ -87,7 +87,7 @@ void ei_lmpar(
    /* calculate an upper bound, paru, for the zero of the function. */
    for (j = 0; j < n; ++j)
-        wa1[j] = r.col(j).start(j+1).dot(qtb.start(j+1)) / diag[ipvt[j]];
+        wa1[j] = r.col(j).head(j+1).dot(qtb.head(j+1)) / diag[ipvt[j]];
    gnorm = wa1.stableNorm();
    paru = gnorm / delta;
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@ -14,7 +14,7 @@ ei_add_test(NonLinearOptimization)
 ei_add_test(NumericalDiff)
 ei_add_test(autodiff)
 ei_add_test(BVH)
-ei_add_test(matrixExponential)
+ei_add_test(matrix_exponential)
 ei_add_test(alignedvector3)
 ei_add_test(FFT)
--- a/unsupported/test/matrix_exponential.cpp
+++ b/unsupported/test/matrix_exponential.cpp
@ -144,7 +144,7 @@ void randomTest(const MatrixType& m, double tol)
  }
 }
-void test_matrixExponential()
+void test_matrix_exponential()
 {
  CALL_SUBTEST_2(test2dRotation<double>(1e-13));
  CALL_SUBTEST_1(test2dRotation<float>(1e-5));