From 4f8773c23af0b3f16d4382494e6802f02b16af09 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Fri, 19 Feb 2010 17:46:36 +0100
Subject: [PATCH 01/38] fix stupid enum values

---
 Eigen/src/Core/Product.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index af05773ee..fe6d29c7d 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -50,8 +50,8 @@ class GeneralProduct;
 template<int Rows, int Cols, int Depth> struct ei_product_type_selector;
 
 enum {
-  Large = Dynamic,
-  Small = Dynamic/2
+  Large = 2,
+  Small = 3
 };
 
 template<typename Lhs, typename Rhs> struct ei_product_type

From f0c8dcf1e2f01fb200a8e48463d9f73c77bc1436 Mon Sep 17 00:00:00 2001
From: Hauke Heibel <hauke.heibel@gmail.com>
Date: Sat, 20 Feb 2010 15:26:02 +0100
Subject: [PATCH 02/38] Renamed AnyMatrixBase to EigenBase.

---
 Eigen/Core                                    |  2 +-
 Eigen/src/Array/Array.h                       |  6 ++---
 Eigen/src/Core/BandMatrix.h                   |  2 +-
 Eigen/src/Core/DenseBase.h                    | 12 ++++-----
 Eigen/src/Core/DenseStorageBase.h             |  8 +++---
 Eigen/src/Core/DiagonalMatrix.h               |  2 +-
 .../src/Core/{AnyMatrixBase.h => EigenBase.h} | 24 ++++++++---------
 Eigen/src/Core/Matrix.h                       |  8 +++---
 Eigen/src/Core/MatrixBase.h                   |  6 ++---
 Eigen/src/Core/PermutationMatrix.h            |  4 +--
 Eigen/src/Core/TriangularMatrix.h             |  2 +-
 Eigen/src/Core/util/ForwardDeclarations.h     |  2 +-
 Eigen/src/Core/util/XprHelper.h               | 26 +++++++++++++++++--
 Eigen/src/Geometry/RotationBase.h             |  4 +--
 Eigen/src/Geometry/Transform.h                | 12 ++++-----
 Eigen/src/Geometry/Translation.h              |  6 ++---
 Eigen/src/Householder/HouseholderSequence.h   |  2 +-
 Eigen/src/Sparse/SparseMatrixBase.h           |  2 +-
 .../Eigen/src/Skyline/SkylineMatrixBase.h     |  2 +-
 19 files changed, 77 insertions(+), 55 deletions(-)
 rename Eigen/src/Core/{AnyMatrixBase.h => EigenBase.h} (86%)

diff --git a/Eigen/Core b/Eigen/Core
index 18b0fafa9..cbca16640 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -164,7 +164,7 @@ struct Dense {};
 #include "src/Core/Functors.h"
 #include "src/Core/DenseBase.h"
 #include "src/Core/MatrixBase.h"
-#include "src/Core/AnyMatrixBase.h"
+#include "src/Core/EigenBase.h"
 #include "src/Core/Coeffs.h"
 
 #ifndef EIGEN_PARSED_BY_DOXYGEN // work around Doxygen bug triggered by Assign.h r814874
diff --git a/Eigen/src/Array/Array.h b/Eigen/src/Array/Array.h
index ceef71afd..77d0c41ac 100644
--- a/Eigen/src/Array/Array.h
+++ b/Eigen/src/Array/Array.h
@@ -61,7 +61,7 @@ class Array
       * the usage of 'using'. This should be done only for operator=.
       */
     template<typename OtherDerived>
-    EIGEN_STRONG_INLINE Array& operator=(const AnyMatrixBase<OtherDerived> &other)
+    EIGEN_STRONG_INLINE Array& operator=(const EigenBase<OtherDerived> &other)
     {
       return Base::operator=(other);
     }
@@ -196,9 +196,9 @@ class Array
       other.evalTo(*this);
     }
 
-    /** \sa MatrixBase::operator=(const AnyMatrixBase<OtherDerived>&) */
+    /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
     template<typename OtherDerived>
-    EIGEN_STRONG_INLINE Array(const AnyMatrixBase<OtherDerived> &other)
+    EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other)
       : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
     {
       Base::_check_template_params();
diff --git a/Eigen/src/Core/BandMatrix.h b/Eigen/src/Core/BandMatrix.h
index 538e6dd76..432df0b34 100644
--- a/Eigen/src/Core/BandMatrix.h
+++ b/Eigen/src/Core/BandMatrix.h
@@ -57,7 +57,7 @@ struct ei_traits<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Options> >
 };
 
 template<typename _Scalar, int Rows, int Cols, int Supers, int Subs, int Options>
-class BandMatrix : public AnyMatrixBase<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Options> >
+class BandMatrix : public EigenBase<BandMatrix<_Scalar,Rows,Cols,Supers,Subs,Options> >
 {
   public:
 
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index 21d792f49..2850c60cb 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -40,7 +40,7 @@ template<typename Derived> class DenseBase
   : public ei_special_scalar_op_base<Derived,typename ei_traits<Derived>::Scalar,
                                      typename NumTraits<typename ei_traits<Derived>::Scalar>::Real>
 #else
-  : public AnyMatrixBase<Derived>
+  : public EigenBase<Derived>
 #endif // not EIGEN_PARSED_BY_DOXYGEN
 {
   public:
@@ -53,8 +53,8 @@ template<typename Derived> class DenseBase
     typedef typename ei_traits<Derived>::Scalar Scalar;
     typedef typename ei_packet_traits<Scalar>::type PacketScalar;
 
-    using AnyMatrixBase<Derived>::derived;
-    using AnyMatrixBase<Derived>::const_cast_derived;
+    using EigenBase<Derived>::derived;
+    using EigenBase<Derived>::const_cast_derived;
 #endif // not EIGEN_PARSED_BY_DOXYGEN
 
     enum {
@@ -214,13 +214,13 @@ template<typename Derived> class DenseBase
     Derived& operator=(const DenseBase& other);
 
     template<typename OtherDerived>
-    Derived& operator=(const AnyMatrixBase<OtherDerived> &other);
+    Derived& operator=(const EigenBase<OtherDerived> &other);
 
     template<typename OtherDerived>
-    Derived& operator+=(const AnyMatrixBase<OtherDerived> &other);
+    Derived& operator+=(const EigenBase<OtherDerived> &other);
 
     template<typename OtherDerived>
-    Derived& operator-=(const AnyMatrixBase<OtherDerived> &other);
+    Derived& operator-=(const EigenBase<OtherDerived> &other);
 
     template<typename OtherDerived>
     Derived& operator=(const ReturnByValue<OtherDerived>& func);
diff --git a/Eigen/src/Core/DenseStorageBase.h b/Eigen/src/Core/DenseStorageBase.h
index 5c8e48768..530ddcd07 100644
--- a/Eigen/src/Core/DenseStorageBase.h
+++ b/Eigen/src/Core/DenseStorageBase.h
@@ -355,19 +355,19 @@ class DenseStorageBase : public _Base<Derived>
 //       EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
     }
 
-    /** \copydoc MatrixBase::operator=(const AnyMatrixBase<OtherDerived>&)
+    /** \copydoc MatrixBase::operator=(const EigenBase<OtherDerived>&)
       */
     template<typename OtherDerived>
-    EIGEN_STRONG_INLINE Derived& operator=(const AnyMatrixBase<OtherDerived> &other)
+    EIGEN_STRONG_INLINE Derived& operator=(const EigenBase<OtherDerived> &other)
     {
       resize(other.derived().rows(), other.derived().cols());
       Base::operator=(other.derived());
       return this->derived();
     }
 
-    /** \sa MatrixBase::operator=(const AnyMatrixBase<OtherDerived>&) */
+    /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
     template<typename OtherDerived>
-    EIGEN_STRONG_INLINE DenseStorageBase(const AnyMatrixBase<OtherDerived> &other)
+    EIGEN_STRONG_INLINE DenseStorageBase(const EigenBase<OtherDerived> &other)
       : m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
     {
       _check_template_params();
diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h
index 08c046611..774b0d7ae 100644
--- a/Eigen/src/Core/DiagonalMatrix.h
+++ b/Eigen/src/Core/DiagonalMatrix.h
@@ -28,7 +28,7 @@
 
 #ifndef EIGEN_PARSED_BY_DOXYGEN
 template<typename Derived>
-class DiagonalBase : public AnyMatrixBase<Derived>
+class DiagonalBase : public EigenBase<Derived>
 {
   public:
     typedef typename ei_traits<Derived>::DiagonalVectorType DiagonalVectorType;
diff --git a/Eigen/src/Core/AnyMatrixBase.h b/Eigen/src/Core/EigenBase.h
similarity index 86%
rename from Eigen/src/Core/AnyMatrixBase.h
rename to Eigen/src/Core/EigenBase.h
index a5d1cfe9f..8b302b663 100644
--- a/Eigen/src/Core/AnyMatrixBase.h
+++ b/Eigen/src/Core/EigenBase.h
@@ -23,19 +23,19 @@
 // License and a copy of the GNU General Public License along with
 // Eigen. If not, see <http://www.gnu.org/licenses/>.
 
-#ifndef EIGEN_ANYMATRIXBASE_H
-#define EIGEN_ANYMATRIXBASE_H
+#ifndef EIGEN_EIGENBASE_H
+#define EIGEN_EIGENBASE_H
 
 
 /** Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T).
   *
-  * In other words, an AnyMatrixBase object is an object that can be copied into a MatrixBase.
+  * In other words, an EigenBase object is an object that can be copied into a MatrixBase.
   *
   * Besides MatrixBase-derived classes, this also includes special matrix classes such as diagonal matrices, etc.
   *
   * Notice that this class is trivial, it is only used to disambiguate overloaded functions.
   */
-template<typename Derived> struct AnyMatrixBase
+template<typename Derived> struct EigenBase
 {
 //   typedef typename ei_plain_matrix_type<Derived>::type PlainMatrixType;
 
@@ -45,7 +45,7 @@ template<typename Derived> struct AnyMatrixBase
   const Derived& derived() const { return *static_cast<const Derived*>(this); }
 
   inline Derived& const_cast_derived() const
-  { return *static_cast<Derived*>(const_cast<AnyMatrixBase*>(this)); }
+  { return *static_cast<Derived*>(const_cast<EigenBase*>(this)); }
 
   /** \returns the number of rows. \sa cols(), RowsAtCompileTime */
   inline int rows() const { return derived().rows(); }
@@ -108,7 +108,7 @@ template<typename Derived> struct AnyMatrixBase
   */
 template<typename Derived>
 template<typename OtherDerived>
-Derived& DenseBase<Derived>::operator=(const AnyMatrixBase<OtherDerived> &other)
+Derived& DenseBase<Derived>::operator=(const EigenBase<OtherDerived> &other)
 {
   other.derived().evalTo(derived());
   return derived();
@@ -116,7 +116,7 @@ Derived& DenseBase<Derived>::operator=(const AnyMatrixBase<OtherDerived> &other)
 
 template<typename Derived>
 template<typename OtherDerived>
-Derived& DenseBase<Derived>::operator+=(const AnyMatrixBase<OtherDerived> &other)
+Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
 {
   other.derived().addTo(derived());
   return derived();
@@ -124,7 +124,7 @@ Derived& DenseBase<Derived>::operator+=(const AnyMatrixBase<OtherDerived> &other
 
 template<typename Derived>
 template<typename OtherDerived>
-Derived& DenseBase<Derived>::operator-=(const AnyMatrixBase<OtherDerived> &other)
+Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
 {
   other.derived().subTo(derived());
   return derived();
@@ -137,7 +137,7 @@ Derived& DenseBase<Derived>::operator-=(const AnyMatrixBase<OtherDerived> &other
 template<typename Derived>
 template<typename OtherDerived>
 inline Derived&
-MatrixBase<Derived>::operator*=(const AnyMatrixBase<OtherDerived> &other)
+MatrixBase<Derived>::operator*=(const EigenBase<OtherDerived> &other)
 {
   other.derived().applyThisOnTheRight(derived());
   return derived();
@@ -146,7 +146,7 @@ MatrixBase<Derived>::operator*=(const AnyMatrixBase<OtherDerived> &other)
 /** replaces \c *this by \c *this * \a other. It is equivalent to MatrixBase::operator*=() */
 template<typename Derived>
 template<typename OtherDerived>
-inline void MatrixBase<Derived>::applyOnTheRight(const AnyMatrixBase<OtherDerived> &other)
+inline void MatrixBase<Derived>::applyOnTheRight(const EigenBase<OtherDerived> &other)
 {
   other.derived().applyThisOnTheRight(derived());
 }
@@ -154,9 +154,9 @@ inline void MatrixBase<Derived>::applyOnTheRight(const AnyMatrixBase<OtherDerive
 /** replaces \c *this by \c *this * \a other. */
 template<typename Derived>
 template<typename OtherDerived>
-inline void MatrixBase<Derived>::applyOnTheLeft(const AnyMatrixBase<OtherDerived> &other)
+inline void MatrixBase<Derived>::applyOnTheLeft(const EigenBase<OtherDerived> &other)
 {
   other.derived().applyThisOnTheLeft(derived());
 }
 
-#endif // EIGEN_ANYMATRIXBASE_H
+#endif // EIGEN_EIGENBASE_H
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index 6f194ffba..1c43340a6 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -181,10 +181,10 @@ class Matrix
 
     /**
       * \brief Copies the generic expression \a other into *this.
-      * \copydetails DenseBase::operator=(const AnyMatrixBase<OtherDerived> &other)
+      * \copydetails DenseBase::operator=(const EigenBase<OtherDerived> &other)
       */
     template<typename OtherDerived>
-    EIGEN_STRONG_INLINE Matrix& operator=(const AnyMatrixBase<OtherDerived> &other)
+    EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase<OtherDerived> &other)
     {
       return Base::operator=(other);
     }
@@ -297,10 +297,10 @@ class Matrix
     }
 
     /** \brief Copy constructor for generic expressions.
-      * \sa MatrixBase::operator=(const AnyMatrixBase<OtherDerived>&)
+      * \sa MatrixBase::operator=(const EigenBase<OtherDerived>&)
       */
     template<typename OtherDerived>
-    EIGEN_STRONG_INLINE Matrix(const AnyMatrixBase<OtherDerived> &other)
+    EIGEN_STRONG_INLINE Matrix(const EigenBase<OtherDerived> &other)
       : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
     {
       Base::_check_template_params();
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 229195046..122a2271b 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -193,13 +193,13 @@ template<typename Derived> class MatrixBase
     lazyProduct(const MatrixBase<OtherDerived> &other) const;
 
     template<typename OtherDerived>
-    Derived& operator*=(const AnyMatrixBase<OtherDerived>& other);
+    Derived& operator*=(const EigenBase<OtherDerived>& other);
 
     template<typename OtherDerived>
-    void applyOnTheLeft(const AnyMatrixBase<OtherDerived>& other);
+    void applyOnTheLeft(const EigenBase<OtherDerived>& other);
 
     template<typename OtherDerived>
-    void applyOnTheRight(const AnyMatrixBase<OtherDerived>& other);
+    void applyOnTheRight(const EigenBase<OtherDerived>& other);
 
     template<typename DiagonalDerived>
     const DiagonalProduct<Derived, DiagonalDerived, OnTheRight>
diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h
index 284baf678..2d97c9c38 100644
--- a/Eigen/src/Core/PermutationMatrix.h
+++ b/Eigen/src/Core/PermutationMatrix.h
@@ -55,7 +55,7 @@ struct ei_traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> >
 {};
 
 template<int SizeAtCompileTime, int MaxSizeAtCompileTime>
-class PermutationMatrix : public AnyMatrixBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> >
+class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> >
 {
   public:
 
@@ -144,7 +144,7 @@ class PermutationMatrix : public AnyMatrixBase<PermutationMatrix<SizeAtCompileTi
 
     /** \returns a Matrix object initialized from this permutation matrix. Notice that it
       * is inefficient to return this Matrix object by value. For efficiency, favor using
-      * the Matrix constructor taking AnyMatrixBase objects.
+      * the Matrix constructor taking EigenBase objects.
       */
     DenseMatrixType toDenseMatrix() const
     {
diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h
index 8bea0aa68..859f10298 100644
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -32,7 +32,7 @@
   *
   * \brief Base class for triangular part in a matrix
   */
-template<typename Derived> class TriangularBase : public AnyMatrixBase<Derived>
+template<typename Derived> class TriangularBase : public EigenBase<Derived>
 {
   public:
 
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index aed0abe6d..a56e4fb84 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -28,7 +28,7 @@
 template<typename T> struct ei_traits;
 template<typename T> struct NumTraits;
 
-template<typename Derived> struct AnyMatrixBase;
+template<typename Derived> struct EigenBase;
 
 template<typename _Scalar, int _Rows, int _Cols,
          int _Options = EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION | AutoAlign,
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index a86e7be89..4884557e5 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -201,6 +201,28 @@ template<typename T> struct ei_plain_matrix_type_row_major
 // we should be able to get rid of this one too
 template<typename T> struct ei_must_nest_by_value { enum { ret = false }; };
 
+template<class T>
+struct ei_is_reference
+{
+#ifndef NDEBUG
+  static void check() { std::cout << typeid(T).name() << std::endl; }
+#else
+  static void check() {}
+#endif
+  enum { ret = false };
+};
+
+template<class T>
+struct ei_is_reference<T&>
+{
+#ifndef NDEBUG
+  static void check() { std::cout << typeid(T).name() << "&" << std::endl; }
+#else
+  static void check() {}
+#endif
+  enum { ret = true };
+};
+
 /**
 * The reference selector for template expressions. The idea is that we don't
 * need to use references for expressions since they are light weight proxy
@@ -258,7 +280,7 @@ template<unsigned int Flags> struct ei_are_flags_consistent
   * overloads for complex types */
 template<typename Derived,typename Scalar,typename OtherScalar,
          bool EnableIt = !ei_is_same_type<Scalar,OtherScalar>::ret >
-struct ei_special_scalar_op_base : public AnyMatrixBase<Derived>
+struct ei_special_scalar_op_base : public EigenBase<Derived>
 {
   // dummy operator* so that the
   // "using ei_special_scalar_op_base::operator*" compiles
@@ -266,7 +288,7 @@ struct ei_special_scalar_op_base : public AnyMatrixBase<Derived>
 };
 
 template<typename Derived,typename Scalar,typename OtherScalar>
-struct ei_special_scalar_op_base<Derived,Scalar,OtherScalar,true>  : public AnyMatrixBase<Derived>
+struct ei_special_scalar_op_base<Derived,Scalar,OtherScalar,true>  : public EigenBase<Derived>
 {
   const CwiseUnaryOp<ei_scalar_multiple2_op<Scalar,OtherScalar>, Derived>
   operator*(const OtherScalar& scalar) const
diff --git a/Eigen/src/Geometry/RotationBase.h b/Eigen/src/Geometry/RotationBase.h
index e8bb16f17..9b865889c 100644
--- a/Eigen/src/Geometry/RotationBase.h
+++ b/Eigen/src/Geometry/RotationBase.h
@@ -74,12 +74,12 @@ class RotationBase
       */
     template<typename OtherDerived>
     EIGEN_STRONG_INLINE typename ei_rotation_base_generic_product_selector<Derived,OtherDerived,OtherDerived::IsVectorAtCompileTime>::ReturnType
-    operator*(const AnyMatrixBase<OtherDerived>& e) const
+    operator*(const EigenBase<OtherDerived>& e) const
     { return ei_rotation_base_generic_product_selector<Derived,OtherDerived>::run(derived(), e.derived()); }
 
     /** \returns the concatenation of a linear transformation \a l with the rotation \a r */
     template<typename OtherDerived> friend
-    inline RotationMatrixType operator*(const AnyMatrixBase<OtherDerived>& l, const Derived& r)
+    inline RotationMatrixType operator*(const EigenBase<OtherDerived>& l, const Derived& r)
     { return l.derived() * r.toRotationMatrix(); }
 
     /** \returns the concatenation of the rotation \c *this with a transformation \a t */
diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h
index 40bc7033a..1240a95bc 100644
--- a/Eigen/src/Geometry/Transform.h
+++ b/Eigen/src/Geometry/Transform.h
@@ -226,14 +226,14 @@ public:
 
   /** Constructs and initializes a transformation from a Dim^2 or a (Dim+1)^2 matrix. */
   template<typename OtherDerived>
-  inline explicit Transform(const AnyMatrixBase<OtherDerived>& other)
+  inline explicit Transform(const EigenBase<OtherDerived>& other)
   {
     ei_transform_construct_from_matrix<OtherDerived,Mode,Dim,HDim>::run(this, other.derived());
   }
 
   /** Set \c *this from a Dim^2 or (Dim+1)^2 matrix. */
   template<typename OtherDerived>
-  inline Transform& operator=(const AnyMatrixBase<OtherDerived>& other)
+  inline Transform& operator=(const EigenBase<OtherDerived>& other)
   {
     ei_transform_construct_from_matrix<OtherDerived,Mode,Dim,HDim>::run(this, other.derived());
     return *this;
@@ -310,7 +310,7 @@ public:
   // note: this function is defined here because some compilers cannot find the respective declaration
   template<typename OtherDerived>
   inline const typename ei_transform_right_product_impl<OtherDerived,Mode,_Dim,_Dim+1>::ResultType
-  operator * (const AnyMatrixBase<OtherDerived> &other) const
+  operator * (const EigenBase<OtherDerived> &other) const
   { return ei_transform_right_product_impl<OtherDerived,Mode,Dim,HDim>::run(*this,other.derived()); }
 
   /** \returns the product expression of a transformation matrix \a a times a transform \a b
@@ -322,11 +322,11 @@ public:
     */
   template<typename OtherDerived> friend
   inline const typename ei_transform_left_product_impl<OtherDerived,Mode,_Dim,_Dim+1>::ResultType
-  operator * (const AnyMatrixBase<OtherDerived> &a, const Transform &b)
+  operator * (const EigenBase<OtherDerived> &a, const Transform &b)
   { return ei_transform_left_product_impl<OtherDerived,Mode,Dim,HDim>::run(a.derived(),b); }
 
   template<typename OtherDerived>
-  inline Transform& operator*=(const AnyMatrixBase<OtherDerived>& other) { return *this = *this * other; }
+  inline Transform& operator*=(const EigenBase<OtherDerived>& other) { return *this = *this * other; }
 
   /** Contatenates two transformations */
   inline const Transform operator * (const Transform& other) const
@@ -1021,7 +1021,7 @@ struct ei_transform_construct_from_matrix<Other, AffineCompact,Dim,HDim, HDim,HD
 };
 
 /*********************************************************
-*** Specializations of operator* with a AnyMatrixBase ***
+*** Specializations of operator* with a EigenBase ***
 *********************************************************/
 
 // ei_general_product_return_type is a generalization of ProductReturnType, for all types (including e.g. DiagonalBase...),
diff --git a/Eigen/src/Geometry/Translation.h b/Eigen/src/Geometry/Translation.h
index 1e3906bde..bde72c738 100644
--- a/Eigen/src/Geometry/Translation.h
+++ b/Eigen/src/Geometry/Translation.h
@@ -93,7 +93,7 @@ public:
 
   /** Concatenates a translation and a linear transformation */
   template<typename OtherDerived>
-  inline AffineTransformType operator* (const AnyMatrixBase<OtherDerived>& linear) const;
+  inline AffineTransformType operator* (const EigenBase<OtherDerived>& linear) const;
 
   /** Concatenates a translation and a rotation */
   template<typename Derived>
@@ -103,7 +103,7 @@ public:
   /** \returns the concatenation of a linear transformation \a l with the translation \a t */
   // its a nightmare to define a templated friend function outside its declaration
   template<typename OtherDerived> friend
-  inline AffineTransformType operator*(const AnyMatrixBase<OtherDerived>& linear, const Translation& t)
+  inline AffineTransformType operator*(const EigenBase<OtherDerived>& linear, const Translation& t)
   {
     AffineTransformType res;
     res.matrix().setZero();
@@ -182,7 +182,7 @@ Translation<Scalar,Dim>::operator* (const UniformScaling<Scalar>& other) const
 template<typename Scalar, int Dim>
 template<typename OtherDerived>
 inline typename Translation<Scalar,Dim>::AffineTransformType
-Translation<Scalar,Dim>::operator* (const AnyMatrixBase<OtherDerived>& linear) const
+Translation<Scalar,Dim>::operator* (const EigenBase<OtherDerived>& linear) const
 {
   AffineTransformType res;
   res.matrix().setZero();
diff --git a/Eigen/src/Householder/HouseholderSequence.h b/Eigen/src/Householder/HouseholderSequence.h
index f8ae4136b..05e883168 100644
--- a/Eigen/src/Householder/HouseholderSequence.h
+++ b/Eigen/src/Householder/HouseholderSequence.h
@@ -97,7 +97,7 @@ template<typename OtherScalarType, typename MatrixType> struct ei_matrix_type_ti
 };
 
 template<typename VectorsType, typename CoeffsType, int Side> class HouseholderSequence
-  : public AnyMatrixBase<HouseholderSequence<VectorsType,CoeffsType,Side> >
+  : public EigenBase<HouseholderSequence<VectorsType,CoeffsType,Side> >
 {
     enum {
       RowsAtCompileTime = ei_traits<HouseholderSequence>::RowsAtCompileTime,
diff --git a/Eigen/src/Sparse/SparseMatrixBase.h b/Eigen/src/Sparse/SparseMatrixBase.h
index 205325939..7bf4ef836 100644
--- a/Eigen/src/Sparse/SparseMatrixBase.h
+++ b/Eigen/src/Sparse/SparseMatrixBase.h
@@ -36,7 +36,7 @@
   *
   *
   */
-template<typename Derived> class SparseMatrixBase : public AnyMatrixBase<Derived>
+template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
 {
   public:
 
diff --git a/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h b/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h
index b90a6f9e9..ff20b830c 100644
--- a/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h
+++ b/unsupported/Eigen/src/Skyline/SkylineMatrixBase.h
@@ -36,7 +36,7 @@
  * \param Derived
  *
  */
-template<typename Derived> class SkylineMatrixBase : public AnyMatrixBase<Derived> {
+template<typename Derived> class SkylineMatrixBase : public EigenBase<Derived> {
 public:
 
     typedef typename ei_traits<Derived>::Scalar Scalar;

From 67ce07ea831fa2df4081436c0e8c4bfcd77d6ede Mon Sep 17 00:00:00 2001
From: Jitse Niesen <jitse@maths.leeds.ac.uk>
Date: Sat, 20 Feb 2010 14:45:50 +0000
Subject: [PATCH 03/38] matrix_function test: replace expm(A).inverse() by
 expm(-A) The latter is more stable. This fixes one of the issues with the
 test. Also, make typedef's in MatrixFunctionReturnValue public; this is
 necessary to get the test to compile.

---
 .../Eigen/src/MatrixFunctions/MatrixFunction.h       |  6 ++----
 unsupported/test/matrix_function.cpp                 | 12 ++++++------
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
index d63bcbce9..16fce5b29 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
@@ -492,14 +492,12 @@ typename MatrixFunction<MatrixType,1>::DynMatrixType MatrixFunction<MatrixType,1
 template<typename Derived> class MatrixFunctionReturnValue
 : public ReturnByValue<MatrixFunctionReturnValue<Derived> >
 {
-  private:
+  public:
 
     typedef typename ei_traits<Derived>::Scalar Scalar;
     typedef typename ei_stem_function<Scalar>::type StemFunction;
 
-  public:
-
-    /** \brief Constructor.
+   /** \brief Constructor.
       *
       * \param[in] A  %Matrix (expression) forming the argument of the
       * matrix function.
diff --git a/unsupported/test/matrix_function.cpp b/unsupported/test/matrix_function.cpp
index 4ff6d7f1e..7a1501da2 100644
--- a/unsupported/test/matrix_function.cpp
+++ b/unsupported/test/matrix_function.cpp
@@ -109,11 +109,10 @@ template<typename MatrixType>
 void testHyperbolicFunctions(const MatrixType& A)
 {
   for (int i = 0; i < g_repeat; i++) {
-    MatrixType sinhA = ei_matrix_sinh(A);
-    MatrixType coshA = ei_matrix_cosh(A);
     MatrixType expA = ei_matrix_exponential(A);
-    VERIFY_IS_APPROX(sinhA, (expA - expA.inverse())/2);
-    VERIFY_IS_APPROX(coshA, (expA + expA.inverse())/2);
+    MatrixType expmA = ei_matrix_exponential(-A);
+    VERIFY_IS_APPROX(ei_matrix_sinh(A), (expA - expmA) / 2);
+    VERIFY_IS_APPROX(ei_matrix_cosh(A), (expA + expmA) / 2);
   }
 }
 
@@ -134,14 +133,15 @@ void testGonioFunctions(const MatrixType& A)
     ComplexMatrix Ac = A.template cast<ComplexScalar>();
 
     ComplexMatrix exp_iA = ei_matrix_exponential(imagUnit * Ac);
+    ComplexMatrix exp_miA = ei_matrix_exponential(-imagUnit * Ac);
 
     MatrixType sinA = ei_matrix_sin(A);
     ComplexMatrix sinAc = sinA.template cast<ComplexScalar>();
-    VERIFY_IS_APPROX(sinAc, (exp_iA - exp_iA.inverse()) / (two*imagUnit));
+    VERIFY_IS_APPROX(sinAc, (exp_iA - exp_miA) / (two*imagUnit));
 
     MatrixType cosA = ei_matrix_cos(A);
     ComplexMatrix cosAc = cosA.template cast<ComplexScalar>();
-    VERIFY_IS_APPROX(cosAc, (exp_iA + exp_iA.inverse()) / 2);
+    VERIFY_IS_APPROX(cosAc, (exp_iA + exp_miA) / 2);
   }
 }
 

From abc8c010807f0706b80bc0ef13303b6485473a57 Mon Sep 17 00:00:00 2001
From: Hauke Heibel <hauke.heibel@gmail.com>
Date: Sat, 20 Feb 2010 15:53:57 +0100
Subject: [PATCH 04/38] Renamed PlainMatrixType to PlainObject (Array !=
 Matrix). Renamed ReturnByValue::ReturnMatrixType ReturnByValue::ReturnType
 (again, Array != Matrix).

---
 Eigen/src/Array/Array.h                       |  2 +-
 Eigen/src/Array/ArrayBase.h                   |  4 +--
 Eigen/src/Array/VectorwiseOp.h                |  2 +-
 Eigen/src/Cholesky/LDLT.h                     |  2 +-
 Eigen/src/Cholesky/LLT.h                      |  8 ++---
 Eigen/src/Core/DenseStorageBase.h             | 10 +++---
 Eigen/src/Core/Dot.h                          |  2 +-
 Eigen/src/Core/EigenBase.h                    |  6 ++--
 Eigen/src/Core/Flagged.h                      |  2 +-
 Eigen/src/Core/Matrix.h                       |  2 +-
 Eigen/src/Core/MatrixBase.h                   | 31 +++++++++----------
 Eigen/src/Core/PermutationMatrix.h            |  2 +-
 Eigen/src/Core/ProductBase.h                  | 14 ++++-----
 Eigen/src/Core/ReturnByValue.h                | 16 +++++-----
 Eigen/src/Core/SelfAdjointView.h              |  6 ++--
 Eigen/src/Core/SelfCwiseBinaryOp.h            |  8 ++---
 Eigen/src/Core/TriangularMatrix.h             |  2 +-
 Eigen/src/Core/products/CoeffBasedProduct.h   | 14 ++++-----
 Eigen/src/Core/util/BlasUtil.h                |  4 +--
 Eigen/src/Core/util/XprHelper.h               |  6 ++--
 Eigen/src/Eigen2Support/TriangularSolver.h    |  2 +-
 .../src/Eigenvalues/SelfAdjointEigenSolver.h  |  4 +--
 Eigen/src/Geometry/Homogeneous.h              |  8 ++---
 Eigen/src/Geometry/OrthoMethods.h             |  6 ++--
 Eigen/src/Householder/Householder.h           |  4 +--
 Eigen/src/LU/FullPivLU.h                      |  6 ++--
 Eigen/src/LU/Inverse.h                        |  4 +--
 Eigen/src/LU/PartialPivLU.h                   |  8 ++---
 Eigen/src/QR/ColPivHouseholderQR.h            |  8 ++---
 Eigen/src/QR/FullPivHouseholderQR.h           |  6 ++--
 Eigen/src/QR/HouseholderQR.h                  |  6 ++--
 Eigen/src/SVD/SVD.h                           |  4 +--
 Eigen/src/SVD/UpperBidiagonalization.h        |  4 +--
 Eigen/src/Sparse/SparseMatrixBase.h           |  4 +--
 Eigen/src/Sparse/SparseSelfAdjointView.h      |  4 +--
 Eigen/src/misc/Image.h                        |  2 +-
 Eigen/src/misc/Kernel.h                       |  2 +-
 Eigen/src/misc/Solve.h                        |  4 +--
 disabled/SkylineMatrix.h                      |  6 ++--
 test/lu.cpp                                   |  4 +--
 .../src/MatrixFunctions/MatrixExponential.h   |  4 +--
 .../src/MatrixFunctions/MatrixFunction.h      |  4 +--
 42 files changed, 123 insertions(+), 124 deletions(-)

diff --git a/Eigen/src/Array/Array.h b/Eigen/src/Array/Array.h
index 77d0c41ac..5a398d849 100644
--- a/Eigen/src/Array/Array.h
+++ b/Eigen/src/Array/Array.h
@@ -41,7 +41,7 @@ class Array
     EIGEN_DENSE_PUBLIC_INTERFACE(Array)
 
     enum { Options = _Options };
-    typedef typename Base::PlainMatrixType PlainMatrixType;
+    typedef typename Base::PlainObject PlainObject;
 
   protected:
     using Base::m_storage;
diff --git a/Eigen/src/Array/ArrayBase.h b/Eigen/src/Array/ArrayBase.h
index 21f6fefb1..97807e5fc 100644
--- a/Eigen/src/Array/ArrayBase.h
+++ b/Eigen/src/Array/ArrayBase.h
@@ -97,7 +97,7 @@ template<typename Derived> class ArrayBase
     /** \internal the plain matrix type corresponding to this expression. Note that is not necessarily
       * exactly the return type of eval(): in the case of plain matrices, the return type of eval() is a const
       * reference to a matrix, not a matrix! It is however guaranteed that the return type of eval() is either
-      * PlainMatrixType or const PlainMatrixType&.
+      * PlainObject or const PlainObject&.
       */
     typedef Array<typename ei_traits<Derived>::Scalar,
                 ei_traits<Derived>::RowsAtCompileTime,
@@ -105,7 +105,7 @@ template<typename Derived> class ArrayBase
                 AutoAlign | (ei_traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
                 ei_traits<Derived>::MaxRowsAtCompileTime,
                 ei_traits<Derived>::MaxColsAtCompileTime
-          > PlainMatrixType;
+          > PlainObject;
 
 
     /** \internal Represents a matrix with all coefficients equal to one another*/
diff --git a/Eigen/src/Array/VectorwiseOp.h b/Eigen/src/Array/VectorwiseOp.h
index 697a07d32..06d999b14 100644
--- a/Eigen/src/Array/VectorwiseOp.h
+++ b/Eigen/src/Array/VectorwiseOp.h
@@ -462,7 +462,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
 
     const Homogeneous<ExpressionType,Direction> homogeneous() const;
 
-    typedef typename ExpressionType::PlainMatrixType CrossReturnType;
+    typedef typename ExpressionType::PlainObject CrossReturnType;
     template<typename OtherDerived>
     const CrossReturnType cross(const MatrixBase<OtherDerived>& other) const;
 
diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h
index b794b0c43..ceb818f19 100644
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -319,7 +319,7 @@ bool LDLT<MatrixType>::solveInPlace(MatrixBase<Derived> &bAndX) const
   * \returns the Cholesky decomposition with full pivoting without square root of \c *this
   */
 template<typename Derived>
-inline const LDLT<typename MatrixBase<Derived>::PlainMatrixType>
+inline const LDLT<typename MatrixBase<Derived>::PlainObject>
 MatrixBase<Derived>::ldlt() const
 {
   return derived();
diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h
index 8a149a316..474b82406 100644
--- a/Eigen/src/Cholesky/LLT.h
+++ b/Eigen/src/Cholesky/LLT.h
@@ -299,20 +299,20 @@ bool LLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
   * \returns the LLT decomposition of \c *this
   */
 template<typename Derived>
-inline const LLT<typename MatrixBase<Derived>::PlainMatrixType>
+inline const LLT<typename MatrixBase<Derived>::PlainObject>
 MatrixBase<Derived>::llt() const
 {
-  return LLT<PlainMatrixType>(derived());
+  return LLT<PlainObject>(derived());
 }
 
 /** \cholesky_module
   * \returns the LLT decomposition of \c *this
   */
 template<typename MatrixType, unsigned int UpLo>
-inline const LLT<typename SelfAdjointView<MatrixType, UpLo>::PlainMatrixType, UpLo>
+inline const LLT<typename SelfAdjointView<MatrixType, UpLo>::PlainObject, UpLo>
 SelfAdjointView<MatrixType, UpLo>::llt() const
 {
-  return LLT<PlainMatrixType,UpLo>(m_matrix);
+  return LLT<PlainObject,UpLo>(m_matrix);
 }
 
 #endif // EIGEN_LLT_H
diff --git a/Eigen/src/Core/DenseStorageBase.h b/Eigen/src/Core/DenseStorageBase.h
index 530ddcd07..6245b8007 100644
--- a/Eigen/src/Core/DenseStorageBase.h
+++ b/Eigen/src/Core/DenseStorageBase.h
@@ -44,7 +44,7 @@ class DenseStorageBase : public _Base<Derived>
   public:
     enum { Options = _Options };
     typedef _Base<Derived> Base;
-    typedef typename Base::PlainMatrixType PlainMatrixType;
+    typedef typename Base::PlainObject PlainObject;
     typedef typename Base::Scalar Scalar;
     typedef typename Base::PacketScalar PacketScalar;
     using Base::RowsAtCompileTime;
@@ -544,7 +544,7 @@ struct ei_conservative_resize_like_impl
   {
     if (_this.rows() == rows && _this.cols() == cols) return;
     EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived)
-    typename Derived::PlainMatrixType tmp(rows,cols);
+    typename Derived::PlainObject tmp(rows,cols);
     const int common_rows = std::min(rows, _this.rows());
     const int common_cols = std::min(cols, _this.cols());
     tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
@@ -563,7 +563,7 @@ struct ei_conservative_resize_like_impl
     EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived)
     EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(OtherDerived)
 
-    typename Derived::PlainMatrixType tmp(other);
+    typename Derived::PlainObject tmp(other);
     const int common_rows = std::min(tmp.rows(), _this.rows());
     const int common_cols = std::min(tmp.cols(), _this.cols());
     tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
@@ -577,7 +577,7 @@ struct ei_conservative_resize_like_impl<Derived,OtherDerived,true>
   static void run(DenseBase<Derived>& _this, int size)
   {
     if (_this.size() == size) return;
-    typename Derived::PlainMatrixType tmp(size);
+    typename Derived::PlainObject tmp(size);
     const int common_size = std::min<int>(_this.size(),size);
     tmp.segment(0,common_size) = _this.segment(0,common_size);
     _this.derived().swap(tmp);
@@ -588,7 +588,7 @@ struct ei_conservative_resize_like_impl<Derived,OtherDerived,true>
     if (_this.rows() == other.rows() && _this.cols() == other.cols()) return;
 
     // segment(...) will check whether Derived/OtherDerived are vectors!
-    typename Derived::PlainMatrixType tmp(other);
+    typename Derived::PlainObject tmp(other);
     const int common_size = std::min<int>(_this.size(),tmp.size());
     tmp.segment(0,common_size) = _this.segment(0,common_size);
     _this.derived().swap(tmp);
diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h
index f0c520b1f..201bd23ca 100644
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -299,7 +299,7 @@ inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real MatrixBase<
   * \sa norm(), normalize()
   */
 template<typename Derived>
-inline const typename MatrixBase<Derived>::PlainMatrixType
+inline const typename MatrixBase<Derived>::PlainObject
 MatrixBase<Derived>::normalized() const
 {
   typedef typename ei_nested<Derived>::type Nested;
diff --git a/Eigen/src/Core/EigenBase.h b/Eigen/src/Core/EigenBase.h
index 8b302b663..cf1ce4376 100644
--- a/Eigen/src/Core/EigenBase.h
+++ b/Eigen/src/Core/EigenBase.h
@@ -37,7 +37,7 @@
   */
 template<typename Derived> struct EigenBase
 {
-//   typedef typename ei_plain_matrix_type<Derived>::type PlainMatrixType;
+//   typedef typename ei_plain_matrix_type<Derived>::type PlainObject;
 
   /** \returns a reference to the derived object */
   Derived& derived() { return *static_cast<Derived*>(this); }
@@ -61,7 +61,7 @@ template<typename Derived> struct EigenBase
   {
     // This is the default implementation,
     // derived class can reimplement it in a more optimized way.
-    typename Dest::PlainMatrixType res(rows(),cols());
+    typename Dest::PlainObject res(rows(),cols());
     evalTo(res);
     dst += res;
   }
@@ -71,7 +71,7 @@ template<typename Derived> struct EigenBase
   {
     // This is the default implementation,
     // derived class can reimplement it in a more optimized way.
-    typename Dest::PlainMatrixType res(rows(),cols());
+    typename Dest::PlainObject res(rows(),cols());
     evalTo(res);
     dst -= res;
   }
diff --git a/Eigen/src/Core/Flagged.h b/Eigen/src/Core/Flagged.h
index 7f42a1e73..0044fe7cb 100644
--- a/Eigen/src/Core/Flagged.h
+++ b/Eigen/src/Core/Flagged.h
@@ -109,7 +109,7 @@ template<typename ExpressionType, unsigned int Added, unsigned int Removed> clas
     const ExpressionType& _expression() const { return m_matrix; }
 
     template<typename OtherDerived>
-    typename ExpressionType::PlainMatrixType solveTriangular(const MatrixBase<OtherDerived>& other) const;
+    typename ExpressionType::PlainObject solveTriangular(const MatrixBase<OtherDerived>& other) const;
 
     template<typename OtherDerived>
     void solveTriangularInPlace(const MatrixBase<OtherDerived>& other) const;
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index 1c43340a6..44a0ef7d1 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -139,7 +139,7 @@ class Matrix
 
     EIGEN_DENSE_PUBLIC_INTERFACE(Matrix)
 
-    typedef typename Base::PlainMatrixType PlainMatrixType;
+    typedef typename Base::PlainObject PlainObject;
 
     enum { NeedsToAlign = (!(Options&DontAlign))
                           && SizeAtCompileTime!=Dynamic && ((sizeof(Scalar)*SizeAtCompileTime)%16)==0 };
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index 122a2271b..9c62163ba 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -121,7 +121,7 @@ template<typename Derived> class MatrixBase
       *
       * This is not necessarily exactly the return type of eval(). In the case of plain matrices,
       * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed
-      * that the return type of eval() is either PlainMatrixType or const PlainMatrixType&.
+      * that the return type of eval() is either PlainObject or const PlainObject&.
       */
     typedef Matrix<typename ei_traits<Derived>::Scalar,
                 ei_traits<Derived>::RowsAtCompileTime,
@@ -129,8 +129,7 @@ template<typename Derived> class MatrixBase
                 AutoAlign | (ei_traits<Derived>::Flags&RowMajorBit ? RowMajor : ColMajor),
                 ei_traits<Derived>::MaxRowsAtCompileTime,
                 ei_traits<Derived>::MaxColsAtCompileTime
-          > PlainMatrixType;
-    // typedef typename ei_plain_matrix_type<Derived>::type PlainMatrixType;
+          > PlainObject;
 
 #ifndef EIGEN_PARSED_BY_DOXYGEN
     /** \internal Represents a matrix with all coefficients equal to one another*/
@@ -212,7 +211,7 @@ template<typename Derived> class MatrixBase
     RealScalar stableNorm() const;
     RealScalar blueNorm() const;
     RealScalar hypotNorm() const;
-    const PlainMatrixType normalized() const;
+    const PlainObject normalized() const;
     void normalize();
 
     const AdjointReturnType adjoint() const;
@@ -301,9 +300,9 @@ template<typename Derived> class MatrixBase
 
 /////////// LU module ///////////
 
-    const FullPivLU<PlainMatrixType> fullPivLu() const;
-    const PartialPivLU<PlainMatrixType> partialPivLu() const;
-    const PartialPivLU<PlainMatrixType> lu() const;
+    const FullPivLU<PlainObject> fullPivLu() const;
+    const PartialPivLU<PlainObject> partialPivLu() const;
+    const PartialPivLU<PlainObject> lu() const;
     const ei_inverse_impl<Derived> inverse() const;
     template<typename ResultType>
     void computeInverseAndDetWithCheck(
@@ -322,29 +321,29 @@ template<typename Derived> class MatrixBase
 
 /////////// Cholesky module ///////////
 
-    const LLT<PlainMatrixType>  llt() const;
-    const LDLT<PlainMatrixType> ldlt() const;
+    const LLT<PlainObject>  llt() const;
+    const LDLT<PlainObject> ldlt() const;
 
 /////////// QR module ///////////
 
-    const HouseholderQR<PlainMatrixType> householderQr() const;
-    const ColPivHouseholderQR<PlainMatrixType> colPivHouseholderQr() const;
-    const FullPivHouseholderQR<PlainMatrixType> fullPivHouseholderQr() const;
+    const HouseholderQR<PlainObject> householderQr() const;
+    const ColPivHouseholderQR<PlainObject> colPivHouseholderQr() const;
+    const FullPivHouseholderQR<PlainObject> fullPivHouseholderQr() const;
 
     EigenvaluesReturnType eigenvalues() const;
     RealScalar operatorNorm() const;
 
 /////////// SVD module ///////////
 
-    SVD<PlainMatrixType> svd() const;
+    SVD<PlainObject> svd() const;
 
 /////////// Geometry module ///////////
 
     template<typename OtherDerived>
-    PlainMatrixType cross(const MatrixBase<OtherDerived>& other) const;
+    PlainObject cross(const MatrixBase<OtherDerived>& other) const;
     template<typename OtherDerived>
-    PlainMatrixType cross3(const MatrixBase<OtherDerived>& other) const;
-    PlainMatrixType unitOrthogonal(void) const;
+    PlainObject cross3(const MatrixBase<OtherDerived>& other) const;
+    PlainObject unitOrthogonal(void) const;
     Matrix<Scalar,3,1> eulerAngles(int a0, int a1, int a2) const;
     const ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
     enum {
diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h
index 2d97c9c38..fcd2e46cc 100644
--- a/Eigen/src/Core/PermutationMatrix.h
+++ b/Eigen/src/Core/PermutationMatrix.h
@@ -280,7 +280,7 @@ operator*(const PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> &perm
 template<typename PermutationType, typename MatrixType, int Side>
 struct ei_traits<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side> >
 {
-  typedef typename MatrixType::PlainMatrixType ReturnMatrixType;
+  typedef typename MatrixType::PlainObject ReturnType;
 };
 
 template<typename PermutationType, typename MatrixType, int Side>
diff --git a/Eigen/src/Core/ProductBase.h b/Eigen/src/Core/ProductBase.h
index 481e7c760..789aecfb6 100644
--- a/Eigen/src/Core/ProductBase.h
+++ b/Eigen/src/Core/ProductBase.h
@@ -88,7 +88,7 @@ class ProductBase : public MatrixBase<Derived>
 
   public:
 
-    typedef typename Base::PlainMatrixType PlainMatrixType;
+    typedef typename Base::PlainObject PlainObject;
 
     ProductBase(const Lhs& lhs, const Rhs& rhs)
       : m_lhs(lhs), m_rhs(rhs)
@@ -116,8 +116,8 @@ class ProductBase : public MatrixBase<Derived>
     const _LhsNested& lhs() const { return m_lhs; }
     const _RhsNested& rhs() const { return m_rhs; }
 
-    // Implicit convertion to the nested type (trigger the evaluation of the product)
-    operator const PlainMatrixType& () const
+    // Implicit conversion to the nested type (trigger the evaluation of the product)
+    operator const PlainObject& () const
     {
       m_result.resize(m_lhs.rows(), m_rhs.cols());
       this->evalTo(m_result);
@@ -139,7 +139,7 @@ class ProductBase : public MatrixBase<Derived>
     const LhsNested m_lhs;
     const RhsNested m_rhs;
 
-    mutable PlainMatrixType m_result;
+    mutable PlainObject m_result;
 
   private:
 
@@ -152,10 +152,10 @@ class ProductBase : public MatrixBase<Derived>
 
 // here we need to overload the nested rule for products
 // such that the nested type is a const reference to a plain matrix
-template<typename Lhs, typename Rhs, int Mode, int N, typename PlainMatrixType>
-struct ei_nested<GeneralProduct<Lhs,Rhs,Mode>, N, PlainMatrixType>
+template<typename Lhs, typename Rhs, int Mode, int N, typename PlainObject>
+struct ei_nested<GeneralProduct<Lhs,Rhs,Mode>, N, PlainObject>
 {
-  typedef PlainMatrixType const& type;
+  typedef PlainObject const& type;
 };
 
 template<typename NestedProduct>
diff --git a/Eigen/src/Core/ReturnByValue.h b/Eigen/src/Core/ReturnByValue.h
index 920269365..d375f0b5c 100644
--- a/Eigen/src/Core/ReturnByValue.h
+++ b/Eigen/src/Core/ReturnByValue.h
@@ -31,7 +31,7 @@
   */
 template<typename Derived>
 struct ei_traits<ReturnByValue<Derived> >
-  : public ei_traits<typename ei_traits<Derived>::ReturnMatrixType>
+  : public ei_traits<typename ei_traits<Derived>::ReturnType>
 {
   enum {
     // FIXME had to remove the DirectAccessBit for usage like
@@ -42,7 +42,7 @@ struct ei_traits<ReturnByValue<Derived> >
     // The fact that I had to do that shows that when doing xpr.block() with a non-direct-access xpr,
     // even if xpr has the EvalBeforeNestingBit, the block() doesn't use direct access on the evaluated
     // xpr.
-    Flags = (ei_traits<typename ei_traits<Derived>::ReturnMatrixType>::Flags
+    Flags = (ei_traits<typename ei_traits<Derived>::ReturnType>::Flags
              | EvalBeforeNestingBit) & ~DirectAccessBit
   };
 };
@@ -51,18 +51,18 @@ struct ei_traits<ReturnByValue<Derived> >
  * So the only way that nesting it in an expression can work, is by evaluating it into a plain matrix.
  * So ei_nested always gives the plain return matrix type.
  */
-template<typename Derived,int n,typename PlainMatrixType>
-struct ei_nested<ReturnByValue<Derived>, n, PlainMatrixType>
+template<typename Derived,int n,typename PlainObject>
+struct ei_nested<ReturnByValue<Derived>, n, PlainObject>
 {
-  typedef typename ei_traits<Derived>::ReturnMatrixType type;
+  typedef typename ei_traits<Derived>::ReturnType type;
 };
 
 template<typename Derived> class ReturnByValue
-  : public ei_traits<Derived>::ReturnMatrixType::template MakeBase<ReturnByValue<Derived> >::Type
+  : public ei_traits<Derived>::ReturnType::template MakeBase<ReturnByValue<Derived> >::Type
 {
   public:
-    typedef typename ei_traits<Derived>::ReturnMatrixType ReturnMatrixType;
-    typedef typename ReturnMatrixType::template MakeBase<ReturnByValue<Derived> >::Type Base;
+    typedef typename ei_traits<Derived>::ReturnType ReturnType;
+    typedef typename ReturnType::template MakeBase<ReturnByValue<Derived> >::Type Base;
     EIGEN_DENSE_PUBLIC_INTERFACE(ReturnByValue)
 
     template<typename Dest>
diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h
index 6d01ee495..c3c5b17ff 100644
--- a/Eigen/src/Core/SelfAdjointView.h
+++ b/Eigen/src/Core/SelfAdjointView.h
@@ -68,7 +68,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
     enum {
       Mode = ei_traits<SelfAdjointView>::Mode
     };
-    typedef typename MatrixType::PlainMatrixType PlainMatrixType;
+    typedef typename MatrixType::PlainObject PlainObject;
 
     inline SelfAdjointView(const MatrixType& matrix) : m_matrix(matrix)
     { ei_assert(ei_are_flags_consistent<Mode>::ret); }
@@ -146,8 +146,8 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
 
 /////////// Cholesky module ///////////
 
-    const LLT<PlainMatrixType, UpLo> llt() const;
-    const LDLT<PlainMatrixType> ldlt() const;
+    const LLT<PlainObject, UpLo> llt() const;
+    const LDLT<PlainObject> ldlt() const;
 
   protected:
     const typename MatrixType::Nested m_matrix;
diff --git a/Eigen/src/Core/SelfCwiseBinaryOp.h b/Eigen/src/Core/SelfCwiseBinaryOp.h
index 7ae2e82a4..d2690b66b 100644
--- a/Eigen/src/Core/SelfCwiseBinaryOp.h
+++ b/Eigen/src/Core/SelfCwiseBinaryOp.h
@@ -124,8 +124,8 @@ template<typename Derived>
 inline Derived& DenseBase<Derived>::operator*=(const Scalar& other)
 {
   SelfCwiseBinaryOp<ei_scalar_product_op<Scalar>, Derived> tmp(derived());
-  typedef typename Derived::PlainMatrixType PlainMatrixType;
-  tmp = PlainMatrixType::Constant(rows(),cols(),other);
+  typedef typename Derived::PlainObject PlainObject;
+  tmp = PlainObject::Constant(rows(),cols(),other);
   return derived();
 }
 
@@ -133,8 +133,8 @@ template<typename Derived>
 inline Derived& DenseBase<Derived>::operator/=(const Scalar& other)
 {
   SelfCwiseBinaryOp<typename ei_meta_if<NumTraits<Scalar>::HasFloatingPoint,ei_scalar_product_op<Scalar>,ei_scalar_quotient_op<Scalar> >::ret, Derived> tmp(derived());
-  typedef typename Derived::PlainMatrixType PlainMatrixType;
-  tmp = PlainMatrixType::Constant(rows(),cols(), NumTraits<Scalar>::HasFloatingPoint ? Scalar(1)/other : other);
+  typedef typename Derived::PlainObject PlainObject;
+  tmp = PlainObject::Constant(rows(),cols(), NumTraits<Scalar>::HasFloatingPoint ? Scalar(1)/other : other);
   return derived();
 }
 
diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h
index 859f10298..7d978b800 100644
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -148,7 +148,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
     typedef TriangularBase<TriangularView> Base;
     typedef typename ei_traits<TriangularView>::Scalar Scalar;
     typedef _MatrixType MatrixType;
-    typedef typename MatrixType::PlainMatrixType DenseMatrixType;
+    typedef typename MatrixType::PlainObject DenseMatrixType;
     typedef typename MatrixType::Nested MatrixTypeNested;
     typedef typename ei_cleantype<MatrixTypeNested>::type _MatrixTypeNested;
 
diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h
index f030d59b5..3343b1875 100644
--- a/Eigen/src/Core/products/CoeffBasedProduct.h
+++ b/Eigen/src/Core/products/CoeffBasedProduct.h
@@ -109,7 +109,7 @@ class CoeffBasedProduct
 
     typedef MatrixBase<CoeffBasedProduct> Base;
     EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct)
-    typedef typename Base::PlainMatrixType PlainMatrixType;
+    typedef typename Base::PlainObject PlainObject;
 
   private:
 
@@ -181,8 +181,8 @@ class CoeffBasedProduct
       return res;
     }
 
-    // Implicit convertion to the nested type (trigger the evaluation of the product)
-    operator const PlainMatrixType& () const
+    // Implicit conversion to the nested type (trigger the evaluation of the product)
+    operator const PlainObject& () const
     {
       m_result.lazyAssign(*this);
       return m_result;
@@ -205,15 +205,15 @@ class CoeffBasedProduct
     const LhsNested m_lhs;
     const RhsNested m_rhs;
 
-    mutable PlainMatrixType m_result;
+    mutable PlainObject m_result;
 };
 
 // here we need to overload the nested rule for products
 // such that the nested type is a const reference to a plain matrix
-template<typename Lhs, typename Rhs, int N, typename PlainMatrixType>
-struct ei_nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainMatrixType>
+template<typename Lhs, typename Rhs, int N, typename PlainObject>
+struct ei_nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
 {
-  typedef PlainMatrixType const& type;
+  typedef PlainObject const& type;
 };
 
 /***************************************************************************
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
index 3777464dc..2ca463d5d 100644
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -166,7 +166,7 @@ template<typename XprType> struct ei_blas_traits
   };
   typedef typename ei_meta_if<int(ActualAccess)==HasDirectAccess,
     ExtractType,
-    typename _ExtractType::PlainMatrixType
+    typename _ExtractType::PlainObject
     >::ret DirectLinearAccessType;
   static inline ExtractType extract(const XprType& x) { return x; }
   static inline Scalar extractScalarFactor(const XprType&) { return Scalar(1); }
@@ -227,7 +227,7 @@ struct ei_blas_traits<Transpose<NestedXpr> >
   typedef Transpose<typename Base::_ExtractType> _ExtractType;
   typedef typename ei_meta_if<int(Base::ActualAccess)==HasDirectAccess,
     ExtractType,
-    typename ExtractType::PlainMatrixType
+    typename ExtractType::PlainObject
     >::ret DirectLinearAccessType;
   enum {
     IsTransposed = Base::IsTransposed ? 0 : 1
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 4884557e5..a54ddd155 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -147,7 +147,7 @@ template<typename T, typename StorageType = typename ei_traits<T>::StorageType>
 template<typename T> struct ei_eval<T,Dense>
 {
   typedef typename ei_plain_matrix_type<T>::type type;
-//   typedef typename T::PlainMatrixType type;
+//   typedef typename T::PlainObject type;
 //   typedef T::Matrix<typename ei_traits<T>::Scalar,
 //                 ei_traits<T>::RowsAtCompileTime,
 //                 ei_traits<T>::ColsAtCompileTime,
@@ -256,7 +256,7 @@ struct ei_ref_selector
   * const Matrix3d&, because the internal logic of ei_nested determined that since a was already a matrix, there was no point
   * in copying it into another matrix.
   */
-template<typename T, int n=1, typename PlainMatrixType = typename ei_eval<T>::type> struct ei_nested
+template<typename T, int n=1, typename PlainObject = typename ei_eval<T>::type> struct ei_nested
 {
   enum {
     CostEval   = (n+1) * int(NumTraits<typename ei_traits<T>::Scalar>::ReadCost),
@@ -266,7 +266,7 @@ template<typename T, int n=1, typename PlainMatrixType = typename ei_eval<T>::ty
   typedef typename ei_meta_if<
     ( int(ei_traits<T>::Flags) & EvalBeforeNestingBit ) ||
     ( int(CostEval) <= int(CostNoEval) ),
-      PlainMatrixType,
+      PlainObject,
       typename ei_ref_selector<T>::type
   >::ret type;
 };
diff --git a/Eigen/src/Eigen2Support/TriangularSolver.h b/Eigen/src/Eigen2Support/TriangularSolver.h
index 94b92577e..833dd58d2 100644
--- a/Eigen/src/Eigen2Support/TriangularSolver.h
+++ b/Eigen/src/Eigen2Support/TriangularSolver.h
@@ -37,7 +37,7 @@ const unsigned int UnitLowerTriangular = UnitLower;
 
 template<typename ExpressionType, unsigned int Added, unsigned int Removed>
 template<typename OtherDerived>
-typename ExpressionType::PlainMatrixType
+typename ExpressionType::PlainObject
 Flagged<ExpressionType,Added,Removed>::solveTriangular(const MatrixBase<OtherDerived>& other) const
 {
   return m_matrix.template triangularView<Added>.solve(other.derived());
diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
index 960e9b417..e8e9bea97 100644
--- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
+++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
@@ -276,7 +276,7 @@ inline Matrix<typename NumTraits<typename ei_traits<Derived>::Scalar>::Real, ei_
 MatrixBase<Derived>::eigenvalues() const
 {
   ei_assert(Flags&SelfAdjoint);
-  return SelfAdjointEigenSolver<typename Derived::PlainMatrixType>(eval(),false).eigenvalues();
+  return SelfAdjointEigenSolver<typename Derived::PlainObject>(eval(),false).eigenvalues();
 }
 
 template<typename Derived, bool IsSelfAdjoint>
@@ -296,7 +296,7 @@ template<typename Derived> struct ei_operatorNorm_selector<Derived, false>
   static inline typename NumTraits<typename ei_traits<Derived>::Scalar>::Real
   operatorNorm(const MatrixBase<Derived>& m)
   {
-    typename Derived::PlainMatrixType m_eval(m);
+    typename Derived::PlainObject m_eval(m);
     // FIXME if it is really guaranteed that the eigenvalues are already sorted,
     // then we don't need to compute a maxCoeff() here, comparing the 1st and last ones is enough.
     return ei_sqrt(
diff --git a/Eigen/src/Geometry/Homogeneous.h b/Eigen/src/Geometry/Homogeneous.h
index 76ca66c57..2b8b9cf8e 100644
--- a/Eigen/src/Geometry/Homogeneous.h
+++ b/Eigen/src/Geometry/Homogeneous.h
@@ -213,9 +213,9 @@ struct ei_traits<ei_homogeneous_left_product_impl<Homogeneous<MatrixType,Vertica
   typedef Matrix<typename ei_traits<MatrixType>::Scalar,
                  Lhs::RowsAtCompileTime,
                  MatrixType::ColsAtCompileTime,
-                 MatrixType::PlainMatrixType::Options,
+                 MatrixType::PlainObject::Options,
                  Lhs::MaxRowsAtCompileTime,
-                 MatrixType::MaxColsAtCompileTime> ReturnMatrixType;
+                 MatrixType::MaxColsAtCompileTime> ReturnType;
 };
 
 template<typename MatrixType,typename Lhs>
@@ -251,9 +251,9 @@ struct ei_traits<ei_homogeneous_right_product_impl<Homogeneous<MatrixType,Horizo
   typedef Matrix<typename ei_traits<MatrixType>::Scalar,
                  MatrixType::RowsAtCompileTime,
                  Rhs::ColsAtCompileTime,
-                 MatrixType::PlainMatrixType::Options,
+                 MatrixType::PlainObject::Options,
                  MatrixType::MaxRowsAtCompileTime,
-                 Rhs::MaxColsAtCompileTime> ReturnMatrixType;
+                 Rhs::MaxColsAtCompileTime> ReturnType;
 };
 
 template<typename MatrixType,typename Rhs>
diff --git a/Eigen/src/Geometry/OrthoMethods.h b/Eigen/src/Geometry/OrthoMethods.h
index c10b6abf4..265507eb9 100644
--- a/Eigen/src/Geometry/OrthoMethods.h
+++ b/Eigen/src/Geometry/OrthoMethods.h
@@ -35,7 +35,7 @@
   */
 template<typename Derived>
 template<typename OtherDerived>
-inline typename MatrixBase<Derived>::PlainMatrixType
+inline typename MatrixBase<Derived>::PlainObject
 MatrixBase<Derived>::cross(const MatrixBase<OtherDerived>& other) const
 {
   EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,3)
@@ -79,7 +79,7 @@ struct ei_cross3_impl {
   */
 template<typename Derived>
 template<typename OtherDerived>
-inline typename MatrixBase<Derived>::PlainMatrixType
+inline typename MatrixBase<Derived>::PlainObject
 MatrixBase<Derived>::cross3(const MatrixBase<OtherDerived>& other) const
 {
   EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,4)
@@ -210,7 +210,7 @@ struct ei_unitOrthogonal_selector<Derived,2>
   * \sa cross()
   */
 template<typename Derived>
-typename MatrixBase<Derived>::PlainMatrixType
+typename MatrixBase<Derived>::PlainObject
 MatrixBase<Derived>::unitOrthogonal() const
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
diff --git a/Eigen/src/Householder/Householder.h b/Eigen/src/Householder/Householder.h
index d86e287fa..9d1383543 100644
--- a/Eigen/src/Householder/Householder.h
+++ b/Eigen/src/Householder/Householder.h
@@ -99,7 +99,7 @@ void MatrixBase<Derived>::applyHouseholderOnTheLeft(
   const Scalar& tau,
   Scalar* workspace)
 {
-  Map<Matrix<Scalar, 1, Base::ColsAtCompileTime, PlainMatrixType::Options, 1, Base::MaxColsAtCompileTime> > tmp(workspace,cols());
+  Map<Matrix<Scalar, 1, Base::ColsAtCompileTime, PlainObject::Options, 1, Base::MaxColsAtCompileTime> > tmp(workspace,cols());
   Block<Derived, EssentialPart::SizeAtCompileTime, Derived::ColsAtCompileTime> bottom(derived(), 1, 0, rows()-1, cols());
   tmp.noalias() = essential.adjoint() * bottom;
   tmp += this->row(0);
@@ -114,7 +114,7 @@ void MatrixBase<Derived>::applyHouseholderOnTheRight(
   const Scalar& tau,
   Scalar* workspace)
 {
-  Map<Matrix<Scalar, Base::RowsAtCompileTime, 1, PlainMatrixType::Options, Base::MaxRowsAtCompileTime, 1> > tmp(workspace,rows());
+  Map<Matrix<Scalar, Base::RowsAtCompileTime, 1, PlainObject::Options, Base::MaxRowsAtCompileTime, 1> > tmp(workspace,rows());
   Block<Derived, Derived::RowsAtCompileTime, EssentialPart::SizeAtCompileTime> right(derived(), 0, 1, rows(), cols()-1);
   tmp.noalias() = right * essential.conjugate();
   tmp += this->col(0);
diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index 72e878223..1129293d5 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -630,7 +630,7 @@ struct ei_solve_retval<FullPivLU<_MatrixType>, Rhs>
       return;
     }
 
-    typename Rhs::PlainMatrixType c(rhs().rows(), rhs().cols());
+    typename Rhs::PlainObject c(rhs().rows(), rhs().cols());
 
     // Step 1
     c = dec().permutationP() * rhs();
@@ -670,10 +670,10 @@ struct ei_solve_retval<FullPivLU<_MatrixType>, Rhs>
   * \sa class FullPivLU
   */
 template<typename Derived>
-inline const FullPivLU<typename MatrixBase<Derived>::PlainMatrixType>
+inline const FullPivLU<typename MatrixBase<Derived>::PlainObject>
 MatrixBase<Derived>::fullPivLu() const
 {
-  return FullPivLU<PlainMatrixType>(eval());
+  return FullPivLU<PlainObject>(eval());
 }
 
 #endif // EIGEN_LU_H
diff --git a/Eigen/src/LU/Inverse.h b/Eigen/src/LU/Inverse.h
index 36392c8d8..e20da70d6 100644
--- a/Eigen/src/LU/Inverse.h
+++ b/Eigen/src/LU/Inverse.h
@@ -238,7 +238,7 @@ struct ei_compute_inverse_and_det_with_check<MatrixType, ResultType, 4>
 template<typename MatrixType>
 struct ei_traits<ei_inverse_impl<MatrixType> >
 {
-  typedef typename MatrixType::PlainMatrixType ReturnMatrixType;
+  typedef typename MatrixType::PlainObject ReturnType;
 };
 
 template<typename MatrixType>
@@ -327,7 +327,7 @@ inline void MatrixBase<Derived>::computeInverseAndDetWithCheck(
   typedef typename ei_meta_if<
     RowsAtCompileTime == 2,
     typename ei_cleantype<typename ei_nested<Derived, 2>::type>::type,
-    PlainMatrixType
+    PlainObject
   >::ret MatrixType;
   ei_compute_inverse_and_det_with_check<MatrixType, ResultType>::run
     (derived(), absDeterminantThreshold, inverse, determinant, invertible);
diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h
index 809e4aad6..ed2354d78 100644
--- a/Eigen/src/LU/PartialPivLU.h
+++ b/Eigen/src/LU/PartialPivLU.h
@@ -442,10 +442,10 @@ struct ei_solve_retval<PartialPivLU<_MatrixType>, Rhs>
   * \sa class PartialPivLU
   */
 template<typename Derived>
-inline const PartialPivLU<typename MatrixBase<Derived>::PlainMatrixType>
+inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
 MatrixBase<Derived>::partialPivLu() const
 {
-  return PartialPivLU<PlainMatrixType>(eval());
+  return PartialPivLU<PlainObject>(eval());
 }
 
 /** \lu_module
@@ -457,10 +457,10 @@ MatrixBase<Derived>::partialPivLu() const
   * \sa class PartialPivLU
   */
 template<typename Derived>
-inline const PartialPivLU<typename MatrixBase<Derived>::PlainMatrixType>
+inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
 MatrixBase<Derived>::lu() const
 {
-  return PartialPivLU<PlainMatrixType>(eval());
+  return PartialPivLU<PlainObject>(eval());
 }
 
 #endif // EIGEN_PARTIALLU_H
diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h
index bf28605dd..1219f1918 100644
--- a/Eigen/src/QR/ColPivHouseholderQR.h
+++ b/Eigen/src/QR/ColPivHouseholderQR.h
@@ -441,7 +441,7 @@ struct ei_solve_retval<ColPivHouseholderQR<_MatrixType>, Rhs>
       return;
     }
 
-    typename Rhs::PlainMatrixType c(rhs());
+    typename Rhs::PlainObject c(rhs());
 
     // Note that the matrix Q = H_0^* H_1^*... so its inverse is Q^* = (H_0 H_1 ...)^T
     c.applyOnTheLeft(householderSequence(
@@ -458,7 +458,7 @@ struct ei_solve_retval<ColPivHouseholderQR<_MatrixType>, Rhs>
        .solveInPlace(c.corner(TopLeft, nonzero_pivots, c.cols()));
 
 
-    typename Rhs::PlainMatrixType d(c);
+    typename Rhs::PlainObject d(c);
     d.corner(TopLeft, nonzero_pivots, c.cols())
       = dec().matrixQR()
        .corner(TopLeft, nonzero_pivots, nonzero_pivots)
@@ -486,10 +486,10 @@ typename ColPivHouseholderQR<MatrixType>::HouseholderSequenceType ColPivHousehol
   * \sa class ColPivHouseholderQR
   */
 template<typename Derived>
-const ColPivHouseholderQR<typename MatrixBase<Derived>::PlainMatrixType>
+const ColPivHouseholderQR<typename MatrixBase<Derived>::PlainObject>
 MatrixBase<Derived>::colPivHouseholderQr() const
 {
-  return ColPivHouseholderQR<PlainMatrixType>(eval());
+  return ColPivHouseholderQR<PlainObject>(eval());
 }
 
 
diff --git a/Eigen/src/QR/FullPivHouseholderQR.h b/Eigen/src/QR/FullPivHouseholderQR.h
index 1ec60aeaf..07be47f47 100644
--- a/Eigen/src/QR/FullPivHouseholderQR.h
+++ b/Eigen/src/QR/FullPivHouseholderQR.h
@@ -352,7 +352,7 @@ struct ei_solve_retval<FullPivHouseholderQR<_MatrixType>, Rhs>
       return;
     }
 
-    typename Rhs::PlainMatrixType c(rhs());
+    typename Rhs::PlainObject c(rhs());
 
     Matrix<Scalar,1,Rhs::ColsAtCompileTime> temp(rhs().cols());
     for (int k = 0; k < dec().rank(); ++k)
@@ -413,10 +413,10 @@ typename FullPivHouseholderQR<MatrixType>::MatrixQType FullPivHouseholderQR<Matr
   * \sa class FullPivHouseholderQR
   */
 template<typename Derived>
-const FullPivHouseholderQR<typename MatrixBase<Derived>::PlainMatrixType>
+const FullPivHouseholderQR<typename MatrixBase<Derived>::PlainObject>
 MatrixBase<Derived>::fullPivHouseholderQr() const
 {
-  return FullPivHouseholderQR<PlainMatrixType>(eval());
+  return FullPivHouseholderQR<PlainObject>(eval());
 }
 
 #endif // EIGEN_FULLPIVOTINGHOUSEHOLDERQR_H
diff --git a/Eigen/src/QR/HouseholderQR.h b/Eigen/src/QR/HouseholderQR.h
index b6b07ea63..4709e4b77 100644
--- a/Eigen/src/QR/HouseholderQR.h
+++ b/Eigen/src/QR/HouseholderQR.h
@@ -221,7 +221,7 @@ struct ei_solve_retval<HouseholderQR<_MatrixType>, Rhs>
     const int rank = std::min(rows, cols);
     ei_assert(rhs().rows() == rows);
 
-    typename Rhs::PlainMatrixType c(rhs());
+    typename Rhs::PlainObject c(rhs());
 
     // Note that the matrix Q = H_0^* H_1^*... so its inverse is Q^* = (H_0 H_1 ...)^T
     c.applyOnTheLeft(householderSequence(
@@ -246,10 +246,10 @@ struct ei_solve_retval<HouseholderQR<_MatrixType>, Rhs>
   * \sa class HouseholderQR
   */
 template<typename Derived>
-const HouseholderQR<typename MatrixBase<Derived>::PlainMatrixType>
+const HouseholderQR<typename MatrixBase<Derived>::PlainObject>
 MatrixBase<Derived>::householderQr() const
 {
-  return HouseholderQR<PlainMatrixType>(eval());
+  return HouseholderQR<PlainObject>(eval());
 }
 
 
diff --git a/Eigen/src/SVD/SVD.h b/Eigen/src/SVD/SVD.h
index c308ff3ee..fa3b82ce2 100644
--- a/Eigen/src/SVD/SVD.h
+++ b/Eigen/src/SVD/SVD.h
@@ -555,10 +555,10 @@ void SVD<MatrixType>::computeScalingRotation(ScalingType *scaling, RotationType
   * \returns the SVD decomposition of \c *this
   */
 template<typename Derived>
-inline SVD<typename MatrixBase<Derived>::PlainMatrixType>
+inline SVD<typename MatrixBase<Derived>::PlainObject>
 MatrixBase<Derived>::svd() const
 {
-  return SVD<PlainMatrixType>(derived());
+  return SVD<PlainObject>(derived());
 }
 
 #endif // EIGEN_SVD_H
diff --git a/Eigen/src/SVD/UpperBidiagonalization.h b/Eigen/src/SVD/UpperBidiagonalization.h
index a2cb44733..0a63b4265 100644
--- a/Eigen/src/SVD/UpperBidiagonalization.h
+++ b/Eigen/src/SVD/UpperBidiagonalization.h
@@ -141,10 +141,10 @@ UpperBidiagonalization<_MatrixType>& UpperBidiagonalization<_MatrixType>::comput
   * \sa class Bidiagonalization
   */
 template<typename Derived>
-const UpperBidiagonalization<typename MatrixBase<Derived>::PlainMatrixType>
+const UpperBidiagonalization<typename MatrixBase<Derived>::PlainObject>
 MatrixBase<Derived>::bidiagonalization() const
 {
-  return UpperBidiagonalization<PlainMatrixType>(eval());
+  return UpperBidiagonalization<PlainObject>(eval());
 }
 #endif
 
diff --git a/Eigen/src/Sparse/SparseMatrixBase.h b/Eigen/src/Sparse/SparseMatrixBase.h
index 7bf4ef836..cf1a5d7bf 100644
--- a/Eigen/src/Sparse/SparseMatrixBase.h
+++ b/Eigen/src/Sparse/SparseMatrixBase.h
@@ -109,7 +109,7 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
                         Transpose<Derived>
                      >::ret AdjointReturnType;
 
-    typedef SparseMatrix<Scalar, Flags&RowMajorBit ? RowMajor : ColMajor> PlainMatrixType;
+    typedef SparseMatrix<Scalar, Flags&RowMajorBit ? RowMajor : ColMajor> PlainObject;
 
     #define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::SparseMatrixBase
     #include "../plugins/CommonCwiseUnaryOps.h"
@@ -396,7 +396,7 @@ template<typename Derived> class SparseMatrixBase : public EigenBase<Derived>
     template<typename OtherDerived> Scalar dot(const SparseMatrixBase<OtherDerived>& other) const;
     RealScalar squaredNorm() const;
     RealScalar norm()  const;
-//     const PlainMatrixType normalized() const;
+//     const PlainObject normalized() const;
 //     void normalize();
 
     Transpose<Derived> transpose() { return derived(); }
diff --git a/Eigen/src/Sparse/SparseSelfAdjointView.h b/Eigen/src/Sparse/SparseSelfAdjointView.h
index 039e5c725..f3d4fbcbd 100644
--- a/Eigen/src/Sparse/SparseSelfAdjointView.h
+++ b/Eigen/src/Sparse/SparseSelfAdjointView.h
@@ -93,8 +93,8 @@ template<typename MatrixType, unsigned int UpLo> class SparseSelfAdjointView
     template<typename DerivedU>
     SparseSelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, Scalar alpha = Scalar(1));
 
-    // const SparseLLT<PlainMatrixType, UpLo> llt() const;
-    // const SparseLDLT<PlainMatrixType, UpLo> ldlt() const;
+    // const SparseLLT<PlainObject, UpLo> llt() const;
+    // const SparseLDLT<PlainObject, UpLo> ldlt() const;
 
   protected:
 
diff --git a/Eigen/src/misc/Image.h b/Eigen/src/misc/Image.h
index 2f39d6b9d..1d63d8143 100644
--- a/Eigen/src/misc/Image.h
+++ b/Eigen/src/misc/Image.h
@@ -40,7 +40,7 @@ struct ei_traits<ei_image_retval_base<DecompositionType> >
     MatrixType::Options,
     MatrixType::MaxRowsAtCompileTime, // the image matrix will consist of columns from the original matrix,
     MatrixType::MaxColsAtCompileTime  // so it has the same number of rows and at most as many columns.
-  > ReturnMatrixType;
+  > ReturnType;
 };
 
 template<typename _DecompositionType> struct ei_image_retval_base
diff --git a/Eigen/src/misc/Kernel.h b/Eigen/src/misc/Kernel.h
index 908c408e9..497b42eab 100644
--- a/Eigen/src/misc/Kernel.h
+++ b/Eigen/src/misc/Kernel.h
@@ -42,7 +42,7 @@ struct ei_traits<ei_kernel_retval_base<DecompositionType> >
     MatrixType::MaxColsAtCompileTime, // see explanation for 2nd template parameter
     MatrixType::MaxColsAtCompileTime // the kernel is a subspace of the domain space,
                                      // whose dimension is the number of columns of the original matrix
-  > ReturnMatrixType;
+  > ReturnType;
 };
 
 template<typename _DecompositionType> struct ei_kernel_retval_base
diff --git a/Eigen/src/misc/Solve.h b/Eigen/src/misc/Solve.h
index 4ab0775fc..028716aa2 100644
--- a/Eigen/src/misc/Solve.h
+++ b/Eigen/src/misc/Solve.h
@@ -35,9 +35,9 @@ struct ei_traits<ei_solve_retval_base<DecompositionType, Rhs> >
   typedef Matrix<typename Rhs::Scalar,
                  MatrixType::ColsAtCompileTime,
                  Rhs::ColsAtCompileTime,
-                 Rhs::PlainMatrixType::Options,
+                 Rhs::PlainObject::Options,
                  MatrixType::MaxColsAtCompileTime,
-                 Rhs::MaxColsAtCompileTime> ReturnMatrixType;
+                 Rhs::MaxColsAtCompileTime> ReturnType;
 };
 
 template<typename _DecompositionType, typename Rhs> struct ei_solve_retval_base
diff --git a/disabled/SkylineMatrix.h b/disabled/SkylineMatrix.h
index 03d17dac2..640f676bd 100644
--- a/disabled/SkylineMatrix.h
+++ b/disabled/SkylineMatrix.h
@@ -62,7 +62,7 @@ class BandMatrix : public MultiplierBase<BandMatrix<_Scalar,Supers,Subs,Options>
       MaxColsAtCompileTime = ei_traits<BandMatrix>::MaxColsAtCompileTime
     };
     typedef typename ei_traits<BandMatrix>::Scalar Scalar;
-    typedef Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime> PlainMatrixType;
+    typedef Matrix<Scalar,RowsAtCompileTime,ColsAtCompileTime> PlainObject;
     
   protected:
     enum {
@@ -125,9 +125,9 @@ class BandMatrix : public MultiplierBase<BandMatrix<_Scalar,Supers,Subs,Options>
 //     inline VectorBlock<DataType,Size> subDiagonal()
 //     { return VectorBlock<DataType,Size>(m_data,0,m_size.value()); }
 
-    PlainMatrixType toDense() const
+    PlainObject toDense() const
     {
-      PlainMatrixType res(rows(),cols());
+      PlainObject res(rows(),cols());
       res.setZero();
       res.diagonal() = diagonal();
       for (int i=1; i<=supers();++i)
diff --git a/test/lu.cpp b/test/lu.cpp
index 45308ff82..568db8230 100644
--- a/test/lu.cpp
+++ b/test/lu.cpp
@@ -51,8 +51,8 @@ template<typename MatrixType> void lu_non_invertible()
     cols2 = cols = MatrixType::ColsAtCompileTime;
   }
 
-  typedef typename ei_kernel_retval_base<FullPivLU<MatrixType> >::ReturnMatrixType KernelMatrixType;
-  typedef typename ei_image_retval_base<FullPivLU<MatrixType> >::ReturnMatrixType ImageMatrixType;
+  typedef typename ei_kernel_retval_base<FullPivLU<MatrixType> >::ReturnType KernelMatrixType;
+  typedef typename ei_image_retval_base<FullPivLU<MatrixType> >::ReturnType ImageMatrixType;
   typedef Matrix<typename MatrixType::Scalar, Dynamic, Dynamic> DynamicMatrixType;
   typedef Matrix<typename MatrixType::Scalar, MatrixType::ColsAtCompileTime, MatrixType::ColsAtCompileTime>
           CMatrixType;
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h b/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
index 39c23cdc5..2c761e648 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h
@@ -313,7 +313,7 @@ template<typename Derived> struct MatrixExponentialReturnValue
     inline void evalTo(ResultType& result) const
     {
       const typename ei_eval<Derived>::type srcEvaluated = m_src.eval();
-      MatrixExponential<typename Derived::PlainMatrixType> me(srcEvaluated);
+      MatrixExponential<typename Derived::PlainObject> me(srcEvaluated);
       me.compute(result);
     }
 
@@ -327,7 +327,7 @@ template<typename Derived> struct MatrixExponentialReturnValue
 template<typename Derived>
 struct ei_traits<MatrixExponentialReturnValue<Derived> >
 {
-  typedef typename Derived::PlainMatrixType ReturnMatrixType;
+  typedef typename Derived::PlainObject ReturnType;
 };
 
 /** \ingroup MatrixFunctions_Module
diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
index d63bcbce9..12322a256 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
@@ -516,7 +516,7 @@ template<typename Derived> class MatrixFunctionReturnValue
     inline void evalTo(ResultType& result) const
     {
       const typename ei_eval<Derived>::type Aevaluated = m_A.eval();
-      MatrixFunction<typename Derived::PlainMatrixType> mf(Aevaluated, m_f);
+      MatrixFunction<typename Derived::PlainObject> mf(Aevaluated, m_f);
       mf.compute(result);
     }
 
@@ -531,7 +531,7 @@ template<typename Derived> class MatrixFunctionReturnValue
 template<typename Derived>
 struct ei_traits<MatrixFunctionReturnValue<Derived> >
 {
-  typedef typename Derived::PlainMatrixType ReturnMatrixType;
+  typedef typename Derived::PlainObject ReturnType;
 };
 
 

From 4e389b195d51fc6a01bfc5efe06751d84c37d0bf Mon Sep 17 00:00:00 2001
From: Jitse Niesen <jitse@maths.leeds.ac.uk>
Date: Sat, 20 Feb 2010 16:27:04 +0000
Subject: [PATCH 05/38] Change MatrixFunction::separation() parameter from 0.01
 to 0.1 . The latter is actually the value used in the literature.

---
 unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
index 16fce5b29..b4b652235 100644
--- a/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
+++ b/unsupported/Eigen/src/MatrixFunctions/MatrixFunction.h
@@ -178,9 +178,9 @@ class MatrixFunction<MatrixType, 1>
       *
       * This is morally a \c static \c const \c Scalar, but only
       * integers can be static constant class members in C++. The
-      * separation constant is set to 0.01, a value taken from the
+      * separation constant is set to 0.1, a value taken from the
       * paper by Davies and Higham. */
-    static const RealScalar separation() { return static_cast<RealScalar>(0.01); }
+    static const RealScalar separation() { return static_cast<RealScalar>(0.1); }
 };
 
 /** \brief Constructor. 

From 6b4cecc1c650627c776b8c2d762a5eadfc11be08 Mon Sep 17 00:00:00 2001
From: Hauke Heibel <hauke.heibel@gmail.com>
Date: Sat, 20 Feb 2010 17:39:58 +0100
Subject: [PATCH 06/38] CMake cleanup.

---
 CMakeLists.txt           | 94 +++++++++++++++++++---------------------
 cmake/EigenTesting.cmake | 13 ------
 2 files changed, 45 insertions(+), 62 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f4c2973d5..afe375041 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -52,64 +52,63 @@ else()
 endif()
 
 if(CMAKE_COMPILER_IS_GNUCXX)
-  if(CMAKE_SYSTEM_NAME MATCHES Linux)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fexceptions -fno-check-new -fno-common -fstrict-aliasing")
-    set(CMAKE_CXX_FLAGS_DEBUG "-g3")
-    set(CMAKE_CXX_FLAGS_RELEASE "-g0 -O2")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fexceptions -fno-check-new -fno-common -fstrict-aliasing")
+  set(CMAKE_CXX_FLAGS_DEBUG "-g3")
+  set(CMAKE_CXX_FLAGS_RELEASE "-g0 -O2")
 
-    check_cxx_compiler_flag("-Wno-variadic-macros" COMPILER_SUPPORT_WNOVARIADICMACRO)
-    if(COMPILER_SUPPORT_WNOVARIADICMACRO)
-      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-variadic-macros")
-    endif()
+  check_cxx_compiler_flag("-Wno-variadic-macros" COMPILER_SUPPORT_WNOVARIADICMACRO)
+  if(COMPILER_SUPPORT_WNOVARIADICMACRO)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-variadic-macros")
+  endif()
 
-    check_cxx_compiler_flag("-Wextra" COMPILER_SUPPORT_WEXTRA)
-    if(COMPILER_SUPPORT_WEXTRA)
-      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra")
-    endif()
+  check_cxx_compiler_flag("-Wextra" COMPILER_SUPPORT_WEXTRA)
+  if(COMPILER_SUPPORT_WEXTRA)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra")
+  endif()
 
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic")
 
-    option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF)
-    if(EIGEN_TEST_SSE2)
-      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2")
-      message("Enabling SSE2 in tests/examples")
-    endif()
+  option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF)
+  if(EIGEN_TEST_SSE2)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2")
+    message("Enabling SSE2 in tests/examples")
+  endif()
 
-    option(EIGEN_TEST_SSE3 "Enable/Disable SSE3 in tests/examples" OFF)
-    if(EIGEN_TEST_SSE3)
-      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
-      message("Enabling SSE3 in tests/examples")
-    endif()
+  option(EIGEN_TEST_SSE3 "Enable/Disable SSE3 in tests/examples" OFF)
+  if(EIGEN_TEST_SSE3)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3")
+    message("Enabling SSE3 in tests/examples")
+  endif()
 
-    option(EIGEN_TEST_SSSE3 "Enable/Disable SSSE3 in tests/examples" OFF)
-    if(EIGEN_TEST_SSSE3)
-      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mssse3")
-      message("Enabling SSSE3 in tests/examples")
-    endif()
+  option(EIGEN_TEST_SSSE3 "Enable/Disable SSSE3 in tests/examples" OFF)
+  if(EIGEN_TEST_SSSE3)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mssse3")
+    message("Enabling SSSE3 in tests/examples")
+  endif()
 
-    option(EIGEN_TEST_SSE4_1 "Enable/Disable SSE4.1 in tests/examples" OFF)
-    if(EIGEN_TEST_SSE4_1)
-      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1")
-      message("Enabling SSE4.1 in tests/examples")
-    endif()
+  option(EIGEN_TEST_SSE4_1 "Enable/Disable SSE4.1 in tests/examples" OFF)
+  if(EIGEN_TEST_SSE4_1)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1")
+    message("Enabling SSE4.1 in tests/examples")
+  endif()
 
-    option(EIGEN_TEST_SSE4_2 "Enable/Disable SSE4.2 in tests/examples" OFF)
-    if(EIGEN_TEST_SSE4_2)
-      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2")
-      message("Enabling SSE4.2 in tests/examples")
-    endif()
+  option(EIGEN_TEST_SSE4_2 "Enable/Disable SSE4.2 in tests/examples" OFF)
+  if(EIGEN_TEST_SSE4_2)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2")
+    message("Enabling SSE4.2 in tests/examples")
+  endif()
 
-    option(EIGEN_TEST_ALTIVEC "Enable/Disable altivec in tests/examples" OFF)
-    if(EIGEN_TEST_ALTIVEC)
-      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec")
-      message("Enabling AltiVec in tests/examples")
-    endif()
-
-  endif(CMAKE_SYSTEM_NAME MATCHES Linux)
+  option(EIGEN_TEST_ALTIVEC "Enable/Disable altivec in tests/examples" OFF)
+  if(EIGEN_TEST_ALTIVEC)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec")
+    message("Enabling AltiVec in tests/examples")
+  endif()
 endif(CMAKE_COMPILER_IS_GNUCXX)
 
 if(MSVC)
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc")
+  # C4127 - conditional expression is constant
+  # C4505 - unreferenced local function has been removed
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /W4 /wd4127 /wd4505")
   option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF)
   if(EIGEN_TEST_SSE2)
     if(NOT CMAKE_CL_64)
@@ -128,9 +127,6 @@ endif(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION)
 
 option(EIGEN_TEST_C++0x "Enables all C++0x features." OFF)
 
-option(EIGEN_TEST_MAX_WARNING_LEVEL "Sets the warning level to /Wall while building the unit tests." OFF)
-mark_as_advanced(EIGEN_TEST_MAX_WARNING_LEVEL)
-
 include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
 
 set(INCLUDE_INSTALL_DIR
diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake
index c445f842b..7d90882a2 100644
--- a/cmake/EigenTesting.cmake
+++ b/cmake/EigenTesting.cmake
@@ -232,14 +232,6 @@ if(CMAKE_COMPILER_IS_GNUCXX)
   if(EIGEN_TEST_C++0x)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++0x")
   endif(EIGEN_TEST_C++0x)
-  if(EIGEN_TEST_MAX_WARNING_LEVEL)
-    CHECK_CXX_COMPILER_FLAG("-Wno-variadic-macros" FLAG_VARIADIC)
-    if(FLAG_VARIADIC)
-      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wconversion -Wno-variadic-macros")
-    else(FLAG_VARIADIC)
-      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wconversion")
-    endif(FLAG_VARIADIC)
-  endif(EIGEN_TEST_MAX_WARNING_LEVEL)  
   if(CMAKE_SYSTEM_NAME MATCHES Linux)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COVERAGE_FLAGS} -g2")
     set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ${COVERAGE_FLAGS} -O2 -g2")
@@ -248,9 +240,4 @@ if(CMAKE_COMPILER_IS_GNUCXX)
   endif(CMAKE_SYSTEM_NAME MATCHES Linux)
 elseif(MSVC)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_CRT_SECURE_NO_WARNINGS /D_SCL_SECURE_NO_WARNINGS")
-  if(EIGEN_TEST_MAX_WARNING_LEVEL)
-	# C4127 - conditional expression is constant
-	# C4505 - unreferenced local function has been removed
-	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 /wd4127 /wd4505")
-  endif(EIGEN_TEST_MAX_WARNING_LEVEL)
 endif(CMAKE_COMPILER_IS_GNUCXX)

From 71fecd23713ed728a5b94fd066b22fc92d122b9d Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Sat, 20 Feb 2010 18:19:34 +0100
Subject: [PATCH 07/38] add missing return value

---
 Eigen/src/LU/FullPivLU.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index 1129293d5..9afc448cc 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -251,6 +251,7 @@ template<typename _MatrixType> class FullPivLU
     {
       m_usePrescribedThreshold = true;
       m_prescribedThreshold = threshold;
+      return *this;
     }
 
     /** Allows to come back to the default behavior, letting Eigen use its default formula for

From 608959aa6fce375abb92872350267074a1d90283 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Sun, 21 Feb 2010 10:29:19 +0100
Subject: [PATCH 08/38] compilation fix in ldlt() for non matrix types

---
 Eigen/src/Cholesky/LDLT.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h
index ceb818f19..4d3149d42 100644
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -322,7 +322,7 @@ template<typename Derived>
 inline const LDLT<typename MatrixBase<Derived>::PlainObject>
 MatrixBase<Derived>::ldlt() const
 {
-  return derived();
+  return LDLT<PlainObject>(derived());
 }
 
 #endif // EIGEN_LDLT_H

From a7d085eb4ecedd45f091eeadb93277c7f3878b27 Mon Sep 17 00:00:00 2001
From: Thomas Capricelli <orzel@freehackers.org>
Date: Sun, 21 Feb 2010 12:41:37 +0100
Subject: [PATCH 09/38] NonLinearOptimization : clean 'mode' handling from the
 old minpack code : * this is actually a boolean, not an int * use a better
 name * can be set at initialization time instead of bloating all methods
 signatures

---
 .../HybridNonLinearSolver.h                   | 108 ++++++------------
 .../LevenbergMarquardt.h                      |  95 +++++----------
 unsupported/test/NonLinearOptimization.cpp    |   6 +-
 3 files changed, 68 insertions(+), 141 deletions(-)

diff --git a/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h b/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h
index 0754a426b..35dc332e0 100644
--- a/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/HybridNonLinearSolver.h
@@ -57,7 +57,7 @@ class HybridNonLinearSolver
 {
 public:
     HybridNonLinearSolver(FunctorType &_functor)
-        : functor(_functor) { nfev=njev=iter = 0;  fnorm= 0.; }
+        : functor(_functor) { nfev=njev=iter = 0;  fnorm= 0.; useExternalScaling=false;}
 
     struct Parameters {
         Parameters()
@@ -84,36 +84,18 @@ public:
             const Scalar tol = ei_sqrt(NumTraits<Scalar>::epsilon())
             );
 
-    HybridNonLinearSolverSpace::Status solveInit(
-            FVectorType  &x,
-            const int mode=1
-            );
-    HybridNonLinearSolverSpace::Status solveOneStep(
-            FVectorType  &x,
-            const int mode=1
-            );
-    HybridNonLinearSolverSpace::Status solve(
-            FVectorType  &x,
-            const int mode=1
-            );
+    HybridNonLinearSolverSpace::Status solveInit(FVectorType  &x);
+    HybridNonLinearSolverSpace::Status solveOneStep(FVectorType  &x);
+    HybridNonLinearSolverSpace::Status solve(FVectorType  &x);
 
     HybridNonLinearSolverSpace::Status hybrd1(
             FVectorType  &x,
             const Scalar tol = ei_sqrt(NumTraits<Scalar>::epsilon())
             );
 
-    HybridNonLinearSolverSpace::Status solveNumericalDiffInit(
-            FVectorType  &x,
-            const int mode=1
-            );
-    HybridNonLinearSolverSpace::Status solveNumericalDiffOneStep(
-            FVectorType  &x,
-            const int mode=1
-            );
-    HybridNonLinearSolverSpace::Status solveNumericalDiff(
-            FVectorType  &x,
-            const int mode=1
-            );
+    HybridNonLinearSolverSpace::Status solveNumericalDiffInit(FVectorType  &x);
+    HybridNonLinearSolverSpace::Status solveNumericalDiffOneStep(FVectorType  &x);
+    HybridNonLinearSolverSpace::Status solveNumericalDiff(FVectorType  &x);
 
     void resetParameters(void) { parameters = Parameters(); }
     Parameters parameters;
@@ -124,6 +106,7 @@ public:
     int njev;
     int iter;
     Scalar fnorm;
+    bool useExternalScaling; 
 private:
     FunctorType &functor;
     int n;
@@ -160,18 +143,13 @@ HybridNonLinearSolver<FunctorType,Scalar>::hybrj1(
     parameters.maxfev = 100*(n+1);
     parameters.xtol = tol;
     diag.setConstant(n, 1.);
-    return solve(
-        x,
-        2
-    );
+    useExternalScaling = true;
+    return solve(x);
 }
 
 template<typename FunctorType, typename Scalar>
 HybridNonLinearSolverSpace::Status
-HybridNonLinearSolver<FunctorType,Scalar>::solveInit(
-        FVectorType  &x,
-        const int mode
-        )
+HybridNonLinearSolver<FunctorType,Scalar>::solveInit(FVectorType  &x)
 {
     n = x.size();
 
@@ -179,9 +157,9 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveInit(
     fvec.resize(n);
     qtf.resize(n);
     fjac.resize(n, n);
-    if (mode != 2)
+    if (!useExternalScaling)
         diag.resize(n);
-    assert( (mode!=2 || diag.size()==n) || "When using mode==2, the caller must provide a valid 'diag'");
+    assert( (!useExternalScaling || diag.size()==n) || "When useExternalScaling is set, the caller must provide a valid 'diag'");
 
     /* Function Body */
     nfev = 0;
@@ -190,7 +168,7 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveInit(
     /*     check the input parameters for errors. */
     if (n <= 0 || parameters.xtol < 0. || parameters.maxfev <= 0 || parameters.factor <= 0. )
         return HybridNonLinearSolverSpace::ImproperInputParameters;
-    if (mode == 2)
+    if (useExternalScaling)
         for (int j = 0; j < n; ++j)
             if (diag[j] <= 0.)
                 return HybridNonLinearSolverSpace::ImproperInputParameters;
@@ -214,10 +192,7 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveInit(
 
 template<typename FunctorType, typename Scalar>
 HybridNonLinearSolverSpace::Status
-HybridNonLinearSolver<FunctorType,Scalar>::solveOneStep(
-        FVectorType  &x,
-        const int mode
-        )
+HybridNonLinearSolver<FunctorType,Scalar>::solveOneStep(FVectorType  &x)
 {
     int j;
     std::vector<PlanarRotation<Scalar> > v_givens(n), w_givens(n);
@@ -231,10 +206,10 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveOneStep(
 
     wa2 = fjac.colwise().blueNorm();
 
-    /* on the first iteration and if mode is 1, scale according */
+    /* on the first iteration and if external scaling is not used, scale according */
     /* to the norms of the columns of the initial jacobian. */
     if (iter == 1) {
-        if (mode != 2)
+        if (!useExternalScaling)
             for (j = 0; j < n; ++j)
                 diag[j] = (wa2[j]==0.) ? 1. : wa2[j];
 
@@ -260,7 +235,7 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveOneStep(
     qtf = fjac.transpose() * fvec;
 
     /* rescale if necessary. */
-    if (mode != 2)
+    if (!useExternalScaling)
         diag = diag.cwiseMax(wa2);
 
     while (true) {
@@ -372,14 +347,11 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveOneStep(
 
 template<typename FunctorType, typename Scalar>
 HybridNonLinearSolverSpace::Status
-HybridNonLinearSolver<FunctorType,Scalar>::solve(
-        FVectorType  &x,
-        const int mode
-        )
+HybridNonLinearSolver<FunctorType,Scalar>::solve(FVectorType  &x)
 {
-    HybridNonLinearSolverSpace::Status status = solveInit(x, mode);
+    HybridNonLinearSolverSpace::Status status = solveInit(x);
     while (status==HybridNonLinearSolverSpace::Running)
-        status = solveOneStep(x, mode);
+        status = solveOneStep(x);
     return status;
 }
 
@@ -403,18 +375,13 @@ HybridNonLinearSolver<FunctorType,Scalar>::hybrd1(
     parameters.xtol = tol;
 
     diag.setConstant(n, 1.);
-    return solveNumericalDiff(
-        x,
-        2
-    );
+    useExternalScaling = true;
+    return solveNumericalDiff(x);
 }
 
 template<typename FunctorType, typename Scalar>
 HybridNonLinearSolverSpace::Status
-HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffInit(
-        FVectorType  &x,
-        const int mode
-        )
+HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffInit(FVectorType  &x)
 {
     n = x.size();
 
@@ -425,10 +392,9 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffInit(
     qtf.resize(n);
     fjac.resize(n, n);
     fvec.resize(n);
-    if (mode != 2)
+    if (!useExternalScaling)
         diag.resize(n);
-    assert( (mode!=2 || diag.size()==n) || "When using mode==2, the caller must provide a valid 'diag'");
-
+    assert( (!useExternalScaling || diag.size()==n) || "When useExternalScaling is set, the caller must provide a valid 'diag'");
 
     /* Function Body */
     nfev = 0;
@@ -437,7 +403,7 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffInit(
     /*     check the input parameters for errors. */
     if (n <= 0 || parameters.xtol < 0. || parameters.maxfev <= 0 || parameters.nb_of_subdiagonals< 0 || parameters.nb_of_superdiagonals< 0 || parameters.factor <= 0. )
         return HybridNonLinearSolverSpace::ImproperInputParameters;
-    if (mode == 2)
+    if (useExternalScaling)
         for (int j = 0; j < n; ++j)
             if (diag[j] <= 0.)
                 return HybridNonLinearSolverSpace::ImproperInputParameters;
@@ -461,10 +427,7 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffInit(
 
 template<typename FunctorType, typename Scalar>
 HybridNonLinearSolverSpace::Status
-HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffOneStep(
-        FVectorType  &x,
-        const int mode
-        )
+HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffOneStep(FVectorType  &x)
 {
     int j;
     std::vector<PlanarRotation<Scalar> > v_givens(n), w_givens(n);
@@ -480,10 +443,10 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffOneStep(
 
     wa2 = fjac.colwise().blueNorm();
 
-    /* on the first iteration and if mode is 1, scale according */
+    /* on the first iteration and if external scaling is not used, scale according */
     /* to the norms of the columns of the initial jacobian. */
     if (iter == 1) {
-        if (mode != 2)
+        if (!useExternalScaling)
             for (j = 0; j < n; ++j)
                 diag[j] = (wa2[j]==0.) ? 1. : wa2[j];
 
@@ -509,7 +472,7 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffOneStep(
     qtf = fjac.transpose() * fvec;
 
     /* rescale if necessary. */
-    if (mode != 2)
+    if (!useExternalScaling)
         diag = diag.cwiseMax(wa2);
 
     while (true) {
@@ -621,14 +584,11 @@ HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiffOneStep(
 
 template<typename FunctorType, typename Scalar>
 HybridNonLinearSolverSpace::Status
-HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiff(
-        FVectorType  &x,
-        const int mode
-        )
+HybridNonLinearSolver<FunctorType,Scalar>::solveNumericalDiff(FVectorType  &x)
 {
-    HybridNonLinearSolverSpace::Status status = solveNumericalDiffInit(x, mode);
+    HybridNonLinearSolverSpace::Status status = solveNumericalDiffInit(x);
     while (status==HybridNonLinearSolverSpace::Running)
-        status = solveNumericalDiffOneStep(x, mode);
+        status = solveNumericalDiffOneStep(x);
     return status;
 }
 
diff --git a/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h b/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h
index f21331e3e..8bae1e131 100644
--- a/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h
+++ b/unsupported/Eigen/src/NonLinearOptimization/LevenbergMarquardt.h
@@ -61,7 +61,7 @@ class LevenbergMarquardt
 {
 public:
     LevenbergMarquardt(FunctorType &_functor)
-        : functor(_functor) { nfev = njev = iter = 0;  fnorm=gnorm = 0.; }
+        : functor(_functor) { nfev = njev = iter = 0;  fnorm = gnorm = 0.; useExternalScaling=false; }
 
     struct Parameters {
         Parameters()
@@ -87,18 +87,9 @@ public:
             const Scalar tol = ei_sqrt(NumTraits<Scalar>::epsilon())
             );
 
-    LevenbergMarquardtSpace::Status minimize(
-            FVectorType &x,
-            const int mode=1
-            );
-    LevenbergMarquardtSpace::Status minimizeInit(
-            FVectorType &x,
-            const int mode=1
-            );
-    LevenbergMarquardtSpace::Status minimizeOneStep(
-            FVectorType &x,
-            const int mode=1
-            );
+    LevenbergMarquardtSpace::Status minimize(FVectorType &x);
+    LevenbergMarquardtSpace::Status minimizeInit(FVectorType &x);
+    LevenbergMarquardtSpace::Status minimizeOneStep(FVectorType &x);
 
     static LevenbergMarquardtSpace::Status lmdif1(
             FunctorType &functor,
@@ -112,18 +103,9 @@ public:
             const Scalar tol = ei_sqrt(NumTraits<Scalar>::epsilon())
             );
 
-    LevenbergMarquardtSpace::Status minimizeOptimumStorage(
-            FVectorType  &x,
-            const int mode=1
-            );
-    LevenbergMarquardtSpace::Status minimizeOptimumStorageInit(
-            FVectorType  &x,
-            const int mode=1
-            );
-    LevenbergMarquardtSpace::Status minimizeOptimumStorageOneStep(
-            FVectorType  &x,
-            const int mode=1
-            );
+    LevenbergMarquardtSpace::Status minimizeOptimumStorage(FVectorType  &x);
+    LevenbergMarquardtSpace::Status minimizeOptimumStorageInit(FVectorType  &x);
+    LevenbergMarquardtSpace::Status minimizeOptimumStorageOneStep(FVectorType  &x);
 
     void resetParameters(void) { parameters = Parameters(); }
 
@@ -135,6 +117,7 @@ public:
     int njev;
     int iter;
     Scalar fnorm, gnorm;
+    bool useExternalScaling; 
 
     Scalar lm_param(void) { return par; }
 private:
@@ -175,24 +158,18 @@ LevenbergMarquardt<FunctorType,Scalar>::lmder1(
 
 template<typename FunctorType, typename Scalar>
 LevenbergMarquardtSpace::Status
-LevenbergMarquardt<FunctorType,Scalar>::minimize(
-        FVectorType  &x,
-        const int mode
-        )
+LevenbergMarquardt<FunctorType,Scalar>::minimize(FVectorType  &x)
 {
-    LevenbergMarquardtSpace::Status status = minimizeInit(x, mode);
+    LevenbergMarquardtSpace::Status status = minimizeInit(x);
     do {
-        status = minimizeOneStep(x, mode);
+        status = minimizeOneStep(x);
     } while (status==LevenbergMarquardtSpace::Running);
     return status;
 }
 
 template<typename FunctorType, typename Scalar>
 LevenbergMarquardtSpace::Status
-LevenbergMarquardt<FunctorType,Scalar>::minimizeInit(
-        FVectorType  &x,
-        const int mode
-        )
+LevenbergMarquardt<FunctorType,Scalar>::minimizeInit(FVectorType  &x)
 {
     n = x.size();
     m = functor.values();
@@ -201,9 +178,9 @@ LevenbergMarquardt<FunctorType,Scalar>::minimizeInit(
     wa4.resize(m);
     fvec.resize(m);
     fjac.resize(m, n);
-    if (mode != 2)
+    if (!useExternalScaling)
         diag.resize(n);
-    assert( (mode!=2 || diag.size()==n) || "When using mode==2, the caller must provide a valid 'diag'");
+    assert( (!useExternalScaling || diag.size()==n) || "When useExternalScaling is set, the caller must provide a valid 'diag'");
     qtf.resize(n);
 
     /* Function Body */
@@ -214,7 +191,7 @@ LevenbergMarquardt<FunctorType,Scalar>::minimizeInit(
     if (n <= 0 || m < n || parameters.ftol < 0. || parameters.xtol < 0. || parameters.gtol < 0. || parameters.maxfev <= 0 || parameters.factor <= 0.)
         return LevenbergMarquardtSpace::ImproperInputParameters;
 
-    if (mode == 2)
+    if (useExternalScaling)
         for (int j = 0; j < n; ++j)
             if (diag[j] <= 0.)
                 return LevenbergMarquardtSpace::ImproperInputParameters;
@@ -235,10 +212,7 @@ LevenbergMarquardt<FunctorType,Scalar>::minimizeInit(
 
 template<typename FunctorType, typename Scalar>
 LevenbergMarquardtSpace::Status
-LevenbergMarquardt<FunctorType,Scalar>::minimizeOneStep(
-        FVectorType  &x,
-        const int mode
-        )
+LevenbergMarquardt<FunctorType,Scalar>::minimizeOneStep(FVectorType  &x)
 {
     int j;
 
@@ -257,10 +231,10 @@ LevenbergMarquardt<FunctorType,Scalar>::minimizeOneStep(
     fjac = qrfac.matrixQR();
     permutation = qrfac.colsPermutation();
 
-    /* on the first iteration and if mode is 1, scale according */
+    /* on the first iteration and if external scaling is not used, scale according */
     /* to the norms of the columns of the initial jacobian. */
     if (iter == 1) {
-        if (mode != 2)
+        if (!useExternalScaling)
             for (j = 0; j < n; ++j)
                 diag[j] = (wa2[j]==0.)? 1. : wa2[j];
 
@@ -290,7 +264,7 @@ LevenbergMarquardt<FunctorType,Scalar>::minimizeOneStep(
         return LevenbergMarquardtSpace::CosinusTooSmall;
 
     /* rescale if necessary. */
-    if (mode != 2)
+    if (!useExternalScaling)
         diag = diag.cwiseMax(wa2);
 
     do {
@@ -406,10 +380,7 @@ LevenbergMarquardt<FunctorType,Scalar>::lmstr1(
 
 template<typename FunctorType, typename Scalar>
 LevenbergMarquardtSpace::Status
-LevenbergMarquardt<FunctorType,Scalar>::minimizeOptimumStorageInit(
-        FVectorType  &x,
-        const int mode
-        )
+LevenbergMarquardt<FunctorType,Scalar>::minimizeOptimumStorageInit(FVectorType  &x)
 {
     n = x.size();
     m = functor.values();
@@ -423,9 +394,9 @@ LevenbergMarquardt<FunctorType,Scalar>::minimizeOptimumStorageInit(
     // The purpose it to only use a nxn matrix, instead of mxn here, so
     // that we can handle cases where m>>n :
     fjac.resize(n, n);
-    if (mode != 2)
+    if (!useExternalScaling)
         diag.resize(n);
-    assert( (mode!=2 || diag.size()==n) || "When using mode==2, the caller must provide a valid 'diag'");
+    assert( (!useExternalScaling || diag.size()==n) || "When useExternalScaling is set, the caller must provide a valid 'diag'");
     qtf.resize(n);
 
     /* Function Body */
@@ -436,7 +407,7 @@ LevenbergMarquardt<FunctorType,Scalar>::minimizeOptimumStorageInit(
     if (n <= 0 || m < n || parameters.ftol < 0. || parameters.xtol < 0. || parameters.gtol < 0. || parameters.maxfev <= 0 || parameters.factor <= 0.)
         return LevenbergMarquardtSpace::ImproperInputParameters;
 
-    if (mode == 2)
+    if (useExternalScaling)
         for (int j = 0; j < n; ++j)
             if (diag[j] <= 0.)
                 return LevenbergMarquardtSpace::ImproperInputParameters;
@@ -458,10 +429,7 @@ LevenbergMarquardt<FunctorType,Scalar>::minimizeOptimumStorageInit(
 
 template<typename FunctorType, typename Scalar>
 LevenbergMarquardtSpace::Status
-LevenbergMarquardt<FunctorType,Scalar>::minimizeOptimumStorageOneStep(
-        FVectorType  &x,
-        const int mode
-        )
+LevenbergMarquardt<FunctorType,Scalar>::minimizeOptimumStorageOneStep(FVectorType  &x)
 {
     int i, j;
     bool sing;
@@ -514,10 +482,10 @@ LevenbergMarquardt<FunctorType,Scalar>::minimizeOptimumStorageOneStep(
         }
     }
 
-    /* on the first iteration and if mode is 1, scale according */
+    /* on the first iteration and if external scaling is not used, scale according */
     /* to the norms of the columns of the initial jacobian. */
     if (iter == 1) {
-        if (mode != 2)
+        if (!useExternalScaling)
             for (j = 0; j < n; ++j)
                 diag[j] = (wa2[j]==0.)? 1. : wa2[j];
 
@@ -541,7 +509,7 @@ LevenbergMarquardt<FunctorType,Scalar>::minimizeOptimumStorageOneStep(
         return LevenbergMarquardtSpace::CosinusTooSmall;
 
     /* rescale if necessary. */
-    if (mode != 2)
+    if (!useExternalScaling)
         diag = diag.cwiseMax(wa2);
 
     do {
@@ -635,14 +603,11 @@ LevenbergMarquardt<FunctorType,Scalar>::minimizeOptimumStorageOneStep(
 
 template<typename FunctorType, typename Scalar>
 LevenbergMarquardtSpace::Status
-LevenbergMarquardt<FunctorType,Scalar>::minimizeOptimumStorage(
-        FVectorType  &x,
-        const int mode
-        )
+LevenbergMarquardt<FunctorType,Scalar>::minimizeOptimumStorage(FVectorType  &x)
 {
-    LevenbergMarquardtSpace::Status status = minimizeOptimumStorageInit(x, mode);
+    LevenbergMarquardtSpace::Status status = minimizeOptimumStorageInit(x);
     do {
-        status = minimizeOptimumStorageOneStep(x, mode);
+        status = minimizeOptimumStorageOneStep(x);
     } while (status==LevenbergMarquardtSpace::Running);
     return status;
 }
diff --git a/unsupported/test/NonLinearOptimization.cpp b/unsupported/test/NonLinearOptimization.cpp
index 1313726a1..7aea7b361 100644
--- a/unsupported/test/NonLinearOptimization.cpp
+++ b/unsupported/test/NonLinearOptimization.cpp
@@ -317,7 +317,8 @@ void testHybrj()
   hybrj_functor functor;
   HybridNonLinearSolver<hybrj_functor> solver(functor);
   solver.diag.setConstant(n, 1.);
-  info = solver.solve(x, 2);
+  solver.useExternalScaling = true;
+  info = solver.solve(x);
 
   // check return value
   VERIFY( 1 == info);
@@ -401,7 +402,8 @@ void testHybrd()
   solver.parameters.nb_of_subdiagonals = 1;
   solver.parameters.nb_of_superdiagonals = 1;
   solver.diag.setConstant(n, 1.);
-  info = solver.solveNumericalDiff(x, 2);
+  solver.useExternalScaling = true;
+  info = solver.solveNumericalDiff(x);
 
   // check return value
   VERIFY( 1 == info);

From ac8ff442780f78c154d0035efa0e97b4779fd7e2 Mon Sep 17 00:00:00 2001
From: Hauke Heibel <hauke.heibel@gmail.com>
Date: Sun, 21 Feb 2010 15:23:51 +0100
Subject: [PATCH 10/38] Tried to get rid of MSVC warning D9025.

---
 CMakeLists.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index afe375041..eefaf4b47 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -108,7 +108,8 @@ endif(CMAKE_COMPILER_IS_GNUCXX)
 if(MSVC)
   # C4127 - conditional expression is constant
   # C4505 - unreferenced local function has been removed
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /W4 /wd4127 /wd4505")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /wd4127 /wd4505")
+  string(REGEX REPLACE "/W[0-9]" "/W4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
   option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF)
   if(EIGEN_TEST_SSE2)
     if(NOT CMAKE_CL_64)

From f75e6773b0d3dc4838acc04993dd2cd9a3422878 Mon Sep 17 00:00:00 2001
From: Hauke Heibel <hauke.heibel@gmail.com>
Date: Sun, 21 Feb 2010 15:24:10 +0100
Subject: [PATCH 11/38] Added ei_traits<Quaternion>::PlainObject.

---
 Eigen/src/Geometry/Quaternion.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h
index 67319d15b..e0dfadd7c 100644
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -211,6 +211,7 @@ public:
 template<typename _Scalar>
 struct ei_traits<Quaternion<_Scalar> >
 {
+  typedef Quaternion<_Scalar> PlainObject;
   typedef _Scalar Scalar;
   typedef Matrix<_Scalar,4,1> Coefficients;
   enum{

From e2a059863e6d02f77389c615f86ff036d7081e22 Mon Sep 17 00:00:00 2001
From: Hauke Heibel <hauke.heibel@gmail.com>
Date: Sun, 21 Feb 2010 15:44:34 +0100
Subject: [PATCH 12/38] Added missing precision/eps functions to
 AutoDiffScalar.

---
 unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
index ec25106f5..4b7331035 100644
--- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
+++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
@@ -563,6 +563,8 @@ template<typename DerType> struct NumTraits<AutoDiffScalar<DerType> >
     AddCost = 1,
     MulCost = 1
   };
+  inline static Real epsilon() { return std::numeric_limits<Real>::epsilon(); }
+  inline static Real dummy_precision() { return NumTraits<Real>::dummy_precision(); }
 };
 
 }

From a901bed33a33354b1e6b287907d4bf28d5c31b96 Mon Sep 17 00:00:00 2001
From: Hauke Heibel <hauke.heibel@gmail.com>
Date: Sun, 21 Feb 2010 18:26:14 +0100
Subject: [PATCH 13/38] Added IsRowMajor enum to DenseBase.

---
 Eigen/src/Core/DenseBase.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index 2850c60cb..eb018738f 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -124,6 +124,8 @@ template<typename Derived> class DenseBase
           * constructed from this one. See the \ref flags "list of flags".
           */
 
+      IsRowMajor = int(Flags) & RowMajorBit, /**< True if this expression is row major. */
+
       CoeffReadCost = ei_traits<Derived>::CoeffReadCost,
         /**< This is a rough measure of how expensive it is to read one coefficient from
           * this expression.

From 1a70f3b48d54d505f60613395f83dd181e9e51dc Mon Sep 17 00:00:00 2001
From: Thomas Capricelli <orzel@freehackers.org>
Date: Sun, 21 Feb 2010 19:30:11 +0100
Subject: [PATCH 14/38] fix compilation

---
 Eigen/src/Core/util/XprHelper.h | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index a54ddd155..f1ed4ed9f 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -204,22 +204,12 @@ template<typename T> struct ei_must_nest_by_value { enum { ret = false }; };
 template<class T>
 struct ei_is_reference
 {
-#ifndef NDEBUG
-  static void check() { std::cout << typeid(T).name() << std::endl; }
-#else
-  static void check() {}
-#endif
   enum { ret = false };
 };
 
 template<class T>
 struct ei_is_reference<T&>
 {
-#ifndef NDEBUG
-  static void check() { std::cout << typeid(T).name() << "&" << std::endl; }
-#else
-  static void check() {}
-#endif
   enum { ret = true };
 };
 

From 437f40acc1cbd9ce2f2a2a3f413cae3a5b35f8fb Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Mon, 22 Feb 2010 09:32:16 +0100
Subject: [PATCH 15/38] fix BTL's eigen interface

---
 bench/btl/libs/eigen2/eigen2_interface.hh | 27 ++++++++++-------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/bench/btl/libs/eigen2/eigen2_interface.hh b/bench/btl/libs/eigen2/eigen2_interface.hh
index 1166a37a1..a8b5b884f 100644
--- a/bench/btl/libs/eigen2/eigen2_interface.hh
+++ b/bench/btl/libs/eigen2/eigen2_interface.hh
@@ -17,11 +17,8 @@
 //
 #ifndef EIGEN2_INTERFACE_HH
 #define EIGEN2_INTERFACE_HH
-// #include <cblas.h>
-#include <Eigen/Array>
-#include <Eigen/Cholesky>
-#include <Eigen/LU>
-#include <Eigen/QR>
+
+#include <Eigen/Eigen>
 #include <vector>
 #include "btl.hh"
 
@@ -88,27 +85,27 @@ public :
   }
 
   static inline void matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
-    X = (A*B).lazy();
+    X.noalias() = A*B;
   }
 
   static inline void transposed_matrix_matrix_product(const gene_matrix & A, const gene_matrix & B, gene_matrix & X, int N){
-    X = (A.transpose()*B.transpose()).lazy();
+    X.noalias() = A.transpose()*B.transpose();
   }
 
   static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N){
-    X = (A.transpose()*A).lazy();
+    X.noalias() = A.transpose()*A;
   }
 
   static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N){
-    X = (A*A.transpose()).lazy();
+    X.noalias() = A*A.transpose();
   }
 
   static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N){
-    X = (A*B).lazy();
+    X.noalias() = A*B;
   }
 
   static inline void symv(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N){
-    X = (A.template selfadjointView<LowerTriangular>() * B)/*.lazy()*/;
+    X.noalias() = (A.template selfadjointView<Lower>() * B);
 //     ei_product_selfadjoint_vector<real,0,LowerTriangularBit,false,false>(N,A.data(),N, B.data(), 1, X.data(), 1);
   }
 
@@ -173,7 +170,7 @@ public :
   }
 
   static inline void atv_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){
-    X = (A.transpose()*B).lazy();
+    X.noalias() = (A.transpose()*B);
   }
 
   static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N){
@@ -193,16 +190,16 @@ public :
   }
 
   static inline void trisolve_lower(const gene_matrix & L, const gene_vector& B, gene_vector& X, int N){
-    X = L.template triangularView<LowerTriangular>().solve(B);
+    X = L.template triangularView<Lower>().solve(B);
   }
 
   static inline void trisolve_lower_matrix(const gene_matrix & L, const gene_matrix& B, gene_matrix& X, int N){
-    X = L.template triangularView<LowerTriangular>().solve(B);
+    X = L.template triangularView<Lower>().solve(B);
   }
 
   static inline void cholesky(const gene_matrix & X, gene_matrix & C, int N){
     C = X;
-    ei_llt_inplace<LowerTriangular>::blocked(C);
+    ei_llt_inplace<Lower>::blocked(C);
     //C = X.llt().matrixL();
 //     C = X;
 //     Cholesky<gene_matrix>::computeInPlace(C);

From f797ba0abe5307e5b42069b316925b0cea53b188 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Mon, 22 Feb 2010 11:04:35 +0100
Subject: [PATCH 16/38] extend the bench timer to allow benchmarking of
 parallel code, improvements are welcome

---
 bench/BenchTimer.h | 93 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 74 insertions(+), 19 deletions(-)

diff --git a/bench/BenchTimer.h b/bench/BenchTimer.h
index b2d0fc5f6..6889654fe 100644
--- a/bench/BenchTimer.h
+++ b/bench/BenchTimer.h
@@ -23,24 +23,31 @@
 // License and a copy of the GNU General Public License along with
 // Eigen. If not, see <http://www.gnu.org/licenses/>.
 
-#ifndef EIGEN_BENCH_TIMER_H
-#define EIGEN_BENCH_TIMER_H
+#ifndef EIGEN_BENCH_TIMERR_H
+#define EIGEN_BENCH_TIMERR_H
 
 #if defined(_WIN32) || defined(__CYGWIN__)
 #define NOMINMAX
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 #else
+#include <sys/time.h>
 #include <time.h>
 #include <unistd.h>
 #endif
 
+#include <cmath>
 #include <cstdlib>
 #include <numeric>
 
 namespace Eigen
 {
 
+enum {
+  CPU_TIMER = 0,
+  REAL_TIMER = 1
+};
+
 /** Elapsed time timer keeping the best try.
   *
   * On POSIX platforms we use clock_gettime with CLOCK_PROCESS_CPUTIME_ID.
@@ -52,37 +59,58 @@ class BenchTimer
 {
 public:
 
-  BenchTimer() 
-  { 
+  BenchTimer()
+  {
 #if defined(_WIN32) || defined(__CYGWIN__)
     LARGE_INTEGER freq;
     QueryPerformanceFrequency(&freq);
     m_frequency = (double)freq.QuadPart;
 #endif
-    reset(); 
+    reset();
   }
 
   ~BenchTimer() {}
 
-  inline void reset(void) {m_best = 1e6;}
-  inline void start(void) {m_start = getTime();}
-  inline void stop(void)
+  inline void reset()
   {
-    m_best = std::min(m_best, getTime() - m_start);
+    m_bests.fill(1e9);
+    m_totals.setZero();
+  }
+  inline void start()
+  {
+    m_starts[CPU_TIMER]  = getCpuTime();
+    m_starts[REAL_TIMER] = getRealTime();
+  }
+  inline void stop()
+  {
+    m_times[CPU_TIMER] = getCpuTime() - m_starts[CPU_TIMER];
+    m_times[REAL_TIMER] = getRealTime() - m_starts[REAL_TIMER];
+    m_bests = m_bests.cwiseMin(m_times);
+    m_totals += m_times;
   }
 
-  /** Return the best elapsed time in seconds.
+  /** Return the elapsed time in seconds between the last start/stop pair
     */
-  inline double value(void)
+  inline double value(int TIMER = CPU_TIMER)
   {
-    return m_best;
+    return m_times[TIMER];
   }
 
-#if defined(_WIN32) || defined(__CYGWIN__)
-  inline double getTime(void)
-#else
-  static inline double getTime(void)
-#endif
+  /** Return the best elapsed time in seconds
+    */
+  inline double best(int TIMER = CPU_TIMER)
+  {
+    return m_bests[TIMER];
+  }
+
+  /** Return the total elapsed time in seconds.
+    */
+  inline double total(int TIMER = CPU_TIMER)
+  {
+    return m_totals[TIMER];
+  }
+
+  inline double getCpuTime()
   {
 #ifdef WIN32
     LARGE_INTEGER query_ticks;
@@ -95,14 +123,41 @@ public:
 #endif
   }
 
+  inline double getRealTime()
+  {
+#ifdef WIN32
+    // TODO
+    return getCputime;
+#else
+    struct timeval tv;
+    struct timezone tz;
+    gettimeofday(&tv, &tz);
+    return (double)tv.tv_sec + 1.e-6 * (double)tv.tv_usec;
+#endif
+  }
+
 protected:
 #if defined(_WIN32) || defined(__CYGWIN__)
   double m_frequency;
 #endif
-  double m_best, m_start;
+  Vector2d m_starts;
+  Vector2d m_times;
+  Vector2d m_bests;
+  Vector2d m_totals;
 
 };
 
+#define BENCH(TIMER,TRIES,REP,CODE) { \
+    TIMER.reset(); \
+    for(int uglyvarname1=0; uglyvarname1<TRIES; ++uglyvarname1){ \
+      TIMER.start(); \
+      for(int uglyvarname2=0; uglyvarname2<REP; ++uglyvarname2){ \
+        CODE; \
+      } \
+      TIMER.stop(); \
+    } \
+  }
+
 }
 
-#endif // EIGEN_BENCH_TIMER_H
+#endif // EIGEN_BENCH_TIMERR_H

From d5af5ab92b867d01a344c56beac1266076e44f94 Mon Sep 17 00:00:00 2001
From: Hauke Heibel <hauke.heibel@gmail.com>
Date: Mon, 22 Feb 2010 11:23:27 +0100
Subject: [PATCH 17/38] Added getRealTime() for windows.

---
 bench/BenchTimer.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/bench/BenchTimer.h b/bench/BenchTimer.h
index 6889654fe..a32495d60 100644
--- a/bench/BenchTimer.h
+++ b/bench/BenchTimer.h
@@ -126,8 +126,9 @@ public:
   inline double getRealTime()
   {
 #ifdef WIN32
-    // TODO
-    return getCputime;
+	SYSTEMTIME st;
+	GetSystemTime(&st);
+	return (double)st.wSecond + 1.e-6 * (double)st.wMilliseconds;
 #else
     struct timeval tv;
     struct timezone tz;

From 3e6ab8f93bd794f3f81f08c98098b146791719de Mon Sep 17 00:00:00 2001
From: Hauke Heibel <hauke.heibel@gmail.com>
Date: Mon, 22 Feb 2010 11:34:25 +0100
Subject: [PATCH 18/38] ups

---
 bench/BenchTimer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bench/BenchTimer.h b/bench/BenchTimer.h
index a32495d60..e49afa07f 100644
--- a/bench/BenchTimer.h
+++ b/bench/BenchTimer.h
@@ -128,7 +128,7 @@ public:
 #ifdef WIN32
 	SYSTEMTIME st;
 	GetSystemTime(&st);
-	return (double)st.wSecond + 1.e-6 * (double)st.wMilliseconds;
+	return (double)st.wSecond + 1.e-3 * (double)st.wMilliseconds;
 #else
     struct timeval tv;
     struct timezone tz;

From 51a4b929a17ec36c45b1fb814c566098a164b7df Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Mon, 22 Feb 2010 15:18:29 +0100
Subject: [PATCH 19/38] implement an even lower level version of the gebp
 kernel for MSVC (it seems to be faster with gcc as well) (transplanted from
 9a5643551fe068497f84a81cd8986febf1918382)

---
 .../Core/products/GeneralBlockPanelKernel.h   | 72 ++++++++++++++++++-
 1 file changed, 70 insertions(+), 2 deletions(-)

diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index fe1987bdd..8c29d2218 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -76,6 +76,7 @@ struct ei_gebp_kernel
         {
           PacketType B0, B1, B2, B3, A0, A1;
 
+          #if 0
                     A0 = ei_pload(&blA[0*PacketSize]);
                     A1 = ei_pload(&blA[1*PacketSize]);
                     B0 = ei_pload(&blB[0*PacketSize]);
@@ -134,6 +135,73 @@ struct ei_gebp_kernel
           if(nr==4) C3 = cj.pmadd(A0, B3, C3);
           if(nr==4) C7 = cj.pmadd(A1, B3, C7);
 
+          #else
+
+          PacketType T0, T1;
+
+          #define MADD(A,B,C,T) { T = A; T = ei_pmul(T,B); C = ei_padd(C,T); }
+
+                    A0 = ei_pload(&blA[0*PacketSize]);
+                    A1 = ei_pload(&blA[1*PacketSize]);
+                    B0 = ei_pload(&blB[0*PacketSize]);
+                    B1 = ei_pload(&blB[1*PacketSize]);
+
+                      MADD(A0,B0,C0,T0);  // C0 = cj.pmadd(A0, B0, C0);
+          if(nr==4)   B2 = ei_pload(&blB[2*PacketSize]);
+                      MADD(A1,B0,C4,T1); // C4 = cj.pmadd(A1, B0, C4);
+          if(nr==4)   B3 = ei_pload(&blB[3*PacketSize]);
+                      B0 = ei_pload(&blB[(nr==4 ? 4 : 2)*PacketSize]);
+                      MADD(A0,B1,C1,T0);  // C1 = cj.pmadd(A0, B1, C1);
+                      MADD(A1,B1,C5,T1);  // C5 = cj.pmadd(A1, B1, C5);
+                      B1 = ei_pload(&blB[(nr==4 ? 5 : 3)*PacketSize]);
+          if(nr==4) { MADD(A0,B2,C2,T0); }// C2 = cj.pmadd(A0, B2, C2);
+          if(nr==4) { MADD(A1,B2,C6,T1); }// C6 = cj.pmadd(A1, B2, C6);
+          if(nr==4)   B2 = ei_pload(&blB[6*PacketSize]);
+          if(nr==4) { MADD(A0,B3,C3,T0); }// C3 = cj.pmadd(A0, B3, C3);
+                      A0 = ei_pload(&blA[2*PacketSize]);
+          if(nr==4) { MADD(A1,B3,C7,T1); }// C7 = cj.pmadd(A1, B3, C7);
+                      A1 = ei_pload(&blA[3*PacketSize]);
+          if(nr==4)   B3 = ei_pload(&blB[7*PacketSize]);
+                      MADD(A0,B0,C0,T0);  // C0 = cj.pmadd(A0, B0, C0);
+                      MADD(A1,B0,C4,T1);  // C4 = cj.pmadd(A1, B0, C4);
+                      B0 = ei_pload(&blB[(nr==4 ? 8 : 4)*PacketSize]);
+                      MADD(A0,B1,C1,T0);  // C1 = cj.pmadd(A0, B1, C1);
+                      MADD(A1,B1,C5,T1);  // C5 = cj.pmadd(A1, B1, C5);
+                      B1 = ei_pload(&blB[(nr==4 ? 9 : 5)*PacketSize]);
+          if(nr==4) { MADD(A0,B2,C2,T0); }// C2 = cj.pmadd(A0, B2, C2);
+          if(nr==4) { MADD(A1,B2,C6,T1); }// C6 = cj.pmadd(A1, B2, C6);
+          if(nr==4)   B2 = ei_pload(&blB[10*PacketSize]);
+          if(nr==4) { MADD(A0,B3,C3,T0); } // C3 = cj.pmadd(A0, B3, C3);
+                      A0 = ei_pload(&blA[4*PacketSize]);
+          if(nr==4) { MADD(A1,B3,C7,T1); }// C7 = cj.pmadd(A1, B3, C7);
+                      A1 = ei_pload(&blA[5*PacketSize]);
+          if(nr==4)   B3 = ei_pload(&blB[11*PacketSize]);
+
+                      MADD(A0,B0,C0,T0);  // C0 = cj.pmadd(A0, B0, C0);
+                      MADD(A1,B0,C4,T1);  // C4 = cj.pmadd(A1, B0, C4);
+                      B0 = ei_pload(&blB[(nr==4 ? 12 : 6)*PacketSize]);
+                      MADD(A0,B1,C1,T0);  // C1 = cj.pmadd(A0, B1, C1);
+                      MADD(A1,B1,C5,T1);  // C5 = cj.pmadd(A1, B1, C5);
+                      B1 = ei_pload(&blB[(nr==4 ? 13 : 7)*PacketSize]);
+          if(nr==4) { MADD(A0,B2,C2,T0); }// C2 = cj.pmadd(A0, B2, C2);
+          if(nr==4) { MADD(A1,B2,C6,T1); }// C6 = cj.pmadd(A1, B2, C6);
+          if(nr==4)   B2 = ei_pload(&blB[14*PacketSize]);
+          if(nr==4) { MADD(A0,B3,C3,T0); } // C3 = cj.pmadd(A0, B3, C3);
+                      A0 = ei_pload(&blA[6*PacketSize]);
+          if(nr==4) { MADD(A1,B3,C7,T1); } // C7 = cj.pmadd(A1, B3, C7);
+                      A1 = ei_pload(&blA[7*PacketSize]);
+          if(nr==4)   B3 = ei_pload(&blB[15*PacketSize]);
+                      MADD(A0,B0,C0,T0);  // C0 = cj.pmadd(A0, B0, C0);
+                      MADD(A1,B0,C4,T1);  // C4 = cj.pmadd(A1, B0, C4);
+                      MADD(A0,B1,C1,T0);  // C1 = cj.pmadd(A0, B1, C1);
+                      MADD(A1,B1,C5,T1);  // C5 = cj.pmadd(A1, B1, C5);
+          if(nr==4) { MADD(A0,B2,C2,T0); }// C2 = cj.pmadd(A0, B2, C2);
+          if(nr==4) { MADD(A1,B2,C6,T1); }//C6 = cj.pmadd(A1, B2, C6);
+          if(nr==4) { MADD(A0,B3,C3,T0); }//C3 = cj.pmadd(A0, B3, C3);
+          if(nr==4) { MADD(A1,B3,C7,T1); }//C7 = cj.pmadd(A1, B3, C7);
+
+          #endif
+
           blB += 4*nr*PacketSize;
           blA += 4*mr;
         }
@@ -334,7 +402,7 @@ struct ei_gebp_kernel
         #endif
 
         PacketType C0 = ei_ploadu(&res[(j2+0)*resStride + i]);
-        
+
         const Scalar* blB = &blockB[j2*strideB*PacketSize+offsetB];
         for(int k=0; k<depth; k++)
         {
@@ -474,7 +542,7 @@ struct ei_gemm_pack_rhs<Scalar, nr, ColMajor, PanelMode>
       // skip what we have after
       if(PanelMode) count += PacketSize * nr * (stride-offset-depth);
     }
-    
+
     // copy the remaining columns one at a time (nr==1)
     for(int j2=packet_cols; j2<cols; ++j2)
     {

From d2b0eadf523198f4b8b7b545ea0bbdb0d334c966 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Mon, 22 Feb 2010 16:35:05 +0100
Subject: [PATCH 20/38] fully adapt the gebp kernel and optimize it for CPU
 with only 8 registers (transplanted from
 2ed88ebbf1995be90b8d0c25ff10248c8f56d023)

---
 .../Core/products/GeneralBlockPanelKernel.h   | 456 ++++++++++--------
 1 file changed, 262 insertions(+), 194 deletions(-)

diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
index 8c29d2218..18e913b0e 100644
--- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h
+++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h
@@ -27,6 +27,12 @@
 
 #ifndef EIGEN_EXTERN_INSTANTIATIONS
 
+#ifdef EIGEN_HAS_FUSE_CJMADD
+#define CJMADD(A,B,C,T)  C = cj.pmadd(A,B,C);
+#else
+#define CJMADD(A,B,C,T)  T = A; T = cj.pmul(T,B); C = ei_padd(C,T);
+#endif
+
 // optimized GEneral packed Block * packed Panel product kernel
 template<typename Scalar, int mr, int nr, typename Conj>
 struct ei_gebp_kernel
@@ -74,133 +80,111 @@ struct ei_gebp_kernel
         const Scalar* blB = &blockB[j2*strideB*PacketSize+offsetB*nr];
         for(int k=0; k<peeled_kc; k+=4)
         {
-          PacketType B0, B1, B2, B3, A0, A1;
+          if(nr==2)
+          {
+            PacketType B0, T0, A0, A1;
 
-          #if 0
-                    A0 = ei_pload(&blA[0*PacketSize]);
-                    A1 = ei_pload(&blA[1*PacketSize]);
-                    B0 = ei_pload(&blB[0*PacketSize]);
-                    B1 = ei_pload(&blB[1*PacketSize]);
-                    C0 = cj.pmadd(A0, B0, C0);
-          if(nr==4) B2 = ei_pload(&blB[2*PacketSize]);
-                    C4 = cj.pmadd(A1, B0, C4);
-          if(nr==4) B3 = ei_pload(&blB[3*PacketSize]);
-                    B0 = ei_pload(&blB[(nr==4 ? 4 : 2)*PacketSize]);
-                    C1 = cj.pmadd(A0, B1, C1);
-                    C5 = cj.pmadd(A1, B1, C5);
-                    B1 = ei_pload(&blB[(nr==4 ? 5 : 3)*PacketSize]);
-          if(nr==4) C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) C6 = cj.pmadd(A1, B2, C6);
-          if(nr==4) B2 = ei_pload(&blB[6*PacketSize]);
-          if(nr==4) C3 = cj.pmadd(A0, B3, C3);
-                    A0 = ei_pload(&blA[2*PacketSize]);
-          if(nr==4) C7 = cj.pmadd(A1, B3, C7);
-                    A1 = ei_pload(&blA[3*PacketSize]);
-          if(nr==4) B3 = ei_pload(&blB[7*PacketSize]);
-                    C0 = cj.pmadd(A0, B0, C0);
-                    C4 = cj.pmadd(A1, B0, C4);
-                    B0 = ei_pload(&blB[(nr==4 ? 8 : 4)*PacketSize]);
-                    C1 = cj.pmadd(A0, B1, C1);
-                    C5 = cj.pmadd(A1, B1, C5);
-                    B1 = ei_pload(&blB[(nr==4 ? 9 : 5)*PacketSize]);
-          if(nr==4) C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) C6 = cj.pmadd(A1, B2, C6);
-          if(nr==4) B2 = ei_pload(&blB[10*PacketSize]);
-          if(nr==4) C3 = cj.pmadd(A0, B3, C3);
-                    A0 = ei_pload(&blA[4*PacketSize]);
-          if(nr==4) C7 = cj.pmadd(A1, B3, C7);
-                    A1 = ei_pload(&blA[5*PacketSize]);
-          if(nr==4) B3 = ei_pload(&blB[11*PacketSize]);
+            A0 = ei_pload(&blA[0*PacketSize]);
+            A1 = ei_pload(&blA[1*PacketSize]);
+            B0 = ei_pload(&blB[0*PacketSize]);
+            CJMADD(A0,B0,C0,T0);
+            CJMADD(A1,B0,C4,T0);
+            B0 = ei_pload(&blB[1*PacketSize]);
+            CJMADD(A0,B0,C1,T0);
+            CJMADD(A1,B0,C5,T0);
 
-                    C0 = cj.pmadd(A0, B0, C0);
-                    C4 = cj.pmadd(A1, B0, C4);
-                    B0 = ei_pload(&blB[(nr==4 ? 12 : 6)*PacketSize]);
-                    C1 = cj.pmadd(A0, B1, C1);
-                    C5 = cj.pmadd(A1, B1, C5);
-                    B1 = ei_pload(&blB[(nr==4 ? 13 : 7)*PacketSize]);
-          if(nr==4) C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) C6 = cj.pmadd(A1, B2, C6);
-          if(nr==4) B2 = ei_pload(&blB[14*PacketSize]);
-          if(nr==4) C3 = cj.pmadd(A0, B3, C3);
-                    A0 = ei_pload(&blA[6*PacketSize]);
-          if(nr==4) C7 = cj.pmadd(A1, B3, C7);
-                    A1 = ei_pload(&blA[7*PacketSize]);
-          if(nr==4) B3 = ei_pload(&blB[15*PacketSize]);
-                    C0 = cj.pmadd(A0, B0, C0);
-                    C4 = cj.pmadd(A1, B0, C4);
-                    C1 = cj.pmadd(A0, B1, C1);
-                    C5 = cj.pmadd(A1, B1, C5);
-          if(nr==4) C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) C6 = cj.pmadd(A1, B2, C6);
-          if(nr==4) C3 = cj.pmadd(A0, B3, C3);
-          if(nr==4) C7 = cj.pmadd(A1, B3, C7);
+            A0 = ei_pload(&blA[2*PacketSize]);
+            A1 = ei_pload(&blA[3*PacketSize]);
+            B0 = ei_pload(&blB[2*PacketSize]);
+            CJMADD(A0,B0,C0,T0);
+            CJMADD(A1,B0,C4,T0);
+            B0 = ei_pload(&blB[3*PacketSize]);
+            CJMADD(A0,B0,C1,T0);
+            CJMADD(A1,B0,C5,T0);
 
-          #else
+            A0 = ei_pload(&blA[4*PacketSize]);
+            A1 = ei_pload(&blA[5*PacketSize]);
+            B0 = ei_pload(&blB[4*PacketSize]);
+            CJMADD(A0,B0,C0,T0);
+            CJMADD(A1,B0,C4,T0);
+            B0 = ei_pload(&blB[5*PacketSize]);
+            CJMADD(A0,B0,C1,T0);
+            CJMADD(A1,B0,C5,T0);
 
-          PacketType T0, T1;
+            A0 = ei_pload(&blA[6*PacketSize]);
+            A1 = ei_pload(&blA[7*PacketSize]);
+            B0 = ei_pload(&blB[6*PacketSize]);
+            CJMADD(A0,B0,C0,T0);
+            CJMADD(A1,B0,C4,T0);
+            B0 = ei_pload(&blB[7*PacketSize]);
+            CJMADD(A0,B0,C1,T0);
+            CJMADD(A1,B0,C5,T0);
+          }
+          else
+          {
 
-          #define MADD(A,B,C,T) { T = A; T = ei_pmul(T,B); C = ei_padd(C,T); }
+            PacketType B0, B1, B2, B3, A0, A1;
+            PacketType T0, T1;
 
-                    A0 = ei_pload(&blA[0*PacketSize]);
-                    A1 = ei_pload(&blA[1*PacketSize]);
-                    B0 = ei_pload(&blB[0*PacketSize]);
-                    B1 = ei_pload(&blB[1*PacketSize]);
+                        A0 = ei_pload(&blA[0*PacketSize]);
+                        A1 = ei_pload(&blA[1*PacketSize]);
+                        B0 = ei_pload(&blB[0*PacketSize]);
+                        B1 = ei_pload(&blB[1*PacketSize]);
 
-                      MADD(A0,B0,C0,T0);  // C0 = cj.pmadd(A0, B0, C0);
-          if(nr==4)   B2 = ei_pload(&blB[2*PacketSize]);
-                      MADD(A1,B0,C4,T1); // C4 = cj.pmadd(A1, B0, C4);
-          if(nr==4)   B3 = ei_pload(&blB[3*PacketSize]);
-                      B0 = ei_pload(&blB[(nr==4 ? 4 : 2)*PacketSize]);
-                      MADD(A0,B1,C1,T0);  // C1 = cj.pmadd(A0, B1, C1);
-                      MADD(A1,B1,C5,T1);  // C5 = cj.pmadd(A1, B1, C5);
-                      B1 = ei_pload(&blB[(nr==4 ? 5 : 3)*PacketSize]);
-          if(nr==4) { MADD(A0,B2,C2,T0); }// C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) { MADD(A1,B2,C6,T1); }// C6 = cj.pmadd(A1, B2, C6);
-          if(nr==4)   B2 = ei_pload(&blB[6*PacketSize]);
-          if(nr==4) { MADD(A0,B3,C3,T0); }// C3 = cj.pmadd(A0, B3, C3);
-                      A0 = ei_pload(&blA[2*PacketSize]);
-          if(nr==4) { MADD(A1,B3,C7,T1); }// C7 = cj.pmadd(A1, B3, C7);
-                      A1 = ei_pload(&blA[3*PacketSize]);
-          if(nr==4)   B3 = ei_pload(&blB[7*PacketSize]);
-                      MADD(A0,B0,C0,T0);  // C0 = cj.pmadd(A0, B0, C0);
-                      MADD(A1,B0,C4,T1);  // C4 = cj.pmadd(A1, B0, C4);
-                      B0 = ei_pload(&blB[(nr==4 ? 8 : 4)*PacketSize]);
-                      MADD(A0,B1,C1,T0);  // C1 = cj.pmadd(A0, B1, C1);
-                      MADD(A1,B1,C5,T1);  // C5 = cj.pmadd(A1, B1, C5);
-                      B1 = ei_pload(&blB[(nr==4 ? 9 : 5)*PacketSize]);
-          if(nr==4) { MADD(A0,B2,C2,T0); }// C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) { MADD(A1,B2,C6,T1); }// C6 = cj.pmadd(A1, B2, C6);
-          if(nr==4)   B2 = ei_pload(&blB[10*PacketSize]);
-          if(nr==4) { MADD(A0,B3,C3,T0); } // C3 = cj.pmadd(A0, B3, C3);
-                      A0 = ei_pload(&blA[4*PacketSize]);
-          if(nr==4) { MADD(A1,B3,C7,T1); }// C7 = cj.pmadd(A1, B3, C7);
-                      A1 = ei_pload(&blA[5*PacketSize]);
-          if(nr==4)   B3 = ei_pload(&blB[11*PacketSize]);
+                        CJMADD(A0,B0,C0,T0);
+            if(nr==4)   B2 = ei_pload(&blB[2*PacketSize]);
+                        CJMADD(A1,B0,C4,T1);
+            if(nr==4)   B3 = ei_pload(&blB[3*PacketSize]);
+                        B0 = ei_pload(&blB[(nr==4 ? 4 : 2)*PacketSize]);
+                        CJMADD(A0,B1,C1,T0);
+                        CJMADD(A1,B1,C5,T1);
+                        B1 = ei_pload(&blB[(nr==4 ? 5 : 3)*PacketSize]);
+            if(nr==4) { CJMADD(A0,B2,C2,T0); }
+            if(nr==4) { CJMADD(A1,B2,C6,T1); }
+            if(nr==4)   B2 = ei_pload(&blB[6*PacketSize]);
+            if(nr==4) { CJMADD(A0,B3,C3,T0); }
+                        A0 = ei_pload(&blA[2*PacketSize]);
+            if(nr==4) { CJMADD(A1,B3,C7,T1); }
+                        A1 = ei_pload(&blA[3*PacketSize]);
+            if(nr==4)   B3 = ei_pload(&blB[7*PacketSize]);
+                        CJMADD(A0,B0,C0,T0);
+                        CJMADD(A1,B0,C4,T1);
+                        B0 = ei_pload(&blB[(nr==4 ? 8 : 4)*PacketSize]);
+                        CJMADD(A0,B1,C1,T0);
+                        CJMADD(A1,B1,C5,T1);
+                        B1 = ei_pload(&blB[(nr==4 ? 9 : 5)*PacketSize]);
+            if(nr==4) { CJMADD(A0,B2,C2,T0); }
+            if(nr==4) { CJMADD(A1,B2,C6,T1); }
+            if(nr==4)   B2 = ei_pload(&blB[10*PacketSize]);
+            if(nr==4) { CJMADD(A0,B3,C3,T0); }
+                        A0 = ei_pload(&blA[4*PacketSize]);
+            if(nr==4) { CJMADD(A1,B3,C7,T1); }
+                        A1 = ei_pload(&blA[5*PacketSize]);
+            if(nr==4)   B3 = ei_pload(&blB[11*PacketSize]);
 
-                      MADD(A0,B0,C0,T0);  // C0 = cj.pmadd(A0, B0, C0);
-                      MADD(A1,B0,C4,T1);  // C4 = cj.pmadd(A1, B0, C4);
-                      B0 = ei_pload(&blB[(nr==4 ? 12 : 6)*PacketSize]);
-                      MADD(A0,B1,C1,T0);  // C1 = cj.pmadd(A0, B1, C1);
-                      MADD(A1,B1,C5,T1);  // C5 = cj.pmadd(A1, B1, C5);
-                      B1 = ei_pload(&blB[(nr==4 ? 13 : 7)*PacketSize]);
-          if(nr==4) { MADD(A0,B2,C2,T0); }// C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) { MADD(A1,B2,C6,T1); }// C6 = cj.pmadd(A1, B2, C6);
-          if(nr==4)   B2 = ei_pload(&blB[14*PacketSize]);
-          if(nr==4) { MADD(A0,B3,C3,T0); } // C3 = cj.pmadd(A0, B3, C3);
-                      A0 = ei_pload(&blA[6*PacketSize]);
-          if(nr==4) { MADD(A1,B3,C7,T1); } // C7 = cj.pmadd(A1, B3, C7);
-                      A1 = ei_pload(&blA[7*PacketSize]);
-          if(nr==4)   B3 = ei_pload(&blB[15*PacketSize]);
-                      MADD(A0,B0,C0,T0);  // C0 = cj.pmadd(A0, B0, C0);
-                      MADD(A1,B0,C4,T1);  // C4 = cj.pmadd(A1, B0, C4);
-                      MADD(A0,B1,C1,T0);  // C1 = cj.pmadd(A0, B1, C1);
-                      MADD(A1,B1,C5,T1);  // C5 = cj.pmadd(A1, B1, C5);
-          if(nr==4) { MADD(A0,B2,C2,T0); }// C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) { MADD(A1,B2,C6,T1); }//C6 = cj.pmadd(A1, B2, C6);
-          if(nr==4) { MADD(A0,B3,C3,T0); }//C3 = cj.pmadd(A0, B3, C3);
-          if(nr==4) { MADD(A1,B3,C7,T1); }//C7 = cj.pmadd(A1, B3, C7);
-
-          #endif
+                        CJMADD(A0,B0,C0,T0);
+                        CJMADD(A1,B0,C4,T1);
+                        B0 = ei_pload(&blB[(nr==4 ? 12 : 6)*PacketSize]);
+                        CJMADD(A0,B1,C1,T0);
+                        CJMADD(A1,B1,C5,T1);
+                        B1 = ei_pload(&blB[(nr==4 ? 13 : 7)*PacketSize]);
+            if(nr==4) { CJMADD(A0,B2,C2,T0); }
+            if(nr==4) { CJMADD(A1,B2,C6,T1); }
+            if(nr==4)   B2 = ei_pload(&blB[14*PacketSize]);
+            if(nr==4) { CJMADD(A0,B3,C3,T0); }
+                        A0 = ei_pload(&blA[6*PacketSize]);
+            if(nr==4) { CJMADD(A1,B3,C7,T1); }
+                        A1 = ei_pload(&blA[7*PacketSize]);
+            if(nr==4)   B3 = ei_pload(&blB[15*PacketSize]);
+                        CJMADD(A0,B0,C0,T0);
+                        CJMADD(A1,B0,C4,T1);
+                        CJMADD(A0,B1,C1,T0);
+                        CJMADD(A1,B1,C5,T1);
+            if(nr==4) { CJMADD(A0,B2,C2,T0); }
+            if(nr==4) { CJMADD(A1,B2,C6,T1); }
+            if(nr==4) { CJMADD(A0,B3,C3,T0); }
+            if(nr==4) { CJMADD(A1,B3,C7,T1); }
+          }
 
           blB += 4*nr*PacketSize;
           blA += 4*mr;
@@ -208,22 +192,40 @@ struct ei_gebp_kernel
         // process remaining peeled loop
         for(int k=peeled_kc; k<depth; k++)
         {
-          PacketType B0, B1, B2, B3, A0, A1;
+          if(nr==2)
+          {
+            PacketType B0, T0, A0, A1;
 
-                    A0 = ei_pload(&blA[0*PacketSize]);
-                    A1 = ei_pload(&blA[1*PacketSize]);
-                    B0 = ei_pload(&blB[0*PacketSize]);
-                    B1 = ei_pload(&blB[1*PacketSize]);
-                    C0 = cj.pmadd(A0, B0, C0);
-          if(nr==4) B2 = ei_pload(&blB[2*PacketSize]);
-                    C4 = cj.pmadd(A1, B0, C4);
-          if(nr==4) B3 = ei_pload(&blB[3*PacketSize]);
-                    C1 = cj.pmadd(A0, B1, C1);
-                    C5 = cj.pmadd(A1, B1, C5);
-          if(nr==4) C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) C6 = cj.pmadd(A1, B2, C6);
-          if(nr==4) C3 = cj.pmadd(A0, B3, C3);
-          if(nr==4) C7 = cj.pmadd(A1, B3, C7);
+            A0 = ei_pload(&blA[0*PacketSize]);
+            A1 = ei_pload(&blA[1*PacketSize]);
+            B0 = ei_pload(&blB[0*PacketSize]);
+            CJMADD(A0,B0,C0,T0);
+            CJMADD(A1,B0,C4,T0);
+            B0 = ei_pload(&blB[1*PacketSize]);
+            CJMADD(A0,B0,C1,T0);
+            CJMADD(A1,B0,C5,T0);
+          }
+          else
+          {
+            PacketType B0, B1, B2, B3, A0, A1, T0, T1;
+
+                        A0 = ei_pload(&blA[0*PacketSize]);
+                        A1 = ei_pload(&blA[1*PacketSize]);
+                        B0 = ei_pload(&blB[0*PacketSize]);
+                        B1 = ei_pload(&blB[1*PacketSize]);
+
+                        CJMADD(A0,B0,C0,T0);
+            if(nr==4)   B2 = ei_pload(&blB[2*PacketSize]);
+                        CJMADD(A1,B0,C4,T1);
+            if(nr==4)   B3 = ei_pload(&blB[3*PacketSize]);
+                        B0 = ei_pload(&blB[(nr==4 ? 4 : 2)*PacketSize]);
+                        CJMADD(A0,B1,C1,T0);
+                        CJMADD(A1,B1,C5,T1);
+            if(nr==4) { CJMADD(A0,B2,C2,T0); }
+            if(nr==4) { CJMADD(A1,B2,C6,T1); }
+            if(nr==4) { CJMADD(A0,B3,C3,T0); }
+            if(nr==4) { CJMADD(A1,B3,C7,T1); }
+          }
 
           blB += nr*PacketSize;
           blA += mr;
@@ -257,45 +259,79 @@ struct ei_gebp_kernel
         const Scalar* blB = &blockB[j2*strideB*PacketSize+offsetB*nr];
         for(int k=0; k<peeled_kc; k+=4)
         {
-          PacketType B0, B1, B2, B3, A0;
+          if(nr==2)
+          {
+            PacketType B0, T0, A0;
 
-                    A0 = ei_pload(&blA[0*PacketSize]);
-                    B0 = ei_pload(&blB[0*PacketSize]);
-                    B1 = ei_pload(&blB[1*PacketSize]);
-                    C0 = cj.pmadd(A0, B0, C0);
-          if(nr==4) B2 = ei_pload(&blB[2*PacketSize]);
-          if(nr==4) B3 = ei_pload(&blB[3*PacketSize]);
-                    B0 = ei_pload(&blB[(nr==4 ? 4 : 2)*PacketSize]);
-                    C1 = cj.pmadd(A0, B1, C1);
-                    B1 = ei_pload(&blB[(nr==4 ? 5 : 3)*PacketSize]);
-          if(nr==4) C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) B2 = ei_pload(&blB[6*PacketSize]);
-          if(nr==4) C3 = cj.pmadd(A0, B3, C3);
-                    A0 = ei_pload(&blA[1*PacketSize]);
-          if(nr==4) B3 = ei_pload(&blB[7*PacketSize]);
-                    C0 = cj.pmadd(A0, B0, C0);
-                    B0 = ei_pload(&blB[(nr==4 ? 8 : 4)*PacketSize]);
-                    C1 = cj.pmadd(A0, B1, C1);
-                    B1 = ei_pload(&blB[(nr==4 ? 9 : 5)*PacketSize]);
-          if(nr==4) C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) B2 = ei_pload(&blB[10*PacketSize]);
-          if(nr==4) C3 = cj.pmadd(A0, B3, C3);
-                    A0 = ei_pload(&blA[2*PacketSize]);
-          if(nr==4) B3 = ei_pload(&blB[11*PacketSize]);
+            A0 = ei_pload(&blA[0*PacketSize]);
+            B0 = ei_pload(&blB[0*PacketSize]);
+            CJMADD(A0,B0,C0,T0);
+            B0 = ei_pload(&blB[1*PacketSize]);
+            CJMADD(A0,B0,C1,T0);
 
-                    C0 = cj.pmadd(A0, B0, C0);
-                    B0 = ei_pload(&blB[(nr==4 ? 12 : 6)*PacketSize]);
-                    C1 = cj.pmadd(A0, B1, C1);
-                    B1 = ei_pload(&blB[(nr==4 ? 13 : 7)*PacketSize]);
-          if(nr==4) C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) B2 = ei_pload(&blB[14*PacketSize]);
-          if(nr==4) C3 = cj.pmadd(A0, B3, C3);
-                    A0 = ei_pload(&blA[3*PacketSize]);
-          if(nr==4) B3 = ei_pload(&blB[15*PacketSize]);
-                    C0 = cj.pmadd(A0, B0, C0);
-                    C1 = cj.pmadd(A0, B1, C1);
-          if(nr==4) C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) C3 = cj.pmadd(A0, B3, C3);
+            A0 = ei_pload(&blA[1*PacketSize]);
+            B0 = ei_pload(&blB[2*PacketSize]);
+            CJMADD(A0,B0,C0,T0);
+            B0 = ei_pload(&blB[3*PacketSize]);
+            CJMADD(A0,B0,C1,T0);
+
+            A0 = ei_pload(&blA[2*PacketSize]);
+            B0 = ei_pload(&blB[4*PacketSize]);
+            CJMADD(A0,B0,C0,T0);
+            B0 = ei_pload(&blB[5*PacketSize]);
+            CJMADD(A0,B0,C1,T0);
+
+            A0 = ei_pload(&blA[3*PacketSize]);
+            B0 = ei_pload(&blB[6*PacketSize]);
+            CJMADD(A0,B0,C0,T0);
+            B0 = ei_pload(&blB[7*PacketSize]);
+            CJMADD(A0,B0,C1,T0);
+          }
+          else
+          {
+
+            PacketType B0, B1, B2, B3, A0;
+            PacketType T0, T1;
+
+                        A0 = ei_pload(&blA[0*PacketSize]);
+                        B0 = ei_pload(&blB[0*PacketSize]);
+                        B1 = ei_pload(&blB[1*PacketSize]);
+
+                        CJMADD(A0,B0,C0,T0);
+            if(nr==4)   B2 = ei_pload(&blB[2*PacketSize]);
+            if(nr==4)   B3 = ei_pload(&blB[3*PacketSize]);
+                        B0 = ei_pload(&blB[(nr==4 ? 4 : 2)*PacketSize]);
+                        CJMADD(A0,B1,C1,T1);
+                        B1 = ei_pload(&blB[(nr==4 ? 5 : 3)*PacketSize]);
+            if(nr==4) { CJMADD(A0,B2,C2,T0); }
+            if(nr==4)   B2 = ei_pload(&blB[6*PacketSize]);
+            if(nr==4) { CJMADD(A0,B3,C3,T1); }
+                        A0 = ei_pload(&blA[1*PacketSize]);
+            if(nr==4)   B3 = ei_pload(&blB[7*PacketSize]);
+                        CJMADD(A0,B0,C0,T0);
+                        B0 = ei_pload(&blB[(nr==4 ? 8 : 4)*PacketSize]);
+                        CJMADD(A0,B1,C1,T1);
+                        B1 = ei_pload(&blB[(nr==4 ? 9 : 5)*PacketSize]);
+            if(nr==4) { CJMADD(A0,B2,C2,T0); }
+            if(nr==4)   B2 = ei_pload(&blB[10*PacketSize]);
+            if(nr==4) { CJMADD(A0,B3,C3,T1); }
+                        A0 = ei_pload(&blA[2*PacketSize]);
+            if(nr==4)   B3 = ei_pload(&blB[11*PacketSize]);
+
+                        CJMADD(A0,B0,C0,T0);
+                        B0 = ei_pload(&blB[(nr==4 ? 12 : 6)*PacketSize]);
+                        CJMADD(A0,B1,C1,T1);
+                        B1 = ei_pload(&blB[(nr==4 ? 13 : 7)*PacketSize]);
+            if(nr==4) { CJMADD(A0,B2,C2,T0); }
+            if(nr==4)   B2 = ei_pload(&blB[14*PacketSize]);
+            if(nr==4) { CJMADD(A0,B3,C3,T1); }
+                        A0 = ei_pload(&blA[3*PacketSize]);
+            if(nr==4)   B3 = ei_pload(&blB[15*PacketSize]);
+                        CJMADD(A0,B0,C0,T0);
+                        CJMADD(A0,B1,C1,T1);
+            if(nr==4) { CJMADD(A0,B2,C2,T0); }
+            if(nr==4) { CJMADD(A0,B3,C3,T1); }
+          }
 
           blB += 4*nr*PacketSize;
           blA += 4*PacketSize;
@@ -303,17 +339,32 @@ struct ei_gebp_kernel
         // process remaining peeled loop
         for(int k=peeled_kc; k<depth; k++)
         {
-          PacketType B0, B1, B2, B3, A0;
+          if(nr==2)
+          {
+            PacketType B0, T0, A0;
 
-                    A0 = ei_pload(&blA[0*PacketSize]);
-                    B0 = ei_pload(&blB[0*PacketSize]);
-                    B1 = ei_pload(&blB[1*PacketSize]);
-                    C0 = cj.pmadd(A0, B0, C0);
-          if(nr==4) B2 = ei_pload(&blB[2*PacketSize]);
-          if(nr==4) B3 = ei_pload(&blB[3*PacketSize]);
-                    C1 = cj.pmadd(A0, B1, C1);
-          if(nr==4) C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) C3 = cj.pmadd(A0, B3, C3);
+            A0 = ei_pload(&blA[0*PacketSize]);
+            B0 = ei_pload(&blB[0*PacketSize]);
+            CJMADD(A0,B0,C0,T0);
+            B0 = ei_pload(&blB[1*PacketSize]);
+            CJMADD(A0,B0,C1,T0);
+          }
+          else
+          {
+            PacketType B0, B1, B2, B3, A0;
+            PacketType T0, T1;
+
+                        A0 = ei_pload(&blA[0*PacketSize]);
+                        B0 = ei_pload(&blB[0*PacketSize]);
+                        B1 = ei_pload(&blB[1*PacketSize]);
+            if(nr==4)   B2 = ei_pload(&blB[2*PacketSize]);
+            if(nr==4)   B3 = ei_pload(&blB[3*PacketSize]);
+
+                        CJMADD(A0,B0,C0,T0);
+                        CJMADD(A0,B1,C1,T1);
+            if(nr==4) { CJMADD(A0,B2,C2,T0); }
+            if(nr==4) { CJMADD(A0,B3,C3,T1); }
+          }
 
           blB += nr*PacketSize;
           blA += PacketSize;
@@ -336,17 +387,32 @@ struct ei_gebp_kernel
         const Scalar* blB = &blockB[j2*strideB*PacketSize+offsetB*nr];
         for(int k=0; k<depth; k++)
         {
-          Scalar B0, B1, B2, B3, A0;
+          if(nr==2)
+          {
+            Scalar B0, T0, A0;
 
-                    A0 =  blA[k];
-                    B0 =  blB[0*PacketSize];
-                    B1 =  blB[1*PacketSize];
-                    C0 = cj.pmadd(A0, B0, C0);
-          if(nr==4) B2 =  blB[2*PacketSize];
-          if(nr==4) B3 =  blB[3*PacketSize];
-                    C1 = cj.pmadd(A0, B1, C1);
-          if(nr==4) C2 = cj.pmadd(A0, B2, C2);
-          if(nr==4) C3 = cj.pmadd(A0, B3, C3);
+            A0 = blA[0*PacketSize];
+            B0 = blB[0*PacketSize];
+            CJMADD(A0,B0,C0,T0);
+            B0 = blB[1*PacketSize];
+            CJMADD(A0,B0,C1,T0);
+          }
+          else
+          {
+            Scalar B0, B1, B2, B3, A0;
+            Scalar T0, T1;
+
+                        A0 = blA[k];
+                        B0 = blB[0*PacketSize];
+                        B1 = blB[1*PacketSize];
+            if(nr==4)   B2 = blB[2*PacketSize];
+            if(nr==4)   B3 = blB[3*PacketSize];
+
+                        CJMADD(A0,B0,C0,T0);
+                        CJMADD(A0,B1,C1,T1);
+            if(nr==4) { CJMADD(A0,B2,C2,T0); }
+            if(nr==4) { CJMADD(A0,B3,C3,T1); }
+          }
 
           blB += nr*PacketSize;
         }
@@ -378,13 +444,13 @@ struct ei_gebp_kernel
         const Scalar* blB = &blockB[j2*strideB*PacketSize+offsetB];
         for(int k=0; k<depth; k++)
         {
-          PacketType B0, A0, A1;
+          PacketType B0, A0, A1, T0, T1;
 
           A0 = ei_pload(&blA[0*PacketSize]);
           A1 = ei_pload(&blA[1*PacketSize]);
           B0 = ei_pload(&blB[0*PacketSize]);
-          C0 = cj.pmadd(A0, B0, C0);
-          C4 = cj.pmadd(A1, B0, C4);
+          CJMADD(A0,B0,C0,T0);
+          CJMADD(A1,B0,C4,T1);
 
           blB += PacketSize;
           blA += mr;
@@ -431,6 +497,8 @@ struct ei_gebp_kernel
   }
 };
 
+#undef CJMADD
+
 // pack a block of the lhs
 // The travesal is as follow (mr==4):
 //   0  4  8 12 ...

From d3b314569b8040c813b914381cc7d0a2e4a1a529 Mon Sep 17 00:00:00 2001
From: Thomas Capricelli <orzel@freehackers.org>
Date: Mon, 22 Feb 2010 15:39:17 +0100
Subject: [PATCH 21/38] provide default values for CXX, remove duplicate define

---
 bench/bench_unrolling | 3 ++-
 bench/benchmark_suite | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/bench/bench_unrolling b/bench/bench_unrolling
index bf01cce7d..826443845 100755
--- a/bench/bench_unrolling
+++ b/bench/bench_unrolling
@@ -2,10 +2,11 @@
 
 # gcc : CXX="g++  -finline-limit=10000 -ftemplate-depth-2000 --param max-inline-recursive-depth=2000"
 # icc : CXX="icpc -fast -no-inline-max-size -fno-exceptions"
+CXX=${CXX-g++  -finline-limit=10000 -ftemplate-depth-2000 --param max-inline-recursive-depth=2000} # default value
 
 for ((i=1; i<16; ++i)); do
     echo "Matrix size: $i x $i :"
-    $CXX -O3 -I.. -DNDEBUG  benchmark.cpp -DMATSIZE=$i -DEIGEN_UNROLLING_LIMIT=1024 -DEIGEN_UNROLLING_LIMIT=400 -o benchmark && time ./benchmark >/dev/null
+    $CXX -O3 -I.. -DNDEBUG  benchmark.cpp -DMATSIZE=$i -DEIGEN_UNROLLING_LIMIT=400 -o benchmark && time ./benchmark >/dev/null
     $CXX -O3 -I.. -DNDEBUG -finline-limit=10000 benchmark.cpp -DMATSIZE=$i -DEIGEN_DONT_USE_UNROLLED_LOOPS=1 -o benchmark && time ./benchmark >/dev/null
     echo " "
 done
diff --git a/bench/benchmark_suite b/bench/benchmark_suite
index a8fc6dced..3f21d3661 100755
--- a/bench/benchmark_suite
+++ b/bench/benchmark_suite
@@ -1,4 +1,5 @@
 #!/bin/bash
+CXX=${CXX-g++} # default value unless caller has defined CXX
 echo "Fixed size 3x3, column-major, -DNDEBUG"
 $CXX -O3 -I .. -DNDEBUG benchmark.cpp -o benchmark && time ./benchmark >/dev/null
 echo "Fixed size 3x3, column-major, with asserts"

From 5d530e0373712a71e1e17c04dcbdbf5805687c5f Mon Sep 17 00:00:00 2001
From: Mark Borgerding <mark@borgerding.net>
Date: Mon, 22 Feb 2010 21:43:30 -0500
Subject: [PATCH 22/38] enable caller to supply FFT length for Eigen Matrix
 interface functions to effect zero pad or source shrink at Nyquist bin

---
 unsupported/Eigen/FFT | 94 +++++++++++++++++++++++++++++++------------
 1 file changed, 69 insertions(+), 25 deletions(-)

diff --git a/unsupported/Eigen/FFT b/unsupported/Eigen/FFT
index c37628ed8..3c4d85d3b 100644
--- a/unsupported/Eigen/FFT
+++ b/unsupported/Eigen/FFT
@@ -173,7 +173,7 @@ class FFT
 
     template<typename InputDerived, typename ComplexDerived>
     inline
-    void fwd( MatrixBase<ComplexDerived> & dst, const MatrixBase<InputDerived> & src)
+    void fwd( MatrixBase<ComplexDerived> & dst, const MatrixBase<InputDerived> & src,int nfft=-1)
     {
       EIGEN_STATIC_ASSERT_VECTOR_ONLY(InputDerived)
       EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived)
@@ -183,16 +183,25 @@ class FFT
       EIGEN_STATIC_ASSERT(int(InputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit,
             THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES)
 
-      if ( NumTraits< typename InputDerived::Scalar >::IsComplex == 0 && HasFlag(HalfSpectrum) )
-        dst.derived().resize( (src.size()>>1)+1);
-      else
-        dst.derived().resize(src.size());
+      if (nfft<1)
+        nfft = src.size();
 
-      if (src.stride() != 1) {
-        Matrix<typename InputDerived::Scalar,1,Dynamic> tmp = src;
-        fwd( &dst[0],&tmp[0],src.size() );
+      if ( NumTraits< typename InputDerived::Scalar >::IsComplex == 0 && HasFlag(HalfSpectrum) )
+        dst.derived().resize( (nfft>>1)+1);
+      else
+        dst.derived().resize(nfft);
+
+      if ( src.stride() != 1 || src.size() < nfft ) {
+        Matrix<typename InputDerived::Scalar,1,Dynamic> tmp;
+        if (src.size()<nfft) {
+          tmp.setZero(nfft);
+          tmp.block(0,0,src.size(),1 ) = src;
+        }else{
+          tmp = src;
+        }
+        fwd( &dst[0],&tmp[0],nfft );
       }else{
-        fwd( &dst[0],&src[0],src.size() );
+        fwd( &dst[0],&src[0],nfft );
       }
     }
 
@@ -216,25 +225,60 @@ class FFT
     inline
     void inv( MatrixBase<OutputDerived> & dst, const MatrixBase<ComplexDerived> & src, int nfft=-1)
     {
-        EIGEN_STATIC_ASSERT_VECTOR_ONLY(OutputDerived)
-        EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived)
-        EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived,OutputDerived) // size at compile-time
-        EIGEN_STATIC_ASSERT((ei_is_same_type<typename ComplexDerived::Scalar, Complex>::ret),
-                            YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
-        EIGEN_STATIC_ASSERT(int(OutputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit,
-                            THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES)
+      typedef typename ComplexDerived::Scalar src_type;
+      typedef typename OutputDerived::Scalar dst_type;
+      const bool realfft= (NumTraits<dst_type>::IsComplex == 0);
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(OutputDerived)
+      EIGEN_STATIC_ASSERT_VECTOR_ONLY(ComplexDerived)
+      EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(ComplexDerived,OutputDerived) // size at compile-time
+      EIGEN_STATIC_ASSERT((ei_is_same_type<src_type, Complex>::ret),
+            YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
+      EIGEN_STATIC_ASSERT(int(OutputDerived::Flags)&int(ComplexDerived::Flags)&DirectAccessBit,
+            THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_WITH_DIRECT_MEMORY_ACCESS_SUCH_AS_MAP_OR_PLAIN_MATRICES)
 
-        if (nfft<1) {
-          nfft = ( NumTraits<typename OutputDerived::Scalar>::IsComplex == 0 && HasFlag(HalfSpectrum) ) ? 2*(src.size()-1) : src.size();
-        }
-        dst.derived().resize( nfft );
-        if (src.stride() != 1) {
-          // if the vector is strided, then we need to copy it to a packed temporary
-          Matrix<typename ComplexDerived::Scalar,1,Dynamic> tmp = src;
-          inv( &dst[0],&tmp[0], nfft);
+      if (nfft<1) { //automatic FFT size determination
+        if ( realfft && HasFlag(HalfSpectrum) ) 
+          nfft = 2*(src.size()-1); //assume even fft size
+        else
+          nfft = src.size();
+      }
+      dst.derived().resize( nfft );
+
+      // check for nfft that does not fit the input data size
+      int resize_input= ( realfft && HasFlag(HalfSpectrum) )
+        ? ( (nfft/2+1) - src.size() )
+        : ( nfft - src.size() );
+
+      if ( src.stride() != 1 || resize_input ) {
+        // if the vector is strided, then we need to copy it to a packed temporary
+        Matrix<src_type,1,Dynamic> tmp;
+        if ( resize_input ) {
+          size_t ncopy = min(src.size(),src.size() + resize_input);
+          tmp.setZero(src.size() + resize_input);
+          if ( realfft && HasFlag(HalfSpectrum) ) {
+            // pad at the Nyquist bin
+            tmp.head(ncopy) = src.head(ncopy);
+            tmp(ncopy-1) = real(tmp(ncopy-1)); // enforce real-only Nyquist bin
+          }else{
+            size_t nhead,ntail;
+            nhead = 1+ncopy/2-1; // range  [0:pi)
+            ntail = ncopy/2-1;   // range (-pi:0)
+            tmp.head(nhead) = src.head(nhead);
+            tmp.tail(ntail) = src.tail(ntail);
+            if (resize_input<0) { //shrinking -- create the Nyquist bin as the average of the two bins that fold into it
+              tmp(nhead) = ( src(nfft/2) + src( src.size() - nfft/2 ) )*src_type(.5);
+            }else{ // expanding -- split the old Nyquist bin into two halves
+              tmp(nhead) = src(nhead) * src_type(.5);
+              tmp(tmp.size()-nhead) = tmp(nhead);
+            }
+          }
         }else{
-          inv( &dst[0],&src[0], nfft);
+          tmp = src;
         }
+        inv( &dst[0],&tmp[0], nfft);
+      }else{
+        inv( &dst[0],&src[0], nfft);
+      }
     }
 
     template <typename _Output>

From 1fd8d7b96a4aac14fe829b214c6dc6d3c8d8d326 Mon Sep 17 00:00:00 2001
From: Hauke Heibel <hauke.heibel@gmail.com>
Date: Tue, 23 Feb 2010 11:35:51 +0100
Subject: [PATCH 23/38] Attempt to fix PGI compilation issue.

---
 Eigen/src/Core/util/Macros.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index dc1aa150b..37ccef047 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -211,7 +211,7 @@ using Eigen::ei_cos;
  */
 #if !EIGEN_ALIGN
   #define EIGEN_ALIGN_TO_BOUNDARY(n)
-#elif (defined __GNUC__)
+#elif (defined __GNUC__) || (defined __PGI)
   #define EIGEN_ALIGN_TO_BOUNDARY(n) __attribute__((aligned(n)))
 #elif (defined _MSC_VER)
   #define EIGEN_ALIGN_TO_BOUNDARY(n) __declspec(align(n))

From 7dc75380c101b9b4f3882f78fe6a5e9ae8963cac Mon Sep 17 00:00:00 2001
From: Benoit Jacob <jacob.benoit.1@gmail.com>
Date: Tue, 23 Feb 2010 09:04:59 -0500
Subject: [PATCH 24/38] * FullPivLU: replace "remaining==0" termination
 condition (from Golub) by a fuzzy compare   (fixes lu test failures when
 testing solve()) * LU test: set appropriate threshold and limit the number of
 times that a specially tricky test   is run. (fixes lu test failures when
 testing rank()). * Tests: rename createRandomMatrixOfRank to
 createRandomProjectionOfRank

---
 Eigen/src/LU/FullPivLU.h |  6 +++++-
 test/inverse.cpp         |  2 +-
 test/lu.cpp              | 31 ++++++++++++++++++++++++++-----
 test/main.h              |  4 ++--
 test/qr_colpivoting.cpp  |  4 ++--
 test/qr_fullpivoting.cpp |  2 +-
 6 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index 9afc448cc..ec551645b 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -404,6 +404,7 @@ FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix)
 
   m_nonzero_pivots = size; // the generic case is that in which all pivots are nonzero (invertible case)
   m_maxpivot = RealScalar(0);
+  RealScalar cutoff(0);
 
   for(int k = 0; k < size; ++k)
   {
@@ -418,8 +419,11 @@ FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix)
     row_of_biggest_in_corner += k; // correct the values! since they were computed in the corner,
     col_of_biggest_in_corner += k; // need to add k to them.
 
+    // when k==0, biggest_in_corner is the biggest coeff absolute value in the original matrix
+    if(k == 0) cutoff = biggest_in_corner * NumTraits<Scalar>::epsilon();
+
     // if the pivot (hence the corner) is exactly zero, terminate to avoid generating nan/inf values
-    if(biggest_in_corner == RealScalar(0))
+    if(ei_abs(biggest_in_corner) < cutoff)
     {
       // before exiting, make sure to initialize the still uninitialized transpositions
       // in a sane state without destroying what we already have.
diff --git a/test/inverse.cpp b/test/inverse.cpp
index 713caf4a6..3f6138e0c 100644
--- a/test/inverse.cpp
+++ b/test/inverse.cpp
@@ -42,7 +42,7 @@ template<typename MatrixType> void inverse(const MatrixType& m)
              m2(rows, cols),
              mzero = MatrixType::Zero(rows, cols),
              identity = MatrixType::Identity(rows, rows);
-  createRandomMatrixOfRank(rows,rows,rows,m1);
+  createRandomProjectionOfRank(rows,rows,rows,m1);
   m2 = m1.inverse();
   VERIFY_IS_APPROX(m1, m2.inverse() );
 
diff --git a/test/lu.cpp b/test/lu.cpp
index 568db8230..02f6ec805 100644
--- a/test/lu.cpp
+++ b/test/lu.cpp
@@ -28,7 +28,11 @@ using namespace std;
 
 template<typename MatrixType> void lu_non_invertible()
 {
+  static int times_called = 0;
+  times_called++;
+  
   typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::RealScalar RealScalar;
   /* this test covers the following files:
      LU.h
   */
@@ -64,9 +68,15 @@ template<typename MatrixType> void lu_non_invertible()
   
   MatrixType m1(rows, cols), m3(rows, cols2);
   CMatrixType m2(cols, cols2);
-  createRandomMatrixOfRank(rank, rows, cols, m1);
+  createRandomProjectionOfRank(rank, rows, cols, m1);
+
+  FullPivLU<MatrixType> lu;
+
+  // The special value 0.01 below works well in tests. Keep in mind that we're only computing the rank of projections.
+  // So it's not clear at all the epsilon should play any role there.
+  lu.setThreshold(RealScalar(0.01));
+  lu.compute(m1);
 
-  FullPivLU<MatrixType> lu(m1);
   // FIXME need better way to construct trapezoid matrices. extend triangularView to support rectangular.
   DynamicMatrixType u(rows,cols);
   for(int i = 0; i < rows; i++)
@@ -91,9 +101,20 @@ template<typename MatrixType> void lu_non_invertible()
   VERIFY(!lu.isSurjective());
   VERIFY((m1 * m1kernel).isMuchSmallerThan(m1));
   VERIFY(m1image.fullPivLu().rank() == rank);
-  DynamicMatrixType sidebyside(m1.rows(), m1.cols() + m1image.cols());
-  sidebyside << m1, m1image;
-  VERIFY(sidebyside.fullPivLu().rank() == rank);
+
+  // The following test is damn hard to get to succeed over a large number of repetitions.
+  // We're checking that the image is indeed the image, i.e. adding it as new columns doesn't increase the rank.
+  // Since we've already tested rank() above, the point here is not to test rank(), it is to test image().
+  // Since image() is implemented in a very simple way that doesn't leave much room for choice, the occasional
+  // errors that we get here (one in 1e+4 repetitions roughly) are probably just a sign that it's a really
+  // hard test, so we just limit how many times it's run.
+  if(times_called < 100)
+  {
+    DynamicMatrixType sidebyside(m1.rows(), m1.cols() + m1image.cols());
+    sidebyside << m1, m1image;
+    VERIFY(sidebyside.fullPivLu().rank() == rank);
+  }
+  
   m2 = CMatrixType::Random(cols,cols2);
   m3 = m1*m2;
   m2 = CMatrixType::Random(cols,cols2);
diff --git a/test/main.h b/test/main.h
index 64f70b394..6d296b2e3 100644
--- a/test/main.h
+++ b/test/main.h
@@ -148,7 +148,7 @@ namespace Eigen
 
 #define EIGEN_INTERNAL_DEBUGGING
 #define EIGEN_NICE_RANDOM
-#include <Eigen/QR> // required for createRandomMatrixOfRank
+#include <Eigen/QR> // required for createRandomProjectionOfRank
 
 
 #define VERIFY(a) do { if (!(a)) { \
@@ -343,7 +343,7 @@ inline bool test_isUnitary(const MatrixBase<Derived>& m)
 }
 
 template<typename MatrixType>
-void createRandomMatrixOfRank(int desired_rank, int rows, int cols, MatrixType& m)
+void createRandomProjectionOfRank(int desired_rank, int rows, int cols, MatrixType& m)
 {
   typedef typename ei_traits<MatrixType>::Scalar Scalar;
   enum { Rows = MatrixType::RowsAtCompileTime, Cols = MatrixType::ColsAtCompileTime };
diff --git a/test/qr_colpivoting.cpp b/test/qr_colpivoting.cpp
index 16eb27c52..abee32184 100644
--- a/test/qr_colpivoting.cpp
+++ b/test/qr_colpivoting.cpp
@@ -36,7 +36,7 @@ template<typename MatrixType> void qr()
   typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> MatrixQType;
   typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, 1> VectorType;
   MatrixType m1;
-  createRandomMatrixOfRank(rank,rows,cols,m1);
+  createRandomProjectionOfRank(rank,rows,cols,m1);
   ColPivHouseholderQR<MatrixType> qr(m1);
   VERIFY_IS_APPROX(rank, qr.rank());
   VERIFY(cols - qr.rank() == qr.dimensionOfKernel());
@@ -64,7 +64,7 @@ template<typename MatrixType, int Cols2> void qr_fixedsize()
   typedef typename MatrixType::Scalar Scalar;
   int rank = ei_random<int>(1, std::min(int(Rows), int(Cols))-1);
   Matrix<Scalar,Rows,Cols> m1;
-  createRandomMatrixOfRank(rank,Rows,Cols,m1);
+  createRandomProjectionOfRank(rank,Rows,Cols,m1);
   ColPivHouseholderQR<Matrix<Scalar,Rows,Cols> > qr(m1);
   VERIFY_IS_APPROX(rank, qr.rank());
   VERIFY(Cols - qr.rank() == qr.dimensionOfKernel());
diff --git a/test/qr_fullpivoting.cpp b/test/qr_fullpivoting.cpp
index c82ba4c7e..60255f94c 100644
--- a/test/qr_fullpivoting.cpp
+++ b/test/qr_fullpivoting.cpp
@@ -35,7 +35,7 @@ template<typename MatrixType> void qr()
   typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> MatrixQType;
   typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, 1> VectorType;
   MatrixType m1;
-  createRandomMatrixOfRank(rank,rows,cols,m1);
+  createRandomProjectionOfRank(rank,rows,cols,m1);
   FullPivHouseholderQR<MatrixType> qr(m1);
   VERIFY_IS_APPROX(rank, qr.rank());
   VERIFY(cols - qr.rank() == qr.dimensionOfKernel());

From d92df336ad21c7f8e0289f8ac3084b6313a17fe4 Mon Sep 17 00:00:00 2001
From: Benoit Jacob <jacob.benoit.1@gmail.com>
Date: Tue, 23 Feb 2010 15:40:24 -0500
Subject: [PATCH 25/38] Further LU test improvements. I'm not aware of any test
 failures anymore, not even with huge numbers of repetitions. Finally the
 createRandomMatrixOfRank() function is renamed to createRandomPIMatrixOfRank,
 where PI stands for 'partial isometry', that is, a matrix whose singular
 values are 0 or 1.

---
 Eigen/src/LU/FullPivLU.h |  7 +++--
 test/inverse.cpp         |  2 +-
 test/lu.cpp              | 65 +++++++++++++++-------------------------
 test/main.h              | 12 ++++++--
 test/qr_colpivoting.cpp  |  4 +--
 test/qr_fullpivoting.cpp |  2 +-
 6 files changed, 42 insertions(+), 50 deletions(-)

diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index ec551645b..0a305d52b 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -422,8 +422,11 @@ FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix)
     // when k==0, biggest_in_corner is the biggest coeff absolute value in the original matrix
     if(k == 0) cutoff = biggest_in_corner * NumTraits<Scalar>::epsilon();
 
-    // if the pivot (hence the corner) is exactly zero, terminate to avoid generating nan/inf values
-    if(ei_abs(biggest_in_corner) < cutoff)
+    // if the pivot (hence the corner) is "zero", terminate to avoid generating nan/inf values.
+    // Notice that using an exact comparison (biggest_in_corner==0) here, as Golub-van Loan do in
+    // their pseudo-code, results in numerical instability! The cutoff here has been validated
+    // by running the unit test 'lu' with many repetitions.
+    if(biggest_in_corner < cutoff)
     {
       // before exiting, make sure to initialize the still uninitialized transpositions
       // in a sane state without destroying what we already have.
diff --git a/test/inverse.cpp b/test/inverse.cpp
index 3f6138e0c..1e567ad14 100644
--- a/test/inverse.cpp
+++ b/test/inverse.cpp
@@ -42,7 +42,7 @@ template<typename MatrixType> void inverse(const MatrixType& m)
              m2(rows, cols),
              mzero = MatrixType::Zero(rows, cols),
              identity = MatrixType::Identity(rows, rows);
-  createRandomProjectionOfRank(rows,rows,rows,m1);
+  createRandomPIMatrixOfRank(rows,rows,rows,m1);
   m2 = m1.inverse();
   VERIFY_IS_APPROX(m1, m2.inverse() );
 
diff --git a/test/lu.cpp b/test/lu.cpp
index 02f6ec805..442202a33 100644
--- a/test/lu.cpp
+++ b/test/lu.cpp
@@ -28,9 +28,6 @@ using namespace std;
 
 template<typename MatrixType> void lu_non_invertible()
 {
-  static int times_called = 0;
-  times_called++;
-  
   typedef typename MatrixType::Scalar Scalar;
   typedef typename MatrixType::RealScalar RealScalar;
   /* this test covers the following files:
@@ -55,11 +52,16 @@ template<typename MatrixType> void lu_non_invertible()
     cols2 = cols = MatrixType::ColsAtCompileTime;
   }
 
+  enum {
+    RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+    ColsAtCompileTime = MatrixType::ColsAtCompileTime
+  };
   typedef typename ei_kernel_retval_base<FullPivLU<MatrixType> >::ReturnType KernelMatrixType;
   typedef typename ei_image_retval_base<FullPivLU<MatrixType> >::ReturnType ImageMatrixType;
-  typedef Matrix<typename MatrixType::Scalar, Dynamic, Dynamic> DynamicMatrixType;
-  typedef Matrix<typename MatrixType::Scalar, MatrixType::ColsAtCompileTime, MatrixType::ColsAtCompileTime>
+  typedef Matrix<typename MatrixType::Scalar, ColsAtCompileTime, ColsAtCompileTime>
           CMatrixType;
+  typedef Matrix<typename MatrixType::Scalar, RowsAtCompileTime, RowsAtCompileTime>
+          RMatrixType;
 
   int rank = ei_random<int>(1, std::min(rows, cols)-1);
 
@@ -68,26 +70,21 @@ template<typename MatrixType> void lu_non_invertible()
   
   MatrixType m1(rows, cols), m3(rows, cols2);
   CMatrixType m2(cols, cols2);
-  createRandomProjectionOfRank(rank, rows, cols, m1);
+  createRandomPIMatrixOfRank(rank, rows, cols, m1);
 
   FullPivLU<MatrixType> lu;
 
-  // The special value 0.01 below works well in tests. Keep in mind that we're only computing the rank of projections.
-  // So it's not clear at all the epsilon should play any role there.
+  // The special value 0.01 below works well in tests. Keep in mind that we're only computing the rank
+  // of singular values are either 0 or 1.
+  // So it's not clear at all that the epsilon should play any role there.
   lu.setThreshold(RealScalar(0.01));
   lu.compute(m1);
 
-  // FIXME need better way to construct trapezoid matrices. extend triangularView to support rectangular.
-  DynamicMatrixType u(rows,cols);
-  for(int i = 0; i < rows; i++)
-    for(int j = 0; j < cols; j++)
-      u(i,j) = i>j ? Scalar(0) : lu.matrixLU()(i,j);
-  DynamicMatrixType l(rows,rows);
-  for(int i = 0; i < rows; i++)
-    for(int j = 0; j < rows; j++)
-      l(i,j) = (i<j || j>=cols) ? Scalar(0)
-             : i==j ? Scalar(1)
-             : lu.matrixLU()(i,j);
+  MatrixType u(rows,cols);
+  u = lu.matrixLU().template triangularView<Upper>();
+  RMatrixType l = RMatrixType::Identity(rows,rows);
+  l.block(0,0,rows,std::min(rows,cols)).template triangularView<StrictlyLower>()
+    = lu.matrixLU().block(0,0,rows,std::min(rows,cols));
   
   VERIFY_IS_APPROX(lu.permutationP() * m1 * lu.permutationQ(), l*u);
   
@@ -101,20 +98,8 @@ template<typename MatrixType> void lu_non_invertible()
   VERIFY(!lu.isSurjective());
   VERIFY((m1 * m1kernel).isMuchSmallerThan(m1));
   VERIFY(m1image.fullPivLu().rank() == rank);
+  VERIFY_IS_APPROX(m1 * m1.adjoint() * m1image, m1image);
 
-  // The following test is damn hard to get to succeed over a large number of repetitions.
-  // We're checking that the image is indeed the image, i.e. adding it as new columns doesn't increase the rank.
-  // Since we've already tested rank() above, the point here is not to test rank(), it is to test image().
-  // Since image() is implemented in a very simple way that doesn't leave much room for choice, the occasional
-  // errors that we get here (one in 1e+4 repetitions roughly) are probably just a sign that it's a really
-  // hard test, so we just limit how many times it's run.
-  if(times_called < 100)
-  {
-    DynamicMatrixType sidebyside(m1.rows(), m1.cols() + m1image.cols());
-    sidebyside << m1, m1image;
-    VERIFY(sidebyside.fullPivLu().rank() == rank);
-  }
-  
   m2 = CMatrixType::Random(cols,cols2);
   m3 = m1*m2;
   m2 = CMatrixType::Random(cols,cols2);
@@ -128,20 +113,18 @@ template<typename MatrixType> void lu_invertible()
   /* this test covers the following files:
      LU.h
   */
+  typedef typename MatrixType::Scalar Scalar;
   typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
   int size = ei_random<int>(1,200);
 
   MatrixType m1(size, size), m2(size, size), m3(size, size);
-  m1 = MatrixType::Random(size,size);
+  FullPivLU<MatrixType> lu;
+  lu.setThreshold(0.01);
+  do {
+    m1 = MatrixType::Random(size,size);
+    lu.compute(m1);
+  } while(!lu.isInvertible());
 
-  if (ei_is_same_type<RealScalar,float>::ret)
-  {
-    // let's build a matrix more stable to inverse
-    MatrixType a = MatrixType::Random(size,size*2);
-    m1 += a * a.adjoint();
-  }
-
-  FullPivLU<MatrixType> lu(m1);
   VERIFY(0 == lu.dimensionOfKernel());
   VERIFY(lu.kernel().cols() == 1); // the kernel() should consist of a single (zero) column vector
   VERIFY(size == lu.rank());
diff --git a/test/main.h b/test/main.h
index 6d296b2e3..96324de33 100644
--- a/test/main.h
+++ b/test/main.h
@@ -148,7 +148,7 @@ namespace Eigen
 
 #define EIGEN_INTERNAL_DEBUGGING
 #define EIGEN_NICE_RANDOM
-#include <Eigen/QR> // required for createRandomProjectionOfRank
+#include <Eigen/QR> // required for createRandomPIMatrixOfRank
 
 
 #define VERIFY(a) do { if (!(a)) { \
@@ -342,8 +342,13 @@ inline bool test_isUnitary(const MatrixBase<Derived>& m)
   return m.isUnitary(test_precision<typename ei_traits<Derived>::Scalar>());
 }
 
+/** Creates a random Partial Isometry matrix of given rank.
+  *
+  * A partial isometry is a matrix all of whose singular values are either 0 or 1.
+  * This is very useful to test rank-revealing algorithms.
+  */
 template<typename MatrixType>
-void createRandomProjectionOfRank(int desired_rank, int rows, int cols, MatrixType& m)
+void createRandomPIMatrixOfRank(int desired_rank, int rows, int cols, MatrixType& m)
 {
   typedef typename ei_traits<MatrixType>::Scalar Scalar;
   enum { Rows = MatrixType::RowsAtCompileTime, Cols = MatrixType::ColsAtCompileTime };
@@ -360,7 +365,8 @@ void createRandomProjectionOfRank(int desired_rank, int rows, int cols, MatrixTy
 
   if(desired_rank == 1)
   {
-    m = VectorType::Random(rows) * VectorType::Random(cols).transpose();
+    // here we normalize the vectors to get a partial isometry
+    m = VectorType::Random(rows).normalized() * VectorType::Random(cols).normalized().transpose();
     return;
   }
 
diff --git a/test/qr_colpivoting.cpp b/test/qr_colpivoting.cpp
index abee32184..96cc66316 100644
--- a/test/qr_colpivoting.cpp
+++ b/test/qr_colpivoting.cpp
@@ -36,7 +36,7 @@ template<typename MatrixType> void qr()
   typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> MatrixQType;
   typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, 1> VectorType;
   MatrixType m1;
-  createRandomProjectionOfRank(rank,rows,cols,m1);
+  createRandomPIMatrixOfRank(rank,rows,cols,m1);
   ColPivHouseholderQR<MatrixType> qr(m1);
   VERIFY_IS_APPROX(rank, qr.rank());
   VERIFY(cols - qr.rank() == qr.dimensionOfKernel());
@@ -64,7 +64,7 @@ template<typename MatrixType, int Cols2> void qr_fixedsize()
   typedef typename MatrixType::Scalar Scalar;
   int rank = ei_random<int>(1, std::min(int(Rows), int(Cols))-1);
   Matrix<Scalar,Rows,Cols> m1;
-  createRandomProjectionOfRank(rank,Rows,Cols,m1);
+  createRandomPIMatrixOfRank(rank,Rows,Cols,m1);
   ColPivHouseholderQR<Matrix<Scalar,Rows,Cols> > qr(m1);
   VERIFY_IS_APPROX(rank, qr.rank());
   VERIFY(Cols - qr.rank() == qr.dimensionOfKernel());
diff --git a/test/qr_fullpivoting.cpp b/test/qr_fullpivoting.cpp
index 60255f94c..7ad3af1fe 100644
--- a/test/qr_fullpivoting.cpp
+++ b/test/qr_fullpivoting.cpp
@@ -35,7 +35,7 @@ template<typename MatrixType> void qr()
   typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> MatrixQType;
   typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, 1> VectorType;
   MatrixType m1;
-  createRandomProjectionOfRank(rank,rows,cols,m1);
+  createRandomPIMatrixOfRank(rank,rows,cols,m1);
   FullPivHouseholderQR<MatrixType> qr(m1);
   VERIFY_IS_APPROX(rank, qr.rank());
   VERIFY(cols - qr.rank() == qr.dimensionOfKernel());

From 3d066f4bc73fad712061d8b50d147d10988f07ff Mon Sep 17 00:00:00 2001
From: Benoit Jacob <jacob.benoit.1@gmail.com>
Date: Tue, 23 Feb 2010 16:05:37 -0500
Subject: [PATCH 26/38] LDLT: * fix bug thanks to Ben Goodrich: we were
 terminating at the wrong place, leaving some matrix coefficients with wrong
 values. * don't use Higham's formula here: we're not trying to be
 rank-revealing.

---
 Eigen/src/Cholesky/LDLT.h | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h
index 4d3149d42..708b02375 100644
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -202,11 +202,8 @@ LDLT<MatrixType>& LDLT<MatrixType>::compute(const MatrixType& a)
     {
       // The biggest overall is the point of reference to which further diagonals
       // are compared; if any diagonal is negligible compared
-      // to the largest overall, the algorithm bails.  This cutoff is suggested
-      // in "Analysis of the Cholesky Decomposition of a Semi-definite Matrix" by
-      // Nicholas J. Higham. Also see "Accuracy and Stability of Numerical
-      // Algorithms" page 217, also by Higham.
-      cutoff = ei_abs(NumTraits<Scalar>::epsilon() * RealScalar(size) * biggest_in_corner);
+      // to the largest overall, the algorithm bails.
+      cutoff = ei_abs(NumTraits<Scalar>::epsilon() * biggest_in_corner);
 
       m_sign = ei_real(m_matrix.diagonal().coeff(index_of_biggest_in_corner)) > 0 ? 1 : -1;
     }
@@ -235,13 +232,6 @@ LDLT<MatrixType>& LDLT<MatrixType>::compute(const MatrixType& a)
                                                .dot(m_matrix.col(j).head(j)));
     m_matrix.coeffRef(j,j) = Djj;
 
-    // Finish early if the matrix is not full rank.
-    if(ei_abs(Djj) < cutoff)
-    {
-      for(int i = j; i < size; i++) m_transpositions.coeffRef(i) = i;
-      break;
-    }
-
     int endSize = size - j - 1;
     if (endSize > 0) {
       _temporary.tail(endSize).noalias() = m_matrix.block(j+1,0, endSize, j)
@@ -250,6 +240,13 @@ LDLT<MatrixType>& LDLT<MatrixType>::compute(const MatrixType& a)
       m_matrix.row(j).tail(endSize) = m_matrix.row(j).tail(endSize).conjugate()
                                    - _temporary.tail(endSize).transpose();
 
+      // Finish early if the matrix is not full rank.
+      if(ei_abs(Djj) < cutoff)
+      {
+        for(int i = j; i < size; i++) m_transpositions.coeffRef(i) = i;
+        break;
+      }
+
       m_matrix.col(j).tail(endSize) = m_matrix.row(j).tail(endSize) / Djj;
     }
   }

From 60325b83309df5061fb9230af4d7edb59d0eaf1b Mon Sep 17 00:00:00 2001
From: Benoit Jacob <jacob.benoit.1@gmail.com>
Date: Tue, 23 Feb 2010 16:10:26 -0500
Subject: [PATCH 27/38] actually, this is not even meant to be a termination
 criterion. so the proper fix is this.

---
 Eigen/src/Cholesky/LDLT.h | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h
index 708b02375..7c8e1eb04 100644
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -240,14 +240,10 @@ LDLT<MatrixType>& LDLT<MatrixType>::compute(const MatrixType& a)
       m_matrix.row(j).tail(endSize) = m_matrix.row(j).tail(endSize).conjugate()
                                    - _temporary.tail(endSize).transpose();
 
-      // Finish early if the matrix is not full rank.
-      if(ei_abs(Djj) < cutoff)
+      if(ei_abs(Djj) > cutoff)
       {
-        for(int i = j; i < size; i++) m_transpositions.coeffRef(i) = i;
-        break;
+        m_matrix.col(j).tail(endSize) = m_matrix.row(j).tail(endSize) / Djj;
       }
-
-      m_matrix.col(j).tail(endSize) = m_matrix.row(j).tail(endSize) / Djj;
     }
   }
 

From f7aa9873caab20d49afd622b5ef76ecff8bfef06 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Wed, 24 Feb 2010 10:40:16 +0100
Subject: [PATCH 28/38] * fix LDLT's default ctor use * add a
 reconstructedMatrix() function to LDLT for debug purpose

---
 Eigen/src/Cholesky/LDLT.h | 55 ++++++++++++++++++++++++++++++++-------
 Eigen/src/Cholesky/LLT.h  |  2 +-
 2 files changed, 47 insertions(+), 10 deletions(-)

diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h
index 7c8e1eb04..8cfc256bb 100644
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -62,14 +62,21 @@ template<typename _MatrixType> class LDLT
     typedef Matrix<int, MatrixType::RowsAtCompileTime, 1> IntColVectorType;
     typedef Matrix<int, 1, MatrixType::RowsAtCompileTime> IntRowVectorType;
 
-    /**
-    * \brief Default Constructor.
-    *
-    * The default constructor is useful in cases in which the user intends to
-    * perform decompositions via LDLT::compute(const MatrixType&).
-    */
+    /** \brief Default Constructor.
+      *
+      * The default constructor is useful in cases in which the user intends to
+      * perform decompositions via LDLT::compute(const MatrixType&).
+      */
     LDLT() : m_matrix(), m_p(), m_transpositions(), m_isInitialized(false) {}
 
+    /** \brief Default Constructor with memory preallocation
+      *
+      * Like the default constructor but with preallocation of the internal data
+      * according to the specified problem \a size.
+      * \sa LDLT()
+      */
+    LDLT(int size) : m_matrix(size,size), m_p(size), m_transpositions(size), m_isInitialized(false) {}
+
     LDLT(const MatrixType& matrix)
       : m_matrix(matrix.rows(), matrix.cols()),
         m_p(matrix.rows()),
@@ -148,6 +155,8 @@ template<typename _MatrixType> class LDLT
       return m_matrix;
     }
 
+    const MatrixType reconstructedMatrix() const;
+
     inline int rows() const { return m_matrix.rows(); }
     inline int cols() const { return m_matrix.cols(); }
 
@@ -175,6 +184,10 @@ LDLT<MatrixType>& LDLT<MatrixType>::compute(const MatrixType& a)
 
   m_matrix = a;
 
+  m_p.resize(size);
+  m_transpositions.resize(size);
+  m_isInitialized = false;
+
   if (size <= 1) {
     m_p.setZero();
     m_transpositions.setZero();
@@ -228,8 +241,7 @@ LDLT<MatrixType>& LDLT<MatrixType>::compute(const MatrixType& a)
       continue;
     }
 
-    RealScalar Djj = ei_real(m_matrix.coeff(j,j) -  m_matrix.row(j).head(j)
-                                               .dot(m_matrix.col(j).head(j)));
+    RealScalar Djj = ei_real(m_matrix.coeff(j,j) -  m_matrix.row(j).head(j).dot(m_matrix.col(j).head(j)));
     m_matrix.coeffRef(j,j) = Djj;
 
     int endSize = size - j - 1;
@@ -238,7 +250,7 @@ LDLT<MatrixType>& LDLT<MatrixType>::compute(const MatrixType& a)
                                 * m_matrix.col(j).head(j).conjugate();
 
       m_matrix.row(j).tail(endSize) = m_matrix.row(j).tail(endSize).conjugate()
-                                   - _temporary.tail(endSize).transpose();
+                                    - _temporary.tail(endSize).transpose();
 
       if(ei_abs(Djj) > cutoff)
       {
@@ -308,6 +320,31 @@ bool LDLT<MatrixType>::solveInPlace(MatrixBase<Derived> &bAndX) const
   return true;
 }
 
+/** \returns the matrix represented by the decomposition,
+ * i.e., it returns the product: P^T L D L^* P.
+ * This function is provided for debug purpose. */
+template<typename MatrixType>
+const MatrixType LDLT<MatrixType>::reconstructedMatrix() const
+{
+  ei_assert(m_isInitialized && "LDLT is not initialized.");
+  const int size = m_matrix.rows();
+  MatrixType res(size,size);
+  res.setIdentity();
+
+  // PI
+  for(int i = 0; i < size; ++i) res.row(m_transpositions.coeff(i)).swap(res.row(i));
+  // L^* P
+  res = matrixL().adjoint() * res;
+  // D(L^*P)
+  res = vectorD().asDiagonal() * res;
+  // L(DL^*P)
+  res = matrixL() * res;
+  // P^T (LDL^*P)
+  for (int i = size-1; i >= 0; --i) res.row(m_transpositions.coeff(i)).swap(res.row(i));
+
+  return res;
+}
+
 /** \cholesky_module
   * \returns the Cholesky decomposition with full pivoting without square root of \c *this
   */
diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h
index 474b82406..96e1e5f73 100644
--- a/Eigen/src/Cholesky/LLT.h
+++ b/Eigen/src/Cholesky/LLT.h
@@ -117,7 +117,7 @@ template<typename _MatrixType, int _UpLo> class LLT
                 && "LLT::solve(): invalid number of rows of the right hand side matrix b");
       return ei_solve_retval<LLT, Rhs>(*this, b.derived());
     }
-    
+
     template<typename Derived>
     bool solveInPlace(MatrixBase<Derived> &bAndX) const;
 

From a7e4c0f8250ebcbab8cb26eea0730f12f5e4281d Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Wed, 24 Feb 2010 11:28:38 +0100
Subject: [PATCH 29/38] make testsuite aware of EIGEN_CTEST_ARGS

---
 test/testsuite.cmake | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/test/testsuite.cmake b/test/testsuite.cmake
index 90edf2853..b68a327a9 100644
--- a/test/testsuite.cmake
+++ b/test/testsuite.cmake
@@ -147,6 +147,9 @@ endif(NOT EIGEN_NO_UPDATE)
 
 # which ctest command to use for running the dashboard
 SET (CTEST_COMMAND "${EIGEN_CMAKE_DIR}ctest -D ${EIGEN_MODE}")
+if($ENV{EIGEN_CTEST_ARGS})
+SET (CTEST_COMMAND "${CTEST_COMMAND} $ENV{EIGEN_CTEST_ARGS}")
+endif($ENV{EIGEN_CTEST_ARGS})
 # what cmake command to use for configuring this dashboard
 SET (CTEST_CMAKE_COMMAND "${EIGEN_CMAKE_DIR}cmake -DEIGEN_LEAVE_TEST_IN_ALL_TARGET=ON")
 

From 7c98c04412322e56b3b6f7e235bc7ebb61ab6b43 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Wed, 24 Feb 2010 19:16:10 +0100
Subject: [PATCH 30/38] add reconstructedMatrix() to LLT, and LUs => they show
 that some improvements have still to be done    for permutations, tr*tr,
 trapezoidal matrices

---
 Eigen/src/Cholesky/LDLT.h   |  4 ++--
 Eigen/src/Cholesky/LLT.h    | 12 ++++++++++++
 Eigen/src/LU/FullPivLU.h    | 29 +++++++++++++++++++++++++++++
 Eigen/src/LU/PartialPivLU.h | 20 ++++++++++++++++++++
 test/cholesky.cpp           |  7 +++----
 test/lu.cpp                 | 21 +++++++++++++++++++++
 6 files changed, 87 insertions(+), 6 deletions(-)

diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h
index 8cfc256bb..8699fe7e0 100644
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -155,7 +155,7 @@ template<typename _MatrixType> class LDLT
       return m_matrix;
     }
 
-    const MatrixType reconstructedMatrix() const;
+    MatrixType reconstructedMatrix() const;
 
     inline int rows() const { return m_matrix.rows(); }
     inline int cols() const { return m_matrix.cols(); }
@@ -324,7 +324,7 @@ bool LDLT<MatrixType>::solveInPlace(MatrixBase<Derived> &bAndX) const
  * i.e., it returns the product: P^T L D L^* P.
  * This function is provided for debug purpose. */
 template<typename MatrixType>
-const MatrixType LDLT<MatrixType>::reconstructedMatrix() const
+MatrixType LDLT<MatrixType>::reconstructedMatrix() const
 {
   ei_assert(m_isInitialized && "LDLT is not initialized.");
   const int size = m_matrix.rows();
diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h
index 96e1e5f73..2e8df7661 100644
--- a/Eigen/src/Cholesky/LLT.h
+++ b/Eigen/src/Cholesky/LLT.h
@@ -133,6 +133,8 @@ template<typename _MatrixType, int _UpLo> class LLT
       return m_matrix;
     }
 
+    MatrixType reconstructedMatrix() const;
+
     inline int rows() const { return m_matrix.rows(); }
     inline int cols() const { return m_matrix.cols(); }
 
@@ -295,6 +297,16 @@ bool LLT<MatrixType,_UpLo>::solveInPlace(MatrixBase<Derived> &bAndX) const
   return true;
 }
 
+/** \returns the matrix represented by the decomposition,
+ * i.e., it returns the product: L L^*.
+ * This function is provided for debug purpose. */
+template<typename MatrixType, int _UpLo>
+MatrixType LLT<MatrixType,_UpLo>::reconstructedMatrix() const
+{
+  ei_assert(m_isInitialized && "LLT is not initialized.");
+  return matrixL() * matrixL().adjoint().toDenseMatrix();
+}
+
 /** \cholesky_module
   * \returns the LLT decomposition of \c *this
   */
diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index 0a305d52b..cd63b9ec7 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -361,6 +361,8 @@ template<typename _MatrixType> class FullPivLU
                (*this, MatrixType::Identity(m_lu.rows(), m_lu.cols()));
     }
 
+    MatrixType reconstructedMatrix() const;
+
     inline int rows() const { return m_lu.rows(); }
     inline int cols() const { return m_lu.cols(); }
 
@@ -487,6 +489,33 @@ typename ei_traits<MatrixType>::Scalar FullPivLU<MatrixType>::determinant() cons
   return Scalar(m_det_pq) * Scalar(m_lu.diagonal().prod());
 }
 
+/** \returns the matrix represented by the decomposition,
+ * i.e., it returns the product: P^{-1} L U Q^{-1}.
+ * This function is provided for debug purpose. */
+template<typename MatrixType>
+MatrixType FullPivLU<MatrixType>::reconstructedMatrix() const
+{
+  ei_assert(m_isInitialized && "LU is not initialized.");
+  const int smalldim = std::min(m_lu.rows(), m_lu.cols());
+  // LU
+  MatrixType res(m_lu.rows(),m_lu.cols());
+  // FIXME the .toDenseMatrix() should not be needed...
+  res = m_lu.corner(TopLeft,m_lu.rows(),smalldim)
+            .template triangularView<UnitLower>().toDenseMatrix()
+      * m_lu.corner(TopLeft,smalldim,m_lu.cols())
+            .template triangularView<Upper>().toDenseMatrix();
+
+  // P^{-1}(LU)
+  // FIXME implement inplace permutation
+  res = (m_p.inverse() * res).eval();
+
+  // (P^{-1}LU)Q^{-1}
+  // FIXME implement inplace permutation
+  res = (res * m_q.inverse()).eval();
+
+  return res;
+}
+
 /********* Implementation of kernel() **************************************************/
 
 template<typename _MatrixType>
diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h
index ed2354d78..fcffc2458 100644
--- a/Eigen/src/LU/PartialPivLU.h
+++ b/Eigen/src/LU/PartialPivLU.h
@@ -165,6 +165,8 @@ template<typename _MatrixType> class PartialPivLU
       */
     typename ei_traits<MatrixType>::Scalar determinant() const;
 
+    MatrixType reconstructedMatrix() const;
+
     inline int rows() const { return m_lu.rows(); }
     inline int cols() const { return m_lu.cols(); }
 
@@ -400,6 +402,24 @@ typename ei_traits<MatrixType>::Scalar PartialPivLU<MatrixType>::determinant() c
   return Scalar(m_det_p) * m_lu.diagonal().prod();
 }
 
+/** \returns the matrix represented by the decomposition,
+ * i.e., it returns the product: P^{-1} L U.
+ * This function is provided for debug purpose. */
+template<typename MatrixType>
+MatrixType PartialPivLU<MatrixType>::reconstructedMatrix() const
+{
+  ei_assert(m_isInitialized && "LU is not initialized.");
+  // LU
+  MatrixType res = m_lu.template triangularView<UnitLower>().toDenseMatrix()
+                 * m_lu.template triangularView<Upper>();
+  
+  // P^{-1}(LU)
+  // FIXME implement inplace permutation
+  res = (m_p.inverse() * res).eval();
+
+  return res;
+}
+
 /***** Implementation of solve() *****************************************************/
 
 template<typename _MatrixType, typename Rhs>
diff --git a/test/cholesky.cpp b/test/cholesky.cpp
index 1bb808d20..a446f5d73 100644
--- a/test/cholesky.cpp
+++ b/test/cholesky.cpp
@@ -95,7 +95,7 @@ template<typename MatrixType> void cholesky(const MatrixType& m)
 
   {
     LLT<SquareMatrixType,Lower> chollo(symmLo);
-    VERIFY_IS_APPROX(symm, chollo.matrixL().toDenseMatrix() * chollo.matrixL().adjoint().toDenseMatrix());
+    VERIFY_IS_APPROX(symm, chollo.reconstructedMatrix());
     vecX = chollo.solve(vecB);
     VERIFY_IS_APPROX(symm * vecX, vecB);
     matX = chollo.solve(matB);
@@ -103,7 +103,7 @@ template<typename MatrixType> void cholesky(const MatrixType& m)
 
     // test the upper mode
     LLT<SquareMatrixType,Upper> cholup(symmUp);
-    VERIFY_IS_APPROX(symm, cholup.matrixL().toDenseMatrix() * cholup.matrixL().adjoint().toDenseMatrix());
+    VERIFY_IS_APPROX(symm, cholup.reconstructedMatrix());
     vecX = cholup.solve(vecB);
     VERIFY_IS_APPROX(symm * vecX, vecB);
     matX = cholup.solve(matB);
@@ -119,8 +119,7 @@ template<typename MatrixType> void cholesky(const MatrixType& m)
 
   {
     LDLT<SquareMatrixType> ldlt(symm);
-    // TODO(keir): This doesn't make sense now that LDLT pivots.
-    //VERIFY_IS_APPROX(symm, ldlt.matrixL() * ldlt.vectorD().asDiagonal() * ldlt.matrixL().adjoint());
+    VERIFY_IS_APPROX(symm, ldlt.reconstructedMatrix());
     vecX = ldlt.solve(vecB);
     VERIFY_IS_APPROX(symm * vecX, vecB);
     matX = ldlt.solve(matB);
diff --git a/test/lu.cpp b/test/lu.cpp
index 442202a33..1ed38cb2b 100644
--- a/test/lu.cpp
+++ b/test/lu.cpp
@@ -91,6 +91,7 @@ template<typename MatrixType> void lu_non_invertible()
   KernelMatrixType m1kernel = lu.kernel();
   ImageMatrixType m1image = lu.image(m1);
 
+  VERIFY_IS_APPROX(m1, lu.reconstructedMatrix());
   VERIFY(rank == lu.rank());
   VERIFY(cols - lu.rank() == lu.dimensionOfKernel());
   VERIFY(!lu.isInjective());
@@ -125,6 +126,7 @@ template<typename MatrixType> void lu_invertible()
     lu.compute(m1);
   } while(!lu.isInvertible());
 
+  VERIFY_IS_APPROX(m1, lu.reconstructedMatrix());
   VERIFY(0 == lu.dimensionOfKernel());
   VERIFY(lu.kernel().cols() == 1); // the kernel() should consist of a single (zero) column vector
   VERIFY(size == lu.rank());
@@ -138,6 +140,23 @@ template<typename MatrixType> void lu_invertible()
   VERIFY_IS_APPROX(m2, lu.inverse()*m3);
 }
 
+template<typename MatrixType> void lu_partial_piv()
+{
+  /* this test covers the following files:
+     PartialPivLU.h
+  */
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename NumTraits<typename MatrixType::Scalar>::Real RealScalar;
+  int rows = ei_random<int>(1,4);
+  int cols = rows;
+
+  MatrixType m1(cols, rows);
+  m1.setRandom();
+  PartialPivLU<MatrixType> plu(m1);
+
+  VERIFY_IS_APPROX(m1, plu.reconstructedMatrix());
+}
+
 template<typename MatrixType> void lu_verify_assert()
 {
   MatrixType tmp;
@@ -180,6 +199,7 @@ void test_lu()
     
     CALL_SUBTEST_4( lu_non_invertible<MatrixXd>() );
     CALL_SUBTEST_4( lu_invertible<MatrixXd>() );
+    CALL_SUBTEST_4( lu_partial_piv<MatrixXd>() );
     CALL_SUBTEST_4( lu_verify_assert<MatrixXd>() );
     
     CALL_SUBTEST_5( lu_non_invertible<MatrixXcf>() );
@@ -188,6 +208,7 @@ void test_lu()
     
     CALL_SUBTEST_6( lu_non_invertible<MatrixXcd>() );
     CALL_SUBTEST_6( lu_invertible<MatrixXcd>() );
+    CALL_SUBTEST_6( lu_partial_piv<MatrixXcd>() );
     CALL_SUBTEST_6( lu_verify_assert<MatrixXcd>() );
 
     CALL_SUBTEST_7(( lu_non_invertible<Matrix<float,Dynamic,16> >() ));

From 0f3d69b65ee17d4ca9393fe1318ff239a411bfad Mon Sep 17 00:00:00 2001
From: Thomas Capricelli <orzel@freehackers.org>
Date: Wed, 24 Feb 2010 21:43:30 +0100
Subject: [PATCH 31/38] Provide "eigen" defines to decide which instruction set
 is used (sse3, ssse3 and sse4), independantly from the compiler. Only those
 defines should be used in other places, and the user can rely on those to
 know which sets are used.

---
 Eigen/Core                           | 35 ++++++++++++++++++++++++----
 Eigen/src/Core/arch/SSE/PacketMath.h |  8 +++----
 2 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/Eigen/Core b/Eigen/Core
index cbca16640..0306be3a8 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -61,20 +61,45 @@
 
 #ifndef EIGEN_DONT_VECTORIZE
   #if defined (EIGEN_SSE2_BUT_NOT_OLD_GCC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
+
+    // Defines symbols for compile-time detection of which instructions are
+    // used.
+    // EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used
     #define EIGEN_VECTORIZE
     #define EIGEN_VECTORIZE_SSE
-    #include <emmintrin.h>
-    #include <xmmintrin.h>
+    #define EIGEN_VECTORIZE_SSE2
+
+    // Detect sse3/ssse3/sse4:
+    // gcc and icc defines __SSE3__, ..,
+    // there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you
+    // want to force the use of those instructions with msvc.
     #ifdef __SSE3__
-      #include <pmmintrin.h>
+      #define EIGEN_VECTORIZE_SSE3
     #endif
     #ifdef __SSSE3__
-      #include <tmmintrin.h>
+      #define EIGEN_VECTORIZE_SSSE3
     #endif
     #ifdef __SSE4_1__
-      #include <smmintrin.h>
+      #define EIGEN_VECTORIZE_SSE4_1
     #endif
     #ifdef __SSE4_2__
+      #define EIGEN_VECTORIZE_SSE4_2
+    #endif
+
+    // include files
+
+    #include <emmintrin.h>
+    #include <xmmintrin.h>
+    #ifdef  EIGEN_VECTORIZE_SSE3
+      #include <pmmintrin.h>
+    #endif
+    #ifdef EIGEN_VECTORIZE_SSSE3
+      #include <tmmintrin.h>
+    #endif
+    #ifdef EIGEN_VECTORIZE_SSE4_1
+      #include <smmintrin.h>
+    #endif
+    #ifdef EIGEN_VECTORIZE_SSE4_2
       #include <nmmintrin.h>
     #endif
   #elif defined __ALTIVEC__
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index a5a56f759..f78bf0dd3 100644
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -122,7 +122,7 @@ template<> EIGEN_STRONG_INLINE Packet4f ei_pmul<Packet4f>(const Packet4f& a, con
 template<> EIGEN_STRONG_INLINE Packet2d ei_pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_mul_pd(a,b); }
 template<> EIGEN_STRONG_INLINE Packet4i ei_pmul<Packet4i>(const Packet4i& a, const Packet4i& b)
 {
-#ifdef __SSE4_1__
+#ifdef EIGEN_VECTORIZE_SSE4_1
   return _mm_mullo_epi32(a,b);
 #else
   // this version is slightly faster than 4 scalar products
@@ -269,7 +269,7 @@ template<> EIGEN_STRONG_INLINE Packet2d ei_pabs(const Packet2d& a)
 }
 template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a)
 {
-  #ifdef __SSSE3__
+  #ifdef EIGEN_VECTORIZE_SSSE3
   return _mm_abs_epi32(a);
   #else
   Packet4i aux = _mm_srai_epi32(a,31);
@@ -278,7 +278,7 @@ template<> EIGEN_STRONG_INLINE Packet4i ei_pabs(const Packet4i& a)
 }
 
 
-#ifdef __SSE3__
+#ifdef EIGEN_VECTORIZE_SSE3
 // TODO implement SSE2 versions as well as integer versions
 template<> EIGEN_STRONG_INLINE Packet4f ei_preduxp<Packet4f>(const Packet4f* vecs)
 {
@@ -439,7 +439,7 @@ template<> EIGEN_STRONG_INLINE int ei_predux_max<Packet4i>(const Packet4i& a)
 // }
 #endif
 
-#ifdef __SSSE3__
+#ifdef EIGEN_VECTORIZE_SSSE3
 // SSSE3 versions
 template<int Offset>
 struct ei_palign_impl<Offset,Packet4f>

From 00bc535b66641eb89f0608608ea64e0afda07e50 Mon Sep 17 00:00:00 2001
From: Thomas Capricelli <orzel@freehackers.org>
Date: Wed, 24 Feb 2010 21:52:08 +0100
Subject: [PATCH 32/38] provide a static method to describe which SIMD
 instructions are used

---
 Eigen/Core | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/Eigen/Core b/Eigen/Core
index 0306be3a8..a9003b294 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -146,6 +146,24 @@
 
 namespace Eigen {
 
+inline static const char *SimdInstructionsSetInUse(void) {
+#if defined(EIGEN_VECTORIZE_SSE4_2)
+  return "sse, sse2, sse3, ssse3, sse4.1, sse4.2";
+#elif defined(EIGEN_VECTORIZE_SSE4_2)
+  return "sse, sse2, sse3, ssse3, sse4.1";
+#elif defined(EIGEN_VECTORIZE_SSSE3)
+  return "sse, sse2, sse3, ssse3";
+#elif defined(EIGEN_VECTORIZE_SSE3)
+  return "sse, sse2, sse3";
+#elif defined(EIGEN_VECTORIZE_SSE2)
+  return "sse, sse2";
+#elif defined(EIGEN_VECTORIZE_ALTIVEC)
+  return "Altivec";
+#else
+  return "None";
+#endif
+}
+
 // we use size_t frequently and we'll never remember to prepend it with std:: everytime just to
 // ensure QNX/QCC support
 using std::size_t;

From 50a5ac3c4bfc658e59af3afdb01cd0b46960e7e3 Mon Sep 17 00:00:00 2001
From: Thomas Capricelli <orzel@freehackers.org>
Date: Thu, 25 Feb 2010 05:31:22 +0100
Subject: [PATCH 33/38] oops, fix typo

---
 Eigen/Core | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Eigen/Core b/Eigen/Core
index a9003b294..41372666b 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -149,7 +149,7 @@ namespace Eigen {
 inline static const char *SimdInstructionsSetInUse(void) {
 #if defined(EIGEN_VECTORIZE_SSE4_2)
   return "sse, sse2, sse3, ssse3, sse4.1, sse4.2";
-#elif defined(EIGEN_VECTORIZE_SSE4_2)
+#elif defined(EIGEN_VECTORIZE_SSE4_1)
   return "sse, sse2, sse3, ssse3, sse4.1";
 #elif defined(EIGEN_VECTORIZE_SSSE3)
   return "sse, sse2, sse3, ssse3";

From 77c922bf051862b240d841c025f6c388c776463e Mon Sep 17 00:00:00 2001
From: Benoit Jacob <jacob.benoit.1@gmail.com>
Date: Thu, 25 Feb 2010 06:43:45 -0500
Subject: [PATCH 34/38] * move the 's': InstructionsSet ---> InstructionSets *
 proper capitalization: SSE, AltiVec

---
 Eigen/Core | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/Eigen/Core b/Eigen/Core
index 41372666b..5b28e6ba7 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -146,19 +146,19 @@
 
 namespace Eigen {
 
-inline static const char *SimdInstructionsSetInUse(void) {
+inline static const char *SimdInstructionSetsInUse(void) {
 #if defined(EIGEN_VECTORIZE_SSE4_2)
-  return "sse, sse2, sse3, ssse3, sse4.1, sse4.2";
+  return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
 #elif defined(EIGEN_VECTORIZE_SSE4_1)
-  return "sse, sse2, sse3, ssse3, sse4.1";
+  return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
 #elif defined(EIGEN_VECTORIZE_SSSE3)
-  return "sse, sse2, sse3, ssse3";
+  return "SSE, SSE2, SSE3, SSSE3";
 #elif defined(EIGEN_VECTORIZE_SSE3)
-  return "sse, sse2, sse3";
+  return "SSE, SSE2, SSE3";
 #elif defined(EIGEN_VECTORIZE_SSE2)
-  return "sse, sse2";
+  return "SSE, SSE2";
 #elif defined(EIGEN_VECTORIZE_ALTIVEC)
-  return "Altivec";
+  return "AltiVec";
 #else
   return "None";
 #endif

From d9ca0c0d3643f4b777de686a2c0cddde075aa063 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Thu, 25 Feb 2010 15:31:15 +0100
Subject: [PATCH 35/38] optimize inverse permutations

---
 Eigen/src/Core/PermutationMatrix.h | 139 +++++++++++++++++++++++++----
 test/permutationmatrices.cpp       |   4 +-
 2 files changed, 122 insertions(+), 21 deletions(-)

diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h
index fcd2e46cc..c42812ec8 100644
--- a/Eigen/src/Core/PermutationMatrix.h
+++ b/Eigen/src/Core/PermutationMatrix.h
@@ -47,7 +47,7 @@
   * \sa class DiagonalMatrix
   */
 template<int SizeAtCompileTime, int MaxSizeAtCompileTime = SizeAtCompileTime> class PermutationMatrix;
-template<typename PermutationType, typename MatrixType, int Side> struct ei_permut_matrix_product_retval;
+template<typename PermutationType, typename MatrixType, int Side, bool Transposed=false> struct ei_permut_matrix_product_retval;
 
 template<int SizeAtCompileTime, int MaxSizeAtCompileTime>
 struct ei_traits<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> >
@@ -132,6 +132,9 @@ class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime,
     /** \returns the number of columns */
     inline int cols() const { return m_indices.size(); }
 
+    /** \returns the size of a side of the respective square matrix, i.e., the number of indices */
+    inline int size() const { return m_indices.size(); }
+
     #ifndef EIGEN_PARSED_BY_DOXYGEN
     template<typename DenseDerived>
     void evalTo(MatrixBase<DenseDerived>& other) const
@@ -213,16 +216,29 @@ class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime,
       return *this;
     }
 
-    /**** inversion and multiplication helpers to hopefully get RVO ****/
+    /** \returns the inverse permutation matrix.
+      *
+      * \note \note_try_to_help_rvo
+      */
+    inline Transpose<PermutationMatrix> inverse() const
+    { return *this; }
+    /** \returns the tranpose permutation matrix.
+      *
+      * \note \note_try_to_help_rvo
+      */
+    inline Transpose<PermutationMatrix> transpose() const
+    { return *this; }
+
+    /**** multiplication helpers to hopefully get RVO ****/
 
 #ifndef EIGEN_PARSED_BY_DOXYGEN
-  protected:
-    enum Inverse_t {Inverse};
-    PermutationMatrix(Inverse_t, const PermutationMatrix& other)
-      : m_indices(other.m_indices.size())
+    template<int OtherSize, int OtherMaxSize>
+    PermutationMatrix(const Transpose<PermutationMatrix<OtherSize,OtherMaxSize> >& other)
+      : m_indices(other.nestedPermutation().size())
     {
-      for (int i=0; i<rows();++i) m_indices.coeffRef(other.m_indices.coeff(i)) = i;
+      for (int i=0; i<rows();++i) m_indices.coeffRef(other.nestedPermutation().indices().coeff(i)) = i;
     }
+  protected:
     enum Product_t {Product};
     PermutationMatrix(Product_t, const PermutationMatrix& lhs, const PermutationMatrix& rhs)
       : m_indices(lhs.m_indices.size())
@@ -233,12 +249,7 @@ class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime,
 #endif
 
   public:
-    /** \returns the inverse permutation matrix.
-      *
-      * \note \note_try_to_help_rvo
-      */
-    inline PermutationMatrix inverse() const
-    { return PermutationMatrix(Inverse, *this); }
+
     /** \returns the product permutation matrix.
       *
       * \note \note_try_to_help_rvo
@@ -247,6 +258,22 @@ class PermutationMatrix : public EigenBase<PermutationMatrix<SizeAtCompileTime,
     inline PermutationMatrix operator*(const PermutationMatrix<OtherSize, OtherMaxSize>& other) const
     { return PermutationMatrix(Product, *this, other); }
 
+    /** \returns the product of a permutation with another inverse permutation.
+      *
+      * \note \note_try_to_help_rvo
+      */
+    template<int OtherSize, int OtherMaxSize>
+    inline PermutationMatrix operator*(const Transpose<PermutationMatrix<OtherSize,OtherMaxSize> >& other) const
+    { return PermutationMatrix(Product, *this, other.eval()); }
+
+    /** \returns the product of an inverse permutation with another permutation.
+      *
+      * \note \note_try_to_help_rvo
+      */
+    template<int OtherSize, int OtherMaxSize> friend
+    inline PermutationMatrix operator*(const Transpose<PermutationMatrix<OtherSize,OtherMaxSize> >& other, const PermutationMatrix& perm)
+    { return PermutationMatrix(Product, other.eval(), perm); }
+
   protected:
 
     IndicesType m_indices;
@@ -277,15 +304,15 @@ operator*(const PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> &perm
            (permutation, matrix.derived());
 }
 
-template<typename PermutationType, typename MatrixType, int Side>
-struct ei_traits<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side> >
+template<typename PermutationType, typename MatrixType, int Side, bool Transposed>
+struct ei_traits<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
 {
   typedef typename MatrixType::PlainObject ReturnType;
 };
 
-template<typename PermutationType, typename MatrixType, int Side>
+template<typename PermutationType, typename MatrixType, int Side, bool Transposed>
 struct ei_permut_matrix_product_retval
- : public ReturnByValue<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side> >
+ : public ReturnByValue<ei_permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
 {
     typedef typename ei_cleantype<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
 
@@ -305,7 +332,7 @@ struct ei_permut_matrix_product_retval
           Dest,
           Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime,
           Side==OnTheRight ? 1 : Dest::ColsAtCompileTime
-        >(dst, Side==OnTheLeft ? m_permutation.indices().coeff(i) : i)
+        >(dst, ((Side==OnTheLeft) ^ Transposed) ? m_permutation.indices().coeff(i) : i)
 
         =
 
@@ -313,7 +340,7 @@ struct ei_permut_matrix_product_retval
           MatrixTypeNestedCleaned,
           Side==OnTheLeft ? 1 : MatrixType::RowsAtCompileTime,
           Side==OnTheRight ? 1 : MatrixType::ColsAtCompileTime
-        >(m_matrix, Side==OnTheRight ? m_permutation.indices().coeff(i) : i);
+        >(m_matrix, ((Side==OnTheRight) ^ Transposed) ? m_permutation.indices().coeff(i) : i);
       }
     }
 
@@ -322,4 +349,78 @@ struct ei_permut_matrix_product_retval
     const typename MatrixType::Nested m_matrix;
 };
 
+/* Template partial specialization for transposed/inverse permutations */
+
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime>
+struct ei_traits<Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> > >
+ : ei_traits<Matrix<int,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime> >
+{};
+
+template<int SizeAtCompileTime, int MaxSizeAtCompileTime>
+class Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> >
+  : public EigenBase<Transpose<PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> > >
+{
+    typedef PermutationMatrix<SizeAtCompileTime, MaxSizeAtCompileTime> PermutationType;
+    typedef typename PermutationType::IndicesType IndicesType;
+  public:
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    typedef ei_traits<PermutationType> Traits;
+    typedef Matrix<int,SizeAtCompileTime,SizeAtCompileTime,0,MaxSizeAtCompileTime,MaxSizeAtCompileTime>
+            DenseMatrixType;
+    enum {
+      Flags = Traits::Flags,
+      CoeffReadCost = Traits::CoeffReadCost,
+      RowsAtCompileTime = Traits::RowsAtCompileTime,
+      ColsAtCompileTime = Traits::ColsAtCompileTime,
+      MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime,
+      MaxColsAtCompileTime = Traits::MaxColsAtCompileTime
+    };
+    typedef typename Traits::Scalar Scalar;
+    #endif
+
+    Transpose(const PermutationType& p) : m_permutation(p) {}
+
+    inline int rows() const { return m_permutation.rows(); }
+    inline int cols() const { return m_permutation.cols(); }
+
+    #ifndef EIGEN_PARSED_BY_DOXYGEN
+    template<typename DenseDerived>
+    void evalTo(MatrixBase<DenseDerived>& other) const
+    {
+      other.setZero();
+      for (int i=0; i<rows();++i)
+        other.coeffRef(i, m_permutation.indices().coeff(i)) = typename DenseDerived::Scalar(1);
+    }
+    #endif
+
+    /** \return the equivalent permutation matrix */
+    PermutationType eval() const { return *this; }
+
+    DenseMatrixType toDenseMatrix() const { return *this; }
+
+    /** \returns the matrix with the inverse permutation applied to the columns.
+      */
+    template<typename Derived> friend
+    inline const ei_permut_matrix_product_retval<PermutationType, Derived, OnTheRight, true>
+    operator*(const MatrixBase<Derived>& matrix, const Transpose& trPerm)
+    {
+      return ei_permut_matrix_product_retval<PermutationType, Derived, OnTheRight, true>(trPerm.m_permutation, matrix.derived());
+    }
+
+    /** \returns the matrix with the inverse permutation applied to the rows.
+      */
+    template<typename Derived>
+    inline const ei_permut_matrix_product_retval<PermutationType, Derived, OnTheLeft, true>
+    operator*(const MatrixBase<Derived>& matrix) const
+    {
+      return ei_permut_matrix_product_retval<PermutationType, Derived, OnTheLeft, true>(m_permutation, matrix.derived());
+    }
+
+    const PermutationType& nestedPermutation() const { return m_permutation; }
+
+  protected:
+    const PermutationType& m_permutation;
+};
+
 #endif // EIGEN_PERMUTATIONMATRIX_H
diff --git a/test/permutationmatrices.cpp b/test/permutationmatrices.cpp
index 0ef0a371a..ae1bd8b85 100644
--- a/test/permutationmatrices.cpp
+++ b/test/permutationmatrices.cpp
@@ -51,7 +51,7 @@ template<typename MatrixType> void permutationmatrices(const MatrixType& m)
   typedef Matrix<int, Rows, 1> LeftPermutationVectorType;
   typedef PermutationMatrix<Cols> RightPermutationType;
   typedef Matrix<int, Cols, 1> RightPermutationVectorType;
-  
+
   int rows = m.rows();
   int cols = m.cols();
 
@@ -72,7 +72,7 @@ template<typename MatrixType> void permutationmatrices(const MatrixType& m)
   Matrix<Scalar,Cols,Cols> rm(rp);
 
   VERIFY_IS_APPROX(m_permuted, lm*m_original*rm);
-  
+
   VERIFY_IS_APPROX(lp.inverse()*m_permuted*rp.inverse(), m_original);
   VERIFY((lp*lp.inverse()).toDenseMatrix().isIdentity());
 

From 959a1b5d6335833e9ad49a088502705bb6967ff5 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Thu, 25 Feb 2010 16:30:58 +0100
Subject: [PATCH 36/38] detect and implement inplace permutations

---
 Eigen/src/Core/PermutationMatrix.h | 49 ++++++++++++++++++++++--------
 Eigen/src/Core/Transpose.h         | 19 ------------
 Eigen/src/Core/util/BlasUtil.h     | 18 +++++++++++
 Eigen/src/LU/FullPivLU.h           |  8 ++---
 Eigen/src/LU/PartialPivLU.h        |  5 ++-
 test/permutationmatrices.cpp       | 19 +++++++++++-
 6 files changed, 78 insertions(+), 40 deletions(-)

diff --git a/Eigen/src/Core/PermutationMatrix.h b/Eigen/src/Core/PermutationMatrix.h
index c42812ec8..46884dc3f 100644
--- a/Eigen/src/Core/PermutationMatrix.h
+++ b/Eigen/src/Core/PermutationMatrix.h
@@ -326,21 +326,46 @@ struct ei_permut_matrix_product_retval
     template<typename Dest> inline void evalTo(Dest& dst) const
     {
       const int n = Side==OnTheLeft ? rows() : cols();
-      for(int i = 0; i < n; ++i)
+
+      if(ei_is_same_type<MatrixTypeNestedCleaned,Dest>::ret && ei_extract_data(dst) == ei_extract_data(m_matrix))
       {
-        Block<
-          Dest,
-          Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime,
-          Side==OnTheRight ? 1 : Dest::ColsAtCompileTime
-        >(dst, ((Side==OnTheLeft) ^ Transposed) ? m_permutation.indices().coeff(i) : i)
+        // apply the permutation inplace
+        Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(m_permutation.size());
+        mask.fill(false);
+        int r = 0;
+        while(r < m_permutation.size())
+        {
+          // search for the next seed
+          while(r<m_permutation.size() && mask[r]) r++;
+          if(r>=m_permutation.size())
+            break;
+          // we got one, let's follow it until we are back to the seed
+          int k0 = r++;
+          int kPrev = k0;
+          mask.coeffRef(k0) = true;
+          for(int k=m_permutation.indices().coeff(k0); k!=k0; k=m_permutation.indices().coeff(k))
+          {
+                  Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
+            .swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
+                       (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev));
 
-        =
+            mask.coeffRef(k) = true;
+            kPrev = k;
+          }
+        }
+      }
+      else
+      {
+        for(int i = 0; i < n; ++i)
+        {
+          Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
+               (dst, ((Side==OnTheLeft) ^ Transposed) ? m_permutation.indices().coeff(i) : i)
 
-        Block<
-          MatrixTypeNestedCleaned,
-          Side==OnTheLeft ? 1 : MatrixType::RowsAtCompileTime,
-          Side==OnTheRight ? 1 : MatrixType::ColsAtCompileTime
-        >(m_matrix, ((Side==OnTheRight) ^ Transposed) ? m_permutation.indices().coeff(i) : i);
+          =
+
+          Block<MatrixTypeNestedCleaned,Side==OnTheLeft ? 1 : MatrixType::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixType::ColsAtCompileTime>
+               (m_matrix, ((Side==OnTheRight) ^ Transposed) ? m_permutation.indices().coeff(i) : i);
+        }
       }
     }
 
diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h
index bd06d8464..6c0e50de2 100644
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -295,25 +295,6 @@ struct ei_blas_traits<SelfCwiseBinaryOp<BinOp,NestedXpr> >
   static inline const XprType extract(const XprType& x) { return x; }
 };
 
-
-template<typename T, int Access=ei_blas_traits<T>::ActualAccess>
-struct ei_extract_data_selector {
-  static typename T::Scalar* run(const T& m)
-  {
-    return &ei_blas_traits<T>::extract(m).const_cast_derived().coeffRef(0,0);
-  }
-};
-
-template<typename T>
-struct ei_extract_data_selector<T,NoDirectAccess> {
-  static typename T::Scalar* run(const T&) { return 0; }
-};
-
-template<typename T> typename T::Scalar* ei_extract_data(const T& m)
-{
-  return ei_extract_data_selector<T>::run(m);
-}
-
 template<typename Scalar, bool DestIsTranposed, typename OtherDerived>
 struct ei_check_transpose_aliasing_selector
 {
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
index 2ca463d5d..4d216d77a 100644
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -236,4 +236,22 @@ struct ei_blas_traits<Transpose<NestedXpr> >
   static inline Scalar extractScalarFactor(const XprType& x) { return Base::extractScalarFactor(x.nestedExpression()); }
 };
 
+template<typename T, int Access=ei_blas_traits<T>::ActualAccess>
+struct ei_extract_data_selector {
+  static const typename T::Scalar* run(const T& m)
+  {
+    return &ei_blas_traits<T>::extract(m).const_cast_derived().coeffRef(0,0); // FIXME this should be .data()
+  }
+};
+
+template<typename T>
+struct ei_extract_data_selector<T,NoDirectAccess> {
+  static typename T::Scalar* run(const T&) { return 0; }
+};
+
+template<typename T> const typename T::Scalar* ei_extract_data(const T& m)
+{
+  return ei_extract_data_selector<T>::run(m);
+}
+
 #endif // EIGEN_BLASUTIL_H
diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index cd63b9ec7..dea6ec41c 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -119,7 +119,7 @@ template<typename _MatrixType> class FullPivLU
       *          diagonal coefficient of U.
       */
     RealScalar maxPivot() const { return m_maxpivot; }
-    
+
     /** \returns the permutation matrix P
       *
       * \sa permutationQ()
@@ -506,12 +506,10 @@ MatrixType FullPivLU<MatrixType>::reconstructedMatrix() const
             .template triangularView<Upper>().toDenseMatrix();
 
   // P^{-1}(LU)
-  // FIXME implement inplace permutation
-  res = (m_p.inverse() * res).eval();
+  res = m_p.inverse() * res;
 
   // (P^{-1}LU)Q^{-1}
-  // FIXME implement inplace permutation
-  res = (res * m_q.inverse()).eval();
+  res = res * m_q.inverse();
 
   return res;
 }
diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h
index fcffc2458..ad0d6b810 100644
--- a/Eigen/src/LU/PartialPivLU.h
+++ b/Eigen/src/LU/PartialPivLU.h
@@ -412,10 +412,9 @@ MatrixType PartialPivLU<MatrixType>::reconstructedMatrix() const
   // LU
   MatrixType res = m_lu.template triangularView<UnitLower>().toDenseMatrix()
                  * m_lu.template triangularView<Upper>();
-  
+
   // P^{-1}(LU)
-  // FIXME implement inplace permutation
-  res = (m_p.inverse() * res).eval();
+  res = m_p.inverse() * res;
 
   return res;
 }
diff --git a/test/permutationmatrices.cpp b/test/permutationmatrices.cpp
index ae1bd8b85..89142d910 100644
--- a/test/permutationmatrices.cpp
+++ b/test/permutationmatrices.cpp
@@ -86,6 +86,23 @@ template<typename MatrixType> void permutationmatrices(const MatrixType& m)
   identityp.setIdentity(rows);
   VERIFY_IS_APPROX(m_original, identityp*m_original);
 
+  // check inplace permutations
+  m_permuted = m_original;
+  m_permuted = lp.inverse() * m_permuted;
+  VERIFY_IS_APPROX(m_permuted, lp.inverse()*m_original);
+
+  m_permuted = m_original;
+  m_permuted = m_permuted * rp.inverse();
+  VERIFY_IS_APPROX(m_permuted, m_original*rp.inverse());
+
+  m_permuted = m_original;
+  m_permuted = lp * m_permuted;
+  VERIFY_IS_APPROX(m_permuted, lp*m_original);
+
+  m_permuted = m_original;
+  m_permuted = m_permuted * rp;
+  VERIFY_IS_APPROX(m_permuted, m_original*rp);
+
   if(rows>1 && cols>1)
   {
     lp2 = lp;
@@ -114,7 +131,7 @@ void test_permutationmatrices()
     CALL_SUBTEST_2( permutationmatrices(Matrix3f()) );
     CALL_SUBTEST_3( permutationmatrices(Matrix<double,3,3,RowMajor>()) );
     CALL_SUBTEST_4( permutationmatrices(Matrix4d()) );
-    CALL_SUBTEST_5( permutationmatrices(Matrix<double,4,6>()) );
+    CALL_SUBTEST_5( permutationmatrices(Matrix<double,40,60>()) );
     CALL_SUBTEST_6( permutationmatrices(Matrix<double,Dynamic,Dynamic,RowMajor>(20, 30)) );
     CALL_SUBTEST_7( permutationmatrices(MatrixXcf(15, 10)) );
   }

From 53bae6b3f8129bb4e0e2790255911b46bb09c0d5 Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Thu, 25 Feb 2010 21:59:25 +0100
Subject: [PATCH 37/38] update matrix product selection rules for 1xSmallxLarge
 and the transposed case

---
 Eigen/src/Core/Product.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h
index fe6d29c7d..236e4f130 100644
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -95,10 +95,10 @@ template<>                    struct ei_product_type_selector<Small, Large, 1>
 template<>                    struct ei_product_type_selector<Large, Small, 1>    { enum { ret = LazyCoeffBasedProductMode }; };
 template<>                    struct ei_product_type_selector<1,    Large,Small>  { enum { ret = GemvProduct }; };
 template<>                    struct ei_product_type_selector<1,    Large,Large>  { enum { ret = GemvProduct }; };
-template<>                    struct ei_product_type_selector<1,    Small,Large>  { enum { ret = GemvProduct }; };
+template<>                    struct ei_product_type_selector<1,    Small,Large>  { enum { ret = CoeffBasedProductMode }; };
 template<>                    struct ei_product_type_selector<Large,1,    Small>  { enum { ret = GemvProduct }; };
 template<>                    struct ei_product_type_selector<Large,1,    Large>  { enum { ret = GemvProduct }; };
-template<>                    struct ei_product_type_selector<Small,1,    Large>  { enum { ret = GemvProduct }; };
+template<>                    struct ei_product_type_selector<Small,1,    Large>  { enum { ret = CoeffBasedProductMode }; };
 template<>                    struct ei_product_type_selector<Small,Small,Large>  { enum { ret = GemmProduct }; };
 template<>                    struct ei_product_type_selector<Large,Small,Large>  { enum { ret = GemmProduct }; };
 template<>                    struct ei_product_type_selector<Small,Large,Large>  { enum { ret = GemmProduct }; };

From 90e4a605ef920759a23cdbd24e6e7b69ce549162 Mon Sep 17 00:00:00 2001
From: Jitse Niesen <jitse@maths.leeds.ac.uk>
Date: Thu, 25 Feb 2010 22:33:38 +0000
Subject: [PATCH 38/38] ComplexSchur: compute shift more stably, introduce
 exceptional shifts. Both the new computation of  the eigenvalues of a 2x2
 block and the exceptional shifts are taken from EISPACK routine COMQR.

---
 Eigen/src/Eigenvalues/ComplexSchur.h | 42 +++++++++++++++-------------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/Eigen/src/Eigenvalues/ComplexSchur.h b/Eigen/src/Eigenvalues/ComplexSchur.h
index 5deac3247..0fad415a2 100644
--- a/Eigen/src/Eigenvalues/ComplexSchur.h
+++ b/Eigen/src/Eigenvalues/ComplexSchur.h
@@ -154,6 +154,14 @@ void ComplexSchur<MatrixType>::compute(const MatrixType& matrix, bool skipU)
   m_matT = hess.matrixH();
   if(!skipU)  m_matU = hess.matrixQ();
 
+
+  // Reduce the Hessenberg matrix m_matT to triangular form by QR iteration.
+
+  // The matrix m_matT is divided in three parts. 
+  // Rows 0,...,il-1 are decoupled from the rest because m_matT(il,il-1) is zero. 
+  // Rows il,...,iu is the part we are working on (the active submatrix).
+  // Rows iu+1,...,end are already brought in triangular form.
+
   int iu = m_matT.cols() - 1;
   int il;
   RealScalar d,sd,sf;
@@ -164,7 +172,7 @@ void ComplexSchur<MatrixType>::compute(const MatrixType& matrix, bool skipU)
   int iter = 0;
   while(true)
   {
-    //locate the range in which to iterate
+    // find iu, the bottom row of the active submatrix
     while(iu > 0)
     {
       d = ei_norm1(m_matT.coeff(iu,iu)) + ei_norm1(m_matT.coeff(iu-1,iu-1));
@@ -187,6 +195,7 @@ void ComplexSchur<MatrixType>::compute(const MatrixType& matrix, bool skipU)
       return;
     }
 
+    // find il, the top row of the active submatrix
     il = iu-1;
     while(il > 0)
     {
@@ -202,15 +211,16 @@ void ComplexSchur<MatrixType>::compute(const MatrixType& matrix, bool skipU)
 
     if( il != 0 ) m_matT.coeffRef(il,il-1) = Complex(0);
 
-    // compute the shift (the normalization by sf is to avoid under/overflow)
+    // compute the shift kappa as one of the eigenvalues of the 2x2
+    // diagonal block on the bottom of the active submatrix
+
     Matrix<Scalar,2,2> t = m_matT.template block<2,2>(iu-1,iu-1);
     sf = t.cwiseAbs().sum();
-    t /= sf;
+    t /= sf;     // the normalization by sf is to avoid under/overflow
 
-    c = t.determinant();
-    b = t.diagonal().sum();
-
-    disc = ei_sqrt(b*b - RealScalar(4)*c);
+    b = t.coeff(0,0) + t.coeff(1,1);
+    c = t.coeff(0,0) - t.coeff(1,1);
+    disc = ei_sqrt(c*c + RealScalar(4)*t.coeff(0,1)*t.coeff(1,0));
 
     r1 = (b+disc)/RealScalar(2);
     r2 = (b-disc)/RealScalar(2);
@@ -224,6 +234,12 @@ void ComplexSchur<MatrixType>::compute(const MatrixType& matrix, bool skipU)
       kappa = sf * r1;
     else
       kappa = sf * r2;
+    
+    if (iter == 10 || iter == 20) 
+    {
+      // exceptional shift, taken from http://www.netlib.org/eispack/comqr.f
+      kappa = ei_abs(ei_real(m_matT.coeff(iu,iu-1))) + ei_abs(ei_real(m_matT.coeff(iu-1,iu-2)));
+    }
 
     // perform the QR step using Givens rotations
     PlanarRotation<Complex> rot;
@@ -246,18 +262,6 @@ void ComplexSchur<MatrixType>::compute(const MatrixType& matrix, bool skipU)
     }
   }
 
-  // FIXME : is it necessary ?
-  /*
-  for(int i=0 ; i<n ; i++)
-    for(int j=0 ; j<n ; j++)
-    {
-      if(ei_abs(ei_real(m_matT.coeff(i,j))) < eps)
-        ei_real_ref(m_matT.coeffRef(i,j)) = 0;
-      if(ei_imag(ei_abs(m_matT.coeff(i,j))) < eps)
-        ei_imag_ref(m_matT.coeffRef(i,j)) = 0;
-    }
-  */
-
   m_isInitialized = true;
   m_matUisUptodate = !skipU;
 }