merge with default branch
commit 9cd2d14005
@@ -105,26 +105,63 @@ if(EIGEN_DEFAULT_TO_ROW_MAJOR)
add_definitions("-DEIGEN_DEFAULT_TO_ROW_MAJOR")
endif()

add_definitions("-DEIGEN_PERMANENTLY_DISABLE_STUPID_WARNINGS")

set(EIGEN_TEST_MAX_SIZE "320" CACHE STRING "Maximal matrix/vector size, default is 320")

if(CMAKE_COMPILER_IS_GNUCXX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -fexceptions -fno-check-new -fno-common -fstrict-aliasing")
macro(ei_add_cxx_compiler_flag FLAG)
string(REGEX REPLACE "-" "" SFLAG ${FLAG})
check_cxx_compiler_flag(${FLAG} COMPILER_SUPPORT_${SFLAG})
if(COMPILER_SUPPORT_${SFLAG})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}")
endif()
endmacro(ei_add_cxx_compiler_flag)

if(NOT MSVC)
# We assume that other compilers are partly compatible with GNUCC

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions")
set(CMAKE_CXX_FLAGS_DEBUG "-g3")
set(CMAKE_CXX_FLAGS_RELEASE "-g0 -O2")

check_cxx_compiler_flag("-Wno-variadic-macros" COMPILER_SUPPORT_WNOVARIADICMACRO)
if(COMPILER_SUPPORT_WNOVARIADICMACRO)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-variadic-macros")

# clang outputs some warnings for unknown flags that are not caught by check_cxx_compiler_flag
# adding -Werror turns such warnings into errors
check_cxx_compiler_flag("-Werror" COMPILER_SUPPORT_WERROR)
if(COMPILER_SUPPORT_WERROR)
set(CMAKE_REQUIRED_FLAGS "-Werror")
endif()

check_cxx_compiler_flag("-Wextra" COMPILER_SUPPORT_WEXTRA)
if(COMPILER_SUPPORT_WEXTRA)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra")

ei_add_cxx_compiler_flag("-pedantic")
ei_add_cxx_compiler_flag("-Wall")
ei_add_cxx_compiler_flag("-Wextra")
#ei_add_cxx_compiler_flag("-Weverything") # clang

ei_add_cxx_compiler_flag("-Wundef")
ei_add_cxx_compiler_flag("-Wcast-align")
ei_add_cxx_compiler_flag("-Wchar-subscripts")
ei_add_cxx_compiler_flag("-Wnon-virtual-dtor")
ei_add_cxx_compiler_flag("-Wunused-local-typedefs")
ei_add_cxx_compiler_flag("-Wpointer-arith")
ei_add_cxx_compiler_flag("-Wwrite-strings")
ei_add_cxx_compiler_flag("-Wformat-security")

ei_add_cxx_compiler_flag("-Wno-psabi")
ei_add_cxx_compiler_flag("-Wno-variadic-macros")
ei_add_cxx_compiler_flag("-Wno-long-long")

ei_add_cxx_compiler_flag("-fno-check-new")
ei_add_cxx_compiler_flag("-fno-common")
ei_add_cxx_compiler_flag("-fstrict-aliasing")
ei_add_cxx_compiler_flag("-wd981") # disable ICC's "operands are evaluated in unspecified order" remark

# The -ansi flag must be added last, otherwise it is also used as a linker flag by check_cxx_compiler_flag, making it fail
# Moreover we should not set both -strict-ansi and -ansi
check_cxx_compiler_flag("-strict-ansi" COMPILER_SUPPORT_STRICTANSI)
if(COMPILER_SUPPORT_STRICTANSI)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -strict-ansi")
else()
ei_add_cxx_compiler_flag("-ansi")
endif()

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic")

set(CMAKE_REQUIRED_FLAGS "")

option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF)
if(EIGEN_TEST_SSE2)
@@ -164,7 +201,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)

option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF)
if(EIGEN_TEST_NEON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -mcpu=cortex-a8")
message(STATUS "Enabling NEON in tests/examples")
endif()

@@ -177,9 +214,8 @@ if(CMAKE_COMPILER_IS_GNUCXX)
endif()
endif()

endif(CMAKE_COMPILER_IS_GNUCXX)
else(NOT MSVC)

if(MSVC)
# C4127 - conditional expression is constant
# C4714 - marked as __forceinline not inlined (I failed to deactivate it selectively)
# We can disable this warning in the unit tests since it is clear that it occurs
@@ -209,7 +245,7 @@ if(MSVC)
endif(NOT CMAKE_CL_64)
message(STATUS "Enabling SSE2 in tests/examples")
endif(EIGEN_TEST_SSE2)
endif(MSVC)
endif(NOT MSVC)

option(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION "Disable explicit vectorization in tests/examples" OFF)
option(EIGEN_TEST_X87 "Force using X87 instructions. Implies no vectorization." OFF)
@@ -308,6 +344,7 @@ add_subdirectory(Eigen)
add_subdirectory(doc EXCLUDE_FROM_ALL)

include(EigenConfigureTesting)

# fixme, not sure this line is still needed:
enable_testing() # must be called from the root CMakeLists, see man page

@@ -342,6 +379,8 @@ if(NOT WIN32)
add_subdirectory(bench/spbench EXCLUDE_FROM_ALL)
endif(NOT WIN32)

configure_file(scripts/cdashtesting.cmake.in cdashtesting.cmake @ONLY)

ei_testing_print_summary()

message(STATUS "")
@@ -11,3 +11,7 @@ set(CTEST_DROP_METHOD "http")
set(CTEST_DROP_SITE "manao.inria.fr")
set(CTEST_DROP_LOCATION "/CDash/submit.php?project=Eigen")
set(CTEST_DROP_SITE_CDASH TRUE)
set(CTEST_PROJECT_SUBPROJECTS
Official
Unsupported
)
@@ -65,7 +65,7 @@
#endif
#else
// Remember that usage of defined() in a #define is undefined by the standard
#if (defined __SSE2__) && ( (!defined __GNUC__) || EIGEN_GNUC_AT_LEAST(4,2) )
#if (defined __SSE2__) && ( (!defined __GNUC__) || (defined __INTEL_COMPILER) || EIGEN_GNUC_AT_LEAST(4,2) )
#define EIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
#endif
#endif
@@ -56,7 +56,10 @@
* \endcode
*/

#ifndef EIGEN_MPL2_ONLY
#include "src/OrderingMethods/Amd.h"
#endif

#include "src/OrderingMethods/Ordering.h"
#include "src/Core/util/ReenableStupidWarnings.h"

@@ -7,6 +7,8 @@
* - SparseCore
* - OrderingMethods
* - SparseCholesky
* - SparseLU
* - SparseQR
* - IterativeLinearSolvers
*
* \code
@@ -17,6 +19,8 @@
#include "SparseCore"
#include "OrderingMethods"
#include "SparseCholesky"
#include "SparseLU"
#include "SparseQR"
#include "IterativeLinearSolvers"

#endif // EIGEN_SPARSE_MODULE_H

@@ -20,11 +20,19 @@
* \endcode
*/

#ifdef EIGEN_MPL2_ONLY
#error The SparseCholesky module has nothing to offer in MPL2 only mode
#endif

#include "src/misc/Solve.h"
#include "src/misc/SparseSolve.h"

#include "src/SparseCholesky/SimplicialCholesky.h"

#ifndef EIGEN_MPL2_ONLY
#include "src/SparseCholesky/SimplicialCholesky_impl.h"
#endif

#include "src/Core/util/ReenableStupidWarnings.h"

#endif // EIGEN_SPARSECHOLESKY_MODULE_H

@@ -2,6 +2,7 @@
// for linear algebra.
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -14,7 +15,9 @@

/**
* \defgroup SparseLU_Module SparseLU module
*
* This module defines a supernodal factorization of general sparse matrices.
* The code is fully optimized for supernode-panel updates with specialized kernels.
* Please see the documentation of the SparseLU class for more details.
*/

// Ordering interface
@@ -23,8 +26,8 @@
#include "src/SparseLU/SparseLU_gemm_kernel.h"

#include "src/SparseLU/SparseLU_Structs.h"
#include "src/SparseLU/SparseLU_Matrix.h"
#include "src/SparseLU/SparseLUBase.h"
#include "src/SparseLU/SparseLU_SupernodalMatrix.h"
#include "src/SparseLU/SparseLUImpl.h"
#include "src/SparseCore/SparseColEtree.h"
#include "src/SparseLU/SparseLU_Memory.h"
#include "src/SparseLU/SparseLU_heap_relax_snode.h"

@@ -26,4 +26,4 @@

#include "src/Core/util/ReenableStupidWarnings.h"

#endif
#endif
@@ -196,7 +196,7 @@ template<typename _MatrixType, int _UpLo> class LDLT
LDLT& compute(const MatrixType& matrix);

template <typename Derived>
LDLT& rankUpdate(const MatrixBase<Derived>& w,RealScalar alpha=1);
LDLT& rankUpdate(const MatrixBase<Derived>& w, const RealScalar& alpha=1);

/** \returns the internal LDLT decomposition matrix
*
@@ -347,7 +347,7 @@ template<> struct ldlt_inplace<Lower>
// Here only rank-1 updates are implemented, to reduce the
// requirement for intermediate storage and improve accuracy
template<typename MatrixType, typename WDerived>
static bool updateInPlace(MatrixType& mat, MatrixBase<WDerived>& w, typename MatrixType::RealScalar sigma=1)
static bool updateInPlace(MatrixType& mat, MatrixBase<WDerived>& w, const typename MatrixType::RealScalar& sigma=1)
{
using internal::isfinite;
typedef typename MatrixType::Scalar Scalar;
@@ -386,7 +386,7 @@ template<> struct ldlt_inplace<Lower>
}

template<typename MatrixType, typename TranspositionType, typename Workspace, typename WType>
static bool update(MatrixType& mat, const TranspositionType& transpositions, Workspace& tmp, const WType& w, typename MatrixType::RealScalar sigma=1)
static bool update(MatrixType& mat, const TranspositionType& transpositions, Workspace& tmp, const WType& w, const typename MatrixType::RealScalar& sigma=1)
{
// Apply the permutation to the input w
tmp = transpositions * w;
@@ -405,7 +405,7 @@ template<> struct ldlt_inplace<Upper>
}

template<typename MatrixType, typename TranspositionType, typename Workspace, typename WType>
static EIGEN_STRONG_INLINE bool update(MatrixType& mat, TranspositionType& transpositions, Workspace& tmp, WType& w, typename MatrixType::RealScalar sigma=1)
static EIGEN_STRONG_INLINE bool update(MatrixType& mat, TranspositionType& transpositions, Workspace& tmp, WType& w, const typename MatrixType::RealScalar& sigma=1)
{
Transpose<MatrixType> matt(mat);
return ldlt_inplace<Lower>::update(matt, transpositions, tmp, w.conjugate(), sigma);
@@ -457,7 +457,7 @@ LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::compute(const MatrixType& a)
*/
template<typename MatrixType, int _UpLo>
template<typename Derived>
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w,typename NumTraits<typename MatrixType::Scalar>::Real sigma)
LDLT<MatrixType,_UpLo>& LDLT<MatrixType,_UpLo>::rankUpdate(const MatrixBase<Derived>& w, const typename NumTraits<typename MatrixType::Scalar>::Real& sigma)
{
const Index size = w.rows();
if (m_isInitialized)
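For context, a minimal usage sketch of the rankUpdate API whose signature changes above. This example is illustrative, not part of the patch; it assumes a positive definite matrix so the update succeeds:

#include <Eigen/Cholesky>
#include <iostream>

int main()
{
  // Build a symmetric positive definite matrix S.
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 4);
  Eigen::MatrixXd S = A * A.transpose() + 4.0 * Eigen::MatrixXd::Identity(4, 4);

  Eigen::LDLT<Eigen::MatrixXd> ldlt(S);
  Eigen::VectorXd w = Eigen::VectorXd::Random(4);

  // Rank-1 update: the factorization now represents S + 2 * w * w^T.
  // sigma (here 2.0) binds to the new const-reference parameter.
  ldlt.rankUpdate(w, 2.0);
  std::cout << ldlt.reconstructedMatrix() << "\n";
  return 0;
}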
@@ -200,7 +200,7 @@ static typename MatrixType::Index llt_rank_update_lower(MatrixType& mat, const V
typedef Matrix<Scalar,Dynamic,1> TempVectorType;
typedef typename TempVectorType::SegmentReturnType TempVecSegment;

int n = mat.cols();
Index n = mat.cols();
eigen_assert(mat.rows()==n && vec.size()==n);

TempVectorType temp;

@@ -212,12 +212,12 @@ static typename MatrixType::Index llt_rank_update_lower(MatrixType& mat, const V
// i.e., for sigma > 0
temp = sqrt(sigma) * vec;

for(int i=0; i<n; ++i)
for(Index i=0; i<n; ++i)
{
JacobiRotation<Scalar> g;
g.makeGivens(mat(i,i), -temp(i), &mat(i,i));

int rs = n-i-1;
Index rs = n-i-1;
if(rs>0)
{
ColXprSegment x(mat.col(i).tail(rs));
@@ -230,7 +230,7 @@ static typename MatrixType::Index llt_rank_update_lower(MatrixType& mat, const V
{
temp = vec;
RealScalar beta = 1;
for(int j=0; j<n; ++j)
for(Index j=0; j<n; ++j)
{
RealScalar Ljj = real(mat.coeff(j,j));
RealScalar dj = abs2(Ljj);

@@ -51,7 +51,6 @@ void cholmod_configure_matrix(CholmodType& mat)
template<typename _Scalar, int _Options, typename _Index>
cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_Index>& mat)
{
typedef SparseMatrix<_Scalar,_Options,_Index> MatrixType;
cholmod_sparse res;
res.nzmax = mat.nonZeros();
res.nrow = mat.rows();
@@ -114,7 +114,7 @@ class Array
EIGEN_STRONG_INLINE explicit Array() : Base()
{
Base::_check_template_params();
EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}

#ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -125,7 +125,7 @@ class Array
: Base(internal::constructor_without_unaligned_array_assert())
{
Base::_check_template_params();
EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#endif

@@ -143,7 +143,7 @@ class Array
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Array)
eigen_assert(dim >= 0);
eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}

#ifndef EIGEN_PARSED_BY_DOXYGEN

@@ -158,7 +158,7 @@ template<typename Derived1, typename Derived2, int Index, int Stop>
struct assign_DefaultTraversal_InnerUnrolling
{
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, int outer)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer)
{
dst.copyCoeffByOuterInner(outer, Index, src);
assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Index+1, Stop>::run(dst, src, outer);
@@ -169,7 +169,7 @@ template<typename Derived1, typename Derived2, int Stop>
struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, int) {}
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {}
};
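The Index/Stop template recursion above follows a standard compile-time unrolling idiom: a primary template runs the body for one index and recurses, and a <Stop, Stop> specialization terminates. A self-contained sketch of the same pattern, with hypothetical names rather than Eigen code:

#include <iostream>

// Primary template: run the body for Index, then recurse on Index+1.
template <int Index, int Stop>
struct unroll {
  template <typename F>
  static void run(F& f) {
    f(Index);
    unroll<Index + 1, Stop>::run(f);
  }
};

// Terminator: when Index reaches Stop, do nothing
// (mirrors the <Stop, Stop> specializations above).
template <int Stop>
struct unroll<Stop, Stop> {
  template <typename F>
  static void run(F&) {}
};

struct Print { void operator()(int i) const { std::cout << i << " "; } };

int main()
{
  Print p;
  unroll<0, 4>::run(p);  // prints: 0 1 2 3
  return 0;
}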
/***********************
@@ -224,7 +224,7 @@ struct assign_innervec_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
template<typename Derived1, typename Derived2, int Index, int Stop>
struct assign_innervec_InnerUnrolling
{
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, int outer)
static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer)
{
dst.template copyPacketByOuterInner<Derived2, Aligned, Aligned>(outer, Index, src);
assign_innervec_InnerUnrolling<Derived1, Derived2,
@@ -235,7 +235,7 @@ struct assign_innervec_InnerUnrolling
template<typename Derived1, typename Derived2, int Stop>
struct assign_innervec_InnerUnrolling<Derived1, Derived2, Stop, Stop>
{
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, int) {}
static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {}
};

/***************************************************************************

@@ -149,8 +149,8 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
inner = Index % DstXprType::InnerSizeAtCompileTime
};

EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
SrcEvaluatorType &srcEvaluator)
static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
SrcEvaluatorType &srcEvaluator)
{
dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator);
copy_using_evaluator_DefaultTraversal_CompleteUnrolling
@@ -162,15 +162,15 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { }
static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { }
};

template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
{
EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
SrcEvaluatorType &srcEvaluator,
int outer)
static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
SrcEvaluatorType &srcEvaluator,
int outer)
{
dstEvaluator.copyCoeffByOuterInner(outer, Index, srcEvaluator);
copy_using_evaluator_DefaultTraversal_InnerUnrolling
@@ -182,7 +182,7 @@ struct copy_using_evaluator_DefaultTraversal_InnerUnrolling
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
};

/***********************
@@ -192,8 +192,8 @@ struct copy_using_evaluator_DefaultTraversal_InnerUnrolling<DstEvaluatorType, Sr
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
{
EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
SrcEvaluatorType &srcEvaluator)
static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
SrcEvaluatorType &srcEvaluator)
{
dstEvaluator.copyCoeff(Index, srcEvaluator);
copy_using_evaluator_LinearTraversal_CompleteUnrolling
@@ -205,7 +205,7 @@ struct copy_using_evaluator_LinearTraversal_CompleteUnrolling
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_LinearTraversal_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { }
static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { }
};

/**************************
@@ -224,8 +224,8 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
JointAlignment = copy_using_evaluator_traits<DstXprType,SrcXprType>::JointAlignment
};

EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
SrcEvaluatorType &srcEvaluator)
static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
SrcEvaluatorType &srcEvaluator)
{
dstEvaluator.template copyPacketByOuterInner<Aligned, JointAlignment>(outer, inner, srcEvaluator);
enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
@@ -238,15 +238,15 @@ struct copy_using_evaluator_innervec_CompleteUnrolling
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_innervec_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { }
static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { }
};

template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling
{
EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
SrcEvaluatorType &srcEvaluator,
int outer)
static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
SrcEvaluatorType &srcEvaluator,
int outer)
{
dstEvaluator.template copyPacketByOuterInner<Aligned, Aligned>(outer, Index, srcEvaluator);
typedef typename DstEvaluatorType::XprType DstXprType;
@@ -260,7 +260,7 @@ struct copy_using_evaluator_innervec_InnerUnrolling
template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
struct copy_using_evaluator_innervec_InnerUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
{
EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
};

/***************************************************************************
@@ -301,7 +301,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, NoUnr
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, CompleteUnrolling>
{
EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -319,7 +319,7 @@ template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, InnerUnrolling>
{
typedef typename DstXprType::Index Index;
EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -330,7 +330,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, Inner
const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
copy_using_evaluator_DefaultTraversal_InnerUnrolling
<DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime>
::run(dstEvaluator, srcEvaluator, outer);
}
};
@@ -345,7 +345,7 @@ struct unaligned_copy_using_evaluator_impl
// if IsAligned = true, then do nothing
template <typename SrcEvaluatorType, typename DstEvaluatorType>
static EIGEN_STRONG_INLINE void run(const SrcEvaluatorType&, DstEvaluatorType&,
typename SrcEvaluatorType::Index, typename SrcEvaluatorType::Index) {}
};

template <>
@@ -356,15 +356,15 @@ struct unaligned_copy_using_evaluator_impl<false>
#ifdef _MSC_VER
template <typename DstEvaluatorType, typename SrcEvaluatorType>
static EIGEN_DONT_INLINE void run(DstEvaluatorType &dstEvaluator,
const SrcEvaluatorType &srcEvaluator,
typename DstEvaluatorType::Index start,
typename DstEvaluatorType::Index end)
#else
template <typename DstEvaluatorType, typename SrcEvaluatorType>
static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator,
const SrcEvaluatorType &srcEvaluator,
typename DstEvaluatorType::Index start,
typename DstEvaluatorType::Index end)
#endif
{
for (typename DstEvaluatorType::Index index = start; index < end; ++index)
@@ -375,7 +375,7 @@ struct unaligned_copy_using_evaluator_impl<false>
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTraversal, NoUnrolling>
{
EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -392,7 +392,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTravers
dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : dstIsAligned,
srcAlignment = copy_using_evaluator_traits<DstXprType,SrcXprType>::JointAlignment
};
const Index alignedStart = dstIsAligned ? 0 : first_aligned(&dstEvaluator.coeffRef(0), size);
const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned(&dstEvaluator.coeffRef(0), size);
const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;

unaligned_copy_using_evaluator_impl<dstIsAligned!=0>::run(dstEvaluator, srcEvaluator, 0, alignedStart);
@@ -410,7 +410,7 @@ template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearVectorizedTraversal, CompleteUnrolling>
{
typedef typename DstXprType::Index Index;
EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -460,7 +460,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversa
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, CompleteUnrolling>
{
EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -478,7 +478,7 @@ template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversal, InnerUnrolling>
{
typedef typename DstXprType::Index Index;
EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -489,7 +489,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, InnerVectorizedTraversa
const Index outerSize = dst.outerSize();
for(Index outer = 0; outer < outerSize; ++outer)
copy_using_evaluator_innervec_InnerUnrolling
<DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime>
::run(dstEvaluator, srcEvaluator, outer);
}
};
@@ -519,7 +519,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, NoUnro
template<typename DstXprType, typename SrcXprType>
struct copy_using_evaluator_impl<DstXprType, SrcXprType, LinearTraversal, CompleteUnrolling>
{
EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src)
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
@@ -560,7 +560,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, SliceVectorizedTraversa
const Index outerSize = dst.outerSize();
const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0;
Index alignedStart = ((!alignable) || copy_using_evaluator_traits<DstXprType,SrcXprType>::DstIsAligned) ? 0
: first_aligned(&dstEvaluator.coeffRef(0,0), innerSize);
: internal::first_aligned(&dstEvaluator.coeffRef(0,0), innerSize);

for(Index outer = 0; outer < outerSize; ++outer)
{
@@ -596,7 +596,6 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, AllAtOnceTraversal, NoU
{
typedef typename evaluator<DstXprType>::type DstEvaluatorType;
typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
typedef typename DstXprType::Index Index;

DstEvaluatorType dstEvaluator(dst);
SrcEvaluatorType srcEvaluator(src);
@@ -616,7 +615,7 @@ struct copy_using_evaluator_impl<DstXprType, SrcXprType, AllAtOnceTraversal, NoU
template<typename DstXprType, template <typename> class StorageBase, typename SrcXprType>
EIGEN_STRONG_INLINE
const DstXprType& copy_using_evaluator(const NoAlias<DstXprType, StorageBase>& dst,
const EigenBase<SrcXprType>& src)
{
return noalias_copy_using_evaluator(dst.expression(), src.derived());
}

@@ -140,8 +140,8 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
{
eigen_assert((RowsAtCompileTime==Dynamic || RowsAtCompileTime==blockRows)
&& (ColsAtCompileTime==Dynamic || ColsAtCompileTime==blockCols));
eigen_assert(a_startRow >= 0 && blockRows >= 0 && a_startRow + blockRows <= xpr.rows()
&& a_startCol >= 0 && blockCols >= 0 && a_startCol + blockCols <= xpr.cols());
eigen_assert(a_startRow >= 0 && blockRows >= 0 && a_startRow <= xpr.rows() - blockRows
&& a_startCol >= 0 && blockCols >= 0 && a_startCol <= xpr.cols() - blockCols);
}
};
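The rewritten assertion checks the same bounds, restated so no sum of two large indices is formed; at user level it guards block() calls such as the following illustrative sketch:

#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd m = Eigen::MatrixXd::Zero(6, 8);

  // startRow=2, startCol=3, blockRows=4, blockCols=5:
  // accepted because 2 <= 6 - 4 and 3 <= 8 - 5.
  m.block(2, 3, 4, 5).setOnes();

  // m.block(3, 3, 4, 5) would trip the assertion: 3 > 6 - 4.
  return 0;
}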
@@ -85,9 +85,7 @@ inline bool DenseBase<Derived>::all() const
&& SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
};
if(unroll)
return internal::all_unroller<Derived,
unroll ? int(SizeAtCompileTime) : Dynamic
>::run(derived());
return internal::all_unroller<Derived, unroll ? int(SizeAtCompileTime) : Dynamic>::run(derived());
else
{
for(Index j = 0; j < cols(); ++j)
@@ -111,9 +109,7 @@ inline bool DenseBase<Derived>::any() const
&& SizeAtCompileTime * (CoeffReadCost + NumTraits<Scalar>::AddCost) <= EIGEN_UNROLLING_LIMIT
};
if(unroll)
return internal::any_unroller<Derived,
unroll ? int(SizeAtCompileTime) : Dynamic
>::run(derived());
return internal::any_unroller<Derived, unroll ? int(SizeAtCompileTime) : Dynamic>::run(derived());
else
{
for(Index j = 0; j < cols(); ++j)
@@ -133,6 +129,26 @@ inline typename DenseBase<Derived>::Index DenseBase<Derived>::count() const
return derived().template cast<bool>().template cast<Index>().sum();
}

/** \returns true if \c *this contains at least one Not A Number (NaN).
*
* \sa isFinite()
*/
template<typename Derived>
inline bool DenseBase<Derived>::hasNaN() const
{
return !((derived().array()==derived().array()).all());
}

/** \returns true if \c *this contains only finite numbers, i.e., no NaN and no +/-INF values.
*
* \sa hasNaN()
*/
template<typename Derived>
inline bool DenseBase<Derived>::isFinite() const
{
return !((derived()-derived()).hasNaN());
}

} // end namespace Eigen

#endif // EIGEN_ALLANDANY_H
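Both new predicates rely on IEEE-754 arithmetic: x == x is false exactly when x is NaN, which is why hasNaN() compares the expression against itself, and inf - inf produces NaN, which is why isFinite() reduces to hasNaN() of the difference. A usage sketch (illustrative, using the names introduced by this patch):

#include <Eigen/Dense>
#include <iostream>
#include <limits>

int main()
{
  Eigen::Matrix2d m;
  m << 1.0, 2.0,
       3.0, std::numeric_limits<double>::quiet_NaN();

  std::cout << std::boolalpha
            << m.hasNaN() << "\n"     // true: the (1,1) coefficient is NaN
            << m.isFinite() << "\n";  // false: a NaN is not finite

  m(1, 1) = std::numeric_limits<double>::infinity();
  std::cout << m.hasNaN() << "\n"     // false: inf is not NaN
            << m.isFinite() << "\n";  // false: inf - inf yields NaN
  return 0;
}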
@@ -349,7 +349,7 @@ struct evaluator_impl<EvalToTemp<ArgType> >
template<int LoadMode>
PacketReturnType packet(Index row, Index col) const
{
return m_resultImpl.packet<LoadMode>(row, col);
return m_resultImpl.template packet<LoadMode>(row, col);
}

template<int LoadMode>
@@ -361,13 +361,13 @@ struct evaluator_impl<EvalToTemp<ArgType> >
template<int StoreMode>
void writePacket(Index row, Index col, const PacketScalar& x)
{
m_resultImpl.writePacket<StoreMode>(row, col, x);
m_resultImpl.template writePacket<StoreMode>(row, col, x);
}

template<int StoreMode>
void writePacket(Index index, const PacketScalar& x)
{
m_resultImpl.writePacket<StoreMode>(index, x);
m_resultImpl.template writePacket<StoreMode>(index, x);
}

protected:
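The added `template` keyword is required because m_resultImpl has a type that depends on a template parameter: without it, the compiler must parse `m_resultImpl.packet<LoadMode>` as comparisons. A minimal standalone illustration of the rule, with hypothetical types rather than Eigen code:

struct Impl {
  template <int Mode>
  int packet(int i) const { return Mode + i; }
};

template <typename T>
int call(const T& obj)
{
  // obj has dependent type T, so the member template call needs `template`;
  // writing obj.packet<0>(1) would parse as ((obj.packet < 0) > (1)).
  return obj.template packet<0>(1);
}

int test() { return call(Impl()); }  // returns 1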
@@ -94,8 +94,8 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
// So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to
// add together a float matrix and a double matrix.
#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \
EIGEN_STATIC_ASSERT((internal::functor_allows_mixing_real_and_complex<BINOP>::ret \
? int(internal::is_same<typename NumTraits<LHS>::Real, typename NumTraits<RHS>::Real>::value) \
EIGEN_STATIC_ASSERT((internal::functor_is_product_like<BINOP>::ret \
? int(internal::scalar_product_traits<LHS, RHS>::Defined) \
: int(internal::is_same<LHS, RHS>::value)), \
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
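In effect, the assert now delegates mixed-scalar products to scalar_product_traits instead of a hand-written real/complex rule. What this accepts and rejects, sketched at user level (illustrative):

#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd  d = Eigen::MatrixXd::Random(2, 2);
  Eigen::MatrixXcd c = Eigen::MatrixXcd::Random(2, 2);
  Eigen::MatrixXf  f = Eigen::MatrixXf::Random(2, 2);

  // Product-like ops may mix real and complex of the same real type:
  Eigen::MatrixXcd ok = d * c;

  // Eigen::MatrixXd bad = d + f;  // rejected: YOU_MIXED_DIFFERENT_NUMERIC_TYPES...
  Eigen::MatrixXd fine = d + f.cast<double>();  // explicit cast is the fix
  (void)ok; (void)fine;
  return 0;
}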
@@ -44,9 +44,10 @@ struct traits<CwiseUnaryView<ViewOp, MatrixType> >
// "error: no integral type can represent all of the enumerator values
InnerStrideAtCompileTime = MatrixTypeInnerStride == Dynamic
? int(Dynamic)
: int(MatrixTypeInnerStride)
* int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar)),
OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret
: int(MatrixTypeInnerStride) * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar)),
OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret == Dynamic
? int(Dynamic)
: outer_stride_at_compile_time<MatrixType>::ret * int(sizeof(typename traits<MatrixType>::Scalar) / sizeof(Scalar))
};
};
}
@@ -98,6 +99,9 @@ class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>
typedef typename internal::dense_xpr_base< CwiseUnaryView<ViewOp, MatrixType> >::type Base;

EIGEN_DENSE_PUBLIC_INTERFACE(Derived)

inline Scalar* data() { return &coeffRef(0); }
inline const Scalar* data() const { return &coeff(0); }

inline Index innerStride() const
{
@@ -106,7 +110,7 @@ class CwiseUnaryViewImpl<ViewOp,MatrixType,Dense>

inline Index outerStride() const
{
return derived().nestedExpression().outerStride();
return derived().nestedExpression().outerStride() * sizeof(typename internal::traits<MatrixType>::Scalar) / sizeof(Scalar);
}

EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
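The stride scaling matters whenever the view's scalar type is smaller than the viewed scalar type; the real part of a complex matrix is such a view, and its strides must be counted in doubles rather than in complex<double> units. A sketch, assuming .real() on a non-const matrix yields a CwiseUnaryView as in this version:

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::MatrixXcd m = Eigen::MatrixXcd::Random(2, 2);

  // m.real() views the real parts in place: scalar double over storage of
  // complex<double>, so each column spans 2 * 2 = 4 doubles.
  std::cout << m.outerStride() << "\n";         // 2 (in complex<double> units)
  std::cout << m.real().outerStride() << "\n";  // 4 (in double units)
  return 0;
}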
@@ -353,6 +353,9 @@ template<typename Derived> class DenseBase
bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;

inline bool hasNaN() const;
inline bool isFinite() const;

inline Derived& operator*=(const Scalar& other);
inline Derived& operator/=(const Scalar& other);
@@ -436,8 +439,6 @@ template<typename Derived> class DenseBase
return derived().coeff(0,0);
}

/////////// Array module ///////////

bool all() const;
bool any() const;
Index count() const;
@@ -463,11 +464,11 @@ template<typename Derived> class DenseBase

template<typename ThenDerived>
inline const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>
select(const DenseBase<ThenDerived>& thenMatrix, typename ThenDerived::Scalar elseScalar) const;
select(const DenseBase<ThenDerived>& thenMatrix, const typename ThenDerived::Scalar& elseScalar) const;

template<typename ElseDerived>
inline const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >
select(typename ElseDerived::Scalar thenScalar, const DenseBase<ElseDerived>& elseMatrix) const;
select(const typename ElseDerived::Scalar& thenScalar, const DenseBase<ElseDerived>& elseMatrix) const;

template<int p> RealScalar lpNorm() const;
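select() is the coefficient-wise ternary these declarations expose; the scalar overloads now take their scalar by const reference. Typical use (illustrative):

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::ArrayXd a(4);
  a << -2.0, -1.0, 1.0, 2.0;

  // Coefficient-wise: keep a[i] where a[i] > 0, else use the scalar 0.0,
  // which binds to the new const Scalar& overload.
  Eigen::ArrayXd b = (a > 0.0).select(a, 0.0);
  std::cout << b.transpose() << "\n";  // 0 0 1 2
  return 0;
}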
@@ -154,6 +154,7 @@ template<typename Scalar> struct scalar_hypot_op {
{
EIGEN_USING_STD_MATH(max);
EIGEN_USING_STD_MATH(min);
using std::sqrt;
Scalar p = (max)(_x, _y);
Scalar q = (min)(_x, _y);
Scalar qp = q/p;
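For reference, the functor computes hypot by factoring out the larger magnitude so the squared ratio cannot overflow. The same logic as a free function, a sketch rather than the Eigen implementation:

#include <algorithm>
#include <cmath>

double hypot_sketch(double x, double y)
{
  double ax = std::abs(x), ay = std::abs(y);
  double p = std::max(ax, ay);   // the (max)/(min) parentheses in the source
  double q = std::min(ax, ay);   // guard against Windows' min/max macros
  if (p == 0.0) return 0.0;      // avoid 0/0 when both inputs are zero
  double qp = q / p;             // qp <= 1, so qp*qp cannot overflow
  return p * std::sqrt(1.0 + qp * qp);
}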
@@ -543,20 +544,28 @@ template <typename Scalar, bool RandomAccess> struct linspaced_op_impl;
// linear access for packet ops:
// 1) initialization
// base = [low, ..., low] + ([step, ..., step] * [-size, ..., 0])
// 2) each step
// 2) each step (where size is 1 for coeff access or PacketSize for packet access)
// base += [size*step, ..., size*step]
//
// TODO: Perhaps it's better to initialize lazily (so not in the constructor but in packetOp)
// in order to avoid the padd() in operator() ?
template <typename Scalar>
struct linspaced_op_impl<Scalar,false>
{
typedef typename packet_traits<Scalar>::type Packet;

linspaced_op_impl(Scalar low, Scalar step) :
linspaced_op_impl(const Scalar& low, const Scalar& step) :
m_low(low), m_step(step),
m_packetStep(pset1<Packet>(packet_traits<Scalar>::size*step)),
m_base(padd(pset1<Packet>(low),pmul(pset1<Packet>(step),plset<Scalar>(-packet_traits<Scalar>::size)))) {}
m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(step),plset<Scalar>(-packet_traits<Scalar>::size)))) {}

template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const
{
m_base = padd(m_base, pset1<Packet>(m_step));
return m_low+Scalar(i)*m_step;
}

template<typename Index>
EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); }

@@ -574,7 +583,7 @@ struct linspaced_op_impl<Scalar,true>
{
typedef typename packet_traits<Scalar>::type Packet;

linspaced_op_impl(Scalar low, Scalar step) :
linspaced_op_impl(const Scalar& low, const Scalar& step) :
m_low(low), m_step(step),
m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Scalar>(0)) {}

@@ -603,7 +612,7 @@ template <typename Scalar, bool RandomAccess> struct functor_traits< linspaced_o
template <typename Scalar, bool RandomAccess> struct linspaced_op
{
typedef typename packet_traits<Scalar>::type Packet;
linspaced_op(Scalar low, Scalar high, int num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/(num_steps-1))) {}
linspaced_op(const Scalar& low, const Scalar& high, DenseIndex num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/(num_steps-1))) {}

template<typename Index>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); }
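linspaced_op is the functor behind LinSpaced; the constructor above maps (low, high, num_steps) to the internal (low, step) pair. Usage sketch (illustrative):

#include <Eigen/Dense>
#include <iostream>

int main()
{
  // 5 values from 0 to 1: the functor receives low=0 and
  // step = (high-low)/(num_steps-1) = 0.25.
  Eigen::VectorXd v = Eigen::VectorXd::LinSpaced(5, 0.0, 1.0);
  std::cout << v.transpose() << "\n";  // 0 0.25 0.5 0.75 1
  return 0;
}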
@@ -642,13 +651,14 @@ template <typename Scalar, bool RandomAccess> struct linspaced_op
template<typename Functor> struct functor_has_linear_access { enum { ret = 1 }; };
template<typename Scalar> struct functor_has_linear_access<scalar_identity_op<Scalar> > { enum { ret = 0 }; };

// in CwiseBinaryOp, we require the Lhs and Rhs to have the same scalar type, except for multiplication
// where we only require them to have the same _real_ scalar type so one may multiply, say, float by complex<float>.
// In Eigen, any binary op (Product, CwiseBinaryOp) requires the Lhs and Rhs to have the same scalar type, except for multiplication
// where the mixing of different types is handled by scalar_product_traits.
// In particular, real * complex<real> is allowed.
// FIXME move this to functor_traits adding a functor_default
template<typename Functor> struct functor_allows_mixing_real_and_complex { enum { ret = 0 }; };
template<typename LhsScalar,typename RhsScalar> struct functor_allows_mixing_real_and_complex<scalar_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
template<typename LhsScalar,typename RhsScalar> struct functor_allows_mixing_real_and_complex<scalar_conj_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
template<typename LhsScalar,typename RhsScalar> struct functor_allows_mixing_real_and_complex<scalar_quotient_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
template<typename Functor> struct functor_is_product_like { enum { ret = 0 }; };
template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_conj_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_quotient_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };

/** \internal

@@ -222,7 +222,29 @@ class GeneralProduct<Lhs, Rhs, InnerProduct>
***********************************************************************/

namespace internal {
template<int StorageOrder> struct outer_product_selector;

// Column major
template<typename ProductType, typename Dest, typename Func>
EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const false_type&)
{
typedef typename Dest::Index Index;
// FIXME make sure lhs is sequentially stored
// FIXME not very good if rhs is real and lhs complex while alpha is real too
const Index cols = dest.cols();
for (Index j=0; j<cols; ++j)
func(dest.col(j), prod.rhs().coeff(j) * prod.lhs());
}

// Row major
template<typename ProductType, typename Dest, typename Func>
EIGEN_DONT_INLINE void outer_product_selector_run(const ProductType& prod, Dest& dest, const Func& func, const true_type&) {
typedef typename Dest::Index Index;
// FIXME make sure rhs is sequentially stored
// FIXME not very good if lhs is real and rhs complex while alpha is real too
const Index rows = dest.rows();
for (Index i=0; i<rows; ++i)
func(dest.row(i), prod.lhs().coeff(i) * prod.rhs());
}

template<typename Lhs, typename Rhs>
struct traits<GeneralProduct<Lhs,Rhs,OuterProduct> >
@@ -235,6 +257,8 @@ template<typename Lhs, typename Rhs>
class GeneralProduct<Lhs, Rhs, OuterProduct>
: public ProductBase<GeneralProduct<Lhs,Rhs,OuterProduct>, Lhs, Rhs>
{
template<typename T> struct IsRowMajor : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};

public:
EIGEN_PRODUCT_PUBLIC_INTERFACE(GeneralProduct)

@@ -243,41 +267,39 @@ class GeneralProduct<Lhs, Rhs, OuterProduct>
EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
}

struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
struct adds {
Scalar m_scale;
adds(const Scalar& s) : m_scale(s) {}
template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
dst.const_cast_derived() += m_scale * src;
}
};

template<typename Dest>
inline void evalTo(Dest& dest) const {
internal::outer_product_selector_run(*this, dest, set(), IsRowMajor<Dest>());
}

template<typename Dest>
inline void addTo(Dest& dest) const {
internal::outer_product_selector_run(*this, dest, add(), IsRowMajor<Dest>());
}

template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
template<typename Dest>
inline void subTo(Dest& dest) const {
internal::outer_product_selector_run(*this, dest, sub(), IsRowMajor<Dest>());
}

template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
{
internal::outer_product_selector<(int(Dest::Flags)&RowMajorBit) ? RowMajor : ColMajor>::run(*this, dest, alpha);
internal::outer_product_selector_run(*this, dest, adds(alpha), IsRowMajor<Dest>());
}
};

namespace internal {

template<> struct outer_product_selector<ColMajor> {
template<typename ProductType, typename Dest>
static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
typedef typename Dest::Index Index;
// FIXME make sure lhs is sequentially stored
// FIXME not very good if rhs is real and lhs complex while alpha is real too
const Index cols = dest.cols();
for (Index j=0; j<cols; ++j)
dest.col(j) += (alpha * prod.rhs().coeff(j)) * prod.lhs();
}
};

template<> struct outer_product_selector<RowMajor> {
template<typename ProductType, typename Dest>
static EIGEN_DONT_INLINE void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha) {
typedef typename Dest::Index Index;
// FIXME make sure rhs is sequentially stored
// FIXME not very good if lhs is real and rhs complex while alpha is real too
const Index rows = dest.rows();
for (Index i=0; i<rows; ++i)
dest.row(i) += (alpha * prod.lhs().coeff(i)) * prod.rhs();
}
};

} // end namespace internal

/***********************************************************************
* Implementation of General Matrix Vector Product
***********************************************************************/
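The set/add/sub/adds functors give the single outer_product_selector_run loop its four entry points (evalTo, addTo, subTo, scaleAndAddTo). At user level they are reached through expressions such as these (illustrative):

#include <Eigen/Dense>

int main()
{
  Eigen::VectorXd    u = Eigen::VectorXd::Random(3);
  Eigen::RowVectorXd v = Eigen::RowVectorXd::Random(4);
  Eigen::MatrixXd    D = Eigen::MatrixXd::Zero(3, 4);

  D.noalias()  = u * v;              // evalTo: the set functor
  D.noalias() += u * v;              // addTo: the add functor
  D.noalias() -= u * v;              // subTo: the sub functor
  D.noalias() += 2.0 * (u * v);      // scaleAndAddTo: adds(2.0)
  return 0;
}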
@@ -320,7 +342,7 @@ class GeneralProduct<Lhs, Rhs, GemvProduct>
enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
typedef typename internal::conditional<int(Side)==OnTheRight,_LhsNested,_RhsNested>::type MatrixType;

template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(m_lhs.rows() == dst.rows() && m_rhs.cols() == dst.cols());
internal::gemv_selector<Side,(int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
@@ -335,7 +357,7 @@ template<int StorageOrder, bool BlasCompatible>
struct gemv_selector<OnTheLeft,StorageOrder,BlasCompatible>
{
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
{
Transpose<Dest> destT(dest);
enum { OtherStorageOrder = StorageOrder == RowMajor ? ColMajor : RowMajor };
@@ -384,7 +406,7 @@ struct gemv_static_vector_if<Scalar,Size,MaxSize,true>
template<> struct gemv_selector<OnTheRight,ColMajor,true>
{
template<typename ProductType, typename Dest>
static inline void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
static inline void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
{
typedef typename ProductType::Index Index;
typedef typename ProductType::LhsScalar LhsScalar;
@@ -457,7 +479,7 @@ template<> struct gemv_selector<OnTheRight,ColMajor,true>
template<> struct gemv_selector<OnTheRight,RowMajor,true>
{
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
{
typedef typename ProductType::LhsScalar LhsScalar;
typedef typename ProductType::RhsScalar RhsScalar;
@@ -508,7 +530,7 @@ template<> struct gemv_selector<OnTheRight,RowMajor,true>
template<> struct gemv_selector<OnTheRight,ColMajor,false>
{
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
{
typedef typename Dest::Index Index;
// TODO make sure dest is sequentially stored in memory, otherwise use a temp
@@ -521,7 +543,7 @@ template<> struct gemv_selector<OnTheRight,ColMajor,false>
template<> struct gemv_selector<OnTheRight,RowMajor,false>
{
template<typename ProductType, typename Dest>
static void run(const ProductType& prod, Dest& dest, typename ProductType::Scalar alpha)
static void run(const ProductType& prod, Dest& dest, const typename ProductType::Scalar& alpha)
{
typedef typename Dest::Index Index;
// TODO make sure rhs is sequentially stored in memory, otherwise use a temp

@@ -156,7 +156,11 @@ pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
template<typename Packet> inline Packet
ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }

/** \internal \returns a packet with elements of \a *from duplicated, e.g.: (from[0],from[0],from[1],from[1]) */
/** \internal \returns a packet with elements of \a *from duplicated.
* For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and
* duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]}
* Currently, this function is only used for scalar * complex products.
*/
template<typename Packet> inline Packet
ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
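A plain-scalar reference of the documented ploaddup semantics for a hypothetical 4-element packet (the generic overload above is the trivial 1-element case):

// Reads 2 scalars and duplicates each: {from[0],from[0],from[1],from[1]}.
void ploaddup_ref4(const float* from, float out[4])
{
  out[0] = from[0];
  out[1] = from[0];
  out[2] = from[1];
  out[3] = from[1];
}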
@@ -307,8 +311,21 @@ struct palign_impl
static inline void run(PacketType&, const PacketType&) {}
};

/** \internal update \a first using the concatenation of the \a Offset last elements
* of \a first and packet_size minus \a Offset first elements of \a second */
/** \internal update \a first using the concatenation of the packet_size minus \a Offset last elements
* of \a first and \a Offset first elements of \a second.
*
* This function is currently only used to optimize matrix-vector products on unaligned matrices.
* It takes 2 packets that represent a contiguous memory array, and returns a packet starting
* at the position \a Offset. For instance, for packets of 4 elements, we have:
* Input:
* - first = {f0,f1,f2,f3}
* - second = {s0,s1,s2,s3}
* Output:
* - if Offset==0 then {f0,f1,f2,f3}
* - if Offset==1 then {f1,f2,f3,s0}
* - if Offset==2 then {f2,f3,s0,s1}
* - if Offset==3 then {f3,s0,s1,s2}
*/
template<int Offset,typename PacketType>
inline void palign(PacketType& first, const PacketType& second)
{
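A scalar reference of the palign<Offset> behavior documented above, for 4-element packets (a sketch, not the SIMD implementation):

#include <cstddef>

template <int Offset>
void palign_ref4(float (&first)[4], const float (&second)[4])
{
  float tmp[4];
  for (std::size_t i = 0; i < 4; ++i)
    tmp[i] = (i + Offset < 4) ? first[i + Offset]       // tail of first
                              : second[i + Offset - 4]; // head of second
  for (std::size_t i = 0; i < 4; ++i)
    first[i] = tmp[i];
}
// palign_ref4<1>: {f0,f1,f2,f3},{s0,s1,s2,s3} -> {f1,f2,f3,s0}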
@@ -70,7 +70,7 @@ namespace Eigen
**/
template <typename Derived>
inline const Eigen::CwiseUnaryOp<Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>, const Derived>
operator/(typename Derived::Scalar s, const Eigen::ArrayBase<Derived>& a)
operator/(const typename Derived::Scalar& s, const Eigen::ArrayBase<Derived>& a)
{
return Eigen::CwiseUnaryOp<Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>, const Derived>(
a.derived(),
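This operator is what makes expressions like 1.0 / array work coefficient-wise; the scalar is now taken by const reference. Usage (illustrative):

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::ArrayXd a(3);
  a << 1.0, 2.0, 4.0;

  // Coefficient-wise s / a[i] via scalar_inverse_mult_op:
  Eigen::ArrayXd inv = 1.0 / a;
  std::cout << inv.transpose() << "\n";  // 1 0.5 0.25
  return 0;
}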
@@ -363,6 +363,7 @@ struct hypot_impl
EIGEN_USING_STD_MATH(max);
EIGEN_USING_STD_MATH(min);
using std::abs;
using std::sqrt;
RealScalar _x = abs(x);
RealScalar _y = abs(y);
RealScalar p = (max)(_x, _y);
@@ -420,7 +421,7 @@ struct atanh2_default_impl
using std::log;
using std::sqrt;
Scalar z = x / y;
if (abs(z) > sqrt(NumTraits<RealScalar>::epsilon()))
if (y == Scalar(0) || abs(z) > sqrt(NumTraits<RealScalar>::epsilon()))
return RealScalar(0.5) * log((y + x) / (y - x));
else
return z + z*z*z / RealScalar(3);
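atanh2(x,y) computes atanh(x/y); the new y == 0 test keeps the degenerate 0/0 case from silently entering the series branch through a NaN comparison. The scalar logic as a standalone sketch, not the Eigen internal:

#include <cmath>
#include <limits>

double atanh2_sketch(double x, double y)
{
  const double z = x / y;
  const double eps = std::numeric_limits<double>::epsilon();
  if (y == 0.0 || std::abs(z) > std::sqrt(eps))
    return 0.5 * std::log((y + x) / (y - x));  // exact formula
  // For tiny z the log formula loses precision; use the Taylor
  // expansion atanh(z) ~= z + z^3/3 instead.
  return z + z * z * z / 3.0;
}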
|
||||
|
@ -208,14 +208,14 @@ class Matrix
EIGEN_STRONG_INLINE explicit Matrix() : Base()
{
Base::_check_template_params();
EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}

// FIXME is it still needed
EIGEN_DEVICE_FUNC
Matrix(internal::constructor_without_unaligned_array_assert)
: Base(internal::constructor_without_unaligned_array_assert())
{ Base::_check_template_params(); EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED }
{ Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }

/** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors
*

@ -231,7 +231,7 @@ class Matrix
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Matrix)
eigen_assert(dim >= 0);
eigen_assert(SizeAtCompileTime == Dynamic || SizeAtCompileTime == dim);
EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}

#ifndef EIGEN_PARSED_BY_DOXYGEN

@ -475,7 +475,7 @@ template<typename Derived> class MatrixBase
const MatrixFunctionReturnValue<Derived> sin() const;
const MatrixSquareRootReturnValue<Derived> sqrt() const;
const MatrixLogarithmReturnValue<Derived> log() const;
const MatrixPowerReturnValue<Derived> pow(RealScalar p) const;
const MatrixPowerReturnValue<Derived> pow(const RealScalar& p) const;

#ifdef EIGEN2_SUPPORT
template<typename ProductDerived, typename Lhs, typename Rhs>

@ -140,6 +140,9 @@ struct NumTraits<Array<Scalar, Rows, Cols, Options, MaxRows, MaxCols> >
AddCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::AddCost,
MulCost = ArrayType::SizeAtCompileTime==Dynamic ? Dynamic : ArrayType::SizeAtCompileTime * NumTraits<Scalar>::MulCost
};

static inline RealScalar epsilon() { return NumTraits<RealScalar>::epsilon(); }
static inline RealScalar dummy_precision() { return NumTraits<RealScalar>::dummy_precision(); }
};

} // end namespace Eigen

@ -105,13 +105,13 @@ class PermutationBase : public EigenBase<Derived>
#endif

/** \returns the number of rows */
inline Index rows() const { return indices().size(); }
inline Index rows() const { return Index(indices().size()); }

/** \returns the number of columns */
inline Index cols() const { return indices().size(); }
inline Index cols() const { return Index(indices().size()); }

/** \returns the size of a side of the respective square matrix, i.e., the number of indices */
inline Index size() const { return indices().size(); }
inline Index size() const { return Index(indices().size()); }

#ifndef EIGEN_PARSED_BY_DOXYGEN
template<typename DenseDerived>
@ -541,24 +541,25 @@ struct permut_matrix_product_retval
: public ReturnByValue<permut_matrix_product_retval<PermutationType, MatrixType, Side, Transposed> >
{
typedef typename remove_all<typename MatrixType::Nested>::type MatrixTypeNestedCleaned;
typedef typename MatrixType::Index Index;

permut_matrix_product_retval(const PermutationType& perm, const MatrixType& matrix)
: m_permutation(perm), m_matrix(matrix)
{}

inline int rows() const { return m_matrix.rows(); }
inline int cols() const { return m_matrix.cols(); }
inline Index rows() const { return m_matrix.rows(); }
inline Index cols() const { return m_matrix.cols(); }

template<typename Dest> inline void evalTo(Dest& dst) const
{
const int n = Side==OnTheLeft ? rows() : cols();
const Index n = Side==OnTheLeft ? rows() : cols();

if(is_same<MatrixTypeNestedCleaned,Dest>::value && extract_data(dst) == extract_data(m_matrix))
{
// apply the permutation inplace
Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(m_permutation.size());
mask.fill(false);
int r = 0;
Index r = 0;
while(r < m_permutation.size())
{
// search for the next seed

@ -566,10 +567,10 @@ struct permut_matrix_product_retval
if(r>=m_permutation.size())
break;
// we got one, let's follow it until we are back to the seed
int k0 = r++;
int kPrev = k0;
Index k0 = r++;
Index kPrev = k0;
mask.coeffRef(k0) = true;
for(int k=m_permutation.indices().coeff(k0); k!=k0; k=m_permutation.indices().coeff(k))
for(Index k=m_permutation.indices().coeff(k0); k!=k0; k=m_permutation.indices().coeff(k))
{
Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
.swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
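The in-place branch above walks the permutation cycle by cycle, swapping rows (or columns) as it goes and using `mask` to remember visited entries. The same algorithm on a plain array, as a hedged sketch:

#include <utility>
#include <vector>

// Apply perm in place so that values[perm[i]] ends up holding the old
// values[i]; each cycle is followed exactly once, like evalTo() above.
void apply_permutation(std::vector<double>& values, const std::vector<int>& perm)
{
  std::vector<bool> done(values.size(), false);
  for (int seed = 0; seed < int(perm.size()); ++seed)
  {
    if (done[seed]) continue;             // search for the next seed
    done[seed] = true;
    for (int k = perm[seed]; k != seed; k = perm[k])  // follow one cycle
    {
      std::swap(values[seed], values[k]);
      done[k] = true;
    }
  }
}
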
@ -11,10 +11,15 @@
#ifndef EIGEN_DENSESTORAGEBASE_H
#define EIGEN_DENSESTORAGEBASE_H

#ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
# define EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=Scalar(0);
#if defined(EIGEN_INITIALIZE_MATRICES_BY_ZERO)
# define EIGEN_INITIALIZE_COEFFS
# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=Scalar(0);
#elif defined(EIGEN_INITIALIZE_MATRICES_BY_NAN)
# define EIGEN_INITIALIZE_COEFFS
# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED for(int i=0;i<base().size();++i) coeffRef(i)=std::numeric_limits<Scalar>::quiet_NaN();
#else
# define EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
# undef EIGEN_INITIALIZE_COEFFS
# define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
#endif

namespace Eigen {
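Besides renaming the macro, this hunk adds a NaN mode: defining EIGEN_INITIALIZE_MATRICES_BY_NAN poisons freshly allocated coefficients so that reads of uninitialized entries surface immediately. Minimal usage:

// Define before any Eigen include (or pass -DEIGEN_INITIALIZE_MATRICES_BY_NAN)
// in debug builds to catch uses of uninitialized coefficients.
#define EIGEN_INITIALIZE_MATRICES_BY_NAN
#include <Eigen/Core>
#include <iostream>

int main()
{
  Eigen::MatrixXd m(2, 2);            // coefficients start out as NaN
  std::cout << m.sum() << std::endl;  // prints nan: m was never filled
}
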
@ -243,11 +248,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
&& EIGEN_IMPLIES(ColsAtCompileTime==Dynamic && MaxColsAtCompileTime!=Dynamic,nbCols<=MaxColsAtCompileTime)
&& nbRows>=0 && nbCols>=0 && "Invalid sizes when resizing a matrix or array.");
internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(nbRows, nbCols);
#ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
#ifdef EIGEN_INITIALIZE_COEFFS
Index size = nbRows*nbCols;
bool size_changed = size != this->size();
m_storage.resize(size, nbRows, nbCols);
if(size_changed) EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
#else
internal::check_rows_cols_for_overflow<MaxSizeAtCompileTime>::run(nbRows, nbCols);
m_storage.resize(nbRows*nbCols, nbRows, nbCols);

@ -270,15 +275,15 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase)
eigen_assert(((SizeAtCompileTime == Dynamic && (MaxSizeAtCompileTime==Dynamic || size<=MaxSizeAtCompileTime)) || SizeAtCompileTime == size) && size>=0);
#ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
#ifdef EIGEN_INITIALIZE_COEFFS
bool size_changed = size != this->size();
#endif
if(RowsAtCompileTime == 1)
m_storage.resize(size, 1, size);
else
m_storage.resize(size, size, 1);
#ifdef EIGEN_INITIALIZE_MATRICES_BY_ZERO
if(size_changed) EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
#ifdef EIGEN_INITIALIZE_COEFFS
if(size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
#endif
}

@ -435,7 +440,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
EIGEN_STRONG_INLINE explicit PlainObjectBase() : m_storage()
{
// _check_template_params();
// EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}

#ifndef EIGEN_PARSED_BY_DOXYGEN

@ -445,7 +450,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
PlainObjectBase(internal::constructor_without_unaligned_array_assert)
: m_storage(internal::constructor_without_unaligned_array_assert())
{
// _check_template_params(); EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
// _check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}
#endif

@ -454,7 +459,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
: m_storage(a_size, nbRows, nbCols)
{
// _check_template_params();
// EIGEN_INITIALIZE_BY_ZERO_IF_THAT_OPTION_IS_ENABLED
// EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED
}

/** \copydoc MatrixBase::operator=(const EigenBase<OtherDerived>&)
@ -74,8 +74,8 @@ class Product : public ProductImpl<Lhs,Rhs,typename internal::promote_storage_ty

protected:

const LhsNested m_lhs;
const RhsNested m_rhs;
LhsNested m_lhs;
RhsNested m_rhs;
};

template<typename Lhs, typename Rhs>

@ -108,7 +108,7 @@ class ProductBase : public MatrixBase<Derived>
inline void subTo(Dest& dst) const { scaleAndAddTo(dst,Scalar(-1)); }

template<typename Dest>
inline void scaleAndAddTo(Dest& dst,Scalar alpha) const { derived().scaleAndAddTo(dst,alpha); }
inline void scaleAndAddTo(Dest& dst, const Scalar& alpha) const { derived().scaleAndAddTo(dst,alpha); }

const _LhsNested& lhs() const { return m_lhs; }
const _RhsNested& rhs() const { return m_rhs; }

@ -195,7 +195,7 @@ class ScaledProduct;
// Also note that here we accept any compatible scalar types
template<typename Derived,typename Lhs,typename Rhs>
const ScaledProduct<Derived>
operator*(const ProductBase<Derived,Lhs,Rhs>& prod, typename Derived::Scalar x)
operator*(const ProductBase<Derived,Lhs,Rhs>& prod, const typename Derived::Scalar& x)
{ return ScaledProduct<Derived>(prod.derived(), x); }

template<typename Derived,typename Lhs,typename Rhs>

@ -207,7 +207,7 @@ operator*(const ProductBase<Derived,Lhs,Rhs>& prod, const typename Derived::Real

template<typename Derived,typename Lhs,typename Rhs>
const ScaledProduct<Derived>
operator*(typename Derived::Scalar x,const ProductBase<Derived,Lhs,Rhs>& prod)
operator*(const typename Derived::Scalar& x,const ProductBase<Derived,Lhs,Rhs>& prod)
{ return ScaledProduct<Derived>(prod.derived(), x); }

template<typename Derived,typename Lhs,typename Rhs>

@ -241,7 +241,7 @@ class ScaledProduct
typedef typename Base::PlainObject PlainObject;
// EIGEN_PRODUCT_PUBLIC_INTERFACE(ScaledProduct)

ScaledProduct(const NestedProduct& prod, Scalar x)
ScaledProduct(const NestedProduct& prod, const Scalar& x)
: Base(prod.lhs(),prod.rhs()), m_prod(prod), m_alpha(x) {}

template<typename Dest>

@ -254,7 +254,7 @@ class ScaledProduct
inline void subTo(Dest& dst) const { scaleAndAddTo(dst, Scalar(-1)); }

template<typename Dest>
inline void scaleAndAddTo(Dest& dst,Scalar a_alpha) const { m_prod.derived().scaleAndAddTo(dst,a_alpha * m_alpha); }
inline void scaleAndAddTo(Dest& dst, const Scalar& a_alpha) const { m_prod.derived().scaleAndAddTo(dst,a_alpha * m_alpha); }

const Scalar& alpha() const { return m_alpha; }

@ -209,7 +209,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
{
etor_product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, innerDim, res);
res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);

@ -220,7 +220,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, RetScalar &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, RetScalar &res)
{
res = lhs.coeff(row, 0) * rhs.coeff(0, col);
}

@ -230,7 +230,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar& res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar& res)
{
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
res = lhs.coeff(row, 0) * rhs.coeff(0, col);

@ -248,7 +248,7 @@ struct etor_product_coeff_vectorized_unroller
{
typedef typename Lhs::Index Index;
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::PacketScalar &pres)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::PacketScalar &pres)
{
etor_product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres);
pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));

@ -259,7 +259,7 @@ template<typename Lhs, typename Rhs, typename Packet>
struct etor_product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::PacketScalar &pres)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::PacketScalar &pres)
{
pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
}

@ -271,7 +271,7 @@ struct etor_product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rh
typedef typename Lhs::PacketScalar Packet;
typedef typename Lhs::Index Index;
enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, RetScalar &res)
{
Packet pres;
etor_product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, innerDim, pres);

@ -284,7 +284,7 @@ template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int R
struct etor_product_coeff_vectorized_dyn_selector
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
{
res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
}

@ -296,7 +296,7 @@ template<typename Lhs, typename Rhs, int RhsCols>
struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
{
res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
}

@ -306,7 +306,7 @@ template<typename Lhs, typename Rhs, int LhsRows>
struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
static EIGEN_STRONG_INLINE void run(Index row, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
{
res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
}

@ -316,7 +316,7 @@ template<typename Lhs, typename Rhs>
struct etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, typename Lhs::Scalar &res)
{
res = lhs.transpose().cwiseProduct(rhs).sum();
}

@ -326,7 +326,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::Scalar &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, typename Lhs::Scalar &res)
{
etor_product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, innerDim, res);
}

@ -340,7 +340,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int Lo
struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{
etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);

@ -351,7 +351,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int Lo
struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
{
etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);

@ -362,7 +362,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
}

@ -372,7 +372,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
{
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
}

@ -382,7 +382,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));

@ -395,7 +395,7 @@ template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
{
typedef typename Lhs::Index Index;
EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
{
eigen_assert(innerDim>0 && "you are using a non initialized matrix");
res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
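An editorial note on this long run of hunks: the only change is moving `static` ahead of EIGEN_STRONG_INLINE. Both orderings are legal C++, but some compilers warn when the storage-class specifier is not the first declaration specifier, so this reads as a portability/style cleanup. In miniature, and assuming the usual shape of the macro:

// EIGEN_STRONG_INLINE expands roughly like this in Eigen of this era:
#ifdef _MSC_VER
# define EIGEN_STRONG_INLINE __forceinline
#else
# define EIGEN_STRONG_INLINE inline
#endif

struct runner {
  static EIGEN_STRONG_INLINE void run() {}  // storage class first, then inline specifier
};
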
@ -334,7 +334,8 @@ DenseBase<Derived>::redux(const Func& func) const
::run(derived(), func);
}

/** \returns the minimum of all coefficients of *this
/** \returns the minimum of all coefficients of \c *this.
* \warning the result is undefined if \c *this contains NaN.
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar

@ -343,7 +344,8 @@ DenseBase<Derived>::minCoeff() const
return this->redux(Eigen::internal::scalar_min_op<Scalar>());
}

/** \returns the maximum of all coefficients of *this
/** \returns the maximum of all coefficients of \c *this.
* \warning the result is undefined if \c *this contains NaN.
*/
template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
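The newly documented NaN caveat is easy to trip over: the reductions use ordinary comparisons, which are unordered for NaN, so the result depends on where the NaN sits.

#include <Eigen/Core>
#include <cmath>
#include <iostream>

int main()
{
  Eigen::Vector3d v(1.0, std::nan(""), 3.0);
  // Undefined per the warning above: NaN comparisons are always false,
  // so the reduction may or may not propagate the NaN.
  std::cout << v.minCoeff() << std::endl;
}
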
@ -149,6 +149,8 @@ public:
m_stride(StrideType::OuterStrideAtCompileTime==Dynamic?0:StrideType::OuterStrideAtCompileTime,
StrideType::InnerStrideAtCompileTime==Dynamic?0:StrideType::InnerStrideAtCompileTime)
{}

EIGEN_INHERIT_ASSIGNMENT_OPERATORS(RefBase)

protected:

@ -170,7 +172,7 @@ protected:
else
::new (static_cast<Base*>(this)) Base(expr.data(), expr.rows(), expr.cols());
::new (&m_stride) StrideBase(StrideType::OuterStrideAtCompileTime==0?0:expr.outerStride(),
StrideType::InnerStrideAtCompileTime==0?0:expr.innerStride());
StrideType::InnerStrideAtCompileTime==0?0:expr.innerStride());
}

StrideBase m_stride;

@ -211,8 +213,8 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref
};

// this is the const ref version
template<typename PlainObjectType, int Options, typename StrideType> class Ref<const PlainObjectType, Options, StrideType>
: public RefBase<Ref<const PlainObjectType, Options, StrideType> >
template<typename TPlainObjectType, int Options, typename StrideType> class Ref<const TPlainObjectType, Options, StrideType>
: public RefBase<Ref<const TPlainObjectType, Options, StrideType> >
{
typedef internal::traits<Ref> Traits;
public:

@ -240,13 +242,12 @@ template<typename PlainObjectType, int Options, typename StrideType> class Ref<c
template<typename Expression>
void construct(const Expression& expr, internal::false_type)
{
// std::cout << "Ref: copy\n";
m_object = expr;
m_object.lazyAssign(expr);
Base::construct(m_object);
}

protected:
PlainObjectType m_object;
TPlainObjectType m_object;
};

} // end namespace Eigen
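For context, the const specialization above is what lets a Ref<const T> bind to expressions that do not match the requested layout, by evaluating into its internal m_object copy. Typical usage:

#include <Eigen/Core>

// Accepts any float matrix-like argument without templating the function.
float first_coeff(const Eigen::Ref<const Eigen::MatrixXf>& m)
{
  return m(0, 0);
}

int main()
{
  Eigen::MatrixXf a = Eigen::MatrixXf::Ones(3, 3);
  float x = first_coeff(a);      // binds directly, no copy
  float y = first_coeff(a + a);  // expression: evaluated into a temporary copy
  return int(x + y) == 3 ? 0 : 1;
}
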
@ -136,7 +136,7 @@ template<typename Derived>
template<typename ThenDerived>
inline const Select<Derived,ThenDerived, typename ThenDerived::ConstantReturnType>
DenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,
typename ThenDerived::Scalar elseScalar) const
const typename ThenDerived::Scalar& elseScalar) const
{
return Select<Derived,ThenDerived,typename ThenDerived::ConstantReturnType>(
derived(), thenMatrix.derived(), ThenDerived::Constant(rows(),cols(),elseScalar));

@ -150,8 +150,8 @@ DenseBase<Derived>::select(const DenseBase<ThenDerived>& thenMatrix,
template<typename Derived>
template<typename ElseDerived>
inline const Select<Derived, typename ElseDerived::ConstantReturnType, ElseDerived >
DenseBase<Derived>::select(typename ElseDerived::Scalar thenScalar,
const DenseBase<ElseDerived>& elseMatrix) const
DenseBase<Derived>::select(const typename ElseDerived::Scalar& thenScalar,
const DenseBase<ElseDerived>& elseMatrix) const
{
return Select<Derived,typename ElseDerived::ConstantReturnType,ElseDerived>(
derived(), ElseDerived::Constant(rows(),cols(),thenScalar), elseMatrix.derived());
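These are the scalar-argument overloads behind the (condition).select(then, else) idiom, where one branch is a scalar broadcast to a constant expression. For example:

#include <Eigen/Core>
#include <iostream>

int main()
{
  Eigen::ArrayXf a(4);
  a << -2, -1, 1, 2;
  // Clamp negatives to zero: the scalar "else" branch uses the overload above.
  Eigen::ArrayXf clamped = (a > 0).select(a, 0.0f);
  std::cout << clamped.transpose() << std::endl;  // 0 0 1 2
}
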
@ -132,7 +132,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
* \sa rankUpdate(const MatrixBase<DerivedU>&, Scalar)
*/
template<typename DerivedU, typename DerivedV>
SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, Scalar alpha = Scalar(1));
SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha = Scalar(1));

/** Perform a symmetric rank K update of the selfadjoint matrix \c *this:
* \f$ this = this + \alpha ( u u^* ) \f$ where \a u is a vector or matrix.

@ -145,7 +145,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
* \sa rankUpdate(const MatrixBase<DerivedU>&, const MatrixBase<DerivedV>&, Scalar)
*/
template<typename DerivedU>
SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, Scalar alpha = Scalar(1));
SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1));

/////////// Cholesky module ///////////

@ -196,7 +196,10 @@ inline Derived& DenseBase<Derived>::operator/=(const Scalar& other)
internal::scalar_product_op<Scalar> >::type BinOp;
typedef typename Derived::PlainObject PlainObject;
SelfCwiseBinaryOp<BinOp, Derived, typename PlainObject::ConstantReturnType> tmp(derived());
tmp = PlainObject::Constant(rows(),cols(), NumTraits<Scalar>::IsInteger ? other : Scalar(1)/other);
Scalar actual_other;
if(NumTraits<Scalar>::IsInteger) actual_other = other;
else actual_other = Scalar(1)/other;
tmp = PlainObject::Constant(rows(),cols(), actual_other);
return derived();
}

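The rewritten body keeps the existing strategy: integer scalars divide coefficient-wise, while floating-point scalars multiply by a reciprocal computed once (BinOp is a quotient op in the first case and a product op in the second); the explicit branches merely avoid evaluating Scalar(1)/other for integer scalars. Observable behavior:

#include <Eigen/Core>
#include <iostream>

int main()
{
  Eigen::MatrixXd m = Eigen::MatrixXd::Constant(2, 2, 10.0);
  m /= 4.0;  // multiplies by 0.25, computed once, not per coefficient
  Eigen::MatrixXi n = Eigen::MatrixXi::Constant(2, 2, 10);
  n /= 4;    // true integer division: coefficients become 2
  std::cout << m << "\n" << n << std::endl;
}
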
@ -13,6 +13,7 @@
namespace Eigen {

namespace internal {

template<typename ExpressionType, typename Scalar>
inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale)
{

@ -32,7 +33,6 @@ template<typename Derived>
inline typename NumTraits<typename traits<Derived>::Scalar>::Real
blueNorm_impl(const EigenBase<Derived>& _vec)
{
typedef typename Derived::Scalar Scalar;
typedef typename Derived::RealScalar RealScalar;
typedef typename Derived::Index Index;
using std::pow;

@ -41,43 +41,40 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
using std::sqrt;
using std::abs;
const Derived& vec(_vec.derived());
static Index nmax = -1;
static bool initialized = false;
static RealScalar b1, b2, s1m, s2m, overfl, rbig, relerr;
if(nmax <= 0)
if(!initialized)
{
int nbig, ibeta, it, iemin, iemax, iexp;
RealScalar abig, eps;
int ibeta, it, iemin, iemax, iexp;
RealScalar eps;
// This program calculates the machine-dependent constants
// bl, b2, slm, s2m, relerr, overfl, nmax
// bl, b2, slm, s2m, relerr, overfl
// from the "basic" machine-dependent numbers
// nbig, ibeta, it, iemin, iemax, rbig.
// The following define the basic machine-dependent constants.
// For portability, the PORT subprograms "ilmaeh" and "rlmach"
// are used. For any specific computer, each of the assignment
// statements can be replaced
nbig = (std::numeric_limits<Index>::max)(); // largest integer
ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers
it = std::numeric_limits<RealScalar>::digits; // number of base-beta digits in mantissa
iemin = std::numeric_limits<RealScalar>::min_exponent; // minimum exponent
iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent
rbig = (std::numeric_limits<RealScalar>::max)(); // largest floating-point number
ibeta = std::numeric_limits<RealScalar>::radix; // base for floating-point numbers
it = std::numeric_limits<RealScalar>::digits; // number of base-beta digits in mantissa
iemin = std::numeric_limits<RealScalar>::min_exponent; // minimum exponent
iemax = std::numeric_limits<RealScalar>::max_exponent; // maximum exponent
rbig = (std::numeric_limits<RealScalar>::max)(); // largest floating-point number

iexp = -((1-iemin)/2);
b1 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // lower boundary of midrange
b1 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // lower boundary of midrange
iexp = (iemax + 1 - it)/2;
b2 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // upper boundary of midrange
b2 = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // upper boundary of midrange

iexp = (2-iemin)/2;
s1m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for lower range
s1m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for lower range
iexp = - ((iemax+it)/2);
s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range
s2m = RealScalar(pow(RealScalar(ibeta),RealScalar(iexp))); // scaling factor for upper range

overfl = rbig*s2m; // overflow boundary for abig
overfl = rbig*s2m; // overflow boundary for abig
eps = RealScalar(pow(double(ibeta), 1-it));
relerr = sqrt(eps); // tolerance for neglecting asml
abig = RealScalar(1.0/eps - 1.0);
if (RealScalar(nbig)>abig) nmax = int(abig); // largest safe n
else nmax = nbig;
relerr = sqrt(eps); // tolerance for neglecting asml
initialized = true;
}
Index n = vec.size();
RealScalar ab2 = b2 / RealScalar(n);

@ -125,6 +122,7 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
else
return abig * sqrt(RealScalar(1) + internal::abs2(asml/abig));
}

} // end namespace internal

/** \returns the \em l2 norm of \c *this avoiding underflow and overflow.
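Context for the hunk above: blueNorm_impl caches Blue's range constants in function-local statics, and the `nmax <= 0` sentinel is replaced by an explicit `initialized` flag now that nmax is gone. From user code, the overflow-safe entry points look like this:

#include <Eigen/Core>
#include <iostream>

int main()
{
  Eigen::VectorXd v = Eigen::VectorXd::Constant(4, 1e160);
  std::cout << v.squaredNorm() << "\n";  // overflows to inf: (1e160)^2 > DBL_MAX
  std::cout << v.stableNorm() << "\n";   // scaled accumulation, finite result
  std::cout << v.blueNorm() << "\n";     // Blue's algorithm, also finite
}
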
@ -287,7 +287,7 @@ struct inplace_transpose_selector<MatrixType,false> { // non square matrix
* m = m.transpose().eval();
* \endcode
* and is faster and also safer because in the latter line of code, forgetting the eval() results
* in a bug caused by aliasing.
* in a bug caused by \ref TopicAliasing "aliasing".
*
* Notice however that this method is only useful if you want to replace a matrix by its own transpose.
* If you just need the transpose of a matrix, use transpose().

@ -298,6 +298,8 @@ struct inplace_transpose_selector<MatrixType,false> { // non square matrix
template<typename Derived>
inline void DenseBase<Derived>::transposeInPlace()
{
eigen_assert((rows() == cols() || (RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic))
&& "transposeInPlace() called on a non-square non-resizable matrix");
internal::inplace_transpose_selector<Derived>::run(derived());
}

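The aliasing hazard the documentation references, spelled out; the eval() forces the transpose into a temporary before it is assigned back:

#include <Eigen/Core>

int main()
{
  Eigen::MatrixXd m = Eigen::MatrixXd::Random(3, 3);
  // m = m.transpose();        // WRONG: reads and writes m at once (aliasing)
  m = m.transpose().eval();    // correct, but pays for a temporary
  m.transposeInPlace();        // correct and cheaper for square matrices
}
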
@ -103,8 +103,8 @@ class PartialReduxExpr : internal::no_assignment_operator,

#define EIGEN_MEMBER_FUNCTOR(MEMBER,COST) \
template <typename ResultType> \
struct member_##MEMBER { \
EIGEN_EMPTY_STRUCT_CTOR(member_##MEMBER) \
struct member_##MEMBER { \
EIGEN_EMPTY_STRUCT_CTOR(member_##MEMBER) \
typedef ResultType result_type; \
template<typename Scalar, int Size> struct Cost \
{ enum { value = COST }; }; \

@ -233,6 +233,28 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
Direction==Vertical ? 1 : m_matrix.rows(),
Direction==Horizontal ? 1 : m_matrix.cols());
}

template<typename OtherDerived> struct OppositeExtendedType {
typedef Replicate<OtherDerived,
Direction==Horizontal ? 1 : ExpressionType::RowsAtCompileTime,
Direction==Vertical ? 1 : ExpressionType::ColsAtCompileTime> Type;
};

/** \internal
* Replicates a vector in the opposite direction to match the size of \c *this */
template<typename OtherDerived>
typename OppositeExtendedType<OtherDerived>::Type
extendedToOpposite(const DenseBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Horizontal, OtherDerived::MaxColsAtCompileTime==1),
YOU_PASSED_A_ROW_VECTOR_BUT_A_COLUMN_VECTOR_WAS_EXPECTED)
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(Direction==Vertical, OtherDerived::MaxRowsAtCompileTime==1),
YOU_PASSED_A_COLUMN_VECTOR_BUT_A_ROW_VECTOR_WAS_EXPECTED)
return typename OppositeExtendedType<OtherDerived>::Type
(other.derived(),
Direction==Horizontal ? 1 : m_matrix.rows(),
Direction==Vertical ? 1 : m_matrix.cols());
}

public:

@ -255,6 +277,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp

/** \returns a row (or column) vector expression of the smallest coefficient
* of each column (or row) of the referenced expression.
*
* \warning the result is undefined if \c *this contains NaN.
*
* Example: \include PartialRedux_minCoeff.cpp
* Output: \verbinclude PartialRedux_minCoeff.out

@ -265,6 +289,8 @@ template<typename ExpressionType, int Direction> class VectorwiseOp

/** \returns a row (or column) vector expression of the largest coefficient
* of each column (or row) of the referenced expression.
*
* \warning the result is undefined if \c *this contains NaN.
*
* Example: \include PartialRedux_maxCoeff.cpp
* Output: \verbinclude PartialRedux_maxCoeff.out

@ -504,6 +530,23 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
EIGEN_STATIC_ASSERT_SAME_XPR_KIND(ExpressionType, OtherDerived)
return m_matrix / extendedTo(other.derived());
}

/** \returns an expression where each column (or row) of the referenced matrix is normalized.
* The referenced matrix is \b not modified.
* \sa MatrixBase::normalized(), normalize()
*/
CwiseBinaryOp<internal::scalar_quotient_op<Scalar>,
const ExpressionTypeNestedCleaned,
const typename OppositeExtendedType<typename ReturnType<internal::member_norm,RealScalar>::Type>::Type>
normalized() const { return m_matrix.cwiseQuotient(extendedToOpposite(this->norm())); }


/** Normalize in-place each row or column of the referenced matrix.
* \sa MatrixBase::normalize(), normalized()
*/
void normalize() {
m_matrix = this->normalized();
}

/////////// Geometry module ///////////

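The new vectorwise normalized()/normalize() members at work:

#include <Eigen/Core>
#include <iostream>

int main()
{
  Eigen::MatrixXd m(2, 3);
  m << 3, 0, 1,
       4, 5, 1;
  Eigen::MatrixXd unit = m.colwise().normalized();  // each column scaled to norm 1
  std::cout << unit.colwise().norm() << std::endl;  // 1 1 1
  m.colwise().normalize();                          // same, in place
}
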
@ -164,8 +164,8 @@ struct functor_traits<max_coeff_visitor<Scalar> > {

} // end namespace internal

/** \returns the minimum of all coefficients of *this
* and puts in *row and *col its location.
/** \returns the minimum of all coefficients of *this and puts in *row and *col its location.
* \warning the result is undefined if \c *this contains NaN.
*
* \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visitor(), DenseBase::minCoeff()
*/

@ -181,8 +181,8 @@ DenseBase<Derived>::minCoeff(IndexType* rowId, IndexType* colId) const
return minVisitor.res;
}

/** \returns the minimum of all coefficients of *this
* and puts in *index its location.
/** \returns the minimum of all coefficients of *this and puts in *index its location.
* \warning the result is undefined if \c *this contains NaN.
*
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::minCoeff()
*/

@ -198,8 +198,8 @@ DenseBase<Derived>::minCoeff(IndexType* index) const
return minVisitor.res;
}

/** \returns the maximum of all coefficients of *this
* and puts in *row and *col its location.
/** \returns the maximum of all coefficients of *this and puts in *row and *col its location.
* \warning the result is undefined if \c *this contains NaN.
*
* \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff()
*/

@ -215,8 +215,8 @@ DenseBase<Derived>::maxCoeff(IndexType* rowPtr, IndexType* colPtr) const
return maxVisitor.res;
}

/** \returns the maximum of all coefficients of *this
* and puts in *index its location.
/** \returns the maximum of all coefficients of *this and puts in *index its location.
* \warning the result is undefined if \c *this contains NaN.
*
* \sa DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), DenseBase::maxCoeff()
*/
@ -31,7 +31,8 @@ Packet4f plog<Packet4f>(const Packet4f& _x)

/* the smallest non denormalized float number */
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);

_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(minus_inf, 0xff800000);//-1.f/0.f);

/* natural logarithm computed for 4 simultaneous float
return NaN for x <= 0
*/

@ -51,7 +52,8 @@ Packet4f plog<Packet4f>(const Packet4f& _x)

Packet4i emm0;

Packet4f invalid_mask = _mm_cmple_ps(x, _mm_setzero_ps());
Packet4f invalid_mask = _mm_cmplt_ps(x, _mm_setzero_ps());
Packet4f iszero_mask = _mm_cmpeq_ps(x, _mm_setzero_ps());

x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */
emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);

@ -96,7 +98,9 @@ Packet4f plog<Packet4f>(const Packet4f& _x)
y2 = pmul(e, p4f_cephes_log_q2);
x = padd(x, y);
x = padd(x, y2);
return _mm_or_ps(x, invalid_mask); // negative arg will be NAN
// negative arg will be NAN, 0 will be -INF
return _mm_or_ps(_mm_andnot_ps(iszero_mask, _mm_or_ps(x, invalid_mask)),
_mm_and_ps(iszero_mask, p4f_minus_inf));
}

template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
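The new return statement is a branchless select built from comparison masks (each SSE comparison yields all-ones or all-zeros lanes). Its per-lane logic, as a scalar model only:

#include <limits>

float plog_fixup(float x, float log_result)
{
  if (x == 0.0f)   // iszero_mask lane: log(0) is -infinity
    return -std::numeric_limits<float>::infinity();
  if (x < 0.0f)    // invalid_mask lane: OR-ing all-ones bits yields NaN
    return std::numeric_limits<float>::quiet_NaN();
  return log_result;
}
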
@ -173,18 +173,26 @@ template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const
template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_min_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b)
{
#ifdef EIGEN_VECTORIZE_SSE4_1
return _mm_min_epi32(a,b);
#else
// after some bench, this version *is* faster than a scalar implementation
Packet4i mask = _mm_cmplt_epi32(a,b);
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
#endif
}

template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_max_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_max_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b)
{
#ifdef EIGEN_VECTORIZE_SSE4_1
return _mm_max_epi32(a,b);
#else
// after some bench, this version *is* faster than a scalar implementation
Packet4i mask = _mm_cmpgt_epi32(a,b);
return _mm_or_si128(_mm_and_si128(mask,a),_mm_andnot_si128(mask,b));
#endif
}

template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_and_ps(a,b); }
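SSE2 lacks packed 32-bit integer min/max (SSE4.1 added _mm_min_epi32/_mm_max_epi32), hence the compare-and-blend fallback. Its per-lane logic in scalar form:

#include <cstdint>

// (mask & a) | (~mask & b), with mask all-ones when a < b: exactly what the
// _mm_cmplt_epi32 / _mm_and_si128 / _mm_andnot_si128 sequence does per lane.
int32_t min_via_mask(int32_t a, int32_t b)
{
  int32_t mask = -static_cast<int32_t>(a < b);  // 0xFFFFFFFF or 0
  return (mask & a) | (~mask & b);
}
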
@ -152,7 +152,7 @@ class CoeffBasedProduct
{
// we don't allow taking products of matrices of different real types, as that wouldn't be vectorizable.
// We still allow to mix T and complex<T>.
EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
EIGEN_STATIC_ASSERT((internal::scalar_product_traits<typename Lhs::RealScalar, typename Rhs::RealScalar>::Defined),
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
eigen_assert(lhs.cols() == rhs.rows()
&& "invalid matrix product"
@ -69,8 +69,8 @@ inline void manage_caching_sizes(Action action, std::ptrdiff_t* l1=0, std::ptrdi
* - the number of scalars that fit into a packet (when vectorization is enabled).
*
* \sa setCpuCacheSizes */
template<typename LhsScalar, typename RhsScalar, int KcFactor>
void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrdiff_t& n)
template<typename LhsScalar, typename RhsScalar, int KcFactor, typename SizeType>
void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n)
{
EIGEN_UNUSED_VARIABLE(n);
// Explanations:

@ -91,13 +91,13 @@ void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrd
};

manage_caching_sizes(GetAction, &l1, &l2);
k = std::min<std::ptrdiff_t>(k, l1/kdiv);
std::ptrdiff_t _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0;
k = std::min<SizeType>(k, l1/kdiv);
SizeType _m = k>0 ? l2/(4 * sizeof(LhsScalar) * k) : 0;
if(_m<m) m = _m & mr_mask;
}

template<typename LhsScalar, typename RhsScalar>
inline void computeProductBlockingSizes(std::ptrdiff_t& k, std::ptrdiff_t& m, std::ptrdiff_t& n)
template<typename LhsScalar, typename RhsScalar, typename SizeType>
inline void computeProductBlockingSizes(SizeType& k, SizeType& m, SizeType& n)
{
computeProductBlockingSizes<LhsScalar,RhsScalar,1>(k, m, n);
}
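With SizeType now a template parameter, the helper accepts whatever index type the caller uses. A hedged usage sketch; this is an internal API, so the exact signature may differ between releases:

#include <Eigen/Core>
#include <cstddef>
#include <iostream>

int main()
{
  // Requested panel sizes for a double*double product; the call clips them
  // to what the detected L1/L2 caches can hold.
  std::ptrdiff_t k = 4096, m = 4096, n = 4096;
  Eigen::internal::computeProductBlockingSizes<double, double>(k, m, n);
  std::cout << "kc=" << k << " mc=" << m << std::endl;
}
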
@ -529,7 +529,14 @@ struct gebp_kernel

EIGEN_DONT_INLINE
void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha,
Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB = 0)
Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0, RhsScalar* unpackedB=0);
};

template<typename LhsScalar, typename RhsScalar, typename Index, int mr, int nr, bool ConjugateLhs, bool ConjugateRhs>
EIGEN_DONT_INLINE
void gebp_kernel<LhsScalar,RhsScalar,Index,mr,nr,ConjugateLhs,ConjugateRhs>
::operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha,
Index strideA, Index strideB, Index offsetA, Index offsetB, RhsScalar* unpackedB)
{
Traits traits;

@ -1089,7 +1096,7 @@ EIGEN_ASM_COMMENT("mybegin4");
}
}
}
};


#undef CJMADD

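This hunk, like the pack_lhs/pack_rhs ones below, splits the heavyweight operator() into an in-class declaration plus an out-of-line definition, so the body is no longer implicitly inline inside the class; together with EIGEN_DONT_INLINE this plausibly reins in inlining-driven compile times and code size for the hot kernels. The pattern in miniature:

template<typename T>
struct kernel {
  void operator()(T* out, const T* in, int n);  // declaration only
};

// Out-of-line definition: not implicitly inline, one body per instantiation.
template<typename T>
void kernel<T>::operator()(T* out, const T* in, int n)
{
  for (int i = 0; i < n; ++i) out[i] = in[i];
}
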
@ -1110,81 +1117,84 @@
template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
struct gemm_pack_lhs
{
EIGEN_DONT_INLINE void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows,
Index stride=0, Index offset=0)
{
typedef typename packet_traits<Scalar>::type Packet;
enum { PacketSize = packet_traits<Scalar>::size };

EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) );
conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
Index count = 0;
Index peeled_mc = (rows/Pack1)*Pack1;
for(Index i=0; i<peeled_mc; i+=Pack1)
{
if(PanelMode) count += Pack1 * offset;

if(StorageOrder==ColMajor)
{
for(Index k=0; k<depth; k++)
{
Packet A, B, C, D;
if(Pack1>=1*PacketSize) A = ploadu<Packet>(&lhs(i+0*PacketSize, k));
if(Pack1>=2*PacketSize) B = ploadu<Packet>(&lhs(i+1*PacketSize, k));
if(Pack1>=3*PacketSize) C = ploadu<Packet>(&lhs(i+2*PacketSize, k));
if(Pack1>=4*PacketSize) D = ploadu<Packet>(&lhs(i+3*PacketSize, k));
if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; }
if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; }
if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; }
if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; }
}
}
else
{
for(Index k=0; k<depth; k++)
{
// TODO add a vectorized transpose here
Index w=0;
for(; w<Pack1-3; w+=4)
{
Scalar a(cj(lhs(i+w+0, k))),
b(cj(lhs(i+w+1, k))),
c(cj(lhs(i+w+2, k))),
d(cj(lhs(i+w+3, k)));
blockA[count++] = a;
blockA[count++] = b;
blockA[count++] = c;
blockA[count++] = d;
}
if(Pack1%4)
for(;w<Pack1;++w)
blockA[count++] = cj(lhs(i+w, k));
}
}
if(PanelMode) count += Pack1 * (stride-offset-depth);
}
if(rows-peeled_mc>=Pack2)
{
if(PanelMode) count += Pack2*offset;
for(Index k=0; k<depth; k++)
for(Index w=0; w<Pack2; w++)
blockA[count++] = cj(lhs(peeled_mc+w, k));
if(PanelMode) count += Pack2 * (stride-offset-depth);
peeled_mc += Pack2;
}
for(Index i=peeled_mc; i<rows; i++)
{
if(PanelMode) count += offset;
for(Index k=0; k<depth; k++)
blockA[count++] = cj(lhs(i, k));
if(PanelMode) count += (stride-offset-depth);
}
}
EIGEN_DONT_INLINE void operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows, Index stride=0, Index offset=0);
};

template<typename Scalar, typename Index, int Pack1, int Pack2, int StorageOrder, bool Conjugate, bool PanelMode>
EIGEN_DONT_INLINE void gemm_pack_lhs<Scalar, Index, Pack1, Pack2, StorageOrder, Conjugate, PanelMode>
::operator()(Scalar* blockA, const Scalar* EIGEN_RESTRICT _lhs, Index lhsStride, Index depth, Index rows, Index stride, Index offset)
{
typedef typename packet_traits<Scalar>::type Packet;
enum { PacketSize = packet_traits<Scalar>::size };

EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK LHS");
eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
eigen_assert( (StorageOrder==RowMajor) || ((Pack1%PacketSize)==0 && Pack1<=4*PacketSize) );
conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
const_blas_data_mapper<Scalar, Index, StorageOrder> lhs(_lhs,lhsStride);
Index count = 0;
Index peeled_mc = (rows/Pack1)*Pack1;
for(Index i=0; i<peeled_mc; i+=Pack1)
{
if(PanelMode) count += Pack1 * offset;

if(StorageOrder==ColMajor)
{
for(Index k=0; k<depth; k++)
{
Packet A, B, C, D;
if(Pack1>=1*PacketSize) A = ploadu<Packet>(&lhs(i+0*PacketSize, k));
if(Pack1>=2*PacketSize) B = ploadu<Packet>(&lhs(i+1*PacketSize, k));
if(Pack1>=3*PacketSize) C = ploadu<Packet>(&lhs(i+2*PacketSize, k));
if(Pack1>=4*PacketSize) D = ploadu<Packet>(&lhs(i+3*PacketSize, k));
if(Pack1>=1*PacketSize) { pstore(blockA+count, cj.pconj(A)); count+=PacketSize; }
if(Pack1>=2*PacketSize) { pstore(blockA+count, cj.pconj(B)); count+=PacketSize; }
if(Pack1>=3*PacketSize) { pstore(blockA+count, cj.pconj(C)); count+=PacketSize; }
if(Pack1>=4*PacketSize) { pstore(blockA+count, cj.pconj(D)); count+=PacketSize; }
}
}
else
{
for(Index k=0; k<depth; k++)
{
// TODO add a vectorized transpose here
Index w=0;
for(; w<Pack1-3; w+=4)
{
Scalar a(cj(lhs(i+w+0, k))),
b(cj(lhs(i+w+1, k))),
c(cj(lhs(i+w+2, k))),
d(cj(lhs(i+w+3, k)));
blockA[count++] = a;
blockA[count++] = b;
blockA[count++] = c;
blockA[count++] = d;
}
if(Pack1%4)
for(;w<Pack1;++w)
blockA[count++] = cj(lhs(i+w, k));
}
}
if(PanelMode) count += Pack1 * (stride-offset-depth);
}
if(rows-peeled_mc>=Pack2)
{
if(PanelMode) count += Pack2*offset;
for(Index k=0; k<depth; k++)
for(Index w=0; w<Pack2; w++)
blockA[count++] = cj(lhs(peeled_mc+w, k));
if(PanelMode) count += Pack2 * (stride-offset-depth);
peeled_mc += Pack2;
}
for(Index i=peeled_mc; i<rows; i++)
{
if(PanelMode) count += offset;
for(Index k=0; k<depth; k++)
blockA[count++] = cj(lhs(i, k));
if(PanelMode) count += (stride-offset-depth);
}
}
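What the packing routine does, reduced to its simplest case (the trailing per-row loop above: column-major storage, no conjugation, no panel mode). A hedged sketch with hypothetical names:

// Copy a rows x depth panel of a column-major lhs into contiguous storage,
// row by row, as gemm_pack_lhs does for the leftover rows.
template<typename Scalar, typename Index>
void pack_lhs_naive(Scalar* blockA, const Scalar* lhs, Index lhsStride,
                    Index depth, Index rows)
{
  Index count = 0;
  for (Index i = 0; i < rows; ++i)
    for (Index k = 0; k < depth; ++k)
      blockA[count++] = lhs[i + k*lhsStride];  // lhs(i,k), column-major
}
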
// copy a complete panel of the rhs
// this version is optimized for column major matrices
// The traversal order is as follows: (nr==4):

@ -1197,93 +1207,99 @@ struct gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
{
typedef typename packet_traits<Scalar>::type Packet;
enum { PacketSize = packet_traits<Scalar>::size };
EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
Index stride=0, Index offset=0)
{
EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
Index packet_cols = (cols/nr) * nr;
Index count = 0;
for(Index j2=0; j2<packet_cols; j2+=nr)
{
// skip what we have before
if(PanelMode) count += nr * offset;
const Scalar* b0 = &rhs[(j2+0)*rhsStride];
const Scalar* b1 = &rhs[(j2+1)*rhsStride];
const Scalar* b2 = &rhs[(j2+2)*rhsStride];
const Scalar* b3 = &rhs[(j2+3)*rhsStride];
for(Index k=0; k<depth; k++)
{
blockB[count+0] = cj(b0[k]);
blockB[count+1] = cj(b1[k]);
if(nr==4) blockB[count+2] = cj(b2[k]);
if(nr==4) blockB[count+3] = cj(b3[k]);
count += nr;
}
// skip what we have after
if(PanelMode) count += nr * (stride-offset-depth);
}

// copy the remaining columns one at a time (nr==1)
for(Index j2=packet_cols; j2<cols; ++j2)
{
if(PanelMode) count += offset;
const Scalar* b0 = &rhs[(j2+0)*rhsStride];
for(Index k=0; k<depth; k++)
{
blockB[count] = cj(b0[k]);
count += 1;
}
if(PanelMode) count += (stride-offset-depth);
}
}
EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0);
};

template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, ColMajor, Conjugate, PanelMode>
::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset)
{
EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS COLMAJOR");
eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
Index packet_cols = (cols/nr) * nr;
Index count = 0;
for(Index j2=0; j2<packet_cols; j2+=nr)
{
// skip what we have before
if(PanelMode) count += nr * offset;
const Scalar* b0 = &rhs[(j2+0)*rhsStride];
const Scalar* b1 = &rhs[(j2+1)*rhsStride];
const Scalar* b2 = &rhs[(j2+2)*rhsStride];
const Scalar* b3 = &rhs[(j2+3)*rhsStride];
for(Index k=0; k<depth; k++)
{
blockB[count+0] = cj(b0[k]);
blockB[count+1] = cj(b1[k]);
if(nr==4) blockB[count+2] = cj(b2[k]);
if(nr==4) blockB[count+3] = cj(b3[k]);
count += nr;
}
// skip what we have after
if(PanelMode) count += nr * (stride-offset-depth);
}

// copy the remaining columns one at a time (nr==1)
for(Index j2=packet_cols; j2<cols; ++j2)
{
if(PanelMode) count += offset;
const Scalar* b0 = &rhs[(j2+0)*rhsStride];
for(Index k=0; k<depth; k++)
{
blockB[count] = cj(b0[k]);
count += 1;
}
if(PanelMode) count += (stride-offset-depth);
}
}

// this version is optimized for row major matrices
template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
struct gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
{
enum { PacketSize = packet_traits<Scalar>::size };
EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols,
Index stride=0, Index offset=0)
{
EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
Index packet_cols = (cols/nr) * nr;
Index count = 0;
for(Index j2=0; j2<packet_cols; j2+=nr)
{
// skip what we have before
if(PanelMode) count += nr * offset;
for(Index k=0; k<depth; k++)
{
const Scalar* b0 = &rhs[k*rhsStride + j2];
blockB[count+0] = cj(b0[0]);
blockB[count+1] = cj(b0[1]);
if(nr==4) blockB[count+2] = cj(b0[2]);
if(nr==4) blockB[count+3] = cj(b0[3]);
count += nr;
}
// skip what we have after
if(PanelMode) count += nr * (stride-offset-depth);
}
// copy the remaining columns one at a time (nr==1)
for(Index j2=packet_cols; j2<cols; ++j2)
{
if(PanelMode) count += offset;
const Scalar* b0 = &rhs[j2];
for(Index k=0; k<depth; k++)
{
blockB[count] = cj(b0[k*rhsStride]);
count += 1;
}
if(PanelMode) count += stride-offset-depth;
}
}
EIGEN_DONT_INLINE void operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride=0, Index offset=0);
};

template<typename Scalar, typename Index, int nr, bool Conjugate, bool PanelMode>
EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, nr, RowMajor, Conjugate, PanelMode>
::operator()(Scalar* blockB, const Scalar* rhs, Index rhsStride, Index depth, Index cols, Index stride, Index offset)
{
EIGEN_ASM_COMMENT("EIGEN PRODUCT PACK RHS ROWMAJOR");
eigen_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride));
conj_if<NumTraits<Scalar>::IsComplex && Conjugate> cj;
Index packet_cols = (cols/nr) * nr;
Index count = 0;
for(Index j2=0; j2<packet_cols; j2+=nr)
{
// skip what we have before
if(PanelMode) count += nr * offset;
for(Index k=0; k<depth; k++)
{
const Scalar* b0 = &rhs[k*rhsStride + j2];
blockB[count+0] = cj(b0[0]);
blockB[count+1] = cj(b0[1]);
if(nr==4) blockB[count+2] = cj(b0[2]);
if(nr==4) blockB[count+3] = cj(b0[3]);
count += nr;
}
// skip what we have after
if(PanelMode) count += nr * (stride-offset-depth);
}
// copy the remaining columns one at a time (nr==1)
for(Index j2=packet_cols; j2<cols; ++j2)
{
if(PanelMode) count += offset;
const Scalar* b0 = &rhs[j2];
for(Index k=0; k<depth; k++)
{
blockB[count] = cj(b0[k*rhsStride]);
count += 1;
}
if(PanelMode) count += stride-offset-depth;
}
}

} // end namespace internal

/** \returns the currently set level 1 cpu cache size (in bytes) used to estimate the ideal blocking size parameters.
|
||||
|
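For readers skimming the diff, the packing loop above is easier to see in isolation. A minimal standalone sketch of the nr==4 column-major rhs packing (names and the fixed nr are illustrative, conjugation and panel offsets omitted; this is not Eigen's actual template machinery):

#include <cstddef>

// Pack 4 columns of a column-major rhs into a contiguous block:
// for each k, the 4 coefficients rhs(k, j2..j2+3) end up adjacent,
// which is exactly the interleaved layout the gebp kernel consumes.
template <typename Scalar>
void pack_rhs4(Scalar* blockB, const Scalar* rhs, std::ptrdiff_t rhsStride,
               std::ptrdiff_t depth, std::ptrdiff_t j2)
{
  const Scalar* b0 = &rhs[(j2+0)*rhsStride];
  const Scalar* b1 = &rhs[(j2+1)*rhsStride];
  const Scalar* b2 = &rhs[(j2+2)*rhsStride];
  const Scalar* b3 = &rhs[(j2+3)*rhsStride];
  std::ptrdiff_t count = 0;
  for(std::ptrdiff_t k=0; k<depth; ++k)
  {
    blockB[count+0] = b0[k];
    blockB[count+1] = b1[k];
    blockB[count+2] = b2[k];
    blockB[count+3] = b3[k];
    count += 4;
  }
}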
@ -50,6 +50,7 @@ template<
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs>
struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor>
{

typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static void run(Index rows, Index cols, Index depth,
const LhsScalar* _lhs, Index lhsStride,
@ -169,7 +170,6 @@ static void run(Index rows, Index cols, Index depth,
// vertical panel which is, in practice, a very low number.
pack_rhs(blockB, &rhs(k2,0), rhsStride, actual_kc, cols);


// For each mc x kc block of the lhs's vertical panel...
// (==GEPP_VAR1)
for(Index i2=0; i2<rows; i2+=mc)
@ -183,7 +183,6 @@ static void run(Index rows, Index cols, Index depth,

// Everything is packed, we can now call the block * panel kernel:
gebp(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols, alpha, -1, -1, 0, 0, blockW);

}
}
}
@ -204,7 +203,7 @@ struct traits<GeneralProduct<Lhs,Rhs,GemmProduct> >
template<typename Scalar, typename Index, typename Gemm, typename Lhs, typename Rhs, typename Dest, typename BlockingType>
struct gemm_functor
{
gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, Scalar actualAlpha,
gemm_functor(const Lhs& lhs, const Rhs& rhs, Dest& dest, const Scalar& actualAlpha,
BlockingType& blocking)
: m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha), m_blocking(blocking)
{}
@ -395,7 +394,7 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
EIGEN_CHECK_BINARY_COMPATIBILIY(BinOp,LhsScalar,RhsScalar);
}

template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());


@ -12,6 +12,9 @@

namespace Eigen {

template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs>
struct selfadjoint_rank1_update;

namespace internal {

/**********************************************************************
@ -39,7 +42,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha)
const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha)
{
general_matrix_matrix_triangular_product<Index,
RhsScalar, RhsStorageOrder==RowMajor ? ColMajor : RowMajor, ConjugateRhs,
@ -55,7 +58,7 @@ struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsStride, ResScalar* res, Index resStride, ResScalar alpha)
const RhsScalar* _rhs, Index rhsStride, ResScalar* res, Index resStride, const ResScalar& alpha)
{
const_blas_data_mapper<LhsScalar, Index, LhsStorageOrder> lhs(_lhs,lhsStride);
const_blas_data_mapper<RhsScalar, Index, RhsStorageOrder> rhs(_rhs,rhsStride);
@ -133,7 +136,7 @@ struct tribb_kernel
enum {
BlockSize = EIGEN_PLAIN_ENUM_MAX(mr,nr)
};
void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, ResScalar alpha, RhsScalar* workspace)
void operator()(ResScalar* res, Index resStride, const LhsScalar* blockA, const RhsScalar* blockB, Index size, Index depth, const ResScalar& alpha, RhsScalar* workspace)
{
gebp_kernel<LhsScalar, RhsScalar, Index, mr, nr, ConjLhs, ConjRhs> gebp_kernel;
Matrix<ResScalar,BlockSize,BlockSize,ColMajor> buffer;
@ -180,31 +183,92 @@ struct tribb_kernel

// high level API

template<typename MatrixType, typename ProductType, int UpLo, bool IsOuterProduct>
struct general_product_to_triangular_selector;


template<typename MatrixType, typename ProductType, int UpLo>
struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,true>
{
static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha)
{
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;

typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;
typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());

typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs;
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());

Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());

enum {
StorageOrder = (internal::traits<MatrixType>::Flags&RowMajorBit) ? RowMajor : ColMajor,
UseLhsDirectly = _ActualLhs::InnerStrideAtCompileTime==1,
UseRhsDirectly = _ActualRhs::InnerStrideAtCompileTime==1
};

internal::gemv_static_vector_if<Scalar,Lhs::SizeAtCompileTime,Lhs::MaxSizeAtCompileTime,!UseLhsDirectly> static_lhs;
ei_declare_aligned_stack_constructed_variable(Scalar, actualLhsPtr, actualLhs.size(),
(UseLhsDirectly ? const_cast<Scalar*>(actualLhs.data()) : static_lhs.data()));
if(!UseLhsDirectly) Map<typename _ActualLhs::PlainObject>(actualLhsPtr, actualLhs.size()) = actualLhs;

internal::gemv_static_vector_if<Scalar,Rhs::SizeAtCompileTime,Rhs::MaxSizeAtCompileTime,!UseRhsDirectly> static_rhs;
ei_declare_aligned_stack_constructed_variable(Scalar, actualRhsPtr, actualRhs.size(),
(UseRhsDirectly ? const_cast<Scalar*>(actualRhs.data()) : static_rhs.data()));
if(!UseRhsDirectly) Map<typename _ActualRhs::PlainObject>(actualRhsPtr, actualRhs.size()) = actualRhs;


selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
LhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
RhsBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex>
::run(actualLhs.size(), mat.data(), mat.outerStride(), actualLhsPtr, actualRhsPtr, actualAlpha);
}
};

template<typename MatrixType, typename ProductType, int UpLo>
struct general_product_to_triangular_selector<MatrixType,ProductType,UpLo,false>
{
static void run(MatrixType& mat, const ProductType& prod, const typename MatrixType::Scalar& alpha)
{
typedef typename MatrixType::Index Index;

typedef typename internal::remove_all<typename ProductType::LhsNested>::type Lhs;
typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());

typedef typename internal::remove_all<typename ProductType::RhsNested>::type Rhs;
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());

typename ProductType::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());

internal::general_matrix_matrix_triangular_product<Index,
typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
::run(mat.cols(), actualLhs.cols(),
&actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
mat.data(), mat.outerStride(), actualAlpha);
}
};

template<typename MatrixType, unsigned int UpLo>
template<typename ProductDerived, typename _Lhs, typename _Rhs>
TriangularView<MatrixType,UpLo>& TriangularView<MatrixType,UpLo>::assignProduct(const ProductBase<ProductDerived, _Lhs,_Rhs>& prod, const Scalar& alpha)
{
typedef typename internal::remove_all<typename ProductDerived::LhsNested>::type Lhs;
typedef internal::blas_traits<Lhs> LhsBlasTraits;
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhs;
typedef typename internal::remove_all<ActualLhs>::type _ActualLhs;
typename internal::add_const_on_value_type<ActualLhs>::type actualLhs = LhsBlasTraits::extract(prod.lhs());

typedef typename internal::remove_all<typename ProductDerived::RhsNested>::type Rhs;
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhs;
typedef typename internal::remove_all<ActualRhs>::type _ActualRhs;
typename internal::add_const_on_value_type<ActualRhs>::type actualRhs = RhsBlasTraits::extract(prod.rhs());

typename ProductDerived::Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(prod.lhs().derived()) * RhsBlasTraits::extractScalarFactor(prod.rhs().derived());

internal::general_matrix_matrix_triangular_product<Index,
typename Lhs::Scalar, _ActualLhs::Flags&RowMajorBit ? RowMajor : ColMajor, LhsBlasTraits::NeedToConjugate,
typename Rhs::Scalar, _ActualRhs::Flags&RowMajorBit ? RowMajor : ColMajor, RhsBlasTraits::NeedToConjugate,
MatrixType::Flags&RowMajorBit ? RowMajor : ColMajor, UpLo>
::run(m_matrix.cols(), actualLhs.cols(),
&actualLhs.coeffRef(0,0), actualLhs.outerStride(), &actualRhs.coeffRef(0,0), actualRhs.outerStride(),
const_cast<Scalar*>(m_matrix.data()), m_matrix.outerStride(), actualAlpha);
general_product_to_triangular_selector<MatrixType, ProductDerived, UpLo, (_Lhs::ColsAtCompileTime==1) || (_Rhs::RowsAtCompileTime==1)>::run(m_matrix.const_cast_derived(), prod.derived(), alpha);

return *this;
}

@ -49,6 +49,18 @@ typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;

EIGEN_DONT_INLINE static void run(
Index rows, Index cols,
const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsIncr,
ResScalar* res, Index
#ifdef EIGEN_INTERNAL_DEBUGGING
resIncr
#endif
, RhsScalar alpha);
};

template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,ColMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>::run(
Index rows, Index cols,
const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsIncr,
@ -274,7 +286,6 @@ EIGEN_DONT_INLINE static void run(
} while(Vectorizable);
#undef _EIGEN_ACCUMULATE_PACKETS
}
};

/* Optimized row-major matrix * vector product:
* This algorithm processes 4 rows at once, which allows both to reduce
@ -308,6 +319,15 @@ typedef typename conditional<Vectorizable,_RhsPacket,RhsScalar>::type RhsPacket;
typedef typename conditional<Vectorizable,_ResPacket,ResScalar>::type ResPacket;

EIGEN_DONT_INLINE static void run(
Index rows, Index cols,
const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsIncr,
ResScalar* res, Index resIncr,
ResScalar alpha);
};

template<typename Index, typename LhsScalar, bool ConjugateLhs, typename RhsScalar, bool ConjugateRhs, int Version>
EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,RowMajor,ConjugateLhs,RhsScalar,ConjugateRhs,Version>::run(
Index rows, Index cols,
const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsIncr,
@ -545,7 +565,6 @@ EIGEN_DONT_INLINE static void run(

#undef _EIGEN_ACCUMULATE_PACKETS
}
};

} // end namespace internal

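The "4 rows at once" idea mentioned in the comment above can be sketched without any of the packet machinery: one pass over the rhs feeds four accumulators, so each rhs coefficient is loaded once instead of four times. A simplified scalar sketch (Eigen's real kernel additionally vectorizes and handles alignment):

// Row-major matrix * vector, processing 4 rows per pass so that each
// rhs[j] load is shared by four dot products.
#include <cstddef>
template <typename Scalar>
void gemv_rowmajor4(const Scalar* lhs, std::ptrdiff_t lhsStride,
                    const Scalar* rhs, Scalar* res,
                    std::ptrdiff_t rows, std::ptrdiff_t cols, Scalar alpha)
{
  std::ptrdiff_t i = 0;
  for(; i+3 < rows; i += 4)
  {
    Scalar t0 = Scalar(0), t1 = Scalar(0), t2 = Scalar(0), t3 = Scalar(0);
    for(std::ptrdiff_t j=0; j<cols; ++j)
    {
      const Scalar b = rhs[j];              // loaded once, used four times
      t0 += lhs[(i+0)*lhsStride + j] * b;
      t1 += lhs[(i+1)*lhsStride + j] * b;
      t2 += lhs[(i+2)*lhsStride + j] * b;
      t3 += lhs[(i+3)*lhsStride + j] * b;
    }
    res[i+0] += alpha*t0; res[i+1] += alpha*t1;
    res[i+2] += alpha*t2; res[i+3] += alpha*t3;
  }
  for(; i < rows; ++i)                      // leftover rows one at a time
  {
    Scalar t = Scalar(0);
    for(std::ptrdiff_t j=0; j<cols; ++j) t += lhs[i*lhsStride + j] * rhs[j];
    res[i] += alpha*t;
  }
}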
@ -53,7 +53,7 @@ struct general_matrix_vector_product_gemv :
#define EIGEN_MKL_GEMV_SPECIALIZE(Scalar) \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,ColMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
static EIGEN_DONT_INLINE void run( \
static void run( \
Index rows, Index cols, \
const Scalar* lhs, Index lhsStride, \
const Scalar* rhs, Index rhsIncr, \
@ -70,7 +70,7 @@ static EIGEN_DONT_INLINE void run( \
}; \
template<typename Index, bool ConjugateLhs, bool ConjugateRhs> \
struct general_matrix_vector_product<Index,Scalar,RowMajor,ConjugateLhs,Scalar,ConjugateRhs,Specialized> { \
static EIGEN_DONT_INLINE void run( \
static void run( \
Index rows, Index cols, \
const Scalar* lhs, Index lhsStride, \
const Scalar* rhs, Index rhsIncr, \
@ -92,7 +92,7 @@ struct general_matrix_vector_product_gemv<Index,EIGTYPE,LhsStorageOrder,Conjugat
{ \
typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> GEMVVector;\
\
static EIGEN_DONT_INLINE void run( \
static void run( \
Index rows, Index cols, \
const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* rhs, Index rhsIncr, \

@ -211,7 +211,7 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,Co
const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar alpha)
const Scalar& alpha)
{
product_selfadjoint_matrix<Scalar, Index,
EIGEN_LOGICAL_XOR(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
@ -234,7 +234,18 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar alpha)
const Scalar& alpha);
};

template <typename Scalar, typename Index,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs>
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor>::run(
Index rows, Index cols,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
const Scalar& alpha)
{
Index size = rows;

@ -301,7 +312,6 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs
}
}
}
};

// matrix * selfadjoint product
template <typename Scalar, typename Index,
@ -315,7 +325,18 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar alpha)
const Scalar& alpha);
};

template <typename Scalar, typename Index,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs>
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor>::run(
Index rows, Index cols,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
const Scalar& alpha)
{
Index size = cols;

@ -353,7 +374,6 @@ struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLh
}
}
}
};

} // end namespace internal

@ -383,7 +403,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,RhsMode,false>
RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
};

template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());

@ -23,7 +23,7 @@
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

//
********************************************************************************
* Content : Eigen bindings to Intel(R) MKL
* Self adjoint matrix * matrix product functionality based on ?SYMM/?HEMM.
@ -47,7 +47,7 @@ template <typename Index, \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
{\
\
static EIGEN_DONT_INLINE void run( \
static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
@ -98,7 +98,7 @@ template <typename Index, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,true,ConjugateLhs,RhsStorageOrder,false,ConjugateRhs,ColMajor> \
{\
static EIGEN_DONT_INLINE void run( \
static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
@ -174,7 +174,7 @@ template <typename Index, \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
{\
\
static EIGEN_DONT_INLINE void run( \
static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
@ -224,7 +224,7 @@ template <typename Index, \
int RhsStorageOrder, bool ConjugateRhs> \
struct product_selfadjoint_matrix<EIGTYPE,Index,LhsStorageOrder,false,ConjugateLhs,RhsStorageOrder,true,ConjugateRhs,ColMajor> \
{\
static EIGEN_DONT_INLINE void run( \
static void run( \
Index rows, Index cols, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \

@ -28,6 +28,15 @@ struct selfadjoint_matrix_vector_product

{
static EIGEN_DONT_INLINE void run(
Index size,
const Scalar* lhs, Index lhsStride,
const Scalar* _rhs, Index rhsIncr,
Scalar* res,
Scalar alpha);
};

template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs, int Version>
EIGEN_DONT_INLINE void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Version>::run(
Index size,
const Scalar* lhs, Index lhsStride,
const Scalar* _rhs, Index rhsIncr,
@ -35,7 +44,6 @@ static EIGEN_DONT_INLINE void run(
Scalar alpha)
{
typedef typename packet_traits<Scalar>::type Packet;
typedef typename NumTraits<Scalar>::Real RealScalar;
const Index PacketSize = sizeof(Packet)/sizeof(Scalar);

enum {
@ -153,7 +161,6 @@ static EIGEN_DONT_INLINE void run(
res[j] += alpha * t2;
}
}
};

} // end namespace internal

@ -180,7 +187,7 @@ struct SelfadjointProductMatrix<Lhs,LhsMode,false,Rhs,0,true>

SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}

template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
{
typedef typename Dest::Scalar ResScalar;
typedef typename Base::RhsScalar RhsScalar;
@ -260,7 +267,7 @@ struct SelfadjointProductMatrix<Lhs,0,true,Rhs,RhsMode,false>

SelfadjointProductMatrix(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}

template<typename Dest> void scaleAndAddTo(Dest& dest, Scalar alpha) const
template<typename Dest> void scaleAndAddTo(Dest& dest, const Scalar& alpha) const
{
// let's simply transpose the product
Transpose<Dest> destT(dest);

@ -50,7 +50,7 @@ struct selfadjoint_matrix_vector_product_symv :
#define EIGEN_MKL_SYMV_SPECIALIZE(Scalar) \
template<typename Index, int StorageOrder, int UpLo, bool ConjugateLhs, bool ConjugateRhs> \
struct selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Specialized> { \
static EIGEN_DONT_INLINE void run( \
static void run( \
Index size, const Scalar* lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* res, Scalar alpha) { \
enum {\
@ -77,7 +77,7 @@ struct selfadjoint_matrix_vector_product_symv<EIGTYPE,Index,StorageOrder,UpLo,Co
{ \
typedef Matrix<EIGTYPE,Dynamic,1,ColMajor> SYMVVector;\
\
static EIGEN_DONT_INLINE void run( \
static void run( \
Index size, const EIGTYPE* lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* res, EIGTYPE alpha) \
{ \

@ -18,21 +18,19 @@

namespace Eigen {

template<typename Scalar, typename Index, int StorageOrder, int UpLo, bool ConjLhs, bool ConjRhs>
struct selfadjoint_rank1_update;

template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>
struct selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo,ConjLhs,ConjRhs>
{
static void run(Index size, Scalar* mat, Index stride, const Scalar* vec, Scalar alpha)
static void run(Index size, Scalar* mat, Index stride, const Scalar* vecX, const Scalar* vecY, const Scalar& alpha)
{
internal::conj_if<ConjRhs> cj;
typedef Map<const Matrix<Scalar,Dynamic,1> > OtherMap;
typedef typename internal::conditional<ConjLhs,typename OtherMap::ConjugateReturnType,const OtherMap&>::type ConjRhsType;
typedef typename internal::conditional<ConjLhs,typename OtherMap::ConjugateReturnType,const OtherMap&>::type ConjLhsType;
for (Index i=0; i<size; ++i)
{
Map<Matrix<Scalar,Dynamic,1> >(mat+stride*i+(UpLo==Lower ? i : 0), (UpLo==Lower ? size-i : (i+1)))
+= (alpha * cj(vec[i])) * ConjRhsType(OtherMap(vec+(UpLo==Lower ? i : 0),UpLo==Lower ? size-i : (i+1)));
+= (alpha * cj(vecY[i])) * ConjLhsType(OtherMap(vecX+(UpLo==Lower ? i : 0),UpLo==Lower ? size-i : (i+1)));
}
}
};
@ -40,9 +38,9 @@ struct selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo,ConjLhs,ConjRhs>
template<typename Scalar, typename Index, int UpLo, bool ConjLhs, bool ConjRhs>
struct selfadjoint_rank1_update<Scalar,Index,RowMajor,UpLo,ConjLhs,ConjRhs>
{
static void run(Index size, Scalar* mat, Index stride, const Scalar* vec, Scalar alpha)
static void run(Index size, Scalar* mat, Index stride, const Scalar* vecX, const Scalar* vecY, const Scalar& alpha)
{
selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo==Lower?Upper:Lower,ConjRhs,ConjLhs>::run(size,mat,stride,vec,alpha);
selfadjoint_rank1_update<Scalar,Index,ColMajor,UpLo==Lower?Upper:Lower,ConjRhs,ConjLhs>::run(size,mat,stride,vecY,vecX,alpha);
}
};

@ -52,7 +50,7 @@ struct selfadjoint_product_selector;
template<typename MatrixType, typename OtherType, int UpLo>
struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
{
static void run(MatrixType& mat, const OtherType& other, typename MatrixType::Scalar alpha)
static void run(MatrixType& mat, const OtherType& other, const typename MatrixType::Scalar& alpha)
{
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
@ -78,14 +76,14 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,true>
selfadjoint_rank1_update<Scalar,Index,StorageOrder,UpLo,
OtherBlasTraits::NeedToConjugate && NumTraits<Scalar>::IsComplex,
(!OtherBlasTraits::NeedToConjugate) && NumTraits<Scalar>::IsComplex>
::run(other.size(), mat.data(), mat.outerStride(), actualOtherPtr, actualAlpha);
::run(other.size(), mat.data(), mat.outerStride(), actualOtherPtr, actualOtherPtr, actualAlpha);
}
};

template<typename MatrixType, typename OtherType, int UpLo>
struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
{
static void run(MatrixType& mat, const OtherType& other, typename MatrixType::Scalar alpha)
static void run(MatrixType& mat, const OtherType& other, const typename MatrixType::Scalar& alpha)
{
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::Index Index;
@ -113,7 +111,7 @@ struct selfadjoint_product_selector<MatrixType,OtherType,UpLo,false>
template<typename MatrixType, unsigned int UpLo>
template<typename DerivedU>
SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
::rankUpdate(const MatrixBase<DerivedU>& u, Scalar alpha)
::rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha)
{
selfadjoint_product_selector<MatrixType,DerivedU,UpLo>::run(_expression().const_cast_derived(), u.derived(), alpha);

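At the user level, the entry point touched here is SelfAdjointView::rankUpdate; a quick usage sketch of the rank-1 form (A <- A + alpha*u*u^*, only the selected triangle is written):

#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Zero(4,4);
  Eigen::VectorXd u = Eigen::VectorXd::Random(4);
  // Updates only the lower triangle: A(lower) += 2.0 * u * u^T
  A.selfadjointView<Eigen::Lower>().rankUpdate(u, 2.0);
  return 0;
}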
@ -24,7 +24,7 @@ struct selfadjoint_rank2_update_selector;
template<typename Scalar, typename Index, typename UType, typename VType>
struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
{
static void run(Scalar* mat, Index stride, const UType& u, const VType& v, Scalar alpha)
static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
{
const Index size = u.size();
for (Index i=0; i<size; ++i)
@ -39,7 +39,7 @@ struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Lower>
template<typename Scalar, typename Index, typename UType, typename VType>
struct selfadjoint_rank2_update_selector<Scalar,Index,UType,VType,Upper>
{
static void run(Scalar* mat, Index stride, const UType& u, const VType& v, Scalar alpha)
static void run(Scalar* mat, Index stride, const UType& u, const VType& v, const Scalar& alpha)
{
const Index size = u.size();
for (Index i=0; i<size; ++i)
@ -58,7 +58,7 @@ template<bool Cond, typename T> struct conj_expr_if
template<typename MatrixType, unsigned int UpLo>
template<typename DerivedU, typename DerivedV>
SelfAdjointView<MatrixType,UpLo>& SelfAdjointView<MatrixType,UpLo>
::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, Scalar alpha)
::rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha)
{
typedef internal::blas_traits<DerivedU> UBlasTraits;
typedef typename UBlasTraits::DirectLinearAccessType ActualUType;

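The rank-2 overload whose signature the hunk above touches updates A <- A + alpha*u*v^* + conj(alpha)*v*u^*; for example:

#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Zero(4,4);
  Eigen::VectorXd u = Eigen::VectorXd::Random(4), v = Eigen::VectorXd::Random(4);
  // Rank-2 update of the upper triangle: A(upper) += 0.5*u*v^T + 0.5*v*u^T
  A.selfadjointView<Eigen::Upper>().rankUpdate(u, v, 0.5);
  return 0;
}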
@ -61,7 +61,7 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,LhsIsTriangular,
const Scalar* lhs, Index lhsStride,
const Scalar* rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
product_triangular_matrix_matrix<Scalar, Index,
(Mode&(UnitDiag|ZeroDiag)) | ((Mode&Upper) ? Lower : Upper),
@ -96,7 +96,20 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
};

template <typename Scalar, typename Index, int Mode,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs, int Version>
EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,true,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
Index _rows, Index _cols, Index _depth,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
// strip zeros
Index diagSize = (std::min)(_rows,_depth);
@ -203,15 +216,14 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,true,
}
}
}
};

// implements col-major += alpha * op(general) * op(triangular)
template <typename Scalar, typename Index, int Mode,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs, int Version>
struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,ColMajor,Version>
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,ColMajor,Version>
{
typedef gebp_traits<Scalar,Scalar> Traits;
enum {
@ -225,7 +237,20 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
Scalar alpha, level3_blocking<Scalar,Scalar>& blocking)
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
};

template <typename Scalar, typename Index, int Mode,
int LhsStorageOrder, bool ConjugateLhs,
int RhsStorageOrder, bool ConjugateRhs, int Version>
EIGEN_DONT_INLINE void product_triangular_matrix_matrix<Scalar,Index,Mode,false,
LhsStorageOrder,ConjugateLhs,
RhsStorageOrder,ConjugateRhs,ColMajor,Version>::run(
Index _rows, Index _cols, Index _depth,
const Scalar* _lhs, Index lhsStride,
const Scalar* _rhs, Index rhsStride,
Scalar* res, Index resStride,
const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
{
// strip zeros
Index diagSize = (std::min)(_cols,_depth);
@ -343,7 +368,6 @@ struct product_triangular_matrix_matrix<Scalar,Index,Mode,false,
}
}
}
};

/***************************************************************************
* Wrapper to product_triangular_matrix_matrix
@ -364,7 +388,7 @@ struct TriangularProduct<Mode,LhsIsTriangular,Lhs,false,Rhs,false>

TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}

template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
typename internal::add_const_on_value_type<ActualLhsType>::type lhs = LhsBlasTraits::extract(m_lhs);
typename internal::add_const_on_value_type<ActualRhsType>::type rhs = RhsBlasTraits::extract(m_rhs);

@ -91,7 +91,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,true, \
conjA = ((LhsStorageOrder==ColMajor) && ConjugateLhs) ? 1 : 0 \
}; \
\
static EIGEN_DONT_INLINE void run( \
static void run( \
Index _rows, Index _cols, Index _depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \
@ -205,7 +205,7 @@ struct product_triangular_matrix_matrix_trmm<EIGTYPE,Index,Mode,false, \
conjA = ((RhsStorageOrder==ColMajor) && ConjugateRhs) ? 1 : 0 \
}; \
\
static EIGEN_DONT_INLINE void run( \
static void run( \
Index _rows, Index _cols, Index _depth, \
const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsStride, \

@ -27,7 +27,13 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
};
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha);
};

template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>
::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
Index size = (std::min)(_rows,_cols);
@ -78,7 +84,6 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
_res, resIncr, alpha);
}
}
};

template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
@ -89,8 +94,14 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
HasZeroDiag = (Mode & ZeroDiag)==ZeroDiag
};
static void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, ResScalar alpha)
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha);
};

template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
::run(Index _rows, Index _cols, const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsIncr, ResScalar* _res, Index resIncr, const ResScalar& alpha)
{
static const Index PanelWidth = EIGEN_TUNE_TRIANGULAR_PANEL_WIDTH;
Index diagSize = (std::min)(_rows,_cols);
@ -141,7 +152,6 @@ struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,C
&res.coeffRef(diagSize), resIncr, alpha);
}
}
};

/***************************************************************************
* Wrapper to product_triangular_vector
@ -171,7 +181,7 @@ struct TriangularProduct<Mode,true,Lhs,false,Rhs,true>

TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}

template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());

@ -187,7 +197,7 @@ struct TriangularProduct<Mode,false,Lhs,true,Rhs,false>

TriangularProduct(const Lhs& lhs, const Rhs& rhs) : Base(lhs,rhs) {}

template<typename Dest> void scaleAndAddTo(Dest& dst, Scalar alpha) const
template<typename Dest> void scaleAndAddTo(Dest& dst, const Scalar& alpha) const
{
eigen_assert(dst.rows()==m_lhs.rows() && dst.cols()==m_rhs.cols());

@ -205,7 +215,7 @@ namespace internal {
template<> struct trmv_selector<ColMajor>
{
template<int Mode, typename Lhs, typename Rhs, typename Dest>
static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar alpha)
static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, const typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar& alpha)
{
typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType;
typedef typename ProductType::Index Index;
@ -246,7 +256,7 @@ template<> struct trmv_selector<ColMajor>
if(!evalToDest)
{
#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
int size = dest.size();
Index size = dest.size();
EIGEN_DENSE_STORAGE_CTOR_PLUGIN
#endif
if(!alphaIsCompatible)
@ -281,7 +291,7 @@ template<> struct trmv_selector<ColMajor>
template<> struct trmv_selector<RowMajor>
{
template<int Mode, typename Lhs, typename Rhs, typename Dest>
static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar alpha)
static void run(const TriangularProduct<Mode,true,Lhs,false,Rhs,true>& prod, Dest& dest, const typename TriangularProduct<Mode,true,Lhs,false,Rhs,true>::Scalar& alpha)
{
typedef TriangularProduct<Mode,true,Lhs,false,Rhs,true> ProductType;
typedef typename ProductType::LhsScalar LhsScalar;

@ -50,7 +50,7 @@ struct triangular_matrix_vector_product_trmv :
#define EIGEN_MKL_TRMV_SPECIALIZE(Scalar) \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor,Specialized> { \
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,ColMajor>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
@ -58,7 +58,7 @@ struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs
}; \
template<typename Index, int Mode, bool ConjLhs, bool ConjRhs> \
struct triangular_matrix_vector_product<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor,Specialized> { \
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
static void run(Index _rows, Index _cols, const Scalar* _lhs, Index lhsStride, \
const Scalar* _rhs, Index rhsIncr, Scalar* _res, Index resIncr, Scalar alpha) { \
triangular_matrix_vector_product_trmv<Index,Mode,Scalar,ConjLhs,Scalar,ConjRhs,RowMajor>::run( \
_rows, _cols, _lhs, lhsStride, _rhs, rhsIncr, _res, resIncr, alpha); \
@ -81,8 +81,8 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper \
}; \
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
{ \
if (ConjLhs || IsZeroDiag) { \
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,ColMajor,BuiltIn>::run( \
@ -166,8 +166,8 @@ struct triangular_matrix_vector_product_trmv<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
LowUp = IsLower ? Lower : Upper \
}; \
static EIGEN_DONT_INLINE void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
static void run(Index _rows, Index _cols, const EIGTYPE* _lhs, Index lhsStride, \
const EIGTYPE* _rhs, Index rhsIncr, EIGTYPE* _res, Index resIncr, EIGTYPE alpha) \
{ \
if (IsZeroDiag) { \
triangular_matrix_vector_product<Index,Mode,EIGTYPE,ConjLhs,EIGTYPE,ConjRhs,RowMajor,BuiltIn>::run( \

@ -18,7 +18,7 @@ namespace internal {
template <typename Scalar, typename Index, int Side, int Mode, bool Conjugate, int TriStorageOrder>
struct triangular_solve_matrix<Scalar,Index,Side,Mode,Conjugate,TriStorageOrder,RowMajor>
{
static EIGEN_DONT_INLINE void run(
static void run(
Index size, Index cols,
const Scalar* tri, Index triStride,
Scalar* _other, Index otherStride,
@ -39,6 +39,13 @@ template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStor
struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>
{
static EIGEN_DONT_INLINE void run(
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking);
};
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageOrder,ColMajor>::run(
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherStride,
@ -173,7 +180,6 @@ struct triangular_solve_matrix<Scalar,Index,OnTheLeft,Mode,Conjugate,TriStorageO
}
}
}
};

/* Optimized triangular solver with multiple left hand sides and the triangular matrix on the right
*/
@ -181,6 +187,13 @@ template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStor
struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>
{
static EIGEN_DONT_INLINE void run(
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherStride,
level3_blocking<Scalar,Scalar>& blocking);
};
template <typename Scalar, typename Index, int Mode, bool Conjugate, int TriStorageOrder>
EIGEN_DONT_INLINE void triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorageOrder,ColMajor>::run(
Index size, Index otherSize,
const Scalar* _tri, Index triStride,
Scalar* _other, Index otherStride,
@ -308,7 +321,6 @@ struct triangular_solve_matrix<Scalar,Index,OnTheRight,Mode,Conjugate,TriStorage
}
}
}
};

} // end namespace internal

@ -48,7 +48,7 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheLeft,Mode,Conjugate,TriStorage
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
}; \
static EIGEN_DONT_INLINE void run( \
static void run( \
Index size, Index otherSize, \
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \
@ -103,7 +103,7 @@ struct triangular_solve_matrix<EIGTYPE,Index,OnTheRight,Mode,Conjugate,TriStorag
IsZeroDiag = (Mode&ZeroDiag) ? 1 : 0, \
conjA = ((TriStorageOrder==ColMajor) && Conjugate) ? 1 : 0 \
}; \
static EIGEN_DONT_INLINE void run( \
static void run( \
Index size, Index otherSize, \
const EIGTYPE* _tri, Index triStride, \
EIGTYPE* _other, Index otherStride, level3_blocking<EIGTYPE,EIGTYPE>& /*blocking*/) \

@ -13,7 +13,7 @@

#define EIGEN_WORLD_VERSION 3
#define EIGEN_MAJOR_VERSION 1
#define EIGEN_MINOR_VERSION 90
#define EIGEN_MINOR_VERSION 91

#define EIGEN_VERSION_AT_LEAST(x,y,z) (EIGEN_WORLD_VERSION>x || (EIGEN_WORLD_VERSION>=x && \
(EIGEN_MAJOR_VERSION>y || (EIGEN_MAJOR_VERSION>=y && \

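Client code typically consumes these macros through EIGEN_VERSION_AT_LEAST, e.g. to gate on the development version this hunk bumps to:

#include <Eigen/Core>

#if EIGEN_VERSION_AT_LEAST(3,1,91)
// Code paths that rely on post-3.1.90 behavior go here.
#endif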
@ -19,6 +19,10 @@
#ifndef EIGEN_MEMORY_H
#define EIGEN_MEMORY_H

#ifndef EIGEN_MALLOC_ALREADY_ALIGNED

// Try to determine automatically if malloc is already aligned.

// On 64-bit systems, glibc's malloc returns 16-byte-aligned pointers, see:
// http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html
// This is true at least since glibc 2.8.
@ -27,7 +31,7 @@
// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports" so it's indeed
// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
&& defined(__LP64__)
&& defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ )
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
#else
#define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
@ -52,6 +56,8 @@
#define EIGEN_MALLOC_ALREADY_ALIGNED 0
#endif

#endif

#if ((defined __QNXNTO__) || (defined _GNU_SOURCE) || ((defined _XOPEN_SOURCE) && (_XOPEN_SOURCE >= 600))) \
&& (defined _POSIX_ADVISORY_INFO) && (_POSIX_ADVISORY_INFO > 0)
#define EIGEN_HAS_POSIX_MEMALIGN 1
@ -88,11 +94,11 @@ inline void throw_std_bad_alloc()
/** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
* Fast, but wastes 16 additional bytes of memory. Does not throw any exception.
*/
inline void* handmade_aligned_malloc(size_t size)
inline void* handmade_aligned_malloc(std::size_t size)
{
void *original = std::malloc(size+16);
if (original == 0) return 0;
void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(15))) + 16);
*(reinterpret_cast<void**>(aligned) - 1) = original;
return aligned;
}
@ -108,13 +114,18 @@ inline void handmade_aligned_free(void *ptr)
* Since we know that our handmade version is based on std::realloc
* we can use std::realloc to implement efficient reallocation.
*/
inline void* handmade_aligned_realloc(void* ptr, size_t size, size_t = 0)
inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t = 0)
{
if (ptr == 0) return handmade_aligned_malloc(size);
void *original = *(reinterpret_cast<void**>(ptr) - 1);
std::ptrdiff_t previous_offset = static_cast<char *>(ptr)-static_cast<char *>(original);
original = std::realloc(original,size+16);
if (original == 0) return 0;
void *aligned = reinterpret_cast<void*>((reinterpret_cast<size_t>(original) & ~(size_t(15))) + 16);
void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(15))) + 16);
void *previous_aligned = static_cast<char *>(original)+previous_offset;
if(aligned!=previous_aligned)
std::memmove(aligned, previous_aligned, size);

*(reinterpret_cast<void**>(aligned) - 1) = original;
return aligned;
}
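The over-allocate-and-round trick used by handmade_aligned_malloc above is worth seeing in isolation: allocate size+16, round the result up to the next 16-byte boundary (there is always room, because an already-aligned pointer is never returned directly), and stash the original pointer just below the aligned one so free can recover it. A standalone sketch, using std::uintptr_t for the pointer arithmetic where the Eigen code casts through std::size_t:

#include <cstdlib>
#include <cassert>
#include <cstdint>

void* my_aligned_malloc(std::size_t size)
{
  void* original = std::malloc(size + 16);
  if (original == 0) return 0;
  // Round up to the next multiple of 16; the "+16" guarantees at least
  // 8 spare bytes below the aligned address to store the original pointer.
  void* aligned = reinterpret_cast<void*>(
      (reinterpret_cast<std::uintptr_t>(original) & ~std::uintptr_t(15)) + 16);
  *(reinterpret_cast<void**>(aligned) - 1) = original;  // stash for free()
  return aligned;
}

void my_aligned_free(void* ptr)
{
  if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));
}

int main()
{
  void* p = my_aligned_malloc(100);
  assert(reinterpret_cast<std::uintptr_t>(p) % 16 == 0);
  my_aligned_free(p);
  return 0;
}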
@ -123,7 +134,7 @@ inline void* handmade_aligned_realloc(void* ptr, size_t size, size_t = 0)
*** Implementation of generic aligned realloc (when no realloc can be used)***
*****************************************************************************/

void* aligned_malloc(size_t size);
void* aligned_malloc(std::size_t size);
void aligned_free(void *ptr);

/** \internal
@ -227,7 +238,7 @@ inline void aligned_free(void *ptr)
std::free(ptr);
#elif EIGEN_HAS_MM_MALLOC
_mm_free(ptr);
#elif defined(_MSC_VER)
#elif defined(_MSC_VER) && (!defined(_WIN32_WCE))
_aligned_free(ptr);
#else
handmade_aligned_free(ptr);
@ -446,7 +457,6 @@ template<typename T, bool Align> inline void conditional_aligned_delete_auto(T *
template<typename Scalar, typename Index>
static inline Index first_aligned(const Scalar* array, Index size)
{
typedef typename packet_traits<Scalar>::type Packet;
enum { PacketSize = packet_traits<Scalar>::size,
PacketAlignedMask = PacketSize-1
};
@ -745,11 +755,16 @@ public:
# if defined(__PIC__) && defined(__i386__)
// Case for x86 with PIC
# define EIGEN_CPUID(abcd,func,id) \
__asm__ __volatile__ ("xchgl %%ebx, %%esi;cpuid; xchgl %%ebx,%%esi": "=a" (abcd[0]), "=S" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
__asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
# elif defined(__PIC__) && defined(__x86_64__)
// Case for x64 with PIC. In theory this is only a problem with recent gcc and with medium or large code model, not with the default small code model.
// However, we cannot detect which code model is used, and the xchg overhead is negligible anyway.
# define EIGEN_CPUID(abcd,func,id) \
__asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id));
# else
// Case for x86_64 or x86 w/o PIC
# define EIGEN_CPUID(abcd,func,id) \
__asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id) );
__asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) );
# endif
# elif defined(_MSC_VER)
# if (_MSC_VER > 1500) && ( defined(_M_IX86) || defined(_M_X64) )

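The inline-asm changes above exist because %ebx/%rbx is the PIC register and must be preserved around cpuid. Outside of Eigen's portability constraints, GCC and Clang ship <cpuid.h>, which takes care of this; a sketch of the equivalent vendor-string query:

#include <cpuid.h>
#include <cstdio>
#include <cstring>

int main()
{
  unsigned eax, ebx, ecx, edx;
  char vendor[13] = {0};
  // Leaf 0: highest supported leaf in eax, vendor string in ebx,edx,ecx.
  if (__get_cpuid(0, &eax, &ebx, &ecx, &edx))
  {
    std::memcpy(vendor + 0, &ebx, 4);
    std::memcpy(vendor + 4, &edx, 4);
    std::memcpy(vendor + 8, &ecx, 4);
    std::printf("CPU vendor: %s\n", vendor);
  }
  return 0;
}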
@ -186,23 +186,35 @@ template<int Y, int InfX, int SupX>
class meta_sqrt<Y, InfX, SupX, true> { public: enum { ret = (SupX*SupX <= Y) ? SupX : InfX }; };

/** \internal determines whether the product of two numeric types is allowed and what the return type is */
template<typename T, typename U> struct scalar_product_traits;
template<typename T, typename U> struct scalar_product_traits
{
enum { Defined = 0 };
};

template<typename T> struct scalar_product_traits<T,T>
{
//enum { Cost = NumTraits<T>::MulCost };
enum {
// Cost = NumTraits<T>::MulCost,
Defined = 1
};
typedef T ReturnType;
};

template<typename T> struct scalar_product_traits<T,std::complex<T> >
{
//enum { Cost = 2*NumTraits<T>::MulCost };
enum {
// Cost = 2*NumTraits<T>::MulCost,
Defined = 1
};
typedef std::complex<T> ReturnType;
};

template<typename T> struct scalar_product_traits<std::complex<T>, T>
{
//enum { Cost = 2*NumTraits<T>::MulCost };
enum {
// Cost = 2*NumTraits<T>::MulCost,
Defined = 1
};
typedef std::complex<T> ReturnType;
};

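The effect of the new Defined flag is easiest to see in a cut-down standalone version (names mirror the diff, but this is a sketch, not Eigen's actual header):

#include <complex>

template<typename T, typename U> struct my_product_traits { enum { Defined = 0 }; };
template<typename T> struct my_product_traits<T,T>
{ enum { Defined = 1 }; typedef T ReturnType; };
template<typename T> struct my_product_traits<T,std::complex<T> >
{ enum { Defined = 1 }; typedef std::complex<T> ReturnType; };
template<typename T> struct my_product_traits<std::complex<T>,T>
{ enum { Defined = 1 }; typedef std::complex<T> ReturnType; };

// double * complex<double> is allowed and yields complex<double>;
// double * float has Defined==0, so mixed products can be rejected
// with a cheap static check instead of a deep template error.
int check[ my_product_traits<double, std::complex<double> >::Defined ? 1 : -1 ];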
@ -242,7 +242,7 @@ template<typename _MatrixType> class ComplexEigenSolver
EigenvectorType m_matX;

private:
void doComputeEigenvectors(RealScalar matrixnorm);
void doComputeEigenvectors(const RealScalar& matrixnorm);
void sortEigenvalues(bool computeEigenvectors);
};

@ -252,7 +252,7 @@ ComplexEigenSolver<MatrixType>&
ComplexEigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvectors)
{
// this code is inspired by Jampack
assert(matrix.cols() == matrix.rows());
eigen_assert(matrix.cols() == matrix.rows());

// Do a complex Schur decomposition, A = U T U^*
// The eigenvalues are on the diagonal of T.
@ -273,7 +273,7 @@ ComplexEigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEi


template<typename MatrixType>
void ComplexEigenSolver<MatrixType>::doComputeEigenvectors(RealScalar matrixnorm)
void ComplexEigenSolver<MatrixType>::doComputeEigenvectors(const RealScalar& matrixnorm)
{
const Index n = m_eivalues.size();

@ -364,7 +364,6 @@ struct complex_schur_reduce_to_hessenberg<MatrixType, false>
static void run(ComplexSchur<MatrixType>& _this, const MatrixType& matrix, bool computeU)
{
typedef typename ComplexSchur<MatrixType>::ComplexScalar ComplexScalar;
typedef typename ComplexSchur<MatrixType>::ComplexMatrixType ComplexMatrixType;

// Note: m_hess is over RealScalar; m_matT and m_matU are over ComplexScalar
_this.m_hess.compute(matrix);

@ -49,7 +49,7 @@ ComplexSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matri
typedef MatrixType::RealScalar RealScalar; \
typedef std::complex<RealScalar> ComplexScalar; \
\
assert(matrix.cols() == matrix.rows()); \
eigen_assert(matrix.cols() == matrix.rows()); \
\
m_matUisUptodate = false; \
if(matrix.cols() == 1) \

@@ -366,7 +366,7 @@ EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvect
{
  using std::sqrt;
  using std::abs;
-  assert(matrix.cols() == matrix.rows());
+  eigen_assert(matrix.cols() == matrix.rows());

  // Reduce to real Schur form.
  m_realSchur.compute(matrix, computeEigenvectors);
@@ -410,7 +410,7 @@ EigenSolver<MatrixType>::compute(const MatrixType& matrix, bool computeEigenvect

// Complex scalar division.
template<typename Scalar>
-std::complex<Scalar> cdiv(Scalar xr, Scalar xi, Scalar yr, Scalar yi)
+std::complex<Scalar> cdiv(const Scalar& xr, const Scalar& xi, const Scalar& yr, const Scalar& yi)
{
  using std::abs;
  Scalar r,d;

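For reference, the classic way such a helper avoids overflow is Smith's algorithm: divide through by whichever component of the denominator dominates, so the squared magnitude yr*yr + yi*yi is never formed directly. A sketch of that standard formulation (not necessarily the exact body of Eigen's cdiv):

    #include <cmath>
    #include <complex>

    // Smith's algorithm for (xr + i*xi) / (yr + i*yi).
    template<typename Scalar>
    std::complex<Scalar> complex_div(Scalar xr, Scalar xi, Scalar yr, Scalar yi)
    {
      using std::abs;
      if (abs(yr) > abs(yi)) {
        Scalar r = yi / yr;              // |r| <= 1, so no overflow below
        Scalar d = yr + r * yi;
        return std::complex<Scalar>((xr + r * xi) / d, (xi - r * xr) / d);
      } else {
        Scalar r = yr / yi;
        Scalar d = yi + r * yr;
        return std::complex<Scalar>((r * xr + xi) / d, (r * xi - xr) / d);
      }
    }
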
@@ -291,7 +291,7 @@ template<typename _MatrixType> class HessenbergDecomposition
template<typename MatrixType>
void HessenbergDecomposition<MatrixType>::_compute(MatrixType& matA, CoeffVectorType& hCoeffs, VectorType& temp)
{
-  assert(matA.rows()==matA.cols());
+  eigen_assert(matA.rows()==matA.cols());
  Index n = matA.rows();
  temp.resize(n);
  for (Index i = 0; i<n-1; ++i)

@@ -559,7 +559,7 @@ namespace Eigen {

  const Index dim = A_in.cols();

-  assert (A_in.rows()==dim && A_in.cols()==dim
+  eigen_assert (A_in.rows()==dim && A_in.cols()==dim
       && B_in.rows()==dim && B_in.cols()==dim
       && "Need square matrices of the same dimension");

@@ -234,8 +234,8 @@ template<typename _MatrixType> class RealSchur
    typedef Matrix<Scalar,3,1> Vector3s;

    Scalar computeNormOfT();
-    Index findSmallSubdiagEntry(Index iu, Scalar norm);
-    void splitOffTwoRows(Index iu, bool computeU, Scalar exshift);
+    Index findSmallSubdiagEntry(Index iu, const Scalar& norm);
+    void splitOffTwoRows(Index iu, bool computeU, const Scalar& exshift);
    void computeShift(Index iu, Index iter, Scalar& exshift, Vector3s& shiftInfo);
    void initFrancisQRStep(Index il, Index iu, const Vector3s& shiftInfo, Index& im, Vector3s& firstHouseholderVector);
    void performFrancisQRStep(Index il, Index im, Index iu, bool computeU, const Vector3s& firstHouseholderVector, Scalar* workspace);
@@ -245,7 +245,7 @@ template<typename _MatrixType> class RealSchur
template<typename MatrixType>
RealSchur<MatrixType>& RealSchur<MatrixType>::compute(const MatrixType& matrix, bool computeU)
{
-  assert(matrix.cols() == matrix.rows());
+  eigen_assert(matrix.cols() == matrix.rows());
  Index maxIters = m_maxIters;
  if (maxIters == -1)
    maxIters = m_maxIterationsPerRow * matrix.rows();
@@ -343,7 +343,7 @@ inline typename MatrixType::Scalar RealSchur<MatrixType>::computeNormOfT()

/** \internal Look for single small sub-diagonal element and returns its index */
template<typename MatrixType>
-inline typename MatrixType::Index RealSchur<MatrixType>::findSmallSubdiagEntry(Index iu, Scalar norm)
+inline typename MatrixType::Index RealSchur<MatrixType>::findSmallSubdiagEntry(Index iu, const Scalar& norm)
{
  using std::abs;
  Index res = iu;
@@ -361,7 +361,7 @@ inline typename MatrixType::Index RealSchur<MatrixType>::findSmallSubdiagEntry(I

/** \internal Update T given that rows iu-1 and iu decouple from the rest. */
template<typename MatrixType>
-inline void RealSchur<MatrixType>::splitOffTwoRows(Index iu, bool computeU, Scalar exshift)
+inline void RealSchur<MatrixType>::splitOffTwoRows(Index iu, bool computeU, const Scalar& exshift)
{
  using std::sqrt;
  using std::abs;
@@ -467,8 +467,8 @@ inline void RealSchur<MatrixType>::initFrancisQRStep(Index il, Index iu, const V
template<typename MatrixType>
inline void RealSchur<MatrixType>::performFrancisQRStep(Index il, Index im, Index iu, bool computeU, const Vector3s& firstHouseholderVector, Scalar* workspace)
{
-  assert(im >= il);
-  assert(im <= iu-2);
+  eigen_assert(im >= il);
+  eigen_assert(im <= iu-2);

  const Index size = m_matT.cols();

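The recurring pattern in these hunks — turning by-value Scalar parameters into const Scalar& — matters once Scalar is instantiated with an expensive custom numeric type rather than float or double. A hypothetical sketch of the cost difference (BigReal is an illustrative stand-in, not an Eigen type):

    #include <vector>

    // Illustrative heavyweight scalar: copying it duplicates a digit buffer.
    struct BigReal {
      std::vector<unsigned> digits;   // arbitrary-precision mantissa (stand-in)
    };

    // By value: both arguments are deep-copied on every call.
    bool lessByValue(BigReal x, BigReal threshold)
    { return x.digits.size() < threshold.digits.size(); }

    // By const reference: no copy, same calling code -- the form the patch adopts.
    bool lessByRef(const BigReal& x, const BigReal& threshold)
    { return x.digits.size() < threshold.digits.size(); }

    int main()
    {
      BigReal a{std::vector<unsigned>(1000, 1u)}, b{std::vector<unsigned>(2000, 1u)};
      return lessByValue(a, b) == lessByRef(a, b) ? 0 : 1;
    }
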
@@ -48,7 +48,7 @@ RealSchur<Matrix<EIGTYPE, Dynamic, Dynamic, EIGCOLROW> >::compute(const Matrix<E
  typedef MatrixType::Scalar Scalar; \
  typedef MatrixType::RealScalar RealScalar; \
\
-  assert(matrix.cols() == matrix.rows()); \
+  eigen_assert(matrix.cols() == matrix.rows()); \
\
  lapack_int n = matrix.cols(), sdim, info; \
  lapack_int lda = matrix.outerStride(); \

@@ -426,8 +426,6 @@ struct tridiagonalization_inplace_selector;
template<typename MatrixType, typename DiagonalType, typename SubDiagonalType>
void tridiagonalization_inplace(MatrixType& mat, DiagonalType& diag, SubDiagonalType& subdiag, bool extractQ)
{
-  typedef typename MatrixType::Index Index;
-  //Index n = mat.rows();
  eigen_assert(mat.cols()==mat.rows() && diag.size()==mat.rows() && subdiag.size()==mat.rows()-1);
  tridiagonalization_inplace_selector<MatrixType>::run(mat, diag, subdiag, extractQ);
}

@@ -71,7 +71,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
  template<typename Derived>
  inline explicit AlignedBox(const MatrixBase<Derived>& a_p)
  {
-    const typename internal::nested<Derived,2>::type p(a_p.derived());
+    typename internal::nested<Derived,2>::type p(a_p.derived());
    m_min = p;
    m_max = p;
  }
@@ -282,7 +282,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
    * determined by \a prec.
    *
    * \sa MatrixBase::isApprox() */
-  bool isApprox(const AlignedBox& other, RealScalar prec = ScalarTraits::dummy_precision()) const
+  bool isApprox(const AlignedBox& other, const RealScalar& prec = ScalarTraits::dummy_precision()) const
  { return m_min.isApprox(other.m_min, prec) && m_max.isApprox(other.m_max, prec); }

  protected:
@@ -296,7 +296,7 @@ template<typename Scalar,int AmbientDim>
template<typename Derived>
inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const MatrixBase<Derived>& a_p) const
{
-  const typename internal::nested<Derived,2*AmbientDim>::type p(a_p.derived());
+  typename internal::nested<Derived,2*AmbientDim>::type p(a_p.derived());
  Scalar dist2(0);
  Scalar aux;
  for (Index k=0; k<dim(); ++k)

@@ -78,8 +78,8 @@ MatrixBase<Derived>::cross3(const MatrixBase<OtherDerived>& other) const

  typedef typename internal::nested<Derived,2>::type DerivedNested;
  typedef typename internal::nested<OtherDerived,2>::type OtherDerivedNested;
-  const DerivedNested lhs(derived());
-  const OtherDerivedNested rhs(other.derived());
+  DerivedNested lhs(derived());
+  OtherDerivedNested rhs(other.derived());

  return internal::cross3_impl<Architecture::Target,
    typename internal::remove_all<DerivedNested>::type,

@@ -154,7 +154,7 @@ public:
    * \a t in [0;1]
    * see http://en.wikipedia.org/wiki/Slerp
    */
-  template<class OtherDerived> Quaternion<Scalar> slerp(Scalar t, const QuaternionBase<OtherDerived>& other) const;
+  template<class OtherDerived> Quaternion<Scalar> slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const;

  /** \returns \c true if \c *this is approximately equal to \a other, within the precision
    * determined by \a prec.
@@ -683,7 +683,7 @@ QuaternionBase<Derived>::angularDistance(const QuaternionBase<OtherDerived>& oth
template <class Derived>
template <class OtherDerived>
Quaternion<typename internal::traits<Derived>::Scalar>
-QuaternionBase<Derived>::slerp(Scalar t, const QuaternionBase<OtherDerived>& other) const
+QuaternionBase<Derived>::slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const
{
  using std::acos;
  using std::sin;

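slerp interpolates along the great arc between two unit quaternions, with t in [0;1] as the documentation above states. A minimal usage sketch:

    #include <Eigen/Geometry>
    #include <iostream>

    int main()
    {
      using namespace Eigen;
      const double pi = 3.141592653589793;
      Quaterniond qa(AngleAxisd(0.0, Vector3d::UnitZ()));
      Quaterniond qb(AngleAxisd(pi,  Vector3d::UnitZ()));

      // Halfway between the two orientations along the great arc.
      Quaterniond qmid = qa.slerp(0.5, qb);
      std::cout << qmid.coeffs().transpose() << "\n";  // x y z w
    }
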
@@ -44,6 +44,11 @@ bool bicgstab(const MatrixType& mat, const Rhs& rhs, Dest& x,
  VectorType r0 = r;

+  RealScalar r0_sqnorm = rhs.squaredNorm();
+  if(r0_sqnorm == 0)
+  {
+    x.setZero();
+    return true;
+  }
  Scalar rho    = 1;
  Scalar alpha  = 1;
  Scalar w      = 1;

@@ -41,15 +41,29 @@ void conjugate_gradient(const MatrixType& mat, const Rhs& rhs, Dest& x,
  int n = mat.cols();

  VectorType residual = rhs - mat * x; //initial residual
-  VectorType p(n);

+  RealScalar rhsNorm2 = rhs.squaredNorm();
+  if(rhsNorm2 == 0)
+  {
+    x.setZero();
+    iters = 0;
+    tol_error = 0;
+    return;
+  }
+  RealScalar threshold = tol*tol*rhsNorm2;
+  RealScalar residualNorm2 = residual.squaredNorm();
+  if (residualNorm2 < threshold)
+  {
+    iters = 0;
+    tol_error = sqrt(residualNorm2 / rhsNorm2);
+    return;
+  }

+  VectorType p(n);
  p = precond.solve(residual);      //initial search direction

  VectorType z(n), tmp(n);
  RealScalar absNew = internal::real(residual.dot(p));  // the square of the absolute value of r scaled by invM
-  RealScalar rhsNorm2 = rhs.squaredNorm();
-  RealScalar residualNorm2 = 0;
-  RealScalar threshold = tol*tol*rhsNorm2;
  int i = 0;
  while(i < maxIters)
  {

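The new early exits make conjugate_gradient well-defined for degenerate inputs: a zero right-hand side now returns x = 0 immediately, and an already-converged initial guess returns without iterating. A minimal sketch through the public ConjugateGradient wrapper (matrix values are illustrative):

    #include <Eigen/Sparse>
    #include <Eigen/IterativeLinearSolvers>
    #include <iostream>

    int main()
    {
      using namespace Eigen;
      SparseMatrix<double> A(2,2);
      A.insert(0,0) = 4.0;
      A.insert(1,1) = 3.0;
      A.makeCompressed();

      ConjugateGradient<SparseMatrix<double> > cg(A);

      // Zero rhs: the solver returns x = 0 in zero iterations.
      VectorXd x = cg.solve(VectorXd::Zero(2));
      std::cout << "iterations: " << cg.iterations()
                << ", error: " << cg.error() << "\n";
    }
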
@@ -24,14 +24,15 @@ namespace internal {
 * \param ind The array of index for the elements in @p row
 * \param ncut The number of largest elements to keep
 **/
-template <typename VectorV, typename VectorI>
-int QuickSplit(VectorV &row, VectorI &ind, int ncut)
+template <typename VectorV, typename VectorI, typename Index>
+Index QuickSplit(VectorV &row, VectorI &ind, Index ncut)
{
  typedef typename VectorV::RealScalar RealScalar;
  using std::swap;
-  int mid;
-  int n = row.size(); /* length of the vector */
-  int first, last ;
+  using std::abs;
+  Index mid;
+  Index n = row.size(); /* length of the vector */
+  Index first, last ;

  ncut--; /* to fit the zero-based indices */
  first = 0;
@@ -40,9 +41,9 @@ int QuickSplit(VectorV &row, VectorI &ind, int ncut)

  do {
    mid = first;
-    RealScalar abskey = std::abs(row(mid));
-    for (int j = first + 1; j <= last; j++) {
-      if ( std::abs(row(j)) > abskey) {
+    RealScalar abskey = abs(row(mid));
+    for (Index j = first + 1; j <= last; j++) {
+      if ( abs(row(j)) > abskey) {
        ++mid;
        swap(row(mid), row(j));
        swap(ind(mid), ind(j));

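QuickSplit is a quickselect-style partition: after it runs, the ncut largest-magnitude entries sit at the front of row, with no full sort. The same guarantee on a plain array is what std::nth_element provides; a sketch of the analogy (standard library only):

    #include <algorithm>
    #include <cmath>
    #include <iostream>
    #include <vector>

    int main()
    {
      std::vector<double> row = {0.1, -5.0, 2.5, -0.3, 4.0, 1.0};
      const std::size_t ncut = 3;  // keep the 3 largest-magnitude entries

      // Partition so row[0..ncut-1] holds the ncut largest |values|,
      // in no particular order -- exactly the guarantee QuickSplit gives.
      std::nth_element(row.begin(), row.begin() + ncut - 1, row.end(),
                       [](double a, double b) { return std::fabs(a) > std::fabs(b); });

      for (std::size_t i = 0; i < ncut; ++i) std::cout << row[i] << ' ';
      std::cout << '\n';
    }
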
@@ -110,7 +111,7 @@ class IncompleteLUT : internal::noncopyable
    {}

    template<typename MatrixType>
-    IncompleteLUT(const MatrixType& mat, RealScalar droptol=NumTraits<Scalar>::dummy_precision(), int fillfactor = 10)
+    IncompleteLUT(const MatrixType& mat, const RealScalar& droptol=NumTraits<Scalar>::dummy_precision(), int fillfactor = 10)
      : m_droptol(droptol),m_fillfactor(fillfactor),
        m_analysisIsOk(false),m_factorizationIsOk(false),m_isInitialized(false)
    {
@@ -154,7 +155,7 @@ class IncompleteLUT : internal::noncopyable
      return *this;
    }

-    void setDroptol(RealScalar droptol);
+    void setDroptol(const RealScalar& droptol);
    void setFillfactor(int fillfactor);

    template<typename Rhs, typename Dest>
@@ -203,7 +204,7 @@ protected:
 * \param droptol Drop any element whose magnitude is less than this tolerance
 **/
template<typename Scalar>
-void IncompleteLUT<Scalar>::setDroptol(RealScalar droptol)
+void IncompleteLUT<Scalar>::setDroptol(const RealScalar& droptol)
{
  this->m_droptol = droptol;
}
@@ -246,7 +247,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
  using std::abs;

  eigen_assert((amat.rows() == amat.cols()) && "The factorization should be done on a square matrix");
-  int n = amat.cols(); // Size of the matrix
+  Index n = amat.cols(); // Size of the matrix
  m_lu.resize(n,n);
  // Declare Working vectors and variables
  Vector u(n) ; // real values of the row -- maximum size is n --
@@ -264,21 +265,21 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
  u.fill(0);

  // number of largest elements to keep in each row:
-  int fill_in = static_cast<int> (amat.nonZeros()*m_fillfactor)/n+1;
+  Index fill_in = static_cast<Index> (amat.nonZeros()*m_fillfactor)/n+1;
  if (fill_in > n) fill_in = n;

  // number of largest nonzero elements to keep in the L and the U part of the current row:
-  int nnzL = fill_in/2;
-  int nnzU = nnzL;
+  Index nnzL = fill_in/2;
+  Index nnzU = nnzL;
  m_lu.reserve(n * (nnzL + nnzU + 1));

  // global loop over the rows of the sparse matrix
-  for (int ii = 0; ii < n; ii++)
+  for (Index ii = 0; ii < n; ii++)
  {
    // 1 - copy the lower and the upper part of the row i of mat in the working vector u

-    int sizeu = 1; // number of nonzero elements in the upper part of the current row
-    int sizel = 0; // number of nonzero elements in the lower part of the current row
+    Index sizeu = 1; // number of nonzero elements in the upper part of the current row
+    Index sizel = 0; // number of nonzero elements in the lower part of the current row
    ju(ii) = ii;
    u(ii) = 0;
    jr(ii) = ii;
@@ -287,7 +288,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
    typename FactorType::InnerIterator j_it(mat, ii); // Iterate through the current row ii
    for (; j_it; ++j_it)
    {
-      int k = j_it.index();
+      Index k = j_it.index();
      if (k < ii)
      {
        // copy the lower part
@@ -303,7 +304,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
      else
      {
        // copy the upper part
-        int jpos = ii + sizeu;
+        Index jpos = ii + sizeu;
        ju(jpos) = k;
        u(jpos) = j_it.value();
        jr(k) = jpos;
@@ -322,19 +323,19 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
    rownorm = sqrt(rownorm);

    // 3 - eliminate the previous nonzero rows
-    int jj = 0;
-    int len = 0;
+    Index jj = 0;
+    Index len = 0;
    while (jj < sizel)
    {
      // In order to eliminate in the correct order,
      // we must select first the smallest column index among ju(jj:sizel)
-      int k;
-      int minrow = ju.segment(jj,sizel-jj).minCoeff(&k); // k is relative to the segment
+      Index k;
+      Index minrow = ju.segment(jj,sizel-jj).minCoeff(&k); // k is relative to the segment
      k += jj;
      if (minrow != ju(jj))
      {
        // swap the two locations
-        int j = ju(jj);
+        Index j = ju(jj);
        swap(ju(jj), ju(k));
        jr(minrow) = jj;   jr(j) = k;
        swap(u(jj), u(k));
@@ -360,11 +361,11 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
      for (; ki_it; ++ki_it)
      {
        Scalar prod = fact * ki_it.value();
-        int j = ki_it.index();
-        int jpos = jr(j);
+        Index j = ki_it.index();
+        Index jpos = jr(j);
        if (jpos == -1) // fill-in element
        {
-          int newpos;
+          Index newpos;
          if (j >= ii) // dealing with the upper part
          {
            newpos = ii + sizeu;
@@ -393,7 +394,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
    } // end of the elimination on the row ii

    // reset the upper part of the pointer jr to zero
-    for(int k = 0; k <sizeu; k++) jr(ju(ii+k)) = -1;
+    for(Index k = 0; k <sizeu; k++) jr(ju(ii+k)) = -1;

    // 4 - partially sort and insert the elements in the m_lu matrix

@@ -406,7 +407,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)

    // store the largest m_fill elements of the L part
    m_lu.startVec(ii);
-    for(int k = 0; k < len; k++)
+    for(Index k = 0; k < len; k++)
      m_lu.insertBackByOuterInnerUnordered(ii,ju(k)) = u(k);

    // store the diagonal element
@@ -418,7 +419,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
    // sort the U-part of the row
    // apply the dropping rule first
    len = 0;
-    for(int k = 1; k < sizeu; k++)
+    for(Index k = 1; k < sizeu; k++)
    {
      if(abs(u(ii+k)) > m_droptol * rownorm )
      {
@@ -434,7 +435,7 @@ void IncompleteLUT<Scalar>::factorize(const _MatrixType& amat)
    internal::QuickSplit(uu, juu, len);

    // store the largest elements of the U part
-    for(int k = ii + 1; k < ii + len; k++)
+    for(Index k = ii + 1; k < ii + len; k++)
      m_lu.insertBackByOuterInnerUnordered(ii,ju(k)) = u(k);
  }

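IncompleteLUT is a dual-threshold ILUT preconditioner: droptol discards small entries relative to the row norm, and fillfactor bounds the fill-in kept per row. A minimal sketch of it preconditioning BiCGSTAB (matrix assembly is illustrative; assumes IncompleteLUT is available from the IterativeLinearSolvers module, as in recent Eigen):

    #include <Eigen/Sparse>
    #include <Eigen/IterativeLinearSolvers>
    #include <iostream>
    #include <vector>

    int main()
    {
      using namespace Eigen;
      const int n = 100;
      SparseMatrix<double> A(n, n);
      std::vector<Triplet<double> > coeffs;
      for (int i = 0; i < n; ++i) {
        coeffs.push_back(Triplet<double>(i, i, 4.0));
        if (i + 1 < n) {
          coeffs.push_back(Triplet<double>(i, i + 1, -1.0));
          coeffs.push_back(Triplet<double>(i + 1, i, -1.0));
        }
      }
      A.setFromTriplets(coeffs.begin(), coeffs.end());

      BiCGSTAB<SparseMatrix<double>, IncompleteLUT<double> > solver;
      solver.preconditioner().setDroptol(1e-4);   // drop small entries
      solver.preconditioner().setFillfactor(10);  // cap kept entries per row
      solver.compute(A);

      VectorXd x = solver.solve(VectorXd::Ones(n));
      std::cout << "#iters: " << solver.iterations() << "\n";
    }
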
@@ -120,7 +120,7 @@ public:
  RealScalar tolerance() const { return m_tolerance; }

  /** Sets the tolerance threshold used by the stopping criteria */
-  Derived& setTolerance(RealScalar tolerance)
+  Derived& setTolerance(const RealScalar& tolerance)
  {
    m_tolerance = tolerance;
    return derived();

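setTolerance feeds the relative stopping criterion the iterative solvers use (compare the tol*tol*rhsNorm2 threshold in conjugate_gradient above). Since it returns the derived solver, configuration chains; a small sketch:

    #include <Eigen/Sparse>
    #include <Eigen/IterativeLinearSolvers>

    int main()
    {
      Eigen::SparseMatrix<double> A(2,2);
      A.insert(0,0) = 2.0; A.insert(1,1) = 2.0;

      Eigen::ConjugateGradient<Eigen::SparseMatrix<double> > cg;
      // setTolerance() returns the solver itself, so calls chain.
      cg.setTolerance(1e-10).setMaxIterations(500);
      cg.compute(A);
      Eigen::VectorXd x = cg.solve(Eigen::VectorXd::Ones(2));
      return x.size() == 2 ? 0 : 1;
    }
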
@@ -63,7 +63,7 @@ template<typename Scalar> class JacobiRotation

    template<typename Derived>
    bool makeJacobi(const MatrixBase<Derived>&, typename Derived::Index p, typename Derived::Index q);
-    bool makeJacobi(RealScalar x, Scalar y, RealScalar z);
+    bool makeJacobi(const RealScalar& x, const Scalar& y, const RealScalar& z);

    void makeGivens(const Scalar& p, const Scalar& q, Scalar* z=0);

@@ -80,7 +80,7 @@ template<typename Scalar> class JacobiRotation
 * \sa MatrixBase::makeJacobi(const MatrixBase<Derived>&, Index, Index), MatrixBase::applyOnTheLeft(), MatrixBase::applyOnTheRight()
 */
template<typename Scalar>
-bool JacobiRotation<Scalar>::makeJacobi(RealScalar x, Scalar y, RealScalar z)
+bool JacobiRotation<Scalar>::makeJacobi(const RealScalar& x, const Scalar& y, const RealScalar& z)
{
  using std::sqrt;
  using std::abs;

@@ -91,7 +91,7 @@ template<typename Derived> struct determinant_impl<Derived, 4>
template<typename Derived>
inline typename internal::traits<Derived>::Scalar MatrixBase<Derived>::determinant() const
{
-  assert(rows() == cols());
+  eigen_assert(rows() == cols());
  typedef typename internal::nested<Derived,Base::RowsAtCompileTime>::type Nested;
  return internal::determinant_impl<typename internal::remove_all<Nested>::type>::run(derived());
}

@@ -417,6 +417,9 @@ FullPivLU<MatrixType>::FullPivLU(const MatrixType& matrix)
template<typename MatrixType>
FullPivLU<MatrixType>& FullPivLU<MatrixType>::compute(const MatrixType& matrix)
{
+  // the permutations are stored as int indices, so just to be sure:
+  eigen_assert(matrix.rows()<=NumTraits<int>::highest() && matrix.cols()<=NumTraits<int>::highest());
+
  m_isInitialized = true;
  m_lu = matrix;

@@ -242,7 +242,7 @@ struct partial_lu_impl
    const Index cols = lu.cols();
    const Index size = (std::min)(rows,cols);
    nb_transpositions = 0;
-    int first_zero_pivot = -1;
+    Index first_zero_pivot = -1;
    for(Index k = 0; k < size; ++k)
    {
      Index rrows = rows-k-1;
@@ -253,7 +253,7 @@ struct partial_lu_impl
        = lu.col(k).tail(rows-k).cwiseAbs().maxCoeff(&row_of_biggest_in_col);
      row_of_biggest_in_col += k;

-      row_transpositions[k] = row_of_biggest_in_col;
+      row_transpositions[k] = PivIndex(row_of_biggest_in_col);

      if(biggest_in_corner != RealScalar(0))
      {
@@ -318,7 +318,7 @@ struct partial_lu_impl
    }

    nb_transpositions = 0;
-    int first_zero_pivot = -1;
+    Index first_zero_pivot = -1;
    for(Index k = 0; k < size; k+=blockSize)
    {
      Index bs = (std::min)(size-k,blockSize); // actual size of the block
@@ -386,6 +386,9 @@ void partial_lu_inplace(MatrixType& lu, TranspositionType& row_transpositions, t
template<typename MatrixType>
PartialPivLU<MatrixType>& PartialPivLU<MatrixType>::compute(const MatrixType& matrix)
{
+  // the row permutation is stored as int indices, so just to be sure:
+  eigen_assert(matrix.rows()<NumTraits<int>::highest());
+
  m_lu = matrix;

  eigen_assert(matrix.rows() == matrix.cols() && "PartialPivLU is only for square (and moreover invertible) matrices");

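Both LU decompositions now guard against index overflow up front: the permutation vectors store int entries, so dimensions beyond NumTraits<int>::highest() (i.e. INT_MAX) would silently wrap. A tiny sketch of the same guard pattern (the helper name is hypothetical, for illustration only):

    #include <Eigen/Core>
    #include <cassert>

    // Reject dimensions that cannot be represented in int-based
    // permutation storage -- the check the hunks above add to compute().
    template<typename MatrixType>
    void check_permutation_index_range(const MatrixType& m)
    {
      assert(m.rows() <= Eigen::NumTraits<int>::highest());
      assert(m.cols() <= Eigen::NumTraits<int>::highest());
    }

    int main()
    {
      Eigen::MatrixXd A(3,3);
      check_permutation_index_range(A);  // fine for any practical dense size
    }
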
@@ -29,7 +29,7 @@ public:
  void get_symmetrized_graph(const MatrixType& A)
  {
    Index m = A.cols();

    eigen_assert((A.rows() == A.cols()) && "ONLY FOR SQUARED MATRICES");
    // Get the transpose of the input matrix
    MatrixType At = A.transpose();
    // Get the number of nonzeros elements in each row/col of At+A

@@ -2,10 +2,6 @@
// for linear algebra.
//
// Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

/*

@@ -95,7 +91,6 @@ template<typename Scalar, typename Index>
void minimum_degree_ordering(SparseMatrix<Scalar,ColMajor,Index>& C, PermutationMatrix<Dynamic,Dynamic,Index>& perm)
{
  using std::sqrt;
  typedef SparseMatrix<Scalar,ColMajor,Index> CCS;

  int d, dk, dext, lemax = 0, e, elenk, eln, i, j, k, k1,
      k2, k3, jlast, ln, dense, nzmax, mindeg = 0, nvi, nvj, nvk, mark, wnvi,

(File diff suppressed because it is too large.)
@@ -4,29 +4,13 @@
//
// Copyright (C) 2012 Désiré Nuentsa-Wakam <desire.nuentsa_wakam@inria.fr>
//
-// Eigen is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public
-// License as published by the Free Software Foundation; either
-// version 3 of the License, or (at your option) any later version.
-//
-// Alternatively, you can redistribute it and/or
-// modify it under the terms of the GNU General Public License as
-// published by the Free Software Foundation; either version 2 of
-// the License, or (at your option) any later version.
-//
-// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
-// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
-// GNU General Public License for more details.
-//
-// You should have received a copy of the GNU Lesser General Public
-// License and a copy of the GNU General Public License along with
-// Eigen. If not, see <http://www.gnu.org/licenses/>.
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_ORDERING_H
#define EIGEN_ORDERING_H

#include "Amd.h"
namespace Eigen {

#include "Eigen_Colamd.h"
@@ -53,6 +37,8 @@ void ordering_helper_at_plus_a(const MatrixType& mat, MatrixType& symmat)

}

+#ifndef EIGEN_MPL2_ONLY

/** \ingroup OrderingMethods_Module
  * \class AMDOrdering
  *
@@ -94,6 +80,8 @@ class AMDOrdering
  }
};

+#endif // EIGEN_MPL2_ONLY

/** \ingroup OrderingMethods_Module
  * \class NaturalOrdering
  *
@@ -134,26 +122,26 @@ class COLAMDOrdering
    template <typename MatrixType>
    void operator() (const MatrixType& mat, PermutationType& perm)
    {
-      int m = mat.rows();
-      int n = mat.cols();
-      int nnz = mat.nonZeros();
+      Index m = mat.rows();
+      Index n = mat.cols();
+      Index nnz = mat.nonZeros();
      // Get the recommended value of Alen to be used by colamd
-      int Alen = internal::colamd_recommended(nnz, m, n);
+      Index Alen = internal::colamd_recommended(nnz, m, n);
      // Set the default parameters
      double knobs [COLAMD_KNOBS];
-      int stats [COLAMD_STATS];
+      Index stats [COLAMD_STATS];
      internal::colamd_set_defaults(knobs);

-      int info;
+      Index info;
      IndexVector p(n+1), A(Alen);
-      for(int i=0; i <= n; i++) p(i) = mat.outerIndexPtr()[i];
-      for(int i=0; i < nnz; i++) A(i) = mat.innerIndexPtr()[i];
+      for(Index i=0; i <= n; i++) p(i) = mat.outerIndexPtr()[i];
+      for(Index i=0; i < nnz; i++) A(i) = mat.innerIndexPtr()[i];
      // Call Colamd routine to compute the ordering
      info = internal::colamd(m, n, Alen, A.data(), p.data(), knobs, stats);
      eigen_assert( info && "COLAMD failed " );

      perm.resize(n);
-      for (int i = 0; i < n; i++) perm.indices()(p(i)) = i;
+      for (Index i = 0; i < n; i++) perm.indices()(p(i)) = i;
    }
};

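COLAMDOrdering computes a fill-reducing column permutation from the sparsity pattern alone. A usage sketch calling the functor directly (assumes the OrderingMethods module exposes it as above; the matrix must be in compressed form):

    #include <Eigen/Sparse>
    #include <Eigen/OrderingMethods>
    #include <iostream>

    int main()
    {
      using namespace Eigen;
      SparseMatrix<double> A(4,4);
      A.insert(0,0) = 1; A.insert(1,1) = 1;
      A.insert(2,2) = 1; A.insert(3,3) = 1;
      A.insert(0,3) = 1;
      A.makeCompressed();

      // The ordering functor fills a permutation from the pattern of A.
      COLAMDOrdering<int> ordering;
      PermutationMatrix<Dynamic, Dynamic, int> perm;
      ordering(A, perm);

      std::cout << perm.indices().transpose() << "\n";
    }
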
@@ -157,27 +157,6 @@ class PastixBase : internal::noncopyable
    template<typename Rhs,typename Dest>
    bool _solve (const MatrixBase<Rhs> &b, MatrixBase<Dest> &x) const;

-    /** \internal */
-    template<typename Rhs, typename DestScalar, int DestOptions, typename DestIndex>
-    void _solve_sparse(const Rhs& b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
-    {
-      eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
-      eigen_assert(rows()==b.rows());
-
-      // we process the sparse rhs per block of NbColsAtOnce columns temporarily stored into a dense matrix.
-      static const int NbColsAtOnce = 1;
-      int rhsCols = b.cols();
-      int size = b.rows();
-      Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmp(size,rhsCols);
-      for(int k=0; k<rhsCols; k+=NbColsAtOnce)
-      {
-        int actualCols = std::min<int>(rhsCols-k, NbColsAtOnce);
-        tmp.leftCols(actualCols) = b.middleCols(k,actualCols);
-        tmp.leftCols(actualCols) = derived().solve(tmp.leftCols(actualCols));
-        dest.middleCols(k,actualCols) = tmp.leftCols(actualCols).sparseView();
-      }
-    }
-
    Derived& derived()
    {
      return *static_cast<Derived*>(this);
@@ -731,7 +710,7 @@ struct sparse_solve_retval<PastixBase<_MatrixType>, Rhs>

  template<typename Dest> void evalTo(Dest& dst) const
  {
-    dec()._solve_sparse(rhs(),dst);
+    this->defaultEvalTo(dst);
  }
};

@@ -206,29 +206,6 @@ class PardisoImpl
    template<typename BDerived, typename XDerived>
    bool _solve(const MatrixBase<BDerived> &b, MatrixBase<XDerived>& x) const;

-    /** \internal */
-    template<typename Rhs, typename DestScalar, int DestOptions, typename DestIndex>
-    void _solve_sparse(const Rhs& b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
-    {
-      eigen_assert(m_size==b.rows());
-
-      // we process the sparse rhs per block of NbColsAtOnce columns temporarily stored into a dense matrix.
-      static const int NbColsAtOnce = 4;
-      int rhsCols = b.cols();
-      int size = b.rows();
-      // Pardiso cannot solve in-place,
-      // so we need two temporaries
-      Eigen::Matrix<DestScalar,Dynamic,Dynamic,ColMajor> tmp_rhs(size,rhsCols);
-      Eigen::Matrix<DestScalar,Dynamic,Dynamic,ColMajor> tmp_res(size,rhsCols);
-      for(int k=0; k<rhsCols; k+=NbColsAtOnce)
-      {
-        int actualCols = std::min<int>(rhsCols-k, NbColsAtOnce);
-        tmp_rhs.leftCols(actualCols) = b.middleCols(k,actualCols);
-        tmp_res.leftCols(actualCols) = derived().solve(tmp_rhs.leftCols(actualCols));
-        dest.middleCols(k,actualCols) = tmp_res.leftCols(actualCols).sparseView();
-      }
-    }
-
  protected:
    void pardisoRelease()
    {
@@ -604,7 +581,7 @@ struct sparse_solve_retval<PardisoImpl<Derived>, Rhs>

  template<typename Dest> void evalTo(Dest& dst) const
  {
-    dec().derived()._solve_sparse(rhs(),dst);
+    this->defaultEvalTo(dst);
  }
};

@@ -56,6 +56,12 @@ template<typename _MatrixType> class ColPivHouseholderQR
    typedef typename internal::plain_row_type<MatrixType>::type RowVectorType;
    typedef typename internal::plain_row_type<MatrixType, RealScalar>::type RealRowVectorType;
    typedef typename HouseholderSequence<MatrixType,HCoeffsType>::ConjugateReturnType HouseholderSequenceType;

+  private:
+
+    typedef typename PermutationType::Index PermIndexType;
+
+  public:
+
    /**
    * \brief Default Constructor.
@@ -81,7 +87,7 @@ template<typename _MatrixType> class ColPivHouseholderQR
    ColPivHouseholderQR(Index rows, Index cols)
      : m_qr(rows, cols),
        m_hCoeffs((std::min)(rows,cols)),
-        m_colsPermutation(cols),
+        m_colsPermutation(PermIndexType(cols)),
        m_colsTranspositions(cols),
        m_temp(cols),
        m_colSqNorms(cols),
@@ -91,7 +97,7 @@ template<typename _MatrixType> class ColPivHouseholderQR
    ColPivHouseholderQR(const MatrixType& matrix)
      : m_qr(matrix.rows(), matrix.cols()),
        m_hCoeffs((std::min)(matrix.rows(),matrix.cols())),
-        m_colsPermutation(matrix.cols()),
+        m_colsPermutation(PermIndexType(matrix.cols())),
        m_colsTranspositions(matrix.cols()),
        m_temp(matrix.cols()),
        m_colSqNorms(matrix.cols()),
@@ -139,7 +145,22 @@ template<typename _MatrixType> class ColPivHouseholderQR
      eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
      return m_qr;
    }

+    /** \returns a reference to the matrix where the result Householder QR is stored
+      * \warning The strict lower part of this matrix contains internal values.
+      * Only the upper triangular part should be referenced. To get it, use
+      * \code matrixR().template triangularView<Upper>() \endcode
+      * For rank-deficient matrices, use
+      * \code
+      * matrixR().topLeftCorner(rank(), rank()).template triangularView<Upper>()
+      * \endcode
+      */
+    const MatrixType& matrixR() const
+    {
+      eigen_assert(m_isInitialized && "ColPivHouseholderQR is not initialized.");
+      return m_qr;
+    }

    ColPivHouseholderQR& compute(const MatrixType& matrix);

    const PermutationType& colsPermutation() const
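The new matrixR() accessor behaves exactly as its documentation above warns: only the upper triangle of the returned matrix is meaningful. A usage sketch:

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(5, 3);
      Eigen::ColPivHouseholderQR<Eigen::MatrixXd> qr(A);

      // Extract the usable triangular factor, rank-deficiency aware.
      Eigen::MatrixXd R = qr.matrixR()
                            .topLeftCorner(qr.rank(), qr.rank())
                            .triangularView<Eigen::Upper>();
      std::cout << R << "\n";
    }
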
@@ -330,6 +351,18 @@ template<typename _MatrixType> class ColPivHouseholderQR
      * diagonal coefficient of R.
      */
    RealScalar maxPivot() const { return m_maxpivot; }

+    /** \brief Reports whether the QR factorization was successful.
+      *
+      * \note This function always returns \c Success. It is provided for compatibility
+      * with other factorization routines.
+      * \returns \c Success
+      */
+    ComputationInfo info() const
+    {
+      eigen_assert(m_isInitialized && "Decomposition is not initialized.");
+      return Success;
+    }

  protected:
    MatrixType m_qr;
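info() lets generic code treat this always-succeeding factorization uniformly with ones that can fail. Sketch:

    #include <Eigen/Dense>
    #include <cstdlib>

    int main()
    {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 4);
      Eigen::ColPivHouseholderQR<Eigen::MatrixXd> qr(A);

      // Uniform success check, as with LLT, LDLT, etc.
      if (qr.info() != Eigen::Success)
        return EXIT_FAILURE;
      return EXIT_SUCCESS;
    }
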
@@ -368,6 +401,9 @@ ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const
  Index rows = matrix.rows();
  Index cols = matrix.cols();
  Index size = matrix.diagonalSize();

+  // the column permutation is stored as int indices, so just to be sure:
+  eigen_assert(cols<=NumTraits<int>::highest());
+
  m_qr = matrix;
  m_hCoeffs.resize(size);
@@ -443,9 +479,9 @@ ColPivHouseholderQR<MatrixType>& ColPivHouseholderQR<MatrixType>::compute(const
    m_colSqNorms.tail(cols-k-1) -= m_qr.row(k).tail(cols-k-1).cwiseAbs2();
  }

-  m_colsPermutation.setIdentity(cols);
-  for(Index k = 0; k < m_nonzero_pivots; ++k)
-    m_colsPermutation.applyTranspositionOnTheRight(k, m_colsTranspositions.coeff(k));
+  m_colsPermutation.setIdentity(PermIndexType(cols));
+  for(PermIndexType k = 0; k < m_nonzero_pivots; ++k)
+    m_colsPermutation.applyTranspositionOnTheRight(k, PermIndexType(m_colsTranspositions.coeff(k)));

  m_det_pq = (number_of_transpositions%2) ? -1 : 1;
  m_isInitialized = true;
@@ -482,7 +518,7 @@ struct solve_retval<ColPivHouseholderQR<_MatrixType>, Rhs>
        .transpose()
      );

-    dec().matrixQR()
+    dec().matrixR()
      .topLeftCorner(nonzero_pivots, nonzero_pivots)
      .template triangularView<Upper>()
      .solveInPlace(c.topRows(nonzero_pivots));

@@ -241,7 +241,6 @@ void householder_qr_inplace_blocked(MatrixQR& mat, HCoeffs& hCoeffs,
{
  typedef typename MatrixQR::Index Index;
  typedef typename MatrixQR::Scalar Scalar;
-  typedef typename MatrixQR::RealScalar RealScalar;
  typedef Block<MatrixQR,Dynamic,Dynamic> BlockType;

  Index rows = mat.rows();

@@ -60,7 +60,7 @@ class SPQR
    typedef typename _MatrixType::Scalar Scalar;
    typedef typename _MatrixType::RealScalar RealScalar;
    typedef UF_long Index ;
-    typedef SparseMatrix<Scalar, _MatrixType::Flags, Index> MatrixType;
+    typedef SparseMatrix<Scalar, ColMajor, Index> MatrixType;
    typedef PermutationMatrix<Dynamic, Dynamic> PermutationType;
  public:
    SPQR()
@@ -88,7 +88,7 @@ class SPQR
      delete[] m_E;
      delete[] m_HPinv;
    }
-    void compute(const MatrixType& matrix)
+    void compute(const _MatrixType& matrix)
    {
      MatrixType mat(matrix);
      cholmod_sparse A;
@@ -105,20 +105,18 @@ class SPQR
      }
      m_info = Success;
      m_isInitialized = true;
+      m_isRUpToDate = false;
    }
    /**
-     * Get the number of rows of the triangular matrix.
+     * Get the number of rows of the input matrix and the Q matrix
     */
-    inline Index rows() const { return m_cR->nrow; }
+    inline Index rows() const {return m_H->nrow; }

    /**
-     * Get the number of columns of the triangular matrix.
+     * Get the number of columns of the input matrix.
     */
    inline Index cols() const { return m_cR->ncol; }
-    /**
-     * This is the number of rows in the input matrix and the Q matrix
-     */
-    inline Index rowsQ() const {return m_HTau->nrow; }

    /** \returns the solution X of \f$ A X = B \f$ using the current decomposition of A.
      *
      * \sa compute()
@@ -126,8 +124,8 @@ class SPQR
    template<typename Rhs>
    inline const internal::solve_retval<SPQR, Rhs> solve(const MatrixBase<Rhs>& B) const
    {
      eigen_assert(m_isInitialized && " The QR factorization should be computed first, call compute()");
-      eigen_assert(rows()==B.rows()
+      eigen_assert(this->rows()==B.rows()
           && "SPQR::solve(): invalid number of rows of the right hand side matrix B");
      return internal::solve_retval<SPQR, Rhs>(*this, B.derived());
    }
@@ -139,22 +137,28 @@ class SPQR
      eigen_assert(b.cols()==1 && "This method is for vectors only");

      //Compute Q^T * b
-      dest = matrixQ().transpose() * b;
-
-      // Solves with the triangular matrix R
      Dest y;
-      y = this->matrixQR().template triangularView<Upper>().solve(dest.derived());
+      y = matrixQ().transpose() * b;
+      // Solves with the triangular matrix R
+      Index rk = this->rank();
+      y.topRows(rk) = this->matrixR().topLeftCorner(rk, rk).template triangularView<Upper>().solve(y.topRows(rk));
+      y.bottomRows(cols()-rk).setZero();
      // Apply the column permutation
-      dest = colsPermutation() * y;
+      dest.topRows(cols()) = colsPermutation() * y.topRows(cols());

      m_info = Success;
    }
-    /// Get the sparse triangular matrix R. It is a sparse matrix
-    MatrixType matrixQR() const
-    {
-      MatrixType R;
-      R = viewAsEigen<Scalar, MatrixType::Flags, typename MatrixType::Index>(*m_cR);
-      return R;
-    }
+
+    /** \returns the sparse triangular factor R. It is a sparse matrix
+      */
+    const MatrixType matrixR() const
+    {
+      eigen_assert(m_isInitialized && " The QR factorization should be computed first, call compute()");
+      if(!m_isRUpToDate) {
+        m_R = viewAsEigen<Scalar,ColMajor, typename MatrixType::Index>(*m_cR);
+        m_isRUpToDate = true;
+      }
+      return m_R;
+    }
    /// Get an expression of the matrix Q
    SPQRMatrixQReturnType<SPQR> matrixQ() const
@@ -181,15 +185,12 @@ class SPQR
      return m_cc.SPQR_istat[4];
    }
    /// Set the fill-reducing ordering method to be used
-    void setOrdering(int ord) { m_ordering = ord;}
+    void setSPQROrdering(int ord) { m_ordering = ord;}
    /// Set the tolerance tol to treat columns with 2-norm <= tol as zero
-    void setThreshold(RealScalar tol) { m_tolerance = tol; }
+    void setPivotThreshold(const RealScalar& tol) { m_tolerance = tol; }

-    /// Return a pointer to SPQR workspace
-    cholmod_common *cc() const { return &m_cc; }
-    cholmod_sparse * H() const { return m_H; }
-    Index *HPinv() const { return m_HPinv; }
-    cholmod_dense* HTau() const { return m_HTau; }
+    /** \returns a pointer to the SPQR workspace */
+    cholmod_common *cholmodCommon() const { return &m_cc; }

    /** \brief Reports whether previous computation was successful.
@@ -206,17 +207,20 @@ class SPQR
    bool m_isInitialized;
    bool m_analysisIsOk;
    bool m_factorizationIsOk;
+    mutable bool m_isRUpToDate;
    mutable ComputationInfo m_info;
    int m_ordering; // Ordering method to use, see SPQR's manual
    int m_allow_tol; // Allow to use some tolerance during numerical factorization.
    RealScalar m_tolerance; // treat columns with 2-norm below this tolerance as zero
    mutable cholmod_sparse *m_cR; // The sparse R factor in cholmod format
+    mutable MatrixType m_R; // The sparse matrix R in Eigen format
    mutable Index *m_E; // The permutation applied to columns
    mutable cholmod_sparse *m_H;  //The householder vectors
    mutable Index *m_HPinv; // The row permutation of H
    mutable cholmod_dense *m_HTau; // The Householder coefficients
    mutable Index m_rank; // The rank of the matrix
    mutable cholmod_common m_cc; // Workspace and parameters
+    template<typename ,typename > friend struct SPQR_QProduct;
};

template <typename SPQRType, typename Derived>
@@ -227,7 +231,7 @@ struct SPQR_QProduct : ReturnByValue<SPQR_QProduct<SPQRType,Derived> >
  //Define the constructor to get reference to argument types
  SPQR_QProduct(const SPQRType& spqr, const Derived& other, bool transpose) : m_spqr(spqr),m_other(other),m_transpose(transpose) {}

-  inline Index rows() const { return m_transpose ? m_spqr.rowsQ() : m_spqr.cols(); }
+  inline Index rows() const { return m_transpose ? m_spqr.rows() : m_spqr.cols(); }
  inline Index cols() const { return m_other.cols(); }
  // Assign to a vector
  template<typename ResType>
@@ -236,9 +240,9 @@ struct SPQR_QProduct : ReturnByValue<SPQR_QProduct<SPQRType,Derived> >
    cholmod_dense y_cd;
    cholmod_dense *x_cd;
    int method = m_transpose ? SPQR_QTX : SPQR_QX;
-    cholmod_common *cc = m_spqr.cc();
+    cholmod_common *cc = m_spqr.cholmodCommon();
    y_cd = viewAsCholmod(m_other.const_cast_derived());
-    x_cd = SuiteSparseQR_qmult<Scalar>(method, m_spqr.H(), m_spqr.HTau(), m_spqr.HPinv(), &y_cd, cc);
+    x_cd = SuiteSparseQR_qmult<Scalar>(method, m_spqr.m_H, m_spqr.m_HTau, m_spqr.m_HPinv, &y_cd, cc);
    res = Matrix<Scalar,ResType::RowsAtCompileTime,ResType::ColsAtCompileTime>::Map(reinterpret_cast<Scalar*>(x_cd->x), x_cd->nrow, x_cd->ncol);
    cholmod_free_dense(&x_cd, cc);
  }

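Taken together, the renames (setSPQROrdering, setPivotThreshold, matrixR) and the rank-aware solve give SPQR a least-squares interface for rank-deficient sparse problems. A minimal usage sketch (requires the SuiteSparseQR library; matrix assembly is illustrative):

    #include <Eigen/SPQRSupport>
    #include <iostream>

    int main()
    {
      using namespace Eigen;
      SparseMatrix<double> A(5, 3);
      A.insert(0,0) = 1.0; A.insert(1,1) = 2.0;
      A.insert(2,2) = 3.0; A.insert(3,0) = 1.0; A.insert(4,1) = 1.0;
      A.makeCompressed();

      SPQR<SparseMatrix<double> > spqr;
      spqr.setPivotThreshold(1e-8);  // columns with 2-norm <= tol count as zero
      spqr.compute(A);

      VectorXd b = VectorXd::Ones(5);
      VectorXd x = spqr.solve(b);    // least-squares solution via Q^T b and R
      std::cout << "rank = " << spqr.rank() << "\n";
    }
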
@@ -78,7 +78,8 @@ public:
  {
    if (svd.rows() != m_qr.rows() || svd.cols() != m_qr.cols())
    {
-      m_qr = FullPivHouseholderQR<MatrixType>(svd.rows(), svd.cols());
+      m_qr.~QRType();
+      ::new (&m_qr) QRType(svd.rows(), svd.cols());
    }
    if (svd.m_computeFullU) m_workspace.resize(svd.rows());
  }
@@ -96,7 +97,8 @@ public:
    return false;
  }
private:
-  FullPivHouseholderQR<MatrixType> m_qr;
+  typedef FullPivHouseholderQR<MatrixType> QRType;
+  QRType m_qr;
  WorkspaceType m_workspace;
};

@@ -121,7 +123,8 @@ public:
  {
    if (svd.cols() != m_qr.rows() || svd.rows() != m_qr.cols())
    {
-      m_qr = FullPivHouseholderQR<TransposeTypeWithSameStorageOrder>(svd.cols(), svd.rows());
+      m_qr.~QRType();
+      ::new (&m_qr) QRType(svd.cols(), svd.rows());
    }
    m_adjoint.resize(svd.cols(), svd.rows());
    if (svd.m_computeFullV) m_workspace.resize(svd.cols());
@@ -141,7 +144,8 @@ public:
    else return false;
  }
private:
-  FullPivHouseholderQR<TransposeTypeWithSameStorageOrder> m_qr;
+  typedef FullPivHouseholderQR<TransposeTypeWithSameStorageOrder> QRType;
+  QRType m_qr;
  TransposeTypeWithSameStorageOrder m_adjoint;
  typename internal::plain_row_type<MatrixType>::type m_workspace;
};
@@ -158,7 +162,8 @@ public:
  {
    if (svd.rows() != m_qr.rows() || svd.cols() != m_qr.cols())
    {
-      m_qr = ColPivHouseholderQR<MatrixType>(svd.rows(), svd.cols());
+      m_qr.~QRType();
+      ::new (&m_qr) QRType(svd.rows(), svd.cols());
    }
    if (svd.m_computeFullU) m_workspace.resize(svd.rows());
    else if (svd.m_computeThinU) m_workspace.resize(svd.cols());
@@ -183,7 +188,8 @@ public:
  }

private:
-  ColPivHouseholderQR<MatrixType> m_qr;
+  typedef ColPivHouseholderQR<MatrixType> QRType;
+  QRType m_qr;
  typename internal::plain_col_type<MatrixType>::type m_workspace;
};

@@ -209,7 +215,8 @@ public:
  {
    if (svd.cols() != m_qr.rows() || svd.rows() != m_qr.cols())
    {
-      m_qr = ColPivHouseholderQR<TransposeTypeWithSameStorageOrder>(svd.cols(), svd.rows());
+      m_qr.~QRType();
+      ::new (&m_qr) QRType(svd.cols(), svd.rows());
    }
    if (svd.m_computeFullV) m_workspace.resize(svd.cols());
    else if (svd.m_computeThinV) m_workspace.resize(svd.rows());
@@ -237,7 +244,8 @@ public:
  }

private:
-  ColPivHouseholderQR<TransposeTypeWithSameStorageOrder> m_qr;
+  typedef ColPivHouseholderQR<TransposeTypeWithSameStorageOrder> QRType;
+  QRType m_qr;
  TransposeTypeWithSameStorageOrder m_adjoint;
  typename internal::plain_row_type<MatrixType>::type m_workspace;
};
@@ -254,7 +262,8 @@ public:
  {
    if (svd.rows() != m_qr.rows() || svd.cols() != m_qr.cols())
    {
-      m_qr = HouseholderQR<MatrixType>(svd.rows(), svd.cols());
+      m_qr.~QRType();
+      ::new (&m_qr) QRType(svd.rows(), svd.cols());
    }
    if (svd.m_computeFullU) m_workspace.resize(svd.rows());
    else if (svd.m_computeThinU) m_workspace.resize(svd.cols());
@@ -278,7 +287,8 @@ public:
    return false;
  }
private:
-  HouseholderQR<MatrixType> m_qr;
+  typedef HouseholderQR<MatrixType> QRType;
+  QRType m_qr;
  typename internal::plain_col_type<MatrixType>::type m_workspace;
};

@@ -304,7 +314,8 @@ public:
  {
    if (svd.cols() != m_qr.rows() || svd.rows() != m_qr.cols())
    {
-      m_qr = HouseholderQR<TransposeTypeWithSameStorageOrder>(svd.cols(), svd.rows());
+      m_qr.~QRType();
+      ::new (&m_qr) QRType(svd.cols(), svd.rows());
    }
    if (svd.m_computeFullV) m_workspace.resize(svd.cols());
    else if (svd.m_computeThinV) m_workspace.resize(svd.rows());
@@ -332,7 +343,8 @@ public:
  }

private:
-  HouseholderQR<TransposeTypeWithSameStorageOrder> m_qr;
+  typedef HouseholderQR<TransposeTypeWithSameStorageOrder> QRType;
+  QRType m_qr;
  TransposeTypeWithSameStorageOrder m_adjoint;
  typename internal::plain_row_type<MatrixType>::type m_workspace;
};

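These six hunks all apply the same idiom: instead of copy-assigning a freshly sized QR object (which needs an assignment operator and a temporary), the member is destroyed in place and reconstructed with placement new at the same address. A generic sketch of the pattern (caution: if the constructor throws, the object is left destroyed):

    #include <new>
    #include <string>

    // Rebuild a member in place when the held type has no usable assignment
    // operator -- the pattern the hunks above apply to m_qr.
    template<typename T, typename A>
    void reconstruct(T& obj, const A& arg)
    {
      obj.~T();            // end the old object's lifetime
      ::new (&obj) T(arg); // construct the replacement at the same address
    }

    int main()
    {
      std::string s("old");
      reconstruct(s, "resized");  // contrived stand-in for QRType(rows, cols)
      return s == "resized" ? 0 : 1;
    }
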
@@ -1,52 +1,12 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
-// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
+// Copyright (C) 2008-2012 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

-/*
-
-NOTE: the _symbolic, and _numeric functions has been adapted from
-      the LDL library:
-
-LDL Copyright (c) 2005 by Timothy A. Davis.  All Rights Reserved.
-
-LDL License:
-
-    Your use or distribution of LDL or any modified version of
-    LDL implies that you agree to this License.
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License as published by the Free Software Foundation; either
-    version 2.1 of the License, or (at your option) any later version.
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public
-    License along with this library; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
-    USA
-
-    Permission is hereby granted to use or copy this program under the
-    terms of the GNU LGPL, provided that the Copyright, this License,
-    and the Availability of the original version is retained on all copies.
-    User documentation of any code that uses this code or any modified
-    version of this code must cite the Copyright, this License, the
-    Availability note, and "Used by permission." Permission to modify
-    the code and to distribute modified code is granted, provided the
-    Copyright, this License, and the Availability note are retained,
-    and a notice that the code was modified is included.
-*/
-
-#include "../Core/util/NonMPL2.h"
-
#ifndef EIGEN_SIMPLICIAL_CHOLESKY_H
#define EIGEN_SIMPLICIAL_CHOLESKY_H

@@ -215,27 +175,6 @@ class SimplicialCholeskyBase : internal::noncopyable
      dest = m_Pinv * dest;
    }

-    /** \internal */
-    template<typename Rhs, typename DestScalar, int DestOptions, typename DestIndex>
-    void _solve_sparse(const Rhs& b, SparseMatrix<DestScalar,DestOptions,DestIndex> &dest) const
-    {
-      eigen_assert(m_factorizationIsOk && "The decomposition is not in a valid state for solving, you must first call either compute() or symbolic()/numeric()");
-      eigen_assert(m_matrix.rows()==b.rows());
-
-      // we process the sparse rhs per block of NbColsAtOnce columns temporarily stored into a dense matrix.
-      static const int NbColsAtOnce = 4;
-      int rhsCols = b.cols();
-      int size = b.rows();
-      Eigen::Matrix<DestScalar,Dynamic,Dynamic> tmp(size,rhsCols);
-      for(int k=0; k<rhsCols; k+=NbColsAtOnce)
-      {
-        int actualCols = std::min<int>(rhsCols-k, NbColsAtOnce);
-        tmp.leftCols(actualCols) = b.middleCols(k,actualCols);
-        tmp.leftCols(actualCols) = derived().solve(tmp.leftCols(actualCols));
-        dest.middleCols(k,actualCols) = tmp.leftCols(actualCols).sparseView();
-      }
-    }
-
#endif // EIGEN_PARSED_BY_DOXYGEN

  protected:
@@ -693,153 +632,6 @@ void SimplicialCholeskyBase<Derived>::ordering(const MatrixType& a, CholMatrixTy
  ap.template selfadjointView<Upper>() = a.template selfadjointView<UpLo>().twistedBy(m_P);
}

-template<typename Derived>
-void SimplicialCholeskyBase<Derived>::analyzePattern_preordered(const CholMatrixType& ap, bool doLDLT)
-{
-  const Index size = ap.rows();
-  m_matrix.resize(size, size);
-  m_parent.resize(size);
-  m_nonZerosPerCol.resize(size);
-
-  ei_declare_aligned_stack_constructed_variable(Index, tags, size, 0);
-
-  for(Index k = 0; k < size; ++k)
-  {
-    /* L(k,:) pattern: all nodes reachable in etree from nz in A(0:k-1,k) */
-    m_parent[k] = -1;             /* parent of k is not yet known */
-    tags[k] = k;                  /* mark node k as visited */
-    m_nonZerosPerCol[k] = 0;      /* count of nonzeros in column k of L */
-    for(typename CholMatrixType::InnerIterator it(ap,k); it; ++it)
-    {
-      Index i = it.index();
-      if(i < k)
-      {
-        /* follow path from i to root of etree, stop at flagged node */
-        for(; tags[i] != k; i = m_parent[i])
-        {
-          /* find parent of i if not yet determined */
-          if (m_parent[i] == -1)
-            m_parent[i] = k;
-          m_nonZerosPerCol[i]++;  /* L (k,i) is nonzero */
-          tags[i] = k;            /* mark i as visited */
-        }
-      }
-    }
-  }
-
-  /* construct Lp index array from m_nonZerosPerCol column counts */
-  Index* Lp = m_matrix.outerIndexPtr();
-  Lp[0] = 0;
-  for(Index k = 0; k < size; ++k)
-    Lp[k+1] = Lp[k] + m_nonZerosPerCol[k] + (doLDLT ? 0 : 1);
-
-  m_matrix.resizeNonZeros(Lp[size]);
-
-  m_isInitialized = true;
-  m_info = Success;
-  m_analysisIsOk = true;
-  m_factorizationIsOk = false;
-}
-
-
-template<typename Derived>
-template<bool DoLDLT>
-void SimplicialCholeskyBase<Derived>::factorize_preordered(const CholMatrixType& ap)
-{
-  using std::sqrt;
-
-  eigen_assert(m_analysisIsOk && "You must first call analyzePattern()");
-  eigen_assert(ap.rows()==ap.cols());
-  const Index size = ap.rows();
-  eigen_assert(m_parent.size()==size);
-  eigen_assert(m_nonZerosPerCol.size()==size);
-
-  const Index* Lp = m_matrix.outerIndexPtr();
-  Index* Li = m_matrix.innerIndexPtr();
-  Scalar* Lx = m_matrix.valuePtr();
-
-  ei_declare_aligned_stack_constructed_variable(Scalar, y, size, 0);
-  ei_declare_aligned_stack_constructed_variable(Index, pattern, size, 0);
-  ei_declare_aligned_stack_constructed_variable(Index, tags, size, 0);
-
-  bool ok = true;
-  m_diag.resize(DoLDLT ? size : 0);
-
-  for(Index k = 0; k < size; ++k)
-  {
-    // compute nonzero pattern of kth row of L, in topological order
-    y[k] = 0.0;                     // Y(0:k) is now all zero
-    Index top = size;               // stack for pattern is empty
-    tags[k] = k;                    // mark node k as visited
-    m_nonZerosPerCol[k] = 0;        // count of nonzeros in column k of L
-    for(typename MatrixType::InnerIterator it(ap,k); it; ++it)
-    {
-      Index i = it.index();
-      if(i <= k)
-      {
-        y[i] += internal::conj(it.value());  /* scatter A(i,k) into Y (sum duplicates) */
-        Index len;
-        for(len = 0; tags[i] != k; i = m_parent[i])
-        {
-          pattern[len++] = i;       /* L(k,i) is nonzero */
-          tags[i] = k;              /* mark i as visited */
-        }
-        while(len > 0)
-          pattern[--top] = pattern[--len];
-      }
-    }
-
-    /* compute numerical values kth row of L (a sparse triangular solve) */
-
-    RealScalar d = internal::real(y[k]) * m_shiftScale + m_shiftOffset;  // get D(k,k), apply the shift function, and clear Y(k)
-    y[k] = 0.0;
-    for(; top < size; ++top)
-    {
-      Index i = pattern[top];       /* pattern[top:n-1] is pattern of L(:,k) */
-      Scalar yi = y[i];             /* get and clear Y(i) */
-      y[i] = 0.0;
-
-      /* the nonzero entry L(k,i) */
-      Scalar l_ki;
-      if(DoLDLT)
-        l_ki = yi / m_diag[i];
-      else
-        yi = l_ki = yi / Lx[Lp[i]];
-
-      Index p2 = Lp[i] + m_nonZerosPerCol[i];
-      Index p;
-      for(p = Lp[i] + (DoLDLT ? 0 : 1); p < p2; ++p)
-        y[Li[p]] -= internal::conj(Lx[p]) * yi;
-      d -= internal::real(l_ki * internal::conj(yi));
-      Li[p] = k;                    /* store L(k,i) in column form of L */
-      Lx[p] = l_ki;
-      ++m_nonZerosPerCol[i];        /* increment count of nonzeros in col i */
-    }
-    if(DoLDLT)
-    {
-      m_diag[k] = d;
-      if(d == RealScalar(0))
-      {
-        ok = false;                 /* failure, D(k,k) is zero */
-        break;
-      }
-    }
-    else
-    {
-      Index p = Lp[k] + m_nonZerosPerCol[k]++;
-      Li[p] = k ;                   /* store L(k,k) = sqrt (d) in column k */
-      if(d <= RealScalar(0)) {
-        ok = false;                 /* failure, matrix is not positive definite */
-        break;
-      }
-      Lx[p] = sqrt(d) ;
-    }
-  }
-
-  m_info = ok ? Success : NumericalIssue;
-  m_factorizationIsOk = true;
-}
-
namespace internal {

template<typename Derived, typename Rhs>
@@ -864,7 +656,7 @@ struct sparse_solve_retval<SimplicialCholeskyBase<Derived>, Rhs>

  template<typename Dest> void evalTo(Dest& dst) const
  {
-    dec().derived()._solve_sparse(rhs(),dst);
+    this->defaultEvalTo(dst);
  }
};

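The factorization code just moved into SimplicialCholesky_impl.h (shown below) is what SimplicialLLT and SimplicialLDLT run under the hood: an elimination-tree symbolic pass, then an up-looking numeric pass. Typical use through the public classes:

    #include <Eigen/SparseCholesky>
    #include <iostream>

    int main()
    {
      using namespace Eigen;
      // SPD tridiagonal test matrix.
      SparseMatrix<double> A(3,3);
      A.insert(0,0) = 2;  A.insert(1,1) = 2;  A.insert(2,2) = 2;
      A.insert(0,1) = -1; A.insert(1,0) = -1;
      A.insert(1,2) = -1; A.insert(2,1) = -1;
      A.makeCompressed();

      SimplicialLDLT<SparseMatrix<double> > ldlt(A);
      if (ldlt.info() != Success) return 1;  // NumericalIssue if D(k,k) == 0
      VectorXd x = ldlt.solve(VectorXd::Ones(3));
      std::cout << x.transpose() << "\n";
    }
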
Eigen/src/SparseCholesky/SimplicialCholesky_impl.h (new file, 199 lines)
@ -0,0 +1,199 @@
|
||||
// This file is part of Eigen, a lightweight C++ template library
|
||||
// for linear algebra.
|
||||
//
|
||||
// Copyright (C) 2008-2012 Gael Guennebaud <gael.guennebaud@inria.fr>
|
||||
|
||||
/*
|
||||
|
||||
NOTE: thes functions vave been adapted from the LDL library:
|
||||
|
||||
LDL Copyright (c) 2005 by Timothy A. Davis. All Rights Reserved.
|
||||
|
||||
LDL License:
|
||||
|
||||
Your use or distribution of LDL or any modified version of
|
||||
LDL implies that you agree to this License.
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
|
||||
USA
|
||||
|
||||
Permission is hereby granted to use or copy this program under the
|
||||
terms of the GNU LGPL, provided that the Copyright, this License,
|
||||
and the Availability of the original version is retained on all copies.
|
||||
User documentation of any code that uses this code or any modified
|
||||
version of this code must cite the Copyright, this License, the
|
||||
Availability note, and "Used by permission." Permission to modify
|
||||
the code and to distribute modified code is granted, provided the
|
||||
Copyright, this License, and the Availability note are retained,
|
||||
and a notice that the code was modified is included.
|
||||
*/
|
||||
|
||||
#include "../Core/util/NonMPL2.h"
|
||||
|
||||
#ifndef EIGEN_SIMPLICIAL_CHOLESKY_IMPL_H
|
||||
#define EIGEN_SIMPLICIAL_CHOLESKY_IMPL_H
|
||||
|
||||
namespace Eigen {
|
||||
|
||||
template<typename Derived>
void SimplicialCholeskyBase<Derived>::analyzePattern_preordered(const CholMatrixType& ap, bool doLDLT)
{
  const Index size = ap.rows();
  m_matrix.resize(size, size);
  m_parent.resize(size);
  m_nonZerosPerCol.resize(size);

  ei_declare_aligned_stack_constructed_variable(Index, tags, size, 0);

  for(Index k = 0; k < size; ++k)
  {
    /* L(k,:) pattern: all nodes reachable in etree from nz in A(0:k-1,k) */
    m_parent[k] = -1;             /* parent of k is not yet known */
    tags[k] = k;                  /* mark node k as visited */
    m_nonZerosPerCol[k] = 0;      /* count of nonzeros in column k of L */
    for(typename CholMatrixType::InnerIterator it(ap,k); it; ++it)
    {
      Index i = it.index();
      if(i < k)
      {
        /* follow path from i to root of etree, stop at flagged node */
        for(; tags[i] != k; i = m_parent[i])
        {
          /* find parent of i if not yet determined */
          if (m_parent[i] == -1)
            m_parent[i] = k;
          m_nonZerosPerCol[i]++;  /* L (k,i) is nonzero */
          tags[i] = k;            /* mark i as visited */
        }
      }
    }
  }

  /* construct Lp index array from m_nonZerosPerCol column counts */
  Index* Lp = m_matrix.outerIndexPtr();
  Lp[0] = 0;
  for(Index k = 0; k < size; ++k)
    Lp[k+1] = Lp[k] + m_nonZerosPerCol[k] + (doLDLT ? 0 : 1);

  m_matrix.resizeNonZeros(Lp[size]);

  m_isInitialized = true;
  m_info = Success;
  m_analysisIsOk = true;
  m_factorizationIsOk = false;
}
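/* A worked illustration (hypothetical 4x4 pattern, added for exposition):
   with upper-triangular entries A(0,2), A(1,3) and A(2,3) beside the
   diagonal, column 2 walks up from node 0 and sets m_parent[0] = 2,
   counting L(2,0); column 3 walks up from nodes 1 and 2, setting
   m_parent[1] = 3 and m_parent[2] = 3. The column counts end up {1,1,1,0},
   so with doLDLT==true (no extra slot per diagonal) Lp = {0,1,2,3,3}. */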

template<typename Derived>
template<bool DoLDLT>
void SimplicialCholeskyBase<Derived>::factorize_preordered(const CholMatrixType& ap)
{
  using std::sqrt;

  eigen_assert(m_analysisIsOk && "You must first call analyzePattern()");
  eigen_assert(ap.rows()==ap.cols());
  const Index size = ap.rows();
  eigen_assert(m_parent.size()==size);
  eigen_assert(m_nonZerosPerCol.size()==size);

  const Index* Lp = m_matrix.outerIndexPtr();
  Index* Li = m_matrix.innerIndexPtr();
  Scalar* Lx = m_matrix.valuePtr();

  ei_declare_aligned_stack_constructed_variable(Scalar, y, size, 0);
  ei_declare_aligned_stack_constructed_variable(Index, pattern, size, 0);
  ei_declare_aligned_stack_constructed_variable(Index, tags, size, 0);

  bool ok = true;
  m_diag.resize(DoLDLT ? size : 0);

  for(Index k = 0; k < size; ++k)
  {
    // compute nonzero pattern of kth row of L, in topological order
    y[k] = 0.0;                     // Y(0:k) is now all zero
    Index top = size;               // stack for pattern is empty
    tags[k] = k;                    // mark node k as visited
    m_nonZerosPerCol[k] = 0;        // count of nonzeros in column k of L
    for(typename MatrixType::InnerIterator it(ap,k); it; ++it)
    {
      Index i = it.index();
      if(i <= k)
      {
        y[i] += internal::conj(it.value());   /* scatter A(i,k) into Y (sum duplicates) */
        Index len;
        for(len = 0; tags[i] != k; i = m_parent[i])
        {
          pattern[len++] = i;       /* L(k,i) is nonzero */
          tags[i] = k;              /* mark i as visited */
        }
        while(len > 0)
          pattern[--top] = pattern[--len];
      }
    }

    /* compute numerical values kth row of L (a sparse triangular solve) */

    RealScalar d = internal::real(y[k]) * m_shiftScale + m_shiftOffset;   // get D(k,k), apply the shift function, and clear Y(k)
    y[k] = 0.0;
    for(; top < size; ++top)
    {
      Index i = pattern[top];       /* pattern[top:n-1] is pattern of L(:,k) */
      Scalar yi = y[i];             /* get and clear Y(i) */
      y[i] = 0.0;

      /* the nonzero entry L(k,i) */
      Scalar l_ki;
      if(DoLDLT)
        l_ki = yi / m_diag[i];
      else
        yi = l_ki = yi / Lx[Lp[i]];

      Index p2 = Lp[i] + m_nonZerosPerCol[i];
      Index p;
      for(p = Lp[i] + (DoLDLT ? 0 : 1); p < p2; ++p)
        y[Li[p]] -= internal::conj(Lx[p]) * yi;
      d -= internal::real(l_ki * internal::conj(yi));
      Li[p] = k;                    /* store L(k,i) in column form of L */
      Lx[p] = l_ki;
      ++m_nonZerosPerCol[i];        /* increment count of nonzeros in col i */
    }
    if(DoLDLT)
    {
      m_diag[k] = d;
      if(d == RealScalar(0))
      {
        ok = false;                 /* failure, D(k,k) is zero */
        break;
      }
    }
    else
    {
      Index p = Lp[k] + m_nonZerosPerCol[k]++;
      Li[p] = k;                    /* store L(k,k) = sqrt (d) in column k */
      if(d <= RealScalar(0)) {
        ok = false;                 /* failure, matrix is not positive definite */
        break;
      }
      Lx[p] = sqrt(d);
    }
  }

  m_info = ok ? Success : NumericalIssue;
  m_factorizationIsOk = true;
}

} // end namespace Eigen

#endif // EIGEN_SIMPLICIAL_CHOLESKY_IMPL_H
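For orientation, a minimal usage sketch of the two-pass design above (the helper name `solve_spd` and the variables `A`, `b`, `x` are illustrative, not from the commit): `analyzePattern()` drives `analyzePattern_preordered`, `factorize()` drives `factorize_preordered`, and the shift applied to D(k,k) is configured with `setShift()`.

    #include <Eigen/Sparse>

    // illustrative helper, not part of the commit
    int solve_spd(const Eigen::SparseMatrix<double>& A,
                  const Eigen::VectorXd& b, Eigen::VectorXd& x)
    {
      Eigen::SimplicialLDLT<Eigen::SparseMatrix<double> > ldlt;
      ldlt.setShift(0.0, 1.0);  // D(k,k) = real(y[k])*scale + offset, as above
      ldlt.analyzePattern(A);   // symbolic pass: analyzePattern_preordered
      ldlt.factorize(A);        // numeric pass: factorize_preordered<true>
      if(ldlt.info() != Eigen::Success)
        return 1;               // NumericalIssue: a zero pivot was hit
      x = ldlt.solve(b);
      return 0;
    }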
@ -288,7 +288,7 @@ class AmbiVector<_Scalar,_Index>::Iterator
  * In practice, all coefficients having a magnitude smaller than \a epsilon
  * are skipped.
  */
Iterator(const AmbiVector& vec, RealScalar epsilon = 0)
Iterator(const AmbiVector& vec, const RealScalar& epsilon = 0)
  : m_vector(vec)
{
  using std::abs;
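The signature change avoids copying `epsilon` at every construction, which matters when `RealScalar` is expensive to copy (e.g. a multiprecision type). A minimal sketch with a hypothetical `BigReal` standing in for such a scalar:

    #include <vector>

    // hypothetical heavyweight scalar, e.g. a multiprecision real
    struct BigReal { std::vector<unsigned> limbs; };

    void by_value(BigReal eps);        // copies the limb storage per call
    void by_cref(const BigReal& eps);  // no copy; the form adopted above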
@ -121,9 +121,9 @@ static void conservative_sparse_sparse_product_impl(const Lhs& lhs, const Rhs& r
namespace internal {

template<typename Lhs, typename Rhs, typename ResultType,
  int LhsStorageOrder = traits<Lhs>::Flags&RowMajorBit,
  int RhsStorageOrder = traits<Rhs>::Flags&RowMajorBit,
  int ResStorageOrder = traits<ResultType>::Flags&RowMajorBit>
  int LhsStorageOrder = (traits<Lhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
  int RhsStorageOrder = (traits<Rhs>::Flags&RowMajorBit) ? RowMajor : ColMajor,
  int ResStorageOrder = (traits<ResultType>::Flags&RowMajorBit) ? RowMajor : ColMajor>
struct conservative_sparse_sparse_product_selector;

template<typename Lhs, typename Rhs, typename ResultType>
@ -61,7 +61,7 @@ public:

protected:

  const typename XprType::Nested m_matrix;
  typename XprType::Nested m_matrix;
  Index m_outerStart;
  const internal::variable_if_dynamic<Index, OuterSize> m_outerSize;
};
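Dropping the top-level `const` is plausibly needed because `XprType::Nested` can be a plain value type, and a `const` data member makes the enclosing class non-assignable; this motivation is inferred, not stated in the diff:

    // const member: copy-assignment is implicitly unavailable
    struct WithConstMember    { const int m_matrix; };
    // non-const member: the struct remains assignable
    struct WithoutConstMember { int m_matrix; };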
@ -129,59 +129,58 @@ public:

      // 2 - let's check whether there is enough allocated memory
      Index nnz = tmp.nonZeros();
      Index nnz_previous = nonZeros();
      Index free_size = Index(matrix.data().allocatedSize()) + nnz_previous;
      Index nnz_head = m_outerStart==0 ? 0 : matrix.outerIndexPtr()[m_outerStart];
      Index tail = m_matrix.outerIndexPtr()[m_outerStart+m_outerSize.value()];
      Index nnz_tail = matrix.nonZeros() - tail;
      Index start = m_outerStart==0 ? 0 : matrix.outerIndexPtr()[m_outerStart]; // starting position of the current block
      Index end = m_matrix.outerIndexPtr()[m_outerStart+m_outerSize.value()];   // ending position of the current block
      Index block_size = end - start;                                           // available room in the current block
      Index tail_size = m_matrix.outerIndexPtr()[m_matrix.outerSize()] - end;

      Index free_size = m_matrix.isCompressed()
                      ? Index(matrix.data().allocatedSize()) + block_size
                      : block_size;

      if(nnz>free_size)
      {
        // realloc manually to reduce copies
        typename SparseMatrixType::Storage newdata(m_matrix.nonZeros() - nnz_previous + nnz);
        typename SparseMatrixType::Storage newdata(m_matrix.data().allocatedSize() - block_size + nnz);

        std::memcpy(&newdata.value(0), &m_matrix.data().value(0), nnz_head*sizeof(Scalar));
        std::memcpy(&newdata.index(0), &m_matrix.data().index(0), nnz_head*sizeof(Index));
        std::memcpy(&newdata.value(0), &m_matrix.data().value(0), start*sizeof(Scalar));
        std::memcpy(&newdata.index(0), &m_matrix.data().index(0), start*sizeof(Index));

        std::memcpy(&newdata.value(nnz_head), &tmp.data().value(0), nnz*sizeof(Scalar));
        std::memcpy(&newdata.index(nnz_head), &tmp.data().index(0), nnz*sizeof(Index));
        std::memcpy(&newdata.value(start), &tmp.data().value(0), nnz*sizeof(Scalar));
        std::memcpy(&newdata.index(start), &tmp.data().index(0), nnz*sizeof(Index));

        std::memcpy(&newdata.value(nnz_head+nnz), &matrix.data().value(tail), nnz_tail*sizeof(Scalar));
        std::memcpy(&newdata.index(nnz_head+nnz), &matrix.data().index(tail), nnz_tail*sizeof(Index));
        std::memcpy(&newdata.value(start+nnz), &matrix.data().value(end), tail_size*sizeof(Scalar));
        std::memcpy(&newdata.index(start+nnz), &matrix.data().index(end), tail_size*sizeof(Index));

        newdata.resize(m_matrix.outerIndexPtr()[m_matrix.outerSize()] - block_size + nnz);

        matrix.data().swap(newdata);
      }
      else
      {
        // no need to realloc, simply copy the tail at its respective position and insert tmp
        matrix.data().resize(nnz_head + nnz + nnz_tail);
        matrix.data().resize(start + nnz + tail_size);

        if(nnz<nnz_previous)
        {
          std::memcpy(&matrix.data().value(nnz_head+nnz), &matrix.data().value(tail), nnz_tail*sizeof(Scalar));
          std::memcpy(&matrix.data().index(nnz_head+nnz), &matrix.data().index(tail), nnz_tail*sizeof(Index));
        }
        else
        {
          for(Index i=nnz_tail-1; i>=0; --i)
          {
            matrix.data().value(nnz_head+nnz+i) = matrix.data().value(tail+i);
            matrix.data().index(nnz_head+nnz+i) = matrix.data().index(tail+i);
          }
        }
        std::memmove(&matrix.data().value(start+nnz), &matrix.data().value(end), tail_size*sizeof(Scalar));
        std::memmove(&matrix.data().index(start+nnz), &matrix.data().index(end), tail_size*sizeof(Index));

        std::memcpy(&matrix.data().value(nnz_head), &tmp.data().value(0), nnz*sizeof(Scalar));
        std::memcpy(&matrix.data().index(nnz_head), &tmp.data().index(0), nnz*sizeof(Index));
        std::memcpy(&matrix.data().value(start), &tmp.data().value(0), nnz*sizeof(Scalar));
        std::memcpy(&matrix.data().index(start), &tmp.data().index(0), nnz*sizeof(Index));
      }

      // update innerNonZeros
      if(!m_matrix.isCompressed())
        for(Index j=0; j<m_outerSize.value(); ++j)
          matrix.innerNonZeroPtr()[m_outerStart+j] = tmp.innerVector(j).nonZeros();

      // update outer index pointers
      Index p = nnz_head;
      Index p = start;
      for(Index k=0; k<m_outerSize.value(); ++k)
      {
        matrix.outerIndexPtr()[m_outerStart+k] = p;
        p += tmp.innerVector(k).nonZeros();
      }
      std::ptrdiff_t offset = nnz - nnz_previous;
      std::ptrdiff_t offset = nnz - block_size;
      for(Index k = m_outerStart + m_outerSize.value(); k<=matrix.outerSize(); ++k)
      {
        matrix.outerIndexPtr()[k] += offset;
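For orientation, a usage sketch of the code path above (the function shown is the assignment operator of a sparse block spanning whole inner vectors; `A`, `B` and `overwrite_columns` are illustrative names):

    #include <Eigen/SparseCore>

    // illustrative: assigning to whole columns of a column-major SparseMatrix
    // goes through the realloc-or-shift logic shown above
    void overwrite_columns(Eigen::SparseMatrix<double>& A,
                           const Eigen::SparseMatrix<double>& B)
    {
      // replaces columns 2..4 of A (B assumed rows(A) x 3); the block's nnz
      // may grow or shrink, hence the start/end/block_size/tail_size bookkeeping
      A.middleCols(2, 3) = B;
    }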
@ -353,7 +352,7 @@ public:
    m_block(block),
    m_end(IsRowMajor ? block.m_startCol.value()+block.m_blockCols.value() : block.m_startRow.value()+block.m_blockRows.value())
  {
    while(Base::index() < (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value()))
    while( (Base::operator bool()) && (Base::index() < (IsRowMajor ? m_block.m_startCol.value() : m_block.m_startRow.value())) )
      Base::operator++();
  }

@ -376,7 +375,7 @@ public:
    m_block(block),
    m_begin(IsRowMajor ? block.m_startCol.value() : block.m_startRow.value())
  {
    while(Base::index() >= (IsRowMajor ? m_block.m_startCol.value()+block.m_blockCols.value() : m_block.m_startRow.value()+block.m_blockRows.value()) )
    while( (Base::operator bool()) && (Base::index() >= (IsRowMajor ? m_block.m_startCol.value()+block.m_blockCols.value() : m_block.m_startRow.value()+block.m_blockRows.value())) )
      Base::operator--();
  }
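The added `Base::operator bool()` guards matter when the underlying inner vector has no stored entry inside the block: the skip-ahead loops previously kept advancing (or decrementing) an already exhausted base iterator. A sketch of the situation, assuming this era's sparse `Block` API (helper name is illustrative):

    #include <Eigen/SparseCore>

    // sums rows 2..end of column 0 of A; if column 0 only has entries in
    // rows 0..1, the constructor's skip loop exhausts the base iterator,
    // which the added operator bool() test now detects
    double sum_subcolumn(const Eigen::SparseMatrix<double>& A)
    {
      typedef Eigen::Block<const Eigen::SparseMatrix<double> > SubXpr;
      SubXpr sub = A.block(2, 0, A.rows()-2, 1);
      double s = 0;
      for(SubXpr::InnerIterator it(sub, 0); it; ++it)
        s += it.value();
      return s;
    }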
@ -391,7 +390,7 @@ public:
    friend class InnerIterator;
    friend class ReverseInnerIterator;

    const typename XprType::Nested m_matrix;
    typename XprType::Nested m_matrix;
    const internal::variable_if_dynamic<Index, XprType::RowsAtCompileTime == 1 ? 0 : Dynamic> m_startRow;
    const internal::variable_if_dynamic<Index, XprType::ColsAtCompileTime == 1 ? 0 : Dynamic> m_startCol;
    const internal::variable_if_dynamic<Index, RowsAtCompileTime> m_blockRows;
Some files were not shown because too many files have changed in this diff.