merge pull request 198

Gael Guennebaud 2016-06-24 11:48:17 +02:00
commit 3852351793
109 changed files with 1692 additions and 1164 deletions

View File

@@ -32,6 +32,7 @@
* \endcode
*/
#include "src/misc/RealSvd2x2.h"
#include "src/Eigenvalues/Tridiagonalization.h"
#include "src/Eigenvalues/RealSchur.h"
#include "src/Eigenvalues/EigenSolver.h"

View File

@@ -31,6 +31,7 @@
* \endcode
*/
#include "src/misc/RealSvd2x2.h"
#include "src/SVD/UpperBidiagonalization.h"
#include "src/SVD/SVDBase.h"
#include "src/SVD/JacobiSVD.h"

View File

@@ -149,7 +149,7 @@ class Array
#if EIGEN_HAS_RVALUE_REFERENCES
EIGEN_DEVICE_FUNC
Array(Array&& other)
Array(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_constructible<Scalar>::value)
: Base(std::move(other))
{
Base::_check_template_params();
@@ -157,7 +157,7 @@ class Array
Base::_set_noalias(other);
}
EIGEN_DEVICE_FUNC
Array& operator=(Array&& other)
Array& operator=(Array&& other) EIGEN_NOEXCEPT_IF(std::is_nothrow_move_assignable<Scalar>::value)
{
other.swap(*this);
return *this;
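The conditional noexcept matters mostly for standard containers: std::vector only moves (rather than copies) its elements on reallocation when the element's move constructor cannot throw. A minimal sketch of the effect, assuming EIGEN_NOEXCEPT_IF(cond) expands to noexcept(cond) on C++11 compilers:

#include <Eigen/Core>
#include <vector>

int main()
{
  std::vector<Eigen::ArrayXd> v;
  v.reserve(1);
  v.emplace_back(1000);
  // This insertion reallocates; with a noexcept move constructor the
  // existing element is moved instead of deep-copied.
  v.emplace_back(1000);
}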

View File

@@ -176,7 +176,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator-=(const ArrayBase<OtherDerived> &other)
{
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar>());
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
return derived();
}
@@ -189,7 +189,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
ArrayBase<Derived>::operator+=(const ArrayBase<OtherDerived>& other)
{
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar>());
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
return derived();
}
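Carrying both scalar types in add_assign_op/sub_assign_op is what lets mixed-scalar compound assignments compile at all. A small sketch of what this enables, assuming the pre-defined ScalarBinaryOpTraits specialization for (std::complex<T>, T) pairs:

#include <Eigen/Core>
#include <complex>
#include <iostream>

int main()
{
  Eigen::ArrayXcd c = Eigen::ArrayXcd::Constant(3, std::complex<double>(1.0, 1.0));
  Eigen::ArrayXd  r = Eigen::ArrayXd::Constant(3, 2.0);
  c += r;  // instantiates add_assign_op<std::complex<double>, double>
  std::cout << c.transpose() << "\n";
}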

View File

@@ -116,9 +116,9 @@ private:
: 1,
UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize,
MayUnrollCompletely = int(Dst::SizeAtCompileTime) != Dynamic
&& int(Dst::SizeAtCompileTime) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit),
&& int(Dst::SizeAtCompileTime) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit),
MayUnrollInner = int(InnerSize) != Dynamic
&& int(InnerSize) * int(SrcEvaluator::CoeffReadCost) <= int(UnrollingLimit)
&& int(InnerSize) * (int(DstEvaluator::CoeffReadCost)+int(SrcEvaluator::CoeffReadCost)) <= int(UnrollingLimit)
};
public:
@@ -687,7 +687,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstX
template<typename DstXprType, typename SrcXprType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src)
{
call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar>());
call_dense_assignment_loop(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
}
/***************************************************************************
@@ -722,13 +722,13 @@ template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(Dst& dst, const Src& src)
{
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}
template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment(const Dst& dst, const Src& src)
{
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar>());
call_assignment(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}
// Deal with "assume-aliasing"
@@ -787,7 +787,7 @@ template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias(Dst& dst, const Src& src)
{
call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar>());
call_assignment_no_alias(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}
template<typename Dst, typename Src, typename Func>
@@ -809,7 +809,7 @@ template<typename Dst, typename Src>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src)
{
call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar>());
call_assignment_no_alias_no_transpose(dst, src, internal::assign_op<typename Dst::Scalar,typename Src::Scalar>());
}
// forward declaration
@@ -838,7 +838,7 @@ template< typename DstXprType, typename SrcXprType, typename Functor, typename S
struct Assignment<DstXprType, SrcXprType, Functor, EigenBase2EigenBase, Scalar>
{
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
src.evalTo(dst);

View File

@@ -80,8 +80,11 @@ struct CommaInitializer
EIGEN_DEVICE_FUNC
CommaInitializer& operator,(const DenseBase<OtherDerived>& other)
{
if(other.cols()==0 || other.rows()==0)
if(other.rows()==0)
{
m_col += other.cols();
return *this;
}
if (m_col==m_xpr.cols())
{
m_row+=m_currentBlockRows;
@@ -90,7 +93,7 @@ struct CommaInitializer
eigen_assert(m_row+m_currentBlockRows<=m_xpr.rows()
&& "Too many rows passed to comma initializer (operator<<)");
}
eigen_assert(m_col<m_xpr.cols()
eigen_assert((m_col<m_xpr.cols() || (m_xpr.cols()==0 && m_col==0))
&& "Too many coefficients passed to comma initializer (operator<<)");
eigen_assert(m_currentBlockRows==other.rows());
if (OtherDerived::SizeAtCompileTime != Dynamic)
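With the new zero-row branch, empty blocks advance the column cursor instead of being skipped outright, so comma-initialization of and with empty matrices stays consistent. A sketch of the edge case this appears to support (the motivating test is not shown in this hunk):

#include <Eigen/Core>

int main()
{
  // Horizontal concatenation where every block has zero rows:
  Eigen::MatrixXd m(0, 5), a(0, 2), b(0, 3);
  m << a, b;  // b's zero-row block bumps m_col from 2 to 5, so the
              // "too many/too few coefficients" asserts are satisfied
}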

View File

@@ -160,7 +160,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
MatrixBase<Derived>::operator-=(const MatrixBase<OtherDerived> &other)
{
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar>());
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
return derived();
}
@@ -173,7 +173,7 @@ template<typename OtherDerived>
EIGEN_STRONG_INLINE Derived &
MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
{
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar>());
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
return derived();
}

View File

@@ -34,22 +34,6 @@ struct traits<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3> > {
typedef typename result_of<TernaryOp(
const typename Arg1::Scalar&, const typename Arg2::Scalar&,
const typename Arg3::Scalar&)>::type Scalar;
EIGEN_STATIC_ASSERT(
(internal::is_same<typename internal::traits<Arg1>::StorageKind,
typename internal::traits<Arg2>::StorageKind>::value),
STORAGE_KIND_MUST_MATCH)
EIGEN_STATIC_ASSERT(
(internal::is_same<typename internal::traits<Arg1>::StorageKind,
typename internal::traits<Arg3>::StorageKind>::value),
STORAGE_KIND_MUST_MATCH)
EIGEN_STATIC_ASSERT(
(internal::is_same<typename internal::traits<Arg1>::StorageIndex,
typename internal::traits<Arg3>::StorageIndex>::value),
STORAGE_INDEX_MUST_MATCH)
EIGEN_STATIC_ASSERT(
(internal::is_same<typename internal::traits<Arg1>::StorageIndex,
typename internal::traits<Arg3>::StorageIndex>::value),
STORAGE_INDEX_MUST_MATCH)
typedef typename internal::traits<Arg1>::StorageKind StorageKind;
typedef typename internal::traits<Arg1>::StorageIndex StorageIndex;
@@ -100,18 +84,8 @@ template <typename TernaryOp, typename Arg1Type, typename Arg2Type,
class CwiseTernaryOp : public CwiseTernaryOpImpl<
TernaryOp, Arg1Type, Arg2Type, Arg3Type,
typename internal::traits<Arg1Type>::StorageKind>,
internal::no_assignment_operator {
EIGEN_STATIC_ASSERT(
(internal::is_same<
typename internal::traits<Arg1Type>::StorageKind,
typename internal::traits<Arg2Type>::StorageKind>::value),
STORAGE_KIND_MUST_MATCH)
EIGEN_STATIC_ASSERT(
(internal::is_same<
typename internal::traits<Arg1Type>::StorageKind,
typename internal::traits<Arg3Type>::StorageKind>::value),
STORAGE_KIND_MUST_MATCH)
internal::no_assignment_operator
{
public:
typedef typename internal::remove_all<Arg1Type>::type Arg1;
typedef typename internal::remove_all<Arg2Type>::type Arg2;
@@ -137,6 +111,17 @@ class CwiseTernaryOp : public CwiseTernaryOpImpl<
// require the sizes to match
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg2)
EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg3)
// The index types should match
EIGEN_STATIC_ASSERT((internal::is_same<
typename internal::traits<Arg1Type>::StorageKind,
typename internal::traits<Arg2Type>::StorageKind>::value),
STORAGE_KIND_MUST_MATCH)
EIGEN_STATIC_ASSERT((internal::is_same<
typename internal::traits<Arg1Type>::StorageKind,
typename internal::traits<Arg3Type>::StorageKind>::value),
STORAGE_KIND_MUST_MATCH)
eigen_assert(a1.rows() == a2.rows() && a1.cols() == a2.cols() &&
a1.rows() == a3.rows() && a1.cols() == a3.cols());
}

View File

@@ -71,18 +71,17 @@ class DiagonalBase : public EigenBase<Derived>
return InverseReturnType(diagonal().cwiseInverse());
}
typedef DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> > ScalarMultipleReturnType;
EIGEN_DEVICE_FUNC
inline const ScalarMultipleReturnType
inline const DiagonalWrapper<const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DiagonalVectorType,Scalar,product) >
operator*(const Scalar& scalar) const
{
return ScalarMultipleReturnType(diagonal() * scalar);
return DiagonalWrapper<const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DiagonalVectorType,Scalar,product) >(diagonal() * scalar);
}
EIGEN_DEVICE_FUNC
friend inline const ScalarMultipleReturnType
friend inline const DiagonalWrapper<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,DiagonalVectorType,product) >
operator*(const Scalar& scalar, const DiagonalBase& other)
{
return ScalarMultipleReturnType(other.diagonal() * scalar);
return DiagonalWrapper<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,DiagonalVectorType,product) >(scalar * other.diagonal());
}
};
@@ -320,16 +319,16 @@ template<> struct AssignmentKind<DenseShape,DiagonalShape> { typedef Diagonal2De
template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Dense, Scalar>
{
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{
dst.setZero();
dst.diagonal() = src.diagonal();
}
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar> &/*func*/)
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{ dst.diagonal() += src.diagonal(); }
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar> &/*func*/)
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{ dst.diagonal() -= src.diagonal(); }
};
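From the user's side the DiagonalBase change is type-level only: scalar times diagonal still yields a wrapped diagonal expression, it is merely spelled with the generic binary-op return type now. For instance:

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Vector3d v(1.0, 2.0, 3.0);
  Eigen::Matrix3d m = 2.0 * v.asDiagonal();  // still a diagonal expression
  std::cout << m << "\n";                    // prints diag(2, 4, 6)
}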

View File

@@ -28,22 +28,24 @@ template<typename T, typename U,
>
struct dot_nocheck
{
typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar;
typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
typedef typename conj_prod::result_type ResScalar;
EIGEN_DEVICE_FUNC
static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
{
return a.template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum();
return a.template binaryExpr<conj_prod>(b).sum();
}
};
template<typename T, typename U>
struct dot_nocheck<T, U, true>
{
typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar;
typedef scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> conj_prod;
typedef typename conj_prod::result_type ResScalar;
EIGEN_DEVICE_FUNC
static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
{
return a.transpose().template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum();
return a.transpose().template binaryExpr<conj_prod>(b).sum();
}
};
@@ -62,7 +64,7 @@ struct dot_nocheck<T, U, true>
template<typename Derived>
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
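Taking the return type from ScalarBinaryOpTraits keeps dot() working for the pre-defined mixed pairs. A sketch, assuming the default (std::complex<double>, double) specialization:

#include <Eigen/Dense>
#include <complex>
#include <iostream>

int main()
{
  Eigen::VectorXcd a = Eigen::VectorXcd::Random(4);
  Eigen::VectorXd  b = Eigen::VectorXd::Random(4);
  std::complex<double> d = a.dot(b);  // ReturnType is std::complex<double>
  std::cout << d << "\n";
}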

View File

@@ -138,7 +138,7 @@ template<typename Derived>
template<typename OtherDerived>
Derived& DenseBase<Derived>::operator+=(const EigenBase<OtherDerived> &other)
{
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar>());
call_assignment(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
return derived();
}
@@ -146,7 +146,7 @@ template<typename Derived>
template<typename OtherDerived>
Derived& DenseBase<Derived>::operator-=(const EigenBase<OtherDerived> &other)
{
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar>());
call_assignment(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
return derived();
}

View File

@@ -89,14 +89,32 @@ namespace Eigen
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign,scalar_sign_op,sign (or 0),\sa ArrayBase::sign)
/** \returns an expression of the coefficient-wise power of \a x to the given constant \a exponent.
*
* \tparam ScalarExponent is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression (\c Derived::Scalar).
*
* \sa ArrayBase::pow()
*
* \relates ArrayBase
*/
#ifdef EIGEN_PARSED_BY_DOXYGEN
template<typename Derived,typename ScalarExponent>
inline const CwiseBinaryOp<internal::scalar_pow_op<Derived::Scalar,ScalarExponent>,Derived,Constant<ScalarExponent> >
pow(const Eigen::ArrayBase<Derived>& x, const ScalarExponent& exponent);
#else
template<typename Derived,typename ScalarExponent>
inline typename internal::enable_if< !(internal::is_same<typename Derived::Scalar,ScalarExponent>::value)
&& ScalarBinaryOpTraits<typename Derived::Scalar,ScalarExponent,internal::scalar_pow_op<typename Derived::Scalar,ScalarExponent> >::Defined,
const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,ScalarExponent,pow) >::type
pow(const Eigen::ArrayBase<Derived>& x, const ScalarExponent& exponent) {
return x.derived().pow(exponent);
}
template<typename Derived>
inline const Eigen::CwiseUnaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar>, const Derived>
inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename Derived::Scalar,pow)
pow(const Eigen::ArrayBase<Derived>& x, const typename Derived::Scalar& exponent) {
return x.derived().pow(exponent);
}
#endif
/** \returns an expression of the coefficient-wise power of \a x to the given array of \a exponents.
*
@@ -106,12 +124,14 @@ namespace Eigen
* Output: \verbinclude Cwise_array_power_array.out
*
* \sa ArrayBase::pow()
*
* \relates ArrayBase
*/
template<typename Derived,typename ExponentDerived>
inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>
inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>
pow(const Eigen::ArrayBase<Derived>& x, const Eigen::ArrayBase<ExponentDerived>& exponents)
{
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>(
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_pow_op<typename Derived::Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>(
x.derived(),
exponents.derived()
);
@@ -120,36 +140,39 @@ namespace Eigen
/** \returns an expression of the coefficient-wise power of the scalar \a x to the given array of \a exponents.
*
* This function computes the coefficient-wise power between a scalar and an array of exponents.
* Be aware that the scalar type of the input scalar \a x and of the exponents \a exponents must be the same.
*
* \tparam Scalar is the scalar type of \a x. It must be compatible with the scalar type of the given array expression (\c Derived::Scalar).
*
* Example: \include Cwise_scalar_power_array.cpp
* Output: \verbinclude Cwise_scalar_power_array.out
*
* \sa ArrayBase::pow()
*
* \relates ArrayBase
*/
#ifdef EIGEN_PARSED_BY_DOXYGEN
template<typename Scalar,typename Derived>
inline const CwiseBinaryOp<internal::scalar_pow_op<Scalar,Derived::Scalar>,Constant<Scalar>,Derived>
pow(const Scalar& x, const Eigen::ArrayBase<Derived>& exponents);
#else
template<typename Scalar, typename Derived>
inline typename internal::enable_if< !(internal::is_same<typename Derived::Scalar,Scalar>::value)
&& ScalarBinaryOpTraits<Scalar,typename Derived::Scalar,internal::scalar_pow_op<Scalar,typename Derived::Scalar> >::Defined,
const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow) >::type
pow(const Scalar& x, const Eigen::ArrayBase<Derived>& exponents)
{
return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,Derived,pow)(
typename internal::plain_constant_type<Derived,Scalar>::type(exponents.rows(), exponents.cols(), x), exponents.derived() );
}
template<typename Derived>
inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename Derived::Scalar>, const typename Derived::ConstantReturnType, const Derived>
pow(const typename Derived::Scalar& x, const Eigen::ArrayBase<Derived>& exponents)
inline const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow)
pow(const typename Derived::Scalar& x, const Eigen::ArrayBase<Derived>& exponents)
{
typename Derived::ConstantReturnType constant_x(exponents.rows(), exponents.cols(), x);
return Eigen::CwiseBinaryOp<Eigen::internal::scalar_binary_pow_op<typename Derived::Scalar, typename Derived::Scalar>, const typename Derived::ConstantReturnType, const Derived>(
constant_x,
exponents.derived()
);
}
/**
* \brief Component-wise division of a scalar by array elements.
**/
template <typename Derived>
inline const Eigen::CwiseUnaryOp<Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>, const Derived>
operator/(const typename Derived::Scalar& s, const Eigen::ArrayBase<Derived>& a)
{
return Eigen::CwiseUnaryOp<Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>, const Derived>(
a.derived(),
Eigen::internal::scalar_inverse_mult_op<typename Derived::Scalar>(s)
);
return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename Derived::Scalar,Derived,pow)(
typename internal::plain_constant_type<Derived,typename Derived::Scalar>::type(exponents.rows(), exponents.cols(), x), exponents.derived() );
}
#endif
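After this hunk the three user-facing spellings of pow all funnel into scalar_pow_op; a quick usage sketch:

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::ArrayXd x = Eigen::ArrayXd::LinSpaced(4, 1.0, 4.0);
  Eigen::ArrayXd e(4);
  e << 0.0, 1.0, 2.0, 3.0;
  std::cout << Eigen::pow(x, 2.0).transpose() << "\n";  // array ^ scalar
  std::cout << Eigen::pow(2.0, x).transpose() << "\n";  // scalar ^ array
  std::cout << Eigen::pow(x, e).transpose()   << "\n";  // array ^ array
}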
/** \cpp11 \returns an expression of the coefficient-wise igamma(\a a, \a x) to the given arrays.
*

View File

@@ -462,7 +462,7 @@ struct arg_retval
template<typename Scalar, bool isComplex = NumTraits<Scalar>::IsComplex >
struct log1p_impl
{
static inline Scalar run(const Scalar& x)
static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x)
{
EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar)
typedef typename NumTraits<Scalar>::Real RealScalar;
@@ -472,7 +472,7 @@ struct log1p_impl
}
};
#if EIGEN_HAS_CXX11_MATH
#if EIGEN_HAS_CXX11_MATH && !defined(__CUDACC__)
template<typename Scalar>
struct log1p_impl<Scalar, false> {
static inline Scalar run(const Scalar& x)
@@ -494,24 +494,26 @@ struct log1p_retval
* Implementation of pow *
****************************************************************************/
template<typename Scalar, bool IsInteger>
struct pow_default_impl
template<typename ScalarX,typename ScalarY, bool IsInteger = NumTraits<ScalarX>::IsInteger&&NumTraits<ScalarY>::IsInteger>
struct pow_impl
{
typedef Scalar retval;
static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y)
//typedef Scalar retval;
typedef typename ScalarBinaryOpTraits<ScalarX,ScalarY,internal::scalar_pow_op<ScalarX,ScalarY> >::ReturnType result_type;
static EIGEN_DEVICE_FUNC inline result_type run(const ScalarX& x, const ScalarY& y)
{
EIGEN_USING_STD_MATH(pow);
return pow(x, y);
}
};
template<typename Scalar>
struct pow_default_impl<Scalar, true>
template<typename ScalarX,typename ScalarY>
struct pow_impl<ScalarX,ScalarY, true>
{
static EIGEN_DEVICE_FUNC inline Scalar run(Scalar x, Scalar y)
typedef ScalarX result_type;
static EIGEN_DEVICE_FUNC inline ScalarX run(const ScalarX &x, const ScalarY &y)
{
Scalar res(1);
eigen_assert(!NumTraits<Scalar>::IsSigned || y >= 0);
ScalarX res(1);
eigen_assert(!NumTraits<ScalarY>::IsSigned || y >= 0);
if(y & 1) res *= x;
y >>= 1;
while(y)
@@ -524,15 +526,6 @@ struct pow_default_impl<Scalar, true>
}
};
template<typename Scalar>
struct pow_impl : pow_default_impl<Scalar, NumTraits<Scalar>::IsInteger> {};
template<typename Scalar>
struct pow_retval
{
typedef Scalar type;
};
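The integer specialization above is plain exponentiation by squaring (the tail of its loop is truncated in this view, but it presumably squares x and multiplies it in whenever the current exponent bit is set). A standalone restatement:

// O(log y) multiplies instead of O(y), for a non-negative exponent y.
long ipow(long x, unsigned long y)
{
  long res = 1;
  if (y & 1) res *= x;
  y >>= 1;
  while (y)
  {
    x *= x;               // square once per bit
    if (y & 1) res *= x;  // multiply in when the bit is set
    y >>= 1;
  }
  return res;
}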
/****************************************************************************
* Implementation of random *
****************************************************************************/
@@ -928,11 +921,11 @@ inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x)
return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x);
}
template<typename Scalar>
template<typename ScalarX,typename ScalarY>
EIGEN_DEVICE_FUNC
inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y)
inline typename internal::pow_impl<ScalarX,ScalarY>::result_type pow(const ScalarX& x, const ScalarY& y)
{
return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y);
return internal::pow_impl<ScalarX,ScalarY>::run(x, y);
}
template<typename T> EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); }
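numext::pow now dispatches on both scalar types, and the integer fast path is taken only when both arguments are integers. A small sketch:

#include <Eigen/Core>
#include <iostream>

int main()
{
  std::cout << Eigen::numext::pow(2.0, 10.0) << "\n";  // non-integer path: std::pow
  std::cout << Eigen::numext::pow(2, 10) << "\n";      // all-integer path: square-and-multiply
}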

View File

@@ -193,7 +193,7 @@ template<typename Derived> class MatrixBase
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
dot(const MatrixBase<OtherDerived>& other) const;
EIGEN_DEVICE_FUNC RealScalar squaredNorm() const;
@@ -381,7 +381,7 @@ template<typename Derived> class MatrixBase
#ifndef EIGEN_PARSED_BY_DOXYGEN
/// \internal helper struct to form the return type of the cross product
template<typename OtherDerived> struct cross_product_return_type {
typedef typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType Scalar;
typedef typename ScalarBinaryOpTraits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType Scalar;
typedef Matrix<Scalar,MatrixBase::RowsAtCompileTime,MatrixBase::ColsAtCompileTime> type;
};
#endif // EIGEN_PARSED_BY_DOXYGEN
@@ -403,7 +403,6 @@ template<typename Derived> class MatrixBase
inline Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
inline ScalarMultipleReturnType operator*(const UniformScaling<Scalar>& s) const;
// put this as separate enum value to work around possible GCC 4.3 bug (?)
enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits<Derived>::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical)
: ColsAtCompileTime==1 ? Vertical : Horizontal };
@@ -416,8 +415,7 @@ template<typename Derived> class MatrixBase
typedef Block<const Derived,
internal::traits<Derived>::ColsAtCompileTime==1 ? SizeMinusOne : 1,
internal::traits<Derived>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne;
typedef CwiseUnaryOp<internal::scalar_quotient1_op<typename internal::traits<Derived>::Scalar>,
const ConstStartMinusOne > HNormalizedReturnType;
typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(ConstStartMinusOne,Scalar,quotient) HNormalizedReturnType;
inline const HNormalizedReturnType hnormalized() const;
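HNormalizedReturnType is now spelled with the generic divide-by-scalar expression, but the user-facing behaviour is unchanged:

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::Vector4d p(2.0, 4.0, 6.0, 2.0);
  // hnormalized() drops the last coefficient and divides by it.
  std::cout << p.hnormalized().transpose() << "\n";  // prints 1 2 3
}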

View File

@@ -39,7 +39,7 @@ class NoAlias
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other)
{
call_assignment_no_alias(m_expression, other.derived(), internal::assign_op<Scalar>());
call_assignment_no_alias(m_expression, other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
return m_expression;
}
@@ -47,7 +47,7 @@ class NoAlias
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other)
{
call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op<Scalar>());
call_assignment_no_alias(m_expression, other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
return m_expression;
}
@@ -55,7 +55,7 @@ class NoAlias
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other)
{
call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op<Scalar>());
call_assignment_no_alias(m_expression, other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
return m_expression;
}

View File

@@ -22,14 +22,16 @@ namespace Eigen {
* This class stores enums, typedefs and static methods giving information about a numeric type.
*
* The provided data consists of:
* \li A typedef \a Real, giving the "real part" type of \a T. If \a T is already real,
* then \a Real is just a typedef to \a T. If \a T is \c std::complex<U> then \a Real
* \li A typedef \c Real, giving the "real part" type of \a T. If \a T is already real,
* then \c Real is just a typedef to \a T. If \a T is \c std::complex<U> then \c Real
* is a typedef to \a U.
* \li A typedef \a NonInteger, giving the type that should be used for operations producing non-integral values,
* \li A typedef \c NonInteger, giving the type that should be used for operations producing non-integral values,
* such as quotients, square roots, etc. If \a T is a floating-point type, then this typedef just gives
* \a T again. Note however that many Eigen functions such as internal::sqrt simply refuse to
* take integers. Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is
* only intended as a helper for code that needs to explicitly promote types.
* \li A typedef \c Literal giving the type to use for numeric literals such as "2" or "0.5". For instance, for \c std::complex<U>, Literal is defined as \c U.
* Of course, this type must be fully compatible with \a T. If in doubt, just use \a T here.
* \li A typedef \a Nested giving the type to use to nest a value inside of the expression tree. If you don't know what
* this means, just use \a T here.
* \li An enum value \a IsComplex. It is equal to 1 if \a T is a \c std::complex
@@ -84,6 +86,7 @@ template<typename T> struct GenericNumTraits
T
>::type NonInteger;
typedef T Nested;
typedef T Literal;
EIGEN_DEVICE_FUNC
static inline Real epsilon()
@@ -145,6 +148,7 @@ template<typename _Real> struct NumTraits<std::complex<_Real> >
: GenericNumTraits<std::complex<_Real> >
{
typedef _Real Real;
typedef typename NumTraits<_Real>::Literal Literal;
enum {
IsComplex = 1,
RequireInitialization = NumTraits<_Real>::RequireInitialization,
@@ -168,6 +172,7 @@
typedef typename NumTraits<Scalar>::NonInteger NonIntegerScalar;
typedef Array<NonIntegerScalar, Rows, Cols, Options, MaxRows, MaxCols> NonInteger;
typedef ArrayType & Nested;
typedef typename NumTraits<Scalar>::Literal Literal;
enum {
IsComplex = NumTraits<Scalar>::IsComplex,
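Custom scalar types pick up the new Literal member from GenericNumTraits or can override it explicitly. A sketch for a hypothetical wrapper type MyReal (the name and members are illustrative only):

#include <Eigen/Core>

struct MyReal { double v; };  // hypothetical user-defined scalar

namespace Eigen {
template<> struct NumTraits<MyReal> : GenericNumTraits<MyReal>
{
  typedef MyReal Real;
  typedef MyReal NonInteger;
  typedef MyReal Nested;
  typedef double Literal;  // literals such as 2 or 0.5 stay plain doubles
  enum { IsComplex = 0, IsInteger = 0, IsSigned = 1,
         RequireInitialization = 0, ReadCost = 1, AddCost = 1, MulCost = 1 };
};
}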

View File

@@ -718,7 +718,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
//_resize_to_match(other);
// the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because
// it wouldn't allow to copy a row-vector into a column-vector.
internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op<Scalar>());
internal::call_assignment_no_alias(this->derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
return this->derived();
}

View File

@@ -16,39 +16,6 @@ template<typename Lhs, typename Rhs, int Option, typename StorageKind> class Pro
namespace internal {
// Determine the scalar of Product<Lhs, Rhs>. This is normally the same as Lhs::Scalar times
// Rhs::Scalar, but product with permutation matrices inherit the scalar of the other factor.
template<typename Lhs, typename Rhs, typename LhsShape = typename evaluator_traits<Lhs>::Shape,
typename RhsShape = typename evaluator_traits<Rhs>::Shape >
struct product_result_scalar
{
typedef typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar;
};
template<typename Lhs, typename Rhs, typename RhsShape>
struct product_result_scalar<Lhs, Rhs, PermutationShape, RhsShape>
{
typedef typename Rhs::Scalar Scalar;
};
template<typename Lhs, typename Rhs, typename LhsShape>
struct product_result_scalar<Lhs, Rhs, LhsShape, PermutationShape>
{
typedef typename Lhs::Scalar Scalar;
};
template<typename Lhs, typename Rhs, typename RhsShape>
struct product_result_scalar<Lhs, Rhs, TranspositionsShape, RhsShape>
{
typedef typename Rhs::Scalar Scalar;
};
template<typename Lhs, typename Rhs, typename LhsShape>
struct product_result_scalar<Lhs, Rhs, LhsShape, TranspositionsShape>
{
typedef typename Lhs::Scalar Scalar;
};
template<typename Lhs, typename Rhs, int Option>
struct traits<Product<Lhs, Rhs, Option> >
{
@@ -59,7 +26,7 @@ struct traits<Product<Lhs, Rhs, Option> >
typedef MatrixXpr XprKind;
typedef typename product_result_scalar<LhsCleaned,RhsCleaned>::Scalar Scalar;
typedef typename ScalarBinaryOpTraits<typename traits<LhsCleaned>::Scalar, typename traits<RhsCleaned>::Scalar>::ReturnType Scalar;
typedef typename product_promote_storage_type<typename LhsTraits::StorageKind,
typename RhsTraits::StorageKind,
internal::product_type<Lhs,Rhs>::ret>::ret StorageKind;

View File

@@ -35,22 +35,28 @@ struct evaluator<Product<Lhs, Rhs, Options> >
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
};
// Catch scalar * ( A * B ) and transform it to (A*scalar) * B
// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
// TODO we should apply that rule only if that's really helpful
template<typename Lhs, typename Rhs, typename Scalar>
struct evaluator_assume_aliasing<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
const Product<Lhs, Rhs, DefaultProduct> > >
{
static const bool value = true;
};
template<typename Lhs, typename Rhs, typename Scalar>
struct evaluator<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > >
: public evaluator<Product<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,const Lhs>, Rhs, DefaultProduct> >
template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
const Product<Lhs, Rhs, DefaultProduct> > >
: public evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> >
{
typedef CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Product<Lhs, Rhs, DefaultProduct> > XprType;
typedef evaluator<Product<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,const Lhs>, Rhs, DefaultProduct> > Base;
typedef CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
const Product<Lhs, Rhs, DefaultProduct> > XprType;
typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > Base;
EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
: Base(xpr.functor().m_other * xpr.nestedExpression().lhs() * xpr.nestedExpression().rhs())
: Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs())
{}
};
@@ -122,14 +128,17 @@ protected:
PlainObject m_result;
};
// The following three shortcuts are enabled only if the scalar types match exactly.
// TODO: we could enable them for different scalar types when the product is not vectorized.
// Dense = Product
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar>, Dense2Dense,
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar,Scalar>, Dense2Dense,
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type>
{
typedef Product<Lhs,Rhs,Options> SrcXprType;
static EIGEN_STRONG_INLINE
void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
// FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
@@ -138,12 +147,12 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scal
// Dense += Product
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar>, Dense2Dense,
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar,Scalar>, Dense2Dense,
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type>
{
typedef Product<Lhs,Rhs,Options> SrcXprType;
static EIGEN_STRONG_INLINE
void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar> &)
void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
{
// FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
@@ -152,12 +161,12 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<
// Dense -= Product
template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar>, Dense2Dense,
struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar,Scalar>, Dense2Dense,
typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct),Scalar>::type>
{
typedef Product<Lhs,Rhs,Options> SrcXprType;
static EIGEN_STRONG_INLINE
void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar> &)
void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
{
// FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
@@ -168,16 +177,17 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<
// Dense ?= scalar * Product
// TODO we should apply that rule if that's really helpful
// for instance, this is not good for inner products
template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis>
struct Assignment<DstXprType, CwiseUnaryOp<internal::scalar_multiple_op<ScalarBis>,
template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis, typename Plain>
struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>, const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
const Product<Lhs,Rhs,DefaultProduct> >, AssignFunc, Dense2Dense, Scalar>
{
typedef CwiseUnaryOp<internal::scalar_multiple_op<ScalarBis>,
const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,
const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
static EIGEN_STRONG_INLINE
void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
{
call_assignment_no_alias(dst, (src.functor().m_other * src.nestedExpression().lhs())*src.nestedExpression().rhs(), func);
call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func);
}
};
@@ -187,37 +197,38 @@ struct Assignment<DstXprType, CwiseUnaryOp<internal::scalar_multiple_op<ScalarBi
// TODO enable it for "Dense ?= xpr - Product<>" as well.
template<typename OtherXpr, typename Lhs, typename Rhs>
struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar>, const OtherXpr,
struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
static const bool value = true;
};
template<typename DstXprType, typename OtherXpr, typename ProductType, typename Scalar, typename Func1, typename Func2>
template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>
struct assignment_from_xpr_plus_product
{
typedef CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr, const ProductType> SrcXprType;
typedef CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar,typename ProductType::Scalar>, const OtherXpr, const ProductType> SrcXprType;
template<typename InitialFunc>
static EIGEN_STRONG_INLINE
void run(DstXprType &dst, const SrcXprType &src, const Func1& func)
void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/)
{
call_assignment_no_alias(dst, src.lhs(), func);
call_assignment_no_alias(dst, src.lhs(), Func1());
call_assignment_no_alias(dst, src.rhs(), Func2());
}
};
template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr,
const Product<Lhs,Rhs,DefaultProduct> >, internal::assign_op<Scalar>, Dense2Dense>
: assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, Scalar, internal::assign_op<Scalar>, internal::add_assign_op<Scalar> >
template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar>
struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<OtherScalar,ProdScalar>, const OtherXpr,
const Product<Lhs,Rhs,DefaultProduct> >, internal::assign_op<DstScalar,SrcScalar>, Dense2Dense>
: assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, internal::assign_op<DstScalar,OtherScalar>, internal::add_assign_op<DstScalar,ProdScalar> >
{};
template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr,
const Product<Lhs,Rhs,DefaultProduct> >, internal::add_assign_op<Scalar>, Dense2Dense>
: assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, Scalar, internal::add_assign_op<Scalar>, internal::add_assign_op<Scalar> >
template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar>
struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<OtherScalar,ProdScalar>, const OtherXpr,
const Product<Lhs,Rhs,DefaultProduct> >, internal::add_assign_op<DstScalar,SrcScalar>, Dense2Dense>
: assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, internal::add_assign_op<DstScalar,OtherScalar>, internal::add_assign_op<DstScalar,ProdScalar> >
{};
template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const OtherXpr,
const Product<Lhs,Rhs,DefaultProduct> >, internal::sub_assign_op<Scalar>, Dense2Dense>
: assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, Scalar, internal::sub_assign_op<Scalar>, internal::sub_assign_op<Scalar> >
template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar>
struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_sum_op<OtherScalar,ProdScalar>, const OtherXpr,
const Product<Lhs,Rhs,DefaultProduct> >, internal::sub_assign_op<DstScalar,SrcScalar>, Dense2Dense>
: assignment_from_xpr_plus_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, internal::sub_assign_op<DstScalar,OtherScalar>, internal::sub_assign_op<DstScalar,ProdScalar> >
{};
//----------------------------------------
@@ -369,21 +380,21 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
{
// Same as: dst.noalias() = lhs.lazyProduct(rhs);
// but easier on the compiler side
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<Scalar>());
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<typename Dst::Scalar,Scalar>());
}
template<typename Dst>
static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
// dst.noalias() += lhs.lazyProduct(rhs);
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<Scalar>());
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());
}
template<typename Dst>
static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{
// dst.noalias() -= lhs.lazyProduct(rhs);
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<Scalar>());
call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());
}
// template<typename Dst>
@@ -735,7 +746,7 @@ template<typename MatrixType, typename DiagonalType, typename Derived, int Produ
struct diagonal_product_evaluator_base
: evaluator_base<Derived>
{
typedef typename scalar_product_traits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
public:
enum {
CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
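The rewrite rule still fires for the usual spelling; only its pattern changed, since scalar*expr is now a constant-times-product binary expression rather than a scalar_multiple_op unary one:

#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(64, 64);
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(64, 64);
  Eigen::MatrixXd C(64, 64);
  // "2.0 * (A * B)" is evaluated as "(2.0 * A) * B", folding the scaling
  // into the product kernel instead of running a second pass over C.
  C.noalias() = 2.0 * (A * B);
}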

View File

@@ -425,7 +425,7 @@ template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::minCoeff() const
{
return derived().redux(Eigen::internal::scalar_min_op<Scalar>());
return derived().redux(Eigen::internal::scalar_min_op<Scalar,Scalar>());
}
/** \returns the maximum of all coefficients of \c *this.
@@ -435,7 +435,7 @@ template<typename Derived>
EIGEN_STRONG_INLINE typename internal::traits<Derived>::Scalar
DenseBase<Derived>::maxCoeff() const
{
return derived().redux(Eigen::internal::scalar_max_op<Scalar>());
return derived().redux(Eigen::internal::scalar_max_op<Scalar,Scalar>());
}
/** \returns the sum of all coefficients of \c *this
@@ -450,7 +450,7 @@ DenseBase<Derived>::sum() const
{
if(SizeAtCompileTime==0 || (SizeAtCompileTime==Dynamic && size()==0))
return Scalar(0);
return derived().redux(Eigen::internal::scalar_sum_op<Scalar>());
return derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>());
}
/** \returns the mean of all coefficients of *this
@@ -465,7 +465,7 @@ DenseBase<Derived>::mean() const
#pragma warning push
#pragma warning ( disable : 2259 )
#endif
return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar>())) / Scalar(this->size());
return Scalar(derived().redux(Eigen::internal::scalar_sum_op<Scalar,Scalar>())) / Scalar(this->size());
#ifdef __INTEL_COMPILER
#pragma warning pop
#endif

View File

@@ -262,7 +262,7 @@ template<typename TPlainObjectType, int Options, typename StrideType> class Ref<
template<typename Expression>
EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type)
{
internal::call_assignment_no_alias(m_object,expr,internal::assign_op<Scalar>());
internal::call_assignment_no_alias(m_object,expr,internal::assign_op<Scalar,Scalar>());
Base::construct(m_object);
}

View File

@@ -129,7 +129,7 @@ template<typename _MatrixType, unsigned int UpLo> class SelfAdjointView
}
friend EIGEN_DEVICE_FUNC
const SelfAdjointView<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,MatrixType>,UpLo>
const SelfAdjointView<const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar,MatrixType,product),UpLo>
operator*(const Scalar& s, const SelfAdjointView& mat)
{
return (s*mat.nestedExpression()).template selfadjointView<UpLo>();

View File

@@ -12,11 +12,13 @@
namespace Eigen {
// TODO generalize the scalar type of 'other'
template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator*=(const Scalar& other)
{
typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar>());
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::mul_assign_op<Scalar,Scalar>());
return derived();
}
@@ -24,7 +26,7 @@ template<typename Derived>
EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator+=(const Scalar& other)
{
typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar>());
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::add_assign_op<Scalar,Scalar>());
return derived();
}
@@ -32,7 +34,7 @@ template<typename Derived>
EIGEN_STRONG_INLINE Derived& ArrayBase<Derived>::operator-=(const Scalar& other)
{
typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar>());
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::sub_assign_op<Scalar,Scalar>());
return derived();
}
@@ -40,7 +42,7 @@ template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator/=(const Scalar& other)
{
typedef typename Derived::PlainObject PlainObject;
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar>());
internal::call_assignment(this->derived(), PlainObject::Constant(rows(),cols(),other), internal::div_assign_op<Scalar,Scalar>());
return derived();
}

View File

@@ -134,10 +134,10 @@ protected:
// Specialization for "dst = dec.solve(rhs)"
// NOTE we need to specialize it for Dense2Dense to avoid an ambiguous specialization error, and a Sparse2Sparse specialization must exist somewhere
template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar,Scalar>, Dense2Dense, Scalar>
{
typedef Solve<DecType,RhsType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
// FIXME shall we resize dst here?
src.dec()._solve_impl(src.rhs(), dst);
@@ -146,10 +146,10 @@ struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar
// Specialization for "dst = dec.transpose().solve(rhs)"
template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
struct Assignment<DstXprType, Solve<Transpose<const DecType>,RhsType>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
struct Assignment<DstXprType, Solve<Transpose<const DecType>,RhsType>, internal::assign_op<Scalar,Scalar>, Dense2Dense, Scalar>
{
typedef Solve<Transpose<const DecType>,RhsType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
src.dec().nestedExpression().template _solve_impl_transposed<false>(src.rhs(), dst);
}
@@ -157,10 +157,11 @@ struct Assignment<DstXprType, Solve<Transpose<const DecType>,RhsType>, internal:
// Specialization for "dst = dec.adjoint().solve(rhs)"
template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
struct Assignment<DstXprType, Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
struct Assignment<DstXprType, Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType>,
internal::assign_op<Scalar,Scalar>, Dense2Dense, Scalar>
{
typedef Solve<CwiseUnaryOp<internal::scalar_conjugate_op<typename DecType::Scalar>, const Transpose<const DecType> >,RhsType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
src.dec().nestedExpression().nestedExpression().template _solve_impl_transposed<true>(src.rhs(), dst);
}
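The three specializations correspond to the three user-level spellings named in the comments above; e.g., with PartialPivLU, whose SolverBase interface provides transpose() and adjoint():

#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(4, 4);
  Eigen::VectorXd b = Eigen::VectorXd::Random(4);
  Eigen::PartialPivLU<Eigen::MatrixXd> lu(A);
  Eigen::VectorXd x1 = lu.solve(b);              // "dst = dec.solve(rhs)"
  Eigen::VectorXd x2 = lu.transpose().solve(b);  // transposed variant
  Eigen::VectorXd x3 = lu.adjoint().solve(b);    // adjoint variant
}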

View File

@@ -1050,8 +1050,8 @@ struct betainc_impl {
template <typename Scalar>
struct betainc_impl {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar b, Scalar x) {
/* betaincf.c
static EIGEN_STRONG_INLINE Scalar run(Scalar, Scalar, Scalar) {
/* betaincf.c
*
* Incomplete beta integral
*

View File

@@ -367,14 +367,14 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularViewImpl<_Mat
template<typename Other>
EIGEN_DEVICE_FUNC
TriangularViewType& operator+=(const DenseBase<Other>& other) {
internal::call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op<Scalar>());
internal::call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op<Scalar,typename Other::Scalar>());
return derived();
}
/** \sa MatrixBase::operator-=() */
template<typename Other>
EIGEN_DEVICE_FUNC
TriangularViewType& operator-=(const DenseBase<Other>& other) {
internal::call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op<Scalar>());
internal::call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op<Scalar,typename Other::Scalar>());
return derived();
}
@@ -552,7 +552,7 @@ template<typename OtherDerived>
inline TriangularView<MatrixType, Mode>&
TriangularViewImpl<MatrixType, Mode, Dense>::operator=(const MatrixBase<OtherDerived>& other)
{
internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op<Scalar>());
internal::call_assignment_no_alias(derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
return derived();
}
@@ -794,7 +794,7 @@ void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& sr
enum {
unroll = DstXprType::SizeAtCompileTime != Dynamic
&& SrcEvaluatorType::CoeffReadCost < HugeCost
&& DstXprType::SizeAtCompileTime * SrcEvaluatorType::CoeffReadCost / 2 <= EIGEN_UNROLLING_LIMIT
&& DstXprType::SizeAtCompileTime * (DstEvaluatorType::CoeffReadCost+SrcEvaluatorType::CoeffReadCost) / 2 <= EIGEN_UNROLLING_LIMIT
};
triangular_assignment_loop<Kernel, Mode, unroll ? int(DstXprType::SizeAtCompileTime) : Dynamic, SetOpposite>::run(kernel);
@@ -804,7 +804,7 @@ template<int Mode, bool SetOpposite, typename DstXprType, typename SrcXprType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void call_triangular_assignment_loop(const DstXprType& dst, const SrcXprType& src)
{
call_triangular_assignment_loop<Mode,SetOpposite>(dst, src, internal::assign_op<typename DstXprType::Scalar>());
call_triangular_assignment_loop<Mode,SetOpposite>(dst, src, internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
}
template<> struct AssignmentKind<TriangularShape,TriangularShape> { typedef Triangular2Triangular Kind; };
@@ -933,10 +933,10 @@ namespace internal {
// Triangular = Product
template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_op<Scalar>, Dense2Triangular, Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_op<Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, Dense2Triangular, Scalar>
{
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename SrcXprType::Scalar> &)
{
dst.setZero();
dst._assignProduct(src, 1);
@@ -945,10 +945,10 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_
// Triangular += Product
template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_assign_op<Scalar>, Dense2Triangular, Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_assign_op<Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, Dense2Triangular, Scalar>
{
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar> &)
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,typename SrcXprType::Scalar> &)
{
dst._assignProduct(src, 1);
}
@@ -956,10 +956,10 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_ass
// Triangular -= Product
template< typename DstXprType, typename Lhs, typename Rhs, typename Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_assign_op<Scalar>, Dense2Triangular, Scalar>
struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_assign_op<Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, Dense2Triangular, Scalar>
{
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar> &)
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,typename SrcXprType::Scalar> &)
{
dst._assignProduct(src, -1);
}
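These Dense2Triangular specializations back expressions of the following shape, where only the selected triangle is computed and _assignProduct receives the factor 1, 1 or -1 respectively:

#include <Eigen/Dense>

int main()
{
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(3, 3);
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(3, 3);
  Eigen::MatrixXd C = Eigen::MatrixXd::Zero(3, 3);
  C.triangularView<Eigen::Lower>()  = A * B;  // Triangular  = Product
  C.triangularView<Eigen::Lower>() += A * B;  // Triangular += Product
  C.triangularView<Eigen::Lower>() -= A * B;  // Triangular -= Product
}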

View File

@@ -540,7 +540,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** Returns the expression of the sum of the vector \a other to each subvector of \c *this */
template<typename OtherDerived> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
CwiseBinaryOp<internal::scalar_sum_op<Scalar>,
CwiseBinaryOp<internal::scalar_sum_op<Scalar,typename OtherDerived::Scalar>,
const ExpressionTypeNestedCleaned,
const typename ExtendedType<OtherDerived>::Type>
operator+(const DenseBase<OtherDerived>& other) const
@@ -553,7 +553,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
/** Returns the expression of the difference between each subvector of \c *this and the vector \a other */
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
CwiseBinaryOp<internal::scalar_difference_op<Scalar>,
CwiseBinaryOp<internal::scalar_difference_op<Scalar,typename OtherDerived::Scalar>,
const ExpressionTypeNestedCleaned,
const typename ExtendedType<OtherDerived>::Type>
operator-(const DenseBase<OtherDerived>& other) const
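These are the broadcasting operators; a short usage sketch:

#include <Eigen/Dense>
#include <iostream>

int main()
{
  Eigen::MatrixXd m = Eigen::MatrixXd::Zero(2, 3);
  Eigen::VectorXd v(2);
  v << 1.0, 2.0;
  // Adds v to every column of m; the functor is scalar_sum_op<double,double>.
  std::cout << (m.colwise() + v) << "\n";
}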

View File

@@ -28,6 +28,8 @@ template<> struct packet_traits<Eigen::half> : default_packet_traits
AlignedOnScalar = 1,
size=2,
HasHalfPacket = 0,
HasAdd = 1,
HasMul = 1,
HasDiv = 1,
HasSqrt = 1,
HasRsqrt = 1,

View File

@@ -14,8 +14,9 @@ namespace Eigen {
namespace internal {
static uint32x4_t p4ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET4(0x00000000, 0x80000000, 0x00000000, 0x80000000);
static uint32x2_t p2ui_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x00000000, 0x80000000);
const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
static uint32x4_t p4ui_CONJ_XOR = vld1q_u32( conj_XOR_DATA );
static uint32x2_t p2ui_CONJ_XOR = vld1_u32( conj_XOR_DATA );
//---------- float ----------
struct Packet2cf
@ -274,7 +275,8 @@ ptranspose(PacketBlock<Packet2cf,2>& kernel) {
//---------- double ----------
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
static uint64x2_t p2ul_CONJ_XOR = EIGEN_INIT_NEON_PACKET2(0x0, 0x8000000000000000);
const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );
struct Packet1cd
{

View File

@ -49,17 +49,6 @@ typedef uint32x4_t Packet4ui;
#define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
const Packet4i p4i_##NAME = pset1<Packet4i>(X)
#if EIGEN_COMP_LLVM && !EIGEN_COMP_CLANG
//Special treatment for Apple's llvm-gcc: its NEON packet types are unions
#define EIGEN_INIT_NEON_PACKET2(X, Y) {{X, Y}}
#define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {{X, Y, Z, W}}
#else
//Default initializer for packets
#define EIGEN_INIT_NEON_PACKET2(X, Y) {X, Y}
#define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W}
#endif
// arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function
// which is available on LLVM and GCC (at least)
#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC
@ -122,12 +111,14 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a)
{
Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
const float32_t f[] = {0, 1, 2, 3};
Packet4f countdown = vld1q_f32(f);
return vaddq_f32(pset1<Packet4f>(a), countdown);
}
template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a)
{
Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3);
const int32_t i[] = {0, 1, 2, 3};
Packet4i countdown = vld1q_s32(i);
return vaddq_s32(pset1<Packet4i>(a), countdown);
}
@ -585,7 +576,8 @@ template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) { r
template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a)
{
Packet2d countdown = EIGEN_INIT_NEON_PACKET2(0, 1);
const double countdown_raw[] = {0.0,1.0};
const Packet2d countdown = vld1q_f64(countdown_raw);
return vaddq_f64(pset1<Packet2d>(a), countdown);
}
template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return vaddq_f64(a,b); }

View File

@ -18,20 +18,24 @@ namespace internal {
* \brief Template functor for scalar/packet assignment
*
*/
template<typename Scalar> struct assign_op {
template<typename DstScalar,typename SrcScalar> struct assign_op {
EIGEN_EMPTY_STRUCT_CTOR(assign_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a = b; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a = b; }
template<int Alignment, typename Packet>
EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
{ internal::pstoret<Scalar,Packet,Alignment>(a,b); }
EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
{ internal::pstoret<DstScalar,Packet,Alignment>(a,b); }
};
template<typename Scalar>
struct functor_traits<assign_op<Scalar> > {
// Empty overload for void type (used by PermutationMatrix)
template<typename DstScalar> struct assign_op<DstScalar,void> {};
template<typename DstScalar,typename SrcScalar>
struct functor_traits<assign_op<DstScalar,SrcScalar> > {
enum {
Cost = NumTraits<Scalar>::ReadCost,
PacketAccess = packet_traits<Scalar>::Vectorizable
Cost = NumTraits<DstScalar>::ReadCost,
PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::Vectorizable && packet_traits<SrcScalar>::Vectorizable
};
};
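// Illustration of the new contract (internal API, shown only to demonstrate
// the two template parameters): coefficient-wise assignment may mix scalar
// types, while the vectorized path requires identical ones.
//   assign_op<std::complex<double>, double> op;
//   std::complex<double> z(0,0);
//   op.assignCoeff(z, 1.5);   // z == (1.5,0)
//   // functor_traits<assign_op<std::complex<double>,double> >::PacketAccess
//   // is 0 because DstScalar and SrcScalar differ.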
@ -39,20 +43,20 @@ struct functor_traits<assign_op<Scalar> > {
* \brief Template functor for scalar/packet assignment with addition
*
*/
template<typename Scalar> struct add_assign_op {
template<typename DstScalar,typename SrcScalar> struct add_assign_op {
EIGEN_EMPTY_STRUCT_CTOR(add_assign_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a += b; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a += b; }
template<int Alignment, typename Packet>
EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
{ internal::pstoret<Scalar,Packet,Alignment>(a,internal::padd(internal::ploadt<Packet,Alignment>(a),b)); }
EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
{ internal::pstoret<DstScalar,Packet,Alignment>(a,internal::padd(internal::ploadt<Packet,Alignment>(a),b)); }
};
template<typename Scalar>
struct functor_traits<add_assign_op<Scalar> > {
template<typename DstScalar,typename SrcScalar>
struct functor_traits<add_assign_op<DstScalar,SrcScalar> > {
enum {
Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasAdd
Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::AddCost,
PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasAdd
};
};
@ -60,20 +64,20 @@ struct functor_traits<add_assign_op<Scalar> > {
* \brief Template functor for scalar/packet assignment with subtraction
*
*/
template<typename Scalar> struct sub_assign_op {
template<typename DstScalar,typename SrcScalar> struct sub_assign_op {
EIGEN_EMPTY_STRUCT_CTOR(sub_assign_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a -= b; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(DstScalar& a, const SrcScalar& b) const { a -= b; }
template<int Alignment, typename Packet>
EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const
{ internal::pstoret<Scalar,Packet,Alignment>(a,internal::psub(internal::ploadt<Packet,Alignment>(a),b)); }
EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const
{ internal::pstoret<DstScalar,Packet,Alignment>(a,internal::psub(internal::ploadt<Packet,Alignment>(a),b)); }
};
template<typename Scalar>
struct functor_traits<sub_assign_op<Scalar> > {
template<typename DstScalar,typename SrcScalar>
struct functor_traits<sub_assign_op<DstScalar,SrcScalar> > {
enum {
Cost = NumTraits<Scalar>::ReadCost + NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasSub
Cost = NumTraits<DstScalar>::ReadCost + NumTraits<DstScalar>::AddCost,
PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasSub
};
};
@ -98,7 +102,6 @@ struct functor_traits<mul_assign_op<DstScalar,SrcScalar> > {
PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasMul
};
};
template<typename DstScalar,typename SrcScalar> struct functor_is_product_like<mul_assign_op<DstScalar,SrcScalar> > { enum { ret = 1 }; };
/** \internal
* \brief Template functor for scalar/packet assignment with division
@ -120,7 +123,6 @@ struct functor_traits<div_assign_op<DstScalar,SrcScalar> > {
PacketAccess = is_same<DstScalar,SrcScalar>::value && packet_traits<DstScalar>::HasDiv
};
};
template<typename DstScalar,typename SrcScalar> struct functor_is_product_like<div_assign_op<DstScalar,SrcScalar> > { enum { ret = 1 }; };
/** \internal
* \brief Template functor for scalar/packet assignment with swapping

View File

@ -16,27 +16,43 @@ namespace internal {
//---------- associative binary functors ----------
template<typename Arg1, typename Arg2>
struct binary_op_base
{
typedef Arg1 first_argument_type;
typedef Arg2 second_argument_type;
};
/** \internal
* \brief Template functor to compute the sum of two scalars
*
* \sa class CwiseBinaryOp, MatrixBase::operator+, class VectorwiseOp, DenseBase::sum()
*/
template<typename Scalar> struct scalar_sum_op {
// typedef Scalar result_type;
template<typename LhsScalar,typename RhsScalar>
struct scalar_sum_op : binary_op_base<LhsScalar,RhsScalar>
{
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_sum_op>::ReturnType result_type;
#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; }
#else
scalar_sum_op() {
EIGEN_SCALAR_BINARY_OP_PLUGIN
}
#endif
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a + b; }
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return internal::padd(a,b); }
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
{ return internal::predux(a); }
};
template<typename Scalar>
struct functor_traits<scalar_sum_op<Scalar> > {
template<typename LhsScalar,typename RhsScalar>
struct functor_traits<scalar_sum_op<LhsScalar,RhsScalar> > {
enum {
Cost = NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasAdd
Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2, // rough estimate!
PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasAdd && packet_traits<RhsScalar>::HasAdd
// TODO vectorize mixed sum
};
};
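// For instance, mixed real/complex sums become expressible (sketch, assuming
// Eigen types in scope):
//   VectorXd  r = VectorXd::Ones(3);
//   VectorXcd c = VectorXcd::Ones(3);
//   VectorXcd s = r + c;  // scalar_sum_op<double,std::complex<double> >;
//                         // result_type is std::complex<double>, and the
//                         // mixed sum takes the scalar path (PacketAccess=0)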
@ -45,7 +61,7 @@ struct functor_traits<scalar_sum_op<Scalar> > {
* This is required to solve Bug 426.
* \sa DenseBase::count(), DenseBase::any(), ArrayBase::cast(), MatrixBase::cast()
*/
template<> struct scalar_sum_op<bool> : scalar_sum_op<int> {
template<> struct scalar_sum_op<bool,bool> : scalar_sum_op<int,int> {
EIGEN_DEPRECATED
scalar_sum_op() {}
};
@ -56,13 +72,17 @@ template<> struct scalar_sum_op<bool> : scalar_sum_op<int> {
*
* \sa class CwiseBinaryOp, Cwise::operator*(), class VectorwiseOp, MatrixBase::redux()
*/
template<typename LhsScalar,typename RhsScalar> struct scalar_product_op {
enum {
// TODO vectorize mixed product
Vectorizable = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasMul && packet_traits<RhsScalar>::HasMul
};
typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
template<typename LhsScalar,typename RhsScalar>
struct scalar_product_op : binary_op_base<LhsScalar,RhsScalar>
{
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_product_op>::ReturnType result_type;
#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
#else
scalar_product_op() {
EIGEN_SCALAR_BINARY_OP_PLUGIN
}
#endif
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
@ -75,7 +95,8 @@ template<typename LhsScalar,typename RhsScalar>
struct functor_traits<scalar_product_op<LhsScalar,RhsScalar> > {
enum {
Cost = (NumTraits<LhsScalar>::MulCost + NumTraits<RhsScalar>::MulCost)/2, // rough estimate!
PacketAccess = scalar_product_op<LhsScalar,RhsScalar>::Vectorizable
PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasMul && packet_traits<RhsScalar>::HasMul
// TODO vectorize mixed product
};
};
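// For instance, a real factor times a complex matrix (sketch):
//   MatrixXcd m = MatrixXcd::Identity(2,2);
//   MatrixXcd r = 2.0 * m;  // scalar_product_op<double,std::complex<double> >
//                           // with ReturnType std::complex<double>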
@ -84,13 +105,15 @@ struct functor_traits<scalar_product_op<LhsScalar,RhsScalar> > {
*
* This is a shortcut for conj(x) * y, which is needed for optimization purposes; in Eigen2 support mode, this becomes x * conj(y)
*/
template<typename LhsScalar,typename RhsScalar> struct scalar_conj_product_op {
template<typename LhsScalar,typename RhsScalar>
struct scalar_conj_product_op : binary_op_base<LhsScalar,RhsScalar>
{
enum {
Conj = NumTraits<LhsScalar>::IsComplex
};
typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_conj_product_op>::ReturnType result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const
@ -113,21 +136,24 @@ struct functor_traits<scalar_conj_product_op<LhsScalar,RhsScalar> > {
*
* \sa class CwiseBinaryOp, MatrixBase::cwiseMin, class VectorwiseOp, MatrixBase::minCoeff()
*/
template<typename Scalar> struct scalar_min_op {
template<typename LhsScalar,typename RhsScalar>
struct scalar_min_op : binary_op_base<LhsScalar,RhsScalar>
{
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_min_op>::ReturnType result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return numext::mini(a, b); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::mini(a, b); }
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return internal::pmin(a,b); }
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
{ return internal::predux_min(a); }
};
template<typename Scalar>
struct functor_traits<scalar_min_op<Scalar> > {
template<typename LhsScalar,typename RhsScalar>
struct functor_traits<scalar_min_op<LhsScalar,RhsScalar> > {
enum {
Cost = NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasMin
Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMin
};
};
@ -136,21 +162,24 @@ struct functor_traits<scalar_min_op<Scalar> > {
*
* \sa class CwiseBinaryOp, MatrixBase::cwiseMax, class VectorwiseOp, MatrixBase::maxCoeff()
*/
template<typename Scalar> struct scalar_max_op {
template<typename LhsScalar,typename RhsScalar>
struct scalar_max_op : binary_op_base<LhsScalar,RhsScalar>
{
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_max_op>::ReturnType result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return numext::maxi(a, b); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return numext::maxi(a, b); }
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return internal::pmax(a,b); }
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar predux(const Packet& a) const
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type predux(const Packet& a) const
{ return internal::predux_max(a); }
};
template<typename Scalar>
struct functor_traits<scalar_max_op<Scalar> > {
template<typename LhsScalar,typename RhsScalar>
struct functor_traits<scalar_max_op<LhsScalar,RhsScalar> > {
enum {
Cost = NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasMax
Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
PacketAccess = internal::is_same<LhsScalar, RhsScalar>::value && packet_traits<LhsScalar>::HasMax
};
};
@ -158,56 +187,70 @@ struct functor_traits<scalar_max_op<Scalar> > {
* \brief Template functors for comparison of two scalars
* \todo Implement packet-comparisons
*/
template<typename Scalar, ComparisonName cmp> struct scalar_cmp_op;
template<typename LhsScalar, typename RhsScalar, ComparisonName cmp> struct scalar_cmp_op;
template<typename Scalar, ComparisonName cmp>
struct functor_traits<scalar_cmp_op<Scalar, cmp> > {
template<typename LhsScalar, typename RhsScalar, ComparisonName cmp>
struct functor_traits<scalar_cmp_op<LhsScalar,RhsScalar, cmp> > {
enum {
Cost = NumTraits<Scalar>::AddCost,
Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
PacketAccess = false
};
};
template<ComparisonName Cmp, typename Scalar>
struct result_of<scalar_cmp_op<Scalar, Cmp>(Scalar,Scalar)> {
template<ComparisonName Cmp, typename LhsScalar, typename RhsScalar>
struct result_of<scalar_cmp_op<LhsScalar, RhsScalar, Cmp>(LhsScalar,RhsScalar)> {
typedef bool type;
};
template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_EQ> {
template<typename LhsScalar, typename RhsScalar>
struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_EQ> : binary_op_base<LhsScalar,RhsScalar>
{
typedef bool result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a==b;}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a==b;}
};
template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_LT> {
template<typename LhsScalar, typename RhsScalar>
struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_LT> : binary_op_base<LhsScalar,RhsScalar>
{
typedef bool result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<b;}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<b;}
};
template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_LE> {
template<typename LhsScalar, typename RhsScalar>
struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_LE> : binary_op_base<LhsScalar,RhsScalar>
{
typedef bool result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a<=b;}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a<=b;}
};
template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_GT> {
template<typename LhsScalar, typename RhsScalar>
struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_GT> : binary_op_base<LhsScalar,RhsScalar>
{
typedef bool result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a>b;}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>b;}
};
template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_GE> {
template<typename LhsScalar, typename RhsScalar>
struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_GE> : binary_op_base<LhsScalar,RhsScalar>
{
typedef bool result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a>=b;}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a>=b;}
};
template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_UNORD> {
template<typename LhsScalar, typename RhsScalar>
struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_UNORD> : binary_op_base<LhsScalar,RhsScalar>
{
typedef bool result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return !(a<=b || b<=a);}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return !(a<=b || b<=a);}
};
template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_NEQ> {
template<typename LhsScalar, typename RhsScalar>
struct scalar_cmp_op<LhsScalar,RhsScalar, cmp_NEQ> : binary_op_base<LhsScalar,RhsScalar>
{
typedef bool result_type;
EIGEN_EMPTY_STRUCT_CTOR(scalar_cmp_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const Scalar& a, const Scalar& b) const {return a!=b;}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator()(const LhsScalar& a, const RhsScalar& b) const {return a!=b;}
};
@ -216,7 +259,9 @@ template<typename Scalar> struct scalar_cmp_op<Scalar, cmp_NEQ> {
*
* \sa MatrixBase::stableNorm(), class Redux
*/
template<typename Scalar> struct scalar_hypot_op {
template<typename Scalar>
struct scalar_hypot_op<Scalar,Scalar> : binary_op_base<Scalar,Scalar>
{
EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op)
// typedef typename NumTraits<Scalar>::Real result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
@ -237,7 +282,7 @@ template<typename Scalar> struct scalar_hypot_op {
}
};
template<typename Scalar>
struct functor_traits<scalar_hypot_op<Scalar> > {
struct functor_traits<scalar_hypot_op<Scalar,Scalar> > {
enum
{
Cost = 3 * NumTraits<Scalar>::AddCost +
@ -250,13 +295,24 @@ struct functor_traits<scalar_hypot_op<Scalar> > {
/** \internal
* \brief Template functor to compute the pow of two scalars
*/
template<typename Scalar, typename OtherScalar> struct scalar_binary_pow_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_binary_pow_op)
template<typename Scalar, typename Exponent>
struct scalar_pow_op : binary_op_base<Scalar,Exponent>
{
typedef typename ScalarBinaryOpTraits<Scalar,Exponent,scalar_pow_op>::ReturnType result_type;
#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
EIGEN_EMPTY_STRUCT_CTOR(scalar_pow_op)
#else
scalar_pow_op() {
typedef Scalar LhsScalar;
typedef Exponent RhsScalar;
EIGEN_SCALAR_BINARY_OP_PLUGIN
}
#endif
EIGEN_DEVICE_FUNC
inline Scalar operator() (const Scalar& a, const OtherScalar& b) const { return numext::pow(a, b); }
inline result_type operator() (const Scalar& a, const Exponent& b) const { return numext::pow(a, b); }
};
template<typename Scalar, typename OtherScalar>
struct functor_traits<scalar_binary_pow_op<Scalar,OtherScalar> > {
template<typename Scalar, typename Exponent>
struct functor_traits<scalar_pow_op<Scalar,Exponent> > {
enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };
};
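// Usage sketch (assuming pow is routed through the scalar-binary-op macros
// introduced elsewhere in this patch, so the exponent type may differ from
// the base type):
//   ArrayXd x = ArrayXd::LinSpaced(4, 1.0, 4.0);
//   ArrayXd y = x.pow(3);  // int exponent promoted to double; numext::pow
//                          // applied per coefficient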
@ -269,18 +325,27 @@ struct functor_traits<scalar_binary_pow_op<Scalar,OtherScalar> > {
*
* \sa class CwiseBinaryOp, MatrixBase::operator-
*/
template<typename Scalar> struct scalar_difference_op {
template<typename LhsScalar,typename RhsScalar>
struct scalar_difference_op : binary_op_base<LhsScalar,RhsScalar>
{
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_difference_op>::ReturnType result_type;
#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; }
#else
scalar_difference_op() {
EIGEN_SCALAR_BINARY_OP_PLUGIN
}
#endif
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a - b; }
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
{ return internal::psub(a,b); }
};
template<typename Scalar>
struct functor_traits<scalar_difference_op<Scalar> > {
template<typename LhsScalar,typename RhsScalar>
struct functor_traits<scalar_difference_op<LhsScalar,RhsScalar> > {
enum {
Cost = NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasSub
Cost = (NumTraits<LhsScalar>::AddCost+NumTraits<RhsScalar>::AddCost)/2,
PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasSub && packet_traits<RhsScalar>::HasSub
};
};
@ -289,13 +354,17 @@ struct functor_traits<scalar_difference_op<Scalar> > {
*
* \sa class CwiseBinaryOp, Cwise::operator/()
*/
template<typename LhsScalar,typename RhsScalar> struct scalar_quotient_op {
enum {
// TODO vectorize mixed product
Vectorizable = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasDiv && packet_traits<RhsScalar>::HasDiv
};
typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
template<typename LhsScalar,typename RhsScalar>
struct scalar_quotient_op : binary_op_base<LhsScalar,RhsScalar>
{
typedef typename ScalarBinaryOpTraits<LhsScalar,RhsScalar,scalar_quotient_op>::ReturnType result_type;
#ifndef EIGEN_SCALAR_BINARY_OP_PLUGIN
EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
#else
scalar_quotient_op() {
EIGEN_SCALAR_BINARY_OP_PLUGIN
}
#endif
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; }
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
@ -305,7 +374,7 @@ template<typename LhsScalar,typename RhsScalar>
struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {
typedef typename scalar_quotient_op<LhsScalar,RhsScalar>::result_type result_type;
enum {
PacketAccess = scalar_quotient_op<LhsScalar,RhsScalar>::Vectorizable,
PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasDiv && packet_traits<RhsScalar>::HasDiv,
Cost = NumTraits<result_type>::template Div<PacketAccess>::Cost
};
};
@ -365,7 +434,8 @@ template<> struct functor_traits<scalar_boolean_xor_op> {
*
* \sa class CwiseBinaryOp, Cwise::igamma
*/
template<typename Scalar> struct scalar_igamma_op {
template<typename Scalar> struct scalar_igamma_op : binary_op_base<Scalar,Scalar>
{
EIGEN_EMPTY_STRUCT_CTOR(scalar_igamma_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const {
using numext::igamma; return igamma(a, x);
@ -390,7 +460,8 @@ struct functor_traits<scalar_igamma_op<Scalar> > {
*
* \sa class CwiseBinaryOp, Cwise::igammac
*/
template<typename Scalar> struct scalar_igammac_op {
template<typename Scalar> struct scalar_igammac_op : binary_op_base<Scalar,Scalar>
{
EIGEN_EMPTY_STRUCT_CTOR(scalar_igammac_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const {
using numext::igammac; return igammac(a, x);
@ -413,183 +484,46 @@ struct functor_traits<scalar_igammac_op<Scalar> > {
//---------- binary functors bound to a constant, thus appearing as a unary functor ----------
/** \internal
* \brief Template functor to multiply a scalar by a fixed other one
*
* \sa class CwiseUnaryOp, MatrixBase::operator*, MatrixBase::operator/
*/
/* NOTE why doing the pset1() in packetOp *is* an optimization?
* indeed it seems better to declare m_other as a Packet and do the pset1() once
* in the constructor. However, in practice:
* - GCC does not like m_other as a Packet and generates a load every time it needs it
* - on the other hand GCC is able to move the pset1() outside the loop :)
* - simpler code ;)
* (ICC and gcc 4.4 seem to perform well in both cases, the issue is visible with y = a*x + b*y)
*/
template<typename Scalar>
struct scalar_multiple_op {
// FIXME default copy constructors seem bugged with std::complex<>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE scalar_multiple_op(const scalar_multiple_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE scalar_multiple_op(const Scalar& other) : m_other(other) { }
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; }
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return internal::pmul(a, pset1<Packet>(m_other)); }
typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
};
template<typename Scalar>
struct functor_traits<scalar_multiple_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasMul }; };
// The following two classes permit turning any binary functor into a unary one with one argument bound to a constant value; a usage sketch follows bind2nd_op below.
// They are analogues to std::binder1st/binder2nd but with the following differences:
// - they are compatible with packetOp
// - they are portable across C++ versions (the std::binder* are deprecated in C++11)
template<typename BinaryOp> struct bind1st_op : BinaryOp {
template<typename Scalar1, typename Scalar2>
struct scalar_multiple2_op {
typedef typename scalar_product_traits<Scalar1,Scalar2>::ReturnType result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const scalar_multiple2_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const Scalar2& other) : m_other(other) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; }
typename add_const_on_value_type<typename NumTraits<Scalar2>::Nested>::type m_other;
};
template<typename Scalar1,typename Scalar2>
struct functor_traits<scalar_multiple2_op<Scalar1,Scalar2> >
{ enum { Cost = NumTraits<Scalar1>::MulCost, PacketAccess = false }; };
typedef typename BinaryOp::first_argument_type first_argument_type;
typedef typename BinaryOp::second_argument_type second_argument_type;
typedef typename BinaryOp::result_type result_type;
/** \internal
* \brief Template functor to divide a scalar by a fixed other one
*
* This functor is used to implement the quotient of a matrix by
* a scalar where the scalar type is not necessarily a floating point type.
*
* \sa class CwiseUnaryOp, MatrixBase::operator/
*/
template<typename Scalar>
struct scalar_quotient1_op {
// FIXME default copy constructors seem bugged with std::complex<>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; }
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return internal::pdiv(a, pset1<Packet>(m_other)); }
typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
};
template<typename Scalar>
struct functor_traits<scalar_quotient1_op<Scalar> >
{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = packet_traits<Scalar>::HasDiv }; };
bind1st_op(const first_argument_type &val) : m_value(val) {}
template<typename Scalar1, typename Scalar2>
struct scalar_quotient2_op {
typedef typename scalar_product_traits<Scalar1,Scalar2>::ReturnType result_type;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const scalar_quotient2_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient2_op(const Scalar2& other) : m_other(other) { }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a / m_other; }
typename add_const_on_value_type<typename NumTraits<Scalar2>::Nested>::type m_other;
};
template<typename Scalar1,typename Scalar2>
struct functor_traits<scalar_quotient2_op<Scalar1,Scalar2> >
{ enum { Cost = 2 * NumTraits<Scalar1>::MulCost, PacketAccess = false }; };
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const second_argument_type& b) const { return BinaryOp::operator()(m_value,b); }
// In Eigen, any binary op (Product, CwiseBinaryOp) requires the Lhs and Rhs to have the same scalar type, except for multiplication
// where the mixing of different types is handled by scalar_product_traits
// In particular, real * complex<real> is allowed.
// FIXME move this to functor_traits adding a functor_default
template<typename Functor> struct functor_is_product_like { enum { ret = 0 }; };
template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_conj_product_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
template<typename LhsScalar,typename RhsScalar> struct functor_is_product_like<scalar_quotient_op<LhsScalar,RhsScalar> > { enum { ret = 1 }; };
/** \internal
* \brief Template functor to add a scalar to a fixed other one
* \sa class CwiseUnaryOp, Array::operator+
*/
/* If you wonder why doing the pset1() in packetOp() is an optimization, check scalar_multiple_op */
template<typename Scalar>
struct scalar_add_op {
// FIXME default copy constructors seem bugged with std::complex<>
EIGEN_DEVICE_FUNC inline scalar_add_op(const scalar_add_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC inline scalar_add_op(const Scalar& other) : m_other(other) { }
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a + m_other; }
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return internal::padd(a, pset1<Packet>(m_other)); }
const Scalar m_other;
};
template<typename Scalar>
struct functor_traits<scalar_add_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = packet_traits<Scalar>::HasAdd }; };
/** \internal
* \brief Template functor to subtract a fixed scalar from another one
* \sa class CwiseUnaryOp, Array::operator-, struct scalar_add_op, struct scalar_rsub_op
*/
template<typename Scalar>
struct scalar_sub_op {
EIGEN_DEVICE_FUNC inline scalar_sub_op(const scalar_sub_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC inline scalar_sub_op(const Scalar& other) : m_other(other) { }
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a - m_other; }
template <typename Packet>
EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
{ return internal::psub(a, pset1<Packet>(m_other)); }
const Scalar m_other;
};
template<typename Scalar>
struct functor_traits<scalar_sub_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = packet_traits<Scalar>::HasAdd }; };
/** \internal
* \brief Template functor to subtract a scalar from a fixed other one
* \sa class CwiseUnaryOp, Array::operator-, struct scalar_add_op, struct scalar_sub_op
*/
template<typename Scalar>
struct scalar_rsub_op {
EIGEN_DEVICE_FUNC inline scalar_rsub_op(const scalar_rsub_op& other) : m_other(other.m_other) { }
EIGEN_DEVICE_FUNC inline scalar_rsub_op(const Scalar& other) : m_other(other) { }
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other - a; }
template <typename Packet>
EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
{ return internal::psub(pset1<Packet>(m_other), a); }
const Scalar m_other;
};
template<typename Scalar>
struct functor_traits<scalar_rsub_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = packet_traits<Scalar>::HasAdd }; };
/** \internal
* \brief Template functor to raise a scalar to a power
* \sa class CwiseUnaryOp, Cwise::pow
*/
template<typename Scalar>
struct scalar_pow_op {
// FIXME default copy constructors seem bugged with std::complex<>
EIGEN_DEVICE_FUNC inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { }
EIGEN_DEVICE_FUNC inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {}
EIGEN_DEVICE_FUNC
inline Scalar operator() (const Scalar& a) const { return numext::pow(a, m_exponent); }
const Scalar m_exponent;
};
template<typename Scalar>
struct functor_traits<scalar_pow_op<Scalar> >
{ enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
/** \internal
* \brief Template functor to compute the quotient between a scalar and array entries.
* \sa class CwiseUnaryOp, Cwise::inverse()
*/
template<typename Scalar>
struct scalar_inverse_mult_op {
EIGEN_DEVICE_FUNC scalar_inverse_mult_op(const Scalar& other) : m_other(other) {}
EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other / a; }
template<typename Packet>
EIGEN_DEVICE_FUNC inline const Packet packetOp(const Packet& a) const
{ return internal::pdiv(pset1<Packet>(m_other),a); }
Scalar m_other;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& b) const
{ return BinaryOp::packetOp(internal::pset1<Packet>(m_value), b); }
first_argument_type m_value;
};
template<typename Scalar>
struct functor_traits<scalar_inverse_mult_op<Scalar> >
{ enum { PacketAccess = packet_traits<Scalar>::HasDiv, Cost = NumTraits<Scalar>::template Div<PacketAccess>::Cost }; };
template<typename BinaryOp> struct functor_traits<bind1st_op<BinaryOp> > : functor_traits<BinaryOp> {};
template<typename BinaryOp> struct bind2nd_op : BinaryOp {
typedef typename BinaryOp::first_argument_type first_argument_type;
typedef typename BinaryOp::second_argument_type second_argument_type;
typedef typename BinaryOp::result_type result_type;
bind2nd_op(const second_argument_type &val) : m_value(val) {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const first_argument_type& a) const { return BinaryOp::operator()(a,m_value); }
template<typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
{ return BinaryOp::packetOp(a,internal::pset1<Packet>(m_value)); }
second_argument_type m_value;
};
template<typename BinaryOp> struct functor_traits<bind2nd_op<BinaryOp> > : functor_traits<BinaryOp> {};
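// Usage sketch (internal API, subject to change):
//   typedef scalar_sum_op<double,double> SumOp;
//   bind2nd_op<SumOp> add2(2.0);  // unary functor computing x + 2.0
//   double r = add2(40.0);        // 42.0
//   // packetOp() broadcasts the bound value with pset1, so the bound functor
//   // stays vectorizable wherever SumOp is.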
} // end namespace internal

View File

@ -26,7 +26,8 @@ struct scalar_constant_op {
};
template<typename Scalar>
struct functor_traits<scalar_constant_op<Scalar> >
{ enum { Cost = 1, PacketAccess = packet_traits<Scalar>::Vectorizable, IsRepeatable = true }; };
{ enum { Cost = 0 /* as the constant value should be loaded into a register only once for the whole expression */,
PacketAccess = packet_traits<Scalar>::Vectorizable, IsRepeatable = true }; };
template<typename Scalar> struct scalar_identity_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op)

View File

@ -363,7 +363,7 @@ class gebp_traits
public:
typedef _LhsScalar LhsScalar;
typedef _RhsScalar RhsScalar;
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
ConjLhs = _ConjLhs,
@ -478,7 +478,7 @@ class gebp_traits<std::complex<RealScalar>, RealScalar, _ConjLhs, false>
public:
typedef std::complex<RealScalar> LhsScalar;
typedef RealScalar RhsScalar;
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
ConjLhs = _ConjLhs,

View File

@ -25,7 +25,7 @@ struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLh
{
typedef gebp_traits<RhsScalar,LhsScalar> Traits;
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(
Index rows, Index cols, Index depth,
const LhsScalar* lhs, Index lhsStride,
@ -55,7 +55,7 @@ struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLh
typedef gebp_traits<LhsScalar,RhsScalar> Traits;
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static void run(Index rows, Index cols, Index depth,
const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsStride,

View File

@ -40,7 +40,7 @@ template <typename Index, typename LhsScalar, int LhsStorageOrder, bool Conjugat
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version>
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor,UpLo,Version>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* lhs, Index lhsStride,
const RhsScalar* rhs, Index rhsStride, ResScalar* res, Index resStride,
const ResScalar& alpha, level3_blocking<RhsScalar,LhsScalar>& blocking)
@ -57,7 +57,7 @@ template <typename Index, typename LhsScalar, int LhsStorageOrder, bool Conjugat
typename RhsScalar, int RhsStorageOrder, bool ConjugateRhs, int UpLo, int Version>
struct general_matrix_matrix_triangular_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,ColMajor,UpLo,Version>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
static EIGEN_STRONG_INLINE void run(Index size, Index depth,const LhsScalar* _lhs, Index lhsStride,
const RhsScalar* _rhs, Index rhsStride, ResScalar* _res, Index resStride,
const ResScalar& alpha, level3_blocking<LhsScalar,RhsScalar>& blocking)

View File

@ -58,7 +58,7 @@ namespace internal {
template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
struct general_matrix_vector_product<Index,LhsScalar,LhsMapper,ColMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable
@ -334,7 +334,7 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,LhsScalar,LhsMapper,C
template<typename Index, typename LhsScalar, typename LhsMapper, bool ConjugateLhs, typename RhsScalar, typename RhsMapper, bool ConjugateRhs, int Version>
struct general_matrix_vector_product<Index,LhsScalar,LhsMapper,RowMajor,ConjugateLhs,RhsScalar,RhsMapper,ConjugateRhs,Version>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
Vectorizable = packet_traits<LhsScalar>::Vectorizable && packet_traits<RhsScalar>::Vectorizable

View File

@ -20,7 +20,7 @@ struct triangular_matrix_vector_product;
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs, int Version>
struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor,Version>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
IsLower = ((Mode&Lower)==Lower),
HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
@ -91,7 +91,7 @@ EIGEN_DONT_INLINE void triangular_matrix_vector_product<Index,Mode,LhsScalar,Con
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs,int Version>
struct triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor,Version>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
IsLower = ((Mode&Lower)==Lower),
HasUnitDiag = (Mode & UnitDiag)==UnitDiag,

View File

@ -293,16 +293,27 @@ struct blas_traits<CwiseUnaryOp<scalar_conjugate_op<Scalar>, NestedXpr> >
};
// pop scalar multiple
template<typename Scalar, typename NestedXpr>
struct blas_traits<CwiseUnaryOp<scalar_multiple_op<Scalar>, NestedXpr> >
template<typename Scalar, typename NestedXpr, typename Plain>
struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> >
: blas_traits<NestedXpr>
{
typedef blas_traits<NestedXpr> Base;
typedef CwiseUnaryOp<scalar_multiple_op<Scalar>, NestedXpr> XprType;
typedef CwiseBinaryOp<scalar_product_op<Scalar>, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain>, NestedXpr> XprType;
typedef typename Base::ExtractType ExtractType;
static inline ExtractType extract(const XprType& x) { return Base::extract(x.nestedExpression()); }
static inline ExtractType extract(const XprType& x) { return Base::extract(x.rhs()); }
static inline Scalar extractScalarFactor(const XprType& x)
{ return x.functor().m_other * Base::extractScalarFactor(x.nestedExpression()); }
{ return x.lhs().functor().m_other * Base::extractScalarFactor(x.rhs()); }
};
template<typename Scalar, typename NestedXpr, typename Plain>
struct blas_traits<CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > >
: blas_traits<NestedXpr>
{
typedef blas_traits<NestedXpr> Base;
typedef CwiseBinaryOp<scalar_product_op<Scalar>, NestedXpr, const CwiseNullaryOp<scalar_constant_op<Scalar>,Plain> > XprType;
typedef typename Base::ExtractType ExtractType;
static inline ExtractType extract(const XprType& x) { return Base::extract(x.lhs()); }
static inline Scalar extractScalarFactor(const XprType& x)
{ return Base::extractScalarFactor(x.lhs()) * x.rhs().functor().m_other; }
};
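// Net effect (sketch): for an expression such as 2.0*(A*x), blas_traits
// recursively peels off the constant operand, so a single product kernel can
// be called with alpha = 2.0 instead of materializing a scaled temporary:
//   MatrixXd A(4,4); VectorXd x(4);
//   VectorXd y = 2.0 * (A * x);  // one gemv-like call, alpha extracted via
//                                // extractScalarFactor()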
// pop opposite

View File

@ -175,9 +175,11 @@ namespace internal {
// with optional conjugation of the arguments.
template<typename LhsScalar, typename RhsScalar, bool ConjLhs=false, bool ConjRhs=false> struct conj_helper;
template<typename Scalar> struct scalar_sum_op;
template<typename Scalar> struct scalar_difference_op;
template<typename LhsScalar,typename RhsScalar> struct scalar_conj_product_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_sum_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_difference_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_conj_product_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_min_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_max_op;
template<typename Scalar> struct scalar_opposite_op;
template<typename Scalar> struct scalar_conjugate_op;
template<typename Scalar> struct scalar_real_op;
@ -193,17 +195,11 @@ template<typename Scalar> struct scalar_sin_op;
template<typename Scalar> struct scalar_acos_op;
template<typename Scalar> struct scalar_asin_op;
template<typename Scalar> struct scalar_tan_op;
template<typename Scalar> struct scalar_pow_op;
template<typename Scalar> struct scalar_inverse_op;
template<typename Scalar> struct scalar_square_op;
template<typename Scalar> struct scalar_cube_op;
template<typename Scalar, typename NewType> struct scalar_cast_op;
template<typename Scalar> struct scalar_multiple_op;
template<typename Scalar> struct scalar_quotient1_op;
template<typename Scalar> struct scalar_min_op;
template<typename Scalar> struct scalar_max_op;
template<typename Scalar> struct scalar_random_op;
template<typename Scalar> struct scalar_add_op;
template<typename Scalar> struct scalar_constant_op;
template<typename Scalar> struct scalar_identity_op;
template<typename Scalar,bool iscpx> struct scalar_sign_op;
@ -211,10 +207,10 @@ template<typename Scalar> struct scalar_igamma_op;
template<typename Scalar> struct scalar_igammac_op;
template<typename Scalar> struct scalar_betainc_op;
template<typename Scalar,typename ScalarExponent> struct scalar_pow_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_hypot_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_product_op;
template<typename LhsScalar,typename RhsScalar> struct scalar_multiple2_op;
template<typename LhsScalar,typename RhsScalar=LhsScalar> struct scalar_quotient_op;
template<typename LhsScalar,typename RhsScalar> struct scalar_quotient2_op;
} // end namespace internal

View File

@ -462,6 +462,8 @@
#define EIGEN_CAT2(a,b) a ## b
#define EIGEN_CAT(a,b) EIGEN_CAT2(a,b)
#define EIGEN_COMMA ,
// convert a token to a string
#define EIGEN_MAKESTRING2(a) #a
#define EIGEN_MAKESTRING(a) EIGEN_MAKESTRING2(a)
@ -876,18 +878,10 @@ namespace Eigen {
#define EIGEN_IMPLIES(a,b) (!(a) || (b))
#define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,FUNCTOR) \
template<typename OtherDerived> \
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp<FUNCTOR<Scalar>, const Derived, const OtherDerived> \
(METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
{ \
return CwiseBinaryOp<FUNCTOR<Scalar>, const Derived, const OtherDerived>(derived(), other.derived()); \
}
// the expression type of a cwise product
#define EIGEN_CWISE_PRODUCT_RETURN_TYPE(LHS,RHS) \
// the expression type of a standard coefficient wise binary operation
#define EIGEN_CWISE_BINARY_RETURN_TYPE(LHS,RHS,OPNAME) \
CwiseBinaryOp< \
internal::scalar_product_op< \
EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)< \
typename internal::traits<LHS>::Scalar, \
typename internal::traits<RHS>::Scalar \
>, \
@ -895,6 +889,45 @@ namespace Eigen {
const RHS \
>
#define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,OPNAME) \
template<typename OtherDerived> \
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME) \
(METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
{ \
return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,OPNAME)(derived(), other.derived()); \
}
#define EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(EXPR,SCALAR,OPNAME) \
CwiseBinaryOp<EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)<typename internal::traits<EXPR>::Scalar,SCALAR>, const EXPR, \
const typename internal::plain_constant_type<EXPR,SCALAR>::type>
#define EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(SCALAR,EXPR,OPNAME) \
CwiseBinaryOp<EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)<SCALAR,typename internal::traits<EXPR>::Scalar>, \
const typename internal::plain_constant_type<EXPR,SCALAR>::type, const EXPR>
#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME) \
template <typename T> EIGEN_DEVICE_FUNC inline \
const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,typename internal::promote_scalar_arg<Scalar EIGEN_COMMA T EIGEN_COMMA ScalarBinaryOpTraits<Scalar EIGEN_COMMA T EIGEN_COMMA EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)<Scalar EIGEN_COMMA T> >::Defined>::type,OPNAME) \
(METHOD)(const T& scalar) const { \
typedef typename internal::promote_scalar_arg<Scalar,T,ScalarBinaryOpTraits<Scalar,T,EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)<Scalar,T> >::Defined>::type PromotedT; \
return EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,PromotedT,OPNAME)(derived(), \
typename internal::plain_constant_type<Derived,PromotedT>::type(derived().rows(), derived().cols(), internal::scalar_constant_op<PromotedT>(scalar))); \
}
#define EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \
template <typename T> EIGEN_DEVICE_FUNC inline friend \
const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(typename internal::promote_scalar_arg<Scalar EIGEN_COMMA T EIGEN_COMMA ScalarBinaryOpTraits<T EIGEN_COMMA Scalar EIGEN_COMMA EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)<T EIGEN_COMMA Scalar> >::Defined>::type,Derived,OPNAME) \
(METHOD)(const T& scalar, const StorageBaseType& matrix) { \
typedef typename internal::promote_scalar_arg<Scalar,T,ScalarBinaryOpTraits<T,Scalar,EIGEN_CAT(EIGEN_CAT(internal::scalar_,OPNAME),_op)<T,Scalar> >::Defined>::type PromotedT; \
return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(PromotedT,Derived,OPNAME)( \
typename internal::plain_constant_type<Derived,PromotedT>::type(matrix.derived().rows(), matrix.derived().cols(), internal::scalar_constant_op<PromotedT>(scalar)), matrix.derived()); \
}
#define EIGEN_MAKE_SCALAR_BINARY_OP(METHOD,OPNAME) \
EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(METHOD,OPNAME) \
EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(METHOD,OPNAME)
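// Net effect (illustration): a scalar operand whose type differs from the
// expression's scalar type is promoted when promote_scalar_arg allows it:
//   MatrixXd m = MatrixXd::Ones(2,2);
//   MatrixXd a = m * 2;  // ONTHERIGHT overload, int 2 promoted to double
//   MatrixXd b = 2 * m;  // ONTHELEFT friend overload, same promotion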
#ifdef EIGEN_EXCEPTIONS
# define EIGEN_THROW_X(X) throw X
# define EIGEN_THROW throw

View File

@ -328,6 +328,30 @@ struct result_of<Func(ArgType0,ArgType1)> {
enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
typedef typename binary_result_of_select<Func, ArgType0, ArgType1, FunctorType>::type type;
};
template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2, int SizeOf=sizeof(has_none)>
struct ternary_result_of_select {typedef typename internal::remove_all<ArgType0>::type type;};
template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2>
struct ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, sizeof(has_std_result_type)>
{typedef typename Func::result_type type;};
template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2>
struct ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, sizeof(has_tr1_result)>
{typedef typename Func::template result<Func(ArgType0,ArgType1,ArgType2)>::type type;};
template<typename Func, typename ArgType0, typename ArgType1, typename ArgType2>
struct result_of<Func(ArgType0,ArgType1,ArgType2)> {
template<typename T>
static has_std_result_type testFunctor(T const *, typename T::result_type const * = 0);
template<typename T>
static has_tr1_result testFunctor(T const *, typename T::template result<T(ArgType0,ArgType1,ArgType2)>::type const * = 0);
static has_none testFunctor(...);
// note that the following indirection is needed for gcc-3.3
enum {FunctorType = sizeof(testFunctor(static_cast<Func*>(0)))};
typedef typename ternary_result_of_select<Func, ArgType0, ArgType1, ArgType2, FunctorType>::type type;
};
#endif
/** \internal In short, it computes int(sqrt(\a Y)) with \a Y an integer.
@ -375,33 +399,6 @@ template<typename T, typename U> struct scalar_product_traits
enum { Defined = 0 };
};
template<typename T> struct scalar_product_traits<T,T>
{
enum {
// Cost = NumTraits<T>::MulCost,
Defined = 1
};
typedef T ReturnType;
};
template<typename T> struct scalar_product_traits<T,std::complex<T> >
{
enum {
// Cost = 2*NumTraits<T>::MulCost,
Defined = 1
};
typedef std::complex<T> ReturnType;
};
template<typename T> struct scalar_product_traits<std::complex<T>, T>
{
enum {
// Cost = 2*NumTraits<T>::MulCost,
Defined = 1
};
typedef std::complex<T> ReturnType;
};
// FIXME quick workaround around current limitation of result_of
// template<typename Scalar, typename ArgType0, typename ArgType1>
// struct result_of<scalar_product_op<Scalar>(ArgType0,ArgType1)> {
@ -434,6 +431,67 @@ T div_ceil(const T &a, const T &b)
} // end namespace numext
/** \class ScalarBinaryOpTraits
* \ingroup Core_Module
*
* \brief Determines whether the given binary operation of two numeric types is allowed and what the scalar return type is.
*
* \sa CwiseBinaryOp
*/
template<typename ScalarA, typename ScalarB, typename BinaryOp=void>
struct ScalarBinaryOpTraits
#ifndef EIGEN_PARSED_BY_DOXYGEN
// for backward compatibility, use the hints given by the (deprecated) internal::scalar_product_traits class.
: internal::scalar_product_traits<ScalarA,ScalarB>
#endif // EIGEN_PARSED_BY_DOXYGEN
{};
template<typename T, typename BinaryOp>
struct ScalarBinaryOpTraits<T,T,BinaryOp>
{
enum { Defined = 1 };
typedef T ReturnType;
};
// For Matrix * Permutation
template<typename T, typename BinaryOp>
struct ScalarBinaryOpTraits<T,void,BinaryOp>
{
enum { Defined = 1 };
typedef T ReturnType;
};
// For Permutation * Matrix
template<typename T, typename BinaryOp>
struct ScalarBinaryOpTraits<void,T,BinaryOp>
{
enum { Defined = 1 };
typedef T ReturnType;
};
// for Permutation*Permutation
template<typename BinaryOp>
struct ScalarBinaryOpTraits<void,void,BinaryOp>
{
enum { Defined = 1 };
typedef void ReturnType;
};
template<typename T, typename BinaryOp>
struct ScalarBinaryOpTraits<T,std::complex<T>,BinaryOp>
{
enum { Defined = 1 };
typedef std::complex<T> ReturnType;
};
template<typename T, typename BinaryOp>
struct ScalarBinaryOpTraits<std::complex<T>, T,BinaryOp>
{
enum { Defined = 1 };
typedef std::complex<T> ReturnType;
};
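// A user-defined scalar type can opt in to mixed expressions by adding a
// specialization; sketch with a hypothetical user type MyReal:
//   namespace Eigen {
//     template<typename BinaryOp>
//     struct ScalarBinaryOpTraits<double,MyReal,BinaryOp>
//     { enum { Defined = 1 }; typedef MyReal ReturnType; };
//   }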
} // end namespace Eigen
#endif // EIGEN_META_H

View File

@ -45,6 +45,56 @@ inline IndexDest convert_index(const IndexSrc& idx) {
}
// promote_scalar_arg is a helper used in operations between an expression and a scalar, like:
// expression * scalar
// Its role is to determine how the type T of the scalar operand should be promoted given the scalar type ExprScalar of the given expression.
// The IsSupported template parameter must be provided by the caller as: ScalarBinaryOpTraits<ExprScalar,T,op>::Defined using the proper order for ExprScalar and T.
// Then the logic is as follows:
// - if the operation is natively supported as defined by IsSupported, then the scalar type is not promoted, and T is returned.
// - otherwise, NumTraits<ExprScalar>::Literal is returned if T is implicitly convertible to NumTraits<ExprScalar>::Literal AND this does not imply a float-to-integer conversion.
// - otherwise, ExprScalar is returned if T is implicitly convertible to ExprScalar AND this does not imply a float-to-integer conversion.
// - In all other cases, the promoted type is not defined, and the respective operation is thus invalid and not available (SFINAE); a worked example follows the specializations below.
template<typename ExprScalar,typename T, bool IsSupported>
struct promote_scalar_arg;
template<typename S,typename T>
struct promote_scalar_arg<S,T,true>
{
typedef T type;
};
// Recursively check safe conversion to PromotedType, and then ExprScalar if they are different.
template<typename ExprScalar,typename T,typename PromotedType,
bool ConvertibleToLiteral = internal::is_convertible<T,PromotedType>::value,
bool IsSafe = NumTraits<T>::IsInteger || !NumTraits<PromotedType>::IsInteger>
struct promote_scalar_arg_unsupported;
// Start recursion with NumTraits<ExprScalar>::Literal
template<typename S,typename T>
struct promote_scalar_arg<S,T,false> : promote_scalar_arg_unsupported<S,T,typename NumTraits<S>::Literal> {};
// We found a match!
template<typename S,typename T, typename PromotedType>
struct promote_scalar_arg_unsupported<S,T,PromotedType,true,true>
{
typedef PromotedType type;
};
// No match, but no real-to-integer issues, and ExprScalar and current PromotedType are different,
// so let's try to promote to ExprScalar
template<typename ExprScalar,typename T, typename PromotedType>
struct promote_scalar_arg_unsupported<ExprScalar,T,PromotedType,false,true>
: promote_scalar_arg_unsupported<ExprScalar,T,ExprScalar>
{};
// Unsafe real-to-integer, let's stop.
template<typename S,typename T, typename PromotedType>
struct promote_scalar_arg_unsupported<S,T,PromotedType,false,false> {};
// T is not even convertible to ExprScalar, let's stop.
template<typename S,typename T>
struct promote_scalar_arg_unsupported<S,T,S,false,true> {};
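Concretely, the recursion gives the following behavior (an illustrative sketch, not part of the patch):

Eigen::MatrixXf A(2,2);
A.setRandom();
A * 2;     // OK: int is safely converted to float
A * 2.0;   // OK: the double literal is demoted to NumTraits<float>::Literal, i.e. float
// Eigen::MatrixXi B(2,2);
// B * 0.5;  // would not compile: the float-to-integer promotion is rejected (SFINAE)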
//classes inheriting no_assignment_operator don't generate a default operator=.
class no_assignment_operator
{
@@ -576,6 +626,20 @@ struct plain_diag_type
>::type type;
};
template<typename Expr,typename Scalar = typename Expr::Scalar>
struct plain_constant_type
{
enum { Options = (traits<Expr>::Flags&RowMajorBit)?RowMajor:0 };
typedef Array<Scalar, traits<Expr>::RowsAtCompileTime, traits<Expr>::ColsAtCompileTime,
Options, traits<Expr>::MaxRowsAtCompileTime,traits<Expr>::MaxColsAtCompileTime> array_type;
typedef Matrix<Scalar, traits<Expr>::RowsAtCompileTime, traits<Expr>::ColsAtCompileTime,
Options, traits<Expr>::MaxRowsAtCompileTime,traits<Expr>::MaxColsAtCompileTime> matrix_type;
typedef CwiseNullaryOp<scalar_constant_op<Scalar>, const typename conditional<is_same< typename traits<Expr>::XprKind, MatrixXpr >::value, matrix_type, array_type>::type > type;
};
template<typename ExpressionType>
struct is_lvalue
{
@@ -610,11 +674,6 @@ bool is_same_dense(const T1 &, const T2 &, typename enable_if<!(has_direct_acces
return false;
}
template<typename T, typename U> struct is_same_or_void { enum { value = is_same<T,U>::value }; };
template<typename T> struct is_same_or_void<void,T> { enum { value = 1 }; };
template<typename T> struct is_same_or_void<T,void> { enum { value = 1 }; };
template<> struct is_same_or_void<void,void> { enum { value = 1 }; };
#ifdef EIGEN_DEBUG_ASSIGN
std::string demangle_traversal(int t)
{
@@ -649,17 +708,12 @@ std::string demangle_flags(int f)
} // end namespace internal
// we require Lhs and Rhs to have the same scalar type. Currently there is no example of a binary functor
// that would take two operands of different types. If there were such an example, then this check should be
// moved to the BinaryOp functors, on a per-case basis. This would however require a change in the BinaryOp functors, as
// currently they take only one typename Scalar template parameter.
// We require Lhs and Rhs to have "compatible" scalar types.
// It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths.
// So allowing the mixing of different types would give very unexpected errors when vectorization is enabled, e.g. when the user tries to
// add together a float matrix and a double matrix.
#define EIGEN_CHECK_BINARY_COMPATIBILIY(BINOP,LHS,RHS) \
EIGEN_STATIC_ASSERT((internal::functor_is_product_like<BINOP>::ret \
? int(internal::scalar_product_traits<LHS, RHS>::Defined) \
: int(internal::is_same_or_void<LHS, RHS>::value)), \
EIGEN_STATIC_ASSERT(int(ScalarBinaryOpTraits<LHS, RHS,BINOP>::Defined), \
YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
} // end namespace Eigen

View File

@@ -327,13 +327,22 @@ GeneralizedEigenSolver<MatrixType>::compute(const MatrixType& A, const MatrixTyp
}
else
{
Scalar p = Scalar(0.5) * (m_matS.coeff(i, i) - m_matS.coeff(i+1, i+1));
Scalar z = sqrt(abs(p * p + m_matS.coeff(i+1, i) * m_matS.coeff(i, i+1)));
m_alphas.coeffRef(i) = ComplexScalar(m_matS.coeff(i+1, i+1) + p, z);
m_alphas.coeffRef(i+1) = ComplexScalar(m_matS.coeff(i+1, i+1) + p, -z);
// We need to extract the generalized eigenvalues of the pair of a general 2x2 block S and a positive diagonal 2x2 block T
// Then, taking beta=T_00*T_11, we can avoid any division, and the alphas are the eigenvalues of A = (U^-1 * S * U) * diag(T_11,T_00):
m_betas.coeffRef(i) = m_realQZ.matrixT().coeff(i,i);
m_betas.coeffRef(i+1) = m_realQZ.matrixT().coeff(i,i);
// T = [a 0]
// [0 b]
RealScalar a = m_realQZ.matrixT().coeff(i, i), b = m_realQZ.matrixT().coeff(i+1, i+1);
Matrix<RealScalar,2,2> S2 = m_matS.template block<2,2>(i,i) * Matrix<Scalar,2,1>(b,a).asDiagonal();
Scalar p = Scalar(0.5) * (S2.coeff(0,0) - S2.coeff(1,1));
Scalar z = sqrt(abs(p * p + S2.coeff(1,0) * S2.coeff(0,1)));
m_alphas.coeffRef(i) = ComplexScalar(S2.coeff(1,1) + p, z);
m_alphas.coeffRef(i+1) = ComplexScalar(S2.coeff(1,1) + p, -z);
m_betas.coeffRef(i) =
m_betas.coeffRef(i+1) = a*b;
i += 2;
}
}
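A usage sketch of the decomposition this code feeds (sizes and contents assumed):

Eigen::MatrixXd A = Eigen::MatrixXd::Random(4,4);
Eigen::MatrixXd B = Eigen::MatrixXd::Random(4,4);
Eigen::GeneralizedEigenSolver<Eigen::MatrixXd> ges(A, B);
Eigen::VectorXcd alphas = ges.alphas();  // complex pairs come from the 2x2 blocks above
Eigen::VectorXd  betas  = ges.betas();   // each eigenvalue is alphas(k)/betas(k)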

View File

@@ -552,7 +552,6 @@ namespace Eigen {
m_T.coeffRef(l,l-1) = Scalar(0.0);
}
template<typename MatrixType>
RealQZ<MatrixType>& RealQZ<MatrixType>::compute(const MatrixType& A_in, const MatrixType& B_in, bool computeQZ)
{
@@ -616,6 +615,37 @@ namespace Eigen {
}
// check if we converged before reaching iterations limit
m_info = (local_iter<m_maxIters) ? Success : NoConvergence;
// For each non-triangular 2x2 diagonal block of S,
// reduce the respective 2x2 diagonal block of T to positive diagonal form using 2x2 SVD.
// This step is not mandatory for QZ, but it does help further extraction of eigenvalues/eigenvectors,
// and is on par with Lapack/Matlab QZ.
if(m_info==Success)
{
for(Index i=0; i<dim-1; ++i)
{
if(m_S.coeff(i+1, i) != Scalar(0))
{
JacobiRotation<Scalar> j_left, j_right;
internal::real_2x2_jacobi_svd(m_T, i, i+1, &j_left, &j_right);
// Apply resulting Jacobi rotations
m_S.applyOnTheLeft(i,i+1,j_left);
m_S.applyOnTheRight(i,i+1,j_right);
m_T.applyOnTheLeft(i,i+1,j_left);
m_T.applyOnTheRight(i,i+1,j_right);
m_T(i+1,i) = m_T(i,i+1) = Scalar(0);
if(m_computeQZ) {
m_Q.applyOnTheRight(i,i+1,j_left.transpose());
m_Z.applyOnTheLeft(i,i+1,j_right.transpose());
}
i++;
}
}
}
return *this;
} // end compute
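A sketch of what the post-processing buys at the user level (illustrative):

Eigen::Matrix4d A = Eigen::Matrix4d::Random(), B = Eigen::Matrix4d::Random();
Eigen::RealQZ<Eigen::Matrix4d> qz(A, B);  // A = Q S Z, B = Q T Z
if(qz.info() == Eigen::Success)
{
  // Wherever S has a non-zero sub-diagonal entry (a 2x2 block), the matching
  // 2x2 block of qz.matrixT() is now diagonal with non-negative entries.
}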

View File

@@ -367,10 +367,10 @@ void tridiagonalization_inplace(MatrixType& matA, CoeffVectorType& hCoeffs)
hCoeffs.tail(n-i-1).noalias() = (matA.bottomRightCorner(remainingSize,remainingSize).template selfadjointView<Lower>()
* (conj(h) * matA.col(i).tail(remainingSize)));
hCoeffs.tail(n-i-1) += (conj(h)*Scalar(-0.5)*(hCoeffs.tail(remainingSize).dot(matA.col(i).tail(remainingSize)))) * matA.col(i).tail(n-i-1);
hCoeffs.tail(n-i-1) += (conj(h)*RealScalar(-0.5)*(hCoeffs.tail(remainingSize).dot(matA.col(i).tail(remainingSize)))) * matA.col(i).tail(n-i-1);
matA.bottomRightCorner(remainingSize, remainingSize).template selfadjointView<Lower>()
.rankUpdate(matA.col(i).tail(remainingSize), hCoeffs.tail(remainingSize), -1);
.rankUpdate(matA.col(i).tail(remainingSize), hCoeffs.tail(remainingSize), Scalar(-1));
matA.col(i).coeffRef(i+1) = beta;
hCoeffs.coeffRef(i) = h;

View File

@@ -36,8 +36,9 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
typedef NumTraits<Scalar> ScalarTraits;
typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3
typedef typename ScalarTraits::Real RealScalar;
typedef typename ScalarTraits::NonInteger NonInteger;
typedef typename ScalarTraits::NonInteger NonInteger;
typedef Matrix<Scalar,AmbientDimAtCompileTime,1> VectorType;
typedef CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const VectorType, const VectorType> VectorTypeSum;
/** Define constants to name the corners of a 1D, 2D or 3D axis aligned bounding box */
enum CornerType
@@ -111,16 +112,15 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
inline VectorType& (max)() { return m_max; }
/** \returns the center of the box */
inline const CwiseUnaryOp<internal::scalar_quotient1_op<Scalar>,
const CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const VectorType, const VectorType> >
inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(VectorTypeSum, RealScalar, quotient)
center() const
{ return (m_min+m_max)/2; }
{ return (m_min+m_max)/RealScalar(2); }
/** \returns the lengths of the sides of the bounding box.
* Note that this function does not get the same
* result for integral or floating scalar types: see
*/
inline const CwiseBinaryOp< internal::scalar_difference_op<Scalar>, const VectorType, const VectorType> sizes() const
inline const CwiseBinaryOp< internal::scalar_difference_op<Scalar,Scalar>, const VectorType, const VectorType> sizes() const
{ return m_max - m_min; }
/** \returns the volume of the bounding box */
@@ -131,7 +131,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
* if the length of the diagonal is needed: diagonal().norm()
* will provide it.
*/
inline CwiseBinaryOp< internal::scalar_difference_op<Scalar>, const VectorType, const VectorType> diagonal() const
inline CwiseBinaryOp< internal::scalar_difference_op<Scalar,Scalar>, const VectorType, const VectorType> diagonal() const
{ return sizes(); }
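A sketch of the integral-versus-floating difference mentioned above:

Eigen::AlignedBox2i bi(Eigen::Vector2i(0,0), Eigen::Vector2i(3,3));
Eigen::Vector2i ci = bi.center();  // integer division: (1,1)
Eigen::AlignedBox2f bf(Eigen::Vector2f(0,0), Eigen::Vector2f(3,3));
Eigen::Vector2f cf = bf.center();  // exact: (1.5,1.5)
Eigen::Vector2i sz = bi.sizes();   // (3,3) in both cases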
/** \returns the vertex of the bounding box at the corner defined by

View File

@@ -329,10 +329,10 @@ protected:
// dense = homogeneous
template< typename DstXprType, typename ArgType, typename Scalar>
struct Assignment<DstXprType, Homogeneous<ArgType,Vertical>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
struct Assignment<DstXprType, Homogeneous<ArgType,Vertical>, internal::assign_op<Scalar,typename ArgType::Scalar>, Dense2Dense, Scalar>
{
typedef Homogeneous<ArgType,Vertical> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename ArgType::Scalar> &)
{
dst.template topRows<ArgType::RowsAtCompileTime>(src.nestedExpression().rows()) = src.nestedExpression();
dst.row(dst.rows()-1).setOnes();
@@ -341,10 +341,10 @@ struct Assignment<DstXprType, Homogeneous<ArgType,Vertical>, internal::assign_op
// dense = homogeneous
template< typename DstXprType, typename ArgType, typename Scalar>
struct Assignment<DstXprType, Homogeneous<ArgType,Horizontal>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
struct Assignment<DstXprType, Homogeneous<ArgType,Horizontal>, internal::assign_op<Scalar,typename ArgType::Scalar>, Dense2Dense, Scalar>
{
typedef Homogeneous<ArgType,Horizontal> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename ArgType::Scalar> &)
{
dst.template leftCols<ArgType::ColsAtCompileTime>(src.nestedExpression().cols()) = src.nestedExpression();
dst.col(dst.cols()-1).setOnes();
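Both specializations serve statements like the following sketch:

Eigen::Vector3d v(1, 2, 3);
Eigen::Vector4d h = v.homogeneous();  // dense = homogeneous: (1, 2, 3, 1)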
@@ -373,7 +373,7 @@ struct homogeneous_right_product_refactoring_helper
typedef typename Rhs::ConstRowXpr ConstantColumn;
typedef Replicate<const ConstantColumn,Rows,1> ConstantBlock;
typedef Product<Lhs,LinearBlock,LazyProduct> LinearProduct;
typedef CwiseBinaryOp<internal::scalar_sum_op<typename Lhs::Scalar>, const LinearProduct, const ConstantBlock> Xpr;
typedef CwiseBinaryOp<internal::scalar_sum_op<typename Lhs::Scalar,typename Rhs::Scalar>, const LinearProduct, const ConstantBlock> Xpr;
};
template<typename Lhs, typename Rhs, int ProductTag>
@@ -414,7 +414,7 @@ struct homogeneous_left_product_refactoring_helper
typedef typename Lhs::ConstColXpr ConstantColumn;
typedef Replicate<const ConstantColumn,1,Cols> ConstantBlock;
typedef Product<LinearBlock,Rhs,LazyProduct> LinearProduct;
typedef CwiseBinaryOp<internal::scalar_sum_op<typename Lhs::Scalar>, const LinearProduct, const ConstantBlock> Xpr;
typedef CwiseBinaryOp<internal::scalar_sum_op<typename Lhs::Scalar,typename Rhs::Scalar>, const LinearProduct, const ConstantBlock> Xpr;
};
template<typename Lhs, typename Rhs, int ProductTag>

View File

@@ -107,12 +107,15 @@ public:
/** \addtogroup Geometry_Module */
//@{
/** Concatenates a linear transformation matrix and a uniform scaling */
/** Concatenates a linear transformation matrix and a uniform scaling
* \relates UniformScaling
*/
// NOTE this operator is defined in MatrixBase and not as a friend function
// of UniformScaling to fix an internal crash of Intel's ICC
template<typename Derived> typename MatrixBase<Derived>::ScalarMultipleReturnType
MatrixBase<Derived>::operator*(const UniformScaling<Scalar>& s) const
{ return derived() * s.factor(); }
template<typename Derived,typename Scalar>
EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(Derived,Scalar,product)
operator*(const MatrixBase<Derived>& matrix, const UniformScaling<Scalar>& s)
{ return matrix.derived() * s.factor(); }
/** Constructs a uniform scaling from scale factor \a s */
static inline UniformScaling<float> Scaling(float s) { return UniformScaling<float>(s); }
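A sketch of the expression the free operator enables (illustrative):

Eigen::Matrix3d M = Eigen::Matrix3d::Identity();
Eigen::Matrix3d Ms = M * Eigen::Scaling(2.0);  // every coefficient scaled by 2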

View File

@@ -1367,7 +1367,7 @@ struct transform_right_product_impl< TransformType, MatrixType, 2, 1> // rhs is
EIGEN_STATIC_ASSERT(OtherRows==Dim, YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES);
Matrix<typename ResultType::Scalar, Dim+1, 1> rhs;
rhs << other,1;
rhs.template head<Dim>() = other; rhs[Dim] = typename ResultType::Scalar(1);
Matrix<typename ResultType::Scalar, WorkingRows, 1> res(T.matrix() * rhs);
return res.template head<Dim>();
}

View File

@@ -108,7 +108,7 @@ struct hseq_side_dependent_impl<VectorsType, CoeffsType, OnTheRight>
template<typename OtherScalarType, typename MatrixType> struct matrix_type_times_scalar_type
{
typedef typename scalar_product_traits<OtherScalarType, typename MatrixType::Scalar>::ReturnType
typedef typename ScalarBinaryOpTraits<OtherScalarType, typename MatrixType::Scalar>::ReturnType
ResultScalar;
typedef Matrix<ResultScalar, MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime,
0, MatrixType::MaxRowsAtCompileTime, MatrixType::MaxColsAtCompileTime> Type;

View File

@@ -91,10 +91,10 @@ protected:
// Specialization for "dst = dec.solveWithGuess(rhs)"
// NOTE we need to specialize it for Dense2Dense to avoid an ambiguous specialization error, and a Sparse2Sparse specialization must exist somewhere
template<typename DstXprType, typename DecType, typename RhsType, typename GuessType, typename Scalar>
struct Assignment<DstXprType, SolveWithGuess<DecType,RhsType,GuessType>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
struct Assignment<DstXprType, SolveWithGuess<DecType,RhsType,GuessType>, internal::assign_op<Scalar,Scalar>, Dense2Dense, Scalar>
{
typedef SolveWithGuess<DecType,RhsType,GuessType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
// FIXME shall we resize dst here?
dst = src.guess();
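This is the path taken by iterative solvers; a sketch (matrix and vectors assumed set up, BiCGSTAB used only as an example solver):

Eigen::SparseMatrix<double> A;  // assumed filled and square
Eigen::VectorXd b, x0;          // assumed sized consistently with A
Eigen::BiCGSTAB<Eigen::SparseMatrix<double> > solver(A);
Eigen::VectorXd x = solver.solveWithGuess(b, x0);  // dst is seeded with x0 first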

View File

@@ -839,12 +839,12 @@ namespace internal {
/***** Implementation of inverse() *****************************************************/
template<typename DstXprType, typename MatrixType, typename Scalar>
struct Assignment<DstXprType, Inverse<FullPivLU<MatrixType> >, internal::assign_op<Scalar>, Dense2Dense, Scalar>
template<typename DstXprType, typename MatrixType>
struct Assignment<DstXprType, Inverse<FullPivLU<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename FullPivLU<MatrixType>::Scalar>, Dense2Dense>
{
typedef FullPivLU<MatrixType> LuType;
typedef Inverse<LuType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename MatrixType::Scalar> &)
{
dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
}
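The same pattern recurs for the other decompositions below; a sketch of the statement that triggers it:

Eigen::MatrixXd A = Eigen::MatrixXd::Random(4,4);
Eigen::MatrixXd Ainv = A.fullPivLu().inverse();  // Inverse<FullPivLU> assigned to a dense matrix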

View File

@@ -286,11 +286,11 @@ struct compute_inverse_and_det_with_check<MatrixType, ResultType, 4>
namespace internal {
// Specialization for "dense = dense_xpr.inverse()"
template<typename DstXprType, typename XprType, typename Scalar>
struct Assignment<DstXprType, Inverse<XprType>, internal::assign_op<Scalar>, Dense2Dense, Scalar>
template<typename DstXprType, typename XprType>
struct Assignment<DstXprType, Inverse<XprType>, internal::assign_op<typename DstXprType::Scalar,typename XprType::Scalar>, Dense2Dense>
{
typedef Inverse<XprType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename XprType::Scalar> &)
{
// FIXME shall we resize dst here?
const int Size = EIGEN_PLAIN_ENUM_MIN(XprType::ColsAtCompileTime,DstXprType::ColsAtCompileTime);

View File

@@ -525,12 +525,12 @@ MatrixType PartialPivLU<MatrixType>::reconstructedMatrix() const
namespace internal {
/***** Implementation of inverse() *****************************************************/
template<typename DstXprType, typename MatrixType, typename Scalar>
struct Assignment<DstXprType, Inverse<PartialPivLU<MatrixType> >, internal::assign_op<Scalar>, Dense2Dense, Scalar>
template<typename DstXprType, typename MatrixType>
struct Assignment<DstXprType, Inverse<PartialPivLU<MatrixType> >, internal::assign_op<typename DstXprType::Scalar,typename PartialPivLU<MatrixType>::Scalar>, Dense2Dense>
{
typedef PartialPivLU<MatrixType> LuType;
typedef Inverse<LuType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename LuType::Scalar> &)
{
dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
}

View File

@@ -183,7 +183,7 @@ class PardisoImpl : public SparseSolverBase<Derived>
{
if(m_isInitialized) // Factorization ran at least once
{
internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, -1, m_size,0, 0, 0, m_perm.data(), 0,
internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, -1, internal::convert_index<StorageIndex>(m_size),0, 0, 0, m_perm.data(), 0,
m_iparm.data(), m_msglvl, NULL, NULL);
m_isInitialized = false;
}
@@ -194,11 +194,11 @@ class PardisoImpl : public SparseSolverBase<Derived>
m_type = type;
bool symmetric = std::abs(m_type) < 10;
m_iparm[0] = 1; // No solver default
m_iparm[1] = 3; // use Metis for the ordering
m_iparm[2] = 1; // Numbers of processors, value of OMP_NUM_THREADS
m_iparm[1] = 2; // use Metis for the ordering
m_iparm[2] = 0; // Reserved. Set to zero. (??Numbers of processors, value of OMP_NUM_THREADS??)
m_iparm[3] = 0; // No iterative-direct algorithm
m_iparm[4] = 0; // No user fill-in reducing permutation
m_iparm[5] = 0; // Write solution into x
m_iparm[5] = 0; // Write solution into x, b is left unchanged
m_iparm[6] = 0; // Not in use
m_iparm[7] = 2; // Max numbers of iterative refinement steps
m_iparm[8] = 0; // Not in use
@@ -219,7 +219,8 @@ class PardisoImpl : public SparseSolverBase<Derived>
m_iparm[26] = 0; // No matrix checker
m_iparm[27] = (sizeof(RealScalar) == 4) ? 1 : 0;
m_iparm[34] = 1; // C indexing
m_iparm[59] = 1; // Automatic switch between In-Core and Out-of-Core modes
m_iparm[36] = 0; // CSR
m_iparm[59] = 0; // 0 - In-Core ; 1 - Automatic switch between In-Core and Out-of-Core modes ; 2 - Out-of-Core
memset(m_pt, 0, sizeof(m_pt));
}
@@ -246,7 +247,7 @@ class PardisoImpl : public SparseSolverBase<Derived>
mutable SparseMatrixType m_matrix;
mutable ComputationInfo m_info;
bool m_analysisIsOk, m_factorizationIsOk;
Index m_type, m_msglvl;
StorageIndex m_type, m_msglvl;
mutable void *m_pt[64];
mutable ParameterType m_iparm;
mutable IntColVectorType m_perm;
@@ -265,10 +266,9 @@ Derived& PardisoImpl<Derived>::compute(const MatrixType& a)
derived().getMatrix(a);
Index error;
error = internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, 12, m_size,
error = internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, 12, internal::convert_index<StorageIndex>(m_size),
m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(),
m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL);
manageErrorCode(error);
m_analysisIsOk = true;
m_factorizationIsOk = true;
@@ -287,7 +287,7 @@ Derived& PardisoImpl<Derived>::analyzePattern(const MatrixType& a)
derived().getMatrix(a);
Index error;
error = internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, 11, m_size,
error = internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, 11, internal::convert_index<StorageIndex>(m_size),
m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(),
m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL);
@@ -306,8 +306,8 @@ Derived& PardisoImpl<Derived>::factorize(const MatrixType& a)
derived().getMatrix(a);
Index error;
error = internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, 22, m_size,
Index error;
error = internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, 22, internal::convert_index<StorageIndex>(m_size),
m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(),
m_perm.data(), 0, m_iparm.data(), m_msglvl, NULL, NULL);
@@ -354,9 +354,9 @@ void PardisoImpl<Derived>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBase
}
Index error;
error = internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, 33, m_size,
error = internal::pardiso_run_selector<StorageIndex>::run(m_pt, 1, 1, m_type, 33, internal::convert_index<StorageIndex>(m_size),
m_matrix.valuePtr(), m_matrix.outerIndexPtr(), m_matrix.innerIndexPtr(),
m_perm.data(), nrhs, m_iparm.data(), m_msglvl,
m_perm.data(), internal::convert_index<StorageIndex>(nrhs), m_iparm.data(), m_msglvl,
rhs_ptr, x.derived().data());
manageErrorCode(error);
@@ -371,6 +371,9 @@ void PardisoImpl<Derived>::_solve_impl(const MatrixBase<BDerived> &b, MatrixBase
* using the Intel MKL PARDISO library. The sparse matrix A must be square and invertible.
* The vectors or matrices X and B can be either dense or sparse.
*
* By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set:
* \code solver.pardisoParameterArray()[59] = 1; \endcode
*
* \tparam _MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
*
* \implsparsesolverconcept
@@ -421,6 +424,9 @@ class PardisoLU : public PardisoImpl< PardisoLU<MatrixType> >
* using the Intel MKL PARDISO library. The sparse matrix A must be selfadjoint and positive definite.
* The vectors or matrices X and B can be either dense or sparse.
*
* By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set:
* \code solver.pardisoParameterArray()[59] = 1; \endcode
*
* \tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
* \tparam UpLo can be any bitwise combination of Upper, Lower. The default is Upper, meaning only the upper triangular part has to be used.
* Upper|Lower can be used to tell both triangular parts can be used as input.
@@ -480,6 +486,9 @@ class PardisoLLT : public PardisoImpl< PardisoLLT<MatrixType,_UpLo> >
* For complex matrices, A can also be symmetric only, see the \a Options template parameter.
* The vectors or matrices X and B can be either dense or sparse.
*
* By default, it runs in in-core mode. To enable PARDISO's out-of-core feature, set:
* \code solver.pardisoParameterArray()[59] = 1; \endcode
*
* \tparam MatrixType the type of the sparse matrix A, it must be a SparseMatrix<>
* \tparam Options can be any bitwise combination of Upper, Lower, and Symmetric. The default is Upper, meaning only the upper triangular part has to be used.
* Symmetric can be used for symmetric, non-selfadjoint complex matrices, the default being to assume a selfadjoint matrix.
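A sketch of enabling the out-of-core mode documented above (requires the MKL PardisoSupport module; A and b assumed set up):

Eigen::SparseMatrix<double> A;  // assumed filled, square and invertible
Eigen::VectorXd b;              // assumed sized to A.rows()
Eigen::PardisoLU<Eigen::SparseMatrix<double> > solver;
solver.pardisoParameterArray()[59] = 1;  // automatic in-core/out-of-core switch
solver.compute(A);
Eigen::VectorXd x = solver.solve(b);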

View File

@@ -598,11 +598,11 @@ void ColPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType &
namespace internal {
template<typename DstXprType, typename MatrixType, typename Scalar>
struct Assignment<DstXprType, Inverse<ColPivHouseholderQR<MatrixType> >, internal::assign_op<Scalar>, Dense2Dense, Scalar>
struct Assignment<DstXprType, Inverse<ColPivHouseholderQR<MatrixType> >, internal::assign_op<Scalar,Scalar>, Dense2Dense, Scalar>
{
typedef ColPivHouseholderQR<MatrixType> QrType;
typedef Inverse<QrType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
}

View File

@@ -510,11 +510,11 @@ void CompleteOrthogonalDecomposition<_MatrixType>::_solve_impl(
namespace internal {
template<typename DstXprType, typename MatrixType, typename Scalar>
struct Assignment<DstXprType, Inverse<CompleteOrthogonalDecomposition<MatrixType> >, internal::assign_op<Scalar>, Dense2Dense, Scalar>
struct Assignment<DstXprType, Inverse<CompleteOrthogonalDecomposition<MatrixType> >, internal::assign_op<Scalar,Scalar>, Dense2Dense, Scalar>
{
typedef CompleteOrthogonalDecomposition<MatrixType> CodType;
typedef Inverse<CodType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.rows()));
}

View File

@@ -560,11 +560,11 @@ void FullPivHouseholderQR<_MatrixType>::_solve_impl(const RhsType &rhs, DstType
namespace internal {
template<typename DstXprType, typename MatrixType, typename Scalar>
struct Assignment<DstXprType, Inverse<FullPivHouseholderQR<MatrixType> >, internal::assign_op<Scalar>, Dense2Dense, Scalar>
struct Assignment<DstXprType, Inverse<FullPivHouseholderQR<MatrixType> >, internal::assign_op<Scalar,Scalar>, Dense2Dense, Scalar>
{
typedef FullPivHouseholderQR<MatrixType> QrType;
typedef Inverse<QrType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
dst = src.nestedExpression().solve(MatrixType::Identity(src.rows(), src.cols()));
}

View File

@@ -419,38 +419,6 @@ struct svd_precondition_2x2_block_to_be_real<MatrixType, QRPreconditioner, true>
}
};
template<typename MatrixType, typename RealScalar, typename Index>
void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q,
JacobiRotation<RealScalar> *j_left,
JacobiRotation<RealScalar> *j_right)
{
using std::sqrt;
using std::abs;
Matrix<RealScalar,2,2> m;
m << numext::real(matrix.coeff(p,p)), numext::real(matrix.coeff(p,q)),
numext::real(matrix.coeff(q,p)), numext::real(matrix.coeff(q,q));
JacobiRotation<RealScalar> rot1;
RealScalar t = m.coeff(0,0) + m.coeff(1,1);
RealScalar d = m.coeff(1,0) - m.coeff(0,1);
if(d == RealScalar(0))
{
rot1.s() = RealScalar(0);
rot1.c() = RealScalar(1);
}
else
{
// If d!=0, then t/d cannot overflow because the magnitude of the
// entries forming d are not too small compared to the ones forming t.
RealScalar u = t / d;
RealScalar tmp = sqrt(RealScalar(1) + numext::abs2(u));
rot1.s() = RealScalar(1) / tmp;
rot1.c() = u / tmp;
}
m.applyOnTheLeft(0,1,rot1);
j_right->makeJacobi(m,0,1);
*j_left = rot1 * j_right->transpose();
}
template<typename _MatrixType, int QRPreconditioner>
struct traits<JacobiSVD<_MatrixType,QRPreconditioner> >
{

View File

@@ -34,8 +34,8 @@ template<typename OtherDerived>
inline Derived& SparseMatrixBase<Derived>::operator=(const SparseMatrixBase<OtherDerived>& other)
{
// by default sparse evaluation does not alias, so we can safely bypass the generic call_assignment routine
internal::Assignment<Derived,OtherDerived,internal::assign_op<Scalar> >
::run(derived(), other.derived(), internal::assign_op<Scalar>());
internal::Assignment<Derived,OtherDerived,internal::assign_op<Scalar,typename OtherDerived::Scalar> >
::run(derived(), other.derived(), internal::assign_op<Scalar,typename OtherDerived::Scalar>());
return derived();
}
@@ -127,7 +127,7 @@ void assign_sparse_to_sparse(DstXprType &dst, const SrcXprType &src)
template< typename DstXprType, typename SrcXprType, typename Functor, typename Scalar>
struct Assignment<DstXprType, SrcXprType, Functor, Sparse2Sparse, Scalar>
{
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{
assign_sparse_to_sparse(dst.derived(), src.derived());
}
@@ -141,7 +141,7 @@ struct Assignment<DstXprType, SrcXprType, Functor, Sparse2Dense, Scalar>
{
eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols());
if(internal::is_same<Functor,internal::assign_op<Scalar> >::value)
if(internal::is_same<Functor,internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> >::value)
dst.setZero();
internal::evaluator<SrcXprType> srcEval(src);
@@ -156,10 +156,10 @@ struct Assignment<DstXprType, SrcXprType, Functor, Sparse2Dense, Scalar>
// Specialization for "dst = dec.solve(rhs)"
// NOTE we need to specialize it for Sparse2Sparse to avoid ambiguous specialization error
template<typename DstXprType, typename DecType, typename RhsType, typename Scalar>
struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar>, Sparse2Sparse, Scalar>
struct Assignment<DstXprType, Solve<DecType,RhsType>, internal::assign_op<Scalar,Scalar>, Sparse2Sparse, Scalar>
{
typedef Solve<DecType,RhsType> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
{
src.dec()._solve_impl(src.rhs(), dst);
}
@@ -176,7 +176,7 @@ struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Sparse, Scalar>
typedef Array<StorageIndex,Dynamic,1> ArrayXI;
typedef Array<Scalar,Dynamic,1> ArrayXS;
template<int Options>
static void run(SparseMatrix<Scalar,Options,StorageIndex> &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
static void run(SparseMatrix<Scalar,Options,StorageIndex> &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{
Index size = src.diagonal().size();
dst.makeCompressed();
@@ -187,15 +187,15 @@ struct Assignment<DstXprType, SrcXprType, Functor, Diagonal2Sparse, Scalar>
}
template<typename DstDerived>
static void run(SparseMatrixBase<DstDerived> &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
static void run(SparseMatrixBase<DstDerived> &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{
dst.diagonal() = src.diagonal();
}
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar> &/*func*/)
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{ dst.diagonal() += src.diagonal(); }
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar> &/*func*/)
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{ dst.diagonal() -= src.diagonal(); }
};
} // end namespace internal

View File

@@ -28,6 +28,9 @@ namespace Eigen {
// generic sparse
// 4 - dense op dense product dense
// generic dense
//
// TODO to ease the compiler's job, we could specialize product/quotient with a scalar
// and fall back to the cwise-unary evaluator using bind1st_op and bind2nd_op.
template<typename BinaryOp, typename Lhs, typename Rhs>
class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Sparse>
@@ -323,12 +326,12 @@ protected:
};
// "sparse .* sparse"
template<typename T, typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs>, IteratorBased, IteratorBased>
: evaluator_base<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs> >
template<typename T1, typename T2, typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs>, IteratorBased, IteratorBased>
: evaluator_base<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> >
{
protected:
typedef scalar_product_op<T> BinaryOp;
typedef scalar_product_op<T1,T2> BinaryOp;
typedef typename evaluator<Lhs>::InnerIterator LhsIterator;
typedef typename evaluator<Rhs>::InnerIterator RhsIterator;
typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
@@ -407,12 +410,12 @@ protected:
};
// "dense .* sparse"
template<typename T, typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs>, IndexBased, IteratorBased>
: evaluator_base<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs> >
template<typename T1, typename T2, typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs>, IndexBased, IteratorBased>
: evaluator_base<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> >
{
protected:
typedef scalar_product_op<T> BinaryOp;
typedef scalar_product_op<T1,T2> BinaryOp;
typedef evaluator<Lhs> LhsEvaluator;
typedef typename evaluator<Rhs>::InnerIterator RhsIterator;
typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
@@ -480,12 +483,12 @@ protected:
};
// "sparse .* dense"
template<typename T, typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs>, IteratorBased, IndexBased>
: evaluator_base<CwiseBinaryOp<scalar_product_op<T>, Lhs, Rhs> >
template<typename T1, typename T2, typename Lhs, typename Rhs>
struct binary_evaluator<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs>, IteratorBased, IndexBased>
: evaluator_base<CwiseBinaryOp<scalar_product_op<T1,T2>, Lhs, Rhs> >
{
protected:
typedef scalar_product_op<T> BinaryOp;
typedef scalar_product_op<T1,T2> BinaryOp;
typedef typename evaluator<Lhs>::InnerIterator LhsIterator;
typedef evaluator<Rhs> RhsEvaluator;
typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
@@ -579,7 +582,7 @@ template<typename Derived>
template<typename OtherDerived>
Derived& SparseMatrixBase<Derived>::operator+=(const DiagonalBase<OtherDerived>& other)
{
call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op<Scalar>());
call_assignment_no_alias(derived(), other.derived(), internal::add_assign_op<Scalar,typename OtherDerived::Scalar>());
return derived();
}
@@ -587,7 +590,7 @@ template<typename Derived>
template<typename OtherDerived>
Derived& SparseMatrixBase<Derived>::operator-=(const DiagonalBase<OtherDerived>& other)
{
call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op<Scalar>());
call_assignment_no_alias(derived(), other.derived(), internal::sub_assign_op<Scalar,typename OtherDerived::Scalar>());
return derived();
}
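These overloads make diagonal updates of a sparse matrix direct; a sketch:

Eigen::SparseMatrix<double> S(3,3);
S.setIdentity();
S += Eigen::VectorXd::Constant(3, 2.0).asDiagonal();  // add 2 to each diagonal entry
S -= Eigen::VectorXd::Ones(3).asDiagonal();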
@@ -600,31 +603,31 @@ SparseMatrixBase<Derived>::cwiseProduct(const MatrixBase<OtherDerived> &other) c
}
template<typename DenseDerived, typename SparseDerived>
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_sum_op<typename DenseDerived::Scalar>, const DenseDerived, const SparseDerived>
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_sum_op<typename DenseDerived::Scalar,typename SparseDerived::Scalar>, const DenseDerived, const SparseDerived>
operator+(const MatrixBase<DenseDerived> &a, const SparseMatrixBase<SparseDerived> &b)
{
return CwiseBinaryOp<internal::scalar_sum_op<typename DenseDerived::Scalar>, const DenseDerived, const SparseDerived>(a.derived(), b.derived());
return CwiseBinaryOp<internal::scalar_sum_op<typename DenseDerived::Scalar,typename SparseDerived::Scalar>, const DenseDerived, const SparseDerived>(a.derived(), b.derived());
}
template<typename SparseDerived, typename DenseDerived>
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_sum_op<typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_sum_op<typename SparseDerived::Scalar,typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>
operator+(const SparseMatrixBase<SparseDerived> &a, const MatrixBase<DenseDerived> &b)
{
return CwiseBinaryOp<internal::scalar_sum_op<typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>(a.derived(), b.derived());
return CwiseBinaryOp<internal::scalar_sum_op<typename SparseDerived::Scalar,typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>(a.derived(), b.derived());
}
template<typename DenseDerived, typename SparseDerived>
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_difference_op<typename DenseDerived::Scalar>, const DenseDerived, const SparseDerived>
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_difference_op<typename DenseDerived::Scalar,typename SparseDerived::Scalar>, const DenseDerived, const SparseDerived>
operator-(const MatrixBase<DenseDerived> &a, const SparseMatrixBase<SparseDerived> &b)
{
return CwiseBinaryOp<internal::scalar_difference_op<typename DenseDerived::Scalar>, const DenseDerived, const SparseDerived>(a.derived(), b.derived());
return CwiseBinaryOp<internal::scalar_difference_op<typename DenseDerived::Scalar,typename SparseDerived::Scalar>, const DenseDerived, const SparseDerived>(a.derived(), b.derived());
}
template<typename SparseDerived, typename DenseDerived>
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_difference_op<typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_difference_op<typename SparseDerived::Scalar,typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>
operator-(const SparseMatrixBase<SparseDerived> &a, const MatrixBase<DenseDerived> &b)
{
return CwiseBinaryOp<internal::scalar_difference_op<typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>(a.derived(), b.derived());
return CwiseBinaryOp<internal::scalar_difference_op<typename SparseDerived::Scalar,typename DenseDerived::Scalar>, const SparseDerived, const DenseDerived>(a.derived(), b.derived());
}
} // end namespace Eigen
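And the mixed dense/sparse operators above in action (a sketch):

Eigen::SparseMatrix<double> S(3,3);
S.insert(0,0) = 5.0;
Eigen::MatrixXd D = Eigen::MatrixXd::Ones(3,3);
Eigen::MatrixXd sum  = D + S;  // dense + sparse yields a dense expression
Eigen::MatrixXd diff = S - D;  // sparse - dense likewise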

View File

@@ -74,7 +74,7 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, t
// FIXME: what is the purpose of the following specialization? Is it for the BlockedSparse format?
// -> let's disable it for now as it is conflicting with generic scalar*matrix and matrix*scalar operators
// template<typename T1, typename T2/*, int _Options, typename _StrideType*/>
// struct scalar_product_traits<T1, Ref<T2/*, _Options, _StrideType*/> >
// struct ScalarBinaryOpTraits<T1, Ref<T2/*, _Options, _StrideType*/> >
// {
// enum {
// Defined = 1
@@ -97,7 +97,7 @@ struct sparse_time_dense_product_impl<SparseLhsType,DenseRhsType,DenseResType, A
for(Index j=0; j<lhs.outerSize(); ++j)
{
// typename Res::Scalar rhs_j = alpha * rhs.coeff(j,c);
typename internal::scalar_product_traits<AlphaType, typename Rhs::Scalar>::ReturnType rhs_j(alpha * rhs.coeff(j,c));
typename ScalarBinaryOpTraits<AlphaType, typename Rhs::Scalar>::ReturnType rhs_j(alpha * rhs.coeff(j,c));
for(LhsInnerIterator it(lhsEval,j); it ;++it)
res.coeffRef(it.index(),c) += it.value() * rhs_j;
}

View File

@@ -440,7 +440,7 @@ class SparseMatrix
template<typename InputIterators,typename DupFunctor>
void setFromTriplets(const InputIterators& begin, const InputIterators& end, DupFunctor dup_func);
void sumupDuplicates() { collapseDuplicates(internal::scalar_sum_op<Scalar>()); }
void sumupDuplicates() { collapseDuplicates(internal::scalar_sum_op<Scalar,Scalar>()); }
template<typename DupFunctor>
void collapseDuplicates(DupFunctor dup_func = DupFunctor());
@@ -979,7 +979,7 @@ template<typename Scalar, int _Options, typename _Index>
template<typename InputIterators>
void SparseMatrix<Scalar,_Options,_Index>::setFromTriplets(const InputIterators& begin, const InputIterators& end)
{
internal::set_from_triplets<InputIterators, SparseMatrix<Scalar,_Options,_Index> >(begin, end, *this, internal::scalar_sum_op<Scalar>());
internal::set_from_triplets<InputIterators, SparseMatrix<Scalar,_Options,_Index> >(begin, end, *this, internal::scalar_sum_op<Scalar,Scalar>());
}
/** The same as setFromTriplets but when duplicates are met the functor \a dup_func is applied:
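A sketch of both entry points (the duplicate handler uses a C++11 lambda; std::vector and Eigen/Sparse assumed included):

std::vector<Eigen::Triplet<double> > trips;
trips.push_back(Eigen::Triplet<double>(0, 0, 1.0));
trips.push_back(Eigen::Triplet<double>(0, 0, 2.0));  // duplicate coordinate
Eigen::SparseMatrix<double> S(3,3);
S.setFromTriplets(trips.begin(), trips.end());       // duplicates summed: S.coeff(0,0) == 3
S.setFromTriplets(trips.begin(), trips.end(),
                  [](const double&, const double& b) { return b; });  // keep the last value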

View File

@@ -256,7 +256,7 @@ template<typename Derived> class SparseMatrixBase
Derived& operator/=(const Scalar& other);
template<typename OtherDerived> struct CwiseProductDenseReturnType {
typedef CwiseBinaryOp<internal::scalar_product_op<typename internal::scalar_product_traits<
typedef CwiseBinaryOp<internal::scalar_product_op<typename ScalarBinaryOpTraits<
typename internal::traits<Derived>::Scalar,
typename internal::traits<OtherDerived>::Scalar
>::ReturnType>,

View File

@@ -99,10 +99,10 @@ struct generic_product_impl<Lhs, Rhs, SparseTriangularShape, SparseShape, Produc
// dense = sparse-product (can be sparse*sparse, sparse*perm, etc.)
template< typename DstXprType, typename Lhs, typename Rhs>
struct Assignment<DstXprType, Product<Lhs,Rhs,AliasFreeProduct>, internal::assign_op<typename DstXprType::Scalar>, Sparse2Dense>
struct Assignment<DstXprType, Product<Lhs,Rhs,AliasFreeProduct>, internal::assign_op<typename DstXprType::Scalar,typename Product<Lhs,Rhs,AliasFreeProduct>::Scalar>, Sparse2Dense>
{
typedef Product<Lhs,Rhs,AliasFreeProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &)
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &)
{
generic_product_impl<Lhs, Rhs>::evalTo(dst,src.lhs(),src.rhs());
}
@@ -110,10 +110,10 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,AliasFreeProduct>, internal::assig
// dense += sparse-product (can be sparse*sparse, sparse*perm, etc.)
template< typename DstXprType, typename Lhs, typename Rhs>
struct Assignment<DstXprType, Product<Lhs,Rhs,AliasFreeProduct>, internal::add_assign_op<typename DstXprType::Scalar>, Sparse2Dense>
struct Assignment<DstXprType, Product<Lhs,Rhs,AliasFreeProduct>, internal::add_assign_op<typename DstXprType::Scalar,typename Product<Lhs,Rhs,AliasFreeProduct>::Scalar>, Sparse2Dense>
{
typedef Product<Lhs,Rhs,AliasFreeProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar> &)
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &)
{
generic_product_impl<Lhs, Rhs>::addTo(dst,src.lhs(),src.rhs());
}
@@ -121,10 +121,10 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,AliasFreeProduct>, internal::add_a
// dense -= sparse-product (can be sparse*sparse, sparse*perm, etc.)
template< typename DstXprType, typename Lhs, typename Rhs>
struct Assignment<DstXprType, Product<Lhs,Rhs,AliasFreeProduct>, internal::sub_assign_op<typename DstXprType::Scalar>, Sparse2Dense>
struct Assignment<DstXprType, Product<Lhs,Rhs,AliasFreeProduct>, internal::sub_assign_op<typename DstXprType::Scalar,typename Product<Lhs,Rhs,AliasFreeProduct>::Scalar>, Sparse2Dense>
{
typedef Product<Lhs,Rhs,AliasFreeProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar> &)
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &)
{
generic_product_impl<Lhs, Rhs>::subTo(dst,src.lhs(),src.rhs());
}
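The three specializations map one-to-one onto the user-level statements (a sketch):

Eigen::SparseMatrix<double> A(3,3), B(3,3);  // assumed filled
Eigen::MatrixXd D = Eigen::MatrixXd::Zero(3,3);
D  = A * B;  // assign_op path
D += A * B;  // add_assign_op path
D -= A * B;  // sub_assign_op path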

View File

@@ -223,13 +223,13 @@ struct Assignment<DstXprType, SrcXprType, Functor, SparseSelfAdjoint2Sparse, Sca
{
typedef typename DstXprType::StorageIndex StorageIndex;
template<typename DestScalar,int StorageOrder>
static void run(SparseMatrix<DestScalar,StorageOrder,StorageIndex> &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
static void run(SparseMatrix<DestScalar,StorageOrder,StorageIndex> &dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{
internal::permute_symm_to_fullsymm<SrcXprType::Mode>(src.matrix(), dst);
}
template<typename DestScalar>
static void run(DynamicSparseMatrix<DestScalar,ColMajor,StorageIndex>& dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar> &/*func*/)
static void run(DynamicSparseMatrix<DestScalar,ColMajor,StorageIndex>& dst, const SrcXprType &src, const internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar> &/*func*/)
{
// TODO directly evaluate into dst;
SparseMatrix<DestScalar,ColMajor,StorageIndex> tmp(dst.rows(),dst.cols());
@@ -586,12 +586,12 @@ class SparseSymmetricPermutationProduct
namespace internal {
template<typename DstXprType, typename MatrixType, int Mode, typename Scalar>
struct Assignment<DstXprType, SparseSymmetricPermutationProduct<MatrixType,Mode>, internal::assign_op<Scalar>, Sparse2Sparse>
struct Assignment<DstXprType, SparseSymmetricPermutationProduct<MatrixType,Mode>, internal::assign_op<Scalar,typename MatrixType::Scalar>, Sparse2Sparse>
{
typedef SparseSymmetricPermutationProduct<MatrixType,Mode> SrcXprType;
typedef typename DstXprType::StorageIndex DstIndex;
template<int Options>
static void run(SparseMatrix<Scalar,Options,DstIndex> &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
static void run(SparseMatrix<Scalar,Options,DstIndex> &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename MatrixType::Scalar> &)
{
// internal::permute_symm_to_fullsymm<Mode>(m_matrix,_dest,m_perm.indices().data());
SparseMatrix<Scalar,(Options&RowMajor)==RowMajor ? ColMajor : RowMajor, DstIndex> tmp;
@@ -600,7 +600,7 @@ struct Assignment<DstXprType, SparseSymmetricPermutationProduct<MatrixType,Mode>
}
template<typename DestType,unsigned int DestMode>
static void run(SparseSelfAdjointView<DestType,DestMode>& dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
static void run(SparseSelfAdjointView<DestType,DestMode>& dst, const SrcXprType &src, const internal::assign_op<Scalar,typename MatrixType::Scalar> &)
{
internal::permute_symm_to_symm<Mode,DestMode>(src.matrix(),dst.matrix(),src.perm().indices().data());
}
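A sketch of the statements these paths serve (S and P assumed initialized):

Eigen::SparseMatrix<double> S(4,4);          // assumed selfadjoint, upper part stored
Eigen::PermutationMatrix<Eigen::Dynamic> P;  // assumed sized to 4
Eigen::SparseMatrix<double> full, twisted;
full = S.selfadjointView<Eigen::Upper>();                  // permute_symm_to_fullsymm path
twisted = S.selfadjointView<Eigen::Upper>().twistedBy(P);  // SparseSymmetricPermutationProduct path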

View File

@@ -705,12 +705,12 @@ struct evaluator_traits<SparseQRMatrixQReturnType<SparseQRType> >
};
template< typename DstXprType, typename SparseQRType>
struct Assignment<DstXprType, SparseQRMatrixQReturnType<SparseQRType>, internal::assign_op<typename DstXprType::Scalar>, Sparse2Sparse>
struct Assignment<DstXprType, SparseQRMatrixQReturnType<SparseQRType>, internal::assign_op<typename DstXprType::Scalar,typename DstXprType::Scalar>, Sparse2Sparse>
{
typedef SparseQRMatrixQReturnType<SparseQRType> SrcXprType;
typedef typename DstXprType::Scalar Scalar;
typedef typename DstXprType::StorageIndex StorageIndex;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &/*func*/)
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &/*func*/)
{
typename DstXprType::PlainObject idMat(src.m_qr.rows(), src.m_qr.rows());
idMat.setIdentity();
@@ -721,12 +721,12 @@ struct Assignment<DstXprType, SparseQRMatrixQReturnType<SparseQRType>, internal:
};
template< typename DstXprType, typename SparseQRType>
struct Assignment<DstXprType, SparseQRMatrixQReturnType<SparseQRType>, internal::assign_op<typename DstXprType::Scalar>, Sparse2Dense>
struct Assignment<DstXprType, SparseQRMatrixQReturnType<SparseQRType>, internal::assign_op<typename DstXprType::Scalar,typename DstXprType::Scalar>, Sparse2Dense>
{
typedef SparseQRMatrixQReturnType<SparseQRType> SrcXprType;
typedef typename DstXprType::Scalar Scalar;
typedef typename DstXprType::StorageIndex StorageIndex;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &/*func*/)
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &/*func*/)
{
dst = src.m_qr.matrixQ() * DstXprType::Identity(src.m_qr.rows(), src.m_qr.rows());
}
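The two specializations let Q be materialized either sparse or dense (a sketch; the factorization input is assumed filled and compressed):

Eigen::SparseMatrix<double> A(4,4);  // assumed filled and compressed
Eigen::SparseQR<Eigen::SparseMatrix<double>, Eigen::COLAMDOrdering<int> > qr(A);
Eigen::SparseMatrix<double> Qs;
Qs = qr.matrixQ();  // Sparse2Sparse path: Q applied to a sparse identity
Eigen::MatrixXd Qd;
Qd = qr.matrixQ();  // Sparse2Dense path: Q applied to a dense identity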

View File

@@ -0,0 +1,54 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009-2010 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2013-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef EIGEN_REALSVD2X2_H
#define EIGEN_REALSVD2X2_H
namespace Eigen {
namespace internal {
template<typename MatrixType, typename RealScalar, typename Index>
void real_2x2_jacobi_svd(const MatrixType& matrix, Index p, Index q,
JacobiRotation<RealScalar> *j_left,
JacobiRotation<RealScalar> *j_right)
{
using std::sqrt;
using std::abs;
Matrix<RealScalar,2,2> m;
m << numext::real(matrix.coeff(p,p)), numext::real(matrix.coeff(p,q)),
numext::real(matrix.coeff(q,p)), numext::real(matrix.coeff(q,q));
JacobiRotation<RealScalar> rot1;
RealScalar t = m.coeff(0,0) + m.coeff(1,1);
RealScalar d = m.coeff(1,0) - m.coeff(0,1);
if(d == RealScalar(0))
{
rot1.s() = RealScalar(0);
rot1.c() = RealScalar(1);
}
else
{
// If d!=0, then t/d cannot overflow because the magnitude of the
// entries forming d are not too small compared to the ones forming t.
RealScalar u = t / d;
RealScalar tmp = sqrt(RealScalar(1) + numext::abs2(u));
rot1.s() = RealScalar(1) / tmp;
rot1.c() = u / tmp;
}
m.applyOnTheLeft(0,1,rot1);
j_right->makeJacobi(m,0,1);
*j_left = rot1 * j_right->transpose();
}
} // end namespace internal
} // end namespace Eigen
#endif // EIGEN_REALSVD2X2_H
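A sketch of how the factored-out helper is used, mirroring its callers in JacobiSVD and RealQZ:

Eigen::Matrix2d m;
m << 3, 1,
     2, 4;
Eigen::JacobiRotation<double> j_left, j_right;
Eigen::internal::real_2x2_jacobi_svd(m, 0, 1, &j_left, &j_right);
m.applyOnTheLeft(0, 1, j_left);
m.applyOnTheRight(0, 1, j_right);
// m is now diagonal, holding the singular values of the original block up to sign.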

View File

@@ -1,13 +1,14 @@
/** \returns an expression of the coefficient wise product of \c *this and \a other
*
* \sa MatrixBase::cwiseProduct
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const EIGEN_CWISE_PRODUCT_RETURN_TYPE(Derived,OtherDerived)
EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)
operator*(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
{
return EIGEN_CWISE_PRODUCT_RETURN_TYPE(Derived,OtherDerived)(derived(), other.derived());
return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)(derived(), other.derived());
}
/** \returns an expression of the coefficient wise quotient of \c *this and \a other
@@ -16,10 +17,10 @@ operator*(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived>
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_quotient_op<Scalar,typename OtherDerived::Scalar>, const Derived, const OtherDerived>
operator/(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
{
return CwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
return CwiseBinaryOp<internal::scalar_quotient_op<Scalar,typename OtherDerived::Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
}
/** \returns an expression of the coefficient-wise min of \c *this and \a other
@@ -29,14 +30,14 @@ operator/(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
*
* \sa max()
*/
EIGEN_MAKE_CWISE_BINARY_OP(min,internal::scalar_min_op)
EIGEN_MAKE_CWISE_BINARY_OP(min,min)
/** \returns an expression of the coefficient-wise min of \c *this and scalar \a other
*
* \sa max()
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar>, const Derived,
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived,
const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> >
#ifdef EIGEN_PARSED_BY_DOXYGEN
min
@@ -55,14 +56,14 @@ min
*
* \sa min()
*/
EIGEN_MAKE_CWISE_BINARY_OP(max,internal::scalar_max_op)
EIGEN_MAKE_CWISE_BINARY_OP(max,max)
/** \returns an expression of the coefficient-wise max of \c *this and scalar \a other
*
* \sa min()
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar>, const Derived,
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived,
const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> >
#ifdef EIGEN_PARSED_BY_DOXYGEN
max
@@ -81,27 +82,38 @@ max
* Example: \include Cwise_array_power_array.cpp
* Output: \verbinclude Cwise_array_power_array.out
*/
template<typename ExponentDerived>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const CwiseBinaryOp<internal::scalar_binary_pow_op<Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>
pow(const ArrayBase<ExponentDerived>& exponents) const
{
return CwiseBinaryOp<internal::scalar_binary_pow_op<Scalar, typename ExponentDerived::Scalar>, const Derived, const ExponentDerived>(
this->derived(),
exponents.derived()
);
}
EIGEN_MAKE_CWISE_BINARY_OP(pow,pow)
#ifndef EIGEN_PARSED_BY_DOXYGEN
EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(pow,pow)
#else
/** \returns an expression of the coefficients of \c *this raised to the constant power \a exponent
*
* \tparam T is the scalar type of \a exponent. It must be compatible with the scalar type of the given expression.
*
* This function computes the coefficient-wise power. The function MatrixBase::pow() in the
* unsupported module MatrixFunctions computes the matrix power.
*
* Example: \include Cwise_pow.cpp
* Output: \verbinclude Cwise_pow.out
*
* \sa ArrayBase::pow(ArrayBase), square(), cube(), exp(), log()
*/
template<typename T>
const CwiseBinaryOp<internal::scalar_pow_op<Scalar,T>,Derived,Constant<T> > pow(const T& exponent) const;
#endif
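Both pow overloads now go through the generic scalar-binary machinery; a sketch:

Eigen::ArrayXd a(3), e(3);
a << 1, 2, 3;
e << 2, 3, 2;
Eigen::ArrayXd p1 = a.pow(e);    // coefficient-wise a_i ^ e_i: {1, 8, 9}
Eigen::ArrayXd p2 = a.pow(2.0);  // constant exponent: {1, 4, 9}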
// TODO code generating macros could be moved to Macros.h and could include generation of documentation
#define EIGEN_MAKE_CWISE_COMP_OP(OP, COMPARATOR) \
template<typename OtherDerived> \
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const OtherDerived> \
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_cmp_op<Scalar, typename OtherDerived::Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const OtherDerived> \
OP(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
{ \
return CwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const OtherDerived>(derived(), other.derived()); \
return CwiseBinaryOp<internal::scalar_cmp_op<Scalar, typename OtherDerived::Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const OtherDerived>(derived(), other.derived()); \
}\
typedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> > Cmp ## COMPARATOR ## ReturnType; \
typedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_ ## COMPARATOR>, const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject>, const Derived > RCmp ## COMPARATOR ## ReturnType; \
typedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar,Scalar, internal::cmp_ ## COMPARATOR>, const Derived, const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> > Cmp ## COMPARATOR ## ReturnType; \
typedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar,Scalar, internal::cmp_ ## COMPARATOR>, const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject>, const Derived > RCmp ## COMPARATOR ## ReturnType; \
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Cmp ## COMPARATOR ## ReturnType \
OP(const Scalar& s) const { \
return this->OP(Derived::PlainObject::Constant(rows(), cols(), s)); \
@@ -113,10 +125,10 @@ OP(const Scalar& s, const Derived& d) { \
#define EIGEN_MAKE_CWISE_COMP_R_OP(OP, R_OP, RCOMPARATOR) \
template<typename OtherDerived> \
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_##RCOMPARATOR>, const OtherDerived, const Derived> \
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_cmp_op<typename OtherDerived::Scalar, Scalar, internal::cmp_##RCOMPARATOR>, const OtherDerived, const Derived> \
OP(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
{ \
return CwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_##RCOMPARATOR>, const OtherDerived, const Derived>(other.derived(), derived()); \
return CwiseBinaryOp<internal::scalar_cmp_op<typename OtherDerived::Scalar, Scalar, internal::cmp_##RCOMPARATOR>, const OtherDerived, const Derived>(other.derived(), derived()); \
} \
EIGEN_DEVICE_FUNC \
inline const RCmp ## RCOMPARATOR ## ReturnType \
@@ -199,48 +211,63 @@ EIGEN_MAKE_CWISE_COMP_OP(operator!=, NEQ)
#undef EIGEN_MAKE_CWISE_COMP_R_OP
// scalar addition
#ifndef EIGEN_PARSED_BY_DOXYGEN
EIGEN_MAKE_SCALAR_BINARY_OP(operator+,sum)
#else
/** \returns an expression of \c *this with each coeff incremented by the constant \a scalar
*
* \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression.
*
* Example: \include Cwise_plus.cpp
* Output: \verbinclude Cwise_plus.out
*
* \sa operator+=(), operator-()
*/
EIGEN_DEVICE_FUNC
inline const CwiseUnaryOp<internal::scalar_add_op<Scalar>, const Derived>
operator+(const Scalar& scalar) const
{
return CwiseUnaryOp<internal::scalar_add_op<Scalar>, const Derived>(derived(), internal::scalar_add_op<Scalar>(scalar));
}
EIGEN_DEVICE_FUNC
friend inline const CwiseUnaryOp<internal::scalar_add_op<Scalar>, const Derived>
operator+(const Scalar& scalar,const EIGEN_CURRENT_STORAGE_BASE_CLASS<Derived>& other)
{
return other + scalar;
}
template<typename T>
const CwiseBinaryOp<internal::scalar_sum_op<Scalar,T>,Derived,Constant<T> > operator+(const T& scalar) const;
/** \returns an expression of \a expr with each coeff incremented by the constant \a scalar
*
* \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression.
*/
template<typename T> friend
const CwiseBinaryOp<internal::scalar_sum_op<T,Scalar>,Constant<T>,Derived> operator+(const T& scalar, const StorageBaseType& expr);
#endif
#ifndef EIGEN_PARSED_BY_DOXYGEN
EIGEN_MAKE_SCALAR_BINARY_OP(operator-,difference)
#else
/** \returns an expression of \c *this with each coeff decremented by the constant \a scalar
*
* \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression.
*
* Example: \include Cwise_minus.cpp
* Output: \verbinclude Cwise_minus.out
*
* \sa operator+(), operator-=()
* \sa operator+=(), operator-()
*/
EIGEN_DEVICE_FUNC
inline const CwiseUnaryOp<internal::scalar_sub_op<Scalar>, const Derived>
operator-(const Scalar& scalar) const
{
return CwiseUnaryOp<internal::scalar_sub_op<Scalar>, const Derived>(derived(), internal::scalar_sub_op<Scalar>(scalar));;
}
template<typename T>
const CwiseBinaryOp<internal::scalar_difference_op<Scalar,T>,Derived,Constant<T> > operator-(const T& scalar) const;
/** \returns an expression of the constant matrix of value \a scalar decremented by the coefficients of \a expr
*
* \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression.
*/
template<typename T> friend
const CwiseBinaryOp<internal::scalar_difference_op<T,Scalar>,Constant<T>,Derived> operator-(const T& scalar, const StorageBaseType& expr);
#endif
EIGEN_DEVICE_FUNC
friend inline const CwiseUnaryOp<internal::scalar_rsub_op<Scalar>, const Derived>
operator-(const Scalar& scalar,const EIGEN_CURRENT_STORAGE_BASE_CLASS<Derived>& other)
{
return CwiseUnaryOp<internal::scalar_rsub_op<Scalar>, const Derived>(other.derived(), internal::scalar_rsub_op<Scalar>(scalar));;
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
EIGEN_MAKE_SCALAR_BINARY_OP_ONTHELEFT(operator/,quotient)
#else
/**
* \brief Component-wise division of the scalar \a s by array elements of \a a.
*
* \tparam T is the scalar type of \a s. It must be compatible with the scalar type of the given array expression (\c Derived::Scalar).
*/
template<typename T> friend
inline const CwiseBinaryOp<internal::scalar_quotient_op<T,Scalar>,Constant<T>,Derived>
operator/(const T& s,const StorageBaseType& a);
#endif
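Taken together, these declarations replace the old CwiseUnaryOp-based scalar forms: a scalar on either side now produces a genuine CwiseBinaryOp against a Constant<T> operand, and the two scalar types may differ. A small sketch of what this enables, assuming the changeset is applied (names are made up):

#include <Eigen/Dense>
#include <complex>
int main() {
  Eigen::ArrayXcd a = Eigen::ArrayXcd::Random(3);
  double s = 1.5;                   // real scalar mixed with a complex array
  Eigen::ArrayXcd x = a + s;        // scalar_sum_op<complex<double>,double>
  Eigen::ArrayXcd y = s - a;        // scalar_difference_op<double,complex<double> >
  Eigen::ArrayXcd z = s / a;        // scalar_quotient_op<double,complex<double> >
  return (x + y + z).size() == 3 ? 0 : 1;
}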
/** \returns an expression of the coefficient-wise && operator of *this and \a other
*

View File

@@ -26,7 +26,6 @@ typedef CwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, const Derived> LgammaRe
typedef CwiseUnaryOp<internal::scalar_digamma_op<Scalar>, const Derived> DigammaReturnType;
typedef CwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived> ErfReturnType;
typedef CwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived> ErfcReturnType;
typedef CwiseUnaryOp<internal::scalar_pow_op<Scalar>, const Derived> PowReturnType;
typedef CwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived> SquareReturnType;
typedef CwiseUnaryOp<internal::scalar_cube_op<Scalar>, const Derived> CubeReturnType;
typedef CwiseUnaryOp<internal::scalar_round_op<Scalar>, const Derived> RoundReturnType;
@@ -248,6 +247,7 @@ tan() const
*
* \sa tan(), asin(), acos()
*/
EIGEN_DEVICE_FUNC
inline const AtanReturnType
atan() const
{
@@ -289,6 +289,7 @@ asin() const
*
* \sa tan(), sinh(), cosh()
*/
EIGEN_DEVICE_FUNC
inline const TanhReturnType
tanh() const
{
@@ -302,6 +303,7 @@ tanh() const
*
* \sa sin(), tanh(), cosh()
*/
EIGEN_DEVICE_FUNC
inline const SinhReturnType
sinh() const
{
@@ -315,6 +317,7 @@ sinh() const
*
* \sa tan(), sinh(), cosh()
*/
EIGEN_DEVICE_FUNC
inline const CoshReturnType
cosh() const
{
@@ -332,6 +335,7 @@ cosh() const
*
* \sa digamma()
*/
EIGEN_DEVICE_FUNC
inline const LgammaReturnType
lgamma() const
{
@@ -346,6 +350,7 @@ lgamma() const
*
* \sa Eigen::digamma(), Eigen::polygamma(), lgamma()
*/
EIGEN_DEVICE_FUNC
inline const DigammaReturnType
digamma() const
{
@@ -364,6 +369,7 @@ digamma() const
*
* \sa erfc()
*/
EIGEN_DEVICE_FUNC
inline const ErfReturnType
erf() const
{
@@ -382,30 +388,13 @@ erf() const
*
* \sa erf()
*/
EIGEN_DEVICE_FUNC
inline const ErfcReturnType
erfc() const
{
return ErfcReturnType(derived());
}
/** \returns an expression of the coefficient-wise power of *this to the given exponent.
*
* This function computes the coefficient-wise power. The function MatrixBase::pow() in the
* unsupported module MatrixFunctions computes the matrix power.
*
* Example: \include Cwise_pow.cpp
* Output: \verbinclude Cwise_pow.out
*
* \sa exp(), log()
*/
EIGEN_DEVICE_FUNC
inline const PowReturnType
pow(const Scalar& exponent) const
{
return PowReturnType(derived(), internal::scalar_pow_op<Scalar>(exponent));
}
/** \returns an expression of the coefficient-wise inverse of *this.
*
* Example: \include Cwise_inverse.cpp
@@ -455,6 +444,7 @@ cube() const
*
* \sa ceil(), floor()
*/
EIGEN_DEVICE_FUNC
inline const RoundReturnType
round() const
{
@@ -468,6 +458,7 @@ round() const
*
* \sa ceil(), round()
*/
EIGEN_DEVICE_FUNC
inline const FloorReturnType
floor() const
{
@@ -481,6 +472,7 @@ floor() const
*
* \sa floor(), round()
*/
EIGEN_DEVICE_FUNC
inline const CeilReturnType
ceil() const
{
@@ -494,6 +486,7 @@ ceil() const
*
* \sa isfinite(), isinf()
*/
EIGEN_DEVICE_FUNC
inline const IsNaNReturnType
isNaN() const
{
@@ -507,6 +500,7 @@ isNaN() const
*
* \sa isnan(), isfinite()
*/
EIGEN_DEVICE_FUNC
inline const IsInfReturnType
isInf() const
{
@@ -520,6 +514,7 @@ isInf() const
*
* \sa isnan(), isinf()
*/
EIGEN_DEVICE_FUNC
inline const IsFiniteReturnType
isFinite() const
{

View File

@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
@@ -16,7 +16,7 @@
*
* \sa class CwiseBinaryOp, operator-=()
*/
EIGEN_MAKE_CWISE_BINARY_OP(operator-,internal::scalar_difference_op)
EIGEN_MAKE_CWISE_BINARY_OP(operator-,difference)
/** \returns an expression of the sum of \c *this and \a other
*
@@ -24,7 +24,7 @@ EIGEN_MAKE_CWISE_BINARY_OP(operator-,internal::scalar_difference_op)
*
* \sa class CwiseBinaryOp, operator+=()
*/
EIGEN_MAKE_CWISE_BINARY_OP(operator+,internal::scalar_sum_op)
EIGEN_MAKE_CWISE_BINARY_OP(operator+,sum)
/** \returns an expression of a custom coefficient-wise operator \a func of *this and \a other
*
@@ -45,3 +45,33 @@ binaryExpr(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other, const Cu
return CwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>(derived(), other.derived(), func);
}
#ifndef EIGEN_PARSED_BY_DOXYGEN
EIGEN_MAKE_SCALAR_BINARY_OP(operator*,product)
#else
/** \returns an expression of \c *this scaled by the scalar factor \a scalar
*
* \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression.
*/
template<typename T>
const CwiseBinaryOp<internal::scalar_product_op<Scalar,T>,Derived,Constant<T> > operator*(const T& scalar) const;
/** \returns an expression of \a expr scaled by the scalar factor \a scalar
*
* \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression.
*/
template<typename T> friend
const CwiseBinaryOp<internal::scalar_product_op<T,Scalar>,Constant<T>,Derived> operator*(const T& scalar, const StorageBaseType& expr);
#endif
#ifndef EIGEN_PARSED_BY_DOXYGEN
EIGEN_MAKE_SCALAR_BINARY_OP_ONTHERIGHT(operator/,quotient)
#else
/** \returns an expression of \c *this divided by the scalar value \a scalar
*
* \tparam T is the scalar type of \a scalar. It must be compatible with the scalar type of the given expression.
*/
template<typename T>
const CwiseBinaryOp<internal::scalar_quotient_op<Scalar,T>,Derived,Constant<T> > operator/(const T& scalar) const;
#endif
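These macros only accept scalar types that ScalarBinaryOpTraits declares compatible; by default that covers a scalar with itself, with its real counterpart, and the pairs Eigen already knows. A hedged sketch of opting a custom pair in; MyScalar and its arithmetic are hypothetical:

struct MyScalar { double v; };  // hypothetical user type; operators with double assumed defined elsewhere

namespace Eigen {
template<typename BinaryOp>
struct ScalarBinaryOpTraits<MyScalar, double, BinaryOp> { typedef MyScalar ReturnType; };
template<typename BinaryOp>
struct ScalarBinaryOpTraits<double, MyScalar, BinaryOp> { typedef MyScalar ReturnType; };
}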

View File

@@ -12,12 +12,6 @@
#ifndef EIGEN_PARSED_BY_DOXYGEN
/** \internal Represents a scalar multiple of an expression */
typedef CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Derived> ScalarMultipleReturnType;
typedef CwiseUnaryOp<internal::scalar_multiple2_op<Scalar,std::complex<Scalar> >, const Derived> ScalarComplexMultipleReturnType;
/** \internal Represents a quotient of an expression by a scalar*/
typedef CwiseUnaryOp<internal::scalar_quotient1_op<Scalar>, const Derived> ScalarQuotient1ReturnType;
/** \internal the return type of conjugate() */
typedef typename internal::conditional<NumTraits<Scalar>::IsComplex,
const CwiseUnaryOp<internal::scalar_conjugate_op<Scalar>, const Derived>,
@@ -39,7 +33,6 @@ typedef CwiseUnaryOp<internal::scalar_imag_op<Scalar>, const Derived> ImagReturn
typedef CwiseUnaryView<internal::scalar_imag_ref_op<Scalar>, Derived> NonConstImagReturnType;
typedef CwiseUnaryOp<internal::scalar_opposite_op<Scalar>, const Derived> NegativeReturnType;
//typedef CwiseUnaryOp<internal::scalar_quotient1_op<Scalar>, const Derived>
#endif // not EIGEN_PARSED_BY_DOXYGEN
@@ -50,71 +43,6 @@ inline const NegativeReturnType
operator-() const { return NegativeReturnType(derived()); }
/** \returns an expression of \c *this scaled by the scalar factor \a scalar */
EIGEN_DEVICE_FUNC
inline const ScalarMultipleReturnType
operator*(const Scalar& scalar) const
{
return ScalarMultipleReturnType(derived(), internal::scalar_multiple_op<Scalar>(scalar));
}
#ifdef EIGEN_PARSED_BY_DOXYGEN
const ScalarMultipleReturnType operator*(const RealScalar& scalar) const;
#endif
/** \returns an expression of \c *this divided by the scalar value \a scalar */
EIGEN_DEVICE_FUNC
inline const ScalarQuotient1ReturnType
operator/(const Scalar& scalar) const
{
return ScalarQuotient1ReturnType(derived(), internal::scalar_quotient1_op<Scalar>(scalar));
}
/** Overloaded for efficiently multiplying with compatible scalar types */
template <typename T>
EIGEN_DEVICE_FUNC inline
typename internal::enable_if<internal::scalar_product_traits<T,Scalar>::Defined,
const CwiseUnaryOp<internal::scalar_multiple2_op<Scalar,T>, const Derived> >::type
operator*(const T& scalar) const
{
#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
#endif
return CwiseUnaryOp<internal::scalar_multiple2_op<Scalar,T>, const Derived>(
derived(), internal::scalar_multiple2_op<Scalar,T>(scalar) );
}
EIGEN_DEVICE_FUNC
inline friend const ScalarMultipleReturnType
operator*(const Scalar& scalar, const StorageBaseType& matrix)
{ return matrix*scalar; }
template <typename T>
EIGEN_DEVICE_FUNC inline friend
typename internal::enable_if<internal::scalar_product_traits<Scalar,T>::Defined,
const CwiseUnaryOp<internal::scalar_multiple2_op<Scalar,T>, const Derived> >::type
operator*(const T& scalar, const StorageBaseType& matrix)
{
#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
#endif
return CwiseUnaryOp<internal::scalar_multiple2_op<Scalar,T>, const Derived>(
matrix.derived(), internal::scalar_multiple2_op<Scalar,T>(scalar) );
}
template <typename T>
EIGEN_DEVICE_FUNC inline
typename internal::enable_if<internal::scalar_product_traits<Scalar,T>::Defined,
const CwiseUnaryOp<internal::scalar_quotient2_op<Scalar,T>, const Derived> >::type
operator/(const T& scalar) const
{
#ifdef EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN
#endif
return CwiseUnaryOp<internal::scalar_quotient2_op<Scalar,T>, const Derived>(
derived(), internal::scalar_quotient2_op<Scalar,T>(scalar) );
}
template<class NewType> struct CastXpr { typedef typename internal::cast_return_type<Derived,const CwiseUnaryOp<internal::scalar_cast_op<Scalar, NewType>, const Derived> >::type Type; };
/** \returns an expression of *this with the \a Scalar type casted to

View File

@@ -19,10 +19,10 @@
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const EIGEN_CWISE_PRODUCT_RETURN_TYPE(Derived,OtherDerived)
EIGEN_STRONG_INLINE const EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)
cwiseProduct(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
{
return EIGEN_CWISE_PRODUCT_RETURN_TYPE(Derived,OtherDerived)(derived(), other.derived());
return EIGEN_CWISE_BINARY_RETURN_TYPE(Derived,OtherDerived,product)(derived(), other.derived());
}
/** \returns an expression of the coefficient-wise == operator of *this and \a other
@@ -74,10 +74,10 @@ cwiseNotEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar>, const Derived, const OtherDerived>
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const OtherDerived>
cwiseMin(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
{
return CwiseBinaryOp<internal::scalar_min_op<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
return CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
}
/** \returns an expression of the coefficient-wise min of *this and scalar \a other
@@ -85,7 +85,7 @@ cwiseMin(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
* \sa class CwiseBinaryOp, min()
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar>, const Derived, const ConstantReturnType>
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar,Scalar>, const Derived, const ConstantReturnType>
cwiseMin(const Scalar &other) const
{
return cwiseMin(Derived::Constant(rows(), cols(), other));
@@ -100,10 +100,10 @@ cwiseMin(const Scalar &other) const
*/
template<typename OtherDerived>
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar>, const Derived, const OtherDerived>
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const OtherDerived>
cwiseMax(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
{
return CwiseBinaryOp<internal::scalar_max_op<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
return CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
}
/** \returns an expression of the coefficient-wise max of *this and scalar \a other
@@ -111,7 +111,7 @@ cwiseMax(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
* \sa class CwiseBinaryOp, min()
*/
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar>, const Derived, const ConstantReturnType>
EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar,Scalar>, const Derived, const ConstantReturnType>
cwiseMax(const Scalar &other) const
{
return cwiseMax(Derived::Constant(rows(), cols(), other));
@@ -133,7 +133,7 @@ cwiseQuotient(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
return CwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived>(derived(), other.derived());
}
typedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar,internal::cmp_EQ>, const Derived, const ConstantReturnType> CwiseScalarEqualReturnType;
typedef CwiseBinaryOp<internal::scalar_cmp_op<Scalar,Scalar,internal::cmp_EQ>, const Derived, const ConstantReturnType> CwiseScalarEqualReturnType;
/** \returns an expression of the coefficient-wise == operator of \c *this and a scalar \a s
*
@@ -148,5 +148,5 @@ EIGEN_DEVICE_FUNC
inline const CwiseScalarEqualReturnType
cwiseEqual(const Scalar& s) const
{
return CwiseScalarEqualReturnType(derived(), Derived::Constant(rows(), cols(), s), internal::scalar_cmp_op<Scalar,internal::cmp_EQ>());
return CwiseScalarEqualReturnType(derived(), Derived::Constant(rows(), cols(), s), internal::scalar_cmp_op<Scalar,Scalar,internal::cmp_EQ>());
}
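For these homogeneous overloads both functor arguments are simply Scalar, so usage is unchanged; e.g. clamping and exact comparison (a minimal sketch):

#include <Eigen/Dense>
int main() {
  Eigen::VectorXd v = Eigen::VectorXd::Random(5);            // coefficients in [-1,1]
  Eigen::VectorXd clamped = v.cwiseMax(-0.5).cwiseMin(0.5);  // clamp each coefficient to [-0.5,0.5]
  int hits = static_cast<int>(v.cwiseEqual(0.0).count());    // exact coefficient-wise equality
  return clamped.size() == 5 ? hits : 1;
}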

View File

@@ -44,4 +44,8 @@ before-evaluators
7013:f875e75f07e5 # organize a little our default cache sizes, and use a saner default L1 outside of x86 (10% faster on Nexus 5)
7591:09a8e2186610 # 3.3-alpha1
7650:b0f3c8f43025 # help clang inlining
8744:74b789ada92a # Improved the matrix multiplication blocking in the case where mr is not a power of 2 (e.g on Haswell CPUs)
8789:efcb912e4356 # Made the index type a template parameter to evaluateProductBlockingSizes. Use numext::mini and numext::maxi instead of std::min/std::max to compute blocking sizes
8972:81d53c711775 # Don't optimize the processing of the last rows of a matrix matrix product in cases that violate the assumptions made by the optimized code path
8985:d935df21a082 # Remove the rotating kernel.

View File

@@ -18,7 +18,7 @@ struct packed_triangular_matrix_vector_product;
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs>
struct packed_triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,ColMajor>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
IsLower = (Mode & Lower) ==Lower,
HasUnitDiag = (Mode & UnitDiag)==UnitDiag,
@@ -47,7 +47,7 @@ struct packed_triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsS
template<typename Index, int Mode, typename LhsScalar, bool ConjLhs, typename RhsScalar, bool ConjRhs>
struct packed_triangular_matrix_vector_product<Index,Mode,LhsScalar,ConjLhs,RhsScalar,ConjRhs,RowMajor>
{
typedef typename scalar_product_traits<LhsScalar, RhsScalar>::ReturnType ResScalar;
typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
enum {
IsLower = (Mode & Lower) ==Lower,
HasUnitDiag = (Mode & UnitDiag)==UnitDiag,

View File

@@ -56,13 +56,13 @@ void makeFloor(const MatrixBase<OtherDerived>& other) { derived() = derived().cw
template<typename OtherDerived>
void makeCeil(const MatrixBase<OtherDerived>& other) { derived() = derived().cwiseMax(other.derived()); }
const CwiseUnaryOp<internal::scalar_add_op<Scalar>, Derived>
const CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const Derived, const ConstantReturnType>
operator+(const Scalar& scalar) const
{ return CwiseUnaryOp<internal::scalar_add_op<Scalar>, Derived>(derived(), internal::scalar_add_op<Scalar>(scalar)); }
{ return CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const Derived, const ConstantReturnType>(derived(), Constant(rows(),cols(),scalar)); }
friend const CwiseUnaryOp<internal::scalar_add_op<Scalar>, Derived>
friend const CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const ConstantReturnType, Derived>
operator+(const Scalar& scalar, const MatrixBase<Derived>& mat)
{ return CwiseUnaryOp<internal::scalar_add_op<Scalar>, Derived>(mat.derived(), internal::scalar_add_op<Scalar>(scalar)); }
{ return CwiseBinaryOp<internal::scalar_sum_op<Scalar>, const ConstantReturnType, Derived>(Constant(rows(),cols(),scalar), mat.derived()); }
\endcode
Then one can add the following declaration in the config.h or whatever prerequisites header file of their project:

View File

@@ -72,7 +72,7 @@ template<typename ArrayType> void array(const ArrayType& m)
VERIFY_IS_MUCH_SMALLER_THAN(abs(m1.rowwise().sum().sum() - m1.sum()), m1.abs().sum());
if (!internal::isMuchSmallerThan(abs(m1.sum() - (m1+m2).sum()), m1.abs().sum(), test_precision<Scalar>()))
VERIFY_IS_NOT_APPROX(((m1+m2).rowwise().sum()).sum(), m1.sum());
VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op<Scalar>()));
VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op<Scalar,Scalar>()));
// vector-wise ops
m3 = m1;
@@ -807,7 +807,7 @@ void test_array()
VERIFY((internal::is_same< internal::global_math_functions_filtering_base<int>::type, int >::value));
VERIFY((internal::is_same< internal::global_math_functions_filtering_base<float>::type, float >::value));
VERIFY((internal::is_same< internal::global_math_functions_filtering_base<Array2i>::type, ArrayBase<Array2i> >::value));
typedef CwiseUnaryOp<internal::scalar_multiple_op<double>, ArrayXd > Xpr;
typedef CwiseUnaryOp<internal::scalar_abs_op<double>, ArrayXd > Xpr;
VERIFY((internal::is_same< internal::global_math_functions_filtering_base<Xpr>::type,
ArrayBase<Xpr>
>::value));

View File

@@ -45,7 +45,7 @@ template<typename MatrixType> void array_for_matrix(const MatrixType& m)
VERIFY_IS_MUCH_SMALLER_THAN(m1.rowwise().sum().sum() - m1.sum(), m1.squaredNorm());
VERIFY_IS_MUCH_SMALLER_THAN(m1.colwise().sum() + m2.colwise().sum() - (m1+m2).colwise().sum(), (m1+m2).squaredNorm());
VERIFY_IS_MUCH_SMALLER_THAN(m1.rowwise().sum() - m2.rowwise().sum() - (m1-m2).rowwise().sum(), (m1-m2).squaredNorm());
VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op<Scalar>()));
VERIFY_IS_APPROX(m1.colwise().sum(), m1.colwise().redux(internal::scalar_sum_op<Scalar,Scalar>()));
// vector-wise ops
m3 = m1;

View File

@@ -43,4 +43,27 @@ void test_commainitializer()
4, 5, 6,
vec[2].transpose();
VERIFY_IS_APPROX(m3, ref);
// Check with empty matrices (bug #1242)
{
int const M = 0;
int const N1 = 2;
int const N2 = 1;
{
Matrix<double, M, N1> A1;
Matrix<double, M, N2> A2;
Matrix<double, M, N1 + N2> B;
B << A1, A2;
}
{
Matrix<double, N1, M> A1;
Matrix<double, N2, M> A2;
Matrix<double, N1 + N2, M> B;
B << A1,
A2;
}
}
}

View File

@@ -1,7 +1,7 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2012 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2012-2016 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
@@ -10,6 +10,7 @@
#include "main.h"
#include <limits>
#include <Eigen/Eigenvalues>
#include <Eigen/LU>
template<typename MatrixType> void generalized_eigensolver_real(const MatrixType& m)
{
@@ -21,6 +22,7 @@ template<typename MatrixType> void generalized_eigensolver_real(const MatrixType
Index cols = m.cols();
typedef typename MatrixType::Scalar Scalar;
typedef std::complex<Scalar> ComplexScalar;
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
MatrixType a = MatrixType::Random(rows,cols);
@@ -31,14 +33,28 @@ template<typename MatrixType> void generalized_eigensolver_real(const MatrixType
MatrixType spdB = b.adjoint() * b + b1.adjoint() * b1;
// let's compare to GeneralizedSelfAdjointEigenSolver
GeneralizedSelfAdjointEigenSolver<MatrixType> symmEig(spdA, spdB);
GeneralizedEigenSolver<MatrixType> eig(spdA, spdB);
{
GeneralizedSelfAdjointEigenSolver<MatrixType> symmEig(spdA, spdB);
GeneralizedEigenSolver<MatrixType> eig(spdA, spdB);
VERIFY_IS_EQUAL(eig.eigenvalues().imag().cwiseAbs().maxCoeff(), 0);
VERIFY_IS_EQUAL(eig.eigenvalues().imag().cwiseAbs().maxCoeff(), 0);
VectorType realEigenvalues = eig.eigenvalues().real();
std::sort(realEigenvalues.data(), realEigenvalues.data()+realEigenvalues.size());
VERIFY_IS_APPROX(realEigenvalues, symmEig.eigenvalues());
VectorType realEigenvalues = eig.eigenvalues().real();
std::sort(realEigenvalues.data(), realEigenvalues.data()+realEigenvalues.size());
VERIFY_IS_APPROX(realEigenvalues, symmEig.eigenvalues());
}
// non symmetric case:
{
GeneralizedEigenSolver<MatrixType> eig(a,b);
for(Index k=0; k<cols; ++k)
{
Matrix<ComplexScalar,Dynamic,Dynamic> tmp = (eig.betas()(k)*a).template cast<ComplexScalar>() - eig.alphas()(k)*b;
if(tmp.norm()>(std::numeric_limits<Scalar>::min)())
tmp /= tmp.norm();
VERIFY_IS_MUCH_SMALLER_THAN( std::abs(tmp.determinant()), Scalar(1) );
}
}
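The non-symmetric check relies on the defining identity of the generalized eigenproblem: each reported pair (\alpha_k, \beta_k) satisfies

A v_k = \lambda_k B v_k, \quad \lambda_k = \alpha_k / \beta_k \quad\Longrightarrow\quad \det(\beta_k A - \alpha_k B) = 0,

so after normalizing \beta_k A - \alpha_k B the test only needs its determinant to be negligibly small.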
// regression test for bug 1098
{
@@ -57,7 +73,7 @@
s = internal::random<int>(1,EIGEN_TEST_MAX_SIZE/4);
CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(s,s)) );
// some trivial but implementation-wise tricky cases
// some trivial but implementation-wise special cases
CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(1,1)) );
CALL_SUBTEST_2( generalized_eigensolver_real(MatrixXd(2,2)) );
CALL_SUBTEST_3( generalized_eigensolver_real(Matrix<double,1,1>()) );

View File

@@ -21,7 +21,7 @@ namespace Eigen {
EIGEN_STRONG_INLINE
DstXprType& copy_using_evaluator(const EigenBase<DstXprType> &dst, const SrcXprType &src)
{
call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op<typename DstXprType::Scalar>());
call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
return dst.const_cast_derived();
}
@@ -29,7 +29,7 @@ namespace Eigen {
EIGEN_STRONG_INLINE
const DstXprType& copy_using_evaluator(const NoAlias<DstXprType, StorageBase>& dst, const SrcXprType &src)
{
call_assignment(dst, src.derived(), internal::assign_op<typename DstXprType::Scalar>());
call_assignment(dst, src.derived(), internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
return dst.expression();
}
@@ -45,7 +45,7 @@ namespace Eigen {
dst.const_cast_derived().resizeLike(src.derived());
#endif
call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op<typename DstXprType::Scalar>());
call_assignment(dst.const_cast_derived(), src.derived(), internal::assign_op<typename DstXprType::Scalar,typename SrcXprType::Scalar>());
return dst.const_cast_derived();
}
@@ -53,28 +53,28 @@ namespace Eigen {
void add_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src)
{
typedef typename DstXprType::Scalar Scalar;
call_assignment(const_cast<DstXprType&>(dst), src.derived(), internal::add_assign_op<Scalar>());
call_assignment(const_cast<DstXprType&>(dst), src.derived(), internal::add_assign_op<Scalar,typename SrcXprType::Scalar>());
}
template<typename DstXprType, typename SrcXprType>
void subtract_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src)
{
typedef typename DstXprType::Scalar Scalar;
call_assignment(const_cast<DstXprType&>(dst), src.derived(), internal::sub_assign_op<Scalar>());
call_assignment(const_cast<DstXprType&>(dst), src.derived(), internal::sub_assign_op<Scalar,typename SrcXprType::Scalar>());
}
template<typename DstXprType, typename SrcXprType>
void multiply_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src)
{
typedef typename DstXprType::Scalar Scalar;
call_assignment(dst.const_cast_derived(), src.derived(), internal::mul_assign_op<Scalar>());
call_assignment(dst.const_cast_derived(), src.derived(), internal::mul_assign_op<Scalar,typename SrcXprType::Scalar>());
}
template<typename DstXprType, typename SrcXprType>
void divide_assign_using_evaluator(const DstXprType& dst, const SrcXprType& src)
{
typedef typename DstXprType::Scalar Scalar;
call_assignment(dst.const_cast_derived(), src.derived(), internal::div_assign_op<Scalar>());
call_assignment(dst.const_cast_derived(), src.derived(), internal::div_assign_op<Scalar,typename SrcXprType::Scalar>());
}
template<typename DstXprType, typename SrcXprType>

View File

@@ -48,6 +48,8 @@ template<typename BoxType> void alignedbox(const BoxType& _box)
b0.extend(p0);
b0.extend(p1);
VERIFY(b0.contains(p0*s1+(Scalar(1)-s1)*p1));
VERIFY(b0.contains(b0.center()));
VERIFY(b0.center()==(p0+p1)/Scalar(2));
(b2 = b0).extend(b1);
VERIFY(b2.contains(b0));

View File

@@ -58,6 +58,8 @@ template<typename Scalar,int Size> void homogeneous(void)
T2MatrixType t2 = T2MatrixType::Random();
VERIFY_IS_APPROX(t2 * (v0.homogeneous().eval()), t2 * v0.homogeneous());
VERIFY_IS_APPROX(t2 * (m0.colwise().homogeneous().eval()), t2 * m0.colwise().homogeneous());
VERIFY_IS_APPROX(t2 * (v0.homogeneous().asDiagonal()), t2 * hv0.asDiagonal());
VERIFY_IS_APPROX((v0.homogeneous().asDiagonal()) * t2, hv0.asDiagonal() * t2);
VERIFY_IS_APPROX((v0.transpose().rowwise().homogeneous().eval()) * t2,
v0.transpose().rowwise().homogeneous() * t2);

View File

@@ -9,7 +9,7 @@
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
static bool g_called;
#define EIGEN_SPECIAL_SCALAR_MULTIPLE_PLUGIN { g_called = true; }
#define EIGEN_SCALAR_BINARY_OP_PLUGIN { g_called |= (!internal::is_same<LhsScalar,RhsScalar>::value); }
#include "main.h"
@@ -93,6 +93,22 @@ template<typename MatrixType> void real_complex(DenseIndex rows = MatrixType::Ro
g_called = false;
VERIFY_IS_APPROX(m1/s, m1/Scalar(s));
VERIFY(g_called && "matrix<complex> / real not properly optimized");
g_called = false;
VERIFY_IS_APPROX(s+m1.array(), Scalar(s)+m1.array());
VERIFY(g_called && "real + matrix<complex> not properly optimized");
g_called = false;
VERIFY_IS_APPROX(m1.array()+s, m1.array()+Scalar(s));
VERIFY(g_called && "matrix<complex> + real not properly optimized");
g_called = false;
VERIFY_IS_APPROX(s-m1.array(), Scalar(s)-m1.array());
VERIFY(g_called && "real - matrix<complex> not properly optimized");
g_called = false;
VERIFY_IS_APPROX(m1.array()-s, m1.array()-Scalar(s));
VERIFY(g_called && "matrix<complex> - real not properly optimized");
}
void test_linearstructure()

View File

@@ -23,10 +23,18 @@
#endif
static bool g_called;
#define EIGEN_SCALAR_BINARY_OP_PLUGIN { g_called |= (!internal::is_same<LhsScalar,RhsScalar>::value); }
#include "main.h"
using namespace std;
#define VERIFY_MIX_SCALAR(XPR,REF) \
g_called = false; \
VERIFY_IS_APPROX(XPR,REF); \
VERIFY( g_called && #XPR" not properly optimized");
template<int SizeAtCompileType> void mixingtypes(int size = SizeAtCompileType)
{
typedef std::complex<float> CF;
@@ -42,6 +50,7 @@ template<int SizeAtCompileType> void mixingtypes(int size = SizeAtCompileType)
Mat_f mf = Mat_f::Random(size,size);
Mat_d md = mf.template cast<double>();
//Mat_d rd = md;
Mat_cf mcf = Mat_cf::Random(size,size);
Mat_cd mcd = mcf.template cast<complex<double> >();
Mat_cd rcd = mcd;
@@ -56,23 +65,50 @@ template<int SizeAtCompileType> void mixingtypes(int size = SizeAtCompileType)
mf+mf;
VERIFY_RAISES_ASSERT(mf+md);
#if !EIGEN_HAS_STD_RESULT_OF
// this one does not even compile with C++11
VERIFY_RAISES_ASSERT(mf+mcf);
#endif
// VERIFY_RAISES_ASSERT(mf+md); // does not even compile
#ifdef EIGEN_DONT_VECTORIZE
VERIFY_RAISES_ASSERT(vf=vd);
VERIFY_RAISES_ASSERT(vf+=vd);
VERIFY_RAISES_ASSERT(mcd=md);
#endif
// check scalar products
VERIFY_IS_APPROX(vcf * sf , vcf * complex<float>(sf));
VERIFY_IS_APPROX(sd * vcd, complex<double>(sd) * vcd);
VERIFY_IS_APPROX(vf * scf , vf.template cast<complex<float> >() * scf);
VERIFY_IS_APPROX(scd * vd, scd * vd.template cast<complex<double> >());
VERIFY_MIX_SCALAR(vcf * sf , vcf * complex<float>(sf));
VERIFY_MIX_SCALAR(sd * vcd , complex<double>(sd) * vcd);
VERIFY_MIX_SCALAR(vf * scf , vf.template cast<complex<float> >() * scf);
VERIFY_MIX_SCALAR(scd * vd , scd * vd.template cast<complex<double> >());
VERIFY_MIX_SCALAR(vcf * 2 , vcf * complex<float>(2));
VERIFY_MIX_SCALAR(vcf * 2.1 , vcf * complex<float>(2.1));
VERIFY_MIX_SCALAR(2 * vcf, vcf * complex<float>(2));
VERIFY_MIX_SCALAR(2.1 * vcf , vcf * complex<float>(2.1));
// check scalar quotients
VERIFY_MIX_SCALAR(vcf / sf , vcf / complex<float>(sf));
VERIFY_MIX_SCALAR(vf / scf , vf.template cast<complex<float> >() / scf);
VERIFY_MIX_SCALAR(vf.array() / scf, vf.template cast<complex<float> >().array() / scf);
VERIFY_MIX_SCALAR(scd / vd.array() , scd / vd.template cast<complex<double> >().array());
// check scalar increment
VERIFY_MIX_SCALAR(vcf.array() + sf , vcf.array() + complex<float>(sf));
VERIFY_MIX_SCALAR(sd + vcd.array(), complex<double>(sd) + vcd.array());
VERIFY_MIX_SCALAR(vf.array() + scf, vf.template cast<complex<float> >().array() + scf);
VERIFY_MIX_SCALAR(scd + vd.array() , scd + vd.template cast<complex<double> >().array());
// check scalar subtractions
VERIFY_MIX_SCALAR(vcf.array() - sf , vcf.array() - complex<float>(sf));
VERIFY_MIX_SCALAR(sd - vcd.array(), complex<double>(sd) - vcd.array());
VERIFY_MIX_SCALAR(vf.array() - scf, vf.template cast<complex<float> >().array() - scf);
VERIFY_MIX_SCALAR(scd - vd.array() , scd - vd.template cast<complex<double> >().array());
// check scalar powers
VERIFY_MIX_SCALAR( pow(vcf.array(), sf), pow(vcf.array(), complex<float>(sf)) );
VERIFY_MIX_SCALAR( vcf.array().pow(sf) , pow(vcf.array(), complex<float>(sf)) );
VERIFY_MIX_SCALAR( pow(sd, vcd.array()), pow(complex<double>(sd), vcd.array()) );
VERIFY_MIX_SCALAR( pow(vf.array(), scf), pow(vf.template cast<complex<float> >().array(), scf) );
VERIFY_MIX_SCALAR( vf.array().pow(scf) , pow(vf.template cast<complex<float> >().array(), scf) );
VERIFY_MIX_SCALAR( pow(scd, vd.array()), pow(scd, vd.template cast<complex<double> >().array()) );
// check dot product
vf.dot(vf);
@@ -186,16 +222,50 @@ template<int SizeAtCompileType> void mixingtypes(int size = SizeAtCompileType)
Mat_cd((scd * md.template cast<CD>().eval() * mcd).template triangularView<Upper>()));
VERIFY_IS_APPROX( md.array() * mcd.array(), md.template cast<CD>().eval().array() * mcd.array() );
VERIFY_IS_APPROX( mcd.array() * md.array(), mcd.array() * md.template cast<CD>().eval().array() );
// VERIFY_IS_APPROX( md.array() / mcd.array(), md.template cast<CD>().eval().array() / mcd.array() );
VERIFY_IS_APPROX( md.array() * mcd.array(), md.template cast<CD>().eval().array() * mcd.array() );
VERIFY_IS_APPROX( mcd.array() * md.array(), mcd.array() * md.template cast<CD>().eval().array() );
VERIFY_IS_APPROX( md.array() + mcd.array(), md.template cast<CD>().eval().array() + mcd.array() );
VERIFY_IS_APPROX( mcd.array() + md.array(), mcd.array() + md.template cast<CD>().eval().array() );
VERIFY_IS_APPROX( md.array() - mcd.array(), md.template cast<CD>().eval().array() - mcd.array() );
VERIFY_IS_APPROX( mcd.array() - md.array(), mcd.array() - md.template cast<CD>().eval().array() );
VERIFY_IS_APPROX( md.array() / mcd.array(), md.template cast<CD>().eval().array() / mcd.array() );
VERIFY_IS_APPROX( mcd.array() / md.array(), mcd.array() / md.template cast<CD>().eval().array() );
VERIFY_IS_APPROX( md.array().pow(mcd.array()), md.template cast<CD>().eval().array().pow(mcd.array()) );
VERIFY_IS_APPROX( mcd.array().pow(md.array()), mcd.array().pow(md.template cast<CD>().eval().array()) );
VERIFY_IS_APPROX( pow(md.array(),mcd.array()), md.template cast<CD>().eval().array().pow(mcd.array()) );
VERIFY_IS_APPROX( pow(mcd.array(),md.array()), mcd.array().pow(md.template cast<CD>().eval().array()) );
rcd = mcd;
VERIFY_IS_APPROX( rcd = md, md.template cast<CD>().eval() );
rcd = mcd;
VERIFY_IS_APPROX( rcd += md, mcd + md.template cast<CD>().eval() );
rcd = mcd;
VERIFY_IS_APPROX( rcd -= md, mcd - md.template cast<CD>().eval() );
rcd = mcd;
VERIFY_IS_APPROX( rcd.array() *= md.array(), mcd.array() * md.template cast<CD>().eval().array() );
rcd = mcd;
VERIFY_IS_APPROX( rcd.array() /= md.array(), mcd.array() / md.template cast<CD>().eval().array() );
rcd = mcd;
VERIFY_IS_APPROX( rcd.noalias() += md + mcd*md, mcd + (md.template cast<CD>().eval()) + mcd*(md.template cast<CD>().eval()));
VERIFY_IS_APPROX( rcd.noalias() = md*md, ((md*md).eval().template cast<CD>()) );
rcd = mcd;
VERIFY_IS_APPROX( rcd.noalias() += md*md, mcd + ((md*md).eval().template cast<CD>()) );
rcd = mcd;
VERIFY_IS_APPROX( rcd.noalias() -= md*md, mcd - ((md*md).eval().template cast<CD>()) );
VERIFY_IS_APPROX( rcd.noalias() = mcd + md*md, mcd + ((md*md).eval().template cast<CD>()) );
rcd = mcd;
VERIFY_IS_APPROX( rcd.noalias() += mcd + md*md, mcd + mcd + ((md*md).eval().template cast<CD>()) );
rcd = mcd;
VERIFY_IS_APPROX( rcd.noalias() -= mcd + md*md, - ((md*md).eval().template cast<CD>()) );
}
void test_mixingtypes()

View File

@@ -75,8 +75,8 @@ template <typename MatrixType> void run_nesting_ops_2(const MatrixType& _m)
}
else
{
VERIFY( verify_eval_type<1>(2*m1, 2*m1) );
VERIFY( verify_eval_type<2>(2*m1, m1) );
VERIFY( verify_eval_type<2>(2*m1, 2*m1) );
VERIFY( verify_eval_type<3>(2*m1, m1) );
}
VERIFY( verify_eval_type<2>(m1+m1, m1+m1) );
VERIFY( verify_eval_type<3>(m1+m1, m1) );

View File

@@ -49,11 +49,20 @@ template<typename MatrixType> void real_qz(const MatrixType& m)
for (Index i=0; i<A.cols(); i++)
for (Index j=0; j<i; j++) {
if (abs(qz.matrixT()(i,j))!=Scalar(0.0))
{
std::cerr << "Error: T(" << i << "," << j << ") = " << qz.matrixT()(i,j) << std::endl;
all_zeros = false;
}
if (j<i-1 && abs(qz.matrixS()(i,j))!=Scalar(0.0))
{
std::cerr << "Error: S(" << i << "," << j << ") = " << qz.matrixS()(i,j) << std::endl;
all_zeros = false;
}
if (j==i-1 && j>0 && abs(qz.matrixS()(i,j))!=Scalar(0.0) && abs(qz.matrixS()(i-1,j-1))!=Scalar(0.0))
{
std::cerr << "Error: S(" << i << "," << j << ") = " << qz.matrixS()(i,j) << " && S(" << i-1 << "," << j-1 << ") = " << qz.matrixS()(i-1,j-1) << std::endl;
all_zeros = false;
}
}
VERIFY_IS_EQUAL(all_zeros, true);
VERIFY_IS_APPROX(qz.matrixQ()*qz.matrixS()*qz.matrixZ(), A);

View File

@@ -29,7 +29,7 @@ using internal::demangle_unrolling;
template<typename Dst, typename Src>
bool test_assign(const Dst&, const Src&, int traversal, int unrolling)
{
typedef internal::copy_using_evaluator_traits<internal::evaluator<Dst>,internal::evaluator<Src>, internal::assign_op<typename Dst::Scalar> > traits;
typedef internal::copy_using_evaluator_traits<internal::evaluator<Dst>,internal::evaluator<Src>, internal::assign_op<typename Dst::Scalar,typename Src::Scalar> > traits;
bool res = traits::Traversal==traversal;
if(unrolling==InnerUnrolling+CompleteUnrolling)
res = res && (int(traits::Unrolling)==InnerUnrolling || int(traits::Unrolling)==CompleteUnrolling);
@@ -53,7 +53,7 @@ bool test_assign(const Dst&, const Src&, int traversal, int unrolling)
template<typename Dst, typename Src>
bool test_assign(int traversal, int unrolling)
{
typedef internal::copy_using_evaluator_traits<internal::evaluator<Dst>,internal::evaluator<Src>, internal::assign_op<typename Dst::Scalar> > traits;
typedef internal::copy_using_evaluator_traits<internal::evaluator<Dst>,internal::evaluator<Src>, internal::assign_op<typename Dst::Scalar,typename Src::Scalar> > traits;
bool res = traits::Traversal==traversal && traits::Unrolling==unrolling;
if(!res)
{
@@ -73,7 +73,8 @@ bool test_assign(int traversal, int unrolling)
template<typename Xpr>
bool test_redux(const Xpr&, int traversal, int unrolling)
{
typedef internal::redux_traits<internal::scalar_sum_op<typename Xpr::Scalar>,internal::redux_evaluator<Xpr> > traits;
typedef typename Xpr::Scalar Scalar;
typedef internal::redux_traits<internal::scalar_sum_op<Scalar,Scalar>,internal::redux_evaluator<Xpr> > traits;
bool res = traits::Traversal==traversal && traits::Unrolling==unrolling;
if(!res)

View File

@@ -204,64 +204,62 @@ class TensorBase<Derived, ReadOnlyAccessors>
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_pow_op<Scalar>, const Derived>
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::bind2nd_op<internal::scalar_pow_op<Scalar,Scalar> >, const Derived>
pow(Scalar exponent) const {
return unaryExpr(internal::scalar_pow_op<Scalar>(exponent));
return unaryExpr(internal::bind2nd_op<internal::scalar_pow_op<Scalar,Scalar> >(exponent));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_add_op<Scalar>, const Derived>
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::bind2nd_op<internal::scalar_sum_op<Scalar,Scalar> >, const Derived>
operator+ (Scalar rhs) const {
return unaryExpr(internal::scalar_add_op<Scalar>(rhs));
return unaryExpr(internal::bind2nd_op<internal::scalar_sum_op<Scalar,Scalar> >(rhs));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE friend
const TensorCwiseUnaryOp<internal::scalar_add_op<Scalar>, const Derived>
const TensorCwiseUnaryOp<internal::bind1st_op<internal::scalar_sum_op<Scalar> >, const Derived>
operator+ (Scalar lhs, const Derived& rhs) {
return rhs + lhs;
return rhs.unaryExpr(internal::bind1st_op<internal::scalar_sum_op<Scalar> >(lhs));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sub_op<Scalar>, const Derived>
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::bind2nd_op<internal::scalar_difference_op<Scalar,Scalar> >, const Derived>
operator- (Scalar rhs) const {
EIGEN_STATIC_ASSERT((NumTraits<Scalar>::IsSigned || internal::is_same<Scalar, const std::complex<float> >::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
return unaryExpr(internal::scalar_sub_op<Scalar>(rhs));
return unaryExpr(internal::bind2nd_op<internal::scalar_difference_op<Scalar,Scalar> >(rhs));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE friend
const TensorCwiseUnaryOp<internal::scalar_add_op<Scalar>,
const TensorCwiseUnaryOp<internal::scalar_opposite_op<Scalar>, const Derived> >
const TensorCwiseUnaryOp<internal::bind1st_op<internal::scalar_difference_op<Scalar> >, const Derived>
operator- (Scalar lhs, const Derived& rhs) {
return -rhs + lhs;
return rhs.unaryExpr(internal::bind1st_op<internal::scalar_difference_op<Scalar> >(lhs));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Derived>
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::bind2nd_op<internal::scalar_product_op<Scalar,Scalar> >, const Derived>
operator* (Scalar rhs) const {
return unaryExpr(internal::scalar_multiple_op<Scalar>(rhs));
return unaryExpr(internal::bind2nd_op<internal::scalar_product_op<Scalar,Scalar> >(rhs));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE friend
const TensorCwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Derived>
const TensorCwiseUnaryOp<internal::bind1st_op<internal::scalar_product_op<Scalar> >, const Derived>
operator* (Scalar lhs, const Derived& rhs) {
return rhs * lhs;
return rhs.unaryExpr(internal::bind1st_op<internal::scalar_product_op<Scalar> >(lhs));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_quotient1_op<Scalar>, const Derived>
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::bind2nd_op<internal::scalar_quotient_op<Scalar,Scalar> >, const Derived>
operator/ (Scalar rhs) const {
return unaryExpr(internal::scalar_quotient1_op<Scalar>(rhs));
return unaryExpr(internal::bind2nd_op<internal::scalar_quotient_op<Scalar,Scalar> >(rhs));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE friend
const TensorCwiseUnaryOp<internal::scalar_multiple_op<Scalar>,
const TensorCwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> >
const TensorCwiseUnaryOp<internal::bind1st_op<internal::scalar_quotient_op<Scalar> >, const Derived>
operator/ (Scalar lhs, const Derived& rhs) {
return rhs.inverse() * lhs;
return rhs.unaryExpr(internal::bind1st_op<internal::scalar_quotient_op<Scalar> >(lhs));
}
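Tensor-scalar arithmetic thus stops using dedicated unary functors (scalar_add_op, scalar_multiple_op, scalar_quotient1_op, ...) and instead binds one operand of the shared binary functors; notably, the scalar-on-the-left forms no longer rewrite into -rhs + lhs or rhs.inverse() * lhs. A small sketch, assuming the changeset is applied:

#include <unsupported/Eigen/CXX11/Tensor>
int main() {
  Eigen::Tensor<float, 1> t(3);
  t.setValues({1.f, 2.f, 4.f});
  Eigen::Tensor<float, 1> a = t + 1.f;  // bind2nd_op<scalar_sum_op<float,float> >
  Eigen::Tensor<float, 1> b = 1.f / t;  // bind1st_op<scalar_quotient_op<float> >: true scalar/coeff division
  return 0;
}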
EIGEN_DEVICE_FUNC
@@ -371,66 +369,66 @@ class TensorBase<Derived, ReadOnlyAccessors>
// Comparisons and tests.
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LT>, const Derived, const OtherDerived>
const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LT>, const Derived, const OtherDerived>
operator<(const OtherDerived& other) const {
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_LT>());
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LT>());
}
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LE>, const Derived, const OtherDerived>
const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LE>, const Derived, const OtherDerived>
operator<=(const OtherDerived& other) const {
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_LE>());
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LE>());
}
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_GT>, const Derived, const OtherDerived>
const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GT>, const Derived, const OtherDerived>
operator>(const OtherDerived& other) const {
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_GT>());
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GT>());
}
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_GE>, const Derived, const OtherDerived>
const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GE>, const Derived, const OtherDerived>
operator>=(const OtherDerived& other) const {
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_GE>());
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GE>());
}
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_EQ>, const Derived, const OtherDerived>
const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_EQ>, const Derived, const OtherDerived>
operator==(const OtherDerived& other) const {
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_EQ>());
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_EQ>());
}
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_NEQ>, const Derived, const OtherDerived>
const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_NEQ>, const Derived, const OtherDerived>
operator!=(const OtherDerived& other) const {
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_NEQ>());
return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_NEQ>());
}
// comparisons and tests for Scalars
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LT>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LT>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
operator<(Scalar threshold) const {
return operator<(constant(threshold));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LE>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LE>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
operator<=(Scalar threshold) const {
return operator<=(constant(threshold));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_GT>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GT>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
operator>(Scalar threshold) const {
return operator>(constant(threshold));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_GE>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GE>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
operator>=(Scalar threshold) const {
return operator>=(constant(threshold));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_EQ>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_EQ>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
operator==(Scalar threshold) const {
return operator==(constant(threshold));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_NEQ>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_NEQ>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
operator!=(Scalar threshold) const {
return operator!=(constant(threshold));
}
@@ -486,22 +484,22 @@ class TensorBase<Derived, ReadOnlyAccessors>
typedef TensorScanOp<internal::SumReducer<CoeffReturnType>, const Derived> TensorScanSumOp;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorScanSumOp
cumsum(const Index& axis) const {
return TensorScanSumOp(derived(), axis);
cumsum(const Index& axis, bool exclusive = false) const {
return TensorScanSumOp(derived(), axis, exclusive);
}
typedef TensorScanOp<internal::ProdReducer<CoeffReturnType>, const Derived> TensorScanProdOp;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorScanProdOp
cumprod(const Index& axis) const {
return TensorScanProdOp(derived(), axis);
cumprod(const Index& axis, bool exclusive = false) const {
return TensorScanProdOp(derived(), axis, exclusive);
}
template <typename Reducer>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorScanOp<Reducer, const Derived>
scan(const Index& axis, const Reducer& reducer) const {
return TensorScanOp<Reducer, const Derived>(derived(), axis, reducer);
scan(const Index& axis, const Reducer& reducer, bool exclusive = false) const {
return TensorScanOp<Reducer, const Derived>(derived(), axis, exclusive, reducer);
}
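cumsum, cumprod and scan gain an exclusive flag. Assuming the usual scan convention (each output excludes its own input element, starting from the reducer's identity), a one-dimensional sketch:

#include <unsupported/Eigen/CXX11/Tensor>
int main() {
  Eigen::Tensor<int, 1> t(4);
  t.setValues({1, 2, 3, 4});
  Eigen::Tensor<int, 1> inc = t.cumsum(0);        // inclusive scan: {1, 3, 6, 10}
  Eigen::Tensor<int, 1> exc = t.cumsum(0, true);  // exclusive scan (assumed semantics): {0, 1, 3, 6}
  return 0;
}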
// Reductions.

View File

@@ -202,7 +202,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
// across k dimension.
const TensorOpCost cost =
contractionCost(m, n, bm, bn, bk, shard_by_col, false);
Index num_threads = TensorCostModel<ThreadPoolDevice>::numThreads(
int num_threads = TensorCostModel<ThreadPoolDevice>::numThreads(
static_cast<double>(n) * m, cost, this->m_device.numThreads());
// TODO(dvyukov): this is a stop-gap to prevent regressions while the cost
@@ -301,7 +301,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
class Context {
public:
Context(const Device& device, int num_threads, LhsMapper& lhs,
RhsMapper& rhs, Scalar* buffer, Index m, Index n, Index k, Index bm,
RhsMapper& rhs, Scalar* buffer, Index tm, Index tn, Index tk, Index bm,
Index bn, Index bk, Index nm, Index nn, Index nk, Index gm,
Index gn, Index nm0, Index nn0, bool shard_by_col,
bool parallel_pack)
@@ -309,13 +309,13 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
lhs_(lhs),
rhs_(rhs),
buffer_(buffer),
output_(buffer, m),
output_(buffer, tm),
num_threads_(num_threads),
shard_by_col_(shard_by_col),
parallel_pack_(parallel_pack),
m_(m),
n_(n),
k_(k),
m_(tm),
n_(tn),
k_(tk),
bm_(bm),
bn_(bn),
bk_(bk),

View File

@@ -106,7 +106,7 @@ static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) {
// Build a thread pool device on top of an existing pool of threads.
struct ThreadPoolDevice {
// The ownership of the thread pool remains with the caller.
ThreadPoolDevice(ThreadPoolInterface* pool, size_t num_cores) : pool_(pool), num_threads_(num_cores) { }
ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores) : pool_(pool), num_threads_(num_cores) { }
EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
return internal::aligned_malloc(num_bytes);
@@ -130,7 +130,7 @@ struct ThreadPoolDevice {
::memset(buffer, c, n);
}
EIGEN_STRONG_INLINE size_t numThreads() const {
EIGEN_STRONG_INLINE int numThreads() const {
return num_threads_;
}
@@ -172,6 +172,12 @@ struct ThreadPoolDevice {
pool_->Schedule(func);
}
// Returns a logical thread index between 0 and pool_->NumThreads() - 1 if
// called from one of the threads in pool_. Returns -1 otherwise.
EIGEN_STRONG_INLINE int currentThreadId() const {
return pool_->CurrentThreadId();
}
// parallelFor executes f with [0, n) arguments in parallel and waits for
// completion. F accepts a half-open interval [first, last).
// Block size is chosen based on the iteration cost and resulting parallel
@@ -182,7 +188,7 @@ struct ThreadPoolDevice {
std::function<void(Index, Index)> f) const {
typedef TensorCostModel<ThreadPoolDevice> CostModel;
if (n <= 1 || numThreads() == 1 ||
CostModel::numThreads(n, cost, numThreads()) == 1) {
CostModel::numThreads(n, cost, static_cast<int>(numThreads())) == 1) {
f(0, n);
return;
}
@@ -242,7 +248,7 @@ struct ThreadPoolDevice {
// Recursively divide size into halves until we reach block_size.
// Division code rounds mid to block_size, so we are guaranteed to get
// block_count leaves that do actual computations.
Barrier barrier(block_count);
Barrier barrier(static_cast<unsigned int>(block_count));
std::function<void(Index, Index)> handleRange;
handleRange = [=, &handleRange, &barrier, &f](Index first, Index last) {
if (last - first <= block_size) {
@@ -268,7 +274,7 @@ struct ThreadPoolDevice {
private:
ThreadPoolInterface* pool_;
size_t num_threads_;
int num_threads_;
};
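Making num_threads_ an int keeps it consistent with numThreads() and the cost model's thread counts. A hedged sketch of driving parallelFor directly; it assumes Eigen::ThreadPool and TensorOpCost from the unsupported Tensor module, and the cost numbers are made up:

#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include <vector>
int main() {
  Eigen::ThreadPool pool(4);                 // 4 worker threads, ownership stays with the caller
  Eigen::ThreadPoolDevice device(&pool, 4);  // num_cores is now an int
  std::vector<float> x(1 << 16, 1.f), y(1 << 16, 0.f);
  Eigen::TensorOpCost cost(/*bytes_loaded=*/8, /*bytes_stored=*/4, /*compute_cycles=*/1);
  device.parallelFor(static_cast<Eigen::Index>(x.size()), cost,
                     [&](Eigen::Index first, Eigen::Index last) {
    for (Eigen::Index i = first; i < last; ++i) y[i] += x[i];  // each block runs on some pool thread
  });
  return 0;
}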

View File

@@ -426,6 +426,20 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
m_arg3Impl(op.arg3Expression(), device)
{
EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<Arg1Type, Device>::Layout) == static_cast<int>(TensorEvaluator<Arg3Type, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
typename internal::traits<Arg2Type>::StorageKind>::value),
STORAGE_KIND_MUST_MATCH)
EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
typename internal::traits<Arg3Type>::StorageKind>::value),
STORAGE_KIND_MUST_MATCH)
EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
typename internal::traits<Arg2Type>::Index>::value),
STORAGE_INDEX_MUST_MATCH)
EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
typename internal::traits<Arg3Type>::Index>::value),
STORAGE_INDEX_MUST_MATCH)
eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) && dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions()));
}

View File

@@ -227,22 +227,6 @@ struct traits<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprT
TernaryOp(typename Arg1XprType::Scalar,
typename Arg2XprType::Scalar,
typename Arg3XprType::Scalar)>::type Scalar;
EIGEN_STATIC_ASSERT(
(internal::is_same<typename traits<Arg1XprType>::StorageKind,
typename traits<Arg2XprType>::StorageKind>::value),
STORAGE_KIND_MUST_MATCH)
EIGEN_STATIC_ASSERT(
(internal::is_same<typename traits<Arg1XprType>::StorageKind,
typename traits<Arg3XprType>::StorageKind>::value),
STORAGE_KIND_MUST_MATCH)
EIGEN_STATIC_ASSERT(
(internal::is_same<typename traits<Arg1XprType>::Index,
typename traits<Arg2XprType>::Index>::value),
STORAGE_INDEX_MUST_MATCH)
EIGEN_STATIC_ASSERT(
(internal::is_same<typename traits<Arg1XprType>::Index,
typename traits<Arg3XprType>::Index>::value),
STORAGE_INDEX_MUST_MATCH)
typedef traits<Arg1XprType> XprTraits;
typedef typename traits<Arg1XprType>::StorageKind StorageKind;
typedef typename traits<Arg1XprType>::Index Index;

View File

@@ -84,6 +84,14 @@ struct functor_traits<scalar_sigmoid_op<T> > {
};
template<typename Reducer, typename Device>
struct reducer_traits {
enum {
Cost = 1,
PacketAccess = false
};
};
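reducer_traits attaches a per-element cost and a packet-access flag to every reducer, with a conservative default (cost 1, no vectorization) that the specializations below override. A custom reducer would advertise itself the same way; MySquaredSumReducer is hypothetical:

template <typename T> struct MySquaredSumReducer { /* reduce(), finalize(), ... */ };

template <typename T, typename Device>
struct reducer_traits<MySquaredSumReducer<T>, Device> {
  enum {
    Cost = NumTraits<T>::AddCost + NumTraits<T>::MulCost,  // one multiply plus one add per element
    PacketAccess = false                                   // scalar path only in this sketch
  };
};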
// Standard reduction functors
template <typename T> struct SumReducer
{
@@ -119,6 +127,15 @@ template <typename T> struct SumReducer
}
};
template <typename T, typename Device>
struct reducer_traits<SumReducer<T>, Device> {
enum {
Cost = NumTraits<T>::AddCost,
PacketAccess = PacketType<T, Device>::HasAdd
};
};
template <typename T> struct MeanReducer
{
static const bool PacketAccess = packet_traits<T>::HasAdd && !NumTraits<T>::IsInteger;
@ -162,6 +179,15 @@ template <typename T> struct MeanReducer
DenseIndex packetCount_;
};
template <typename T, typename Device>
struct reducer_traits<MeanReducer<T>, Device> {
enum {
Cost = NumTraits<T>::AddCost,
PacketAccess = PacketType<T, Device>::HasAdd
};
};
template <typename T> struct MaxReducer
{
static const bool PacketAccess = packet_traits<T>::HasMax;
@ -195,6 +221,15 @@ template <typename T> struct MaxReducer
}
};
template <typename T, typename Device>
struct reducer_traits<MaxReducer<T>, Device> {
enum {
Cost = NumTraits<T>::AddCost,
PacketAccess = PacketType<T, Device>::HasMax
};
};
template <typename T> struct MinReducer
{
static const bool PacketAccess = packet_traits<T>::HasMin;
@ -228,6 +263,14 @@ template <typename T> struct MinReducer
}
};
template <typename T, typename Device>
struct reducer_traits<MinReducer<T>, Device> {
enum {
Cost = NumTraits<T>::AddCost,
PacketAccess = PacketType<T, Device>::HasMin
};
};
template <typename T> struct ProdReducer
{
@ -263,6 +306,14 @@ template <typename T> struct ProdReducer
}
};
template <typename T, typename Device>
struct reducer_traits<ProdReducer<T>, Device> {
enum {
Cost = NumTraits<T>::MulCost,
PacketAccess = PacketType<T, Device>::HasMul
};
};
struct AndReducer
{
@ -280,6 +331,15 @@ struct AndReducer
}
};
template <typename Device>
struct reducer_traits<AndReducer, Device> {
enum {
Cost = 1,
PacketAccess = false
};
};
struct OrReducer {
static const bool PacketAccess = false;
static const bool IsStateful = false;
@ -295,6 +355,15 @@ struct OrReducer {
}
};
template <typename Device>
struct reducer_traits<OrReducer, Device> {
enum {
Cost = 1,
PacketAccess = false
};
};
// Argmin/Argmax reducers
template <typename T> struct ArgMaxTupleReducer
{
@ -312,6 +381,15 @@ template <typename T> struct ArgMaxTupleReducer
}
};
template <typename T, typename Device>
struct reducer_traits<ArgMaxTupleReducer<T>, Device> {
enum {
Cost = NumTraits<T>::AddCost,
PacketAccess = false
};
};
template <typename T> struct ArgMinTupleReducer
{
static const bool PacketAccess = false;
@ -328,6 +406,14 @@ template <typename T> struct ArgMinTupleReducer
}
};
template <typename T, typename Device>
struct reducer_traits<ArgMinTupleReducer<T>, Device> {
enum {
Cost = NumTraits<T>::AddCost,
PacketAccess = false
};
};
// Random number generation
namespace {

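The new reducer_traits template externalizes per-reducer cost and vectorization information, keyed on the device so that, for example, half-precision packet support on GPU can be reported (the CUDA reducers later in this diff switch to it). A hedged sketch of how a caller might consume the traits; ReductionPlan is a hypothetical helper, not part of the patch.

    // Estimate the cycle cost of reducing n coefficients with a given reducer
    // on a given device, and whether a packet (vectorized) path exists at all.
    template <typename Reducer, typename Device>
    struct ReductionPlan {
      typedef Eigen::internal::reducer_traits<Reducer, Device> Traits;
      static const bool vectorize = Traits::PacketAccess;
      static double cost(double n) {
        return n * static_cast<double>(Traits::Cost);  // one accumulation per coeff
      }
    };

For instance, ReductionPlan<SumReducer<float>, DefaultDevice>::vectorize is true exactly when PacketType<float, DefaultDevice>::HasAdd is, per the SumReducer specialization above.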
View File

@ -17,34 +17,62 @@ template<>
struct significant_decimals_impl<std::string>
: significant_decimals_default_impl<std::string, true>
{};
}
// Print the tensor as a 2d matrix
template <typename Tensor, int Rank>
struct TensorPrinter {
static void run (std::ostream& os, const Tensor& tensor) {
typedef typename internal::remove_const<typename Tensor::Scalar>::type Scalar;
typedef typename Tensor::Index Index;
const Index total_size = internal::array_prod(tensor.dimensions());
if (total_size > 0) {
const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions());
static const int layout = Tensor::Layout;
Map<const Array<Scalar, Dynamic, Dynamic, layout> > matrix(const_cast<Scalar*>(tensor.data()), first_dim, total_size/first_dim);
os << matrix;
}
}
};
// Print the tensor as a vector
template <typename Tensor>
struct TensorPrinter<Tensor, 1> {
static void run (std::ostream& os, const Tensor& tensor) {
typedef typename internal::remove_const<typename Tensor::Scalar>::type Scalar;
typedef typename Tensor::Index Index;
const Index total_size = internal::array_prod(tensor.dimensions());
if (total_size > 0) {
Map<const Array<Scalar, Dynamic, 1> > array(const_cast<Scalar*>(tensor.data()), total_size);
os << array;
}
}
};
// Print the tensor as a scalar
template <typename Tensor>
struct TensorPrinter<Tensor, 0> {
static void run (std::ostream& os, const Tensor& tensor) {
os << tensor.coeff(0);
}
};
}
template <typename T>
std::ostream& operator << (std::ostream& os, const TensorBase<T, ReadOnlyAccessors>& expr) {
typedef TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice> Evaluator;
typedef typename Evaluator::Dimensions Dimensions;
// Evaluate the expression if needed
TensorForcedEvalOp<const T> eval = expr.eval();
TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice> tensor(eval, DefaultDevice());
Evaluator tensor(eval, DefaultDevice());
tensor.evalSubExprsIfNeeded(NULL);
typedef typename internal::remove_const<typename T::Scalar>::type Scalar;
typedef typename T::Index Index;
typedef typename TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice>::Dimensions Dimensions;
const Index total_size = internal::array_prod(tensor.dimensions());
// Print the tensor as a 1d vector or a 2d matrix.
// Print the result
static const int rank = internal::array_size<Dimensions>::value;
if (rank == 0) {
os << tensor.coeff(0);
} else if (rank == 1) {
Map<const Array<Scalar, Dynamic, 1> > array(const_cast<Scalar*>(tensor.data()), total_size);
os << array;
} else {
const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions());
static const int layout = TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice>::Layout;
Map<const Array<Scalar, Dynamic, Dynamic, layout> > matrix(const_cast<Scalar*>(tensor.data()), first_dim, total_size/first_dim);
os << matrix;
}
internal::TensorPrinter<Evaluator, rank>::run(os, tensor);
// Cleanup.
tensor.cleanup();

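Printing is now dispatched at compile time on the tensor's rank through the TensorPrinter<Evaluator, Rank> partial specializations above, instead of the runtime rank if/else it replaces; this keeps the rank-0 path from ever instantiating the 2-D Map used for higher ranks. A usage sketch (behavior, not API, is what changed):

    #include <iostream>
    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::Tensor<float, 2> t(2, 3);
      t.setConstant(1.5f);
      std::cout << t << "\n";  // rank 2: printed as a 2x3 matrix

      Eigen::Tensor<float, 0> s;
      s.setConstant(4.2f);
      std::cout << s << "\n";  // rank 0: printed as a single scalar
      return 0;
    }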
View File

@ -47,22 +47,39 @@ template <> struct max_n_1<0> {
// Default packet types
template <typename Scalar, typename Device>
struct PacketType {
struct PacketType : internal::packet_traits<Scalar> {
typedef typename internal::packet_traits<Scalar>::type type;
enum { size = internal::unpacket_traits<type>::size };
};
// For CUDA packet types when using a GpuDevice
#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) && defined(EIGEN_HAS_CUDA_FP16)
template <>
struct PacketType<float, GpuDevice> {
typedef float4 type;
static const int size = 4;
};
template <>
struct PacketType<double, GpuDevice> {
typedef double2 type;
struct PacketType<half, GpuDevice> {
typedef half2 type;
static const int size = 2;
enum {
HasAdd = 1,
HasSub = 1,
HasMul = 1,
HasNegate = 1,
HasAbs = 1,
HasArg = 0,
HasAbs2 = 0,
HasMin = 1,
HasMax = 1,
HasConj = 0,
HasSetLinear = 0,
HasBlend = 0,
HasDiv = 1,
HasSqrt = 1,
HasRsqrt = 1,
HasExp = 1,
HasLog = 1,
HasLog1p = 0,
HasLog10 = 0,
HasPow = 1,
};
};
#endif

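Deriving PacketType from internal::packet_traits<Scalar> gives every scalar/device pair the full set of Has* capability flags by default, so the half2 specialization above only has to override them; the reducer_traits introduced earlier in this diff read exactly these flags. A small sketch of a capability query, assuming PacketType sits in the Eigen namespace as in the hunk above; describePackets is a hypothetical helper.

    template <typename Scalar, typename Device>
    void describePackets() {
      typedef Eigen::PacketType<Scalar, Device> P;
      const int  width   = P::size;    // packet width for this scalar/device pair
      const bool has_add = P::HasAdd;  // inherited from packet_traits, or
                                       // overridden as in PacketType<half, GpuDevice>
      (void)width; (void)has_add;
    }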
View File

@ -148,7 +148,7 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
EIGEN_DEVICE_FUNC Scalar* data() const { return const_cast<Scalar*>(m_impl.data()); }
const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
protected:
TensorEvaluator<ArgType, Device> m_impl;

View File

@ -130,15 +130,17 @@ __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num
if (block == 0) {
// We're the first block to run, initialize the output value
atomicExch(output, reducer.initialize());
unsigned int old = atomicExch(semaphore, 2u);
assert(old == 1u);
__threadfence();
atomicExch(semaphore, 2u);
}
else {
// Wait for the first block to initialize the output value.
// Use atomicCAS here to ensure that the reads aren't cached
unsigned int val = atomicCAS(semaphore, 2u, 2u);
while (val < 2u) {
unsigned int val;
do {
val = atomicCAS(semaphore, 2u, 2u);
}
while (val < 2u);
}
}
}
@ -166,12 +168,8 @@ __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num
}
if (gridDim.x > 1 && threadIdx.x == 0) {
unsigned int ticket = atomicInc(semaphore, UINT_MAX);
assert(ticket >= 2u);
if (ticket == gridDim.x + 1) {
// We're the last block, reset the semaphore
*semaphore = 0;
}
// Let the last block reset the semaphore
atomicInc(semaphore, gridDim.x + 1);
}
}
@ -330,10 +328,10 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
// Unfortunately nvidia doesn't support well exotic types such as complex,
// so reduce the scope of the optimized version of the code to the simple case
// of floats and half floats.
#ifdef EIGEN_HAS_CUDA_FP16
static const bool HasOptimizedImplementation = !Op::IsStateful &&
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && Op::PacketAccess));
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
#else
static const bool HasOptimizedImplementation = !Op::IsStateful &&
internal::is_same<typename Self::CoeffReturnType, float>::value;
@ -348,7 +346,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
return;
}
FullReductionLauncher<Self, Op, OutputType, Op::PacketAccess>::run(self, reducer, device, output, num_coeffs);
FullReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs);
}
};
@ -610,7 +608,7 @@ struct InnerReducer<Self, Op, GpuDevice> {
#ifdef EIGEN_HAS_CUDA_FP16
static const bool HasOptimizedImplementation = !Op::IsStateful &&
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && Op::PacketAccess));
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
#else
static const bool HasOptimizedImplementation = !Op::IsStateful &&
internal::is_same<typename Self::CoeffReturnType, float>::value;
@ -629,7 +627,7 @@ struct InnerReducer<Self, Op, GpuDevice> {
return true;
}
return InnerReductionLauncher<Self, Op, OutputType, Op::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals);
return InnerReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals);
}
};

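The rewritten synchronization leans on the wrap-around semantics of CUDA's atomicInc: atomicInc(addr, bound) atomically computes ((old >= bound) ? 0 : old + 1) and returns old. With bound gridDim.x + 1 and the semaphore sitting at 2 once block 0 has published the initial output value (behind a __threadfence so that write is visible first), the gridDim.x per-block increments step it up to gridDim.x + 1 and the final one wraps it back to 0, which is precisely the reset the deleted explicit branch performed. A comment-only walk-through for a 3-block launch; the initial atomicCAS(semaphore, 0u, 1u) that elects block 0 is assumed, as it sits outside this hunk.

    // gridDim.x == 3, so the atomicInc bound is 4:
    //   block 0:  writes initial output; __threadfence(); atomicExch(semaphore, 2u)
    //   others :  spin on atomicCAS(semaphore, 2u, 2u) until the value is >= 2
    //   finish :  each block's thread 0 calls atomicInc(semaphore, 4u)
    //               1st finisher: old == 2 -> semaphore = 3
    //               2nd finisher: old == 3 -> semaphore = 4
    //               3rd finisher: old == 4 >= 4 -> semaphore wraps to 0 (reset)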
View File

@ -57,8 +57,8 @@ public:
typedef typename Eigen::internal::traits<TensorScanOp>::Index Index;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorScanOp(
const XprType& expr, const Index& axis, const Op& op = Op())
: m_expr(expr), m_axis(axis), m_accumulator(op) {}
const XprType& expr, const Index& axis, bool exclusive = false, const Op& op = Op())
: m_expr(expr), m_axis(axis), m_accumulator(op), m_exclusive(exclusive) {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Index axis() const { return m_axis; }
@ -66,11 +66,14 @@ public:
const XprType& expression() const { return m_expr; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Op accumulator() const { return m_accumulator; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
bool exclusive() const { return m_exclusive; }
protected:
typename XprType::Nested m_expr;
const Index m_axis;
const Op m_accumulator;
const bool m_exclusive;
};
// Eval as rvalue
@ -81,7 +84,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
typedef typename XprType::Index Index;
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
typedef DSizes<Index, NumDims> Dimensions;
typedef typename XprType::Scalar Scalar;
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
@ -99,6 +102,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
: m_impl(op.expression(), device),
m_device(device),
m_axis(op.axis()),
m_exclusive(op.exclusive()),
m_accumulator(op.accumulator()),
m_dimensions(m_impl.dimensions()),
m_size(m_dimensions[m_axis]),
@ -106,7 +110,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
m_output(NULL) {
// Accumulating a scalar isn't supported.
EIGEN_STATIC_ASSERT(NumDims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
eigen_assert(m_axis >= 0 && m_axis < NumDims);
// Compute stride of scan axis
@ -122,7 +126,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
return m_dimensions;
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
@ -136,7 +140,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
return true;
}
}
template<int LoadMode>
EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const {
return internal::ploadt<PacketReturnType, LoadMode>(m_output + index);
@ -152,6 +156,10 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
return m_output[index];
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const {
return TensorOpCost(sizeof(CoeffReturnType), 0, 0);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
if (m_output != NULL) {
m_device.deallocate(m_output);
@ -164,6 +172,7 @@ protected:
TensorEvaluator<ArgType, Device> m_impl;
const Device& m_device;
const Index m_axis;
const bool m_exclusive;
Op m_accumulator;
const Dimensions& m_dimensions;
const Index& m_size;
@ -172,7 +181,7 @@ protected:
// TODO(ibab) Parallelize this single-threaded implementation if desired
EIGEN_DEVICE_FUNC void accumulateTo(Scalar* data) {
// We fix the index along the scan axis to 0 and perform an
// We fix the index along the scan axis to 0 and perform a
// scan per remaining entry. The iteration is split into two nested
// loops to avoid an integer division by keeping track of each idx1 and idx2.
for (Index idx1 = 0; idx1 < dimensions().TotalSize() / m_size; idx1 += m_stride) {
@ -180,12 +189,17 @@ protected:
// Calculate the starting offset for the scan
Index offset = idx1 * m_size + idx2;
// Compute the prefix sum along the axis, starting at the calculated offset
// Compute the scan along the axis, starting at the calculated offset
CoeffReturnType accum = m_accumulator.initialize();
for (Index idx3 = 0; idx3 < m_size; idx3++) {
Index curr = offset + idx3 * m_stride;
m_accumulator.reduce(m_impl.coeff(curr), &accum);
data[curr] = m_accumulator.finalize(accum);
if (m_exclusive) {
data[curr] = m_accumulator.finalize(accum);
m_accumulator.reduce(m_impl.coeff(curr), &accum);
} else {
m_accumulator.reduce(m_impl.coeff(curr), &accum);
data[curr] = m_accumulator.finalize(accum);
}
}
}
}

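The only difference between the two branches is whether the current input is folded in before or after the store, which is exactly the inclusive/exclusive distinction. A worked example for a running sum (SumReducer, whose initialize() is 0) over the axis values [1, 2, 3, 4]:

    // m_exclusive == false (inclusive): reduce, then store
    //   data = [1, 3, 6, 10]
    // m_exclusive == true  (exclusive): store, then reduce
    //   data = [0, 1, 3, 6]   // entry i is the sum of strictly earlier inputs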
Some files were not shown because too many files have changed in this diff.