diff --git a/Eigen/Core b/Eigen/Core index 0cf101636..0189654a6 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -355,6 +355,12 @@ using std::ptrdiff_t; #include "src/Core/ArrayBase.h" #include "src/Core/ArrayWrapper.h" +#ifdef EIGEN_ENABLE_EVALUATORS +#include "src/Core/Product.h" +#include "src/Core/CoreEvaluators.h" +#include "src/Core/AssignEvaluator.h" +#endif + #ifdef EIGEN_USE_BLAS #include "src/Core/products/GeneralMatrixMatrix_MKL.h" #include "src/Core/products/GeneralMatrixVector_MKL.h" diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h new file mode 100644 index 000000000..006a87d47 --- /dev/null +++ b/Eigen/src/Core/AssignEvaluator.h @@ -0,0 +1,682 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Benoit Jacob +// Copyright (C) 2011 Gael Guennebaud +// Copyright (C) 2011 Jitse Niesen +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + +#ifndef EIGEN_ASSIGN_EVALUATOR_H +#define EIGEN_ASSIGN_EVALUATOR_H + +// This implementation is based on Assign.h + +namespace internal { + +/*************************************************************************** +* Part 1 : the logic deciding a strategy for traversal and unrolling * +***************************************************************************/ + +// copy_using_evaluator_traits is based on assign_traits +// (actually, it's identical) + +template +struct copy_using_evaluator_traits +{ +public: + enum { + DstIsAligned = Derived::Flags & AlignedBit, + DstHasDirectAccess = Derived::Flags & DirectAccessBit, + SrcIsAligned = OtherDerived::Flags & AlignedBit, + JointAlignment = bool(DstIsAligned) && bool(SrcIsAligned) ? Aligned : Unaligned + }; + +private: + enum { + InnerSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::SizeAtCompileTime) + : int(Derived::Flags)&RowMajorBit ? int(Derived::ColsAtCompileTime) + : int(Derived::RowsAtCompileTime), + InnerMaxSize = int(Derived::IsVectorAtCompileTime) ? int(Derived::MaxSizeAtCompileTime) + : int(Derived::Flags)&RowMajorBit ? 
int(Derived::MaxColsAtCompileTime) + : int(Derived::MaxRowsAtCompileTime), + MaxSizeAtCompileTime = Derived::SizeAtCompileTime, + PacketSize = packet_traits::size + }; + + enum { + StorageOrdersAgree = (int(Derived::IsRowMajor) == int(OtherDerived::IsRowMajor)), + MightVectorize = StorageOrdersAgree + && (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit), + MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 + && int(DstIsAligned) && int(SrcIsAligned), + MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit), + MayLinearVectorize = MightVectorize && MayLinearize && DstHasDirectAccess + && (DstIsAligned || MaxSizeAtCompileTime == Dynamic), + /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, + so it's only good for large enough sizes. */ + MaySliceVectorize = MightVectorize && DstHasDirectAccess + && (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*PacketSize) + /* slice vectorization can be slow, so we only want it if the slices are big, which is + indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block + in a fixed-size matrix */ + }; + +public: + enum { + Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal) + : int(MayLinearVectorize) ? int(LinearVectorizedTraversal) + : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) + : int(MayLinearize) ? int(LinearTraversal) + : int(DefaultTraversal), + Vectorized = int(Traversal) == InnerVectorizedTraversal + || int(Traversal) == LinearVectorizedTraversal + || int(Traversal) == SliceVectorizedTraversal + }; + +private: + enum { + UnrollingLimit = EIGEN_UNROLLING_LIMIT * (Vectorized ? int(PacketSize) : 1), + MayUnrollCompletely = int(Derived::SizeAtCompileTime) != Dynamic + && int(OtherDerived::CoeffReadCost) != Dynamic + && int(Derived::SizeAtCompileTime) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit), + MayUnrollInner = int(InnerSize) != Dynamic + && int(OtherDerived::CoeffReadCost) != Dynamic + && int(InnerSize) * int(OtherDerived::CoeffReadCost) <= int(UnrollingLimit) + }; + +public: + enum { + Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal)) + ? ( + int(MayUnrollCompletely) ? int(CompleteUnrolling) + : int(MayUnrollInner) ? int(InnerUnrolling) + : int(NoUnrolling) + ) + : int(Traversal) == int(LinearVectorizedTraversal) + ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) + : int(NoUnrolling) ) + : int(Traversal) == int(LinearTraversal) + ? ( bool(MayUnrollCompletely) ? 
int(CompleteUnrolling) + : int(NoUnrolling) ) + : int(NoUnrolling) + }; + +#ifdef EIGEN_DEBUG_ASSIGN + static void debug() + { + EIGEN_DEBUG_VAR(DstIsAligned) + EIGEN_DEBUG_VAR(SrcIsAligned) + EIGEN_DEBUG_VAR(JointAlignment) + EIGEN_DEBUG_VAR(InnerSize) + EIGEN_DEBUG_VAR(InnerMaxSize) + EIGEN_DEBUG_VAR(PacketSize) + EIGEN_DEBUG_VAR(StorageOrdersAgree) + EIGEN_DEBUG_VAR(MightVectorize) + EIGEN_DEBUG_VAR(MayLinearize) + EIGEN_DEBUG_VAR(MayInnerVectorize) + EIGEN_DEBUG_VAR(MayLinearVectorize) + EIGEN_DEBUG_VAR(MaySliceVectorize) + EIGEN_DEBUG_VAR(Traversal) + EIGEN_DEBUG_VAR(UnrollingLimit) + EIGEN_DEBUG_VAR(MayUnrollCompletely) + EIGEN_DEBUG_VAR(MayUnrollInner) + EIGEN_DEBUG_VAR(Unrolling) + } +#endif +}; + +/*************************************************************************** +* Part 2 : meta-unrollers +***************************************************************************/ + +/************************ +*** Default traversal *** +************************/ + +template +struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling +{ + typedef typename DstEvaluatorType::XprType DstXprType; + + enum { + outer = Index / DstXprType::InnerSizeAtCompileTime, + inner = Index % DstXprType::InnerSizeAtCompileTime + }; + + EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, + SrcEvaluatorType &srcEvaluator) + { + dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator); + copy_using_evaluator_DefaultTraversal_CompleteUnrolling + + ::run(dstEvaluator, srcEvaluator); + } +}; + +template +struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling +{ + EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { } +}; + +template +struct copy_using_evaluator_DefaultTraversal_InnerUnrolling +{ + EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, + SrcEvaluatorType &srcEvaluator, + int outer) + { + dstEvaluator.copyCoeffByOuterInner(outer, Index, srcEvaluator); + copy_using_evaluator_DefaultTraversal_InnerUnrolling + + ::run(dstEvaluator, srcEvaluator, outer); + } +}; + +template +struct copy_using_evaluator_DefaultTraversal_InnerUnrolling +{ + EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, int) { } +}; + +/*********************** +*** Linear traversal *** +***********************/ + +template +struct copy_using_evaluator_LinearTraversal_CompleteUnrolling +{ + EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, + SrcEvaluatorType &srcEvaluator) + { + dstEvaluator.copyCoeff(Index, srcEvaluator); + copy_using_evaluator_LinearTraversal_CompleteUnrolling + + ::run(dstEvaluator, srcEvaluator); + } +}; + +template +struct copy_using_evaluator_LinearTraversal_CompleteUnrolling +{ + EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { } +}; + +/************************** +*** Inner vectorization *** +**************************/ + +template +struct copy_using_evaluator_innervec_CompleteUnrolling +{ + typedef typename DstEvaluatorType::XprType DstXprType; + typedef typename SrcEvaluatorType::XprType SrcXprType; + + enum { + outer = Index / DstXprType::InnerSizeAtCompileTime, + inner = Index % DstXprType::InnerSizeAtCompileTime, + JointAlignment = copy_using_evaluator_traits::JointAlignment + }; + + EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, + SrcEvaluatorType &srcEvaluator) + { + dstEvaluator.template copyPacketByOuterInner(outer, inner, srcEvaluator); + enum { NextIndex = Index + packet_traits::size }; + copy_using_evaluator_innervec_CompleteUnrolling + + 
+      ::run(dstEvaluator, srcEvaluator);
+  }
+};
+
+template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
+struct copy_using_evaluator_innervec_CompleteUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
+{
+  EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&) { }
+};
+
+template<typename DstEvaluatorType, typename SrcEvaluatorType, int Index, int Stop>
+struct copy_using_evaluator_innervec_InnerUnrolling
+{
+  EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator,
+                                      SrcEvaluatorType &srcEvaluator,
+                                      int outer)
+  {
+    dstEvaluator.template copyPacketByOuterInner<Aligned, Aligned>(outer, Index, srcEvaluator);
+    typedef typename DstEvaluatorType::XprType DstXprType;
+    enum { NextIndex = Index + packet_traits<typename DstXprType::Scalar>::size };
+    copy_using_evaluator_innervec_InnerUnrolling
+      <DstEvaluatorType, SrcEvaluatorType, NextIndex, Stop>
+      ::run(dstEvaluator, srcEvaluator, outer);
+  }
+};
+
+template<typename DstEvaluatorType, typename SrcEvaluatorType, int Stop>
+struct copy_using_evaluator_innervec_InnerUnrolling<DstEvaluatorType, SrcEvaluatorType, Stop, Stop>
+{
+  EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, int) { }
+};
+
+/***************************************************************************
+* Part 3 : implementation of all cases
+***************************************************************************/
+
+// copy_using_evaluator_impl is based on assign_impl
+
+template<typename DstXprType, typename SrcXprType,
+         int Traversal = copy_using_evaluator_traits<DstXprType, SrcXprType>::Traversal,
+         int Unrolling = copy_using_evaluator_traits<DstXprType, SrcXprType>::Unrolling>
+struct copy_using_evaluator_impl;
+
+/************************
+*** Default traversal ***
+************************/
+
+template<typename DstXprType, typename SrcXprType>
+struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, NoUnrolling>
+{
+  static void run(DstXprType& dst, const SrcXprType& src)
+  {
+    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
+    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
+    typedef typename DstXprType::Index Index;
+
+    DstEvaluatorType dstEvaluator(dst);
+    SrcEvaluatorType srcEvaluator(src);
+
+    for(Index outer = 0; outer < dst.outerSize(); ++outer) {
+      for(Index inner = 0; inner < dst.innerSize(); ++inner) {
+        dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator);
+      }
+    }
+  }
+};
+
+template<typename DstXprType, typename SrcXprType>
+struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, CompleteUnrolling>
+{
+  EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+  {
+    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
+    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
+
+    DstEvaluatorType dstEvaluator(dst);
+    SrcEvaluatorType srcEvaluator(src);
+
+    copy_using_evaluator_DefaultTraversal_CompleteUnrolling
+      <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::SizeAtCompileTime>
+      ::run(dstEvaluator, srcEvaluator);
+  }
+};
+
+template<typename DstXprType, typename SrcXprType>
+struct copy_using_evaluator_impl<DstXprType, SrcXprType, DefaultTraversal, InnerUnrolling>
+{
+  typedef typename DstXprType::Index Index;
+  EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src)
+  {
+    typedef typename evaluator<DstXprType>::type DstEvaluatorType;
+    typedef typename evaluator<SrcXprType>::type SrcEvaluatorType;
+
+    DstEvaluatorType dstEvaluator(dst);
+    SrcEvaluatorType srcEvaluator(src);
+
+    const Index outerSize = dst.outerSize();
+    for(Index outer = 0; outer < outerSize; ++outer)
+      copy_using_evaluator_DefaultTraversal_InnerUnrolling
+        <DstEvaluatorType, SrcEvaluatorType, 0, DstXprType::InnerSizeAtCompileTime>
+        ::run(dstEvaluator, srcEvaluator, outer);
+  }
+};
+
+/***************************
+*** Linear vectorization ***
+***************************/
+
+template <bool IsAligned = false>
+struct unaligned_copy_using_evaluator_impl
+{
+  // if IsAligned = true, then do nothing
+  template <typename SrcEvaluatorType, typename DstEvaluatorType>
+  static EIGEN_STRONG_INLINE void run(const SrcEvaluatorType&, DstEvaluatorType&,
+                                      typename SrcEvaluatorType::Index, typename SrcEvaluatorType::Index) {}
+};
+
+template <>
+struct unaligned_copy_using_evaluator_impl<false>
+{
+  // MSVC must not inline this function. If it does, it fails to optimize the
+  // packet access path.
+#ifdef _MSC_VER + template + static EIGEN_DONT_INLINE void run(DstEvaluatorType &dstEvaluator, + const SrcEvaluatorType &srcEvaluator, + typename DstEvaluatorType::Index start, + typename DstEvaluatorType::Index end) +#else + template + static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, + const SrcEvaluatorType &srcEvaluator, + typename DstEvaluatorType::Index start, + typename DstEvaluatorType::Index end) +#endif + { + for (typename DstEvaluatorType::Index index = start; index < end; ++index) + dstEvaluator.copyCoeff(index, srcEvaluator); + } +}; + +template +struct copy_using_evaluator_impl +{ + EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + typedef typename DstXprType::Index Index; + + DstEvaluatorType dstEvaluator(dst); + SrcEvaluatorType srcEvaluator(src); + + const Index size = dst.size(); + typedef packet_traits PacketTraits; + enum { + packetSize = PacketTraits::size, + dstIsAligned = int(copy_using_evaluator_traits::DstIsAligned), + dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : dstIsAligned, + srcAlignment = copy_using_evaluator_traits::JointAlignment + }; + const Index alignedStart = dstIsAligned ? 0 : first_aligned(&dstEvaluator.coeffRef(0), size); + const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; + + unaligned_copy_using_evaluator_impl::run(dstEvaluator, srcEvaluator, 0, alignedStart); + + for(Index index = alignedStart; index < alignedEnd; index += packetSize) + { + dstEvaluator.template copyPacket(index, srcEvaluator); + } + + unaligned_copy_using_evaluator_impl<>::run(dstEvaluator, srcEvaluator, alignedEnd, size); + } +}; + +template +struct copy_using_evaluator_impl +{ + typedef typename DstXprType::Index Index; + EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst); + SrcEvaluatorType srcEvaluator(src); + + enum { size = DstXprType::SizeAtCompileTime, + packetSize = packet_traits::size, + alignedSize = (size/packetSize)*packetSize }; + + copy_using_evaluator_innervec_CompleteUnrolling + + ::run(dstEvaluator, srcEvaluator); + copy_using_evaluator_DefaultTraversal_CompleteUnrolling + + ::run(dstEvaluator, srcEvaluator); + } +}; + +/************************** +*** Inner vectorization *** +**************************/ + +template +struct copy_using_evaluator_impl +{ + inline static void run(DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + typedef typename DstXprType::Index Index; + + DstEvaluatorType dstEvaluator(dst); + SrcEvaluatorType srcEvaluator(src); + + const Index innerSize = dst.innerSize(); + const Index outerSize = dst.outerSize(); + const Index packetSize = packet_traits::size; + for(Index outer = 0; outer < outerSize; ++outer) + for(Index inner = 0; inner < innerSize; inner+=packetSize) { + dstEvaluator.template copyPacketByOuterInner(outer, inner, srcEvaluator); + } + } +}; + +template +struct copy_using_evaluator_impl +{ + EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst); + SrcEvaluatorType srcEvaluator(src); + + 
copy_using_evaluator_innervec_CompleteUnrolling + + ::run(dstEvaluator, srcEvaluator); + } +}; + +template +struct copy_using_evaluator_impl +{ + typedef typename DstXprType::Index Index; + EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst); + SrcEvaluatorType srcEvaluator(src); + + const Index outerSize = dst.outerSize(); + for(Index outer = 0; outer < outerSize; ++outer) + copy_using_evaluator_innervec_InnerUnrolling + + ::run(dstEvaluator, srcEvaluator, outer); + } +}; + +/*********************** +*** Linear traversal *** +***********************/ + +template +struct copy_using_evaluator_impl +{ + inline static void run(DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + typedef typename DstXprType::Index Index; + + DstEvaluatorType dstEvaluator(dst); + SrcEvaluatorType srcEvaluator(src); + + const Index size = dst.size(); + for(Index i = 0; i < size; ++i) + dstEvaluator.copyCoeff(i, srcEvaluator); + } +}; + +template +struct copy_using_evaluator_impl +{ + EIGEN_STRONG_INLINE static void run(DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst); + SrcEvaluatorType srcEvaluator(src); + + copy_using_evaluator_LinearTraversal_CompleteUnrolling + + ::run(dstEvaluator, srcEvaluator); + } +}; + +/************************** +*** Slice vectorization *** +***************************/ + +template +struct copy_using_evaluator_impl +{ + inline static void run(DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + typedef typename DstXprType::Index Index; + + DstEvaluatorType dstEvaluator(dst); + SrcEvaluatorType srcEvaluator(src); + + typedef packet_traits PacketTraits; + enum { + packetSize = PacketTraits::size, + alignable = PacketTraits::AlignedOnScalar, + dstAlignment = alignable ? Aligned : int(copy_using_evaluator_traits::DstIsAligned) , + srcAlignment = copy_using_evaluator_traits::JointAlignment + }; + const Index packetAlignedMask = packetSize - 1; + const Index innerSize = dst.innerSize(); + const Index outerSize = dst.outerSize(); + const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0; + Index alignedStart = ((!alignable) || copy_using_evaluator_traits::DstIsAligned) ? 
0
+                                      : first_aligned(&dstEvaluator.coeffRef(0,0), innerSize);
+
+    for(Index outer = 0; outer < outerSize; ++outer)
+    {
+      const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask);
+      // do the non-vectorizable part of the assignment
+      for(Index inner = 0; inner < alignedStart; ++inner) {
+        dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator);
+      }
+
+      // do the vectorizable part of the assignment
+      for(Index inner = alignedStart; inner < alignedEnd; inner += packetSize) {
+        dstEvaluator.template copyPacketByOuterInner<dstAlignment, Unaligned>(outer, inner, srcEvaluator);
+      }
+
+      // do the non-vectorizable part of the assignment
+      for(Index inner = alignedEnd; inner < innerSize; ++inner) {
+        dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator);
+      }
+
+      alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
+    }
+  }
+};
+
+/***************************************************************************
+* Part 4 : Entry points
+***************************************************************************/
+
+// Based on DenseBase::lazyAssign()
+
+template<typename DstXprType, typename SrcXprType>
+const DstXprType& copy_using_evaluator(const DstXprType& dst, const SrcXprType& src)
+{
+#ifdef EIGEN_DEBUG_ASSIGN
+  internal::copy_using_evaluator_traits<DstXprType, SrcXprType>::debug();
+#endif
+  copy_using_evaluator_impl<DstXprType, SrcXprType>::run(const_cast<DstXprType&>(dst), src);
+  return dst;
+}
+
+// Based on DenseBase::swap()
+// TODO: Check whether we need to do something special for swapping two
+// Arrays or Matrices.
+
+template<typename DstXprType, typename SrcXprType>
+void swap_using_evaluator(const DstXprType& dst, const SrcXprType& src)
+{
+  copy_using_evaluator(SwapWrapper<DstXprType>(const_cast<DstXprType&>(dst)), src);
+}
+
+// Based on MatrixBase::operator+= (in CwiseBinaryOp.h)
+template<typename DstXprType, typename SrcXprType>
+void add_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src)
+{
+  typedef typename DstXprType::Scalar Scalar;
+  SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
+  copy_using_evaluator(tmp, src.derived());
+}
+
+// Based on ArrayBase::operator+=
+template<typename DstXprType, typename SrcXprType>
+void add_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
+{
+  typedef typename DstXprType::Scalar Scalar;
+  SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
+  copy_using_evaluator(tmp, src.derived());
+}
+
+// TODO: Add add_assign_using_evaluator for EigenBase ?
+
+template<typename DstXprType, typename SrcXprType>
+void subtract_assign_using_evaluator(const MatrixBase<DstXprType>& dst, const MatrixBase<SrcXprType>& src)
+{
+  typedef typename DstXprType::Scalar Scalar;
+  SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
+  copy_using_evaluator(tmp, src.derived());
+}
+
+template<typename DstXprType, typename SrcXprType>
+void subtract_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
+{
+  typedef typename DstXprType::Scalar Scalar;
+  SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
+  copy_using_evaluator(tmp, src.derived());
+}
+
+template<typename DstXprType, typename SrcXprType>
+void multiply_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
+{
+  typedef typename DstXprType::Scalar Scalar;
+  SelfCwiseBinaryOp<internal::scalar_product_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
+  copy_using_evaluator(tmp, src.derived());
+}
+
+template<typename DstXprType, typename SrcXprType>
+void divide_assign_using_evaluator(const ArrayBase<DstXprType>& dst, const ArrayBase<SrcXprType>& src)
+{
+  typedef typename DstXprType::Scalar Scalar;
+  SelfCwiseBinaryOp<internal::scalar_quotient_op<Scalar>, DstXprType, SrcXprType> tmp(dst.const_cast_derived());
+  copy_using_evaluator(tmp, src.derived());
+}
+
+
+} // namespace internal
+
+#endif // EIGEN_ASSIGN_EVALUATOR_H
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
new file mode 100644
index 000000000..c060913fb
--- /dev/null
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -0,0 +1,1157 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+// +// Copyright (C) 2011 Benoit Jacob +// Copyright (C) 2011 Gael Guennebaud +// Copyright (C) 2011 Jitse Niesen +// +// Eigen is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 3 of the License, or (at your option) any later version. +// +// Alternatively, you can redistribute it and/or +// modify it under the terms of the GNU General Public License as +// published by the Free Software Foundation; either version 2 of +// the License, or (at your option) any later version. +// +// Eigen is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +// FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License and a copy of the GNU General Public License along with +// Eigen. If not, see . + + +#ifndef EIGEN_COREEVALUATORS_H +#define EIGEN_COREEVALUATORS_H + +namespace internal { + +template +struct evaluator_impl {}; + +template +struct evaluator +{ + typedef evaluator_impl type; +}; + +// TODO: Think about const-correctness + +template +struct evaluator +{ + typedef evaluator_impl type; +}; + +// ---------- base class for all writable evaluators ---------- + +template +struct evaluator_impl_base +{ + typedef typename ExpressionType::Index Index; + + template + void copyCoeff(Index row, Index col, const OtherEvaluatorType& other) + { + derived().coeffRef(row, col) = other.coeff(row, col); + } + + template + void copyCoeffByOuterInner(Index outer, Index inner, const OtherEvaluatorType& other) + { + Index row = rowIndexByOuterInner(outer, inner); + Index col = colIndexByOuterInner(outer, inner); + derived().copyCoeff(row, col, other); + } + + template + void copyCoeff(Index index, const OtherEvaluatorType& other) + { + derived().coeffRef(index) = other.coeff(index); + } + + template + void copyPacket(Index row, Index col, const OtherEvaluatorType& other) + { + derived().template writePacket(row, col, + other.template packet(row, col)); + } + + template + void copyPacketByOuterInner(Index outer, Index inner, const OtherEvaluatorType& other) + { + Index row = rowIndexByOuterInner(outer, inner); + Index col = colIndexByOuterInner(outer, inner); + derived().template copyPacket(row, col, other); + } + + template + void copyPacket(Index index, const OtherEvaluatorType& other) + { + derived().template writePacket(index, + other.template packet(index)); + } + + Index rowIndexByOuterInner(Index outer, Index inner) const + { + return int(ExpressionType::RowsAtCompileTime) == 1 ? 0 + : int(ExpressionType::ColsAtCompileTime) == 1 ? inner + : int(ExpressionType::Flags)&RowMajorBit ? outer + : inner; + } + + Index colIndexByOuterInner(Index outer, Index inner) const + { + return int(ExpressionType::ColsAtCompileTime) == 1 ? 0 + : int(ExpressionType::RowsAtCompileTime) == 1 ? inner + : int(ExpressionType::Flags)&RowMajorBit ? 
inner + : outer; + } + + evaluator_impl& derived() + { + return *static_cast*>(this); + } +}; + +// -------------------- Transpose -------------------- + +template +struct evaluator_impl > + : evaluator_impl_base > +{ + typedef Transpose XprType; + + evaluator_impl(const XprType& t) : m_argImpl(t.nestedExpression()) {} + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketScalar PacketScalar; + typedef typename XprType::PacketReturnType PacketReturnType; + + CoeffReturnType coeff(Index row, Index col) const + { + return m_argImpl.coeff(col, row); + } + + CoeffReturnType coeff(Index index) const + { + return m_argImpl.coeff(index); + } + + Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(col, row); + } + + typename XprType::Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(index); + } + + template + PacketReturnType packet(Index row, Index col) const + { + return m_argImpl.template packet(col, row); + } + + template + PacketReturnType packet(Index index) const + { + return m_argImpl.template packet(index); + } + + template + void writePacket(Index row, Index col, const PacketScalar& x) + { + m_argImpl.template writePacket(col, row, x); + } + + template + void writePacket(Index index, const PacketScalar& x) + { + m_argImpl.template writePacket(index, x); + } + +protected: + typename evaluator::type m_argImpl; +}; + +// -------------------- Matrix and Array -------------------- +// +// evaluator_impl is a common base class for the +// Matrix and Array evaluators. + +template +struct evaluator_impl > + : evaluator_impl_base +{ + typedef PlainObjectBase PlainObjectType; + + evaluator_impl(const PlainObjectType& m) : m_plainObject(m) {} + + typedef typename PlainObjectType::Index Index; + typedef typename PlainObjectType::Scalar Scalar; + typedef typename PlainObjectType::CoeffReturnType CoeffReturnType; + typedef typename PlainObjectType::PacketScalar PacketScalar; + typedef typename PlainObjectType::PacketReturnType PacketReturnType; + + CoeffReturnType coeff(Index i, Index j) const + { + return m_plainObject.coeff(i, j); + } + + CoeffReturnType coeff(Index index) const + { + return m_plainObject.coeff(index); + } + + Scalar& coeffRef(Index i, Index j) + { + return m_plainObject.const_cast_derived().coeffRef(i, j); + } + + Scalar& coeffRef(Index index) + { + return m_plainObject.const_cast_derived().coeffRef(index); + } + + template + PacketReturnType packet(Index row, Index col) const + { + return m_plainObject.template packet(row, col); + } + + template + PacketReturnType packet(Index index) const + { + return m_plainObject.template packet(index); + } + + template + void writePacket(Index row, Index col, const PacketScalar& x) + { + m_plainObject.const_cast_derived().template writePacket(row, col, x); + } + + template + void writePacket(Index index, const PacketScalar& x) + { + m_plainObject.const_cast_derived().template writePacket(index, x); + } + +protected: + const PlainObjectType &m_plainObject; +}; + +template +struct evaluator_impl > + : evaluator_impl > > +{ + typedef Matrix XprType; + + evaluator_impl(const XprType& m) + : evaluator_impl >(m) + { } +}; + +template +struct evaluator_impl > + : evaluator_impl > > +{ + typedef Array XprType; + + evaluator_impl(const XprType& m) + : evaluator_impl >(m) + { } +}; + +// -------------------- CwiseNullaryOp -------------------- + +template +struct evaluator_impl > +{ + typedef 
CwiseNullaryOp XprType; + + evaluator_impl(const XprType& n) + : m_functor(n.functor()) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketScalar PacketScalar; + + CoeffReturnType coeff(Index row, Index col) const + { + return m_functor(row, col); + } + + CoeffReturnType coeff(Index index) const + { + return m_functor(index); + } + + template + PacketScalar packet(Index row, Index col) const + { + return m_functor.packetOp(row, col); + } + + template + PacketScalar packet(Index index) const + { + return m_functor.packetOp(index); + } + +protected: + const NullaryOp m_functor; +}; + +// -------------------- CwiseUnaryOp -------------------- + +template +struct evaluator_impl > +{ + typedef CwiseUnaryOp XprType; + + evaluator_impl(const XprType& op) + : m_functor(op.functor()), + m_argImpl(op.nestedExpression()) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketScalar PacketScalar; + + CoeffReturnType coeff(Index row, Index col) const + { + return m_functor(m_argImpl.coeff(row, col)); + } + + CoeffReturnType coeff(Index index) const + { + return m_functor(m_argImpl.coeff(index)); + } + + template + PacketScalar packet(Index row, Index col) const + { + return m_functor.packetOp(m_argImpl.template packet(row, col)); + } + + template + PacketScalar packet(Index index) const + { + return m_functor.packetOp(m_argImpl.template packet(index)); + } + +protected: + const UnaryOp m_functor; + typename evaluator::type m_argImpl; +}; + +// -------------------- CwiseBinaryOp -------------------- + +template +struct evaluator_impl > +{ + typedef CwiseBinaryOp XprType; + + evaluator_impl(const XprType& xpr) + : m_functor(xpr.functor()), + m_lhsImpl(xpr.lhs()), + m_rhsImpl(xpr.rhs()) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketScalar PacketScalar; + + CoeffReturnType coeff(Index row, Index col) const + { + return m_functor(m_lhsImpl.coeff(row, col), m_rhsImpl.coeff(row, col)); + } + + CoeffReturnType coeff(Index index) const + { + return m_functor(m_lhsImpl.coeff(index), m_rhsImpl.coeff(index)); + } + + template + PacketScalar packet(Index row, Index col) const + { + return m_functor.packetOp(m_lhsImpl.template packet(row, col), + m_rhsImpl.template packet(row, col)); + } + + template + PacketScalar packet(Index index) const + { + return m_functor.packetOp(m_lhsImpl.template packet(index), + m_rhsImpl.template packet(index)); + } + +protected: + const BinaryOp m_functor; + typename evaluator::type m_lhsImpl; + typename evaluator::type m_rhsImpl; +}; + +// -------------------- CwiseUnaryView -------------------- + +template +struct evaluator_impl > + : evaluator_impl_base > +{ + typedef CwiseUnaryView XprType; + + evaluator_impl(const XprType& op) + : m_unaryOp(op.functor()), + m_argImpl(op.nestedExpression()) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + CoeffReturnType coeff(Index row, Index col) const + { + return m_unaryOp(m_argImpl.coeff(row, col)); + } + + CoeffReturnType coeff(Index index) const + { + return m_unaryOp(m_argImpl.coeff(index)); + } + + Scalar& coeffRef(Index row, Index col) + { + return m_unaryOp(m_argImpl.coeffRef(row, col)); + } + + Scalar& coeffRef(Index index) + { + return 
m_unaryOp(m_argImpl.coeffRef(index));
+  }
+
+protected:
+  const UnaryOp m_unaryOp;
+  typename evaluator<ArgType>::type m_argImpl;
+};
+
+// -------------------- Product --------------------
+
+template<typename Lhs, typename Rhs>
+struct evaluator_impl<Product<Lhs,Rhs> > : public evaluator<typename Product<Lhs,Rhs>::PlainObject>::type
+{
+  typedef Product<Lhs,Rhs> XprType;
+  typedef typename XprType::PlainObject PlainObject;
+  typedef typename evaluator<PlainObject>::type evaluator_base;
+
+//   enum {
+//     EvaluateLhs = ;
+//     EvaluateRhs = ;
+//   };
+
+  evaluator_impl(const XprType& product) : evaluator_base(m_result)
+  {
+    // here we process the left and right hand sides with a specialized evaluator
+    // perhaps this step should be done by the TreeOptimizer to get a canonical tree and reduce evaluator instantiations
+    // typename product_operand_evaluator<Lhs>::type m_lhsImpl(product.lhs());
+    // typename product_operand_evaluator<Rhs>::type m_rhsImpl(product.rhs());
+
+    // TODO do not rely on previous product mechanism !!
+    m_result.resize(product.rows(), product.cols());
+    m_result.noalias() = product.lhs() * product.rhs();
+  }
+
+protected:
+  PlainObject m_result;
+};
+
+// -------------------- Map --------------------
+
+template<typename Derived>
+struct evaluator_impl<MapBase<Derived> >
+  : evaluator_impl_base<Derived>
+{
+  typedef MapBase<Derived> MapType;
+  typedef Derived XprType;
+
+  typedef typename XprType::PointerType PointerType;
+  typedef typename XprType::Index Index;
+  typedef typename XprType::Scalar Scalar;
+  typedef typename XprType::CoeffReturnType CoeffReturnType;
+  typedef typename XprType::PacketScalar PacketScalar;
+  typedef typename XprType::PacketReturnType PacketReturnType;
+
+  evaluator_impl(const XprType& map)
+    : m_data(const_cast<PointerType>(map.data())),
+      m_rowStride(map.rowStride()),
+      m_colStride(map.colStride())
+  { }
+
+  enum {
+    RowsAtCompileTime = XprType::RowsAtCompileTime
+  };
+
+  CoeffReturnType coeff(Index row, Index col) const
+  {
+    return m_data[col * m_colStride + row * m_rowStride];
+  }
+
+  CoeffReturnType coeff(Index index) const
+  {
+    return coeff(RowsAtCompileTime == 1 ? 0 : index,
+                 RowsAtCompileTime == 1 ? index : 0);
+  }
+
+  Scalar& coeffRef(Index row, Index col)
+  {
+    return m_data[col * m_colStride + row * m_rowStride];
+  }
+
+  Scalar& coeffRef(Index index)
+  {
+    return coeffRef(RowsAtCompileTime == 1 ? 0 : index,
+                    RowsAtCompileTime == 1 ? index : 0);
+  }
+
+  template<int LoadMode>
+  PacketReturnType packet(Index row, Index col) const
+  {
+    PointerType ptr = m_data + row * m_rowStride + col * m_colStride;
+    return internal::ploadt<PacketScalar, LoadMode>(ptr);
+  }
+
+  template<int LoadMode>
+  PacketReturnType packet(Index index) const
+  {
+    return packet<LoadMode>(RowsAtCompileTime == 1 ? 0 : index,
+                            RowsAtCompileTime == 1 ? index : 0);
+  }
+
+  template<int StoreMode>
+  void writePacket(Index row, Index col, const PacketScalar& x)
+  {
+    PointerType ptr = m_data + row * m_rowStride + col * m_colStride;
+    return internal::pstoret<Scalar, PacketScalar, StoreMode>(ptr, x);
+  }
+
+  template<int StoreMode>
+  void writePacket(Index index, const PacketScalar& x)
+  {
+    return writePacket<StoreMode>(RowsAtCompileTime == 1 ? 0 : index,
+                                  RowsAtCompileTime == 1 ?
index : 0, + x); + } + +protected: + PointerType m_data; + int m_rowStride; + int m_colStride; +}; + +template +struct evaluator_impl > + : public evaluator_impl > > +{ + typedef Map XprType; + + evaluator_impl(const XprType& map) + : evaluator_impl >(map) + { } +}; + +// -------------------- Block -------------------- + +template +struct evaluator_impl > + : evaluator_impl_base > +{ + typedef Block XprType; + + evaluator_impl(const XprType& block) + : m_argImpl(block.nestedExpression()), + m_startRow(block.startRow()), + m_startCol(block.startCol()) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketScalar PacketScalar; + typedef typename XprType::PacketReturnType PacketReturnType; + + enum { + RowsAtCompileTime = XprType::RowsAtCompileTime + }; + + CoeffReturnType coeff(Index row, Index col) const + { + return m_argImpl.coeff(m_startRow + row, m_startCol + col); + } + + CoeffReturnType coeff(Index index) const + { + return coeff(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0); + } + + Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(m_startRow + row, m_startCol + col); + } + + Scalar& coeffRef(Index index) + { + return coeffRef(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0); + } + + template + PacketReturnType packet(Index row, Index col) const + { + return m_argImpl.template packet(m_startRow + row, m_startCol + col); + } + + template + PacketReturnType packet(Index index) const + { + return packet(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0); + } + + template + void writePacket(Index row, Index col, const PacketScalar& x) + { + return m_argImpl.template writePacket(m_startRow + row, m_startCol + col, x); + } + + template + void writePacket(Index index, const PacketScalar& x) + { + return writePacket(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0, + x); + } + +protected: + typename evaluator::type m_argImpl; + + // TODO: Get rid of m_startRow, m_startCol if known at compile time + Index m_startRow; + Index m_startCol; +}; + +// TODO: This evaluator does not actually use the child evaluator; +// all action is via the data() as returned by the Block expression. 
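
// For illustration (a sketch, not part of the patch): the generic Block
// evaluator above is purely index-shifting. In, say,
//
//   MatrixXd A = MatrixXd::Random(4,4), B(2,2);
//   internal::copy_using_evaluator(B, A.block(1,1,2,2));
//
// every coeff(row, col) on the block forwards to A's evaluator at
// (m_startRow + row, m_startCol + col), so no temporary is materialized.
// The specialization below instead handles blocks with direct access by
// reusing the MapBase evaluator on the block's data() pointer.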
+ +template +struct evaluator_impl > + : evaluator_impl > > +{ + typedef Block XprType; + + evaluator_impl(const XprType& block) + : evaluator_impl >(block) + { } +}; + + +// -------------------- Select -------------------- + +template +struct evaluator_impl > +{ + typedef Select XprType; + + evaluator_impl(const XprType& select) + : m_conditionImpl(select.conditionMatrix()), + m_thenImpl(select.thenMatrix()), + m_elseImpl(select.elseMatrix()) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + CoeffReturnType coeff(Index row, Index col) const + { + if (m_conditionImpl.coeff(row, col)) + return m_thenImpl.coeff(row, col); + else + return m_elseImpl.coeff(row, col); + } + + CoeffReturnType coeff(Index index) const + { + if (m_conditionImpl.coeff(index)) + return m_thenImpl.coeff(index); + else + return m_elseImpl.coeff(index); + } + +protected: + typename evaluator::type m_conditionImpl; + typename evaluator::type m_thenImpl; + typename evaluator::type m_elseImpl; +}; + + +// -------------------- Replicate -------------------- + +template +struct evaluator_impl > +{ + typedef Replicate XprType; + + evaluator_impl(const XprType& replicate) + : m_argImpl(replicate.nestedExpression()), + m_rows(replicate.nestedExpression().rows()), + m_cols(replicate.nestedExpression().cols()) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketReturnType PacketReturnType; + + CoeffReturnType coeff(Index row, Index col) const + { + // try to avoid using modulo; this is a pure optimization strategy + const Index actual_row = internal::traits::RowsAtCompileTime==1 ? 0 + : RowFactor==1 ? row + : row % m_rows; + const Index actual_col = internal::traits::ColsAtCompileTime==1 ? 0 + : ColFactor==1 ? col + : col % m_cols; + + return m_argImpl.coeff(actual_row, actual_col); + } + + template + PacketReturnType packet(Index row, Index col) const + { + const Index actual_row = internal::traits::RowsAtCompileTime==1 ? 0 + : RowFactor==1 ? row + : row % m_rows; + const Index actual_col = internal::traits::ColsAtCompileTime==1 ? 0 + : ColFactor==1 ? col + : col % m_cols; + + return m_argImpl.template packet(actual_row, actual_col); + } + +protected: + typename evaluator::type m_argImpl; + Index m_rows; // TODO: Get rid of this if known at compile time + Index m_cols; +}; + + +// -------------------- PartialReduxExpr -------------------- +// +// This is a wrapper around the expression object. +// TODO: Find out how to write a proper evaluator without duplicating +// the row() and col() member functions. + +template< typename ArgType, typename MemberOp, int Direction> +struct evaluator_impl > +{ + typedef PartialReduxExpr XprType; + + evaluator_impl(const XprType expr) + : m_expr(expr) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + CoeffReturnType coeff(Index row, Index col) const + { + return m_expr.coeff(row, col); + } + + CoeffReturnType coeff(Index index) const + { + return m_expr.coeff(index); + } + +protected: + const XprType m_expr; +}; + + +// -------------------- MatrixWrapper and ArrayWrapper -------------------- +// +// evaluator_impl_wrapper_base is a common base class for the +// MatrixWrapper and ArrayWrapper evaluators. 
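// For instance (an illustrative sketch, not part of the patch): in
//
//   ArrayXXd arr(2,2);
//   MatrixXd m = MatrixXd::Random(2,2);
//   internal::copy_using_evaluator(arr, m.array() + 2);
//
// the ArrayWrapper evaluator inside the sum forwards every coeff() and
// packet() call straight to the evaluator of m; the wrapper changes the
// expression's interface, not how it is evaluated.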
+ +template +struct evaluator_impl_wrapper_base + : evaluator_impl_base +{ + evaluator_impl_wrapper_base(const ArgType& arg) : m_argImpl(arg) {} + + typedef typename ArgType::Index Index; + typedef typename ArgType::Scalar Scalar; + typedef typename ArgType::CoeffReturnType CoeffReturnType; + typedef typename ArgType::PacketScalar PacketScalar; + typedef typename ArgType::PacketReturnType PacketReturnType; + + CoeffReturnType coeff(Index row, Index col) const + { + return m_argImpl.coeff(row, col); + } + + CoeffReturnType coeff(Index index) const + { + return m_argImpl.coeff(index); + } + + Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(row, col); + } + + Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(index); + } + + template + PacketReturnType packet(Index row, Index col) const + { + return m_argImpl.template packet(row, col); + } + + template + PacketReturnType packet(Index index) const + { + return m_argImpl.template packet(index); + } + + template + void writePacket(Index row, Index col, const PacketScalar& x) + { + m_argImpl.template writePacket(row, col, x); + } + + template + void writePacket(Index index, const PacketScalar& x) + { + m_argImpl.template writePacket(index, x); + } + +protected: + typename evaluator::type m_argImpl; +}; + +template +struct evaluator_impl > + : evaluator_impl_wrapper_base +{ + typedef MatrixWrapper XprType; + + evaluator_impl(const XprType& wrapper) + : evaluator_impl_wrapper_base(wrapper.nestedExpression()) + { } +}; + +template +struct evaluator_impl > + : evaluator_impl_wrapper_base +{ + typedef ArrayWrapper XprType; + + evaluator_impl(const XprType& wrapper) + : evaluator_impl_wrapper_base(wrapper.nestedExpression()) + { } +}; + + +// -------------------- Reverse -------------------- + +// defined in Reverse.h: +template struct reverse_packet_cond; + +template +struct evaluator_impl > + : evaluator_impl_base > +{ + typedef Reverse XprType; + + evaluator_impl(const XprType& reverse) + : m_argImpl(reverse.nestedExpression()), + m_rows(reverse.nestedExpression().rows()), + m_cols(reverse.nestedExpression().cols()) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketScalar PacketScalar; + typedef typename XprType::PacketReturnType PacketReturnType; + + enum { + PacketSize = internal::packet_traits::size, + IsRowMajor = XprType::IsRowMajor, + IsColMajor = !IsRowMajor, + ReverseRow = (Direction == Vertical) || (Direction == BothDirections), + ReverseCol = (Direction == Horizontal) || (Direction == BothDirections), + OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1, + OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1, + ReversePacket = (Direction == BothDirections) + || ((Direction == Vertical) && IsColMajor) + || ((Direction == Horizontal) && IsRowMajor) + }; + typedef internal::reverse_packet_cond reverse_packet; + + CoeffReturnType coeff(Index row, Index col) const + { + return m_argImpl.coeff(ReverseRow ? m_rows - row - 1 : row, + ReverseCol ? m_cols - col - 1 : col); + } + + CoeffReturnType coeff(Index index) const + { + return m_argImpl.coeff(m_rows * m_cols - index - 1); + } + + Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(ReverseRow ? m_rows - row - 1 : row, + ReverseCol ? 
m_cols - col - 1 : col); + } + + Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(m_rows * m_cols - index - 1); + } + + template + PacketScalar packet(Index row, Index col) const + { + return reverse_packet::run(m_argImpl.template packet( + ReverseRow ? m_rows - row - OffsetRow : row, + ReverseCol ? m_cols - col - OffsetCol : col)); + } + + template + PacketScalar packet(Index index) const + { + return preverse(m_argImpl.template packet(m_rows * m_cols - index - PacketSize)); + } + + template + void writePacket(Index row, Index col, const PacketScalar& x) + { + m_argImpl.template writePacket( + ReverseRow ? m_rows - row - OffsetRow : row, + ReverseCol ? m_cols - col - OffsetCol : col, + reverse_packet::run(x)); + } + + template + void writePacket(Index index, const PacketScalar& x) + { + m_argImpl.template writePacket(m_rows * m_cols - index - PacketSize, preverse(x)); + } + +protected: + typename evaluator::type m_argImpl; + Index m_rows; // TODO: Don't use if known at compile time or not needed + Index m_cols; +}; + + +// -------------------- Diagonal -------------------- + +template +struct evaluator_impl > + : evaluator_impl_base > +{ + typedef Diagonal XprType; + + evaluator_impl(const XprType& diagonal) + : m_argImpl(diagonal.nestedExpression()), + m_index(diagonal.index()) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + CoeffReturnType coeff(Index row, Index) const + { + return m_argImpl.coeff(row + rowOffset(), row + colOffset()); + } + + CoeffReturnType coeff(Index index) const + { + return m_argImpl.coeff(index + rowOffset(), index + colOffset()); + } + + Scalar& coeffRef(Index row, Index) + { + return m_argImpl.coeffRef(row + rowOffset(), row + colOffset()); + } + + Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(index + rowOffset(), index + colOffset()); + } + +protected: + typename evaluator::type m_argImpl; + Index m_index; // TODO: Don't use if known at compile time + +private: + EIGEN_STRONG_INLINE Index rowOffset() const { return m_index>0 ? 0 : -m_index; } + EIGEN_STRONG_INLINE Index colOffset() const { return m_index>0 ? 
m_index : 0; } +}; + + +// ---------- SwapWrapper ---------- + +template +struct evaluator_impl > + : evaluator_impl_base > +{ + typedef SwapWrapper XprType; + + evaluator_impl(const XprType& swapWrapper) + : m_argImpl(swapWrapper.expression()) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::Packet Packet; + + // This function and the next one are needed by assign to correctly align loads/stores + // TODO make Assign use .data() + Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(row, col); + } + + inline Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(index); + } + + template + void copyCoeff(Index row, Index col, const OtherEvaluatorType& other) + { + OtherEvaluatorType& nonconst_other = const_cast(other); + Scalar tmp = m_argImpl.coeff(row, col); + m_argImpl.coeffRef(row, col) = nonconst_other.coeff(row, col); + nonconst_other.coeffRef(row, col) = tmp; + } + + template + void copyCoeff(Index index, const OtherEvaluatorType& other) + { + OtherEvaluatorType& nonconst_other = const_cast(other); + Scalar tmp = m_argImpl.coeff(index); + m_argImpl.coeffRef(index) = nonconst_other.coeff(index); + nonconst_other.coeffRef(index) = tmp; + } + + template + void copyPacket(Index row, Index col, const OtherEvaluatorType& other) + { + OtherEvaluatorType& nonconst_other = const_cast(other); + Packet tmp = m_argImpl.template packet(row, col); + m_argImpl.template writePacket + (row, col, nonconst_other.template packet(row, col)); + nonconst_other.template writePacket(row, col, tmp); + } + + template + void copyPacket(Index index, const OtherEvaluatorType& other) + { + OtherEvaluatorType& nonconst_other = const_cast(other); + Packet tmp = m_argImpl.template packet(index); + m_argImpl.template writePacket + (index, nonconst_other.template packet(index)); + nonconst_other.template writePacket(index, tmp); + } + +protected: + typename evaluator::type m_argImpl; +}; + + +// ---------- SelfCwiseBinaryOp ---------- + +template +struct evaluator_impl > + : evaluator_impl_base > +{ + typedef SelfCwiseBinaryOp XprType; + + evaluator_impl(const XprType& selfCwiseBinaryOp) + : m_argImpl(selfCwiseBinaryOp.expression()), + m_functor(selfCwiseBinaryOp.functor()) + { } + + typedef typename XprType::Index Index; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::Packet Packet; + + // This function and the next one are needed by assign to correctly align loads/stores + // TODO make Assign use .data() + Scalar& coeffRef(Index row, Index col) + { + return m_argImpl.coeffRef(row, col); + } + + inline Scalar& coeffRef(Index index) + { + return m_argImpl.coeffRef(index); + } + + template + void copyCoeff(Index row, Index col, const OtherEvaluatorType& other) + { + Scalar& tmp = m_argImpl.coeffRef(row, col); + tmp = m_functor(tmp, other.coeff(row, col)); + } + + template + void copyCoeff(Index index, const OtherEvaluatorType& other) + { + Scalar& tmp = m_argImpl.coeffRef(index); + tmp = m_functor(tmp, other.coeff(index)); + } + + template + void copyPacket(Index row, Index col, const OtherEvaluatorType& other) + { + const Packet res = m_functor.packetOp(m_argImpl.template packet(row, col), + other.template packet(row, col)); + m_argImpl.template writePacket(row, col, res); + } + + template + void copyPacket(Index index, const OtherEvaluatorType& other) + { + const Packet res = m_functor.packetOp(m_argImpl.template packet(index), + other.template packet(index)); + m_argImpl.template writePacket(index, 
res); + } + +protected: + typename evaluator::type m_argImpl; + const BinaryOp m_functor; +}; + + +} // namespace internal + +#endif // EIGEN_COREEVALUATORS_H diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index 53eb0fbae..9bea26886 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -110,4 +110,18 @@ class ProductImpl : public internal::dense_xpr_base +const Product +prod(const Lhs& lhs, const Rhs& rhs) +{ + return Product(lhs,rhs); +} + #endif // EIGEN_PRODUCT_H diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 6f8fc4ae3..c71534df9 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -195,6 +195,7 @@ ei_add_test(nullary) ei_add_test(nesting_ops "${CMAKE_CXX_FLAGS_DEBUG}") ei_add_test(zerosized) ei_add_test(dontalign) +ei_add_test(evaluators) ei_add_test(sizeoverflow) ei_add_test(prec_inverse_4x4) ei_add_test(vectorwiseop) diff --git a/test/evaluators.cpp b/test/evaluators.cpp new file mode 100644 index 000000000..5c8e500bc --- /dev/null +++ b/test/evaluators.cpp @@ -0,0 +1,264 @@ +#define EIGEN_ENABLE_EVALUATORS +#include "main.h" + +using internal::copy_using_evaluator; +using namespace std; + +#define VERIFY_IS_APPROX_EVALUATOR(DEST,EXPR) VERIFY_IS_APPROX(copy_using_evaluator(DEST,(EXPR)), (EXPR).eval()); +#define VERIFY_IS_APPROX_EVALUATOR2(DEST,EXPR,REF) VERIFY_IS_APPROX(copy_using_evaluator(DEST,(EXPR)), (REF).eval()); + +void test_evaluators() +{ + // Testing Matrix evaluator and Transpose + Vector2d v = Vector2d::Random(); + const Vector2d v_const(v); + Vector2d v2; + RowVector2d w; + + VERIFY_IS_APPROX_EVALUATOR(v2, v); + VERIFY_IS_APPROX_EVALUATOR(v2, v_const); + + // Testing Transpose + VERIFY_IS_APPROX_EVALUATOR(w, v.transpose()); // Transpose as rvalue + VERIFY_IS_APPROX_EVALUATOR(w, v_const.transpose()); + + copy_using_evaluator(w.transpose(), v); // Transpose as lvalue + VERIFY_IS_APPROX(w,v.transpose().eval()); + + copy_using_evaluator(w.transpose(), v_const); + VERIFY_IS_APPROX(w,v_const.transpose().eval()); + + // Testing Array evaluator + ArrayXXf a(2,3); + ArrayXXf b(3,2); + a << 1,2,3, 4,5,6; + const ArrayXXf a_const(a); + + VERIFY_IS_APPROX_EVALUATOR(b, a.transpose()); + + VERIFY_IS_APPROX_EVALUATOR(b, a_const.transpose()); + + // Testing CwiseNullaryOp evaluator + copy_using_evaluator(w, RowVector2d::Random()); + VERIFY((w.array() >= -1).all() && (w.array() <= 1).all()); // not easy to test ... 
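  // Illustrative aside (not in the original test): VERIFY_IS_APPROX_EVALUATOR(DEST, EXPR)
  // expands to VERIFY_IS_APPROX(copy_using_evaluator(DEST, (EXPR)), (EXPR).eval()),
  // i.e. each check compares the evaluator-based assignment against the
  // established evaluation path. Random() needs the manual range check above
  // because two separate evaluations of it cannot be expected to agree.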
+ + VERIFY_IS_APPROX_EVALUATOR(w, RowVector2d::Zero()); + + VERIFY_IS_APPROX_EVALUATOR(w, RowVector2d::Constant(3)); + + // mix CwiseNullaryOp and transpose + VERIFY_IS_APPROX_EVALUATOR(w, Vector2d::Zero().transpose()); + + { + int s = internal::random(1,100); + MatrixXf a(s,s), b(s,s), c(s,s), d(s,s); + a.setRandom(); + b.setRandom(); + c.setRandom(); + d.setRandom(); + VERIFY_IS_APPROX_EVALUATOR(d, (a + b)); + VERIFY_IS_APPROX_EVALUATOR(d, (a + b).transpose()); + VERIFY_IS_APPROX_EVALUATOR2(d, prod(a,b).transpose(), (a*b).transpose()); + VERIFY_IS_APPROX_EVALUATOR2(d, prod(a,b) + prod(b,c), a*b + b*c); + +// copy_using_evaluator(d, a.transpose() + (a.transpose() * (b+b))); +// cout << d << endl; + } + + // this does not work because Random is eval-before-nested: + // copy_using_evaluator(w, Vector2d::Random().transpose()); + + // test CwiseUnaryOp + VERIFY_IS_APPROX_EVALUATOR(v2, 3 * v); + VERIFY_IS_APPROX_EVALUATOR(w, (3 * v).transpose()); + VERIFY_IS_APPROX_EVALUATOR(b, (a + 3).transpose()); + VERIFY_IS_APPROX_EVALUATOR(b, (2 * a_const + 3).transpose()); + + // test CwiseBinaryOp + VERIFY_IS_APPROX_EVALUATOR(v2, v + Vector2d::Ones()); + VERIFY_IS_APPROX_EVALUATOR(w, (v + Vector2d::Ones()).transpose().cwiseProduct(RowVector2d::Constant(3))); + + // dynamic matrices and arrays + MatrixXd mat1(6,6), mat2(6,6); + VERIFY_IS_APPROX_EVALUATOR(mat1, MatrixXd::Identity(6,6)); + VERIFY_IS_APPROX_EVALUATOR(mat2, mat1); + copy_using_evaluator(mat2.transpose(), mat1); + VERIFY_IS_APPROX(mat2.transpose(), mat1); + + ArrayXXd arr1(6,6), arr2(6,6); + VERIFY_IS_APPROX_EVALUATOR(arr1, ArrayXXd::Constant(6,6, 3.0)); + VERIFY_IS_APPROX_EVALUATOR(arr2, arr1); + + // test direct traversal + Matrix3f m3; + Array33f a3; + VERIFY_IS_APPROX_EVALUATOR(m3, Matrix3f::Identity()); // matrix, nullary + // TODO: find a way to test direct traversal with array + VERIFY_IS_APPROX_EVALUATOR(m3.transpose(), Matrix3f::Identity().transpose()); // transpose + VERIFY_IS_APPROX_EVALUATOR(m3, 2 * Matrix3f::Identity()); // unary + VERIFY_IS_APPROX_EVALUATOR(m3, Matrix3f::Identity() + m3); // binary + VERIFY_IS_APPROX_EVALUATOR(m3.block(0,0,2,2), Matrix3f::Identity().block(1,1,2,2)); // block + + // test linear traversal + VERIFY_IS_APPROX_EVALUATOR(m3, Matrix3f::Zero()); // matrix, nullary + VERIFY_IS_APPROX_EVALUATOR(a3, Array33f::Zero()); // array + VERIFY_IS_APPROX_EVALUATOR(m3.transpose(), Matrix3f::Zero().transpose()); // transpose + VERIFY_IS_APPROX_EVALUATOR(m3, 2 * Matrix3f::Zero()); // unary + VERIFY_IS_APPROX_EVALUATOR(m3, Matrix3f::Zero() + m3); // binary + + // test inner vectorization + Matrix4f m4, m4src = Matrix4f::Random(); + Array44f a4, a4src = Matrix4f::Random(); + VERIFY_IS_APPROX_EVALUATOR(m4, m4src); // matrix + VERIFY_IS_APPROX_EVALUATOR(a4, a4src); // array + VERIFY_IS_APPROX_EVALUATOR(m4.transpose(), m4src.transpose()); // transpose + // TODO: find out why Matrix4f::Zero() does not allow inner vectorization + VERIFY_IS_APPROX_EVALUATOR(m4, 2 * m4src); // unary + VERIFY_IS_APPROX_EVALUATOR(m4, m4src + m4src); // binary + + // test linear vectorization + MatrixXf mX(6,6), mXsrc = MatrixXf::Random(6,6); + ArrayXXf aX(6,6), aXsrc = ArrayXXf::Random(6,6); + VERIFY_IS_APPROX_EVALUATOR(mX, mXsrc); // matrix + VERIFY_IS_APPROX_EVALUATOR(aX, aXsrc); // array + VERIFY_IS_APPROX_EVALUATOR(mX.transpose(), mXsrc.transpose()); // transpose + VERIFY_IS_APPROX_EVALUATOR(mX, MatrixXf::Zero(6,6)); // nullary + VERIFY_IS_APPROX_EVALUATOR(mX, 2 * mXsrc); // unary + VERIFY_IS_APPROX_EVALUATOR(mX, mXsrc + mXsrc); // binary + 
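  // Illustrative aside (not in the original test): with EIGEN_DEBUG_ASSIGN
  // defined, each copy_using_evaluator() call prints the strategy selected in
  // Part 1 of AssignEvaluator.h; the decision can also be queried directly, e.g.
  //
  //   internal::copy_using_evaluator_traits<MatrixXf, MatrixXf>::debug();
  //
  // dumps Traversal, Unrolling and the alignment enums for that pair of types.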
+ // test blocks and slice vectorization + VERIFY_IS_APPROX_EVALUATOR(m4, (mXsrc.block<4,4>(1,0))); + VERIFY_IS_APPROX_EVALUATOR(aX, ArrayXXf::Constant(10, 10, 3.0).block(2, 3, 6, 6)); + + Matrix4f m4ref = m4; + copy_using_evaluator(m4.block(1, 1, 2, 3), m3.bottomRows(2)); + m4ref.block(1, 1, 2, 3) = m3.bottomRows(2); + VERIFY_IS_APPROX(m4, m4ref); + + mX.setIdentity(20,20); + MatrixXf mXref = MatrixXf::Identity(20,20); + mXsrc = MatrixXf::Random(9,12); + copy_using_evaluator(mX.block(4, 4, 9, 12), mXsrc); + mXref.block(4, 4, 9, 12) = mXsrc; + VERIFY_IS_APPROX(mX, mXref); + + // test Map + const float raw[3] = {1,2,3}; + float buffer[3] = {0,0,0}; + Vector3f v3; + Array3f a3f; + VERIFY_IS_APPROX_EVALUATOR(v3, Map(raw)); + VERIFY_IS_APPROX_EVALUATOR(a3f, Map(raw)); + Vector3f::Map(buffer) = 2*v3; + VERIFY(buffer[0] == 2); + VERIFY(buffer[1] == 4); + VERIFY(buffer[2] == 6); + + // test CwiseUnaryView + mat1.setRandom(); + mat2.setIdentity(); + MatrixXcd matXcd(6,6), matXcd_ref(6,6); + copy_using_evaluator(matXcd.real(), mat1); + copy_using_evaluator(matXcd.imag(), mat2); + matXcd_ref.real() = mat1; + matXcd_ref.imag() = mat2; + VERIFY_IS_APPROX(matXcd, matXcd_ref); + + // test Select + VERIFY_IS_APPROX_EVALUATOR(aX, (aXsrc > 0).select(aXsrc, -aXsrc)); + + // test Replicate + mXsrc = MatrixXf::Random(6, 6); + VectorXf vX = VectorXf::Random(6); + mX.resize(6, 6); + VERIFY_IS_APPROX_EVALUATOR(mX, mXsrc.colwise() + vX); + matXcd.resize(12, 12); + VERIFY_IS_APPROX_EVALUATOR(matXcd, matXcd_ref.replicate(2,2)); + VERIFY_IS_APPROX_EVALUATOR(matXcd, (matXcd_ref.replicate<2,2>())); + + // test partial reductions + VectorXd vec1(6); + VERIFY_IS_APPROX_EVALUATOR(vec1, mat1.rowwise().sum()); + VERIFY_IS_APPROX_EVALUATOR(vec1, mat1.colwise().sum().transpose()); + + // test MatrixWrapper and ArrayWrapper + mat1.setRandom(6,6); + arr1.setRandom(6,6); + VERIFY_IS_APPROX_EVALUATOR(mat2, arr1.matrix()); + VERIFY_IS_APPROX_EVALUATOR(arr2, mat1.array()); + VERIFY_IS_APPROX_EVALUATOR(mat2, (arr1 + 2).matrix()); + VERIFY_IS_APPROX_EVALUATOR(arr2, mat1.array() + 2); + mat2.array() = arr1 * arr1; + VERIFY_IS_APPROX(mat2, (arr1 * arr1).matrix()); + arr2.matrix() = MatrixXd::Identity(6,6); + VERIFY_IS_APPROX(arr2, MatrixXd::Identity(6,6).array()); + + // test Reverse + VERIFY_IS_APPROX_EVALUATOR(arr2, arr1.reverse()); + VERIFY_IS_APPROX_EVALUATOR(arr2, arr1.colwise().reverse()); + VERIFY_IS_APPROX_EVALUATOR(arr2, arr1.rowwise().reverse()); + arr2.reverse() = arr1; + VERIFY_IS_APPROX(arr2, arr1.reverse()); + + // test Diagonal + VERIFY_IS_APPROX_EVALUATOR(vec1, mat1.diagonal()); + vec1.resize(5); + VERIFY_IS_APPROX_EVALUATOR(vec1, mat1.diagonal(1)); + VERIFY_IS_APPROX_EVALUATOR(vec1, mat1.diagonal<-1>()); + vec1.setRandom(); + + mat2 = mat1; + copy_using_evaluator(mat1.diagonal(1), vec1); + mat2.diagonal(1) = vec1; + VERIFY_IS_APPROX(mat1, mat2); + + copy_using_evaluator(mat1.diagonal<-1>(), mat1.diagonal(1)); + mat2.diagonal<-1>() = mat2.diagonal(1); + VERIFY_IS_APPROX(mat1, mat2); + + { + // test swapping + MatrixXd mat1, mat2, mat1ref, mat2ref; + mat1ref = mat1 = MatrixXd::Random(6, 6); + mat2ref = mat2 = 2 * mat1 + MatrixXd::Identity(6, 6); + swap_using_evaluator(mat1, mat2); + mat1ref.swap(mat2ref); + VERIFY_IS_APPROX(mat1, mat1ref); + VERIFY_IS_APPROX(mat2, mat2ref); + + swap_using_evaluator(mat1.block(0, 0, 3, 3), mat2.block(3, 3, 3, 3)); + mat1ref.block(0, 0, 3, 3).swap(mat2ref.block(3, 3, 3, 3)); + VERIFY_IS_APPROX(mat1, mat1ref); + VERIFY_IS_APPROX(mat2, mat2ref); + + swap_using_evaluator(mat1.row(2), 
mat2.col(3).transpose()); + mat1.row(2).swap(mat2.col(3).transpose()); + VERIFY_IS_APPROX(mat1, mat1ref); + VERIFY_IS_APPROX(mat2, mat2ref); + } + + { + // test compound assignment + const Matrix4d mat_const = Matrix4d::Random(); + Matrix4d mat, mat_ref; + mat = mat_ref = Matrix4d::Identity(); + add_assign_using_evaluator(mat, mat_const); + mat_ref += mat_const; + VERIFY_IS_APPROX(mat, mat_ref); + + subtract_assign_using_evaluator(mat.row(1), 2*mat.row(2)); + mat_ref.row(1) -= 2*mat_ref.row(2); + VERIFY_IS_APPROX(mat, mat_ref); + + const ArrayXXf arr_const = ArrayXXf::Random(5,3); + ArrayXXf arr, arr_ref; + arr = arr_ref = ArrayXXf::Constant(5, 3, 0.5); + multiply_assign_using_evaluator(arr, arr_const); + arr_ref *= arr_const; + VERIFY_IS_APPROX(arr, arr_ref); + + divide_assign_using_evaluator(arr.row(1), arr.row(2) + 1); + arr_ref.row(1) /= (arr_ref.row(2) + 1); + VERIFY_IS_APPROX(arr, arr_ref); + } +}
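
// Illustrative footnote (a sketch, not part of the patch): the compound-assignment
// helpers exercised above are thin adapters. For example,
// add_assign_using_evaluator(mat, mat_const) wraps mat in a
// SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, ...> and then calls
// copy_using_evaluator(tmp, mat_const), so operator+= reuses exactly the
// same traversal and vectorization machinery as plain assignment.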