diff --git a/Eigen/Core b/Eigen/Core
index a508b97f6..d0f0adbe4 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -14,6 +14,36 @@
 // first thing Eigen does: stop the compiler from committing suicide
 #include "src/Core/util/DisableStupidWarnings.h"
 
+// Handle NVCC/CUDA
+#ifdef __CUDACC__
+  // Do not try asserts on CUDA!
+  // (guarded so that a user-supplied -DEIGEN_NO_DEBUG does not trigger a
+  // macro-redefinition diagnostic)
+  #ifndef EIGEN_NO_DEBUG
+  #define EIGEN_NO_DEBUG
+  #endif
+  // Do not try to vectorize on CUDA! (guarded for the same reason)
+  #ifndef EIGEN_DONT_VECTORIZE
+  #define EIGEN_DONT_VECTORIZE
+  #endif
+
+  // All functions callable from CUDA code must be qualified with __device__
+  #define EIGEN_DEVICE_FUNC __host__ __device__
+
+#else
+  // Without NVCC the qualifier expands to nothing
+  #define EIGEN_DEVICE_FUNC
+
+#endif
+
+// Import a math function by unqualified name: from the global namespace when
+// __CUDA_ARCH__ is defined, from namespace std otherwise.
+#if defined(__CUDA_ARCH__)
+  #define EIGEN_USING_STD_MATH(FUNC) using ::FUNC;
+#else
+  #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC;
+#endif
+
 // then include this file where all our macros are defined. It's really important to do it first because
 // it's where we do all the alignment settings (platform detection and honoring the user's will if he
 // defined e.g. EIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization.
diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h
index d19cb3968..4faedd257 100644
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -502,7 +502,7 @@ struct solve_retval<LDLT<_MatrixType,_UpLo>, Rhs>
     // dst = D^-1 (L^-1 P b)
     // more precisely, use pseudo-inverse of D (see bug 241)
     using std::abs;
-    using std::max;
+    EIGEN_USING_STD_MATH(max);
     typedef typename LDLTType::MatrixType MatrixType;
     typedef typename LDLTType::Scalar Scalar;
     typedef typename LDLTType::RealScalar RealScalar;
@@ -575,6 +575,7 @@ MatrixType LDLT<MatrixType,_UpLo>::reconstructedMatrix() const
   return res;
 }
 
+#ifndef __CUDACC__
 /** \cholesky_module
   * \returns the Cholesky decomposition with full pivoting without square root of \c *this
   */
@@ -594,6 +595,7 @@ MatrixBase<Derived>::ldlt() const
 {
   return LDLT<PlainObject>(derived());
 }
+#endif // __CUDACC__
 
 } // end namespace Eigen
 
diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h
index 2e6189f7d..2201c641e 100644
--- a/Eigen/src/Cholesky/LLT.h
+++ b/Eigen/src/Cholesky/LLT.h
@@ -465,6 +465,7 @@ MatrixType LLT<MatrixType,_UpLo>::reconstructedMatrix() const
   return matrixL() * matrixL().adjoint().toDenseMatrix();
 }
 
+#ifndef __CUDACC__
 /** \cholesky_module
   * \returns the LLT decomposition of \c *this
   */
@@ -484,7 +485,8 @@ SelfAdjointView<MatrixType, UpLo>::llt() const
 {
   return LLT<PlainObject,UpLo>(m_matrix);
 }
+#endif // __CUDACC__
 
 } // end namespace Eigen
 
 #endif // EIGEN_LLT_H
diff --git a/Eigen/src/Core/Array.h b/Eigen/src/Core/Array.h
index 0b9c38c82..8d2906a10 100644
--- a/Eigen/src/Core/Array.h
+++ b/Eigen/src/Core/Array.h
@@ -69,6 +69,7 @@ class Array
       * the usage of 'using'. This should be done only for operator=.
       */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Array& operator=(const EigenBase<OtherDerived> &other)
     {
       return Base::operator=(other);
@@ -84,6 +85,7 @@ class Array
       * remain row-vectors and vectors remain vectors.
       */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Array& operator=(const ArrayBase<OtherDerived>& other)
     {
       return Base::_set(other);
@@ -92,6 +94,7 @@ class Array
     /** This is a special case of the templated operator=. Its purpose is to
       * prevent a default operator= from hiding the templated operator=.
       */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Array& operator=(const Array& other)
     {
       return Base::_set(other);
@@ -107,6 +110,7 @@ class Array
       *
       * \sa resize(Index,Index)
       */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Array() : Base()
     {
       Base::_check_template_params();
@@ -116,6 +120,7 @@ class Array
 #ifndef EIGEN_PARSED_BY_DOXYGEN
     // FIXME is it still needed ??
     /** \internal */
+    EIGEN_DEVICE_FUNC
     Array(internal::constructor_without_unaligned_array_assert)
       : Base(internal::constructor_without_unaligned_array_assert())
     {
@@ -145,6 +150,7 @@ class Array
       * it is redundant to pass the dimension here, so it makes more sense to use the default
       * constructor Matrix() instead.
       */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE explicit Array(Index dim)
       : Base(dim, RowsAtCompileTime == 1 ? 1 : dim, ColsAtCompileTime == 1 ? 1 : dim)
     {
@@ -157,6 +163,7 @@ class Array
 
     #ifndef EIGEN_PARSED_BY_DOXYGEN
     template<typename T0, typename T1>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Array(const T0& val0, const T1& val1)
     {
       Base::_check_template_params();
@@ -174,6 +181,7 @@ class Array
     #endif
 
     /** constructs an initialized 3D vector with given coefficients */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2)
     {
       Base::_check_template_params();
@@ -183,6 +191,7 @@ class Array
       m_storage.data()[2] = val2;
     }
     /** constructs an initialized 4D vector with given coefficients */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2, const Scalar& val3)
     {
       Base::_check_template_params();
@@ -193,10 +202,11 @@ class Array
       m_storage.data()[3] = val3;
     }
 
-    explicit Array(const Scalar *data);
+    EIGEN_DEVICE_FUNC explicit Array(const Scalar *data);
 
     /** Constructor copying the value of the expression \a other */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Array(const ArrayBase<OtherDerived>& other)
              : Base(other.rows() * other.cols(), other.rows(), other.cols())
     {
@@ -204,6 +214,7 @@ class Array
       Base::_set_noalias(other);
     }
     /** Copy constructor */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Array(const Array& other)
             : Base(other.rows() * other.cols(), other.rows(), other.cols())
     {
@@ -212,6 +223,7 @@ class Array
     }
     /** Copy constructor with in-place evaluation */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Array(const ReturnByValue<OtherDerived>& other)
     {
       Base::_check_template_params();
@@ -221,6 +233,7 @@ class Array
 
     /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Array(const EigenBase<OtherDerived> &other)
       : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
     {
@@ -236,8 +249,8 @@ class Array
     void swap(ArrayBase<OtherDerived> const & other)
     { this->_swap(other.derived()); }
 
-    inline Index innerStride() const { return 1; }
-    inline Index outerStride() const { return this->innerSize(); }
+    EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
+    EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
 
     #ifdef EIGEN_ARRAY_PLUGIN
     #include EIGEN_ARRAY_PLUGIN
diff --git a/Eigen/src/Core/ArrayBase.h b/Eigen/src/Core/ArrayBase.h
index b7c4a1c71..2c9ace4a7 100644
--- a/Eigen/src/Core/ArrayBase.h
+++ b/Eigen/src/Core/ArrayBase.h
@@ -118,38 +118,50 @@ template<typename Derived> class ArrayBase
     /** Special case of the template operator=, in order to prevent the compiler
       * from generating a default operator= (issue hit with g++ 4.1)
       */
+    EIGEN_DEVICE_FUNC
     Derived& operator=(const ArrayBase& other)
     {
       return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
     }
 
+    EIGEN_DEVICE_FUNC
     Derived& operator+=(const Scalar& scalar);
+    EIGEN_DEVICE_FUNC
     Derived& operator-=(const Scalar& scalar);
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     Derived& operator+=(const ArrayBase<OtherDerived>& other);
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     Derived& operator-=(const ArrayBase<OtherDerived>& other);
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     Derived& operator*=(const ArrayBase<OtherDerived>& other);
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     Derived& operator/=(const ArrayBase<OtherDerived>& other);
 
   public:
+    EIGEN_DEVICE_FUNC
     ArrayBase<Derived>& array() { return *this; }
+    EIGEN_DEVICE_FUNC
     const ArrayBase<Derived>& array() const { return *this; }
 
     /** \returns an \link Eigen::MatrixBase Matrix \endlink expression of this array
       * \sa MatrixBase::array() */
+    EIGEN_DEVICE_FUNC
     MatrixWrapper<Derived> matrix() { return derived(); }
+    EIGEN_DEVICE_FUNC
     const MatrixWrapper<const Derived> matrix() const { return derived(); }
 
 //     template<typename Dest>
 //     inline void evalTo(Dest& dst) const { dst = matrix(); }
 
   protected:
+    EIGEN_DEVICE_FUNC
     ArrayBase() : Base() {}
 
   private:
diff --git a/Eigen/src/Core/ArrayWrapper.h b/Eigen/src/Core/ArrayWrapper.h
index a791bc358..21830745b 100644
--- a/Eigen/src/Core/ArrayWrapper.h
+++ b/Eigen/src/Core/ArrayWrapper.h
@@ -48,41 +48,54 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
 
     typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
 
+    EIGEN_DEVICE_FUNC
     inline ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {}
 
+    EIGEN_DEVICE_FUNC
     inline Index rows() const { return m_expression.rows(); }
+    EIGEN_DEVICE_FUNC
     inline Index cols() const { return m_expression.cols(); }
+    EIGEN_DEVICE_FUNC
     inline Index outerStride() const { return m_expression.outerStride(); }
+    EIGEN_DEVICE_FUNC
     inline Index innerStride() const { return m_expression.innerStride(); }
 
+    EIGEN_DEVICE_FUNC
     inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); }
+    EIGEN_DEVICE_FUNC
     inline const Scalar* data() const { return m_expression.data(); }
 
+    EIGEN_DEVICE_FUNC
     inline CoeffReturnType coeff(Index rowId, Index colId) const
     {
       return m_expression.coeff(rowId, colId);
     }
 
+    EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index rowId, Index colId)
     {
       return m_expression.const_cast_derived().coeffRef(rowId, colId);
     }
 
+    EIGEN_DEVICE_FUNC
     inline const Scalar& coeffRef(Index rowId, Index colId) const
     {
       return m_expression.const_cast_derived().coeffRef(rowId, colId);
     }
 
+    EIGEN_DEVICE_FUNC
     inline CoeffReturnType coeff(Index index) const
     {
       return m_expression.coeff(index);
     }
 
+    EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index index)
     {
       return m_expression.const_cast_derived().coeffRef(index);
     }
 
+    EIGEN_DEVICE_FUNC
     inline const Scalar& coeffRef(Index index) const
     {
       return m_expression.const_cast_derived().coeffRef(index);
@@ -113,9 +126,11 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
     }
 
     template<typename Dest>
+    EIGEN_DEVICE_FUNC
     inline void evalTo(Dest& dst) const { dst = m_expression; }
 
     const typename internal::remove_all<NestedExpressionType>::type& 
+    EIGEN_DEVICE_FUNC
     nestedExpression() const 
     {
       return m_expression;
@@ -123,9 +138,11 @@ class ArrayWrapper : public ArrayBase<ArrayWrapper<ExpressionType> >
 
     /** Forwards the resizing request to the nested expression
       * \sa DenseBase::resize(Index)  */
+    EIGEN_DEVICE_FUNC
     void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); }
     /** Forwards the resizing request to the nested expression
       * \sa DenseBase::resize(Index,Index)*/
+    EIGEN_DEVICE_FUNC
     void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); }
 
   protected:
@@ -168,41 +185,54 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
 
     typedef typename internal::nested<ExpressionType>::type NestedExpressionType;
 
+    EIGEN_DEVICE_FUNC
     inline MatrixWrapper(ExpressionType& a_matrix) : m_expression(a_matrix) {}
 
+    EIGEN_DEVICE_FUNC
     inline Index rows() const { return m_expression.rows(); }
+    EIGEN_DEVICE_FUNC
     inline Index cols() const { return m_expression.cols(); }
+    EIGEN_DEVICE_FUNC
     inline Index outerStride() const { return m_expression.outerStride(); }
+    EIGEN_DEVICE_FUNC
     inline Index innerStride() const { return m_expression.innerStride(); }
 
+    EIGEN_DEVICE_FUNC
     inline ScalarWithConstIfNotLvalue* data() { return m_expression.const_cast_derived().data(); }
+    EIGEN_DEVICE_FUNC
     inline const Scalar* data() const { return m_expression.data(); }
 
+    EIGEN_DEVICE_FUNC
     inline CoeffReturnType coeff(Index rowId, Index colId) const
     {
       return m_expression.coeff(rowId, colId);
     }
 
+    EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index rowId, Index colId)
     {
       return m_expression.const_cast_derived().coeffRef(rowId, colId);
     }
 
+    EIGEN_DEVICE_FUNC
     inline const Scalar& coeffRef(Index rowId, Index colId) const
     {
       return m_expression.derived().coeffRef(rowId, colId);
     }
 
+    EIGEN_DEVICE_FUNC
     inline CoeffReturnType coeff(Index index) const
     {
       return m_expression.coeff(index);
     }
 
+    EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index index)
     {
       return m_expression.const_cast_derived().coeffRef(index);
     }
 
+    EIGEN_DEVICE_FUNC
     inline const Scalar& coeffRef(Index index) const
     {
       return m_expression.const_cast_derived().coeffRef(index);
@@ -232,6 +262,7 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
       m_expression.const_cast_derived().template writePacket<LoadMode>(index, val);
     }
 
+    EIGEN_DEVICE_FUNC
     const typename internal::remove_all<NestedExpressionType>::type& 
     nestedExpression() const 
     {
@@ -240,9 +271,11 @@ class MatrixWrapper : public MatrixBase<MatrixWrapper<ExpressionType> >
 
     /** Forwards the resizing request to the nested expression
       * \sa DenseBase::resize(Index)  */
+    EIGEN_DEVICE_FUNC
     void resize(Index newSize) { m_expression.const_cast_derived().resize(newSize); }
     /** Forwards the resizing request to the nested expression
       * \sa DenseBase::resize(Index,Index)*/
+    EIGEN_DEVICE_FUNC
     void resize(Index nbRows, Index nbCols) { m_expression.const_cast_derived().resize(nbRows,nbCols); }
 
   protected:
diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h
index 1dccc2f42..906adcf82 100644
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -139,6 +139,7 @@ struct assign_DefaultTraversal_CompleteUnrolling
     inner = Index % Derived1::InnerSizeAtCompileTime
   };
 
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
   {
     dst.copyCoeffByOuterInner(outer, inner, src);
@@ -149,12 +150,14 @@ struct assign_DefaultTraversal_CompleteUnrolling
 template<typename Derived1, typename Derived2, int Stop>
 struct assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
 {
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
 };
 
 template<typename Derived1, typename Derived2, int Index, int Stop>
 struct assign_DefaultTraversal_InnerUnrolling
 {
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src, typename Derived1::Index outer)
   {
     dst.copyCoeffByOuterInner(outer, Index, src);
@@ -165,6 +168,7 @@ struct assign_DefaultTraversal_InnerUnrolling
 template<typename Derived1, typename Derived2, int Stop>
 struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
 {
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &, typename Derived1::Index) {}
 };
 
@@ -175,6 +179,7 @@ struct assign_DefaultTraversal_InnerUnrolling<Derived1, Derived2, Stop, Stop>
 template<typename Derived1, typename Derived2, int Index, int Stop>
 struct assign_LinearTraversal_CompleteUnrolling
 {
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
   {
     dst.copyCoeff(Index, src);
@@ -185,6 +190,7 @@ struct assign_LinearTraversal_CompleteUnrolling
 template<typename Derived1, typename Derived2, int Stop>
 struct assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, Stop, Stop>
 {
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE void run(Derived1 &, const Derived2 &) {}
 };
 
@@ -249,6 +255,7 @@ struct assign_impl;
 template<typename Derived1, typename Derived2, int Unrolling, int Version>
 struct assign_impl<Derived1, Derived2, InvalidTraversal, Unrolling, Version>
 {
+  EIGEN_DEVICE_FUNC
   static inline void run(Derived1 &, const Derived2 &) { }
 };
 
@@ -256,6 +263,7 @@ template<typename Derived1, typename Derived2, int Version>
 struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling, Version>
 {
   typedef typename Derived1::Index Index;
+  EIGEN_DEVICE_FUNC
   static inline void run(Derived1 &dst, const Derived2 &src)
   {
     const Index innerSize = dst.innerSize();
@@ -269,6 +277,7 @@ struct assign_impl<Derived1, Derived2, DefaultTraversal, NoUnrolling, Version>
 template<typename Derived1, typename Derived2, int Version>
 struct assign_impl<Derived1, Derived2, DefaultTraversal, CompleteUnrolling, Version>
 {
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
   {
     assign_DefaultTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
@@ -280,6 +289,7 @@ template<typename Derived1, typename Derived2, int Version>
 struct assign_impl<Derived1, Derived2, DefaultTraversal, InnerUnrolling, Version>
 {
   typedef typename Derived1::Index Index;
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
   {
     const Index outerSize = dst.outerSize();
@@ -297,6 +307,7 @@ template<typename Derived1, typename Derived2, int Version>
 struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling, Version>
 {
   typedef typename Derived1::Index Index;
+  EIGEN_DEVICE_FUNC
   static inline void run(Derived1 &dst, const Derived2 &src)
   {
     const Index size = dst.size();
@@ -308,6 +319,7 @@ struct assign_impl<Derived1, Derived2, LinearTraversal, NoUnrolling, Version>
 template<typename Derived1, typename Derived2, int Version>
 struct assign_impl<Derived1, Derived2, LinearTraversal, CompleteUnrolling, Version>
 {
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE void run(Derived1 &dst, const Derived2 &src)
   {
     assign_LinearTraversal_CompleteUnrolling<Derived1, Derived2, 0, Derived1::SizeAtCompileTime>
@@ -517,22 +529,28 @@ struct assign_selector;
 
 template<typename Derived, typename OtherDerived>
 struct assign_selector<Derived,OtherDerived,false,false> {
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.derived()); }
   template<typename ActualDerived, typename ActualOtherDerived>
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { other.evalTo(dst); return dst; }
 };
 template<typename Derived, typename OtherDerived>
 struct assign_selector<Derived,OtherDerived,true,false> {
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.eval()); }
 };
 template<typename Derived, typename OtherDerived>
 struct assign_selector<Derived,OtherDerived,false,true> {
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose()); }
   template<typename ActualDerived, typename ActualOtherDerived>
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE Derived& evalTo(ActualDerived& dst, const ActualOtherDerived& other) { Transpose<ActualDerived> dstTrans(dst); other.evalTo(dstTrans); return dst; }
 };
 template<typename Derived, typename OtherDerived>
 struct assign_selector<Derived,OtherDerived,true,true> {
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE Derived& run(Derived& dst, const OtherDerived& other) { return dst.lazyAssign(other.transpose().eval()); }
 };
 
@@ -540,18 +558,21 @@ struct assign_selector<Derived,OtherDerived,true,true> {
 
 template<typename Derived>
 template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
 {
   return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
 }
 
 template<typename Derived>
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::operator=(const DenseBase& other)
 {
   return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
 }
 
 template<typename Derived>
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& other)
 {
   return internal::assign_selector<Derived,Derived>::run(derived(), other.derived());
@@ -559,6 +580,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const MatrixBase& ot
 
 template<typename Derived>
 template <typename OtherDerived>
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<OtherDerived>& other)
 {
   return internal::assign_selector<Derived,OtherDerived>::run(derived(), other.derived());
@@ -566,6 +588,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const DenseBase<Othe
 
 template<typename Derived>
 template <typename OtherDerived>
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const EigenBase<OtherDerived>& other)
 {
   return internal::assign_selector<Derived,OtherDerived,false>::evalTo(derived(), other.derived());
@@ -573,6 +596,7 @@ EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const EigenBase<Othe
 
 template<typename Derived>
 template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE Derived& MatrixBase<Derived>::operator=(const ReturnByValue<OtherDerived>& other)
 {
   return internal::assign_selector<Derived,OtherDerived,false>::evalTo(derived(), other.derived());
diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h
index 3efdcfee3..31cd5c72c 100644
--- a/Eigen/src/Core/Block.h
+++ b/Eigen/src/Core/Block.h
@@ -114,6 +114,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
   
     /** Column or Row constructor
       */
+    EIGEN_DEVICE_FUNC
     inline Block(XprType& xpr, Index i) : Impl(xpr,i)
     {
       eigen_assert( (i>=0) && (
@@ -123,6 +124,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
 
     /** Fixed-size constructor
       */
+    EIGEN_DEVICE_FUNC
     inline Block(XprType& xpr, Index a_startRow, Index a_startCol)
       : Impl(xpr, a_startRow, a_startCol)
     {
@@ -133,6 +135,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel> class
 
     /** Dynamic-size constructor
       */
+    EIGEN_DEVICE_FUNC
     inline Block(XprType& xpr,
           Index a_startRow, Index a_startCol,
           Index blockRows, Index blockCols)
@@ -156,8 +159,9 @@ class BlockImpl<XprType, BlockRows, BlockCols, InnerPanel, Dense>
   public:
     typedef Impl Base;
     EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl)
-    inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {}
-    inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol) : Impl(xpr, a_startRow, a_startCol) {}
+    EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index i) : Impl(xpr,i) {}
+    EIGEN_DEVICE_FUNC inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol) : Impl(xpr, a_startRow, a_startCol) {}
+    EIGEN_DEVICE_FUNC
     inline BlockImpl(XprType& xpr, Index a_startRow, Index a_startCol, Index blockRows, Index blockCols)
       : Impl(xpr, a_startRow, a_startCol, blockRows, blockCols) {}
 };
@@ -179,6 +183,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
 
     /** Column or Row constructor
       */
+    EIGEN_DEVICE_FUNC
     inline BlockImpl_dense(XprType& xpr, Index i)
       : m_xpr(xpr),
         // It is a row if and only if BlockRows==1 and BlockCols==XprType::ColsAtCompileTime,
@@ -193,6 +198,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
 
     /** Fixed-size constructor
       */
+    EIGEN_DEVICE_FUNC
     inline BlockImpl_dense(XprType& xpr, Index a_startRow, Index a_startCol)
       : m_xpr(xpr), m_startRow(a_startRow), m_startCol(a_startCol),
                     m_blockRows(BlockRows), m_blockCols(BlockCols)
@@ -200,6 +206,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
 
     /** Dynamic-size constructor
       */
+    EIGEN_DEVICE_FUNC
     inline BlockImpl_dense(XprType& xpr,
           Index a_startRow, Index a_startCol,
           Index blockRows, Index blockCols)
@@ -207,9 +214,10 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
                     m_blockRows(blockRows), m_blockCols(blockCols)
     {}
 
-    inline Index rows() const { return m_blockRows.value(); }
-    inline Index cols() const { return m_blockCols.value(); }
+    EIGEN_DEVICE_FUNC inline Index rows() const { return m_blockRows.value(); }
+    EIGEN_DEVICE_FUNC inline Index cols() const { return m_blockCols.value(); }
 
+    EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index rowId, Index colId)
     {
       EIGEN_STATIC_ASSERT_LVALUE(XprType)
@@ -217,17 +225,20 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
                .coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
     }
 
+    EIGEN_DEVICE_FUNC
     inline const Scalar& coeffRef(Index rowId, Index colId) const
     {
       return m_xpr.derived()
                .coeffRef(rowId + m_startRow.value(), colId + m_startCol.value());
     }
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index rowId, Index colId) const
     {
       return m_xpr.coeff(rowId + m_startRow.value(), colId + m_startCol.value());
     }
 
+    EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index index)
     {
       EIGEN_STATIC_ASSERT_LVALUE(XprType)
@@ -236,6 +247,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
                        m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
     }
 
+    EIGEN_DEVICE_FUNC
     inline const Scalar& coeffRef(Index index) const
     {
       return m_xpr.const_cast_derived()
@@ -243,6 +255,7 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
                        m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0));
     }
 
+    EIGEN_DEVICE_FUNC
     inline const CoeffReturnType coeff(Index index) const
     {
       return m_xpr
@@ -282,21 +295,24 @@ template<typename XprType, int BlockRows, int BlockCols, bool InnerPanel, bool H
 
     #ifdef EIGEN_PARSED_BY_DOXYGEN
     /** \sa MapBase::data() */
-    inline const Scalar* data() const;
-    inline Index innerStride() const;
-    inline Index outerStride() const;
+    EIGEN_DEVICE_FUNC inline const Scalar* data() const;
+    EIGEN_DEVICE_FUNC inline Index innerStride() const;
+    EIGEN_DEVICE_FUNC inline Index outerStride() const;
     #endif
 
-    const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const 
+    EIGEN_DEVICE_FUNC
+    const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
     { 
       return m_xpr; 
     }
       
-    Index startRow() const 
+    EIGEN_DEVICE_FUNC
+    Index startRow() const
     { 
       return m_startRow.value(); 
     }
       
+    EIGEN_DEVICE_FUNC
     Index startCol() const 
     { 
       return m_startCol.value(); 
@@ -325,6 +341,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
 
     /** Column or Row constructor
       */
+    EIGEN_DEVICE_FUNC
     inline BlockImpl_dense(XprType& xpr, Index i)
       : Base(internal::const_cast_ptr(&xpr.coeffRef(
               (BlockRows==1) && (BlockCols==XprType::ColsAtCompileTime) ? i : 0,
@@ -338,6 +355,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
 
     /** Fixed-size constructor
       */
+    EIGEN_DEVICE_FUNC
     inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol)
       : Base(internal::const_cast_ptr(&xpr.coeffRef(startRow,startCol))), m_xpr(xpr)
     {
@@ -346,6 +364,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
 
     /** Dynamic-size constructor
       */
+    EIGEN_DEVICE_FUNC
     inline BlockImpl_dense(XprType& xpr,
           Index startRow, Index startCol,
           Index blockRows, Index blockCols)
@@ -355,12 +374,14 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
       init();
     }
 
-    const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const 
+    EIGEN_DEVICE_FUNC
+    const typename internal::remove_all<typename XprType::Nested>::type& nestedExpression() const
     { 
       return m_xpr; 
     }
       
     /** \sa MapBase::innerStride() */
+    EIGEN_DEVICE_FUNC
     inline Index innerStride() const
     {
       return internal::traits<BlockType>::HasSameStorageOrderAsXprType
@@ -369,6 +390,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
     }
 
     /** \sa MapBase::outerStride() */
+    EIGEN_DEVICE_FUNC
     inline Index outerStride() const
     {
       return m_outerStride;
@@ -382,6 +404,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
 
     #ifndef EIGEN_PARSED_BY_DOXYGEN
     /** \internal used by allowAligned() */
+    EIGEN_DEVICE_FUNC
     inline BlockImpl_dense(XprType& xpr, const Scalar* data, Index blockRows, Index blockCols)
       : Base(data, blockRows, blockCols), m_xpr(xpr)
     {
@@ -390,6 +413,7 @@ class BlockImpl_dense<XprType,BlockRows,BlockCols, InnerPanel,true>
     #endif
 
   protected:
+    EIGEN_DEVICE_FUNC
     void init()
     {
       m_outerStride = internal::traits<BlockType>::HasSameStorageOrderAsXprType
diff --git a/Eigen/src/Core/CommaInitializer.h b/Eigen/src/Core/CommaInitializer.h
index a96867af4..2bbf74b05 100644
--- a/Eigen/src/Core/CommaInitializer.h
+++ b/Eigen/src/Core/CommaInitializer.h
@@ -30,6 +30,7 @@ struct CommaInitializer
   typedef typename XprType::Scalar Scalar;
   typedef typename XprType::Index Index;
 
+  EIGEN_DEVICE_FUNC
   inline CommaInitializer(XprType& xpr, const Scalar& s)
     : m_xpr(xpr), m_row(0), m_col(1), m_currentBlockRows(1)
   {
@@ -37,6 +38,7 @@ struct CommaInitializer
   }
 
   template<typename OtherDerived>
+  EIGEN_DEVICE_FUNC
   inline CommaInitializer(XprType& xpr, const DenseBase<OtherDerived>& other)
     : m_xpr(xpr), m_row(0), m_col(other.cols()), m_currentBlockRows(other.rows())
   {
@@ -44,6 +46,7 @@ struct CommaInitializer
   }
 
   /* inserts a scalar value in the target matrix */
+  EIGEN_DEVICE_FUNC
   CommaInitializer& operator,(const Scalar& s)
   {
     if (m_col==m_xpr.cols())
@@ -63,6 +66,7 @@ struct CommaInitializer
 
   /* inserts a matrix expression in the target matrix */
   template<typename OtherDerived>
+  EIGEN_DEVICE_FUNC
   CommaInitializer& operator,(const DenseBase<OtherDerived>& other)
   {
     if(other.cols()==0 || other.rows()==0)
@@ -88,6 +92,7 @@ struct CommaInitializer
     return *this;
   }
 
+  EIGEN_DEVICE_FUNC
   inline ~CommaInitializer()
   {
     eigen_assert((m_row+m_currentBlockRows) == m_xpr.rows()
@@ -102,6 +107,7 @@ struct CommaInitializer
     * quaternion.fromRotationMatrix((Matrix3f() << axis0, axis1, axis2).finished());
     * \endcode
     */
+  EIGEN_DEVICE_FUNC
   inline XprType& finished() { return m_xpr; }
 
   XprType& m_xpr;   // target expression
diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h
index 586f77aaf..e20daacc8 100644
--- a/Eigen/src/Core/CwiseBinaryOp.h
+++ b/Eigen/src/Core/CwiseBinaryOp.h
@@ -122,6 +122,7 @@ class CwiseBinaryOp : internal::no_assignment_operator,
     typedef typename internal::remove_reference<LhsNested>::type _LhsNested;
     typedef typename internal::remove_reference<RhsNested>::type _RhsNested;
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, const BinaryOp& func = BinaryOp())
       : m_lhs(aLhs), m_rhs(aRhs), m_functor(func)
     {
@@ -131,6 +132,7 @@ class CwiseBinaryOp : internal::no_assignment_operator,
       eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols());
     }
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Index rows() const {
       // return the fixed size type if available to enable compile time optimizations
       if (internal::traits<typename internal::remove_all<LhsNested>::type>::RowsAtCompileTime==Dynamic)
@@ -138,6 +140,7 @@ class CwiseBinaryOp : internal::no_assignment_operator,
       else
         return m_lhs.rows();
     }
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Index cols() const {
       // return the fixed size type if available to enable compile time optimizations
       if (internal::traits<typename internal::remove_all<LhsNested>::type>::ColsAtCompileTime==Dynamic)
@@ -147,10 +150,13 @@ class CwiseBinaryOp : internal::no_assignment_operator,
     }
 
     /** \returns the left hand side nested expression */
+    EIGEN_DEVICE_FUNC
     const _LhsNested& lhs() const { return m_lhs; }
     /** \returns the right hand side nested expression */
+    EIGEN_DEVICE_FUNC
     const _RhsNested& rhs() const { return m_rhs; }
     /** \returns the functor representing the binary operation */
+    EIGEN_DEVICE_FUNC
     const BinaryOp& functor() const { return m_functor; }
 
   protected:
@@ -169,6 +175,7 @@ class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Dense>
     typedef typename internal::dense_xpr_base<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >::type Base;
     EIGEN_DENSE_PUBLIC_INTERFACE( Derived )
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
     {
       return derived().functor()(derived().lhs().coeff(rowId, colId),
@@ -182,6 +189,7 @@ class CwiseBinaryOpImpl<BinaryOp, Lhs, Rhs, Dense>
                                           derived().rhs().template packet<LoadMode>(rowId, colId));
     }
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
     {
       return derived().functor()(derived().lhs().coeff(index),
@@ -227,3 +235,4 @@ MatrixBase<Derived>::operator+=(const MatrixBase<OtherDerived>& other)
 } // end namespace Eigen
 
 #endif // EIGEN_CWISE_BINARY_OP_H
+
diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h
index a93bab2d0..1d4ee50a8 100644
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -54,6 +54,7 @@ class CwiseNullaryOp : internal::no_assignment_operator,
     typedef typename internal::dense_xpr_base<CwiseNullaryOp>::type Base;
     EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp)
 
+    EIGEN_DEVICE_FUNC
     CwiseNullaryOp(Index nbRows, Index nbCols, const NullaryOp& func = NullaryOp())
       : m_rows(nbRows), m_cols(nbCols), m_functor(func)
     {
@@ -63,9 +64,12 @@ class CwiseNullaryOp : internal::no_assignment_operator,
             && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == nbCols));
     }
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Index rows() const { return m_rows.value(); }
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Index cols() const { return m_cols.value(); }
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
     {
       return m_functor(rowId, colId);
@@ -77,6 +81,7 @@ class CwiseNullaryOp : internal::no_assignment_operator,
       return m_functor.packetOp(rowId, colId);
     }
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
     {
       return m_functor(index);
@@ -89,6 +94,7 @@ class CwiseNullaryOp : internal::no_assignment_operator,
     }
 
     /** \returns the functor representing the nullary operation */
+    EIGEN_DEVICE_FUNC
     const NullaryOp& functor() const { return m_functor; }
 
   protected:
@@ -740,6 +746,7 @@ namespace internal {
 template<typename Derived, bool Big = (Derived::SizeAtCompileTime>=16)>
 struct setIdentity_impl
 {
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE Derived& run(Derived& m)
   {
     return m = Derived::Identity(m.rows(), m.cols());
@@ -750,6 +757,7 @@ template<typename Derived>
 struct setIdentity_impl<Derived, true>
 {
   typedef typename Derived::Index Index;
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE Derived& run(Derived& m)
   {
     m.setZero();
diff --git a/Eigen/src/Core/CwiseUnaryOp.h b/Eigen/src/Core/CwiseUnaryOp.h
index f2de749f9..aa7df197f 100644
--- a/Eigen/src/Core/CwiseUnaryOp.h
+++ b/Eigen/src/Core/CwiseUnaryOp.h
@@ -64,20 +64,26 @@ class CwiseUnaryOp : internal::no_assignment_operator,
     typedef typename CwiseUnaryOpImpl<UnaryOp, XprType,typename internal::traits<XprType>::StorageKind>::Base Base;
     EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp)
 
+    EIGEN_DEVICE_FUNC
     inline CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp())
       : m_xpr(xpr), m_functor(func) {}
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Index rows() const { return m_xpr.rows(); }
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Index cols() const { return m_xpr.cols(); }
 
     /** \returns the functor representing the unary operation */
+    EIGEN_DEVICE_FUNC
     const UnaryOp& functor() const { return m_functor; }
 
     /** \returns the nested expression */
+    EIGEN_DEVICE_FUNC
     const typename internal::remove_all<typename XprType::Nested>::type&
     nestedExpression() const { return m_xpr; }
 
     /** \returns the nested expression */
+    EIGEN_DEVICE_FUNC
     typename internal::remove_all<typename XprType::Nested>::type&
     nestedExpression() { return m_xpr.const_cast_derived(); }
 
@@ -98,6 +104,7 @@ class CwiseUnaryOpImpl<UnaryOp,XprType,Dense>
     typedef typename internal::dense_xpr_base<CwiseUnaryOp<UnaryOp, XprType> >::type Base;
     EIGEN_DENSE_PUBLIC_INTERFACE(Derived)
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE const Scalar coeff(Index rowId, Index colId) const
     {
       return derived().functor()(derived().nestedExpression().coeff(rowId, colId));
@@ -109,12 +116,14 @@ class CwiseUnaryOpImpl<UnaryOp,XprType,Dense>
       return derived().functor().packetOp(derived().nestedExpression().template packet<LoadMode>(rowId, colId));
     }
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
     {
       return derived().functor()(derived().nestedExpression().coeff(index));
     }
 
     template<int LoadMode>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE PacketScalar packet(Index index) const
     {
       return derived().functor().packetOp(derived().nestedExpression().template packet<LoadMode>(index));
diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h
index c5800f6c8..4794c2f13 100644
--- a/Eigen/src/Core/DenseBase.h
+++ b/Eigen/src/Core/DenseBase.h
@@ -182,6 +182,7 @@ template<typename Derived> class DenseBase
 
     /** \returns the number of nonzero coefficients which is in practice the number
       * of stored coefficients. */
+    EIGEN_DEVICE_FUNC
     inline Index nonZeros() const { return size(); }
     /** \returns true if either the number of rows or the number of columns is equal to 1.
       * In other words, this function returns
@@ -193,6 +194,7 @@ template<typename Derived> class DenseBase
       * \note For a vector, this returns just 1. For a matrix (non-vector), this is the major dimension
       * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of columns for a
       * column-major matrix, and the number of rows for a row-major matrix. */
+    EIGEN_DEVICE_FUNC
     Index outerSize() const
     {
       return IsVectorAtCompileTime ? 1
@@ -204,6 +206,7 @@ template<typename Derived> class DenseBase
       * \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension
       * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of rows for a 
       * column-major matrix, and the number of columns for a row-major matrix. */
+    EIGEN_DEVICE_FUNC
     Index innerSize() const
     {
       return IsVectorAtCompileTime ? this->size()
@@ -214,6 +217,7 @@ template<typename Derived> class DenseBase
       * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does
       * nothing else.
       */
+    EIGEN_DEVICE_FUNC
     void resize(Index newSize)
     {
       EIGEN_ONLY_USED_FOR_DEBUG(newSize);
@@ -224,6 +228,7 @@ template<typename Derived> class DenseBase
       * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and does
       * nothing else.
       */
+    EIGEN_DEVICE_FUNC
     void resize(Index nbRows, Index nbCols)
     {
       EIGEN_ONLY_USED_FOR_DEBUG(nbRows);
@@ -247,42 +252,54 @@ template<typename Derived> class DenseBase
 
     /** Copies \a other into *this. \returns a reference to *this. */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     Derived& operator=(const DenseBase<OtherDerived>& other);
 
     /** Special case of the template operator=, in order to prevent the compiler
       * from generating a default operator= (issue hit with g++ 4.1)
       */
+    EIGEN_DEVICE_FUNC
     Derived& operator=(const DenseBase& other);
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     Derived& operator=(const EigenBase<OtherDerived> &other);
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     Derived& operator+=(const EigenBase<OtherDerived> &other);
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     Derived& operator-=(const EigenBase<OtherDerived> &other);
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     Derived& operator=(const ReturnByValue<OtherDerived>& func);
 
 #ifndef EIGEN_PARSED_BY_DOXYGEN
     /** Copies \a other into *this without evaluating other. \returns a reference to *this. */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     Derived& lazyAssign(const DenseBase<OtherDerived>& other);
 #endif // not EIGEN_PARSED_BY_DOXYGEN
 
+    EIGEN_DEVICE_FUNC
     CommaInitializer<Derived> operator<< (const Scalar& s);
 
     template<unsigned int Added,unsigned int Removed>
     const Flagged<Derived, Added, Removed> flagged() const;
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     CommaInitializer<Derived> operator<< (const DenseBase<OtherDerived>& other);
 
+    EIGEN_DEVICE_FUNC
     Eigen::Transpose<Derived> transpose();
-	typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
+    typedef typename internal::add_const<Transpose<const Derived> >::type ConstTransposeReturnType;
+    EIGEN_DEVICE_FUNC
     ConstTransposeReturnType transpose() const;
+    EIGEN_DEVICE_FUNC
     void transposeInPlace();
 #ifndef EIGEN_NO_DEBUG
   protected:
@@ -292,65 +309,68 @@ template<typename Derived> class DenseBase
 #endif
 
 
-    static const ConstantReturnType
+    EIGEN_DEVICE_FUNC static const ConstantReturnType
     Constant(Index rows, Index cols, const Scalar& value);
-    static const ConstantReturnType
+    EIGEN_DEVICE_FUNC static const ConstantReturnType
     Constant(Index size, const Scalar& value);
-    static const ConstantReturnType
+    EIGEN_DEVICE_FUNC static const ConstantReturnType
     Constant(const Scalar& value);
 
-    static const SequentialLinSpacedReturnType
+    EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
     LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high);
-    static const RandomAccessLinSpacedReturnType
+    EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
     LinSpaced(Index size, const Scalar& low, const Scalar& high);
-    static const SequentialLinSpacedReturnType
+    EIGEN_DEVICE_FUNC static const SequentialLinSpacedReturnType
     LinSpaced(Sequential_t, const Scalar& low, const Scalar& high);
-    static const RandomAccessLinSpacedReturnType
+    EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType
     LinSpaced(const Scalar& low, const Scalar& high);
 
-    template<typename CustomNullaryOp>
+    template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
     static const CwiseNullaryOp<CustomNullaryOp, Derived>
     NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func);
-    template<typename CustomNullaryOp>
+    template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
     static const CwiseNullaryOp<CustomNullaryOp, Derived>
     NullaryExpr(Index size, const CustomNullaryOp& func);
-    template<typename CustomNullaryOp>
+    template<typename CustomNullaryOp> EIGEN_DEVICE_FUNC
     static const CwiseNullaryOp<CustomNullaryOp, Derived>
     NullaryExpr(const CustomNullaryOp& func);
 
-    static const ConstantReturnType Zero(Index rows, Index cols);
-    static const ConstantReturnType Zero(Index size);
-    static const ConstantReturnType Zero();
-    static const ConstantReturnType Ones(Index rows, Index cols);
-    static const ConstantReturnType Ones(Index size);
-    static const ConstantReturnType Ones();
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index rows, Index cols);
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Zero(Index size);
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Zero();
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index rows, Index cols);
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index size);
+    EIGEN_DEVICE_FUNC static const ConstantReturnType Ones();
 
-    void fill(const Scalar& value);
-    Derived& setConstant(const Scalar& value);
-    Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
-    Derived& setLinSpaced(const Scalar& low, const Scalar& high);
-    Derived& setZero();
-    Derived& setOnes();
-    Derived& setRandom();
+    EIGEN_DEVICE_FUNC void fill(const Scalar& value);
+    EIGEN_DEVICE_FUNC Derived& setConstant(const Scalar& value);
+    EIGEN_DEVICE_FUNC Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high);
+    EIGEN_DEVICE_FUNC Derived& setLinSpaced(const Scalar& low, const Scalar& high);
+    EIGEN_DEVICE_FUNC Derived& setZero();
+    EIGEN_DEVICE_FUNC Derived& setOnes();
+    EIGEN_DEVICE_FUNC Derived& setRandom();
 
-    template<typename OtherDerived>
+    template<typename OtherDerived> EIGEN_DEVICE_FUNC
     bool isApprox(const DenseBase<OtherDerived>& other,
                   const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    EIGEN_DEVICE_FUNC
     bool isMuchSmallerThan(const RealScalar& other,
                            const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
-    template<typename OtherDerived>
+    template<typename OtherDerived> EIGEN_DEVICE_FUNC
     bool isMuchSmallerThan(const DenseBase<OtherDerived>& other,
                            const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
 
-    bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
-    bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
-    bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
-    bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    EIGEN_DEVICE_FUNC bool isApproxToConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    EIGEN_DEVICE_FUNC bool isConstant(const Scalar& value, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    EIGEN_DEVICE_FUNC bool isZero(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
+    EIGEN_DEVICE_FUNC bool isOnes(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
     
     inline bool hasNaN() const;
     inline bool allFinite() const;
 
+    EIGEN_DEVICE_FUNC
     inline Derived& operator*=(const Scalar& other);
+    EIGEN_DEVICE_FUNC
     inline Derived& operator/=(const Scalar& other);
 
     typedef typename internal::add_const_on_value_type<typename internal::eval<Derived>::type>::type EvalReturnType;
@@ -359,6 +379,7 @@ template<typename Derived> class DenseBase
       * Notice that in the case of a plain matrix or vector (not an expression) this function just returns
       * a const reference, in order to avoid a useless copy.
       */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE EvalReturnType eval() const
     {
       // Even though MSVC does not honor strong inlining when the return type
@@ -371,6 +392,7 @@ template<typename Derived> class DenseBase
       *
       */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     void swap(const DenseBase<OtherDerived>& other,
               int = OtherDerived::ThisConstantIsPrivateInPlainObjectBase)
     {
@@ -381,46 +403,52 @@ template<typename Derived> class DenseBase
       *
       */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     void swap(PlainObjectBase<OtherDerived>& other)
     {
       SwapWrapper<Derived>(derived()).lazyAssign(other.derived());
     }
 
 
-    inline const NestByValue<Derived> nestByValue() const;
-    inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
-    inline ForceAlignedAccess<Derived> forceAlignedAccess();
-    template<bool Enable> inline const typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf() const;
-    template<bool Enable> inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
+    EIGEN_DEVICE_FUNC inline const NestByValue<Derived> nestByValue() const;
+    EIGEN_DEVICE_FUNC inline const ForceAlignedAccess<Derived> forceAlignedAccess() const;
+    EIGEN_DEVICE_FUNC inline ForceAlignedAccess<Derived> forceAlignedAccess();
+    template<bool Enable> EIGEN_DEVICE_FUNC
+    inline const typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf() const;
+    template<bool Enable> EIGEN_DEVICE_FUNC
+    inline typename internal::conditional<Enable,ForceAlignedAccess<Derived>,Derived&>::type forceAlignedAccessIf();
 
-    Scalar sum() const;
-    Scalar mean() const;
-    Scalar trace() const;
+    EIGEN_DEVICE_FUNC Scalar sum() const;
+    EIGEN_DEVICE_FUNC Scalar mean() const;
+    EIGEN_DEVICE_FUNC Scalar trace() const;
 
-    Scalar prod() const;
+    EIGEN_DEVICE_FUNC Scalar prod() const;
 
-    typename internal::traits<Derived>::Scalar minCoeff() const;
-    typename internal::traits<Derived>::Scalar maxCoeff() const;
+    EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar minCoeff() const;
+    EIGEN_DEVICE_FUNC typename internal::traits<Derived>::Scalar maxCoeff() const;
 
-    template<typename IndexType>
+    template<typename IndexType> EIGEN_DEVICE_FUNC
     typename internal::traits<Derived>::Scalar minCoeff(IndexType* row, IndexType* col) const;
-    template<typename IndexType>
+    template<typename IndexType> EIGEN_DEVICE_FUNC
     typename internal::traits<Derived>::Scalar maxCoeff(IndexType* row, IndexType* col) const;
-    template<typename IndexType>
+    template<typename IndexType> EIGEN_DEVICE_FUNC
     typename internal::traits<Derived>::Scalar minCoeff(IndexType* index) const;
-    template<typename IndexType>
+    template<typename IndexType> EIGEN_DEVICE_FUNC
     typename internal::traits<Derived>::Scalar maxCoeff(IndexType* index) const;
 
     template<typename BinaryOp>
+    EIGEN_DEVICE_FUNC
     typename internal::result_of<BinaryOp(typename internal::traits<Derived>::Scalar)>::type
     redux(const BinaryOp& func) const;
 
     template<typename Visitor>
+    EIGEN_DEVICE_FUNC
     void visit(Visitor& func) const;
 
     inline const WithFormat<Derived> format(const IOFormat& fmt) const;
 
     /** \returns the unique coefficient of a 1x1 expression */
+    EIGEN_DEVICE_FUNC
     CoeffReturnType value() const
     {
       EIGEN_STATIC_ASSERT_SIZE_1x1(Derived)
@@ -428,8 +456,8 @@ template<typename Derived> class DenseBase
       return derived().coeff(0,0);
     }
 
-    bool all(void) const;
-    bool any(void) const;
+    bool all() const;
+    bool any() const;
     Index count() const;
 
     typedef VectorwiseOp<Derived, Horizontal> RowwiseReturnType;
@@ -491,14 +519,16 @@ template<typename Derived> class DenseBase
 
 
     // disable the use of evalTo for dense objects with a nice compilation error
-    template<typename Dest> inline void evalTo(Dest& ) const
+    template<typename Dest>
+    EIGEN_DEVICE_FUNC
+    inline void evalTo(Dest& ) const
     {
       EIGEN_STATIC_ASSERT((internal::is_same<Dest,void>::value),THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS);
     }
 
   protected:
     /** Default constructor. Do nothing. */
-    DenseBase()
+    EIGEN_DEVICE_FUNC DenseBase()
     {
       /* Just checks for self-consistency of the flags.
        * Only do it when debugging Eigen, as this borders on paranoiac and could slow compilation down
@@ -511,9 +541,9 @@ template<typename Derived> class DenseBase
     }
 
   private:
-    explicit DenseBase(int);
-    DenseBase(int,int);
-    template<typename OtherDerived> explicit DenseBase(const DenseBase<OtherDerived>&);
+    EIGEN_DEVICE_FUNC explicit DenseBase(int);
+    EIGEN_DEVICE_FUNC DenseBase(int,int);
+    template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit DenseBase(const DenseBase<OtherDerived>&);
 };
 
 } // end namespace Eigen
diff --git a/Eigen/src/Core/DenseCoeffsBase.h b/Eigen/src/Core/DenseCoeffsBase.h
index 3c890f215..efabb5e67 100644
--- a/Eigen/src/Core/DenseCoeffsBase.h
+++ b/Eigen/src/Core/DenseCoeffsBase.h
@@ -61,6 +61,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
     using Base::size;
     using Base::derived;
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const
     {
       return int(Derived::RowsAtCompileTime) == 1 ? 0
@@ -69,6 +70,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
           : inner;
     }
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const
     {
       return int(Derived::ColsAtCompileTime) == 1 ? 0
@@ -91,6 +93,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
       *
       * \sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const
       */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const
     {
       eigen_internal_assert(row >= 0 && row < rows()
@@ -98,6 +101,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
       return derived().coeff(row, col);
     }
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const
     {
       return coeff(rowIndexByOuterInner(outer, inner),
@@ -108,6 +112,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
       *
       * \sa operator()(Index,Index), operator[](Index)
       */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE CoeffReturnType operator()(Index row, Index col) const
     {
       eigen_assert(row >= 0 && row < rows()
@@ -130,6 +135,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
       * \sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const
       */
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE CoeffReturnType
     coeff(Index index) const
     {
@@ -146,6 +152,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
       * z() const, w() const
       */
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE CoeffReturnType
     operator[](Index index) const
     {
@@ -167,6 +174,7 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
       * z() const, w() const
       */
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE CoeffReturnType
     operator()(Index index) const
     {
@@ -176,21 +184,25 @@ class DenseCoeffsBase<Derived,ReadOnlyAccessors> : public EigenBase<Derived>
 
     /** equivalent to operator[](0).  */
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE CoeffReturnType
     x() const { return (*this)[0]; }
 
     /** equivalent to operator[](1).  */
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE CoeffReturnType
     y() const { return (*this)[1]; }
 
     /** equivalent to operator[](2).  */
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE CoeffReturnType
     z() const { return (*this)[2]; }
 
     /** equivalent to operator[](3).  */
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE CoeffReturnType
     w() const { return (*this)[3]; }
 
@@ -311,6 +323,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
       *
       * \sa operator()(Index,Index), coeff(Index, Index) const, coeffRef(Index)
       */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col)
     {
       eigen_internal_assert(row >= 0 && row < rows()
@@ -318,6 +331,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
       return derived().coeffRef(row, col);
     }
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar&
     coeffRefByOuterInner(Index outer, Index inner)
     {
@@ -330,6 +344,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
       * \sa operator[](Index)
       */
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar&
     operator()(Index row, Index col)
     {
@@ -354,6 +369,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
       * \sa operator[](Index), coeff(Index) const, coeffRef(Index,Index)
       */
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar&
     coeffRef(Index index)
     {
@@ -368,6 +384,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
       * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
       */
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar&
     operator[](Index index)
     {
@@ -388,6 +405,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
       * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w()
       */
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar&
     operator()(Index index)
     {
@@ -397,21 +415,25 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
 
     /** equivalent to operator[](0).  */
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar&
     x() { return (*this)[0]; }
 
     /** equivalent to operator[](1).  */
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar&
     y() { return (*this)[1]; }
 
     /** equivalent to operator[](2).  */
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar&
     z() { return (*this)[2]; }
 
     /** equivalent to operator[](3).  */
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar&
     w() { return (*this)[3]; }
 
@@ -473,6 +495,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
       */
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
     {
       eigen_internal_assert(row >= 0 && row < rows()
@@ -489,6 +512,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
       */
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
     {
       eigen_internal_assert(index >= 0 && index < size());
@@ -497,6 +521,7 @@ class DenseCoeffsBase<Derived, WriteAccessors> : public DenseCoeffsBase<Derived,
 
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void copyCoeffByOuterInner(Index outer, Index inner, const DenseBase<OtherDerived>& other)
     {
       const Index row = rowIndexByOuterInner(outer,inner);
@@ -581,6 +606,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
       *
       * \sa outerStride(), rowStride(), colStride()
       */
+    EIGEN_DEVICE_FUNC
     inline Index innerStride() const
     {
       return derived().innerStride();
@@ -591,6 +617,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
       *
       * \sa innerStride(), rowStride(), colStride()
       */
+    EIGEN_DEVICE_FUNC
     inline Index outerStride() const
     {
       return derived().outerStride();
@@ -606,6 +633,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
       *
       * \sa innerStride(), outerStride(), colStride()
       */
+    EIGEN_DEVICE_FUNC
     inline Index rowStride() const
     {
       return Derived::IsRowMajor ? outerStride() : innerStride();
@@ -615,6 +643,7 @@ class DenseCoeffsBase<Derived, DirectAccessors> : public DenseCoeffsBase<Derived
       *
       * \sa innerStride(), outerStride(), rowStride()
       */
+    EIGEN_DEVICE_FUNC
     inline Index colStride() const
     {
       return Derived::IsRowMajor ? innerStride() : outerStride();
@@ -652,6 +681,7 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
       *
       * \sa outerStride(), rowStride(), colStride()
       */
+    EIGEN_DEVICE_FUNC
     inline Index innerStride() const
     {
       return derived().innerStride();
@@ -662,6 +692,7 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
       *
       * \sa innerStride(), rowStride(), colStride()
       */
+    EIGEN_DEVICE_FUNC
     inline Index outerStride() const
     {
       return derived().outerStride();
@@ -677,6 +708,7 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
       *
       * \sa innerStride(), outerStride(), colStride()
       */
+    EIGEN_DEVICE_FUNC
     inline Index rowStride() const
     {
       return Derived::IsRowMajor ? outerStride() : innerStride();
@@ -686,6 +718,7 @@ class DenseCoeffsBase<Derived, DirectWriteAccessors>
       *
       * \sa innerStride(), outerStride(), rowStride()
       */
+    EIGEN_DEVICE_FUNC
     inline Index colStride() const
     {
       return Derived::IsRowMajor ? innerStride() : outerStride();
diff --git a/Eigen/src/Core/DenseStorage.h b/Eigen/src/Core/DenseStorage.h
index 2549bd84e..af14832cf 100644
--- a/Eigen/src/Core/DenseStorage.h
+++ b/Eigen/src/Core/DenseStorage.h
@@ -44,12 +44,14 @@ struct plain_array
 {
   T array[Size];
 
-  plain_array() 
+  EIGEN_DEVICE_FUNC
+  plain_array()
   { 
     check_static_allocation_size<T,Size>();
   }
 
-  plain_array(constructor_without_unaligned_array_assert) 
+  EIGEN_DEVICE_FUNC
+  plain_array(constructor_without_unaligned_array_assert)
   { 
     check_static_allocation_size<T,Size>();
   }
@@ -81,12 +83,14 @@ struct plain_array<T, Size, MatrixOrArrayOptions, 16>
 {
   EIGEN_USER_ALIGN16 T array[Size];
 
+  EIGEN_DEVICE_FUNC
   plain_array() 
   { 
     EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(0xf);
     check_static_allocation_size<T,Size>();
   }
 
+  EIGEN_DEVICE_FUNC
   plain_array(constructor_without_unaligned_array_assert) 
   { 
     check_static_allocation_size<T,Size>();
@@ -97,8 +101,8 @@ template <typename T, int MatrixOrArrayOptions, int Alignment>
 struct plain_array<T, 0, MatrixOrArrayOptions, Alignment>
 {
   EIGEN_USER_ALIGN16 T array[1];
-  plain_array() {}
-  plain_array(constructor_without_unaligned_array_assert) {}
+  EIGEN_DEVICE_FUNC plain_array() {}
+  EIGEN_DEVICE_FUNC plain_array(constructor_without_unaligned_array_assert) {}
 };
 
 } // end namespace internal
@@ -122,23 +126,26 @@ template<typename T, int Size, int _Rows, int _Cols, int _Options> class DenseSt
 {
     internal::plain_array<T,Size,_Options> m_data;
   public:
-    DenseStorage() {}
+    EIGEN_DEVICE_FUNC DenseStorage() {}
+    EIGEN_DEVICE_FUNC
     DenseStorage(internal::constructor_without_unaligned_array_assert)
       : m_data(internal::constructor_without_unaligned_array_assert()) {}
+    EIGEN_DEVICE_FUNC
     DenseStorage(const DenseStorage& other) : m_data(other.m_data) {}
+    EIGEN_DEVICE_FUNC
     DenseStorage& operator=(const DenseStorage& other)
     { 
       if (this != &other) m_data = other.m_data;
       return *this; 
     }
-    DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
-    void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
-    static DenseIndex rows(void) {return _Rows;}
-    static DenseIndex cols(void) {return _Cols;}
-    void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
-    void resize(DenseIndex,DenseIndex,DenseIndex) {}
-    const T *data() const { return m_data.array; }
-    T *data() { return m_data.array; }
+    EIGEN_DEVICE_FUNC DenseStorage(DenseIndex,DenseIndex,DenseIndex) {}
+    EIGEN_DEVICE_FUNC void swap(DenseStorage& other) { std::swap(m_data,other.m_data); }
+    EIGEN_DEVICE_FUNC static DenseIndex rows(void) {return _Rows;}
+    EIGEN_DEVICE_FUNC static DenseIndex cols(void) {return _Cols;}
+    EIGEN_DEVICE_FUNC void conservativeResize(DenseIndex,DenseIndex,DenseIndex) {}
+    EIGEN_DEVICE_FUNC void resize(DenseIndex,DenseIndex,DenseIndex) {}
+    EIGEN_DEVICE_FUNC const T *data() const { return m_data.array; }
+    EIGEN_DEVICE_FUNC T *data() { return m_data.array; }
 };
 
 // null matrix
diff --git a/Eigen/src/Core/Diagonal.h b/Eigen/src/Core/Diagonal.h
index aab8007b3..4436c6a69 100644
--- a/Eigen/src/Core/Diagonal.h
+++ b/Eigen/src/Core/Diagonal.h
@@ -70,20 +70,25 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
     typedef typename internal::dense_xpr_base<Diagonal>::type Base;
     EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal)
 
+    EIGEN_DEVICE_FUNC
     inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) : m_matrix(matrix), m_index(a_index) {}
 
     EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal)
 
+    EIGEN_DEVICE_FUNC
     inline Index rows() const
     { return m_index.value()<0 ? (std::min<Index>)(m_matrix.cols(),m_matrix.rows()+m_index.value()) : (std::min<Index>)(m_matrix.rows(),m_matrix.cols()-m_index.value()); }
 
+    EIGEN_DEVICE_FUNC
     inline Index cols() const { return 1; }
 
+    EIGEN_DEVICE_FUNC
     inline Index innerStride() const
     {
       return m_matrix.outerStride() + 1;
     }
 
+    EIGEN_DEVICE_FUNC
     inline Index outerStride() const
     {
       return 0;
@@ -95,47 +100,57 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
                        const Scalar
                      >::type ScalarWithConstIfNotLvalue;
 
+    EIGEN_DEVICE_FUNC
     inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
+    EIGEN_DEVICE_FUNC
     inline const Scalar* data() const { return &(m_matrix.const_cast_derived().coeffRef(rowOffset(), colOffset())); }
 
+    EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index row, Index)
     {
       EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
       return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
     }
 
+    EIGEN_DEVICE_FUNC
     inline const Scalar& coeffRef(Index row, Index) const
     {
       return m_matrix.const_cast_derived().coeffRef(row+rowOffset(), row+colOffset());
     }
 
+    EIGEN_DEVICE_FUNC
     inline CoeffReturnType coeff(Index row, Index) const
     {
       return m_matrix.coeff(row+rowOffset(), row+colOffset());
     }
 
+    EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index idx)
     {
       EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
       return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset());
     }
 
+    EIGEN_DEVICE_FUNC
     inline const Scalar& coeffRef(Index idx) const
     {
       return m_matrix.const_cast_derived().coeffRef(idx+rowOffset(), idx+colOffset());
     }
 
+    EIGEN_DEVICE_FUNC
     inline CoeffReturnType coeff(Index idx) const
     {
       return m_matrix.coeff(idx+rowOffset(), idx+colOffset());
     }
 
+    EIGEN_DEVICE_FUNC
     const typename internal::remove_all<typename MatrixType::Nested>::type& 
     nestedExpression() const 
     {
       return m_matrix;
     }
 
+    EIGEN_DEVICE_FUNC
     int index() const
     {
       return m_index.value();
@@ -147,8 +162,11 @@ template<typename MatrixType, int _DiagIndex> class Diagonal
 
   private:
     // some compilers may fail to optimize std::max etc in case of compile-time constants...
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Index absDiagIndex() const { return m_index.value()>0 ? m_index.value() : -m_index.value(); }
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Index rowOffset() const { return m_index.value()>0 ? 0 : -m_index.value(); }
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Index colOffset() const { return m_index.value()>0 ? m_index.value() : 0; }
     // triger a compile time error is someone try to call packet
     template<int LoadMode> typename MatrixType::PacketReturnType packet(Index) const;
diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h
index e6c220f41..f7ac22f8b 100644
--- a/Eigen/src/Core/DiagonalMatrix.h
+++ b/Eigen/src/Core/DiagonalMatrix.h
@@ -37,45 +37,59 @@ class DiagonalBase : public EigenBase<Derived>
     typedef DenseMatrixType DenseType;
     typedef DiagonalMatrix<Scalar,DiagonalVectorType::SizeAtCompileTime,DiagonalVectorType::MaxSizeAtCompileTime> PlainObject;
 
+    EIGEN_DEVICE_FUNC
     inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
+    EIGEN_DEVICE_FUNC
     inline Derived& derived() { return *static_cast<Derived*>(this); }
 
+    EIGEN_DEVICE_FUNC
     DenseMatrixType toDenseMatrix() const { return derived(); }
     template<typename DenseDerived>
+    EIGEN_DEVICE_FUNC
     void evalTo(MatrixBase<DenseDerived> &other) const;
     template<typename DenseDerived>
+    EIGEN_DEVICE_FUNC
     void addTo(MatrixBase<DenseDerived> &other) const
     { other.diagonal() += diagonal(); }
     template<typename DenseDerived>
+    EIGEN_DEVICE_FUNC
     void subTo(MatrixBase<DenseDerived> &other) const
     { other.diagonal() -= diagonal(); }
 
+    EIGEN_DEVICE_FUNC
     inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); }
+    EIGEN_DEVICE_FUNC
     inline DiagonalVectorType& diagonal() { return derived().diagonal(); }
 
+    EIGEN_DEVICE_FUNC
     inline Index rows() const { return diagonal().size(); }
+    EIGEN_DEVICE_FUNC
     inline Index cols() const { return diagonal().size(); }
 
     /** \returns the diagonal matrix product of \c *this by the matrix \a matrix.
       */
     template<typename MatrixDerived>
+    EIGEN_DEVICE_FUNC
     const DiagonalProduct<MatrixDerived, Derived, OnTheLeft>
     operator*(const MatrixBase<MatrixDerived> &matrix) const
     {
       return DiagonalProduct<MatrixDerived, Derived, OnTheLeft>(matrix.derived(), derived());
     }
 
+    EIGEN_DEVICE_FUNC
     inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const DiagonalVectorType> >
     inverse() const
     {
       return diagonal().cwiseInverse();
     }
     
+    EIGEN_DEVICE_FUNC
     inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> >
     operator*(const Scalar& scalar) const
     {
       return diagonal() * scalar;
     }
+    EIGEN_DEVICE_FUNC
     friend inline const DiagonalWrapper<const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DiagonalVectorType> >
     operator*(const Scalar& scalar, const DiagonalBase& other)
     {
@@ -84,11 +98,13 @@ class DiagonalBase : public EigenBase<Derived>
     
     #ifdef EIGEN2_SUPPORT
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     bool isApprox(const DiagonalBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
     {
       return diagonal().isApprox(other.diagonal(), precision);
     }
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     bool isApprox(const MatrixBase<OtherDerived>& other, typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision()) const
     {
       return toDenseMatrix().isApprox(other, precision);
@@ -151,24 +167,31 @@ class DiagonalMatrix
   public:
 
     /** const version of diagonal(). */
+    EIGEN_DEVICE_FUNC
     inline const DiagonalVectorType& diagonal() const { return m_diagonal; }
     /** \returns a reference to the stored vector of diagonal coefficients. */
+    EIGEN_DEVICE_FUNC
     inline DiagonalVectorType& diagonal() { return m_diagonal; }
 
     /** Default constructor without initialization */
+    EIGEN_DEVICE_FUNC
     inline DiagonalMatrix() {}
 
     /** Constructs a diagonal matrix with given dimension  */
+    EIGEN_DEVICE_FUNC
     inline DiagonalMatrix(Index dim) : m_diagonal(dim) {}
 
     /** 2D constructor. */
+    EIGEN_DEVICE_FUNC
     inline DiagonalMatrix(const Scalar& x, const Scalar& y) : m_diagonal(x,y) {}
 
     /** 3D constructor. */
+    EIGEN_DEVICE_FUNC
     inline DiagonalMatrix(const Scalar& x, const Scalar& y, const Scalar& z) : m_diagonal(x,y,z) {}
 
     /** Copy constructor. */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     inline DiagonalMatrix(const DiagonalBase<OtherDerived>& other) : m_diagonal(other.diagonal()) {}
 
     #ifndef EIGEN_PARSED_BY_DOXYGEN
@@ -178,11 +201,13 @@ class DiagonalMatrix
 
     /** generic constructor from expression of the diagonal coefficients */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     explicit inline DiagonalMatrix(const MatrixBase<OtherDerived>& other) : m_diagonal(other)
     {}
 
     /** Copy operator. */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     DiagonalMatrix& operator=(const DiagonalBase<OtherDerived>& other)
     {
       m_diagonal = other.diagonal();
@@ -193,6 +218,7 @@ class DiagonalMatrix
     /** This is a special case of the templated operator=. Its purpose is to
       * prevent a default operator= from hiding the templated operator=.
       */
+    EIGEN_DEVICE_FUNC
     DiagonalMatrix& operator=(const DiagonalMatrix& other)
     {
       m_diagonal = other.diagonal();
@@ -201,14 +227,19 @@ class DiagonalMatrix
     #endif
 
     /** Resizes to given size. */
+    EIGEN_DEVICE_FUNC
     inline void resize(Index size) { m_diagonal.resize(size); }
     /** Sets all coefficients to zero. */
+    EIGEN_DEVICE_FUNC
     inline void setZero() { m_diagonal.setZero(); }
     /** Resizes and sets all coefficients to zero. */
+    EIGEN_DEVICE_FUNC
     inline void setZero(Index size) { m_diagonal.setZero(size); }
     /** Sets this matrix to be the identity matrix of the current size. */
+    EIGEN_DEVICE_FUNC
     inline void setIdentity() { m_diagonal.setOnes(); }
     /** Sets this matrix to be the identity matrix of the given size. */
+    EIGEN_DEVICE_FUNC
     inline void setIdentity(Index size) { m_diagonal.setOnes(size); }
 };
 
@@ -255,9 +286,11 @@ class DiagonalWrapper
     #endif
 
     /** Constructor from expression of diagonal coefficients to wrap. */
+    EIGEN_DEVICE_FUNC
     inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {}
 
     /** \returns a const reference to the wrapped expression of diagonal coefficients. */
+    EIGEN_DEVICE_FUNC
     const DiagonalVectorType& diagonal() const { return m_diagonal; }
 
   protected:
diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h
index 9d7651f1f..718de5d1a 100644
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -29,6 +29,7 @@ template<typename T, typename U,
 struct dot_nocheck
 {
   typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar;
+  EIGEN_DEVICE_FUNC
   static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
   {
     return a.template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum();
@@ -39,6 +40,7 @@ template<typename T, typename U>
 struct dot_nocheck<T, U, true>
 {
   typedef typename scalar_product_traits<typename traits<T>::Scalar,typename traits<U>::Scalar>::ReturnType ResScalar;
+  EIGEN_DEVICE_FUNC
   static inline ResScalar run(const MatrixBase<T>& a, const MatrixBase<U>& b)
   {
     return a.transpose().template binaryExpr<scalar_conj_product_op<typename traits<T>::Scalar,typename traits<U>::Scalar> >(b).sum();
@@ -59,6 +61,7 @@ struct dot_nocheck<T, U, true>
   */
 template<typename Derived>
 template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
 typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
 MatrixBase<Derived>::dot(const MatrixBase<OtherDerived>& other) const
 {
@@ -164,6 +167,7 @@ template<typename Derived, int p>
 struct lpNorm_selector
 {
   typedef typename NumTraits<typename traits<Derived>::Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
   static inline RealScalar run(const MatrixBase<Derived>& m)
   {
     using std::pow;
@@ -174,6 +178,7 @@ struct lpNorm_selector
 template<typename Derived>
 struct lpNorm_selector<Derived, 1>
 {
+  EIGEN_DEVICE_FUNC
   static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
   {
     return m.cwiseAbs().sum();
@@ -183,6 +188,7 @@ struct lpNorm_selector<Derived, 1>
 template<typename Derived>
 struct lpNorm_selector<Derived, 2>
 {
+  EIGEN_DEVICE_FUNC
   static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
   {
     return m.norm();
@@ -192,6 +198,7 @@ struct lpNorm_selector<Derived, 2>
 template<typename Derived>
 struct lpNorm_selector<Derived, Infinity>
 {
+  EIGEN_DEVICE_FUNC
   static inline typename NumTraits<typename traits<Derived>::Scalar>::Real run(const MatrixBase<Derived>& m)
   {
     return m.cwiseAbs().maxCoeff();
diff --git a/Eigen/src/Core/EigenBase.h b/Eigen/src/Core/EigenBase.h
index 2b8dd1b70..a25e823ab 100644
--- a/Eigen/src/Core/EigenBase.h
+++ b/Eigen/src/Core/EigenBase.h
@@ -31,29 +31,40 @@ template<typename Derived> struct EigenBase
   typedef typename internal::traits<Derived>::Index Index;
 
   /** \returns a reference to the derived object */
+  EIGEN_DEVICE_FUNC
   Derived& derived() { return *static_cast<Derived*>(this); }
   /** \returns a const reference to the derived object */
+  EIGEN_DEVICE_FUNC
   const Derived& derived() const { return *static_cast<const Derived*>(this); }
 
+  EIGEN_DEVICE_FUNC
   inline Derived& const_cast_derived() const
   { return *static_cast<Derived*>(const_cast<EigenBase*>(this)); }
+  EIGEN_DEVICE_FUNC
   inline const Derived& const_derived() const
   { return *static_cast<const Derived*>(this); }
 
   /** \returns the number of rows. \sa cols(), RowsAtCompileTime */
+  EIGEN_DEVICE_FUNC
   inline Index rows() const { return derived().rows(); }
   /** \returns the number of columns. \sa rows(), ColsAtCompileTime*/
+  EIGEN_DEVICE_FUNC
   inline Index cols() const { return derived().cols(); }
   /** \returns the number of coefficients, which is rows()*cols().
     * \sa rows(), cols(), SizeAtCompileTime. */
+  EIGEN_DEVICE_FUNC
   inline Index size() const { return rows() * cols(); }
 
   /** \internal Don't use it, but do the equivalent: \code dst = *this; \endcode */
-  template<typename Dest> inline void evalTo(Dest& dst) const
+  template<typename Dest>
+  EIGEN_DEVICE_FUNC
+  inline void evalTo(Dest& dst) const
   { derived().evalTo(dst); }
 
   /** \internal Don't use it, but do the equivalent: \code dst += *this; \endcode */
-  template<typename Dest> inline void addTo(Dest& dst) const
+  template<typename Dest>
+  EIGEN_DEVICE_FUNC
+  inline void addTo(Dest& dst) const
   {
     // This is the default implementation,
     // derived class can reimplement it in a more optimized way.
@@ -63,7 +74,9 @@ template<typename Derived> struct EigenBase
   }
 
   /** \internal Don't use it, but do the equivalent: \code dst -= *this; \endcode */
-  template<typename Dest> inline void subTo(Dest& dst) const
+  template<typename Dest>
+  EIGEN_DEVICE_FUNC
+  inline void subTo(Dest& dst) const
   {
     // This is the default implementation,
     // derived class can reimplement it in a more optimized way.
@@ -73,7 +86,8 @@ template<typename Derived> struct EigenBase
   }
 
   /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheRight(*this); \endcode */
-  template<typename Dest> inline void applyThisOnTheRight(Dest& dst) const
+  template<typename Dest>
+  EIGEN_DEVICE_FUNC inline void applyThisOnTheRight(Dest& dst) const
   {
     // This is the default implementation,
     // derived class can reimplement it in a more optimized way.
@@ -81,7 +95,8 @@ template<typename Derived> struct EigenBase
   }
 
   /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheLeft(*this); \endcode */
-  template<typename Dest> inline void applyThisOnTheLeft(Dest& dst) const
+  template<typename Dest>
+  EIGEN_DEVICE_FUNC inline void applyThisOnTheLeft(Dest& dst) const
   {
     // This is the default implementation,
     // derived class can reimplement it in a more optimized way.
diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h
index d0e05fcf2..3d43b528f 100644
--- a/Eigen/src/Core/Functors.h
+++ b/Eigen/src/Core/Functors.h
@@ -24,7 +24,7 @@ namespace internal {
 template<typename Scalar> struct scalar_sum_op {
 //   typedef Scalar result_type;
   EIGEN_EMPTY_STRUCT_CTOR(scalar_sum_op)
-  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a + b; }
   template<typename Packet>
   EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
   { return internal::padd(a,b); }
@@ -63,7 +63,7 @@ template<typename LhsScalar,typename RhsScalar> struct scalar_product_op {
   };
   typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
   EIGEN_EMPTY_STRUCT_CTOR(scalar_product_op)
-  EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a * b; }
   template<typename Packet>
   EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
   { return internal::pmul(a,b); }
@@ -93,7 +93,7 @@ template<typename LhsScalar,typename RhsScalar> struct scalar_conj_product_op {
   typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
   
   EIGEN_EMPTY_STRUCT_CTOR(scalar_conj_product_op)
-  EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const
   { return conj_helper<LhsScalar,RhsScalar,Conj,false>().pmul(a,b); }
   
   template<typename Packet>
@@ -115,7 +115,7 @@ struct functor_traits<scalar_conj_product_op<LhsScalar,RhsScalar> > {
   */
 template<typename Scalar> struct scalar_min_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_min_op)
-  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::min; return (min)(a, b); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { EIGEN_USING_STD_MATH(min); return (min)(a, b); }
   template<typename Packet>
   EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
   { return internal::pmin(a,b); }
@@ -138,7 +138,7 @@ struct functor_traits<scalar_min_op<Scalar> > {
   */
 template<typename Scalar> struct scalar_max_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_max_op)
-  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { using std::max; return (max)(a, b); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { EIGEN_USING_STD_MATH(max); return (max)(a, b); }
   template<typename Packet>
   EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
   { return internal::pmax(a,b); }
@@ -162,10 +162,10 @@ struct functor_traits<scalar_max_op<Scalar> > {
 template<typename Scalar> struct scalar_hypot_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_hypot_op)
 //   typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
   {
-    using std::max;
-    using std::min;
+    EIGEN_USING_STD_MATH(max);
+    EIGEN_USING_STD_MATH(min);
     using std::sqrt;
     Scalar p = (max)(_x, _y);
     Scalar q = (min)(_x, _y);
@@ -183,6 +183,7 @@ struct functor_traits<scalar_hypot_op<Scalar> > {
   */
 template<typename Scalar, typename OtherScalar> struct scalar_binary_pow_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_binary_pow_op)
+  EIGEN_DEVICE_FUNC
   inline Scalar operator() (const Scalar& a, const OtherScalar& b) const { return numext::pow(a, b); }
 };
 template<typename Scalar, typename OtherScalar>
@@ -199,7 +200,7 @@ struct functor_traits<scalar_binary_pow_op<Scalar,OtherScalar> > {
   */
 template<typename Scalar> struct scalar_difference_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_difference_op)
-  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& b) const { return a - b; }
   template<typename Packet>
   EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
   { return internal::psub(a,b); }
@@ -224,7 +225,7 @@ template<typename LhsScalar,typename RhsScalar> struct scalar_quotient_op {
   };
   typedef typename scalar_product_traits<LhsScalar,RhsScalar>::ReturnType result_type;
   EIGEN_EMPTY_STRUCT_CTOR(scalar_quotient_op)
-  EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const LhsScalar& a, const RhsScalar& b) const { return a / b; }
   template<typename Packet>
   EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const
   { return internal::pdiv(a,b); }
@@ -246,7 +247,7 @@ struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {
   */
 struct scalar_boolean_and_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_and_op)
-  EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a && b; }
 };
 template<> struct functor_traits<scalar_boolean_and_op> {
   enum {
@@ -262,7 +263,7 @@ template<> struct functor_traits<scalar_boolean_and_op> {
   */
 struct scalar_boolean_or_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_boolean_or_op)
-  EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool operator() (const bool& a, const bool& b) const { return a || b; }
 };
 template<> struct functor_traits<scalar_boolean_or_op> {
   enum {
@@ -280,7 +281,7 @@ template<> struct functor_traits<scalar_boolean_or_op> {
   */
 template<typename Scalar> struct scalar_opposite_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_opposite_op)
-  EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { return -a; }
   template<typename Packet>
   EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
   { return internal::pnegate(a); }
@@ -300,7 +301,7 @@ struct functor_traits<scalar_opposite_op<Scalar> >
 template<typename Scalar> struct scalar_abs_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_abs_op)
   typedef typename NumTraits<Scalar>::Real result_type;
-  EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using std::abs; return abs(a); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { using std::abs; return abs(a); }
   template<typename Packet>
   EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
   { return internal::pabs(a); }
@@ -322,6 +323,7 @@ struct functor_traits<scalar_abs_op<Scalar> >
 template<typename Scalar> struct scalar_abs2_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_abs2_op)
   typedef typename NumTraits<Scalar>::Real result_type;
+  EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE const result_type operator() (const Scalar& a) const { return numext::abs2(a); }
   template<typename Packet>
   EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
@@ -338,6 +340,7 @@ struct functor_traits<scalar_abs2_op<Scalar> >
   */
 template<typename Scalar> struct scalar_conjugate_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_conjugate_op)
+  EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { using numext::conj; return conj(a); }
   template<typename Packet>
   EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const { return internal::pconj(a); }
@@ -360,7 +363,7 @@ template<typename Scalar, typename NewType>
 struct scalar_cast_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op)
   typedef NewType result_type;
-  EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return cast<Scalar, NewType>(a); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const NewType operator() (const Scalar& a) const { return cast<Scalar, NewType>(a); }
 };
 template<typename Scalar, typename NewType>
 struct functor_traits<scalar_cast_op<Scalar,NewType> >
@@ -375,6 +378,7 @@ template<typename Scalar>
 struct scalar_real_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_real_op)
   typedef typename NumTraits<Scalar>::Real result_type;
+  EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::real(a); }
 };
 template<typename Scalar>
@@ -390,6 +394,7 @@ template<typename Scalar>
 struct scalar_imag_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_op)
   typedef typename NumTraits<Scalar>::Real result_type;
+  EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { return numext::imag(a); }
 };
 template<typename Scalar>
@@ -405,6 +410,7 @@ template<typename Scalar>
 struct scalar_real_ref_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_real_ref_op)
   typedef typename NumTraits<Scalar>::Real result_type;
+  EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::real_ref(*const_cast<Scalar*>(&a)); }
 };
 template<typename Scalar>
@@ -420,6 +426,7 @@ template<typename Scalar>
 struct scalar_imag_ref_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_imag_ref_op)
   typedef typename NumTraits<Scalar>::Real result_type;
+  EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE result_type& operator() (const Scalar& a) const { return numext::imag_ref(*const_cast<Scalar*>(&a)); }
 };
 template<typename Scalar>
@@ -434,7 +441,7 @@ struct functor_traits<scalar_imag_ref_op<Scalar> >
   */
 template<typename Scalar> struct scalar_exp_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_exp_op)
-  inline const Scalar operator() (const Scalar& a) const { using std::exp; return exp(a); }
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::exp; return exp(a); }
   typedef typename packet_traits<Scalar>::type Packet;
   inline Packet packetOp(const Packet& a) const { return internal::pexp(a); }
 };
@@ -450,7 +457,7 @@ struct functor_traits<scalar_exp_op<Scalar> >
   */
 template<typename Scalar> struct scalar_log_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_log_op)
-  inline const Scalar operator() (const Scalar& a) const { using std::log; return log(a); }
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::log; return log(a); }
   typedef typename packet_traits<Scalar>::type Packet;
   inline Packet packetOp(const Packet& a) const { return internal::plog(a); }
 };
@@ -475,8 +482,11 @@ template<typename Scalar>
 struct scalar_multiple_op {
   typedef typename packet_traits<Scalar>::type Packet;
   // FIXME default copy constructors seems bugged with std::complex<>
+  EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE scalar_multiple_op(const scalar_multiple_op& other) : m_other(other.m_other) { }
+  EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE scalar_multiple_op(const Scalar& other) : m_other(other) { }
+  EIGEN_DEVICE_FUNC
   EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a * m_other; }
   EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
   { return internal::pmul(a, pset1<Packet>(m_other)); }
@@ -489,9 +499,9 @@ struct functor_traits<scalar_multiple_op<Scalar> >
 template<typename Scalar1, typename Scalar2>
 struct scalar_multiple2_op {
   typedef typename scalar_product_traits<Scalar1,Scalar2>::ReturnType result_type;
-  EIGEN_STRONG_INLINE scalar_multiple2_op(const scalar_multiple2_op& other) : m_other(other.m_other) { }
-  EIGEN_STRONG_INLINE scalar_multiple2_op(const Scalar2& other) : m_other(other) { }
-  EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const scalar_multiple2_op& other) : m_other(other.m_other) { }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_multiple2_op(const Scalar2& other) : m_other(other) { }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar1& a) const { return a * m_other; }
   typename add_const_on_value_type<typename NumTraits<Scalar2>::Nested>::type m_other;
 };
 template<typename Scalar1,typename Scalar2>
@@ -510,9 +520,9 @@ template<typename Scalar>
 struct scalar_quotient1_op {
   typedef typename packet_traits<Scalar>::type Packet;
   // FIXME default copy constructors seems bugged with std::complex<>
-  EIGEN_STRONG_INLINE scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { }
-  EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {}
-  EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const scalar_quotient1_op& other) : m_other(other.m_other) { }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_quotient1_op(const Scalar& other) : m_other(other) {}
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator() (const Scalar& a) const { return a / m_other; }
   EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a) const
   { return internal::pdiv(a, pset1<Packet>(m_other)); }
   typename add_const_on_value_type<typename NumTraits<Scalar>::Nested>::type m_other;
@@ -526,10 +536,10 @@ struct functor_traits<scalar_quotient1_op<Scalar> >
 template<typename Scalar>
 struct scalar_constant_op {
   typedef typename packet_traits<Scalar>::type Packet;
-  EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { }
-  EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const scalar_constant_op& other) : m_other(other.m_other) { }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE scalar_constant_op(const Scalar& other) : m_other(other) { }
   template<typename Index>
-  EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index, Index = 0) const { return m_other; }
   template<typename Index>
   EIGEN_STRONG_INLINE const Packet packetOp(Index, Index = 0) const { return internal::pset1<Packet>(m_other); }
   const Scalar m_other;
@@ -542,7 +552,7 @@ struct functor_traits<scalar_constant_op<Scalar> >
 template<typename Scalar> struct scalar_identity_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op)
   template<typename Index>
-  EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const { return row==col ? Scalar(1) : Scalar(0); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const { return row==col ? Scalar(1) : Scalar(0); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_identity_op<Scalar> >
@@ -569,7 +579,7 @@ struct linspaced_op_impl<Scalar,false>
   m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(step),plset<Scalar>(-packet_traits<Scalar>::size)))) {}
 
   template<typename Index>
-  EIGEN_STRONG_INLINE const Scalar operator() (Index i) const 
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const 
   { 
     m_base = padd(m_base, pset1<Packet>(m_step));
     return m_low+Scalar(i)*m_step; 
@@ -597,7 +607,7 @@ struct linspaced_op_impl<Scalar,true>
   m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Scalar>(0)) {}
 
   template<typename Index>
-  EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
 
   template<typename Index>
   EIGEN_STRONG_INLINE const Packet packetOp(Index i) const
@@ -624,12 +634,12 @@ template <typename Scalar, bool RandomAccess> struct linspaced_op
   linspaced_op(const Scalar& low, const Scalar& high, DenseIndex num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/(num_steps-1))) {}
 
   template<typename Index>
-  EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); }
 
   // We need this function when assigning e.g. a RowVectorXd to a MatrixXd since
   // there row==0 and col is used for the actual iteration.
   template<typename Index>
-  EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const 
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const 
   {
     eigen_assert(col==0 || row==0);
     return impl(col + row);
@@ -679,9 +689,9 @@ template<typename Scalar>
 struct scalar_add_op {
   typedef typename packet_traits<Scalar>::type Packet;
   // FIXME default copy constructors seems bugged with std::complex<>
-  inline scalar_add_op(const scalar_add_op& other) : m_other(other.m_other) { }
-  inline scalar_add_op(const Scalar& other) : m_other(other) { }
-  inline Scalar operator() (const Scalar& a) const { return a + m_other; }
+  EIGEN_DEVICE_FUNC inline scalar_add_op(const scalar_add_op& other) : m_other(other.m_other) { }
+  EIGEN_DEVICE_FUNC inline scalar_add_op(const Scalar& other) : m_other(other) { }
+  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a + m_other; }
   inline const Packet packetOp(const Packet& a) const
   { return internal::padd(a, pset1<Packet>(m_other)); }
   const Scalar m_other;
@@ -732,7 +742,7 @@ struct functor_traits<scalar_rsub_op<Scalar> >
   */
 template<typename Scalar> struct scalar_sqrt_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_sqrt_op)
-  inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return sqrt(a); }
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sqrt; return sqrt(a); }
   typedef typename packet_traits<Scalar>::type Packet;
   inline Packet packetOp(const Packet& a) const { return internal::psqrt(a); }
 };
@@ -750,7 +760,7 @@ struct functor_traits<scalar_sqrt_op<Scalar> >
   */
 template<typename Scalar> struct scalar_cos_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_cos_op)
-  inline Scalar operator() (const Scalar& a) const { using std::cos; return cos(a); }
+  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { using std::cos; return cos(a); }
   typedef typename packet_traits<Scalar>::type Packet;
   inline Packet packetOp(const Packet& a) const { return internal::pcos(a); }
 };
@@ -769,7 +779,7 @@ struct functor_traits<scalar_cos_op<Scalar> >
   */
 template<typename Scalar> struct scalar_sin_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_sin_op)
-  inline const Scalar operator() (const Scalar& a) const { using std::sin; return sin(a); }
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::sin; return sin(a); }
   typedef typename packet_traits<Scalar>::type Packet;
   inline Packet packetOp(const Packet& a) const { return internal::psin(a); }
 };
@@ -789,7 +799,7 @@ struct functor_traits<scalar_sin_op<Scalar> >
   */
 template<typename Scalar> struct scalar_tan_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_tan_op)
-  inline const Scalar operator() (const Scalar& a) const { using std::tan; return tan(a); }
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::tan; return tan(a); }
   typedef typename packet_traits<Scalar>::type Packet;
   inline Packet packetOp(const Packet& a) const { return internal::ptan(a); }
 };
@@ -808,7 +818,7 @@ struct functor_traits<scalar_tan_op<Scalar> >
   */
 template<typename Scalar> struct scalar_acos_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_acos_op)
-  inline const Scalar operator() (const Scalar& a) const { using std::acos; return acos(a); }
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::acos; return acos(a); }
   typedef typename packet_traits<Scalar>::type Packet;
   inline Packet packetOp(const Packet& a) const { return internal::pacos(a); }
 };
@@ -827,7 +837,7 @@ struct functor_traits<scalar_acos_op<Scalar> >
   */
 template<typename Scalar> struct scalar_asin_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_asin_op)
-  inline const Scalar operator() (const Scalar& a) const { using std::asin; return asin(a); }
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::asin; return asin(a); }
   typedef typename packet_traits<Scalar>::type Packet;
   inline Packet packetOp(const Packet& a) const { return internal::pasin(a); }
 };
@@ -849,6 +859,7 @@ struct scalar_pow_op {
   // FIXME default copy constructors seems bugged with std::complex<>
   inline scalar_pow_op(const scalar_pow_op& other) : m_exponent(other.m_exponent) { }
   inline scalar_pow_op(const Scalar& exponent) : m_exponent(exponent) {}
+  EIGEN_DEVICE_FUNC
   inline Scalar operator() (const Scalar& a) const { return numext::pow(a, m_exponent); }
   const Scalar m_exponent;
 };
@@ -863,7 +874,7 @@ struct functor_traits<scalar_pow_op<Scalar> >
 template<typename Scalar>
 struct scalar_inverse_mult_op {
   scalar_inverse_mult_op(const Scalar& other) : m_other(other) {}
-  inline Scalar operator() (const Scalar& a) const { return m_other / a; }
+  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return m_other / a; }
   template<typename Packet>
   inline const Packet packetOp(const Packet& a) const
   { return internal::pdiv(pset1<Packet>(m_other),a); }
@@ -877,7 +888,7 @@ struct scalar_inverse_mult_op {
 template<typename Scalar>
 struct scalar_inverse_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_inverse_op)
-  inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; }
+  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return Scalar(1)/a; }
   template<typename Packet>
   inline const Packet packetOp(const Packet& a) const
   { return internal::pdiv(pset1<Packet>(Scalar(1)),a); }
@@ -893,7 +904,7 @@ struct functor_traits<scalar_inverse_op<Scalar> >
 template<typename Scalar>
 struct scalar_square_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_square_op)
-  inline Scalar operator() (const Scalar& a) const { return a*a; }
+  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a; }
   template<typename Packet>
   inline const Packet packetOp(const Packet& a) const
   { return internal::pmul(a,a); }
@@ -909,7 +920,7 @@ struct functor_traits<scalar_square_op<Scalar> >
 template<typename Scalar>
 struct scalar_cube_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_cube_op)
-  inline Scalar operator() (const Scalar& a) const { return a*a*a; }
+  EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a) const { return a*a*a; }
   template<typename Packet>
   inline const Packet packetOp(const Packet& a) const
   { return internal::pmul(a,pmul(a,a)); }
diff --git a/Eigen/src/Core/Fuzzy.h b/Eigen/src/Core/Fuzzy.h
index fe63bd298..f9a88dd3c 100644
--- a/Eigen/src/Core/Fuzzy.h
+++ b/Eigen/src/Core/Fuzzy.h
@@ -19,9 +19,10 @@ namespace internal
 template<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
 struct isApprox_selector
 {
+  EIGEN_DEVICE_FUNC
   static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec)
   {
-    using std::min;
+    EIGEN_USING_STD_MATH(min);
     typename internal::nested<Derived,2>::type nested(x);
     typename internal::nested<OtherDerived,2>::type otherNested(y);
     return (nested - otherNested).cwiseAbs2().sum() <= prec * prec * (min)(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum());
@@ -31,6 +32,7 @@ struct isApprox_selector
 template<typename Derived, typename OtherDerived>
 struct isApprox_selector<Derived, OtherDerived, true>
 {
+  EIGEN_DEVICE_FUNC
   static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar&)
   {
     return x.matrix() == y.matrix();
@@ -40,6 +42,7 @@ struct isApprox_selector<Derived, OtherDerived, true>
 template<typename Derived, typename OtherDerived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
 struct isMuchSmallerThan_object_selector
 {
+  EIGEN_DEVICE_FUNC
   static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec)
   {
     return x.cwiseAbs2().sum() <= numext::abs2(prec) * y.cwiseAbs2().sum();
@@ -49,6 +52,7 @@ struct isMuchSmallerThan_object_selector
 template<typename Derived, typename OtherDerived>
 struct isMuchSmallerThan_object_selector<Derived, OtherDerived, true>
 {
+  EIGEN_DEVICE_FUNC
   static bool run(const Derived& x, const OtherDerived&, const typename Derived::RealScalar&)
   {
     return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();
@@ -58,6 +62,7 @@ struct isMuchSmallerThan_object_selector<Derived, OtherDerived, true>
 template<typename Derived, bool is_integer = NumTraits<typename Derived::Scalar>::IsInteger>
 struct isMuchSmallerThan_scalar_selector
 {
+  EIGEN_DEVICE_FUNC
   static bool run(const Derived& x, const typename Derived::RealScalar& y, const typename Derived::RealScalar& prec)
   {
     return x.cwiseAbs2().sum() <= numext::abs2(prec * y);
@@ -67,6 +72,7 @@ struct isMuchSmallerThan_scalar_selector
 template<typename Derived>
 struct isMuchSmallerThan_scalar_selector<Derived, true>
 {
+  EIGEN_DEVICE_FUNC
   static bool run(const Derived& x, const typename Derived::RealScalar&, const typename Derived::RealScalar&)
   {
     return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix();
diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h
index 2a59d9464..9d7d18427 100644
--- a/Eigen/src/Core/GeneralProduct.h
+++ b/Eigen/src/Core/GeneralProduct.h
@@ -565,6 +565,7 @@ template<> struct gemv_selector<OnTheRight,RowMajor,false>
   *
   * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*()
   */
+#ifndef __CUDACC__
 template<typename Derived>
 template<typename OtherDerived>
 inline const typename ProductReturnType<Derived, OtherDerived>::Type
@@ -594,7 +595,7 @@ MatrixBase<Derived>::operator*(const MatrixBase<OtherDerived> &other) const
 #endif
   return typename ProductReturnType<Derived,OtherDerived>::Type(derived(), other.derived());
 }
-
+#endif // __CUDACC__
 /** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation.
   *
   * The returned product will behave like any other expressions: the coefficients of the product will be
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 5f783ebee..b0469fa1e 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -91,69 +91,70 @@ template<typename T> struct packet_traits : default_packet_traits
 };
 
 /** \internal \returns a + b (coeff-wise) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 padd(const Packet& a,
         const Packet& b) { return a+b; }
 
 /** \internal \returns a - b (coeff-wise) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 psub(const Packet& a,
         const Packet& b) { return a-b; }
 
 /** \internal \returns -a (coeff-wise) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pnegate(const Packet& a) { return -a; }
 
 /** \internal \returns conj(a) (coeff-wise) */
-template<typename Packet> inline Packet
+
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pconj(const Packet& a) { return numext::conj(a); }
 
 /** \internal \returns a * b (coeff-wise) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pmul(const Packet& a,
         const Packet& b) { return a*b; }
 
 /** \internal \returns a / b (coeff-wise) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pdiv(const Packet& a,
         const Packet& b) { return a/b; }
 
 /** \internal \returns the min of \a a and \a b  (coeff-wise) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pmin(const Packet& a,
-        const Packet& b) { using std::min; return (min)(a, b); }
+        const Packet& b) { EIGEN_USING_STD_MATH(min); return (min)(a, b); }
 
 /** \internal \returns the max of \a a and \a b  (coeff-wise) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pmax(const Packet& a,
-        const Packet& b) { using std::max; return (max)(a, b); }
+        const Packet& b) { EIGEN_USING_STD_MATH(max); return (max)(a, b); }
 
 /** \internal \returns the absolute value of \a a */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pabs(const Packet& a) { using std::abs; return abs(a); }
 
 /** \internal \returns the bitwise and of \a a and \a b */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pand(const Packet& a, const Packet& b) { return a & b; }
 
 /** \internal \returns the bitwise or of \a a and \a b */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 por(const Packet& a, const Packet& b) { return a | b; }
 
 /** \internal \returns the bitwise xor of \a a and \a b */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pxor(const Packet& a, const Packet& b) { return a ^ b; }
 
 /** \internal \returns the bitwise andnot of \a a and \a b */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pandnot(const Packet& a, const Packet& b) { return a & (!b); }
 
 /** \internal \returns a packet version of \a *from, from must be 16 bytes aligned */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
 
 /** \internal \returns a packet version of \a *from, (un-aligned load) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
 
 /** \internal \returns a packet with elements of \a *from duplicated.
@@ -161,11 +162,11 @@ ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
   * duplicated to form: {from[0],from[0],from[1],from[1],,from[2],from[2],,from[3],from[3]}
   * Currently, this function is only used for scalar * complex products.
  */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
 
 /** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
 
 /** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
@@ -173,11 +174,11 @@ template<typename Scalar> inline typename packet_traits<Scalar>::type
 plset(const Scalar& a) { return a; }
 
 /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
-template<typename Scalar, typename Packet> inline void pstore(Scalar* to, const Packet& from)
+template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
 { (*to) = from; }
 
 /** \internal copy the packet \a from to \a *to, (un-aligned store) */
-template<typename Scalar, typename Packet> inline void pstoreu(Scalar* to, const Packet& from)
+template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
 { (*to) = from; }
 
 /** \internal tries to do cache prefetching of \a addr */
@@ -189,36 +190,36 @@ __builtin_prefetch(addr);
 }
 
 /** \internal \returns the first element of a packet */
-template<typename Packet> inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type pfirst(const Packet& a)
 { return a; }
 
 /** \internal \returns a packet where the element i contains the sum of the packet of \a vec[i] */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 preduxp(const Packet* vecs) { return vecs[0]; }
 
 /** \internal \returns the sum of the elements of \a a*/
-template<typename Packet> inline typename unpacket_traits<Packet>::type predux(const Packet& a)
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux(const Packet& a)
 { return a; }
 
 /** \internal \returns the product of the elements of \a a*/
-template<typename Packet> inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(const Packet& a)
 { return a; }
 
 /** \internal \returns the min of the elements of \a a*/
-template<typename Packet> inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(const Packet& a)
 { return a; }
 
 /** \internal \returns the max of the elements of \a a*/
-template<typename Packet> inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
+template<typename Packet> EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(const Packet& a)
 { return a; }
 
 /** \internal \returns the reversed elements of \a a*/
-template<typename Packet> inline Packet preverse(const Packet& a)
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
 { return a; }
 
 
 /** \internal \returns \a a with real and imaginary part flipped (for complex type only) */
-template<typename Packet> inline Packet pcplxflip(const Packet& a)
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
 {
   // FIXME: uncomment the following in case we drop the internal imag and real functions.
 //   using std::imag;
@@ -275,7 +276,7 @@ inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename u
 }
 
 /** \internal \returns a * b + c (coeff-wise) */
-template<typename Packet> inline Packet
+template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
 pmadd(const Packet&  a,
          const Packet&  b,
          const Packet&  c)
@@ -336,12 +337,17 @@ inline void palign(PacketType& first, const PacketType& second)
 * Fast complex products (GCC generates a function call which is very slow)
 ***************************************************************************/
 
+// Eigen+CUDA does not support complex numbers: skip the std::complex specializations when compiling with nvcc.
+#ifndef __CUDACC__
+
 template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
 { return std::complex<float>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
 
 template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
 { return std::complex<double>(real(a)*real(b) - imag(a)*imag(b), imag(a)*real(b) + real(a)*imag(b)); }
 
+#endif // __CUDACC__
+
 } // end namespace internal
 
 } // end namespace Eigen
diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h
index f804c89d6..8ea13cfb7 100644
--- a/Eigen/src/Core/Map.h
+++ b/Eigen/src/Core/Map.h
@@ -115,14 +115,17 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
     inline PointerType cast_to_pointer_type(PointerArgType ptr) { return const_cast<PointerType>(ptr); }
 #else
     typedef PointerType PointerArgType;
+    EIGEN_DEVICE_FUNC
     inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; }
 #endif
 
+    EIGEN_DEVICE_FUNC
     inline Index innerStride() const
     {
       return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1;
     }
 
+    EIGEN_DEVICE_FUNC
     inline Index outerStride() const
     {
       return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer()
@@ -136,6 +139,7 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
       * \param dataPtr pointer to the array to map
       * \param a_stride optional Stride object, passing the strides.
       */
+    EIGEN_DEVICE_FUNC
     inline Map(PointerArgType dataPtr, const StrideType& a_stride = StrideType())
       : Base(cast_to_pointer_type(dataPtr)), m_stride(a_stride)
     {
@@ -148,6 +152,7 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
       * \param a_size the size of the vector expression
       * \param a_stride optional Stride object, passing the strides.
       */
+    EIGEN_DEVICE_FUNC
     inline Map(PointerArgType dataPtr, Index a_size, const StrideType& a_stride = StrideType())
       : Base(cast_to_pointer_type(dataPtr), a_size), m_stride(a_stride)
     {
@@ -161,6 +166,7 @@ template<typename PlainObjectType, int MapOptions, typename StrideType> class Ma
       * \param nbCols the number of columns of the matrix expression
       * \param a_stride optional Stride object, passing the strides.
       */
+    EIGEN_DEVICE_FUNC
     inline Map(PointerArgType dataPtr, Index nbRows, Index nbCols, const StrideType& a_stride = StrideType())
       : Base(cast_to_pointer_type(dataPtr), nbRows, nbCols), m_stride(a_stride)
     {
diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h
index 6876de588..ffa1371c2 100644
--- a/Eigen/src/Core/MapBase.h
+++ b/Eigen/src/Core/MapBase.h
@@ -76,8 +76,8 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
 
     typedef typename Base::CoeffReturnType CoeffReturnType;
 
-    inline Index rows() const { return m_rows.value(); }
-    inline Index cols() const { return m_cols.value(); }
+    EIGEN_DEVICE_FUNC inline Index rows() const { return m_rows.value(); }
+    EIGEN_DEVICE_FUNC inline Index cols() const { return m_cols.value(); }
 
     /** Returns a pointer to the first coefficient of the matrix or vector.
       *
@@ -87,22 +87,26 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
       */
     inline const Scalar* data() const { return m_data; }
 
+    EIGEN_DEVICE_FUNC
     inline const Scalar& coeff(Index rowId, Index colId) const
     {
       return m_data[colId * colStride() + rowId * rowStride()];
     }
 
+    EIGEN_DEVICE_FUNC
     inline const Scalar& coeff(Index index) const
     {
       EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
       return m_data[index * innerStride()];
     }
 
+    EIGEN_DEVICE_FUNC
     inline const Scalar& coeffRef(Index rowId, Index colId) const
     {
       return this->m_data[colId * colStride() + rowId * rowStride()];
     }
 
+    EIGEN_DEVICE_FUNC
     inline const Scalar& coeffRef(Index index) const
     {
       EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
@@ -123,12 +127,14 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
       return internal::ploadt<PacketScalar, LoadMode>(m_data + index * innerStride());
     }
 
+    EIGEN_DEVICE_FUNC
     inline MapBase(PointerType dataPtr) : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime)
     {
       EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived)
       checkSanity();
     }
 
+    EIGEN_DEVICE_FUNC
     inline MapBase(PointerType dataPtr, Index vecSize)
             : m_data(dataPtr),
               m_rows(RowsAtCompileTime == Dynamic ? vecSize : Index(RowsAtCompileTime)),
@@ -140,6 +146,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
       checkSanity();
     }
 
+    EIGEN_DEVICE_FUNC
     inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols)
             : m_data(dataPtr), m_rows(nbRows), m_cols(nbCols)
     {
@@ -151,6 +158,7 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
 
   protected:
 
+    EIGEN_DEVICE_FUNC
     void checkSanity() const
     {
       EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits<Derived>::Flags&PacketAccessBit,
@@ -195,14 +203,18 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>
                     const Scalar
                   >::type ScalarWithConstIfNotLvalue;
 
+    EIGEN_DEVICE_FUNC
     inline const Scalar* data() const { return this->m_data; }
+    EIGEN_DEVICE_FUNC
     inline ScalarWithConstIfNotLvalue* data() { return this->m_data; } // no const-cast here so non-const-correct code will give a compile error
 
+    EIGEN_DEVICE_FUNC
     inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col)
     {
       return this->m_data[col * colStride() + row * rowStride()];
     }
 
+    EIGEN_DEVICE_FUNC
     inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
     {
       EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived)
@@ -224,10 +236,11 @@ template<typename Derived> class MapBase<Derived, WriteAccessors>
                 (this->m_data + index * innerStride(), val);
     }
 
-    explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {}
-    inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}
-    inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols) : Base(dataPtr, nbRows, nbCols) {}
+    EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {}
+    EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {}
+    EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index nbRows, Index nbCols) : Base(dataPtr, nbRows, nbCols) {}
 
+    EIGEN_DEVICE_FUNC
     Derived& operator=(const MapBase& other)
     {
       Base::Base::operator=(other);
diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h
index fc1af1a5d..63fb92b75 100644
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@@ -62,6 +62,7 @@ template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
 struct real_default_impl
 {
   typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
   static inline RealScalar run(const Scalar& x)
   {
     return x;
@@ -72,6 +73,7 @@ template<typename Scalar>
 struct real_default_impl<Scalar,true>
 {
   typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
   static inline RealScalar run(const Scalar& x)
   {
     using std::real;
@@ -87,7 +89,6 @@ struct real_retval
   typedef typename NumTraits<Scalar>::Real type;
 };
 
-
 /****************************************************************************
 * Implementation of imag                                                 *
 ****************************************************************************/
@@ -96,6 +97,7 @@ template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
 struct imag_default_impl
 {
   typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
   static inline RealScalar run(const Scalar&)
   {
     return RealScalar(0);
@@ -106,6 +108,7 @@ template<typename Scalar>
 struct imag_default_impl<Scalar,true>
 {
   typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
   static inline RealScalar run(const Scalar& x)
   {
     using std::imag;
@@ -129,10 +132,12 @@ template<typename Scalar>
 struct real_ref_impl
 {
   typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
   static inline RealScalar& run(Scalar& x)
   {
     return reinterpret_cast<RealScalar*>(&x)[0];
   }
+  EIGEN_DEVICE_FUNC
   static inline const RealScalar& run(const Scalar& x)
   {
     return reinterpret_cast<const RealScalar*>(&x)[0];
@@ -153,10 +158,12 @@ template<typename Scalar, bool IsComplex>
 struct imag_ref_default_impl
 {
   typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
   static inline RealScalar& run(Scalar& x)
   {
     return reinterpret_cast<RealScalar*>(&x)[1];
   }
+  EIGEN_DEVICE_FUNC
   static inline const RealScalar& run(const Scalar& x)
   {
     return reinterpret_cast<RealScalar*>(&x)[1];
@@ -166,10 +173,12 @@ struct imag_ref_default_impl
 template<typename Scalar>
 struct imag_ref_default_impl<Scalar, false>
 {
+  EIGEN_DEVICE_FUNC
   static inline Scalar run(Scalar&)
   {
     return Scalar(0);
   }
+  EIGEN_DEVICE_FUNC
   static inline const Scalar run(const Scalar&)
   {
     return Scalar(0);
@@ -192,6 +201,7 @@ struct imag_ref_retval
 template<typename Scalar, bool IsComplex = NumTraits<Scalar>::IsComplex>
 struct conj_impl
 {
+  EIGEN_DEVICE_FUNC
   static inline Scalar run(const Scalar& x)
   {
     return x;
@@ -201,6 +211,7 @@ struct conj_impl
 template<typename Scalar>
 struct conj_impl<Scalar,true>
 {
+  EIGEN_DEVICE_FUNC
   static inline Scalar run(const Scalar& x)
   {
     using std::conj;
@@ -222,6 +233,7 @@ template<typename Scalar>
 struct abs2_impl
 {
   typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
   static inline RealScalar run(const Scalar& x)
   {
     return x*x;
@@ -231,6 +243,7 @@ struct abs2_impl
 template<typename RealScalar>
 struct abs2_impl<std::complex<RealScalar> >
 {
+  EIGEN_DEVICE_FUNC
   static inline RealScalar run(const std::complex<RealScalar>& x)
   {
     return real(x)*real(x) + imag(x)*imag(x);
@@ -251,6 +264,7 @@ template<typename Scalar, bool IsComplex>
 struct norm1_default_impl
 {
   typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
   static inline RealScalar run(const Scalar& x)
   {
     using std::abs;
@@ -261,6 +275,7 @@ struct norm1_default_impl
 template<typename Scalar>
 struct norm1_default_impl<Scalar, false>
 {
+  EIGEN_DEVICE_FUNC
   static inline Scalar run(const Scalar& x)
   {
     using std::abs;
@@ -287,8 +302,8 @@ struct hypot_impl
   typedef typename NumTraits<Scalar>::Real RealScalar;
-  static inline RealScalar run(const Scalar& x, const Scalar& y)
+  EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x, const Scalar& y)
   {
-    using std::max;
-    using std::min;
+    EIGEN_USING_STD_MATH(max);
+    EIGEN_USING_STD_MATH(min);
     using std::abs;
     using std::sqrt;
     RealScalar _x = abs(x);
@@ -562,72 +577,84 @@ inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random()
 namespace numext {
 
 template<typename Scalar>
+EIGEN_DEVICE_FUNC
 inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x)
 {
   return EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x);
 }  
 
 template<typename Scalar>
+EIGEN_DEVICE_FUNC
 inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) >::type real_ref(const Scalar& x)
 {
   return internal::real_ref_impl<Scalar>::run(x);
 }
 
 template<typename Scalar>
+EIGEN_DEVICE_FUNC
 inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x)
 {
   return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x);
 }
 
 template<typename Scalar>
+EIGEN_DEVICE_FUNC
 inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x)
 {
   return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x);
 }
 
 template<typename Scalar>
+EIGEN_DEVICE_FUNC
 inline typename internal::add_const_on_value_type< EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) >::type imag_ref(const Scalar& x)
 {
   return internal::imag_ref_impl<Scalar>::run(x);
 }
 
 template<typename Scalar>
+EIGEN_DEVICE_FUNC
 inline EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) imag_ref(Scalar& x)
 {
   return EIGEN_MATHFUNC_IMPL(imag_ref, Scalar)::run(x);
 }
 
 template<typename Scalar>
+EIGEN_DEVICE_FUNC
 inline EIGEN_MATHFUNC_RETVAL(conj, Scalar) conj(const Scalar& x)
 {
   return EIGEN_MATHFUNC_IMPL(conj, Scalar)::run(x);
 }
 
 template<typename Scalar>
+EIGEN_DEVICE_FUNC
 inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x)
 {
   return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x);
 }
 
 template<typename Scalar>
+EIGEN_DEVICE_FUNC
 inline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) norm1(const Scalar& x)
 {
   return EIGEN_MATHFUNC_IMPL(norm1, Scalar)::run(x);
 }
 
 template<typename Scalar>
+EIGEN_DEVICE_FUNC
 inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar& y)
 {
   return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y);
 }
 
 template<typename Scalar>
+EIGEN_DEVICE_FUNC
 inline EIGEN_MATHFUNC_RETVAL(atanh2, Scalar) atanh2(const Scalar& x, const Scalar& y)
 {
   return EIGEN_MATHFUNC_IMPL(atanh2, Scalar)::run(x, y);
 }
 
 template<typename Scalar>
+EIGEN_DEVICE_FUNC
 inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y)
 {
   return EIGEN_MATHFUNC_IMPL(pow, Scalar)::run(x, y);
@@ -635,7 +662,9 @@ inline EIGEN_MATHFUNC_RETVAL(pow, Scalar) pow(const Scalar& x, const Scalar& y)
 
 // std::isfinite is non standard, so let's define our own version,
 // even though it is not very efficient.
-template<typename T> bool (isfinite)(const T& x)
+template<typename T>
+EIGEN_DEVICE_FUNC
+bool (isfinite)(const T& x)
 {
   return x<NumTraits<T>::highest() && x>NumTraits<T>::lowest();
 }
@@ -657,18 +686,20 @@ template<typename Scalar>
 struct scalar_fuzzy_default_impl<Scalar, false, false>
 {
   typedef typename NumTraits<Scalar>::Real RealScalar;
-  template<typename OtherScalar>
+  template<typename OtherScalar> EIGEN_DEVICE_FUNC
   static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, const RealScalar& prec)
   {
     using std::abs;
     return abs(x) <= abs(y) * prec;
   }
+  EIGEN_DEVICE_FUNC
   static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
   {
-    using std::min;
+    EIGEN_USING_STD_MATH(min);
     using std::abs;
     return abs(x - y) <= (min)(abs(x), abs(y)) * prec;
   }
+  EIGEN_DEVICE_FUNC
   static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec)
   {
     return x <= y || isApprox(x, y, prec);
@@ -679,15 +710,17 @@ template<typename Scalar>
 struct scalar_fuzzy_default_impl<Scalar, false, true>
 {
   typedef typename NumTraits<Scalar>::Real RealScalar;
-  template<typename OtherScalar>
+  template<typename OtherScalar> EIGEN_DEVICE_FUNC
   static inline bool isMuchSmallerThan(const Scalar& x, const Scalar&, const RealScalar&)
   {
     return x == Scalar(0);
   }
+  EIGEN_DEVICE_FUNC
   static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar&)
   {
     return x == y;
   }
+  EIGEN_DEVICE_FUNC
   static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar&)
   {
     return x <= y;
@@ -705,7 +738,7 @@ struct scalar_fuzzy_default_impl<Scalar, true, false>
   }
-  static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
+  EIGEN_DEVICE_FUNC static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec)
   {
-    using std::min;
+    EIGEN_USING_STD_MATH(min);
     return numext::abs2(x - y) <= (min)(numext::abs2(x), numext::abs2(y)) * prec * prec;
   }
 };
@@ -713,21 +746,21 @@ struct scalar_fuzzy_default_impl<Scalar, true, false>
 template<typename Scalar>
 struct scalar_fuzzy_impl : scalar_fuzzy_default_impl<Scalar, NumTraits<Scalar>::IsComplex, NumTraits<Scalar>::IsInteger> {};
 
-template<typename Scalar, typename OtherScalar>
+template<typename Scalar, typename OtherScalar> EIGEN_DEVICE_FUNC
 inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y,
                                    typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
 {
   return scalar_fuzzy_impl<Scalar>::template isMuchSmallerThan<OtherScalar>(x, y, precision);
 }
 
-template<typename Scalar>
+template<typename Scalar> EIGEN_DEVICE_FUNC
 inline bool isApprox(const Scalar& x, const Scalar& y,
                           typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
 {
   return scalar_fuzzy_impl<Scalar>::isApprox(x, y, precision);
 }
 
-template<typename Scalar>
+template<typename Scalar> EIGEN_DEVICE_FUNC
 inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y,
                                     typename NumTraits<Scalar>::Real precision = NumTraits<Scalar>::dummy_precision())
 {
@@ -750,17 +783,19 @@ template<> struct scalar_fuzzy_impl<bool>
 {
   typedef bool RealScalar;
   
-  template<typename OtherScalar>
+  template<typename OtherScalar> EIGEN_DEVICE_FUNC
   static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&)
   {
     return !x;
   }
   
+  EIGEN_DEVICE_FUNC
   static inline bool isApprox(bool x, bool y, bool)
   {
     return x == y;
   }
 
+  EIGEN_DEVICE_FUNC
   static inline bool isApproxOrLessThan(const bool& x, const bool& y, const bool&)
   {
     return (!x) || y;
diff --git a/Eigen/src/Core/Matrix.h b/Eigen/src/Core/Matrix.h
index 02be142d8..c2cedbf6a 100644
--- a/Eigen/src/Core/Matrix.h
+++ b/Eigen/src/Core/Matrix.h
@@ -151,6 +151,7 @@ class Matrix
       *
       * \callgraph
       */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Matrix& operator=(const Matrix& other)
     {
       return Base::_set(other);
@@ -167,6 +168,7 @@ class Matrix
       * remain row-vectors and vectors remain vectors.
       */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Matrix& operator=(const MatrixBase<OtherDerived>& other)
     {
       return Base::_set(other);
@@ -179,12 +181,14 @@ class Matrix
       * \copydetails DenseBase::operator=(const EigenBase<OtherDerived> &other)
       */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase<OtherDerived> &other)
     {
       return Base::operator=(other);
     }
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Matrix& operator=(const ReturnByValue<OtherDerived>& func)
     {
       return Base::operator=(func);
@@ -200,6 +204,7 @@ class Matrix
       *
       * \sa resize(Index,Index)
       */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Matrix() : Base()
     {
       Base::_check_template_params();
@@ -207,6 +212,7 @@ class Matrix
     }
 
     // FIXME is it still needed
+    EIGEN_DEVICE_FUNC
     Matrix(internal::constructor_without_unaligned_array_assert)
       : Base(internal::constructor_without_unaligned_array_assert())
     { Base::_check_template_params(); EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED }
@@ -232,6 +238,7 @@ class Matrix
       * it is redundant to pass the dimension here, so it makes more sense to use the default
       * constructor Matrix() instead.
       */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE explicit Matrix(Index dim)
       : Base(dim, RowsAtCompileTime == 1 ? 1 : dim, ColsAtCompileTime == 1 ? 1 : dim)
     {
@@ -244,6 +251,7 @@ class Matrix
 
     #ifndef EIGEN_PARSED_BY_DOXYGEN
     template<typename T0, typename T1>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y)
     {
       Base::_check_template_params();
@@ -255,12 +263,14 @@ class Matrix
       * This is useful for dynamic-size matrices. For fixed-size matrices,
       * it is redundant to pass these parameters, so one should use the default constructor
       * Matrix() instead. */
+    EIGEN_DEVICE_FUNC
     Matrix(Index rows, Index cols);
     /** \brief Constructs an initialized 2D vector with given coefficients */
     Matrix(const Scalar& x, const Scalar& y);
     #endif
 
     /** \brief Constructs an initialized 3D vector with given coefficients */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z)
     {
       Base::_check_template_params();
@@ -270,6 +280,7 @@ class Matrix
       m_storage.data()[2] = z;
     }
     /** \brief Constructs an initialized 4D vector with given coefficients */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w)
     {
       Base::_check_template_params();
@@ -280,10 +291,12 @@ class Matrix
       m_storage.data()[3] = w;
     }
 
+    EIGEN_DEVICE_FUNC
     explicit Matrix(const Scalar *data);
 
     /** \brief Constructor copying the value of the expression \a other */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Matrix(const MatrixBase<OtherDerived>& other)
              : Base(other.rows() * other.cols(), other.rows(), other.cols())
     {
@@ -296,6 +309,7 @@ class Matrix
       Base::_set_noalias(other);
     }
     /** \brief Copy constructor */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Matrix(const Matrix& other)
             : Base(other.rows() * other.cols(), other.rows(), other.cols())
     {
@@ -304,6 +318,7 @@ class Matrix
     }
     /** \brief Copy constructor with in-place evaluation */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Matrix(const ReturnByValue<OtherDerived>& other)
     {
       Base::_check_template_params();
@@ -315,6 +330,7 @@ class Matrix
       * \sa MatrixBase::operator=(const EigenBase<OtherDerived>&)
       */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Matrix(const EigenBase<OtherDerived> &other)
       : Base(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
     {
@@ -330,17 +346,20 @@ class Matrix
       * of same type it is enough to swap the data pointers.
       */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     void swap(MatrixBase<OtherDerived> const & other)
     { this->_swap(other.derived()); }
 
-    inline Index innerStride() const { return 1; }
-    inline Index outerStride() const { return this->innerSize(); }
+    EIGEN_DEVICE_FUNC inline Index innerStride() const { return 1; }
+    EIGEN_DEVICE_FUNC inline Index outerStride() const { return this->innerSize(); }
 
     /////////// Geometry module ///////////
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     explicit Matrix(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     Matrix& operator=(const RotationBase<OtherDerived,ColsAtCompileTime>& r);
 
     #ifdef EIGEN2_SUPPORT
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index fbed47233..e77b49627 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -98,6 +98,7 @@ template<typename Derived> class MatrixBase
 
     /** \returns the size of the main diagonal, which is min(rows(),cols()).
       * \sa rows(), cols(), SizeAtCompileTime. */
-    inline Index diagonalSize() const { return (std::min)(rows(),cols()); }
+    EIGEN_DEVICE_FUNC
+    inline Index diagonalSize() const { EIGEN_USING_STD_MATH(min); return (min)(rows(),cols()); }
 
     /** \brief The plain matrix type corresponding to this expression.
@@ -145,35 +146,51 @@ template<typename Derived> class MatrixBase
     /** Special case of the template operator=, in order to prevent the compiler
       * from generating a default operator= (issue hit with g++ 4.1)
       */
+    EIGEN_DEVICE_FUNC
     Derived& operator=(const MatrixBase& other);
 
     // We cannot inherit here via Base::operator= since it is causing
     // trouble with MSVC.
 
     template <typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     Derived& operator=(const DenseBase<OtherDerived>& other);
 
     template <typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     Derived& operator=(const EigenBase<OtherDerived>& other);
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     Derived& operator=(const ReturnByValue<OtherDerived>& other);
 
 #ifndef EIGEN_PARSED_BY_DOXYGEN
     template<typename ProductDerived, typename Lhs, typename Rhs>
+    EIGEN_DEVICE_FUNC
     Derived& lazyAssign(const ProductBase<ProductDerived, Lhs,Rhs>& other);
 #endif // not EIGEN_PARSED_BY_DOXYGEN
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     Derived& operator+=(const MatrixBase<OtherDerived>& other);
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     Derived& operator-=(const MatrixBase<OtherDerived>& other);
 
+#ifdef __CUDACC__
+    template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
+    const typename LazyProductReturnType<Derived,OtherDerived>::Type
+    operator*(const MatrixBase<OtherDerived> &other) const
+    { return this->lazyProduct(other); }
+#else
     template<typename OtherDerived>
     const typename ProductReturnType<Derived,OtherDerived>::Type
     operator*(const MatrixBase<OtherDerived> &other) const;
+#endif
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC 
     const typename LazyProductReturnType<Derived,OtherDerived>::Type
     lazyProduct(const MatrixBase<OtherDerived> &other) const;
 
@@ -187,10 +204,12 @@ template<typename Derived> class MatrixBase
     void applyOnTheRight(const EigenBase<OtherDerived>& other);
 
     template<typename DiagonalDerived>
+    EIGEN_DEVICE_FUNC
     const DiagonalProduct<Derived, DiagonalDerived, OnTheRight>
     operator*(const DiagonalBase<DiagonalDerived> &diagonal) const;
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     typename internal::scalar_product_traits<typename internal::traits<Derived>::Scalar,typename internal::traits<OtherDerived>::Scalar>::ReturnType
     dot(const MatrixBase<OtherDerived>& other) const;
 
@@ -199,27 +218,35 @@ template<typename Derived> class MatrixBase
       Scalar eigen2_dot(const MatrixBase<OtherDerived>& other) const;
     #endif
 
-    RealScalar squaredNorm() const;
-    RealScalar norm() const;
+    EIGEN_DEVICE_FUNC RealScalar squaredNorm() const;
+    EIGEN_DEVICE_FUNC RealScalar norm() const;
     RealScalar stableNorm() const;
     RealScalar blueNorm() const;
     RealScalar hypotNorm() const;
-    const PlainObject normalized() const;
-    void normalize();
+    EIGEN_DEVICE_FUNC const PlainObject normalized() const;
+    EIGEN_DEVICE_FUNC void normalize();
 
-    const AdjointReturnType adjoint() const;
-    void adjointInPlace();
+    EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const;
+    EIGEN_DEVICE_FUNC void adjointInPlace();
 
     typedef Diagonal<Derived> DiagonalReturnType;
+    EIGEN_DEVICE_FUNC
     DiagonalReturnType diagonal();
-	typedef typename internal::add_const<Diagonal<const Derived> >::type ConstDiagonalReturnType;
+    
+    typedef typename internal::add_const<Diagonal<const Derived> >::type ConstDiagonalReturnType;
+    EIGEN_DEVICE_FUNC
     ConstDiagonalReturnType diagonal() const;
 
     template<int Index> struct DiagonalIndexReturnType { typedef Diagonal<Derived,Index> Type; };
     template<int Index> struct ConstDiagonalIndexReturnType { typedef const Diagonal<const Derived,Index> Type; };
 
-    template<int Index> typename DiagonalIndexReturnType<Index>::Type diagonal();
-    template<int Index> typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
+    template<int Index> 
+    EIGEN_DEVICE_FUNC
+    typename DiagonalIndexReturnType<Index>::Type diagonal();
+
+    template<int Index>
+    EIGEN_DEVICE_FUNC
+    typename ConstDiagonalIndexReturnType<Index>::Type diagonal() const;
 
     // Note: The "MatrixBase::" prefixes are added to help MSVC9 to match these declarations with the later implementations.
     // On the other hand they confuse MSVC8...
@@ -227,7 +254,10 @@ template<typename Derived> class MatrixBase
     typename MatrixBase::template DiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index);
     typename MatrixBase::template ConstDiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index) const;
     #else
+    EIGEN_DEVICE_FUNC
     typename DiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index);
+    
+    EIGEN_DEVICE_FUNC
     typename ConstDiagonalIndexReturnType<DynamicIndex>::Type diagonal(Index index) const;
     #endif
 
@@ -245,30 +275,41 @@ template<typename Derived> class MatrixBase
     template<unsigned int Mode> struct TriangularViewReturnType { typedef TriangularView<Derived, Mode> Type; };
     template<unsigned int Mode> struct ConstTriangularViewReturnType { typedef const TriangularView<const Derived, Mode> Type; };
 
-    template<unsigned int Mode> typename TriangularViewReturnType<Mode>::Type triangularView();
-    template<unsigned int Mode> typename ConstTriangularViewReturnType<Mode>::Type triangularView() const;
+    template<unsigned int Mode>
+    EIGEN_DEVICE_FUNC
+    typename TriangularViewReturnType<Mode>::Type triangularView();
+    template<unsigned int Mode>
+    EIGEN_DEVICE_FUNC
+    typename ConstTriangularViewReturnType<Mode>::Type triangularView() const;
 
     template<unsigned int UpLo> struct SelfAdjointViewReturnType { typedef SelfAdjointView<Derived, UpLo> Type; };
     template<unsigned int UpLo> struct ConstSelfAdjointViewReturnType { typedef const SelfAdjointView<const Derived, UpLo> Type; };
 
-    template<unsigned int UpLo> typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
-    template<unsigned int UpLo> typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;
+    template<unsigned int UpLo> 
+    EIGEN_DEVICE_FUNC
+    typename SelfAdjointViewReturnType<UpLo>::Type selfadjointView();
+    template<unsigned int UpLo>
+    EIGEN_DEVICE_FUNC
+    typename ConstSelfAdjointViewReturnType<UpLo>::Type selfadjointView() const;
 
     const SparseView<Derived> sparseView(const Scalar& m_reference = Scalar(0),
                                          const typename NumTraits<Scalar>::Real& m_epsilon = NumTraits<Scalar>::dummy_precision()) const;
-    static const IdentityReturnType Identity();
-    static const IdentityReturnType Identity(Index rows, Index cols);
-    static const BasisReturnType Unit(Index size, Index i);
-    static const BasisReturnType Unit(Index i);
-    static const BasisReturnType UnitX();
-    static const BasisReturnType UnitY();
-    static const BasisReturnType UnitZ();
-    static const BasisReturnType UnitW();
+    EIGEN_DEVICE_FUNC static const IdentityReturnType Identity();
+    EIGEN_DEVICE_FUNC static const IdentityReturnType Identity(Index rows, Index cols);
+    EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index size, Index i);
+    EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index i);
+    EIGEN_DEVICE_FUNC static const BasisReturnType UnitX();
+    EIGEN_DEVICE_FUNC static const BasisReturnType UnitY();
+    EIGEN_DEVICE_FUNC static const BasisReturnType UnitZ();
+    EIGEN_DEVICE_FUNC static const BasisReturnType UnitW();
 
+    EIGEN_DEVICE_FUNC
     const DiagonalWrapper<const Derived> asDiagonal() const;
     const PermutationWrapper<const Derived> asPermutation() const;
 
+    EIGEN_DEVICE_FUNC
     Derived& setIdentity();
+    EIGEN_DEVICE_FUNC
     Derived& setIdentity(Index rows, Index cols);
 
     bool isIdentity(const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const;
@@ -309,20 +350,20 @@ template<typename Derived> class MatrixBase
 
 /////////// Array module ///////////
 
-    template<int p> RealScalar lpNorm() const;
+    template<int p> EIGEN_DEVICE_FUNC RealScalar lpNorm() const;
 
-    MatrixBase<Derived>& matrix() { return *this; }
-    const MatrixBase<Derived>& matrix() const { return *this; }
+    EIGEN_DEVICE_FUNC MatrixBase<Derived>& matrix() { return *this; }
+    EIGEN_DEVICE_FUNC const MatrixBase<Derived>& matrix() const { return *this; }
 
     /** \returns an \link Eigen::ArrayBase Array \endlink expression of this matrix
       * \sa ArrayBase::matrix() */
-    ArrayWrapper<Derived> array() { return derived(); }
-    const ArrayWrapper<const Derived> array() const { return derived(); }
+    EIGEN_DEVICE_FUNC ArrayWrapper<Derived> array() { return derived(); }
+    EIGEN_DEVICE_FUNC const ArrayWrapper<const Derived> array() const { return derived(); }
 
 /////////// LU module ///////////
 
-    const FullPivLU<PlainObject> fullPivLu() const;
-    const PartialPivLU<PlainObject> partialPivLu() const;
+    EIGEN_DEVICE_FUNC const FullPivLU<PlainObject> fullPivLu() const;
+    EIGEN_DEVICE_FUNC const PartialPivLU<PlainObject> partialPivLu() const;
 
     #if EIGEN2_SUPPORT_STAGE < STAGE20_RESOLVE_API_CONFLICTS
     const LU<PlainObject> lu() const;
@@ -343,6 +384,7 @@ template<typename Derived> class MatrixBase
     }
     #endif
 
+    EIGEN_DEVICE_FUNC
     const internal::inverse_impl<Derived> inverse() const;
     template<typename ResultType>
     void computeInverseAndDetWithCheck(
@@ -395,11 +437,17 @@ template<typename Derived> class MatrixBase
     };
     #endif // EIGEN_PARSED_BY_DOXYGEN
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     typename cross_product_return_type<OtherDerived>::type
     cross(const MatrixBase<OtherDerived>& other) const;
+    
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     PlainObject cross3(const MatrixBase<OtherDerived>& other) const;
+    
+    EIGEN_DEVICE_FUNC
     PlainObject unitOrthogonal(void) const;
+    
     Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
     
     #if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
@@ -493,12 +541,12 @@ template<typename Derived> class MatrixBase
 #endif
 
   protected:
-    MatrixBase() : Base() {}
+    EIGEN_DEVICE_FUNC MatrixBase() : Base() {}
 
   private:
-    explicit MatrixBase(int);
-    MatrixBase(int,int);
-    template<typename OtherDerived> explicit MatrixBase(const MatrixBase<OtherDerived>&);
+    EIGEN_DEVICE_FUNC explicit MatrixBase(int);
+    EIGEN_DEVICE_FUNC MatrixBase(int,int);
+    template<typename OtherDerived> EIGEN_DEVICE_FUNC explicit MatrixBase(const MatrixBase<OtherDerived>&);
   protected:
     // mixing arrays and matrices is not legal
     template<typename OtherDerived> Derived& operator+=(const ArrayBase<OtherDerived>& )
diff --git a/Eigen/src/Core/NoAlias.h b/Eigen/src/Core/NoAlias.h
index 768bfb18c..0a1c32743 100644
--- a/Eigen/src/Core/NoAlias.h
+++ b/Eigen/src/Core/NoAlias.h
@@ -37,11 +37,13 @@ class NoAlias
     /** Behaves like MatrixBase::lazyAssign(other)
       * \sa MatrixBase::lazyAssign() */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase<OtherDerived>& other)
     { return internal::assign_selector<ExpressionType,OtherDerived,false>::run(m_expression,other.derived()); }
 
     /** \sa MatrixBase::operator+= */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase<OtherDerived>& other)
     {
       typedef SelfCwiseBinaryOp<internal::scalar_sum_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
@@ -54,6 +56,7 @@ class NoAlias
 
     /** \sa MatrixBase::operator-= */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase<OtherDerived>& other)
     {
       typedef SelfCwiseBinaryOp<internal::scalar_difference_op<Scalar>, ExpressionType, OtherDerived> SelfAdder;
@@ -66,10 +69,12 @@ class NoAlias
 
 #ifndef EIGEN_PARSED_BY_DOXYGEN
     template<typename ProductDerived, typename Lhs, typename Rhs>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE ExpressionType& operator+=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
     { other.derived().addTo(m_expression); return m_expression; }
 
     template<typename ProductDerived, typename Lhs, typename Rhs>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE ExpressionType& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
     { other.derived().subTo(m_expression); return m_expression; }
 
@@ -78,6 +83,7 @@ class NoAlias
     { return m_expression.derived() += CoeffBasedProduct<Lhs,Rhs,NestByRefBit>(other.lhs(), other.rhs()); }
 
     template<typename Lhs, typename Rhs, int NestingFlags>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE ExpressionType& operator-=(const CoeffBasedProduct<Lhs,Rhs,NestingFlags>& other)
     { return m_expression.derived() -= CoeffBasedProduct<Lhs,Rhs,NestByRefBit>(other.lhs(), other.rhs()); }
     
@@ -86,6 +92,7 @@ class NoAlias
     { return m_expression = func; }
 #endif
 
+    EIGEN_DEVICE_FUNC
     ExpressionType& expression() const
     {
       return m_expression;
diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h
index bac9e50b8..2b6633c9c 100644
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h
@@ -68,7 +68,16 @@ template<typename T> struct GenericNumTraits
                    >::type NonInteger;
   typedef T Nested;
 
-  static inline Real epsilon() { return std::numeric_limits<T>::epsilon(); }
+  EIGEN_DEVICE_FUNC
+  static inline Real epsilon()
+  {
+    #if defined(__CUDA_ARCH__)
+    return internal::device::numeric_limits<T>::epsilon();
+    #else
+    return std::numeric_limits<T>::epsilon();
+    #endif
+  }
+  EIGEN_DEVICE_FUNC
   static inline Real dummy_precision()
   {
     // make sure to override this for floating-point types
@@ -91,11 +100,13 @@ template<typename T> struct NumTraits : GenericNumTraits<T>
 template<> struct NumTraits<float>
   : GenericNumTraits<float>
 {
+  EIGEN_DEVICE_FUNC
   static inline float dummy_precision() { return 1e-5f; }
 };
 
 template<> struct NumTraits<double> : GenericNumTraits<double>
 {
+  EIGEN_DEVICE_FUNC
   static inline double dummy_precision() { return 1e-12; }
 };
 
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index 1a4c19233..0305066ba 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -28,6 +28,7 @@ namespace internal {
 
 template<int MaxSizeAtCompileTime> struct check_rows_cols_for_overflow {
   template<typename Index>
+  EIGEN_DEVICE_FUNC
   static EIGEN_ALWAYS_INLINE void run(Index, Index)
   {
   }
@@ -35,6 +36,7 @@ template<int MaxSizeAtCompileTime> struct check_rows_cols_for_overflow {
 
 template<> struct check_rows_cols_for_overflow<Dynamic> {
   template<typename Index>
+  EIGEN_DEVICE_FUNC
   static EIGEN_ALWAYS_INLINE void run(Index rows, Index cols)
   {
     // http://hg.mozilla.org/mozilla-central/file/6c8a909977d3/xpcom/ds/CheckedInt.h#l242
@@ -129,12 +131,17 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
     enum { NeedsToAlign = SizeAtCompileTime != Dynamic && (internal::traits<Derived>::Flags & AlignedBit) != 0 };
     EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
 
+    EIGEN_DEVICE_FUNC
     Base& base() { return *static_cast<Base*>(this); }
+    EIGEN_DEVICE_FUNC
     const Base& base() const { return *static_cast<const Base*>(this); }
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Index rows() const { return m_storage.rows(); }
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Index cols() const { return m_storage.cols(); }
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE const Scalar& coeff(Index rowId, Index colId) const
     {
       if(Flags & RowMajorBit)
@@ -143,11 +150,13 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
         return m_storage.data()[rowId + colId * m_storage.rows()];
     }
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE const Scalar& coeff(Index index) const
     {
       return m_storage.data()[index];
     }
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar& coeffRef(Index rowId, Index colId)
     {
       if(Flags & RowMajorBit)
@@ -156,11 +165,13 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
         return m_storage.data()[rowId + colId * m_storage.rows()];
     }
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Scalar& coeffRef(Index index)
     {
       return m_storage.data()[index];
     }
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const
     {
       if(Flags & RowMajorBit)
@@ -169,6 +180,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
         return m_storage.data()[rowId + colId * m_storage.rows()];
     }
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const
     {
       return m_storage.data()[index];
@@ -232,6 +244,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
       *
       * \sa resize(Index) for vectors, resize(NoChange_t, Index), resize(Index, NoChange_t)
       */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void resize(Index nbRows, Index nbCols)
     {
       eigen_assert(   EIGEN_IMPLIES(RowsAtCompileTime!=Dynamic,nbRows==RowsAtCompileTime)
@@ -262,6 +275,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
       *
       * \sa resize(Index,Index), resize(NoChange_t, Index), resize(Index, NoChange_t)
       */
+    EIGEN_DEVICE_FUNC
     inline void resize(Index size)
     {
       EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase)
@@ -286,6 +300,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
       *
       * \sa resize(Index,Index)
       */
+    EIGEN_DEVICE_FUNC
     inline void resize(NoChange_t, Index nbCols)
     {
       resize(rows(), nbCols);
@@ -299,6 +314,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
       *
       * \sa resize(Index,Index)
       */
+    EIGEN_DEVICE_FUNC
     inline void resize(Index nbRows, NoChange_t)
     {
       resize(nbRows, cols());
@@ -312,6 +328,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
       * remain row-vectors and vectors remain vectors.
       */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC 
     EIGEN_STRONG_INLINE void resizeLike(const EigenBase<OtherDerived>& _other)
     {
       const OtherDerived& other = _other.derived();
@@ -339,6 +356,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
       * Matrices are resized relative to the top-left element. In case values need to be 
       * appended to the matrix they will be uninitialized.
       */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void conservativeResize(Index nbRows, Index nbCols)
     {
       internal::conservative_resize_like_impl<Derived>::run(*this, nbRows, nbCols);
@@ -351,6 +369,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
       *
       * In case the matrix is growing, new rows will be uninitialized.
       */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void conservativeResize(Index nbRows, NoChange_t)
     {
       // Note: see the comment in conservativeResize(Index,Index)
@@ -364,6 +383,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
       *
       * In case the matrix is growing, new columns will be uninitialized.
       */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index nbCols)
     {
       // Note: see the comment in conservativeResize(Index,Index)
@@ -378,6 +398,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
       *
       * When values are appended, they will be uninitialized.
       */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void conservativeResize(Index size)
     {
       internal::conservative_resize_like_impl<Derived>::run(*this, size);
@@ -393,6 +414,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
       * appended to the matrix they will copied from \c other.
       */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void conservativeResizeLike(const DenseBase<OtherDerived>& other)
     {
       internal::conservative_resize_like_impl<Derived,OtherDerived>::run(*this, other);
@@ -401,6 +423,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
     /** This is a special case of the templated operator=. Its purpose is to
       * prevent a default operator= from hiding the templated operator=.
       */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Derived& operator=(const PlainObjectBase& other)
     {
       return _set(other);
@@ -408,6 +431,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
 
     /** \sa MatrixBase::lazyAssign() */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Derived& lazyAssign(const DenseBase<OtherDerived>& other)
     {
       _resize_to_match(other);
@@ -415,12 +439,14 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
     }
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Derived& operator=(const ReturnByValue<OtherDerived>& func)
     {
       resize(func.rows(), func.cols());
       return Base::operator=(func);
     }
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE PlainObjectBase() : m_storage()
     {
 //       _check_template_params();
@@ -430,6 +456,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
 #ifndef EIGEN_PARSED_BY_DOXYGEN
     // FIXME is it still needed ?
     /** \internal */
+    EIGEN_DEVICE_FUNC
     PlainObjectBase(internal::constructor_without_unaligned_array_assert)
       : m_storage(internal::constructor_without_unaligned_array_assert())
     {
@@ -438,11 +465,13 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
 #endif
 
 #ifdef EIGEN_HAVE_RVALUE_REFERENCES
+    EIGEN_DEVICE_FUNC
     PlainObjectBase(PlainObjectBase&& other)
       : m_storage( std::move(other.m_storage) )
     {
     }
 
+    EIGEN_DEVICE_FUNC
     PlainObjectBase& operator=(PlainObjectBase&& other)
     {
       using std::swap;
@@ -451,6 +480,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
     }
 #endif
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE PlainObjectBase(Index a_size, Index nbRows, Index nbCols)
       : m_storage(a_size, nbRows, nbCols)
     {
@@ -461,6 +491,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
     /** \copydoc MatrixBase::operator=(const EigenBase<OtherDerived>&)
       */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC 
     EIGEN_STRONG_INLINE Derived& operator=(const EigenBase<OtherDerived> &other)
     {
       _resize_to_match(other);
@@ -470,6 +501,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
 
     /** \sa MatrixBase::operator=(const EigenBase<OtherDerived>&) */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC 
     EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase<OtherDerived> &other)
       : m_storage(other.derived().rows() * other.derived().cols(), other.derived().rows(), other.derived().cols())
     {
@@ -552,16 +584,16 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
     //@}
 
     using Base::setConstant;
-    Derived& setConstant(Index size, const Scalar& value);
-    Derived& setConstant(Index rows, Index cols, const Scalar& value);
+    EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& value);
+    EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& value);
 
     using Base::setZero;
-    Derived& setZero(Index size);
-    Derived& setZero(Index rows, Index cols);
+    EIGEN_DEVICE_FUNC Derived& setZero(Index size);
+    EIGEN_DEVICE_FUNC Derived& setZero(Index rows, Index cols);
 
     using Base::setOnes;
-    Derived& setOnes(Index size);
-    Derived& setOnes(Index rows, Index cols);
+    EIGEN_DEVICE_FUNC Derived& setOnes(Index size);
+    EIGEN_DEVICE_FUNC Derived& setOnes(Index rows, Index cols);
 
     using Base::setRandom;
     Derived& setRandom(Index size);
@@ -580,6 +612,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
       * remain row-vectors and vectors remain vectors.
       */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC 
     EIGEN_STRONG_INLINE void _resize_to_match(const EigenBase<OtherDerived>& other)
     {
       #ifdef EIGEN_NO_AUTOMATIC_RESIZING
@@ -607,6 +640,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
       * \internal
       */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC 
     EIGEN_STRONG_INLINE Derived& _set(const DenseBase<OtherDerived>& other)
     {
       _set_selector(other.derived(), typename internal::conditional<static_cast<bool>(int(OtherDerived::Flags) & EvalBeforeAssigningBit), internal::true_type, internal::false_type>::type());
@@ -614,9 +648,11 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
     }
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC 
     EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::true_type&) { _set_noalias(other.eval()); }
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC 
     EIGEN_STRONG_INLINE void _set_selector(const OtherDerived& other, const internal::false_type&) { _set_noalias(other); }
 
     /** \internal Like _set() but additionally makes the assumption that no aliasing effect can happen (which
@@ -625,6 +661,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
       * \sa operator=(const MatrixBase<OtherDerived>&), _set()
       */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC 
     EIGEN_STRONG_INLINE Derived& _set_noalias(const DenseBase<OtherDerived>& other)
     {
       // I don't think we need this resize call since the lazyAssign will anyways resize
@@ -636,6 +673,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
     }
 
     template<typename T0, typename T1>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void _init2(Index nbRows, Index nbCols, typename internal::enable_if<Base::SizeAtCompileTime!=2,T0>::type* = 0)
     {
       EIGEN_STATIC_ASSERT(bool(NumTraits<T0>::IsInteger) &&
@@ -644,6 +682,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
       resize(nbRows,nbCols);
     }
     template<typename T0, typename T1>
+    EIGEN_DEVICE_FUNC 
     EIGEN_STRONG_INLINE void _init2(const Scalar& val0, const Scalar& val1, typename internal::enable_if<Base::SizeAtCompileTime==2,T0>::type* = 0)
     {
       EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2)
@@ -658,6 +697,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
       * data pointers.
       */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     void _swap(DenseBase<OtherDerived> const & other)
     {
       enum { SwapPointers = internal::is_same<Derived, OtherDerived>::value && Base::SizeAtCompileTime==Dynamic };
@@ -666,6 +706,7 @@ class PlainObjectBase : public internal::dense_xpr_base<Derived>::type
 
   public:
 #ifndef EIGEN_PARSED_BY_DOXYGEN
+    EIGEN_DEVICE_FUNC 
     static EIGEN_STRONG_INLINE void _check_template_params()
     {
       EIGEN_STATIC_ASSERT((EIGEN_IMPLIES(MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1, (Options&RowMajor)==RowMajor)
@@ -782,6 +823,7 @@ struct conservative_resize_like_impl<Derived,OtherDerived,true>
 template<typename MatrixTypeA, typename MatrixTypeB, bool SwapPointers>
 struct matrix_swap_impl
 {
+  EIGEN_DEVICE_FUNC
   static inline void run(MatrixTypeA& a, MatrixTypeB& b)
   {
     a.base().swap(b);
@@ -791,6 +833,7 @@ struct matrix_swap_impl
 template<typename MatrixTypeA, typename MatrixTypeB>
 struct matrix_swap_impl<MatrixTypeA, MatrixTypeB, true>
 {
+  EIGEN_DEVICE_FUNC
   static inline void run(MatrixTypeA& a, MatrixTypeB& b)
   {
     static_cast<typename MatrixTypeA::Base&>(a).m_storage.swap(static_cast<typename MatrixTypeB::Base&>(b).m_storage);
diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h
index 50548fa9a..b2c775d90 100644
--- a/Eigen/src/Core/Redux.h
+++ b/Eigen/src/Core/Redux.h
@@ -82,6 +82,7 @@ struct redux_novec_unroller
 
   typedef typename Derived::Scalar Scalar;
 
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func& func)
   {
     return func(redux_novec_unroller<Func, Derived, Start, HalfLength>::run(mat,func),
@@ -99,6 +100,7 @@ struct redux_novec_unroller<Func, Derived, Start, 1>
 
   typedef typename Derived::Scalar Scalar;
 
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE Scalar run(const Derived &mat, const Func&)
   {
     return mat.coeffByOuterInner(outer, inner);
@@ -112,6 +114,7 @@ template<typename Func, typename Derived, int Start>
 struct redux_novec_unroller<Func, Derived, Start, 0>
 {
   typedef typename Derived::Scalar Scalar;
+  EIGEN_DEVICE_FUNC 
   static EIGEN_STRONG_INLINE Scalar run(const Derived&, const Func&) { return Scalar(); }
 };
 
@@ -170,6 +173,7 @@ struct redux_impl<Func, Derived, DefaultTraversal, NoUnrolling>
 {
   typedef typename Derived::Scalar Scalar;
   typedef typename Derived::Index Index;
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE Scalar run(const Derived& mat, const Func& func)
   {
     eigen_assert(mat.rows()>0 && mat.cols()>0 && "you are using an empty matrix");
diff --git a/Eigen/src/Core/ReturnByValue.h b/Eigen/src/Core/ReturnByValue.h
index d66c24ba0..7834f6cbc 100644
--- a/Eigen/src/Core/ReturnByValue.h
+++ b/Eigen/src/Core/ReturnByValue.h
@@ -57,10 +57,11 @@ template<typename Derived> class ReturnByValue
     EIGEN_DENSE_PUBLIC_INTERFACE(ReturnByValue)
 
     template<typename Dest>
+    EIGEN_DEVICE_FUNC
     inline void evalTo(Dest& dst) const
     { static_cast<const Derived*>(this)->evalTo(dst); }
-    inline Index rows() const { return static_cast<const Derived*>(this)->rows(); }
-    inline Index cols() const { return static_cast<const Derived*>(this)->cols(); }
+    EIGEN_DEVICE_FUNC inline Index rows() const { return static_cast<const Derived*>(this)->rows(); }
+    EIGEN_DEVICE_FUNC inline Index cols() const { return static_cast<const Derived*>(this)->cols(); }
 
 #ifndef EIGEN_PARSED_BY_DOXYGEN
 #define Unusable YOU_ARE_TRYING_TO_ACCESS_A_SINGLE_COEFFICIENT_IN_A_SPECIAL_EXPRESSION_WHERE_THAT_IS_NOT_ALLOWED_BECAUSE_THAT_WOULD_BE_INEFFICIENT
diff --git a/Eigen/src/Core/SelfAdjointView.h b/Eigen/src/Core/SelfAdjointView.h
index 6fa7cd15e..8231e3f5c 100644
--- a/Eigen/src/Core/SelfAdjointView.h
+++ b/Eigen/src/Core/SelfAdjointView.h
@@ -69,17 +69,23 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
     };
     typedef typename MatrixType::PlainObject PlainObject;
 
+    EIGEN_DEVICE_FUNC
     inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix)
     {}
 
+    EIGEN_DEVICE_FUNC
     inline Index rows() const { return m_matrix.rows(); }
+    EIGEN_DEVICE_FUNC
     inline Index cols() const { return m_matrix.cols(); }
+    EIGEN_DEVICE_FUNC
     inline Index outerStride() const { return m_matrix.outerStride(); }
+    EIGEN_DEVICE_FUNC
     inline Index innerStride() const { return m_matrix.innerStride(); }
 
     /** \sa MatrixBase::coeff()
       * \warning the coordinates must fit into the referenced triangular part
       */
+    EIGEN_DEVICE_FUNC
     inline Scalar coeff(Index row, Index col) const
     {
       Base::check_coordinates_internal(row, col);
@@ -89,6 +95,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
     /** \sa MatrixBase::coeffRef()
       * \warning the coordinates must fit into the referenced triangular part
       */
+    EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index row, Index col)
     {
       Base::check_coordinates_internal(row, col);
@@ -96,13 +103,17 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
     }
 
     /** \internal */
+    EIGEN_DEVICE_FUNC
     const MatrixTypeNestedCleaned& _expression() const { return m_matrix; }
 
+    EIGEN_DEVICE_FUNC
     const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
+    EIGEN_DEVICE_FUNC
     MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }
 
     /** Efficient self-adjoint matrix times vector/matrix product */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     SelfadjointProductMatrix<MatrixType,Mode,false,OtherDerived,0,OtherDerived::IsVectorAtCompileTime>
     operator*(const MatrixBase<OtherDerived>& rhs) const
     {
@@ -113,6 +124,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
 
     /** Efficient vector/matrix times self-adjoint matrix product */
     template<typename OtherDerived> friend
+    EIGEN_DEVICE_FUNC
     SelfadjointProductMatrix<OtherDerived,0,OtherDerived::IsVectorAtCompileTime,MatrixType,Mode,false>
     operator*(const MatrixBase<OtherDerived>& lhs, const SelfAdjointView& rhs)
     {
@@ -132,6 +144,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
       * \sa rankUpdate(const MatrixBase<DerivedU>&, Scalar)
       */
     template<typename DerivedU, typename DerivedV>
+    EIGEN_DEVICE_FUNC
     SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const MatrixBase<DerivedV>& v, const Scalar& alpha = Scalar(1));
 
     /** Perform a symmetric rank K update of the selfadjoint matrix \c *this:
@@ -145,6 +158,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
       * \sa rankUpdate(const MatrixBase<DerivedU>&, const MatrixBase<DerivedV>&, Scalar)
       */
     template<typename DerivedU>
+    EIGEN_DEVICE_FUNC
     SelfAdjointView& rankUpdate(const MatrixBase<DerivedU>& u, const Scalar& alpha = Scalar(1));
 
 /////////// Cholesky module ///////////
@@ -159,11 +173,14 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
     /** Return type of eigenvalues() */
     typedef Matrix<RealScalar, internal::traits<MatrixType>::ColsAtCompileTime, 1> EigenvaluesReturnType;
 
+    EIGEN_DEVICE_FUNC
     EigenvaluesReturnType eigenvalues() const;
+    EIGEN_DEVICE_FUNC
     RealScalar operatorNorm() const;
     
     #ifdef EIGEN2_SUPPORT
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     SelfAdjointView& operator=(const MatrixBase<OtherDerived>& other)
     {
       enum {
@@ -174,6 +191,7 @@ template<typename MatrixType, unsigned int UpLo> class SelfAdjointView
       return *this;
     }
     template<typename OtherMatrixType, unsigned int OtherMode>
+    EIGEN_DEVICE_FUNC
     SelfAdjointView& operator=(const TriangularView<OtherMatrixType, OtherMode>& other)
     {
       enum {
@@ -209,6 +227,7 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), U
     row = (UnrollCount-1) % Derived1::RowsAtCompileTime
   };
 
+  EIGEN_DEVICE_FUNC
   static inline void run(Derived1 &dst, const Derived2 &src)
   {
     triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), UnrollCount-1, ClearOpposite>::run(dst, src);
@@ -223,6 +242,7 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Upper), U
 template<typename Derived1, typename Derived2, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, 0, ClearOpposite>
 {
+  EIGEN_DEVICE_FUNC
   static inline void run(Derived1 &, const Derived2 &) {}
 };
 
@@ -234,6 +254,7 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), U
     row = (UnrollCount-1) % Derived1::RowsAtCompileTime
   };
 
+  EIGEN_DEVICE_FUNC
   static inline void run(Derived1 &dst, const Derived2 &src)
   {
     triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), UnrollCount-1, ClearOpposite>::run(dst, src);
@@ -248,6 +269,7 @@ struct triangular_assignment_selector<Derived1, Derived2, (SelfAdjoint|Lower), U
 template<typename Derived1, typename Derived2, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, 0, ClearOpposite>
 {
+  EIGEN_DEVICE_FUNC
   static inline void run(Derived1 &, const Derived2 &) {}
 };
 
@@ -255,6 +277,7 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dynamic, ClearOpposite>
 {
   typedef typename Derived1::Index Index;
+  EIGEN_DEVICE_FUNC
   static inline void run(Derived1 &dst, const Derived2 &src)
   {
     for(Index j = 0; j < dst.cols(); ++j)
@@ -272,6 +295,7 @@ struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Upper, Dyn
 template<typename Derived1, typename Derived2, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, SelfAdjoint|Lower, Dynamic, ClearOpposite>
 {
+  EIGEN_DEVICE_FUNC
   static inline void run(Derived1 &dst, const Derived2 &src)
   {
   typedef typename Derived1::Index Index;
diff --git a/Eigen/src/Core/SelfCwiseBinaryOp.h b/Eigen/src/Core/SelfCwiseBinaryOp.h
index 3d2deff98..65864adf8 100644
--- a/Eigen/src/Core/SelfCwiseBinaryOp.h
+++ b/Eigen/src/Core/SelfCwiseBinaryOp.h
@@ -52,21 +52,24 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
 
     typedef typename internal::packet_traits<Scalar>::type Packet;
 
+    EIGEN_DEVICE_FUNC
     inline SelfCwiseBinaryOp(Lhs& xpr, const BinaryOp& func = BinaryOp()) : m_matrix(xpr), m_functor(func) {}
 
-    inline Index rows() const { return m_matrix.rows(); }
-    inline Index cols() const { return m_matrix.cols(); }
-    inline Index outerStride() const { return m_matrix.outerStride(); }
-    inline Index innerStride() const { return m_matrix.innerStride(); }
-    inline const Scalar* data() const { return m_matrix.data(); }
+    EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); }
+    EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); }
+    EIGEN_DEVICE_FUNC inline Index outerStride() const { return m_matrix.outerStride(); }
+    EIGEN_DEVICE_FUNC inline Index innerStride() const { return m_matrix.innerStride(); }
+    EIGEN_DEVICE_FUNC inline const Scalar* data() const { return m_matrix.data(); }
 
     // note that this function is needed by assign to correctly align loads/stores
     // TODO make Assign use .data()
+    EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index row, Index col)
     {
       EIGEN_STATIC_ASSERT_LVALUE(Lhs)
       return m_matrix.const_cast_derived().coeffRef(row, col);
     }
+    EIGEN_DEVICE_FUNC
     inline const Scalar& coeffRef(Index row, Index col) const
     {
       return m_matrix.coeffRef(row, col);
@@ -74,17 +77,20 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
 
     // note that this function is needed by assign to correctly align loads/stores
     // TODO make Assign use .data()
+    EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index index)
     {
       EIGEN_STATIC_ASSERT_LVALUE(Lhs)
       return m_matrix.const_cast_derived().coeffRef(index);
     }
+    EIGEN_DEVICE_FUNC
     inline const Scalar& coeffRef(Index index) const
     {
       return m_matrix.const_cast_derived().coeffRef(index);
     }
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     void copyCoeff(Index row, Index col, const DenseBase<OtherDerived>& other)
     {
       OtherDerived& _other = other.const_cast_derived();
@@ -95,6 +101,7 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
     }
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
     {
       OtherDerived& _other = other.const_cast_derived();
@@ -125,6 +132,7 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
     // reimplement lazyAssign to handle complex *= real
     // see CwiseBinaryOp ctor for details
     template<typename RhsDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE SelfCwiseBinaryOp& lazyAssign(const DenseBase<RhsDerived>& rhs)
     {
       EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs,RhsDerived)
@@ -144,17 +152,20 @@ template<typename BinaryOp, typename Lhs, typename Rhs> class SelfCwiseBinaryOp
     // overloaded to honor evaluation of special matrices
     // maybe another solution would be to not use SelfCwiseBinaryOp
     // at first...
+    EIGEN_DEVICE_FUNC
     SelfCwiseBinaryOp& operator=(const Rhs& _rhs)
     {
       typename internal::nested<Rhs>::type rhs(_rhs);
       return Base::operator=(rhs);
     }
 
+    EIGEN_DEVICE_FUNC
     Lhs& expression() const 
     { 
       return m_matrix;
     }
 
+    EIGEN_DEVICE_FUNC
     const BinaryOp& functor() const 
     { 
       return m_functor;
diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h
index c83e955ee..c219e2f53 100644
--- a/Eigen/src/Core/StableNorm.h
+++ b/Eigen/src/Core/StableNorm.h
@@ -36,8 +36,8 @@ blueNorm_impl(const EigenBase<Derived>& _vec)
   typedef typename Derived::RealScalar RealScalar;  
   typedef typename Derived::Index Index;
   using std::pow;
-  using std::min;
-  using std::max;
+  EIGEN_USING_STD_MATH(min);
+  EIGEN_USING_STD_MATH(max);
   using std::sqrt;
   using std::abs;
   const Derived& vec(_vec.derived());
@@ -139,7 +139,7 @@ template<typename Derived>
 inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real
 MatrixBase<Derived>::stableNorm() const
 {
-  using std::min;
+  EIGEN_USING_STD_MATH(min);
   using std::sqrt;
   const Index blockSize = 4096;
   RealScalar scale(0);
diff --git a/Eigen/src/Core/Stride.h b/Eigen/src/Core/Stride.h
index 1e3f5fe9f..d3d454e4e 100644
--- a/Eigen/src/Core/Stride.h
+++ b/Eigen/src/Core/Stride.h
@@ -51,6 +51,7 @@ class Stride
     };
 
     /** Default constructor, for use when strides are fixed at compile time */
+    EIGEN_DEVICE_FUNC
     Stride()
       : m_outer(OuterStrideAtCompileTime), m_inner(InnerStrideAtCompileTime)
     {
@@ -58,6 +59,7 @@ class Stride
     }
 
     /** Constructor allowing to pass the strides at runtime */
+    EIGEN_DEVICE_FUNC
     Stride(Index outerStride, Index innerStride)
       : m_outer(outerStride), m_inner(innerStride)
     {
@@ -65,13 +67,16 @@ class Stride
     }
 
     /** Copy constructor */
+    EIGEN_DEVICE_FUNC
     Stride(const Stride& other)
       : m_outer(other.outer()), m_inner(other.inner())
     {}
 
     /** \returns the outer stride */
+    EIGEN_DEVICE_FUNC
     inline Index outer() const { return m_outer.value(); }
     /** \returns the inner stride */
+    EIGEN_DEVICE_FUNC
     inline Index inner() const { return m_inner.value(); }
 
   protected:
@@ -87,8 +92,8 @@ class InnerStride : public Stride<0, Value>
     typedef Stride<0, Value> Base;
   public:
     typedef DenseIndex Index;
-    InnerStride() : Base() {}
-    InnerStride(Index v) : Base(0, v) {}
+    EIGEN_DEVICE_FUNC InnerStride() : Base() {}
+    EIGEN_DEVICE_FUNC InnerStride(Index v) : Base(0, v) {}
 };
 
 /** \brief Convenience specialization of Stride to specify only an outer stride
@@ -99,8 +104,8 @@ class OuterStride : public Stride<Value, 0>
     typedef Stride<Value, 0> Base;
   public:
     typedef DenseIndex Index;
-    OuterStride() : Base() {}
-    OuterStride(Index v) : Base(v,0) {}
+    EIGEN_DEVICE_FUNC OuterStride() : Base() {}
+    EIGEN_DEVICE_FUNC OuterStride(Index v) : Base(v,0) {}
 };
 
 } // end namespace Eigen
diff --git a/Eigen/src/Core/Swap.h b/Eigen/src/Core/Swap.h
index bf58bd599..d602fba65 100644
--- a/Eigen/src/Core/Swap.h
+++ b/Eigen/src/Core/Swap.h
@@ -33,11 +33,16 @@ template<typename ExpressionType> class SwapWrapper
     EIGEN_DENSE_PUBLIC_INTERFACE(SwapWrapper)
     typedef typename internal::packet_traits<Scalar>::type Packet;
 
+    EIGEN_DEVICE_FUNC
     inline SwapWrapper(ExpressionType& xpr) : m_expression(xpr) {}
 
+    EIGEN_DEVICE_FUNC
     inline Index rows() const { return m_expression.rows(); }
+    EIGEN_DEVICE_FUNC
     inline Index cols() const { return m_expression.cols(); }
+    EIGEN_DEVICE_FUNC
     inline Index outerStride() const { return m_expression.outerStride(); }
+    EIGEN_DEVICE_FUNC
     inline Index innerStride() const { return m_expression.innerStride(); }
     
     typedef typename internal::conditional<
@@ -46,30 +51,37 @@ template<typename ExpressionType> class SwapWrapper
                        const Scalar
                      >::type ScalarWithConstIfNotLvalue;
                      
+    EIGEN_DEVICE_FUNC
     inline ScalarWithConstIfNotLvalue* data() { return m_expression.data(); }
+    EIGEN_DEVICE_FUNC
     inline const Scalar* data() const { return m_expression.data(); }
 
+    EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index rowId, Index colId)
     {
       return m_expression.const_cast_derived().coeffRef(rowId, colId);
     }
 
+    EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index index)
     {
       return m_expression.const_cast_derived().coeffRef(index);
     }
 
+    EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index rowId, Index colId) const
     {
       return m_expression.coeffRef(rowId, colId);
     }
 
+    EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index index) const
     {
       return m_expression.coeffRef(index);
     }
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     void copyCoeff(Index rowId, Index colId, const DenseBase<OtherDerived>& other)
     {
       OtherDerived& _other = other.const_cast_derived();
@@ -81,6 +93,7 @@ template<typename ExpressionType> class SwapWrapper
     }
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     void copyCoeff(Index index, const DenseBase<OtherDerived>& other)
     {
       OtherDerived& _other = other.const_cast_derived();
@@ -115,6 +128,7 @@ template<typename ExpressionType> class SwapWrapper
       _other.template writePacket<LoadMode>(index, tmp);
     }
 
+    EIGEN_DEVICE_FUNC
     ExpressionType& expression() const { return m_expression; }
 
   protected:
diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h
index f21b3aa65..976708a0f 100644
--- a/Eigen/src/Core/Transpose.h
+++ b/Eigen/src/Core/Transpose.h
@@ -62,18 +62,21 @@ template<typename MatrixType> class Transpose
     typedef typename TransposeImpl<MatrixType,typename internal::traits<MatrixType>::StorageKind>::Base Base;
     EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose)
 
+    EIGEN_DEVICE_FUNC
     inline Transpose(MatrixType& a_matrix) : m_matrix(a_matrix) {}
 
     EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose)
 
-    inline Index rows() const { return m_matrix.cols(); }
-    inline Index cols() const { return m_matrix.rows(); }
+    EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.cols(); }
+    EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.rows(); }
 
     /** \returns the nested expression */
+    EIGEN_DEVICE_FUNC
     const typename internal::remove_all<typename MatrixType::Nested>::type&
     nestedExpression() const { return m_matrix; }
 
     /** \returns the nested expression */
+    EIGEN_DEVICE_FUNC
     typename internal::remove_all<typename MatrixType::Nested>::type&
     nestedExpression() { return m_matrix.const_cast_derived(); }
 
@@ -106,8 +109,8 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
     EIGEN_DENSE_PUBLIC_INTERFACE(Transpose<MatrixType>)
     EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TransposeImpl)
 
-    inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
-    inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
+    EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().nestedExpression().innerStride(); }
+    EIGEN_DEVICE_FUNC inline Index outerStride() const { return derived().nestedExpression().outerStride(); }
 
     typedef typename internal::conditional<
                        internal::is_lvalue<MatrixType>::value,
@@ -118,33 +121,39 @@ template<typename MatrixType> class TransposeImpl<MatrixType,Dense>
     inline ScalarWithConstIfNotLvalue* data() { return derived().nestedExpression().data(); }
     inline const Scalar* data() const { return derived().nestedExpression().data(); }
 
+    EIGEN_DEVICE_FUNC
     inline ScalarWithConstIfNotLvalue& coeffRef(Index rowId, Index colId)
     {
       EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
       return derived().nestedExpression().const_cast_derived().coeffRef(colId, rowId);
     }
 
+    EIGEN_DEVICE_FUNC
     inline ScalarWithConstIfNotLvalue& coeffRef(Index index)
     {
       EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
       return derived().nestedExpression().const_cast_derived().coeffRef(index);
     }
 
+    EIGEN_DEVICE_FUNC
     inline const Scalar& coeffRef(Index rowId, Index colId) const
     {
       return derived().nestedExpression().coeffRef(colId, rowId);
     }
 
+    EIGEN_DEVICE_FUNC
     inline const Scalar& coeffRef(Index index) const
     {
       return derived().nestedExpression().coeffRef(index);
     }
 
+    EIGEN_DEVICE_FUNC
     inline CoeffReturnType coeff(Index rowId, Index colId) const
     {
       return derived().nestedExpression().coeff(colId, rowId);
     }
 
+    EIGEN_DEVICE_FUNC
     inline CoeffReturnType coeff(Index index) const
     {
       return derived().nestedExpression().coeff(index);
diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h
index fba07365f..1d6e34650 100644
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -44,29 +44,39 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
     typedef typename internal::traits<Derived>::DenseMatrixType DenseMatrixType;
     typedef DenseMatrixType DenseType;
 
+    EIGEN_DEVICE_FUNC
     inline TriangularBase() { eigen_assert(!((Mode&UnitDiag) && (Mode&ZeroDiag))); }
 
+    EIGEN_DEVICE_FUNC
     inline Index rows() const { return derived().rows(); }
+    EIGEN_DEVICE_FUNC
     inline Index cols() const { return derived().cols(); }
+    EIGEN_DEVICE_FUNC
     inline Index outerStride() const { return derived().outerStride(); }
+    EIGEN_DEVICE_FUNC
     inline Index innerStride() const { return derived().innerStride(); }
 
+    EIGEN_DEVICE_FUNC
     inline Scalar coeff(Index row, Index col) const  { return derived().coeff(row,col); }
+    EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index row, Index col) { return derived().coeffRef(row,col); }
 
     /** \see MatrixBase::copyCoeff(row,col)
       */
     template<typename Other>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, Other& other)
     {
       derived().coeffRef(row, col) = other.coeff(row, col);
     }
 
+    EIGEN_DEVICE_FUNC
     inline Scalar operator()(Index row, Index col) const
     {
       check_coordinates(row, col);
       return coeff(row,col);
     }
+    EIGEN_DEVICE_FUNC
     inline Scalar& operator()(Index row, Index col)
     {
       check_coordinates(row, col);
@@ -74,15 +84,20 @@ template<typename Derived> class TriangularBase : public EigenBase<Derived>
     }
 
     #ifndef EIGEN_PARSED_BY_DOXYGEN
+    EIGEN_DEVICE_FUNC
     inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
+    EIGEN_DEVICE_FUNC
     inline Derived& derived() { return *static_cast<Derived*>(this); }
     #endif // not EIGEN_PARSED_BY_DOXYGEN
 
     template<typename DenseDerived>
+    EIGEN_DEVICE_FUNC
     void evalTo(MatrixBase<DenseDerived> &other) const;
     template<typename DenseDerived>
+    EIGEN_DEVICE_FUNC
     void evalToLazy(MatrixBase<DenseDerived> &other) const;
 
+    EIGEN_DEVICE_FUNC
     DenseMatrixType toDenseMatrix() const
     {
       DenseMatrixType res(rows(), cols());
@@ -189,36 +204,52 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
                     | (Mode & (ZeroDiag))
     };
 
+    EIGEN_DEVICE_FUNC
     inline TriangularView(const MatrixType& matrix) : m_matrix(matrix)
     {}
 
+    EIGEN_DEVICE_FUNC
     inline Index rows() const { return m_matrix.rows(); }
+    EIGEN_DEVICE_FUNC
     inline Index cols() const { return m_matrix.cols(); }
+    EIGEN_DEVICE_FUNC
     inline Index outerStride() const { return m_matrix.outerStride(); }
+    EIGEN_DEVICE_FUNC
     inline Index innerStride() const { return m_matrix.innerStride(); }
 
-    /** \sa MatrixBase::operator+=() */
-    template<typename Other> TriangularView&  operator+=(const DenseBase<Other>& other) { return *this = m_matrix + other.derived(); }
+    /** \sa MatrixBase::operator+=() */
+    template<typename Other>
+    EIGEN_DEVICE_FUNC
+    TriangularView&  operator+=(const DenseBase<Other>& other) { return *this = m_matrix + other.derived(); }
     /** \sa MatrixBase::operator-=() */
-    template<typename Other> TriangularView&  operator-=(const DenseBase<Other>& other) { return *this = m_matrix - other.derived(); }
+    template<typename Other>
+    EIGEN_DEVICE_FUNC
+    TriangularView&  operator-=(const DenseBase<Other>& other) { return *this = m_matrix - other.derived(); }
     /** \sa MatrixBase::operator*=() */
+    EIGEN_DEVICE_FUNC
     TriangularView&  operator*=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = m_matrix * other; }
     /** \sa MatrixBase::operator/=() */
+    EIGEN_DEVICE_FUNC
     TriangularView&  operator/=(const typename internal::traits<MatrixType>::Scalar& other) { return *this = m_matrix / other; }
 
     /** \sa MatrixBase::fill() */
+    EIGEN_DEVICE_FUNC
     void fill(const Scalar& value) { setConstant(value); }
     /** \sa MatrixBase::setConstant() */
+    EIGEN_DEVICE_FUNC
     TriangularView& setConstant(const Scalar& value)
     { return *this = MatrixType::Constant(rows(), cols(), value); }
     /** \sa MatrixBase::setZero() */
+    EIGEN_DEVICE_FUNC
     TriangularView& setZero() { return setConstant(Scalar(0)); }
     /** \sa MatrixBase::setOnes() */
+    EIGEN_DEVICE_FUNC
     TriangularView& setOnes() { return setConstant(Scalar(1)); }
 
     /** \sa MatrixBase::coeff()
       * \warning the coordinates must fit into the referenced triangular part
       */
+    EIGEN_DEVICE_FUNC
     inline Scalar coeff(Index row, Index col) const
     {
       Base::check_coordinates_internal(row, col);
@@ -228,49 +259,62 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
     /** \sa MatrixBase::coeffRef()
       * \warning the coordinates must fit into the referenced triangular part
       */
+    EIGEN_DEVICE_FUNC
     inline Scalar& coeffRef(Index row, Index col)
     {
       Base::check_coordinates_internal(row, col);
       return m_matrix.const_cast_derived().coeffRef(row, col);
     }
 
+    EIGEN_DEVICE_FUNC
     const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; }
+    EIGEN_DEVICE_FUNC
     MatrixTypeNestedCleaned& nestedExpression() { return *const_cast<MatrixTypeNestedCleaned*>(&m_matrix); }
 
     /** Assigns a triangular matrix to a triangular part of a dense matrix */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     TriangularView& operator=(const TriangularBase<OtherDerived>& other);
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     TriangularView& operator=(const MatrixBase<OtherDerived>& other);
 
+    EIGEN_DEVICE_FUNC
     TriangularView& operator=(const TriangularView& other)
     { return *this = other.nestedExpression(); }
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     void lazyAssign(const TriangularBase<OtherDerived>& other);
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     void lazyAssign(const MatrixBase<OtherDerived>& other);
 
     /** \sa MatrixBase::conjugate() */
+    EIGEN_DEVICE_FUNC
     inline TriangularView<MatrixConjugateReturnType,Mode> conjugate()
     { return m_matrix.conjugate(); }
     /** \sa MatrixBase::conjugate() const */
+    EIGEN_DEVICE_FUNC
     inline const TriangularView<MatrixConjugateReturnType,Mode> conjugate() const
     { return m_matrix.conjugate(); }
 
     /** \sa MatrixBase::adjoint() const */
+    EIGEN_DEVICE_FUNC
     inline const TriangularView<const typename MatrixType::AdjointReturnType,TransposeMode> adjoint() const
     { return m_matrix.adjoint(); }
 
     /** \sa MatrixBase::transpose() */
+    EIGEN_DEVICE_FUNC
     inline TriangularView<Transpose<MatrixType>,TransposeMode> transpose()
     {
       EIGEN_STATIC_ASSERT_LVALUE(MatrixType)
       return m_matrix.const_cast_derived().transpose();
     }
     /** \sa MatrixBase::transpose() const */
+    EIGEN_DEVICE_FUNC
     inline const TriangularView<Transpose<MatrixType>,TransposeMode> transpose() const
     {
       return m_matrix.transpose();
@@ -278,6 +322,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
 
     /** Efficient triangular matrix times vector/matrix product */
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     TriangularProduct<Mode,true,MatrixType,false,OtherDerived, OtherDerived::IsVectorAtCompileTime>
     operator*(const MatrixBase<OtherDerived>& rhs) const
     {
@@ -288,6 +333,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
 
     /** Efficient vector/matrix times triangular matrix product */
     template<typename OtherDerived> friend
+    EIGEN_DEVICE_FUNC
     TriangularProduct<Mode,false,OtherDerived,OtherDerived::IsVectorAtCompileTime,MatrixType,false>
     operator*(const MatrixBase<OtherDerived>& lhs, const TriangularView& rhs)
     {
@@ -326,26 +372,32 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
     #endif // EIGEN2_SUPPORT
 
     template<int Side, typename Other>
+    EIGEN_DEVICE_FUNC
     inline const internal::triangular_solve_retval<Side,TriangularView, Other>
     solve(const MatrixBase<Other>& other) const;
 
     template<int Side, typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     void solveInPlace(const MatrixBase<OtherDerived>& other) const;
 
     template<typename Other>
+    EIGEN_DEVICE_FUNC
     inline const internal::triangular_solve_retval<OnTheLeft,TriangularView, Other> 
     solve(const MatrixBase<Other>& other) const
     { return solve<OnTheLeft>(other); }
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     void solveInPlace(const MatrixBase<OtherDerived>& other) const
     { return solveInPlace<OnTheLeft>(other); }
 
+    EIGEN_DEVICE_FUNC
     const SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView() const
     {
       EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
       return SelfAdjointView<MatrixTypeNestedNonRef,Mode>(m_matrix);
     }
+    EIGEN_DEVICE_FUNC
     SelfAdjointView<MatrixTypeNestedNonRef,Mode> selfadjointView()
     {
       EIGEN_STATIC_ASSERT((Mode&UnitDiag)==0,PROGRAMMING_ERROR);
@@ -353,18 +405,21 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
     }
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     void swap(TriangularBase<OtherDerived> const & other)
     {
       TriangularView<SwapWrapper<MatrixType>,Mode>(const_cast<MatrixType&>(m_matrix)).lazyAssign(other.derived());
     }
 
     template<typename OtherDerived>
+    EIGEN_DEVICE_FUNC
     void swap(MatrixBase<OtherDerived> const & other)
     {
       SwapWrapper<MatrixType> swaper(const_cast<MatrixType&>(m_matrix));
       TriangularView<SwapWrapper<MatrixType>,Mode>(swaper).lazyAssign(other.derived());
     }
 
+    EIGEN_DEVICE_FUNC
     Scalar determinant() const
     {
       if (Mode & UnitDiag)
@@ -377,6 +432,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
     
     // TODO simplify the following:
     template<typename ProductDerived, typename Lhs, typename Rhs>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE TriangularView& operator=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
     {
       setZero();
@@ -384,12 +440,14 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
     }
     
     template<typename ProductDerived, typename Lhs, typename Rhs>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE TriangularView& operator+=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
     {
       return assignProduct(other,1);
     }
     
     template<typename ProductDerived, typename Lhs, typename Rhs>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE TriangularView& operator-=(const ProductBase<ProductDerived, Lhs,Rhs>& other)
     {
       return assignProduct(other,-1);
@@ -397,6 +455,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
     
     
     template<typename ProductDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE TriangularView& operator=(const ScaledProduct<ProductDerived>& other)
     {
       setZero();
@@ -404,12 +463,14 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
     }
     
     template<typename ProductDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE TriangularView& operator+=(const ScaledProduct<ProductDerived>& other)
     {
       return assignProduct(other,other.alpha());
     }
     
     template<typename ProductDerived>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE TriangularView& operator-=(const ScaledProduct<ProductDerived>& other)
     {
       return assignProduct(other,-other.alpha());
@@ -418,6 +479,7 @@ template<typename _MatrixType, unsigned int _Mode> class TriangularView
   protected:
     
     template<typename ProductDerived, typename Lhs, typename Rhs>
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE TriangularView& assignProduct(const ProductBase<ProductDerived, Lhs,Rhs>& prod, const Scalar& alpha);
 
     MatrixTypeNested m_matrix;
@@ -439,6 +501,7 @@ struct triangular_assignment_selector
   
   typedef typename Derived1::Scalar Scalar;
 
+  EIGEN_DEVICE_FUNC
   static inline void run(Derived1 &dst, const Derived2 &src)
   {
     triangular_assignment_selector<Derived1, Derived2, Mode, UnrollCount-1, ClearOpposite>::run(dst, src);
@@ -467,6 +530,7 @@ struct triangular_assignment_selector
 template<typename Derived1, typename Derived2, unsigned int Mode, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, Mode, 0, ClearOpposite>
 {
+  EIGEN_DEVICE_FUNC
   static inline void run(Derived1 &, const Derived2 &) {}
 };
 
@@ -475,6 +539,7 @@ struct triangular_assignment_selector<Derived1, Derived2, Upper, Dynamic, ClearO
 {
   typedef typename Derived1::Index Index;
   typedef typename Derived1::Scalar Scalar;
+  EIGEN_DEVICE_FUNC
   static inline void run(Derived1 &dst, const Derived2 &src)
   {
     for(Index j = 0; j < dst.cols(); ++j)
@@ -493,6 +558,7 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, Lower, Dynamic, ClearOpposite>
 {
   typedef typename Derived1::Index Index;
+  EIGEN_DEVICE_FUNC
   static inline void run(Derived1 &dst, const Derived2 &src)
   {
     for(Index j = 0; j < dst.cols(); ++j)
@@ -512,6 +578,7 @@ struct triangular_assignment_selector<Derived1, Derived2, StrictlyUpper, Dynamic
 {
   typedef typename Derived1::Index Index;
   typedef typename Derived1::Scalar Scalar;
+  EIGEN_DEVICE_FUNC
   static inline void run(Derived1 &dst, const Derived2 &src)
   {
     for(Index j = 0; j < dst.cols(); ++j)
@@ -530,6 +597,7 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, StrictlyLower, Dynamic, ClearOpposite>
 {
   typedef typename Derived1::Index Index;
+  EIGEN_DEVICE_FUNC
   static inline void run(Derived1 &dst, const Derived2 &src)
   {
     for(Index j = 0; j < dst.cols(); ++j)
@@ -548,6 +616,7 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, UnitUpper, Dynamic, ClearOpposite>
 {
   typedef typename Derived1::Index Index;
+  EIGEN_DEVICE_FUNC
   static inline void run(Derived1 &dst, const Derived2 &src)
   {
     for(Index j = 0; j < dst.cols(); ++j)
@@ -568,6 +637,7 @@ template<typename Derived1, typename Derived2, bool ClearOpposite>
 struct triangular_assignment_selector<Derived1, Derived2, UnitLower, Dynamic, ClearOpposite>
 {
   typedef typename Derived1::Index Index;
+  EIGEN_DEVICE_FUNC
   static inline void run(Derived1 &dst, const Derived2 &src)
   {
     for(Index j = 0; j < dst.cols(); ++j)
diff --git a/Eigen/src/Core/VectorBlock.h b/Eigen/src/Core/VectorBlock.h
index 1a7330f3c..216c568c4 100644
--- a/Eigen/src/Core/VectorBlock.h
+++ b/Eigen/src/Core/VectorBlock.h
@@ -72,6 +72,7 @@ template<typename VectorType, int Size> class VectorBlock
 
     /** Dynamic-size constructor
       */
+    EIGEN_DEVICE_FUNC
     inline VectorBlock(VectorType& vector, Index start, Index size)
       : Base(vector,
              IsColVector ? start : 0, IsColVector ? 0 : start,
@@ -82,6 +83,7 @@ template<typename VectorType, int Size> class VectorBlock
 
     /** Fixed-size constructor
       */
+    EIGEN_DEVICE_FUNC
     inline VectorBlock(VectorType& vector, Index start)
       : Base(vector, IsColVector ? start : 0, IsColVector ? 0 : start)
     {
diff --git a/Eigen/src/Core/products/CoeffBasedProduct.h b/Eigen/src/Core/products/CoeffBasedProduct.h
index 357e04cb0..637513132 100644
--- a/Eigen/src/Core/products/CoeffBasedProduct.h
+++ b/Eigen/src/Core/products/CoeffBasedProduct.h
@@ -141,11 +141,13 @@ class CoeffBasedProduct
 
   public:
 
+    EIGEN_DEVICE_FUNC
     inline CoeffBasedProduct(const CoeffBasedProduct& other)
       : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
     {}
 
     template<typename Lhs, typename Rhs>
+    EIGEN_DEVICE_FUNC 
     inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
       : m_lhs(lhs), m_rhs(rhs)
     {
@@ -158,9 +160,10 @@ class CoeffBasedProduct
         && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
     }
 
-    EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
-    EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
 
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
     {
       Scalar res;
@@ -171,6 +174,7 @@ class CoeffBasedProduct
     /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
      * which is why we don't set the LinearAccessBit.
      */
+    EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
     {
       Scalar res;
@@ -192,22 +196,26 @@ class CoeffBasedProduct
     }
 
     // Implicit conversion to the nested type (trigger the evaluation of the product)
+    EIGEN_DEVICE_FUNC 
     EIGEN_STRONG_INLINE operator const PlainObject& () const
     {
       m_result.lazyAssign(*this);
       return m_result;
     }
 
-    const _LhsNested& lhs() const { return m_lhs; }
-    const _RhsNested& rhs() const { return m_rhs; }
+    EIGEN_DEVICE_FUNC const _LhsNested& lhs() const { return m_lhs; }
+    EIGEN_DEVICE_FUNC const _RhsNested& rhs() const { return m_rhs; }
 
+    EIGEN_DEVICE_FUNC
     const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
     { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
 
     template<int DiagonalIndex>
+    EIGEN_DEVICE_FUNC 
     const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
     { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
 
+    EIGEN_DEVICE_FUNC
     const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
     { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
 
@@ -240,6 +248,7 @@ template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
 struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
 {
   typedef typename Lhs::Index Index;
+  EIGEN_DEVICE_FUNC 
   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
   {
     product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
@@ -251,6 +260,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
 struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
 {
   typedef typename Lhs::Index Index;
+  EIGEN_DEVICE_FUNC 
   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
   {
     res = lhs.coeff(row, 0) * rhs.coeff(0, col);
@@ -261,6 +271,7 @@ template<typename Lhs, typename Rhs, typename RetScalar>
 struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
 {
   typedef typename Lhs::Index Index;
+  EIGEN_DEVICE_FUNC 
   static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
   {
     eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
diff --git a/Eigen/src/Core/util/BlasUtil.h b/Eigen/src/Core/util/BlasUtil.h
index a28f16fa0..0d8e2705a 100644
--- a/Eigen/src/Core/util/BlasUtil.h
+++ b/Eigen/src/Core/util/BlasUtil.h
@@ -56,8 +56,8 @@ template<> struct conj_if<false> {
 
 template<typename Scalar> struct conj_helper<Scalar,Scalar,false,false>
 {
-  EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const { return internal::pmadd(x,y,c); }
-  EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const { return internal::pmul(x,y); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmadd(const Scalar& x, const Scalar& y, const Scalar& c) const { return internal::pmadd(x,y,c); }
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar pmul(const Scalar& x, const Scalar& y) const { return internal::pmul(x,y); }
 };
 
 template<typename RealScalar> struct conj_helper<std::complex<RealScalar>, std::complex<RealScalar>, false,true>
@@ -109,10 +109,11 @@ template<typename RealScalar,bool Conj> struct conj_helper<RealScalar, std::comp
 };
 
 template<typename From,typename To> struct get_factor {
-  static EIGEN_STRONG_INLINE To run(const From& x) { return x; }
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE To run(const From& x) { return x; }
 };
 
 template<typename Scalar> struct get_factor<Scalar,typename NumTraits<Scalar>::Real> {
+  EIGEN_DEVICE_FUNC
   static EIGEN_STRONG_INLINE typename NumTraits<Scalar>::Real run(const Scalar& x) { return numext::real(x); }
 };
 
diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h
index cbedb7da4..dd0c18ad3 100644
--- a/Eigen/src/Core/util/ForwardDeclarations.h
+++ b/Eigen/src/Core/util/ForwardDeclarations.h
@@ -57,12 +57,12 @@ template<typename _Scalar, int _Rows, int _Cols,
     // differently and this makes gcc 3.4.6 happy, but it's ugly.
     // The error would only show up with EIGEN_DEFAULT_TO_ROW_MAJOR is defined
     // (when EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION is RowMajor)
-                          ( (_Rows==1 && _Cols!=1) ? RowMajor
+                          ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
                           : !(_Cols==1 && _Rows!=1) ?  EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION
-                          : ColMajor ),
+                          : Eigen::ColMajor ),
 #else
-                          ( (_Rows==1 && _Cols!=1) ? RowMajor
-                          : (_Cols==1 && _Rows!=1) ? ColMajor
+                          ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
+                          : (_Cols==1 && _Rows!=1) ? Eigen::ColMajor
                           : EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),
 #endif
          int _MaxRows = _Rows,
@@ -198,12 +198,12 @@ template<typename _Scalar, int _Rows, int _Cols,
     // differently and this makes gcc 3.4.6 happy, but it's ugly.
     // The error would only show up with EIGEN_DEFAULT_TO_ROW_MAJOR is defined
     // (when EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION is RowMajor)
-                          ( (_Rows==1 && _Cols!=1) ? RowMajor
+                          ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
                           : !(_Cols==1 && _Rows!=1) ?  EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION
-                          : ColMajor ),
+                          : Eigen::ColMajor ),
 #else
-                          ( (_Rows==1 && _Cols!=1) ? RowMajor
-                          : (_Cols==1 && _Rows!=1) ? ColMajor
+                          ( (_Rows==1 && _Cols!=1) ? Eigen::RowMajor
+                          : (_Cols==1 && _Rows!=1) ? Eigen::ColMajor
                           : EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ),
 #endif
          int _MaxRows = _Rows, int _MaxCols = _Cols> class Array;
diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 95f9eb7d1..debc04f3f 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -87,9 +87,9 @@
 #endif
 
 #ifdef EIGEN_DEFAULT_TO_ROW_MAJOR
-#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION RowMajor
+#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::RowMajor
 #else
-#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION ColMajor
+#define EIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION Eigen::ColMajor
 #endif
 
 #ifndef EIGEN_DEFAULT_DENSE_INDEX_TYPE
@@ -413,7 +413,7 @@
 
 #define EIGEN_MAKE_CWISE_BINARY_OP(METHOD,FUNCTOR) \
   template<typename OtherDerived> \
-  EIGEN_STRONG_INLINE const CwiseBinaryOp<FUNCTOR<Scalar>, const Derived, const OtherDerived> \
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CwiseBinaryOp<FUNCTOR<Scalar>, const Derived, const OtherDerived> \
   (METHOD)(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const \
   { \
     return CwiseBinaryOp<FUNCTOR<Scalar>, const Derived, const OtherDerived>(derived(), other.derived()); \
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index 71d587108..aea168b46 100644
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -88,7 +88,31 @@ template<bool Condition, typename T> struct enable_if;
 template<typename T> struct enable_if<true,T>
 { typedef T type; };
 
+#if defined(__CUDA_ARCH__)
+template<typename T> EIGEN_DEVICE_FUNC   void swap(T &a, T &b) { T tmp = b; b = a; a = tmp; }
 
+namespace device {
+template<typename T> struct numeric_limits
+{
+  EIGEN_DEVICE_FUNC
+  static T epsilon() { return 0; }
+};
+template<> struct numeric_limits<float>
+{
+  EIGEN_DEVICE_FUNC
+  static float epsilon() { return __FLT_EPSILON__; }
+};
+template<> struct numeric_limits<double>
+{
+  EIGEN_DEVICE_FUNC
+  static double epsilon() { return __DBL_EPSILON__; }
+};
+
+}
+
+#else
+template<typename T> EIGEN_STRONG_INLINE void swap(T &a, T &b) { std::swap(a,b); }
+#endif
 
 /** \internal
   * A base class do disable default copy ctor and copy assignement operator.
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 0009ec049..195d9e2e1 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -16,8 +16,8 @@
 // so currently we simply disable this optimization for gcc 4.3
 #if (defined __GNUG__) && !((__GNUC__==4) && (__GNUC_MINOR__==3))
   #define EIGEN_EMPTY_STRUCT_CTOR(X) \
-    EIGEN_STRONG_INLINE X() {} \
-    EIGEN_STRONG_INLINE X(const X& ) {}
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X() {} \
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE X(const X& ) {}
 #else
   #define EIGEN_EMPTY_STRUCT_CTOR(X)
 #endif
@@ -50,19 +50,19 @@ template<typename T, int Value> class variable_if_dynamic
 {
   public:
     EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamic)
-    explicit variable_if_dynamic(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); assert(v == T(Value)); }
-    static T value() { return T(Value); }
-    void setValue(T) {}
+    EIGEN_DEVICE_FUNC explicit variable_if_dynamic(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }
+    EIGEN_DEVICE_FUNC static T value() { return T(Value); }
+    EIGEN_DEVICE_FUNC void setValue(T) {}
 };
 
 template<typename T> class variable_if_dynamic<T, Dynamic>
 {
     T m_value;
-    variable_if_dynamic() { assert(false); }
+    EIGEN_DEVICE_FUNC variable_if_dynamic() { eigen_assert(false); }
   public:
-    explicit variable_if_dynamic(T value) : m_value(value) {}
-    T value() const { return m_value; }
-    void setValue(T value) { m_value = value; }
+    EIGEN_DEVICE_FUNC explicit variable_if_dynamic(T value) : m_value(value) {}
+    EIGEN_DEVICE_FUNC T value() const { return m_value; }
+    EIGEN_DEVICE_FUNC void setValue(T value) { m_value = value; }
 };
 
 /** \internal like variable_if_dynamic but for DynamicIndex
@@ -71,19 +71,19 @@ template<typename T, int Value> class variable_if_dynamicindex
 {
   public:
     EIGEN_EMPTY_STRUCT_CTOR(variable_if_dynamicindex)
-    explicit variable_if_dynamicindex(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); assert(v == T(Value)); }
-    static T value() { return T(Value); }
-    void setValue(T) {}
+    EIGEN_DEVICE_FUNC explicit variable_if_dynamicindex(T v) { EIGEN_ONLY_USED_FOR_DEBUG(v); eigen_assert(v == T(Value)); }
+    EIGEN_DEVICE_FUNC static T value() { return T(Value); }
+    EIGEN_DEVICE_FUNC void setValue(T) {}
 };
 
 template<typename T> class variable_if_dynamicindex<T, DynamicIndex>
 {
     T m_value;
-    variable_if_dynamicindex() { assert(false); }
+    EIGEN_DEVICE_FUNC variable_if_dynamicindex() { eigen_assert(false); }
   public:
-    explicit variable_if_dynamicindex(T value) : m_value(value) {}
-    T value() const { return m_value; }
-    void setValue(T value) { m_value = value; }
+    EIGEN_DEVICE_FUNC explicit variable_if_dynamicindex(T value) : m_value(value) {}
+    EIGEN_DEVICE_FUNC T value() const { return m_value; }
+    EIGEN_DEVICE_FUNC void setValue(T value) { m_value = value; }
 };
 
 template<typename T> struct functor_traits
@@ -341,6 +341,7 @@ template<typename T, int n=1, typename PlainObject = typename eval<T>::type> str
 };
 
 template<typename T>
+EIGEN_DEVICE_FUNC
 T* const_cast_ptr(const T* ptr)
 {
   return const_cast<T*>(ptr);
diff --git a/Eigen/src/Eigenvalues/EigenSolver.h b/Eigen/src/Eigenvalues/EigenSolver.h
index 6e7150685..bf20e03ef 100644
--- a/Eigen/src/Eigenvalues/EigenSolver.h
+++ b/Eigen/src/Eigenvalues/EigenSolver.h
@@ -568,7 +568,7 @@ void EigenSolver<MatrixType>::doComputeEigenvectors()
           }
 
           // Overflow control
-          using std::max;
+          EIGEN_USING_STD_MATH(max);
           Scalar t = (max)(abs(m_matT.coeff(i,n-1)),abs(m_matT.coeff(i,n)));
           if ((eps * t) * t > Scalar(1))
             m_matT.block(i, n-1, size-i, 2) /= t;
diff --git a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
index 4e06809c4..be2f7b452 100644
--- a/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
+++ b/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h
@@ -112,6 +112,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
       * Example: \include SelfAdjointEigenSolver_SelfAdjointEigenSolver.cpp
       * Output: \verbinclude SelfAdjointEigenSolver_SelfAdjointEigenSolver.out
       */
+    EIGEN_DEVICE_FUNC
     SelfAdjointEigenSolver()
         : m_eivec(),
           m_eivalues(),
@@ -131,6 +132,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
       *
       * \sa compute() for an example
       */
+    EIGEN_DEVICE_FUNC
     SelfAdjointEigenSolver(Index size)
         : m_eivec(size, size),
           m_eivalues(size),
@@ -153,6 +155,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
       *
       * \sa compute(const MatrixType&, int)
       */
+    EIGEN_DEVICE_FUNC
     SelfAdjointEigenSolver(const MatrixType& matrix, int options = ComputeEigenvectors)
       : m_eivec(matrix.rows(), matrix.cols()),
         m_eivalues(matrix.cols()),
@@ -192,6 +195,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
       *
       * \sa SelfAdjointEigenSolver(const MatrixType&, int)
       */
+    EIGEN_DEVICE_FUNC
     SelfAdjointEigenSolver& compute(const MatrixType& matrix, int options = ComputeEigenvectors);
     
     /** \brief Computes eigendecomposition of given matrix using a direct algorithm
@@ -208,6 +212,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
       *
       * \sa compute(const MatrixType&, int options)
       */
+    EIGEN_DEVICE_FUNC
     SelfAdjointEigenSolver& computeDirect(const MatrixType& matrix, int options = ComputeEigenvectors);
 
     /**
@@ -241,6 +246,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
       *
       * \sa eigenvalues()
       */
+    EIGEN_DEVICE_FUNC
     const MatrixType& eigenvectors() const
     {
       eigen_assert(m_isInitialized && "SelfAdjointEigenSolver is not initialized.");
@@ -263,6 +269,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
       *
       * \sa eigenvectors(), MatrixBase::eigenvalues()
       */
+    EIGEN_DEVICE_FUNC
     const RealVectorType& eigenvalues() const
     {
       eigen_assert(m_isInitialized && "SelfAdjointEigenSolver is not initialized.");
@@ -287,6 +294,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
       * \sa operatorInverseSqrt(),
       *     \ref MatrixFunctions_Module "MatrixFunctions Module"
       */
+    EIGEN_DEVICE_FUNC
     MatrixType operatorSqrt() const
     {
       eigen_assert(m_isInitialized && "SelfAdjointEigenSolver is not initialized.");
@@ -312,6 +320,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
       * \sa operatorSqrt(), MatrixBase::inverse(),
       *     \ref MatrixFunctions_Module "MatrixFunctions Module"
       */
+    EIGEN_DEVICE_FUNC
     MatrixType operatorInverseSqrt() const
     {
       eigen_assert(m_isInitialized && "SelfAdjointEigenSolver is not initialized.");
@@ -323,6 +332,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
       *
       * \returns \c Success if computation was succesful, \c NoConvergence otherwise.
       */
+    EIGEN_DEVICE_FUNC
     ComputationInfo info() const
     {
       eigen_assert(m_isInitialized && "SelfAdjointEigenSolver is not initialized.");
@@ -337,6 +347,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
     static const int m_maxIterations = 30;
 
     #ifdef EIGEN2_SUPPORT
+    EIGEN_DEVICE_FUNC
     SelfAdjointEigenSolver(const MatrixType& matrix, bool computeEigenvectors)
       : m_eivec(matrix.rows(), matrix.cols()),
         m_eivalues(matrix.cols()),
@@ -346,6 +357,7 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
       compute(matrix, computeEigenvectors);
     }
     
+    EIGEN_DEVICE_FUNC
     SelfAdjointEigenSolver(const MatrixType& matA, const MatrixType& matB, bool computeEigenvectors = true)
         : m_eivec(matA.cols(), matA.cols()),
           m_eivalues(matA.cols()),
@@ -355,11 +367,13 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
       static_cast<GeneralizedSelfAdjointEigenSolver<MatrixType>*>(this)->compute(matA, matB, computeEigenvectors ? ComputeEigenvectors : EigenvaluesOnly);
     }
     
+    EIGEN_DEVICE_FUNC
     void compute(const MatrixType& matrix, bool computeEigenvectors)
     {
       compute(matrix, computeEigenvectors ? ComputeEigenvectors : EigenvaluesOnly);
     }
 
+    EIGEN_DEVICE_FUNC
     void compute(const MatrixType& matA, const MatrixType& matB, bool computeEigenvectors = true)
     {
       compute(matA, matB, computeEigenvectors ? ComputeEigenvectors : EigenvaluesOnly);
@@ -393,10 +407,12 @@ template<typename _MatrixType> class SelfAdjointEigenSolver
   */
 namespace internal {
 template<int StorageOrder,typename RealScalar, typename Scalar, typename Index>
+EIGEN_DEVICE_FUNC
 static void tridiagonal_qr_step(RealScalar* diag, RealScalar* subdiag, Index start, Index end, Scalar* matrixQ, Index n);
 }
 
 template<typename MatrixType>
+EIGEN_DEVICE_FUNC
 SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>
 ::compute(const MatrixType& matrix, int options)
 {
@@ -538,6 +554,7 @@ ComputationInfo computeFromTridiagonal_impl(DiagType& diag, SubDiagType& subdiag
   
 template<typename SolverType,int Size,bool IsComplex> struct direct_selfadjoint_eigenvalues
 {
+  EIGEN_DEVICE_FUNC
   static inline void run(SolverType& eig, const typename SolverType::MatrixType& A, int options)
   { eig.compute(A,options); }
 };
@@ -548,12 +565,13 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3
   typedef typename SolverType::RealVectorType VectorType;
   typedef typename SolverType::Scalar Scalar;
   
+  EIGEN_DEVICE_FUNC
   static inline void computeRoots(const MatrixType& m, VectorType& roots)
   {
-    using std::sqrt;
-    using std::atan2;
-    using std::cos;
-    using std::sin;
+    EIGEN_USING_STD_MATH(sqrt)
+    EIGEN_USING_STD_MATH(atan2)
+    EIGEN_USING_STD_MATH(cos)
+    EIGEN_USING_STD_MATH(sin)
     const Scalar s_inv3 = Scalar(1.0)/Scalar(3.0);
     const Scalar s_sqrt3 = sqrt(Scalar(3.0));
 
@@ -588,15 +606,16 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3
 
     // Sort in increasing order.
     if (roots(0) >= roots(1))
-      std::swap(roots(0),roots(1));
+      internal::swap(roots(0),roots(1));
     if (roots(1) >= roots(2))
     {
-      std::swap(roots(1),roots(2));
+      internal::swap(roots(1),roots(2));
       if (roots(0) >= roots(1))
-        std::swap(roots(0),roots(1));
+        internal::swap(roots(0),roots(1));
     }
   }
   
+  EIGEN_DEVICE_FUNC
   static inline void run(SolverType& solver, const MatrixType& mat, int options)
   {
     using std::sqrt;
@@ -717,12 +736,14 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3
 };
 
 // 2x2 direct eigenvalues decomposition, code from Hauke Heibel
-template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,2,false>
+template<typename SolverType> 
+struct direct_selfadjoint_eigenvalues<SolverType,2,false>
 {
   typedef typename SolverType::MatrixType MatrixType;
   typedef typename SolverType::RealVectorType VectorType;
   typedef typename SolverType::Scalar Scalar;
   
+  EIGEN_DEVICE_FUNC
   static inline void computeRoots(const MatrixType& m, VectorType& roots)
   {
     using std::sqrt;
@@ -732,6 +753,7 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,2
     roots(1) = t1 + t0;
   }
   
+  EIGEN_DEVICE_FUNC
   static inline void run(SolverType& solver, const MatrixType& mat, int options)
   {
     using std::sqrt;
@@ -785,6 +807,7 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,2
 }
 
 template<typename MatrixType>
+EIGEN_DEVICE_FUNC
 SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>
 ::computeDirect(const MatrixType& matrix, int options)
 {
@@ -794,6 +817,7 @@ SelfAdjointEigenSolver<MatrixType>& SelfAdjointEigenSolver<MatrixType>
 
 namespace internal {
 template<int StorageOrder,typename RealScalar, typename Scalar, typename Index>
+EIGEN_DEVICE_FUNC
 static void tridiagonal_qr_step(RealScalar* diag, RealScalar* subdiag, Index start, Index end, Scalar* matrixQ, Index n)
 {
   using std::abs;
diff --git a/Eigen/src/Geometry/AngleAxis.h b/Eigen/src/Geometry/AngleAxis.h
index 553d38c74..f424e6d7d 100644
--- a/Eigen/src/Geometry/AngleAxis.h
+++ b/Eigen/src/Geometry/AngleAxis.h
@@ -159,8 +159,8 @@ template<typename QuatDerived>
 AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const QuaternionBase<QuatDerived>& q)
 {
   using std::acos;
-  using std::min;
-  using std::max;
+  EIGEN_USING_STD_MATH(min);
+  EIGEN_USING_STD_MATH(max);
   using std::sqrt;
   Scalar n2 = q.vec().squaredNorm();
   if (n2 < NumTraits<Scalar>::dummy_precision()*NumTraits<Scalar>::dummy_precision())
diff --git a/Eigen/src/Geometry/OrthoMethods.h b/Eigen/src/Geometry/OrthoMethods.h
index 556bc8160..26be3ee5b 100644
--- a/Eigen/src/Geometry/OrthoMethods.h
+++ b/Eigen/src/Geometry/OrthoMethods.h
@@ -132,6 +132,7 @@ struct unitOrthogonal_selector
   typedef typename NumTraits<Scalar>::Real RealScalar;
   typedef typename Derived::Index Index;
   typedef Matrix<Scalar,2,1> Vector2;
+  EIGEN_DEVICE_FUNC
   static inline VectorType run(const Derived& src)
   {
     VectorType perp = VectorType::Zero(src.size());
@@ -154,6 +155,7 @@ struct unitOrthogonal_selector<Derived,3>
   typedef typename plain_matrix_type<Derived>::type VectorType;
   typedef typename traits<Derived>::Scalar Scalar;
   typedef typename NumTraits<Scalar>::Real RealScalar;
+  EIGEN_DEVICE_FUNC
   static inline VectorType run(const Derived& src)
   {
     VectorType perp;
@@ -192,6 +194,7 @@ template<typename Derived>
 struct unitOrthogonal_selector<Derived,2>
 {
   typedef typename plain_matrix_type<Derived>::type VectorType;
+  EIGEN_DEVICE_FUNC
   static inline VectorType run(const Derived& src)
   { return VectorType(-numext::conj(src.y()), numext::conj(src.x())).normalized(); }
 };
diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h
index e135f2b66..d036c018a 100644
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -572,7 +572,7 @@ template<class Derived>
 template<typename Derived1, typename Derived2>
 inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
 {
-  using std::max;
+  EIGEN_USING_STD_MATH(max);
   using std::sqrt;
   Vector3 v0 = a.normalized();
   Vector3 v1 = b.normalized();
diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index dfe25f424..44699b763 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -730,12 +730,14 @@ struct solve_retval<FullPivLU<_MatrixType>, Rhs>
   *
   * \sa class FullPivLU
   */
+#ifndef __CUDACC__
 template<typename Derived>
 inline const FullPivLU<typename MatrixBase<Derived>::PlainObject>
 MatrixBase<Derived>::fullPivLu() const
 {
   return FullPivLU<PlainObject>(eval());
 }
+#endif // __CUDACC__
 
 } // end namespace Eigen
 
diff --git a/Eigen/src/LU/Inverse.h b/Eigen/src/LU/Inverse.h
index 3cf887193..8d1364e0a 100644
--- a/Eigen/src/LU/Inverse.h
+++ b/Eigen/src/LU/Inverse.h
@@ -21,6 +21,7 @@ namespace internal {
 template<typename MatrixType, typename ResultType, int Size = MatrixType::RowsAtCompileTime>
 struct compute_inverse
 {
+  EIGEN_DEVICE_FUNC
   static inline void run(const MatrixType& matrix, ResultType& result)
   {
     result = matrix.partialPivLu().inverse();
@@ -37,6 +38,7 @@ struct compute_inverse_and_det_with_check { /* nothing! general case not support
 template<typename MatrixType, typename ResultType>
 struct compute_inverse<MatrixType, ResultType, 1>
 {
+  EIGEN_DEVICE_FUNC
   static inline void run(const MatrixType& matrix, ResultType& result)
   {
     typedef typename MatrixType::Scalar Scalar;
@@ -47,6 +49,7 @@ struct compute_inverse<MatrixType, ResultType, 1>
 template<typename MatrixType, typename ResultType>
 struct compute_inverse_and_det_with_check<MatrixType, ResultType, 1>
 {
+  EIGEN_DEVICE_FUNC
   static inline void run(
     const MatrixType& matrix,
     const typename MatrixType::RealScalar& absDeterminantThreshold,
@@ -67,6 +70,7 @@ struct compute_inverse_and_det_with_check<MatrixType, ResultType, 1>
 ****************************/
 
 template<typename MatrixType, typename ResultType>
+EIGEN_DEVICE_FUNC 
 inline void compute_inverse_size2_helper(
     const MatrixType& matrix, const typename ResultType::Scalar& invdet,
     ResultType& result)
@@ -80,6 +84,7 @@ inline void compute_inverse_size2_helper(
 template<typename MatrixType, typename ResultType>
 struct compute_inverse<MatrixType, ResultType, 2>
 {
+  EIGEN_DEVICE_FUNC
   static inline void run(const MatrixType& matrix, ResultType& result)
   {
     typedef typename ResultType::Scalar Scalar;
@@ -91,6 +96,7 @@ struct compute_inverse<MatrixType, ResultType, 2>
 template<typename MatrixType, typename ResultType>
 struct compute_inverse_and_det_with_check<MatrixType, ResultType, 2>
 {
+  EIGEN_DEVICE_FUNC
   static inline void run(
     const MatrixType& matrix,
     const typename MatrixType::RealScalar& absDeterminantThreshold,
@@ -114,6 +120,7 @@ struct compute_inverse_and_det_with_check<MatrixType, ResultType, 2>
 ****************************/
 
 template<typename MatrixType, int i, int j>
+EIGEN_DEVICE_FUNC 
 inline typename MatrixType::Scalar cofactor_3x3(const MatrixType& m)
 {
   enum {
@@ -127,6 +134,7 @@ inline typename MatrixType::Scalar cofactor_3x3(const MatrixType& m)
 }
 
 template<typename MatrixType, typename ResultType>
+EIGEN_DEVICE_FUNC
 inline void compute_inverse_size3_helper(
     const MatrixType& matrix,
     const typename ResultType::Scalar& invdet,
@@ -145,6 +153,7 @@ inline void compute_inverse_size3_helper(
 template<typename MatrixType, typename ResultType>
 struct compute_inverse<MatrixType, ResultType, 3>
 {
+  EIGEN_DEVICE_FUNC
   static inline void run(const MatrixType& matrix, ResultType& result)
   {
     typedef typename ResultType::Scalar Scalar;
@@ -161,6 +170,7 @@ struct compute_inverse<MatrixType, ResultType, 3>
 template<typename MatrixType, typename ResultType>
 struct compute_inverse_and_det_with_check<MatrixType, ResultType, 3>
 {
+  EIGEN_DEVICE_FUNC
   static inline void run(
     const MatrixType& matrix,
     const typename MatrixType::RealScalar& absDeterminantThreshold,
@@ -188,6 +198,7 @@ struct compute_inverse_and_det_with_check<MatrixType, ResultType, 3>
 ****************************/
 
 template<typename Derived>
+EIGEN_DEVICE_FUNC 
 inline const typename Derived::Scalar general_det3_helper
 (const MatrixBase<Derived>& matrix, int i1, int i2, int i3, int j1, int j2, int j3)
 {
@@ -196,6 +207,7 @@ inline const typename Derived::Scalar general_det3_helper
 }
 
 template<typename MatrixType, int i, int j>
+EIGEN_DEVICE_FUNC 
 inline typename MatrixType::Scalar cofactor_4x4(const MatrixType& matrix)
 {
   enum {
@@ -214,6 +226,7 @@ inline typename MatrixType::Scalar cofactor_4x4(const MatrixType& matrix)
 template<int Arch, typename Scalar, typename MatrixType, typename ResultType>
 struct compute_inverse_size4
 {
+  EIGEN_DEVICE_FUNC
   static void run(const MatrixType& matrix, ResultType& result)
   {
     result.coeffRef(0,0) =  cofactor_4x4<MatrixType,0,0>(matrix);
@@ -246,6 +259,7 @@ struct compute_inverse<MatrixType, ResultType, 4>
 template<typename MatrixType, typename ResultType>
 struct compute_inverse_and_det_with_check<MatrixType, ResultType, 4>
 {
+  EIGEN_DEVICE_FUNC
   static inline void run(
     const MatrixType& matrix,
     const typename MatrixType::RealScalar& absDeterminantThreshold,
@@ -279,14 +293,17 @@ struct inverse_impl : public ReturnByValue<inverse_impl<MatrixType> >
   typedef typename remove_all<MatrixTypeNested>::type MatrixTypeNestedCleaned;
   MatrixTypeNested m_matrix;
 
+  EIGEN_DEVICE_FUNC
   inverse_impl(const MatrixType& matrix)
     : m_matrix(matrix)
   {}
 
-  inline Index rows() const { return m_matrix.rows(); }
-  inline Index cols() const { return m_matrix.cols(); }
+  EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows(); }
+  EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols(); }
 
-  template<typename Dest> inline void evalTo(Dest& dst) const
+  template<typename Dest>
+  EIGEN_DEVICE_FUNC
+  inline void evalTo(Dest& dst) const
   {
     const int Size = EIGEN_PLAIN_ENUM_MIN(MatrixType::ColsAtCompileTime,Dest::ColsAtCompileTime);
     EIGEN_ONLY_USED_FOR_DEBUG(Size);
diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h
index 740ee694c..1d389ecac 100644
--- a/Eigen/src/LU/PartialPivLU.h
+++ b/Eigen/src/LU/PartialPivLU.h
@@ -472,12 +472,14 @@ struct solve_retval<PartialPivLU<_MatrixType>, Rhs>
   *
   * \sa class PartialPivLU
   */
+#ifndef __CUDACC__
 template<typename Derived>
 inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
 MatrixBase<Derived>::partialPivLu() const
 {
   return PartialPivLU<PlainObject>(eval());
 }
+#endif // __CUDACC__
 
 #if EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS
 /** \lu_module
@@ -488,6 +490,7 @@ MatrixBase<Derived>::partialPivLu() const
   *
   * \sa class PartialPivLU
   */
+#ifndef __CUDACC__
 template<typename Derived>
 inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
 MatrixBase<Derived>::lu() const
@@ -496,6 +499,8 @@ MatrixBase<Derived>::lu() const
 }
 #endif
 
+#endif // EIGEN2_SUPPORT_STAGE > STAGE20_RESOLVE_API_CONFLICTS (the #endif above pairs with #ifndef __CUDACC__)
+
 } // end namespace Eigen
 
 #endif // EIGEN_PARTIALLU_H
diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h
index 905bc1935..07126a9f4 100644
--- a/Eigen/src/QR/ColPivHouseholderQR.h
+++ b/Eigen/src/QR/ColPivHouseholderQR.h
@@ -564,6 +564,7 @@ typename ColPivHouseholderQR<MatrixType>::HouseholderSequenceType ColPivHousehol
   return HouseholderSequenceType(m_qr, m_hCoeffs.conjugate()).setLength(m_nonzero_pivots);
 }
 
+#ifndef __CUDACC__
 /** \return the column-pivoting Householder QR decomposition of \c *this.
   *
   * \sa class ColPivHouseholderQR
@@ -574,6 +575,7 @@ MatrixBase<Derived>::colPivHouseholderQr() const
 {
   return ColPivHouseholderQR<PlainObject>(eval());
 }
+#endif // __CUDACC__
 
 } // end namespace Eigen
 
diff --git a/Eigen/src/QR/FullPivHouseholderQR.h b/Eigen/src/QR/FullPivHouseholderQR.h
index a0050859c..13e14db10 100644
--- a/Eigen/src/QR/FullPivHouseholderQR.h
+++ b/Eigen/src/QR/FullPivHouseholderQR.h
@@ -598,6 +598,7 @@ inline typename FullPivHouseholderQR<MatrixType>::MatrixQReturnType FullPivHouse
   return MatrixQReturnType(m_qr, m_hCoeffs, m_rows_transpositions);
 }
 
+#ifndef __CUDACC__
 /** \return the full-pivoting Householder QR decomposition of \c *this.
   *
   * \sa class FullPivHouseholderQR
@@ -608,6 +609,7 @@ MatrixBase<Derived>::fullPivHouseholderQr() const
 {
   return FullPivHouseholderQR<PlainObject>(eval());
 }
+#endif // __CUDACC__
 
 } // end namespace Eigen
 
diff --git a/Eigen/src/QR/HouseholderQR.h b/Eigen/src/QR/HouseholderQR.h
index abc61bcbb..ad156396a 100644
--- a/Eigen/src/QR/HouseholderQR.h
+++ b/Eigen/src/QR/HouseholderQR.h
@@ -358,6 +358,7 @@ HouseholderQR<MatrixType>& HouseholderQR<MatrixType>::compute(const MatrixType&
   return *this;
 }
 
+#ifndef __CUDACC__
 /** \return the Householder QR decomposition of \c *this.
   *
   * \sa class HouseholderQR
@@ -368,6 +369,7 @@ MatrixBase<Derived>::householderQr() const
 {
   return HouseholderQR<PlainObject>(eval());
 }
+#endif // __CUDACC__
 
 } // end namespace Eigen
 
diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h
index 747a4f958..bf1213656 100644
--- a/Eigen/src/SVD/JacobiSVD.h
+++ b/Eigen/src/SVD/JacobiSVD.h
@@ -848,7 +848,7 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
         // if this 2x2 sub-matrix is not diagonal already...
         // notice that this comparison will evaluate to false if any NaN is involved, ensuring that NaN's don't
         // keep us iterating forever. Similarly, small denormal numbers are considered zero.
-        using std::max;
+        EIGEN_USING_STD_MATH(max);
         RealScalar threshold = (max)(considerAsZero, precision * (max)(abs(m_workMatrix.coeff(p,p)),
                                                                        abs(m_workMatrix.coeff(q,q))));
         if((max)(abs(m_workMatrix.coeff(p,q)),abs(m_workMatrix.coeff(q,p))) > threshold)
@@ -930,6 +930,7 @@ struct solve_retval<JacobiSVD<_MatrixType, QRPreconditioner>, Rhs>
 };
 } // end namespace internal
 
+#ifndef __CUDACC__
 /** \svd_module
   *
   * \return the singular value decomposition of \c *this computed by two-sided
@@ -943,6 +944,7 @@ MatrixBase<Derived>::jacobiSvd(unsigned int computationOptions) const
 {
   return JacobiSVD<PlainObject>(*this, computationOptions);
 }
+#endif // __CUDACC__
 
 } // end namespace Eigen
 
diff --git a/Eigen/src/plugins/ArrayCwiseBinaryOps.h b/Eigen/src/plugins/ArrayCwiseBinaryOps.h
index 65d198749..0c22184c0 100644
--- a/Eigen/src/plugins/ArrayCwiseBinaryOps.h
+++ b/Eigen/src/plugins/ArrayCwiseBinaryOps.h
@@ -3,6 +3,7 @@
   * \sa MatrixBase::cwiseProduct
   */
 template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE const EIGEN_CWISE_PRODUCT_RETURN_TYPE(Derived,OtherDerived)
 operator*(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
 {
@@ -14,6 +15,7 @@ operator*(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
   * \sa MatrixBase::cwiseQuotient
   */
 template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived>
 operator/(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
 {
@@ -33,6 +35,7 @@ EIGEN_MAKE_CWISE_BINARY_OP(min,internal::scalar_min_op)
   *
   * \sa max()
   */
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar>, const Derived,
                                         const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> >
 #ifdef EIGEN_PARSED_BY_DOXYGEN
@@ -58,6 +61,7 @@ EIGEN_MAKE_CWISE_BINARY_OP(max,internal::scalar_max_op)
   *
   * \sa min()
   */
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar>, const Derived,
                                         const CwiseNullaryOp<internal::scalar_constant_op<Scalar>, PlainObject> >
 #ifdef EIGEN_PARSED_BY_DOXYGEN
@@ -143,12 +147,14 @@ EIGEN_MAKE_CWISE_BINARY_OP(operator!=,std::not_equal_to)
   *
   * \sa operator+=(), operator-()
   */
+EIGEN_DEVICE_FUNC
 inline const CwiseUnaryOp<internal::scalar_add_op<Scalar>, const Derived>
 operator+(const Scalar& scalar) const
 {
   return CwiseUnaryOp<internal::scalar_add_op<Scalar>, const Derived>(derived(), internal::scalar_add_op<Scalar>(scalar));
 }
 
+EIGEN_DEVICE_FUNC
 friend inline const CwiseUnaryOp<internal::scalar_add_op<Scalar>, const Derived>
 operator+(const Scalar& scalar,const EIGEN_CURRENT_STORAGE_BASE_CLASS<Derived>& other)
 {
@@ -162,12 +168,14 @@ operator+(const Scalar& scalar,const EIGEN_CURRENT_STORAGE_BASE_CLASS<Derived>&
   *
   * \sa operator+(), operator-=()
   */
+EIGEN_DEVICE_FUNC
 inline const CwiseUnaryOp<internal::scalar_sub_op<Scalar>, const Derived>
 operator-(const Scalar& scalar) const
 {
   return CwiseUnaryOp<internal::scalar_sub_op<Scalar>, const Derived>(derived(), internal::scalar_sub_op<Scalar>(scalar));;
 }
 
+EIGEN_DEVICE_FUNC
 friend inline const CwiseUnaryOp<internal::scalar_rsub_op<Scalar>, const Derived>
 operator-(const Scalar& scalar,const EIGEN_CURRENT_STORAGE_BASE_CLASS<Derived>& other)
 {
@@ -184,6 +192,7 @@ operator-(const Scalar& scalar,const EIGEN_CURRENT_STORAGE_BASE_CLASS<Derived>&
   * \sa operator||(), select()
   */
 template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
 inline const CwiseBinaryOp<internal::scalar_boolean_and_op, const Derived, const OtherDerived>
 operator&&(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
 {
@@ -202,6 +211,7 @@ operator&&(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
   * \sa operator&&(), select()
   */
 template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
 inline const CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>
 operator||(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
 {
@@ -209,3 +219,4 @@ operator||(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
                       THIS_METHOD_IS_ONLY_FOR_EXPRESSIONS_OF_BOOL);
   return CwiseBinaryOp<internal::scalar_boolean_or_op, const Derived, const OtherDerived>(derived(),other.derived());
 }
+
diff --git a/Eigen/src/plugins/ArrayCwiseUnaryOps.h b/Eigen/src/plugins/ArrayCwiseUnaryOps.h
index b58d8667c..aea3375ed 100644
--- a/Eigen/src/plugins/ArrayCwiseUnaryOps.h
+++ b/Eigen/src/plugins/ArrayCwiseUnaryOps.h
@@ -7,6 +7,7 @@
   *
   * \sa abs2()
   */
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE const CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived>
 abs() const
 {
@@ -20,6 +21,7 @@ abs() const
   *
   * \sa abs(), square()
   */
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE const CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived>
 abs2() const
 {
@@ -33,6 +35,7 @@ abs2() const
   *
   * \sa pow(), log(), sin(), cos()
   */
+EIGEN_DEVICE_FUNC
 inline const CwiseUnaryOp<internal::scalar_exp_op<Scalar>, const Derived>
 exp() const
 {
@@ -46,6 +49,7 @@ exp() const
   *
   * \sa exp()
   */
+EIGEN_DEVICE_FUNC
 inline const CwiseUnaryOp<internal::scalar_log_op<Scalar>, const Derived>
 log() const
 {
@@ -59,6 +63,7 @@ log() const
   *
   * \sa pow(), square()
   */
+EIGEN_DEVICE_FUNC
 inline const CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived>
 sqrt() const
 {
@@ -72,6 +77,7 @@ sqrt() const
   *
   * \sa sin(), acos()
   */
+EIGEN_DEVICE_FUNC
 inline const CwiseUnaryOp<internal::scalar_cos_op<Scalar>, const Derived>
 cos() const
 {
@@ -86,6 +92,7 @@ cos() const
   *
   * \sa cos(), asin()
   */
+EIGEN_DEVICE_FUNC
 inline const CwiseUnaryOp<internal::scalar_sin_op<Scalar>, const Derived>
 sin() const
 {
@@ -99,6 +106,7 @@ sin() const
   *
   * \sa cos(), asin()
   */
+EIGEN_DEVICE_FUNC
 inline const CwiseUnaryOp<internal::scalar_acos_op<Scalar>, const Derived>
 acos() const
 {
@@ -112,6 +120,7 @@ acos() const
   *
   * \sa sin(), acos()
   */
+EIGEN_DEVICE_FUNC
 inline const CwiseUnaryOp<internal::scalar_asin_op<Scalar>, const Derived>
 asin() const
 {
@@ -125,6 +134,7 @@ asin() const
   *
   * \sa cos(), sin()
   */
+EIGEN_DEVICE_FUNC
 inline const CwiseUnaryOp<internal::scalar_tan_op<Scalar>, Derived>
 tan() const
 {
@@ -139,6 +149,7 @@ tan() const
   *
   * \sa exp(), log()
   */
+EIGEN_DEVICE_FUNC
 inline const CwiseUnaryOp<internal::scalar_pow_op<Scalar>, const Derived>
 pow(const Scalar& exponent) const
 {
@@ -154,6 +165,7 @@ pow(const Scalar& exponent) const
   *
   * \sa operator/(), operator*()
   */
+EIGEN_DEVICE_FUNC
 inline const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived>
 inverse() const
 {
@@ -167,6 +179,7 @@ inverse() const
   *
   * \sa operator/(), operator*(), abs2()
   */
+EIGEN_DEVICE_FUNC
 inline const CwiseUnaryOp<internal::scalar_square_op<Scalar>, const Derived>
 square() const
 {
@@ -180,6 +193,7 @@ square() const
   *
   * \sa square(), pow()
   */
+EIGEN_DEVICE_FUNC
 inline const CwiseUnaryOp<internal::scalar_cube_op<Scalar>, const Derived>
 cube() const
 {
@@ -187,6 +201,7 @@ cube() const
 }
 
 #define EIGEN_MAKE_SCALAR_CWISE_UNARY_OP(METHOD_NAME,FUNCTOR) \
+  EIGEN_DEVICE_FUNC \
   inline const CwiseUnaryOp<std::binder2nd<FUNCTOR<Scalar> >, const Derived> \
   METHOD_NAME(const Scalar& s) const { \
     return CwiseUnaryOp<std::binder2nd<FUNCTOR<Scalar> >, const Derived> \
diff --git a/Eigen/src/plugins/BlockMethods.h b/Eigen/src/plugins/BlockMethods.h
index 6911bedef..3bc345211 100644
--- a/Eigen/src/plugins/BlockMethods.h
+++ b/Eigen/src/plugins/BlockMethods.h
@@ -53,12 +53,14 @@ template<int Size> struct ConstFixedSegmentReturnType { typedef const VectorBloc
   *
   * \sa class Block, block(Index,Index)
   */
+EIGEN_DEVICE_FUNC
 inline Block<Derived> block(Index startRow, Index startCol, Index blockRows, Index blockCols)
 {
   return Block<Derived>(derived(), startRow, startCol, blockRows, blockCols);
 }
 
 /** This is the const version of block(Index,Index,Index,Index). */
+EIGEN_DEVICE_FUNC
 inline const Block<const Derived> block(Index startRow, Index startCol, Index blockRows, Index blockCols) const
 {
   return Block<const Derived>(derived(), startRow, startCol, blockRows, blockCols);
@@ -77,12 +79,14 @@ inline const Block<const Derived> block(Index startRow, Index startCol, Index bl
   *
   * \sa class Block, block(Index,Index,Index,Index)
   */
+EIGEN_DEVICE_FUNC
 inline Block<Derived> topRightCorner(Index cRows, Index cCols)
 {
   return Block<Derived>(derived(), 0, cols() - cCols, cRows, cCols);
 }
 
 /** This is the const version of topRightCorner(Index, Index).*/
+EIGEN_DEVICE_FUNC
 inline const Block<const Derived> topRightCorner(Index cRows, Index cCols) const
 {
   return Block<const Derived>(derived(), 0, cols() - cCols, cRows, cCols);
@@ -99,6 +103,7 @@ inline const Block<const Derived> topRightCorner(Index cRows, Index cCols) const
   * \sa class Block, block<int,int>(Index,Index)
   */
 template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
 inline Block<Derived, CRows, CCols> topRightCorner()
 {
   return Block<Derived, CRows, CCols>(derived(), 0, cols() - CCols);
@@ -106,6 +111,7 @@ inline Block<Derived, CRows, CCols> topRightCorner()
 
 /** This is the const version of topRightCorner<int, int>().*/
 template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
 inline const Block<const Derived, CRows, CCols> topRightCorner() const
 {
   return Block<const Derived, CRows, CCols>(derived(), 0, cols() - CCols);
@@ -153,12 +159,14 @@ inline const Block<const Derived, CRows, CCols> topRightCorner(Index cRows, Inde
   *
   * \sa class Block, block(Index,Index,Index,Index)
   */
+EIGEN_DEVICE_FUNC
 inline Block<Derived> topLeftCorner(Index cRows, Index cCols)
 {
   return Block<Derived>(derived(), 0, 0, cRows, cCols);
 }
 
 /** This is the const version of topLeftCorner(Index, Index).*/
+EIGEN_DEVICE_FUNC
 inline const Block<const Derived> topLeftCorner(Index cRows, Index cCols) const
 {
   return Block<const Derived>(derived(), 0, 0, cRows, cCols);
@@ -174,6 +182,7 @@ inline const Block<const Derived> topLeftCorner(Index cRows, Index cCols) const
   * \sa class Block, block(Index,Index,Index,Index)
   */
 template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
 inline Block<Derived, CRows, CCols> topLeftCorner()
 {
   return Block<Derived, CRows, CCols>(derived(), 0, 0);
@@ -181,6 +190,7 @@ inline Block<Derived, CRows, CCols> topLeftCorner()
 
 /** This is the const version of topLeftCorner<int, int>().*/
 template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
 inline const Block<const Derived, CRows, CCols> topLeftCorner() const
 {
   return Block<const Derived, CRows, CCols>(derived(), 0, 0);
@@ -228,12 +238,14 @@ inline const Block<const Derived, CRows, CCols> topLeftCorner(Index cRows, Index
   *
   * \sa class Block, block(Index,Index,Index,Index)
   */
+EIGEN_DEVICE_FUNC
 inline Block<Derived> bottomRightCorner(Index cRows, Index cCols)
 {
   return Block<Derived>(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
 }
 
 /** This is the const version of bottomRightCorner(Index, Index).*/
+EIGEN_DEVICE_FUNC
 inline const Block<const Derived> bottomRightCorner(Index cRows, Index cCols) const
 {
   return Block<const Derived>(derived(), rows() - cRows, cols() - cCols, cRows, cCols);
@@ -249,6 +261,7 @@ inline const Block<const Derived> bottomRightCorner(Index cRows, Index cCols) co
   * \sa class Block, block(Index,Index,Index,Index)
   */
 template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
 inline Block<Derived, CRows, CCols> bottomRightCorner()
 {
   return Block<Derived, CRows, CCols>(derived(), rows() - CRows, cols() - CCols);
@@ -256,6 +269,7 @@ inline Block<Derived, CRows, CCols> bottomRightCorner()
 
 /** This is the const version of bottomRightCorner<int, int>().*/
 template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
 inline const Block<const Derived, CRows, CCols> bottomRightCorner() const
 {
   return Block<const Derived, CRows, CCols>(derived(), rows() - CRows, cols() - CCols);
@@ -303,12 +317,14 @@ inline const Block<const Derived, CRows, CCols> bottomRightCorner(Index cRows, I
   *
   * \sa class Block, block(Index,Index,Index,Index)
   */
+EIGEN_DEVICE_FUNC
 inline Block<Derived> bottomLeftCorner(Index cRows, Index cCols)
 {
   return Block<Derived>(derived(), rows() - cRows, 0, cRows, cCols);
 }
 
 /** This is the const version of bottomLeftCorner(Index, Index).*/
+EIGEN_DEVICE_FUNC
 inline const Block<const Derived> bottomLeftCorner(Index cRows, Index cCols) const
 {
   return Block<const Derived>(derived(), rows() - cRows, 0, cRows, cCols);
@@ -324,6 +340,7 @@ inline const Block<const Derived> bottomLeftCorner(Index cRows, Index cCols) con
   * \sa class Block, block(Index,Index,Index,Index)
   */
 template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
 inline Block<Derived, CRows, CCols> bottomLeftCorner()
 {
   return Block<Derived, CRows, CCols>(derived(), rows() - CRows, 0);
@@ -331,6 +348,7 @@ inline Block<Derived, CRows, CCols> bottomLeftCorner()
 
 /** This is the const version of bottomLeftCorner<int, int>().*/
 template<int CRows, int CCols>
+EIGEN_DEVICE_FUNC
 inline const Block<const Derived, CRows, CCols> bottomLeftCorner() const
 {
   return Block<const Derived, CRows, CCols>(derived(), rows() - CRows, 0);
@@ -377,12 +395,14 @@ inline const Block<const Derived, CRows, CCols> bottomLeftCorner(Index cRows, In
   *
   * \sa class Block, block(Index,Index,Index,Index)
   */
+EIGEN_DEVICE_FUNC
 inline RowsBlockXpr topRows(Index n)
 {
   return RowsBlockXpr(derived(), 0, 0, n, cols());
 }
 
 /** This is the const version of topRows(Index).*/
+EIGEN_DEVICE_FUNC
 inline ConstRowsBlockXpr topRows(Index n) const
 {
   return ConstRowsBlockXpr(derived(), 0, 0, n, cols());
@@ -398,6 +418,7 @@ inline ConstRowsBlockXpr topRows(Index n) const
   * \sa class Block, block(Index,Index,Index,Index)
   */
 template<int N>
+EIGEN_DEVICE_FUNC
 inline typename NRowsBlockXpr<N>::Type topRows()
 {
   return typename NRowsBlockXpr<N>::Type(derived(), 0, 0, N, cols());
@@ -405,6 +426,7 @@ inline typename NRowsBlockXpr<N>::Type topRows()
 
 /** This is the const version of topRows<int>().*/
 template<int N>
+EIGEN_DEVICE_FUNC
 inline typename ConstNRowsBlockXpr<N>::Type topRows() const
 {
   return typename ConstNRowsBlockXpr<N>::Type(derived(), 0, 0, N, cols());
@@ -421,12 +443,14 @@ inline typename ConstNRowsBlockXpr<N>::Type topRows() const
   *
   * \sa class Block, block(Index,Index,Index,Index)
   */
+EIGEN_DEVICE_FUNC
 inline RowsBlockXpr bottomRows(Index n)
 {
   return RowsBlockXpr(derived(), rows() - n, 0, n, cols());
 }
 
 /** This is the const version of bottomRows(Index).*/
+EIGEN_DEVICE_FUNC
 inline ConstRowsBlockXpr bottomRows(Index n) const
 {
   return ConstRowsBlockXpr(derived(), rows() - n, 0, n, cols());
@@ -442,6 +466,7 @@ inline ConstRowsBlockXpr bottomRows(Index n) const
   * \sa class Block, block(Index,Index,Index,Index)
   */
 template<int N>
+EIGEN_DEVICE_FUNC
 inline typename NRowsBlockXpr<N>::Type bottomRows()
 {
   return typename NRowsBlockXpr<N>::Type(derived(), rows() - N, 0, N, cols());
@@ -449,6 +474,7 @@ inline typename NRowsBlockXpr<N>::Type bottomRows()
 
 /** This is the const version of bottomRows<int>().*/
 template<int N>
+EIGEN_DEVICE_FUNC
 inline typename ConstNRowsBlockXpr<N>::Type bottomRows() const
 {
   return typename ConstNRowsBlockXpr<N>::Type(derived(), rows() - N, 0, N, cols());
@@ -466,12 +492,14 @@ inline typename ConstNRowsBlockXpr<N>::Type bottomRows() const
   *
   * \sa class Block, block(Index,Index,Index,Index)
   */
+EIGEN_DEVICE_FUNC
 inline RowsBlockXpr middleRows(Index startRow, Index numRows)
 {
   return RowsBlockXpr(derived(), startRow, 0, numRows, cols());
 }
 
 /** This is the const version of middleRows(Index,Index).*/
+EIGEN_DEVICE_FUNC
 inline ConstRowsBlockXpr middleRows(Index startRow, Index numRows) const
 {
   return ConstRowsBlockXpr(derived(), startRow, 0, numRows, cols());
@@ -488,6 +516,7 @@ inline ConstRowsBlockXpr middleRows(Index startRow, Index numRows) const
   * \sa class Block, block(Index,Index,Index,Index)
   */
 template<int N>
+EIGEN_DEVICE_FUNC
 inline typename NRowsBlockXpr<N>::Type middleRows(Index startRow)
 {
   return typename NRowsBlockXpr<N>::Type(derived(), startRow, 0, N, cols());
@@ -495,6 +524,7 @@ inline typename NRowsBlockXpr<N>::Type middleRows(Index startRow)
 
 /** This is the const version of middleRows<int>().*/
 template<int N>
+EIGEN_DEVICE_FUNC
 inline typename ConstNRowsBlockXpr<N>::Type middleRows(Index startRow) const
 {
   return typename ConstNRowsBlockXpr<N>::Type(derived(), startRow, 0, N, cols());
@@ -511,12 +541,14 @@ inline typename ConstNRowsBlockXpr<N>::Type middleRows(Index startRow) const
   *
   * \sa class Block, block(Index,Index,Index,Index)
   */
+EIGEN_DEVICE_FUNC
 inline ColsBlockXpr leftCols(Index n)
 {
   return ColsBlockXpr(derived(), 0, 0, rows(), n);
 }
 
 /** This is the const version of leftCols(Index).*/
+EIGEN_DEVICE_FUNC
 inline ConstColsBlockXpr leftCols(Index n) const
 {
   return ConstColsBlockXpr(derived(), 0, 0, rows(), n);
@@ -532,6 +564,7 @@ inline ConstColsBlockXpr leftCols(Index n) const
   * \sa class Block, block(Index,Index,Index,Index)
   */
 template<int N>
+EIGEN_DEVICE_FUNC
 inline typename NColsBlockXpr<N>::Type leftCols()
 {
   return typename NColsBlockXpr<N>::Type(derived(), 0, 0, rows(), N);
@@ -539,6 +572,7 @@ inline typename NColsBlockXpr<N>::Type leftCols()
 
 /** This is the const version of leftCols<int>().*/
 template<int N>
+EIGEN_DEVICE_FUNC
 inline typename ConstNColsBlockXpr<N>::Type leftCols() const
 {
   return typename ConstNColsBlockXpr<N>::Type(derived(), 0, 0, rows(), N);
@@ -555,12 +589,14 @@ inline typename ConstNColsBlockXpr<N>::Type leftCols() const
   *
   * \sa class Block, block(Index,Index,Index,Index)
   */
+EIGEN_DEVICE_FUNC
 inline ColsBlockXpr rightCols(Index n)
 {
   return ColsBlockXpr(derived(), 0, cols() - n, rows(), n);
 }
 
 /** This is the const version of rightCols(Index).*/
+EIGEN_DEVICE_FUNC
 inline ConstColsBlockXpr rightCols(Index n) const
 {
   return ConstColsBlockXpr(derived(), 0, cols() - n, rows(), n);
@@ -576,6 +612,7 @@ inline ConstColsBlockXpr rightCols(Index n) const
   * \sa class Block, block(Index,Index,Index,Index)
   */
 template<int N>
+EIGEN_DEVICE_FUNC
 inline typename NColsBlockXpr<N>::Type rightCols()
 {
   return typename NColsBlockXpr<N>::Type(derived(), 0, cols() - N, rows(), N);
@@ -583,6 +620,7 @@ inline typename NColsBlockXpr<N>::Type rightCols()
 
 /** This is the const version of rightCols<int>().*/
 template<int N>
+EIGEN_DEVICE_FUNC
 inline typename ConstNColsBlockXpr<N>::Type rightCols() const
 {
   return typename ConstNColsBlockXpr<N>::Type(derived(), 0, cols() - N, rows(), N);
@@ -600,12 +638,14 @@ inline typename ConstNColsBlockXpr<N>::Type rightCols() const
   *
   * \sa class Block, block(Index,Index,Index,Index)
   */
+EIGEN_DEVICE_FUNC
 inline ColsBlockXpr middleCols(Index startCol, Index numCols)
 {
   return ColsBlockXpr(derived(), 0, startCol, rows(), numCols);
 }
 
 /** This is the const version of middleCols(Index,Index).*/
+EIGEN_DEVICE_FUNC
 inline ConstColsBlockXpr middleCols(Index startCol, Index numCols) const
 {
   return ConstColsBlockXpr(derived(), 0, startCol, rows(), numCols);
@@ -622,6 +662,7 @@ inline ConstColsBlockXpr middleCols(Index startCol, Index numCols) const
   * \sa class Block, block(Index,Index,Index,Index)
   */
 template<int N>
+EIGEN_DEVICE_FUNC
 inline typename NColsBlockXpr<N>::Type middleCols(Index startCol)
 {
   return typename NColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), N);
@@ -629,6 +670,7 @@ inline typename NColsBlockXpr<N>::Type middleCols(Index startCol)
 
 /** This is the const version of middleCols<int>().*/
 template<int N>
+EIGEN_DEVICE_FUNC
 inline typename ConstNColsBlockXpr<N>::Type middleCols(Index startCol) const
 {
   return typename ConstNColsBlockXpr<N>::Type(derived(), 0, startCol, rows(), N);
@@ -653,6 +695,7 @@ inline typename ConstNColsBlockXpr<N>::Type middleCols(Index startCol) const
   * \sa class Block, block(Index,Index,Index,Index)
   */
 template<int BlockRows, int BlockCols>
+EIGEN_DEVICE_FUNC
 inline Block<Derived, BlockRows, BlockCols> block(Index startRow, Index startCol)
 {
   return Block<Derived, BlockRows, BlockCols>(derived(), startRow, startCol);
@@ -660,6 +703,7 @@ inline Block<Derived, BlockRows, BlockCols> block(Index startRow, Index startCol
 
 /** This is the const version of block<>(Index, Index). */
 template<int BlockRows, int BlockCols>
+EIGEN_DEVICE_FUNC
 inline const Block<const Derived, BlockRows, BlockCols> block(Index startRow, Index startCol) const
 {
   return Block<const Derived, BlockRows, BlockCols>(derived(), startRow, startCol);
@@ -705,12 +749,14 @@ inline const Block<const Derived, BlockRows, BlockCols> block(Index startRow, In
   * Output: \verbinclude MatrixBase_col.out
   *
   * \sa row(), class Block */
+EIGEN_DEVICE_FUNC
 inline ColXpr col(Index i)
 {
   return ColXpr(derived(), i);
 }
 
 /** This is the const version of col(). */
+EIGEN_DEVICE_FUNC
 inline ConstColXpr col(Index i) const
 {
   return ConstColXpr(derived(), i);
@@ -722,12 +768,14 @@ inline ConstColXpr col(Index i) const
   * Output: \verbinclude MatrixBase_row.out
   *
   * \sa col(), class Block */
+EIGEN_DEVICE_FUNC
 inline RowXpr row(Index i)
 {
   return RowXpr(derived(), i);
 }
 
 /** This is the const version of row(). */
+EIGEN_DEVICE_FUNC
 inline ConstRowXpr row(Index i) const
 {
   return ConstRowXpr(derived(), i);
@@ -749,6 +797,7 @@ inline ConstRowXpr row(Index i) const
   *
   * \sa class Block, segment(Index)
   */
+EIGEN_DEVICE_FUNC
 inline SegmentReturnType segment(Index start, Index vecSize)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@@ -757,6 +806,7 @@ inline SegmentReturnType segment(Index start, Index vecSize)
 
 
 /** This is the const version of segment(Index,Index).*/
+EIGEN_DEVICE_FUNC
 inline ConstSegmentReturnType segment(Index start, Index vecSize) const
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@@ -778,6 +828,7 @@ inline ConstSegmentReturnType segment(Index start, Index vecSize) const
   *
   * \sa class Block, block(Index,Index)
   */
+EIGEN_DEVICE_FUNC
 inline SegmentReturnType head(Index vecSize)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@@ -785,6 +836,7 @@ inline SegmentReturnType head(Index vecSize)
 }
 
 /** This is the const version of head(Index).*/
+EIGEN_DEVICE_FUNC
 inline ConstSegmentReturnType
   head(Index vecSize) const
 {
@@ -807,6 +859,7 @@ inline ConstSegmentReturnType
   *
   * \sa class Block, block(Index,Index)
   */
+EIGEN_DEVICE_FUNC
 inline SegmentReturnType tail(Index vecSize)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@@ -814,6 +867,7 @@ inline SegmentReturnType tail(Index vecSize)
 }
 
 /** This is the const version of tail(Index).*/
+EIGEN_DEVICE_FUNC
 inline ConstSegmentReturnType tail(Index vecSize) const
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@@ -834,6 +888,7 @@ inline ConstSegmentReturnType tail(Index vecSize) const
   * \sa class Block
   */
 template<int Size>
+EIGEN_DEVICE_FUNC
 inline typename FixedSegmentReturnType<Size>::Type segment(Index start)
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@@ -842,6 +897,7 @@ inline typename FixedSegmentReturnType<Size>::Type segment(Index start)
 
 /** This is the const version of segment<int>(Index).*/
 template<int Size>
+EIGEN_DEVICE_FUNC
 inline typename ConstFixedSegmentReturnType<Size>::Type segment(Index start) const
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@@ -860,6 +916,7 @@ inline typename ConstFixedSegmentReturnType<Size>::Type segment(Index start) con
   * \sa class Block
   */
 template<int Size>
+EIGEN_DEVICE_FUNC
 inline typename FixedSegmentReturnType<Size>::Type head()
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@@ -868,6 +925,7 @@ inline typename FixedSegmentReturnType<Size>::Type head()
 
 /** This is the const version of head<int>().*/
 template<int Size>
+EIGEN_DEVICE_FUNC
 inline typename ConstFixedSegmentReturnType<Size>::Type head() const
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@@ -886,6 +944,7 @@ inline typename ConstFixedSegmentReturnType<Size>::Type head() const
   * \sa class Block
   */
 template<int Size>
+EIGEN_DEVICE_FUNC
 inline typename FixedSegmentReturnType<Size>::Type tail()
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
@@ -894,6 +953,7 @@ inline typename FixedSegmentReturnType<Size>::Type tail()
 
 /** This is the const version of tail<int>.*/
 template<int Size>
+EIGEN_DEVICE_FUNC
 inline typename ConstFixedSegmentReturnType<Size>::Type tail() const
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
diff --git a/Eigen/src/plugins/CommonCwiseBinaryOps.h b/Eigen/src/plugins/CommonCwiseBinaryOps.h
index 688d22440..a8fa287c9 100644
--- a/Eigen/src/plugins/CommonCwiseBinaryOps.h
+++ b/Eigen/src/plugins/CommonCwiseBinaryOps.h
@@ -38,6 +38,7 @@ EIGEN_MAKE_CWISE_BINARY_OP(operator+,internal::scalar_sum_op)
   * \sa class CwiseBinaryOp, operator+(), operator-(), cwiseProduct()
   */
 template<typename CustomBinaryOp, typename OtherDerived>
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE const CwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>
 binaryExpr(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other, const CustomBinaryOp& func = CustomBinaryOp()) const
 {
diff --git a/Eigen/src/plugins/CommonCwiseUnaryOps.h b/Eigen/src/plugins/CommonCwiseUnaryOps.h
index 08e931aad..a17153e64 100644
--- a/Eigen/src/plugins/CommonCwiseUnaryOps.h
+++ b/Eigen/src/plugins/CommonCwiseUnaryOps.h
@@ -40,11 +40,13 @@ typedef CwiseUnaryView<internal::scalar_imag_ref_op<Scalar>, Derived> NonConstIm
 
 /** \returns an expression of the opposite of \c *this
   */
+EIGEN_DEVICE_FUNC
 inline const CwiseUnaryOp<internal::scalar_opposite_op<typename internal::traits<Derived>::Scalar>, const Derived>
 operator-() const { return derived(); }
 
 
 /** \returns an expression of \c *this scaled by the scalar factor \a scalar */
+EIGEN_DEVICE_FUNC
 inline const ScalarMultipleReturnType
 operator*(const Scalar& scalar) const
 {
@@ -57,6 +59,7 @@ const ScalarMultipleReturnType operator*(const RealScalar& scalar) const;
 #endif
 
 /** \returns an expression of \c *this divided by the scalar value \a scalar */
+EIGEN_DEVICE_FUNC
 inline const CwiseUnaryOp<internal::scalar_quotient1_op<typename internal::traits<Derived>::Scalar>, const Derived>
 operator/(const Scalar& scalar) const
 {
@@ -65,6 +68,7 @@ operator/(const Scalar& scalar) const
 }
 
 /** Overloaded for efficient real matrix times complex scalar value */
+EIGEN_DEVICE_FUNC
 inline const CwiseUnaryOp<internal::scalar_multiple2_op<Scalar,std::complex<Scalar> >, const Derived>
 operator*(const std::complex<Scalar>& scalar) const
 {
@@ -72,10 +76,12 @@ operator*(const std::complex<Scalar>& scalar) const
     (*static_cast<const Derived*>(this), internal::scalar_multiple2_op<Scalar,std::complex<Scalar> >(scalar));
 }
 
+EIGEN_DEVICE_FUNC
 inline friend const ScalarMultipleReturnType
 operator*(const Scalar& scalar, const StorageBaseType& matrix)
 { return matrix*scalar; }
 
+EIGEN_DEVICE_FUNC
 inline friend const CwiseUnaryOp<internal::scalar_multiple2_op<Scalar,std::complex<Scalar> >, const Derived>
 operator*(const std::complex<Scalar>& scalar, const StorageBaseType& matrix)
 { return matrix*scalar; }
@@ -88,6 +94,7 @@ operator*(const std::complex<Scalar>& scalar, const StorageBaseType& matrix)
   * \sa class CwiseUnaryOp
   */
 template<typename NewType>
+EIGEN_DEVICE_FUNC
 typename internal::cast_return_type<Derived,const CwiseUnaryOp<internal::scalar_cast_op<typename internal::traits<Derived>::Scalar, NewType>, const Derived> >::type
 cast() const
 {
@@ -97,6 +104,7 @@ cast() const
 /** \returns an expression of the complex conjugate of \c *this.
   *
   * \sa adjoint() */
+EIGEN_DEVICE_FUNC
 inline ConjugateReturnType
 conjugate() const
 {
@@ -106,12 +114,14 @@ conjugate() const
 /** \returns a read-only expression of the real part of \c *this.
   *
   * \sa imag() */
+EIGEN_DEVICE_FUNC
 inline RealReturnType
 real() const { return derived(); }
 
 /** \returns an read-only expression of the imaginary part of \c *this.
   *
   * \sa real() */
+EIGEN_DEVICE_FUNC
 inline const ImagReturnType
 imag() const { return derived(); }
 
@@ -135,6 +145,7 @@ imag() const { return derived(); }
   * \sa class CwiseUnaryOp, class CwiseBinaryOp
   */
 template<typename CustomUnaryOp>
+EIGEN_DEVICE_FUNC
 inline const CwiseUnaryOp<CustomUnaryOp, const Derived>
 unaryExpr(const CustomUnaryOp& func = CustomUnaryOp()) const
 {
@@ -153,6 +164,7 @@ unaryExpr(const CustomUnaryOp& func = CustomUnaryOp()) const
   * \sa class CwiseUnaryOp, class CwiseBinaryOp
   */
 template<typename CustomViewOp>
+EIGEN_DEVICE_FUNC
 inline const CwiseUnaryView<CustomViewOp, const Derived>
 unaryViewExpr(const CustomViewOp& func = CustomViewOp()) const
 {
@@ -162,11 +174,13 @@ unaryViewExpr(const CustomViewOp& func = CustomViewOp()) const
 /** \returns a non const expression of the real part of \c *this.
   *
   * \sa imag() */
+EIGEN_DEVICE_FUNC
 inline NonConstRealReturnType
 real() { return derived(); }
 
 /** \returns a non const expression of the imaginary part of \c *this.
   *
   * \sa real() */
+EIGEN_DEVICE_FUNC
 inline NonConstImagReturnType
 imag() { return derived(); }
diff --git a/Eigen/src/plugins/MatrixCwiseBinaryOps.h b/Eigen/src/plugins/MatrixCwiseBinaryOps.h
index 7f62149e0..b9582a5a0 100644
--- a/Eigen/src/plugins/MatrixCwiseBinaryOps.h
+++ b/Eigen/src/plugins/MatrixCwiseBinaryOps.h
@@ -18,6 +18,7 @@
   * \sa class CwiseBinaryOp, cwiseAbs2
   */
 template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE const EIGEN_CWISE_PRODUCT_RETURN_TYPE(Derived,OtherDerived)
 cwiseProduct(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
 {
@@ -37,6 +38,7 @@ cwiseProduct(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
   * \sa cwiseNotEqual(), isApprox(), isMuchSmallerThan()
   */
 template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
 inline const CwiseBinaryOp<std::equal_to<Scalar>, const Derived, const OtherDerived>
 cwiseEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
 {
@@ -56,6 +58,7 @@ cwiseEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
   * \sa cwiseEqual(), isApprox(), isMuchSmallerThan()
   */
 template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
 inline const CwiseBinaryOp<std::not_equal_to<Scalar>, const Derived, const OtherDerived>
 cwiseNotEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
 {
@@ -70,6 +73,7 @@ cwiseNotEqual(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
   * \sa class CwiseBinaryOp, max()
   */
 template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar>, const Derived, const OtherDerived>
 cwiseMin(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
 {
@@ -80,6 +84,7 @@ cwiseMin(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
   *
   * \sa class CwiseBinaryOp, min()
   */
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_min_op<Scalar>, const Derived, const ConstantReturnType>
 cwiseMin(const Scalar &other) const
 {
@@ -94,6 +99,7 @@ cwiseMin(const Scalar &other) const
   * \sa class CwiseBinaryOp, min()
   */
 template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar>, const Derived, const OtherDerived>
 cwiseMax(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
 {
@@ -104,6 +110,7 @@ cwiseMax(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
   *
   * \sa class CwiseBinaryOp, min()
   */
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_max_op<Scalar>, const Derived, const ConstantReturnType>
 cwiseMax(const Scalar &other) const
 {
@@ -119,6 +126,7 @@ cwiseMax(const Scalar &other) const
   * \sa class CwiseBinaryOp, cwiseProduct(), cwiseInverse()
   */
 template<typename OtherDerived>
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE const CwiseBinaryOp<internal::scalar_quotient_op<Scalar>, const Derived, const OtherDerived>
 cwiseQuotient(const EIGEN_CURRENT_STORAGE_BASE_CLASS<OtherDerived> &other) const
 {
diff --git a/Eigen/src/plugins/MatrixCwiseUnaryOps.h b/Eigen/src/plugins/MatrixCwiseUnaryOps.h
index 0cf0640ba..1bb15f862 100644
--- a/Eigen/src/plugins/MatrixCwiseUnaryOps.h
+++ b/Eigen/src/plugins/MatrixCwiseUnaryOps.h
@@ -17,6 +17,7 @@
   *
   * \sa cwiseAbs2()
   */
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE const CwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived>
 cwiseAbs() const { return derived(); }
 
@@ -27,6 +28,7 @@ cwiseAbs() const { return derived(); }
   *
   * \sa cwiseAbs()
   */
+EIGEN_DEVICE_FUNC
 EIGEN_STRONG_INLINE const CwiseUnaryOp<internal::scalar_abs2_op<Scalar>, const Derived>
 cwiseAbs2() const { return derived(); }
 
@@ -37,6 +39,7 @@ cwiseAbs2() const { return derived(); }
   *
   * \sa cwisePow(), cwiseSquare()
   */
+EIGEN_DEVICE_FUNC
 inline const CwiseUnaryOp<internal::scalar_sqrt_op<Scalar>, const Derived>
 cwiseSqrt() const { return derived(); }
 
@@ -47,6 +50,7 @@ cwiseSqrt() const { return derived(); }
   *
   * \sa cwiseProduct()
   */
+EIGEN_DEVICE_FUNC
 inline const CwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived>
 cwiseInverse() const { return derived(); }
 
@@ -59,6 +63,7 @@ cwiseInverse() const { return derived(); }
   *
   * \sa cwiseEqual(const MatrixBase<OtherDerived> &) const
   */
+EIGEN_DEVICE_FUNC
 inline const CwiseUnaryOp<std::binder1st<std::equal_to<Scalar> >, const Derived>
 cwiseEqual(const Scalar& s) const
 {