From 48c635e22335569465d8977421b614372d8bd852 Mon Sep 17 00:00:00 2001
From: Rasmus Munk Larsen <rmlarsen@google.com>
Date: Thu, 6 Oct 2016 10:33:10 -0700
Subject: [PATCH 1/8] Add a simple cost model to prevent Eigen's parallel GEMM
 from using too many threads when the inner dimension is small.

Timing for square matrices is unchanged, but both CPU and Wall time are significantly improved for skinny matrices. The benchmarks below are for multiplying NxK * KxN matrices with test names of the form BM_OuterishProd/N/K.

Improvements in Wall time:

Run on [redacted] (12 X 3501 MHz CPUs); 2016-10-05T17:40:02.462497196-07:00
CPU: Intel Haswell with HyperThreading (6 cores) dL1:32KB dL2:256KB dL3:15MB
Benchmark                          Base (ns)  New (ns) Improvement
------------------------------------------------------------------
BM_OuterishProd/64/1                    3088      1610    +47.9%
BM_OuterishProd/64/4                    3562      2414    +32.2%
BM_OuterishProd/64/32                   8861      7815    +11.8%
BM_OuterishProd/128/1                  11363      6504    +42.8%
BM_OuterishProd/128/4                  11128      9794    +12.0%
BM_OuterishProd/128/64                 27691     27396     +1.1%
BM_OuterishProd/256/1                  33214     28123    +15.3%
BM_OuterishProd/256/4                  34312     36818     -7.3%
BM_OuterishProd/256/128               174866    176398     -0.9%
BM_OuterishProd/512/1                7963684    104224    +98.7%
BM_OuterishProd/512/4                7987913    112867    +98.6%
BM_OuterishProd/512/256              8198378   1306500    +84.1%
BM_OuterishProd/1k/1                 7356256    324432    +95.6%
BM_OuterishProd/1k/4                 8129616    331621    +95.9%
BM_OuterishProd/1k/512              27265418   7517538    +72.4%

Improvements in CPU time:

Run on [redacted] (12 X 3501 MHz CPUs); 2016-10-05T17:40:02.462497196-07:00
CPU: Intel Haswell with HyperThreading (6 cores) dL1:32KB dL2:256KB dL3:15MB
Benchmark                          Base (ns)  New (ns) Improvement
------------------------------------------------------------------
BM_OuterishProd/64/1                    6169      1608    +73.9%
BM_OuterishProd/64/4                    7117      2412    +66.1%
BM_OuterishProd/64/32                  17702     15616    +11.8%
BM_OuterishProd/128/1                  45415      6498    +85.7%
BM_OuterishProd/128/4                  44459      9786    +78.0%
BM_OuterishProd/128/64                110657    109489     +1.1%
BM_OuterishProd/256/1                 265158     28101    +89.4%
BM_OuterishProd/256/4                 274234    183885    +32.9%
BM_OuterishProd/256/128              1397160   1408776     -0.8%
BM_OuterishProd/512/1               78947048    520703    +99.3%
BM_OuterishProd/512/4               86955578   1349742    +98.4%
BM_OuterishProd/512/256             74701613  15584661    +79.1%
BM_OuterishProd/1k/1                78352601   3877911    +95.1%
BM_OuterishProd/1k/4                78521643   3966221    +94.9%
BM_OuterishProd/1k/512              258104736  89480530    +65.3%
---
 Eigen/src/Core/products/GeneralMatrixMatrix.h | 50 +++++++++----------
 Eigen/src/Core/products/Parallelizer.h        | 19 ++++---
 2 files changed, 38 insertions(+), 31 deletions(-)

diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h
index b1465c3b5..61df3be57 100644
--- a/Eigen/src/Core/products/GeneralMatrixMatrix.h
+++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h
@@ -10,7 +10,7 @@
 #ifndef EIGEN_GENERAL_MATRIX_MATRIX_H
 #define EIGEN_GENERAL_MATRIX_MATRIX_H
 
-namespace Eigen { 
+namespace Eigen {
 
 namespace internal {
 
@@ -24,7 +24,7 @@ template<
 struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLhs,RhsScalar,RhsStorageOrder,ConjugateRhs,RowMajor>
 {
   typedef gebp_traits<RhsScalar,LhsScalar> Traits;
-  
+
   typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
   static EIGEN_STRONG_INLINE void run(
     Index rows, Index cols, Index depth,
@@ -54,7 +54,7 @@ struct general_matrix_matrix_product<Index,LhsScalar,LhsStorageOrder,ConjugateLh
 {
 
 typedef gebp_traits<LhsScalar,RhsScalar> Traits;
-  
+
 typedef typename ScalarBinaryOpTraits<LhsScalar, RhsScalar>::ReturnType ResScalar;
 static void run(Index rows, Index cols, Index depth,
   const LhsScalar* _lhs, Index lhsStride,
@@ -85,13 +85,13 @@ static void run(Index rows, Index cols, Index depth,
     // this is the parallel version!
     Index tid = omp_get_thread_num();
     Index threads = omp_get_num_threads();
-    
+
     LhsScalar* blockA = blocking.blockA();
     eigen_internal_assert(blockA!=0);
-    
+
     std::size_t sizeB = kc*nc;
     ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, 0);
-      
+
     // For each horizontal panel of the rhs, and corresponding vertical panel of the lhs...
     for(Index k=0; k<depth; k+=kc)
     {
@@ -114,7 +114,7 @@ static void run(Index rows, Index cols, Index depth,
 
       // Notify the other threads that the part A'_i is ready to go.
       info[tid].sync = k;
-      
+
       // Computes C_i += A' * B' per A'_i
       for(Index shift=0; shift<threads; ++shift)
       {
@@ -161,7 +161,7 @@ static void run(Index rows, Index cols, Index depth,
 
     ei_declare_aligned_stack_constructed_variable(LhsScalar, blockA, sizeA, blocking.blockA());
     ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, blocking.blockB());
-    
+
     const bool pack_rhs_once = mc!=rows && kc==depth && nc==cols;
 
     // For each horizontal panel of the rhs, and corresponding panel of the lhs...
@@ -172,24 +172,24 @@ static void run(Index rows, Index cols, Index depth,
       for(Index k2=0; k2<depth; k2+=kc)
       {
         const Index actual_kc = (std::min)(k2+kc,depth)-k2;
-        
+
         // OK, here we have selected one horizontal panel of rhs and one vertical panel of lhs.
         // => Pack lhs's panel into a sequential chunk of memory (L2/L3 caching)
         // Note that this panel will be read as many times as the number of blocks in the rhs's
         // horizontal panel which is, in practice, a very low number.
         pack_lhs(blockA, lhs.getSubMapper(i2,k2), actual_kc, actual_mc);
-        
+
         // For each kc x nc block of the rhs's horizontal panel...
         for(Index j2=0; j2<cols; j2+=nc)
         {
           const Index actual_nc = (std::min)(j2+nc,cols)-j2;
-          
+
           // We pack the rhs's block into a sequential chunk of memory (L2 caching)
           // Note that this block will be read a very high number of times, which is equal to the number of
           // micro horizontal panel of the large rhs's panel (e.g., rows/12 times).
           if((!pack_rhs_once) || i2==0)
             pack_rhs(blockB, rhs.getSubMapper(k2,j2), actual_kc, actual_nc);
-          
+
           // Everything is packed, we can now call the panel * block kernel:
           gebp(res.getSubMapper(i2, j2), blockA, blockB, actual_mc, actual_kc, actual_nc, alpha);
         }
@@ -229,7 +229,7 @@ struct gemm_functor
               (Scalar*)&(m_dest.coeffRef(row,col)), m_dest.outerStride(),
               m_actualAlpha, m_blocking, info);
   }
-  
+
   typedef typename Gemm::Traits Traits;
 
   protected:
@@ -313,7 +313,7 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
       this->m_blockB = reinterpret_cast<RhsScalar*>((internal::UIntPtr(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
 #endif
     }
-    
+
     void initParallel(Index, Index, Index, Index)
     {}
 
@@ -359,14 +359,14 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
       m_sizeA = this->m_mc * this->m_kc;
       m_sizeB = this->m_kc * this->m_nc;
     }
-    
+
     void initParallel(Index rows, Index cols, Index depth, Index num_threads)
     {
       this->m_mc = Transpose ? cols : rows;
       this->m_nc = Transpose ? rows : cols;
       this->m_kc = depth;
-      
-      eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0);      
+
+      eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0);
       Index m = this->m_mc;
       computeProductBlockingSizes<LhsScalar,RhsScalar,KcFactor>(this->m_kc, m, this->m_nc, num_threads);
       m_sizeA = this->m_mc * this->m_kc;
@@ -401,7 +401,7 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
 } // end namespace internal
 
 namespace internal {
-  
+
 template<typename Lhs, typename Rhs>
 struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
   : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct> >
@@ -409,21 +409,21 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
   typedef typename Product<Lhs,Rhs>::Scalar Scalar;
   typedef typename Lhs::Scalar LhsScalar;
   typedef typename Rhs::Scalar RhsScalar;
-  
+
   typedef internal::blas_traits<Lhs> LhsBlasTraits;
   typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
   typedef typename internal::remove_all<ActualLhsType>::type ActualLhsTypeCleaned;
-  
+
   typedef internal::blas_traits<Rhs> RhsBlasTraits;
   typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
   typedef typename internal::remove_all<ActualRhsType>::type ActualRhsTypeCleaned;
-  
+
   enum {
     MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime)
   };
-  
+
   typedef generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> lazyproduct;
-  
+
   template<typename Dst>
   static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
   {
@@ -453,7 +453,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
     else
       scaleAndAddTo(dst, lhs, rhs, Scalar(-1));
   }
-  
+
   template<typename Dest>
   static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha)
   {
@@ -481,7 +481,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
 
     BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), 1, true);
     internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)>
-                              (GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), Dest::Flags&RowMajorBit);
+        (GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), a_lhs.cols(), Dest::Flags&RowMajorBit);
   }
 };
 
diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h
index e0bfcc356..1dee8d714 100644
--- a/Eigen/src/Core/products/Parallelizer.h
+++ b/Eigen/src/Core/products/Parallelizer.h
@@ -10,7 +10,7 @@
 #ifndef EIGEN_PARALLELIZER_H
 #define EIGEN_PARALLELIZER_H
 
-namespace Eigen { 
+namespace Eigen {
 
 namespace internal {
 
@@ -83,7 +83,7 @@ template<typename Index> struct GemmParallelInfo
 };
 
 template<bool Condition, typename Functor, typename Index>
-void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose)
+void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, bool transpose)
 {
   // TODO when EIGEN_USE_BLAS is defined,
   // we should still enable OMP for other scalar types
@@ -92,6 +92,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
   // the matrix product when multithreading is enabled. This is a temporary
   // fix to support row-major destination matrices. This whole
   // parallelizer mechanism has to be redisigned anyway.
+  EIGEN_UNUSED_VARIABLE(depth);
   EIGEN_UNUSED_VARIABLE(transpose);
   func(0,rows, 0,cols);
 #else
@@ -106,6 +107,12 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
   // FIXME this has to be fine tuned
   Index size = transpose ? rows : cols;
   Index pb_max_threads = std::max<Index>(1,size / 32);
+  // compute the maximal number of threads from the total amount of work:
+  double work = static_cast<double>(rows) * static_cast<double>(cols) *
+      static_cast<double>(depth);
+  double kMinTaskSize = 50000;  // Heuristic.
+  max_threads = std::max<Index>(1, std::min<Index>(max_threads, work / kMinTaskSize));
+
   // compute the number of threads we are going to use
   Index threads = std::min<Index>(nbThreads(), pb_max_threads);
 
@@ -120,19 +127,19 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos
 
   if(transpose)
     std::swap(rows,cols);
-  
+
   ei_declare_aligned_stack_constructed_variable(GemmParallelInfo<Index>,info,threads,0);
-  
+
   #pragma omp parallel num_threads(threads)
   {
     Index i = omp_get_thread_num();
     // Note that the actual number of threads might be lower than the number of request ones.
     Index actual_threads = omp_get_num_threads();
-    
+
     Index blockCols = (cols / actual_threads) & ~Index(0x3);
     Index blockRows = (rows / actual_threads);
     blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr;
-  
+
     Index r0 = i*blockRows;
     Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows;
 

From 86711497c4584534793b186fb0c72f8002a9fe86 Mon Sep 17 00:00:00 2001
From: Robert Lukierski <robert@lukierski.eu>
Date: Wed, 12 Oct 2016 16:35:17 +0100
Subject: [PATCH 2/8] Adding EIGEN_DEVICE_FUNC in the Geometry module.
 Additional CUDA necessary fixes in the Core (mostly usage of
 EIGEN_USING_STD_MATH).

---
 Eigen/src/Core/AssignEvaluator.h         |   3 +-
 Eigen/src/Core/CwiseNullaryOp.h          |   3 +-
 Eigen/src/Core/DiagonalMatrix.h          |   2 +-
 Eigen/src/Core/MatrixBase.h              |   4 +-
 Eigen/src/Core/PlainObjectBase.h         |  10 +-
 Eigen/src/Core/TriangularMatrix.h        |  13 +-
 Eigen/src/Core/VectorwiseOp.h            |   2 +
 Eigen/src/Core/functors/BinaryFunctors.h |   2 +-
 Eigen/src/Core/functors/UnaryFunctors.h  |   2 +-
 Eigen/src/Geometry/AlignedBox.h          |  80 +++++------
 Eigen/src/Geometry/AngleAxis.h           |  56 ++++----
 Eigen/src/Geometry/EulerAngles.h         |   8 +-
 Eigen/src/Geometry/Homogeneous.h         |  58 ++++----
 Eigen/src/Geometry/Hyperplane.h          |  54 +++----
 Eigen/src/Geometry/OrthoMethods.h        |   9 +-
 Eigen/src/Geometry/ParametrizedLine.h    |  54 +++----
 Eigen/src/Geometry/Quaternion.h          | 162 ++++++++++-----------
 Eigen/src/Geometry/Rotation2D.h          |  48 +++----
 Eigen/src/Geometry/RotationBase.h        |  40 +++---
 Eigen/src/Geometry/Transform.h           | 175 +++++++++++++----------
 Eigen/src/Geometry/Translation.h         |  50 +++----
 21 files changed, 434 insertions(+), 401 deletions(-)

diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index b7cc7c0e9..30b36be11 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -517,6 +517,7 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
 {
   EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
   {
+    EIGEN_USING_STD_MATH(min)
     typedef typename Kernel::Scalar Scalar;
     typedef typename Kernel::PacketType PacketType;
     enum {
@@ -554,7 +555,7 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
       for(Index inner = alignedEnd; inner<innerSize ; ++inner)
         kernel.assignCoeffByOuterInner(outer, inner);
 
-      alignedStart = std::min<Index>((alignedStart+alignedStep)%packetSize, innerSize);
+      alignedStart = min((Index)(alignedStart+alignedStep)%packetSize, (Index)innerSize);
     }
   }
 };
diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h
index 25c3ef3d7..4ab32d430 100644
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -752,7 +752,8 @@ struct setIdentity_impl<Derived, true>
   static EIGEN_STRONG_INLINE Derived& run(Derived& m)
   {
     m.setZero();
-    const Index size = (std::min)(m.rows(), m.cols());
+    EIGEN_USING_STD_MATH(min)
+    const Index size = min(m.rows(), m.cols());
     for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1);
     return m;
   }
diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h
index 92b2eee71..f04facecc 100644
--- a/Eigen/src/Core/DiagonalMatrix.h
+++ b/Eigen/src/Core/DiagonalMatrix.h
@@ -290,7 +290,7 @@ MatrixBase<Derived>::asDiagonal() const
 template<typename Derived>
 bool MatrixBase<Derived>::isDiagonal(const RealScalar& prec) const
 {
-  using std::abs;
+  EIGEN_USING_STD_MATH(abs)
   if(cols() != rows()) return false;
   RealScalar maxAbsOnDiagonal = static_cast<RealScalar>(-1);
   for(Index j = 0; j < cols(); ++j)
diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h
index d56df8249..f7cf04cde 100644
--- a/Eigen/src/Core/MatrixBase.h
+++ b/Eigen/src/Core/MatrixBase.h
@@ -399,12 +399,14 @@ template<typename Derived> class MatrixBase
     EIGEN_DEVICE_FUNC
     inline PlainObject unitOrthogonal(void) const;
 
+    EIGEN_DEVICE_FUNC
     inline Matrix<Scalar,3,1> eulerAngles(Index a0, Index a1, Index a2) const;
 
     // put this as separate enum value to work around possible GCC 4.3 bug (?)
     enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits<Derived>::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical)
                                           : ColsAtCompileTime==1 ? Vertical : Horizontal };
     typedef Homogeneous<Derived, HomogeneousReturnTypeDirection> HomogeneousReturnType;
+    EIGEN_DEVICE_FUNC
     inline HomogeneousReturnType homogeneous() const;
 
     enum {
@@ -414,7 +416,7 @@ template<typename Derived> class MatrixBase
                   internal::traits<Derived>::ColsAtCompileTime==1 ? SizeMinusOne : 1,
                   internal::traits<Derived>::ColsAtCompileTime==1 ? 1 : SizeMinusOne> ConstStartMinusOne;
     typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(ConstStartMinusOne,Scalar,quotient) HNormalizedReturnType;
-
+    EIGEN_DEVICE_FUNC
     inline const HNormalizedReturnType hnormalized() const;
 
 ////////// Householder module ///////////
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index 55b4ac057..00313920c 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -916,8 +916,9 @@ struct conservative_resize_like_impl
     {
       // The storage order does not allow us to use reallocation.
       typename Derived::PlainObject tmp(rows,cols);
-      const Index common_rows = (std::min)(rows, _this.rows());
-      const Index common_cols = (std::min)(cols, _this.cols());
+      EIGEN_USING_STD_MATH(min)
+      const Index common_rows = min(rows, _this.rows());
+      const Index common_cols = min(cols, _this.cols());
       tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
       _this.derived().swap(tmp);
     }
@@ -950,8 +951,9 @@ struct conservative_resize_like_impl
     {
       // The storage order does not allow us to use reallocation.
       typename Derived::PlainObject tmp(other);
-      const Index common_rows = (std::min)(tmp.rows(), _this.rows());
-      const Index common_cols = (std::min)(tmp.cols(), _this.cols());
+      EIGEN_USING_STD_MATH(min)
+      const Index common_rows = min(tmp.rows(), _this.rows());
+      const Index common_cols = min(tmp.cols(), _this.cols());
       tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
       _this.derived().swap(tmp);
     }
diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h
index e9606ec33..e804cf6bb 100644
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -641,11 +641,12 @@ MatrixBase<Derived>::triangularView() const
 template<typename Derived>
 bool MatrixBase<Derived>::isUpperTriangular(const RealScalar& prec) const
 {
-  using std::abs;
+  EIGEN_USING_STD_MATH(abs)
   RealScalar maxAbsOnUpperPart = static_cast<RealScalar>(-1);
+  EIGEN_USING_STD_MATH(min)
   for(Index j = 0; j < cols(); ++j)
   {
-    Index maxi = (std::min)(j, rows()-1);
+    Index maxi = min(j, rows()-1);
     for(Index i = 0; i <= maxi; ++i)
     {
       RealScalar absValue = abs(coeff(i,j));
@@ -667,7 +668,8 @@ bool MatrixBase<Derived>::isUpperTriangular(const RealScalar& prec) const
 template<typename Derived>
 bool MatrixBase<Derived>::isLowerTriangular(const RealScalar& prec) const
 {
-  using std::abs;
+  EIGEN_USING_STD_MATH(abs)
+  EIGEN_USING_STD_MATH(min)
   RealScalar maxAbsOnLowerPart = static_cast<RealScalar>(-1);
   for(Index j = 0; j < cols(); ++j)
     for(Index i = j; i < rows(); ++i)
@@ -678,7 +680,7 @@ bool MatrixBase<Derived>::isLowerTriangular(const RealScalar& prec) const
   RealScalar threshold = maxAbsOnLowerPart * prec;
   for(Index j = 1; j < cols(); ++j)
   {
-    Index maxi = (std::min)(j, rows()-1);
+    Index maxi = min(j, rows()-1);
     for(Index i = 0; i < maxi; ++i)
       if(abs(coeff(i, j)) > threshold) return false;
   }
@@ -891,9 +893,10 @@ struct triangular_assignment_loop<Kernel, Mode, Dynamic, SetOpposite>
   EIGEN_DEVICE_FUNC
   static inline void run(Kernel &kernel)
   {
+    EIGEN_USING_STD_MATH(min)
     for(Index j = 0; j < kernel.cols(); ++j)
     {
-      Index maxi = (std::min)(j, kernel.rows());
+      Index maxi = min(j, kernel.rows());
       Index i = 0;
       if (((Mode&Lower) && SetOpposite) || (Mode&Upper))
       {
diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h
index dd382e990..14f403594 100644
--- a/Eigen/src/Core/VectorwiseOp.h
+++ b/Eigen/src/Core/VectorwiseOp.h
@@ -625,6 +625,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
 /////////// Geometry module ///////////
 
     typedef Homogeneous<ExpressionType,Direction> HomogeneousReturnType;
+    EIGEN_DEVICE_FUNC
     HomogeneousReturnType homogeneous() const;
 
     typedef typename ExpressionType::PlainObject CrossReturnType;
@@ -654,6 +655,7 @@ template<typename ExpressionType, int Direction> class VectorwiseOp
                   Direction==Horizontal ? HNormalized_SizeMinusOne : 1> >
             HNormalizedReturnType;
 
+    EIGEN_DEVICE_FUNC
     const HNormalizedReturnType hnormalized() const;
 
   protected:
diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h
index d82ffed02..96747bac7 100644
--- a/Eigen/src/Core/functors/BinaryFunctors.h
+++ b/Eigen/src/Core/functors/BinaryFunctors.h
@@ -266,7 +266,7 @@ struct scalar_hypot_op<Scalar,Scalar> : binary_op_base<Scalar,Scalar>
 //   typedef typename NumTraits<Scalar>::Real result_type;
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const
   {
-    using std::sqrt;
+    EIGEN_USING_STD_MATH(sqrt)
     Scalar p, qp;
     if(_x>_y)
     {
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index 2009f8e57..2e6a00ffd 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -321,7 +321,7 @@ struct functor_traits<scalar_log1p_op<Scalar> > {
   */
 template<typename Scalar> struct scalar_log10_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_log10_op)
-  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::log10; return log10(a); }
+  EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { EIGEN_USING_STD_MATH(log10) return log10(a); }
   template <typename Packet>
   EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog10(a); }
 };
diff --git a/Eigen/src/Geometry/AlignedBox.h b/Eigen/src/Geometry/AlignedBox.h
index d20d17492..066eae4f9 100644
--- a/Eigen/src/Geometry/AlignedBox.h
+++ b/Eigen/src/Geometry/AlignedBox.h
@@ -62,57 +62,57 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
 
 
   /** Default constructor initializing a null box. */
-  inline AlignedBox()
+  EIGEN_DEVICE_FUNC inline AlignedBox()
   { if (AmbientDimAtCompileTime!=Dynamic) setEmpty(); }
 
   /** Constructs a null box with \a _dim the dimension of the ambient space. */
-  inline explicit AlignedBox(Index _dim) : m_min(_dim), m_max(_dim)
+  EIGEN_DEVICE_FUNC inline explicit AlignedBox(Index _dim) : m_min(_dim), m_max(_dim)
   { setEmpty(); }
 
   /** Constructs a box with extremities \a _min and \a _max.
    * \warning If either component of \a _min is larger than the same component of \a _max, the constructed box is empty. */
   template<typename OtherVectorType1, typename OtherVectorType2>
-  inline AlignedBox(const OtherVectorType1& _min, const OtherVectorType2& _max) : m_min(_min), m_max(_max) {}
+  EIGEN_DEVICE_FUNC inline AlignedBox(const OtherVectorType1& _min, const OtherVectorType2& _max) : m_min(_min), m_max(_max) {}
 
   /** Constructs a box containing a single point \a p. */
   template<typename Derived>
-  inline explicit AlignedBox(const MatrixBase<Derived>& p) : m_min(p), m_max(m_min)
+  EIGEN_DEVICE_FUNC inline explicit AlignedBox(const MatrixBase<Derived>& p) : m_min(p), m_max(m_min)
   { }
 
-  ~AlignedBox() {}
+  EIGEN_DEVICE_FUNC ~AlignedBox() {}
 
   /** \returns the dimension in which the box holds */
-  inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? m_min.size() : Index(AmbientDimAtCompileTime); }
+  EIGEN_DEVICE_FUNC inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? m_min.size() : Index(AmbientDimAtCompileTime); }
 
   /** \deprecated use isEmpty() */
-  inline bool isNull() const { return isEmpty(); }
+  EIGEN_DEVICE_FUNC inline bool isNull() const { return isEmpty(); }
 
   /** \deprecated use setEmpty() */
-  inline void setNull() { setEmpty(); }
+  EIGEN_DEVICE_FUNC inline void setNull() { setEmpty(); }
 
   /** \returns true if the box is empty.
    * \sa setEmpty */
-  inline bool isEmpty() const { return (m_min.array() > m_max.array()).any(); }
+  EIGEN_DEVICE_FUNC inline bool isEmpty() const { return (m_min.array() > m_max.array()).any(); }
 
   /** Makes \c *this an empty box.
    * \sa isEmpty */
-  inline void setEmpty()
+  EIGEN_DEVICE_FUNC inline void setEmpty()
   {
     m_min.setConstant( ScalarTraits::highest() );
     m_max.setConstant( ScalarTraits::lowest() );
   }
 
   /** \returns the minimal corner */
-  inline const VectorType& (min)() const { return m_min; }
+  EIGEN_DEVICE_FUNC inline const VectorType& (min)() const { return m_min; }
   /** \returns a non const reference to the minimal corner */
-  inline VectorType& (min)() { return m_min; }
+  EIGEN_DEVICE_FUNC inline VectorType& (min)() { return m_min; }
   /** \returns the maximal corner */
-  inline const VectorType& (max)() const { return m_max; }
+  EIGEN_DEVICE_FUNC inline const VectorType& (max)() const { return m_max; }
   /** \returns a non const reference to the maximal corner */
-  inline VectorType& (max)() { return m_max; }
+  EIGEN_DEVICE_FUNC inline VectorType& (max)() { return m_max; }
 
   /** \returns the center of the box */
-  inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(VectorTypeSum, RealScalar, quotient)
+  EIGEN_DEVICE_FUNC inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(VectorTypeSum, RealScalar, quotient)
   center() const
   { return (m_min+m_max)/RealScalar(2); }
 
@@ -120,18 +120,18 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
     * Note that this function does not get the same
     * result for integral or floating scalar types: see
     */
-  inline const CwiseBinaryOp< internal::scalar_difference_op<Scalar,Scalar>, const VectorType, const VectorType> sizes() const
+  EIGEN_DEVICE_FUNC inline const CwiseBinaryOp< internal::scalar_difference_op<Scalar,Scalar>, const VectorType, const VectorType> sizes() const
   { return m_max - m_min; }
 
   /** \returns the volume of the bounding box */
-  inline Scalar volume() const
+  EIGEN_DEVICE_FUNC inline Scalar volume() const
   { return sizes().prod(); }
 
   /** \returns an expression for the bounding box diagonal vector
     * if the length of the diagonal is needed: diagonal().norm()
     * will provide it.
     */
-  inline CwiseBinaryOp< internal::scalar_difference_op<Scalar,Scalar>, const VectorType, const VectorType> diagonal() const
+  EIGEN_DEVICE_FUNC inline CwiseBinaryOp< internal::scalar_difference_op<Scalar,Scalar>, const VectorType, const VectorType> diagonal() const
   { return sizes(); }
 
   /** \returns the vertex of the bounding box at the corner defined by
@@ -143,7 +143,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
     * For 3D bounding boxes, the following names are added:
     * BottomLeftCeil, BottomRightCeil, TopLeftCeil, TopRightCeil.
     */
-  inline VectorType corner(CornerType corner) const
+  EIGEN_DEVICE_FUNC inline VectorType corner(CornerType corner) const
   {
     EIGEN_STATIC_ASSERT(_AmbientDim <= 3, THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE);
 
@@ -161,7 +161,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
 
   /** \returns a random point inside the bounding box sampled with
    * a uniform distribution */
-  inline VectorType sample() const
+  EIGEN_DEVICE_FUNC inline VectorType sample() const
   {
     VectorType r(dim());
     for(Index d=0; d<dim(); ++d)
@@ -179,25 +179,25 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
 
   /** \returns true if the point \a p is inside the box \c *this. */
   template<typename Derived>
-  inline bool contains(const MatrixBase<Derived>& p) const
+  EIGEN_DEVICE_FUNC inline bool contains(const MatrixBase<Derived>& p) const
   {
     typename internal::nested_eval<Derived,2>::type p_n(p.derived());
     return (m_min.array()<=p_n.array()).all() && (p_n.array()<=m_max.array()).all();
   }
 
   /** \returns true if the box \a b is entirely inside the box \c *this. */
-  inline bool contains(const AlignedBox& b) const
+  EIGEN_DEVICE_FUNC inline bool contains(const AlignedBox& b) const
   { return (m_min.array()<=(b.min)().array()).all() && ((b.max)().array()<=m_max.array()).all(); }
 
   /** \returns true if the box \a b is intersecting the box \c *this.
    * \sa intersection, clamp */
-  inline bool intersects(const AlignedBox& b) const
+  EIGEN_DEVICE_FUNC inline bool intersects(const AlignedBox& b) const
   { return (m_min.array()<=(b.max)().array()).all() && ((b.min)().array()<=m_max.array()).all(); }
 
   /** Extends \c *this such that it contains the point \a p and returns a reference to \c *this.
    * \sa extend(const AlignedBox&) */
   template<typename Derived>
-  inline AlignedBox& extend(const MatrixBase<Derived>& p)
+  EIGEN_DEVICE_FUNC inline AlignedBox& extend(const MatrixBase<Derived>& p)
   {
     typename internal::nested_eval<Derived,2>::type p_n(p.derived());
     m_min = m_min.cwiseMin(p_n);
@@ -207,7 +207,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
 
   /** Extends \c *this such that it contains the box \a b and returns a reference to \c *this.
    * \sa merged, extend(const MatrixBase&) */
-  inline AlignedBox& extend(const AlignedBox& b)
+  EIGEN_DEVICE_FUNC inline AlignedBox& extend(const AlignedBox& b)
   {
     m_min = m_min.cwiseMin(b.m_min);
     m_max = m_max.cwiseMax(b.m_max);
@@ -217,7 +217,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
   /** Clamps \c *this by the box \a b and returns a reference to \c *this.
    * \note If the boxes don't intersect, the resulting box is empty.
    * \sa intersection(), intersects() */
-  inline AlignedBox& clamp(const AlignedBox& b)
+  EIGEN_DEVICE_FUNC inline AlignedBox& clamp(const AlignedBox& b)
   {
     m_min = m_min.cwiseMax(b.m_min);
     m_max = m_max.cwiseMin(b.m_max);
@@ -227,18 +227,18 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
   /** Returns an AlignedBox that is the intersection of \a b and \c *this
    * \note If the boxes don't intersect, the resulting box is empty.
    * \sa intersects(), clamp, contains()  */
-  inline AlignedBox intersection(const AlignedBox& b) const
+  EIGEN_DEVICE_FUNC inline AlignedBox intersection(const AlignedBox& b) const
   {return AlignedBox(m_min.cwiseMax(b.m_min), m_max.cwiseMin(b.m_max)); }
 
   /** Returns an AlignedBox that is the union of \a b and \c *this.
    * \note Merging with an empty box may result in a box bigger than \c *this. 
    * \sa extend(const AlignedBox&) */
-  inline AlignedBox merged(const AlignedBox& b) const
+  EIGEN_DEVICE_FUNC inline AlignedBox merged(const AlignedBox& b) const
   { return AlignedBox(m_min.cwiseMin(b.m_min), m_max.cwiseMax(b.m_max)); }
 
   /** Translate \c *this by the vector \a t and returns a reference to \c *this. */
   template<typename Derived>
-  inline AlignedBox& translate(const MatrixBase<Derived>& a_t)
+  EIGEN_DEVICE_FUNC inline AlignedBox& translate(const MatrixBase<Derived>& a_t)
   {
     const typename internal::nested_eval<Derived,2>::type t(a_t.derived());
     m_min += t;
@@ -251,28 +251,28 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
     * \sa exteriorDistance(const MatrixBase&), squaredExteriorDistance(const AlignedBox&)
     */
   template<typename Derived>
-  inline Scalar squaredExteriorDistance(const MatrixBase<Derived>& p) const;
+  EIGEN_DEVICE_FUNC inline Scalar squaredExteriorDistance(const MatrixBase<Derived>& p) const;
 
   /** \returns the squared distance between the boxes \a b and \c *this,
     * and zero if the boxes intersect.
     * \sa exteriorDistance(const AlignedBox&), squaredExteriorDistance(const MatrixBase&)
     */
-  inline Scalar squaredExteriorDistance(const AlignedBox& b) const;
+  EIGEN_DEVICE_FUNC inline Scalar squaredExteriorDistance(const AlignedBox& b) const;
 
   /** \returns the distance between the point \a p and the box \c *this,
     * and zero if \a p is inside the box.
     * \sa squaredExteriorDistance(const MatrixBase&), exteriorDistance(const AlignedBox&)
     */
   template<typename Derived>
-  inline NonInteger exteriorDistance(const MatrixBase<Derived>& p) const
-  { using std::sqrt; return sqrt(NonInteger(squaredExteriorDistance(p))); }
+  EIGEN_DEVICE_FUNC inline NonInteger exteriorDistance(const MatrixBase<Derived>& p) const
+  { EIGEN_USING_STD_MATH(sqrt) return sqrt(NonInteger(squaredExteriorDistance(p))); }
 
   /** \returns the distance between the boxes \a b and \c *this,
     * and zero if the boxes intersect.
     * \sa squaredExteriorDistance(const AlignedBox&), exteriorDistance(const MatrixBase&)
     */
-  inline NonInteger exteriorDistance(const AlignedBox& b) const
-  { using std::sqrt; return sqrt(NonInteger(squaredExteriorDistance(b))); }
+  EIGEN_DEVICE_FUNC inline NonInteger exteriorDistance(const AlignedBox& b) const
+  { EIGEN_USING_STD_MATH(sqrt) return sqrt(NonInteger(squaredExteriorDistance(b))); }
 
   /** \returns \c *this with scalar type casted to \a NewScalarType
     *
@@ -280,7 +280,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
     * then this function smartly returns a const reference to \c *this.
     */
   template<typename NewScalarType>
-  inline typename internal::cast_return_type<AlignedBox,
+  EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<AlignedBox,
            AlignedBox<NewScalarType,AmbientDimAtCompileTime> >::type cast() const
   {
     return typename internal::cast_return_type<AlignedBox,
@@ -289,7 +289,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
 
   /** Copy constructor with scalar type conversion */
   template<typename OtherScalarType>
-  inline explicit AlignedBox(const AlignedBox<OtherScalarType,AmbientDimAtCompileTime>& other)
+  EIGEN_DEVICE_FUNC inline explicit AlignedBox(const AlignedBox<OtherScalarType,AmbientDimAtCompileTime>& other)
   {
     m_min = (other.min)().template cast<Scalar>();
     m_max = (other.max)().template cast<Scalar>();
@@ -299,7 +299,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim)
     * determined by \a prec.
     *
     * \sa MatrixBase::isApprox() */
-  bool isApprox(const AlignedBox& other, const RealScalar& prec = ScalarTraits::dummy_precision()) const
+  EIGEN_DEVICE_FUNC bool isApprox(const AlignedBox& other, const RealScalar& prec = ScalarTraits::dummy_precision()) const
   { return m_min.isApprox(other.m_min, prec) && m_max.isApprox(other.m_max, prec); }
 
 protected:
@@ -311,7 +311,7 @@ protected:
 
 template<typename Scalar,int AmbientDim>
 template<typename Derived>
-inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const MatrixBase<Derived>& a_p) const
+EIGEN_DEVICE_FUNC inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const MatrixBase<Derived>& a_p) const
 {
   typename internal::nested_eval<Derived,2*AmbientDim>::type p(a_p.derived());
   Scalar dist2(0);
@@ -333,7 +333,7 @@ inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const Matri
 }
 
 template<typename Scalar,int AmbientDim>
-inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const AlignedBox& b) const
+EIGEN_DEVICE_FUNC inline Scalar AlignedBox<Scalar,AmbientDim>::squaredExteriorDistance(const AlignedBox& b) const
 {
   Scalar dist2(0);
   Scalar aux;
diff --git a/Eigen/src/Geometry/AngleAxis.h b/Eigen/src/Geometry/AngleAxis.h
index 571062d00..0af3c1b08 100644
--- a/Eigen/src/Geometry/AngleAxis.h
+++ b/Eigen/src/Geometry/AngleAxis.h
@@ -69,59 +69,61 @@ protected:
 public:
 
   /** Default constructor without initialization. */
-  AngleAxis() {}
+  EIGEN_DEVICE_FUNC AngleAxis() {}
   /** Constructs and initialize the angle-axis rotation from an \a angle in radian
     * and an \a axis which \b must \b be \b normalized.
     *
     * \warning If the \a axis vector is not normalized, then the angle-axis object
     *          represents an invalid rotation. */
   template<typename Derived>
+  EIGEN_DEVICE_FUNC 
   inline AngleAxis(const Scalar& angle, const MatrixBase<Derived>& axis) : m_axis(axis), m_angle(angle) {}
   /** Constructs and initialize the angle-axis rotation from a quaternion \a q.
     * This function implicitly normalizes the quaternion \a q.
     */
-  template<typename QuatDerived> inline explicit AngleAxis(const QuaternionBase<QuatDerived>& q) { *this = q; }
+  template<typename QuatDerived> 
+  EIGEN_DEVICE_FUNC inline explicit AngleAxis(const QuaternionBase<QuatDerived>& q) { *this = q; }
   /** Constructs and initialize the angle-axis rotation from a 3x3 rotation matrix. */
   template<typename Derived>
-  inline explicit AngleAxis(const MatrixBase<Derived>& m) { *this = m; }
+  EIGEN_DEVICE_FUNC inline explicit AngleAxis(const MatrixBase<Derived>& m) { *this = m; }
 
   /** \returns the value of the rotation angle in radian */
-  Scalar angle() const { return m_angle; }
+  EIGEN_DEVICE_FUNC Scalar angle() const { return m_angle; }
   /** \returns a read-write reference to the stored angle in radian */
-  Scalar& angle() { return m_angle; }
+  EIGEN_DEVICE_FUNC Scalar& angle() { return m_angle; }
 
   /** \returns the rotation axis */
-  const Vector3& axis() const { return m_axis; }
+  EIGEN_DEVICE_FUNC const Vector3& axis() const { return m_axis; }
   /** \returns a read-write reference to the stored rotation axis.
     *
     * \warning The rotation axis must remain a \b unit vector.
     */
-  Vector3& axis() { return m_axis; }
+  EIGEN_DEVICE_FUNC Vector3& axis() { return m_axis; }
 
   /** Concatenates two rotations */
-  inline QuaternionType operator* (const AngleAxis& other) const
+  EIGEN_DEVICE_FUNC inline QuaternionType operator* (const AngleAxis& other) const
   { return QuaternionType(*this) * QuaternionType(other); }
 
   /** Concatenates two rotations */
-  inline QuaternionType operator* (const QuaternionType& other) const
+  EIGEN_DEVICE_FUNC inline QuaternionType operator* (const QuaternionType& other) const
   { return QuaternionType(*this) * other; }
 
   /** Concatenates two rotations */
-  friend inline QuaternionType operator* (const QuaternionType& a, const AngleAxis& b)
+  friend EIGEN_DEVICE_FUNC inline QuaternionType operator* (const QuaternionType& a, const AngleAxis& b)
   { return a * QuaternionType(b); }
 
   /** \returns the inverse rotation, i.e., an angle-axis with opposite rotation angle */
-  AngleAxis inverse() const
+  EIGEN_DEVICE_FUNC AngleAxis inverse() const
   { return AngleAxis(-m_angle, m_axis); }
 
   template<class QuatDerived>
-  AngleAxis& operator=(const QuaternionBase<QuatDerived>& q);
+  EIGEN_DEVICE_FUNC AngleAxis& operator=(const QuaternionBase<QuatDerived>& q);
   template<typename Derived>
-  AngleAxis& operator=(const MatrixBase<Derived>& m);
+  EIGEN_DEVICE_FUNC AngleAxis& operator=(const MatrixBase<Derived>& m);
 
   template<typename Derived>
-  AngleAxis& fromRotationMatrix(const MatrixBase<Derived>& m);
-  Matrix3 toRotationMatrix(void) const;
+  EIGEN_DEVICE_FUNC AngleAxis& fromRotationMatrix(const MatrixBase<Derived>& m);
+  EIGEN_DEVICE_FUNC Matrix3 toRotationMatrix(void) const;
 
   /** \returns \c *this with scalar type casted to \a NewScalarType
     *
@@ -129,24 +131,24 @@ public:
     * then this function smartly returns a const reference to \c *this.
     */
   template<typename NewScalarType>
-  inline typename internal::cast_return_type<AngleAxis,AngleAxis<NewScalarType> >::type cast() const
+  EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<AngleAxis,AngleAxis<NewScalarType> >::type cast() const
   { return typename internal::cast_return_type<AngleAxis,AngleAxis<NewScalarType> >::type(*this); }
 
   /** Copy constructor with scalar type conversion */
   template<typename OtherScalarType>
-  inline explicit AngleAxis(const AngleAxis<OtherScalarType>& other)
+  EIGEN_DEVICE_FUNC inline explicit AngleAxis(const AngleAxis<OtherScalarType>& other)
   {
     m_axis = other.axis().template cast<Scalar>();
     m_angle = Scalar(other.angle());
   }
 
-  static inline const AngleAxis Identity() { return AngleAxis(Scalar(0), Vector3::UnitX()); }
+  EIGEN_DEVICE_FUNC static inline const AngleAxis Identity() { return AngleAxis(Scalar(0), Vector3::UnitX()); }
 
   /** \returns \c true if \c *this is approximately equal to \a other, within the precision
     * determined by \a prec.
     *
     * \sa MatrixBase::isApprox() */
-  bool isApprox(const AngleAxis& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
+  EIGEN_DEVICE_FUNC bool isApprox(const AngleAxis& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
   { return m_axis.isApprox(other.m_axis, prec) && internal::isApprox(m_angle,other.m_angle, prec); }
 };
 
@@ -165,10 +167,10 @@ typedef AngleAxis<double> AngleAxisd;
   */
 template<typename Scalar>
 template<typename QuatDerived>
-AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const QuaternionBase<QuatDerived>& q)
+EIGEN_DEVICE_FUNC AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const QuaternionBase<QuatDerived>& q)
 {
-  using std::atan2;
-  using std::abs;
+  EIGEN_USING_STD_MATH(atan2)
+  EIGEN_USING_STD_MATH(abs)
   Scalar n = q.vec().norm();
   if(n<NumTraits<Scalar>::epsilon())
     n = q.vec().stableNorm();
@@ -192,7 +194,7 @@ AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const QuaternionBase<QuatDerived
   */
 template<typename Scalar>
 template<typename Derived>
-AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const MatrixBase<Derived>& mat)
+EIGEN_DEVICE_FUNC AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const MatrixBase<Derived>& mat)
 {
   // Since a direct conversion would not be really faster,
   // let's use the robust Quaternion implementation:
@@ -204,7 +206,7 @@ AngleAxis<Scalar>& AngleAxis<Scalar>::operator=(const MatrixBase<Derived>& mat)
 **/
 template<typename Scalar>
 template<typename Derived>
-AngleAxis<Scalar>& AngleAxis<Scalar>::fromRotationMatrix(const MatrixBase<Derived>& mat)
+EIGEN_DEVICE_FUNC AngleAxis<Scalar>& AngleAxis<Scalar>::fromRotationMatrix(const MatrixBase<Derived>& mat)
 {
   return *this = QuaternionType(mat);
 }
@@ -213,10 +215,10 @@ AngleAxis<Scalar>& AngleAxis<Scalar>::fromRotationMatrix(const MatrixBase<Derive
   */
 template<typename Scalar>
 typename AngleAxis<Scalar>::Matrix3
-AngleAxis<Scalar>::toRotationMatrix(void) const
+EIGEN_DEVICE_FUNC AngleAxis<Scalar>::toRotationMatrix(void) const
 {
-  using std::sin;
-  using std::cos;
+  EIGEN_USING_STD_MATH(sin)
+  EIGEN_USING_STD_MATH(cos)
   Matrix3 res;
   Vector3 sin_axis  = sin(m_angle) * m_axis;
   Scalar c = cos(m_angle);
diff --git a/Eigen/src/Geometry/EulerAngles.h b/Eigen/src/Geometry/EulerAngles.h
index 4865e58aa..c633268af 100644
--- a/Eigen/src/Geometry/EulerAngles.h
+++ b/Eigen/src/Geometry/EulerAngles.h
@@ -33,12 +33,12 @@ namespace Eigen {
   * \sa class AngleAxis
   */
 template<typename Derived>
-inline Matrix<typename MatrixBase<Derived>::Scalar,3,1>
+EIGEN_DEVICE_FUNC inline Matrix<typename MatrixBase<Derived>::Scalar,3,1>
 MatrixBase<Derived>::eulerAngles(Index a0, Index a1, Index a2) const
 {
-  using std::atan2;
-  using std::sin;
-  using std::cos;
+  EIGEN_USING_STD_MATH(atan2)
+  EIGEN_USING_STD_MATH(sin)
+  EIGEN_USING_STD_MATH(cos)
   /* Implemented from Graphics Gems IV */
   EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Derived,3,3)
 
diff --git a/Eigen/src/Geometry/Homogeneous.h b/Eigen/src/Geometry/Homogeneous.h
index a23068c8d..804e5da73 100644
--- a/Eigen/src/Geometry/Homogeneous.h
+++ b/Eigen/src/Geometry/Homogeneous.h
@@ -68,17 +68,17 @@ template<typename MatrixType,int _Direction> class Homogeneous
     typedef MatrixBase<Homogeneous> Base;
     EIGEN_DENSE_PUBLIC_INTERFACE(Homogeneous)
 
-    explicit inline Homogeneous(const MatrixType& matrix)
+    EIGEN_DEVICE_FUNC explicit inline Homogeneous(const MatrixType& matrix)
       : m_matrix(matrix)
     {}
 
-    inline Index rows() const { return m_matrix.rows() + (int(Direction)==Vertical   ? 1 : 0); }
-    inline Index cols() const { return m_matrix.cols() + (int(Direction)==Horizontal ? 1 : 0); }
+    EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows() + (int(Direction)==Vertical   ? 1 : 0); }
+    EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols() + (int(Direction)==Horizontal ? 1 : 0); }
     
-    const NestedExpression& nestedExpression() const { return m_matrix; }
+    EIGEN_DEVICE_FUNC const NestedExpression& nestedExpression() const { return m_matrix; }
 
     template<typename Rhs>
-    inline const Product<Homogeneous,Rhs>
+    EIGEN_DEVICE_FUNC inline const Product<Homogeneous,Rhs>
     operator* (const MatrixBase<Rhs>& rhs) const
     {
       eigen_assert(int(Direction)==Horizontal);
@@ -86,7 +86,7 @@ template<typename MatrixType,int _Direction> class Homogeneous
     }
 
     template<typename Lhs> friend
-    inline const Product<Lhs,Homogeneous>
+    EIGEN_DEVICE_FUNC inline const Product<Lhs,Homogeneous>
     operator* (const MatrixBase<Lhs>& lhs, const Homogeneous& rhs)
     {
       eigen_assert(int(Direction)==Vertical);
@@ -94,7 +94,7 @@ template<typename MatrixType,int _Direction> class Homogeneous
     }
 
     template<typename Scalar, int Dim, int Mode, int Options> friend
-    inline const Product<Transform<Scalar,Dim,Mode,Options>, Homogeneous >
+    EIGEN_DEVICE_FUNC inline const Product<Transform<Scalar,Dim,Mode,Options>, Homogeneous >
     operator* (const Transform<Scalar,Dim,Mode,Options>& lhs, const Homogeneous& rhs)
     {
       eigen_assert(int(Direction)==Vertical);
@@ -102,7 +102,7 @@ template<typename MatrixType,int _Direction> class Homogeneous
     }
 
     template<typename Func>
-    EIGEN_STRONG_INLINE typename internal::result_of<Func(Scalar,Scalar)>::type
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::result_of<Func(Scalar,Scalar)>::type
     redux(const Func& func) const
     {
       return func(m_matrix.redux(func), Scalar(1));
@@ -124,7 +124,7 @@ template<typename MatrixType,int _Direction> class Homogeneous
   * \sa VectorwiseOp::homogeneous(), class Homogeneous
   */
 template<typename Derived>
-inline typename MatrixBase<Derived>::HomogeneousReturnType
+EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::HomogeneousReturnType
 MatrixBase<Derived>::homogeneous() const
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
@@ -140,7 +140,7 @@ MatrixBase<Derived>::homogeneous() const
   *
   * \sa MatrixBase::homogeneous(), class Homogeneous */
 template<typename ExpressionType, int Direction>
-inline Homogeneous<ExpressionType,Direction>
+EIGEN_DEVICE_FUNC inline Homogeneous<ExpressionType,Direction>
 VectorwiseOp<ExpressionType,Direction>::homogeneous() const
 {
   return HomogeneousReturnType(_expression());
@@ -155,7 +155,7 @@ VectorwiseOp<ExpressionType,Direction>::homogeneous() const
   *
   * \sa VectorwiseOp::hnormalized() */
 template<typename Derived>
-inline const typename MatrixBase<Derived>::HNormalizedReturnType
+EIGEN_DEVICE_FUNC inline const typename MatrixBase<Derived>::HNormalizedReturnType
 MatrixBase<Derived>::hnormalized() const
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived);
@@ -173,7 +173,7 @@ MatrixBase<Derived>::hnormalized() const
   *
   * \sa MatrixBase::hnormalized() */
 template<typename ExpressionType, int Direction>
-inline const typename VectorwiseOp<ExpressionType,Direction>::HNormalizedReturnType
+EIGEN_DEVICE_FUNC inline const typename VectorwiseOp<ExpressionType,Direction>::HNormalizedReturnType
 VectorwiseOp<ExpressionType,Direction>::hnormalized() const
 {
   return HNormalized_Block(_expression(),0,0,
@@ -197,7 +197,7 @@ template<typename MatrixOrTransformType>
 struct take_matrix_for_product
 {
   typedef MatrixOrTransformType type;
-  static const type& run(const type &x) { return x; }
+  EIGEN_DEVICE_FUNC static const type& run(const type &x) { return x; }
 };
 
 template<typename Scalar, int Dim, int Mode,int Options>
@@ -205,7 +205,7 @@ struct take_matrix_for_product<Transform<Scalar, Dim, Mode, Options> >
 {
   typedef Transform<Scalar, Dim, Mode, Options> TransformType;
   typedef typename internal::add_const<typename TransformType::ConstAffinePart>::type type;
-  static type run (const TransformType& x) { return x.affine(); }
+  EIGEN_DEVICE_FUNC static type run (const TransformType& x) { return x.affine(); }
 };
 
 template<typename Scalar, int Dim, int Options>
@@ -213,7 +213,7 @@ struct take_matrix_for_product<Transform<Scalar, Dim, Projective, Options> >
 {
   typedef Transform<Scalar, Dim, Projective, Options> TransformType;
   typedef typename TransformType::MatrixType type;
-  static const type& run (const TransformType& x) { return x.matrix(); }
+  EIGEN_DEVICE_FUNC static const type& run (const TransformType& x) { return x.matrix(); }
 };
 
 template<typename MatrixType,typename Lhs>
@@ -238,15 +238,15 @@ struct homogeneous_left_product_impl<Homogeneous<MatrixType,Vertical>,Lhs>
   typedef typename traits<homogeneous_left_product_impl>::LhsMatrixType LhsMatrixType;
   typedef typename remove_all<LhsMatrixType>::type LhsMatrixTypeCleaned;
   typedef typename remove_all<typename LhsMatrixTypeCleaned::Nested>::type LhsMatrixTypeNested;
-  homogeneous_left_product_impl(const Lhs& lhs, const MatrixType& rhs)
+  EIGEN_DEVICE_FUNC homogeneous_left_product_impl(const Lhs& lhs, const MatrixType& rhs)
     : m_lhs(take_matrix_for_product<Lhs>::run(lhs)),
       m_rhs(rhs)
   {}
 
-  inline Index rows() const { return m_lhs.rows(); }
-  inline Index cols() const { return m_rhs.cols(); }
+  EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); }
+  EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }
 
-  template<typename Dest> void evalTo(Dest& dst) const
+  template<typename Dest> EIGEN_DEVICE_FUNC void evalTo(Dest& dst) const
   {
     // FIXME investigate how to allow lazy evaluation of this product when possible
     dst = Block<const LhsMatrixTypeNested,
@@ -277,14 +277,14 @@ struct homogeneous_right_product_impl<Homogeneous<MatrixType,Horizontal>,Rhs>
   : public ReturnByValue<homogeneous_right_product_impl<Homogeneous<MatrixType,Horizontal>,Rhs> >
 {
   typedef typename remove_all<typename Rhs::Nested>::type RhsNested;
-  homogeneous_right_product_impl(const MatrixType& lhs, const Rhs& rhs)
+  EIGEN_DEVICE_FUNC homogeneous_right_product_impl(const MatrixType& lhs, const Rhs& rhs)
     : m_lhs(lhs), m_rhs(rhs)
   {}
 
-  inline Index rows() const { return m_lhs.rows(); }
-  inline Index cols() const { return m_rhs.cols(); }
+  EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); }
+  EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); }
 
-  template<typename Dest> void evalTo(Dest& dst) const
+  template<typename Dest> EIGEN_DEVICE_FUNC void evalTo(Dest& dst) const
   {
     // FIXME investigate how to allow lazy evaluation of this product when possible
     dst = m_lhs * Block<const RhsNested,
@@ -317,7 +317,7 @@ struct unary_evaluator<Homogeneous<ArgType,Direction>, IndexBased>
   typedef typename XprType::PlainObject PlainObject;
   typedef evaluator<PlainObject> Base;
 
-  explicit unary_evaluator(const XprType& op)
+  EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op)
     : Base(), m_temp(op)
   {
     ::new (static_cast<Base*>(this)) Base(m_temp);
@@ -332,7 +332,7 @@ template< typename DstXprType, typename ArgType, typename Scalar>
 struct Assignment<DstXprType, Homogeneous<ArgType,Vertical>, internal::assign_op<Scalar,typename ArgType::Scalar>, Dense2Dense>
 {
   typedef Homogeneous<ArgType,Vertical> SrcXprType;
-  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename ArgType::Scalar> &)
+  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename ArgType::Scalar> &)
   {
     dst.template topRows<ArgType::RowsAtCompileTime>(src.nestedExpression().rows()) = src.nestedExpression();
     dst.row(dst.rows()-1).setOnes();
@@ -344,7 +344,7 @@ template< typename DstXprType, typename ArgType, typename Scalar>
 struct Assignment<DstXprType, Homogeneous<ArgType,Horizontal>, internal::assign_op<Scalar,typename ArgType::Scalar>, Dense2Dense>
 {
   typedef Homogeneous<ArgType,Horizontal> SrcXprType;
-  static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename ArgType::Scalar> &)
+  EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,typename ArgType::Scalar> &)
   {
     dst.template leftCols<ArgType::ColsAtCompileTime>(src.nestedExpression().cols()) = src.nestedExpression();
     dst.col(dst.cols()-1).setOnes();
@@ -355,7 +355,7 @@ template<typename LhsArg, typename Rhs, int ProductTag>
 struct generic_product_impl<Homogeneous<LhsArg,Horizontal>, Rhs, HomogeneousShape, DenseShape, ProductTag>
 {
   template<typename Dest>
-  static void evalTo(Dest& dst, const Homogeneous<LhsArg,Horizontal>& lhs, const Rhs& rhs)
+  EIGEN_DEVICE_FUNC static void evalTo(Dest& dst, const Homogeneous<LhsArg,Horizontal>& lhs, const Rhs& rhs)
   {
     homogeneous_right_product_impl<Homogeneous<LhsArg,Horizontal>, Rhs>(lhs.nestedExpression(), rhs).evalTo(dst);
   }
@@ -396,7 +396,7 @@ template<typename Lhs, typename RhsArg, int ProductTag>
 struct generic_product_impl<Lhs, Homogeneous<RhsArg,Vertical>, DenseShape, HomogeneousShape, ProductTag>
 {
   template<typename Dest>
-  static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
+  EIGEN_DEVICE_FUNC static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
   {
     homogeneous_left_product_impl<Homogeneous<RhsArg,Vertical>, Lhs>(lhs, rhs.nestedExpression()).evalTo(dst);
   }
@@ -450,7 +450,7 @@ struct generic_product_impl<Transform<Scalar,Dim,Mode,Options>, Homogeneous<RhsA
 {
   typedef Transform<Scalar,Dim,Mode,Options> TransformType;
   template<typename Dest>
-  static void evalTo(Dest& dst, const TransformType& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
+  EIGEN_DEVICE_FUNC static void evalTo(Dest& dst, const TransformType& lhs, const Homogeneous<RhsArg,Vertical>& rhs)
   {
     homogeneous_left_product_impl<Homogeneous<RhsArg,Vertical>, TransformType>(lhs, rhs.nestedExpression()).evalTo(dst);
   }
diff --git a/Eigen/src/Geometry/Hyperplane.h b/Eigen/src/Geometry/Hyperplane.h
index cc89639b6..d66194287 100644
--- a/Eigen/src/Geometry/Hyperplane.h
+++ b/Eigen/src/Geometry/Hyperplane.h
@@ -50,21 +50,21 @@ public:
   typedef const Block<const Coefficients,AmbientDimAtCompileTime,1> ConstNormalReturnType;
 
   /** Default constructor without initialization */
-  inline Hyperplane() {}
+  EIGEN_DEVICE_FUNC inline Hyperplane() {}
   
   template<int OtherOptions>
-  Hyperplane(const Hyperplane<Scalar,AmbientDimAtCompileTime,OtherOptions>& other)
+  EIGEN_DEVICE_FUNC Hyperplane(const Hyperplane<Scalar,AmbientDimAtCompileTime,OtherOptions>& other)
    : m_coeffs(other.coeffs())
   {}
 
   /** Constructs a dynamic-size hyperplane with \a _dim the dimension
     * of the ambient space */
-  inline explicit Hyperplane(Index _dim) : m_coeffs(_dim+1) {}
+  EIGEN_DEVICE_FUNC inline explicit Hyperplane(Index _dim) : m_coeffs(_dim+1) {}
 
   /** Construct a plane from its normal \a n and a point \a e onto the plane.
     * \warning the vector normal is assumed to be normalized.
     */
-  inline Hyperplane(const VectorType& n, const VectorType& e)
+  EIGEN_DEVICE_FUNC inline Hyperplane(const VectorType& n, const VectorType& e)
     : m_coeffs(n.size()+1)
   {
     normal() = n;
@@ -75,7 +75,7 @@ public:
     * such that the algebraic equation of the plane is \f$ n \cdot x + d = 0 \f$.
     * \warning the vector normal is assumed to be normalized.
     */
-  inline Hyperplane(const VectorType& n, const Scalar& d)
+  EIGEN_DEVICE_FUNC inline Hyperplane(const VectorType& n, const Scalar& d)
     : m_coeffs(n.size()+1)
   {
     normal() = n;
@@ -85,7 +85,7 @@ public:
   /** Constructs a hyperplane passing through the two points. If the dimension of the ambient space
     * is greater than 2, then there isn't uniqueness, so an arbitrary choice is made.
     */
-  static inline Hyperplane Through(const VectorType& p0, const VectorType& p1)
+  EIGEN_DEVICE_FUNC static inline Hyperplane Through(const VectorType& p0, const VectorType& p1)
   {
     Hyperplane result(p0.size());
     result.normal() = (p1 - p0).unitOrthogonal();
@@ -96,7 +96,7 @@ public:
   /** Constructs a hyperplane passing through the three points. The dimension of the ambient space
     * is required to be exactly 3.
     */
-  static inline Hyperplane Through(const VectorType& p0, const VectorType& p1, const VectorType& p2)
+  EIGEN_DEVICE_FUNC static inline Hyperplane Through(const VectorType& p0, const VectorType& p1, const VectorType& p2)
   {
     EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 3)
     Hyperplane result(p0.size());
@@ -120,19 +120,19 @@ public:
     * so an arbitrary choice is made.
     */
   // FIXME to be consitent with the rest this could be implemented as a static Through function ??
-  explicit Hyperplane(const ParametrizedLine<Scalar, AmbientDimAtCompileTime>& parametrized)
+  EIGEN_DEVICE_FUNC explicit Hyperplane(const ParametrizedLine<Scalar, AmbientDimAtCompileTime>& parametrized)
   {
     normal() = parametrized.direction().unitOrthogonal();
     offset() = -parametrized.origin().dot(normal());
   }
 
-  ~Hyperplane() {}
+  EIGEN_DEVICE_FUNC ~Hyperplane() {}
 
   /** \returns the dimension in which the plane holds */
-  inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? m_coeffs.size()-1 : Index(AmbientDimAtCompileTime); }
+  EIGEN_DEVICE_FUNC inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? m_coeffs.size()-1 : Index(AmbientDimAtCompileTime); }
 
   /** normalizes \c *this */
-  void normalize(void)
+  EIGEN_DEVICE_FUNC void normalize(void)
   {
     m_coeffs /= normal().norm();
   }
@@ -140,45 +140,45 @@ public:
   /** \returns the signed distance between the plane \c *this and a point \a p.
     * \sa absDistance()
     */
-  inline Scalar signedDistance(const VectorType& p) const { return normal().dot(p) + offset(); }
+  EIGEN_DEVICE_FUNC inline Scalar signedDistance(const VectorType& p) const { return normal().dot(p) + offset(); }
 
   /** \returns the absolute distance between the plane \c *this and a point \a p.
     * \sa signedDistance()
     */
-  inline Scalar absDistance(const VectorType& p) const { using std::abs; return abs(signedDistance(p)); }
+  EIGEN_DEVICE_FUNC inline Scalar absDistance(const VectorType& p) const { EIGEN_USING_STD_MATH(abs) return abs(signedDistance(p)); }
 
   /** \returns the projection of a point \a p onto the plane \c *this.
     */
-  inline VectorType projection(const VectorType& p) const { return p - signedDistance(p) * normal(); }
+  EIGEN_DEVICE_FUNC inline VectorType projection(const VectorType& p) const { return p - signedDistance(p) * normal(); }
 
   /** \returns a constant reference to the unit normal vector of the plane, which corresponds
     * to the linear part of the implicit equation.
     */
-  inline ConstNormalReturnType normal() const { return ConstNormalReturnType(m_coeffs,0,0,dim(),1); }
+  EIGEN_DEVICE_FUNC inline ConstNormalReturnType normal() const { return ConstNormalReturnType(m_coeffs,0,0,dim(),1); }
 
   /** \returns a non-constant reference to the unit normal vector of the plane, which corresponds
     * to the linear part of the implicit equation.
     */
-  inline NormalReturnType normal() { return NormalReturnType(m_coeffs,0,0,dim(),1); }
+  EIGEN_DEVICE_FUNC inline NormalReturnType normal() { return NormalReturnType(m_coeffs,0,0,dim(),1); }
 
   /** \returns the distance to the origin, which is also the "constant term" of the implicit equation
     * \warning the vector normal is assumed to be normalized.
     */
-  inline const Scalar& offset() const { return m_coeffs.coeff(dim()); }
+  EIGEN_DEVICE_FUNC inline const Scalar& offset() const { return m_coeffs.coeff(dim()); }
 
   /** \returns a non-constant reference to the distance to the origin, which is also the constant part
     * of the implicit equation */
-  inline Scalar& offset() { return m_coeffs(dim()); }
+  EIGEN_DEVICE_FUNC inline Scalar& offset() { return m_coeffs(dim()); }
 
   /** \returns a constant reference to the coefficients c_i of the plane equation:
     * \f$ c_0*x_0 + ... + c_{d-1}*x_{d-1} + c_d = 0 \f$
     */
-  inline const Coefficients& coeffs() const { return m_coeffs; }
+  EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs; }
 
   /** \returns a non-constant reference to the coefficients c_i of the plane equation:
     * \f$ c_0*x_0 + ... + c_{d-1}*x_{d-1} + c_d = 0 \f$
     */
-  inline Coefficients& coeffs() { return m_coeffs; }
+  EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs; }
 
   /** \returns the intersection of *this with \a other.
     *
@@ -186,9 +186,9 @@ public:
     *
     * \note If \a other is approximately parallel to *this, this method will return any point on *this.
     */
-  VectorType intersection(const Hyperplane& other) const
+  EIGEN_DEVICE_FUNC VectorType intersection(const Hyperplane& other) const
   {
-    using std::abs;
+    EIGEN_USING_STD_MATH(abs)
     EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2)
     Scalar det = coeffs().coeff(0) * other.coeffs().coeff(1) - coeffs().coeff(1) * other.coeffs().coeff(0);
     // since the line equations ax+by=c are normalized with a^2+b^2=1, the following tests
@@ -215,7 +215,7 @@ public:
     *               or a more generic #Affine transformation. The default is #Affine.
     */
   template<typename XprType>
-  inline Hyperplane& transform(const MatrixBase<XprType>& mat, TransformTraits traits = Affine)
+  EIGEN_DEVICE_FUNC inline Hyperplane& transform(const MatrixBase<XprType>& mat, TransformTraits traits = Affine)
   {
     if (traits==Affine)
       normal() = mat.inverse().transpose() * normal();
@@ -236,7 +236,7 @@ public:
     *               Other kind of transformations are not supported.
     */
   template<int TrOptions>
-  inline Hyperplane& transform(const Transform<Scalar,AmbientDimAtCompileTime,Affine,TrOptions>& t,
+  EIGEN_DEVICE_FUNC inline Hyperplane& transform(const Transform<Scalar,AmbientDimAtCompileTime,Affine,TrOptions>& t,
                                 TransformTraits traits = Affine)
   {
     transform(t.linear(), traits);
@@ -250,7 +250,7 @@ public:
     * then this function smartly returns a const reference to \c *this.
     */
   template<typename NewScalarType>
-  inline typename internal::cast_return_type<Hyperplane,
+  EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Hyperplane,
            Hyperplane<NewScalarType,AmbientDimAtCompileTime,Options> >::type cast() const
   {
     return typename internal::cast_return_type<Hyperplane,
@@ -259,7 +259,7 @@ public:
 
   /** Copy constructor with scalar type conversion */
   template<typename OtherScalarType,int OtherOptions>
-  inline explicit Hyperplane(const Hyperplane<OtherScalarType,AmbientDimAtCompileTime,OtherOptions>& other)
+  EIGEN_DEVICE_FUNC inline explicit Hyperplane(const Hyperplane<OtherScalarType,AmbientDimAtCompileTime,OtherOptions>& other)
   { m_coeffs = other.coeffs().template cast<Scalar>(); }
 
   /** \returns \c true if \c *this is approximately equal to \a other, within the precision
@@ -267,7 +267,7 @@ public:
     *
     * \sa MatrixBase::isApprox() */
   template<int OtherOptions>
-  bool isApprox(const Hyperplane<Scalar,AmbientDimAtCompileTime,OtherOptions>& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
+  EIGEN_DEVICE_FUNC bool isApprox(const Hyperplane<Scalar,AmbientDimAtCompileTime,OtherOptions>& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
   { return m_coeffs.isApprox(other.m_coeffs, prec); }
 
 protected:
diff --git a/Eigen/src/Geometry/OrthoMethods.h b/Eigen/src/Geometry/OrthoMethods.h
index c3648f51f..a035e6310 100644
--- a/Eigen/src/Geometry/OrthoMethods.h
+++ b/Eigen/src/Geometry/OrthoMethods.h
@@ -27,7 +27,7 @@ namespace Eigen {
 template<typename Derived>
 template<typename OtherDerived>
 #ifndef EIGEN_PARSED_BY_DOXYGEN
-inline typename MatrixBase<Derived>::template cross_product_return_type<OtherDerived>::type
+EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::template cross_product_return_type<OtherDerived>::type
 #else
 inline typename MatrixBase<Derived>::PlainObject
 #endif
@@ -53,7 +53,7 @@ template< int Arch,typename VectorLhs,typename VectorRhs,
           typename Scalar = typename VectorLhs::Scalar,
           bool Vectorizable = bool((VectorLhs::Flags&VectorRhs::Flags)&PacketAccessBit)>
 struct cross3_impl {
-  static inline typename internal::plain_matrix_type<VectorLhs>::type
+  EIGEN_DEVICE_FUNC static inline typename internal::plain_matrix_type<VectorLhs>::type
   run(const VectorLhs& lhs, const VectorRhs& rhs)
   {
     return typename internal::plain_matrix_type<VectorLhs>::type(
@@ -78,7 +78,7 @@ struct cross3_impl {
   */
 template<typename Derived>
 template<typename OtherDerived>
-inline typename MatrixBase<Derived>::PlainObject
+EIGEN_DEVICE_FUNC inline typename MatrixBase<Derived>::PlainObject
 MatrixBase<Derived>::cross3(const MatrixBase<OtherDerived>& other) const
 {
   EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,4)
@@ -105,6 +105,7 @@ MatrixBase<Derived>::cross3(const MatrixBase<OtherDerived>& other) const
   * \sa MatrixBase::cross() */
 template<typename ExpressionType, int Direction>
 template<typename OtherDerived>
+EIGEN_DEVICE_FUNC 
 const typename VectorwiseOp<ExpressionType,Direction>::CrossReturnType
 VectorwiseOp<ExpressionType,Direction>::cross(const MatrixBase<OtherDerived>& other) const
 {
@@ -221,7 +222,7 @@ struct unitOrthogonal_selector<Derived,2>
   * \sa cross()
   */
 template<typename Derived>
-typename MatrixBase<Derived>::PlainObject
+EIGEN_DEVICE_FUNC typename MatrixBase<Derived>::PlainObject
 MatrixBase<Derived>::unitOrthogonal() const
 {
   EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
diff --git a/Eigen/src/Geometry/ParametrizedLine.h b/Eigen/src/Geometry/ParametrizedLine.h
index c43dce773..1e985d8cd 100644
--- a/Eigen/src/Geometry/ParametrizedLine.h
+++ b/Eigen/src/Geometry/ParametrizedLine.h
@@ -41,45 +41,45 @@ public:
   typedef Matrix<Scalar,AmbientDimAtCompileTime,1,Options> VectorType;
 
   /** Default constructor without initialization */
-  inline ParametrizedLine() {}
+  EIGEN_DEVICE_FUNC inline ParametrizedLine() {}
   
   template<int OtherOptions>
-  ParametrizedLine(const ParametrizedLine<Scalar,AmbientDimAtCompileTime,OtherOptions>& other)
+  EIGEN_DEVICE_FUNC ParametrizedLine(const ParametrizedLine<Scalar,AmbientDimAtCompileTime,OtherOptions>& other)
    : m_origin(other.origin()), m_direction(other.direction())
   {}
 
   /** Constructs a dynamic-size line with \a _dim the dimension
     * of the ambient space */
-  inline explicit ParametrizedLine(Index _dim) : m_origin(_dim), m_direction(_dim) {}
+  EIGEN_DEVICE_FUNC inline explicit ParametrizedLine(Index _dim) : m_origin(_dim), m_direction(_dim) {}
 
   /** Initializes a parametrized line of direction \a direction and origin \a origin.
     * \warning the vector direction is assumed to be normalized.
     */
-  ParametrizedLine(const VectorType& origin, const VectorType& direction)
+  EIGEN_DEVICE_FUNC ParametrizedLine(const VectorType& origin, const VectorType& direction)
     : m_origin(origin), m_direction(direction) {}
 
   template <int OtherOptions>
-  explicit ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane);
+  EIGEN_DEVICE_FUNC explicit ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane);
 
   /** Constructs a parametrized line going from \a p0 to \a p1. */
-  static inline ParametrizedLine Through(const VectorType& p0, const VectorType& p1)
+  EIGEN_DEVICE_FUNC static inline ParametrizedLine Through(const VectorType& p0, const VectorType& p1)
   { return ParametrizedLine(p0, (p1-p0).normalized()); }
 
-  ~ParametrizedLine() {}
+  EIGEN_DEVICE_FUNC ~ParametrizedLine() {}
 
   /** \returns the dimension in which the line holds */
-  inline Index dim() const { return m_direction.size(); }
+  EIGEN_DEVICE_FUNC inline Index dim() const { return m_direction.size(); }
 
-  const VectorType& origin() const { return m_origin; }
-  VectorType& origin() { return m_origin; }
+  EIGEN_DEVICE_FUNC const VectorType& origin() const { return m_origin; }
+  EIGEN_DEVICE_FUNC VectorType& origin() { return m_origin; }
 
-  const VectorType& direction() const { return m_direction; }
-  VectorType& direction() { return m_direction; }
+  EIGEN_DEVICE_FUNC const VectorType& direction() const { return m_direction; }
+  EIGEN_DEVICE_FUNC VectorType& direction() { return m_direction; }
 
   /** \returns the squared distance of a point \a p to its projection onto the line \c *this.
     * \sa distance()
     */
-  RealScalar squaredDistance(const VectorType& p) const
+  EIGEN_DEVICE_FUNC RealScalar squaredDistance(const VectorType& p) const
   {
     VectorType diff = p - origin();
     return (diff - direction().dot(diff) * direction()).squaredNorm();
@@ -87,22 +87,22 @@ public:
   /** \returns the distance of a point \a p to its projection onto the line \c *this.
     * \sa squaredDistance()
     */
-  RealScalar distance(const VectorType& p) const { using std::sqrt; return sqrt(squaredDistance(p)); }
+  EIGEN_DEVICE_FUNC RealScalar distance(const VectorType& p) const { EIGEN_USING_STD_MATH(sqrt) return sqrt(squaredDistance(p)); }
 
   /** \returns the projection of a point \a p onto the line \c *this. */
-  VectorType projection(const VectorType& p) const
+  EIGEN_DEVICE_FUNC VectorType projection(const VectorType& p) const
   { return origin() + direction().dot(p-origin()) * direction(); }
 
-  VectorType pointAt(const Scalar& t) const;
+  EIGEN_DEVICE_FUNC VectorType pointAt(const Scalar& t) const;
   
   template <int OtherOptions>
-  Scalar intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
+  EIGEN_DEVICE_FUNC Scalar intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
  
   template <int OtherOptions>
-  Scalar intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
+  EIGEN_DEVICE_FUNC Scalar intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
   
   template <int OtherOptions>
-  VectorType intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
+  EIGEN_DEVICE_FUNC VectorType intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const;
 
   /** \returns \c *this with scalar type casted to \a NewScalarType
     *
@@ -110,7 +110,7 @@ public:
     * then this function smartly returns a const reference to \c *this.
     */
   template<typename NewScalarType>
-  inline typename internal::cast_return_type<ParametrizedLine,
+  EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<ParametrizedLine,
            ParametrizedLine<NewScalarType,AmbientDimAtCompileTime,Options> >::type cast() const
   {
     return typename internal::cast_return_type<ParametrizedLine,
@@ -119,7 +119,7 @@ public:
 
   /** Copy constructor with scalar type conversion */
   template<typename OtherScalarType,int OtherOptions>
-  inline explicit ParametrizedLine(const ParametrizedLine<OtherScalarType,AmbientDimAtCompileTime,OtherOptions>& other)
+  EIGEN_DEVICE_FUNC inline explicit ParametrizedLine(const ParametrizedLine<OtherScalarType,AmbientDimAtCompileTime,OtherOptions>& other)
   {
     m_origin = other.origin().template cast<Scalar>();
     m_direction = other.direction().template cast<Scalar>();
@@ -129,7 +129,7 @@ public:
     * determined by \a prec.
     *
     * \sa MatrixBase::isApprox() */
-  bool isApprox(const ParametrizedLine& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
+  EIGEN_DEVICE_FUNC bool isApprox(const ParametrizedLine& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
   { return m_origin.isApprox(other.m_origin, prec) && m_direction.isApprox(other.m_direction, prec); }
 
 protected:
@@ -143,7 +143,7 @@ protected:
   */
 template <typename _Scalar, int _AmbientDim, int _Options>
 template <int OtherOptions>
-inline ParametrizedLine<_Scalar, _AmbientDim,_Options>::ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim,OtherOptions>& hyperplane)
+EIGEN_DEVICE_FUNC inline ParametrizedLine<_Scalar, _AmbientDim,_Options>::ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim,OtherOptions>& hyperplane)
 {
   EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2)
   direction() = hyperplane.normal().unitOrthogonal();
@@ -153,7 +153,7 @@ inline ParametrizedLine<_Scalar, _AmbientDim,_Options>::ParametrizedLine(const H
 /** \returns the point at \a t along this line
   */
 template <typename _Scalar, int _AmbientDim, int _Options>
-inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType
+EIGEN_DEVICE_FUNC inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType
 ParametrizedLine<_Scalar, _AmbientDim,_Options>::pointAt(const _Scalar& t) const
 {
   return origin() + (direction()*t); 
@@ -163,7 +163,7 @@ ParametrizedLine<_Scalar, _AmbientDim,_Options>::pointAt(const _Scalar& t) const
   */
 template <typename _Scalar, int _AmbientDim, int _Options>
 template <int OtherOptions>
-inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const
+EIGEN_DEVICE_FUNC inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const
 {
   return -(hyperplane.offset()+hyperplane.normal().dot(origin()))
           / hyperplane.normal().dot(direction());
@@ -175,7 +175,7 @@ inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionPara
   */
 template <typename _Scalar, int _AmbientDim, int _Options>
 template <int OtherOptions>
-inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const
+EIGEN_DEVICE_FUNC inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const
 {
   return intersectionParameter(hyperplane);
 }
@@ -184,7 +184,7 @@ inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersection(con
   */
 template <typename _Scalar, int _AmbientDim, int _Options>
 template <int OtherOptions>
-inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType
+EIGEN_DEVICE_FUNC inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType
 ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const
 {
   return pointAt(intersectionParameter(hyperplane));
diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h
index c4a0eabb5..932f149e3 100644
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -58,37 +58,37 @@ class QuaternionBase : public RotationBase<Derived, 3>
 
 
   /** \returns the \c x coefficient */
-  inline Scalar x() const { return this->derived().coeffs().coeff(0); }
+  EIGEN_DEVICE_FUNC inline Scalar x() const { return this->derived().coeffs().coeff(0); }
   /** \returns the \c y coefficient */
-  inline Scalar y() const { return this->derived().coeffs().coeff(1); }
+  EIGEN_DEVICE_FUNC inline Scalar y() const { return this->derived().coeffs().coeff(1); }
   /** \returns the \c z coefficient */
-  inline Scalar z() const { return this->derived().coeffs().coeff(2); }
+  EIGEN_DEVICE_FUNC inline Scalar z() const { return this->derived().coeffs().coeff(2); }
   /** \returns the \c w coefficient */
-  inline Scalar w() const { return this->derived().coeffs().coeff(3); }
+  EIGEN_DEVICE_FUNC inline Scalar w() const { return this->derived().coeffs().coeff(3); }
 
   /** \returns a reference to the \c x coefficient */
-  inline Scalar& x() { return this->derived().coeffs().coeffRef(0); }
+  EIGEN_DEVICE_FUNC inline Scalar& x() { return this->derived().coeffs().coeffRef(0); }
   /** \returns a reference to the \c y coefficient */
-  inline Scalar& y() { return this->derived().coeffs().coeffRef(1); }
+  EIGEN_DEVICE_FUNC inline Scalar& y() { return this->derived().coeffs().coeffRef(1); }
   /** \returns a reference to the \c z coefficient */
-  inline Scalar& z() { return this->derived().coeffs().coeffRef(2); }
+  EIGEN_DEVICE_FUNC inline Scalar& z() { return this->derived().coeffs().coeffRef(2); }
   /** \returns a reference to the \c w coefficient */
-  inline Scalar& w() { return this->derived().coeffs().coeffRef(3); }
+  EIGEN_DEVICE_FUNC inline Scalar& w() { return this->derived().coeffs().coeffRef(3); }
 
   /** \returns a read-only vector expression of the imaginary part (x,y,z) */
-  inline const VectorBlock<const Coefficients,3> vec() const { return coeffs().template head<3>(); }
+  EIGEN_DEVICE_FUNC inline const VectorBlock<const Coefficients,3> vec() const { return coeffs().template head<3>(); }
 
   /** \returns a vector expression of the imaginary part (x,y,z) */
-  inline VectorBlock<Coefficients,3> vec() { return coeffs().template head<3>(); }
+  EIGEN_DEVICE_FUNC inline VectorBlock<Coefficients,3> vec() { return coeffs().template head<3>(); }
 
   /** \returns a read-only vector expression of the coefficients (x,y,z,w) */
-  inline const typename internal::traits<Derived>::Coefficients& coeffs() const { return derived().coeffs(); }
+  EIGEN_DEVICE_FUNC inline const typename internal::traits<Derived>::Coefficients& coeffs() const { return derived().coeffs(); }
 
   /** \returns a vector expression of the coefficients (x,y,z,w) */
-  inline typename internal::traits<Derived>::Coefficients& coeffs() { return derived().coeffs(); }
+  EIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Coefficients& coeffs() { return derived().coeffs(); }
 
-  EIGEN_STRONG_INLINE QuaternionBase<Derived>& operator=(const QuaternionBase<Derived>& other);
-  template<class OtherDerived> EIGEN_STRONG_INLINE Derived& operator=(const QuaternionBase<OtherDerived>& other);
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE QuaternionBase<Derived>& operator=(const QuaternionBase<Derived>& other);
+  template<class OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const QuaternionBase<OtherDerived>& other);
 
 // disabled this copy operator as it is giving very strange compilation errors when compiling
 // test_stdvector with GCC 4.4.2. This looks like a GCC bug though, so feel free to re-enable it if it's
@@ -97,72 +97,72 @@ class QuaternionBase : public RotationBase<Derived, 3>
 //  Derived& operator=(const QuaternionBase& other)
 //  { return operator=<Derived>(other); }
 
-  Derived& operator=(const AngleAxisType& aa);
-  template<class OtherDerived> Derived& operator=(const MatrixBase<OtherDerived>& m);
+  EIGEN_DEVICE_FUNC Derived& operator=(const AngleAxisType& aa);
+  template<class OtherDerived> EIGEN_DEVICE_FUNC Derived& operator=(const MatrixBase<OtherDerived>& m);
 
   /** \returns a quaternion representing an identity rotation
     * \sa MatrixBase::Identity()
     */
-  static inline Quaternion<Scalar> Identity() { return Quaternion<Scalar>(Scalar(1), Scalar(0), Scalar(0), Scalar(0)); }
+  EIGEN_DEVICE_FUNC static inline Quaternion<Scalar> Identity() { return Quaternion<Scalar>(Scalar(1), Scalar(0), Scalar(0), Scalar(0)); }
 
   /** \sa QuaternionBase::Identity(), MatrixBase::setIdentity()
     */
-  inline QuaternionBase& setIdentity() { coeffs() << Scalar(0), Scalar(0), Scalar(0), Scalar(1); return *this; }
+  EIGEN_DEVICE_FUNC inline QuaternionBase& setIdentity() { coeffs() << Scalar(0), Scalar(0), Scalar(0), Scalar(1); return *this; }
 
   /** \returns the squared norm of the quaternion's coefficients
     * \sa QuaternionBase::norm(), MatrixBase::squaredNorm()
     */
-  inline Scalar squaredNorm() const { return coeffs().squaredNorm(); }
+  EIGEN_DEVICE_FUNC inline Scalar squaredNorm() const { return coeffs().squaredNorm(); }
 
   /** \returns the norm of the quaternion's coefficients
     * \sa QuaternionBase::squaredNorm(), MatrixBase::norm()
     */
-  inline Scalar norm() const { return coeffs().norm(); }
+  EIGEN_DEVICE_FUNC inline Scalar norm() const { return coeffs().norm(); }
 
   /** Normalizes the quaternion \c *this
     * \sa normalized(), MatrixBase::normalize() */
-  inline void normalize() { coeffs().normalize(); }
+  EIGEN_DEVICE_FUNC inline void normalize() { coeffs().normalize(); }
   /** \returns a normalized copy of \c *this
     * \sa normalize(), MatrixBase::normalized() */
-  inline Quaternion<Scalar> normalized() const { return Quaternion<Scalar>(coeffs().normalized()); }
+  EIGEN_DEVICE_FUNC inline Quaternion<Scalar> normalized() const { return Quaternion<Scalar>(coeffs().normalized()); }
 
     /** \returns the dot product of \c *this and \a other
     * Geometrically speaking, the dot product of two unit quaternions
     * corresponds to the cosine of half the angle between the two rotations.
     * \sa angularDistance()
     */
-  template<class OtherDerived> inline Scalar dot(const QuaternionBase<OtherDerived>& other) const { return coeffs().dot(other.coeffs()); }
+  template<class OtherDerived> EIGEN_DEVICE_FUNC inline Scalar dot(const QuaternionBase<OtherDerived>& other) const { return coeffs().dot(other.coeffs()); }
 
-  template<class OtherDerived> Scalar angularDistance(const QuaternionBase<OtherDerived>& other) const;
+  template<class OtherDerived> EIGEN_DEVICE_FUNC Scalar angularDistance(const QuaternionBase<OtherDerived>& other) const;
 
   /** \returns an equivalent 3x3 rotation matrix */
-  Matrix3 toRotationMatrix() const;
+  EIGEN_DEVICE_FUNC Matrix3 toRotationMatrix() const;
 
   /** \returns the quaternion which transform \a a into \a b through a rotation */
   template<typename Derived1, typename Derived2>
-  Derived& setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);
+  EIGEN_DEVICE_FUNC Derived& setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);
 
-  template<class OtherDerived> EIGEN_STRONG_INLINE Quaternion<Scalar> operator* (const QuaternionBase<OtherDerived>& q) const;
-  template<class OtherDerived> EIGEN_STRONG_INLINE Derived& operator*= (const QuaternionBase<OtherDerived>& q);
+  template<class OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Quaternion<Scalar> operator* (const QuaternionBase<OtherDerived>& q) const;
+  template<class OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator*= (const QuaternionBase<OtherDerived>& q);
 
   /** \returns the quaternion describing the inverse rotation */
-  Quaternion<Scalar> inverse() const;
+  EIGEN_DEVICE_FUNC Quaternion<Scalar> inverse() const;
 
   /** \returns the conjugated quaternion */
-  Quaternion<Scalar> conjugate() const;
+  EIGEN_DEVICE_FUNC Quaternion<Scalar> conjugate() const;
 
-  template<class OtherDerived> Quaternion<Scalar> slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const;
+  template<class OtherDerived> EIGEN_DEVICE_FUNC Quaternion<Scalar> slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const;
 
   /** \returns \c true if \c *this is approximately equal to \a other, within the precision
     * determined by \a prec.
     *
     * \sa MatrixBase::isApprox() */
   template<class OtherDerived>
-  bool isApprox(const QuaternionBase<OtherDerived>& other, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const
+  EIGEN_DEVICE_FUNC bool isApprox(const QuaternionBase<OtherDerived>& other, const RealScalar& prec = NumTraits<Scalar>::dummy_precision()) const
   { return coeffs().isApprox(other.coeffs(), prec); }
 
   /** return the result vector of \a v through the rotation*/
-  EIGEN_STRONG_INLINE Vector3 _transformVector(const Vector3& v) const;
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Vector3 _transformVector(const Vector3& v) const;
 
   /** \returns \c *this with scalar type casted to \a NewScalarType
     *
@@ -170,7 +170,7 @@ class QuaternionBase : public RotationBase<Derived, 3>
     * then this function smartly returns a const reference to \c *this.
     */
   template<typename NewScalarType>
-  inline typename internal::cast_return_type<Derived,Quaternion<NewScalarType> >::type cast() const
+  EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Derived,Quaternion<NewScalarType> >::type cast() const
   {
     return typename internal::cast_return_type<Derived,Quaternion<NewScalarType> >::type(derived());
   }
@@ -239,7 +239,7 @@ public:
   typedef typename Base::AngleAxisType AngleAxisType;
 
   /** Default constructor leaving the quaternion uninitialized. */
-  inline Quaternion() {}
+  EIGEN_DEVICE_FUNC inline Quaternion() {}
 
   /** Constructs and initializes the quaternion \f$ w+xi+yj+zk \f$ from
     * its four coefficients \a w, \a x, \a y and \a z.
@@ -248,36 +248,36 @@ public:
     * while internally the coefficients are stored in the following order:
     * [\c x, \c y, \c z, \c w]
     */
-  inline Quaternion(const Scalar& w, const Scalar& x, const Scalar& y, const Scalar& z) : m_coeffs(x, y, z, w){}
+  EIGEN_DEVICE_FUNC inline Quaternion(const Scalar& w, const Scalar& x, const Scalar& y, const Scalar& z) : m_coeffs(x, y, z, w){}
 
   /** Constructs and initialize a quaternion from the array data */
-  explicit inline Quaternion(const Scalar* data) : m_coeffs(data) {}
+  EIGEN_DEVICE_FUNC explicit inline Quaternion(const Scalar* data) : m_coeffs(data) {}
 
   /** Copy constructor */
-  template<class Derived> EIGEN_STRONG_INLINE Quaternion(const QuaternionBase<Derived>& other) { this->Base::operator=(other); }
+  template<class Derived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Quaternion(const QuaternionBase<Derived>& other) { this->Base::operator=(other); }
 
   /** Constructs and initializes a quaternion from the angle-axis \a aa */
-  explicit inline Quaternion(const AngleAxisType& aa) { *this = aa; }
+  EIGEN_DEVICE_FUNC explicit inline Quaternion(const AngleAxisType& aa) { *this = aa; }
 
   /** Constructs and initializes a quaternion from either:
     *  - a rotation matrix expression,
     *  - a 4D vector expression representing quaternion coefficients.
     */
   template<typename Derived>
-  explicit inline Quaternion(const MatrixBase<Derived>& other) { *this = other; }
+  EIGEN_DEVICE_FUNC explicit inline Quaternion(const MatrixBase<Derived>& other) { *this = other; }
 
   /** Explicit copy constructor with scalar conversion */
   template<typename OtherScalar, int OtherOptions>
-  explicit inline Quaternion(const Quaternion<OtherScalar, OtherOptions>& other)
+  EIGEN_DEVICE_FUNC explicit inline Quaternion(const Quaternion<OtherScalar, OtherOptions>& other)
   { m_coeffs = other.coeffs().template cast<Scalar>(); }
 
-  static Quaternion UnitRandom();
+  EIGEN_DEVICE_FUNC static Quaternion UnitRandom();
 
   template<typename Derived1, typename Derived2>
-  static Quaternion FromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);
+  EIGEN_DEVICE_FUNC static Quaternion FromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b);
 
-  inline Coefficients& coeffs() { return m_coeffs;}
-  inline const Coefficients& coeffs() const { return m_coeffs;}
+  EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs;}
+  EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs;}
 
   EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(NeedsAlignment))
   
@@ -357,9 +357,9 @@ class Map<const Quaternion<_Scalar>, _Options >
       * \code *coeffs == {x, y, z, w} \endcode
       *
       * If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. */
-    explicit EIGEN_STRONG_INLINE Map(const Scalar* coeffs) : m_coeffs(coeffs) {}
+    EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE Map(const Scalar* coeffs) : m_coeffs(coeffs) {}
 
-    inline const Coefficients& coeffs() const { return m_coeffs;}
+    EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs;}
 
   protected:
     const Coefficients m_coeffs;
@@ -394,10 +394,10 @@ class Map<Quaternion<_Scalar>, _Options >
       * \code *coeffs == {x, y, z, w} \endcode
       *
       * If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. */
-    explicit EIGEN_STRONG_INLINE Map(Scalar* coeffs) : m_coeffs(coeffs) {}
+    EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE Map(Scalar* coeffs) : m_coeffs(coeffs) {}
 
-    inline Coefficients& coeffs() { return m_coeffs; }
-    inline const Coefficients& coeffs() const { return m_coeffs; }
+    EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs; }
+    EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs; }
 
   protected:
     Coefficients m_coeffs;
@@ -425,7 +425,7 @@ typedef Map<Quaternion<double>, Aligned>  QuaternionMapAlignedd;
 namespace internal {
 template<int Arch, class Derived1, class Derived2, typename Scalar, int _Options> struct quat_product
 {
-  static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived1>& a, const QuaternionBase<Derived2>& b){
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived1>& a, const QuaternionBase<Derived2>& b){
     return Quaternion<Scalar>
     (
       a.w() * b.w() - a.x() * b.x() - a.y() * b.y() - a.z() * b.z(),
@@ -440,7 +440,7 @@ template<int Arch, class Derived1, class Derived2, typename Scalar, int _Options
 /** \returns the concatenation of two rotations as a quaternion-quaternion product */
 template <class Derived>
 template <class OtherDerived>
-EIGEN_STRONG_INLINE Quaternion<typename internal::traits<Derived>::Scalar>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Quaternion<typename internal::traits<Derived>::Scalar>
 QuaternionBase<Derived>::operator* (const QuaternionBase<OtherDerived>& other) const
 {
   EIGEN_STATIC_ASSERT((internal::is_same<typename Derived::Scalar, typename OtherDerived::Scalar>::value),
@@ -453,7 +453,7 @@ QuaternionBase<Derived>::operator* (const QuaternionBase<OtherDerived>& other) c
 /** \sa operator*(Quaternion) */
 template <class Derived>
 template <class OtherDerived>
-EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator*= (const QuaternionBase<OtherDerived>& other)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator*= (const QuaternionBase<OtherDerived>& other)
 {
   derived() = derived() * other.derived();
   return derived();
@@ -467,7 +467,7 @@ EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator*= (const Quaterni
   *   - Via a Matrix3: 24 + 15n
   */
 template <class Derived>
-EIGEN_STRONG_INLINE typename QuaternionBase<Derived>::Vector3
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename QuaternionBase<Derived>::Vector3
 QuaternionBase<Derived>::_transformVector(const Vector3& v) const
 {
     // Note that this algorithm comes from the optimization by hand
@@ -481,7 +481,7 @@ QuaternionBase<Derived>::_transformVector(const Vector3& v) const
 }
 
 template<class Derived>
-EIGEN_STRONG_INLINE QuaternionBase<Derived>& QuaternionBase<Derived>::operator=(const QuaternionBase<Derived>& other)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE QuaternionBase<Derived>& QuaternionBase<Derived>::operator=(const QuaternionBase<Derived>& other)
 {
   coeffs() = other.coeffs();
   return derived();
@@ -489,7 +489,7 @@ EIGEN_STRONG_INLINE QuaternionBase<Derived>& QuaternionBase<Derived>::operator=(
 
 template<class Derived>
 template<class OtherDerived>
-EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const QuaternionBase<OtherDerived>& other)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const QuaternionBase<OtherDerived>& other)
 {
   coeffs() = other.coeffs();
   return derived();
@@ -498,10 +498,10 @@ EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const Quaternion
 /** Set \c *this from an angle-axis \a aa and returns a reference to \c *this
   */
 template<class Derived>
-EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const AngleAxisType& aa)
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const AngleAxisType& aa)
 {
-  using std::cos;
-  using std::sin;
+  EIGEN_USING_STD_MATH(cos)
+  EIGEN_USING_STD_MATH(sin)
   Scalar ha = Scalar(0.5)*aa.angle(); // Scalar(0.5) to suppress precision loss warnings
   this->w() = cos(ha);
   this->vec() = sin(ha) * aa.axis();
@@ -516,7 +516,7 @@ EIGEN_STRONG_INLINE Derived& QuaternionBase<Derived>::operator=(const AngleAxisT
 
 template<class Derived>
 template<class MatrixDerived>
-inline Derived& QuaternionBase<Derived>::operator=(const MatrixBase<MatrixDerived>& xpr)
+EIGEN_DEVICE_FUNC inline Derived& QuaternionBase<Derived>::operator=(const MatrixBase<MatrixDerived>& xpr)
 {
   EIGEN_STATIC_ASSERT((internal::is_same<typename Derived::Scalar, typename MatrixDerived::Scalar>::value),
    YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
@@ -528,7 +528,7 @@ inline Derived& QuaternionBase<Derived>::operator=(const MatrixBase<MatrixDerive
   * be normalized, otherwise the result is undefined.
   */
 template<class Derived>
-inline typename QuaternionBase<Derived>::Matrix3
+EIGEN_DEVICE_FUNC inline typename QuaternionBase<Derived>::Matrix3
 QuaternionBase<Derived>::toRotationMatrix(void) const
 {
   // NOTE if inlined, then gcc 4.2 and 4.4 get rid of the temporary (not gcc 4.3 !!)
@@ -575,9 +575,9 @@ QuaternionBase<Derived>::toRotationMatrix(void) const
   */
 template<class Derived>
 template<typename Derived1, typename Derived2>
-inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
+EIGEN_DEVICE_FUNC inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
 {
-  using std::sqrt;
+  EIGEN_USING_STD_MATH(sqrt)
   Vector3 v0 = a.normalized();
   Vector3 v1 = b.normalized();
   Scalar c = v1.dot(v0);
@@ -616,11 +616,11 @@ inline Derived& QuaternionBase<Derived>::setFromTwoVectors(const MatrixBase<Deri
   * \note The implementation is based on http://planning.cs.uiuc.edu/node198.html
   */
 template<typename Scalar, int Options>
-Quaternion<Scalar,Options> Quaternion<Scalar,Options>::UnitRandom()
+EIGEN_DEVICE_FUNC Quaternion<Scalar,Options> Quaternion<Scalar,Options>::UnitRandom()
 {
-  using std::sqrt;
-  using std::sin;
-  using std::cos;
+  EIGEN_USING_STD_MATH(sqrt)
+  EIGEN_USING_STD_MATH(sin)
+  EIGEN_USING_STD_MATH(cos)
   const Scalar u1 = internal::random<Scalar>(0, 1),
                u2 = internal::random<Scalar>(0, 2*EIGEN_PI),
                u3 = internal::random<Scalar>(0, 2*EIGEN_PI);
@@ -642,7 +642,7 @@ Quaternion<Scalar,Options> Quaternion<Scalar,Options>::UnitRandom()
   */
 template<typename Scalar, int Options>
 template<typename Derived1, typename Derived2>
-Quaternion<Scalar,Options> Quaternion<Scalar,Options>::FromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
+EIGEN_DEVICE_FUNC Quaternion<Scalar,Options> Quaternion<Scalar,Options>::FromTwoVectors(const MatrixBase<Derived1>& a, const MatrixBase<Derived2>& b)
 {
     Quaternion quat;
     quat.setFromTwoVectors(a, b);
@@ -657,7 +657,7 @@ Quaternion<Scalar,Options> Quaternion<Scalar,Options>::FromTwoVectors(const Matr
   * \sa QuaternionBase::conjugate()
   */
 template <class Derived>
-inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Derived>::inverse() const
+EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Derived>::inverse() const
 {
   // FIXME should this function be called multiplicativeInverse and conjugate() be called inverse() or opposite()  ??
   Scalar n2 = this->squaredNorm();
@@ -674,7 +674,7 @@ inline Quaternion<typename internal::traits<Derived>::Scalar> QuaternionBase<Der
 namespace internal {
 template<int Arch, class Derived, typename Scalar, int _Options> struct quat_conj
 {
-  static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived>& q){
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion<Scalar> run(const QuaternionBase<Derived>& q){
     return Quaternion<Scalar>(q.w(),-q.x(),-q.y(),-q.z());
   }
 };
@@ -687,7 +687,7 @@ template<int Arch, class Derived, typename Scalar, int _Options> struct quat_con
   * \sa Quaternion2::inverse()
   */
 template <class Derived>
-inline Quaternion<typename internal::traits<Derived>::Scalar>
+EIGEN_DEVICE_FUNC inline Quaternion<typename internal::traits<Derived>::Scalar>
 QuaternionBase<Derived>::conjugate() const
 {
   return internal::quat_conj<Architecture::Target, Derived,
@@ -701,11 +701,11 @@ QuaternionBase<Derived>::conjugate() const
   */
 template <class Derived>
 template <class OtherDerived>
-inline typename internal::traits<Derived>::Scalar
+EIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Scalar
 QuaternionBase<Derived>::angularDistance(const QuaternionBase<OtherDerived>& other) const
 {
-  using std::atan2;
-  using std::abs;
+  EIGEN_USING_STD_MATH(atan2)
+  EIGEN_USING_STD_MATH(abs)
   Quaternion<Scalar> d = (*this) * other.conjugate();
   return Scalar(2) * atan2( d.vec().norm(), abs(d.w()) );
 }
@@ -720,12 +720,12 @@ QuaternionBase<Derived>::angularDistance(const QuaternionBase<OtherDerived>& oth
   */
 template <class Derived>
 template <class OtherDerived>
-Quaternion<typename internal::traits<Derived>::Scalar>
+EIGEN_DEVICE_FUNC Quaternion<typename internal::traits<Derived>::Scalar>
 QuaternionBase<Derived>::slerp(const Scalar& t, const QuaternionBase<OtherDerived>& other) const
 {
-  using std::acos;
-  using std::sin;
-  using std::abs;
+  EIGEN_USING_STD_MATH(acos)
+  EIGEN_USING_STD_MATH(sin)
+  EIGEN_USING_STD_MATH(abs)
   const Scalar one = Scalar(1) - NumTraits<Scalar>::epsilon();
   Scalar d = this->dot(other);
   Scalar absD = abs(d);
@@ -759,10 +759,10 @@ template<typename Other>
 struct quaternionbase_assign_impl<Other,3,3>
 {
   typedef typename Other::Scalar Scalar;
-  template<class Derived> static inline void run(QuaternionBase<Derived>& q, const Other& a_mat)
+  template<class Derived> EIGEN_DEVICE_FUNC static inline void run(QuaternionBase<Derived>& q, const Other& a_mat)
   {
     const typename internal::nested_eval<Other,2>::type mat(a_mat);
-    using std::sqrt;
+    EIGEN_USING_STD_MATH(sqrt)
     // This algorithm comes from  "Quaternion Calculus and Fast Animation",
     // Ken Shoemake, 1987 SIGGRAPH course notes
     Scalar t = mat.trace();
@@ -800,7 +800,7 @@ template<typename Other>
 struct quaternionbase_assign_impl<Other,4,1>
 {
   typedef typename Other::Scalar Scalar;
-  template<class Derived> static inline void run(QuaternionBase<Derived>& q, const Other& vec)
+  template<class Derived> EIGEN_DEVICE_FUNC static inline void run(QuaternionBase<Derived>& q, const Other& vec)
   {
     q.coeffs() = vec;
   }
diff --git a/Eigen/src/Geometry/Rotation2D.h b/Eigen/src/Geometry/Rotation2D.h
index b42a7df70..884b7d0ee 100644
--- a/Eigen/src/Geometry/Rotation2D.h
+++ b/Eigen/src/Geometry/Rotation2D.h
@@ -59,35 +59,35 @@ protected:
 public:
 
   /** Construct a 2D counter clock wise rotation from the angle \a a in radian. */
-  explicit inline Rotation2D(const Scalar& a) : m_angle(a) {}
+  EIGEN_DEVICE_FUNC explicit inline Rotation2D(const Scalar& a) : m_angle(a) {}
   
   /** Default constructor wihtout initialization. The represented rotation is undefined. */
-  Rotation2D() {}
+  EIGEN_DEVICE_FUNC Rotation2D() {}
 
   /** Construct a 2D rotation from a 2x2 rotation matrix \a mat.
     *
     * \sa fromRotationMatrix()
     */
   template<typename Derived>
-  explicit Rotation2D(const MatrixBase<Derived>& m)
+  EIGEN_DEVICE_FUNC explicit Rotation2D(const MatrixBase<Derived>& m)
   {
     fromRotationMatrix(m.derived());
   }
 
   /** \returns the rotation angle */
-  inline Scalar angle() const { return m_angle; }
+  EIGEN_DEVICE_FUNC inline Scalar angle() const { return m_angle; }
 
   /** \returns a read-write reference to the rotation angle */
-  inline Scalar& angle() { return m_angle; }
+  EIGEN_DEVICE_FUNC inline Scalar& angle() { return m_angle; }
   
   /** \returns the rotation angle in [0,2pi] */
-  inline Scalar smallestPositiveAngle() const {
+  EIGEN_DEVICE_FUNC inline Scalar smallestPositiveAngle() const {
     Scalar tmp = numext::fmod(m_angle,Scalar(2*EIGEN_PI));
     return tmp<Scalar(0) ? tmp + Scalar(2*EIGEN_PI) : tmp;
   }
   
   /** \returns the rotation angle in [-pi,pi] */
-  inline Scalar smallestAngle() const {
+  EIGEN_DEVICE_FUNC inline Scalar smallestAngle() const {
     Scalar tmp = numext::fmod(m_angle,Scalar(2*EIGEN_PI));
     if(tmp>Scalar(EIGEN_PI))       tmp -= Scalar(2*EIGEN_PI);
     else if(tmp<-Scalar(EIGEN_PI)) tmp += Scalar(2*EIGEN_PI);
@@ -95,23 +95,23 @@ public:
   }
 
   /** \returns the inverse rotation */
-  inline Rotation2D inverse() const { return Rotation2D(-m_angle); }
+  EIGEN_DEVICE_FUNC inline Rotation2D inverse() const { return Rotation2D(-m_angle); }
 
   /** Concatenates two rotations */
-  inline Rotation2D operator*(const Rotation2D& other) const
+  EIGEN_DEVICE_FUNC inline Rotation2D operator*(const Rotation2D& other) const
   { return Rotation2D(m_angle + other.m_angle); }
 
   /** Concatenates two rotations */
-  inline Rotation2D& operator*=(const Rotation2D& other)
+  EIGEN_DEVICE_FUNC inline Rotation2D& operator*=(const Rotation2D& other)
   { m_angle += other.m_angle; return *this; }
 
   /** Applies the rotation to a 2D vector */
-  Vector2 operator* (const Vector2& vec) const
+  EIGEN_DEVICE_FUNC Vector2 operator* (const Vector2& vec) const
   { return toRotationMatrix() * vec; }
   
   template<typename Derived>
-  Rotation2D& fromRotationMatrix(const MatrixBase<Derived>& m);
-  Matrix2 toRotationMatrix() const;
+  EIGEN_DEVICE_FUNC Rotation2D& fromRotationMatrix(const MatrixBase<Derived>& m);
+  EIGEN_DEVICE_FUNC Matrix2 toRotationMatrix() const;
 
   /** Set \c *this from a 2x2 rotation matrix \a mat.
     * In other words, this function extract the rotation angle from the rotation matrix.
@@ -121,13 +121,13 @@ public:
     * \sa fromRotationMatrix()
     */
   template<typename Derived>
-  Rotation2D& operator=(const  MatrixBase<Derived>& m)
+  EIGEN_DEVICE_FUNC Rotation2D& operator=(const  MatrixBase<Derived>& m)
   { return fromRotationMatrix(m.derived()); }
 
   /** \returns the spherical interpolation between \c *this and \a other using
     * parameter \a t. It is in fact equivalent to a linear interpolation.
     */
-  inline Rotation2D slerp(const Scalar& t, const Rotation2D& other) const
+  EIGEN_DEVICE_FUNC inline Rotation2D slerp(const Scalar& t, const Rotation2D& other) const
   {
     Scalar dist = Rotation2D(other.m_angle-m_angle).smallestAngle();
     return Rotation2D(m_angle + dist*t);
@@ -139,23 +139,23 @@ public:
     * then this function smartly returns a const reference to \c *this.
     */
   template<typename NewScalarType>
-  inline typename internal::cast_return_type<Rotation2D,Rotation2D<NewScalarType> >::type cast() const
+  EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Rotation2D,Rotation2D<NewScalarType> >::type cast() const
   { return typename internal::cast_return_type<Rotation2D,Rotation2D<NewScalarType> >::type(*this); }
 
   /** Copy constructor with scalar type conversion */
   template<typename OtherScalarType>
-  inline explicit Rotation2D(const Rotation2D<OtherScalarType>& other)
+  EIGEN_DEVICE_FUNC inline explicit Rotation2D(const Rotation2D<OtherScalarType>& other)
   {
     m_angle = Scalar(other.angle());
   }
 
-  static inline Rotation2D Identity() { return Rotation2D(0); }
+  EIGEN_DEVICE_FUNC static inline Rotation2D Identity() { return Rotation2D(0); }
 
   /** \returns \c true if \c *this is approximately equal to \a other, within the precision
     * determined by \a prec.
     *
     * \sa MatrixBase::isApprox() */
-  bool isApprox(const Rotation2D& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
+  EIGEN_DEVICE_FUNC bool isApprox(const Rotation2D& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
   { return internal::isApprox(m_angle,other.m_angle, prec); }
   
 };
@@ -173,9 +173,9 @@ typedef Rotation2D<double> Rotation2Dd;
   */
 template<typename Scalar>
 template<typename Derived>
-Rotation2D<Scalar>& Rotation2D<Scalar>::fromRotationMatrix(const MatrixBase<Derived>& mat)
+EIGEN_DEVICE_FUNC Rotation2D<Scalar>& Rotation2D<Scalar>::fromRotationMatrix(const MatrixBase<Derived>& mat)
 {
-  using std::atan2;
+  EIGEN_USING_STD_MATH(atan2)
   EIGEN_STATIC_ASSERT(Derived::RowsAtCompileTime==2 && Derived::ColsAtCompileTime==2,YOU_MADE_A_PROGRAMMING_MISTAKE)
   m_angle = atan2(mat.coeff(1,0), mat.coeff(0,0));
   return *this;
@@ -185,10 +185,10 @@ Rotation2D<Scalar>& Rotation2D<Scalar>::fromRotationMatrix(const MatrixBase<Deri
   */
 template<typename Scalar>
 typename Rotation2D<Scalar>::Matrix2
-Rotation2D<Scalar>::toRotationMatrix(void) const
+EIGEN_DEVICE_FUNC Rotation2D<Scalar>::toRotationMatrix(void) const
 {
-  using std::sin;
-  using std::cos;
+  EIGEN_USING_STD_MATH(sin)
+  EIGEN_USING_STD_MATH(cos)
   Scalar sinA = sin(m_angle);
   Scalar cosA = cos(m_angle);
   return (Matrix2() << cosA, -sinA, sinA, cosA).finished();
diff --git a/Eigen/src/Geometry/RotationBase.h b/Eigen/src/Geometry/RotationBase.h
index fadfd9151..f0ee0bd03 100644
--- a/Eigen/src/Geometry/RotationBase.h
+++ b/Eigen/src/Geometry/RotationBase.h
@@ -38,26 +38,26 @@ class RotationBase
     typedef Matrix<Scalar,Dim,1> VectorType;
 
   public:
-    inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
-    inline Derived& derived() { return *static_cast<Derived*>(this); }
+    EIGEN_DEVICE_FUNC inline const Derived& derived() const { return *static_cast<const Derived*>(this); }
+    EIGEN_DEVICE_FUNC inline Derived& derived() { return *static_cast<Derived*>(this); }
 
     /** \returns an equivalent rotation matrix */
-    inline RotationMatrixType toRotationMatrix() const { return derived().toRotationMatrix(); }
+    EIGEN_DEVICE_FUNC inline RotationMatrixType toRotationMatrix() const { return derived().toRotationMatrix(); }
 
     /** \returns an equivalent rotation matrix 
       * This function is added to be conform with the Transform class' naming scheme.
       */
-    inline RotationMatrixType matrix() const { return derived().toRotationMatrix(); }
+    EIGEN_DEVICE_FUNC inline RotationMatrixType matrix() const { return derived().toRotationMatrix(); }
 
     /** \returns the inverse rotation */
-    inline Derived inverse() const { return derived().inverse(); }
+    EIGEN_DEVICE_FUNC inline Derived inverse() const { return derived().inverse(); }
 
     /** \returns the concatenation of the rotation \c *this with a translation \a t */
-    inline Transform<Scalar,Dim,Isometry> operator*(const Translation<Scalar,Dim>& t) const
+    EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Isometry> operator*(const Translation<Scalar,Dim>& t) const
     { return Transform<Scalar,Dim,Isometry>(*this) * t; }
 
     /** \returns the concatenation of the rotation \c *this with a uniform scaling \a s */
-    inline RotationMatrixType operator*(const UniformScaling<Scalar>& s) const
+    EIGEN_DEVICE_FUNC inline RotationMatrixType operator*(const UniformScaling<Scalar>& s) const
     { return toRotationMatrix() * s.factor(); }
 
     /** \returns the concatenation of the rotation \c *this with a generic expression \a e
@@ -67,17 +67,17 @@ class RotationBase
       *  - a vector of size Dim
       */
     template<typename OtherDerived>
-    EIGEN_STRONG_INLINE typename internal::rotation_base_generic_product_selector<Derived,OtherDerived,OtherDerived::IsVectorAtCompileTime>::ReturnType
+    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::rotation_base_generic_product_selector<Derived,OtherDerived,OtherDerived::IsVectorAtCompileTime>::ReturnType
     operator*(const EigenBase<OtherDerived>& e) const
     { return internal::rotation_base_generic_product_selector<Derived,OtherDerived>::run(derived(), e.derived()); }
 
     /** \returns the concatenation of a linear transformation \a l with the rotation \a r */
     template<typename OtherDerived> friend
-    inline RotationMatrixType operator*(const EigenBase<OtherDerived>& l, const Derived& r)
+    EIGEN_DEVICE_FUNC inline RotationMatrixType operator*(const EigenBase<OtherDerived>& l, const Derived& r)
     { return l.derived() * r.toRotationMatrix(); }
 
     /** \returns the concatenation of a scaling \a l with the rotation \a r */
-    friend inline Transform<Scalar,Dim,Affine> operator*(const DiagonalMatrix<Scalar,Dim>& l, const Derived& r)
+    EIGEN_DEVICE_FUNC friend inline Transform<Scalar,Dim,Affine> operator*(const DiagonalMatrix<Scalar,Dim>& l, const Derived& r)
     { 
       Transform<Scalar,Dim,Affine> res(r);
       res.linear().applyOnTheLeft(l);
@@ -86,11 +86,11 @@ class RotationBase
 
     /** \returns the concatenation of the rotation \c *this with a transformation \a t */
     template<int Mode, int Options>
-    inline Transform<Scalar,Dim,Mode> operator*(const Transform<Scalar,Dim,Mode,Options>& t) const
+    EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode> operator*(const Transform<Scalar,Dim,Mode,Options>& t) const
     { return toRotationMatrix() * t; }
 
     template<typename OtherVectorType>
-    inline VectorType _transformVector(const OtherVectorType& v) const
+    EIGEN_DEVICE_FUNC inline VectorType _transformVector(const OtherVectorType& v) const
     { return toRotationMatrix() * v; }
 };
 
@@ -102,7 +102,7 @@ struct rotation_base_generic_product_selector<RotationDerived,MatrixType,false>
 {
   enum { Dim = RotationDerived::Dim };
   typedef Matrix<typename RotationDerived::Scalar,Dim,Dim> ReturnType;
-  static inline ReturnType run(const RotationDerived& r, const MatrixType& m)
+  EIGEN_DEVICE_FUNC static inline ReturnType run(const RotationDerived& r, const MatrixType& m)
   { return r.toRotationMatrix() * m; }
 };
 
@@ -110,7 +110,7 @@ template<typename RotationDerived, typename Scalar, int Dim, int MaxDim>
 struct rotation_base_generic_product_selector< RotationDerived, DiagonalMatrix<Scalar,Dim,MaxDim>, false >
 {
   typedef Transform<Scalar,Dim,Affine> ReturnType;
-  static inline ReturnType run(const RotationDerived& r, const DiagonalMatrix<Scalar,Dim,MaxDim>& m)
+  EIGEN_DEVICE_FUNC static inline ReturnType run(const RotationDerived& r, const DiagonalMatrix<Scalar,Dim,MaxDim>& m)
   {
     ReturnType res(r);
     res.linear() *= m;
@@ -123,7 +123,7 @@ struct rotation_base_generic_product_selector<RotationDerived,OtherVectorType,tr
 {
   enum { Dim = RotationDerived::Dim };
   typedef Matrix<typename RotationDerived::Scalar,Dim,1> ReturnType;
-  static EIGEN_STRONG_INLINE ReturnType run(const RotationDerived& r, const OtherVectorType& v)
+  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE ReturnType run(const RotationDerived& r, const OtherVectorType& v)
   {
     return r._transformVector(v);
   }
@@ -137,7 +137,7 @@ struct rotation_base_generic_product_selector<RotationDerived,OtherVectorType,tr
   */
 template<typename _Scalar, int _Rows, int _Cols, int _Storage, int _MaxRows, int _MaxCols>
 template<typename OtherDerived>
-Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>
+EIGEN_DEVICE_FUNC Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>
 ::Matrix(const RotationBase<OtherDerived,ColsAtCompileTime>& r)
 {
   EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Matrix,int(OtherDerived::Dim),int(OtherDerived::Dim))
@@ -150,7 +150,7 @@ Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>
   */
 template<typename _Scalar, int _Rows, int _Cols, int _Storage, int _MaxRows, int _MaxCols>
 template<typename OtherDerived>
-Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>&
+EIGEN_DEVICE_FUNC Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>&
 Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>
 ::operator=(const RotationBase<OtherDerived,ColsAtCompileTime>& r)
 {
@@ -179,20 +179,20 @@ namespace internal {
   * \sa class Transform, class Rotation2D, class Quaternion, class AngleAxis
   */
 template<typename Scalar, int Dim>
-static inline Matrix<Scalar,2,2> toRotationMatrix(const Scalar& s)
+EIGEN_DEVICE_FUNC static inline Matrix<Scalar,2,2> toRotationMatrix(const Scalar& s)
 {
   EIGEN_STATIC_ASSERT(Dim==2,YOU_MADE_A_PROGRAMMING_MISTAKE)
   return Rotation2D<Scalar>(s).toRotationMatrix();
 }
 
 template<typename Scalar, int Dim, typename OtherDerived>
-static inline Matrix<Scalar,Dim,Dim> toRotationMatrix(const RotationBase<OtherDerived,Dim>& r)
+EIGEN_DEVICE_FUNC static inline Matrix<Scalar,Dim,Dim> toRotationMatrix(const RotationBase<OtherDerived,Dim>& r)
 {
   return r.toRotationMatrix();
 }
 
 template<typename Scalar, int Dim, typename OtherDerived>
-static inline const MatrixBase<OtherDerived>& toRotationMatrix(const MatrixBase<OtherDerived>& mat)
+EIGEN_DEVICE_FUNC static inline const MatrixBase<OtherDerived>& toRotationMatrix(const MatrixBase<OtherDerived>& mat)
 {
   EIGEN_STATIC_ASSERT(OtherDerived::RowsAtCompileTime==Dim && OtherDerived::ColsAtCompileTime==Dim,
     YOU_MADE_A_PROGRAMMING_MISTAKE)
diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h
index 8f6c62d63..3f31ee45d 100644
--- a/Eigen/src/Geometry/Transform.h
+++ b/Eigen/src/Geometry/Transform.h
@@ -253,43 +253,43 @@ public:
 
   /** Default constructor without initialization of the meaningful coefficients.
     * If Mode==Affine, then the last row is set to [0 ... 0 1] */
-  inline Transform()
+  EIGEN_DEVICE_FUNC inline Transform()
   {
     check_template_params();
     internal::transform_make_affine<(int(Mode)==Affine) ? Affine : AffineCompact>::run(m_matrix);
   }
 
-  inline Transform(const Transform& other)
+  EIGEN_DEVICE_FUNC inline Transform(const Transform& other)
   {
     check_template_params();
     m_matrix = other.m_matrix;
   }
 
-  inline explicit Transform(const TranslationType& t)
+  EIGEN_DEVICE_FUNC inline explicit Transform(const TranslationType& t)
   {
     check_template_params();
     *this = t;
   }
-  inline explicit Transform(const UniformScaling<Scalar>& s)
+  EIGEN_DEVICE_FUNC inline explicit Transform(const UniformScaling<Scalar>& s)
   {
     check_template_params();
     *this = s;
   }
   template<typename Derived>
-  inline explicit Transform(const RotationBase<Derived, Dim>& r)
+  EIGEN_DEVICE_FUNC inline explicit Transform(const RotationBase<Derived, Dim>& r)
   {
     check_template_params();
     *this = r;
   }
 
-  inline Transform& operator=(const Transform& other)
+  EIGEN_DEVICE_FUNC inline Transform& operator=(const Transform& other)
   { m_matrix = other.m_matrix; return *this; }
 
   typedef internal::transform_take_affine_part<Transform> take_affine_part;
 
   /** Constructs and initializes a transformation from a Dim^2 or a (Dim+1)^2 matrix. */
   template<typename OtherDerived>
-  inline explicit Transform(const EigenBase<OtherDerived>& other)
+  EIGEN_DEVICE_FUNC inline explicit Transform(const EigenBase<OtherDerived>& other)
   {
     EIGEN_STATIC_ASSERT((internal::is_same<Scalar,typename OtherDerived::Scalar>::value),
       YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY);
@@ -300,7 +300,7 @@ public:
 
   /** Set \c *this from a Dim^2 or (Dim+1)^2 matrix. */
   template<typename OtherDerived>
-  inline Transform& operator=(const EigenBase<OtherDerived>& other)
+  EIGEN_DEVICE_FUNC inline Transform& operator=(const EigenBase<OtherDerived>& other)
   {
     EIGEN_STATIC_ASSERT((internal::is_same<Scalar,typename OtherDerived::Scalar>::value),
       YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY);
@@ -310,7 +310,7 @@ public:
   }
   
   template<int OtherOptions>
-  inline Transform(const Transform<Scalar,Dim,Mode,OtherOptions>& other)
+  EIGEN_DEVICE_FUNC inline Transform(const Transform<Scalar,Dim,Mode,OtherOptions>& other)
   {
     check_template_params();
     // only the options change, we can directly copy the matrices
@@ -318,7 +318,7 @@ public:
   }
 
   template<int OtherMode,int OtherOptions>
-  inline Transform(const Transform<Scalar,Dim,OtherMode,OtherOptions>& other)
+  EIGEN_DEVICE_FUNC inline Transform(const Transform<Scalar,Dim,OtherMode,OtherOptions>& other)
   {
     check_template_params();
     // prevent conversions as:
@@ -359,14 +359,14 @@ public:
   }
 
   template<typename OtherDerived>
-  Transform(const ReturnByValue<OtherDerived>& other)
+  EIGEN_DEVICE_FUNC Transform(const ReturnByValue<OtherDerived>& other)
   {
     check_template_params();
     other.evalTo(*this);
   }
 
   template<typename OtherDerived>
-  Transform& operator=(const ReturnByValue<OtherDerived>& other)
+  EIGEN_DEVICE_FUNC Transform& operator=(const ReturnByValue<OtherDerived>& other)
   {
     other.evalTo(*this);
     return *this;
@@ -381,35 +381,35 @@ public:
   inline QTransform toQTransform(void) const;
   #endif
   
-  Index rows() const { return int(Mode)==int(Projective) ? m_matrix.cols() : (m_matrix.cols()-1); }
-  Index cols() const { return m_matrix.cols(); }
+  EIGEN_DEVICE_FUNC Index rows() const { return int(Mode)==int(Projective) ? m_matrix.cols() : (m_matrix.cols()-1); }
+  EIGEN_DEVICE_FUNC Index cols() const { return m_matrix.cols(); }
 
   /** shortcut for m_matrix(row,col);
     * \sa MatrixBase::operator(Index,Index) const */
-  inline Scalar operator() (Index row, Index col) const { return m_matrix(row,col); }
+  EIGEN_DEVICE_FUNC inline Scalar operator() (Index row, Index col) const { return m_matrix(row,col); }
   /** shortcut for m_matrix(row,col);
     * \sa MatrixBase::operator(Index,Index) */
-  inline Scalar& operator() (Index row, Index col) { return m_matrix(row,col); }
+  EIGEN_DEVICE_FUNC inline Scalar& operator() (Index row, Index col) { return m_matrix(row,col); }
 
   /** \returns a read-only expression of the transformation matrix */
-  inline const MatrixType& matrix() const { return m_matrix; }
+  EIGEN_DEVICE_FUNC inline const MatrixType& matrix() const { return m_matrix; }
   /** \returns a writable expression of the transformation matrix */
-  inline MatrixType& matrix() { return m_matrix; }
+  EIGEN_DEVICE_FUNC inline MatrixType& matrix() { return m_matrix; }
 
   /** \returns a read-only expression of the linear part of the transformation */
-  inline ConstLinearPart linear() const { return ConstLinearPart(m_matrix,0,0); }
+  EIGEN_DEVICE_FUNC inline ConstLinearPart linear() const { return ConstLinearPart(m_matrix,0,0); }
   /** \returns a writable expression of the linear part of the transformation */
-  inline LinearPart linear() { return LinearPart(m_matrix,0,0); }
+  EIGEN_DEVICE_FUNC inline LinearPart linear() { return LinearPart(m_matrix,0,0); }
 
   /** \returns a read-only expression of the Dim x HDim affine part of the transformation */
-  inline ConstAffinePart affine() const { return take_affine_part::run(m_matrix); }
+  EIGEN_DEVICE_FUNC inline ConstAffinePart affine() const { return take_affine_part::run(m_matrix); }
   /** \returns a writable expression of the Dim x HDim affine part of the transformation */
-  inline AffinePart affine() { return take_affine_part::run(m_matrix); }
+  EIGEN_DEVICE_FUNC inline AffinePart affine() { return take_affine_part::run(m_matrix); }
 
   /** \returns a read-only expression of the translation vector of the transformation */
-  inline ConstTranslationPart translation() const { return ConstTranslationPart(m_matrix,0,Dim); }
+  EIGEN_DEVICE_FUNC inline ConstTranslationPart translation() const { return ConstTranslationPart(m_matrix,0,Dim); }
   /** \returns a writable expression of the translation vector of the transformation */
-  inline TranslationPart translation() { return TranslationPart(m_matrix,0,Dim); }
+  EIGEN_DEVICE_FUNC inline TranslationPart translation() { return TranslationPart(m_matrix,0,Dim); }
 
   /** \returns an expression of the product between the transform \c *this and a matrix expression \a other.
     *
@@ -437,7 +437,7 @@ public:
     */
   // note: this function is defined here because some compilers cannot find the respective declaration
   template<typename OtherDerived>
-  EIGEN_STRONG_INLINE const typename internal::transform_right_product_impl<Transform, OtherDerived>::ResultType
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename internal::transform_right_product_impl<Transform, OtherDerived>::ResultType
   operator * (const EigenBase<OtherDerived> &other) const
   { return internal::transform_right_product_impl<Transform, OtherDerived>::run(*this,other.derived()); }
 
@@ -449,7 +449,7 @@ public:
     * \li a general transformation matrix of size Dim+1 x Dim+1.
     */
   template<typename OtherDerived> friend
-  inline const typename internal::transform_left_product_impl<OtherDerived,Mode,Options,_Dim,_Dim+1>::ResultType
+  EIGEN_DEVICE_FUNC inline const typename internal::transform_left_product_impl<OtherDerived,Mode,Options,_Dim,_Dim+1>::ResultType
     operator * (const EigenBase<OtherDerived> &a, const Transform &b)
   { return internal::transform_left_product_impl<OtherDerived,Mode,Options,Dim,HDim>::run(a.derived(),b); }
 
@@ -460,7 +460,7 @@ public:
     * mode is no isometry. In that case, the returned transform is an affinity.
     */
   template<typename DiagonalDerived>
-  inline const TransformTimeDiagonalReturnType
+  EIGEN_DEVICE_FUNC inline const TransformTimeDiagonalReturnType
     operator * (const DiagonalBase<DiagonalDerived> &b) const
   {
     TransformTimeDiagonalReturnType res(*this);
@@ -475,7 +475,7 @@ public:
     * mode is no isometry. In that case, the returned transform is an affinity.
     */
   template<typename DiagonalDerived>
-  friend inline TransformTimeDiagonalReturnType
+  EIGEN_DEVICE_FUNC friend inline TransformTimeDiagonalReturnType
     operator * (const DiagonalBase<DiagonalDerived> &a, const Transform &b)
   {
     TransformTimeDiagonalReturnType res;
@@ -487,10 +487,10 @@ public:
   }
 
   template<typename OtherDerived>
-  inline Transform& operator*=(const EigenBase<OtherDerived>& other) { return *this = *this * other; }
+  EIGEN_DEVICE_FUNC inline Transform& operator*=(const EigenBase<OtherDerived>& other) { return *this = *this * other; }
 
   /** Concatenates two transformations */
-  inline const Transform operator * (const Transform& other) const
+  EIGEN_DEVICE_FUNC inline const Transform operator * (const Transform& other) const
   {
     return internal::transform_transform_product_impl<Transform,Transform>::run(*this,other);
   }
@@ -522,7 +522,7 @@ public:
   #else
   /** Concatenates two different transformations */
   template<int OtherMode,int OtherOptions>
-  inline typename internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::ResultType
+  EIGEN_DEVICE_FUNC inline typename internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::ResultType
     operator * (const Transform<Scalar,Dim,OtherMode,OtherOptions>& other) const
   {
     return internal::transform_transform_product_impl<Transform,Transform<Scalar,Dim,OtherMode,OtherOptions> >::run(*this,other);
@@ -530,47 +530,61 @@ public:
   #endif
 
   /** \sa MatrixBase::setIdentity() */
-  void setIdentity() { m_matrix.setIdentity(); }
+  EIGEN_DEVICE_FUNC void setIdentity() { m_matrix.setIdentity(); }
 
   /**
    * \brief Returns an identity transformation.
    * \todo In the future this function should be returning a Transform expression.
    */
-  static const Transform Identity()
+  EIGEN_DEVICE_FUNC static const Transform Identity()
   {
     return Transform(MatrixType::Identity());
   }
 
   template<typename OtherDerived>
+  EIGEN_DEVICE_FUNC 
   inline Transform& scale(const MatrixBase<OtherDerived> &other);
 
   template<typename OtherDerived>
+  EIGEN_DEVICE_FUNC
   inline Transform& prescale(const MatrixBase<OtherDerived> &other);
 
-  inline Transform& scale(const Scalar& s);
-  inline Transform& prescale(const Scalar& s);
+  EIGEN_DEVICE_FUNC inline Transform& scale(const Scalar& s);
+  EIGEN_DEVICE_FUNC inline Transform& prescale(const Scalar& s);
 
   template<typename OtherDerived>
+  EIGEN_DEVICE_FUNC
   inline Transform& translate(const MatrixBase<OtherDerived> &other);
 
   template<typename OtherDerived>
+  EIGEN_DEVICE_FUNC
   inline Transform& pretranslate(const MatrixBase<OtherDerived> &other);
 
   template<typename RotationType>
+  EIGEN_DEVICE_FUNC
   inline Transform& rotate(const RotationType& rotation);
 
   template<typename RotationType>
+  EIGEN_DEVICE_FUNC
   inline Transform& prerotate(const RotationType& rotation);
 
-  Transform& shear(const Scalar& sx, const Scalar& sy);
-  Transform& preshear(const Scalar& sx, const Scalar& sy);
+  EIGEN_DEVICE_FUNC Transform& shear(const Scalar& sx, const Scalar& sy);
+  EIGEN_DEVICE_FUNC Transform& preshear(const Scalar& sx, const Scalar& sy);
 
-  inline Transform& operator=(const TranslationType& t);
+  EIGEN_DEVICE_FUNC inline Transform& operator=(const TranslationType& t);
+  
+  EIGEN_DEVICE_FUNC
   inline Transform& operator*=(const TranslationType& t) { return translate(t.vector()); }
-  inline Transform operator*(const TranslationType& t) const;
+  
+  EIGEN_DEVICE_FUNC inline Transform operator*(const TranslationType& t) const;
 
+  EIGEN_DEVICE_FUNC 
   inline Transform& operator=(const UniformScaling<Scalar>& t);
+  
+  EIGEN_DEVICE_FUNC
   inline Transform& operator*=(const UniformScaling<Scalar>& s) { return scale(s.factor()); }
+  
+  EIGEN_DEVICE_FUNC
   inline TransformTimeDiagonalReturnType operator*(const UniformScaling<Scalar>& s) const
   {
     TransformTimeDiagonalReturnType res = *this;
@@ -578,31 +592,36 @@ public:
     return res;
   }
 
+  EIGEN_DEVICE_FUNC
   inline Transform& operator*=(const DiagonalMatrix<Scalar,Dim>& s) { linearExt() *= s; return *this; }
 
   template<typename Derived>
-  inline Transform& operator=(const RotationBase<Derived,Dim>& r);
+  EIGEN_DEVICE_FUNC inline Transform& operator=(const RotationBase<Derived,Dim>& r);
   template<typename Derived>
-  inline Transform& operator*=(const RotationBase<Derived,Dim>& r) { return rotate(r.toRotationMatrix()); }
+  EIGEN_DEVICE_FUNC inline Transform& operator*=(const RotationBase<Derived,Dim>& r) { return rotate(r.toRotationMatrix()); }
   template<typename Derived>
-  inline Transform operator*(const RotationBase<Derived,Dim>& r) const;
+  EIGEN_DEVICE_FUNC inline Transform operator*(const RotationBase<Derived,Dim>& r) const;
 
-  const LinearMatrixType rotation() const;
+  EIGEN_DEVICE_FUNC const LinearMatrixType rotation() const;
   template<typename RotationMatrixType, typename ScalingMatrixType>
+  EIGEN_DEVICE_FUNC
   void computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const;
   template<typename ScalingMatrixType, typename RotationMatrixType>
+  EIGEN_DEVICE_FUNC
   void computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const;
 
   template<typename PositionDerived, typename OrientationType, typename ScaleDerived>
+  EIGEN_DEVICE_FUNC
   Transform& fromPositionOrientationScale(const MatrixBase<PositionDerived> &position,
     const OrientationType& orientation, const MatrixBase<ScaleDerived> &scale);
 
+  EIGEN_DEVICE_FUNC
   inline Transform inverse(TransformTraits traits = (TransformTraits)Mode) const;
 
   /** \returns a const pointer to the column major internal matrix */
-  const Scalar* data() const { return m_matrix.data(); }
+  EIGEN_DEVICE_FUNC const Scalar* data() const { return m_matrix.data(); }
   /** \returns a non-const pointer to the column major internal matrix */
-  Scalar* data() { return m_matrix.data(); }
+  EIGEN_DEVICE_FUNC Scalar* data() { return m_matrix.data(); }
 
   /** \returns \c *this with scalar type casted to \a NewScalarType
     *
@@ -610,12 +629,12 @@ public:
     * then this function smartly returns a const reference to \c *this.
     */
   template<typename NewScalarType>
-  inline typename internal::cast_return_type<Transform,Transform<NewScalarType,Dim,Mode,Options> >::type cast() const
+  EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Transform,Transform<NewScalarType,Dim,Mode,Options> >::type cast() const
   { return typename internal::cast_return_type<Transform,Transform<NewScalarType,Dim,Mode,Options> >::type(*this); }
 
   /** Copy constructor with scalar type conversion */
   template<typename OtherScalarType>
-  inline explicit Transform(const Transform<OtherScalarType,Dim,Mode,Options>& other)
+  EIGEN_DEVICE_FUNC inline explicit Transform(const Transform<OtherScalarType,Dim,Mode,Options>& other)
   {
     check_template_params();
     m_matrix = other.matrix().template cast<Scalar>();
@@ -625,12 +644,12 @@ public:
     * determined by \a prec.
     *
     * \sa MatrixBase::isApprox() */
-  bool isApprox(const Transform& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
+  EIGEN_DEVICE_FUNC bool isApprox(const Transform& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
   { return m_matrix.isApprox(other.m_matrix, prec); }
 
   /** Sets the last row to [0 ... 0 1]
     */
-  void makeAffine()
+  EIGEN_DEVICE_FUNC void makeAffine()
   {
     internal::transform_make_affine<int(Mode)>::run(m_matrix);
   }
@@ -639,26 +658,26 @@ public:
     * \returns the Dim x Dim linear part if the transformation is affine,
     *          and the HDim x Dim part for projective transformations.
     */
-  inline Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,Dim> linearExt()
+  EIGEN_DEVICE_FUNC inline Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,Dim> linearExt()
   { return m_matrix.template block<int(Mode)==int(Projective)?HDim:Dim,Dim>(0,0); }
   /** \internal
     * \returns the Dim x Dim linear part if the transformation is affine,
     *          and the HDim x Dim part for projective transformations.
     */
-  inline const Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,Dim> linearExt() const
+  EIGEN_DEVICE_FUNC inline const Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,Dim> linearExt() const
   { return m_matrix.template block<int(Mode)==int(Projective)?HDim:Dim,Dim>(0,0); }
 
   /** \internal
     * \returns the translation part if the transformation is affine,
     *          and the last column for projective transformations.
     */
-  inline Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,1> translationExt()
+  EIGEN_DEVICE_FUNC inline Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,1> translationExt()
   { return m_matrix.template block<int(Mode)==int(Projective)?HDim:Dim,1>(0,Dim); }
   /** \internal
     * \returns the translation part if the transformation is affine,
     *          and the last column for projective transformations.
     */
-  inline const Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,1> translationExt() const
+  EIGEN_DEVICE_FUNC inline const Block<MatrixType,int(Mode)==int(Projective)?HDim:Dim,1> translationExt() const
   { return m_matrix.template block<int(Mode)==int(Projective)?HDim:Dim,1>(0,Dim); }
 
 
@@ -668,7 +687,7 @@ public:
   
 protected:
   #ifndef EIGEN_PARSED_BY_DOXYGEN
-    static EIGEN_STRONG_INLINE void check_template_params()
+    EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void check_template_params()
     {
       EIGEN_STATIC_ASSERT((Options & (DontAlign|RowMajor)) == Options, INVALID_MATRIX_TEMPLATE_PARAMETERS)
     }
@@ -821,7 +840,7 @@ QTransform Transform<Scalar,Dim,Mode,Options>::toQTransform(void) const
   */
 template<typename Scalar, int Dim, int Mode, int Options>
 template<typename OtherDerived>
-Transform<Scalar,Dim,Mode,Options>&
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
 Transform<Scalar,Dim,Mode,Options>::scale(const MatrixBase<OtherDerived> &other)
 {
   EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
@@ -835,7 +854,7 @@ Transform<Scalar,Dim,Mode,Options>::scale(const MatrixBase<OtherDerived> &other)
   * \sa prescale(Scalar)
   */
 template<typename Scalar, int Dim, int Mode, int Options>
-inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::scale(const Scalar& s)
+EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::scale(const Scalar& s)
 {
   EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
   linearExt() *= s;
@@ -848,7 +867,7 @@ inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::s
   */
 template<typename Scalar, int Dim, int Mode, int Options>
 template<typename OtherDerived>
-Transform<Scalar,Dim,Mode,Options>&
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
 Transform<Scalar,Dim,Mode,Options>::prescale(const MatrixBase<OtherDerived> &other)
 {
   EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
@@ -862,7 +881,7 @@ Transform<Scalar,Dim,Mode,Options>::prescale(const MatrixBase<OtherDerived> &oth
   * \sa scale(Scalar)
   */
 template<typename Scalar, int Dim, int Mode, int Options>
-inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::prescale(const Scalar& s)
+EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::prescale(const Scalar& s)
 {
   EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS)
   m_matrix.template topRows<Dim>() *= s;
@@ -875,7 +894,7 @@ inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::p
   */
 template<typename Scalar, int Dim, int Mode, int Options>
 template<typename OtherDerived>
-Transform<Scalar,Dim,Mode,Options>&
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
 Transform<Scalar,Dim,Mode,Options>::translate(const MatrixBase<OtherDerived> &other)
 {
   EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
@@ -889,7 +908,7 @@ Transform<Scalar,Dim,Mode,Options>::translate(const MatrixBase<OtherDerived> &ot
   */
 template<typename Scalar, int Dim, int Mode, int Options>
 template<typename OtherDerived>
-Transform<Scalar,Dim,Mode,Options>&
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
 Transform<Scalar,Dim,Mode,Options>::pretranslate(const MatrixBase<OtherDerived> &other)
 {
   EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim))
@@ -919,7 +938,7 @@ Transform<Scalar,Dim,Mode,Options>::pretranslate(const MatrixBase<OtherDerived>
   */
 template<typename Scalar, int Dim, int Mode, int Options>
 template<typename RotationType>
-Transform<Scalar,Dim,Mode,Options>&
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
 Transform<Scalar,Dim,Mode,Options>::rotate(const RotationType& rotation)
 {
   linearExt() *= internal::toRotationMatrix<Scalar,Dim>(rotation);
@@ -935,7 +954,7 @@ Transform<Scalar,Dim,Mode,Options>::rotate(const RotationType& rotation)
   */
 template<typename Scalar, int Dim, int Mode, int Options>
 template<typename RotationType>
-Transform<Scalar,Dim,Mode,Options>&
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
 Transform<Scalar,Dim,Mode,Options>::prerotate(const RotationType& rotation)
 {
   m_matrix.template block<Dim,HDim>(0,0) = internal::toRotationMatrix<Scalar,Dim>(rotation)
@@ -949,7 +968,7 @@ Transform<Scalar,Dim,Mode,Options>::prerotate(const RotationType& rotation)
   * \sa preshear()
   */
 template<typename Scalar, int Dim, int Mode, int Options>
-Transform<Scalar,Dim,Mode,Options>&
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
 Transform<Scalar,Dim,Mode,Options>::shear(const Scalar& sx, const Scalar& sy)
 {
   EIGEN_STATIC_ASSERT(int(Dim)==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
@@ -965,7 +984,7 @@ Transform<Scalar,Dim,Mode,Options>::shear(const Scalar& sx, const Scalar& sy)
   * \sa shear()
   */
 template<typename Scalar, int Dim, int Mode, int Options>
-Transform<Scalar,Dim,Mode,Options>&
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
 Transform<Scalar,Dim,Mode,Options>::preshear(const Scalar& sx, const Scalar& sy)
 {
   EIGEN_STATIC_ASSERT(int(Dim)==2, YOU_MADE_A_PROGRAMMING_MISTAKE)
@@ -979,7 +998,7 @@ Transform<Scalar,Dim,Mode,Options>::preshear(const Scalar& sx, const Scalar& sy)
 ******************************************************/
 
 template<typename Scalar, int Dim, int Mode, int Options>
-inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const TranslationType& t)
+EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const TranslationType& t)
 {
   linear().setIdentity();
   translation() = t.vector();
@@ -988,7 +1007,7 @@ inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::o
 }
 
 template<typename Scalar, int Dim, int Mode, int Options>
-inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::operator*(const TranslationType& t) const
+EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::operator*(const TranslationType& t) const
 {
   Transform res = *this;
   res.translate(t.vector());
@@ -996,7 +1015,7 @@ inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::op
 }
 
 template<typename Scalar, int Dim, int Mode, int Options>
-inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const UniformScaling<Scalar>& s)
+EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const UniformScaling<Scalar>& s)
 {
   m_matrix.setZero();
   linear().diagonal().fill(s.factor());
@@ -1006,7 +1025,7 @@ inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::o
 
 template<typename Scalar, int Dim, int Mode, int Options>
 template<typename Derived>
-inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const RotationBase<Derived,Dim>& r)
+EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::operator=(const RotationBase<Derived,Dim>& r)
 {
   linear() = internal::toRotationMatrix<Scalar,Dim>(r);
   translation().setZero();
@@ -1016,7 +1035,7 @@ inline Transform<Scalar,Dim,Mode,Options>& Transform<Scalar,Dim,Mode,Options>::o
 
 template<typename Scalar, int Dim, int Mode, int Options>
 template<typename Derived>
-inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::operator*(const RotationBase<Derived,Dim>& r) const
+EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::operator*(const RotationBase<Derived,Dim>& r) const
 {
   Transform res = *this;
   res.rotate(r.derived());
@@ -1035,7 +1054,7 @@ inline Transform<Scalar,Dim,Mode,Options> Transform<Scalar,Dim,Mode,Options>::op
   * \sa computeRotationScaling(), computeScalingRotation(), class SVD
   */
 template<typename Scalar, int Dim, int Mode, int Options>
-const typename Transform<Scalar,Dim,Mode,Options>::LinearMatrixType
+EIGEN_DEVICE_FUNC const typename Transform<Scalar,Dim,Mode,Options>::LinearMatrixType
 Transform<Scalar,Dim,Mode,Options>::rotation() const
 {
   LinearMatrixType result;
@@ -1057,7 +1076,7 @@ Transform<Scalar,Dim,Mode,Options>::rotation() const
   */
 template<typename Scalar, int Dim, int Mode, int Options>
 template<typename RotationMatrixType, typename ScalingMatrixType>
-void Transform<Scalar,Dim,Mode,Options>::computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const
+EIGEN_DEVICE_FUNC void Transform<Scalar,Dim,Mode,Options>::computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const
 {
   JacobiSVD<LinearMatrixType> svd(linear(), ComputeFullU | ComputeFullV);
 
@@ -1086,7 +1105,7 @@ void Transform<Scalar,Dim,Mode,Options>::computeRotationScaling(RotationMatrixTy
   */
 template<typename Scalar, int Dim, int Mode, int Options>
 template<typename ScalingMatrixType, typename RotationMatrixType>
-void Transform<Scalar,Dim,Mode,Options>::computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const
+EIGEN_DEVICE_FUNC void Transform<Scalar,Dim,Mode,Options>::computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const
 {
   JacobiSVD<LinearMatrixType> svd(linear(), ComputeFullU | ComputeFullV);
 
@@ -1107,7 +1126,7 @@ void Transform<Scalar,Dim,Mode,Options>::computeScalingRotation(ScalingMatrixTyp
   */
 template<typename Scalar, int Dim, int Mode, int Options>
 template<typename PositionDerived, typename OrientationType, typename ScaleDerived>
-Transform<Scalar,Dim,Mode,Options>&
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>&
 Transform<Scalar,Dim,Mode,Options>::fromPositionOrientationScale(const MatrixBase<PositionDerived> &position,
   const OrientationType& orientation, const MatrixBase<ScaleDerived> &scale)
 {
@@ -1124,7 +1143,7 @@ template<int Mode>
 struct transform_make_affine
 {
   template<typename MatrixType>
-  static void run(MatrixType &mat)
+  EIGEN_DEVICE_FUNC static void run(MatrixType &mat)
   {
     static const int Dim = MatrixType::ColsAtCompileTime-1;
     mat.template block<1,Dim>(Dim,0).setZero();
@@ -1135,21 +1154,21 @@ struct transform_make_affine
 template<>
 struct transform_make_affine<AffineCompact>
 {
-  template<typename MatrixType> static void run(MatrixType &) { }
+  template<typename MatrixType> EIGEN_DEVICE_FUNC static void run(MatrixType &) { }
 };
     
 // selector needed to avoid taking the inverse of a 3x4 matrix
 template<typename TransformType, int Mode=TransformType::Mode>
 struct projective_transform_inverse
 {
-  static inline void run(const TransformType&, TransformType&)
+  EIGEN_DEVICE_FUNC static inline void run(const TransformType&, TransformType&)
   {}
 };
 
 template<typename TransformType>
 struct projective_transform_inverse<TransformType, Projective>
 {
-  static inline void run(const TransformType& m, TransformType& res)
+  EIGEN_DEVICE_FUNC static inline void run(const TransformType& m, TransformType& res)
   {
     res.matrix() = m.matrix().inverse();
   }
@@ -1179,7 +1198,7 @@ struct projective_transform_inverse<TransformType, Projective>
   * \sa MatrixBase::inverse()
   */
 template<typename Scalar, int Dim, int Mode, int Options>
-Transform<Scalar,Dim,Mode,Options>
+EIGEN_DEVICE_FUNC Transform<Scalar,Dim,Mode,Options>
 Transform<Scalar,Dim,Mode,Options>::inverse(TransformTraits hint) const
 {
   Transform res;
diff --git a/Eigen/src/Geometry/Translation.h b/Eigen/src/Geometry/Translation.h
index b9b9a590c..51d9a82eb 100644
--- a/Eigen/src/Geometry/Translation.h
+++ b/Eigen/src/Geometry/Translation.h
@@ -51,16 +51,16 @@ protected:
 public:
 
   /** Default constructor without initialization. */
-  Translation() {}
+  EIGEN_DEVICE_FUNC Translation() {}
   /**  */
-  inline Translation(const Scalar& sx, const Scalar& sy)
+  EIGEN_DEVICE_FUNC inline Translation(const Scalar& sx, const Scalar& sy)
   {
     eigen_assert(Dim==2);
     m_coeffs.x() = sx;
     m_coeffs.y() = sy;
   }
   /**  */
-  inline Translation(const Scalar& sx, const Scalar& sy, const Scalar& sz)
+  EIGEN_DEVICE_FUNC inline Translation(const Scalar& sx, const Scalar& sy, const Scalar& sz)
   {
     eigen_assert(Dim==3);
     m_coeffs.x() = sx;
@@ -68,48 +68,48 @@ public:
     m_coeffs.z() = sz;
   }
   /** Constructs and initialize the translation transformation from a vector of translation coefficients */
-  explicit inline Translation(const VectorType& vector) : m_coeffs(vector) {}
+  EIGEN_DEVICE_FUNC explicit inline Translation(const VectorType& vector) : m_coeffs(vector) {}
 
   /** \brief Retruns the x-translation by value. **/
-  inline Scalar x() const { return m_coeffs.x(); }
+  EIGEN_DEVICE_FUNC inline Scalar x() const { return m_coeffs.x(); }
   /** \brief Retruns the y-translation by value. **/
-  inline Scalar y() const { return m_coeffs.y(); }
+  EIGEN_DEVICE_FUNC inline Scalar y() const { return m_coeffs.y(); }
   /** \brief Retruns the z-translation by value. **/
-  inline Scalar z() const { return m_coeffs.z(); }
+  EIGEN_DEVICE_FUNC inline Scalar z() const { return m_coeffs.z(); }
 
   /** \brief Retruns the x-translation as a reference. **/
-  inline Scalar& x() { return m_coeffs.x(); }
+  EIGEN_DEVICE_FUNC inline Scalar& x() { return m_coeffs.x(); }
   /** \brief Retruns the y-translation as a reference. **/
-  inline Scalar& y() { return m_coeffs.y(); }
+  EIGEN_DEVICE_FUNC inline Scalar& y() { return m_coeffs.y(); }
   /** \brief Retruns the z-translation as a reference. **/
-  inline Scalar& z() { return m_coeffs.z(); }
+  EIGEN_DEVICE_FUNC inline Scalar& z() { return m_coeffs.z(); }
 
-  const VectorType& vector() const { return m_coeffs; }
-  VectorType& vector() { return m_coeffs; }
+  EIGEN_DEVICE_FUNC const VectorType& vector() const { return m_coeffs; }
+  EIGEN_DEVICE_FUNC VectorType& vector() { return m_coeffs; }
 
-  const VectorType& translation() const { return m_coeffs; }
-  VectorType& translation() { return m_coeffs; }
+  EIGEN_DEVICE_FUNC const VectorType& translation() const { return m_coeffs; }
+  EIGEN_DEVICE_FUNC VectorType& translation() { return m_coeffs; }
 
   /** Concatenates two translation */
-  inline Translation operator* (const Translation& other) const
+  EIGEN_DEVICE_FUNC inline Translation operator* (const Translation& other) const
   { return Translation(m_coeffs + other.m_coeffs); }
 
   /** Concatenates a translation and a uniform scaling */
-  inline AffineTransformType operator* (const UniformScaling<Scalar>& other) const;
+  EIGEN_DEVICE_FUNC inline AffineTransformType operator* (const UniformScaling<Scalar>& other) const;
 
   /** Concatenates a translation and a linear transformation */
   template<typename OtherDerived>
-  inline AffineTransformType operator* (const EigenBase<OtherDerived>& linear) const;
+  EIGEN_DEVICE_FUNC inline AffineTransformType operator* (const EigenBase<OtherDerived>& linear) const;
 
   /** Concatenates a translation and a rotation */
   template<typename Derived>
-  inline IsometryTransformType operator*(const RotationBase<Derived,Dim>& r) const
+  EIGEN_DEVICE_FUNC inline IsometryTransformType operator*(const RotationBase<Derived,Dim>& r) const
   { return *this * IsometryTransformType(r); }
 
   /** \returns the concatenation of a linear transformation \a l with the translation \a t */
   // its a nightmare to define a templated friend function outside its declaration
   template<typename OtherDerived> friend
-  inline AffineTransformType operator*(const EigenBase<OtherDerived>& linear, const Translation& t)
+  EIGEN_DEVICE_FUNC inline AffineTransformType operator*(const EigenBase<OtherDerived>& linear, const Translation& t)
   {
     AffineTransformType res;
     res.matrix().setZero();
@@ -122,7 +122,7 @@ public:
 
   /** Concatenates a translation and a transformation */
   template<int Mode, int Options>
-  inline Transform<Scalar,Dim,Mode> operator* (const Transform<Scalar,Dim,Mode,Options>& t) const
+  EIGEN_DEVICE_FUNC inline Transform<Scalar,Dim,Mode> operator* (const Transform<Scalar,Dim,Mode,Options>& t) const
   {
     Transform<Scalar,Dim,Mode> res = t;
     res.pretranslate(m_coeffs);
@@ -152,19 +152,19 @@ public:
     * then this function smartly returns a const reference to \c *this.
     */
   template<typename NewScalarType>
-  inline typename internal::cast_return_type<Translation,Translation<NewScalarType,Dim> >::type cast() const
+  EIGEN_DEVICE_FUNC inline typename internal::cast_return_type<Translation,Translation<NewScalarType,Dim> >::type cast() const
   { return typename internal::cast_return_type<Translation,Translation<NewScalarType,Dim> >::type(*this); }
 
   /** Copy constructor with scalar type conversion */
   template<typename OtherScalarType>
-  inline explicit Translation(const Translation<OtherScalarType,Dim>& other)
+  EIGEN_DEVICE_FUNC inline explicit Translation(const Translation<OtherScalarType,Dim>& other)
   { m_coeffs = other.vector().template cast<Scalar>(); }
 
   /** \returns \c true if \c *this is approximately equal to \a other, within the precision
     * determined by \a prec.
     *
     * \sa MatrixBase::isApprox() */
-  bool isApprox(const Translation& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
+  EIGEN_DEVICE_FUNC bool isApprox(const Translation& other, const typename NumTraits<Scalar>::Real& prec = NumTraits<Scalar>::dummy_precision()) const
   { return m_coeffs.isApprox(other.m_coeffs, prec); }
 
 };
@@ -178,7 +178,7 @@ typedef Translation<double,3> Translation3d;
 //@}
 
 template<typename Scalar, int Dim>
-inline typename Translation<Scalar,Dim>::AffineTransformType
+EIGEN_DEVICE_FUNC inline typename Translation<Scalar,Dim>::AffineTransformType
 Translation<Scalar,Dim>::operator* (const UniformScaling<Scalar>& other) const
 {
   AffineTransformType res;
@@ -191,7 +191,7 @@ Translation<Scalar,Dim>::operator* (const UniformScaling<Scalar>& other) const
 
 template<typename Scalar, int Dim>
 template<typename OtherDerived>
-inline typename Translation<Scalar,Dim>::AffineTransformType
+EIGEN_DEVICE_FUNC inline typename Translation<Scalar,Dim>::AffineTransformType
 Translation<Scalar,Dim>::operator* (const EigenBase<OtherDerived>& linear) const
 {
   AffineTransformType res;

From 471075f7ad8e78d4d3e400bc1124e39102515aec Mon Sep 17 00:00:00 2001
From: Robert Lukierski <robert@lukierski.eu>
Date: Wed, 12 Oct 2016 18:59:05 +0100
Subject: [PATCH 3/8] Fixes min() warnings.

---
 Eigen/src/Core/AssignEvaluator.h  | 2 +-
 Eigen/src/Core/CwiseNullaryOp.h   | 2 +-
 Eigen/src/Core/PlainObjectBase.h  | 8 ++++----
 Eigen/src/Core/TriangularMatrix.h | 6 +++---
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index 30b36be11..844b85ab3 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -555,7 +555,7 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
       for(Index inner = alignedEnd; inner<innerSize ; ++inner)
         kernel.assignCoeffByOuterInner(outer, inner);
 
-      alignedStart = min((Index)(alignedStart+alignedStep)%packetSize, (Index)innerSize);
+      alignedStart = (min)((Index)(alignedStart+alignedStep)%packetSize, (Index)innerSize);
     }
   }
 };
diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h
index 4ab32d430..c47993e88 100644
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -753,7 +753,7 @@ struct setIdentity_impl<Derived, true>
   {
     m.setZero();
     EIGEN_USING_STD_MATH(min)
-    const Index size = min(m.rows(), m.cols());
+    const Index size = (min)(m.rows(), m.cols());
     for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1);
     return m;
   }
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index 00313920c..a4ade63b8 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -917,8 +917,8 @@ struct conservative_resize_like_impl
       // The storage order does not allow us to use reallocation.
       typename Derived::PlainObject tmp(rows,cols);
       EIGEN_USING_STD_MATH(min)
-      const Index common_rows = min(rows, _this.rows());
-      const Index common_cols = min(cols, _this.cols());
+      const Index common_rows = (min)(rows, _this.rows());
+      const Index common_cols = (min)(cols, _this.cols());
       tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
       _this.derived().swap(tmp);
     }
@@ -952,8 +952,8 @@ struct conservative_resize_like_impl
       // The storage order does not allow us to use reallocation.
       typename Derived::PlainObject tmp(other);
       EIGEN_USING_STD_MATH(min)
-      const Index common_rows = min(tmp.rows(), _this.rows());
-      const Index common_cols = min(tmp.cols(), _this.cols());
+      const Index common_rows = (min)(tmp.rows(), _this.rows());
+      const Index common_cols = (min)(tmp.cols(), _this.cols());
       tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
       _this.derived().swap(tmp);
     }
diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h
index e804cf6bb..17fcfeeb9 100644
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -646,7 +646,7 @@ bool MatrixBase<Derived>::isUpperTriangular(const RealScalar& prec) const
   EIGEN_USING_STD_MATH(min)
   for(Index j = 0; j < cols(); ++j)
   {
-    Index maxi = min(j, rows()-1);
+    Index maxi = (min)(j, rows()-1);
     for(Index i = 0; i <= maxi; ++i)
     {
       RealScalar absValue = abs(coeff(i,j));
@@ -680,7 +680,7 @@ bool MatrixBase<Derived>::isLowerTriangular(const RealScalar& prec) const
   RealScalar threshold = maxAbsOnLowerPart * prec;
   for(Index j = 1; j < cols(); ++j)
   {
-    Index maxi = min(j, rows()-1);
+    Index maxi = (min)(j, rows()-1);
     for(Index i = 0; i < maxi; ++i)
       if(abs(coeff(i, j)) > threshold) return false;
   }
@@ -896,7 +896,7 @@ struct triangular_assignment_loop<Kernel, Mode, Dynamic, SetOpposite>
     EIGEN_USING_STD_MATH(min)
     for(Index j = 0; j < kernel.cols(); ++j)
     {
-      Index maxi = min(j, kernel.rows());
+      Index maxi = (min)(j, kernel.rows());
       Index i = 0;
       if (((Mode&Lower) && SetOpposite) || (Mode&Upper))
       {

From ac63d6891ce87256de666c4d8f81eafaf99b8450 Mon Sep 17 00:00:00 2001
From: Avi Ginsburg <avi.ginsburg@mail.huji.ac.il>
Date: Thu, 13 Oct 2016 08:47:32 +0000
Subject: [PATCH 4/8] Patch to allow VS2015 & CUDA 8.0 to compile with Eigen
 included. I'm not sure whether to limit the check to this compiler
 combination (` || (EIGEN_COMP_MSVC == 1900 &&  __CUDACC_VER__) `) or to leave
 it as it is. I also don't know if this will have any affect on including
 Eigen in device code (I'm not in my current project).

---
 Eigen/src/Core/util/Macros.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h
index 318ab9477..ce715716c 100644
--- a/Eigen/src/Core/util/Macros.h
+++ b/Eigen/src/Core/util/Macros.h
@@ -801,7 +801,7 @@ namespace Eigen {
 // just an empty macro !
 #define EIGEN_EMPTY
 
-#if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_MSVC < 1900 // for older MSVC versions using the base operator is sufficient (cf Bug 1000)
+#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 ||  __CUDACC_VER__) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
   #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
     using Base::operator =;
 #elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653)

From a94791b69a53504aba88f06caad596afa1ac6a34 Mon Sep 17 00:00:00 2001
From: Robert Lukierski <robert@lukierski.eu>
Date: Thu, 13 Oct 2016 15:00:22 +0100
Subject: [PATCH 5/8] Fixes for min and abs after Benoit's comments, switched
 to numext.

---
 Eigen/src/Core/AssignEvaluator.h  |  3 +--
 Eigen/src/Core/CwiseNullaryOp.h   |  3 +--
 Eigen/src/Core/DiagonalMatrix.h   |  3 +--
 Eigen/src/Core/PlainObjectBase.h  | 10 ++++------
 Eigen/src/Core/TriangularMatrix.h | 19 +++++++------------
 Eigen/src/Geometry/Hyperplane.h   |  5 ++---
 Eigen/src/Geometry/Quaternion.h   |  6 ++----
 7 files changed, 18 insertions(+), 31 deletions(-)

diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h
index 844b85ab3..abad8c790 100644
--- a/Eigen/src/Core/AssignEvaluator.h
+++ b/Eigen/src/Core/AssignEvaluator.h
@@ -517,7 +517,6 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
 {
   EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
   {
-    EIGEN_USING_STD_MATH(min)
     typedef typename Kernel::Scalar Scalar;
     typedef typename Kernel::PacketType PacketType;
     enum {
@@ -555,7 +554,7 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
       for(Index inner = alignedEnd; inner<innerSize ; ++inner)
         kernel.assignCoeffByOuterInner(outer, inner);
 
-      alignedStart = (min)((Index)(alignedStart+alignedStep)%packetSize, (Index)innerSize);
+      alignedStart = numext::mini((alignedStart+alignedStep)%packetSize, innerSize);
     }
   }
 };
diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h
index c47993e88..1caf37bb7 100644
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -752,8 +752,7 @@ struct setIdentity_impl<Derived, true>
   static EIGEN_STRONG_INLINE Derived& run(Derived& m)
   {
     m.setZero();
-    EIGEN_USING_STD_MATH(min)
-    const Index size = (min)(m.rows(), m.cols());
+    const Index size = numext::mini(m.rows(), m.cols());
     for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1);
     return m;
   }
diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h
index f04facecc..c682c6d7f 100644
--- a/Eigen/src/Core/DiagonalMatrix.h
+++ b/Eigen/src/Core/DiagonalMatrix.h
@@ -290,12 +290,11 @@ MatrixBase<Derived>::asDiagonal() const
 template<typename Derived>
 bool MatrixBase<Derived>::isDiagonal(const RealScalar& prec) const
 {
-  EIGEN_USING_STD_MATH(abs)
   if(cols() != rows()) return false;
   RealScalar maxAbsOnDiagonal = static_cast<RealScalar>(-1);
   for(Index j = 0; j < cols(); ++j)
   {
-    RealScalar absOnDiagonal = abs(coeff(j,j));
+    RealScalar absOnDiagonal = numext::abs(coeff(j,j));
     if(absOnDiagonal > maxAbsOnDiagonal) maxAbsOnDiagonal = absOnDiagonal;
   }
   for(Index j = 0; j < cols(); ++j)
diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h
index a4ade63b8..2dcd929e6 100644
--- a/Eigen/src/Core/PlainObjectBase.h
+++ b/Eigen/src/Core/PlainObjectBase.h
@@ -916,9 +916,8 @@ struct conservative_resize_like_impl
     {
       // The storage order does not allow us to use reallocation.
       typename Derived::PlainObject tmp(rows,cols);
-      EIGEN_USING_STD_MATH(min)
-      const Index common_rows = (min)(rows, _this.rows());
-      const Index common_cols = (min)(cols, _this.cols());
+      const Index common_rows = numext::mini(rows, _this.rows());
+      const Index common_cols = numext::mini(cols, _this.cols());
       tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
       _this.derived().swap(tmp);
     }
@@ -951,9 +950,8 @@ struct conservative_resize_like_impl
     {
       // The storage order does not allow us to use reallocation.
       typename Derived::PlainObject tmp(other);
-      EIGEN_USING_STD_MATH(min)
-      const Index common_rows = (min)(tmp.rows(), _this.rows());
-      const Index common_cols = (min)(tmp.cols(), _this.cols());
+      const Index common_rows = numext::mini(tmp.rows(), _this.rows());
+      const Index common_cols = numext::mini(tmp.cols(), _this.cols());
       tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols);
       _this.derived().swap(tmp);
     }
diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h
index 17fcfeeb9..71f5d4f29 100644
--- a/Eigen/src/Core/TriangularMatrix.h
+++ b/Eigen/src/Core/TriangularMatrix.h
@@ -641,22 +641,20 @@ MatrixBase<Derived>::triangularView() const
 template<typename Derived>
 bool MatrixBase<Derived>::isUpperTriangular(const RealScalar& prec) const
 {
-  EIGEN_USING_STD_MATH(abs)
   RealScalar maxAbsOnUpperPart = static_cast<RealScalar>(-1);
-  EIGEN_USING_STD_MATH(min)
   for(Index j = 0; j < cols(); ++j)
   {
-    Index maxi = (min)(j, rows()-1);
+    Index maxi = numext::mini(j, rows()-1);
     for(Index i = 0; i <= maxi; ++i)
     {
-      RealScalar absValue = abs(coeff(i,j));
+      RealScalar absValue = numext::abs(coeff(i,j));
       if(absValue > maxAbsOnUpperPart) maxAbsOnUpperPart = absValue;
     }
   }
   RealScalar threshold = maxAbsOnUpperPart * prec;
   for(Index j = 0; j < cols(); ++j)
     for(Index i = j+1; i < rows(); ++i)
-      if(abs(coeff(i, j)) > threshold) return false;
+      if(numext::abs(coeff(i, j)) > threshold) return false;
   return true;
 }
 
@@ -668,21 +666,19 @@ bool MatrixBase<Derived>::isUpperTriangular(const RealScalar& prec) const
 template<typename Derived>
 bool MatrixBase<Derived>::isLowerTriangular(const RealScalar& prec) const
 {
-  EIGEN_USING_STD_MATH(abs)
-  EIGEN_USING_STD_MATH(min)
   RealScalar maxAbsOnLowerPart = static_cast<RealScalar>(-1);
   for(Index j = 0; j < cols(); ++j)
     for(Index i = j; i < rows(); ++i)
     {
-      RealScalar absValue = abs(coeff(i,j));
+      RealScalar absValue = numext::abs(coeff(i,j));
       if(absValue > maxAbsOnLowerPart) maxAbsOnLowerPart = absValue;
     }
   RealScalar threshold = maxAbsOnLowerPart * prec;
   for(Index j = 1; j < cols(); ++j)
   {
-    Index maxi = (min)(j, rows()-1);
+    Index maxi = numext::mini(j, rows()-1);
     for(Index i = 0; i < maxi; ++i)
-      if(abs(coeff(i, j)) > threshold) return false;
+      if(numext::abs(coeff(i, j)) > threshold) return false;
   }
   return true;
 }
@@ -893,10 +889,9 @@ struct triangular_assignment_loop<Kernel, Mode, Dynamic, SetOpposite>
   EIGEN_DEVICE_FUNC
   static inline void run(Kernel &kernel)
   {
-    EIGEN_USING_STD_MATH(min)
     for(Index j = 0; j < kernel.cols(); ++j)
     {
-      Index maxi = (min)(j, kernel.rows());
+      Index maxi = numext::mini(j, kernel.rows());
       Index i = 0;
       if (((Mode&Lower) && SetOpposite) || (Mode&Upper))
       {
diff --git a/Eigen/src/Geometry/Hyperplane.h b/Eigen/src/Geometry/Hyperplane.h
index d66194287..07f2659b2 100644
--- a/Eigen/src/Geometry/Hyperplane.h
+++ b/Eigen/src/Geometry/Hyperplane.h
@@ -145,7 +145,7 @@ public:
   /** \returns the absolute distance between the plane \c *this and a point \a p.
     * \sa signedDistance()
     */
-  EIGEN_DEVICE_FUNC inline Scalar absDistance(const VectorType& p) const { EIGEN_USING_STD_MATH(abs) return abs(signedDistance(p)); }
+  EIGEN_DEVICE_FUNC inline Scalar absDistance(const VectorType& p) const { return numext::abs(signedDistance(p)); }
 
   /** \returns the projection of a point \a p onto the plane \c *this.
     */
@@ -188,14 +188,13 @@ public:
     */
   EIGEN_DEVICE_FUNC VectorType intersection(const Hyperplane& other) const
   {
-    EIGEN_USING_STD_MATH(abs)
     EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2)
     Scalar det = coeffs().coeff(0) * other.coeffs().coeff(1) - coeffs().coeff(1) * other.coeffs().coeff(0);
     // since the line equations ax+by=c are normalized with a^2+b^2=1, the following tests
     // whether the two lines are approximately parallel.
     if(internal::isMuchSmallerThan(det, Scalar(1)))
     {   // special case where the two lines are approximately parallel. Pick any point on the first line.
-        if(abs(coeffs().coeff(1))>abs(coeffs().coeff(0)))
+        if(numext::abs(coeffs().coeff(1))>numext::abs(coeffs().coeff(0)))
             return VectorType(coeffs().coeff(1), -coeffs().coeff(2)/coeffs().coeff(1)-coeffs().coeff(0));
         else
             return VectorType(-coeffs().coeff(2)/coeffs().coeff(0)-coeffs().coeff(1), coeffs().coeff(0));
diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h
index 932f149e3..f6ef1bcf6 100644
--- a/Eigen/src/Geometry/Quaternion.h
+++ b/Eigen/src/Geometry/Quaternion.h
@@ -705,9 +705,8 @@ EIGEN_DEVICE_FUNC inline typename internal::traits<Derived>::Scalar
 QuaternionBase<Derived>::angularDistance(const QuaternionBase<OtherDerived>& other) const
 {
   EIGEN_USING_STD_MATH(atan2)
-  EIGEN_USING_STD_MATH(abs)
   Quaternion<Scalar> d = (*this) * other.conjugate();
-  return Scalar(2) * atan2( d.vec().norm(), abs(d.w()) );
+  return Scalar(2) * atan2( d.vec().norm(), numext::abs(d.w()) );
 }
 
  
@@ -725,10 +724,9 @@ QuaternionBase<Derived>::slerp(const Scalar& t, const QuaternionBase<OtherDerive
 {
   EIGEN_USING_STD_MATH(acos)
   EIGEN_USING_STD_MATH(sin)
-  EIGEN_USING_STD_MATH(abs)
   const Scalar one = Scalar(1) - NumTraits<Scalar>::epsilon();
   Scalar d = this->dot(other);
-  Scalar absD = abs(d);
+  Scalar absD = numext::abs(d);
 
   Scalar scale0;
   Scalar scale1;

From d0ee2267d6eef6a1c5e6a69a6f7333bd4b49fbfb Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Thu, 13 Oct 2016 10:59:46 -0700
Subject: [PATCH 6/8] Relaxed the resizing checks so that they don't fail with
 gcc >= 5.3

---
 unsupported/test/cxx11_tensor_simple.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/unsupported/test/cxx11_tensor_simple.cpp b/unsupported/test/cxx11_tensor_simple.cpp
index fe860c970..5a0d339ef 100644
--- a/unsupported/test/cxx11_tensor_simple.cpp
+++ b/unsupported/test/cxx11_tensor_simple.cpp
@@ -299,22 +299,21 @@ static void test_resize()
   VERIFY_IS_EQUAL(epsilon.dimension(0), 2);
   VERIFY_IS_EQUAL(epsilon.dimension(1), 3);
   VERIFY_IS_EQUAL(epsilon.dimension(2), 7);
-  VERIFY_IS_EQUAL(epsilon.dimensions().TotalSize(), 2*3*7);
+  VERIFY_IS_EQUAL(epsilon.size(), 2*3*7);
 
   const int* old_data = epsilon.data();
   epsilon.resize(3,2,7);
   VERIFY_IS_EQUAL(epsilon.dimension(0), 3);
   VERIFY_IS_EQUAL(epsilon.dimension(1), 2);
   VERIFY_IS_EQUAL(epsilon.dimension(2), 7);
-  VERIFY_IS_EQUAL(epsilon.dimensions().TotalSize(), 2*3*7);
+  VERIFY_IS_EQUAL(epsilon.size(), 2*3*7);
   VERIFY_IS_EQUAL(epsilon.data(), old_data);
 
   epsilon.resize(3,5,7);
   VERIFY_IS_EQUAL(epsilon.dimension(0), 3);
   VERIFY_IS_EQUAL(epsilon.dimension(1), 5);
   VERIFY_IS_EQUAL(epsilon.dimension(2), 7);
-  VERIFY_IS_EQUAL(epsilon.dimensions().TotalSize(), 3*5*7);
-  VERIFY_IS_NOT_EQUAL(epsilon.data(), old_data);
+  VERIFY_IS_EQUAL(epsilon.size(), 3*5*7);
 }
 
 void test_cxx11_tensor_simple()

From 692b30ca95397cbb42c53c813cf5dd4b7f0acc8d Mon Sep 17 00:00:00 2001
From: Gael Guennebaud <g.gael@free.fr>
Date: Fri, 14 Oct 2016 17:16:28 +0200
Subject: [PATCH 7/8] Fix previous merge.

---
 Eigen/src/Core/products/Parallelizer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h
index 1dee8d714..2a31e4cbe 100644
--- a/Eigen/src/Core/products/Parallelizer.h
+++ b/Eigen/src/Core/products/Parallelizer.h
@@ -111,7 +111,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth,
   double work = static_cast<double>(rows) * static_cast<double>(cols) *
       static_cast<double>(depth);
   double kMinTaskSize = 50000;  // Heuristic.
-  max_threads = std::max<Index>(1, std::min<Index>(max_threads, work / kMinTaskSize));
+  pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, work / kMinTaskSize));
 
   // compute the number of threads we are going to use
   Index threads = std::min<Index>(nbThreads(), pb_max_threads);

From d3943cd50c99d7169c3d5a1ea4aa4d8660190e2d Mon Sep 17 00:00:00 2001
From: Benoit Steiner <benoit.steiner.goog@gmail.com>
Date: Wed, 19 Oct 2016 12:56:12 -0700
Subject: [PATCH 8/8] Fixed a few typos in the ternary tensor expressions types

---
 Eigen/Core                                           | 4 ++++
 unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h | 2 +-
 unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h      | 4 ++--
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/Eigen/Core b/Eigen/Core
index fcc107721..d4274770e 100644
--- a/Eigen/Core
+++ b/Eigen/Core
@@ -37,6 +37,10 @@
   // All functions callable from CUDA code must be qualified with __device__
   #define EIGEN_DEVICE_FUNC __host__ __device__
 
+  // We need math_functions.hpp to ensure that that EIGEN_USING_STD_MATH macro
+  // works properly on the device side
+  #include <math_functions.hpp>
+
 #else
   #define EIGEN_DEVICE_FUNC
 
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index 61c111cec..f0383c6bf 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -495,7 +495,7 @@ struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type,
  private:
   const TernaryOp m_functor;
   TensorEvaluator<Arg1Type, Device> m_arg1Impl;
-  TensorEvaluator<Arg1Type, Device> m_arg2Impl;
+  TensorEvaluator<Arg2Type, Device> m_arg2Impl;
   TensorEvaluator<Arg3Type, Device> m_arg3Impl;
 };
 
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h
index 5f2e329f2..85dfc7a69 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h
@@ -283,7 +283,7 @@ class TensorCwiseTernaryOp : public TensorBase<TensorCwiseTernaryOp<TernaryOp, A
     arg1Expression() const { return m_arg1_xpr; }
 
     EIGEN_DEVICE_FUNC
-    const typename internal::remove_all<typename Arg1XprType::Nested>::type&
+    const typename internal::remove_all<typename Arg2XprType::Nested>::type&
     arg2Expression() const { return m_arg2_xpr; }
 
     EIGEN_DEVICE_FUNC
@@ -292,7 +292,7 @@ class TensorCwiseTernaryOp : public TensorBase<TensorCwiseTernaryOp<TernaryOp, A
 
   protected:
     typename Arg1XprType::Nested m_arg1_xpr;
-    typename Arg1XprType::Nested m_arg2_xpr;
+    typename Arg2XprType::Nested m_arg2_xpr;
     typename Arg3XprType::Nested m_arg3_xpr;
     const TernaryOp m_functor;
 };