diff --git a/Eigen/src/Cholesky/LDLT.h b/Eigen/src/Cholesky/LDLT.h
index 795d19dce..fcee7b2e3 100644
--- a/Eigen/src/Cholesky/LDLT.h
+++ b/Eigen/src/Cholesky/LDLT.h
@@ -642,7 +642,6 @@ MatrixType LDLT<MatrixType,_UpLo>::reconstructedMatrix() const
   return res;
 }
 
-#ifndef __CUDACC__
 /** \cholesky_module
   * \returns the Cholesky decomposition with full pivoting without square root of \c *this
   * \sa MatrixBase::ldlt()
@@ -664,7 +663,6 @@ MatrixBase<Derived>::ldlt() const
 {
   return LDLT<PlainObject>(derived());
 }
-#endif // __CUDACC__
 
 } // end namespace Eigen
 
diff --git a/Eigen/src/Cholesky/LLT.h b/Eigen/src/Cholesky/LLT.h
index bd966656d..ddf4875ab 100644
--- a/Eigen/src/Cholesky/LLT.h
+++ b/Eigen/src/Cholesky/LLT.h
@@ -507,7 +507,6 @@ MatrixType LLT<MatrixType,_UpLo>::reconstructedMatrix() const
   return matrixL() * matrixL().adjoint().toDenseMatrix();
 }
 
-#ifndef __CUDACC__
 /** \cholesky_module
   * \returns the LLT decomposition of \c *this
   * \sa SelfAdjointView::llt()
@@ -529,7 +528,6 @@ SelfAdjointView<MatrixType, UpLo>::llt() const
 {
   return LLT<PlainObject,UpLo>(m_matrix);
 }
-#endif // __CUDACC__
 
 } // end namespace Eigen
 
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 5766b6f9d..7a5540593 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -343,29 +343,29 @@ template<typename Scalar,typename NullaryOp,
          bool has_binary  = has_binary_operator<NullaryOp>::value>
 struct nullary_wrapper
 {
-  template <typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, Index i, Index j) const { return op(i,j); }
-  template <typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, Index i) const { return op(i); }
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { return op(i,j); }
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); }
 
-  template <typename T, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, Index i, Index j) const { return op.template packetOp<T>(i,j); }
-  template <typename T, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, Index i) const { return op.template packetOp<T>(i); }
+  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { return op.template packetOp<T>(i,j); }
+  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp<T>(i); }
 };
 
 template<typename Scalar,typename NullaryOp>
 struct nullary_wrapper<Scalar,NullaryOp,true,false,false>
 {
-  template <typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, Index=0, Index=0) const { return op(); }
-  template <typename T, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, Index=0, Index=0) const { return op.template packetOp<T>(); }
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType=0, IndexType=0) const { return op(); }
+  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType=0, IndexType=0) const { return op.template packetOp<T>(); }
 };
 
 template<typename Scalar,typename NullaryOp>
 struct nullary_wrapper<Scalar,NullaryOp,false,false,true>
 {
-  template <typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, Index i, Index j=0) const { return op(i,j); }
-  template <typename T, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, Index i, Index j=0) const { return op.template packetOp<T>(i,j); }
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j=0) const { return op(i,j); }
+  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j=0) const { return op.template packetOp<T>(i,j); }
 };
 
 // We need the following specialization for vector-only functors assigned to a runtime vector,
@@ -373,25 +373,84 @@ struct nullary_wrapper<Scalar,NullaryOp,false,false,true>
 // In this case, i==0 and j is used for the actual iteration.
 template<typename Scalar,typename NullaryOp>
 struct nullary_wrapper<Scalar,NullaryOp,false,true,false>
-  : nullary_wrapper<Scalar,NullaryOp,false,true,true> // to get the identity wrapper
 {
-  typedef nullary_wrapper<Scalar,NullaryOp,false,true,true> base;
-  using base::operator();
-  using base::packetOp;
-  template <typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, Index i, Index j) const {
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const {
     eigen_assert(i==0 || j==0);
     return op(i+j);
   }
-  template <typename T, typename Index> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, Index i, Index j) const {
+  template <typename T, typename IndexType> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const {
     eigen_assert(i==0 || j==0);
     return op.template packetOp<T>(i+j);
   }
+
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { return op(i); }
+  template <typename T, typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { return op.template packetOp<T>(i); }
 };
 
 template<typename Scalar,typename NullaryOp>
 struct nullary_wrapper<Scalar,NullaryOp,false,false,false> {};
 
+#if 0 && EIGEN_COMP_MSVC>0
+// Disable this ugly workaround. This is now handled in traits<Ref>::match,
+// but this piece of code might still become handly if some other weird compilation
+// erros pop up again.
+
+// MSVC exhibits a weird compilation error when
+// compiling:
+//    Eigen::MatrixXf A = MatrixXf::Random(3,3);
+//    Ref<const MatrixXf> R = 2.f*A;
+// and that has_*ary_operator<scalar_constant_op<float>> have not been instantiated yet.
+// The "problem" is that evaluator<2.f*A> is instantiated by traits<Ref>::match<2.f*A>
+// and at that time has_*ary_operator<T> returns true regardless of T.
+// Then nullary_wrapper is badly instantiated as nullary_wrapper<.,.,true,true,true>.
+// The trick is thus to defer the proper instantiation of nullary_wrapper when coeff(),
+// and packet() are really instantiated as implemented below:
+
+// This is a simple wrapper around Index to enforce the re-instantiation of
+// has_*ary_operator when needed.
+template<typename T> struct nullary_wrapper_workaround_msvc {
+  nullary_wrapper_workaround_msvc(const T&);
+  operator T()const;
+};
+
+template<typename Scalar,typename NullaryOp>
+struct nullary_wrapper<Scalar,NullaryOp,true,true,true>
+{
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const {
+    return nullary_wrapper<Scalar,NullaryOp,
+    has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().operator()(op,i,j);
+  }
+  template <typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const {
+    return nullary_wrapper<Scalar,NullaryOp,
+    has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().operator()(op,i);
+  }
+
+  template <typename T, typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const {
+    return nullary_wrapper<Scalar,NullaryOp,
+    has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().template packetOp<T>(op,i,j);
+  }
+  template <typename T, typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const {
+    return nullary_wrapper<Scalar,NullaryOp,
+    has_nullary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_unary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value,
+    has_binary_operator<NullaryOp,nullary_wrapper_workaround_msvc<IndexType> >::value>().template packetOp<T>(op,i);
+  }
+};
+#endif // MSVC workaround
+
 template<typename NullaryOp, typename PlainObjectType>
 struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
   : evaluator_base<CwiseNullaryOp<NullaryOp,PlainObjectType> >
@@ -418,30 +477,30 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
 
   typedef typename XprType::CoeffReturnType CoeffReturnType;
 
-  template <typename Index>
+  template <typename IndexType>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  CoeffReturnType coeff(Index row, Index col) const
+  CoeffReturnType coeff(IndexType row, IndexType col) const
   {
     return m_wrapper(m_functor, row, col);
   }
 
-  template <typename Index>
+  template <typename IndexType>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  CoeffReturnType coeff(Index index) const
+  CoeffReturnType coeff(IndexType index) const
   {
     return m_wrapper(m_functor,index);
   }
 
-  template<int LoadMode, typename PacketType, typename Index>
+  template<int LoadMode, typename PacketType, typename IndexType>
   EIGEN_STRONG_INLINE
-  PacketType packet(Index row, Index col) const
+  PacketType packet(IndexType row, IndexType col) const
   {
     return m_wrapper.template packetOp<PacketType>(m_functor, row, col);
   }
 
-  template<int LoadMode, typename PacketType, typename Index>
+  template<int LoadMode, typename PacketType, typename IndexType>
   EIGEN_STRONG_INLINE
-  PacketType packet(Index index) const
+  PacketType packet(IndexType index) const
   {
     return m_wrapper.template packetOp<PacketType>(m_functor, index);
   }
diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h
index ad6b1923d..e3f20894d 100644
--- a/Eigen/src/Core/CwiseNullaryOp.h
+++ b/Eigen/src/Core/CwiseNullaryOp.h
@@ -38,6 +38,14 @@ struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectT
   * However, if you want to write a function returning such an expression, you
   * will need to use this class.
   *
+  * The functor NullaryOp must expose one of the following method:
+    <table class="manual">
+    <tr            ><td>\c operator()() </td><td>if the procedural generation does not depend on the coefficient entries (e.g., random numbers)</td></tr>
+    <tr class="alt"><td>\c operator()(Index i)</td><td>if the procedural generation makes sense for vectors only and that it depends on the coefficient index \c i (e.g., linspace) </td></tr>
+    <tr            ><td>\c operator()(Index i,Index j)</td><td>if the procedural generation depends on the matrix coordinates \c i, \c j (e.g., to generate a checkerboard with 0 and 1)</td></tr>
+    </table>
+  * It is also possible to expose the last two operators if the generation makes sense for matrices but can be optimized for vectors.
+  *
   * See DenseBase::NullaryExpr(Index,const CustomNullaryOp&) for an example binding
   * C++11 random number generators.
   *
diff --git a/Eigen/src/Core/NumTraits.h b/Eigen/src/Core/NumTraits.h
index 42cffbd3b..dd61195bc 100644
--- a/Eigen/src/Core/NumTraits.h
+++ b/Eigen/src/Core/NumTraits.h
@@ -97,23 +97,6 @@ template<typename T> struct GenericNumTraits
     MulCost = 1
   };
 
-  // Division is messy but important, because it is expensive and throughput
-  // varies significantly. The following numbers are based on min division
-  // throughput on Haswell.
-  template<bool Vectorized>
-  struct Div {
-    enum {
-#ifdef EIGEN_VECTORIZE_AVX
-      AVX = true,
-#else
-      AVX = false,
-#endif
-      Cost = IsInteger ? (sizeof(T) == 8 ? (IsSigned ? 24 : 21) : (IsSigned ? 8 : 9)):
-          Vectorized ? (sizeof(T) == 8 ? (AVX ? 16 : 8) : (AVX ? 14 : 7)) : 8
-    };
-  };
-
-
   typedef T Real;
   typedef typename internal::conditional<
                      IsInteger,
@@ -255,6 +238,9 @@ private:
   static inline std::string quiet_NaN();
 };
 
+// Empty specialization for void to allow template specialization based on NumTraits<T>::Real with T==void and SFINAE.
+template<> struct NumTraits<void> {};
+
 } // end namespace Eigen
 
 #endif // EIGEN_NUMTRAITS_H
diff --git a/Eigen/src/Core/Ref.h b/Eigen/src/Core/Ref.h
index 17065fdd5..bdf24f52a 100644
--- a/Eigen/src/Core/Ref.h
+++ b/Eigen/src/Core/Ref.h
@@ -35,7 +35,13 @@ struct traits<Ref<_PlainObjectType, _Options, _StrideType> >
                       || (int(StrideType::InnerStrideAtCompileTime)==0 && int(Derived::InnerStrideAtCompileTime)==1),
       OuterStrideMatch = Derived::IsVectorAtCompileTime
                       || int(StrideType::OuterStrideAtCompileTime)==int(Dynamic) || int(StrideType::OuterStrideAtCompileTime)==int(Derived::OuterStrideAtCompileTime),
-      AlignmentMatch = (int(traits<PlainObjectType>::Alignment)==int(Unaligned)) || (int(evaluator<Derived>::Alignment) >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment
+      // NOTE, this indirection of evaluator<Derived>::Alignment is needed
+      // to workaround a very strange bug in MSVC related to the instantiation
+      // of has_*ary_operator in evaluator<CwiseNullaryOp>.
+      // This line is surprisingly very sensitive. For instance, simply adding parenthesis
+      // as "DerivedAlignment = (int(evaluator<Derived>::Alignment))," will make MSVC fail...
+      DerivedAlignment = int(evaluator<Derived>::Alignment),
+      AlignmentMatch = (int(traits<PlainObjectType>::Alignment)==int(Unaligned)) || (DerivedAlignment >= int(Alignment)), // FIXME the first condition is not very clear, it should be replaced by the required alignment
       ScalarTypeMatch = internal::is_same<typename PlainObjectType::Scalar, typename Derived::Scalar>::value,
       MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && AlignmentMatch && ScalarTypeMatch
     };
diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h
index 7014a6889..dae0ca5d0 100644
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -94,6 +94,9 @@ template<> struct packet_traits<double> : default_packet_traits
   };
 };
 
+template<> struct scalar_div_cost<float,true> { enum { value = 14 }; };
+template<> struct scalar_div_cost<double,true> { enum { value = 16 }; };
+
 /* Proper support for integers is only provided by AVX2. In the meantime, we'll
    use SSE instructions and packets to deal with integers.
 template<> struct packet_traits<int>    : default_packet_traits
diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h
index 0057e2062..baad692e3 100755
--- a/Eigen/src/Core/arch/SSE/PacketMath.h
+++ b/Eigen/src/Core/arch/SSE/PacketMath.h
@@ -162,6 +162,11 @@ template<> struct unpacket_traits<Packet4f> { typedef float  type; enum {size=4,
 template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16}; typedef Packet2d half; };
 template<> struct unpacket_traits<Packet4i> { typedef int    type; enum {size=4, alignment=Aligned16}; typedef Packet4i half; };
 
+#ifndef EIGEN_VECTORIZE_AVX
+template<> struct scalar_div_cost<float,true> { enum { value = 7 }; };
+template<> struct scalar_div_cost<double,true> { enum { value = 8 }; };
+#endif
+
 #if EIGEN_COMP_MSVC==1500
 // Workaround MSVC 9 internal compiler error.
 // TODO: It has been detected with win64 builds (amd64), so let's check whether it also happens in 32bits+SSE mode
diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h
index dc3690444..d82ffed02 100644
--- a/Eigen/src/Core/functors/BinaryFunctors.h
+++ b/Eigen/src/Core/functors/BinaryFunctors.h
@@ -287,7 +287,7 @@ struct functor_traits<scalar_hypot_op<Scalar,Scalar> > {
   {
     Cost = 3 * NumTraits<Scalar>::AddCost +
            2 * NumTraits<Scalar>::MulCost +
-           2 * NumTraits<Scalar>::template Div<false>::Cost,
+           2 * scalar_div_cost<Scalar,false>::value,
     PacketAccess = false
   };
 };
@@ -375,7 +375,7 @@ struct functor_traits<scalar_quotient_op<LhsScalar,RhsScalar> > {
   typedef typename scalar_quotient_op<LhsScalar,RhsScalar>::result_type result_type;
   enum {
     PacketAccess = is_same<LhsScalar,RhsScalar>::value && packet_traits<LhsScalar>::HasDiv && packet_traits<RhsScalar>::HasDiv,
-    Cost = NumTraits<result_type>::template Div<PacketAccess>::Cost
+    Cost = scalar_div_cost<result_type,PacketAccess>::value
   };
 };
 
diff --git a/Eigen/src/Core/functors/NullaryFunctors.h b/Eigen/src/Core/functors/NullaryFunctors.h
index 692242f7d..a2154d3b5 100644
--- a/Eigen/src/Core/functors/NullaryFunctors.h
+++ b/Eigen/src/Core/functors/NullaryFunctors.h
@@ -30,8 +30,8 @@ struct functor_traits<scalar_constant_op<Scalar> >
 
 template<typename Scalar> struct scalar_identity_op {
   EIGEN_EMPTY_STRUCT_CTOR(scalar_identity_op)
-  template<typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index row, Index col) const { return row==col ? Scalar(1) : Scalar(0); }
+  template<typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType row, IndexType col) const { return row==col ? Scalar(1) : Scalar(0); }
 };
 template<typename Scalar>
 struct functor_traits<scalar_identity_op<Scalar> >
@@ -55,15 +55,15 @@ struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/false,/*IsInteger*/false>
     m_packetStep(pset1<Packet>(unpacket_traits<Packet>::size*m_step)),
     m_base(padd(pset1<Packet>(low), pmul(pset1<Packet>(m_step),plset<Packet>(-unpacket_traits<Packet>::size)))) {}
 
-  template<typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const 
+  template<typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const
   { 
     m_base = padd(m_base, pset1<Packet>(m_step));
     return m_low+Scalar(i)*m_step; 
   }
 
-  template<typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index) const { return m_base = padd(m_base,m_packetStep); }
+  template<typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType) const { return m_base = padd(m_base,m_packetStep); }
 
   const Scalar m_low;
   const Scalar m_step;
@@ -81,11 +81,11 @@ struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/false>
     m_low(low), m_step(num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1)),
     m_lowPacket(pset1<Packet>(m_low)), m_stepPacket(pset1<Packet>(m_step)), m_interPacket(plset<Packet>(0)) {}
 
-  template<typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; }
+  template<typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { return m_low+i*m_step; }
 
-  template<typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index i) const
+  template<typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const
   { return internal::padd(m_lowPacket, pmul(m_stepPacket, padd(pset1<Packet>(Scalar(i)),m_interPacket))); }
 
   const Scalar m_low;
@@ -102,15 +102,15 @@ struct linspaced_op_impl<Scalar,Packet,/*RandomAccess*/true,/*IsInteger*/true>
     m_low(low), m_length(high-low), m_divisor(convert_index<Scalar>(num_steps==1?1:num_steps-1)), m_interPacket(plset<Packet>(0))
   {}
 
-  template<typename Index>
+  template<typename IndexType>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  const Scalar operator() (Index i) const {
+  const Scalar operator() (IndexType i) const {
     return m_low + (m_length*Scalar(i))/m_divisor;
   }
 
-  template<typename Index>
+  template<typename IndexType>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
-  const Packet packetOp(Index i) const {
+  const Packet packetOp(IndexType i) const {
     return internal::padd(pset1<Packet>(m_low), pdiv(pmul(pset1<Packet>(m_length), padd(pset1<Packet>(Scalar(i)),m_interPacket)),
                                                      pset1<Packet>(m_divisor))); }
 
@@ -142,11 +142,11 @@ template <typename Scalar, typename PacketType, bool RandomAccess> struct linspa
     : impl((num_steps==1 ? high : low),high,num_steps)
   {}
 
-  template<typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return impl(i); }
+  template<typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (IndexType i) const { return impl(i); }
 
-  template<typename Packet,typename Index>
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(Index i) const { return impl.packetOp(i); }
+  template<typename Packet,typename IndexType>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const { return impl.packetOp(i); }
 
   // This proxy object handles the actual required temporaries, the different
   // implementations (random vs. sequential access) as well as the
diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h
index 59b9edf69..2009f8e57 100644
--- a/Eigen/src/Core/functors/UnaryFunctors.h
+++ b/Eigen/src/Core/functors/UnaryFunctors.h
@@ -248,7 +248,7 @@ struct functor_traits<scalar_exp_op<Scalar> > {
      // double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div,  13 other
      : (14 * NumTraits<Scalar>::AddCost +
         6 * NumTraits<Scalar>::MulCost +
-        NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost))
+        scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value))
 #else
     Cost =
     (sizeof(Scalar) == 4
@@ -257,7 +257,7 @@ struct functor_traits<scalar_exp_op<Scalar> > {
      // double: 7 pmadd, 5 pmul, 3 padd/psub, 1 div,  13 other
      : (23 * NumTraits<Scalar>::AddCost +
         12 * NumTraits<Scalar>::MulCost +
-        NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost))
+        scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value))
 #endif
   };
 };
@@ -514,17 +514,16 @@ struct functor_traits<scalar_tanh_op<Scalar> > {
                 // 9 pmadd, 2 pmul, 1 div, 2 other
                 ? (2 * NumTraits<Scalar>::AddCost +
                    6 * NumTraits<Scalar>::MulCost +
-                   NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost)
+                   scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value)
 #else
                 ? (11 * NumTraits<Scalar>::AddCost +
                    11 * NumTraits<Scalar>::MulCost +
-                   NumTraits<Scalar>::template Div<packet_traits<Scalar>::HasDiv>::Cost)
+                   scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value)
 #endif
                 // This number assumes a naive implementation of tanh
                 : (6 * NumTraits<Scalar>::AddCost +
                    3 * NumTraits<Scalar>::MulCost +
-                   2 * NumTraits<Scalar>::template Div<
-                           packet_traits<Scalar>::HasDiv>::Cost +
+                   2 * scalar_div_cost<Scalar,packet_traits<Scalar>::HasDiv>::value +
                    functor_traits<scalar_exp_op<Scalar> >::Cost))
   };
 };
diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h
index bbef83ea8..d4460bb77 100755
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@@ -383,7 +383,7 @@ struct has_ReturnType
 
 template<typename T> const T& return_ref();
 
-template <typename T>
+template <typename T, typename IndexType=Index>
 struct has_nullary_operator
 {
   template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref<C>().operator()())>0)>::type * = 0);
@@ -392,19 +392,19 @@ struct has_nullary_operator
   enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
 };
 
-template <typename T>
+template <typename T, typename IndexType=Index>
 struct has_unary_operator
 {
-  template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref<C>().operator()(Index(0)))>0)>::type * = 0);
+  template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref<C>().operator()(IndexType(0)))>0)>::type * = 0);
   static meta_no testFunctor(...);
 
   enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
 };
 
-template <typename T>
+template <typename T, typename IndexType=Index>
 struct has_binary_operator
 {
-  template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref<C>().operator()(Index(0),Index(0)))>0)>::type * = 0);
+  template <typename C> static meta_yes testFunctor(C const *,typename enable_if<(sizeof(return_ref<C>().operator()(IndexType(0),IndexType(0)))>0)>::type * = 0);
   static meta_no testFunctor(...);
 
   enum { value = sizeof(testFunctor(static_cast<T*>(0))) == sizeof(meta_yes) };
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index 7e43f4fea..fa60008ef 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -664,6 +664,20 @@ bool is_same_dense(const T1 &, const T2 &, typename enable_if<!(has_direct_acces
   return false;
 }
 
+// Internal helper defining the cost of a scalar division for the type T.
+// The default heuristic can be specialized for each scalar type and architecture.
+template<typename T,bool Vectorized=false,typename EnaleIf = void>
+struct scalar_div_cost {
+  enum { value = 8*NumTraits<T>::MulCost };
+};
+
+
+template<bool Vectorized>
+struct scalar_div_cost<signed long,Vectorized,typename conditional<sizeof(long)==8,void,false_type>::type> { enum { value = 24 }; };
+template<bool Vectorized>
+struct scalar_div_cost<unsigned long,Vectorized,typename conditional<sizeof(long)==8,void,false_type>::type> { enum { value = 21 }; };
+
+
 #ifdef EIGEN_DEBUG_ASSIGN
 std::string demangle_traversal(int t)
 {
@@ -707,7 +721,7 @@ std::string demangle_flags(int f)
   * This class permits to control the scalar return type of any binary operation performed on two different scalar types through (partial) template specializations.
   *
   * For instance, let \c U1, \c U2 and \c U3 be three user defined scalar types for which most operations between instances of \c U1 and \c U2 returns an \c U3.
-  * You can let Eigen knows that by defining:
+  * You can let %Eigen knows that by defining:
     \code
     template<typename BinaryOp>
     struct ScalarBinaryOpTraits<U1,U2,BinaryOp> { typedef U3 ReturnType;  };
@@ -725,6 +739,14 @@ std::string demangle_flags(int f)
     struct ScalarBinaryOpTraits<U1,U2,internal::scalar_sum_op<U1,U2> > { typedef U1 ReturnType; };
     \endcode
   *
+  * By default, the following generic combinations are supported:
+  <table class="manual">
+  <tr><th>ScalarA</th><th>ScalarB</th><th>BinaryOp</th><th>ReturnType</th><th>Note</th></tr>
+  <tr            ><td>\c T </td><td>\c T </td><td>\c * </td><td>\c T </td><td></td></tr>
+  <tr class="alt"><td>\c NumTraits<T>::Real </td><td>\c T </td><td>\c * </td><td>\c T </td><td>Only if \c NumTraits<T>::IsComplex </td></tr>
+  <tr            ><td>\c T </td><td>\c NumTraits<T>::Real </td><td>\c * </td><td>\c T </td><td>Only if \c NumTraits<T>::IsComplex </td></tr>
+  </table>
+  *
   * \sa CwiseBinaryOp
   */
 template<typename ScalarA, typename ScalarB, typename BinaryOp=internal::scalar_product_op<ScalarA,ScalarB> >
@@ -741,6 +763,17 @@ struct ScalarBinaryOpTraits<T,T,BinaryOp>
   typedef T ReturnType;
 };
 
+template <typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<T, typename NumTraits<typename internal::enable_if<NumTraits<T>::IsComplex,T>::type>::Real, BinaryOp>
+{
+  typedef T ReturnType;
+};
+template <typename T, typename BinaryOp>
+struct ScalarBinaryOpTraits<typename NumTraits<typename internal::enable_if<NumTraits<T>::IsComplex,T>::type>::Real, T, BinaryOp>
+{
+  typedef T ReturnType;
+};
+
 // For Matrix * Permutation
 template<typename T, typename BinaryOp>
 struct ScalarBinaryOpTraits<T,void,BinaryOp>
@@ -762,18 +795,6 @@ struct ScalarBinaryOpTraits<void,void,BinaryOp>
   typedef void ReturnType;
 };
 
-template<typename T, typename BinaryOp>
-struct ScalarBinaryOpTraits<T,std::complex<T>,BinaryOp>
-{
-  typedef std::complex<T> ReturnType;
-};
-
-template<typename T, typename BinaryOp>
-struct ScalarBinaryOpTraits<std::complex<T>, T,BinaryOp>
-{
-  typedef std::complex<T> ReturnType;
-};
-
 // We require Lhs and Rhs to have "compatible" scalar types.
 // It is tempting to always allow mixing different types but remember that this is often impossible in the vectorized paths.
 // So allowing mixing different types gives very unexpected errors when enabling vectorization, when the user tries to
diff --git a/Eigen/src/LU/FullPivLU.h b/Eigen/src/LU/FullPivLU.h
index 1632d3ac3..fbb5a61bf 100644
--- a/Eigen/src/LU/FullPivLU.h
+++ b/Eigen/src/LU/FullPivLU.h
@@ -879,14 +879,12 @@ struct Assignment<DstXprType, Inverse<FullPivLU<MatrixType> >, internal::assign_
   *
   * \sa class FullPivLU
   */
-#ifndef __CUDACC__
 template<typename Derived>
 inline const FullPivLU<typename MatrixBase<Derived>::PlainObject>
 MatrixBase<Derived>::fullPivLu() const
 {
   return FullPivLU<PlainObject>(eval());
 }
-#endif
 
 } // end namespace Eigen
 
diff --git a/Eigen/src/LU/PartialPivLU.h b/Eigen/src/LU/PartialPivLU.h
index 87ac6a281..6c35e88d3 100644
--- a/Eigen/src/LU/PartialPivLU.h
+++ b/Eigen/src/LU/PartialPivLU.h
@@ -584,14 +584,12 @@ struct Assignment<DstXprType, Inverse<PartialPivLU<MatrixType> >, internal::assi
   *
   * \sa class PartialPivLU
   */
-#ifndef __CUDACC__
 template<typename Derived>
 inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
 MatrixBase<Derived>::partialPivLu() const
 {
   return PartialPivLU<PlainObject>(eval());
 }
-#endif
 
 /** \lu_module
   *
@@ -601,14 +599,12 @@ MatrixBase<Derived>::partialPivLu() const
   *
   * \sa class PartialPivLU
   */
-#ifndef __CUDACC__
 template<typename Derived>
 inline const PartialPivLU<typename MatrixBase<Derived>::PlainObject>
 MatrixBase<Derived>::lu() const
 {
   return PartialPivLU<PlainObject>(eval());
 }
-#endif
 
 } // end namespace Eigen
 
diff --git a/Eigen/src/QR/ColPivHouseholderQR.h b/Eigen/src/QR/ColPivHouseholderQR.h
index 35e19b85b..9650781d6 100644
--- a/Eigen/src/QR/ColPivHouseholderQR.h
+++ b/Eigen/src/QR/ColPivHouseholderQR.h
@@ -637,7 +637,6 @@ typename ColPivHouseholderQR<MatrixType>::HouseholderSequenceType ColPivHousehol
   return HouseholderSequenceType(m_qr, m_hCoeffs.conjugate());
 }
 
-#ifndef __CUDACC__
 /** \return the column-pivoting Householder QR decomposition of \c *this.
   *
   * \sa class ColPivHouseholderQR
@@ -648,7 +647,6 @@ MatrixBase<Derived>::colPivHouseholderQr() const
 {
   return ColPivHouseholderQR<PlainObject>(eval());
 }
-#endif // __CUDACC__
 
 } // end namespace Eigen
 
diff --git a/Eigen/src/QR/CompleteOrthogonalDecomposition.h b/Eigen/src/QR/CompleteOrthogonalDecomposition.h
index f299d3c00..41e4ecdfd 100644
--- a/Eigen/src/QR/CompleteOrthogonalDecomposition.h
+++ b/Eigen/src/QR/CompleteOrthogonalDecomposition.h
@@ -547,7 +547,6 @@ CompleteOrthogonalDecomposition<MatrixType>::householderQ() const {
   return m_cpqr.householderQ();
 }
 
-#ifndef __CUDACC__
 /** \return the complete orthogonal decomposition of \c *this.
   *
   * \sa class CompleteOrthogonalDecomposition
@@ -557,7 +556,6 @@ const CompleteOrthogonalDecomposition<typename MatrixBase<Derived>::PlainObject>
 MatrixBase<Derived>::completeOrthogonalDecomposition() const {
   return CompleteOrthogonalDecomposition<PlainObject>(eval());
 }
-#endif  // __CUDACC__
 
 }  // end namespace Eigen
 
diff --git a/Eigen/src/QR/FullPivHouseholderQR.h b/Eigen/src/QR/FullPivHouseholderQR.h
index a46d3f9f3..e0e15100d 100644
--- a/Eigen/src/QR/FullPivHouseholderQR.h
+++ b/Eigen/src/QR/FullPivHouseholderQR.h
@@ -660,7 +660,6 @@ inline typename FullPivHouseholderQR<MatrixType>::MatrixQReturnType FullPivHouse
   return MatrixQReturnType(m_qr, m_hCoeffs, m_rows_transpositions);
 }
 
-#ifndef __CUDACC__
 /** \return the full-pivoting Householder QR decomposition of \c *this.
   *
   * \sa class FullPivHouseholderQR
@@ -671,7 +670,6 @@ MatrixBase<Derived>::fullPivHouseholderQr() const
 {
   return FullPivHouseholderQR<PlainObject>(eval());
 }
-#endif // __CUDACC__
 
 } // end namespace Eigen
 
diff --git a/Eigen/src/QR/HouseholderQR.h b/Eigen/src/QR/HouseholderQR.h
index 5e69cfee9..3513d995c 100644
--- a/Eigen/src/QR/HouseholderQR.h
+++ b/Eigen/src/QR/HouseholderQR.h
@@ -393,7 +393,6 @@ void HouseholderQR<MatrixType>::computeInPlace()
   m_isInitialized = true;
 }
 
-#ifndef __CUDACC__
 /** \return the Householder QR decomposition of \c *this.
   *
   * \sa class HouseholderQR
@@ -404,7 +403,6 @@ MatrixBase<Derived>::householderQr() const
 {
   return HouseholderQR<PlainObject>(eval());
 }
-#endif // __CUDACC__
 
 } // end namespace Eigen
 
diff --git a/Eigen/src/SVD/JacobiSVD.h b/Eigen/src/SVD/JacobiSVD.h
index 605c1a2a6..78dfd1d59 100644
--- a/Eigen/src/SVD/JacobiSVD.h
+++ b/Eigen/src/SVD/JacobiSVD.h
@@ -783,7 +783,6 @@ JacobiSVD<MatrixType, QRPreconditioner>::compute(const MatrixType& matrix, unsig
   return *this;
 }
 
-#ifndef __CUDACC__
 /** \svd_module
   *
   * \return the singular value decomposition of \c *this computed by two-sided
@@ -797,7 +796,6 @@ MatrixBase<Derived>::jacobiSvd(unsigned int computationOptions) const
 {
   return JacobiSVD<PlainObject>(*this, computationOptions);
 }
-#endif // __CUDACC__
 
 } // end namespace Eigen
 
diff --git a/test/cuda_basic.cu b/test/cuda_basic.cu
index 3cf37d221..cb2e4167a 100644
--- a/test/cuda_basic.cu
+++ b/test/cuda_basic.cu
@@ -20,11 +20,14 @@
 
 #include <math_constants.h>
 #include <cuda.h>
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
 #include <cuda_fp16.h>
+#endif
 #include "main.h"
 #include "cuda_common.h"
 
-#include <Eigen/Eigenvalues>
+// Check that dense modules can be properly parsed by nvcc
+#include <Eigen/Dense>
 
 // struct Foo{
 //   EIGEN_DEVICE_FUNC
diff --git a/test/integer_types.cpp b/test/integer_types.cpp
index 950f8e9be..a21f73a81 100644
--- a/test/integer_types.cpp
+++ b/test/integer_types.cpp
@@ -158,4 +158,12 @@ void test_integer_types()
 
     CALL_SUBTEST_8( integer_type_tests(Matrix<unsigned long long, Dynamic, 5>(1, 5)) );
   }
+#ifdef EIGEN_TEST_PART_9
+  VERIFY_IS_EQUAL(internal::scalar_div_cost<int>::value, 8);
+  VERIFY_IS_EQUAL(internal::scalar_div_cost<unsigned int>::value, 8);
+  if(sizeof(long)>sizeof(int)) {
+    VERIFY(internal::scalar_div_cost<long>::value > internal::scalar_div_cost<int>::value);
+    VERIFY(internal::scalar_div_cost<unsigned long>::value > internal::scalar_div_cost<int>::value);
+  }
+#endif
 }
diff --git a/test/nullary.cpp b/test/nullary.cpp
index ad306c1e9..9063c6de8 100644
--- a/test/nullary.cpp
+++ b/test/nullary.cpp
@@ -170,5 +170,30 @@ void test_nullary()
   VERIFY((  internal::has_unary_operator<internal::linspaced_op<float,float,false> >::value ));
   VERIFY(( !internal::has_binary_operator<internal::linspaced_op<float,float,false> >::value ));
   VERIFY((  internal::functor_has_linear_access<internal::linspaced_op<float,float,false> >::ret ));
+
+  // Regression unit test for a weird MSVC bug.
+  // Search "nullary_wrapper_workaround_msvc" in CoreEvaluators.h for the details.
+  // See also traits<Ref>::match.
+  {
+    MatrixXf A = MatrixXf::Random(3,3);
+    Ref<const MatrixXf> R = 2.0*A;
+    VERIFY_IS_APPROX(R, A+A);
+
+    Ref<const MatrixXf> R1 = MatrixXf::Random(3,3)+A;
+
+    VectorXi V = VectorXi::Random(3);
+    Ref<const VectorXi> R2 = VectorXi::LinSpaced(3,1,3)+V;
+    VERIFY_IS_APPROX(R2, V+Vector3i(1,2,3));
+
+    VERIFY((  internal::has_nullary_operator<internal::scalar_constant_op<float> >::value ));
+    VERIFY(( !internal::has_unary_operator<internal::scalar_constant_op<float> >::value ));
+    VERIFY(( !internal::has_binary_operator<internal::scalar_constant_op<float> >::value ));
+    VERIFY((  internal::functor_has_linear_access<internal::scalar_constant_op<float> >::ret ));
+
+    VERIFY(( !internal::has_nullary_operator<internal::linspaced_op<int,int,false> >::value ));
+    VERIFY((  internal::has_unary_operator<internal::linspaced_op<int,int,false> >::value ));
+    VERIFY(( !internal::has_binary_operator<internal::linspaced_op<int,int,false> >::value ));
+    VERIFY((  internal::functor_has_linear_access<internal::linspaced_op<int,int,false> >::ret ));
+  }
 #endif
 }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
index 3f623afa4..fc75dbb5c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
@@ -25,7 +25,7 @@ struct scalar_mod_op {
 };
 template <typename Scalar>
 struct functor_traits<scalar_mod_op<Scalar> >
-{ enum { Cost = NumTraits<Scalar>::template Div<false>::Cost, PacketAccess = false }; };
+{ enum { Cost = scalar_div_cost<Scalar,false>::value, PacketAccess = false }; };
 
 
 /** \internal
@@ -38,7 +38,7 @@ struct scalar_mod2_op {
 };
 template <typename Scalar>
 struct functor_traits<scalar_mod2_op<Scalar> >
-{ enum { Cost = NumTraits<Scalar>::template Div<false>::Cost, PacketAccess = false }; };
+{ enum { Cost = scalar_div_cost<Scalar,false>::value, PacketAccess = false }; };
 
 template <typename Scalar>
 struct scalar_fmod_op {
diff --git a/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h b/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h
index 3b6a69aff..866a8a460 100644
--- a/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h
+++ b/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h
@@ -628,15 +628,15 @@ ArpackGeneralizedSelfAdjointEigenSolver<MatrixType, MatrixSolver, BisSPD>&
       m_info = Success;
     }
 
-    delete select;
+    delete[] select;
   }
 
-  delete v;
-  delete iparam;
-  delete ipntr;
-  delete workd;
-  delete workl;
-  delete resid;
+  delete[] v;
+  delete[] iparam;
+  delete[] ipntr;
+  delete[] workd;
+  delete[] workl;
+  delete[] resid;
 
   m_isInitialized = true;
 
diff --git a/unsupported/test/cxx11_tensor_argmax_cuda.cu b/unsupported/test/cxx11_tensor_argmax_cuda.cu
index c3f4bfa53..6fe8982f2 100644
--- a/unsupported/test/cxx11_tensor_argmax_cuda.cu
+++ b/unsupported/test/cxx11_tensor_argmax_cuda.cu
@@ -12,7 +12,9 @@
 #define EIGEN_TEST_FUNC cxx11_tensor_cuda
 #define EIGEN_USE_GPU
 
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
 #include <cuda_fp16.h>
+#endif
 #include "main.h"
 #include <unsupported/Eigen/CXX11/Tensor>
 
diff --git a/unsupported/test/cxx11_tensor_cast_float16_cuda.cu b/unsupported/test/cxx11_tensor_cast_float16_cuda.cu
index 559aae269..88c233994 100644
--- a/unsupported/test/cxx11_tensor_cast_float16_cuda.cu
+++ b/unsupported/test/cxx11_tensor_cast_float16_cuda.cu
@@ -13,7 +13,9 @@
 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
 #define EIGEN_USE_GPU
 
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
 #include <cuda_fp16.h>
+#endif
 #include "main.h"
 #include <unsupported/Eigen/CXX11/Tensor>
 
diff --git a/unsupported/test/cxx11_tensor_contract_cuda.cu b/unsupported/test/cxx11_tensor_contract_cuda.cu
index 5282dc3b9..767e9c678 100644
--- a/unsupported/test/cxx11_tensor_contract_cuda.cu
+++ b/unsupported/test/cxx11_tensor_contract_cuda.cu
@@ -14,7 +14,9 @@
 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
 #define EIGEN_USE_GPU
 
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
 #include <cuda_fp16.h>
+#endif
 #include "main.h"
 #include <unsupported/Eigen/CXX11/Tensor>
 
diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu
index f1e91a879..bf216587a 100644
--- a/unsupported/test/cxx11_tensor_cuda.cu
+++ b/unsupported/test/cxx11_tensor_cuda.cu
@@ -12,7 +12,9 @@
 #define EIGEN_TEST_FUNC cxx11_tensor_cuda
 #define EIGEN_USE_GPU
 
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
 #include <cuda_fp16.h>
+#endif
 #include "main.h"
 #include <unsupported/Eigen/CXX11/Tensor>
 
diff --git a/unsupported/test/cxx11_tensor_device.cu b/unsupported/test/cxx11_tensor_device.cu
index bbda95906..fde20ddf2 100644
--- a/unsupported/test/cxx11_tensor_device.cu
+++ b/unsupported/test/cxx11_tensor_device.cu
@@ -13,7 +13,9 @@
 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
 #define EIGEN_USE_GPU
 
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
 #include <cuda_fp16.h>
+#endif
 #include "main.h"
 #include <unsupported/Eigen/CXX11/Tensor>
 
diff --git a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu
index a6375d34a..cbf401c86 100644
--- a/unsupported/test/cxx11_tensor_of_float16_cuda.cu
+++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu
@@ -13,7 +13,9 @@
 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
 #define EIGEN_USE_GPU
 
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
 #include <cuda_fp16.h>
+#endif
 #include "main.h"
 #include <unsupported/Eigen/CXX11/Tensor>
 
diff --git a/unsupported/test/cxx11_tensor_random_cuda.cu b/unsupported/test/cxx11_tensor_random_cuda.cu
index 32f71e808..b3be199e1 100644
--- a/unsupported/test/cxx11_tensor_random_cuda.cu
+++ b/unsupported/test/cxx11_tensor_random_cuda.cu
@@ -13,7 +13,9 @@
 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
 #define EIGEN_USE_GPU
 
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
 #include <cuda_fp16.h>
+#endif
 #include "main.h"
 #include <Eigen/CXX11/Tensor>
 
diff --git a/unsupported/test/cxx11_tensor_reduction_cuda.cu b/unsupported/test/cxx11_tensor_reduction_cuda.cu
index 9b5e60e99..80e151d43 100644
--- a/unsupported/test/cxx11_tensor_reduction_cuda.cu
+++ b/unsupported/test/cxx11_tensor_reduction_cuda.cu
@@ -12,7 +12,9 @@
 #define EIGEN_TEST_FUNC cxx11_tensor_reduction_cuda
 #define EIGEN_USE_GPU
 
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
 #include <cuda_fp16.h>
+#endif
 #include "main.h"
 #include <unsupported/Eigen/CXX11/Tensor>
 
diff --git a/unsupported/test/cxx11_tensor_scan_cuda.cu b/unsupported/test/cxx11_tensor_scan_cuda.cu
index c8a99f62b..761d11fd1 100644
--- a/unsupported/test/cxx11_tensor_scan_cuda.cu
+++ b/unsupported/test/cxx11_tensor_scan_cuda.cu
@@ -13,7 +13,9 @@
 #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
 #define EIGEN_USE_GPU
 
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
 #include <cuda_fp16.h>
+#endif
 #include "main.h"
 #include <unsupported/Eigen/CXX11/Tensor>
 

ScalarA	ScalarB	BinaryOp	ReturnType	Note
\c T	\c T	\c *	\c T
\c NumTraits::Real	\c T	\c *	\c T	Only if \c NumTraits::IsComplex
\c T	\c NumTraits::Real	\c *	\c T	Only if \c NumTraits::IsComplex