From 97a1f1c2735e0f393b8492485f3db63cea4ba7c0 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 14 Mar 2016 08:37:58 -0700 Subject: [PATCH] Make sure we only use the half float intrinsics when compiling with a version of CUDA that is recent enough to provide them --- Eigen/src/Core/arch/CUDA/Half.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h index 5ce2be165..35e216028 100644 --- a/Eigen/src/Core/arch/CUDA/Half.h +++ b/Eigen/src/Core/arch/CUDA/Half.h @@ -90,7 +90,7 @@ struct half : public __half { } }; -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 +#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 // Intrinsics for native fp16 support. Note that on current hardware, // these are no faster than fp32 arithmetic (you need to use the half2 @@ -143,7 +143,7 @@ __device__ bool operator > (const half& a, const half& b) { return __hgt(a, b); } -#else // Not CUDA 530 +#else // Emulate support for half floats // Definitions for CPUs and older CUDA, mostly working through conversion // to/from fp32. @@ -194,7 +194,7 @@ static inline EIGEN_DEVICE_FUNC bool operator > (const half& a, const half& b) { return float(a) > float(b); } -#endif // Not CUDA 530 +#endif // Emulate support for half floats // Conversion routines, including fallbacks for the host or older CUDA. // Note that newer Intel CPUs (Haswell or newer) have vectorized versions of