Make sure we only use the half float intrinsics when compiling with a version of CUDA that is recent enough to provide them

Benoit Steiner 2016-03-14 08:37:58 -07:00
parent e29c9676b1
commit 97a1f1c273


@@ -90,7 +90,7 @@ struct half : public __half {
   }
 };
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
 // Intrinsics for native fp16 support. Note that on current hardware,
 // these are no faster than fp32 arithmetic (you need to use the half2
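The definition of EIGEN_HAS_CUDA_FP16 is not part of this diff; it is set elsewhere in Eigen's configuration headers. A minimal sketch of how such a feature flag could be derived, assuming the fp16 header and __half intrinsics first shipped with CUDA 7.5 and that the compiler exposes its version through __CUDACC_VER__ (this is an illustrative guard, not the exact Eigen macro definition):

// Hypothetical sketch: enable the fp16 path only when nvcc is recent
// enough (7.5+) to provide cuda_fp16.h and the __half intrinsics.
#if defined(__CUDACC__) && defined(__CUDACC_VER__) && __CUDACC_VER__ >= 70500
#define EIGEN_HAS_CUDA_FP16
#endif

With this flag in place, the __CUDA_ARCH__ >= 530 check above can never be reached on toolchains that lack the intrinsics, which is exactly what the commit message asks for.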
@@ -143,7 +143,7 @@ __device__ bool operator > (const half& a, const half& b) {
   return __hgt(a, b);
 }
-#else // Not CUDA 530
+#else // Emulate support for half floats
 // Definitions for CPUs and older CUDA, mostly working through conversion
 // to/from fp32.
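The emulation path referenced by this comment promotes each half operand to fp32, performs the operation in single precision, and converts the result back. A minimal sketch of one such fallback operator, mirroring the operator > shown in the next hunk (illustrative only, not copied from the patched file):

// Sketch of the fp32-emulated path: no native fp16 arithmetic is used.
static inline EIGEN_DEVICE_FUNC half operator + (const half& a, const half& b) {
  return half(float(a) + float(b));  // convert to fp32, add, convert back
}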
@@ -194,7 +194,7 @@ static inline EIGEN_DEVICE_FUNC bool operator > (const half& a, const half& b) {
   return float(a) > float(b);
 }
-#endif // Not CUDA 530
+#endif // Emulate support for half floats
 // Conversion routines, including fallbacks for the host or older CUDA.
 // Note that newer Intel CPUs (Haswell or newer) have vectorized versions of