Make sure we only use the half-float intrinsics when compiling with a version of CUDA that is recent enough to provide them

This commit is contained in:
Benoit Steiner 2016-03-14 08:37:58 -07:00
parent e29c9676b1
commit 97a1f1c273

View File

@@ -90,7 +90,7 @@ struct half : public __half {
}
};
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
// Intrinsics for native fp16 support. Note that on current hardware,
// these are no faster than fp32 arithmetic (you need to use the half2
@@ -143,7 +143,7 @@ __device__ bool operator > (const half& a, const half& b) {
return __hgt(a, b);
}
#else // Not CUDA 530
#else // Emulate support for half floats
// Definitions for CPUs and older CUDA, mostly working through conversion
// to/from fp32.
@@ -194,7 +194,7 @@ static inline EIGEN_DEVICE_FUNC bool operator > (const half& a, const half& b) {
return float(a) > float(b);
}
#endif // Not CUDA 530
#endif // Emulate support for half floats
// Conversion routines, including fallbacks for the host or older CUDA.
// Note that newer Intel CPUs (Haswell or newer) have vectorized versions of