Make sure we only use the half float intrinsics when compiling with a version of CUDA that is recent enough to provide them

2025-07-16 01:51:51 +08:00 · 2016-03-14 08:37:58 -07:00 · 2016-03-14 08:37:58 -07:00 · 97a1f1c273
commit 97a1f1c273
parent e29c9676b1
1 changed file with 3 additions and 3 deletions
--- a/Eigen/src/Core/arch/CUDA/Half.h
+++ b/Eigen/src/Core/arch/CUDA/Half.h
@@ -90,7 +90,7 @@ struct half : public __half {
  }
 };

-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530
+#if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530

 // Intrinsics for native fp16 support. Note that on current hardware,
 // these are no faster than fp32 arithmetic (you need to use the half2
@@ -143,7 +143,7 @@ __device__ bool operator > (const half& a, const half& b) {
  return __hgt(a, b);
 }

-#else  // Not CUDA 530
+#else  // Emulate support for half floats

 // Definitions for CPUs and older CUDA, mostly working through conversion
 // to/from fp32.
@@ -194,7 +194,7 @@ static inline EIGEN_DEVICE_FUNC bool operator > (const half& a, const half& b) {
  return float(a) > float(b);
 }

-#endif // Not CUDA 530
+#endif  // Emulate support for half floats

 // Conversion routines, including fallbacks for the host or older CUDA.
 // Note that newer Intel CPUs (Haswell or newer) have vectorized versions of