Improved support for CUDA 8.0

2025-09-26 08:13:13 +08:00 · 2016-05-31 09:47:59 -07:00 · 2016-05-31 09:47:59 -07:00 · b6e306f189
commit b6e306f189
parent c4bd3b1f21
1 changed files with 26 additions and 0 deletions
--- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
+++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h
@ -264,6 +264,30 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_mul<half2>(c
 #endif
 }
 #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530
 template<>  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 half2 plog<half2>(const half2& a) {
  return h2log(a);
 }
 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 half2 pexp<half2>(const half2& a) {
  return h2exp(a);
 }
 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 half2 psqrt<half2>(const half2& a) {
  return h2sqrt(a);
 }
 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
 half2 prsqrt<half2>(const half2& a) {
  return h2rsqrt(a);
 }
 #else
 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog<half2>(const half2& a) {
  float a1 = __low2float(a);
  float a2 = __high2float(a);
@ -296,6 +320,8 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 prsqrt<half2>(const half2
  return __floats2half2_rn(r1, r2);
 }
 #endif
 #elif defined EIGEN_VECTORIZE_AVX
 typedef struct {