diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index 4f052dcf2..51386506f 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -264,6 +264,30 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_mul(c #endif } +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +half2 plog(const half2& a) { + return h2log(a); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +half2 pexp(const half2& a) { + return h2exp(a); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +half2 psqrt(const half2& a) { + return h2sqrt(a); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +half2 prsqrt(const half2& a) { + return h2rsqrt(a); +} + +#else + template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog(const half2& a) { float a1 = __low2float(a); float a2 = __high2float(a); @@ -296,6 +320,8 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 prsqrt(const half2 return __floats2half2_rn(r1, r2); } +#endif + #elif defined EIGEN_VECTORIZE_AVX typedef struct {