diff --git a/Eigen/src/Core/arch/GPU/Half.h b/Eigen/src/Core/arch/GPU/Half.h index 7873f8ec0..6f1362f2b 100644 --- a/Eigen/src/Core/arch/GPU/Half.h +++ b/Eigen/src/Core/arch/GPU/Half.h @@ -52,37 +52,13 @@ namespace half_impl { #if !defined(EIGEN_HAS_GPU_FP16) // Make our own __half_raw definition that is similar to CUDA's. struct __half_raw { - // The default constructor cannot initialize its member, otherwise the - // derived class Eigen::Half cannot be used as __shared__ variable in HIPCC. - EIGEN_DEVICE_FUNC __half_raw() {} + EIGEN_DEVICE_FUNC __half_raw() : x(0) {} explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {} unsigned short x; }; #elif defined(EIGEN_HAS_HIP_FP16) - #if defined(EIGEN_HAS_OLD_HIP_FP16) -// Make a __half_raw definition that is -// ++ compatible with that of Eigen and -// ++ add an implicit conversion to the native __half of the old HIP implementation. -// -// Keeping ".x" as "unsigned short" keeps the interface the same between the Eigen and HIP implementation. -// -// In the old HIP implementation, -// ++ __half is a typedef of __fp16 -// ++ the "__h*" routines take "__half" arguments -// so we need to implicitly convert "__half_raw" to "__half" to avoid having to explicitly make -// that conversiion in each call to a "__h*" routine...that is why we have "operator __half" routine -struct __half_raw { - // The default constructor cannot initialize its member, otherwise the - // derived class Eigen::Half cannot be used as __shared__ variable in HIPCC. - EIGEN_DEVICE_FUNC __half_raw() {} - explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {} - union { - unsigned short x; - __half data; - }; - operator __half(void) const { return data; } -}; - #endif + // Nothing to do here + // HIP fp16 header file has a definition for __half_raw #elif defined(EIGEN_HAS_CUDA_FP16) #if defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000 // In CUDA < 9.0, __half is the equivalent of CUDA 9's __half_raw @@ -105,11 +81,7 @@ struct half_base : public __half_raw { #if defined(EIGEN_HAS_GPU_FP16) #if defined(EIGEN_HAS_HIP_FP16) - #if defined(EIGEN_HAS_OLD_HIP_FP16) - EIGEN_DEVICE_FUNC half_base(const __half& h) : __half_raw(__half_as_ushort(h)) {} - #else EIGEN_DEVICE_FUNC half_base(const __half& h) { x = __half_as_ushort(h); } - #endif #elif defined(EIGEN_HAS_CUDA_FP16) #if (defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000) EIGEN_DEVICE_FUNC half_base(const __half& h) : __half_raw(*(__half_raw*)&h) {} @@ -128,9 +100,8 @@ struct half : public half_impl::half_base { #if !defined(EIGEN_HAS_GPU_FP16) typedef half_impl::__half_raw __half_raw; #elif defined(EIGEN_HAS_HIP_FP16) - #if defined(EIGEN_HAS_OLD_HIP_FP16) - typedef half_impl::__half_raw __half_raw; - #endif + // Nothing to do here + // HIP fp16 header file has a definition for __half_raw #elif defined(EIGEN_HAS_CUDA_FP16) // Note that EIGEN_CUDACC_VER is set to 0 even when compiling with HIP, so (EIGEN_CUDACC_VER < 90000) is true even for HIP! // So keeping this within #if defined(EIGEN_HAS_CUDA_FP16) is needed diff --git a/Eigen/src/Core/arch/GPU/PacketMathHalf.h b/Eigen/src/Core/arch/GPU/PacketMathHalf.h index 869fa7ec6..cd518c7e4 100644 --- a/Eigen/src/Core/arch/GPU/PacketMathHalf.h +++ b/Eigen/src/Core/arch/GPU/PacketMathHalf.h @@ -45,18 +45,7 @@ template<> struct packet_traits : default_packet_traits template<> struct unpacket_traits { typedef Eigen::half type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef half2 half; }; template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1(const Eigen::half& from) { - -#if defined(EIGEN_HIP_DEVICE_COMPILE) - -#if defined(EIGEN_HAS_OLD_HIP_FP16) - return half2half2(from); -#else return __half2half2(from); -#endif - -#else // EIGEN_CUDA_ARCH - return __half2half2(from); -#endif } template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pload(const Eigen::half* from) { @@ -85,11 +74,7 @@ template<> #if defined(EIGEN_HIP_DEVICE_COMPILE) -#if defined(EIGEN_HAS_OLD_HIP_FP16) - return __halves2half2((*(from+0)), (*(from+1))); -#else return __ldg((const half2*)from); -#endif #else // EIGEN_CUDA_ARCH @@ -107,11 +92,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro(const Ei #if defined(EIGEN_HIP_DEVICE_COMPILE) -#if defined(EIGEN_HAS_OLD_HIP_FP16) - return __halves2half2((*(from+0)), (*(from+1))); -#else return __halves2half2(__ldg(from+0), __ldg(from+1)); -#endif #else // EIGEN_CUDA_ARCH @@ -357,11 +338,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd(const half2& template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv(const half2& a, const half2& b) { #if defined(EIGEN_HIP_DEVICE_COMPILE) -#if defined(EIGEN_HAS_OLD_HIP_FP16) - return h2div(a, b); -#else return __h2div(a, b); -#endif #else // EIGEN_CUDA_ARCH diff --git a/Eigen/src/Core/util/ConfigureVectorization.h b/Eigen/src/Core/util/ConfigureVectorization.h index b00d8b038..2f6136ebc 100644 --- a/Eigen/src/Core/util/ConfigureVectorization.h +++ b/Eigen/src/Core/util/ConfigureVectorization.h @@ -396,17 +396,8 @@ #endif #if defined(EIGEN_HIP_DEVICE_COMPILE) - #define EIGEN_HAS_HIP_FP16 #include - - #define HIP_PATCH_WITH_NEW_FP16 18215 - #if (HIP_VERSION_PATCH < HIP_PATCH_WITH_NEW_FP16) - #define EIGEN_HAS_OLD_HIP_FP16 - // Old HIP implementation does not have a explicit typedef for "half2" - typedef __half2 half2; - #endif - #endif