updates requested in the PR feedback. Also droping coded within #ifdef EIGEN_HAS_OLD_HIP_FP16

This commit is contained in:
Deven Desai 2019-03-19 21:45:25 +00:00
parent 2dbea5510f
commit 51e399fc15
3 changed files with 5 additions and 66 deletions

View File

@ -52,37 +52,13 @@ namespace half_impl {
#if !defined(EIGEN_HAS_GPU_FP16)
// Make our own __half_raw definition that is similar to CUDA's.
struct __half_raw {
// The default constructor cannot initialize its member, otherwise the
// derived class Eigen::Half cannot be used as __shared__ variable in HIPCC.
EIGEN_DEVICE_FUNC __half_raw() {}
EIGEN_DEVICE_FUNC __half_raw() : x(0) {}
explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {}
unsigned short x;
};
#elif defined(EIGEN_HAS_HIP_FP16)
#if defined(EIGEN_HAS_OLD_HIP_FP16)
// Make a __half_raw definition that is
// ++ compatible with that of Eigen and
// ++ add an implicit conversion to the native __half of the old HIP implementation.
//
// Keeping ".x" as "unsigned short" keeps the interface the same between the Eigen and HIP implementation.
//
// In the old HIP implementation,
// ++ __half is a typedef of __fp16
// ++ the "__h*" routines take "__half" arguments
// so we need to implicitly convert "__half_raw" to "__half" to avoid having to explicitly make
// that conversiion in each call to a "__h*" routine...that is why we have "operator __half" routine
struct __half_raw {
// The default constructor cannot initialize its member, otherwise the
// derived class Eigen::Half cannot be used as __shared__ variable in HIPCC.
EIGEN_DEVICE_FUNC __half_raw() {}
explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {}
union {
unsigned short x;
__half data;
};
operator __half(void) const { return data; }
};
#endif
// Nothing to do here
// HIP fp16 header file has a definition for __half_raw
#elif defined(EIGEN_HAS_CUDA_FP16)
#if defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000
// In CUDA < 9.0, __half is the equivalent of CUDA 9's __half_raw
@ -105,11 +81,7 @@ struct half_base : public __half_raw {
#if defined(EIGEN_HAS_GPU_FP16)
#if defined(EIGEN_HAS_HIP_FP16)
#if defined(EIGEN_HAS_OLD_HIP_FP16)
EIGEN_DEVICE_FUNC half_base(const __half& h) : __half_raw(__half_as_ushort(h)) {}
#else
EIGEN_DEVICE_FUNC half_base(const __half& h) { x = __half_as_ushort(h); }
#endif
#elif defined(EIGEN_HAS_CUDA_FP16)
#if (defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER >= 90000)
EIGEN_DEVICE_FUNC half_base(const __half& h) : __half_raw(*(__half_raw*)&h) {}
@ -128,9 +100,8 @@ struct half : public half_impl::half_base {
#if !defined(EIGEN_HAS_GPU_FP16)
typedef half_impl::__half_raw __half_raw;
#elif defined(EIGEN_HAS_HIP_FP16)
#if defined(EIGEN_HAS_OLD_HIP_FP16)
typedef half_impl::__half_raw __half_raw;
#endif
// Nothing to do here
// HIP fp16 header file has a definition for __half_raw
#elif defined(EIGEN_HAS_CUDA_FP16)
// Note that EIGEN_CUDACC_VER is set to 0 even when compiling with HIP, so (EIGEN_CUDACC_VER < 90000) is true even for HIP!
// So keeping this within #if defined(EIGEN_HAS_CUDA_FP16) is needed

View File

@ -45,18 +45,7 @@ template<> struct packet_traits<Eigen::half> : default_packet_traits
template<> struct unpacket_traits<half2> { typedef Eigen::half type; enum {size=2, alignment=Aligned16, vectorizable=true}; typedef half2 half; };
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pset1<half2>(const Eigen::half& from) {
#if defined(EIGEN_HIP_DEVICE_COMPILE)
#if defined(EIGEN_HAS_OLD_HIP_FP16)
return half2half2(from);
#else
return __half2half2(from);
#endif
#else // EIGEN_CUDA_ARCH
return __half2half2(from);
#endif
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pload<half2>(const Eigen::half* from) {
@ -85,11 +74,7 @@ template<>
#if defined(EIGEN_HIP_DEVICE_COMPILE)
#if defined(EIGEN_HAS_OLD_HIP_FP16)
return __halves2half2((*(from+0)), (*(from+1)));
#else
return __ldg((const half2*)from);
#endif
#else // EIGEN_CUDA_ARCH
@ -107,11 +92,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE half2 ploadt_ro<half2, Unaligned>(const Ei
#if defined(EIGEN_HIP_DEVICE_COMPILE)
#if defined(EIGEN_HAS_OLD_HIP_FP16)
return __halves2half2((*(from+0)), (*(from+1)));
#else
return __halves2half2(__ldg(from+0), __ldg(from+1));
#endif
#else // EIGEN_CUDA_ARCH
@ -357,11 +338,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pmadd<half2>(const half2&
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pdiv<half2>(const half2& a, const half2& b) {
#if defined(EIGEN_HIP_DEVICE_COMPILE)
#if defined(EIGEN_HAS_OLD_HIP_FP16)
return h2div(a, b);
#else
return __h2div(a, b);
#endif
#else // EIGEN_CUDA_ARCH

View File

@ -396,17 +396,8 @@
#endif
#if defined(EIGEN_HIP_DEVICE_COMPILE)
#define EIGEN_HAS_HIP_FP16
#include <hip/hip_fp16.h>
#define HIP_PATCH_WITH_NEW_FP16 18215
#if (HIP_VERSION_PATCH < HIP_PATCH_WITH_NEW_FP16)
#define EIGEN_HAS_OLD_HIP_FP16
// Old HIP implementation does not have a explicit typedef for "half2"
typedef __half2 half2;
#endif
#endif