ROCm/HIP specfic fixes + updates

1. Eigen/src/Core/arch/GPU/Half.h

   Updating the HIPCC implementation half so that it can declared as a __shared__ variable


2. Eigen/src/Core/util/Macros.h, Eigen/src/Core/util/Memory.h

   introducing a EIGEN_USE_STD(func) macro that calls
   - std::func be default
   - ::func when eigen is being compiled with HIPCC

   This change was requested in the previous HIP PR
   (https://bitbucket.org/eigen/eigen/pull-requests/518/pr-with-hip-specific-fixes-for-the-eigen/diff)


3. unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h

   Removing EIGEN_DEVICE_FUNC attribute from pure virtual methods as it is not supported by HIPCC


4. unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h

   Disabling the template specializations of InnerMostDimReducer as they run into HIPCC link errors
This commit is contained in:
Deven Desai 2018-11-19 18:13:59 +00:00
parent 6a510fe69c
commit e7e6809e6b
5 changed files with 37 additions and 41 deletions

View File

@ -52,7 +52,9 @@ namespace half_impl {
#if !defined(EIGEN_HAS_GPU_FP16)
// Make our own __half_raw definition that is similar to CUDA's.
struct __half_raw {
EIGEN_DEVICE_FUNC __half_raw() : x(0) {}
// The default constructor cannot initialize its member, otherwise the
// derived class Eigen::Half cannot be used as __shared__ variable in HIPCC.
EIGEN_DEVICE_FUNC __half_raw() {}
explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {}
unsigned short x;
};
@ -70,7 +72,9 @@ struct __half_raw {
// so we need to implicitly convert "__half_raw" to "__half" to avoid having to explicitly make
// that conversiion in each call to a "__h*" routine...that is why we have "operator __half" routine
struct __half_raw {
EIGEN_DEVICE_FUNC __half_raw() : x(0) {}
// The default constructor cannot initialize its member, otherwise the
// derived class Eigen::Half cannot be used as __shared__ variable in HIPCC.
EIGEN_DEVICE_FUNC __half_raw() {}
explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {}
union {
unsigned short x;

View File

@ -896,6 +896,19 @@ namespace Eigen {
#endif
// When compiling HIP device code with HIPCC, certain functions
// from the stdlib need to be pulled in from the global namespace
// (as opposed to from the std:: namespace). This is because HIPCC
// does not natively support all the std:: routines in device code.
// Instead it contains header files that declare the corresponding
// routines in the global namespace such they can be used in device code.
#if defined(EIGEN_HIP_DEVICE_COMPILE)
#define EIGEN_USING_STD(FUNC) using ::FUNC;
#else
#define EIGEN_USING_STD(FUNC) using std::FUNC;
#endif
#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || EIGEN_CUDACC_VER>0)
// for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \

View File

@ -99,12 +99,9 @@ inline void throw_std_bad_alloc()
EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES)
{
eigen_assert(alignment >= sizeof(void*) && (alignment & (alignment-1)) == 0 && "Alignment must be at least sizeof(void*) and a power of 2");
#if defined(EIGEN_HIP_DEVICE_COMPILE)
void *original = ::malloc(size+alignment);
#else
void *original = std::malloc(size+alignment);
#endif
EIGEN_USING_STD(malloc)
void *original = malloc(size+alignment);
if (original == 0) return 0;
void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(alignment-1))) + alignment);
@ -116,11 +113,8 @@ EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::si
EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void *ptr)
{
if (ptr) {
#if defined(EIGEN_HIP_DEVICE_COMPILE)
::free(*(reinterpret_cast<void**>(ptr) - 1));
#else
std::free(*(reinterpret_cast<void**>(ptr) - 1));
#endif
EIGEN_USING_STD(free)
free(*(reinterpret_cast<void**>(ptr) - 1));
}
}
@ -183,11 +177,8 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
void *result;
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
#if defined(EIGEN_HIP_DEVICE_COMPILE)
result = ::malloc(size);
#else
result = std::malloc(size);
#endif
EIGEN_USING_STD(malloc)
result = malloc(size);
#if EIGEN_DEFAULT_ALIGN_BYTES==16
eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade aligned memory allocator.");
@ -207,11 +198,8 @@ EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
{
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
#if defined(EIGEN_HIP_DEVICE_COMPILE)
::free(ptr);
#else
std::free(ptr);
#endif
EIGEN_USING_STD(free)
free(ptr);
#else
handmade_aligned_free(ptr);
@ -256,11 +244,8 @@ template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std:
{
check_that_malloc_is_allowed();
#if defined(EIGEN_HIP_DEVICE_COMPILE)
void *result = ::malloc(size);
#else
void *result = std::malloc(size);
#endif
EIGEN_USING_STD(malloc)
void *result = malloc(size);
if(!result && size)
throw_std_bad_alloc();
@ -275,11 +260,8 @@ template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void
template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr)
{
#if defined(EIGEN_HIP_DEVICE_COMPILE)
::free(ptr);
#else
std::free(ptr);
#endif
EIGEN_USING_STD(free)
free(ptr);
}
template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size)
@ -540,11 +522,8 @@ template<typename T> struct smart_copy_helper<T,true> {
IntPtr size = IntPtr(end)-IntPtr(start);
if(size==0) return;
eigen_internal_assert(start!=0 && end!=0 && target!=0);
#if defined(EIGEN_HIP_DEVICE_COMPILE)
::memcpy(target, start, size);
#else
std::memcpy(target, start, size);
#endif
EIGEN_USING_STD(memcpy)
memcpy(target, start, size);
}
};

View File

@ -45,8 +45,8 @@ static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) {
class Allocator {
public:
virtual ~Allocator() {}
EIGEN_DEVICE_FUNC virtual void* allocate(size_t num_bytes) const = 0;
EIGEN_DEVICE_FUNC virtual void deallocate(void* buffer) const = 0;
virtual void* allocate(size_t num_bytes) const = 0;
virtual void deallocate(void* buffer) const = 0;
};
// Build a thread pool device on top the an existing pool of threads.

View File

@ -195,6 +195,7 @@ struct InnerMostDimReducer<Self, Op, true, false> {
}
};
#if !defined(EIGEN_HIPCC)
static const int kLeafSize = 1024;
template <typename Self, typename Op>
@ -218,7 +219,6 @@ struct InnerMostDimReducer<Self, Op, false, true> {
}
};
#if !defined(EIGEN_HIPCC)
template <typename Self, typename Op>
struct InnerMostDimReducer<Self, Op, true, true> {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType