mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-21 20:09:06 +08:00
ROCm/HIP specfic fixes + updates
1. Eigen/src/Core/arch/GPU/Half.h Updating the HIPCC implementation half so that it can declared as a __shared__ variable 2. Eigen/src/Core/util/Macros.h, Eigen/src/Core/util/Memory.h introducing a EIGEN_USE_STD(func) macro that calls - std::func be default - ::func when eigen is being compiled with HIPCC This change was requested in the previous HIP PR (https://bitbucket.org/eigen/eigen/pull-requests/518/pr-with-hip-specific-fixes-for-the-eigen/diff) 3. unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h Removing EIGEN_DEVICE_FUNC attribute from pure virtual methods as it is not supported by HIPCC 4. unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h Disabling the template specializations of InnerMostDimReducer as they run into HIPCC link errors
This commit is contained in:
parent
6a510fe69c
commit
e7e6809e6b
@ -52,7 +52,9 @@ namespace half_impl {
|
||||
#if !defined(EIGEN_HAS_GPU_FP16)
|
||||
// Make our own __half_raw definition that is similar to CUDA's.
|
||||
struct __half_raw {
|
||||
EIGEN_DEVICE_FUNC __half_raw() : x(0) {}
|
||||
// The default constructor cannot initialize its member, otherwise the
|
||||
// derived class Eigen::Half cannot be used as __shared__ variable in HIPCC.
|
||||
EIGEN_DEVICE_FUNC __half_raw() {}
|
||||
explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {}
|
||||
unsigned short x;
|
||||
};
|
||||
@ -70,7 +72,9 @@ struct __half_raw {
|
||||
// so we need to implicitly convert "__half_raw" to "__half" to avoid having to explicitly make
|
||||
// that conversiion in each call to a "__h*" routine...that is why we have "operator __half" routine
|
||||
struct __half_raw {
|
||||
EIGEN_DEVICE_FUNC __half_raw() : x(0) {}
|
||||
// The default constructor cannot initialize its member, otherwise the
|
||||
// derived class Eigen::Half cannot be used as __shared__ variable in HIPCC.
|
||||
EIGEN_DEVICE_FUNC __half_raw() {}
|
||||
explicit EIGEN_DEVICE_FUNC __half_raw(unsigned short raw) : x(raw) {}
|
||||
union {
|
||||
unsigned short x;
|
||||
|
@ -896,6 +896,19 @@ namespace Eigen {
|
||||
#endif
|
||||
|
||||
|
||||
// When compiling HIP device code with HIPCC, certain functions
|
||||
// from the stdlib need to be pulled in from the global namespace
|
||||
// (as opposed to from the std:: namespace). This is because HIPCC
|
||||
// does not natively support all the std:: routines in device code.
|
||||
// Instead it contains header files that declare the corresponding
|
||||
// routines in the global namespace such they can be used in device code.
|
||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
||||
#define EIGEN_USING_STD(FUNC) using ::FUNC;
|
||||
#else
|
||||
#define EIGEN_USING_STD(FUNC) using std::FUNC;
|
||||
#endif
|
||||
|
||||
|
||||
#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || EIGEN_CUDACC_VER>0)
|
||||
// for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324)
|
||||
#define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \
|
||||
|
@ -100,11 +100,8 @@ EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::si
|
||||
{
|
||||
eigen_assert(alignment >= sizeof(void*) && (alignment & (alignment-1)) == 0 && "Alignment must be at least sizeof(void*) and a power of 2");
|
||||
|
||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
||||
void *original = ::malloc(size+alignment);
|
||||
#else
|
||||
void *original = std::malloc(size+alignment);
|
||||
#endif
|
||||
EIGEN_USING_STD(malloc)
|
||||
void *original = malloc(size+alignment);
|
||||
|
||||
if (original == 0) return 0;
|
||||
void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(alignment-1))) + alignment);
|
||||
@ -116,11 +113,8 @@ EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::si
|
||||
EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void *ptr)
|
||||
{
|
||||
if (ptr) {
|
||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
||||
::free(*(reinterpret_cast<void**>(ptr) - 1));
|
||||
#else
|
||||
std::free(*(reinterpret_cast<void**>(ptr) - 1));
|
||||
#endif
|
||||
EIGEN_USING_STD(free)
|
||||
free(*(reinterpret_cast<void**>(ptr) - 1));
|
||||
}
|
||||
}
|
||||
|
||||
@ -183,11 +177,8 @@ EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
|
||||
void *result;
|
||||
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
|
||||
|
||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
||||
result = ::malloc(size);
|
||||
#else
|
||||
result = std::malloc(size);
|
||||
#endif
|
||||
EIGEN_USING_STD(malloc)
|
||||
result = malloc(size);
|
||||
|
||||
#if EIGEN_DEFAULT_ALIGN_BYTES==16
|
||||
eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade aligned memory allocator.");
|
||||
@ -207,11 +198,8 @@ EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
|
||||
{
|
||||
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
|
||||
|
||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
||||
::free(ptr);
|
||||
#else
|
||||
std::free(ptr);
|
||||
#endif
|
||||
EIGEN_USING_STD(free)
|
||||
free(ptr);
|
||||
|
||||
#else
|
||||
handmade_aligned_free(ptr);
|
||||
@ -256,11 +244,8 @@ template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std:
|
||||
{
|
||||
check_that_malloc_is_allowed();
|
||||
|
||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
||||
void *result = ::malloc(size);
|
||||
#else
|
||||
void *result = std::malloc(size);
|
||||
#endif
|
||||
EIGEN_USING_STD(malloc)
|
||||
void *result = malloc(size);
|
||||
|
||||
if(!result && size)
|
||||
throw_std_bad_alloc();
|
||||
@ -275,11 +260,8 @@ template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr)
|
||||
{
|
||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
||||
::free(ptr);
|
||||
#else
|
||||
std::free(ptr);
|
||||
#endif
|
||||
EIGEN_USING_STD(free)
|
||||
free(ptr);
|
||||
}
|
||||
|
||||
template<bool Align> inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size)
|
||||
@ -540,11 +522,8 @@ template<typename T> struct smart_copy_helper<T,true> {
|
||||
IntPtr size = IntPtr(end)-IntPtr(start);
|
||||
if(size==0) return;
|
||||
eigen_internal_assert(start!=0 && end!=0 && target!=0);
|
||||
#if defined(EIGEN_HIP_DEVICE_COMPILE)
|
||||
::memcpy(target, start, size);
|
||||
#else
|
||||
std::memcpy(target, start, size);
|
||||
#endif
|
||||
EIGEN_USING_STD(memcpy)
|
||||
memcpy(target, start, size);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -45,8 +45,8 @@ static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) {
|
||||
class Allocator {
|
||||
public:
|
||||
virtual ~Allocator() {}
|
||||
EIGEN_DEVICE_FUNC virtual void* allocate(size_t num_bytes) const = 0;
|
||||
EIGEN_DEVICE_FUNC virtual void deallocate(void* buffer) const = 0;
|
||||
virtual void* allocate(size_t num_bytes) const = 0;
|
||||
virtual void deallocate(void* buffer) const = 0;
|
||||
};
|
||||
|
||||
// Build a thread pool device on top the an existing pool of threads.
|
||||
|
@ -195,6 +195,7 @@ struct InnerMostDimReducer<Self, Op, true, false> {
|
||||
}
|
||||
};
|
||||
|
||||
#if !defined(EIGEN_HIPCC)
|
||||
static const int kLeafSize = 1024;
|
||||
|
||||
template <typename Self, typename Op>
|
||||
@ -218,7 +219,6 @@ struct InnerMostDimReducer<Self, Op, false, true> {
|
||||
}
|
||||
};
|
||||
|
||||
#if !defined(EIGEN_HIPCC)
|
||||
template <typename Self, typename Op>
|
||||
struct InnerMostDimReducer<Self, Op, true, true> {
|
||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType
|
||||
|
Loading…
x
Reference in New Issue
Block a user