mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-07-08 22:21:49 +08:00
Extended the functionality of the TensorDeviceType classes
This commit is contained in:
parent
7d53633e05
commit
c285fda7f4
@ -21,6 +21,12 @@ struct DefaultDevice {
|
|||||||
// Releases `buffer` by handing it to internal::aligned_free.
EIGEN_STRONG_INLINE void deallocate(void* buffer) const
{
  internal::aligned_free(buffer);
}
|
||||||
|
// Copies `n` bytes from `src` to `dst` with the global ::memcpy.
// The pointer returned by ::memcpy is discarded.
EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const
{
  static_cast<void>(::memcpy(dst, src, n));
}
|
||||||
|
// Fills the first `n` bytes of `buffer` with the byte value `c` using the
// global ::memset. The pointer returned by ::memset is discarded.
EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const
{
  static_cast<void>(::memset(buffer, c, n));
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -37,6 +43,12 @@ struct ThreadPoolDevice {
|
|||||||
// Releases `buffer` by handing it to internal::aligned_free.
EIGEN_STRONG_INLINE void deallocate(void* buffer) const
{
  internal::aligned_free(buffer);
}
|
||||||
|
// Copies `n` bytes from `src` to `dst` with the global ::memcpy.
// The pointer returned by ::memcpy is discarded.
EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const
{
  static_cast<void>(::memcpy(dst, src, n));
}
|
||||||
|
// Fills the first `n` bytes of `buffer` with the byte value `c` using the
// global ::memset. The pointer returned by ::memset is discarded.
EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const
{
  static_cast<void>(::memset(buffer, c, n));
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// todo: NUMA, ...
|
// todo: NUMA, ...
|
||||||
@ -47,20 +59,61 @@ struct ThreadPoolDevice {
|
|||||||
|
|
||||||
// GPU offloading
|
// GPU offloading
|
||||||
#ifdef EIGEN_USE_GPU
|
#ifdef EIGEN_USE_GPU
|
||||||
|
static int m_numMultiProcessors = 0;
|
||||||
|
static int m_maxThreadsPerBlock = 0;
|
||||||
|
static int m_maxThreadsPerMultiProcessor = 0;
|
||||||
|
|
||||||
|
static inline int getNumCudaMultiProcessors() {
|
||||||
|
if (m_numMultiProcessors == 0) {
|
||||||
|
cudaDeviceProp deviceProp;
|
||||||
|
cudaGetDeviceProperties(&deviceProp, 0);
|
||||||
|
m_maxThreadsPerBlock = deviceProp.maxThreadsPerBlock;
|
||||||
|
m_maxThreadsPerMultiProcessor = deviceProp.maxThreadsPerMultiProcessor;
|
||||||
|
m_numMultiProcessors = deviceProp.multiProcessorCount;
|
||||||
|
}
|
||||||
|
return m_numMultiProcessors;
|
||||||
|
}
|
||||||
|
static inline int maxCudaThreadsPerBlock() {
|
||||||
|
if (m_maxThreadsPerBlock == 0) {
|
||||||
|
cudaDeviceProp deviceProp;
|
||||||
|
cudaGetDeviceProperties(&deviceProp, 0);
|
||||||
|
m_numMultiProcessors = deviceProp.multiProcessorCount;
|
||||||
|
m_maxThreadsPerMultiProcessor = deviceProp.maxThreadsPerMultiProcessor;
|
||||||
|
m_maxThreadsPerBlock = deviceProp.maxThreadsPerBlock;
|
||||||
|
}
|
||||||
|
return m_maxThreadsPerBlock;
|
||||||
|
}
|
||||||
|
static inline int maxCudaThreadsPerMultiProcessor() {
|
||||||
|
if (m_maxThreadsPerBlock == 0) {
|
||||||
|
cudaDeviceProp deviceProp;
|
||||||
|
cudaGetDeviceProperties(&deviceProp, 0);
|
||||||
|
m_numMultiProcessors = deviceProp.multiProcessorCount;
|
||||||
|
m_maxThreadsPerBlock = deviceProp.maxThreadsPerBlock;
|
||||||
|
m_maxThreadsPerMultiProcessor = deviceProp.maxThreadsPerMultiProcessor;
|
||||||
|
}
|
||||||
|
return m_maxThreadsPerMultiProcessor;
|
||||||
|
}
|
||||||
|
|
||||||
struct GpuDevice {
|
struct GpuDevice {
|
||||||
// The cudastream is not owned: the caller is responsible for its initialization and eventual destruction.
|
// The cudastream is not owned: the caller is responsible for its initialization and eventual destruction.
|
||||||
// Stores the given stream pointer and asserts that it is non-null.
GpuDevice(const cudaStream_t* stream)
    : stream_(stream)
{
  eigen_assert(stream);
}
|
||||||
|
|
||||||
// Returns a reference to the stream this device was constructed with.
EIGEN_STRONG_INLINE const cudaStream_t& stream() const
{
  const cudaStream_t& handle = *stream_;
  return handle;
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
|
/*EIGEN_DEVICE_FUNC*/ EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
|
||||||
void* result;
|
void* result;
|
||||||
cudaMalloc(&result, num_bytes);
|
cudaMalloc(&result, num_bytes);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
|
/*EIGEN_DEVICE_FUNC */EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
|
||||||
cudaFree(buffer);
|
cudaFree(buffer);
|
||||||
}
|
}
|
||||||
|
// Issues an asynchronous copy of `n` bytes from `src` to `dst` on *stream_.
// The copy kind is cudaMemcpyDeviceToDevice, so both pointers are treated as
// device memory.
//
// The status returned by cudaMemcpyAsync is checked with eigen_assert instead
// of being ignored. This only covers errors reported at the time of the call.
EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
  const cudaError_t status =
      cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToDevice, *stream_);
  static_cast<void>(status);  // keeps builds without assertions warning-free
  eigen_assert(status == cudaSuccess);
}
|
||||||
|
// Issues an asynchronous fill of the first `n` bytes of `buffer` with the byte
// value `c` on *stream_, using cudaMemsetAsync.
//
// The status returned by cudaMemsetAsync is checked with eigen_assert instead
// of being ignored. This only covers errors reported at the time of the call.
EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
  const cudaError_t status = cudaMemsetAsync(buffer, c, n, *stream_);
  static_cast<void>(status);  // keeps builds without assertions warning-free
  eigen_assert(status == cudaSuccess);
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// TODO: multigpu.
|
// TODO: multigpu.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user