mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-05-04 17:54:07 +08:00
Improved support for CUDA devices.
Improved contractions on GPU
This commit is contained in:
parent
1269392822
commit
af2e5995e2
@ -44,6 +44,7 @@
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h"
|
||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h"
|
||||
|
1206
unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h
Normal file
1206
unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h
Normal file
File diff suppressed because it is too large
Load Diff
@ -104,19 +104,41 @@ struct GpuDevice {
|
||||
|
||||
// Accessor for the CUDA stream this device refers to through stream_.
EIGEN_STRONG_INLINE const cudaStream_t& stream() const {
  return *stream_;
}
|
||||
|
||||
/*EIGEN_DEVICE_FUNC*/ EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
|
||||
#ifndef __CUDA_ARCH__
|
||||
void* result;
|
||||
cudaMalloc(&result, num_bytes);
|
||||
assert(cudaMalloc(&result, num_bytes) == cudaSuccess);
|
||||
assert(result != NULL);
|
||||
return result;
|
||||
#else
|
||||
assert(false && "The default device should be used instead to generate kernel code");
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
/*EIGEN_DEVICE_FUNC */EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
|
||||
cudaFree(buffer);
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
|
||||
#ifndef __CUDA_ARCH__
|
||||
assert(buffer != NULL);
|
||||
assert(cudaFree(buffer) == cudaSuccess);
|
||||
#else
|
||||
assert(false && "The default device should be used instead to generate kernel code");
|
||||
#endif
|
||||
}
|
||||
EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
|
||||
cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToDevice, *stream_);
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
|
||||
#ifndef __CUDA_ARCH__
|
||||
assert(cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToDevice, *stream_) == cudaSuccess);
|
||||
#else
|
||||
assert(false && "The default device should be used instead to generate kernel code");
|
||||
#endif
|
||||
}
|
||||
EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
|
||||
cudaMemsetAsync(buffer, c, n, *stream_);
|
||||
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
|
||||
#ifndef __CUDA_ARCH__
|
||||
assert(cudaMemsetAsync(buffer, c, n, *stream_) == cudaSuccess);
|
||||
#else
|
||||
assert(false && "The default device should be used instead to generate kernel code");
|
||||
#endif
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE size_t numThreads() const {
|
||||
|
Loading…
x
Reference in New Issue
Block a user