mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-05-05 02:04:07 +08:00
Improved support for CUDA devices.
Improved contractions on GPU
This commit is contained in:
parent
1269392822
commit
af2e5995e2
@ -44,6 +44,7 @@
|
|||||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h"
|
#include "unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h"
|
||||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h"
|
#include "unsupported/Eigen/CXX11/src/Tensor/TensorConcatenation.h"
|
||||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h"
|
#include "unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h"
|
||||||
|
#include "unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h"
|
||||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h"
|
#include "unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h"
|
||||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h"
|
#include "unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h"
|
||||||
#include "unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h"
|
#include "unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h"
|
||||||
|
1206
unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h
Normal file
1206
unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h
Normal file
File diff suppressed because it is too large
Load Diff
@ -104,19 +104,41 @@ struct GpuDevice {
|
|||||||
|
|
||||||
// Accessor for the CUDA stream this device object refers to.
// Dereferences the stored stream_ pointer and hands the stream back by
// const reference; no CUDA API call is made here.
EIGEN_STRONG_INLINE const cudaStream_t& stream() const {
  return *stream_;
}
|
||||||
|
|
||||||
/*EIGEN_DEVICE_FUNC*/ EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
|
||||||
|
#ifndef __CUDA_ARCH__
|
||||||
void* result;
|
void* result;
|
||||||
cudaMalloc(&result, num_bytes);
|
assert(cudaMalloc(&result, num_bytes) == cudaSuccess);
|
||||||
|
assert(result != NULL);
|
||||||
return result;
|
return result;
|
||||||
|
#else
|
||||||
|
assert(false && "The default device should be used instead to generate kernel code");
|
||||||
|
return NULL;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
/*EIGEN_DEVICE_FUNC */EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
|
|
||||||
cudaFree(buffer);
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
|
||||||
|
#ifndef __CUDA_ARCH__
|
||||||
|
assert(buffer != NULL);
|
||||||
|
assert(cudaFree(buffer) == cudaSuccess);
|
||||||
|
#else
|
||||||
|
assert(false && "The default device should be used instead to generate kernel code");
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
|
|
||||||
cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToDevice, *stream_);
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
|
||||||
|
#ifndef __CUDA_ARCH__
|
||||||
|
assert(cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToDevice, *stream_) == cudaSuccess);
|
||||||
|
#else
|
||||||
|
assert(false && "The default device should be used instead to generate kernel code");
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
|
|
||||||
cudaMemsetAsync(buffer, c, n, *stream_);
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void* buffer, int c, size_t n) const {
|
||||||
|
#ifndef __CUDA_ARCH__
|
||||||
|
assert(cudaMemsetAsync(buffer, c, n, *stream_) == cudaSuccess);
|
||||||
|
#else
|
||||||
|
assert(false && "The default device should be used instead to generate kernel code");
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE size_t numThreads() const {
|
EIGEN_STRONG_INLINE size_t numThreads() const {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user