mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-14 04:35:57 +08:00
Updated the cuda tests to use the new GpuDevice constructor
This commit is contained in:
parent
e892524efe
commit
8315e025e1
@ -52,8 +52,7 @@ static void test_cuda_contraction(int m_size, int k_size, int n_size)
|
|||||||
cudaMemcpy(d_t_left, t_left.data(), t_left_bytes, cudaMemcpyHostToDevice);
|
cudaMemcpy(d_t_left, t_left.data(), t_left_bytes, cudaMemcpyHostToDevice);
|
||||||
cudaMemcpy(d_t_right, t_right.data(), t_right_bytes, cudaMemcpyHostToDevice);
|
cudaMemcpy(d_t_right, t_right.data(), t_right_bytes, cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
cudaStream_t stream;
|
Eigen::CudaStreamDevice stream;
|
||||||
assert(cudaStreamCreate(&stream) == cudaSuccess);
|
|
||||||
Eigen::GpuDevice gpu_device(&stream);
|
Eigen::GpuDevice gpu_device(&stream);
|
||||||
|
|
||||||
Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> >
|
Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> >
|
||||||
|
@ -77,14 +77,11 @@ struct GPUContext {
|
|||||||
assert(cudaMalloc((void**)(&kernel_3d_), 8*sizeof(float)) == cudaSuccess);
|
assert(cudaMalloc((void**)(&kernel_3d_), 8*sizeof(float)) == cudaSuccess);
|
||||||
float kernel_3d_val[] = {3.14f, -1.0f, 2.7f, -0.3f, 0.2f, -0.7f, 7.0f, -0.5f};
|
float kernel_3d_val[] = {3.14f, -1.0f, 2.7f, -0.3f, 0.2f, -0.7f, 7.0f, -0.5f};
|
||||||
assert(cudaMemcpy(kernel_3d_, kernel_3d_val, 8*sizeof(float), cudaMemcpyHostToDevice) == cudaSuccess);
|
assert(cudaMemcpy(kernel_3d_, kernel_3d_val, 8*sizeof(float), cudaMemcpyHostToDevice) == cudaSuccess);
|
||||||
|
|
||||||
assert(cudaStreamCreate(&stream_) == cudaSuccess);
|
|
||||||
}
|
}
|
||||||
~GPUContext() {
|
~GPUContext() {
|
||||||
assert(cudaFree(kernel_1d_) == cudaSuccess);
|
assert(cudaFree(kernel_1d_) == cudaSuccess);
|
||||||
assert(cudaFree(kernel_2d_) == cudaSuccess);
|
assert(cudaFree(kernel_2d_) == cudaSuccess);
|
||||||
assert(cudaFree(kernel_3d_) == cudaSuccess);
|
assert(cudaFree(kernel_3d_) == cudaSuccess);
|
||||||
assert(cudaStreamDestroy(stream_) == cudaSuccess);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const Eigen::GpuDevice& device() const { return gpu_device_; }
|
const Eigen::GpuDevice& device() const { return gpu_device_; }
|
||||||
@ -105,7 +102,7 @@ struct GPUContext {
|
|||||||
float* kernel_2d_;
|
float* kernel_2d_;
|
||||||
float* kernel_3d_;
|
float* kernel_3d_;
|
||||||
|
|
||||||
cudaStream_t stream_;
|
Eigen::CudaStreamDevice stream_;
|
||||||
Eigen::GpuDevice gpu_device_;
|
Eigen::GpuDevice gpu_device_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -19,7 +19,8 @@
|
|||||||
template<int DataLayout>
|
template<int DataLayout>
|
||||||
static void test_full_reductions() {
|
static void test_full_reductions() {
|
||||||
|
|
||||||
Eigen::GpuDevice gpu_device;
|
Eigen::CudaStreamDevice stream;
|
||||||
|
Eigen::GpuDevice gpu_device(&stream);
|
||||||
|
|
||||||
const int num_rows = internal::random<int>(1024, 5*1024);
|
const int num_rows = internal::random<int>(1024, 5*1024);
|
||||||
const int num_cols = internal::random<int>(1024, 5*1024);
|
const int num_cols = internal::random<int>(1024, 5*1024);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user