mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-05-02 00:34:14 +08:00
Added CUDA tests.
This commit is contained in:
parent
ffd770ce94
commit
eb0ae602bd
@ -626,6 +626,127 @@ void test_cuda_digamma()
|
||||
}
|
||||
}
|
||||
|
||||
// Checks the Hurwitz zeta tensor op, zeta(x, q), on the GPU against
// precomputed reference values. zeta(1, q) is a pole, so the first entry is
// compared exactly against +infinity; the remaining entries approximately.
template <typename Scalar>
void test_cuda_zeta()
{
  Tensor<Scalar, 1> in_x(6);
  Tensor<Scalar, 1> in_q(6);
  Tensor<Scalar, 1> out(6);
  Tensor<Scalar, 1> expected_out(6);
  out.setZero();

  in_x(0) = Scalar(1);
  in_x(1) = Scalar(1.5);
  in_x(2) = Scalar(4);
  in_x(3) = Scalar(-10.5);
  in_x(4) = Scalar(10000.5);
  in_x(5) = Scalar(3);

  in_q(0) = Scalar(1.2345);
  in_q(1) = Scalar(2);
  in_q(2) = Scalar(1.5);
  in_q(3) = Scalar(3);
  in_q(4) = Scalar(1.0001);
  in_q(5) = Scalar(-2.5);

  expected_out(0) = std::numeric_limits<Scalar>::infinity();
  expected_out(1) = Scalar(1.61237534869);
  expected_out(2) = Scalar(0.234848505667);
  expected_out(3) = Scalar(1.03086757337e-5);
  expected_out(4) = Scalar(0.367879440865);
  expected_out(5) = Scalar(0.054102025820864097);

  std::size_t bytes = in_x.size() * sizeof(Scalar);

  // NOTE: each pointer declarator needs its own '*'.
  // "Scalar* d_in_x, d_in_q;" would declare d_in_q as a plain Scalar,
  // making cudaMalloc write a device address over a host Scalar.
  Scalar* d_in_x;
  Scalar* d_in_q;
  Scalar* d_out;
  cudaMalloc((void**)(&d_in_x), bytes);
  cudaMalloc((void**)(&d_in_q), bytes);
  cudaMalloc((void**)(&d_out), bytes);

  cudaMemcpy(d_in_x, in_x.data(), bytes, cudaMemcpyHostToDevice);
  cudaMemcpy(d_in_q, in_q.data(), bytes, cudaMemcpyHostToDevice);

  Eigen::CudaStreamDevice stream;
  Eigen::GpuDevice gpu_device(&stream);

  Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_x(d_in_x, 6);
  Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_q(d_in_q, 6);
  Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 6);

  // Evaluate zeta(x, q) element-wise on the GPU stream.
  gpu_out.device(gpu_device) = gpu_in_x.zeta(gpu_in_q);

  // NOTE(review): side effects inside assert() vanish under NDEBUG; the
  // surrounding test file uses this pattern throughout, so it is kept.
  assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
  assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);

  // The pole at x == 1 must be reproduced exactly as +inf.
  VERIFY_IS_EQUAL(out(0), expected_out(0));

  for (int i = 1; i < 6; ++i) {
    VERIFY_IS_APPROX(out(i), expected_out(i));
  }

  // Release device memory so repeated invocations do not leak.
  cudaFree(d_in_x);
  cudaFree(d_in_q);
  cudaFree(d_out);
}
|
||||
|
||||
// Checks the polygamma tensor op, polygamma(n, x) = d^(n+1)/dx^(n+1) ln(Gamma(x)),
// on the GPU against precomputed reference values
// (e.g. polygamma(1, 2) = pi^2/6 - 1 ~= 0.644934066848).
template <typename Scalar>
void test_cuda_polygamma()
{
  Tensor<Scalar, 1> in_x(7);
  Tensor<Scalar, 1> in_n(7);
  Tensor<Scalar, 1> out(7);
  Tensor<Scalar, 1> expected_out(7);
  out.setZero();

  in_n(0) = Scalar(1);
  in_n(1) = Scalar(1);
  in_n(2) = Scalar(1);
  in_n(3) = Scalar(17);
  in_n(4) = Scalar(31);
  in_n(5) = Scalar(28);
  in_n(6) = Scalar(8);

  in_x(0) = Scalar(2);
  in_x(1) = Scalar(3);
  in_x(2) = Scalar(25.5);
  in_x(3) = Scalar(4.7);
  in_x(4) = Scalar(11.8);
  in_x(5) = Scalar(17.7);
  in_x(6) = Scalar(30.2);

  expected_out(0) = Scalar(0.644934066848);
  expected_out(1) = Scalar(0.394934066848);
  expected_out(2) = Scalar(0.0399946696496);
  expected_out(3) = Scalar(293.334565435);
  expected_out(4) = Scalar(0.445487887616);
  expected_out(5) = Scalar(-2.47810300902e-07);
  expected_out(6) = Scalar(-8.29668781082e-09);

  std::size_t bytes = in_x.size() * sizeof(Scalar);

  // NOTE: each pointer declarator needs its own '*'.
  // "Scalar* d_in_x, d_in_n;" would declare d_in_n as a plain Scalar,
  // making cudaMalloc write a device address over a host Scalar.
  Scalar* d_in_x;
  Scalar* d_in_n;
  Scalar* d_out;
  cudaMalloc((void**)(&d_in_x), bytes);
  cudaMalloc((void**)(&d_in_n), bytes);
  cudaMalloc((void**)(&d_out), bytes);

  cudaMemcpy(d_in_x, in_x.data(), bytes, cudaMemcpyHostToDevice);
  cudaMemcpy(d_in_n, in_n.data(), bytes, cudaMemcpyHostToDevice);

  Eigen::CudaStreamDevice stream;
  Eigen::GpuDevice gpu_device(&stream);

  Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_x(d_in_x, 7);
  Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_n(d_in_n, 7);
  Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 7);

  // BUG FIX: this test previously evaluated zeta(n, x), but the expected
  // values above are polygamma values (zeta(1, 2) would be +inf, not
  // 0.644934066848). Evaluate the op this test is actually named for.
  gpu_out.device(gpu_device) = gpu_in_n.polygamma(gpu_in_x);

  // NOTE(review): side effects inside assert() vanish under NDEBUG; the
  // surrounding test file uses this pattern throughout, so it is kept.
  assert(cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
  assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);

  for (int i = 0; i < 7; ++i) {
    VERIFY_IS_APPROX(out(i), expected_out(i));
  }

  // Release device memory so repeated invocations do not leak.
  cudaFree(d_in_x);
  cudaFree(d_in_n);
  cudaFree(d_out);
}
|
||||
|
||||
template <typename Scalar>
|
||||
void test_cuda_igamma()
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user