Sharded the cxx11_tensor_cuda test and fixed a memory leak

Benoit Steiner 2016-01-30 11:47:09 -08:00
parent 9de155d153
commit bd21aba181
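The leak came from device buffers that each test allocated with cudaMalloc and never released; the patch pairs every allocation with a cudaFree once the result has been verified. The sketch below illustrates that allocate/compute/free pattern. It is a simplified illustration, not an excerpt from the test file: the function name, tensor sizes, and the exact device-setup calls are assumptions modeled on how these Eigen GPU tests are typically written.

// Minimal sketch of the pattern the fix enforces: every cudaMalloc'd buffer
// backing a TensorMap gets a matching cudaFree after verification.
// Names and sizes are illustrative, not copied from cxx11_tensor_cuda.cu.
#define EIGEN_USE_GPU
#include <cuda_runtime.h>
#include <unsupported/Eigen/CXX11/Tensor>

static void sketch_elementwise_sum()
{
  Eigen::Tensor<float, 1> in1(101), in2(101), out(101);
  in1.setRandom();
  in2.setRandom();

  const std::size_t bytes = 101 * sizeof(float);
  float *d_in1, *d_in2, *d_out;
  cudaMalloc((void**)(&d_in1), bytes);
  cudaMalloc((void**)(&d_in2), bytes);
  cudaMalloc((void**)(&d_out), bytes);
  cudaMemcpy(d_in1, in1.data(), bytes, cudaMemcpyHostToDevice);
  cudaMemcpy(d_in2, in2.data(), bytes, cudaMemcpyHostToDevice);

  Eigen::CudaStreamDevice stream;
  Eigen::GpuDevice gpu_device(&stream);
  Eigen::TensorMap<Eigen::Tensor<float, 1> > gpu_in1(d_in1, 101);
  Eigen::TensorMap<Eigen::Tensor<float, 1> > gpu_in2(d_in2, 101);
  Eigen::TensorMap<Eigen::Tensor<float, 1> > gpu_out(d_out, 101);

  gpu_out.device(gpu_device) = gpu_in1 + gpu_in2;  // evaluated on the GPU stream

  cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost,
                  gpu_device.stream());
  cudaStreamSynchronize(gpu_device.stream());

  // The part this commit adds: without these calls the device buffers were
  // leaked on every run of the test.
  cudaFree(d_in1);
  cudaFree(d_in2);
  cudaFree(d_out);
}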


@@ -63,6 +63,10 @@ void test_cuda_elementwise_small() {
         out(Eigen::array<int, 1>(i)),
         in1(Eigen::array<int, 1>(i)) + in2(Eigen::array<int, 1>(i)));
   }
+
+  cudaFree(d_in1);
+  cudaFree(d_in2);
+  cudaFree(d_out);
 }
 
 void test_cuda_elementwise()
@@ -113,6 +117,11 @@ void test_cuda_elementwise()
       }
     }
   }
+
+  cudaFree(d_in1);
+  cudaFree(d_in2);
+  cudaFree(d_in3);
+  cudaFree(d_out);
 }
 
 void test_cuda_reduction()
@@ -158,10 +167,13 @@ void test_cuda_reduction()
       VERIFY_IS_APPROX(out(i,j), expected);
     }
   }
+
+  cudaFree(d_in1);
+  cudaFree(d_out);
 }
 
 template<int DataLayout>
-static void test_cuda_contraction()
+void test_cuda_contraction()
 {
   // with these dimensions, the output has 300 * 140 elements, which is
   // more than 30 * 1024, which is the number of threads in blocks on
@@ -216,10 +228,14 @@ static void test_cuda_contraction()
       assert(false);
     }
   }
+
+  cudaFree(d_t_left);
+  cudaFree(d_t_right);
+  cudaFree(d_t_result);
 }
 
 template<int DataLayout>
-static void test_cuda_convolution_1d()
+void test_cuda_convolution_1d()
 {
   Tensor<float, 4, DataLayout> input(74,37,11,137);
   Tensor<float, 1, DataLayout> kernel(4);
@@ -266,9 +282,13 @@ static void test_cuda_convolution_1d()
       }
     }
   }
+
+  cudaFree(d_input);
+  cudaFree(d_kernel);
+  cudaFree(d_out);
 }
 
-static void test_cuda_convolution_inner_dim_col_major_1d()
+void test_cuda_convolution_inner_dim_col_major_1d()
 {
   Tensor<float, 4, ColMajor> input(74,9,11,7);
   Tensor<float, 1, ColMajor> kernel(4);
@@ -315,9 +335,13 @@ static void test_cuda_convolution_inner_dim_col_major_1d()
       }
     }
   }
+
+  cudaFree(d_input);
+  cudaFree(d_kernel);
+  cudaFree(d_out);
 }
 
-static void test_cuda_convolution_inner_dim_row_major_1d()
+void test_cuda_convolution_inner_dim_row_major_1d()
 {
   Tensor<float, 4, RowMajor> input(7,9,11,74);
   Tensor<float, 1, RowMajor> kernel(4);
@@ -364,10 +388,14 @@ static void test_cuda_convolution_inner_dim_row_major_1d()
       }
     }
   }
+
+  cudaFree(d_input);
+  cudaFree(d_kernel);
+  cudaFree(d_out);
 }
 
 template<int DataLayout>
-static void test_cuda_convolution_2d()
+void test_cuda_convolution_2d()
 {
   Tensor<float, 4, DataLayout> input(74,37,11,137);
   Tensor<float, 2, DataLayout> kernel(3,4);
@@ -424,10 +452,14 @@ static void test_cuda_convolution_2d()
      }
     }
   }
+
+  cudaFree(d_input);
+  cudaFree(d_kernel);
+  cudaFree(d_out);
 }
 
 template<int DataLayout>
-static void test_cuda_convolution_3d()
+void test_cuda_convolution_3d()
 {
   Tensor<float, 5, DataLayout> input(Eigen::array<int, 5>(74,37,11,137,17));
   Tensor<float, 3, DataLayout> kernel(3,4,2);
@@ -498,6 +530,10 @@ static void test_cuda_convolution_3d()
       }
     }
   }
+
+  cudaFree(d_input);
+  cudaFree(d_kernel);
+  cudaFree(d_out);
 }
 
 
@@ -535,6 +571,9 @@ void test_cuda_lgamma(const Scalar stddev)
       VERIFY_IS_APPROX(out(i,j), (std::lgamma)(in(i,j)));
     }
   }
+
+  cudaFree(d_in);
+  cudaFree(d_out);
 }
 
 template <typename Scalar>
@@ -571,6 +610,9 @@ void test_cuda_erf(const Scalar stddev)
       VERIFY_IS_APPROX(out(i,j), (std::erf)(in(i,j)));
     }
   }
+
+  cudaFree(d_in);
+  cudaFree(d_out);
 }
 
 template <typename Scalar>
@@ -607,47 +649,50 @@ void test_cuda_erfc(const Scalar stddev)
       VERIFY_IS_APPROX(out(i,j), (std::erfc)(in(i,j)));
     }
   }
+
+  cudaFree(d_in);
+  cudaFree(d_out);
 }
 
 void test_cxx11_tensor_cuda()
 {
-  CALL_SUBTEST(test_cuda_elementwise_small());
-  CALL_SUBTEST(test_cuda_elementwise());
-  CALL_SUBTEST(test_cuda_reduction());
-  CALL_SUBTEST(test_cuda_contraction<ColMajor>());
-  CALL_SUBTEST(test_cuda_contraction<RowMajor>());
-  CALL_SUBTEST(test_cuda_convolution_1d<ColMajor>());
-  CALL_SUBTEST(test_cuda_convolution_1d<RowMajor>());
-  CALL_SUBTEST(test_cuda_convolution_inner_dim_col_major_1d());
-  CALL_SUBTEST(test_cuda_convolution_inner_dim_row_major_1d());
-  CALL_SUBTEST(test_cuda_convolution_2d<ColMajor>());
-  CALL_SUBTEST(test_cuda_convolution_2d<RowMajor>());
-  CALL_SUBTEST(test_cuda_convolution_3d<ColMajor>());
-  CALL_SUBTEST(test_cuda_convolution_3d<RowMajor>());
-  CALL_SUBTEST(test_cuda_lgamma<float>(1.0f));
-  CALL_SUBTEST(test_cuda_lgamma<float>(100.0f));
-  CALL_SUBTEST(test_cuda_lgamma<float>(0.01f));
-  CALL_SUBTEST(test_cuda_lgamma<float>(0.001f));
-  CALL_SUBTEST(test_cuda_erf<float>(1.0f));
-  CALL_SUBTEST(test_cuda_erf<float>(100.0f));
-  CALL_SUBTEST(test_cuda_erf<float>(0.01f));
-  CALL_SUBTEST(test_cuda_erf<float>(0.001f));
-  CALL_SUBTEST(test_cuda_erfc<float>(1.0f));
+  CALL_SUBTEST_1(test_cuda_elementwise_small());
+  CALL_SUBTEST_1(test_cuda_elementwise());
+  CALL_SUBTEST_1(test_cuda_reduction());
+  CALL_SUBTEST_2(test_cuda_contraction<ColMajor>());
+  CALL_SUBTEST_2(test_cuda_contraction<RowMajor>());
+  CALL_SUBTEST_3(test_cuda_convolution_1d<ColMajor>());
+  CALL_SUBTEST_3(test_cuda_convolution_1d<RowMajor>());
+  CALL_SUBTEST_3(test_cuda_convolution_inner_dim_col_major_1d());
+  CALL_SUBTEST_3(test_cuda_convolution_inner_dim_row_major_1d());
+  CALL_SUBTEST_3(test_cuda_convolution_2d<ColMajor>());
+  CALL_SUBTEST_3(test_cuda_convolution_2d<RowMajor>());
+  CALL_SUBTEST_3(test_cuda_convolution_3d<ColMajor>());
+  CALL_SUBTEST_3(test_cuda_convolution_3d<RowMajor>());
+  CALL_SUBTEST_4(test_cuda_lgamma<float>(1.0f));
+  CALL_SUBTEST_4(test_cuda_lgamma<float>(100.0f));
+  CALL_SUBTEST_4(test_cuda_lgamma<float>(0.01f));
+  CALL_SUBTEST_4(test_cuda_lgamma<float>(0.001f));
+  CALL_SUBTEST_4(test_cuda_erf<float>(1.0f));
+  CALL_SUBTEST_4(test_cuda_erf<float>(100.0f));
+  CALL_SUBTEST_4(test_cuda_erf<float>(0.01f));
+  CALL_SUBTEST_4(test_cuda_erf<float>(0.001f));
+  CALL_SUBTEST_4(test_cuda_erfc<float>(1.0f));
   // CALL_SUBTEST(test_cuda_erfc<float>(100.0f));
-  CALL_SUBTEST(test_cuda_erfc<float>(5.0f)); // CUDA erfc lacks precision for large inputs
-  CALL_SUBTEST(test_cuda_erfc<float>(0.01f));
-  CALL_SUBTEST(test_cuda_erfc<float>(0.001f));
-  CALL_SUBTEST(test_cuda_lgamma<double>(1.0));
-  CALL_SUBTEST(test_cuda_lgamma<double>(100.0));
-  CALL_SUBTEST(test_cuda_lgamma<double>(0.01));
-  CALL_SUBTEST(test_cuda_lgamma<double>(0.001));
-  CALL_SUBTEST(test_cuda_erf<double>(1.0));
-  CALL_SUBTEST(test_cuda_erf<double>(100.0));
-  CALL_SUBTEST(test_cuda_erf<double>(0.01));
-  CALL_SUBTEST(test_cuda_erf<double>(0.001));
-  CALL_SUBTEST(test_cuda_erfc<double>(1.0));
+  CALL_SUBTEST_4(test_cuda_erfc<float>(5.0f)); // CUDA erfc lacks precision for large inputs
+  CALL_SUBTEST_4(test_cuda_erfc<float>(0.01f));
+  CALL_SUBTEST_4(test_cuda_erfc<float>(0.001f));
+  CALL_SUBTEST_4(test_cuda_lgamma<double>(1.0));
+  CALL_SUBTEST_4(test_cuda_lgamma<double>(100.0));
+  CALL_SUBTEST_4(test_cuda_lgamma<double>(0.01));
+  CALL_SUBTEST_4(test_cuda_lgamma<double>(0.001));
+  CALL_SUBTEST_4(test_cuda_erf<double>(1.0));
+  CALL_SUBTEST_4(test_cuda_erf<double>(100.0));
+  CALL_SUBTEST_4(test_cuda_erf<double>(0.01));
+  CALL_SUBTEST_4(test_cuda_erf<double>(0.001));
+  CALL_SUBTEST_4(test_cuda_erfc<double>(1.0));
   // CALL_SUBTEST(test_cuda_erfc<double>(100.0));
-  CALL_SUBTEST(test_cuda_erfc<double>(5.0)); // CUDA erfc lacks precision for large inputs
-  CALL_SUBTEST(test_cuda_erfc<double>(0.01));
-  CALL_SUBTEST(test_cuda_erfc<double>(0.001));
+  CALL_SUBTEST_4(test_cuda_erfc<double>(5.0)); // CUDA erfc lacks precision for large inputs
+  CALL_SUBTEST_4(test_cuda_erfc<double>(0.01));
+  CALL_SUBTEST_4(test_cuda_erfc<double>(0.001));
 }
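For reference, the sharding works through Eigen's numbered subtest macros: CALL_SUBTEST_N(f) only expands to a real call when the matching EIGEN_TEST_PART_N preprocessor symbol is defined, so the build system can compile and run each numbered group (basic ops, contraction, convolutions, special functions) as its own test binary. Dropping static from the per-shard functions avoids unused-function warnings in the shards that no longer call them. Below is a rough, simplified sketch of that mechanism; it paraphrases the idea behind Eigen's test/main.h rather than copying it.

// Simplified sketch of numbered-subtest sharding (not a verbatim copy of
// Eigen's test/main.h; the CALL_SUBTEST body here is a stand-in).
#include <cstdio>

#define CALL_SUBTEST(FUNC) do { FUNC; } while (0)

#if defined(EIGEN_TEST_PART_1)
#define CALL_SUBTEST_1(FUNC) CALL_SUBTEST(FUNC)
#else
#define CALL_SUBTEST_1(FUNC)   // compiled out when building another shard
#endif

#if defined(EIGEN_TEST_PART_2)
#define CALL_SUBTEST_2(FUNC) CALL_SUBTEST(FUNC)
#else
#define CALL_SUBTEST_2(FUNC)
#endif

void shard1_tests() { std::printf("elementwise + reduction shard\n"); }
void shard2_tests() { std::printf("contraction shard\n"); }

int main()
{
  // Build with -DEIGEN_TEST_PART_1 or -DEIGEN_TEST_PART_2 to select a shard;
  // the other invocation expands to nothing.
  CALL_SUBTEST_1(shard1_tests());
  CALL_SUBTEST_2(shard2_tests());
  return 0;
}

In Eigen's CMake harness each part typically becomes its own ctest target (here that would be cxx11_tensor_cuda_1 through cxx11_tensor_cuda_4), which is what lets the four groups above build and run in parallel instead of as one long monolithic test.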