Sharded the cxx11_tensor_cuda test and fixed a memory leak

This commit is contained in:
Benoit Steiner 2016-01-30 11:47:09 -08:00
parent 9de155d153
commit bd21aba181

View File

@@ -63,6 +63,10 @@ void test_cuda_elementwise_small() {
out(Eigen::array<int, 1>(i)), out(Eigen::array<int, 1>(i)),
in1(Eigen::array<int, 1>(i)) + in2(Eigen::array<int, 1>(i))); in1(Eigen::array<int, 1>(i)) + in2(Eigen::array<int, 1>(i)));
} }
cudaFree(d_in1);
cudaFree(d_in2);
cudaFree(d_out);
} }
void test_cuda_elementwise() void test_cuda_elementwise()
@@ -113,6 +117,11 @@ void test_cuda_elementwise()
} }
} }
} }
cudaFree(d_in1);
cudaFree(d_in2);
cudaFree(d_in3);
cudaFree(d_out);
} }
void test_cuda_reduction() void test_cuda_reduction()
@@ -158,10 +167,13 @@ void test_cuda_reduction()
VERIFY_IS_APPROX(out(i,j), expected); VERIFY_IS_APPROX(out(i,j), expected);
} }
} }
cudaFree(d_in1);
cudaFree(d_out);
} }
template<int DataLayout> template<int DataLayout>
static void test_cuda_contraction() void test_cuda_contraction()
{ {
// with these dimensions, the output has 300 * 140 elements, which is // with these dimensions, the output has 300 * 140 elements, which is
// more than 30 * 1024, which is the number of threads in blocks on // more than 30 * 1024, which is the number of threads in blocks on
@@ -216,10 +228,14 @@ static void test_cuda_contraction()
assert(false); assert(false);
} }
} }
cudaFree(d_t_left);
cudaFree(d_t_right);
cudaFree(d_t_result);
} }
template<int DataLayout> template<int DataLayout>
static void test_cuda_convolution_1d() void test_cuda_convolution_1d()
{ {
Tensor<float, 4, DataLayout> input(74,37,11,137); Tensor<float, 4, DataLayout> input(74,37,11,137);
Tensor<float, 1, DataLayout> kernel(4); Tensor<float, 1, DataLayout> kernel(4);
@@ -266,9 +282,13 @@ static void test_cuda_convolution_1d()
} }
} }
} }
cudaFree(d_input);
cudaFree(d_kernel);
cudaFree(d_out);
} }
static void test_cuda_convolution_inner_dim_col_major_1d() void test_cuda_convolution_inner_dim_col_major_1d()
{ {
Tensor<float, 4, ColMajor> input(74,9,11,7); Tensor<float, 4, ColMajor> input(74,9,11,7);
Tensor<float, 1, ColMajor> kernel(4); Tensor<float, 1, ColMajor> kernel(4);
@@ -315,9 +335,13 @@ static void test_cuda_convolution_inner_dim_col_major_1d()
} }
} }
} }
cudaFree(d_input);
cudaFree(d_kernel);
cudaFree(d_out);
} }
static void test_cuda_convolution_inner_dim_row_major_1d() void test_cuda_convolution_inner_dim_row_major_1d()
{ {
Tensor<float, 4, RowMajor> input(7,9,11,74); Tensor<float, 4, RowMajor> input(7,9,11,74);
Tensor<float, 1, RowMajor> kernel(4); Tensor<float, 1, RowMajor> kernel(4);
@@ -364,10 +388,14 @@ static void test_cuda_convolution_inner_dim_row_major_1d()
} }
} }
} }
cudaFree(d_input);
cudaFree(d_kernel);
cudaFree(d_out);
} }
template<int DataLayout> template<int DataLayout>
static void test_cuda_convolution_2d() void test_cuda_convolution_2d()
{ {
Tensor<float, 4, DataLayout> input(74,37,11,137); Tensor<float, 4, DataLayout> input(74,37,11,137);
Tensor<float, 2, DataLayout> kernel(3,4); Tensor<float, 2, DataLayout> kernel(3,4);
@@ -424,10 +452,14 @@ static void test_cuda_convolution_2d()
} }
} }
} }
cudaFree(d_input);
cudaFree(d_kernel);
cudaFree(d_out);
} }
template<int DataLayout> template<int DataLayout>
static void test_cuda_convolution_3d() void test_cuda_convolution_3d()
{ {
Tensor<float, 5, DataLayout> input(Eigen::array<int, 5>(74,37,11,137,17)); Tensor<float, 5, DataLayout> input(Eigen::array<int, 5>(74,37,11,137,17));
Tensor<float, 3, DataLayout> kernel(3,4,2); Tensor<float, 3, DataLayout> kernel(3,4,2);
@@ -498,6 +530,10 @@ static void test_cuda_convolution_3d()
} }
} }
} }
cudaFree(d_input);
cudaFree(d_kernel);
cudaFree(d_out);
} }
@@ -535,6 +571,9 @@ void test_cuda_lgamma(const Scalar stddev)
VERIFY_IS_APPROX(out(i,j), (std::lgamma)(in(i,j))); VERIFY_IS_APPROX(out(i,j), (std::lgamma)(in(i,j)));
} }
} }
cudaFree(d_in);
cudaFree(d_out);
} }
template <typename Scalar> template <typename Scalar>
@@ -571,6 +610,9 @@ void test_cuda_erf(const Scalar stddev)
VERIFY_IS_APPROX(out(i,j), (std::erf)(in(i,j))); VERIFY_IS_APPROX(out(i,j), (std::erf)(in(i,j)));
} }
} }
cudaFree(d_in);
cudaFree(d_out);
} }
template <typename Scalar> template <typename Scalar>
@@ -607,47 +649,50 @@ void test_cuda_erfc(const Scalar stddev)
VERIFY_IS_APPROX(out(i,j), (std::erfc)(in(i,j))); VERIFY_IS_APPROX(out(i,j), (std::erfc)(in(i,j)));
} }
} }
cudaFree(d_in);
cudaFree(d_out);
} }
void test_cxx11_tensor_cuda() void test_cxx11_tensor_cuda()
{ {
CALL_SUBTEST(test_cuda_elementwise_small()); CALL_SUBTEST_1(test_cuda_elementwise_small());
CALL_SUBTEST(test_cuda_elementwise()); CALL_SUBTEST_1(test_cuda_elementwise());
CALL_SUBTEST(test_cuda_reduction()); CALL_SUBTEST_1(test_cuda_reduction());
CALL_SUBTEST(test_cuda_contraction<ColMajor>()); CALL_SUBTEST_2(test_cuda_contraction<ColMajor>());
CALL_SUBTEST(test_cuda_contraction<RowMajor>()); CALL_SUBTEST_2(test_cuda_contraction<RowMajor>());
CALL_SUBTEST(test_cuda_convolution_1d<ColMajor>()); CALL_SUBTEST_3(test_cuda_convolution_1d<ColMajor>());
CALL_SUBTEST(test_cuda_convolution_1d<RowMajor>()); CALL_SUBTEST_3(test_cuda_convolution_1d<RowMajor>());
CALL_SUBTEST(test_cuda_convolution_inner_dim_col_major_1d()); CALL_SUBTEST_3(test_cuda_convolution_inner_dim_col_major_1d());
CALL_SUBTEST(test_cuda_convolution_inner_dim_row_major_1d()); CALL_SUBTEST_3(test_cuda_convolution_inner_dim_row_major_1d());
CALL_SUBTEST(test_cuda_convolution_2d<ColMajor>()); CALL_SUBTEST_3(test_cuda_convolution_2d<ColMajor>());
CALL_SUBTEST(test_cuda_convolution_2d<RowMajor>()); CALL_SUBTEST_3(test_cuda_convolution_2d<RowMajor>());
CALL_SUBTEST(test_cuda_convolution_3d<ColMajor>()); CALL_SUBTEST_3(test_cuda_convolution_3d<ColMajor>());
CALL_SUBTEST(test_cuda_convolution_3d<RowMajor>()); CALL_SUBTEST_3(test_cuda_convolution_3d<RowMajor>());
CALL_SUBTEST(test_cuda_lgamma<float>(1.0f)); CALL_SUBTEST_4(test_cuda_lgamma<float>(1.0f));
CALL_SUBTEST(test_cuda_lgamma<float>(100.0f)); CALL_SUBTEST_4(test_cuda_lgamma<float>(100.0f));
CALL_SUBTEST(test_cuda_lgamma<float>(0.01f)); CALL_SUBTEST_4(test_cuda_lgamma<float>(0.01f));
CALL_SUBTEST(test_cuda_lgamma<float>(0.001f)); CALL_SUBTEST_4(test_cuda_lgamma<float>(0.001f));
CALL_SUBTEST(test_cuda_erf<float>(1.0f)); CALL_SUBTEST_4(test_cuda_erf<float>(1.0f));
CALL_SUBTEST(test_cuda_erf<float>(100.0f)); CALL_SUBTEST_4(test_cuda_erf<float>(100.0f));
CALL_SUBTEST(test_cuda_erf<float>(0.01f)); CALL_SUBTEST_4(test_cuda_erf<float>(0.01f));
CALL_SUBTEST(test_cuda_erf<float>(0.001f)); CALL_SUBTEST_4(test_cuda_erf<float>(0.001f));
CALL_SUBTEST(test_cuda_erfc<float>(1.0f)); CALL_SUBTEST_4(test_cuda_erfc<float>(1.0f));
// CALL_SUBTEST(test_cuda_erfc<float>(100.0f)); // CALL_SUBTEST(test_cuda_erfc<float>(100.0f));
CALL_SUBTEST(test_cuda_erfc<float>(5.0f)); // CUDA erfc lacks precision for large inputs CALL_SUBTEST_4(test_cuda_erfc<float>(5.0f)); // CUDA erfc lacks precision for large inputs
CALL_SUBTEST(test_cuda_erfc<float>(0.01f)); CALL_SUBTEST_4(test_cuda_erfc<float>(0.01f));
CALL_SUBTEST(test_cuda_erfc<float>(0.001f)); CALL_SUBTEST_4(test_cuda_erfc<float>(0.001f));
CALL_SUBTEST(test_cuda_lgamma<double>(1.0)); CALL_SUBTEST_4(test_cuda_lgamma<double>(1.0));
CALL_SUBTEST(test_cuda_lgamma<double>(100.0)); CALL_SUBTEST_4(test_cuda_lgamma<double>(100.0));
CALL_SUBTEST(test_cuda_lgamma<double>(0.01)); CALL_SUBTEST_4(test_cuda_lgamma<double>(0.01));
CALL_SUBTEST(test_cuda_lgamma<double>(0.001)); CALL_SUBTEST_4(test_cuda_lgamma<double>(0.001));
CALL_SUBTEST(test_cuda_erf<double>(1.0)); CALL_SUBTEST_4(test_cuda_erf<double>(1.0));
CALL_SUBTEST(test_cuda_erf<double>(100.0)); CALL_SUBTEST_4(test_cuda_erf<double>(100.0));
CALL_SUBTEST(test_cuda_erf<double>(0.01)); CALL_SUBTEST_4(test_cuda_erf<double>(0.01));
CALL_SUBTEST(test_cuda_erf<double>(0.001)); CALL_SUBTEST_4(test_cuda_erf<double>(0.001));
CALL_SUBTEST(test_cuda_erfc<double>(1.0)); CALL_SUBTEST_4(test_cuda_erfc<double>(1.0));
// CALL_SUBTEST(test_cuda_erfc<double>(100.0)); // CALL_SUBTEST(test_cuda_erfc<double>(100.0));
CALL_SUBTEST(test_cuda_erfc<double>(5.0)); // CUDA erfc lacks precision for large inputs CALL_SUBTEST_4(test_cuda_erfc<double>(5.0)); // CUDA erfc lacks precision for large inputs
CALL_SUBTEST(test_cuda_erfc<double>(0.01)); CALL_SUBTEST_4(test_cuda_erfc<double>(0.01));
CALL_SUBTEST(test_cuda_erfc<double>(0.001)); CALL_SUBTEST_4(test_cuda_erfc<double>(0.001));
} }