Sharded the cxx11_tensor_cuda test and fixed a memory leak

Benoit Steiner 2016-01-30 11:47:09 -08:00
parent 9de155d153
commit bd21aba181
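The leak came from device buffers that each test allocated with cudaMalloc and never released; the patch pairs every allocation with a cudaFree once the result has been verified. The sketch below illustrates that allocate/compute/free pattern. It is a simplified illustration, not an excerpt from the test file: the function name, tensor sizes, and the exact device-setup calls are assumptions modeled on how these Eigen GPU tests are typically written.

// Minimal sketch of the pattern the fix enforces: every cudaMalloc'd buffer
// backing a TensorMap gets a matching cudaFree after verification.
// Names and sizes are illustrative, not copied from cxx11_tensor_cuda.cu.
#define EIGEN_USE_GPU
#include <cuda_runtime.h>
#include <unsupported/Eigen/CXX11/Tensor>

static void sketch_elementwise_sum()
{
  Eigen::Tensor<float, 1> in1(101), in2(101), out(101);
  in1.setRandom();
  in2.setRandom();

  const std::size_t bytes = 101 * sizeof(float);
  float *d_in1, *d_in2, *d_out;
  cudaMalloc((void**)(&d_in1), bytes);
  cudaMalloc((void**)(&d_in2), bytes);
  cudaMalloc((void**)(&d_out), bytes);
  cudaMemcpy(d_in1, in1.data(), bytes, cudaMemcpyHostToDevice);
  cudaMemcpy(d_in2, in2.data(), bytes, cudaMemcpyHostToDevice);

  Eigen::CudaStreamDevice stream;
  Eigen::GpuDevice gpu_device(&stream);
  Eigen::TensorMap<Eigen::Tensor<float, 1> > gpu_in1(d_in1, 101);
  Eigen::TensorMap<Eigen::Tensor<float, 1> > gpu_in2(d_in2, 101);
  Eigen::TensorMap<Eigen::Tensor<float, 1> > gpu_out(d_out, 101);

  gpu_out.device(gpu_device) = gpu_in1 + gpu_in2;  // evaluated on the GPU stream

  cudaMemcpyAsync(out.data(), d_out, bytes, cudaMemcpyDeviceToHost,
                  gpu_device.stream());
  cudaStreamSynchronize(gpu_device.stream());

  // The part this commit adds: without these calls the device buffers were
  // leaked on every run of the test.
  cudaFree(d_in1);
  cudaFree(d_in2);
  cudaFree(d_out);
}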


@@ -63,6 +63,10 @@ void test_cuda_elementwise_small() {
         out(Eigen::array<int, 1>(i)),
         in1(Eigen::array<int, 1>(i)) + in2(Eigen::array<int, 1>(i)));
   }
+
+  cudaFree(d_in1);
+  cudaFree(d_in2);
+  cudaFree(d_out);
 }
 
 void test_cuda_elementwise()
@@ -113,6 +117,11 @@ void test_cuda_elementwise()
       }
     }
   }
+
+  cudaFree(d_in1);
+  cudaFree(d_in2);
+  cudaFree(d_in3);
+  cudaFree(d_out);
 }
 
 void test_cuda_reduction()
@@ -158,10 +167,13 @@ void test_cuda_reduction()
       VERIFY_IS_APPROX(out(i,j), expected);
     }
   }
+
+  cudaFree(d_in1);
+  cudaFree(d_out);
 }
 
 template<int DataLayout>
-static void test_cuda_contraction()
+void test_cuda_contraction()
 {
   // with these dimensions, the output has 300 * 140 elements, which is
   // more than 30 * 1024, which is the number of threads in blocks on
@@ -216,10 +228,14 @@ static void test_cuda_contraction()
       assert(false);
     }
   }
+
+  cudaFree(d_t_left);
+  cudaFree(d_t_right);
+  cudaFree(d_t_result);
 }
 
 template<int DataLayout>
-static void test_cuda_convolution_1d()
+void test_cuda_convolution_1d()
 {
   Tensor<float, 4, DataLayout> input(74,37,11,137);
   Tensor<float, 1, DataLayout> kernel(4);
@@ -266,9 +282,13 @@ static void test_cuda_convolution_1d()
       }
     }
   }
+
+  cudaFree(d_input);
+  cudaFree(d_kernel);
+  cudaFree(d_out);
 }
 
-static void test_cuda_convolution_inner_dim_col_major_1d()
+void test_cuda_convolution_inner_dim_col_major_1d()
 {
   Tensor<float, 4, ColMajor> input(74,9,11,7);
   Tensor<float, 1, ColMajor> kernel(4);
@@ -315,9 +335,13 @@ static void test_cuda_convolution_inner_dim_col_major_1d()
       }
     }
   }
+
+  cudaFree(d_input);
+  cudaFree(d_kernel);
+  cudaFree(d_out);
 }
 
-static void test_cuda_convolution_inner_dim_row_major_1d()
+void test_cuda_convolution_inner_dim_row_major_1d()
 {
   Tensor<float, 4, RowMajor> input(7,9,11,74);
   Tensor<float, 1, RowMajor> kernel(4);
@@ -364,10 +388,14 @@ static void test_cuda_convolution_inner_dim_row_major_1d()
       }
     }
   }
+
+  cudaFree(d_input);
+  cudaFree(d_kernel);
+  cudaFree(d_out);
 }
 
 template<int DataLayout>
-static void test_cuda_convolution_2d()
+void test_cuda_convolution_2d()
 {
   Tensor<float, 4, DataLayout> input(74,37,11,137);
   Tensor<float, 2, DataLayout> kernel(3,4);
@@ -424,10 +452,14 @@ static void test_cuda_convolution_2d()
      }
     }
   }
+
+  cudaFree(d_input);
+  cudaFree(d_kernel);
+  cudaFree(d_out);
 }
 
 template<int DataLayout>
-static void test_cuda_convolution_3d()
+void test_cuda_convolution_3d()
 {
   Tensor<float, 5, DataLayout> input(Eigen::array<int, 5>(74,37,11,137,17));
   Tensor<float, 3, DataLayout> kernel(3,4,2);
@@ -498,6 +530,10 @@ static void test_cuda_convolution_3d()
       }
     }
   }
+
+  cudaFree(d_input);
+  cudaFree(d_kernel);
+  cudaFree(d_out);
 }
 
 
@@ -535,6 +571,9 @@ void test_cuda_lgamma(const Scalar stddev)
       VERIFY_IS_APPROX(out(i,j), (std::lgamma)(in(i,j)));
     }
   }
+
+  cudaFree(d_in);
+  cudaFree(d_out);
 }
 
 template <typename Scalar>
@@ -571,6 +610,9 @@ void test_cuda_erf(const Scalar stddev)
       VERIFY_IS_APPROX(out(i,j), (std::erf)(in(i,j)));
     }
   }
+
+  cudaFree(d_in);
+  cudaFree(d_out);
 }
 
 template <typename Scalar>
@@ -607,47 +649,50 @@ void test_cuda_erfc(const Scalar stddev)
       VERIFY_IS_APPROX(out(i,j), (std::erfc)(in(i,j)));
     }
   }
+
+  cudaFree(d_in);
+  cudaFree(d_out);
 }
 
 void test_cxx11_tensor_cuda()
 {
-  CALL_SUBTEST(test_cuda_elementwise_small());
-  CALL_SUBTEST(test_cuda_elementwise());
-  CALL_SUBTEST(test_cuda_reduction());
-  CALL_SUBTEST(test_cuda_contraction<ColMajor>());
-  CALL_SUBTEST(test_cuda_contraction<RowMajor>());
-  CALL_SUBTEST(test_cuda_convolution_1d<ColMajor>());
-  CALL_SUBTEST(test_cuda_convolution_1d<RowMajor>());
-  CALL_SUBTEST(test_cuda_convolution_inner_dim_col_major_1d());
-  CALL_SUBTEST(test_cuda_convolution_inner_dim_row_major_1d());
-  CALL_SUBTEST(test_cuda_convolution_2d<ColMajor>());
-  CALL_SUBTEST(test_cuda_convolution_2d<RowMajor>());
-  CALL_SUBTEST(test_cuda_convolution_3d<ColMajor>());
-  CALL_SUBTEST(test_cuda_convolution_3d<RowMajor>());
-  CALL_SUBTEST(test_cuda_lgamma<float>(1.0f));
-  CALL_SUBTEST(test_cuda_lgamma<float>(100.0f));
-  CALL_SUBTEST(test_cuda_lgamma<float>(0.01f));
-  CALL_SUBTEST(test_cuda_lgamma<float>(0.001f));
-  CALL_SUBTEST(test_cuda_erf<float>(1.0f));
-  CALL_SUBTEST(test_cuda_erf<float>(100.0f));
-  CALL_SUBTEST(test_cuda_erf<float>(0.01f));
-  CALL_SUBTEST(test_cuda_erf<float>(0.001f));
-  CALL_SUBTEST(test_cuda_erfc<float>(1.0f));
+  CALL_SUBTEST_1(test_cuda_elementwise_small());
+  CALL_SUBTEST_1(test_cuda_elementwise());
+  CALL_SUBTEST_1(test_cuda_reduction());
+  CALL_SUBTEST_2(test_cuda_contraction<ColMajor>());
+  CALL_SUBTEST_2(test_cuda_contraction<RowMajor>());
+  CALL_SUBTEST_3(test_cuda_convolution_1d<ColMajor>());
+  CALL_SUBTEST_3(test_cuda_convolution_1d<RowMajor>());
+  CALL_SUBTEST_3(test_cuda_convolution_inner_dim_col_major_1d());
+  CALL_SUBTEST_3(test_cuda_convolution_inner_dim_row_major_1d());
+  CALL_SUBTEST_3(test_cuda_convolution_2d<ColMajor>());
+  CALL_SUBTEST_3(test_cuda_convolution_2d<RowMajor>());
+  CALL_SUBTEST_3(test_cuda_convolution_3d<ColMajor>());
+  CALL_SUBTEST_3(test_cuda_convolution_3d<RowMajor>());
+  CALL_SUBTEST_4(test_cuda_lgamma<float>(1.0f));
+  CALL_SUBTEST_4(test_cuda_lgamma<float>(100.0f));
+  CALL_SUBTEST_4(test_cuda_lgamma<float>(0.01f));
+  CALL_SUBTEST_4(test_cuda_lgamma<float>(0.001f));
+  CALL_SUBTEST_4(test_cuda_erf<float>(1.0f));
+  CALL_SUBTEST_4(test_cuda_erf<float>(100.0f));
+  CALL_SUBTEST_4(test_cuda_erf<float>(0.01f));
+  CALL_SUBTEST_4(test_cuda_erf<float>(0.001f));
+  CALL_SUBTEST_4(test_cuda_erfc<float>(1.0f));
   // CALL_SUBTEST(test_cuda_erfc<float>(100.0f));
-  CALL_SUBTEST(test_cuda_erfc<float>(5.0f)); // CUDA erfc lacks precision for large inputs
-  CALL_SUBTEST(test_cuda_erfc<float>(0.01f));
-  CALL_SUBTEST(test_cuda_erfc<float>(0.001f));
-  CALL_SUBTEST(test_cuda_lgamma<double>(1.0));
-  CALL_SUBTEST(test_cuda_lgamma<double>(100.0));
-  CALL_SUBTEST(test_cuda_lgamma<double>(0.01));
-  CALL_SUBTEST(test_cuda_lgamma<double>(0.001));
-  CALL_SUBTEST(test_cuda_erf<double>(1.0));
-  CALL_SUBTEST(test_cuda_erf<double>(100.0));
-  CALL_SUBTEST(test_cuda_erf<double>(0.01));
-  CALL_SUBTEST(test_cuda_erf<double>(0.001));
-  CALL_SUBTEST(test_cuda_erfc<double>(1.0));
+  CALL_SUBTEST_4(test_cuda_erfc<float>(5.0f)); // CUDA erfc lacks precision for large inputs
+  CALL_SUBTEST_4(test_cuda_erfc<float>(0.01f));
+  CALL_SUBTEST_4(test_cuda_erfc<float>(0.001f));
+  CALL_SUBTEST_4(test_cuda_lgamma<double>(1.0));
+  CALL_SUBTEST_4(test_cuda_lgamma<double>(100.0));
+  CALL_SUBTEST_4(test_cuda_lgamma<double>(0.01));
+  CALL_SUBTEST_4(test_cuda_lgamma<double>(0.001));
+  CALL_SUBTEST_4(test_cuda_erf<double>(1.0));
+  CALL_SUBTEST_4(test_cuda_erf<double>(100.0));
+  CALL_SUBTEST_4(test_cuda_erf<double>(0.01));
+  CALL_SUBTEST_4(test_cuda_erf<double>(0.001));
+  CALL_SUBTEST_4(test_cuda_erfc<double>(1.0));
   // CALL_SUBTEST(test_cuda_erfc<double>(100.0));
-  CALL_SUBTEST(test_cuda_erfc<double>(5.0)); // CUDA erfc lacks precision for large inputs
-  CALL_SUBTEST(test_cuda_erfc<double>(0.01));
-  CALL_SUBTEST(test_cuda_erfc<double>(0.001));
+  CALL_SUBTEST_4(test_cuda_erfc<double>(5.0)); // CUDA erfc lacks precision for large inputs
+  CALL_SUBTEST_4(test_cuda_erfc<double>(0.01));
+  CALL_SUBTEST_4(test_cuda_erfc<double>(0.001));
 }
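For reference, the sharding works through Eigen's numbered subtest macros: CALL_SUBTEST_N(f) only expands to a real call when the matching EIGEN_TEST_PART_N preprocessor symbol is defined, so the build system can compile and run each numbered group (basic ops, contraction, convolutions, special functions) as its own test binary. Dropping static from the per-shard functions avoids unused-function warnings in the shards that no longer call them. Below is a rough, simplified sketch of that mechanism; it paraphrases the idea behind Eigen's test/main.h rather than copying it.

// Simplified sketch of numbered-subtest sharding (not a verbatim copy of
// Eigen's test/main.h; the CALL_SUBTEST body here is a stand-in).
#include <cstdio>

#define CALL_SUBTEST(FUNC) do { FUNC; } while (0)

#if defined(EIGEN_TEST_PART_1)
#define CALL_SUBTEST_1(FUNC) CALL_SUBTEST(FUNC)
#else
#define CALL_SUBTEST_1(FUNC)   // compiled out when building another shard
#endif

#if defined(EIGEN_TEST_PART_2)
#define CALL_SUBTEST_2(FUNC) CALL_SUBTEST(FUNC)
#else
#define CALL_SUBTEST_2(FUNC)
#endif

void shard1_tests() { std::printf("elementwise + reduction shard\n"); }
void shard2_tests() { std::printf("contraction shard\n"); }

int main()
{
  // Build with -DEIGEN_TEST_PART_1 or -DEIGEN_TEST_PART_2 to select a shard;
  // the other invocation expands to nothing.
  CALL_SUBTEST_1(shard1_tests());
  CALL_SUBTEST_2(shard2_tests());
  return 0;
}

In Eigen's CMake harness each part typically becomes its own ctest target (here that would be cxx11_tensor_cuda_1 through cxx11_tensor_cuda_4), which is what lets the four groups above build and run in parallel instead of as one long monolithic test.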