mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
Enable the subtests that use device-side malloc, since this has been fixed in ROCm 5.2.
This commit is contained in:
parent
84cf3ff18d
commit
06a458a13d
@ -21,7 +21,7 @@ struct AddKernel {
|
|||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
Type3 operator()(const Type1& A, const Type2& B, Type3& C) const {
|
Type3 operator()(const Type1& A, const Type2& B, Type3& C) const {
|
||||||
C = A + B; // Populate output parameter.
|
C = A + B; // Populate output parameter.
|
||||||
Type3 D = A + B; // Populate return value.
|
Type3 D = A + B; // Populate return value.
|
||||||
return D;
|
return D;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -31,7 +31,7 @@ template <typename T>
|
|||||||
void test_add(const T& type) {
|
void test_add(const T& type) {
|
||||||
const Index rows = type.rows();
|
const Index rows = type.rows();
|
||||||
const Index cols = type.cols();
|
const Index cols = type.cols();
|
||||||
|
|
||||||
// Create random inputs.
|
// Create random inputs.
|
||||||
const T A = T::Random(rows, cols);
|
const T A = T::Random(rows, cols);
|
||||||
const T B = T::Random(rows, cols);
|
const T B = T::Random(rows, cols);
|
||||||
@ -39,17 +39,17 @@ void test_add(const T& type) {
|
|||||||
|
|
||||||
// Create kernel.
|
// Create kernel.
|
||||||
AddKernel add_kernel;
|
AddKernel add_kernel;
|
||||||
|
|
||||||
// Run add_kernel(A, B, C) via run(...).
|
// Run add_kernel(A, B, C) via run(...).
|
||||||
// This will run on the GPU if using a GPU compiler, or CPU otherwise,
|
// This will run on the GPU if using a GPU compiler, or CPU otherwise,
|
||||||
// facilitating generic tests that can run on either.
|
// facilitating generic tests that can run on either.
|
||||||
T D = run(add_kernel, A, B, C);
|
T D = run(add_kernel, A, B, C);
|
||||||
|
|
||||||
// Check that both output parameter and return value are correctly populated.
|
// Check that both output parameter and return value are correctly populated.
|
||||||
const T expected = A + B;
|
const T expected = A + B;
|
||||||
VERIFY_IS_CWISE_EQUAL(C, expected);
|
VERIFY_IS_CWISE_EQUAL(C, expected);
|
||||||
VERIFY_IS_CWISE_EQUAL(D, expected);
|
VERIFY_IS_CWISE_EQUAL(D, expected);
|
||||||
|
|
||||||
// In a GPU-only test, we can verify that the CPU and GPU produce the
|
// In a GPU-only test, we can verify that the CPU and GPU produce the
|
||||||
// same results.
|
// same results.
|
||||||
T C_cpu, C_gpu;
|
T C_cpu, C_gpu;
|
||||||
@ -70,31 +70,30 @@ struct MultiplyKernel {
|
|||||||
|
|
||||||
template <typename T1, typename T2, typename T3>
|
template <typename T1, typename T2, typename T3>
|
||||||
void test_multiply(const T1& type1, const T2& type2, const T3& type3) {
|
void test_multiply(const T1& type1, const T2& type2, const T3& type3) {
|
||||||
|
|
||||||
const T1 A = T1::Random(type1.rows(), type1.cols());
|
const T1 A = T1::Random(type1.rows(), type1.cols());
|
||||||
const T2 B = T2::Random(type2.rows(), type2.cols());
|
const T2 B = T2::Random(type2.rows(), type2.cols());
|
||||||
T3 C;
|
T3 C;
|
||||||
|
|
||||||
MultiplyKernel multiply_kernel;
|
MultiplyKernel multiply_kernel;
|
||||||
|
|
||||||
// The run(...) family of functions uses a memory buffer to transfer data back
|
// The run(...) family of functions uses a memory buffer to transfer data back
|
||||||
// and forth to and from the device. The size of this buffer is estimated
|
// and forth to and from the device. The size of this buffer is estimated
|
||||||
// from the size of all input parameters. If the estimated buffer size is
|
// from the size of all input parameters. If the estimated buffer size is
|
||||||
// not sufficient for transferring outputs from device-to-host, then an
|
// not sufficient for transferring outputs from device-to-host, then an
|
||||||
// explicit buffer size needs to be specified.
|
// explicit buffer size needs to be specified.
|
||||||
|
|
||||||
// 2 outputs of size (A * B). For each matrix output, the buffer will store
|
// 2 outputs of size (A * B). For each matrix output, the buffer will store
|
||||||
// the number of rows, columns, and the data.
|
// the number of rows, columns, and the data.
|
||||||
size_t buffer_capacity_hint = 2 * ( // 2 output parameters
|
size_t buffer_capacity_hint = 2 * ( // 2 output parameters
|
||||||
2 * sizeof(typename T3::Index) // # Rows, # Cols
|
2 * sizeof(typename T3::Index) // # Rows, # Cols
|
||||||
+ A.rows() * B.cols() * sizeof(typename T3::Scalar)); // Output data
|
+ A.rows() * B.cols() * sizeof(typename T3::Scalar)); // Output data
|
||||||
|
|
||||||
T3 D = run_with_hint(buffer_capacity_hint, multiply_kernel, A, B, C);
|
T3 D = run_with_hint(buffer_capacity_hint, multiply_kernel, A, B, C);
|
||||||
|
|
||||||
const T3 expected = A * B;
|
const T3 expected = A * B;
|
||||||
VERIFY_IS_CWISE_APPROX(C, expected);
|
VERIFY_IS_CWISE_APPROX(C, expected);
|
||||||
VERIFY_IS_CWISE_APPROX(D, expected);
|
VERIFY_IS_CWISE_APPROX(D, expected);
|
||||||
|
|
||||||
T3 C_cpu, C_gpu;
|
T3 C_cpu, C_gpu;
|
||||||
T3 D_cpu = run_on_cpu(multiply_kernel, A, B, C_cpu);
|
T3 D_cpu = run_on_cpu(multiply_kernel, A, B, C_cpu);
|
||||||
T3 D_gpu = run_on_gpu_with_hint(buffer_capacity_hint,
|
T3 D_gpu = run_on_gpu_with_hint(buffer_capacity_hint,
|
||||||
@ -107,30 +106,24 @@ void test_multiply(const T1& type1, const T2& type2, const T3& type3) {
|
|||||||
EIGEN_DECLARE_TEST(gpu_example)
|
EIGEN_DECLARE_TEST(gpu_example)
|
||||||
{
|
{
|
||||||
// For the number of repeats, call the desired subtests.
|
// For the number of repeats, call the desired subtests.
|
||||||
for(int i = 0; i < g_repeat; i++) {
|
for(int i = 0; i < g_repeat; i++) {
|
||||||
// Call subtests with different sized/typed inputs.
|
// Call subtests with different sized/typed inputs.
|
||||||
CALL_SUBTEST( test_add(Eigen::Vector3f()) );
|
CALL_SUBTEST( test_add(Eigen::Vector3f()) );
|
||||||
CALL_SUBTEST( test_add(Eigen::Matrix3d()) );
|
CALL_SUBTEST( test_add(Eigen::Matrix3d()) );
|
||||||
#if !defined(EIGEN_USE_HIP) // FIXME
|
|
||||||
CALL_SUBTEST( test_add(Eigen::MatrixX<int>(10, 10)) );
|
CALL_SUBTEST( test_add(Eigen::MatrixX<int>(10, 10)) );
|
||||||
#endif
|
|
||||||
|
|
||||||
CALL_SUBTEST( test_add(Eigen::Array44f()) );
|
CALL_SUBTEST( test_add(Eigen::Array44f()) );
|
||||||
#if !defined(EIGEN_USE_HIP)
|
|
||||||
CALL_SUBTEST( test_add(Eigen::ArrayXd(20)) );
|
CALL_SUBTEST( test_add(Eigen::ArrayXd(20)) );
|
||||||
CALL_SUBTEST( test_add(Eigen::ArrayXXi(13, 17)) );
|
CALL_SUBTEST( test_add(Eigen::ArrayXXi(13, 17)) );
|
||||||
#endif
|
|
||||||
|
|
||||||
CALL_SUBTEST( test_multiply(Eigen::Matrix3d(),
|
CALL_SUBTEST( test_multiply(Eigen::Matrix3d(),
|
||||||
Eigen::Matrix3d(),
|
Eigen::Matrix3d(),
|
||||||
Eigen::Matrix3d()) );
|
Eigen::Matrix3d()) );
|
||||||
#if !defined(EIGEN_USE_HIP)
|
|
||||||
CALL_SUBTEST( test_multiply(Eigen::MatrixX<int>(10, 10),
|
CALL_SUBTEST( test_multiply(Eigen::MatrixX<int>(10, 10),
|
||||||
Eigen::MatrixX<int>(10, 10),
|
Eigen::MatrixX<int>(10, 10),
|
||||||
Eigen::MatrixX<int>()) );
|
Eigen::MatrixX<int>()) );
|
||||||
CALL_SUBTEST( test_multiply(Eigen::MatrixXf(12, 1),
|
CALL_SUBTEST( test_multiply(Eigen::MatrixXf(12, 1),
|
||||||
Eigen::MatrixXf(1, 32),
|
Eigen::MatrixXf(1, 32),
|
||||||
Eigen::MatrixXf()) );
|
Eigen::MatrixXf()) );
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user