From 1fe0b749042320501c59378f2860d9322b0c6e19 Mon Sep 17 00:00:00 2001 From: Deven Desai Date: Wed, 11 Jul 2018 09:28:44 -0400 Subject: [PATCH] deleting hip specific files that are no longer required --- test/hip_basic.cu | 172 ---------------------------------------------- test/hip_common.h | 103 --------------------------- 2 files changed, 275 deletions(-) delete mode 100644 test/hip_basic.cu delete mode 100644 test/hip_common.h diff --git a/test/hip_basic.cu b/test/hip_basic.cu deleted file mode 100644 index 2e1bf94a4..000000000 --- a/test/hip_basic.cu +++ /dev/null @@ -1,172 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015-2016 Gael Guennebaud -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -// workaround issue between gcc >= 4.7 and cuda 5.5 -#if (defined __GNUC__) && (__GNUC__>4 || __GNUC_MINOR__>=7) - #undef _GLIBCXX_ATOMIC_BUILTINS - #undef _GLIBCXX_USE_INT128 -#endif - -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC hip_basic -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int - -#include - -#include "main.h" -#include "hip_common.h" - -// Check that dense modules can be properly parsed by hipcc -#include - -// struct Foo{ -// EIGEN_DEVICE_FUNC -// void operator()(int i, const float* mats, float* vecs) const { -// using namespace Eigen; -// // Matrix3f M(data); -// // Vector3f x(data+9); -// // Map(data+9) = M.inverse() * x; -// Matrix3f M(mats+i/16); -// Vector3f x(vecs+i*3); -// // using std::min; -// // using std::sqrt; -// Map(vecs+i*3) << x.minCoeff(), 1, 2;// / x.dot(x);//(M.inverse() * x) / x.x(); -// //x = x*2 + x.y() * x + x * x.maxCoeff() - x / x.sum(); -// } -// }; - -template -struct coeff_wise { - EIGEN_DEVICE_FUNC - void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const - { - using namespace Eigen; - T x1(in+i); - T x2(in+i+1); - T x3(in+i+2); - Map res(out+i*T::MaxSizeAtCompileTime); - - res.array() += (in[0] * x1 + x2).array() * x3.array(); - } -}; - -template -struct replicate { - EIGEN_DEVICE_FUNC - void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const - { - using namespace Eigen; - T x1(in+i); - int step = x1.size() * 4; - int stride = 3 * step; - - typedef Map > MapType; - MapType(out+i*stride+0*step, x1.rows()*2, x1.cols()*2) = x1.replicate(2,2); - MapType(out+i*stride+1*step, x1.rows()*3, x1.cols()) = in[i] * x1.colwise().replicate(3); - MapType(out+i*stride+2*step, x1.rows(), x1.cols()*3) = in[i] * x1.rowwise().replicate(3); - } -}; - -template -struct redux { - EIGEN_DEVICE_FUNC - void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const - { - using namespace Eigen; - int N = 10; - T x1(in+i); - out[i*N+0] = x1.minCoeff(); - out[i*N+1] = x1.maxCoeff(); - out[i*N+2] = x1.sum(); - out[i*N+3] = x1.prod(); - out[i*N+4] = x1.matrix().squaredNorm(); - out[i*N+5] = x1.matrix().norm(); - out[i*N+6] = x1.colwise().sum().maxCoeff(); - out[i*N+7] = x1.rowwise().maxCoeff().sum(); - out[i*N+8] = x1.matrix().colwise().squaredNorm().sum(); - } -}; - -template -struct prod_test { - EIGEN_DEVICE_FUNC - void operator()(int i, const typename T1::Scalar* in, typename T1::Scalar* out) const - { - using namespace Eigen; - typedef Matrix T3; - T1 x1(in+i); - T2 x2(in+i+1); - Map res(out+i*T3::MaxSizeAtCompileTime); - res += in[i] * x1 * x2; - } -}; - -template -struct diagonal { - EIGEN_DEVICE_FUNC - void operator()(int i, const typename T1::Scalar* in, typename T1::Scalar* out) const - { - using namespace Eigen; - T1 x1(in+i); - Map res(out+i*T2::MaxSizeAtCompileTime); - res += x1.diagonal(); - } -}; - -template -struct eigenvalues { - EIGEN_DEVICE_FUNC - void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const - { - using namespace Eigen; - typedef Matrix Vec; - T M(in+i); - Map res(out+i*Vec::MaxSizeAtCompileTime); - T A = M*M.adjoint(); - SelfAdjointEigenSolver eig; - eig.computeDirect(M); - res = eig.eigenvalues(); - } -}; - -void test_hip_basic() -{ - ei_test_init_hip(); - - int nthreads = 100; - Eigen::VectorXf in, out; - - #ifndef __HIP_DEVICE_COMPILE__ - int data_size = nthreads * 512; - in.setRandom(data_size); - out.setRandom(data_size); - #endif - - CALL_SUBTEST( run_and_compare_to_hip(coeff_wise(), nthreads, in, out) ); - CALL_SUBTEST( run_and_compare_to_hip(coeff_wise(), nthreads, in, out) ); - - // FIXME compile fails when we uncomment the followig two tests - // CALL_SUBTEST( run_and_compare_to_hip(replicate(), nthreads, in, out) ); - // CALL_SUBTEST( run_and_compare_to_hip(replicate(), nthreads, in, out) ); - - CALL_SUBTEST( run_and_compare_to_hip(redux(), nthreads, in, out) ); - CALL_SUBTEST( run_and_compare_to_hip(redux(), nthreads, in, out) ); - - CALL_SUBTEST( run_and_compare_to_hip(prod_test(), nthreads, in, out) ); - CALL_SUBTEST( run_and_compare_to_hip(prod_test(), nthreads, in, out) ); - - CALL_SUBTEST( run_and_compare_to_hip(diagonal(), nthreads, in, out) ); - CALL_SUBTEST( run_and_compare_to_hip(diagonal(), nthreads, in, out) ); - - // FIXME : Runtime failure occurs when we uncomment the following two tests - // CALL_SUBTEST( run_and_compare_to_hip(eigenvalues(), nthreads, in, out) ); - // CALL_SUBTEST( run_and_compare_to_hip(eigenvalues(), nthreads, in, out) ); - -} diff --git a/test/hip_common.h b/test/hip_common.h deleted file mode 100644 index 251585c52..000000000 --- a/test/hip_common.h +++ /dev/null @@ -1,103 +0,0 @@ - -#ifndef EIGEN_TEST_HIP_COMMON_H -#define EIGEN_TEST_HIP_COMMON_H - -#include "hip/hip_runtime.h" -#include "hip/hip_runtime_api.h" -#include - -#ifndef __HIPCC__ -dim3 threadIdx, blockDim, blockIdx; -#endif - -template -void run_on_cpu(const Kernel& ker, int n, const Input& in, Output& out) -{ - for(int i=0; i -__global__ __attribute__((used)) -void run_on_hip_meta_kernel(const Kernel ker, int n, const Input* in, Output* out) -{ - int i = hipThreadIdx_x + hipBlockIdx_x*hipBlockDim_x; - if(i -void run_on_hip(const Kernel& ker, int n, const Input& in, Output& out) -{ - typename Input::Scalar* d_in; - typename Output::Scalar* d_out; - std::ptrdiff_t in_bytes = in.size() * sizeof(typename Input::Scalar); - std::ptrdiff_t out_bytes = out.size() * sizeof(typename Output::Scalar); - - hipMalloc((void**)(&d_in), in_bytes); - hipMalloc((void**)(&d_out), out_bytes); - - hipMemcpy(d_in, in.data(), in_bytes, hipMemcpyHostToDevice); - hipMemcpy(d_out, out.data(), out_bytes, hipMemcpyHostToDevice); - - // Simple and non-optimal 1D mapping assuming n is not too large - // That's only for unit testing! - dim3 Blocks(128); - dim3 Grids( (n+int(Blocks.x)-1)/int(Blocks.x) ); - - hipDeviceSynchronize(); - hipLaunchKernelGGL(HIP_KERNEL_NAME(run_on_hip_meta_kernel::type, - typename std::decay::type>), - dim3(Grids), dim3(Blocks), 0, 0, ker, n, d_in, d_out); - hipDeviceSynchronize(); - - // check inputs have not been modified - hipMemcpy(const_cast(in.data()), d_in, in_bytes, hipMemcpyDeviceToHost); - hipMemcpy(out.data(), d_out, out_bytes, hipMemcpyDeviceToHost); - - hipFree(d_in); - hipFree(d_out); -} - - -template -void run_and_compare_to_hip(const Kernel& ker, int n, const Input& in, Output& out) -{ - Input in_ref, in_hip; - Output out_ref, out_hip; - #ifndef __HIP_DEVICE_COMPILE__ - in_ref = in_hip = in; - out_ref = out_hip = out; - #endif - run_on_cpu (ker, n, in_ref, out_ref); - run_on_hip(ker, n, in_hip, out_hip); - #ifndef __HIP_DEVICE_COMPILE__ - VERIFY_IS_APPROX(in_ref, in_hip); - VERIFY_IS_APPROX(out_ref, out_hip); - #endif -} - - -void ei_test_init_hip() -{ - int device = 0; - hipDeviceProp_t deviceProp; - hipGetDeviceProperties(&deviceProp, device); - std::cout << "HIP device info:\n"; - std::cout << " name: " << deviceProp.name << "\n"; - std::cout << " capability: " << deviceProp.major << "." << deviceProp.minor << "\n"; - std::cout << " multiProcessorCount: " << deviceProp.multiProcessorCount << "\n"; - std::cout << " maxThreadsPerMultiProcessor: " << deviceProp.maxThreadsPerMultiProcessor << "\n"; - std::cout << " warpSize: " << deviceProp.warpSize << "\n"; - std::cout << " regsPerBlock: " << deviceProp.regsPerBlock << "\n"; - std::cout << " concurrentKernels: " << deviceProp.concurrentKernels << "\n"; - std::cout << " clockRate: " << deviceProp.clockRate << "\n"; - std::cout << " canMapHostMemory: " << deviceProp.canMapHostMemory << "\n"; - std::cout << " computeMode: " << deviceProp.computeMode << "\n"; -} - -#endif // EIGEN_TEST_HIP_COMMON_H