applying EIGEN_DECLARE_TEST to *gpu* tests

Also, a few minor fixes for GPU tests running in HIP mode.

1. Adding an include for hip/hip_runtime.h in the Macros.h file.
   For HIP, __host__ and __device__ are macros that are defined in the HIP headers,
   so their definitions need to be included before they are used in the file.

2. Fixing the compile failure in TensorContractionGpu introduced by the commit
   "Fuse computations into the Tensor contractions using output kernel"
   (the GPU TensorEvaluator specialization now takes the OutputKernelType template parameter).

3. Fixing a HIP/clang-specific compile error by making the struct-member assignment explicit
This commit is contained in:
Deven Desai 2018-07-17 14:16:48 -04:00
parent 82f0ce2726
commit f124f07965
14 changed files with 24 additions and 22 deletions

View File

@ -395,8 +395,10 @@
// Means the compiler is HIPCC (analogous to EIGEN_CUDACC, but for HIP)
#define EIGEN_HIPCC __HIPCC__
// We need hip_common.h here because __HIP_DEVICE_COMPILE__ is defined in this header.
#include <hip/hip_common.h>
// We need to include hip_runtime.h here because it pulls in
// ++ hip_common.h which contains the define for __HIP_DEVICE_COMPILE__
// ++ host_defines.h which contains the defines for the __host__ and __device__ macros
#include <hip/hip_runtime.h>
#if defined(__HIP_DEVICE_COMPILE__)
// analogous to EIGEN_CUDA_ARCH, but for HIP

View File

@ -580,7 +580,7 @@ template<typename T> struct smart_memmove_helper<T,false> {
// you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
// to the appropriate stack allocation function
#if ! defined EIGEN_ALLOCA && ! defined EIGEN_CUDA_ARCH
#if ! defined EIGEN_ALLOCA && ! defined EIGEN_GPU_COMPILE_PHASE
#if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)
#define EIGEN_ALLOCA alloca
#elif EIGEN_COMP_MSVC

View File

@ -610,6 +610,7 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3
static inline bool extract_kernel(MatrixType& mat, Ref<VectorType> res, Ref<VectorType> representative)
{
EIGEN_USING_STD_MATH(abs);
EIGEN_USING_STD_MATH(sqrt);
Index i0;
// Find non-zero column i0 (by construction, there must exist a non zero coefficient on the diagonal):
mat.diagonal().cwiseAbs().maxCoeff(&i0);
@ -620,8 +621,8 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3
VectorType c0, c1;
n0 = (c0 = representative.cross(mat.col((i0+1)%3))).squaredNorm();
n1 = (c1 = representative.cross(mat.col((i0+2)%3))).squaredNorm();
if(n0>n1) res = c0/std::sqrt(n0);
else res = c1/std::sqrt(n1);
if(n0>n1) res = c0/sqrt(n0);
else res = c1/sqrt(n1);
return true;
}
@ -723,7 +724,7 @@ struct direct_selfadjoint_eigenvalues<SolverType,2,false>
EIGEN_DEVICE_FUNC
static inline void computeRoots(const MatrixType& m, VectorType& roots)
{
using std::sqrt;
EIGEN_USING_STD_MATH(sqrt);
const Scalar t0 = Scalar(0.5) * sqrt( numext::abs2(m(0,0)-m(1,1)) + Scalar(4)*numext::abs2(m(1,0)));
const Scalar t1 = Scalar(0.5) * (m(0,0) + m(1,1));
roots(0) = t1 - t0;

View File

@ -449,8 +449,7 @@ struct TensorContractionEvaluatorBase
// tensor dimensions (i, j) into the original tensor dimensions.
// TODO(ezhulenev): Add parameters required to infer output tensor index for
// more complex contractions than 2x2 on internal dimension.
m_tensor_contraction_params = {
/**swapped_arguments=*/static_cast<int>(Layout) == RowMajor};
m_tensor_contraction_params.swapped_arguments = static_cast<int>(Layout) == RowMajor;
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

View File

@ -1215,16 +1215,16 @@ EigenFloatContractionKernel16x16(const LhsMapper lhs, const RhsMapper rhs,
}
template<typename Indices, typename LeftArgType, typename RightArgType>
struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, GpuDevice> :
public TensorContractionEvaluatorBase<TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, GpuDevice> > {
template<typename Indices, typename LeftArgType, typename RightArgType, typename OutputKernelType>
struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType>, GpuDevice> :
public TensorContractionEvaluatorBase<TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType>, GpuDevice> > {
typedef GpuDevice Device;
typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> Self;
typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType>, Device> Self;
typedef TensorContractionEvaluatorBase<Self> Base;
typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType;
typedef TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType> XprType;
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
typedef typename XprType::Index Index;
typedef typename XprType::CoeffReturnType CoeffReturnType;

View File

@ -242,7 +242,7 @@ void test_gpu_argmin_dim()
}
}
void test_cxx11_tensor_gpu()
EIGEN_DECLARE_TEST(cxx11_tensor_argmax_gpu)
{
CALL_SUBTEST_1(test_gpu_simple_argmax<RowMajor>());
CALL_SUBTEST_1(test_gpu_simple_argmax<ColMajor>());

View File

@ -72,7 +72,7 @@ void test_fallback_conversion() {
}
void test_cxx11_tensor_cast_float16_gpu()
EIGEN_DECLARE_TEST(cxx11_tensor_cast_float16_gpu)
{
CALL_SUBTEST(test_gpu_conversion());
CALL_SUBTEST(test_fallback_conversion());

View File

@ -193,7 +193,7 @@ void test_gpu_contraction_sizes() {
}
}
void test_cxx11_tensor_gpu()
EIGEN_DECLARE_TEST(cxx11_tensor_contract_gpu)
{
CALL_SUBTEST_1(test_gpu_contraction<ColMajor>(128, 128, 128));
CALL_SUBTEST_1(test_gpu_contraction<RowMajor>(128, 128, 128));

View File

@ -389,7 +389,7 @@ void test_gpu() {
}
void test_cxx11_tensor_device()
EIGEN_DECLARE_TEST(cxx11_tensor_device)
{
CALL_SUBTEST_1(test_cpu());
CALL_SUBTEST_2(test_gpu());

View File

@ -1472,7 +1472,7 @@ void test_gpu_gamma_sample_der_alpha()
gpuFree(d_out);
}
void test_cxx11_tensor_gpu()
EIGEN_DECLARE_TEST(cxx11_tensor_gpu)
{
CALL_SUBTEST_1(test_gpu_nullary());
CALL_SUBTEST_1(test_gpu_elementwise_small());

View File

@ -479,7 +479,7 @@ void test_gpu_forced_evals() {
#endif
void test_cxx11_tensor_of_float16_gpu()
EIGEN_DECLARE_TEST(cxx11_tensor_of_float16_gpu)
{
CALL_SUBTEST_1(test_gpu_numext<void>());

View File

@ -78,7 +78,7 @@ static void test_complex()
}
void test_cxx11_tensor_random_gpu()
EIGEN_DECLARE_TEST(cxx11_tensor_random_gpu)
{
CALL_SUBTEST(test_gpu_random_uniform());
CALL_SUBTEST(test_gpu_random_normal());

View File

@ -134,7 +134,7 @@ static void test_last_dim_reductions() {
}
void test_cxx11_tensor_reduction_gpu() {
EIGEN_DECLARE_TEST(cxx11_tensor_reduction_gpu) {
CALL_SUBTEST_1((test_full_reductions<float, ColMajor>()));
CALL_SUBTEST_1((test_full_reductions<double, ColMajor>()));
CALL_SUBTEST_2((test_full_reductions<float, RowMajor>()));

View File

@ -71,7 +71,7 @@ void test_gpu_cumsum(int m_size, int k_size, int n_size)
}
void test_cxx11_tensor_scan_gpu()
EIGEN_DECLARE_TEST(cxx11_tensor_scan_gpu)
{
CALL_SUBTEST_1(test_gpu_cumsum<ColMajor>(128, 128, 128));
CALL_SUBTEST_2(test_gpu_cumsum<RowMajor>(128, 128, 128));