Merged in deven-amd/eigen (pull request PR-425)

applying EIGEN_DECLARE_TEST to *gpu unit tests
This commit is contained in:
Gael Guennebaud 2018-07-17 21:14:40 +00:00
commit 038b55464b
14 changed files with 24 additions and 22 deletions

View File

@ -395,8 +395,10 @@
// Means the compiler is HIPCC (analogous to EIGEN_CUDACC, but for HIP) // Means the compiler is HIPCC (analogous to EIGEN_CUDACC, but for HIP)
#define EIGEN_HIPCC __HIPCC__ #define EIGEN_HIPCC __HIPCC__
// We need hip_common.h here because __HIP_DEVICE_COMPILE__ is defined in this header. // We need to include hip_runtime.h here because it pulls in
#include <hip/hip_common.h> // ++ hip_common.h which contains the define for __HIP_DEVICE_COMPILE__
// ++ host_defines.h which contains the defines for the __host__ and __device__ macros
#include <hip/hip_runtime.h>
#if defined(__HIP_DEVICE_COMPILE__) #if defined(__HIP_DEVICE_COMPILE__)
// analogous to EIGEN_CUDA_ARCH, but for HIP // analogous to EIGEN_CUDA_ARCH, but for HIP

View File

@ -580,7 +580,7 @@ template<typename T> struct smart_memmove_helper<T,false> {
// you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA // you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
// to the appropriate stack allocation function // to the appropriate stack allocation function
#if ! defined EIGEN_ALLOCA && ! defined EIGEN_CUDA_ARCH #if ! defined EIGEN_ALLOCA && ! defined EIGEN_GPU_COMPILE_PHASE
#if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca) #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)
#define EIGEN_ALLOCA alloca #define EIGEN_ALLOCA alloca
#elif EIGEN_COMP_MSVC #elif EIGEN_COMP_MSVC

View File

@ -610,6 +610,7 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3
static inline bool extract_kernel(MatrixType& mat, Ref<VectorType> res, Ref<VectorType> representative) static inline bool extract_kernel(MatrixType& mat, Ref<VectorType> res, Ref<VectorType> representative)
{ {
EIGEN_USING_STD_MATH(abs); EIGEN_USING_STD_MATH(abs);
EIGEN_USING_STD_MATH(sqrt);
Index i0; Index i0;
// Find non-zero column i0 (by construction, there must exist a non zero coefficient on the diagonal): // Find non-zero column i0 (by construction, there must exist a non zero coefficient on the diagonal):
mat.diagonal().cwiseAbs().maxCoeff(&i0); mat.diagonal().cwiseAbs().maxCoeff(&i0);
@ -620,8 +621,8 @@ template<typename SolverType> struct direct_selfadjoint_eigenvalues<SolverType,3
VectorType c0, c1; VectorType c0, c1;
n0 = (c0 = representative.cross(mat.col((i0+1)%3))).squaredNorm(); n0 = (c0 = representative.cross(mat.col((i0+1)%3))).squaredNorm();
n1 = (c1 = representative.cross(mat.col((i0+2)%3))).squaredNorm(); n1 = (c1 = representative.cross(mat.col((i0+2)%3))).squaredNorm();
if(n0>n1) res = c0/std::sqrt(n0); if(n0>n1) res = c0/sqrt(n0);
else res = c1/std::sqrt(n1); else res = c1/sqrt(n1);
return true; return true;
} }
@ -723,7 +724,7 @@ struct direct_selfadjoint_eigenvalues<SolverType,2,false>
EIGEN_DEVICE_FUNC EIGEN_DEVICE_FUNC
static inline void computeRoots(const MatrixType& m, VectorType& roots) static inline void computeRoots(const MatrixType& m, VectorType& roots)
{ {
using std::sqrt; EIGEN_USING_STD_MATH(sqrt);
const Scalar t0 = Scalar(0.5) * sqrt( numext::abs2(m(0,0)-m(1,1)) + Scalar(4)*numext::abs2(m(1,0))); const Scalar t0 = Scalar(0.5) * sqrt( numext::abs2(m(0,0)-m(1,1)) + Scalar(4)*numext::abs2(m(1,0)));
const Scalar t1 = Scalar(0.5) * (m(0,0) + m(1,1)); const Scalar t1 = Scalar(0.5) * (m(0,0) + m(1,1));
roots(0) = t1 - t0; roots(0) = t1 - t0;

View File

@ -449,8 +449,7 @@ struct TensorContractionEvaluatorBase
// tensor dimensions (i, j) into the original tensor dimensions. // tensor dimensions (i, j) into the original tensor dimensions.
// TODO(ezhulenev): Add parameters required to infer output tensor index for // TODO(ezhulenev): Add parameters required to infer output tensor index for
// more complex contractions than 2x2 on internal dimension. // more complex contractions than 2x2 on internal dimension.
m_tensor_contraction_params = { m_tensor_contraction_params.swapped_arguments = static_cast<int>(Layout) == RowMajor;
/**swapped_arguments=*/static_cast<int>(Layout) == RowMajor};
} }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

View File

@ -1215,16 +1215,16 @@ EigenFloatContractionKernel16x16(const LhsMapper lhs, const RhsMapper rhs,
} }
template<typename Indices, typename LeftArgType, typename RightArgType> template<typename Indices, typename LeftArgType, typename RightArgType, typename OutputKernelType>
struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, GpuDevice> : struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType>, GpuDevice> :
public TensorContractionEvaluatorBase<TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, GpuDevice> > { public TensorContractionEvaluatorBase<TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType>, GpuDevice> > {
typedef GpuDevice Device; typedef GpuDevice Device;
typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType>, Device> Self; typedef TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType>, Device> Self;
typedef TensorContractionEvaluatorBase<Self> Base; typedef TensorContractionEvaluatorBase<Self> Base;
typedef TensorContractionOp<Indices, LeftArgType, RightArgType> XprType; typedef TensorContractionOp<Indices, LeftArgType, RightArgType, OutputKernelType> XprType;
typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar; typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
typedef typename XprType::Index Index; typedef typename XprType::Index Index;
typedef typename XprType::CoeffReturnType CoeffReturnType; typedef typename XprType::CoeffReturnType CoeffReturnType;

View File

@ -242,7 +242,7 @@ void test_gpu_argmin_dim()
} }
} }
void test_cxx11_tensor_gpu() EIGEN_DECLARE_TEST(cxx11_tensor_argmax_gpu)
{ {
CALL_SUBTEST_1(test_gpu_simple_argmax<RowMajor>()); CALL_SUBTEST_1(test_gpu_simple_argmax<RowMajor>());
CALL_SUBTEST_1(test_gpu_simple_argmax<ColMajor>()); CALL_SUBTEST_1(test_gpu_simple_argmax<ColMajor>());

View File

@ -72,7 +72,7 @@ void test_fallback_conversion() {
} }
void test_cxx11_tensor_cast_float16_gpu() EIGEN_DECLARE_TEST(cxx11_tensor_cast_float16_gpu)
{ {
CALL_SUBTEST(test_gpu_conversion()); CALL_SUBTEST(test_gpu_conversion());
CALL_SUBTEST(test_fallback_conversion()); CALL_SUBTEST(test_fallback_conversion());

View File

@ -193,7 +193,7 @@ void test_gpu_contraction_sizes() {
} }
} }
void test_cxx11_tensor_gpu() EIGEN_DECLARE_TEST(cxx11_tensor_contract_gpu)
{ {
CALL_SUBTEST_1(test_gpu_contraction<ColMajor>(128, 128, 128)); CALL_SUBTEST_1(test_gpu_contraction<ColMajor>(128, 128, 128));
CALL_SUBTEST_1(test_gpu_contraction<RowMajor>(128, 128, 128)); CALL_SUBTEST_1(test_gpu_contraction<RowMajor>(128, 128, 128));

View File

@ -389,7 +389,7 @@ void test_gpu() {
} }
void test_cxx11_tensor_device() EIGEN_DECLARE_TEST(cxx11_tensor_device)
{ {
CALL_SUBTEST_1(test_cpu()); CALL_SUBTEST_1(test_cpu());
CALL_SUBTEST_2(test_gpu()); CALL_SUBTEST_2(test_gpu());

View File

@ -1472,7 +1472,7 @@ void test_gpu_gamma_sample_der_alpha()
gpuFree(d_out); gpuFree(d_out);
} }
void test_cxx11_tensor_gpu() EIGEN_DECLARE_TEST(cxx11_tensor_gpu)
{ {
CALL_SUBTEST_1(test_gpu_nullary()); CALL_SUBTEST_1(test_gpu_nullary());
CALL_SUBTEST_1(test_gpu_elementwise_small()); CALL_SUBTEST_1(test_gpu_elementwise_small());

View File

@ -479,7 +479,7 @@ void test_gpu_forced_evals() {
#endif #endif
void test_cxx11_tensor_of_float16_gpu() EIGEN_DECLARE_TEST(cxx11_tensor_of_float16_gpu)
{ {
CALL_SUBTEST_1(test_gpu_numext<void>()); CALL_SUBTEST_1(test_gpu_numext<void>());

View File

@ -78,7 +78,7 @@ static void test_complex()
} }
void test_cxx11_tensor_random_gpu() EIGEN_DECLARE_TEST(cxx11_tensor_random_gpu)
{ {
CALL_SUBTEST(test_gpu_random_uniform()); CALL_SUBTEST(test_gpu_random_uniform());
CALL_SUBTEST(test_gpu_random_normal()); CALL_SUBTEST(test_gpu_random_normal());

View File

@ -134,7 +134,7 @@ static void test_last_dim_reductions() {
} }
void test_cxx11_tensor_reduction_gpu() { EIGEN_DECLARE_TEST(cxx11_tensor_reduction_gpu) {
CALL_SUBTEST_1((test_full_reductions<float, ColMajor>())); CALL_SUBTEST_1((test_full_reductions<float, ColMajor>()));
CALL_SUBTEST_1((test_full_reductions<double, ColMajor>())); CALL_SUBTEST_1((test_full_reductions<double, ColMajor>()));
CALL_SUBTEST_2((test_full_reductions<float, RowMajor>())); CALL_SUBTEST_2((test_full_reductions<float, RowMajor>()));

View File

@ -71,7 +71,7 @@ void test_gpu_cumsum(int m_size, int k_size, int n_size)
} }
void test_cxx11_tensor_scan_gpu() EIGEN_DECLARE_TEST(cxx11_tensor_scan_gpu)
{ {
CALL_SUBTEST_1(test_gpu_cumsum<ColMajor>(128, 128, 128)); CALL_SUBTEST_1(test_gpu_cumsum<ColMajor>(128, 128, 128));
CALL_SUBTEST_2(test_gpu_cumsum<RowMajor>(128, 128, 128)); CALL_SUBTEST_2(test_gpu_cumsum<RowMajor>(128, 128, 128));