From cdb377d0cba4889fc909d1bbdd430b988db0db97 Mon Sep 17 00:00:00 2001
From: Deven Desai <deven.desai.amd@gmail.com>
Date: Fri, 6 Sep 2019 16:03:49 +0000
Subject: [PATCH] Fix for the HIP build+test errors introduced by the ndtri
 support.

The fixes needed are
 * adding EIGEN_DEVICE_FUNC attribute to a couple of funcs (else HIPCC will error out when non-device funcs are called from global/device funcs)
 * switching to using ::<math_func> instead std::<math_func> (only for HIPCC) in cases where the std::<math_func> is not recognized as a device func by HIPCC
 * removing an errant "j" from a testcase (don't know how that made it in to begin with!)
---
 Eigen/src/Core/GenericPacketMath.h                            | 4 ++--
 Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h      | 4 ++--
 unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h | 1 +
 unsupported/test/cxx11_tensor_gpu.cu                          | 2 +-
 4 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h
index 5ce984caf..3118e4e5e 100644
--- a/Eigen/src/Core/GenericPacketMath.h
+++ b/Eigen/src/Core/GenericPacketMath.h
@@ -542,7 +542,7 @@ Packet pexpm1(const Packet& a) { return numext::expm1(a); }
 
 /** \internal \returns the log of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet plog(const Packet& a) { using std::log; return log(a); }
+Packet plog(const Packet& a) { EIGEN_USING_STD(log); return log(a); }
 
 /** \internal \returns the log1p of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
@@ -554,7 +554,7 @@ Packet plog10(const Packet& a) { using std::log10; return log10(a); }
 
 /** \internal \returns the square-root of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
-Packet psqrt(const Packet& a) { using std::sqrt; return sqrt(a); }
+Packet psqrt(const Packet& a) { EIGEN_USING_STD(sqrt); return sqrt(a); }
 
 /** \internal \returns the reciprocal square-root of \a a (coeff-wise) */
 template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
index 367d14dad..518db2207 100644
--- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
+++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
@@ -556,7 +556,7 @@ Packet pcos_float(const Packet& x)
  */
 template <typename Packet, int N>
 struct ppolevl {
-  static EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits<Packet>::type coeff[]) {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits<Packet>::type coeff[]) {
     EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
     return pmadd(ppolevl<Packet, N-1>::run(x, coeff), x, pset1<Packet>(coeff[N]));
   }
@@ -564,7 +564,7 @@ struct ppolevl {
 
 template <typename Packet>
 struct ppolevl<Packet, 0> {
-  static EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits<Packet>::type coeff[]) {
+  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits<Packet>::type coeff[]) {
     EIGEN_UNUSED_VARIABLE(x);
     return pset1<Packet>(coeff[0]);
   }
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
index 78050d0a1..a9bc205ab 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
@@ -624,6 +624,7 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_ndtri_lt_exp_neg_two(
 }
 
 template <typename T, typename ScalarType>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 T generic_ndtri(const T& a) {
   const T maxnum = pset1<T>(NumTraits<ScalarType>::infinity());
   const T neg_maxnum = pset1<T>(-NumTraits<ScalarType>::infinity());
diff --git a/unsupported/test/cxx11_tensor_gpu.cu b/unsupported/test/cxx11_tensor_gpu.cu
index 0a2fa8e61..aa8470123 100644
--- a/unsupported/test/cxx11_tensor_gpu.cu
+++ b/unsupported/test/cxx11_tensor_gpu.cu
@@ -1091,7 +1091,7 @@ void test_gpu_ndtri()
   expected_out(1) = -std::numeric_limits<Scalar>::infinity();
   expected_out(2) = Scalar(0.0);
   expected_out(3) = Scalar(-0.8416212335729142);
-  expected_out(4) = Scalar(0.8416212335729142);j
+  expected_out(4) = Scalar(0.8416212335729142);
   expected_out(5) = Scalar(1.2815515655446004);
   expected_out(6) = Scalar(-1.2815515655446004);
   expected_out(7) = Scalar(2.3263478740408408);