From 283d871a3f461512d560f1bcbb2b8e1d650941b9 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Fri, 8 Nov 2024 14:25:57 -0800 Subject: [PATCH] Add missing EIGEN_DEVICE_FUNCTION decorations. --- .../arch/Default/GenericPacketMathFunctions.h | 50 ++++++++++--------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h index d3c067e45..4e441b498 100644 --- a/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +++ b/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h @@ -1634,7 +1634,7 @@ struct psign_impl -EIGEN_STRONG_INLINE void absolute_split(const Packet& x, Packet& n, Packet& r) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void absolute_split(const Packet& x, Packet& n, Packet& r) { n = pround(x); r = psub(x, n); } @@ -1642,7 +1642,7 @@ EIGEN_STRONG_INLINE void absolute_split(const Packet& x, Packet& n, Packet& r) { // This function computes the sum {s, r}, such that x + y = s_hi + s_lo // holds exactly, and s_hi = fl(x+y), if |x| >= |y|. template -EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y, Packet& s_hi, Packet& s_lo) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y, Packet& s_hi, Packet& s_lo) { s_hi = padd(x, y); const Packet t = psub(s_hi, x); s_lo = psub(y, t); @@ -1654,7 +1654,7 @@ EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y, Packet& s // {p_hi, p_lo} such that x * y = p_hi + p_lo holds exactly and // p_hi = fl(x * y). template -EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi, Packet& p_lo) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi, Packet& p_lo) { p_hi = pmul(x, y); p_lo = pmsub(x, y, p_hi); } @@ -1662,7 +1662,7 @@ EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi, // A version of twoprod that takes x, y, and fl(x*y) as input and returns the p_lo such that // x * y = xy + p_lo holds exactly. template -EIGEN_STRONG_INLINE Packet twoprod_low(const Packet& x, const Packet& y, const Packet& xy) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet twoprod_low(const Packet& x, const Packet& y, const Packet& xy) { return pmsub(x, y, xy); } @@ -1674,7 +1674,7 @@ EIGEN_STRONG_INLINE Packet twoprod_low(const Packet& x, const Packet& y, const P // This is Algorithm 3 from Jean-Michel Muller, "Elementary Functions", // 3rd edition, Birkh\"auser, 2016. template -EIGEN_STRONG_INLINE void veltkamp_splitting(const Packet& x, Packet& x_hi, Packet& x_lo) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void veltkamp_splitting(const Packet& x, Packet& x_hi, Packet& x_lo) { typedef typename unpacket_traits::type Scalar; EIGEN_CONSTEXPR int shift = (NumTraits::digits() + 1) / 2; const Scalar shift_scale = Scalar(uint64_t(1) << shift); // Scalar constructor not necessarily constexpr. @@ -1689,7 +1689,7 @@ EIGEN_STRONG_INLINE void veltkamp_splitting(const Packet& x, Packet& x_hi, Packe // {p_hi, p_lo} such that x * y = p_hi + p_lo holds exactly and // p_hi = fl(x * y). template -EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi, Packet& p_lo) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi, Packet& p_lo) { Packet x_hi, x_lo, y_hi, y_lo; veltkamp_splitting(x, x_hi, x_lo); veltkamp_splitting(y, y_hi, y_lo); @@ -1704,7 +1704,7 @@ EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi, // A version of twoprod that takes x, y, and fl(x*y) as input and returns the p_lo such that // x * y = xy + p_lo holds exactly. template -EIGEN_STRONG_INLINE Packet twoprod_low(const Packet& x, const Packet& y, const Packet& xy) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet twoprod_low(const Packet& x, const Packet& y, const Packet& xy) { Packet x_hi, x_lo, y_hi, y_lo; veltkamp_splitting(x, x_hi, x_lo); veltkamp_splitting(y, y_hi, y_lo); @@ -1725,8 +1725,8 @@ EIGEN_STRONG_INLINE Packet twoprod_low(const Packet& x, const Packet& y, const P // This is Algorithm 5 from Jean-Michel Muller, "Elementary Functions", // 3rd edition, Birkh\"auser, 2016. template -EIGEN_STRONG_INLINE void twosum(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi, const Packet& y_lo, - Packet& s_hi, Packet& s_lo) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twosum(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi, + const Packet& y_lo, Packet& s_hi, Packet& s_lo) { const Packet x_greater_mask = pcmp_lt(pabs(y_hi), pabs(x_hi)); Packet r_hi_1, r_lo_1; fast_twosum(x_hi, y_hi, r_hi_1, r_lo_1); @@ -1744,8 +1744,8 @@ EIGEN_STRONG_INLINE void twosum(const Packet& x_hi, const Packet& x_lo, const Pa // This is a version of twosum for double word numbers, // which assumes that |x_hi| >= |y_hi|. template -EIGEN_STRONG_INLINE void fast_twosum(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi, const Packet& y_lo, - Packet& s_hi, Packet& s_lo) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void fast_twosum(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi, + const Packet& y_lo, Packet& s_hi, Packet& s_lo) { Packet r_hi, r_lo; fast_twosum(x_hi, y_hi, r_hi, r_lo); const Packet s = padd(padd(y_lo, r_lo), x_lo); @@ -1756,8 +1756,8 @@ EIGEN_STRONG_INLINE void fast_twosum(const Packet& x_hi, const Packet& x_lo, con // double word number {y_hi, y_lo} number, with the assumption // that |x| >= |y_hi|. template -EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y_hi, const Packet& y_lo, Packet& s_hi, - Packet& s_lo) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y_hi, const Packet& y_lo, + Packet& s_hi, Packet& s_lo) { Packet r_hi, r_lo; fast_twosum(x, y_hi, r_hi, r_lo); const Packet s = padd(y_lo, r_lo); @@ -1773,7 +1773,8 @@ EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y_hi, const // This is Algorithm 7 from Jean-Michel Muller, "Elementary Functions", // 3rd edition, Birkh\"auser, 2016. template -EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const Packet& y, Packet& p_hi, Packet& p_lo) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const Packet& y, + Packet& p_hi, Packet& p_lo) { Packet c_hi, c_lo1; twoprod(x_hi, y, c_hi, c_lo1); const Packet c_lo2 = pmul(x_lo, y); @@ -1790,8 +1791,8 @@ EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const P // of less than 2*2^{-2p}, where p is the number of significand bit // in the floating point type. template -EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi, const Packet& y_lo, - Packet& p_hi, Packet& p_lo) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi, + const Packet& y_lo, Packet& p_hi, Packet& p_lo) { Packet p_hi_hi, p_hi_lo; twoprod(x_hi, x_lo, y_hi, p_hi_hi, p_hi_lo); Packet p_lo_hi, p_lo_lo; @@ -1804,7 +1805,8 @@ EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const P // for basic building blocks of double-word arithmetic", Joldes, Muller, & Popescu, // 2017. https://hal.archives-ouvertes.fr/hal-01351529 template -void doubleword_div_fp(const Packet& x_hi, const Packet& x_lo, const Packet& y, Packet& z_hi, Packet& z_lo) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void doubleword_div_fp(const Packet& x_hi, const Packet& x_lo, const Packet& y, + Packet& z_hi, Packet& z_lo) { const Packet t_hi = pdiv(x_hi, y); Packet pi_hi, pi_lo; twoprod(t_hi, y, pi_hi, pi_lo); @@ -1819,7 +1821,7 @@ void doubleword_div_fp(const Packet& x_hi, const Packet& x_lo, const Packet& y, template struct accurate_log2 { template - EIGEN_STRONG_INLINE void operator()(const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) { log2_x_hi = plog2(x); log2_x_lo = pzero(x); } @@ -1834,7 +1836,7 @@ struct accurate_log2 { template <> struct accurate_log2 { template - EIGEN_STRONG_INLINE void operator()(const Packet& z, Packet& log2_x_hi, Packet& log2_x_lo) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(const Packet& z, Packet& log2_x_hi, Packet& log2_x_lo) { // The function log(1+x)/x is approximated in the interval // [1/sqrt(2)-1;sqrt(2)-1] by a degree 10 polynomial of the form // Q(x) = (C0 + x * (C1 + x * (C2 + x * (C3 + x * P(x))))), @@ -1914,7 +1916,7 @@ struct accurate_log2 { template <> struct accurate_log2 { template - EIGEN_STRONG_INLINE void operator()(const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) { // We use a transformation of variables: // r = c * (x-1) / (x+1), // such that @@ -2000,7 +2002,7 @@ struct accurate_log2 { template struct fast_accurate_exp2 { template - EIGEN_STRONG_INLINE Packet operator()(const Packet& x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet operator()(const Packet& x) { return generic_exp2(x); } }; @@ -2012,7 +2014,7 @@ struct fast_accurate_exp2 { template <> struct fast_accurate_exp2 { template - EIGEN_STRONG_INLINE Packet operator()(const Packet& x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet operator()(const Packet& x) { // This function approximates exp2(x) by a degree 6 polynomial of the form // Q(x) = 1 + x * (C + x * P(x)), where the degree 4 polynomial P(x) is evaluated in // single precision, and the remaining steps are evaluated with extra precision using @@ -2069,7 +2071,7 @@ struct fast_accurate_exp2 { template <> struct fast_accurate_exp2 { template - EIGEN_STRONG_INLINE Packet operator()(const Packet& x) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet operator()(const Packet& x) { // This function approximates exp2(x) by a degree 10 polynomial of the form // Q(x) = 1 + x * (C + x * P(x)), where the degree 8 polynomial P(x) is evaluated in // single precision, and the remaining steps are evaluated with extra precision using @@ -2135,7 +2137,7 @@ struct fast_accurate_exp2 { // TODO(rmlarsen): We should probably add this as a packet up 'ppow', to make it // easier to specialize or turn off for specific types and/or backends.x template -EIGEN_STRONG_INLINE Packet generic_pow_impl(const Packet& x, const Packet& y) { +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_pow_impl(const Packet& x, const Packet& y) { typedef typename unpacket_traits::type Scalar; // Split x into exponent e_x and mantissa m_x. Packet e_x;