mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-14 12:46:00 +08:00
Add missing EIGEN_DEVICE_FUNC decorations.
This commit is contained in:
parent
0d366f6532
commit
283d871a3f
@ -1634,7 +1634,7 @@ struct psign_impl<Packet, std::enable_if_t<NumTraits<typename unpacket_traits<Pa
|
|||||||
// This function splits x into the nearest integer n and fractional part r,
|
// This function splits x into the nearest integer n and fractional part r,
|
||||||
// such that x = n + r holds exactly.
|
// such that x = n + r holds exactly.
|
||||||
template <typename Packet>
|
template <typename Packet>
|
||||||
EIGEN_STRONG_INLINE void absolute_split(const Packet& x, Packet& n, Packet& r) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void absolute_split(const Packet& x, Packet& n, Packet& r) {
|
||||||
n = pround(x);
|
n = pround(x);
|
||||||
r = psub(x, n);
|
r = psub(x, n);
|
||||||
}
|
}
|
||||||
@ -1642,7 +1642,7 @@ EIGEN_STRONG_INLINE void absolute_split(const Packet& x, Packet& n, Packet& r) {
|
|||||||
// This function computes the sum {s_hi, s_lo}, such that x + y = s_hi + s_lo
|
// This function computes the sum {s_hi, s_lo}, such that x + y = s_hi + s_lo
|
||||||
// holds exactly, and s_hi = fl(x+y), if |x| >= |y|.
|
// holds exactly, and s_hi = fl(x+y), if |x| >= |y|.
|
||||||
template <typename Packet>
|
template <typename Packet>
|
||||||
EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y, Packet& s_hi, Packet& s_lo) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y, Packet& s_hi, Packet& s_lo) {
|
||||||
s_hi = padd(x, y);
|
s_hi = padd(x, y);
|
||||||
const Packet t = psub(s_hi, x);
|
const Packet t = psub(s_hi, x);
|
||||||
s_lo = psub(y, t);
|
s_lo = psub(y, t);
|
||||||
@ -1654,7 +1654,7 @@ EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y, Packet& s
|
|||||||
// {p_hi, p_lo} such that x * y = p_hi + p_lo holds exactly and
|
// {p_hi, p_lo} such that x * y = p_hi + p_lo holds exactly and
|
||||||
// p_hi = fl(x * y).
|
// p_hi = fl(x * y).
|
||||||
template <typename Packet>
|
template <typename Packet>
|
||||||
EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi, Packet& p_lo) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi, Packet& p_lo) {
|
||||||
p_hi = pmul(x, y);
|
p_hi = pmul(x, y);
|
||||||
p_lo = pmsub(x, y, p_hi);
|
p_lo = pmsub(x, y, p_hi);
|
||||||
}
|
}
|
||||||
@ -1662,7 +1662,7 @@ EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi,
|
|||||||
// A version of twoprod that takes x, y, and fl(x*y) as input and returns the p_lo such that
|
// A version of twoprod that takes x, y, and fl(x*y) as input and returns the p_lo such that
|
||||||
// x * y = xy + p_lo holds exactly.
|
// x * y = xy + p_lo holds exactly.
|
||||||
template <typename Packet>
|
template <typename Packet>
|
||||||
EIGEN_STRONG_INLINE Packet twoprod_low(const Packet& x, const Packet& y, const Packet& xy) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet twoprod_low(const Packet& x, const Packet& y, const Packet& xy) {
|
||||||
return pmsub(x, y, xy);
|
return pmsub(x, y, xy);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1674,7 +1674,7 @@ EIGEN_STRONG_INLINE Packet twoprod_low(const Packet& x, const Packet& y, const P
|
|||||||
// This is Algorithm 3 from Jean-Michel Muller, "Elementary Functions",
|
// This is Algorithm 3 from Jean-Michel Muller, "Elementary Functions",
|
||||||
// 3rd edition, Birkh\"auser, 2016.
|
// 3rd edition, Birkh\"auser, 2016.
|
||||||
template <typename Packet>
|
template <typename Packet>
|
||||||
EIGEN_STRONG_INLINE void veltkamp_splitting(const Packet& x, Packet& x_hi, Packet& x_lo) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void veltkamp_splitting(const Packet& x, Packet& x_hi, Packet& x_lo) {
|
||||||
typedef typename unpacket_traits<Packet>::type Scalar;
|
typedef typename unpacket_traits<Packet>::type Scalar;
|
||||||
EIGEN_CONSTEXPR int shift = (NumTraits<Scalar>::digits() + 1) / 2;
|
EIGEN_CONSTEXPR int shift = (NumTraits<Scalar>::digits() + 1) / 2;
|
||||||
const Scalar shift_scale = Scalar(uint64_t(1) << shift); // Scalar constructor not necessarily constexpr.
|
const Scalar shift_scale = Scalar(uint64_t(1) << shift); // Scalar constructor not necessarily constexpr.
|
||||||
@ -1689,7 +1689,7 @@ EIGEN_STRONG_INLINE void veltkamp_splitting(const Packet& x, Packet& x_hi, Packe
|
|||||||
// {p_hi, p_lo} such that x * y = p_hi + p_lo holds exactly and
|
// {p_hi, p_lo} such that x * y = p_hi + p_lo holds exactly and
|
||||||
// p_hi = fl(x * y).
|
// p_hi = fl(x * y).
|
||||||
template <typename Packet>
|
template <typename Packet>
|
||||||
EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi, Packet& p_lo) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi, Packet& p_lo) {
|
||||||
Packet x_hi, x_lo, y_hi, y_lo;
|
Packet x_hi, x_lo, y_hi, y_lo;
|
||||||
veltkamp_splitting(x, x_hi, x_lo);
|
veltkamp_splitting(x, x_hi, x_lo);
|
||||||
veltkamp_splitting(y, y_hi, y_lo);
|
veltkamp_splitting(y, y_hi, y_lo);
|
||||||
@ -1704,7 +1704,7 @@ EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi,
|
|||||||
// A version of twoprod that takes x, y, and fl(x*y) as input and returns the p_lo such that
|
// A version of twoprod that takes x, y, and fl(x*y) as input and returns the p_lo such that
|
||||||
// x * y = xy + p_lo holds exactly.
|
// x * y = xy + p_lo holds exactly.
|
||||||
template <typename Packet>
|
template <typename Packet>
|
||||||
EIGEN_STRONG_INLINE Packet twoprod_low(const Packet& x, const Packet& y, const Packet& xy) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet twoprod_low(const Packet& x, const Packet& y, const Packet& xy) {
|
||||||
Packet x_hi, x_lo, y_hi, y_lo;
|
Packet x_hi, x_lo, y_hi, y_lo;
|
||||||
veltkamp_splitting(x, x_hi, x_lo);
|
veltkamp_splitting(x, x_hi, x_lo);
|
||||||
veltkamp_splitting(y, y_hi, y_lo);
|
veltkamp_splitting(y, y_hi, y_lo);
|
||||||
@ -1725,8 +1725,8 @@ EIGEN_STRONG_INLINE Packet twoprod_low(const Packet& x, const Packet& y, const P
|
|||||||
// This is Algorithm 5 from Jean-Michel Muller, "Elementary Functions",
|
// This is Algorithm 5 from Jean-Michel Muller, "Elementary Functions",
|
||||||
// 3rd edition, Birkh\"auser, 2016.
|
// 3rd edition, Birkh\"auser, 2016.
|
||||||
template <typename Packet>
|
template <typename Packet>
|
||||||
EIGEN_STRONG_INLINE void twosum(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi, const Packet& y_lo,
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twosum(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi,
|
||||||
Packet& s_hi, Packet& s_lo) {
|
const Packet& y_lo, Packet& s_hi, Packet& s_lo) {
|
||||||
const Packet x_greater_mask = pcmp_lt(pabs(y_hi), pabs(x_hi));
|
const Packet x_greater_mask = pcmp_lt(pabs(y_hi), pabs(x_hi));
|
||||||
Packet r_hi_1, r_lo_1;
|
Packet r_hi_1, r_lo_1;
|
||||||
fast_twosum(x_hi, y_hi, r_hi_1, r_lo_1);
|
fast_twosum(x_hi, y_hi, r_hi_1, r_lo_1);
|
||||||
@ -1744,8 +1744,8 @@ EIGEN_STRONG_INLINE void twosum(const Packet& x_hi, const Packet& x_lo, const Pa
|
|||||||
// This is a version of twosum for double word numbers,
|
// This is a version of twosum for double word numbers,
|
||||||
// which assumes that |x_hi| >= |y_hi|.
|
// which assumes that |x_hi| >= |y_hi|.
|
||||||
template <typename Packet>
|
template <typename Packet>
|
||||||
EIGEN_STRONG_INLINE void fast_twosum(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi, const Packet& y_lo,
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void fast_twosum(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi,
|
||||||
Packet& s_hi, Packet& s_lo) {
|
const Packet& y_lo, Packet& s_hi, Packet& s_lo) {
|
||||||
Packet r_hi, r_lo;
|
Packet r_hi, r_lo;
|
||||||
fast_twosum(x_hi, y_hi, r_hi, r_lo);
|
fast_twosum(x_hi, y_hi, r_hi, r_lo);
|
||||||
const Packet s = padd(padd(y_lo, r_lo), x_lo);
|
const Packet s = padd(padd(y_lo, r_lo), x_lo);
|
||||||
@ -1756,8 +1756,8 @@ EIGEN_STRONG_INLINE void fast_twosum(const Packet& x_hi, const Packet& x_lo, con
|
|||||||
// double word number {y_hi, y_lo}, with the assumption
|
// double word number {y_hi, y_lo}, with the assumption
|
||||||
// that |x| >= |y_hi|.
|
// that |x| >= |y_hi|.
|
||||||
template <typename Packet>
|
template <typename Packet>
|
||||||
EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y_hi, const Packet& y_lo, Packet& s_hi,
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y_hi, const Packet& y_lo,
|
||||||
Packet& s_lo) {
|
Packet& s_hi, Packet& s_lo) {
|
||||||
Packet r_hi, r_lo;
|
Packet r_hi, r_lo;
|
||||||
fast_twosum(x, y_hi, r_hi, r_lo);
|
fast_twosum(x, y_hi, r_hi, r_lo);
|
||||||
const Packet s = padd(y_lo, r_lo);
|
const Packet s = padd(y_lo, r_lo);
|
||||||
@ -1773,7 +1773,8 @@ EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y_hi, const
|
|||||||
// This is Algorithm 7 from Jean-Michel Muller, "Elementary Functions",
|
// This is Algorithm 7 from Jean-Michel Muller, "Elementary Functions",
|
||||||
// 3rd edition, Birkh\"auser, 2016.
|
// 3rd edition, Birkh\"auser, 2016.
|
||||||
template <typename Packet>
|
template <typename Packet>
|
||||||
EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const Packet& y, Packet& p_hi, Packet& p_lo) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const Packet& y,
|
||||||
|
Packet& p_hi, Packet& p_lo) {
|
||||||
Packet c_hi, c_lo1;
|
Packet c_hi, c_lo1;
|
||||||
twoprod(x_hi, y, c_hi, c_lo1);
|
twoprod(x_hi, y, c_hi, c_lo1);
|
||||||
const Packet c_lo2 = pmul(x_lo, y);
|
const Packet c_lo2 = pmul(x_lo, y);
|
||||||
@ -1790,8 +1791,8 @@ EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const P
|
|||||||
// of less than 2*2^{-2p}, where p is the number of significand bits
|
// of less than 2*2^{-2p}, where p is the number of significand bits
|
||||||
// in the floating point type.
|
// in the floating point type.
|
||||||
template <typename Packet>
|
template <typename Packet>
|
||||||
EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi, const Packet& y_lo,
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const Packet& y_hi,
|
||||||
Packet& p_hi, Packet& p_lo) {
|
const Packet& y_lo, Packet& p_hi, Packet& p_lo) {
|
||||||
Packet p_hi_hi, p_hi_lo;
|
Packet p_hi_hi, p_hi_lo;
|
||||||
twoprod(x_hi, x_lo, y_hi, p_hi_hi, p_hi_lo);
|
twoprod(x_hi, x_lo, y_hi, p_hi_hi, p_hi_lo);
|
||||||
Packet p_lo_hi, p_lo_lo;
|
Packet p_lo_hi, p_lo_lo;
|
||||||
@ -1804,7 +1805,8 @@ EIGEN_STRONG_INLINE void twoprod(const Packet& x_hi, const Packet& x_lo, const P
|
|||||||
// for basic building blocks of double-word arithmetic", Joldes, Muller, & Popescu,
|
// for basic building blocks of double-word arithmetic", Joldes, Muller, & Popescu,
|
||||||
// 2017. https://hal.archives-ouvertes.fr/hal-01351529
|
// 2017. https://hal.archives-ouvertes.fr/hal-01351529
|
||||||
template <typename Packet>
|
template <typename Packet>
|
||||||
void doubleword_div_fp(const Packet& x_hi, const Packet& x_lo, const Packet& y, Packet& z_hi, Packet& z_lo) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void doubleword_div_fp(const Packet& x_hi, const Packet& x_lo, const Packet& y,
|
||||||
|
Packet& z_hi, Packet& z_lo) {
|
||||||
const Packet t_hi = pdiv(x_hi, y);
|
const Packet t_hi = pdiv(x_hi, y);
|
||||||
Packet pi_hi, pi_lo;
|
Packet pi_hi, pi_lo;
|
||||||
twoprod(t_hi, y, pi_hi, pi_lo);
|
twoprod(t_hi, y, pi_hi, pi_lo);
|
||||||
@ -1819,7 +1821,7 @@ void doubleword_div_fp(const Packet& x_hi, const Packet& x_lo, const Packet& y,
|
|||||||
template <typename Scalar>
|
template <typename Scalar>
|
||||||
struct accurate_log2 {
|
struct accurate_log2 {
|
||||||
template <typename Packet>
|
template <typename Packet>
|
||||||
EIGEN_STRONG_INLINE void operator()(const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) {
|
||||||
log2_x_hi = plog2(x);
|
log2_x_hi = plog2(x);
|
||||||
log2_x_lo = pzero(x);
|
log2_x_lo = pzero(x);
|
||||||
}
|
}
|
||||||
@ -1834,7 +1836,7 @@ struct accurate_log2 {
|
|||||||
template <>
|
template <>
|
||||||
struct accurate_log2<float> {
|
struct accurate_log2<float> {
|
||||||
template <typename Packet>
|
template <typename Packet>
|
||||||
EIGEN_STRONG_INLINE void operator()(const Packet& z, Packet& log2_x_hi, Packet& log2_x_lo) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(const Packet& z, Packet& log2_x_hi, Packet& log2_x_lo) {
|
||||||
// The function log(1+x)/x is approximated in the interval
|
// The function log(1+x)/x is approximated in the interval
|
||||||
// [1/sqrt(2)-1;sqrt(2)-1] by a degree 10 polynomial of the form
|
// [1/sqrt(2)-1;sqrt(2)-1] by a degree 10 polynomial of the form
|
||||||
// Q(x) = (C0 + x * (C1 + x * (C2 + x * (C3 + x * P(x))))),
|
// Q(x) = (C0 + x * (C1 + x * (C2 + x * (C3 + x * P(x))))),
|
||||||
@ -1914,7 +1916,7 @@ struct accurate_log2<float> {
|
|||||||
template <>
|
template <>
|
||||||
struct accurate_log2<double> {
|
struct accurate_log2<double> {
|
||||||
template <typename Packet>
|
template <typename Packet>
|
||||||
EIGEN_STRONG_INLINE void operator()(const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(const Packet& x, Packet& log2_x_hi, Packet& log2_x_lo) {
|
||||||
// We use a transformation of variables:
|
// We use a transformation of variables:
|
||||||
// r = c * (x-1) / (x+1),
|
// r = c * (x-1) / (x+1),
|
||||||
// such that
|
// such that
|
||||||
@ -2000,7 +2002,7 @@ struct accurate_log2<double> {
|
|||||||
template <typename Scalar>
|
template <typename Scalar>
|
||||||
struct fast_accurate_exp2 {
|
struct fast_accurate_exp2 {
|
||||||
template <typename Packet>
|
template <typename Packet>
|
||||||
EIGEN_STRONG_INLINE Packet operator()(const Packet& x) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet operator()(const Packet& x) {
|
||||||
return generic_exp2(x);
|
return generic_exp2(x);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -2012,7 +2014,7 @@ struct fast_accurate_exp2 {
|
|||||||
template <>
|
template <>
|
||||||
struct fast_accurate_exp2<float> {
|
struct fast_accurate_exp2<float> {
|
||||||
template <typename Packet>
|
template <typename Packet>
|
||||||
EIGEN_STRONG_INLINE Packet operator()(const Packet& x) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet operator()(const Packet& x) {
|
||||||
// This function approximates exp2(x) by a degree 6 polynomial of the form
|
// This function approximates exp2(x) by a degree 6 polynomial of the form
|
||||||
// Q(x) = 1 + x * (C + x * P(x)), where the degree 4 polynomial P(x) is evaluated in
|
// Q(x) = 1 + x * (C + x * P(x)), where the degree 4 polynomial P(x) is evaluated in
|
||||||
// single precision, and the remaining steps are evaluated with extra precision using
|
// single precision, and the remaining steps are evaluated with extra precision using
|
||||||
@ -2069,7 +2071,7 @@ struct fast_accurate_exp2<float> {
|
|||||||
template <>
|
template <>
|
||||||
struct fast_accurate_exp2<double> {
|
struct fast_accurate_exp2<double> {
|
||||||
template <typename Packet>
|
template <typename Packet>
|
||||||
EIGEN_STRONG_INLINE Packet operator()(const Packet& x) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet operator()(const Packet& x) {
|
||||||
// This function approximates exp2(x) by a degree 10 polynomial of the form
|
// This function approximates exp2(x) by a degree 10 polynomial of the form
|
||||||
// Q(x) = 1 + x * (C + x * P(x)), where the degree 8 polynomial P(x) is evaluated in
|
// Q(x) = 1 + x * (C + x * P(x)), where the degree 8 polynomial P(x) is evaluated in
|
||||||
// single precision, and the remaining steps are evaluated with extra precision using
|
// single precision, and the remaining steps are evaluated with extra precision using
|
||||||
@ -2135,7 +2137,7 @@ struct fast_accurate_exp2<double> {
|
|||||||
// TODO(rmlarsen): We should probably add this as a packet op 'ppow', to make it
|
// TODO(rmlarsen): We should probably add this as a packet op 'ppow', to make it
|
||||||
// easier to specialize or turn off for specific types and/or backends.
|
// easier to specialize or turn off for specific types and/or backends.
|
||||||
template <typename Packet>
|
template <typename Packet>
|
||||||
EIGEN_STRONG_INLINE Packet generic_pow_impl(const Packet& x, const Packet& y) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet generic_pow_impl(const Packet& x, const Packet& y) {
|
||||||
typedef typename unpacket_traits<Packet>::type Scalar;
|
typedef typename unpacket_traits<Packet>::type Scalar;
|
||||||
// Split x into exponent e_x and mantissa m_x.
|
// Split x into exponent e_x and mantissa m_x.
|
||||||
Packet e_x;
|
Packet e_x;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user