mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-09-12 17:33:15 +08:00
Vectorize atan() for double.
This commit is contained in:
parent
1e1848fdb1
commit
c475228b28
@ -50,6 +50,12 @@ patan<Packet8f>(const Packet8f& _x) {
|
||||
return patan_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d
|
||||
patan<Packet4d>(const Packet4d& _x) {
|
||||
return patan_double(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
|
||||
plog<Packet8f>(const Packet8f& _x) {
|
||||
|
@ -109,6 +109,7 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasATan = 1,
|
||||
HasBlend = 1,
|
||||
HasRound = 1,
|
||||
HasFloor = 1,
|
||||
|
@ -275,6 +275,12 @@ patan<Packet16f>(const Packet16f& _x) {
|
||||
return patan_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d
|
||||
patan<Packet8d>(const Packet8d& _x) {
|
||||
return patan_double(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
|
||||
ptanh<Packet16f>(const Packet16f& _x) {
|
||||
|
@ -161,6 +161,7 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
HasSqrt = EIGEN_FAST_MATH,
|
||||
HasRsqrt = EIGEN_FAST_MATH,
|
||||
#endif
|
||||
HasATan = 1,
|
||||
HasCmp = 1,
|
||||
HasDiv = 1,
|
||||
HasRound = 1,
|
||||
|
@ -60,6 +60,12 @@ Packet4f patan<Packet4f>(const Packet4f& _x)
|
||||
return patan_float(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet2d patan<Packet2d>(const Packet2d& _x)
|
||||
{
|
||||
return patan_double(_x);
|
||||
}
|
||||
|
||||
#ifdef __VSX__
|
||||
#ifndef EIGEN_COMP_CLANG
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
|
@ -2708,6 +2708,7 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
HasAbs = 1,
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
HasATan = 1,
|
||||
HasLog = 0,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
|
@ -864,6 +864,68 @@ Packet patan_float(const Packet& x_in) {
|
||||
return pselect(neg_mask, pnegate(p), p);
|
||||
}
|
||||
|
||||
template<typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet patan_double(const Packet& x_in) {
|
||||
typedef typename unpacket_traits<Packet>::type Scalar;
|
||||
static_assert(std::is_same<Scalar, double>::value, "Scalar type must be double");
|
||||
|
||||
const Packet cst_one = pset1<Packet>(1.0);
|
||||
constexpr double kPiOverTwo = static_cast<double>(M_PI_2);
|
||||
const Packet cst_pi_over_two = pset1<Packet>(kPiOverTwo);
|
||||
constexpr double kPiOverFour = static_cast<double>(M_PI_4);
|
||||
const Packet cst_pi_over_four = pset1<Packet>(kPiOverFour);
|
||||
const Packet cst_large = pset1<Packet>(2.4142135623730950488016887); // tan(3*pi/8);
|
||||
const Packet cst_medium = pset1<Packet>(0.4142135623730950488016887); // tan(pi/8);
|
||||
const Packet q0 = pset1<Packet>(-0.33333333333330028569463365784031338989734649658203);
|
||||
const Packet q2 = pset1<Packet>(0.199999999990664090177006073645316064357757568359375);
|
||||
const Packet q4 = pset1<Packet>(-0.142857141937123677255527809393242932856082916259766);
|
||||
const Packet q6 = pset1<Packet>(0.111111065991039953404495577160560060292482376098633);
|
||||
const Packet q8 = pset1<Packet>(-9.0907812986129224452902519715280504897236824035645e-2);
|
||||
const Packet q10 = pset1<Packet>(7.6900542950704739442180368769186316058039665222168e-2);
|
||||
const Packet q12 = pset1<Packet>(-6.6410112986494976294871150912513257935643196105957e-2);
|
||||
const Packet q14 = pset1<Packet>(5.6920144995467943094258345126945641823112964630127e-2);
|
||||
const Packet q16 = pset1<Packet>(-4.3577020814990513608577771265117917209863662719727e-2);
|
||||
const Packet q18 = pset1<Packet>(2.1244050233624342527427586446719942614436149597168e-2);
|
||||
|
||||
const Packet neg_mask = pcmp_lt(x_in, pzero(x_in));
|
||||
Packet x = pabs(x_in);
|
||||
|
||||
// Use the same range reduction strategy (to [0:tan(pi/8)]) as the
|
||||
// Cephes library:
|
||||
// "Large": For x >= tan(3*pi/8), use atan(1/x) = pi/2 - atan(x).
|
||||
// "Medium": For x in [tan(pi/8) : tan(3*pi/8)),
|
||||
// use atan(x) = pi/4 + atan((x-1)/(x+1)).
|
||||
// "Small": For x < tan(pi/8), approximate atan(x) directly by a polynomial
|
||||
// calculated using Sollya.
|
||||
const Packet large_mask = pcmp_lt(cst_large, x);
|
||||
x = pselect(large_mask, preciprocal(x), x);
|
||||
const Packet medium_mask = pandnot(pcmp_lt(cst_medium, x), large_mask);
|
||||
x = pselect(medium_mask, pdiv(psub(x, cst_one), padd(x, cst_one)), x);
|
||||
|
||||
// Approximate atan(x) on [0:tan(pi/8)] by a polynomial of the form
|
||||
// P(x) = x + x^3 * Q(x^2),
|
||||
// where Q(x^2) is a 9th order polynomial in x^2.
|
||||
const Packet x2 = pmul(x, x);
|
||||
const Packet x4 = pmul(x2, x2);
|
||||
Packet q_odd = pmadd(q18, x4, q14);
|
||||
Packet q_even = pmadd(q16, x4, q12);
|
||||
q_odd = pmadd(q_odd, x4, q10);
|
||||
q_even = pmadd(q_even, x4, q8);
|
||||
q_odd = pmadd(q_odd, x4, q6);
|
||||
q_even = pmadd(q_even, x4, q4);
|
||||
q_odd = pmadd(q_odd, x4, q2);
|
||||
q_even = pmadd(q_even, x4, q0);
|
||||
const Packet q = pmadd(q_odd, x2, q_even);
|
||||
Packet p = pmadd(q, pmul(x, x2), x);
|
||||
|
||||
// Apply transformations according to the range reduction masks.
|
||||
p = pselect(large_mask, psub(cst_pi_over_two, p), p);
|
||||
p = pselect(medium_mask, padd(cst_pi_over_four, p), p);
|
||||
return pselect(neg_mask, pnegate(p), p);
|
||||
}
|
||||
|
||||
|
||||
template<typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pdiv_complex(const Packet& x, const Packet& y) {
|
||||
@ -958,8 +1020,8 @@ Packet psqrt_complex(const Packet& a) {
|
||||
|
||||
// Step 4. Compute solution for inputs with negative real part:
|
||||
// [|eta0|, sign(y0)*rho0, |eta1|, sign(y1)*rho1]
|
||||
const RealScalar neg_zero = RealScalar(numext::bit_cast<float>(0x80000000u));
|
||||
const RealPacket cst_imag_sign_mask = pset1<Packet>(Scalar(RealScalar(0.0), neg_zero)).v;
|
||||
const RealPacket cst_imag_sign_mask =
|
||||
pset1<Packet>(Scalar(RealScalar(0.0), RealScalar(-0.0))).v;
|
||||
RealPacket imag_signs = pand(a.v, cst_imag_sign_mask);
|
||||
Packet negative_real_result;
|
||||
// Notice that rho is positive, so taking it's absolute value is a noop.
|
||||
|
@ -104,6 +104,11 @@ template<typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet patan_float(const Packet& x);
|
||||
|
||||
/** \internal \returns atan(x) for double precision float */
|
||||
template<typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet patan_double(const Packet& x);
|
||||
|
||||
/** \internal \returns sqrt(x) for complex types */
|
||||
template<typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
|
@ -102,6 +102,9 @@ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d pexp<Pac
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d plog<Packet2d>(const Packet2d& x)
|
||||
{ return plog_double(x); }
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d patan<Packet2d>(const Packet2d& x)
|
||||
{ return patan_double(x); }
|
||||
|
||||
#endif
|
||||
|
||||
} // end namespace internal
|
||||
|
@ -3762,10 +3762,13 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
HasCeil = 1,
|
||||
HasRint = 1,
|
||||
|
||||
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
|
||||
HasExp = 1,
|
||||
HasLog = 1,
|
||||
HasATan = 1,
|
||||
#endif
|
||||
HasSin = 0,
|
||||
HasCos = 0,
|
||||
HasLog = 1,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasTanh = 0,
|
||||
|
@ -81,6 +81,11 @@ Packet4f pacos<Packet4f>(const Packet4f& _x)
|
||||
return pacos_float(_x);
|
||||
}
|
||||
|
||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet2d patan<Packet2d>(const Packet2d& _x) {
|
||||
return patan_double(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f pasin<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
|
@ -178,6 +178,7 @@ struct packet_traits<double> : default_packet_traits {
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasATan = 1,
|
||||
HasBlend = 1,
|
||||
HasFloor = 1,
|
||||
HasCeil = 1,
|
||||
|
Loading…
x
Reference in New Issue
Block a user