mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-19 16:19:37 +08:00
Vectorize erfc() for float
This commit is contained in:
parent
78f3c654ee
commit
7eea0a9213
@ -345,6 +345,49 @@ struct erf_impl<double> {
|
||||
/***************************************************************************
|
||||
* Implementation of erfc, requires C++11/C99 *
|
||||
****************************************************************************/
|
||||
template <typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_fast_erfc_float(const T& x) {
|
||||
const T x_abs = pmin(pabs(x), pset1<T>(10.0f));
|
||||
const T one = pset1<T>(1.0f);
|
||||
const T x_abs_gt_one_mask = pcmp_lt(one, x_abs);
|
||||
|
||||
// erfc(x) = 1 + x * S(x^2), |x| <= 1.
|
||||
//
|
||||
// Coefficients for S and T generated with Rminimax command:
|
||||
// ./ratapprox --function="erfc(x)-1" --dom='[-1,1]' --type=[11,0] --num="odd"
|
||||
// --numF="[SG]" --denF="[SG]" --log --dispCoeff="dec"
|
||||
constexpr float alpha[] = {5.61802298761904239654541015625e-04, -4.91381669417023658752441406250e-03,
|
||||
2.67075151205062866210937500000e-02, -1.12800106406211853027343750000e-01,
|
||||
3.76122951507568359375000000000e-01, -1.12837910652160644531250000000e+00};
|
||||
const T x2 = pmul(x, x);
|
||||
const T erfc_small = pmadd(x, ppolevl<T, 5>::run(x2, alpha), one);
|
||||
|
||||
// Return early if we don't need the more expensive approximation for any
|
||||
// entry in a.
|
||||
if (!predux_any(x_abs_gt_one_mask)) return erfc_small;
|
||||
|
||||
// erfc(x) = exp(-x^2) * 1/x * P(1/x^2) / Q(1/x^2), 1 < x < 9.
|
||||
//
|
||||
// Coefficients for P and Q generated with Rminimax command:
|
||||
// ./ratapprox --function="erfc(1/sqrt(x))*exp(1/x)/sqrt(x)"
|
||||
// --dom='[0.01,1]' --type=[3,4] --numF="[SG]" --denF="[SG]" --log
|
||||
// --dispCoeff="dec"
|
||||
constexpr float gamma[] = {1.0208116471767425537109375e-01f, 4.2920666933059692382812500e-01f,
|
||||
3.2379078865051269531250000e-01f, 5.3971976041793823242187500e-02f};
|
||||
constexpr float delta[] = {1.7251677811145782470703125e-02f, 3.9137163758277893066406250e-01f,
|
||||
1.0000000000000000000000000e+00f, 6.2173241376876831054687500e-01f,
|
||||
9.5662862062454223632812500e-02f};
|
||||
const T z = pexp(pnegate(x2));
|
||||
const T q2 = preciprocal(x2);
|
||||
const T num = ppolevl<T, 3>::run(q2, gamma);
|
||||
const T denom = pmul(x_abs, ppolevl<T, 4>::run(q2, delta));
|
||||
const T r = pdiv(num, denom);
|
||||
// If x < -1 then use erfc(x) = 2 - erfc(|x|).
|
||||
const T x_negative = pcmp_lt(x, pset1<T>(0.0f));
|
||||
const T erfc_large = pselect(x_negative, pnmadd(z, r, pset1<T>(2.0f)), pmul(z, r));
|
||||
|
||||
return pselect(x_abs_gt_one_mask, erfc_large, erfc_small);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
struct erfc_impl {
|
||||
@ -365,7 +408,7 @@ struct erfc_impl<float> {
|
||||
#if defined(SYCL_DEVICE_ONLY)
|
||||
return cl::sycl::erfc(x);
|
||||
#else
|
||||
return ::erfcf(x);
|
||||
return generic_fast_erfc_float(x);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user