mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-11 19:29:02 +08:00
Move implementation of vectorized error function erf() to SpecialFunctionsImpl.h.
This commit is contained in:
parent
7c8bc0d928
commit
13ef08e5ac
@ -531,10 +531,6 @@ Packet pcosh(const Packet& a) { EIGEN_USING_STD_MATH(cosh); return cosh(a); }
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet ptanh(const Packet& a) { EIGEN_USING_STD_MATH(tanh); return tanh(a); }
|
||||
|
||||
/** \internal \returns the error function of \a a (coeff-wise). */
|
||||
template <typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet perf(const Packet& a) { return numext::erf(a); }
|
||||
|
||||
/** \internal \returns the exp of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pexp(const Packet& a) { EIGEN_USING_STD_MATH(exp); return exp(a); }
|
||||
|
@ -891,7 +891,6 @@ template<typename T> EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x)
|
||||
template<typename T> EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x);
|
||||
|
||||
template<typename T> T generic_fast_tanh_float(const T& a_x);
|
||||
template<typename T> T generic_fast_erf_float(const T& a_x);
|
||||
} // end namespace internal
|
||||
|
||||
/****************************************************************************
|
||||
@ -1579,36 +1578,6 @@ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
double tanh(const double &x) { return ::tanh(x); }
|
||||
#endif
|
||||
|
||||
#if EIGEN_HAS_CXX11
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
T erf(const T &x) {
|
||||
EIGEN_USING_STD_MATH(erf);
|
||||
return erf(x);
|
||||
}
|
||||
#else
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
T erf(const T& x);
|
||||
#endif
|
||||
|
||||
#if (!defined(EIGEN_GPUCC)) && EIGEN_FAST_MATH && !defined(SYCL_DEVICE_ONLY)
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float erf(float x) { return internal::generic_fast_erf_float(x); }
|
||||
#endif
|
||||
|
||||
#if defined(SYCL_DEVICE_ONLY)
|
||||
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(erf, erf)
|
||||
#endif
|
||||
|
||||
#if !EIGEN_HAS_CXX11 || defined(EIGEN_GPUCC)
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float erf(const float &x) { return ::erff(x); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
double erf(const double &x) { return ::erf(x); }
|
||||
#endif
|
||||
|
||||
template <typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
T fmod(const T& a, const T& b) {
|
||||
|
@ -66,58 +66,6 @@ T generic_fast_tanh_float(const T& a_x)
|
||||
return pdiv(p, q);
|
||||
}
|
||||
|
||||
/** \internal \returns the error function of \a a (coeff-wise)
|
||||
Doesn't do anything fancy, just a 13/8-degree rational interpolant which
|
||||
is accurate up to a couple of ulp in the range [-4, 4], outside of which
|
||||
fl(erf(x)) = +/-1.
|
||||
|
||||
This implementation works on both scalars and Ts.
|
||||
*/
|
||||
template <typename T>
|
||||
T generic_fast_erf_float(const T& a_x) {
|
||||
// Clamp the inputs to the range [-4, 4] since anything outside
|
||||
// this range is +/-1.0f in single-precision.
|
||||
const T plus_4 = pset1<T>(4.f);
|
||||
const T minus_4 = pset1<T>(-4.f);
|
||||
const T x = pmax(pmin(a_x, plus_4), minus_4);
|
||||
// The monomial coefficients of the numerator polynomial (odd).
|
||||
const T alpha_1 = pset1<T>(-1.60960333262415e-02f);
|
||||
const T alpha_3 = pset1<T>(-2.95459980854025e-03f);
|
||||
const T alpha_5 = pset1<T>(-7.34990630326855e-04f);
|
||||
const T alpha_7 = pset1<T>(-5.69250639462346e-05f);
|
||||
const T alpha_9 = pset1<T>(-2.10102402082508e-06f);
|
||||
const T alpha_11 = pset1<T>(2.77068142495902e-08f);
|
||||
const T alpha_13 = pset1<T>(-2.72614225801306e-10f);
|
||||
|
||||
// The monomial coefficients of the denominator polynomial (even).
|
||||
const T beta_0 = pset1<T>(-1.42647390514189e-02f);
|
||||
const T beta_2 = pset1<T>(-7.37332916720468e-03f);
|
||||
const T beta_4 = pset1<T>(-1.68282697438203e-03f);
|
||||
const T beta_6 = pset1<T>(-2.13374055278905e-04f);
|
||||
const T beta_8 = pset1<T>(-1.45660718464996e-05f);
|
||||
|
||||
// Since the polynomials are odd/even, we need x^2.
|
||||
const T x2 = pmul(x, x);
|
||||
|
||||
// Evaluate the numerator polynomial p.
|
||||
T p = pmadd(x2, alpha_13, alpha_11);
|
||||
p = pmadd(x2, p, alpha_9);
|
||||
p = pmadd(x2, p, alpha_7);
|
||||
p = pmadd(x2, p, alpha_5);
|
||||
p = pmadd(x2, p, alpha_3);
|
||||
p = pmadd(x2, p, alpha_1);
|
||||
p = pmul(x, p);
|
||||
|
||||
// Evaluate the denominator polynomial p.
|
||||
T q = pmadd(x2, beta_8, beta_6);
|
||||
q = pmadd(x2, q, beta_4);
|
||||
q = pmadd(x2, q, beta_2);
|
||||
q = pmadd(x2, q, beta_0);
|
||||
|
||||
// Divide the numerator by the denominator.
|
||||
return pdiv(p, q);
|
||||
}
|
||||
|
||||
template<typename RealScalar>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y)
|
||||
@ -126,7 +74,7 @@ RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y)
|
||||
RealScalar p, qp;
|
||||
p = numext::maxi(x,y);
|
||||
if(p==RealScalar(0)) return RealScalar(0);
|
||||
qp = numext::mini(y,x) / p;
|
||||
qp = numext::mini(y,x) / p;
|
||||
return p * sqrt(RealScalar(1) + qp*qp);
|
||||
}
|
||||
|
||||
|
@ -62,13 +62,6 @@ ptanh<Packet8f>(const Packet8f& x) {
|
||||
return internal::generic_fast_tanh_float(x);
|
||||
}
|
||||
|
||||
// Error function.
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
|
||||
perf<Packet8f>(const Packet8f& x) {
|
||||
return internal::generic_fast_erf_float(x);
|
||||
}
|
||||
|
||||
// Exponential function for doubles.
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
|
||||
|
@ -411,12 +411,6 @@ ptanh<Packet16f>(const Packet16f& _x) {
|
||||
return internal::generic_fast_tanh_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
||||
perf<Packet16f>(const Packet16f& _x) {
|
||||
return internal::generic_fast_erf_float(_x);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
@ -83,13 +83,6 @@ ptanh<Packet4f>(const Packet4f& x) {
|
||||
return internal::generic_fast_tanh_float(x);
|
||||
}
|
||||
|
||||
// Error function.
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
|
||||
perf<Packet4f>(const Packet4f& x) {
|
||||
return internal::generic_fast_erf_float(x);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
@ -321,13 +321,6 @@ pcos<Packet4f>(const Packet4f& x) {
|
||||
return psincos_inner_msa_float</* sine */ false>(x);
|
||||
}
|
||||
|
||||
// Error function.
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
|
||||
perf<Packet4f>(const Packet4f& x) {
|
||||
return internal::generic_fast_erf_float(x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d
|
||||
pexp<Packet2d>(const Packet2d& _x) {
|
||||
|
@ -43,13 +43,6 @@ ptanh<Packet4f>(const Packet4f& x) {
|
||||
return internal::generic_fast_tanh_float(x);
|
||||
}
|
||||
|
||||
// Error function.
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
|
||||
perf<Packet4f>(const Packet4f& x) {
|
||||
return internal::generic_fast_erf_float(x);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
@ -147,13 +147,6 @@ ptanh<Packet4f>(const Packet4f& x) {
|
||||
return internal::generic_fast_tanh_float(x);
|
||||
}
|
||||
|
||||
// Error function.
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
|
||||
perf<Packet4f>(const Packet4f& a) {
|
||||
return internal::generic_fast_erf_float(a);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
namespace numext {
|
||||
|
@ -232,13 +232,6 @@ ptanh<Packet4f>(const Packet4f& x) {
|
||||
return internal::generic_fast_tanh_float(x);
|
||||
}
|
||||
|
||||
// Error function.
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
|
||||
perf<Packet4f>(const Packet4f& x) {
|
||||
return internal::generic_fast_erf_float(x);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
@ -279,13 +279,63 @@ struct digamma_impl {
|
||||
* Implementation of erf, requires C++11/C99 *
|
||||
****************************************************************************/
|
||||
|
||||
template <typename Scalar>
|
||||
/** \internal \returns the error function of \a a (coeff-wise)
|
||||
Doesn't do anything fancy, just a 13/8-degree rational interpolant which
|
||||
is accurate up to a couple of ulp in the range [-4, 4], outside of which
|
||||
fl(erf(x)) = +/-1.
|
||||
|
||||
This implementation works on both scalars and Ts.
|
||||
*/
|
||||
template <typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_fast_erf_float(const T& a_x) {
|
||||
// Clamp the inputs to the range [-4, 4] since anything outside
|
||||
// this range is +/-1.0f in single-precision.
|
||||
const T plus_4 = pset1<T>(4.f);
|
||||
const T minus_4 = pset1<T>(-4.f);
|
||||
const T x = pmax(pmin(a_x, plus_4), minus_4);
|
||||
// The monomial coefficients of the numerator polynomial (odd).
|
||||
const T alpha_1 = pset1<T>(-1.60960333262415e-02f);
|
||||
const T alpha_3 = pset1<T>(-2.95459980854025e-03f);
|
||||
const T alpha_5 = pset1<T>(-7.34990630326855e-04f);
|
||||
const T alpha_7 = pset1<T>(-5.69250639462346e-05f);
|
||||
const T alpha_9 = pset1<T>(-2.10102402082508e-06f);
|
||||
const T alpha_11 = pset1<T>(2.77068142495902e-08f);
|
||||
const T alpha_13 = pset1<T>(-2.72614225801306e-10f);
|
||||
|
||||
// The monomial coefficients of the denominator polynomial (even).
|
||||
const T beta_0 = pset1<T>(-1.42647390514189e-02f);
|
||||
const T beta_2 = pset1<T>(-7.37332916720468e-03f);
|
||||
const T beta_4 = pset1<T>(-1.68282697438203e-03f);
|
||||
const T beta_6 = pset1<T>(-2.13374055278905e-04f);
|
||||
const T beta_8 = pset1<T>(-1.45660718464996e-05f);
|
||||
|
||||
// Since the polynomials are odd/even, we need x^2.
|
||||
const T x2 = pmul(x, x);
|
||||
|
||||
// Evaluate the numerator polynomial p.
|
||||
T p = pmadd(x2, alpha_13, alpha_11);
|
||||
p = pmadd(x2, p, alpha_9);
|
||||
p = pmadd(x2, p, alpha_7);
|
||||
p = pmadd(x2, p, alpha_5);
|
||||
p = pmadd(x2, p, alpha_3);
|
||||
p = pmadd(x2, p, alpha_1);
|
||||
p = pmul(x, p);
|
||||
|
||||
// Evaluate the denominator polynomial p.
|
||||
T q = pmadd(x2, beta_8, beta_6);
|
||||
q = pmadd(x2, q, beta_4);
|
||||
q = pmadd(x2, q, beta_2);
|
||||
q = pmadd(x2, q, beta_0);
|
||||
|
||||
// Divide the numerator by the denominator.
|
||||
return pdiv(p, q);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct erf_impl {
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
|
||||
THIS_TYPE_IS_NOT_SUPPORTED);
|
||||
return Scalar(0);
|
||||
static EIGEN_STRONG_INLINE T run(const T x) {
|
||||
return generic_fast_erf_float(x);
|
||||
}
|
||||
};
|
||||
|
||||
@ -302,7 +352,7 @@ struct erf_impl<float> {
|
||||
#if defined(SYCL_DEVICE_ONLY)
|
||||
return cl::sycl::erf(x);
|
||||
#else
|
||||
return ::erff(x);
|
||||
return generic_fast_erf_float(x);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
@ -1892,6 +1942,12 @@ polygamma(const Scalar& n, const Scalar& x) {
|
||||
return EIGEN_MATHFUNC_IMPL(polygamma, Scalar)::run(n, x);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erf, Scalar)
|
||||
erf(const Scalar& x) {
|
||||
return EIGEN_MATHFUNC_IMPL(erf, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar)
|
||||
erfc(const Scalar& x) {
|
||||
|
@ -30,6 +30,10 @@ Packet pzeta(const Packet& x, const Packet& q) { using numext::zeta; return zeta
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet ppolygamma(const Packet& n, const Packet& x) { using numext::polygamma; return polygamma(n, x); }
|
||||
|
||||
/** \internal \returns the erf(\a a) (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet perf(const Packet& a) { using numext::erf; return erf(a); }
|
||||
|
||||
/** \internal \returns the erfc(\a a) (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }
|
||||
|
Loading…
x
Reference in New Issue
Block a user