mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-22 01:29:35 +08:00
PR 681: Add ndtri function, the inverse of the normal distribution function.
This commit is contained in:
parent
f59bed7a13
commit
e38dd48a27
@ -83,6 +83,7 @@ struct default_packet_traits
|
||||
HasPolygamma = 0,
|
||||
HasErf = 0,
|
||||
HasErfc = 0,
|
||||
HasNdtri = 0,
|
||||
HasI0e = 0,
|
||||
HasI1e = 0,
|
||||
HasIGamma = 0,
|
||||
@ -257,12 +258,32 @@ pldexp(const Packet &a, const Packet &exponent) { return std::ldexp(a,exponent);
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pzero(const Packet& a) { return pxor(a,a); }
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline float pzero<float>(const float& a) {
|
||||
EIGEN_UNUSED_VARIABLE(a);
|
||||
return 0.;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline double pzero<double>(const double& a) {
|
||||
EIGEN_UNUSED_VARIABLE(a);
|
||||
return 0.;
|
||||
}
|
||||
|
||||
/** \internal \returns bits of \a or \b according to the input bit mask \a mask */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pselect(const Packet& mask, const Packet& a, const Packet& b) {
|
||||
return por(pand(a,mask),pandnot(b,mask));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline float pselect<float>(
|
||||
const float& mask, const float& a, const float&b) {
|
||||
return mask == 0 ? b : a;
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC inline double pselect<double>(
|
||||
const double& mask, const double& a, const double& b) {
|
||||
return mask == 0 ? b : a;
|
||||
}
|
||||
|
||||
/** \internal \returns a <= b as a bit mask */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
pcmp_le(const Packet& a, const Packet& b) { return a<=b ? ptrue(a) : pzero(a); }
|
||||
|
@ -76,6 +76,7 @@ namespace Eigen
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op,complement error function,\sa ArrayBase::erfc)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ndtri,scalar_ndtri_op,inverse normal distribution function,\sa ArrayBase::ndtri)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op,exponential,\sa ArrayBase::exp)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(expm1,scalar_expm1_op,exponential of a value minus 1,\sa ArrayBase::expm1)
|
||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log)
|
||||
|
@ -72,6 +72,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
HasLog1p = 1,
|
||||
HasExpm1 = 1,
|
||||
HasExp = 1,
|
||||
HasNdtri = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
HasTanh = EIGEN_FAST_MATH,
|
||||
|
@ -97,6 +97,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
HasLog = 1,
|
||||
HasLog1p = 1,
|
||||
HasExpm1 = 1,
|
||||
HasNdtri = 1,
|
||||
#endif
|
||||
HasExp = 1,
|
||||
HasSqrt = EIGEN_FAST_MATH,
|
||||
|
@ -512,5 +512,60 @@ Packet pcos_float(const Packet& x)
|
||||
return psincos_float<false>(x);
|
||||
}
|
||||
|
||||
/* polevl (modified for Eigen)
|
||||
*
|
||||
* Evaluate polynomial
|
||||
*
|
||||
*
|
||||
*
|
||||
* SYNOPSIS:
|
||||
*
|
||||
* int N;
|
||||
* Scalar x, y, coef[N+1];
|
||||
*
|
||||
* y = polevl<decltype(x), N>( x, coef);
|
||||
*
|
||||
*
|
||||
*
|
||||
* DESCRIPTION:
|
||||
*
|
||||
* Evaluates polynomial of degree N:
|
||||
*
|
||||
* 2 N
|
||||
* y = C + C x + C x +...+ C x
|
||||
* 0 1 2 N
|
||||
*
|
||||
* Coefficients are stored in reverse order:
|
||||
*
|
||||
* coef[0] = C , ..., coef[N] = C .
|
||||
* N 0
|
||||
*
|
||||
* The function p1evl() assumes that coef[N] = 1.0 and is
|
||||
* omitted from the array. Its calling arguments are
|
||||
* otherwise the same as polevl().
|
||||
*
|
||||
*
|
||||
* The Eigen implementation is templatized. For best speed, store
|
||||
* coef as a const array (constexpr), e.g.
|
||||
*
|
||||
* const double coef[] = {1.0, 2.0, 3.0, ...};
|
||||
*
|
||||
*/
|
||||
template <typename Packet, int N>
|
||||
struct ppolevl {
|
||||
static EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits<Packet>::type coeff[]) {
|
||||
EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
return pmadd(ppolevl<Packet, N-1>::run(x, coeff), x, pset1<Packet>(coeff[N]));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Packet>
|
||||
struct ppolevl<Packet, 0> {
|
||||
static EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits<Packet>::type coeff[]) {
|
||||
EIGEN_UNUSED_VARIABLE(x);
|
||||
return pset1<Packet>(coeff[0]);
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
} // end namespace Eigen
|
||||
|
@ -44,6 +44,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
HasPolygamma = 1,
|
||||
HasErf = 1,
|
||||
HasErfc = 1,
|
||||
HasNdtri = 1,
|
||||
HasI0e = 1,
|
||||
HasI1e = 1,
|
||||
HasIGamma = 1,
|
||||
@ -78,6 +79,7 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
HasPolygamma = 1,
|
||||
HasErf = 1,
|
||||
HasErfc = 1,
|
||||
HasNdtri = 1,
|
||||
HasI0e = 1,
|
||||
HasI1e = 1,
|
||||
HasIGamma = 1,
|
||||
|
@ -112,6 +112,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
HasLog = 1,
|
||||
HasLog1p = 1,
|
||||
HasExpm1 = 1,
|
||||
HasNdtri = 1,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
HasRsqrt = 1,
|
||||
|
@ -54,6 +54,7 @@ struct sycl_packet_traits : default_packet_traits {
|
||||
HasPolygamma = 0,
|
||||
HasErf = 0,
|
||||
HasErfc = 0,
|
||||
HasNdtri = 0,
|
||||
HasIGamma = 0,
|
||||
HasIGammac = 0,
|
||||
HasBetaInc = 0,
|
||||
|
@ -214,6 +214,7 @@ template<typename Scalar> struct scalar_lgamma_op;
|
||||
template<typename Scalar> struct scalar_digamma_op;
|
||||
template<typename Scalar> struct scalar_erf_op;
|
||||
template<typename Scalar> struct scalar_erfc_op;
|
||||
template<typename Scalar> struct scalar_ndtri_op;
|
||||
template<typename Scalar> struct scalar_igamma_op;
|
||||
template<typename Scalar> struct scalar_igammac_op;
|
||||
template<typename Scalar> struct scalar_zeta_op;
|
||||
|
@ -536,14 +536,12 @@ typedef CwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, const Derived> LgammaRe
|
||||
typedef CwiseUnaryOp<internal::scalar_digamma_op<Scalar>, const Derived> DigammaReturnType;
|
||||
typedef CwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived> ErfReturnType;
|
||||
typedef CwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived> ErfcReturnType;
|
||||
typedef CwiseUnaryOp<internal::scalar_ndtri_op<Scalar>, const Derived> NdtriReturnType;
|
||||
|
||||
/** \cpp11 \returns an expression of the coefficient-wise ln(|gamma(*this)|).
|
||||
*
|
||||
* \specialfunctions_module
|
||||
*
|
||||
* Example: \include Cwise_lgamma.cpp
|
||||
* Output: \verbinclude Cwise_lgamma.out
|
||||
*
|
||||
* \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
|
||||
* or float/double in non c++11 mode, the user has to provide implementations of lgamma(T) for any scalar
|
||||
* type T to be supported.
|
||||
@ -579,9 +577,6 @@ digamma() const
|
||||
*
|
||||
* \specialfunctions_module
|
||||
*
|
||||
* Example: \include Cwise_erf.cpp
|
||||
* Output: \verbinclude Cwise_erf.out
|
||||
*
|
||||
* \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
|
||||
* or float/double in non c++11 mode, the user has to provide implementations of erf(T) for any scalar
|
||||
* type T to be supported.
|
||||
@ -600,9 +595,6 @@ erf() const
|
||||
*
|
||||
* \specialfunctions_module
|
||||
*
|
||||
* Example: \include Cwise_erfc.cpp
|
||||
* Output: \verbinclude Cwise_erfc.out
|
||||
*
|
||||
* \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
|
||||
* or float/double in non c++11 mode, the user has to provide implementations of erfc(T) for any scalar
|
||||
* type T to be supported.
|
||||
@ -615,3 +607,21 @@ erfc() const
|
||||
{
|
||||
return ErfcReturnType(derived());
|
||||
}
|
||||
|
||||
/** \cpp11 \returns an expression of the coefficient-wise Complementary error
|
||||
* function of *this.
|
||||
*
|
||||
* \specialfunctions_module
|
||||
*
|
||||
* \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
|
||||
* or float/double in non c++11 mode, the user has to provide implementations of ndtri(T) for any scalar
|
||||
* type T to be supported.
|
||||
*
|
||||
* \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_ndtri">Math functions</a>, erf()
|
||||
*/
|
||||
EIGEN_DEVICE_FUNC
|
||||
inline const NdtriReturnType
|
||||
ndtri() const
|
||||
{
|
||||
return NdtriReturnType(derived());
|
||||
}
|
||||
|
@ -590,6 +590,12 @@ template<typename Scalar,typename Packet> void packetmath_real()
|
||||
h.store(data2, internal::perfc(h.load(data1)));
|
||||
VERIFY((numext::isnan)(data2[0]));
|
||||
}
|
||||
{
|
||||
for (int i=0; i<size; ++i) {
|
||||
data1[i] = internal::random<Scalar>(0,1);
|
||||
}
|
||||
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasNdtri, numext::ndtri, internal::pndtri);
|
||||
}
|
||||
#endif // EIGEN_HAS_C99_MATH
|
||||
|
||||
for (int i=0; i<size; ++i)
|
||||
|
@ -201,6 +201,12 @@ class TensorBase<Derived, ReadOnlyAccessors>
|
||||
return unaryExpr(internal::scalar_erfc_op<Scalar>());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_ndtri_op<Scalar>, const Derived>
|
||||
ndtri() const {
|
||||
return unaryExpr(internal::scalar_ndtri_op<Scalar>());
|
||||
}
|
||||
|
||||
EIGEN_DEVICE_FUNC
|
||||
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_logistic_op<Scalar>, const Derived>
|
||||
sigmoid() const {
|
||||
|
@ -33,6 +33,7 @@ namespace Eigen {
|
||||
* - gamma_sample_der_alpha
|
||||
* - igammac
|
||||
* - digamma
|
||||
* - ndtri
|
||||
* - polygamma
|
||||
* - zeta
|
||||
* - betainc
|
||||
|
@ -283,6 +283,31 @@ struct functor_traits<scalar_erfc_op<Scalar> >
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the Inverse of the normal distribution
|
||||
* function of a scalar
|
||||
* \sa class CwiseUnaryOp, Cwise::ndtri()
|
||||
*/
|
||||
template<typename Scalar> struct scalar_ndtri_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_ndtri_op)
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const {
|
||||
using numext::ndtri; return ndtri(a);
|
||||
}
|
||||
typedef typename packet_traits<Scalar>::type Packet;
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::pndtri(a); }
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_ndtri_op<Scalar> >
|
||||
{
|
||||
enum {
|
||||
// On average, We are evaluating rational functions with degree N=9 in the
|
||||
// numerator and denominator. This results in 2*N additions and 2*N
|
||||
// multiplications.
|
||||
Cost = 18 * NumTraits<Scalar>::MulCost + 18 * NumTraits<Scalar>::AddCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasNdtri
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
* \brief Template functor to compute the exponentially scaled modified Bessel
|
||||
* function of order zero
|
||||
|
@ -30,6 +30,9 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erf(const Eigen::ha
|
||||
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erfc(const Eigen::half& a) {
|
||||
return Eigen::half(Eigen::numext::erfc(static_cast<float>(a)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ndtri(const Eigen::half& a) {
|
||||
return Eigen::half(Eigen::numext::ndtri(static_cast<float>(a)));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma(const Eigen::half& a, const Eigen::half& x) {
|
||||
return Eigen::half(Eigen::numext::igamma(static_cast<float>(a), static_cast<float>(x)));
|
||||
}
|
||||
|
@ -38,63 +38,6 @@ namespace internal {
|
||||
|
||||
namespace cephes {
|
||||
|
||||
/* polevl (modified for Eigen)
|
||||
*
|
||||
* Evaluate polynomial
|
||||
*
|
||||
*
|
||||
*
|
||||
* SYNOPSIS:
|
||||
*
|
||||
* int N;
|
||||
* Scalar x, y, coef[N+1];
|
||||
*
|
||||
* y = polevl<decltype(x), N>( x, coef);
|
||||
*
|
||||
*
|
||||
*
|
||||
* DESCRIPTION:
|
||||
*
|
||||
* Evaluates polynomial of degree N:
|
||||
*
|
||||
* 2 N
|
||||
* y = C + C x + C x +...+ C x
|
||||
* 0 1 2 N
|
||||
*
|
||||
* Coefficients are stored in reverse order:
|
||||
*
|
||||
* coef[0] = C , ..., coef[N] = C .
|
||||
* N 0
|
||||
*
|
||||
* The function p1evl() assumes that coef[N] = 1.0 and is
|
||||
* omitted from the array. Its calling arguments are
|
||||
* otherwise the same as polevl().
|
||||
*
|
||||
*
|
||||
* The Eigen implementation is templatized. For best speed, store
|
||||
* coef as a const array (constexpr), e.g.
|
||||
*
|
||||
* const double coef[] = {1.0, 2.0, 3.0, ...};
|
||||
*
|
||||
*/
|
||||
template <typename Scalar, int N>
|
||||
struct polevl {
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Scalar x, const Scalar coef[]) {
|
||||
EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||
|
||||
return polevl<Scalar, N - 1>::run(x, coef) * x + coef[N];
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Scalar>
|
||||
struct polevl<Scalar, 0> {
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Scalar, const Scalar coef[]) {
|
||||
return coef[0];
|
||||
}
|
||||
};
|
||||
|
||||
/* chbevl (modified for Eigen)
|
||||
*
|
||||
* Evaluate Chebyshev series
|
||||
@ -190,7 +133,7 @@ template <>
|
||||
struct lgamma_impl<float> {
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE float run(float x) {
|
||||
#if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
|
||||
#if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
|
||||
int dummy;
|
||||
return ::lgammaf_r(x, &dummy);
|
||||
#elif defined(SYCL_DEVICE_ONLY)
|
||||
@ -205,7 +148,7 @@ template <>
|
||||
struct lgamma_impl<double> {
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE double run(double x) {
|
||||
#if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
|
||||
#if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
|
||||
int dummy;
|
||||
return ::lgamma_r(x, &dummy);
|
||||
#elif defined(SYCL_DEVICE_ONLY)
|
||||
@ -264,7 +207,7 @@ struct digamma_impl_maybe_poly<float> {
|
||||
float z;
|
||||
if (s < 1.0e8f) {
|
||||
z = 1.0f / (s * s);
|
||||
return z * cephes::polevl<float, 3>::run(z, A);
|
||||
return z * internal::ppolevl<float, 3>::run(z, A);
|
||||
} else return 0.0f;
|
||||
}
|
||||
};
|
||||
@ -286,7 +229,7 @@ struct digamma_impl_maybe_poly<double> {
|
||||
double z;
|
||||
if (s < 1.0e17) {
|
||||
z = 1.0 / (s * s);
|
||||
return z * cephes::polevl<double, 6>::run(z, A);
|
||||
return z * internal::ppolevl<double, 6>::run(z, A);
|
||||
}
|
||||
else return 0.0;
|
||||
}
|
||||
@ -494,6 +437,246 @@ struct erfc_impl<double> {
|
||||
};
|
||||
#endif // EIGEN_HAS_C99_MATH
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
* Implementation of ndtri. *
|
||||
****************************************************************************/
|
||||
|
||||
/* Inverse of Normal distribution function (modified for Eigen).
|
||||
*
|
||||
*
|
||||
* SYNOPSIS:
|
||||
*
|
||||
* double x, y, ndtri();
|
||||
*
|
||||
* x = ndtri( y );
|
||||
*
|
||||
*
|
||||
*
|
||||
* DESCRIPTION:
|
||||
*
|
||||
* Returns the argument, x, for which the area under the
|
||||
* Gaussian probability density function (integrated from
|
||||
* minus infinity to x) is equal to y.
|
||||
*
|
||||
*
|
||||
* For small arguments 0 < y < exp(-2), the program computes
|
||||
* z = sqrt( -2.0 * log(y) ); then the approximation is
|
||||
* x = z - log(z)/z - (1/z) P(1/z) / Q(1/z).
|
||||
* There are two rational functions P/Q, one for 0 < y < exp(-32)
|
||||
* and the other for y up to exp(-2). For larger arguments,
|
||||
* w = y - 0.5, and x/sqrt(2pi) = w + w**3 R(w**2)/S(w**2)).
|
||||
*
|
||||
*
|
||||
* ACCURACY:
|
||||
*
|
||||
* Relative error:
|
||||
* arithmetic domain # trials peak rms
|
||||
* DEC 0.125, 1 5500 9.5e-17 2.1e-17
|
||||
* DEC 6e-39, 0.135 3500 5.7e-17 1.3e-17
|
||||
* IEEE 0.125, 1 20000 7.2e-16 1.3e-16
|
||||
* IEEE 3e-308, 0.135 50000 4.6e-16 9.8e-17
|
||||
*
|
||||
*
|
||||
* ERROR MESSAGES:
|
||||
*
|
||||
* message condition value returned
|
||||
* ndtri domain x <= 0 -MAXNUM
|
||||
* ndtri domain x >= 1 MAXNUM
|
||||
*
|
||||
*/
|
||||
/*
|
||||
Cephes Math Library Release 2.2: June, 1992
|
||||
Copyright 1985, 1987, 1992 by Stephen L. Moshier
|
||||
Direct inquiries to 30 Frost Street, Cambridge, MA 02140
|
||||
*/
|
||||
|
||||
|
||||
// TODO: Add a cheaper approximation for float.
|
||||
|
||||
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T flipsign(
|
||||
const T& should_flipsign, const T& x) {
|
||||
const T sign_mask = pset1<T>(-0.0);
|
||||
T sign_bit = pand<T>(should_flipsign, sign_mask);
|
||||
return pxor<T>(sign_bit, x);
|
||||
}
|
||||
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double flipsign<double>(
|
||||
const double& should_flipsign, const double& x) {
|
||||
return should_flipsign == 0 ? x : -x;
|
||||
}
|
||||
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float flipsign<float>(
|
||||
const float& should_flipsign, const float& x) {
|
||||
return should_flipsign == 0 ? x : -x;
|
||||
}
|
||||
|
||||
// We split this computation in to two so that in the scalar path
|
||||
// only one branch is evaluated (due to our template specialization of pselect
|
||||
// being an if statement.)
|
||||
|
||||
template <typename T, typename ScalarType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_ndtri_gt_exp_neg_two(const T& b) {
|
||||
const ScalarType p0[] = {
|
||||
ScalarType(-5.99633501014107895267e1),
|
||||
ScalarType(9.80010754185999661536e1),
|
||||
ScalarType(-5.66762857469070293439e1),
|
||||
ScalarType(1.39312609387279679503e1),
|
||||
ScalarType(-1.23916583867381258016e0)
|
||||
};
|
||||
const ScalarType q0[] = {
|
||||
ScalarType(1.0),
|
||||
ScalarType(1.95448858338141759834e0),
|
||||
ScalarType(4.67627912898881538453e0),
|
||||
ScalarType(8.63602421390890590575e1),
|
||||
ScalarType(-2.25462687854119370527e2),
|
||||
ScalarType(2.00260212380060660359e2),
|
||||
ScalarType(-8.20372256168333339912e1),
|
||||
ScalarType(1.59056225126211695515e1),
|
||||
ScalarType(-1.18331621121330003142e0)
|
||||
};
|
||||
const T sqrt2pi = pset1<T>(ScalarType(2.50662827463100050242e0));
|
||||
const T half = pset1<T>(ScalarType(0.5));
|
||||
T c, c2, ndtri_gt_exp_neg_two;
|
||||
|
||||
c = psub(b, half);
|
||||
c2 = pmul(c, c);
|
||||
ndtri_gt_exp_neg_two = pmadd(c, pmul(
|
||||
c2, pdiv(
|
||||
internal::ppolevl<T, 4>::run(c2, p0),
|
||||
internal::ppolevl<T, 8>::run(c2, q0))), c);
|
||||
return pmul(ndtri_gt_exp_neg_two, sqrt2pi);
|
||||
}
|
||||
|
||||
template <typename T, typename ScalarType>
|
||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_ndtri_lt_exp_neg_two(
|
||||
const T& b, const T& should_flipsign) {
|
||||
/* Approximation for interval z = sqrt(-2 log a ) between 2 and 8
|
||||
* i.e., a between exp(-2) = .135 and exp(-32) = 1.27e-14.
|
||||
*/
|
||||
const ScalarType p1[] = {
|
||||
ScalarType(4.05544892305962419923e0),
|
||||
ScalarType(3.15251094599893866154e1),
|
||||
ScalarType(5.71628192246421288162e1),
|
||||
ScalarType(4.40805073893200834700e1),
|
||||
ScalarType(1.46849561928858024014e1),
|
||||
ScalarType(2.18663306850790267539e0),
|
||||
ScalarType(-1.40256079171354495875e-1),
|
||||
ScalarType(-3.50424626827848203418e-2),
|
||||
ScalarType(-8.57456785154685413611e-4)
|
||||
};
|
||||
const ScalarType q1[] = {
|
||||
ScalarType(1.0),
|
||||
ScalarType(1.57799883256466749731e1),
|
||||
ScalarType(4.53907635128879210584e1),
|
||||
ScalarType(4.13172038254672030440e1),
|
||||
ScalarType(1.50425385692907503408e1),
|
||||
ScalarType(2.50464946208309415979e0),
|
||||
ScalarType(-1.42182922854787788574e-1),
|
||||
ScalarType(-3.80806407691578277194e-2),
|
||||
ScalarType(-9.33259480895457427372e-4)
|
||||
};
|
||||
/* Approximation for interval z = sqrt(-2 log a ) between 8 and 64
|
||||
* i.e., a between exp(-32) = 1.27e-14 and exp(-2048) = 3.67e-890.
|
||||
*/
|
||||
const ScalarType p2[] = {
|
||||
ScalarType(3.23774891776946035970e0),
|
||||
ScalarType(6.91522889068984211695e0),
|
||||
ScalarType(3.93881025292474443415e0),
|
||||
ScalarType(1.33303460815807542389e0),
|
||||
ScalarType(2.01485389549179081538e-1),
|
||||
ScalarType(1.23716634817820021358e-2),
|
||||
ScalarType(3.01581553508235416007e-4),
|
||||
ScalarType(2.65806974686737550832e-6),
|
||||
ScalarType(6.23974539184983293730e-9)
|
||||
};
|
||||
const ScalarType q2[] = {
|
||||
ScalarType(1.0),
|
||||
ScalarType(6.02427039364742014255e0),
|
||||
ScalarType(3.67983563856160859403e0),
|
||||
ScalarType(1.37702099489081330271e0),
|
||||
ScalarType(2.16236993594496635890e-1),
|
||||
ScalarType(1.34204006088543189037e-2),
|
||||
ScalarType(3.28014464682127739104e-4),
|
||||
ScalarType(2.89247864745380683936e-6),
|
||||
ScalarType(6.79019408009981274425e-9)
|
||||
};
|
||||
const T eight = pset1<T>(ScalarType(8.0));
|
||||
const T one = pset1<T>(ScalarType(1));
|
||||
const T neg_two = pset1<T>(ScalarType(-2));
|
||||
T x, x0, x1, z;
|
||||
|
||||
x = psqrt(pmul(neg_two, plog(b)));
|
||||
x0 = psub(x, pdiv(plog(x), x));
|
||||
z = one / x;
|
||||
x1 = pmul(
|
||||
z, pselect(
|
||||
pcmp_lt(x, eight),
|
||||
pdiv(internal::ppolevl<T, 8>::run(z, p1),
|
||||
internal::ppolevl<T, 8>::run(z, q1)),
|
||||
pdiv(internal::ppolevl<T, 8>::run(z, p2),
|
||||
internal::ppolevl<T, 8>::run(z, q2))));
|
||||
return flipsign(should_flipsign, psub(x0, x1));
|
||||
}
|
||||
|
||||
template <typename T, typename ScalarType>
|
||||
T generic_ndtri(const T& a) {
|
||||
const T maxnum = pset1<T>(NumTraits<ScalarType>::infinity());
|
||||
const T neg_maxnum = pset1<T>(-NumTraits<ScalarType>::infinity());
|
||||
|
||||
const T zero = pset1<T>(ScalarType(0));
|
||||
const T one = pset1<T>(ScalarType(1));
|
||||
// exp(-2)
|
||||
const T exp_neg_two = pset1<T>(ScalarType(0.13533528323661269189));
|
||||
T b, ndtri, should_flipsign;
|
||||
|
||||
should_flipsign = pcmp_le(a, psub(one, exp_neg_two));
|
||||
b = pselect(should_flipsign, a, psub(one, a));
|
||||
|
||||
ndtri = pselect(
|
||||
pcmp_lt(exp_neg_two, b),
|
||||
generic_ndtri_gt_exp_neg_two<T, ScalarType>(b),
|
||||
generic_ndtri_lt_exp_neg_two<T, ScalarType>(b, should_flipsign));
|
||||
|
||||
return pselect(
|
||||
pcmp_le(a, zero), neg_maxnum,
|
||||
pselect(pcmp_le(one, a), maxnum, ndtri));
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
struct ndtri_retval {
|
||||
typedef Scalar type;
|
||||
};
|
||||
|
||||
#if !EIGEN_HAS_C99_MATH
|
||||
|
||||
template <typename Scalar>
|
||||
struct ndtri_impl {
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
|
||||
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
|
||||
THIS_TYPE_IS_NOT_SUPPORTED);
|
||||
return Scalar(0);
|
||||
}
|
||||
};
|
||||
|
||||
# else
|
||||
|
||||
template <typename Scalar>
|
||||
struct ndtri_impl {
|
||||
EIGEN_DEVICE_FUNC
|
||||
static EIGEN_STRONG_INLINE Scalar run(const Scalar x) {
|
||||
return generic_ndtri<Scalar, Scalar>(x);
|
||||
}
|
||||
};
|
||||
|
||||
#endif // EIGEN_HAS_C99_MATH
|
||||
|
||||
|
||||
/**************************************************************************************************************
|
||||
* Implementation of igammac (complemented incomplete gamma integral), based on Cephes but requires C++11/C99 *
|
||||
**************************************************************************************************************/
|
||||
@ -2120,6 +2303,12 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar)
|
||||
return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(ndtri, Scalar)
|
||||
ndtri(const Scalar& x) {
|
||||
return EIGEN_MATHFUNC_IMPL(ndtri, Scalar)::run(x);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igamma, Scalar)
|
||||
igamma(const Scalar& a, const Scalar& x) {
|
||||
|
@ -38,6 +38,13 @@ Packet perf(const Packet& a) { using numext::erf; return erf(a); }
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }
|
||||
|
||||
/** \internal \returns the ndtri(\a a) (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pndtri(const Packet& a) {
|
||||
typedef typename unpacket_traits<Packet>::type ScalarType;
|
||||
using internal::generic_ndtri; return generic_ndtri<Packet, ScalarType>(a);
|
||||
}
|
||||
|
||||
/** \internal \returns the incomplete gamma function igamma(\a a, \a x) */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); }
|
||||
|
@ -101,6 +101,19 @@ double2 perfc<double2>(const double2& a)
|
||||
return make_double2(erfc(a.x), erfc(a.y));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
float4 pndtri<float4>(const float4& a)
|
||||
{
|
||||
using numext::ndtri;
|
||||
return make_float4(ndtri(a.x), ndtri(a.y), ndtri(a.z), ndtri(a.w));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
double2 pndtri<double2>(const double2& a)
|
||||
{
|
||||
using numext::ndtri;
|
||||
return make_double2(ndtri(a.x), ndtri(a.y));
|
||||
}
|
||||
|
||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||
float4 pigamma<float4>(const float4& a, const float4& x)
|
||||
|
@ -1069,6 +1069,66 @@ void test_gpu_erfc(const Scalar stddev)
|
||||
gpuFree(d_out);
|
||||
}
|
||||
#endif
|
||||
template <typename Scalar>
|
||||
void test_gpu_ndtri()
|
||||
{
|
||||
Tensor<Scalar, 1> in_x(8);
|
||||
Tensor<Scalar, 1> out(8);
|
||||
Tensor<Scalar, 1> expected_out(8);
|
||||
out.setZero();
|
||||
|
||||
in_x(0) = Scalar(1);
|
||||
in_x(1) = Scalar(0.);
|
||||
in_x(2) = Scalar(0.5);
|
||||
in_x(3) = Scalar(0.2);
|
||||
in_x(4) = Scalar(0.8);
|
||||
in_x(5) = Scalar(0.9);
|
||||
in_x(6) = Scalar(0.1);
|
||||
in_x(7) = Scalar(0.99);
|
||||
in_x(8) = Scalar(0.01);
|
||||
|
||||
expected_out(0) = std::numeric_limits<Scalar>::infinity();
|
||||
expected_out(1) = -std::numeric_limits<Scalar>::infinity();
|
||||
expected_out(2) = Scalar(0.0);
|
||||
expected_out(3) = Scalar(-0.8416212335729142);
|
||||
expected_out(4) = Scalar(0.8416212335729142);j
|
||||
expected_out(5) = Scalar(1.2815515655446004);
|
||||
expected_out(6) = Scalar(-1.2815515655446004);
|
||||
expected_out(7) = Scalar(2.3263478740408408);
|
||||
expected_out(8) = Scalar(-2.3263478740408408);
|
||||
|
||||
std::size_t bytes = in_x.size() * sizeof(Scalar);
|
||||
|
||||
Scalar* d_in_x;
|
||||
Scalar* d_out;
|
||||
gpuMalloc((void**)(&d_in_x), bytes);
|
||||
gpuMalloc((void**)(&d_out), bytes);
|
||||
|
||||
gpuMemcpy(d_in_x, in_x.data(), bytes, gpuMemcpyHostToDevice);
|
||||
|
||||
Eigen::GpuStreamDevice stream;
|
||||
Eigen::GpuDevice gpu_device(&stream);
|
||||
|
||||
Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_x(d_in_x, 6);
|
||||
Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 6);
|
||||
|
||||
gpu_out.device(gpu_device) = gpu_in_x.ndtri();
|
||||
|
||||
assert(gpuMemcpyAsync(out.data(), d_out, bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
|
||||
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
|
||||
|
||||
VERIFY_IS_EQUAL(out(0), expected_out(0));
|
||||
VERIFY((std::isnan)(out(3)));
|
||||
|
||||
for (int i = 1; i < 6; ++i) {
|
||||
if (i != 3) {
|
||||
VERIFY_IS_APPROX(out(i), expected_out(i));
|
||||
}
|
||||
}
|
||||
|
||||
gpuFree(d_in_x);
|
||||
gpuFree(d_out);
|
||||
}
|
||||
|
||||
template <typename Scalar>
|
||||
void test_gpu_betainc()
|
||||
@ -1538,6 +1598,10 @@ EIGEN_DECLARE_TEST(cxx11_tensor_gpu)
|
||||
|
||||
#if !defined(EIGEN_USE_HIP)
|
||||
// disable these tests on HIP for now.
|
||||
|
||||
CALL_SUBTEST_5(test_gpu_ndtri<float>());
|
||||
CALL_SUBTEST_5(test_gpu_ndtri<double>());
|
||||
|
||||
CALL_SUBTEST_5(test_gpu_digamma<float>());
|
||||
CALL_SUBTEST_5(test_gpu_digamma<double>());
|
||||
|
||||
|
@ -133,6 +133,26 @@ template<typename ArrayType> void array_special_functions()
|
||||
}
|
||||
#endif // EIGEN_HAS_C99_MATH
|
||||
|
||||
// Check the ndtri function against scipy.special.ndtri
|
||||
{
|
||||
ArrayType x(7), res(7), ref(7);
|
||||
x << 0.5, 0.2, 0.8, 0.9, 0.1, 0.99, 0.01;
|
||||
ref << 0., -0.8416212335729142, 0.8416212335729142, 1.2815515655446004, -1.2815515655446004, 2.3263478740408408, -2.3263478740408408;
|
||||
CALL_SUBTEST( verify_component_wise(ref, ref); );
|
||||
CALL_SUBTEST( res = x.ndtri(); verify_component_wise(res, ref); );
|
||||
CALL_SUBTEST( res = ndtri(x); verify_component_wise(res, ref); );
|
||||
|
||||
// ndtri(normal_cdf(x)) ~= x
|
||||
CALL_SUBTEST(
|
||||
ArrayType m1 = ArrayType::Random(32);
|
||||
using std::sqrt;
|
||||
|
||||
ArrayType cdf_val = (m1 / sqrt(2.)).erf();
|
||||
cdf_val = (cdf_val + 1.) / 2.;
|
||||
verify_component_wise(cdf_val.ndtri(), m1););
|
||||
|
||||
}
|
||||
|
||||
// Check the zeta function against scipy.special.zeta
|
||||
{
|
||||
ArrayType x(7), q(7), res(7), ref(7);
|
||||
|
Loading…
x
Reference in New Issue
Block a user