PR 681: Add ndtri function, the inverse of the normal distribution function.

This commit is contained in:
Srinivas Vasudevan 2019-08-12 19:26:29 -04:00
parent f59bed7a13
commit e38dd48a27
20 changed files with 498 additions and 70 deletions

View File

@ -83,6 +83,7 @@ struct default_packet_traits
HasPolygamma = 0,
HasErf = 0,
HasErfc = 0,
HasNdtri = 0,
HasI0e = 0,
HasI1e = 0,
HasIGamma = 0,
@ -257,12 +258,32 @@ pldexp(const Packet &a, const Packet &exponent) { return std::ldexp(a,exponent);
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pzero(const Packet& a) { return pxor(a,a); }
template<> EIGEN_DEVICE_FUNC inline float pzero<float>(const float& a) {
EIGEN_UNUSED_VARIABLE(a);
return 0.;
}
template<> EIGEN_DEVICE_FUNC inline double pzero<double>(const double& a) {
EIGEN_UNUSED_VARIABLE(a);
return 0.;
}
/** \internal \returns bits of \a or \b according to the input bit mask \a mask */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pselect(const Packet& mask, const Packet& a, const Packet& b) {
return por(pand(a,mask),pandnot(b,mask));
}
template<> EIGEN_DEVICE_FUNC inline float pselect<float>(
const float& mask, const float& a, const float&b) {
return mask == 0 ? b : a;
}
template<> EIGEN_DEVICE_FUNC inline double pselect<double>(
const double& mask, const double& a, const double& b) {
return mask == 0 ? b : a;
}
/** \internal \returns a <= b as a bit mask */
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
pcmp_le(const Packet& a, const Packet& b) { return a<=b ? ptrue(a) : pzero(a); }

View File

@ -76,6 +76,7 @@ namespace Eigen
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op,complement error function,\sa ArrayBase::erfc)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ndtri,scalar_ndtri_op,inverse normal distribution function,\sa ArrayBase::ndtri)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op,exponential,\sa ArrayBase::exp)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(expm1,scalar_expm1_op,exponential of a value minus 1,\sa ArrayBase::expm1)
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log)

View File

@ -72,6 +72,7 @@ template<> struct packet_traits<float> : default_packet_traits
HasLog1p = 1,
HasExpm1 = 1,
HasExp = 1,
HasNdtri = 1,
HasSqrt = 1,
HasRsqrt = 1,
HasTanh = EIGEN_FAST_MATH,

View File

@ -97,6 +97,7 @@ template<> struct packet_traits<float> : default_packet_traits
HasLog = 1,
HasLog1p = 1,
HasExpm1 = 1,
HasNdtri = 1,
#endif
HasExp = 1,
HasSqrt = EIGEN_FAST_MATH,

View File

@ -512,5 +512,60 @@ Packet pcos_float(const Packet& x)
return psincos_float<false>(x);
}
/* polevl (modified for Eigen)
*
* Evaluate polynomial
*
*
*
* SYNOPSIS:
*
* int N;
* Scalar x, y, coef[N+1];
*
* y = polevl<decltype(x), N>( x, coef);
*
*
*
* DESCRIPTION:
*
* Evaluates polynomial of degree N:
*
* 2 N
* y = C + C x + C x +...+ C x
* 0 1 2 N
*
* Coefficients are stored in reverse order:
*
* coef[0] = C , ..., coef[N] = C .
* N 0
*
* The function p1evl() assumes that coef[N] = 1.0 and is
* omitted from the array. Its calling arguments are
* otherwise the same as polevl().
*
*
* The Eigen implementation is templatized. For best speed, store
* coef as a const array (constexpr), e.g.
*
* const double coef[] = {1.0, 2.0, 3.0, ...};
*
*/
template <typename Packet, int N>
struct ppolevl {
static EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits<Packet>::type coeff[]) {
EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
return pmadd(ppolevl<Packet, N-1>::run(x, coeff), x, pset1<Packet>(coeff[N]));
}
};
template <typename Packet>
struct ppolevl<Packet, 0> {
static EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits<Packet>::type coeff[]) {
EIGEN_UNUSED_VARIABLE(x);
return pset1<Packet>(coeff[0]);
}
};
} // end namespace internal
} // end namespace Eigen

View File

@ -44,6 +44,7 @@ template<> struct packet_traits<float> : default_packet_traits
HasPolygamma = 1,
HasErf = 1,
HasErfc = 1,
HasNdtri = 1,
HasI0e = 1,
HasI1e = 1,
HasIGamma = 1,
@ -78,6 +79,7 @@ template<> struct packet_traits<double> : default_packet_traits
HasPolygamma = 1,
HasErf = 1,
HasErfc = 1,
HasNdtri = 1,
HasI0e = 1,
HasI1e = 1,
HasIGamma = 1,

View File

@ -112,6 +112,7 @@ template<> struct packet_traits<float> : default_packet_traits
HasLog = 1,
HasLog1p = 1,
HasExpm1 = 1,
HasNdtri = 1,
HasExp = 1,
HasSqrt = 1,
HasRsqrt = 1,

View File

@ -54,6 +54,7 @@ struct sycl_packet_traits : default_packet_traits {
HasPolygamma = 0,
HasErf = 0,
HasErfc = 0,
HasNdtri = 0,
HasIGamma = 0,
HasIGammac = 0,
HasBetaInc = 0,

View File

@ -214,6 +214,7 @@ template<typename Scalar> struct scalar_lgamma_op;
template<typename Scalar> struct scalar_digamma_op;
template<typename Scalar> struct scalar_erf_op;
template<typename Scalar> struct scalar_erfc_op;
template<typename Scalar> struct scalar_ndtri_op;
template<typename Scalar> struct scalar_igamma_op;
template<typename Scalar> struct scalar_igammac_op;
template<typename Scalar> struct scalar_zeta_op;

View File

@ -536,14 +536,12 @@ typedef CwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, const Derived> LgammaRe
typedef CwiseUnaryOp<internal::scalar_digamma_op<Scalar>, const Derived> DigammaReturnType;
typedef CwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived> ErfReturnType;
typedef CwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived> ErfcReturnType;
typedef CwiseUnaryOp<internal::scalar_ndtri_op<Scalar>, const Derived> NdtriReturnType;
/** \cpp11 \returns an expression of the coefficient-wise ln(|gamma(*this)|).
*
* \specialfunctions_module
*
* Example: \include Cwise_lgamma.cpp
* Output: \verbinclude Cwise_lgamma.out
*
* \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
* or float/double in non c++11 mode, the user has to provide implementations of lgamma(T) for any scalar
* type T to be supported.
@ -579,9 +577,6 @@ digamma() const
*
* \specialfunctions_module
*
* Example: \include Cwise_erf.cpp
* Output: \verbinclude Cwise_erf.out
*
* \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
* or float/double in non c++11 mode, the user has to provide implementations of erf(T) for any scalar
* type T to be supported.
@ -600,9 +595,6 @@ erf() const
*
* \specialfunctions_module
*
* Example: \include Cwise_erfc.cpp
* Output: \verbinclude Cwise_erfc.out
*
* \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
* or float/double in non c++11 mode, the user has to provide implementations of erfc(T) for any scalar
* type T to be supported.
@ -615,3 +607,21 @@ erfc() const
{
return ErfcReturnType(derived());
}
/** \cpp11 \returns an expression of the coefficient-wise Complementary error
* function of *this.
*
* \specialfunctions_module
*
* \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
* or float/double in non c++11 mode, the user has to provide implementations of ndtri(T) for any scalar
* type T to be supported.
*
* \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_ndtri">Math functions</a>, erf()
*/
EIGEN_DEVICE_FUNC
inline const NdtriReturnType
ndtri() const
{
return NdtriReturnType(derived());
}

View File

@ -590,6 +590,12 @@ template<typename Scalar,typename Packet> void packetmath_real()
h.store(data2, internal::perfc(h.load(data1)));
VERIFY((numext::isnan)(data2[0]));
}
{
for (int i=0; i<size; ++i) {
data1[i] = internal::random<Scalar>(0,1);
}
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasNdtri, numext::ndtri, internal::pndtri);
}
#endif // EIGEN_HAS_C99_MATH
for (int i=0; i<size; ++i)

View File

@ -201,6 +201,12 @@ class TensorBase<Derived, ReadOnlyAccessors>
return unaryExpr(internal::scalar_erfc_op<Scalar>());
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_ndtri_op<Scalar>, const Derived>
ndtri() const {
return unaryExpr(internal::scalar_ndtri_op<Scalar>());
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_logistic_op<Scalar>, const Derived>
sigmoid() const {

View File

@ -33,6 +33,7 @@ namespace Eigen {
* - gamma_sample_der_alpha
* - igammac
* - digamma
* - ndtri
* - polygamma
* - zeta
* - betainc

View File

@ -283,6 +283,31 @@ struct functor_traits<scalar_erfc_op<Scalar> >
};
};
/** \internal
* \brief Template functor to compute the Inverse of the normal distribution
* function of a scalar
* \sa class CwiseUnaryOp, Cwise::ndtri()
*/
template<typename Scalar> struct scalar_ndtri_op {
EIGEN_EMPTY_STRUCT_CTOR(scalar_ndtri_op)
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const {
using numext::ndtri; return ndtri(a);
}
typedef typename packet_traits<Scalar>::type Packet;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::pndtri(a); }
};
template<typename Scalar>
struct functor_traits<scalar_ndtri_op<Scalar> >
{
enum {
// On average, We are evaluating rational functions with degree N=9 in the
// numerator and denominator. This results in 2*N additions and 2*N
// multiplications.
Cost = 18 * NumTraits<Scalar>::MulCost + 18 * NumTraits<Scalar>::AddCost,
PacketAccess = packet_traits<Scalar>::HasNdtri
};
};
/** \internal
* \brief Template functor to compute the exponentially scaled modified Bessel
* function of order zero

View File

@ -30,6 +30,9 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erf(const Eigen::ha
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erfc(const Eigen::half& a) {
return Eigen::half(Eigen::numext::erfc(static_cast<float>(a)));
}
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ndtri(const Eigen::half& a) {
return Eigen::half(Eigen::numext::ndtri(static_cast<float>(a)));
}
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma(const Eigen::half& a, const Eigen::half& x) {
return Eigen::half(Eigen::numext::igamma(static_cast<float>(a), static_cast<float>(x)));
}

View File

@ -38,63 +38,6 @@ namespace internal {
namespace cephes {
/* polevl (modified for Eigen)
*
* Evaluate polynomial
*
*
*
* SYNOPSIS:
*
* int N;
* Scalar x, y, coef[N+1];
*
* y = polevl<decltype(x), N>( x, coef);
*
*
*
* DESCRIPTION:
*
* Evaluates polynomial of degree N:
*
* 2 N
* y = C + C x + C x +...+ C x
* 0 1 2 N
*
* Coefficients are stored in reverse order:
*
* coef[0] = C , ..., coef[N] = C .
* N 0
*
* The function p1evl() assumes that coef[N] = 1.0 and is
* omitted from the array. Its calling arguments are
* otherwise the same as polevl().
*
*
* The Eigen implementation is templatized. For best speed, store
* coef as a const array (constexpr), e.g.
*
* const double coef[] = {1.0, 2.0, 3.0, ...};
*
*/
template <typename Scalar, int N>
struct polevl {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Scalar run(const Scalar x, const Scalar coef[]) {
EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
return polevl<Scalar, N - 1>::run(x, coef) * x + coef[N];
}
};
template <typename Scalar>
struct polevl<Scalar, 0> {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Scalar run(const Scalar, const Scalar coef[]) {
return coef[0];
}
};
/* chbevl (modified for Eigen)
*
* Evaluate Chebyshev series
@ -190,7 +133,7 @@ template <>
struct lgamma_impl<float> {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE float run(float x) {
#if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
#if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
int dummy;
return ::lgammaf_r(x, &dummy);
#elif defined(SYCL_DEVICE_ONLY)
@ -205,7 +148,7 @@ template <>
struct lgamma_impl<double> {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE double run(double x) {
#if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
#if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
int dummy;
return ::lgamma_r(x, &dummy);
#elif defined(SYCL_DEVICE_ONLY)
@ -264,7 +207,7 @@ struct digamma_impl_maybe_poly<float> {
float z;
if (s < 1.0e8f) {
z = 1.0f / (s * s);
return z * cephes::polevl<float, 3>::run(z, A);
return z * internal::ppolevl<float, 3>::run(z, A);
} else return 0.0f;
}
};
@ -286,7 +229,7 @@ struct digamma_impl_maybe_poly<double> {
double z;
if (s < 1.0e17) {
z = 1.0 / (s * s);
return z * cephes::polevl<double, 6>::run(z, A);
return z * internal::ppolevl<double, 6>::run(z, A);
}
else return 0.0;
}
@ -494,6 +437,246 @@ struct erfc_impl<double> {
};
#endif // EIGEN_HAS_C99_MATH
/***************************************************************************
* Implementation of ndtri. *
****************************************************************************/
/* Inverse of Normal distribution function (modified for Eigen).
*
*
* SYNOPSIS:
*
* double x, y, ndtri();
*
* x = ndtri( y );
*
*
*
* DESCRIPTION:
*
* Returns the argument, x, for which the area under the
* Gaussian probability density function (integrated from
* minus infinity to x) is equal to y.
*
*
* For small arguments 0 < y < exp(-2), the program computes
* z = sqrt( -2.0 * log(y) ); then the approximation is
* x = z - log(z)/z - (1/z) P(1/z) / Q(1/z).
* There are two rational functions P/Q, one for 0 < y < exp(-32)
* and the other for y up to exp(-2). For larger arguments,
* w = y - 0.5, and x/sqrt(2pi) = w + w**3 R(w**2)/S(w**2)).
*
*
* ACCURACY:
*
* Relative error:
* arithmetic domain # trials peak rms
* DEC 0.125, 1 5500 9.5e-17 2.1e-17
* DEC 6e-39, 0.135 3500 5.7e-17 1.3e-17
* IEEE 0.125, 1 20000 7.2e-16 1.3e-16
* IEEE 3e-308, 0.135 50000 4.6e-16 9.8e-17
*
*
* ERROR MESSAGES:
*
* message condition value returned
* ndtri domain x <= 0 -MAXNUM
* ndtri domain x >= 1 MAXNUM
*
*/
/*
Cephes Math Library Release 2.2: June, 1992
Copyright 1985, 1987, 1992 by Stephen L. Moshier
Direct inquiries to 30 Frost Street, Cambridge, MA 02140
*/
// TODO: Add a cheaper approximation for float.
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T flipsign(
const T& should_flipsign, const T& x) {
const T sign_mask = pset1<T>(-0.0);
T sign_bit = pand<T>(should_flipsign, sign_mask);
return pxor<T>(sign_bit, x);
}
template<>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double flipsign<double>(
const double& should_flipsign, const double& x) {
return should_flipsign == 0 ? x : -x;
}
template<>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float flipsign<float>(
const float& should_flipsign, const float& x) {
return should_flipsign == 0 ? x : -x;
}
// We split this computation in to two so that in the scalar path
// only one branch is evaluated (due to our template specialization of pselect
// being an if statement.)
template <typename T, typename ScalarType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_ndtri_gt_exp_neg_two(const T& b) {
const ScalarType p0[] = {
ScalarType(-5.99633501014107895267e1),
ScalarType(9.80010754185999661536e1),
ScalarType(-5.66762857469070293439e1),
ScalarType(1.39312609387279679503e1),
ScalarType(-1.23916583867381258016e0)
};
const ScalarType q0[] = {
ScalarType(1.0),
ScalarType(1.95448858338141759834e0),
ScalarType(4.67627912898881538453e0),
ScalarType(8.63602421390890590575e1),
ScalarType(-2.25462687854119370527e2),
ScalarType(2.00260212380060660359e2),
ScalarType(-8.20372256168333339912e1),
ScalarType(1.59056225126211695515e1),
ScalarType(-1.18331621121330003142e0)
};
const T sqrt2pi = pset1<T>(ScalarType(2.50662827463100050242e0));
const T half = pset1<T>(ScalarType(0.5));
T c, c2, ndtri_gt_exp_neg_two;
c = psub(b, half);
c2 = pmul(c, c);
ndtri_gt_exp_neg_two = pmadd(c, pmul(
c2, pdiv(
internal::ppolevl<T, 4>::run(c2, p0),
internal::ppolevl<T, 8>::run(c2, q0))), c);
return pmul(ndtri_gt_exp_neg_two, sqrt2pi);
}
template <typename T, typename ScalarType>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_ndtri_lt_exp_neg_two(
const T& b, const T& should_flipsign) {
/* Approximation for interval z = sqrt(-2 log a ) between 2 and 8
* i.e., a between exp(-2) = .135 and exp(-32) = 1.27e-14.
*/
const ScalarType p1[] = {
ScalarType(4.05544892305962419923e0),
ScalarType(3.15251094599893866154e1),
ScalarType(5.71628192246421288162e1),
ScalarType(4.40805073893200834700e1),
ScalarType(1.46849561928858024014e1),
ScalarType(2.18663306850790267539e0),
ScalarType(-1.40256079171354495875e-1),
ScalarType(-3.50424626827848203418e-2),
ScalarType(-8.57456785154685413611e-4)
};
const ScalarType q1[] = {
ScalarType(1.0),
ScalarType(1.57799883256466749731e1),
ScalarType(4.53907635128879210584e1),
ScalarType(4.13172038254672030440e1),
ScalarType(1.50425385692907503408e1),
ScalarType(2.50464946208309415979e0),
ScalarType(-1.42182922854787788574e-1),
ScalarType(-3.80806407691578277194e-2),
ScalarType(-9.33259480895457427372e-4)
};
/* Approximation for interval z = sqrt(-2 log a ) between 8 and 64
* i.e., a between exp(-32) = 1.27e-14 and exp(-2048) = 3.67e-890.
*/
const ScalarType p2[] = {
ScalarType(3.23774891776946035970e0),
ScalarType(6.91522889068984211695e0),
ScalarType(3.93881025292474443415e0),
ScalarType(1.33303460815807542389e0),
ScalarType(2.01485389549179081538e-1),
ScalarType(1.23716634817820021358e-2),
ScalarType(3.01581553508235416007e-4),
ScalarType(2.65806974686737550832e-6),
ScalarType(6.23974539184983293730e-9)
};
const ScalarType q2[] = {
ScalarType(1.0),
ScalarType(6.02427039364742014255e0),
ScalarType(3.67983563856160859403e0),
ScalarType(1.37702099489081330271e0),
ScalarType(2.16236993594496635890e-1),
ScalarType(1.34204006088543189037e-2),
ScalarType(3.28014464682127739104e-4),
ScalarType(2.89247864745380683936e-6),
ScalarType(6.79019408009981274425e-9)
};
const T eight = pset1<T>(ScalarType(8.0));
const T one = pset1<T>(ScalarType(1));
const T neg_two = pset1<T>(ScalarType(-2));
T x, x0, x1, z;
x = psqrt(pmul(neg_two, plog(b)));
x0 = psub(x, pdiv(plog(x), x));
z = one / x;
x1 = pmul(
z, pselect(
pcmp_lt(x, eight),
pdiv(internal::ppolevl<T, 8>::run(z, p1),
internal::ppolevl<T, 8>::run(z, q1)),
pdiv(internal::ppolevl<T, 8>::run(z, p2),
internal::ppolevl<T, 8>::run(z, q2))));
return flipsign(should_flipsign, psub(x0, x1));
}
template <typename T, typename ScalarType>
T generic_ndtri(const T& a) {
const T maxnum = pset1<T>(NumTraits<ScalarType>::infinity());
const T neg_maxnum = pset1<T>(-NumTraits<ScalarType>::infinity());
const T zero = pset1<T>(ScalarType(0));
const T one = pset1<T>(ScalarType(1));
// exp(-2)
const T exp_neg_two = pset1<T>(ScalarType(0.13533528323661269189));
T b, ndtri, should_flipsign;
should_flipsign = pcmp_le(a, psub(one, exp_neg_two));
b = pselect(should_flipsign, a, psub(one, a));
ndtri = pselect(
pcmp_lt(exp_neg_two, b),
generic_ndtri_gt_exp_neg_two<T, ScalarType>(b),
generic_ndtri_lt_exp_neg_two<T, ScalarType>(b, should_flipsign));
return pselect(
pcmp_le(a, zero), neg_maxnum,
pselect(pcmp_le(one, a), maxnum, ndtri));
}
template <typename Scalar>
struct ndtri_retval {
typedef Scalar type;
};
#if !EIGEN_HAS_C99_MATH
template <typename Scalar>
struct ndtri_impl {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
THIS_TYPE_IS_NOT_SUPPORTED);
return Scalar(0);
}
};
# else
template <typename Scalar>
struct ndtri_impl {
EIGEN_DEVICE_FUNC
static EIGEN_STRONG_INLINE Scalar run(const Scalar x) {
return generic_ndtri<Scalar, Scalar>(x);
}
};
#endif // EIGEN_HAS_C99_MATH
/**************************************************************************************************************
* Implementation of igammac (complemented incomplete gamma integral), based on Cephes but requires C++11/C99 *
**************************************************************************************************************/
@ -2120,6 +2303,12 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar)
return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x);
}
template <typename Scalar>
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(ndtri, Scalar)
ndtri(const Scalar& x) {
return EIGEN_MATHFUNC_IMPL(ndtri, Scalar)::run(x);
}
template <typename Scalar>
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igamma, Scalar)
igamma(const Scalar& a, const Scalar& x) {

View File

@ -38,6 +38,13 @@ Packet perf(const Packet& a) { using numext::erf; return erf(a); }
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }
/** \internal \returns the ndtri(\a a) (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pndtri(const Packet& a) {
typedef typename unpacket_traits<Packet>::type ScalarType;
using internal::generic_ndtri; return generic_ndtri<Packet, ScalarType>(a);
}
/** \internal \returns the incomplete gamma function igamma(\a a, \a x) */
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); }

View File

@ -101,6 +101,19 @@ double2 perfc<double2>(const double2& a)
return make_double2(erfc(a.x), erfc(a.y));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 pndtri<float4>(const float4& a)
{
using numext::ndtri;
return make_float4(ndtri(a.x), ndtri(a.y), ndtri(a.z), ndtri(a.w));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
double2 pndtri<double2>(const double2& a)
{
using numext::ndtri;
return make_double2(ndtri(a.x), ndtri(a.y));
}
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
float4 pigamma<float4>(const float4& a, const float4& x)

View File

@ -1069,6 +1069,66 @@ void test_gpu_erfc(const Scalar stddev)
gpuFree(d_out);
}
#endif
template <typename Scalar>
void test_gpu_ndtri()
{
Tensor<Scalar, 1> in_x(8);
Tensor<Scalar, 1> out(8);
Tensor<Scalar, 1> expected_out(8);
out.setZero();
in_x(0) = Scalar(1);
in_x(1) = Scalar(0.);
in_x(2) = Scalar(0.5);
in_x(3) = Scalar(0.2);
in_x(4) = Scalar(0.8);
in_x(5) = Scalar(0.9);
in_x(6) = Scalar(0.1);
in_x(7) = Scalar(0.99);
in_x(8) = Scalar(0.01);
expected_out(0) = std::numeric_limits<Scalar>::infinity();
expected_out(1) = -std::numeric_limits<Scalar>::infinity();
expected_out(2) = Scalar(0.0);
expected_out(3) = Scalar(-0.8416212335729142);
expected_out(4) = Scalar(0.8416212335729142);j
expected_out(5) = Scalar(1.2815515655446004);
expected_out(6) = Scalar(-1.2815515655446004);
expected_out(7) = Scalar(2.3263478740408408);
expected_out(8) = Scalar(-2.3263478740408408);
std::size_t bytes = in_x.size() * sizeof(Scalar);
Scalar* d_in_x;
Scalar* d_out;
gpuMalloc((void**)(&d_in_x), bytes);
gpuMalloc((void**)(&d_out), bytes);
gpuMemcpy(d_in_x, in_x.data(), bytes, gpuMemcpyHostToDevice);
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_x(d_in_x, 6);
Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 6);
gpu_out.device(gpu_device) = gpu_in_x.ndtri();
assert(gpuMemcpyAsync(out.data(), d_out, bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
VERIFY_IS_EQUAL(out(0), expected_out(0));
VERIFY((std::isnan)(out(3)));
for (int i = 1; i < 6; ++i) {
if (i != 3) {
VERIFY_IS_APPROX(out(i), expected_out(i));
}
}
gpuFree(d_in_x);
gpuFree(d_out);
}
template <typename Scalar>
void test_gpu_betainc()
@ -1538,6 +1598,10 @@ EIGEN_DECLARE_TEST(cxx11_tensor_gpu)
#if !defined(EIGEN_USE_HIP)
// disable these tests on HIP for now.
CALL_SUBTEST_5(test_gpu_ndtri<float>());
CALL_SUBTEST_5(test_gpu_ndtri<double>());
CALL_SUBTEST_5(test_gpu_digamma<float>());
CALL_SUBTEST_5(test_gpu_digamma<double>());

View File

@ -133,6 +133,26 @@ template<typename ArrayType> void array_special_functions()
}
#endif // EIGEN_HAS_C99_MATH
// Check the ndtri function against scipy.special.ndtri
{
ArrayType x(7), res(7), ref(7);
x << 0.5, 0.2, 0.8, 0.9, 0.1, 0.99, 0.01;
ref << 0., -0.8416212335729142, 0.8416212335729142, 1.2815515655446004, -1.2815515655446004, 2.3263478740408408, -2.3263478740408408;
CALL_SUBTEST( verify_component_wise(ref, ref); );
CALL_SUBTEST( res = x.ndtri(); verify_component_wise(res, ref); );
CALL_SUBTEST( res = ndtri(x); verify_component_wise(res, ref); );
// ndtri(normal_cdf(x)) ~= x
CALL_SUBTEST(
ArrayType m1 = ArrayType::Random(32);
using std::sqrt;
ArrayType cdf_val = (m1 / sqrt(2.)).erf();
cdf_val = (cdf_val + 1.) / 2.;
verify_component_wise(cdf_val.ndtri(), m1););
}
// Check the zeta function against scipy.special.zeta
{
ArrayType x(7), q(7), res(7), ref(7);