mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-22 09:39:34 +08:00
PR 681: Add ndtri function, the inverse of the normal distribution function.
This commit is contained in:
parent
f59bed7a13
commit
e38dd48a27
@ -83,6 +83,7 @@ struct default_packet_traits
|
|||||||
HasPolygamma = 0,
|
HasPolygamma = 0,
|
||||||
HasErf = 0,
|
HasErf = 0,
|
||||||
HasErfc = 0,
|
HasErfc = 0,
|
||||||
|
HasNdtri = 0,
|
||||||
HasI0e = 0,
|
HasI0e = 0,
|
||||||
HasI1e = 0,
|
HasI1e = 0,
|
||||||
HasIGamma = 0,
|
HasIGamma = 0,
|
||||||
@ -257,12 +258,32 @@ pldexp(const Packet &a, const Packet &exponent) { return std::ldexp(a,exponent);
|
|||||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||||
pzero(const Packet& a) { return pxor(a,a); }
|
pzero(const Packet& a) { return pxor(a,a); }
|
||||||
|
|
||||||
|
template<> EIGEN_DEVICE_FUNC inline float pzero<float>(const float& a) {
|
||||||
|
EIGEN_UNUSED_VARIABLE(a);
|
||||||
|
return 0.;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_DEVICE_FUNC inline double pzero<double>(const double& a) {
|
||||||
|
EIGEN_UNUSED_VARIABLE(a);
|
||||||
|
return 0.;
|
||||||
|
}
|
||||||
|
|
||||||
/** \internal \returns bits of \a or \b according to the input bit mask \a mask */
|
/** \internal \returns bits of \a or \b according to the input bit mask \a mask */
|
||||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||||
pselect(const Packet& mask, const Packet& a, const Packet& b) {
|
pselect(const Packet& mask, const Packet& a, const Packet& b) {
|
||||||
return por(pand(a,mask),pandnot(b,mask));
|
return por(pand(a,mask),pandnot(b,mask));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_DEVICE_FUNC inline float pselect<float>(
|
||||||
|
const float& mask, const float& a, const float&b) {
|
||||||
|
return mask == 0 ? b : a;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_DEVICE_FUNC inline double pselect<double>(
|
||||||
|
const double& mask, const double& a, const double& b) {
|
||||||
|
return mask == 0 ? b : a;
|
||||||
|
}
|
||||||
|
|
||||||
/** \internal \returns a <= b as a bit mask */
|
/** \internal \returns a <= b as a bit mask */
|
||||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||||
pcmp_le(const Packet& a, const Packet& b) { return a<=b ? ptrue(a) : pzero(a); }
|
pcmp_le(const Packet& a, const Packet& b) { return a<=b ? ptrue(a) : pzero(a); }
|
||||||
|
@ -76,6 +76,7 @@ namespace Eigen
|
|||||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma)
|
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma,scalar_digamma_op,derivative of lgamma,\sa ArrayBase::digamma)
|
||||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf)
|
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf,scalar_erf_op,error function,\sa ArrayBase::erf)
|
||||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op,complement error function,\sa ArrayBase::erfc)
|
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc,scalar_erfc_op,complement error function,\sa ArrayBase::erfc)
|
||||||
|
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ndtri,scalar_ndtri_op,inverse normal distribution function,\sa ArrayBase::ndtri)
|
||||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op,exponential,\sa ArrayBase::exp)
|
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp,scalar_exp_op,exponential,\sa ArrayBase::exp)
|
||||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(expm1,scalar_expm1_op,exponential of a value minus 1,\sa ArrayBase::expm1)
|
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(expm1,scalar_expm1_op,exponential of a value minus 1,\sa ArrayBase::expm1)
|
||||||
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log)
|
EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log,scalar_log_op,natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log)
|
||||||
|
@ -72,6 +72,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
|||||||
HasLog1p = 1,
|
HasLog1p = 1,
|
||||||
HasExpm1 = 1,
|
HasExpm1 = 1,
|
||||||
HasExp = 1,
|
HasExp = 1,
|
||||||
|
HasNdtri = 1,
|
||||||
HasSqrt = 1,
|
HasSqrt = 1,
|
||||||
HasRsqrt = 1,
|
HasRsqrt = 1,
|
||||||
HasTanh = EIGEN_FAST_MATH,
|
HasTanh = EIGEN_FAST_MATH,
|
||||||
|
@ -97,6 +97,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
|||||||
HasLog = 1,
|
HasLog = 1,
|
||||||
HasLog1p = 1,
|
HasLog1p = 1,
|
||||||
HasExpm1 = 1,
|
HasExpm1 = 1,
|
||||||
|
HasNdtri = 1,
|
||||||
#endif
|
#endif
|
||||||
HasExp = 1,
|
HasExp = 1,
|
||||||
HasSqrt = EIGEN_FAST_MATH,
|
HasSqrt = EIGEN_FAST_MATH,
|
||||||
|
@ -512,5 +512,60 @@ Packet pcos_float(const Packet& x)
|
|||||||
return psincos_float<false>(x);
|
return psincos_float<false>(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* polevl (modified for Eigen)
|
||||||
|
*
|
||||||
|
* Evaluate polynomial
|
||||||
|
*
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* SYNOPSIS:
|
||||||
|
*
|
||||||
|
* int N;
|
||||||
|
* Scalar x, y, coef[N+1];
|
||||||
|
*
|
||||||
|
* y = polevl<decltype(x), N>( x, coef);
|
||||||
|
*
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* DESCRIPTION:
|
||||||
|
*
|
||||||
|
* Evaluates polynomial of degree N:
|
||||||
|
*
|
||||||
|
* 2 N
|
||||||
|
* y = C + C x + C x +...+ C x
|
||||||
|
* 0 1 2 N
|
||||||
|
*
|
||||||
|
* Coefficients are stored in reverse order:
|
||||||
|
*
|
||||||
|
* coef[0] = C , ..., coef[N] = C .
|
||||||
|
* N 0
|
||||||
|
*
|
||||||
|
* The function p1evl() assumes that coef[N] = 1.0 and is
|
||||||
|
* omitted from the array. Its calling arguments are
|
||||||
|
* otherwise the same as polevl().
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* The Eigen implementation is templatized. For best speed, store
|
||||||
|
* coef as a const array (constexpr), e.g.
|
||||||
|
*
|
||||||
|
* const double coef[] = {1.0, 2.0, 3.0, ...};
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
template <typename Packet, int N>
|
||||||
|
struct ppolevl {
|
||||||
|
static EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits<Packet>::type coeff[]) {
|
||||||
|
EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
|
||||||
|
return pmadd(ppolevl<Packet, N-1>::run(x, coeff), x, pset1<Packet>(coeff[N]));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Packet>
|
||||||
|
struct ppolevl<Packet, 0> {
|
||||||
|
static EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits<Packet>::type coeff[]) {
|
||||||
|
EIGEN_UNUSED_VARIABLE(x);
|
||||||
|
return pset1<Packet>(coeff[0]);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
@ -44,6 +44,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
|||||||
HasPolygamma = 1,
|
HasPolygamma = 1,
|
||||||
HasErf = 1,
|
HasErf = 1,
|
||||||
HasErfc = 1,
|
HasErfc = 1,
|
||||||
|
HasNdtri = 1,
|
||||||
HasI0e = 1,
|
HasI0e = 1,
|
||||||
HasI1e = 1,
|
HasI1e = 1,
|
||||||
HasIGamma = 1,
|
HasIGamma = 1,
|
||||||
@ -78,6 +79,7 @@ template<> struct packet_traits<double> : default_packet_traits
|
|||||||
HasPolygamma = 1,
|
HasPolygamma = 1,
|
||||||
HasErf = 1,
|
HasErf = 1,
|
||||||
HasErfc = 1,
|
HasErfc = 1,
|
||||||
|
HasNdtri = 1,
|
||||||
HasI0e = 1,
|
HasI0e = 1,
|
||||||
HasI1e = 1,
|
HasI1e = 1,
|
||||||
HasIGamma = 1,
|
HasIGamma = 1,
|
||||||
|
@ -112,6 +112,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
|||||||
HasLog = 1,
|
HasLog = 1,
|
||||||
HasLog1p = 1,
|
HasLog1p = 1,
|
||||||
HasExpm1 = 1,
|
HasExpm1 = 1,
|
||||||
|
HasNdtri = 1,
|
||||||
HasExp = 1,
|
HasExp = 1,
|
||||||
HasSqrt = 1,
|
HasSqrt = 1,
|
||||||
HasRsqrt = 1,
|
HasRsqrt = 1,
|
||||||
|
@ -54,6 +54,7 @@ struct sycl_packet_traits : default_packet_traits {
|
|||||||
HasPolygamma = 0,
|
HasPolygamma = 0,
|
||||||
HasErf = 0,
|
HasErf = 0,
|
||||||
HasErfc = 0,
|
HasErfc = 0,
|
||||||
|
HasNdtri = 0,
|
||||||
HasIGamma = 0,
|
HasIGamma = 0,
|
||||||
HasIGammac = 0,
|
HasIGammac = 0,
|
||||||
HasBetaInc = 0,
|
HasBetaInc = 0,
|
||||||
|
@ -214,6 +214,7 @@ template<typename Scalar> struct scalar_lgamma_op;
|
|||||||
template<typename Scalar> struct scalar_digamma_op;
|
template<typename Scalar> struct scalar_digamma_op;
|
||||||
template<typename Scalar> struct scalar_erf_op;
|
template<typename Scalar> struct scalar_erf_op;
|
||||||
template<typename Scalar> struct scalar_erfc_op;
|
template<typename Scalar> struct scalar_erfc_op;
|
||||||
|
template<typename Scalar> struct scalar_ndtri_op;
|
||||||
template<typename Scalar> struct scalar_igamma_op;
|
template<typename Scalar> struct scalar_igamma_op;
|
||||||
template<typename Scalar> struct scalar_igammac_op;
|
template<typename Scalar> struct scalar_igammac_op;
|
||||||
template<typename Scalar> struct scalar_zeta_op;
|
template<typename Scalar> struct scalar_zeta_op;
|
||||||
|
@ -536,14 +536,12 @@ typedef CwiseUnaryOp<internal::scalar_lgamma_op<Scalar>, const Derived> LgammaRe
|
|||||||
typedef CwiseUnaryOp<internal::scalar_digamma_op<Scalar>, const Derived> DigammaReturnType;
|
typedef CwiseUnaryOp<internal::scalar_digamma_op<Scalar>, const Derived> DigammaReturnType;
|
||||||
typedef CwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived> ErfReturnType;
|
typedef CwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived> ErfReturnType;
|
||||||
typedef CwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived> ErfcReturnType;
|
typedef CwiseUnaryOp<internal::scalar_erfc_op<Scalar>, const Derived> ErfcReturnType;
|
||||||
|
typedef CwiseUnaryOp<internal::scalar_ndtri_op<Scalar>, const Derived> NdtriReturnType;
|
||||||
|
|
||||||
/** \cpp11 \returns an expression of the coefficient-wise ln(|gamma(*this)|).
|
/** \cpp11 \returns an expression of the coefficient-wise ln(|gamma(*this)|).
|
||||||
*
|
*
|
||||||
* \specialfunctions_module
|
* \specialfunctions_module
|
||||||
*
|
*
|
||||||
* Example: \include Cwise_lgamma.cpp
|
|
||||||
* Output: \verbinclude Cwise_lgamma.out
|
|
||||||
*
|
|
||||||
* \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
|
* \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
|
||||||
* or float/double in non c++11 mode, the user has to provide implementations of lgamma(T) for any scalar
|
* or float/double in non c++11 mode, the user has to provide implementations of lgamma(T) for any scalar
|
||||||
* type T to be supported.
|
* type T to be supported.
|
||||||
@ -579,9 +577,6 @@ digamma() const
|
|||||||
*
|
*
|
||||||
* \specialfunctions_module
|
* \specialfunctions_module
|
||||||
*
|
*
|
||||||
* Example: \include Cwise_erf.cpp
|
|
||||||
* Output: \verbinclude Cwise_erf.out
|
|
||||||
*
|
|
||||||
* \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
|
* \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
|
||||||
* or float/double in non c++11 mode, the user has to provide implementations of erf(T) for any scalar
|
* or float/double in non c++11 mode, the user has to provide implementations of erf(T) for any scalar
|
||||||
* type T to be supported.
|
* type T to be supported.
|
||||||
@ -600,9 +595,6 @@ erf() const
|
|||||||
*
|
*
|
||||||
* \specialfunctions_module
|
* \specialfunctions_module
|
||||||
*
|
*
|
||||||
* Example: \include Cwise_erfc.cpp
|
|
||||||
* Output: \verbinclude Cwise_erfc.out
|
|
||||||
*
|
|
||||||
* \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
|
* \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
|
||||||
* or float/double in non c++11 mode, the user has to provide implementations of erfc(T) for any scalar
|
* or float/double in non c++11 mode, the user has to provide implementations of erfc(T) for any scalar
|
||||||
* type T to be supported.
|
* type T to be supported.
|
||||||
@ -615,3 +607,21 @@ erfc() const
|
|||||||
{
|
{
|
||||||
return ErfcReturnType(derived());
|
return ErfcReturnType(derived());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** \cpp11 \returns an expression of the coefficient-wise Complementary error
|
||||||
|
* function of *this.
|
||||||
|
*
|
||||||
|
* \specialfunctions_module
|
||||||
|
*
|
||||||
|
* \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
|
||||||
|
* or float/double in non c++11 mode, the user has to provide implementations of ndtri(T) for any scalar
|
||||||
|
* type T to be supported.
|
||||||
|
*
|
||||||
|
* \sa <a href="group__CoeffwiseMathFunctions.html#cwisetable_ndtri">Math functions</a>, erf()
|
||||||
|
*/
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
inline const NdtriReturnType
|
||||||
|
ndtri() const
|
||||||
|
{
|
||||||
|
return NdtriReturnType(derived());
|
||||||
|
}
|
||||||
|
@ -590,6 +590,12 @@ template<typename Scalar,typename Packet> void packetmath_real()
|
|||||||
h.store(data2, internal::perfc(h.load(data1)));
|
h.store(data2, internal::perfc(h.load(data1)));
|
||||||
VERIFY((numext::isnan)(data2[0]));
|
VERIFY((numext::isnan)(data2[0]));
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
for (int i=0; i<size; ++i) {
|
||||||
|
data1[i] = internal::random<Scalar>(0,1);
|
||||||
|
}
|
||||||
|
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasNdtri, numext::ndtri, internal::pndtri);
|
||||||
|
}
|
||||||
#endif // EIGEN_HAS_C99_MATH
|
#endif // EIGEN_HAS_C99_MATH
|
||||||
|
|
||||||
for (int i=0; i<size; ++i)
|
for (int i=0; i<size; ++i)
|
||||||
|
@ -201,6 +201,12 @@ class TensorBase<Derived, ReadOnlyAccessors>
|
|||||||
return unaryExpr(internal::scalar_erfc_op<Scalar>());
|
return unaryExpr(internal::scalar_erfc_op<Scalar>());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_ndtri_op<Scalar>, const Derived>
|
||||||
|
ndtri() const {
|
||||||
|
return unaryExpr(internal::scalar_ndtri_op<Scalar>());
|
||||||
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC
|
EIGEN_DEVICE_FUNC
|
||||||
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_logistic_op<Scalar>, const Derived>
|
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_logistic_op<Scalar>, const Derived>
|
||||||
sigmoid() const {
|
sigmoid() const {
|
||||||
|
@ -33,6 +33,7 @@ namespace Eigen {
|
|||||||
* - gamma_sample_der_alpha
|
* - gamma_sample_der_alpha
|
||||||
* - igammac
|
* - igammac
|
||||||
* - digamma
|
* - digamma
|
||||||
|
* - ndtri
|
||||||
* - polygamma
|
* - polygamma
|
||||||
* - zeta
|
* - zeta
|
||||||
* - betainc
|
* - betainc
|
||||||
|
@ -283,6 +283,31 @@ struct functor_traits<scalar_erfc_op<Scalar> >
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** \internal
|
||||||
|
* \brief Template functor to compute the Inverse of the normal distribution
|
||||||
|
* function of a scalar
|
||||||
|
* \sa class CwiseUnaryOp, Cwise::ndtri()
|
||||||
|
*/
|
||||||
|
template<typename Scalar> struct scalar_ndtri_op {
|
||||||
|
EIGEN_EMPTY_STRUCT_CTOR(scalar_ndtri_op)
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const {
|
||||||
|
using numext::ndtri; return ndtri(a);
|
||||||
|
}
|
||||||
|
typedef typename packet_traits<Scalar>::type Packet;
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::pndtri(a); }
|
||||||
|
};
|
||||||
|
template<typename Scalar>
|
||||||
|
struct functor_traits<scalar_ndtri_op<Scalar> >
|
||||||
|
{
|
||||||
|
enum {
|
||||||
|
// On average, We are evaluating rational functions with degree N=9 in the
|
||||||
|
// numerator and denominator. This results in 2*N additions and 2*N
|
||||||
|
// multiplications.
|
||||||
|
Cost = 18 * NumTraits<Scalar>::MulCost + 18 * NumTraits<Scalar>::AddCost,
|
||||||
|
PacketAccess = packet_traits<Scalar>::HasNdtri
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
/** \internal
|
/** \internal
|
||||||
* \brief Template functor to compute the exponentially scaled modified Bessel
|
* \brief Template functor to compute the exponentially scaled modified Bessel
|
||||||
* function of order zero
|
* function of order zero
|
||||||
|
@ -30,6 +30,9 @@ template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erf(const Eigen::ha
|
|||||||
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erfc(const Eigen::half& a) {
|
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erfc(const Eigen::half& a) {
|
||||||
return Eigen::half(Eigen::numext::erfc(static_cast<float>(a)));
|
return Eigen::half(Eigen::numext::erfc(static_cast<float>(a)));
|
||||||
}
|
}
|
||||||
|
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half ndtri(const Eigen::half& a) {
|
||||||
|
return Eigen::half(Eigen::numext::ndtri(static_cast<float>(a)));
|
||||||
|
}
|
||||||
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma(const Eigen::half& a, const Eigen::half& x) {
|
template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma(const Eigen::half& a, const Eigen::half& x) {
|
||||||
return Eigen::half(Eigen::numext::igamma(static_cast<float>(a), static_cast<float>(x)));
|
return Eigen::half(Eigen::numext::igamma(static_cast<float>(a), static_cast<float>(x)));
|
||||||
}
|
}
|
||||||
|
@ -38,63 +38,6 @@ namespace internal {
|
|||||||
|
|
||||||
namespace cephes {
|
namespace cephes {
|
||||||
|
|
||||||
/* polevl (modified for Eigen)
|
|
||||||
*
|
|
||||||
* Evaluate polynomial
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* SYNOPSIS:
|
|
||||||
*
|
|
||||||
* int N;
|
|
||||||
* Scalar x, y, coef[N+1];
|
|
||||||
*
|
|
||||||
* y = polevl<decltype(x), N>( x, coef);
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* DESCRIPTION:
|
|
||||||
*
|
|
||||||
* Evaluates polynomial of degree N:
|
|
||||||
*
|
|
||||||
* 2 N
|
|
||||||
* y = C + C x + C x +...+ C x
|
|
||||||
* 0 1 2 N
|
|
||||||
*
|
|
||||||
* Coefficients are stored in reverse order:
|
|
||||||
*
|
|
||||||
* coef[0] = C , ..., coef[N] = C .
|
|
||||||
* N 0
|
|
||||||
*
|
|
||||||
* The function p1evl() assumes that coef[N] = 1.0 and is
|
|
||||||
* omitted from the array. Its calling arguments are
|
|
||||||
* otherwise the same as polevl().
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* The Eigen implementation is templatized. For best speed, store
|
|
||||||
* coef as a const array (constexpr), e.g.
|
|
||||||
*
|
|
||||||
* const double coef[] = {1.0, 2.0, 3.0, ...};
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
template <typename Scalar, int N>
|
|
||||||
struct polevl {
|
|
||||||
EIGEN_DEVICE_FUNC
|
|
||||||
static EIGEN_STRONG_INLINE Scalar run(const Scalar x, const Scalar coef[]) {
|
|
||||||
EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
|
|
||||||
|
|
||||||
return polevl<Scalar, N - 1>::run(x, coef) * x + coef[N];
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename Scalar>
|
|
||||||
struct polevl<Scalar, 0> {
|
|
||||||
EIGEN_DEVICE_FUNC
|
|
||||||
static EIGEN_STRONG_INLINE Scalar run(const Scalar, const Scalar coef[]) {
|
|
||||||
return coef[0];
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/* chbevl (modified for Eigen)
|
/* chbevl (modified for Eigen)
|
||||||
*
|
*
|
||||||
* Evaluate Chebyshev series
|
* Evaluate Chebyshev series
|
||||||
@ -264,7 +207,7 @@ struct digamma_impl_maybe_poly<float> {
|
|||||||
float z;
|
float z;
|
||||||
if (s < 1.0e8f) {
|
if (s < 1.0e8f) {
|
||||||
z = 1.0f / (s * s);
|
z = 1.0f / (s * s);
|
||||||
return z * cephes::polevl<float, 3>::run(z, A);
|
return z * internal::ppolevl<float, 3>::run(z, A);
|
||||||
} else return 0.0f;
|
} else return 0.0f;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -286,7 +229,7 @@ struct digamma_impl_maybe_poly<double> {
|
|||||||
double z;
|
double z;
|
||||||
if (s < 1.0e17) {
|
if (s < 1.0e17) {
|
||||||
z = 1.0 / (s * s);
|
z = 1.0 / (s * s);
|
||||||
return z * cephes::polevl<double, 6>::run(z, A);
|
return z * internal::ppolevl<double, 6>::run(z, A);
|
||||||
}
|
}
|
||||||
else return 0.0;
|
else return 0.0;
|
||||||
}
|
}
|
||||||
@ -494,6 +437,246 @@ struct erfc_impl<double> {
|
|||||||
};
|
};
|
||||||
#endif // EIGEN_HAS_C99_MATH
|
#endif // EIGEN_HAS_C99_MATH
|
||||||
|
|
||||||
|
|
||||||
|
/***************************************************************************
|
||||||
|
* Implementation of ndtri. *
|
||||||
|
****************************************************************************/
|
||||||
|
|
||||||
|
/* Inverse of Normal distribution function (modified for Eigen).
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* SYNOPSIS:
|
||||||
|
*
|
||||||
|
* double x, y, ndtri();
|
||||||
|
*
|
||||||
|
* x = ndtri( y );
|
||||||
|
*
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* DESCRIPTION:
|
||||||
|
*
|
||||||
|
* Returns the argument, x, for which the area under the
|
||||||
|
* Gaussian probability density function (integrated from
|
||||||
|
* minus infinity to x) is equal to y.
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* For small arguments 0 < y < exp(-2), the program computes
|
||||||
|
* z = sqrt( -2.0 * log(y) ); then the approximation is
|
||||||
|
* x = z - log(z)/z - (1/z) P(1/z) / Q(1/z).
|
||||||
|
* There are two rational functions P/Q, one for 0 < y < exp(-32)
|
||||||
|
* and the other for y up to exp(-2). For larger arguments,
|
||||||
|
* w = y - 0.5, and x/sqrt(2pi) = w + w**3 R(w**2)/S(w**2)).
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* ACCURACY:
|
||||||
|
*
|
||||||
|
* Relative error:
|
||||||
|
* arithmetic domain # trials peak rms
|
||||||
|
* DEC 0.125, 1 5500 9.5e-17 2.1e-17
|
||||||
|
* DEC 6e-39, 0.135 3500 5.7e-17 1.3e-17
|
||||||
|
* IEEE 0.125, 1 20000 7.2e-16 1.3e-16
|
||||||
|
* IEEE 3e-308, 0.135 50000 4.6e-16 9.8e-17
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* ERROR MESSAGES:
|
||||||
|
*
|
||||||
|
* message condition value returned
|
||||||
|
* ndtri domain x <= 0 -MAXNUM
|
||||||
|
* ndtri domain x >= 1 MAXNUM
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
Cephes Math Library Release 2.2: June, 1992
|
||||||
|
Copyright 1985, 1987, 1992 by Stephen L. Moshier
|
||||||
|
Direct inquiries to 30 Frost Street, Cambridge, MA 02140
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
// TODO: Add a cheaper approximation for float.
|
||||||
|
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T flipsign(
|
||||||
|
const T& should_flipsign, const T& x) {
|
||||||
|
const T sign_mask = pset1<T>(-0.0);
|
||||||
|
T sign_bit = pand<T>(should_flipsign, sign_mask);
|
||||||
|
return pxor<T>(sign_bit, x);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double flipsign<double>(
|
||||||
|
const double& should_flipsign, const double& x) {
|
||||||
|
return should_flipsign == 0 ? x : -x;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float flipsign<float>(
|
||||||
|
const float& should_flipsign, const float& x) {
|
||||||
|
return should_flipsign == 0 ? x : -x;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We split this computation in to two so that in the scalar path
|
||||||
|
// only one branch is evaluated (due to our template specialization of pselect
|
||||||
|
// being an if statement.)
|
||||||
|
|
||||||
|
template <typename T, typename ScalarType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_ndtri_gt_exp_neg_two(const T& b) {
|
||||||
|
const ScalarType p0[] = {
|
||||||
|
ScalarType(-5.99633501014107895267e1),
|
||||||
|
ScalarType(9.80010754185999661536e1),
|
||||||
|
ScalarType(-5.66762857469070293439e1),
|
||||||
|
ScalarType(1.39312609387279679503e1),
|
||||||
|
ScalarType(-1.23916583867381258016e0)
|
||||||
|
};
|
||||||
|
const ScalarType q0[] = {
|
||||||
|
ScalarType(1.0),
|
||||||
|
ScalarType(1.95448858338141759834e0),
|
||||||
|
ScalarType(4.67627912898881538453e0),
|
||||||
|
ScalarType(8.63602421390890590575e1),
|
||||||
|
ScalarType(-2.25462687854119370527e2),
|
||||||
|
ScalarType(2.00260212380060660359e2),
|
||||||
|
ScalarType(-8.20372256168333339912e1),
|
||||||
|
ScalarType(1.59056225126211695515e1),
|
||||||
|
ScalarType(-1.18331621121330003142e0)
|
||||||
|
};
|
||||||
|
const T sqrt2pi = pset1<T>(ScalarType(2.50662827463100050242e0));
|
||||||
|
const T half = pset1<T>(ScalarType(0.5));
|
||||||
|
T c, c2, ndtri_gt_exp_neg_two;
|
||||||
|
|
||||||
|
c = psub(b, half);
|
||||||
|
c2 = pmul(c, c);
|
||||||
|
ndtri_gt_exp_neg_two = pmadd(c, pmul(
|
||||||
|
c2, pdiv(
|
||||||
|
internal::ppolevl<T, 4>::run(c2, p0),
|
||||||
|
internal::ppolevl<T, 8>::run(c2, q0))), c);
|
||||||
|
return pmul(ndtri_gt_exp_neg_two, sqrt2pi);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, typename ScalarType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_ndtri_lt_exp_neg_two(
|
||||||
|
const T& b, const T& should_flipsign) {
|
||||||
|
/* Approximation for interval z = sqrt(-2 log a ) between 2 and 8
|
||||||
|
* i.e., a between exp(-2) = .135 and exp(-32) = 1.27e-14.
|
||||||
|
*/
|
||||||
|
const ScalarType p1[] = {
|
||||||
|
ScalarType(4.05544892305962419923e0),
|
||||||
|
ScalarType(3.15251094599893866154e1),
|
||||||
|
ScalarType(5.71628192246421288162e1),
|
||||||
|
ScalarType(4.40805073893200834700e1),
|
||||||
|
ScalarType(1.46849561928858024014e1),
|
||||||
|
ScalarType(2.18663306850790267539e0),
|
||||||
|
ScalarType(-1.40256079171354495875e-1),
|
||||||
|
ScalarType(-3.50424626827848203418e-2),
|
||||||
|
ScalarType(-8.57456785154685413611e-4)
|
||||||
|
};
|
||||||
|
const ScalarType q1[] = {
|
||||||
|
ScalarType(1.0),
|
||||||
|
ScalarType(1.57799883256466749731e1),
|
||||||
|
ScalarType(4.53907635128879210584e1),
|
||||||
|
ScalarType(4.13172038254672030440e1),
|
||||||
|
ScalarType(1.50425385692907503408e1),
|
||||||
|
ScalarType(2.50464946208309415979e0),
|
||||||
|
ScalarType(-1.42182922854787788574e-1),
|
||||||
|
ScalarType(-3.80806407691578277194e-2),
|
||||||
|
ScalarType(-9.33259480895457427372e-4)
|
||||||
|
};
|
||||||
|
/* Approximation for interval z = sqrt(-2 log a ) between 8 and 64
|
||||||
|
* i.e., a between exp(-32) = 1.27e-14 and exp(-2048) = 3.67e-890.
|
||||||
|
*/
|
||||||
|
const ScalarType p2[] = {
|
||||||
|
ScalarType(3.23774891776946035970e0),
|
||||||
|
ScalarType(6.91522889068984211695e0),
|
||||||
|
ScalarType(3.93881025292474443415e0),
|
||||||
|
ScalarType(1.33303460815807542389e0),
|
||||||
|
ScalarType(2.01485389549179081538e-1),
|
||||||
|
ScalarType(1.23716634817820021358e-2),
|
||||||
|
ScalarType(3.01581553508235416007e-4),
|
||||||
|
ScalarType(2.65806974686737550832e-6),
|
||||||
|
ScalarType(6.23974539184983293730e-9)
|
||||||
|
};
|
||||||
|
const ScalarType q2[] = {
|
||||||
|
ScalarType(1.0),
|
||||||
|
ScalarType(6.02427039364742014255e0),
|
||||||
|
ScalarType(3.67983563856160859403e0),
|
||||||
|
ScalarType(1.37702099489081330271e0),
|
||||||
|
ScalarType(2.16236993594496635890e-1),
|
||||||
|
ScalarType(1.34204006088543189037e-2),
|
||||||
|
ScalarType(3.28014464682127739104e-4),
|
||||||
|
ScalarType(2.89247864745380683936e-6),
|
||||||
|
ScalarType(6.79019408009981274425e-9)
|
||||||
|
};
|
||||||
|
const T eight = pset1<T>(ScalarType(8.0));
|
||||||
|
const T one = pset1<T>(ScalarType(1));
|
||||||
|
const T neg_two = pset1<T>(ScalarType(-2));
|
||||||
|
T x, x0, x1, z;
|
||||||
|
|
||||||
|
x = psqrt(pmul(neg_two, plog(b)));
|
||||||
|
x0 = psub(x, pdiv(plog(x), x));
|
||||||
|
z = one / x;
|
||||||
|
x1 = pmul(
|
||||||
|
z, pselect(
|
||||||
|
pcmp_lt(x, eight),
|
||||||
|
pdiv(internal::ppolevl<T, 8>::run(z, p1),
|
||||||
|
internal::ppolevl<T, 8>::run(z, q1)),
|
||||||
|
pdiv(internal::ppolevl<T, 8>::run(z, p2),
|
||||||
|
internal::ppolevl<T, 8>::run(z, q2))));
|
||||||
|
return flipsign(should_flipsign, psub(x0, x1));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, typename ScalarType>
|
||||||
|
T generic_ndtri(const T& a) {
|
||||||
|
const T maxnum = pset1<T>(NumTraits<ScalarType>::infinity());
|
||||||
|
const T neg_maxnum = pset1<T>(-NumTraits<ScalarType>::infinity());
|
||||||
|
|
||||||
|
const T zero = pset1<T>(ScalarType(0));
|
||||||
|
const T one = pset1<T>(ScalarType(1));
|
||||||
|
// exp(-2)
|
||||||
|
const T exp_neg_two = pset1<T>(ScalarType(0.13533528323661269189));
|
||||||
|
T b, ndtri, should_flipsign;
|
||||||
|
|
||||||
|
should_flipsign = pcmp_le(a, psub(one, exp_neg_two));
|
||||||
|
b = pselect(should_flipsign, a, psub(one, a));
|
||||||
|
|
||||||
|
ndtri = pselect(
|
||||||
|
pcmp_lt(exp_neg_two, b),
|
||||||
|
generic_ndtri_gt_exp_neg_two<T, ScalarType>(b),
|
||||||
|
generic_ndtri_lt_exp_neg_two<T, ScalarType>(b, should_flipsign));
|
||||||
|
|
||||||
|
return pselect(
|
||||||
|
pcmp_le(a, zero), neg_maxnum,
|
||||||
|
pselect(pcmp_le(one, a), maxnum, ndtri));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Scalar>
|
||||||
|
struct ndtri_retval {
|
||||||
|
typedef Scalar type;
|
||||||
|
};
|
||||||
|
|
||||||
|
#if !EIGEN_HAS_C99_MATH
|
||||||
|
|
||||||
|
template <typename Scalar>
|
||||||
|
struct ndtri_impl {
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
|
||||||
|
EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
|
||||||
|
THIS_TYPE_IS_NOT_SUPPORTED);
|
||||||
|
return Scalar(0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
# else
|
||||||
|
|
||||||
|
template <typename Scalar>
|
||||||
|
struct ndtri_impl {
|
||||||
|
EIGEN_DEVICE_FUNC
|
||||||
|
static EIGEN_STRONG_INLINE Scalar run(const Scalar x) {
|
||||||
|
return generic_ndtri<Scalar, Scalar>(x);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // EIGEN_HAS_C99_MATH
|
||||||
|
|
||||||
|
|
||||||
/**************************************************************************************************************
|
/**************************************************************************************************************
|
||||||
* Implementation of igammac (complemented incomplete gamma integral), based on Cephes but requires C++11/C99 *
|
* Implementation of igammac (complemented incomplete gamma integral), based on Cephes but requires C++11/C99 *
|
||||||
**************************************************************************************************************/
|
**************************************************************************************************************/
|
||||||
@ -2120,6 +2303,12 @@ EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar)
|
|||||||
return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x);
|
return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename Scalar>
|
||||||
|
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(ndtri, Scalar)
|
||||||
|
ndtri(const Scalar& x) {
|
||||||
|
return EIGEN_MATHFUNC_IMPL(ndtri, Scalar)::run(x);
|
||||||
|
}
|
||||||
|
|
||||||
template <typename Scalar>
|
template <typename Scalar>
|
||||||
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igamma, Scalar)
|
EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igamma, Scalar)
|
||||||
igamma(const Scalar& a, const Scalar& x) {
|
igamma(const Scalar& a, const Scalar& x) {
|
||||||
|
@ -38,6 +38,13 @@ Packet perf(const Packet& a) { using numext::erf; return erf(a); }
|
|||||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||||
Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }
|
Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }
|
||||||
|
|
||||||
|
/** \internal \returns the ndtri(\a a) (coeff-wise) */
|
||||||
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||||
|
Packet pndtri(const Packet& a) {
|
||||||
|
typedef typename unpacket_traits<Packet>::type ScalarType;
|
||||||
|
using internal::generic_ndtri; return generic_ndtri<Packet, ScalarType>(a);
|
||||||
|
}
|
||||||
|
|
||||||
/** \internal \returns the incomplete gamma function igamma(\a a, \a x) */
|
/** \internal \returns the incomplete gamma function igamma(\a a, \a x) */
|
||||||
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); }
|
Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); }
|
||||||
|
@ -101,6 +101,19 @@ double2 perfc<double2>(const double2& a)
|
|||||||
return make_double2(erfc(a.x), erfc(a.y));
|
return make_double2(erfc(a.x), erfc(a.y));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
|
float4 pndtri<float4>(const float4& a)
|
||||||
|
{
|
||||||
|
using numext::ndtri;
|
||||||
|
return make_float4(ndtri(a.x), ndtri(a.y), ndtri(a.z), ndtri(a.w));
|
||||||
|
}
|
||||||
|
|
||||||
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
|
double2 pndtri<double2>(const double2& a)
|
||||||
|
{
|
||||||
|
using numext::ndtri;
|
||||||
|
return make_double2(ndtri(a.x), ndtri(a.y));
|
||||||
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
float4 pigamma<float4>(const float4& a, const float4& x)
|
float4 pigamma<float4>(const float4& a, const float4& x)
|
||||||
|
@ -1069,6 +1069,66 @@ void test_gpu_erfc(const Scalar stddev)
|
|||||||
gpuFree(d_out);
|
gpuFree(d_out);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
template <typename Scalar>
|
||||||
|
void test_gpu_ndtri()
|
||||||
|
{
|
||||||
|
Tensor<Scalar, 1> in_x(8);
|
||||||
|
Tensor<Scalar, 1> out(8);
|
||||||
|
Tensor<Scalar, 1> expected_out(8);
|
||||||
|
out.setZero();
|
||||||
|
|
||||||
|
in_x(0) = Scalar(1);
|
||||||
|
in_x(1) = Scalar(0.);
|
||||||
|
in_x(2) = Scalar(0.5);
|
||||||
|
in_x(3) = Scalar(0.2);
|
||||||
|
in_x(4) = Scalar(0.8);
|
||||||
|
in_x(5) = Scalar(0.9);
|
||||||
|
in_x(6) = Scalar(0.1);
|
||||||
|
in_x(7) = Scalar(0.99);
|
||||||
|
in_x(8) = Scalar(0.01);
|
||||||
|
|
||||||
|
expected_out(0) = std::numeric_limits<Scalar>::infinity();
|
||||||
|
expected_out(1) = -std::numeric_limits<Scalar>::infinity();
|
||||||
|
expected_out(2) = Scalar(0.0);
|
||||||
|
expected_out(3) = Scalar(-0.8416212335729142);
|
||||||
|
expected_out(4) = Scalar(0.8416212335729142);j
|
||||||
|
expected_out(5) = Scalar(1.2815515655446004);
|
||||||
|
expected_out(6) = Scalar(-1.2815515655446004);
|
||||||
|
expected_out(7) = Scalar(2.3263478740408408);
|
||||||
|
expected_out(8) = Scalar(-2.3263478740408408);
|
||||||
|
|
||||||
|
std::size_t bytes = in_x.size() * sizeof(Scalar);
|
||||||
|
|
||||||
|
Scalar* d_in_x;
|
||||||
|
Scalar* d_out;
|
||||||
|
gpuMalloc((void**)(&d_in_x), bytes);
|
||||||
|
gpuMalloc((void**)(&d_out), bytes);
|
||||||
|
|
||||||
|
gpuMemcpy(d_in_x, in_x.data(), bytes, gpuMemcpyHostToDevice);
|
||||||
|
|
||||||
|
Eigen::GpuStreamDevice stream;
|
||||||
|
Eigen::GpuDevice gpu_device(&stream);
|
||||||
|
|
||||||
|
Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_in_x(d_in_x, 6);
|
||||||
|
Eigen::TensorMap<Eigen::Tensor<Scalar, 1> > gpu_out(d_out, 6);
|
||||||
|
|
||||||
|
gpu_out.device(gpu_device) = gpu_in_x.ndtri();
|
||||||
|
|
||||||
|
assert(gpuMemcpyAsync(out.data(), d_out, bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
|
||||||
|
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
|
||||||
|
|
||||||
|
VERIFY_IS_EQUAL(out(0), expected_out(0));
|
||||||
|
VERIFY((std::isnan)(out(3)));
|
||||||
|
|
||||||
|
for (int i = 1; i < 6; ++i) {
|
||||||
|
if (i != 3) {
|
||||||
|
VERIFY_IS_APPROX(out(i), expected_out(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
gpuFree(d_in_x);
|
||||||
|
gpuFree(d_out);
|
||||||
|
}
|
||||||
|
|
||||||
template <typename Scalar>
|
template <typename Scalar>
|
||||||
void test_gpu_betainc()
|
void test_gpu_betainc()
|
||||||
@ -1538,6 +1598,10 @@ EIGEN_DECLARE_TEST(cxx11_tensor_gpu)
|
|||||||
|
|
||||||
#if !defined(EIGEN_USE_HIP)
|
#if !defined(EIGEN_USE_HIP)
|
||||||
// disable these tests on HIP for now.
|
// disable these tests on HIP for now.
|
||||||
|
|
||||||
|
CALL_SUBTEST_5(test_gpu_ndtri<float>());
|
||||||
|
CALL_SUBTEST_5(test_gpu_ndtri<double>());
|
||||||
|
|
||||||
CALL_SUBTEST_5(test_gpu_digamma<float>());
|
CALL_SUBTEST_5(test_gpu_digamma<float>());
|
||||||
CALL_SUBTEST_5(test_gpu_digamma<double>());
|
CALL_SUBTEST_5(test_gpu_digamma<double>());
|
||||||
|
|
||||||
|
@ -133,6 +133,26 @@ template<typename ArrayType> void array_special_functions()
|
|||||||
}
|
}
|
||||||
#endif // EIGEN_HAS_C99_MATH
|
#endif // EIGEN_HAS_C99_MATH
|
||||||
|
|
||||||
|
// Check the ndtri function against scipy.special.ndtri
|
||||||
|
{
|
||||||
|
ArrayType x(7), res(7), ref(7);
|
||||||
|
x << 0.5, 0.2, 0.8, 0.9, 0.1, 0.99, 0.01;
|
||||||
|
ref << 0., -0.8416212335729142, 0.8416212335729142, 1.2815515655446004, -1.2815515655446004, 2.3263478740408408, -2.3263478740408408;
|
||||||
|
CALL_SUBTEST( verify_component_wise(ref, ref); );
|
||||||
|
CALL_SUBTEST( res = x.ndtri(); verify_component_wise(res, ref); );
|
||||||
|
CALL_SUBTEST( res = ndtri(x); verify_component_wise(res, ref); );
|
||||||
|
|
||||||
|
// ndtri(normal_cdf(x)) ~= x
|
||||||
|
CALL_SUBTEST(
|
||||||
|
ArrayType m1 = ArrayType::Random(32);
|
||||||
|
using std::sqrt;
|
||||||
|
|
||||||
|
ArrayType cdf_val = (m1 / sqrt(2.)).erf();
|
||||||
|
cdf_val = (cdf_val + 1.) / 2.;
|
||||||
|
verify_component_wise(cdf_val.ndtri(), m1););
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
// Check the zeta function against scipy.special.zeta
|
// Check the zeta function against scipy.special.zeta
|
||||||
{
|
{
|
||||||
ArrayType x(7), q(7), res(7), ref(7);
|
ArrayType x(7), q(7), res(7), ref(7);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user