mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 03:39:01 +08:00
Merged in rmlarsen/eigen (pull request PR-704)
Add generic PacketMath implementation of the Error Function (erf).
This commit is contained in:
commit
d38e6fbc27
@ -531,6 +531,10 @@ Packet pcosh(const Packet& a) { EIGEN_USING_STD_MATH(cosh); return cosh(a); }
|
|||||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||||
Packet ptanh(const Packet& a) { EIGEN_USING_STD_MATH(tanh); return tanh(a); }
|
Packet ptanh(const Packet& a) { EIGEN_USING_STD_MATH(tanh); return tanh(a); }
|
||||||
|
|
||||||
|
/** \internal \returns the error function of \a a (coeff-wise). */
|
||||||
|
template <typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||||
|
Packet perf(const Packet& a) { return numext::erf(a); }
|
||||||
|
|
||||||
/** \internal \returns the exp of \a a (coeff-wise) */
|
/** \internal \returns the exp of \a a (coeff-wise) */
|
||||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||||
Packet pexp(const Packet& a) { EIGEN_USING_STD_MATH(exp); return exp(a); }
|
Packet pexp(const Packet& a) { EIGEN_USING_STD_MATH(exp); return exp(a); }
|
||||||
|
@ -891,7 +891,7 @@ template<typename T> EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex<T>& x)
|
|||||||
template<typename T> EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x);
|
template<typename T> EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex<T>& x);
|
||||||
|
|
||||||
template<typename T> T generic_fast_tanh_float(const T& a_x);
|
template<typename T> T generic_fast_tanh_float(const T& a_x);
|
||||||
|
// Forward declaration of the fast erf approximation; it is called by the
// float overload of numext::erf and by the packet perf specializations.
template<typename T> T generic_fast_erf_float(const T& a_x);
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
/****************************************************************************
|
/****************************************************************************
|
||||||
@ -1579,6 +1579,30 @@ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
|||||||
double tanh(const double &x) { return ::tanh(x); }
|
double tanh(const double &x) { return ::tanh(x); }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
|
T erf(const T &x) {
|
||||||
|
EIGEN_USING_STD_MATH(tanh);
|
||||||
|
return erf(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fast path for float: when EIGEN_FAST_MATH is enabled and we are neither
// under a GPU compiler nor in a SYCL device pass, evaluate erf with the
// rational approximation internal::generic_fast_erf_float.
#if !defined(EIGEN_GPUCC) && !defined(SYCL_DEVICE_ONLY) && EIGEN_FAST_MATH
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float erf(float x) {
  return internal::generic_fast_erf_float(x);
}
#endif
|
||||||
|
|
||||||
|
#if defined(SYCL_DEVICE_ONLY)
|
||||||
|
// NOTE(review): presumably defines the floating-point erf overloads for SYCL
// device code in terms of the SYCL erf builtin -- confirm against the
// definition of SYCL_SPECIALIZE_FLOATING_TYPES_UNARY.
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(erf, erf)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !EIGEN_HAS_CXX11 || defined(EIGEN_GPUCC)
|
||||||
|
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
|
float erf(const float &x) { return ::erff(x); }
|
||||||
|
|
||||||
|
template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
|
double erf(const double &x) { return ::erf(x); }
|
||||||
|
#endif
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||||
T fmod(const T& a, const T& b) {
|
T fmod(const T& a, const T& b) {
|
||||||
|
@ -66,6 +66,58 @@ T generic_fast_tanh_float(const T& a_x)
|
|||||||
return pdiv(p, q);
|
return pdiv(p, q);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** \internal \returns the error function of \a a (coeff-wise)
|
||||||
|
Doesn't do anything fancy, just a 13/8-degree rational interpolant which
|
||||||
|
is accurate up to a couple of ulp in the range [-4, 4], outside of which
|
||||||
|
fl(erf(x)) = +/-1.
|
||||||
|
|
||||||
|
This implementation works on both scalars and Ts.
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
T generic_fast_erf_float(const T& a_x) {
|
||||||
|
// Clamp the inputs to the range [-4, 4] since anything outside
|
||||||
|
// this range is +/-1.0f in single-precision.
|
||||||
|
const T plus_4 = pset1<T>(4.f);
|
||||||
|
const T minus_4 = pset1<T>(-4.f);
|
||||||
|
const T x = pmax(pmin(a_x, plus_4), minus_4);
|
||||||
|
// The monomial coefficients of the numerator polynomial (odd).
|
||||||
|
const T alpha_1 = pset1<T>(-1.60960333262415e-02f);
|
||||||
|
const T alpha_3 = pset1<T>(-2.95459980854025e-03f);
|
||||||
|
const T alpha_5 = pset1<T>(-7.34990630326855e-04f);
|
||||||
|
const T alpha_7 = pset1<T>(-5.69250639462346e-05f);
|
||||||
|
const T alpha_9 = pset1<T>(-2.10102402082508e-06f);
|
||||||
|
const T alpha_11 = pset1<T>(2.77068142495902e-08f);
|
||||||
|
const T alpha_13 = pset1<T>(-2.72614225801306e-10f);
|
||||||
|
|
||||||
|
// The monomial coefficients of the denominator polynomial (even).
|
||||||
|
const T beta_0 = pset1<T>(-1.42647390514189e-02f);
|
||||||
|
const T beta_2 = pset1<T>(-7.37332916720468e-03f);
|
||||||
|
const T beta_4 = pset1<T>(-1.68282697438203e-03f);
|
||||||
|
const T beta_6 = pset1<T>(-2.13374055278905e-04f);
|
||||||
|
const T beta_8 = pset1<T>(-1.45660718464996e-05f);
|
||||||
|
|
||||||
|
// Since the polynomials are odd/even, we need x^2.
|
||||||
|
const T x2 = pmul(x, x);
|
||||||
|
|
||||||
|
// Evaluate the numerator polynomial p.
|
||||||
|
T p = pmadd(x2, alpha_13, alpha_11);
|
||||||
|
p = pmadd(x2, p, alpha_9);
|
||||||
|
p = pmadd(x2, p, alpha_7);
|
||||||
|
p = pmadd(x2, p, alpha_5);
|
||||||
|
p = pmadd(x2, p, alpha_3);
|
||||||
|
p = pmadd(x2, p, alpha_1);
|
||||||
|
p = pmul(x, p);
|
||||||
|
|
||||||
|
// Evaluate the denominator polynomial p.
|
||||||
|
T q = pmadd(x2, beta_8, beta_6);
|
||||||
|
q = pmadd(x2, q, beta_4);
|
||||||
|
q = pmadd(x2, q, beta_2);
|
||||||
|
q = pmadd(x2, q, beta_0);
|
||||||
|
|
||||||
|
// Divide the numerator by the denominator.
|
||||||
|
return pdiv(p, q);
|
||||||
|
}
|
||||||
|
|
||||||
template<typename RealScalar>
|
template<typename RealScalar>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y)
|
RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y)
|
||||||
|
@ -62,6 +62,14 @@ ptanh<Packet8f>(const Packet8f& x) {
|
|||||||
return internal::generic_fast_tanh_float(x);
|
return internal::generic_fast_tanh_float(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Error function.
|
||||||
|
template <>
|
||||||
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
|
||||||
|
perf<Packet8f>(const Packet8f& x) {
|
||||||
|
return internal::generic_fast_erf_float(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Exponential function for doubles.
|
||||||
template <>
|
template <>
|
||||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
|
||||||
pexp<Packet4d>(const Packet4d& x) {
|
pexp<Packet4d>(const Packet4d& x) {
|
||||||
|
@ -77,6 +77,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
|||||||
HasSqrt = 1,
|
HasSqrt = 1,
|
||||||
HasRsqrt = 1,
|
HasRsqrt = 1,
|
||||||
HasTanh = EIGEN_FAST_MATH,
|
HasTanh = EIGEN_FAST_MATH,
|
||||||
|
HasErf = EIGEN_FAST_MATH,
|
||||||
HasBlend = 1,
|
HasBlend = 1,
|
||||||
HasRound = 1,
|
HasRound = 1,
|
||||||
HasFloor = 1,
|
HasFloor = 1,
|
||||||
|
@ -405,6 +405,18 @@ Packet16f pexpm1<Packet16f>(const Packet16f& _x) {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
template <>
|
||||||
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
||||||
|
ptanh<Packet16f>(const Packet16f& _x) {
|
||||||
|
return internal::generic_fast_tanh_float(_x);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f
|
||||||
|
perf<Packet16f>(const Packet16f& _x) {
|
||||||
|
return internal::generic_fast_erf_float(_x);
|
||||||
|
}
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
@ -103,6 +103,8 @@ template<> struct packet_traits<float> : default_packet_traits
|
|||||||
HasExp = 1,
|
HasExp = 1,
|
||||||
HasSqrt = EIGEN_FAST_MATH,
|
HasSqrt = EIGEN_FAST_MATH,
|
||||||
HasRsqrt = EIGEN_FAST_MATH,
|
HasRsqrt = EIGEN_FAST_MATH,
|
||||||
|
HasTanh = EIGEN_FAST_MATH,
|
||||||
|
HasErf = EIGEN_FAST_MATH,
|
||||||
#endif
|
#endif
|
||||||
HasDiv = 1
|
HasDiv = 1
|
||||||
};
|
};
|
||||||
|
@ -76,6 +76,20 @@ Packet2d pexp<Packet2d>(const Packet2d& _x)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Hyperbolic Tangent function.
|
||||||
|
template <>
|
||||||
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
|
||||||
|
ptanh<Packet4f>(const Packet4f& x) {
|
||||||
|
return internal::generic_fast_tanh_float(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error function.
|
||||||
|
template <>
|
||||||
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
|
||||||
|
perf<Packet4f>(const Packet4f& x) {
|
||||||
|
return internal::generic_fast_erf_float(x);
|
||||||
|
}
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
@ -120,8 +120,8 @@ static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_PSET64_HI, p16uc_PSET64_L
|
|||||||
#define EIGEN_PPC_PREFETCH(ADDR) asm( " dcbt [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
|
#define EIGEN_PPC_PREFETCH(ADDR) asm( " dcbt [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template<> struct packet_traits<float> : default_packet_traits
|
template <>
|
||||||
{
|
struct packet_traits<float> : default_packet_traits {
|
||||||
typedef Packet4f type;
|
typedef Packet4f type;
|
||||||
typedef Packet4f half;
|
typedef Packet4f half;
|
||||||
enum {
|
enum {
|
||||||
@ -151,6 +151,8 @@ template<> struct packet_traits<float> : default_packet_traits
|
|||||||
#else
|
#else
|
||||||
HasSqrt = 0,
|
HasSqrt = 0,
|
||||||
HasRsqrt = 0,
|
HasRsqrt = 0,
|
||||||
|
HasTanh = EIGEN_FAST_MATH,
|
||||||
|
HasErf = EIGEN_FAST_MATH,
|
||||||
#endif
|
#endif
|
||||||
HasRound = 1,
|
HasRound = 1,
|
||||||
HasFloor = 1,
|
HasFloor = 1,
|
||||||
@ -159,8 +161,8 @@ template<> struct packet_traits<float> : default_packet_traits
|
|||||||
HasBlend = 1
|
HasBlend = 1
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
template<> struct packet_traits<int> : default_packet_traits
|
template <>
|
||||||
{
|
struct packet_traits<int> : default_packet_traits {
|
||||||
typedef Packet4i type;
|
typedef Packet4i type;
|
||||||
typedef Packet4i half;
|
typedef Packet4i half;
|
||||||
enum {
|
enum {
|
||||||
@ -177,7 +179,6 @@ template<> struct packet_traits<int> : default_packet_traits
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
template<> struct unpacket_traits<Packet4f>
|
template<> struct unpacket_traits<Packet4f>
|
||||||
{
|
{
|
||||||
typedef float type;
|
typedef float type;
|
||||||
|
@ -321,6 +321,13 @@ pcos<Packet4f>(const Packet4f& x) {
|
|||||||
return psincos_inner_msa_float</* sine */ false>(x);
|
return psincos_inner_msa_float</* sine */ false>(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Error function.
|
||||||
|
template <>
|
||||||
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
|
||||||
|
perf<Packet4f>(const Packet4f& x) {
|
||||||
|
return internal::generic_fast_erf_float(x);
|
||||||
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d
|
||||||
pexp<Packet2d>(const Packet2d& _x) {
|
pexp<Packet2d>(const Packet2d& _x) {
|
||||||
|
@ -88,6 +88,7 @@ struct packet_traits<float> : default_packet_traits {
|
|||||||
HasSin = EIGEN_FAST_MATH,
|
HasSin = EIGEN_FAST_MATH,
|
||||||
HasCos = EIGEN_FAST_MATH,
|
HasCos = EIGEN_FAST_MATH,
|
||||||
HasTanh = EIGEN_FAST_MATH,
|
HasTanh = EIGEN_FAST_MATH,
|
||||||
|
HasErf = EIGEN_FAST_MATH,
|
||||||
HasLog = 1,
|
HasLog = 1,
|
||||||
HasExp = 1,
|
HasExp = 1,
|
||||||
HasSqrt = 1,
|
HasSqrt = 1,
|
||||||
|
@ -36,6 +36,20 @@ Packet4f pcos<Packet4f>(const Packet4f& x)
|
|||||||
return pcos_float(x);
|
return pcos_float(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Hyperbolic Tangent function.
|
||||||
|
template <>
|
||||||
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
|
||||||
|
ptanh<Packet4f>(const Packet4f& x) {
|
||||||
|
return internal::generic_fast_tanh_float(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error function.
|
||||||
|
template <>
|
||||||
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
|
||||||
|
perf<Packet4f>(const Packet4f& x) {
|
||||||
|
return internal::generic_fast_erf_float(x);
|
||||||
|
}
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
@ -98,8 +98,8 @@ typedef uint32x4_t Packet4ui;
|
|||||||
#define EIGEN_ARM_PREFETCH(ADDR)
|
#define EIGEN_ARM_PREFETCH(ADDR)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template<> struct packet_traits<float> : default_packet_traits
|
template <>
|
||||||
{
|
struct packet_traits<float> : default_packet_traits {
|
||||||
typedef Packet4f type;
|
typedef Packet4f type;
|
||||||
typedef Packet4f half; // Packet2f intrinsics not implemented yet
|
typedef Packet4f half; // Packet2f intrinsics not implemented yet
|
||||||
enum {
|
enum {
|
||||||
@ -115,11 +115,13 @@ template<> struct packet_traits<float> : default_packet_traits
|
|||||||
HasCos = EIGEN_FAST_MATH,
|
HasCos = EIGEN_FAST_MATH,
|
||||||
HasLog = 1,
|
HasLog = 1,
|
||||||
HasExp = 1,
|
HasExp = 1,
|
||||||
HasSqrt = 0
|
HasSqrt = 0,
|
||||||
|
HasTanh = EIGEN_FAST_MATH,
|
||||||
|
HasErf = EIGEN_FAST_MATH
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
template<> struct packet_traits<int32_t> : default_packet_traits
|
template <>
|
||||||
{
|
struct packet_traits<int32_t> : default_packet_traits {
|
||||||
typedef Packet4i type;
|
typedef Packet4i type;
|
||||||
typedef Packet4i half; // Packet2i intrinsics not implemented yet
|
typedef Packet4i half; // Packet2i intrinsics not implemented yet
|
||||||
enum {
|
enum {
|
||||||
|
@ -147,6 +147,13 @@ ptanh<Packet4f>(const Packet4f& x) {
|
|||||||
return internal::generic_fast_tanh_float(x);
|
return internal::generic_fast_tanh_float(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Error function.
|
||||||
|
template <>
|
||||||
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
|
||||||
|
perf<Packet4f>(const Packet4f& a) {
|
||||||
|
return internal::generic_fast_erf_float(a);
|
||||||
|
}
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
namespace numext {
|
namespace numext {
|
||||||
|
@ -96,8 +96,8 @@ template<> struct is_arithmetic<__m128d> { enum { value = true }; };
|
|||||||
// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
|
// Use the packet_traits defined in AVX/PacketMath.h instead if we're going
|
||||||
// to leverage AVX instructions.
|
// to leverage AVX instructions.
|
||||||
#ifndef EIGEN_VECTORIZE_AVX
|
#ifndef EIGEN_VECTORIZE_AVX
|
||||||
template<> struct packet_traits<float> : default_packet_traits
|
template <>
|
||||||
{
|
struct packet_traits<float> : default_packet_traits {
|
||||||
typedef Packet4f type;
|
typedef Packet4f type;
|
||||||
typedef Packet4f half;
|
typedef Packet4f half;
|
||||||
enum {
|
enum {
|
||||||
@ -118,6 +118,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
|||||||
HasSqrt = 1,
|
HasSqrt = 1,
|
||||||
HasRsqrt = 1,
|
HasRsqrt = 1,
|
||||||
HasTanh = EIGEN_FAST_MATH,
|
HasTanh = EIGEN_FAST_MATH,
|
||||||
|
HasErf = EIGEN_FAST_MATH,
|
||||||
HasBlend = 1,
|
HasBlend = 1,
|
||||||
HasFloor = 1
|
HasFloor = 1
|
||||||
|
|
||||||
@ -128,8 +129,8 @@ template<> struct packet_traits<float> : default_packet_traits
|
|||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
template<> struct packet_traits<double> : default_packet_traits
|
template <>
|
||||||
{
|
struct packet_traits<double> : default_packet_traits {
|
||||||
typedef Packet2d type;
|
typedef Packet2d type;
|
||||||
typedef Packet2d half;
|
typedef Packet2d half;
|
||||||
enum {
|
enum {
|
||||||
|
@ -225,6 +225,20 @@ Packet4f prsqrt<Packet4f>(const Packet4f& x) {
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Hyperbolic Tangent function.
|
||||||
|
template <>
|
||||||
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
|
||||||
|
ptanh<Packet4f>(const Packet4f& x) {
|
||||||
|
return internal::generic_fast_tanh_float(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error function.
|
||||||
|
template <>
|
||||||
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
|
||||||
|
perf<Packet4f>(const Packet4f& x) {
|
||||||
|
return internal::generic_fast_erf_float(x);
|
||||||
|
}
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
@ -173,8 +173,8 @@ template<> struct packet_traits<int> : default_packet_traits
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
template<> struct packet_traits<float> : default_packet_traits
|
template <>
|
||||||
{
|
struct packet_traits<float> : default_packet_traits {
|
||||||
typedef Packet4f type;
|
typedef Packet4f type;
|
||||||
typedef Packet4f half;
|
typedef Packet4f half;
|
||||||
enum {
|
enum {
|
||||||
@ -200,6 +200,8 @@ template<> struct packet_traits<float> : default_packet_traits
|
|||||||
#endif
|
#endif
|
||||||
HasSqrt = 1,
|
HasSqrt = 1,
|
||||||
HasRsqrt = 1,
|
HasRsqrt = 1,
|
||||||
|
HasTanh = 1,
|
||||||
|
HasErf = 1,
|
||||||
HasRound = 1,
|
HasRound = 1,
|
||||||
HasFloor = 1,
|
HasFloor = 1,
|
||||||
HasCeil = 1,
|
HasCeil = 1,
|
||||||
|
@ -578,7 +578,7 @@ template<typename Scalar,typename Packet> void packetmath_real()
|
|||||||
h.store(data2, internal::plgamma(h.load(data1)));
|
h.store(data2, internal::plgamma(h.load(data1)));
|
||||||
VERIFY((numext::isnan)(data2[0]));
|
VERIFY((numext::isnan)(data2[0]));
|
||||||
}
|
}
|
||||||
{
|
if (internal::packet_traits<Scalar>::HasErf) {
|
||||||
data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
|
data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
|
||||||
packet_helper<internal::packet_traits<Scalar>::HasErf,Packet> h;
|
packet_helper<internal::packet_traits<Scalar>::HasErf,Packet> h;
|
||||||
h.store(data2, internal::perf(h.load(data1)));
|
h.store(data2, internal::perf(h.load(data1)));
|
||||||
|
@ -238,25 +238,40 @@ struct functor_traits<scalar_polygamma_op<Scalar> >
|
|||||||
};
|
};
|
||||||
|
|
||||||
/** \internal
|
/** \internal
|
||||||
* \brief Template functor to compute the Gauss error function of a
|
* \brief Template functor to compute the error function of a scalar
|
||||||
* scalar
|
* \sa class CwiseUnaryOp, ArrayBase::erf()
|
||||||
* \sa class CwiseUnaryOp, Cwise::erf()
|
|
||||||
*/
|
*/
|
||||||
template<typename Scalar> struct scalar_erf_op {
|
template<typename Scalar> struct scalar_erf_op {
|
||||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_op)
|
EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_op)
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar
|
||||||
using numext::erf; return erf(a);
|
operator()(const Scalar& a) const {
|
||||||
|
return numext::erf(a);
|
||||||
|
}
|
||||||
|
template <typename Packet>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const {
|
||||||
|
return perf(x);
|
||||||
}
|
}
|
||||||
typedef typename packet_traits<Scalar>::type Packet;
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::perf(a); }
|
|
||||||
};
|
};
|
||||||
template <typename Scalar>
|
template <typename Scalar>
|
||||||
struct functor_traits<scalar_erf_op<Scalar> >
|
struct functor_traits<scalar_erf_op<Scalar> > {
|
||||||
{
|
|
||||||
enum {
|
enum {
|
||||||
// Guesstimate
|
PacketAccess = packet_traits<Scalar>::HasErf,
|
||||||
Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
|
Cost =
|
||||||
PacketAccess = packet_traits<Scalar>::HasErf
|
(PacketAccess
|
||||||
|
#ifdef EIGEN_VECTORIZE_FMA
|
||||||
|
// TODO(rmlarsen): Move the FMA cost model to a central location.
|
||||||
|
// Haswell can issue 2 add/mul/madd per cycle.
|
||||||
|
// 10 pmadd, 2 pmul, 1 div, 2 other
|
||||||
|
? (2 * NumTraits<Scalar>::AddCost +
|
||||||
|
7 * NumTraits<Scalar>::MulCost +
|
||||||
|
scalar_div_cost<Scalar, packet_traits<Scalar>::HasDiv>::value)
|
||||||
|
#else
|
||||||
|
? (12 * NumTraits<Scalar>::AddCost +
|
||||||
|
12 * NumTraits<Scalar>::MulCost +
|
||||||
|
scalar_div_cost<Scalar, packet_traits<Scalar>::HasDiv>::value)
|
||||||
|
#endif
|
||||||
|
// Assume for simplicity that this is as expensive as an exp().
|
||||||
|
: (functor_traits<scalar_exp_op<Scalar> >::Cost))
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -30,10 +30,6 @@ Packet pzeta(const Packet& x, const Packet& q) { using numext::zeta; return zeta
|
|||||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||||
Packet ppolygamma(const Packet& n, const Packet& x) { using numext::polygamma; return polygamma(n, x); }
|
Packet ppolygamma(const Packet& n, const Packet& x) { using numext::polygamma; return polygamma(n, x); }
|
||||||
|
|
||||||
/** \internal \returns the erf(\a a) (coeff-wise) */
|
|
||||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
||||||
Packet perf(const Packet& a) { using numext::erf; return erf(a); }
|
|
||||||
|
|
||||||
/** \internal \returns the erfc(\a a) (coeff-wise) */
|
/** \internal \returns the erfc(\a a) (coeff-wise) */
|
||||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||||
Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }
|
Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }
|
||||||
@ -77,4 +73,3 @@ Packet pbetainc(const Packet& a, const Packet& b,const Packet& x) { using numext
|
|||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
|
||||||
#endif // EIGEN_SPECIALFUNCTIONS_PACKETMATH_H
|
#endif // EIGEN_SPECIALFUNCTIONS_PACKETMATH_H
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user