diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index 2d7b9f9e0..c102914ce 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -531,6 +531,10 @@ Packet pcosh(const Packet& a) { EIGEN_USING_STD_MATH(cosh); return cosh(a); } template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh(const Packet& a) { EIGEN_USING_STD_MATH(tanh); return tanh(a); } +/** \internal \returns the error function of \a a (coeff-wise). */ +template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS +Packet perf(const Packet& a) { return numext::erf(a); } + /** \internal \returns the exp of \a a (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) { EIGEN_USING_STD_MATH(exp); return exp(a); } diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index fbec39d83..bf1c847b9 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -891,7 +891,7 @@ template EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex& x) template EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex& x); template T generic_fast_tanh_float(const T& a_x); - +template T generic_fast_erf_float(const T& a_x); } // end namespace internal /**************************************************************************** @@ -1579,6 +1579,30 @@ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double tanh(const double &x) { return ::tanh(x); } #endif +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T erf(const T &x) { + EIGEN_USING_STD_MATH(tanh); + return erf(x); +} + +#if (!defined(EIGEN_GPUCC)) && EIGEN_FAST_MATH && !defined(SYCL_DEVICE_ONLY) +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float erf(float x) { return internal::generic_fast_erf_float(x); } +#endif + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(erf, erf) +#endif + +#if !EIGEN_HAS_CXX11 || defined(EIGEN_GPUCC) +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float erf(const float &x) { return ::erff(x); } + +template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double erf(const double &x) { return ::erf(x); } +#endif + template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T fmod(const T& a, const T& b) { diff --git a/Eigen/src/Core/MathFunctionsImpl.h b/Eigen/src/Core/MathFunctionsImpl.h index a23e93ccb..c4957fbc2 100644 --- a/Eigen/src/Core/MathFunctionsImpl.h +++ b/Eigen/src/Core/MathFunctionsImpl.h @@ -66,6 +66,58 @@ T generic_fast_tanh_float(const T& a_x) return pdiv(p, q); } +/** \internal \returns the error function of \a a (coeff-wise) + Doesn't do anything fancy, just a 13/8-degree rational interpolant which + is accurate up to a couple of ulp in the range [-4, 4], outside of which + fl(erf(x)) = +/-1. + + This implementation works on both scalars and Ts. +*/ +template +T generic_fast_erf_float(const T& a_x) { + // Clamp the inputs to the range [-4, 4] since anything outside + // this range is +/-1.0f in single-precision. + const T plus_4 = pset1(4.f); + const T minus_4 = pset1(-4.f); + const T x = pmax(pmin(a_x, plus_4), minus_4); + // The monomial coefficients of the numerator polynomial (odd). + const T alpha_1 = pset1(-1.60960333262415e-02f); + const T alpha_3 = pset1(-2.95459980854025e-03f); + const T alpha_5 = pset1(-7.34990630326855e-04f); + const T alpha_7 = pset1(-5.69250639462346e-05f); + const T alpha_9 = pset1(-2.10102402082508e-06f); + const T alpha_11 = pset1(2.77068142495902e-08f); + const T alpha_13 = pset1(-2.72614225801306e-10f); + + // The monomial coefficients of the denominator polynomial (even). + const T beta_0 = pset1(-1.42647390514189e-02f); + const T beta_2 = pset1(-7.37332916720468e-03f); + const T beta_4 = pset1(-1.68282697438203e-03f); + const T beta_6 = pset1(-2.13374055278905e-04f); + const T beta_8 = pset1(-1.45660718464996e-05f); + + // Since the polynomials are odd/even, we need x^2. + const T x2 = pmul(x, x); + + // Evaluate the numerator polynomial p. + T p = pmadd(x2, alpha_13, alpha_11); + p = pmadd(x2, p, alpha_9); + p = pmadd(x2, p, alpha_7); + p = pmadd(x2, p, alpha_5); + p = pmadd(x2, p, alpha_3); + p = pmadd(x2, p, alpha_1); + p = pmul(x, p); + + // Evaluate the denominator polynomial p. + T q = pmadd(x2, beta_8, beta_6); + q = pmadd(x2, q, beta_4); + q = pmadd(x2, q, beta_2); + q = pmadd(x2, q, beta_0); + + // Divide the numerator by the denominator. + return pdiv(p, q); +} + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y) diff --git a/Eigen/src/Core/arch/AVX/MathFunctions.h b/Eigen/src/Core/arch/AVX/MathFunctions.h index c6d3cf6a0..089a682ba 100644 --- a/Eigen/src/Core/arch/AVX/MathFunctions.h +++ b/Eigen/src/Core/arch/AVX/MathFunctions.h @@ -62,6 +62,14 @@ ptanh(const Packet8f& x) { return internal::generic_fast_tanh_float(x); } +// Error function. +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f +perf(const Packet8f& x) { + return internal::generic_fast_erf_float(x); +} + +// Exponential function for doubles. template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d pexp(const Packet4d& x) { diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 0472e9850..ed8acfc61 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -62,21 +62,22 @@ template<> struct packet_traits : default_packet_traits enum { Vectorizable = 1, AlignedOnScalar = 1, - size=8, + size = 8, HasHalfPacket = 1, - HasDiv = 1, - HasSin = EIGEN_FAST_MATH, - HasCos = EIGEN_FAST_MATH, - HasLog = 1, - HasLog1p = 1, - HasExpm1 = 1, - HasExp = 1, + HasDiv = 1, + HasSin = EIGEN_FAST_MATH, + HasCos = EIGEN_FAST_MATH, + HasLog = 1, + HasLog1p = 1, + HasExpm1 = 1, + HasExp = 1, HasNdtri = 1, - HasBessel = 1, + HasBessel = 1, HasSqrt = 1, HasRsqrt = 1, - HasTanh = EIGEN_FAST_MATH, + HasTanh = EIGEN_FAST_MATH, + HasErf = EIGEN_FAST_MATH, HasBlend = 1, HasRound = 1, HasFloor = 1, diff --git a/Eigen/src/Core/arch/AVX512/MathFunctions.h b/Eigen/src/Core/arch/AVX512/MathFunctions.h index 9e37a720b..66581964c 100644 --- a/Eigen/src/Core/arch/AVX512/MathFunctions.h +++ b/Eigen/src/Core/arch/AVX512/MathFunctions.h @@ -405,6 +405,18 @@ Packet16f pexpm1(const Packet16f& _x) { } #endif +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f +ptanh(const Packet16f& _x) { + return internal::generic_fast_tanh_float(_x); +} + +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f +perf(const Packet16f& _x) { + return internal::generic_fast_erf_float(_x); +} + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/AVX512/PacketMath.h b/Eigen/src/Core/arch/AVX512/PacketMath.h index 589ccbb7a..19c03cfe2 100644 --- a/Eigen/src/Core/arch/AVX512/PacketMath.h +++ b/Eigen/src/Core/arch/AVX512/PacketMath.h @@ -98,11 +98,13 @@ template<> struct packet_traits : default_packet_traits HasLog1p = 1, HasExpm1 = 1, HasNdtri = 1, -#endif HasBessel = 1, +#endif HasExp = 1, HasSqrt = EIGEN_FAST_MATH, HasRsqrt = EIGEN_FAST_MATH, + HasTanh = EIGEN_FAST_MATH, + HasErf = EIGEN_FAST_MATH, #endif HasDiv = 1 }; diff --git a/Eigen/src/Core/arch/AltiVec/MathFunctions.h b/Eigen/src/Core/arch/AltiVec/MathFunctions.h index b6d1f47f2..00a0248e2 100644 --- a/Eigen/src/Core/arch/AltiVec/MathFunctions.h +++ b/Eigen/src/Core/arch/AltiVec/MathFunctions.h @@ -76,6 +76,20 @@ Packet2d pexp(const Packet2d& _x) } #endif +// Hyperbolic Tangent function. +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f +ptanh(const Packet4f& x) { + return internal::generic_fast_tanh_float(x); +} + +// Error function. +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f +perf(const Packet4f& x) { + return internal::generic_fast_erf_float(x); +} + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 7ee290a29..2bba3a798 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -120,27 +120,27 @@ static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_PSET64_HI, p16uc_PSET64_L #define EIGEN_PPC_PREFETCH(ADDR) asm( " dcbt [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" ); #endif -template<> struct packet_traits : default_packet_traits -{ +template <> +struct packet_traits : default_packet_traits { typedef Packet4f type; typedef Packet4f half; enum { Vectorizable = 1, AlignedOnScalar = 1, - size=4, + size = 4, HasHalfPacket = 1, - HasAdd = 1, - HasSub = 1, - HasMul = 1, - HasDiv = 1, - HasMin = 1, - HasMax = 1, - HasAbs = 1, - HasSin = EIGEN_FAST_MATH, - HasCos = EIGEN_FAST_MATH, - HasLog = 1, - HasExp = 1, + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasDiv = 1, + HasMin = 1, + HasMax = 1, + HasAbs = 1, + HasSin = EIGEN_FAST_MATH, + HasCos = EIGEN_FAST_MATH, + HasLog = 1, + HasExp = 1, #ifdef __VSX__ HasSqrt = 1, #if !EIGEN_COMP_CLANG @@ -151,6 +151,8 @@ template<> struct packet_traits : default_packet_traits #else HasSqrt = 0, HasRsqrt = 0, + HasTanh = EIGEN_FAST_MATH, + HasErf = EIGEN_FAST_MATH, #endif HasRound = 1, HasFloor = 1, @@ -159,8 +161,8 @@ template<> struct packet_traits : default_packet_traits HasBlend = 1 }; }; -template<> struct packet_traits : default_packet_traits -{ +template <> +struct packet_traits : default_packet_traits { typedef Packet4i type; typedef Packet4i half; enum { @@ -177,7 +179,6 @@ template<> struct packet_traits : default_packet_traits }; }; - template<> struct unpacket_traits { typedef float type; diff --git a/Eigen/src/Core/arch/MSA/MathFunctions.h b/Eigen/src/Core/arch/MSA/MathFunctions.h index f5181b90e..23e4b8664 100644 --- a/Eigen/src/Core/arch/MSA/MathFunctions.h +++ b/Eigen/src/Core/arch/MSA/MathFunctions.h @@ -321,6 +321,13 @@ pcos(const Packet4f& x) { return psincos_inner_msa_float(x); } +// Error function. +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f +perf(const Packet4f& x) { + return internal::generic_fast_erf_float(x); +} + template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d pexp(const Packet2d& _x) { diff --git a/Eigen/src/Core/arch/MSA/PacketMath.h b/Eigen/src/Core/arch/MSA/PacketMath.h index 3c922d4de..94ee0e302 100644 --- a/Eigen/src/Core/arch/MSA/PacketMath.h +++ b/Eigen/src/Core/arch/MSA/PacketMath.h @@ -88,6 +88,7 @@ struct packet_traits : default_packet_traits { HasSin = EIGEN_FAST_MATH, HasCos = EIGEN_FAST_MATH, HasTanh = EIGEN_FAST_MATH, + HasErf = EIGEN_FAST_MATH, HasLog = 1, HasExp = 1, HasSqrt = 1, diff --git a/Eigen/src/Core/arch/NEON/MathFunctions.h b/Eigen/src/Core/arch/NEON/MathFunctions.h index fdee9f9b4..13e3f4989 100644 --- a/Eigen/src/Core/arch/NEON/MathFunctions.h +++ b/Eigen/src/Core/arch/NEON/MathFunctions.h @@ -36,6 +36,20 @@ Packet4f pcos(const Packet4f& x) return pcos_float(x); } +// Hyperbolic Tangent function. +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f +ptanh(const Packet4f& x) { + return internal::generic_fast_tanh_float(x); +} + +// Error function. +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f +perf(const Packet4f& x) { + return internal::generic_fast_erf_float(x); +} + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index 84153ece1..83447547c 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -98,28 +98,30 @@ typedef uint32x4_t Packet4ui; #define EIGEN_ARM_PREFETCH(ADDR) #endif -template<> struct packet_traits : default_packet_traits -{ +template <> +struct packet_traits : default_packet_traits { typedef Packet4f type; - typedef Packet4f half; // Packet2f intrinsics not implemented yet + typedef Packet4f half; // Packet2f intrinsics not implemented yet enum { Vectorizable = 1, AlignedOnScalar = 1, size = 4, - HasHalfPacket=0, // Packet2f intrinsics not implemented yet - - HasDiv = 1, + HasHalfPacket = 0, // Packet2f intrinsics not implemented yet + + HasDiv = 1, HasFloor = 1, // FIXME check the Has* - HasSin = EIGEN_FAST_MATH, - HasCos = EIGEN_FAST_MATH, - HasLog = 1, - HasExp = 1, - HasSqrt = 0 + HasSin = EIGEN_FAST_MATH, + HasCos = EIGEN_FAST_MATH, + HasLog = 1, + HasExp = 1, + HasSqrt = 0, + HasTanh = EIGEN_FAST_MATH, + HasErf = EIGEN_FAST_MATH }; }; -template<> struct packet_traits : default_packet_traits -{ +template <> +struct packet_traits : default_packet_traits { typedef Packet4i type; typedef Packet4i half; // Packet2i intrinsics not implemented yet enum { diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h index b21bb93bf..a991b1a39 100644 --- a/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -147,6 +147,13 @@ ptanh(const Packet4f& x) { return internal::generic_fast_tanh_float(x); } +// Error function. +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f +perf(const Packet4f& a) { + return internal::generic_fast_erf_float(a); +} + } // end namespace internal namespace numext { diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index b48d70afd..4a772d078 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -96,28 +96,29 @@ template<> struct is_arithmetic<__m128d> { enum { value = true }; }; // Use the packet_traits defined in AVX/PacketMath.h instead if we're going // to leverage AVX instructions. #ifndef EIGEN_VECTORIZE_AVX -template<> struct packet_traits : default_packet_traits -{ +template <> +struct packet_traits : default_packet_traits { typedef Packet4f type; typedef Packet4f half; enum { Vectorizable = 1, AlignedOnScalar = 1, - size=4, + size = 4, HasHalfPacket = 0, - HasDiv = 1, - HasSin = EIGEN_FAST_MATH, - HasCos = EIGEN_FAST_MATH, - HasLog = 1, - HasLog1p = 1, - HasExpm1 = 1, + HasDiv = 1, + HasSin = EIGEN_FAST_MATH, + HasCos = EIGEN_FAST_MATH, + HasLog = 1, + HasLog1p = 1, + HasExpm1 = 1, HasNdtri = 1, - HasExp = 1, - HasBessel = 1, + HasExp = 1, + HasBessel = 1, HasSqrt = 1, HasRsqrt = 1, - HasTanh = EIGEN_FAST_MATH, + HasTanh = EIGEN_FAST_MATH, + HasErf = EIGEN_FAST_MATH, HasBlend = 1, HasFloor = 1 @@ -128,8 +129,8 @@ template<> struct packet_traits : default_packet_traits #endif }; }; -template<> struct packet_traits : default_packet_traits -{ +template <> +struct packet_traits : default_packet_traits { typedef Packet2d type; typedef Packet2d half; enum { diff --git a/Eigen/src/Core/arch/ZVector/MathFunctions.h b/Eigen/src/Core/arch/ZVector/MathFunctions.h index ff33a975f..d4b33b56f 100644 --- a/Eigen/src/Core/arch/ZVector/MathFunctions.h +++ b/Eigen/src/Core/arch/ZVector/MathFunctions.h @@ -225,6 +225,20 @@ Packet4f prsqrt(const Packet4f& x) { return res; } +// Hyperbolic Tangent function. +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f +ptanh(const Packet4f& x) { + return internal::generic_fast_tanh_float(x); +} + +// Error function. +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f +perf(const Packet4f& x) { + return internal::generic_fast_erf_float(x); +} + } // end namespace internal } // end namespace Eigen diff --git a/Eigen/src/Core/arch/ZVector/PacketMath.h b/Eigen/src/Core/arch/ZVector/PacketMath.h index b1c788168..5036106cd 100755 --- a/Eigen/src/Core/arch/ZVector/PacketMath.h +++ b/Eigen/src/Core/arch/ZVector/PacketMath.h @@ -173,33 +173,35 @@ template<> struct packet_traits : default_packet_traits }; }; -template<> struct packet_traits : default_packet_traits -{ +template <> +struct packet_traits : default_packet_traits { typedef Packet4f type; typedef Packet4f half; enum { Vectorizable = 1, AlignedOnScalar = 1, - size=4, + size = 4, HasHalfPacket = 0, - HasAdd = 1, - HasSub = 1, - HasMul = 1, - HasDiv = 1, - HasMin = 1, - HasMax = 1, - HasAbs = 1, - HasSin = 0, - HasCos = 0, - HasLog = 0, + HasAdd = 1, + HasSub = 1, + HasMul = 1, + HasDiv = 1, + HasMin = 1, + HasMax = 1, + HasAbs = 1, + HasSin = 0, + HasCos = 0, + HasLog = 0, #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12) - HasExp = 0, + HasExp = 0, #else - HasExp = 1, + HasExp = 1, #endif HasSqrt = 1, HasRsqrt = 1, + HasTanh = 1, + HasErf = 1, HasRound = 1, HasFloor = 1, HasCeil = 1, diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 2c7fb5aeb..d652082b0 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -578,7 +578,7 @@ template void packetmath_real() h.store(data2, internal::plgamma(h.load(data1))); VERIFY((numext::isnan)(data2[0])); } - { + if (internal::packet_traits::HasErf) { data1[0] = std::numeric_limits::quiet_NaN(); packet_helper::HasErf,Packet> h; h.store(data2, internal::perf(h.load(data1))); diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h index a4287c31f..7447f4bfa 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h @@ -238,25 +238,39 @@ struct functor_traits > }; /** \internal - * \brief Template functor to compute the Gauss error function of a - * scalar - * \sa class CwiseUnaryOp, Cwise::erf() + * \brief Template functor to compute the error function of a scalar + * \sa class CwiseUnaryOp, ArrayBase::erf() */ template struct scalar_erf_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a) const { - using numext::erf; return erf(a); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar + operator()(const Scalar& a) const { + return numext::erf(a); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return perf(x); } - typedef typename packet_traits::type Packet; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& a) const { return internal::perf(a); } }; -template -struct functor_traits > -{ +template +struct functor_traits > { enum { - // Guesstimate - Cost = 10 * NumTraits::MulCost + 5 * NumTraits::AddCost, - PacketAccess = packet_traits::HasErf + PacketAccess = packet_traits::HasErf, + Cost = + (PacketAccess +#ifdef EIGEN_VECTORIZE_FMA + // Haswell can issue 2 add/mul/madd per cycle. + // 10 pmadd, 2 pmul, 1 div, 2 other + ? (2 * NumTraits::AddCost + + 7 * NumTraits::MulCost + + scalar_div_cost::HasDiv>::value) +#else + ? (12 * NumTraits::AddCost + + 12 * NumTraits::MulCost + + scalar_div_cost::HasDiv>::value) +#endif + // Assume for simplicity that this is as expensive as an exp(). + : (functor_traits >::Cost)) }; }; diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h index 577015690..77fdb031a 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h @@ -30,10 +30,6 @@ Packet pzeta(const Packet& x, const Packet& q) { using numext::zeta; return zeta template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ppolygamma(const Packet& n, const Packet& x) { using numext::polygamma; return polygamma(n, x); } -/** \internal \returns the erf(\a a) (coeff-wise) */ -template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS -Packet perf(const Packet& a) { using numext::erf; return erf(a); } - /** \internal \returns the erfc(\a a) (coeff-wise) */ template EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); } @@ -77,4 +73,3 @@ Packet pbetainc(const Packet& a, const Packet& b,const Packet& x) { using numext } // end namespace Eigen #endif // EIGEN_SPECIALFUNCTIONS_PACKETMATH_H -