Clean up generic packetmath specializations for various backends with the help of a macro.

This commit is contained in:
Rasmus Munk Larsen 2023-03-10 22:02:23 +00:00
parent e8fdf127c6
commit d6235d76db
6 changed files with 97 additions and 523 deletions

View File

@ -20,105 +20,8 @@ namespace Eigen {
namespace internal {
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
psin<Packet8f>(const Packet8f& _x) {
return psin_float(_x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
pcos<Packet8f>(const Packet8f& _x) {
return pcos_float(_x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
pasin<Packet8f>(const Packet8f& _x) {
return pasin_float(_x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
pacos<Packet8f>(const Packet8f& _x) {
return pacos_float(_x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
patan<Packet8f>(const Packet8f& _x) {
return patan_float(_x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d
patan<Packet4d>(const Packet4d& _x) {
return patan_double(_x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
patanh<Packet8f>(const Packet8f& _x) {
return patanh_float(_x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
plog<Packet8f>(const Packet8f& _x) {
return plog_float(_x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d
plog<Packet4d>(const Packet4d& _x) {
return plog_double(_x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
plog2<Packet8f>(const Packet8f& _x) {
return plog2_float(_x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d
plog2<Packet4d>(const Packet4d& _x) {
return plog2_double(_x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet8f plog1p<Packet8f>(const Packet8f& _x) {
return generic_plog1p(_x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet8f pexpm1<Packet8f>(const Packet8f& _x) {
return generic_expm1(_x);
}
// Exponential function. Works by writing "x = m*log(2) + r" where
// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
// "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1).
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
pexp<Packet8f>(const Packet8f& _x) {
return pexp_float(_x);
}
// Hyperbolic Tangent function.
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
ptanh<Packet8f>(const Packet8f& _x) {
return internal::generic_fast_tanh_float(_x);
}
// Exponential function for doubles.
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d
pexp<Packet4d>(const Packet4d& _x) {
return pexp_double(_x);
}
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet8f)
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet4d)
// Notice that for newer processors, it is counterproductive to use Newton
// iteration for square root. In particular, Skylake and Zen2 processors
@ -153,19 +56,6 @@ template<> EIGEN_STRONG_INLINE Packet8f preciprocal<Packet8f>(const Packet8f& a)
#endif
F16_PACKET_FUNCTION(Packet8f, Packet8h, psin)
F16_PACKET_FUNCTION(Packet8f, Packet8h, pcos)
F16_PACKET_FUNCTION(Packet8f, Packet8h, plog)
F16_PACKET_FUNCTION(Packet8f, Packet8h, plog2)
F16_PACKET_FUNCTION(Packet8f, Packet8h, plog1p)
F16_PACKET_FUNCTION(Packet8f, Packet8h, pexpm1)
F16_PACKET_FUNCTION(Packet8f, Packet8h, pexp)
F16_PACKET_FUNCTION(Packet8f, Packet8h, ptanh)
F16_PACKET_FUNCTION(Packet8f, Packet8h, psqrt)
F16_PACKET_FUNCTION(Packet8f, Packet8h, prsqrt)
F16_PACKET_FUNCTION(Packet8f, Packet8h, preciprocal)
template <>
EIGEN_STRONG_INLINE Packet8h pfrexp(const Packet8h& a, Packet8h& exponent) {
Packet8f fexponent;
@ -179,18 +69,6 @@ EIGEN_STRONG_INLINE Packet8h pldexp(const Packet8h& a, const Packet8h& exponent)
return float2half(pldexp<Packet8f>(half2float(a), half2float(exponent)));
}
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, psin)
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pcos)
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog)
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog2)
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog1p)
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexpm1)
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexp)
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, ptanh)
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, psqrt)
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, prsqrt)
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, preciprocal)
template <>
EIGEN_STRONG_INLINE Packet8bf pfrexp(const Packet8bf& a, Packet8bf& exponent) {
Packet8f fexponent;
@ -204,6 +82,30 @@ EIGEN_STRONG_INLINE Packet8bf pldexp(const Packet8bf& a, const Packet8bf& expone
return F32ToBf16(pldexp<Packet8f>(Bf16ToF32(a), Bf16ToF32(exponent)));
}
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pcos)
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexp)
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexpm1)
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog)
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog1p)
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog2)
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, preciprocal)
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, prsqrt)
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, psin)
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, psqrt)
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, ptanh)
F16_PACKET_FUNCTION(Packet8f, Packet8h, pcos)
F16_PACKET_FUNCTION(Packet8f, Packet8h, pexp)
F16_PACKET_FUNCTION(Packet8f, Packet8h, pexpm1)
F16_PACKET_FUNCTION(Packet8f, Packet8h, plog)
F16_PACKET_FUNCTION(Packet8f, Packet8h, plog1p)
F16_PACKET_FUNCTION(Packet8f, Packet8h, plog2)
F16_PACKET_FUNCTION(Packet8f, Packet8h, preciprocal)
F16_PACKET_FUNCTION(Packet8f, Packet8h, prsqrt)
F16_PACKET_FUNCTION(Packet8f, Packet8h, psin)
F16_PACKET_FUNCTION(Packet8f, Packet8h, psqrt)
F16_PACKET_FUNCTION(Packet8f, Packet8h, ptanh)
} // end namespace internal
} // end namespace Eigen

View File

@ -15,117 +15,8 @@
namespace Eigen {
namespace internal {
#define EIGEN_DECLARE_CONST_Packet16f(NAME, X) \
const Packet16f p16f_##NAME = pset1<Packet16f>(X)
#define EIGEN_DECLARE_CONST_Packet16f_FROM_INT(NAME, X) \
const Packet16f p16f_##NAME = preinterpret<Packet16f,Packet16i>(pset1<Packet16i>(X))
#define EIGEN_DECLARE_CONST_Packet8d(NAME, X) \
const Packet8d p8d_##NAME = pset1<Packet8d>(X)
#define EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \
const Packet8d p8d_##NAME = _mm512_castsi512_pd(_mm512_set1_epi64(X))
#define EIGEN_DECLARE_CONST_Packet16bf(NAME, X) \
const Packet16bf p16bf_##NAME = pset1<Packet16bf>(X)
#define EIGEN_DECLARE_CONST_Packet16bf_FROM_INT(NAME, X) \
const Packet16bf p16bf_##NAME = preinterpret<Packet16bf,Packet16i>(pset1<Packet16i>(X))
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
plog<Packet16f>(const Packet16f& _x) {
return plog_float(_x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d
plog<Packet8d>(const Packet8d& _x) {
return plog_double(_x);
}
F16_PACKET_FUNCTION(Packet16f, Packet16h, plog)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog)
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
plog2<Packet16f>(const Packet16f& _x) {
return plog2_float(_x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d
plog2<Packet8d>(const Packet8d& _x) {
return plog2_double(_x);
}
F16_PACKET_FUNCTION(Packet16f, Packet16h, plog2)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog2)
// Exponential function. Works by writing "x = m*log(2) + r" where
// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
// "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1).
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
pexp<Packet16f>(const Packet16f& _x) {
EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
EIGEN_DECLARE_CONST_Packet16f(127, 127.0f);
EIGEN_DECLARE_CONST_Packet16f(exp_hi, 88.3762626647950f);
EIGEN_DECLARE_CONST_Packet16f(exp_lo, -88.3762626647949f);
EIGEN_DECLARE_CONST_Packet16f(cephes_LOG2EF, 1.44269504088896341f);
EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p0, 1.9875691500E-4f);
EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p1, 1.3981999507E-3f);
EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p2, 8.3334519073E-3f);
EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p3, 4.1665795894E-2f);
EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p4, 1.6666665459E-1f);
EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p5, 5.0000001201E-1f);
// Clamp x.
Packet16f x = pmax(pmin(_x, p16f_exp_hi), p16f_exp_lo);
// Express exp(x) as exp(m*ln(2) + r), start by extracting
// m = floor(x/ln(2) + 0.5).
Packet16f m = _mm512_floor_ps(pmadd(x, p16f_cephes_LOG2EF, p16f_half));
// Get r = x - m*ln(2). Note that we can do this without losing more than one
// ulp precision due to the FMA instruction.
EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);
Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x);
Packet16f r2 = pmul(r, r);
Packet16f r3 = pmul(r2, r);
// Evaluate the polynomial approximant,improved by instruction-level parallelism.
Packet16f y, y1, y2;
y = pmadd(p16f_cephes_exp_p0, r, p16f_cephes_exp_p1);
y1 = pmadd(p16f_cephes_exp_p3, r, p16f_cephes_exp_p4);
y2 = padd(r, p16f_1);
y = pmadd(y, r, p16f_cephes_exp_p2);
y1 = pmadd(y1, r, p16f_cephes_exp_p5);
y = pmadd(y, r3, y1);
y = pmadd(y, r2, y2);
// Build emm0 = 2^m.
Packet16i emm0 = _mm512_cvttps_epi32(padd(m, p16f_127));
emm0 = _mm512_slli_epi32(emm0, 23);
// Return 2^m * exp(r).
return pmax(pmul(y, _mm512_castsi512_ps(emm0)), _x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d
pexp<Packet8d>(const Packet8d& _x) {
return pexp_double(_x);
}
F16_PACKET_FUNCTION(Packet16f, Packet16h, pexp)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexp)
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet16f)
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet8d)
template <>
EIGEN_STRONG_INLINE Packet16h pfrexp(const Packet16h& a, Packet16h& exponent) {
@ -181,9 +72,6 @@ EIGEN_STRONG_INLINE Packet8d psqrt<Packet8d>(const Packet8d& x) {
}
#endif
F16_PACKET_FUNCTION(Packet16f, Packet16h, psqrt)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, psqrt)
// prsqrt for float.
#if defined(EIGEN_VECTORIZE_AVX512ER)
template <>
@ -199,8 +87,6 @@ prsqrt<Packet16f>(const Packet16f& _x) {
}
#endif
F16_PACKET_FUNCTION(Packet16f, Packet16h, prsqrt)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, prsqrt)
// prsqrt for double.
#if EIGEN_FAST_MATH
@ -223,80 +109,28 @@ template<> EIGEN_STRONG_INLINE Packet16f preciprocal<Packet16f>(const Packet16f&
}
#endif
F16_PACKET_FUNCTION(Packet16f, Packet16h, preciprocal)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, preciprocal)
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet16f plog1p<Packet16f>(const Packet16f& _x) {
return generic_plog1p(_x);
}
F16_PACKET_FUNCTION(Packet16f, Packet16h, plog1p)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog1p)
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet16f pexpm1<Packet16f>(const Packet16f& _x) {
return generic_expm1(_x);
}
F16_PACKET_FUNCTION(Packet16f, Packet16h, pexpm1)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexpm1)
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
psin<Packet16f>(const Packet16f& _x) {
return psin_float(_x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
pcos<Packet16f>(const Packet16f& _x) {
return pcos_float(_x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
pacos<Packet16f>(const Packet16f& _x) {
return pacos_float(_x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
pasin<Packet16f>(const Packet16f& _x) {
return pasin_float(_x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
patan<Packet16f>(const Packet16f& _x) {
return patan_float(_x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
patanh<Packet16f>(const Packet16f& _x) {
return patanh_float(_x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d
patan<Packet8d>(const Packet8d& _x) {
return patan_double(_x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
ptanh<Packet16f>(const Packet16f& _x) {
return internal::generic_fast_tanh_float(_x);
}
F16_PACKET_FUNCTION(Packet16f, Packet16h, psin)
F16_PACKET_FUNCTION(Packet16f, Packet16h, pcos)
F16_PACKET_FUNCTION(Packet16f, Packet16h, ptanh)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, psin)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pcos)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexp)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexpm1)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog1p)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog2)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, preciprocal)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, prsqrt)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, psin)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, psqrt)
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, ptanh)
F16_PACKET_FUNCTION(Packet16f, Packet16h, pcos)
F16_PACKET_FUNCTION(Packet16f, Packet16h, pexp)
F16_PACKET_FUNCTION(Packet16f, Packet16h, pexpm1)
F16_PACKET_FUNCTION(Packet16f, Packet16h, plog)
F16_PACKET_FUNCTION(Packet16f, Packet16h, plog1p)
F16_PACKET_FUNCTION(Packet16f, Packet16h, plog2)
F16_PACKET_FUNCTION(Packet16f, Packet16h, preciprocal)
F16_PACKET_FUNCTION(Packet16f, Packet16h, prsqrt)
F16_PACKET_FUNCTION(Packet16f, Packet16h, psin)
F16_PACKET_FUNCTION(Packet16f, Packet16h, psqrt)
F16_PACKET_FUNCTION(Packet16f, Packet16h, ptanh)
} // end namespace internal

View File

@ -18,53 +18,10 @@ namespace Eigen {
namespace internal {
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f plog<Packet4f>(const Packet4f& _x)
{
return plog_float(_x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f pexp<Packet4f>(const Packet4f& _x)
{
return pexp_float(_x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f psin<Packet4f>(const Packet4f& _x)
{
return psin_float(_x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f pcos<Packet4f>(const Packet4f& _x)
{
return pcos_float(_x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f pacos<Packet4f>(const Packet4f& _x)
{
return pacos_float(_x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f pasin<Packet4f>(const Packet4f& _x)
{
return pasin_float(_x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f patan<Packet4f>(const Packet4f& _x)
{
return patan_float(_x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f patanh<Packet4f>(const Packet4f& _x)
{
return patanh_float(_x);
}
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet4f)
#ifdef EIGEN_VECTORIZE_VSX
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet2d)
#endif
#ifdef EIGEN_VECTORIZE_VSX
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
@ -96,19 +53,8 @@ Packet2d prsqrt<Packet2d>(const Packet2d& x)
// return vec_rsqrt(x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet2d patan<Packet2d>(const Packet2d& _x)
{
return patan_double(_x);
}
#endif
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet2d pexp<Packet2d>(const Packet2d& _x)
{
return pexp_double(_x);
}
template<> EIGEN_STRONG_INLINE Packet8bf psqrt<Packet8bf> (const Packet8bf& a){
BF16_TO_F32_UNARY_OP_WRAPPER(psqrt<Packet4f>, a);
}
@ -130,13 +76,6 @@ Packet4f psqrt<Packet4f>(const Packet4f& x)
}
#endif
// Hyperbolic Tangent function.
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f
ptanh<Packet4f>(const Packet4f& x) {
return internal::generic_fast_tanh_float(x);
}
} // end namespace internal
} // end namespace Eigen

View File

@ -109,7 +109,7 @@ template<typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet patan_double(const Packet& x);
/** \internal \returns atan(x) for single precision float */
/** \internal \returns atanh(x) for single precision float */
template<typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet patanh_float(const Packet& x);
@ -126,6 +126,44 @@ Packet pdiv_complex(const Packet& x, const Packet& y);
template <typename Packet, int N> struct ppolevl;
// Macros for instantiating these generic functions for different backends.
#define EIGEN_PACKET_FUNCTION(METHOD, SCALAR, PACKET) \
template <> \
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET p##METHOD<PACKET>(const PACKET& _x) { \
return p##METHOD##_##SCALAR(_x); \
}
#define EIGEN_FLOAT_PACKET_FUNCTION(METHOD, PACKET) EIGEN_PACKET_FUNCTION(METHOD, float, PACKET)
#define EIGEN_DOUBLE_PACKET_FUNCTION(METHOD, PACKET) EIGEN_PACKET_FUNCTION(METHOD, double, PACKET)
#define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(PACKET) \
EIGEN_FLOAT_PACKET_FUNCTION(sin, PACKET) \
EIGEN_FLOAT_PACKET_FUNCTION(cos, PACKET) \
EIGEN_FLOAT_PACKET_FUNCTION(asin, PACKET) \
EIGEN_FLOAT_PACKET_FUNCTION(acos, PACKET) \
EIGEN_FLOAT_PACKET_FUNCTION(atan, PACKET) \
EIGEN_FLOAT_PACKET_FUNCTION(atanh, PACKET) \
EIGEN_FLOAT_PACKET_FUNCTION(log, PACKET) \
EIGEN_FLOAT_PACKET_FUNCTION(log2, PACKET) \
EIGEN_FLOAT_PACKET_FUNCTION(exp, PACKET) \
template <> \
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET pexpm1<PACKET>(const PACKET& _x) { \
return internal::generic_expm1(_x); \
} \
template <> \
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET plog1p<PACKET>(const PACKET& _x) { \
return internal::generic_plog1p(_x); \
} \
template <> \
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET ptanh<PACKET>(const PACKET& _x) { \
return internal::generic_fast_tanh_float(_x); \
}
#define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(PACKET) \
EIGEN_DOUBLE_PACKET_FUNCTION(atan, PACKET) \
EIGEN_DOUBLE_PACKET_FUNCTION(log, PACKET) \
EIGEN_DOUBLE_PACKET_FUNCTION(log2, PACKET) \
EIGEN_DOUBLE_PACKET_FUNCTION(exp, PACKET)
} // end namespace internal
} // end namespace Eigen

View File

@ -14,52 +14,9 @@ namespace Eigen {
namespace internal {
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f pexp<Packet2f>(const Packet2f& x)
{ return pexp_float(x); }
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pexp<Packet4f>(const Packet4f& x)
{ return pexp_float(x); }
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f plog<Packet2f>(const Packet2f& x)
{ return plog_float(x); }
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f plog<Packet4f>(const Packet4f& x)
{ return plog_float(x); }
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f psin<Packet2f>(const Packet2f& x)
{ return psin_float(x); }
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f psin<Packet4f>(const Packet4f& x)
{ return psin_float(x); }
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f pcos<Packet2f>(const Packet2f& x)
{ return pcos_float(x); }
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pcos<Packet4f>(const Packet4f& x)
{ return pcos_float(x); }
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f pacos<Packet2f>(const Packet2f& x)
{ return pacos_float(x); }
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pacos<Packet4f>(const Packet4f& x)
{ return pacos_float(x); }
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f pasin<Packet2f>(const Packet2f& x)
{ return pasin_float(x); }
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pasin<Packet4f>(const Packet4f& x)
{ return pasin_float(x); }
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f patan<Packet2f>(const Packet2f& x)
{ return patan_float(x); }
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f patan<Packet4f>(const Packet4f& x)
{ return patan_float(x); }
// Hyperbolic Tangent function.
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f ptanh<Packet2f>(const Packet2f& x)
{ return internal::generic_fast_tanh_float(x); }
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f ptanh<Packet4f>(const Packet4f& x)
{ return internal::generic_fast_tanh_float(x); }
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f patanh<Packet2f>(const Packet2f& x)
{ return patanh_float(x); }
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f patanh<Packet4f>(const Packet4f& x)
{ return patanh_float(x); }
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet2f)
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet4f)
#if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
template <>
@ -79,7 +36,6 @@ Packet8hf ptanh<Packet8hf>(const Packet8hf& x) {
}
#endif // EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
BF16_PACKET_FUNCTION(Packet4f, Packet4bf, psin)
BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pcos)
BF16_PACKET_FUNCTION(Packet4f, Packet4bf, plog)
@ -102,14 +58,8 @@ EIGEN_STRONG_INLINE Packet4bf pldexp(const Packet4bf& a, const Packet4bf& expone
//---------- double ----------
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d pexp<Packet2d>(const Packet2d& x)
{ return pexp_double(x); }
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d plog<Packet2d>(const Packet2d& x)
{ return plog_double(x); }
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d patan<Packet2d>(const Packet2d& x)
{ return patan_double(x); }
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet2d)
#endif

View File

@ -21,88 +21,8 @@ namespace Eigen {
namespace internal {
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f plog<Packet4f>(const Packet4f& _x) {
return plog_float(_x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet2d plog<Packet2d>(const Packet2d& _x) {
return plog_double(_x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f plog2<Packet4f>(const Packet4f& _x) {
return plog2_float(_x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet2d plog2<Packet2d>(const Packet2d& _x) {
return plog2_double(_x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f plog1p<Packet4f>(const Packet4f& _x) {
return generic_plog1p(_x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f pexpm1<Packet4f>(const Packet4f& _x) {
return generic_expm1(_x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f pexp<Packet4f>(const Packet4f& _x)
{
return pexp_float(_x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet2d pexp<Packet2d>(const Packet2d& x)
{
return pexp_double(x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f psin<Packet4f>(const Packet4f& _x)
{
return psin_float(_x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f pcos<Packet4f>(const Packet4f& _x)
{
return pcos_float(_x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f pacos<Packet4f>(const Packet4f& _x)
{
return pacos_float(_x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f pasin<Packet4f>(const Packet4f& _x)
{
return pasin_float(_x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f patan<Packet4f>(const Packet4f& _x)
{
return patan_float(_x);
}
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet2d patan<Packet2d>(const Packet2d& _x) {
return patan_double(_x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f patanh<Packet4f>(const Packet4f& _x)
{
return patanh_float(_x);
}
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet4f)
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet2d)
// Notice that for newer processors, it is counterproductive to use Newton
// iteration for square root. In particular, Skylake and Zen2 processors
@ -133,15 +53,6 @@ template<> EIGEN_STRONG_INLINE Packet4f preciprocal<Packet4f>(const Packet4f& x)
#endif
// Hyperbolic Tangent function.
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f
ptanh<Packet4f>(const Packet4f& x) {
return internal::generic_fast_tanh_float(x);
}
} // end namespace internal
namespace numext {