mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-10 02:39:03 +08:00
Clean up generic packetmath specializations for various backends with the help of a macro.
This commit is contained in:
parent
e8fdf127c6
commit
d6235d76db
@ -20,105 +20,8 @@ namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
|
||||
psin<Packet8f>(const Packet8f& _x) {
|
||||
return psin_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
|
||||
pcos<Packet8f>(const Packet8f& _x) {
|
||||
return pcos_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
|
||||
pasin<Packet8f>(const Packet8f& _x) {
|
||||
return pasin_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
|
||||
pacos<Packet8f>(const Packet8f& _x) {
|
||||
return pacos_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
|
||||
patan<Packet8f>(const Packet8f& _x) {
|
||||
return patan_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d
|
||||
patan<Packet4d>(const Packet4d& _x) {
|
||||
return patan_double(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
|
||||
patanh<Packet8f>(const Packet8f& _x) {
|
||||
return patanh_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
|
||||
plog<Packet8f>(const Packet8f& _x) {
|
||||
return plog_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d
|
||||
plog<Packet4d>(const Packet4d& _x) {
|
||||
return plog_double(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
|
||||
plog2<Packet8f>(const Packet8f& _x) {
|
||||
return plog2_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d
|
||||
plog2<Packet4d>(const Packet4d& _x) {
|
||||
return plog2_double(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet8f plog1p<Packet8f>(const Packet8f& _x) {
|
||||
return generic_plog1p(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet8f pexpm1<Packet8f>(const Packet8f& _x) {
|
||||
return generic_expm1(_x);
|
||||
}
|
||||
|
||||
// Exponential function. Works by writing "x = m*log(2) + r" where
|
||||
// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
|
||||
// "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1).
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
|
||||
pexp<Packet8f>(const Packet8f& _x) {
|
||||
return pexp_float(_x);
|
||||
}
|
||||
|
||||
// Hyperbolic Tangent function.
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
|
||||
ptanh<Packet8f>(const Packet8f& _x) {
|
||||
return internal::generic_fast_tanh_float(_x);
|
||||
}
|
||||
|
||||
// Exponential function for doubles.
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d
|
||||
pexp<Packet4d>(const Packet4d& _x) {
|
||||
return pexp_double(_x);
|
||||
}
|
||||
|
||||
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet8f)
|
||||
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet4d)
|
||||
|
||||
// Notice that for newer processors, it is counterproductive to use Newton
|
||||
// iteration for square root. In particular, Skylake and Zen2 processors
|
||||
@ -153,19 +56,6 @@ template<> EIGEN_STRONG_INLINE Packet8f preciprocal<Packet8f>(const Packet8f& a)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, psin)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, pcos)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, plog)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, plog2)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, plog1p)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, pexpm1)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, pexp)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, ptanh)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, psqrt)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, prsqrt)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, preciprocal)
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8h pfrexp(const Packet8h& a, Packet8h& exponent) {
|
||||
Packet8f fexponent;
|
||||
@ -179,18 +69,6 @@ EIGEN_STRONG_INLINE Packet8h pldexp(const Packet8h& a, const Packet8h& exponent)
|
||||
return float2half(pldexp<Packet8f>(half2float(a), half2float(exponent)));
|
||||
}
|
||||
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, psin)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pcos)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog2)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog1p)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexpm1)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexp)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, ptanh)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, psqrt)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, prsqrt)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, preciprocal)
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet8bf pfrexp(const Packet8bf& a, Packet8bf& exponent) {
|
||||
Packet8f fexponent;
|
||||
@ -204,6 +82,30 @@ EIGEN_STRONG_INLINE Packet8bf pldexp(const Packet8bf& a, const Packet8bf& expone
|
||||
return F32ToBf16(pldexp<Packet8f>(Bf16ToF32(a), Bf16ToF32(exponent)));
|
||||
}
|
||||
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pcos)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexp)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexpm1)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog1p)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog2)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, preciprocal)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, prsqrt)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, psin)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, psqrt)
|
||||
BF16_PACKET_FUNCTION(Packet8f, Packet8bf, ptanh)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, pcos)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, pexp)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, pexpm1)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, plog)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, plog1p)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, plog2)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, preciprocal)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, prsqrt)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, psin)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, psqrt)
|
||||
F16_PACKET_FUNCTION(Packet8f, Packet8h, ptanh)
|
||||
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
@ -15,117 +15,8 @@
|
||||
namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
#define EIGEN_DECLARE_CONST_Packet16f(NAME, X) \
|
||||
const Packet16f p16f_##NAME = pset1<Packet16f>(X)
|
||||
|
||||
#define EIGEN_DECLARE_CONST_Packet16f_FROM_INT(NAME, X) \
|
||||
const Packet16f p16f_##NAME = preinterpret<Packet16f,Packet16i>(pset1<Packet16i>(X))
|
||||
|
||||
#define EIGEN_DECLARE_CONST_Packet8d(NAME, X) \
|
||||
const Packet8d p8d_##NAME = pset1<Packet8d>(X)
|
||||
|
||||
#define EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \
|
||||
const Packet8d p8d_##NAME = _mm512_castsi512_pd(_mm512_set1_epi64(X))
|
||||
|
||||
#define EIGEN_DECLARE_CONST_Packet16bf(NAME, X) \
|
||||
const Packet16bf p16bf_##NAME = pset1<Packet16bf>(X)
|
||||
|
||||
#define EIGEN_DECLARE_CONST_Packet16bf_FROM_INT(NAME, X) \
|
||||
const Packet16bf p16bf_##NAME = preinterpret<Packet16bf,Packet16i>(pset1<Packet16i>(X))
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
|
||||
plog<Packet16f>(const Packet16f& _x) {
|
||||
return plog_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d
|
||||
plog<Packet8d>(const Packet8d& _x) {
|
||||
return plog_double(_x);
|
||||
}
|
||||
|
||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, plog)
|
||||
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog)
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
|
||||
plog2<Packet16f>(const Packet16f& _x) {
|
||||
return plog2_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d
|
||||
plog2<Packet8d>(const Packet8d& _x) {
|
||||
return plog2_double(_x);
|
||||
}
|
||||
|
||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, plog2)
|
||||
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog2)
|
||||
|
||||
// Exponential function. Works by writing "x = m*log(2) + r" where
|
||||
// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
|
||||
// "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1).
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
|
||||
pexp<Packet16f>(const Packet16f& _x) {
|
||||
EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
|
||||
EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
|
||||
EIGEN_DECLARE_CONST_Packet16f(127, 127.0f);
|
||||
|
||||
EIGEN_DECLARE_CONST_Packet16f(exp_hi, 88.3762626647950f);
|
||||
EIGEN_DECLARE_CONST_Packet16f(exp_lo, -88.3762626647949f);
|
||||
|
||||
EIGEN_DECLARE_CONST_Packet16f(cephes_LOG2EF, 1.44269504088896341f);
|
||||
|
||||
EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p0, 1.9875691500E-4f);
|
||||
EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p1, 1.3981999507E-3f);
|
||||
EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p2, 8.3334519073E-3f);
|
||||
EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p3, 4.1665795894E-2f);
|
||||
EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p4, 1.6666665459E-1f);
|
||||
EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p5, 5.0000001201E-1f);
|
||||
|
||||
// Clamp x.
|
||||
Packet16f x = pmax(pmin(_x, p16f_exp_hi), p16f_exp_lo);
|
||||
|
||||
// Express exp(x) as exp(m*ln(2) + r), start by extracting
|
||||
// m = floor(x/ln(2) + 0.5).
|
||||
Packet16f m = _mm512_floor_ps(pmadd(x, p16f_cephes_LOG2EF, p16f_half));
|
||||
|
||||
// Get r = x - m*ln(2). Note that we can do this without losing more than one
|
||||
// ulp precision due to the FMA instruction.
|
||||
EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);
|
||||
Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x);
|
||||
Packet16f r2 = pmul(r, r);
|
||||
Packet16f r3 = pmul(r2, r);
|
||||
|
||||
// Evaluate the polynomial approximant,improved by instruction-level parallelism.
|
||||
Packet16f y, y1, y2;
|
||||
y = pmadd(p16f_cephes_exp_p0, r, p16f_cephes_exp_p1);
|
||||
y1 = pmadd(p16f_cephes_exp_p3, r, p16f_cephes_exp_p4);
|
||||
y2 = padd(r, p16f_1);
|
||||
y = pmadd(y, r, p16f_cephes_exp_p2);
|
||||
y1 = pmadd(y1, r, p16f_cephes_exp_p5);
|
||||
y = pmadd(y, r3, y1);
|
||||
y = pmadd(y, r2, y2);
|
||||
|
||||
// Build emm0 = 2^m.
|
||||
Packet16i emm0 = _mm512_cvttps_epi32(padd(m, p16f_127));
|
||||
emm0 = _mm512_slli_epi32(emm0, 23);
|
||||
|
||||
// Return 2^m * exp(r).
|
||||
return pmax(pmul(y, _mm512_castsi512_ps(emm0)), _x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d
|
||||
pexp<Packet8d>(const Packet8d& _x) {
|
||||
return pexp_double(_x);
|
||||
}
|
||||
|
||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, pexp)
|
||||
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexp)
|
||||
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet16f)
|
||||
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet8d)
|
||||
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet16h pfrexp(const Packet16h& a, Packet16h& exponent) {
|
||||
@ -181,9 +72,6 @@ EIGEN_STRONG_INLINE Packet8d psqrt<Packet8d>(const Packet8d& x) {
|
||||
}
|
||||
#endif
|
||||
|
||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, psqrt)
|
||||
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, psqrt)
|
||||
|
||||
// prsqrt for float.
|
||||
#if defined(EIGEN_VECTORIZE_AVX512ER)
|
||||
template <>
|
||||
@ -199,8 +87,6 @@ prsqrt<Packet16f>(const Packet16f& _x) {
|
||||
}
|
||||
#endif
|
||||
|
||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, prsqrt)
|
||||
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, prsqrt)
|
||||
|
||||
// prsqrt for double.
|
||||
#if EIGEN_FAST_MATH
|
||||
@ -223,80 +109,28 @@ template<> EIGEN_STRONG_INLINE Packet16f preciprocal<Packet16f>(const Packet16f&
|
||||
}
|
||||
#endif
|
||||
|
||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, preciprocal)
|
||||
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, preciprocal)
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet16f plog1p<Packet16f>(const Packet16f& _x) {
|
||||
return generic_plog1p(_x);
|
||||
}
|
||||
|
||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, plog1p)
|
||||
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog1p)
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet16f pexpm1<Packet16f>(const Packet16f& _x) {
|
||||
return generic_expm1(_x);
|
||||
}
|
||||
|
||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, pexpm1)
|
||||
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexpm1)
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
|
||||
psin<Packet16f>(const Packet16f& _x) {
|
||||
return psin_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
|
||||
pcos<Packet16f>(const Packet16f& _x) {
|
||||
return pcos_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
|
||||
pacos<Packet16f>(const Packet16f& _x) {
|
||||
return pacos_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
|
||||
pasin<Packet16f>(const Packet16f& _x) {
|
||||
return pasin_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
|
||||
patan<Packet16f>(const Packet16f& _x) {
|
||||
return patan_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
|
||||
patanh<Packet16f>(const Packet16f& _x) {
|
||||
return patanh_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d
|
||||
patan<Packet8d>(const Packet8d& _x) {
|
||||
return patan_double(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
|
||||
ptanh<Packet16f>(const Packet16f& _x) {
|
||||
return internal::generic_fast_tanh_float(_x);
|
||||
}
|
||||
|
||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, psin)
|
||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, pcos)
|
||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, ptanh)
|
||||
|
||||
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, psin)
|
||||
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pcos)
|
||||
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexp)
|
||||
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, pexpm1)
|
||||
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog)
|
||||
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog1p)
|
||||
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, plog2)
|
||||
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, preciprocal)
|
||||
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, prsqrt)
|
||||
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, psin)
|
||||
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, psqrt)
|
||||
BF16_PACKET_FUNCTION(Packet16f, Packet16bf, ptanh)
|
||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, pcos)
|
||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, pexp)
|
||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, pexpm1)
|
||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, plog)
|
||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, plog1p)
|
||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, plog2)
|
||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, preciprocal)
|
||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, prsqrt)
|
||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, psin)
|
||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, psqrt)
|
||||
F16_PACKET_FUNCTION(Packet16f, Packet16h, ptanh)
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
|
@ -18,53 +18,10 @@ namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f plog<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
return plog_float(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f pexp<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
return pexp_float(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f psin<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
return psin_float(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f pcos<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
return pcos_float(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f pacos<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
return pacos_float(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f pasin<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
return pasin_float(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f patan<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
return patan_float(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f patanh<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
return patanh_float(_x);
|
||||
}
|
||||
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet4f)
|
||||
#ifdef EIGEN_VECTORIZE_VSX
|
||||
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet2d)
|
||||
#endif
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_VSX
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
@ -96,19 +53,8 @@ Packet2d prsqrt<Packet2d>(const Packet2d& x)
|
||||
// return vec_rsqrt(x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet2d patan<Packet2d>(const Packet2d& _x)
|
||||
{
|
||||
return patan_double(_x);
|
||||
}
|
||||
#endif
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet2d pexp<Packet2d>(const Packet2d& _x)
|
||||
{
|
||||
return pexp_double(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet8bf psqrt<Packet8bf> (const Packet8bf& a){
|
||||
BF16_TO_F32_UNARY_OP_WRAPPER(psqrt<Packet4f>, a);
|
||||
}
|
||||
@ -130,13 +76,6 @@ Packet4f psqrt<Packet4f>(const Packet4f& x)
|
||||
}
|
||||
#endif
|
||||
|
||||
// Hyperbolic Tangent function.
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f
|
||||
ptanh<Packet4f>(const Packet4f& x) {
|
||||
return internal::generic_fast_tanh_float(x);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
@ -109,7 +109,7 @@ template<typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet patan_double(const Packet& x);
|
||||
|
||||
/** \internal \returns atan(x) for single precision float */
|
||||
/** \internal \returns atanh(x) for single precision float */
|
||||
template<typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet patanh_float(const Packet& x);
|
||||
@ -126,6 +126,44 @@ Packet pdiv_complex(const Packet& x, const Packet& y);
|
||||
|
||||
template <typename Packet, int N> struct ppolevl;
|
||||
|
||||
// Macros for instantiating these generic functions for different backends.
|
||||
#define EIGEN_PACKET_FUNCTION(METHOD, SCALAR, PACKET) \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET p##METHOD<PACKET>(const PACKET& _x) { \
|
||||
return p##METHOD##_##SCALAR(_x); \
|
||||
}
|
||||
|
||||
#define EIGEN_FLOAT_PACKET_FUNCTION(METHOD, PACKET) EIGEN_PACKET_FUNCTION(METHOD, float, PACKET)
|
||||
#define EIGEN_DOUBLE_PACKET_FUNCTION(METHOD, PACKET) EIGEN_PACKET_FUNCTION(METHOD, double, PACKET)
|
||||
|
||||
#define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(sin, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(cos, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(asin, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(acos, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(atan, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(atanh, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(log, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(log2, PACKET) \
|
||||
EIGEN_FLOAT_PACKET_FUNCTION(exp, PACKET) \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET pexpm1<PACKET>(const PACKET& _x) { \
|
||||
return internal::generic_expm1(_x); \
|
||||
} \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET plog1p<PACKET>(const PACKET& _x) { \
|
||||
return internal::generic_plog1p(_x); \
|
||||
} \
|
||||
template <> \
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_UNUSED PACKET ptanh<PACKET>(const PACKET& _x) { \
|
||||
return internal::generic_fast_tanh_float(_x); \
|
||||
}
|
||||
|
||||
#define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(PACKET) \
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(atan, PACKET) \
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(log, PACKET) \
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(log2, PACKET) \
|
||||
EIGEN_DOUBLE_PACKET_FUNCTION(exp, PACKET)
|
||||
|
||||
} // end namespace internal
|
||||
} // end namespace Eigen
|
||||
|
@ -14,52 +14,9 @@ namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f pexp<Packet2f>(const Packet2f& x)
|
||||
{ return pexp_float(x); }
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pexp<Packet4f>(const Packet4f& x)
|
||||
{ return pexp_float(x); }
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f plog<Packet2f>(const Packet2f& x)
|
||||
{ return plog_float(x); }
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f plog<Packet4f>(const Packet4f& x)
|
||||
{ return plog_float(x); }
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f psin<Packet2f>(const Packet2f& x)
|
||||
{ return psin_float(x); }
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f psin<Packet4f>(const Packet4f& x)
|
||||
{ return psin_float(x); }
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f pcos<Packet2f>(const Packet2f& x)
|
||||
{ return pcos_float(x); }
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pcos<Packet4f>(const Packet4f& x)
|
||||
{ return pcos_float(x); }
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f pacos<Packet2f>(const Packet2f& x)
|
||||
{ return pacos_float(x); }
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pacos<Packet4f>(const Packet4f& x)
|
||||
{ return pacos_float(x); }
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f pasin<Packet2f>(const Packet2f& x)
|
||||
{ return pasin_float(x); }
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pasin<Packet4f>(const Packet4f& x)
|
||||
{ return pasin_float(x); }
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f patan<Packet2f>(const Packet2f& x)
|
||||
{ return patan_float(x); }
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f patan<Packet4f>(const Packet4f& x)
|
||||
{ return patan_float(x); }
|
||||
|
||||
// Hyperbolic Tangent function.
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f ptanh<Packet2f>(const Packet2f& x)
|
||||
{ return internal::generic_fast_tanh_float(x); }
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f ptanh<Packet4f>(const Packet4f& x)
|
||||
{ return internal::generic_fast_tanh_float(x); }
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f patanh<Packet2f>(const Packet2f& x)
|
||||
{ return patanh_float(x); }
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f patanh<Packet4f>(const Packet4f& x)
|
||||
{ return patanh_float(x); }
|
||||
|
||||
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet2f)
|
||||
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet4f)
|
||||

|
||||
|
||||
#if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
|
||||
template <>
|
||||
@ -79,7 +36,6 @@ Packet8hf ptanh<Packet8hf>(const Packet8hf& x) {
|
||||
}
|
||||
#endif // EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
|
||||
|
||||
|
||||
BF16_PACKET_FUNCTION(Packet4f, Packet4bf, psin)
|
||||
BF16_PACKET_FUNCTION(Packet4f, Packet4bf, pcos)
|
||||
BF16_PACKET_FUNCTION(Packet4f, Packet4bf, plog)
|
||||
@ -102,14 +58,8 @@ EIGEN_STRONG_INLINE Packet4bf pldexp(const Packet4bf& a, const Packet4bf& expone
|
||||
//---------- double ----------
|
||||
|
||||
#if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d pexp<Packet2d>(const Packet2d& x)
|
||||
{ return pexp_double(x); }
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d plog<Packet2d>(const Packet2d& x)
|
||||
{ return plog_double(x); }
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d patan<Packet2d>(const Packet2d& x)
|
||||
{ return patan_double(x); }
|
||||
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet2d)
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -21,88 +21,8 @@ namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f plog<Packet4f>(const Packet4f& _x) {
|
||||
return plog_float(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet2d plog<Packet2d>(const Packet2d& _x) {
|
||||
return plog_double(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f plog2<Packet4f>(const Packet4f& _x) {
|
||||
return plog2_float(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet2d plog2<Packet2d>(const Packet2d& _x) {
|
||||
return plog2_double(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f plog1p<Packet4f>(const Packet4f& _x) {
|
||||
return generic_plog1p(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f pexpm1<Packet4f>(const Packet4f& _x) {
|
||||
return generic_expm1(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f pexp<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
return pexp_float(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet2d pexp<Packet2d>(const Packet2d& x)
|
||||
{
|
||||
return pexp_double(x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f psin<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
return psin_float(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f pcos<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
return pcos_float(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f pacos<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
return pacos_float(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f pasin<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
return pasin_float(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f patan<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
return patan_float(_x);
|
||||
}
|
||||
|
||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet2d patan<Packet2d>(const Packet2d& _x) {
|
||||
return patan_double(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f patanh<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
return patanh_float(_x);
|
||||
}
|
||||
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet4f)
|
||||
EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet2d)
|
||||
|
||||
// Notice that for newer processors, it is counterproductive to use Newton
|
||||
// iteration for square root. In particular, Skylake and Zen2 processors
|
||||
@ -133,15 +53,6 @@ template<> EIGEN_STRONG_INLINE Packet4f preciprocal<Packet4f>(const Packet4f& x)
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// Hyperbolic Tangent function.
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f
|
||||
ptanh<Packet4f>(const Packet4f& x) {
|
||||
return internal::generic_fast_tanh_float(x);
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
namespace numext {
|
||||
|
Loading…
x
Reference in New Issue
Block a user