Vectorize atanh & add a missing definition and unit test for atan.

This commit is contained in:
Rasmus Munk Larsen 2023-02-21 03:14:05 +00:00
parent 049a144798
commit ce62177b5b
17 changed files with 179 additions and 25 deletions

View File

@ -82,6 +82,7 @@ struct default_packet_traits
HasASin = 0,
HasACos = 0,
HasATan = 0,
HasATanh = 0,
HasSinh = 0,
HasCosh = 0,
HasTanh = 0,
@ -857,9 +858,6 @@ Packet pasin(const Packet& a) { EIGEN_USING_STD(asin); return asin(a); }
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pacos(const Packet& a) { EIGEN_USING_STD(acos); return acos(a); }
/** \internal \returns the arc tangent of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet patan(const Packet& a) { EIGEN_USING_STD(atan); return atan(a); }
/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
@ -869,10 +867,18 @@ Packet psinh(const Packet& a) { EIGEN_USING_STD(sinh); return sinh(a); }
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pcosh(const Packet& a) { EIGEN_USING_STD(cosh); return cosh(a); }
/** \internal \returns the arc tangent of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet patan(const Packet& a) { EIGEN_USING_STD(atan); return atan(a); }
/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet ptanh(const Packet& a) { EIGEN_USING_STD(tanh); return tanh(a); }
/** \internal \returns the arc tangent of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet patanh(const Packet& a) { EIGEN_USING_STD(atanh); return atanh(a); }
/** \internal \returns the exp of \a a (coeff-wise) */
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pexp(const Packet& a) { EIGEN_USING_STD(exp); return exp(a); }

View File

@ -56,6 +56,12 @@ patan<Packet4d>(const Packet4d& _x) {
return patan_double(_x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
patanh<Packet8f>(const Packet8f& _x) {
return patanh_float(_x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
plog<Packet8f>(const Packet8f& _x) {

View File

@ -76,6 +76,7 @@ template<> struct packet_traits<float> : default_packet_traits
HasACos = 1,
HasASin = 1,
HasATan = 1,
HasATanh = 1,
HasLog = 1,
HasLog1p = 1,
HasExpm1 = 1,

View File

@ -275,6 +275,12 @@ patan<Packet16f>(const Packet16f& _x) {
return patan_float(_x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
patanh<Packet16f>(const Packet16f& _x) {
return patanh_float(_x);
}
template <>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d
patan<Packet8d>(const Packet8d& _x) {

View File

@ -125,6 +125,7 @@ template<> struct packet_traits<float> : default_packet_traits
HasACos = 1,
HasASin = 1,
HasATan = 1,
HasATanh = 1,
#if EIGEN_HAS_AVX512_MATH
HasLog = 1,
HasLog1p = 1,

View File

@ -60,6 +60,12 @@ Packet4f patan<Packet4f>(const Packet4f& _x)
return patan_float(_x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f patanh<Packet4f>(const Packet4f& _x)
{
return patanh_float(_x);
}
#ifdef EIGEN_VECTORIZE_VSX
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f psqrt<Packet4f>(const Packet4f& x)

View File

@ -171,6 +171,7 @@ struct packet_traits<float> : default_packet_traits {
HasACos = 1,
HasASin = 1,
HasATan = 1,
HasATanh = 1,
HasLog = 1,
HasExp = 1,
#ifdef EIGEN_VECTORIZE_VSX

View File

@ -967,6 +967,34 @@ Packet patan_double(const Packet& x_in) {
return pxor(p, x_signmask);
}
template<typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet patanh_float(const Packet& x) {
typedef typename unpacket_traits<Packet>::type Scalar;
static_assert(std::is_same<Scalar, float>::value, "Scalar type must be float");
const Packet half = pset1<Packet>(0.5f);
const Packet x_gt_half = pcmp_le(half, pabs(x));
// For |x| in [0:0.5] we use a polynomial approximation of the form
// P(x) = x + x^3*(c3 + x^2 * (c5 + x^2 * (... x^2 * c11) ... )).
const Packet C3 = pset1<Packet>(0.3333373963832855224609375f);
const Packet C5 = pset1<Packet>(0.1997792422771453857421875f);
const Packet C7 = pset1<Packet>(0.14672131836414337158203125f);
const Packet C9 = pset1<Packet>(8.2311116158962249755859375e-2f);
const Packet C11 = pset1<Packet>(0.1819281280040740966796875f);
const Packet x2 = pmul(x,x);
Packet p = pmadd(C11, x2, C9);
p = pmadd(x2, p, C7);
p = pmadd(x2, p, C5);
p = pmadd(x2, p, C3);
p = pmadd(pmul(x,x2), p, x);
// For |x| in ]0.5:1.0] we use atanh = 0.5*ln((1+x)/(1-x));
const Packet one = pset1<Packet>(1.0f);
Packet r = pdiv(padd(one, x), psub(one, x));
r = pmul(half, plog(r));
return pselect(x_gt_half, r, p);
}
template<typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet pdiv_complex(const Packet& x, const Packet& y) {

View File

@ -109,6 +109,11 @@ template<typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet patan_double(const Packet& x);
/** \internal \returns atan(x) for single precision float */
template<typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet patanh_float(const Packet& x);
/** \internal \returns sqrt(x) for complex types */
template<typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS

View File

@ -779,6 +779,12 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half asin(const half& a) {
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half acos(const half& a) {
return half(::acosf(float(a)));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half atan(const half& a) {
return half(::atanf(float(a)));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half atanh(const half& a) {
return half(::atanhf(float(a)));
}
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
#if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300) || \
defined(EIGEN_HIP_DEVICE_COMPILE)

View File

@ -55,6 +55,12 @@ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f ptanh<Pa
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f ptanh<Packet4f>(const Packet4f& x)
{ return internal::generic_fast_tanh_float(x); }
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f patanh<Packet2f>(const Packet2f& x)
{ return patanh_float(x); }
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f patanh<Packet4f>(const Packet4f& x)
{ return patanh_float(x); }
#if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
template <>
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC

View File

@ -215,6 +215,7 @@ struct packet_traits<float> : default_packet_traits
HasACos = 1,
HasASin = 1,
HasATan = 1,
HasATanh = 1,
HasLog = 1,
HasExp = 1,
HasSqrt = 1,

View File

@ -81,11 +81,6 @@ Packet4f pacos<Packet4f>(const Packet4f& _x)
return pacos_float(_x);
}
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet2d patan<Packet2d>(const Packet2d& _x) {
return patan_double(_x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f pasin<Packet4f>(const Packet4f& _x)
{
@ -98,6 +93,17 @@ Packet4f patan<Packet4f>(const Packet4f& _x)
return patan_float(_x);
}
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet2d patan<Packet2d>(const Packet2d& _x) {
return patan_double(_x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet4f patanh<Packet4f>(const Packet4f& _x)
{
return patanh_float(_x);
}
// Notice that for newer processors, it is counterproductive to use Newton
// iteration for square root. In particular, Skylake and Zen2 processors
// have approximately doubled throughput of the _mm_sqrt_ps instruction

View File

@ -142,6 +142,7 @@ struct packet_traits<float> : default_packet_traits {
HasACos = 1,
HasASin = 1,
HasATan = 1,
HasATanh = 1,
HasLog = 1,
HasLog1p = 1,
HasExpm1 = 1,

View File

@ -622,11 +622,16 @@ struct functor_traits<scalar_tanh_op<Scalar> > {
template <typename Scalar>
struct scalar_atanh_op {
EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { return numext::atanh(a); }
template <typename Packet>
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const { return patanh(x); }
};
template <typename Scalar>
struct functor_traits<scalar_atanh_op<Scalar> > {
enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };
enum {
Cost = 5 * NumTraits<Scalar>::MulCost,
PacketAccess = packet_traits<Scalar>::HasATanh
};
};
/** \internal

View File

@ -93,27 +93,91 @@ void binary_op_test(std::string name, Fn fun, RefFn ref) {
VERIFY(all_pass);
}
#define BINARY_FUNCTOR_TEST_ARGS(fun) #fun, \
[](const auto& x, const auto& y) { return (Eigen::fun)(x, y); }, \
[](const auto& x, const auto& y) { return (std::fun)(x, y); }
template <typename Scalar>
void binary_ops_test() {
binary_op_test<Scalar>("pow",
[](const auto& x, const auto& y) { return Eigen::pow(x, y); },
[](const auto& x, const auto& y) { return std::pow(x, y); });
binary_op_test<Scalar>("atan2",
[](const auto& x, const auto& y) { return Eigen::atan2(x, y); },
[](const auto& x, const auto& y) {
auto t = std::atan2(x, y);
#if EIGEN_COMP_MSVC
// Work around MSVC return value on underflow.
// |atan(y/x)| is bounded above by |y/x|, so on underflow return y/x according to POSIX spec.
// MSVC otherwise returns denorm_min.
if (EIGEN_PREDICT_FALSE(std::abs(t) == std::numeric_limits<decltype(t)>::denorm_min())) {
return x/y;
}
binary_op_test<Scalar>(BINARY_FUNCTOR_TEST_ARGS(pow));
#ifndef EIGEN_COMP_MSVC
binary_op_test<Scalar>(BINARY_FUNCTOR_TEST_ARGS(atan2));
#else
binary_op_test<Scalar>(
"atan2", [](const auto& x, const auto& y) { return Eigen::atan2(x, y); },
[](Scalar x, Scalar y) {
auto t = Scalar(std::atan2(x, y));
// Work around MSVC return value on underflow.
// |atan(y/x)| is bounded above by |y/x|, so on underflow return y/x according to POSIX spec.
// MSVC otherwise returns denorm_min.
if (EIGEN_PREDICT_FALSE(std::abs(t) == std::numeric_limits<decltype(t)>::denorm_min())) {
return x / y;
}
return t;
});
#endif
return t;
});
}
template <typename Scalar, typename Fn, typename RefFn>
void unary_op_test(std::string name, Fn fun, RefFn ref) {
const Scalar tol = test_precision<Scalar>();
auto values = special_values<Scalar>();
Map<Array<Scalar, Dynamic, 1>> x(values.data(), values.size());
Array<Scalar, Dynamic, Dynamic> actual = fun(x);
bool all_pass = true;
for (Index i = 0; i < x.size(); ++i) {
Scalar e = static_cast<Scalar>(ref(x(i)));
Scalar a = actual(i);
bool success = (a == e) || ((numext::isfinite)(e) && internal::isApprox(a, e, tol)) ||
((numext::isnan)(a) && (numext::isnan)(e));
if ((a == a) && (e == e)) success &= (bool)numext::signbit(e) == (bool)numext::signbit(a);
all_pass &= success;
if (!success) {
std::cout << name << "(" << x(i) << ") = " << a << " != " << e << std::endl;
}
}
VERIFY(all_pass);
}
#define UNARY_FUNCTOR_TEST_ARGS(fun) #fun, \
[](const auto& x) { return (Eigen::fun)(x); }, \
[](const auto& x) { return (std::fun)(x); }
template <typename Scalar>
void unary_ops_test() {
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(sqrt));
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(exp));
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(log));
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(sin));
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(cos));
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(tan));
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(asin));
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(acos));
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(atan));
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(sinh));
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(cosh));
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(tanh));
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(asinh));
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(acosh));
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(atanh));
/* FIXME: Enable when the behavior of rsqrt on denormals for half and double is fixed.
unary_op_test<Scalar>("rsqrt",
[](const auto& x) { return Eigen::rsqrt(x); },
[](Scalar x) {
if (x >= 0 && x < (std::numeric_limits<Scalar>::min)()) {
// rsqrt return +inf for positive subnormals.
return NumTraits<Scalar>::infinity();
} else {
return Scalar(std::sqrt(Scalar(1)/x));
}
});
*/
}
template <typename Scalar>
void pow_scalar_exponent_test() {
using Int_t = typename internal::make_integer<Scalar>::type;
@ -630,6 +694,7 @@ template<typename ArrayType> void array_real(const ArrayType& m)
VERIFY_IS_APPROX(m1.tanh().atanh(), atanh(tanh(m1)));
VERIFY_IS_APPROX(m1.sinh().asinh(), asinh(sinh(m1)));
VERIFY_IS_APPROX(m1.cosh().acosh(), acosh(cosh(m1)));
VERIFY_IS_APPROX(m1.tanh().atanh(), atanh(tanh(m1)));
VERIFY_IS_APPROX(m1.logistic(), logistic(m1));
VERIFY_IS_APPROX(m1.arg(), arg(m1));
@ -722,6 +787,7 @@ template<typename ArrayType> void array_real(const ArrayType& m)
VERIFY_IS_APPROX(m3.pow(RealScalar(-2)), m3.square().inverse());
// Test pow and atan2 on special IEEE values.
unary_ops_test<Scalar>();
binary_ops_test<Scalar>();
pow_scalar_exponent_test<Scalar>();

View File

@ -879,6 +879,9 @@ void packetmath_real() {
}
CHECK_CWISE1_IF(PacketTraits::HasASin, std::asin, internal::pasin);
CHECK_CWISE1_IF(PacketTraits::HasACos, std::acos, internal::pacos);
CHECK_CWISE1_IF(PacketTraits::HasATan, std::atan, internal::patan);
CHECK_CWISE1_IF(PacketTraits::HasATanh, std::atanh, internal::patanh);
for (int i = 0; i < size; ++i) {
data1[i] = Scalar(internal::random<double>(-87, 88));