mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-19 16:19:37 +08:00
Vectorize atanh & add a missing definition and unit test for atan.
This commit is contained in:
parent
049a144798
commit
ce62177b5b
@ -82,6 +82,7 @@ struct default_packet_traits
|
||||
HasASin = 0,
|
||||
HasACos = 0,
|
||||
HasATan = 0,
|
||||
HasATanh = 0,
|
||||
HasSinh = 0,
|
||||
HasCosh = 0,
|
||||
HasTanh = 0,
|
||||
@ -857,9 +858,6 @@ Packet pasin(const Packet& a) { EIGEN_USING_STD(asin); return asin(a); }
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pacos(const Packet& a) { EIGEN_USING_STD(acos); return acos(a); }
|
||||
|
||||
/** \internal \returns the arc tangent of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet patan(const Packet& a) { EIGEN_USING_STD(atan); return atan(a); }
|
||||
|
||||
/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
@ -869,10 +867,18 @@ Packet psinh(const Packet& a) { EIGEN_USING_STD(sinh); return sinh(a); }
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pcosh(const Packet& a) { EIGEN_USING_STD(cosh); return cosh(a); }
|
||||
|
||||
/** \internal \returns the arc tangent of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet patan(const Packet& a) { EIGEN_USING_STD(atan); return atan(a); }
|
||||
|
||||
/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet ptanh(const Packet& a) { EIGEN_USING_STD(tanh); return tanh(a); }
|
||||
|
||||
/** \internal \returns the arc tangent of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet patanh(const Packet& a) { EIGEN_USING_STD(atanh); return atanh(a); }
|
||||
|
||||
/** \internal \returns the exp of \a a (coeff-wise) */
|
||||
template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pexp(const Packet& a) { EIGEN_USING_STD(exp); return exp(a); }
|
||||
|
@ -56,6 +56,12 @@ patan<Packet4d>(const Packet4d& _x) {
|
||||
return patan_double(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
|
||||
patanh<Packet8f>(const Packet8f& _x) {
|
||||
return patanh_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f
|
||||
plog<Packet8f>(const Packet8f& _x) {
|
||||
|
@ -76,6 +76,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
HasACos = 1,
|
||||
HasASin = 1,
|
||||
HasATan = 1,
|
||||
HasATanh = 1,
|
||||
HasLog = 1,
|
||||
HasLog1p = 1,
|
||||
HasExpm1 = 1,
|
||||
|
@ -275,6 +275,12 @@ patan<Packet16f>(const Packet16f& _x) {
|
||||
return patan_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16f
|
||||
patanh<Packet16f>(const Packet16f& _x) {
|
||||
return patanh_float(_x);
|
||||
}
|
||||
|
||||
template <>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8d
|
||||
patan<Packet8d>(const Packet8d& _x) {
|
||||
|
@ -125,6 +125,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
||||
HasACos = 1,
|
||||
HasASin = 1,
|
||||
HasATan = 1,
|
||||
HasATanh = 1,
|
||||
#if EIGEN_HAS_AVX512_MATH
|
||||
HasLog = 1,
|
||||
HasLog1p = 1,
|
||||
|
@ -60,6 +60,12 @@ Packet4f patan<Packet4f>(const Packet4f& _x)
|
||||
return patan_float(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f patanh<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
return patanh_float(_x);
|
||||
}
|
||||
|
||||
#ifdef EIGEN_VECTORIZE_VSX
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f psqrt<Packet4f>(const Packet4f& x)
|
||||
|
@ -171,6 +171,7 @@ struct packet_traits<float> : default_packet_traits {
|
||||
HasACos = 1,
|
||||
HasASin = 1,
|
||||
HasATan = 1,
|
||||
HasATanh = 1,
|
||||
HasLog = 1,
|
||||
HasExp = 1,
|
||||
#ifdef EIGEN_VECTORIZE_VSX
|
||||
|
@ -967,6 +967,34 @@ Packet patan_double(const Packet& x_in) {
|
||||
return pxor(p, x_signmask);
|
||||
}
|
||||
|
||||
template<typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet patanh_float(const Packet& x) {
|
||||
typedef typename unpacket_traits<Packet>::type Scalar;
|
||||
static_assert(std::is_same<Scalar, float>::value, "Scalar type must be float");
|
||||
const Packet half = pset1<Packet>(0.5f);
|
||||
const Packet x_gt_half = pcmp_le(half, pabs(x));
|
||||
// For |x| in [0:0.5] we use a polynomial approximation of the form
|
||||
// P(x) = x + x^3*(c3 + x^2 * (c5 + x^2 * (... x^2 * c11) ... )).
|
||||
const Packet C3 = pset1<Packet>(0.3333373963832855224609375f);
|
||||
const Packet C5 = pset1<Packet>(0.1997792422771453857421875f);
|
||||
const Packet C7 = pset1<Packet>(0.14672131836414337158203125f);
|
||||
const Packet C9 = pset1<Packet>(8.2311116158962249755859375e-2f);
|
||||
const Packet C11 = pset1<Packet>(0.1819281280040740966796875f);
|
||||
const Packet x2 = pmul(x,x);
|
||||
Packet p = pmadd(C11, x2, C9);
|
||||
p = pmadd(x2, p, C7);
|
||||
p = pmadd(x2, p, C5);
|
||||
p = pmadd(x2, p, C3);
|
||||
p = pmadd(pmul(x,x2), p, x);
|
||||
|
||||
// For |x| in ]0.5:1.0] we use atanh = 0.5*ln((1+x)/(1-x));
|
||||
const Packet one = pset1<Packet>(1.0f);
|
||||
Packet r = pdiv(padd(one, x), psub(one, x));
|
||||
r = pmul(half, plog(r));
|
||||
return pselect(x_gt_half, r, p);
|
||||
}
|
||||
|
||||
template<typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet pdiv_complex(const Packet& x, const Packet& y) {
|
||||
|
@ -109,6 +109,11 @@ template<typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet patan_double(const Packet& x);
|
||||
|
||||
/** \internal \returns atan(x) for single precision float */
|
||||
template<typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet patanh_float(const Packet& x);
|
||||
|
||||
/** \internal \returns sqrt(x) for complex types */
|
||||
template<typename Packet>
|
||||
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
|
@ -779,6 +779,12 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half asin(const half& a) {
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half acos(const half& a) {
|
||||
return half(::acosf(float(a)));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half atan(const half& a) {
|
||||
return half(::atanf(float(a)));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half atanh(const half& a) {
|
||||
return half(::atanhf(float(a)));
|
||||
}
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half floor(const half& a) {
|
||||
#if (EIGEN_CUDA_SDK_VER >= 80000 && defined EIGEN_CUDA_ARCH && EIGEN_CUDA_ARCH >= 300) || \
|
||||
defined(EIGEN_HIP_DEVICE_COMPILE)
|
||||
|
@ -55,6 +55,12 @@ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f ptanh<Pa
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f ptanh<Packet4f>(const Packet4f& x)
|
||||
{ return internal::generic_fast_tanh_float(x); }
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2f patanh<Packet2f>(const Packet2f& x)
|
||||
{ return patanh_float(x); }
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f patanh<Packet4f>(const Packet4f& x)
|
||||
{ return patanh_float(x); }
|
||||
|
||||
|
||||
#if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
|
||||
|
@ -215,6 +215,7 @@ struct packet_traits<float> : default_packet_traits
|
||||
HasACos = 1,
|
||||
HasASin = 1,
|
||||
HasATan = 1,
|
||||
HasATanh = 1,
|
||||
HasLog = 1,
|
||||
HasExp = 1,
|
||||
HasSqrt = 1,
|
||||
|
@ -81,11 +81,6 @@ Packet4f pacos<Packet4f>(const Packet4f& _x)
|
||||
return pacos_float(_x);
|
||||
}
|
||||
|
||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet2d patan<Packet2d>(const Packet2d& _x) {
|
||||
return patan_double(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f pasin<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
@ -98,6 +93,17 @@ Packet4f patan<Packet4f>(const Packet4f& _x)
|
||||
return patan_float(_x);
|
||||
}
|
||||
|
||||
template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet2d patan<Packet2d>(const Packet2d& _x) {
|
||||
return patan_double(_x);
|
||||
}
|
||||
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
Packet4f patanh<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
return patanh_float(_x);
|
||||
}
|
||||
|
||||
// Notice that for newer processors, it is counterproductive to use Newton
|
||||
// iteration for square root. In particular, Skylake and Zen2 processors
|
||||
// have approximately doubled throughput of the _mm_sqrt_ps instruction
|
||||
|
@ -142,6 +142,7 @@ struct packet_traits<float> : default_packet_traits {
|
||||
HasACos = 1,
|
||||
HasASin = 1,
|
||||
HasATan = 1,
|
||||
HasATanh = 1,
|
||||
HasLog = 1,
|
||||
HasLog1p = 1,
|
||||
HasExpm1 = 1,
|
||||
|
@ -622,11 +622,16 @@ struct functor_traits<scalar_tanh_op<Scalar> > {
|
||||
template <typename Scalar>
|
||||
struct scalar_atanh_op {
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { return numext::atanh(a); }
|
||||
template <typename Packet>
|
||||
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x) const { return patanh(x); }
|
||||
};
|
||||
|
||||
template <typename Scalar>
|
||||
struct functor_traits<scalar_atanh_op<Scalar> > {
|
||||
enum { Cost = 5 * NumTraits<Scalar>::MulCost, PacketAccess = false };
|
||||
enum {
|
||||
Cost = 5 * NumTraits<Scalar>::MulCost,
|
||||
PacketAccess = packet_traits<Scalar>::HasATanh
|
||||
};
|
||||
};
|
||||
|
||||
/** \internal
|
||||
|
@ -93,27 +93,91 @@ void binary_op_test(std::string name, Fn fun, RefFn ref) {
|
||||
VERIFY(all_pass);
|
||||
}
|
||||
|
||||
#define BINARY_FUNCTOR_TEST_ARGS(fun) #fun, \
|
||||
[](const auto& x, const auto& y) { return (Eigen::fun)(x, y); }, \
|
||||
[](const auto& x, const auto& y) { return (std::fun)(x, y); }
|
||||
|
||||
|
||||
template <typename Scalar>
|
||||
void binary_ops_test() {
|
||||
binary_op_test<Scalar>("pow",
|
||||
[](const auto& x, const auto& y) { return Eigen::pow(x, y); },
|
||||
[](const auto& x, const auto& y) { return std::pow(x, y); });
|
||||
binary_op_test<Scalar>("atan2",
|
||||
[](const auto& x, const auto& y) { return Eigen::atan2(x, y); },
|
||||
[](const auto& x, const auto& y) {
|
||||
auto t = std::atan2(x, y);
|
||||
#if EIGEN_COMP_MSVC
|
||||
// Work around MSVC return value on underflow.
|
||||
// |atan(y/x)| is bounded above by |y/x|, so on underflow return y/x according to POSIX spec.
|
||||
// MSVC otherwise returns denorm_min.
|
||||
if (EIGEN_PREDICT_FALSE(std::abs(t) == std::numeric_limits<decltype(t)>::denorm_min())) {
|
||||
return x/y;
|
||||
}
|
||||
binary_op_test<Scalar>(BINARY_FUNCTOR_TEST_ARGS(pow));
|
||||
#ifndef EIGEN_COMP_MSVC
|
||||
binary_op_test<Scalar>(BINARY_FUNCTOR_TEST_ARGS(atan2));
|
||||
#else
|
||||
binary_op_test<Scalar>(
|
||||
"atan2", [](const auto& x, const auto& y) { return Eigen::atan2(x, y); },
|
||||
[](Scalar x, Scalar y) {
|
||||
auto t = Scalar(std::atan2(x, y));
|
||||
// Work around MSVC return value on underflow.
|
||||
// |atan(y/x)| is bounded above by |y/x|, so on underflow return y/x according to POSIX spec.
|
||||
// MSVC otherwise returns denorm_min.
|
||||
if (EIGEN_PREDICT_FALSE(std::abs(t) == std::numeric_limits<decltype(t)>::denorm_min())) {
|
||||
return x / y;
|
||||
}
|
||||
return t;
|
||||
});
|
||||
#endif
|
||||
return t;
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
template <typename Scalar, typename Fn, typename RefFn>
|
||||
void unary_op_test(std::string name, Fn fun, RefFn ref) {
|
||||
const Scalar tol = test_precision<Scalar>();
|
||||
auto values = special_values<Scalar>();
|
||||
Map<Array<Scalar, Dynamic, 1>> x(values.data(), values.size());
|
||||
|
||||
Array<Scalar, Dynamic, Dynamic> actual = fun(x);
|
||||
bool all_pass = true;
|
||||
for (Index i = 0; i < x.size(); ++i) {
|
||||
Scalar e = static_cast<Scalar>(ref(x(i)));
|
||||
Scalar a = actual(i);
|
||||
bool success = (a == e) || ((numext::isfinite)(e) && internal::isApprox(a, e, tol)) ||
|
||||
((numext::isnan)(a) && (numext::isnan)(e));
|
||||
if ((a == a) && (e == e)) success &= (bool)numext::signbit(e) == (bool)numext::signbit(a);
|
||||
all_pass &= success;
|
||||
if (!success) {
|
||||
std::cout << name << "(" << x(i) << ") = " << a << " != " << e << std::endl;
|
||||
}
|
||||
}
|
||||
VERIFY(all_pass);
|
||||
}
|
||||
|
||||
#define UNARY_FUNCTOR_TEST_ARGS(fun) #fun, \
|
||||
[](const auto& x) { return (Eigen::fun)(x); }, \
|
||||
[](const auto& x) { return (std::fun)(x); }
|
||||
|
||||
template <typename Scalar>
|
||||
void unary_ops_test() {
|
||||
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(sqrt));
|
||||
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(exp));
|
||||
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(log));
|
||||
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(sin));
|
||||
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(cos));
|
||||
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(tan));
|
||||
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(asin));
|
||||
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(acos));
|
||||
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(atan));
|
||||
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(sinh));
|
||||
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(cosh));
|
||||
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(tanh));
|
||||
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(asinh));
|
||||
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(acosh));
|
||||
unary_op_test<Scalar>(UNARY_FUNCTOR_TEST_ARGS(atanh));
|
||||
/* FIXME: Enable when the behavior of rsqrt on denormals for half and double is fixed.
|
||||
unary_op_test<Scalar>("rsqrt",
|
||||
[](const auto& x) { return Eigen::rsqrt(x); },
|
||||
[](Scalar x) {
|
||||
if (x >= 0 && x < (std::numeric_limits<Scalar>::min)()) {
|
||||
// rsqrt return +inf for positive subnormals.
|
||||
return NumTraits<Scalar>::infinity();
|
||||
} else {
|
||||
return Scalar(std::sqrt(Scalar(1)/x));
|
||||
}
|
||||
});
|
||||
*/
|
||||
}
|
||||
|
||||
|
||||
template <typename Scalar>
|
||||
void pow_scalar_exponent_test() {
|
||||
using Int_t = typename internal::make_integer<Scalar>::type;
|
||||
@ -630,6 +694,7 @@ template<typename ArrayType> void array_real(const ArrayType& m)
|
||||
VERIFY_IS_APPROX(m1.tanh().atanh(), atanh(tanh(m1)));
|
||||
VERIFY_IS_APPROX(m1.sinh().asinh(), asinh(sinh(m1)));
|
||||
VERIFY_IS_APPROX(m1.cosh().acosh(), acosh(cosh(m1)));
|
||||
VERIFY_IS_APPROX(m1.tanh().atanh(), atanh(tanh(m1)));
|
||||
VERIFY_IS_APPROX(m1.logistic(), logistic(m1));
|
||||
|
||||
VERIFY_IS_APPROX(m1.arg(), arg(m1));
|
||||
@ -722,6 +787,7 @@ template<typename ArrayType> void array_real(const ArrayType& m)
|
||||
VERIFY_IS_APPROX(m3.pow(RealScalar(-2)), m3.square().inverse());
|
||||
|
||||
// Test pow and atan2 on special IEEE values.
|
||||
unary_ops_test<Scalar>();
|
||||
binary_ops_test<Scalar>();
|
||||
pow_scalar_exponent_test<Scalar>();
|
||||
|
||||
|
@ -879,6 +879,9 @@ void packetmath_real() {
|
||||
}
|
||||
CHECK_CWISE1_IF(PacketTraits::HasASin, std::asin, internal::pasin);
|
||||
CHECK_CWISE1_IF(PacketTraits::HasACos, std::acos, internal::pacos);
|
||||
CHECK_CWISE1_IF(PacketTraits::HasATan, std::atan, internal::patan);
|
||||
CHECK_CWISE1_IF(PacketTraits::HasATanh, std::atanh, internal::patanh);
|
||||
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
data1[i] = Scalar(internal::random<double>(-87, 88));
|
||||
|
Loading…
x
Reference in New Issue
Block a user