mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 03:39:01 +08:00
Fixed a typo in my previous commit
This commit is contained in:
parent
08348b4e48
commit
217d984abc
@ -616,7 +616,53 @@ template<typename Scalar> struct scalar_tanh_op {
|
||||
EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op)
|
||||
EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::tanh(a); }
|
||||
template <typename Packet>
|
||||
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::ptanh(a); }
|
||||
EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& _x) const {
  /** \internal \returns the hyperbolic tangent of \a _x (coeff-wise)
    *
    * The result is computed as the quotient of two polynomials: an odd
    * numerator of degree 13 and an even denominator of degree 6. This
    * rational interpolant is accurate up to a couple of ulp on [-9, 9];
    * outside of that interval fl(tanh(x)) = +/-1 in single precision, which
    * is why the argument is clamped before evaluation.
    */

  // Saturate the argument to [-9, 9]; anything beyond already rounds to
  // +/-1.0f in single precision.
  const Packet upper_limit = pset1<Packet>(9.0);
  const Packet lower_limit = pset1<Packet>(-9.0);
  const Packet clamped = pmax(lower_limit, pmin(upper_limit, _x));

  // The numerator is odd and the denominator is even, so both are evaluated
  // as polynomials in x^2.
  const Packet squared = pmul(clamped, clamped);

  // Numerator: monomial coefficients of the odd powers x^1 .. x^13.
  const Packet num_c1 = pset1<Packet>(4.89352455891786e-03);
  const Packet num_c3 = pset1<Packet>(6.37261928875436e-04);
  const Packet num_c5 = pset1<Packet>(1.48572235717979e-05);
  const Packet num_c7 = pset1<Packet>(5.12229709037114e-08);
  const Packet num_c9 = pset1<Packet>(-8.60467152213735e-11);
  const Packet num_c11 = pset1<Packet>(2.00018790482477e-13);
  const Packet num_c13 = pset1<Packet>(-2.76076847742355e-16);

  // Horner scheme in x^2, highest-order coefficient first; the final
  // multiplication by x restores the odd powers.
  Packet numerator = pmadd(squared, num_c13, num_c11);
  numerator = pmadd(squared, numerator, num_c9);
  numerator = pmadd(squared, numerator, num_c7);
  numerator = pmadd(squared, numerator, num_c5);
  numerator = pmadd(squared, numerator, num_c3);
  numerator = pmadd(squared, numerator, num_c1);
  numerator = pmul(clamped, numerator);

  // Denominator: monomial coefficients of the even powers x^0 .. x^6.
  const Packet den_c0 = pset1<Packet>(4.89352518554385e-03);
  const Packet den_c2 = pset1<Packet>(2.26843463243900e-03);
  const Packet den_c4 = pset1<Packet>(1.18534705686654e-04);
  const Packet den_c6 = pset1<Packet>(1.19825839466702e-06);

  // Horner scheme in x^2 for the denominator polynomial q.
  Packet denominator = pmadd(squared, den_c6, den_c4);
  denominator = pmadd(squared, denominator, den_c2);
  denominator = pmadd(squared, denominator, den_c0);

  // tanh(x) ~= p(x) / q(x).
  return pdiv(numerator, denominator);
}
|
||||
};
|
||||
template<typename Scalar>
|
||||
struct functor_traits<scalar_tanh_op<Scalar> >
|
||||
|
@ -147,7 +147,7 @@ ei_add_test(nomalloc)
|
||||
ei_add_test(first_aligned)
|
||||
ei_add_test(nullary)
|
||||
ei_add_test(mixingtypes)
|
||||
ei_add_test(packetmath)
|
||||
ei_add_test(packetmath "-DEIGEN_FAST_MATH=1")
|
||||
ei_add_test(unalignedassert)
|
||||
ei_add_test(vectorization_logic)
|
||||
ei_add_test(basicstuff)
|
||||
|
@ -318,7 +318,7 @@ __global__ void InnerReductionKernel(Reducer reducer, const Self input, Index nu
|
||||
for (Index i = thread_id; i < num_preserved_coeffs; i += num_threads) {
|
||||
output[i] = reducer.initialize();
|
||||
}
|
||||
_syncthreads();
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
for (Index i = blockIdx.x; i < num_input_blocks; i += gridDim.x) {
|
||||
|
@ -88,6 +88,16 @@ void test_conversion()
|
||||
#endif
|
||||
}
|
||||
|
||||
void test_numtraits()
|
||||
{
|
||||
std::cout << "expsilin = " << NumTraits<half>::epsilon() << std::endl;
|
||||
std::cout << "highest = " << NumTraits<half>::highest() << std::endl;
|
||||
std::cout << "lowest = " << NumTraits<half>::lowest() << std::endl;
|
||||
std::cout << "inifinty = " << NumTraits<half>::infinity() << std::endl;
|
||||
std::cout << "nan = " << NumTraits<half>::quiet_NaN() << std::endl;
|
||||
|
||||
}
|
||||
|
||||
void test_arithmetic()
|
||||
{
|
||||
VERIFY_IS_EQUAL(float(half(2) + half(2)), 4);
|
||||
@ -185,6 +195,7 @@ void test_trigonometric_functions()
|
||||
void test_cxx11_float16()
|
||||
{
|
||||
CALL_SUBTEST(test_conversion());
|
||||
CALL_SUBTEST(test_numtraits());
|
||||
CALL_SUBTEST(test_arithmetic());
|
||||
CALL_SUBTEST(test_comparison());
|
||||
CALL_SUBTEST(test_basic_functions());
|
||||
|
@ -388,6 +388,9 @@ void test_cuda_forced_evals() {
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
void test_cxx11_tensor_of_float16_cuda()
|
||||
{
|
||||
#ifdef EIGEN_HAS_CUDA_FP16
|
||||
|
Loading…
x
Reference in New Issue
Block a user