mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-22 09:39:34 +08:00

To this end, I added the following functions: pzero, pcmp_*, pfrexp, pset1frombits functions.
106 lines
4.1 KiB
C++
106 lines
4.1 KiB
C++
// This file is part of Eigen, a lightweight C++ template library
|
|
// for linear algebra.
|
|
//
|
|
// Copyright (C) 2007 Julien Pommier
|
|
// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)
|
|
// Copyright (C) 2009-2018 Gael Guennebaud <gael.guennebaud@inria.fr>
|
|
//
|
|
// This Source Code Form is subject to the terms of the Mozilla
|
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
/* The log function of this file initially comes from
|
|
* Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
|
|
*/
|
|
|
|
namespace Eigen {
|
|
namespace internal {
|
|
|
|
// Natural logarithm
|
|
// Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C =log(2)
|
|
// and m is in the range [sqrt(1/2),sqrt(2)). In this range, the logarithm can
|
|
// be easily approximated by a polynomial centered on m=1 for stability.
|
|
// TODO(gonnet): Further reduce the interval allowing for lower-degree
|
|
// polynomial interpolants -> ... -> profit!
|
|
template <typename Packet>
|
|
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
|
EIGEN_UNUSED
|
|
Packet plog_float(const Packet _x) {
|
|
Packet x = _x;
|
|
|
|
const Packet cst_1 = pset1<Packet>(1.0f);
|
|
const Packet cst_half = pset1<Packet>(0.5f);
|
|
//const Packet cst_126f = pset1<Packet>(126.0f);
|
|
// The smallest non denormalized float number.
|
|
const Packet cst_min_norm_pos = pset1frombits<Packet>( 0x00800000u);
|
|
const Packet cst_minus_inf = pset1frombits<Packet>( 0xff800000u);
|
|
|
|
// Polynomial coefficients.
|
|
const Packet cst_cephes_SQRTHF = pset1<Packet>(0.707106781186547524f);
|
|
const Packet cst_cephes_log_p0 = pset1<Packet>(7.0376836292E-2f);
|
|
const Packet cst_cephes_log_p1 = pset1<Packet>(-1.1514610310E-1f);
|
|
const Packet cst_cephes_log_p2 = pset1<Packet>(1.1676998740E-1f);
|
|
const Packet cst_cephes_log_p3 = pset1<Packet>(-1.2420140846E-1f);
|
|
const Packet cst_cephes_log_p4 = pset1<Packet>(+1.4249322787E-1f);
|
|
const Packet cst_cephes_log_p5 = pset1<Packet>(-1.6668057665E-1f);
|
|
const Packet cst_cephes_log_p6 = pset1<Packet>(+2.0000714765E-1f);
|
|
const Packet cst_cephes_log_p7 = pset1<Packet>(-2.4999993993E-1f);
|
|
const Packet cst_cephes_log_p8 = pset1<Packet>(+3.3333331174E-1f);
|
|
const Packet cst_cephes_log_q1 = pset1<Packet>(-2.12194440e-4f);
|
|
const Packet cst_cephes_log_q2 = pset1<Packet>(0.693359375f);
|
|
|
|
Packet invalid_mask = pcmp_lt_or_nan(x, pzero(x));
|
|
Packet iszero_mask = pcmp_eq(x,pzero(x));
|
|
|
|
// Truncate input values to the minimum positive normal.
|
|
x = pmax(x, cst_min_norm_pos);
|
|
|
|
Packet e;
|
|
// extract significant in the range [0.5,1) and exponent
|
|
x = pfrexp(x,e);
|
|
|
|
// part2: Shift the inputs from the range [0.5,1) to [sqrt(1/2),sqrt(2))
|
|
// and shift by -1. The values are then centered around 0, which improves
|
|
// the stability of the polynomial evaluation.
|
|
// if( x < SQRTHF ) {
|
|
// e -= 1;
|
|
// x = x + x - 1.0;
|
|
// } else { x = x - 1.0; }
|
|
Packet mask = pcmp_lt(x, cst_cephes_SQRTHF);
|
|
Packet tmp = pand(x, mask);
|
|
x = psub(x, cst_1);
|
|
e = psub(e, pand(cst_1, mask));
|
|
x = padd(x, tmp);
|
|
|
|
Packet x2 = pmul(x, x);
|
|
Packet x3 = pmul(x2, x);
|
|
|
|
// Evaluate the polynomial approximant of degree 8 in three parts, probably
|
|
// to improve instruction-level parallelism.
|
|
Packet y, y1, y2;
|
|
y = pmadd(cst_cephes_log_p0, x, cst_cephes_log_p1);
|
|
y1 = pmadd(cst_cephes_log_p3, x, cst_cephes_log_p4);
|
|
y2 = pmadd(cst_cephes_log_p6, x, cst_cephes_log_p7);
|
|
y = pmadd(y, x, cst_cephes_log_p2);
|
|
y1 = pmadd(y1, x, cst_cephes_log_p5);
|
|
y2 = pmadd(y2, x, cst_cephes_log_p8);
|
|
y = pmadd(y, x3, y1);
|
|
y = pmadd(y, x3, y2);
|
|
y = pmul(y, x3);
|
|
|
|
// Add the logarithm of the exponent back to the result of the interpolation.
|
|
y1 = pmul(e, cst_cephes_log_q1);
|
|
tmp = pmul(x2, cst_half);
|
|
y = padd(y, y1);
|
|
x = psub(x, tmp);
|
|
y2 = pmul(e, cst_cephes_log_q2);
|
|
x = padd(x, y);
|
|
x = padd(x, y2);
|
|
|
|
// Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF.
|
|
return pselect(iszero_mask, cst_minus_inf, por(x, invalid_mask));
|
|
}
|
|
|
|
} // end namespace internal
|
|
} // end namespace Eigen
|