eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h
Gael Guennebaud 2c44c40114 First step toward a unification of packet log implementation, currently only SSE and AVX are unified.
To this end, I added the following functions: pzero, pcmp_*, pfrexp, pset1frombits functions.
2018-11-26 14:21:24 +01:00

106 lines
4.1 KiB
C++

// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2007 Julien Pommier
// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)
// Copyright (C) 2009-2018 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/* The log function of this file initially comes from
* Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
*/
namespace Eigen {
namespace internal {
// Natural logarithm
// Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C =log(2)
// and m is in the range [sqrt(1/2),sqrt(2)). In this range, the logarithm can
// be easily approximated by a polynomial centered on m=1 for stability.
// TODO(gonnet): Further reduce the interval allowing for lower-degree
// polynomial interpolants -> ... -> profit!
template <typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
EIGEN_UNUSED
Packet plog_float(const Packet _x) {
Packet x = _x;
const Packet cst_1 = pset1<Packet>(1.0f);
const Packet cst_half = pset1<Packet>(0.5f);
//const Packet cst_126f = pset1<Packet>(126.0f);
// The smallest non denormalized float number.
const Packet cst_min_norm_pos = pset1frombits<Packet>( 0x00800000u);
const Packet cst_minus_inf = pset1frombits<Packet>( 0xff800000u);
// Polynomial coefficients.
const Packet cst_cephes_SQRTHF = pset1<Packet>(0.707106781186547524f);
const Packet cst_cephes_log_p0 = pset1<Packet>(7.0376836292E-2f);
const Packet cst_cephes_log_p1 = pset1<Packet>(-1.1514610310E-1f);
const Packet cst_cephes_log_p2 = pset1<Packet>(1.1676998740E-1f);
const Packet cst_cephes_log_p3 = pset1<Packet>(-1.2420140846E-1f);
const Packet cst_cephes_log_p4 = pset1<Packet>(+1.4249322787E-1f);
const Packet cst_cephes_log_p5 = pset1<Packet>(-1.6668057665E-1f);
const Packet cst_cephes_log_p6 = pset1<Packet>(+2.0000714765E-1f);
const Packet cst_cephes_log_p7 = pset1<Packet>(-2.4999993993E-1f);
const Packet cst_cephes_log_p8 = pset1<Packet>(+3.3333331174E-1f);
const Packet cst_cephes_log_q1 = pset1<Packet>(-2.12194440e-4f);
const Packet cst_cephes_log_q2 = pset1<Packet>(0.693359375f);
Packet invalid_mask = pcmp_lt_or_nan(x, pzero(x));
Packet iszero_mask = pcmp_eq(x,pzero(x));
// Truncate input values to the minimum positive normal.
x = pmax(x, cst_min_norm_pos);
Packet e;
// extract significant in the range [0.5,1) and exponent
x = pfrexp(x,e);
// part2: Shift the inputs from the range [0.5,1) to [sqrt(1/2),sqrt(2))
// and shift by -1. The values are then centered around 0, which improves
// the stability of the polynomial evaluation.
// if( x < SQRTHF ) {
// e -= 1;
// x = x + x - 1.0;
// } else { x = x - 1.0; }
Packet mask = pcmp_lt(x, cst_cephes_SQRTHF);
Packet tmp = pand(x, mask);
x = psub(x, cst_1);
e = psub(e, pand(cst_1, mask));
x = padd(x, tmp);
Packet x2 = pmul(x, x);
Packet x3 = pmul(x2, x);
// Evaluate the polynomial approximant of degree 8 in three parts, probably
// to improve instruction-level parallelism.
Packet y, y1, y2;
y = pmadd(cst_cephes_log_p0, x, cst_cephes_log_p1);
y1 = pmadd(cst_cephes_log_p3, x, cst_cephes_log_p4);
y2 = pmadd(cst_cephes_log_p6, x, cst_cephes_log_p7);
y = pmadd(y, x, cst_cephes_log_p2);
y1 = pmadd(y1, x, cst_cephes_log_p5);
y2 = pmadd(y2, x, cst_cephes_log_p8);
y = pmadd(y, x3, y1);
y = pmadd(y, x3, y2);
y = pmul(y, x3);
// Add the logarithm of the exponent back to the result of the interpolation.
y1 = pmul(e, cst_cephes_log_q1);
tmp = pmul(x2, cst_half);
y = padd(y, y1);
x = psub(x, tmp);
y2 = pmul(e, cst_cephes_log_q2);
x = padd(x, y);
x = padd(x, y2);
// Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF.
return pselect(iszero_mask, cst_minus_inf, por(x, invalid_mask));
}
} // end namespace internal
} // end namespace Eigen