mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-10 02:39:03 +08:00
Unify Altivec/VSX's plog with generic implementation, and enable it!
This commit is contained in:
parent
c24e98e6a8
commit
c2f35b1b47
@ -9,13 +9,15 @@
|
|||||||
// Public License v. 2.0. If a copy of the MPL was not distributed
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||||
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
|
||||||
/* The sin, cos, exp, and log functions of this file come from
|
/* The sin, cos, and exp functions of this file come from
|
||||||
* Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
|
* Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H
|
#ifndef EIGEN_MATH_FUNCTIONS_ALTIVEC_H
|
||||||
#define EIGEN_MATH_FUNCTIONS_ALTIVEC_H
|
#define EIGEN_MATH_FUNCTIONS_ALTIVEC_H
|
||||||
|
|
||||||
|
#include "../Default/GenericPacketMathFunctions.h"
|
||||||
|
|
||||||
namespace Eigen {
|
namespace Eigen {
|
||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
@ -94,62 +96,7 @@ static Packet2ul p2ul_52 = { 52, 52 };
|
|||||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||||
Packet4f plog<Packet4f>(const Packet4f& _x)
|
Packet4f plog<Packet4f>(const Packet4f& _x)
|
||||||
{
|
{
|
||||||
Packet4f x = _x;
|
return plog_float(_x);
|
||||||
|
|
||||||
Packet4i emm0;
|
|
||||||
|
|
||||||
/* isvalid_mask is 0 if x < 0 or x is NaN. */
|
|
||||||
Packet4ui isvalid_mask = reinterpret_cast<Packet4ui>(vec_cmpge(x, p4f_ZERO));
|
|
||||||
Packet4ui iszero_mask = reinterpret_cast<Packet4ui>(vec_cmpeq(x, p4f_ZERO));
|
|
||||||
|
|
||||||
x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */
|
|
||||||
emm0 = vec_sr(reinterpret_cast<Packet4i>(x),
|
|
||||||
reinterpret_cast<Packet4ui>(p4i_23));
|
|
||||||
|
|
||||||
/* keep only the fractional part */
|
|
||||||
x = pand(x, p4f_inv_mant_mask);
|
|
||||||
x = por(x, p4f_half);
|
|
||||||
|
|
||||||
emm0 = psub(emm0, p4i_0x7f);
|
|
||||||
Packet4f e = padd(vec_ctf(emm0, 0), p4f_1);
|
|
||||||
|
|
||||||
/* part2:
|
|
||||||
if( x < SQRTHF ) {
|
|
||||||
e -= 1;
|
|
||||||
x = x + x - 1.0;
|
|
||||||
} else { x = x - 1.0; }
|
|
||||||
*/
|
|
||||||
Packet4f mask = reinterpret_cast<Packet4f>(vec_cmplt(x, p4f_cephes_SQRTHF));
|
|
||||||
Packet4f tmp = pand(x, mask);
|
|
||||||
x = psub(x, p4f_1);
|
|
||||||
e = psub(e, pand(p4f_1, mask));
|
|
||||||
x = padd(x, tmp);
|
|
||||||
|
|
||||||
Packet4f x2 = pmul(x,x);
|
|
||||||
Packet4f x3 = pmul(x2,x);
|
|
||||||
|
|
||||||
Packet4f y, y1, y2;
|
|
||||||
y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
|
|
||||||
y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
|
|
||||||
y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
|
|
||||||
y = pmadd(y , x, p4f_cephes_log_p2);
|
|
||||||
y1 = pmadd(y1, x, p4f_cephes_log_p5);
|
|
||||||
y2 = pmadd(y2, x, p4f_cephes_log_p8);
|
|
||||||
y = pmadd(y, x3, y1);
|
|
||||||
y = pmadd(y, x3, y2);
|
|
||||||
y = pmul(y, x3);
|
|
||||||
|
|
||||||
y1 = pmul(e, p4f_cephes_log_q1);
|
|
||||||
tmp = pmul(x2, p4f_half);
|
|
||||||
y = padd(y, y1);
|
|
||||||
x = psub(x, tmp);
|
|
||||||
y2 = pmul(e, p4f_cephes_log_q2);
|
|
||||||
x = padd(x, y);
|
|
||||||
x = padd(x, y2);
|
|
||||||
// negative arg will be NAN, 0 will be -INF
|
|
||||||
x = vec_sel(x, p4f_minus_inf, iszero_mask);
|
|
||||||
x = vec_sel(p4f_minus_nan, x, isvalid_mask);
|
|
||||||
return x;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||||
|
@ -148,7 +148,7 @@ template<> struct packet_traits<float> : default_packet_traits
|
|||||||
HasAbs = 1,
|
HasAbs = 1,
|
||||||
HasSin = 0,
|
HasSin = 0,
|
||||||
HasCos = 0,
|
HasCos = 0,
|
||||||
HasLog = 0,
|
HasLog = 1,
|
||||||
HasExp = 1,
|
HasExp = 1,
|
||||||
#ifdef __VSX__
|
#ifdef __VSX__
|
||||||
HasSqrt = 1,
|
HasSqrt = 1,
|
||||||
@ -285,6 +285,11 @@ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from) {
|
|||||||
Packet4i v = {from, from, from, from};
|
Packet4i v = {from, from, from, from};
|
||||||
return v;
|
return v;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Broadcasts `from` to all four lanes as an integer packet, then reinterprets
// that packet's bits as a Packet4f (a bit-level cast, not a numeric conversion).
template<> EIGEN_STRONG_INLINE Packet4f pset1frombits<Packet4f>(unsigned int from) {
  const Packet4i lane_bits = pset1<Packet4i>(from);
  return reinterpret_cast<Packet4f>(lane_bits);
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE void
|
template<> EIGEN_STRONG_INLINE void
|
||||||
pbroadcast4<Packet4f>(const float *a,
|
pbroadcast4<Packet4f>(const float *a,
|
||||||
Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
|
Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
|
||||||
@ -414,6 +419,14 @@ template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const
|
|||||||
}
|
}
|
||||||
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
|
template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
|
||||||
|
|
||||||
|
// Lane-wise `a <= b`: the vec_cmple result is reinterpreted as a Packet4f so it
// can be used as a mask by the other packet primitives.
template<> EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f& a, const Packet4f& b) {
  const Packet4f le_mask = reinterpret_cast<Packet4f>(vec_cmple(a, b));
  return le_mask;
}
|
||||||
|
// Lane-wise `a < b`: the vec_cmplt result is reinterpreted as a Packet4f so it
// can be used as a mask by the other packet primitives.
template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f& a, const Packet4f& b) {
  const Packet4f lt_mask = reinterpret_cast<Packet4f>(vec_cmplt(a, b));
  return lt_mask;
}
|
||||||
|
// Lane-wise `a == b`: the vec_cmpeq result is reinterpreted as a Packet4f so it
// can be used as a mask by the other packet primitives.
template<> EIGEN_STRONG_INLINE Packet4f pcmp_eq(const Packet4f& a, const Packet4f& b) {
  const Packet4f eq_mask = reinterpret_cast<Packet4f>(vec_cmpeq(a, b));
  return eq_mask;
}
|
||||||
|
// Returns the bitwise complement of the lane-wise `a >= b` mask, i.e. the lanes
// where `a >= b` does NOT hold. As the name indicates, that is meant to cover
// both `a < b` and comparisons involving NaN.
template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) {
  const Packet4f ge_mask = reinterpret_cast<Packet4f>(vec_cmpge(a, b));
  // nor(x, x) == ~x
  return vec_nor(ge_mask, ge_mask);
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
|
template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
|
template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
|
||||||
|
|
||||||
@ -426,6 +439,10 @@ template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const
|
|||||||
template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); }
|
template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); }
|
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, vec_nor(b, b)); }
|
||||||
|
|
||||||
|
// Bitwise select: for every bit set in `mask` the result takes the bit from `a`,
// otherwise from `b` (note vec_sel's operand order is (if_clear, if_set, selector)).
//
// The AltiVec interface defines the selector of vec_sel for float operands as an
// integer (unsigned / bool) vector, so the float-typed mask is reinterpreted as
// Packet4ui instead of being passed as a vector float, which not every compiler
// accepts. The bit pattern of the mask is unchanged.
template<> EIGEN_STRONG_INLINE Packet4f pselect(const Packet4f& mask, const Packet4f& a, const Packet4f& b) {
  return vec_sel(b, a, reinterpret_cast<Packet4ui>(mask));
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return vec_round(a); }
|
template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) { return vec_round(a); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return vec_ceil(a); }
|
template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) { return vec_ceil(a); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return vec_floor(a); }
|
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) { return vec_floor(a); }
|
||||||
@ -550,6 +567,15 @@ template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
|
|||||||
template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); }
|
template<> EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) { return vec_abs(a); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); }
|
template<> EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) { return vec_abs(a); }
|
||||||
|
|
||||||
|
// Reinterprets the bits of `a` as 32-bit integers, shifts every lane right by
// `n` bits with vec_sr, and converts the resulting integers to float
// (vec_ctf with a scale exponent of 0).
template<> EIGEN_STRONG_INLINE Packet4f pshiftright_and_cast(Packet4f a, int n) {
  const Packet4ui shift_count = reinterpret_cast<Packet4ui>(pset1<Packet4i>(n));
  const Packet4i shifted_bits = vec_sr(reinterpret_cast<Packet4i>(a), shift_count);
  return vec_ctf(shifted_bits, 0);
}
|
||||||
|
|
||||||
|
// Packet4f specialization of pfrexp: forwards to the shared pfrexp_float helper,
// which writes its second result into `exponent`.
// NOTE(review): presumably frexp-like semantics (mantissa returned, exponent
// stored) -- confirm against pfrexp_float's definition.
template<> EIGEN_STRONG_INLINE Packet4f pfrexp<Packet4f>(const Packet4f& a, Packet4f& exponent) {
  const Packet4f mantissa = pfrexp_float(a, exponent);
  return mantissa;
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
|
template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
|
||||||
{
|
{
|
||||||
Packet4f b, sum;
|
Packet4f b, sum;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user