mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-19 16:19:37 +08:00
Fix arm32 issues.
This commit is contained in:
parent
5808122017
commit
a73970a864
Eigen/src/Core
test
@ -582,8 +582,8 @@ EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
|
||||
|
||||
// Subtract y * Pi/2 to reduce x to the interval -Pi/4 <= x <= +Pi/4
|
||||
// using "Extended precision modular arithmetic"
|
||||
#if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD)
|
||||
// This version requires true FMA for high accuracy
|
||||
#if defined(EIGEN_VECTORIZE_FMA)
|
||||
// This version requires true FMA for high accuracy.
|
||||
// It provides a max error of 1ULP up to (with absolute_error < 5.9605e-08):
|
||||
const float huge_th = ComputeSine ? 117435.992f : 71476.0625f;
|
||||
x = pmadd(y, pset1<Packet>(-1.57079601287841796875f), x);
|
||||
@ -1181,7 +1181,7 @@ EIGEN_STRONG_INLINE void fast_twosum(const Packet& x, const Packet& y, Packet& s
|
||||
s_lo = psub(y, t);
|
||||
}
|
||||
|
||||
#ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
// This function implements the extended precision product of
|
||||
// a pair of floating point numbers. Given {x, y}, it computes the pair
|
||||
// {p_hi, p_lo} such that x * y = p_hi + p_lo holds exactly and
|
||||
@ -1227,7 +1227,7 @@ EIGEN_STRONG_INLINE void twoprod(const Packet& x, const Packet& y, Packet& p_hi,
|
||||
p_lo = pmadd(x_lo, y_lo, p_lo);
|
||||
}
|
||||
|
||||
#endif // EIGEN_HAS_SINGLE_INSTRUCTION_MADD
|
||||
#endif // EIGEN_VECTORIZE_FMA
|
||||
|
||||
// This function implements Dekker's algorithm for the addition
|
||||
// of two double word numbers represented by {x_hi, x_lo} and {y_hi, y_lo}.
|
||||
|
@ -1271,7 +1271,7 @@ EIGEN_STRONG_INLINE Packet2ul pdiv<Packet2ul>(const Packet2ul& /*a*/, const Pack
|
||||
return pset1<Packet2ul>(0ULL);
|
||||
}
|
||||
|
||||
#ifdef __ARM_FEATURE_FMA
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
|
||||
return vfmaq_f32(c, a, b);
|
||||
@ -5249,7 +5249,7 @@ EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b
|
||||
return vdivq_f64(a, b);
|
||||
}
|
||||
|
||||
#ifdef __ARM_FEATURE_FMA
|
||||
#ifdef EIGEN_VECTORIZE_FMA
|
||||
// See bug 936. See above comment about FMA for float.
|
||||
template <>
|
||||
EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) {
|
||||
|
@ -354,6 +354,7 @@ extern "C" {
|
||||
|
||||
#define EIGEN_VECTORIZE
|
||||
#define EIGEN_VECTORIZE_VSX 1
|
||||
#define EIGEN_VECTORIZE_FMA
|
||||
#include <altivec.h>
|
||||
// We need to #undef all these ugly tokens defined in <altivec.h>
|
||||
// => use __vector instead of vector
|
||||
@ -365,6 +366,7 @@ extern "C" {
|
||||
|
||||
#define EIGEN_VECTORIZE
|
||||
#define EIGEN_VECTORIZE_ALTIVEC
|
||||
#define EIGEN_VECTORIZE_FMA
|
||||
#include <altivec.h>
|
||||
// We need to #undef all these ugly tokens defined in <altivec.h>
|
||||
// => use __vector instead of vector
|
||||
@ -431,6 +433,11 @@ extern "C" {
|
||||
#include <arm_fp16.h>
|
||||
#endif
|
||||
|
||||
// Enable FMA for ARM.
|
||||
#if defined(__ARM_FEATURE_FMA)
|
||||
#define EIGEN_VECTORIZE_FMA
|
||||
#endif
|
||||
|
||||
#if defined(__F16C__) && !defined(EIGEN_GPUCC) && (!EIGEN_COMP_CLANG_STRICT || EIGEN_CLANG_STRICT_AT_LEAST(3, 8, 0))
|
||||
// We can use the optimized fp16 to float and float to fp16 conversion routines
|
||||
#define EIGEN_HAS_FP16_C
|
||||
|
@ -98,9 +98,12 @@ void binary_op_test(std::string name, Fn fun, RefFn ref) {
|
||||
Scalar a = actual(i, j);
|
||||
#if EIGEN_ARCH_ARM
|
||||
// Work around NEON flush-to-zero mode
|
||||
// if ref returns denormalized value and Eigen returns 0, then skip the test
|
||||
int ref_fpclass = std::fpclassify(e);
|
||||
if (a == Scalar(0) && ref_fpclass == FP_SUBNORMAL) continue;
|
||||
// if ref returns a subnormal value and Eigen returns 0, then skip the test
|
||||
// Skip only when the reference value e is a genuine subnormal: nonzero, with
// magnitude of at least denorm_min() and strictly below the smallest normal
// value min(). Requiring |e| <= denorm_min() here would match only 0 and
// +/-denorm_min(), so results flushed to zero would almost never be skipped.
if (a == Scalar(0) &&
    (e > -(std::numeric_limits<Scalar>::min)() && e < (std::numeric_limits<Scalar>::min)()) &&
    (e <= -std::numeric_limits<Scalar>::denorm_min() || e >= std::numeric_limits<Scalar>::denorm_min())) {
  continue;
}
|
||||
#endif
|
||||
bool success = (a == e) || ((numext::isfinite)(e) && internal::isApprox(a, e, tol)) ||
|
||||
((numext::isnan)(a) && (numext::isnan)(e));
|
||||
|
Loading…
x
Reference in New Issue
Block a user