bug #1674: workaround clang fast-math aggressive optimizations

This commit is contained in:
Gael Guennebaud 2019-02-22 15:48:53 +01:00
parent 7e3084bb6f
commit 1c09ee8541

View File

@ -324,6 +324,14 @@ __attribute__((optimize("-fno-unsafe-math-optimizations")))
#endif #endif
Packet psincos_float(const Packet& _x) Packet psincos_float(const Packet& _x)
{ {
// Workaround -ffast-math aggressive optimizations
// See bug 1674
#if EIGEN_COMP_CLANG && defined(EIGEN_VECTORIZE_SSE)
#define EIGEN_SINCOS_DONT_OPT(X) __asm__ ("" : "+x" (X));
#else
#define EIGEN_SINCOS_DONT_OPT(X)
#endif
typedef typename unpacket_traits<Packet>::integer_packet PacketI; typedef typename unpacket_traits<Packet>::integer_packet PacketI;
const Packet cst_2oPI = pset1<Packet>(0.636619746685028076171875f); // 2/PI const Packet cst_2oPI = pset1<Packet>(0.636619746685028076171875f); // 2/PI
@ -338,6 +346,7 @@ Packet psincos_float(const Packet& _x)
// Rounding trick: // Rounding trick:
Packet y_round = padd(y, cst_rounding_magic); Packet y_round = padd(y, cst_rounding_magic);
EIGEN_SINCOS_DONT_OPT(y_round)
PacketI y_int = preinterpret<PacketI>(y_round); // last 23 digits represent integer (if abs(x)<2^24) PacketI y_int = preinterpret<PacketI>(y_round); // last 23 digits represent integer (if abs(x)<2^24)
y = psub(y_round, cst_rounding_magic); // nearest integer to x*4/pi y = psub(y_round, cst_rounding_magic); // nearest integer to x*4/pi
@ -359,7 +368,9 @@ Packet psincos_float(const Packet& _x)
// and 2 ULP up to: // and 2 ULP up to:
const float huge_th = ComputeSine ? 25966.f : 18838.f; const float huge_th = ComputeSine ? 25966.f : 18838.f;
x = pmadd(y, pset1<Packet>(-1.5703125), x); // = 0xbfc90000 x = pmadd(y, pset1<Packet>(-1.5703125), x); // = 0xbfc90000
EIGEN_SINCOS_DONT_OPT(x)
x = pmadd(y, pset1<Packet>(-0.000483989715576171875), x); // = 0xb9fdc000 x = pmadd(y, pset1<Packet>(-0.000483989715576171875), x); // = 0xb9fdc000
EIGEN_SINCOS_DONT_OPT(x)
x = pmadd(y, pset1<Packet>(1.62865035235881805419921875e-07), x); // = 0x342ee000 x = pmadd(y, pset1<Packet>(1.62865035235881805419921875e-07), x); // = 0x342ee000
x = pmadd(y, pset1<Packet>(5.5644315544167710640977020375430583953857421875e-11), x); // = 0x2e74b9ee x = pmadd(y, pset1<Packet>(5.5644315544167710640977020375430583953857421875e-11), x); // = 0x2e74b9ee
@ -436,6 +447,8 @@ Packet psincos_float(const Packet& _x)
// Update the sign and filter huge inputs // Update the sign and filter huge inputs
return pxor(y, sign_bit); return pxor(y, sign_bit);
#undef EIGEN_SINCOS_DONT_OPT
} }
template<typename Packet> template<typename Packet>