From 28cd28072657e8367c0c611c8f155b17947bfbf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20S=C3=A1nchez?= Date: Tue, 4 Oct 2022 16:05:49 +0000 Subject: [PATCH] Fix 4x4 inverse when compiling with -Ofast. (cherry picked from commit 7d6a9925cc38842359750f3e06263e20b7635436) --- Eigen/src/LU/arch/InverseSize4.h | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/Eigen/src/LU/arch/InverseSize4.h b/Eigen/src/LU/arch/InverseSize4.h index a232ffc0a..178bc3895 100644 --- a/Eigen/src/LU/arch/InverseSize4.h +++ b/Eigen/src/LU/arch/InverseSize4.h @@ -35,6 +35,13 @@ #ifndef EIGEN_INVERSE_SIZE_4_H #define EIGEN_INVERSE_SIZE_4_H +#ifdef EIGEN_COMP_GNUC +// These routines require bit manipulation of the sign, which is not compatible +// with fastmath. +#pragma GCC push_options +#pragma GCC optimize ("no-fast-math") +#endif + namespace Eigen { namespace internal @@ -143,8 +150,8 @@ struct compute_inverse_size4(0x80000000u), numext::bit_cast(0x80000000u), 0.0f}; - const Packet4f p4f_sign_PNNP = ploadu(sign_mask); + EIGEN_ALIGN_MAX const float sign_mask[4] = {0.0f, -0.0f, -0.0f, 0.0f}; + const Packet4f p4f_sign_PNNP = pload(sign_mask); rd = pxor(rd, p4f_sign_PNNP); iA = pmul(iA, rd); iB = pmul(iB, rd); @@ -326,10 +333,10 @@ struct compute_inverse_size4(0x8000000000000000ull)}; - const double sign_mask2[2] = {numext::bit_cast(0x8000000000000000ull), 0.0}; - const Packet2d sign_PN = ploadu(sign_mask1); - const Packet2d sign_NP = ploadu(sign_mask2); + EIGEN_ALIGN_MAX const double sign_mask1[2] = {0.0, -0.0}; + EIGEN_ALIGN_MAX const double sign_mask2[2] = {-0.0, 0.0}; + const Packet2d sign_PN = pload(sign_mask1); + const Packet2d sign_NP = pload(sign_mask2); d1 = pxor(rd, sign_PN); d2 = pxor(rd, sign_NP); @@ -348,4 +355,9 @@ struct compute_inverse_size4