From 5ad8b9bfe2bf75620bc89467c5cc051fc2a597df Mon Sep 17 00:00:00 2001 From: Antonio Sanchez Date: Wed, 4 Aug 2021 13:39:09 -0700 Subject: [PATCH] Make inverse 3x3 faster and avoid gcc bug. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There seems to be a gcc 4.7 bug that incorrectly flags the current 3x3 inverse as using uninitialized memory. I'm *pretty* sure it's a false positive, but it's hard to trigger. The same warning does not trigger with clang or later compiler versions. In trying to find a work-around, this implementation turns out to be faster anyways for static-sized matrices. ``` name old cpu/op new cpu/op delta BM_Inverse3x3> 423ns ± 2% 433ns ± 3% +2.32% (p=0.000 n=98+96) BM_Inverse3x3> 425ns ± 2% 427ns ± 3% +0.48% (p=0.003 n=99+96) BM_Inverse3x3> 7.10ns ± 2% 0.80ns ± 1% -88.67% (p=0.000 n=114+112) BM_Inverse3x3> 7.45ns ± 2% 1.34ns ± 1% -82.01% (p=0.000 n=105+111) BM_AliasedInverse3x3> 409ns ± 3% 419ns ± 3% +2.40% (p=0.000 n=100+98) BM_AliasedInverse3x3> 414ns ± 3% 413ns ± 2% ~ (p=0.322 n=98+98) BM_AliasedInverse3x3> 7.57ns ± 1% 0.80ns ± 1% -89.37% (p=0.000 n=111+114) BM_AliasedInverse3x3> 9.09ns ± 1% 2.58ns ±41% -71.60% (p=0.000 n=113+116) ``` --- Eigen/src/LU/InverseImpl.h | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/Eigen/src/LU/InverseImpl.h b/Eigen/src/LU/InverseImpl.h index 27e263945..a40cefa9e 100644 --- a/Eigen/src/LU/InverseImpl.h +++ b/Eigen/src/LU/InverseImpl.h @@ -144,13 +144,18 @@ inline void compute_inverse_size3_helper( const Matrix& cofactors_col0, ResultType& result) { - result.row(0) = cofactors_col0 * invdet; - result.coeffRef(1,0) = cofactor_3x3(matrix) * invdet; - result.coeffRef(1,1) = cofactor_3x3(matrix) * invdet; + // Compute cofactors in a way that avoids aliasing issues. + typedef typename ResultType::Scalar Scalar; + const Scalar c01 = cofactor_3x3(matrix) * invdet; + const Scalar c11 = cofactor_3x3(matrix) * invdet; + const Scalar c02 = cofactor_3x3(matrix) * invdet; result.coeffRef(1,2) = cofactor_3x3(matrix) * invdet; - result.coeffRef(2,0) = cofactor_3x3(matrix) * invdet; result.coeffRef(2,1) = cofactor_3x3(matrix) * invdet; result.coeffRef(2,2) = cofactor_3x3(matrix) * invdet; + result.coeffRef(1,0) = c01; + result.coeffRef(1,1) = c11; + result.coeffRef(2,0) = c02; + result.row(0) = cofactors_col0 * invdet; } template @@ -166,12 +171,7 @@ struct compute_inverse cofactors_col0.coeffRef(2) = cofactor_3x3(matrix); const Scalar det = (cofactors_col0.cwiseProduct(matrix.col(0))).sum(); const Scalar invdet = Scalar(1) / det; - if(extract_data(matrix) != extract_data(result)) { - compute_inverse_size3_helper(matrix, invdet, cofactors_col0, result); - } else { - MatrixType matrix_t = matrix; - compute_inverse_size3_helper(matrix_t, invdet, cofactors_col0, result); - } + compute_inverse_size3_helper(matrix, invdet, cofactors_col0, result); } }; @@ -187,22 +187,16 @@ struct compute_inverse_and_det_with_check bool& invertible ) { - using std::abs; typedef typename ResultType::Scalar Scalar; Matrix cofactors_col0; cofactors_col0.coeffRef(0) = cofactor_3x3(matrix); cofactors_col0.coeffRef(1) = cofactor_3x3(matrix); cofactors_col0.coeffRef(2) = cofactor_3x3(matrix); determinant = (cofactors_col0.cwiseProduct(matrix.col(0))).sum(); - invertible = abs(determinant) > absDeterminantThreshold; + invertible = Eigen::numext::abs(determinant) > absDeterminantThreshold; if(!invertible) return; const Scalar invdet = Scalar(1) / determinant; - if(extract_data(matrix) != extract_data(inverse)) { - compute_inverse_size3_helper(matrix, invdet, cofactors_col0, inverse); - } else { - MatrixType matrix_t = matrix; - compute_inverse_size3_helper(matrix_t, invdet, cofactors_col0, inverse); - } + compute_inverse_size3_helper(matrix, invdet, cofactors_col0, inverse); } };