From 5ad8b9bfe2bf75620bc89467c5cc051fc2a597df Mon Sep 17 00:00:00 2001
From: Antonio Sanchez <cantonios@google.com>
Date: Wed, 4 Aug 2021 13:39:09 -0700
Subject: [PATCH] Make inverse 3x3 faster and avoid gcc bug.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There seems to be a gcc 4.7 bug that incorrectly flags the current
3x3 inverse as using uninitialized memory.  I'm *pretty* sure it's
a false positive, but it's hard to trigger.  The same warning
does not trigger with clang or later compiler versions.

In trying to find a work-around, this implementation turns out to be
faster anyways for static-sized matrices.

```
name                                            old cpu/op  new cpu/op  delta
BM_Inverse3x3<DynamicMatrix3T<float>>            423ns ± 2%   433ns ± 3%   +2.32%    (p=0.000 n=98+96)
BM_Inverse3x3<DynamicMatrix3T<double>>           425ns ± 2%   427ns ± 3%   +0.48%    (p=0.003 n=99+96)
BM_Inverse3x3<StaticMatrix3T<float>>            7.10ns ± 2%  0.80ns ± 1%  -88.67%  (p=0.000 n=114+112)
BM_Inverse3x3<StaticMatrix3T<double>>           7.45ns ± 2%  1.34ns ± 1%  -82.01%  (p=0.000 n=105+111)
BM_AliasedInverse3x3<DynamicMatrix3T<float>>     409ns ± 3%   419ns ± 3%   +2.40%   (p=0.000 n=100+98)
BM_AliasedInverse3x3<DynamicMatrix3T<double>>    414ns ± 3%   413ns ± 2%     ~       (p=0.322 n=98+98)
BM_AliasedInverse3x3<StaticMatrix3T<float>>     7.57ns ± 1%  0.80ns ± 1%  -89.37%  (p=0.000 n=111+114)
BM_AliasedInverse3x3<StaticMatrix3T<double>>    9.09ns ± 1%  2.58ns ±41%  -71.60%  (p=0.000 n=113+116)
```
---
 Eigen/src/LU/InverseImpl.h | 30 ++++++++++++------------------
 1 file changed, 12 insertions(+), 18 deletions(-)
diff --git a/Eigen/src/LU/InverseImpl.h b/Eigen/src/LU/InverseImpl.h
index 27e263945..a40cefa9e 100644
--- a/Eigen/src/LU/InverseImpl.h
+++ b/Eigen/src/LU/InverseImpl.h
@@ -144,13 +144,18 @@ inline void compute_inverse_size3_helper(
     const Matrix<typename ResultType::Scalar,3,1>& cofactors_col0,
     ResultType& result)
 {
-  result.row(0) = cofactors_col0 * invdet;
-  result.coeffRef(1,0) =  cofactor_3x3<MatrixType,0,1>(matrix) * invdet;
-  result.coeffRef(1,1) =  cofactor_3x3<MatrixType,1,1>(matrix) * invdet;
+  // Compute cofactors in a way that avoids aliasing issues.
+  typedef typename ResultType::Scalar Scalar;
+  const Scalar c01 = cofactor_3x3<MatrixType,0,1>(matrix) * invdet;
+  const Scalar c11 = cofactor_3x3<MatrixType,1,1>(matrix) * invdet;
+  const Scalar c02 = cofactor_3x3<MatrixType,0,2>(matrix) * invdet;
   result.coeffRef(1,2) =  cofactor_3x3<MatrixType,2,1>(matrix) * invdet;
-  result.coeffRef(2,0) =  cofactor_3x3<MatrixType,0,2>(matrix) * invdet;
   result.coeffRef(2,1) =  cofactor_3x3<MatrixType,1,2>(matrix) * invdet;
   result.coeffRef(2,2) =  cofactor_3x3<MatrixType,2,2>(matrix) * invdet;
+  result.coeffRef(1,0) =  c01;
+  result.coeffRef(1,1) =  c11;
+  result.coeffRef(2,0) =  c02;  
+  result.row(0) = cofactors_col0 * invdet;
 }
 
 template<typename MatrixType, typename ResultType>
@@ -166,12 +171,7 @@ struct compute_inverse<MatrixType, ResultType, 3>
     cofactors_col0.coeffRef(2) =  cofactor_3x3<MatrixType,2,0>(matrix);
     const Scalar det = (cofactors_col0.cwiseProduct(matrix.col(0))).sum();
     const Scalar invdet = Scalar(1) / det;
-    if(extract_data(matrix) != extract_data(result)) {
-      compute_inverse_size3_helper(matrix, invdet, cofactors_col0, result);
-    } else {
-      MatrixType matrix_t = matrix;
-      compute_inverse_size3_helper(matrix_t, invdet, cofactors_col0, result);
-    }
+    compute_inverse_size3_helper(matrix, invdet, cofactors_col0, result);
   }
 };
 
@@ -187,22 +187,16 @@ struct compute_inverse_and_det_with_check<MatrixType, ResultType, 3>
     bool& invertible
   )
   {
-    using std::abs;
     typedef typename ResultType::Scalar Scalar;
     Matrix<Scalar,3,1> cofactors_col0;
     cofactors_col0.coeffRef(0) =  cofactor_3x3<MatrixType,0,0>(matrix);
     cofactors_col0.coeffRef(1) =  cofactor_3x3<MatrixType,1,0>(matrix);
     cofactors_col0.coeffRef(2) =  cofactor_3x3<MatrixType,2,0>(matrix);
     determinant = (cofactors_col0.cwiseProduct(matrix.col(0))).sum();
-    invertible = abs(determinant) > absDeterminantThreshold;
+    invertible = Eigen::numext::abs(determinant) > absDeterminantThreshold;
     if(!invertible) return;
     const Scalar invdet = Scalar(1) / determinant;
-    if(extract_data(matrix) != extract_data(inverse)) {
-      compute_inverse_size3_helper(matrix, invdet, cofactors_col0, inverse);
-    } else {
-      MatrixType matrix_t = matrix;
-      compute_inverse_size3_helper(matrix_t, invdet, cofactors_col0, inverse);
-    }
+    compute_inverse_size3_helper(matrix, invdet, cofactors_col0, inverse);
   }
 };