look at that subtle difference in Product.h...

The CacheOptimal evaluation mode is only beneficial for large enough matrices. When taking a block of a fixed-size (hence small) matrix, SizeAtCompileTime is Dynamic, so it is not a good indicator of whether the matrix is small. This example shows that the right indicator is MaxSizeAtCompileTime. Result: +10% speed in echelon.cpp
2025-08-09 18:29:03 +08:00 · 2008-03-26 09:29:29 +00:00 · 2008-03-26 09:29:29 +00:00 · c9b0dcd733
commit c9b0dcd733
parent a994e51c96
2 changed files with 4 additions and 3 deletions
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@ -91,8 +91,9 @@ struct ei_product_eval_mode
 {
  enum {
    SizeAtCompileTime = MatrixBase<Product<Lhs,Rhs,UnrolledDotProduct> >::SizeAtCompileTime,
+    MaxSizeAtCompileTime = MatrixBase<Product<Lhs,Rhs,UnrolledDotProduct> >::MaxSizeAtCompileTime,
    EvalMode = ( EIGEN_UNROLLED_LOOPS
-              && SizeAtCompileTime != Dynamic
+              && MaxSizeAtCompileTime != Dynamic
              && SizeAtCompileTime <= EIGEN_UNROLLING_LIMIT) ? UnrolledDotProduct : CacheOptimal,
  };
 };
--- a/doc/echelon.cpp
+++ b/doc/echelon.cpp
@ -67,8 +67,8 @@ int main(int, char **)
  // now m is still a matrix of rank N-1
  cout << "Here's the matrix m:" << endl << m << endl;

-  cout << "Now let's echelon m:" << endl;
-  echelon(m);
+  cout << "Now let's echelon m (repeating many times for benchmarking purposes):" << endl;
+  for(int i = 0; i < 1000000; i++) echelon(m);

  cout << "Now m is:" << endl << m << endl;
 }