From f61b0d56f0f78654e7fbf3a28cc3f0368077beef Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 7 Feb 2017 17:19:59 +0100 Subject: [PATCH] Improve multi-threading heuristic for matrix products with a small number of columns. (grafted from fc8fd5fd24d3dce28b7fafa538b67e61dd667f6e ) --- Eigen/src/Core/products/Parallelizer.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h index 3477d7182..c2f084c82 100644 --- a/Eigen/src/Core/products/Parallelizer.h +++ b/Eigen/src/Core/products/Parallelizer.h @@ -104,13 +104,14 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, // - the sizes are large enough // compute the maximal number of threads from the size of the product: - // FIXME this has to be fine tuned + // This first heuristic takes into account that the product kernel is fully optimized when working with nr columns at once. Index size = transpose ? rows : cols; - Index pb_max_threads = std::max<Index>(1,size / 32); + Index pb_max_threads = std::max<Index>(1,size / Functor::Traits::nr); + // compute the maximal number of threads from the total amount of work: double work = static_cast<double>(rows) * static_cast<double>(cols) * static_cast<double>(depth); - double kMinTaskSize = 50000; // Heuristic. + double kMinTaskSize = 50000; // FIXME improve this heuristic. pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, work / kMinTaskSize)); // compute the number of threads we are going to use