Improve multi-threading heuristic for matrix products with a small number of columns.

This commit is contained in:
Gael Guennebaud 2017-02-07 17:19:59 +01:00
parent 4254b3eda3
commit fc8fd5fd24

View File

@@ -104,13 +104,14 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth,
// - the sizes are large enough // - the sizes are large enough
// compute the maximal number of threads from the size of the product: // compute the maximal number of threads from the size of the product:
// FIXME this has to be fine tuned // This first heuristic takes into account that the product kernel is fully optimized when working with nr columns at once.
Index size = transpose ? rows : cols; Index size = transpose ? rows : cols;
Index pb_max_threads = std::max<Index>(1,size / 32); Index pb_max_threads = std::max<Index>(1,size / Functor::Traits::nr);
// compute the maximal number of threads from the total amount of work: // compute the maximal number of threads from the total amount of work:
double work = static_cast<double>(rows) * static_cast<double>(cols) * double work = static_cast<double>(rows) * static_cast<double>(cols) *
static_cast<double>(depth); static_cast<double>(depth);
double kMinTaskSize = 50000; // Heuristic. double kMinTaskSize = 50000; // FIXME improve this heuristic.
pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, work / kMinTaskSize)); pb_max_threads = std::max<Index>(1, std::min<Index>(pb_max_threads, work / kMinTaskSize));
// compute the number of threads we are going to use // compute the number of threads we are going to use