diff --git a/Eigen/src/Core/GeneralProduct.h b/Eigen/src/Core/GeneralProduct.h index dec24848d..483277fe6 100644 --- a/Eigen/src/Core/GeneralProduct.h +++ b/Eigen/src/Core/GeneralProduct.h @@ -18,6 +18,16 @@ enum { Small = 3 }; +// Define the threshold value to fallback from the generic matrix-matrix product +// implementation (heavy) to the lightweight coeff-based product one. +// See generic_product_impl +// in products/GeneralMatrixMatrix.h for more details. +// TODO This threshold should also be used in the compile-time selector below. +#ifndef EIGEN_GEMM_TO_COEFFBASED_THRESHOLD +// This default value has been obtained on a Haswell architecture. +#define EIGEN_GEMM_TO_COEFFBASED_THRESHOLD 20 +#endif + namespace internal { template struct product_type_selector; diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index 6440e1d09..ed4d3182b 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -427,7 +427,13 @@ struct generic_product_impl template static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { - if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0) + // See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=404 for a discussion and helper program + // to determine the following heuristic. + // EIGEN_GEMM_TO_COEFFBASED_THRESHOLD is typically defined to 20 in GeneralProduct.h, + // unless it has been specialized by the user or for a given architecture. + // Note that the condition rhs.rows()>0 was required because lazy produc is (was?) not happy with empty inputs. + // I'm not sure it is still required. + if((rhs.rows()+dst.rows()+dst.cols())0) lazyproduct::evalTo(dst, lhs, rhs); else { @@ -439,7 +445,7 @@ struct generic_product_impl template static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { - if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0) + if((rhs.rows()+dst.rows()+dst.cols())0) lazyproduct::addTo(dst, lhs, rhs); else scaleAndAddTo(dst,lhs, rhs, Scalar(1)); @@ -448,7 +454,7 @@ struct generic_product_impl template static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { - if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0) + if((rhs.rows()+dst.rows()+dst.cols())0) lazyproduct::subTo(dst, lhs, rhs); else scaleAndAddTo(dst, lhs, rhs, Scalar(-1));