Make the threshold from gemm to coeff-based-product configurable, and add some explanations.

This commit is contained in:
Gael Guennebaud 2017-08-24 10:43:21 +02:00
parent 39864ebe1e
commit 12249849b5
2 changed files with 19 additions and 3 deletions

View File

@ -18,6 +18,16 @@ enum {
Small = 3 Small = 3
}; };
// Define the threshold value to fall back from the generic matrix-matrix product
// implementation (heavy) to the lightweight coeff-based product one.
// See generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
// in products/GeneralMatrixMatrix.h for more details.
// TODO This threshold should also be used in the compile-time selector below.
#ifndef EIGEN_GEMM_TO_COEFFBASED_THRESHOLD
// This default value has been obtained on a Haswell architecture.
#define EIGEN_GEMM_TO_COEFFBASED_THRESHOLD 20
#endif
namespace internal { namespace internal {
template<int Rows, int Cols, int Depth> struct product_type_selector; template<int Rows, int Cols, int Depth> struct product_type_selector;

View File

@ -427,7 +427,13 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
template<typename Dst> template<typename Dst>
static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ {
if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0) // See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=404 for a discussion and helper program
// to determine the following heuristic.
// EIGEN_GEMM_TO_COEFFBASED_THRESHOLD is typically defined to 20 in GeneralProduct.h,
// unless it has been specialized by the user or for a given architecture.
// Note that the condition rhs.rows()>0 was required because lazy product is (was?) not happy with empty inputs.
// I'm not sure it is still required.
if((rhs.rows()+dst.rows()+dst.cols())<EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows()>0)
lazyproduct::evalTo(dst, lhs, rhs); lazyproduct::evalTo(dst, lhs, rhs);
else else
{ {
@ -439,7 +445,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
template<typename Dst> template<typename Dst>
static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) static void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ {
if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0) if((rhs.rows()+dst.rows()+dst.cols())<EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows()>0)
lazyproduct::addTo(dst, lhs, rhs); lazyproduct::addTo(dst, lhs, rhs);
else else
scaleAndAddTo(dst,lhs, rhs, Scalar(1)); scaleAndAddTo(dst,lhs, rhs, Scalar(1));
@ -448,7 +454,7 @@ struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemmProduct>
template<typename Dst> template<typename Dst>
static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
{ {
if((rhs.rows()+dst.rows()+dst.cols())<20 && rhs.rows()>0) if((rhs.rows()+dst.rows()+dst.cols())<EIGEN_GEMM_TO_COEFFBASED_THRESHOLD && rhs.rows()>0)
lazyproduct::subTo(dst, lhs, rhs); lazyproduct::subTo(dst, lhs, rhs);
else else
scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); scaleAndAddTo(dst, lhs, rhs, Scalar(-1));