From 755010702856250c5a63c9bcde305ebe1f5e61fa Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 5 Mar 2015 10:03:46 +0100 Subject: [PATCH] Product optimization: implement a dynamic loop-swapping strategy to improve memory accesses to the destination matrix in the case of K-rank-update like products, i.e., for products of the kind: "large x small" * "small x large" --- .../Core/products/GeneralBlockPanelKernel.h | 57 +++++++++++++++---- 1 file changed, 46 insertions(+), 11 deletions(-) diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index df9f66995..8f4ee4dbb 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -952,14 +952,28 @@ void gebp_kernel=3*Traits::LhsProgress) { +#ifdef EIGEN_TEST_SPECIFIC_LOOP_SWAP_CRITERION + const bool swap_loops = EIGEN_TEST_SPECIFIC_LOOP_SWAP_CRITERION; +#else + const bool swap_loops = depth<48; +#endif + + Index bound1 = swap_loops ? packet_cols4 : peeled_mc3; + Index bound2 = !swap_loops ? packet_cols4 : peeled_mc3; + Index incr1 = swap_loops ? nr : 3*Traits::LhsProgress; + Index incr2 = !swap_loops ? nr : 3*Traits::LhsProgress; + PossiblyRotatingKernelHelper possiblyRotatingKernelHelper(traits); - + // loops on each largest micro horizontal panel of lhs (3*Traits::LhsProgress x depth) - for(Index i=0; i=2*Traits::LhsProgress) { - // loops on each largest micro horizontal panel of lhs (2*LhsProgress x depth) - for(Index i=peeled_mc3; i