From fc4a85ecd5042c54c3db15c96ce7c4832bd18738 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 22 Feb 2010 16:35:05 +0100 Subject: [PATCH] fully adapt the gebp kernel and optimize it for CPU with only 8 registers --- .../Core/products/GeneralBlockPanelKernel.h | 456 ++++++++++-------- 1 file changed, 262 insertions(+), 194 deletions(-) diff --git a/Eigen/src/Core/products/GeneralBlockPanelKernel.h b/Eigen/src/Core/products/GeneralBlockPanelKernel.h index 8c29d2218..18e913b0e 100644 --- a/Eigen/src/Core/products/GeneralBlockPanelKernel.h +++ b/Eigen/src/Core/products/GeneralBlockPanelKernel.h @@ -27,6 +27,12 @@ #ifndef EIGEN_EXTERN_INSTANTIATIONS +#ifdef EIGEN_HAS_FUSE_CJMADD +#define CJMADD(A,B,C,T) C = cj.pmadd(A,B,C); +#else +#define CJMADD(A,B,C,T) T = A; T = cj.pmul(T,B); C = ei_padd(C,T); +#endif + // optimized GEneral packed Block * packed Panel product kernel template struct ei_gebp_kernel @@ -74,133 +80,111 @@ struct ei_gebp_kernel const Scalar* blB = &blockB[j2*strideB*PacketSize+offsetB*nr]; for(int k=0; k