diff --git a/Eigen/src/Core/products/SelfadjointProduct.h b/Eigen/src/Core/products/SelfadjointProduct.h index 08df9e15c..facde7252 100644 --- a/Eigen/src/Core/products/SelfadjointProduct.h +++ b/Eigen/src/Core/products/SelfadjointProduct.h @@ -149,12 +149,65 @@ SelfAdjointView& SelfAdjointView } +// optimized SYmmetric packed Block * packed Block product kernel +template +struct ei_sybb_kernel +{ + enum { + PacketSize = ei_packet_traits::size, + BlockSize = EIGEN_ENUM_MAX(mr,nr) + }; + void operator()(Scalar* res, int resStride, const Scalar* blockA, const Scalar* blockB, int actual_mc, int actual_kc, int packet_cols) + { + ei_gebp_kernel gebp_kernel; + Matrix buffer; + + // let's process the block per panel of actual_mc x BlockSize, + // again, each is split into three parts, etc. + for (int j=0; j use the gebp kernel on a temporary buffer, + // and then perform a triangular copy + { + int i = j; + buffer.setZero(); + gebp_kernel(buffer.data(), BlockSize, blockA+actual_kc*i, actual_b, + actualBlockSize, actual_kc, actualPacketCols, 0, actualBlockSize); + for(int j1=0; j1 TODO find a way to factorize the two kernels in a single one template -struct ei_sybb_kernel +struct ei_sybb_kernel_ { void operator()(Scalar* res, int resStride, const Scalar* blockA, const Scalar* blockB, int actual_mc, int actual_kc, int packet_cols) {