diff --git a/Eigen/Core b/Eigen/Core index 5ebbd9496..15dae4af0 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -189,6 +189,7 @@ namespace Eigen { #include "src/Core/products/SelfadjointRank2Update.h" #include "src/Core/products/TriangularMatrixVector.h" #include "src/Core/products/TriangularSolverMatrix.h" +#include "src/Core/products/TriangularMatrixMatrix.h" #include "src/Core/BandMatrix.h" } // namespace Eigen diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index a1be8ea50..66fee793c 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -125,7 +125,7 @@ struct ei_gebp_kernel // loops on each register blocking of lhs/res for(int i=0; i -struct ei_gemm_pack_rhs +template +struct ei_gemm_pack_rhs { enum { PacketSize = ei_packet_traits::size }; - void operator()(Scalar* blockB, const Scalar* rhs, int rhsStride, Scalar alpha, int depth, int cols) + void operator()(Scalar* blockB, const Scalar* rhs, int rhsStride, Scalar alpha, int depth, int cols, + int stride=0, int offset=0) { + ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); bool hasAlpha = alpha != Scalar(1); int packet_cols = (cols/nr) * nr; int count = 0; for(int j2=0; j2 count += nr*PacketSize; } } + // skip what we have after + if(PanelMode) count += PacketSize * nr * (stride-offset-depth); } // copy the remaining columns one at a time (nr==1) for(int j2=packet_cols; j2 count += PacketSize; } } + if(PanelMode) count += PacketSize * (stride-offset-depth); } } }; // this version is optimized for row major matrices -template -struct ei_gemm_pack_rhs +template +struct ei_gemm_pack_rhs { enum { PacketSize = ei_packet_traits::size }; - void operator()(Scalar* blockB, const Scalar* rhs, int rhsStride, Scalar alpha, int depth, int cols) + void operator()(Scalar* blockB, const Scalar* rhs, int rhsStride, Scalar alpha, int depth, int cols, + int stride=0, int offset=0) { + ei_assert(((!PanelMode) && stride==0 && offset==0) || (PanelMode && stride>=depth && offset<=stride)); bool hasAlpha = alpha != Scalar(1); int packet_cols = (cols/nr) * nr; int count = 0; for(int j2=0; j2 for(int k=0; k -struct ei_gemm_pack_rhs_panel; - // if the rhs is row major, we have to evaluate it in a temporary colmajor matrix template struct ei_triangular_solve_matrix @@ -136,7 +133,7 @@ struct ei_triangular_solve_matrix() + ei_gemm_pack_rhs() (blockB, _rhs+startBlock, rhsStride, -1, actualPanelWidth, cols, actual_kc, blockBOffset); // GEBP @@ -174,46 +171,4 @@ struct ei_triangular_solve_matrix -struct ei_gemm_pack_rhs_panel -{ - enum { PacketSize = ei_packet_traits::size }; - void operator()(Scalar* blockB, const Scalar* rhs, int rhsStride, Scalar alpha, int depth, int cols, int stride, int offset) - { - int packet_cols = (cols/nr) * nr; - int count = 0; - for(int j2=0; j2 struct ei_gebp_kernel; -template +template struct ei_gemm_pack_rhs; template