From 5bffe0962416269d40e7b40f64227cb551310b55 Mon Sep 17 00:00:00 2001 From: Everton Constantino Date: Thu, 22 Apr 2021 13:14:00 +0000 Subject: [PATCH] WIP --- Eigen/src/Core/arch/NEON/MatrixProduct.h | 111 ++++++++++++++++------- 1 file changed, 79 insertions(+), 32 deletions(-) diff --git a/Eigen/src/Core/arch/NEON/MatrixProduct.h b/Eigen/src/Core/arch/NEON/MatrixProduct.h index 3e8d7aa41..7adc213c9 100644 --- a/Eigen/src/Core/arch/NEON/MatrixProduct.h +++ b/Eigen/src/Core/arch/NEON/MatrixProduct.h @@ -560,7 +560,7 @@ EIGEN_STRONG_INLINE void gemm_old(const DataMapper& res, const LhsScalar* blockA #endif template -constexpr int SHAPES_COUNT = 2; +constexpr int SHAPES_COUNT = 4; constexpr int SHAPES_DIMENSION = 4; constexpr int SHAPES_LHS_DIMENSION = 0; @@ -578,7 +578,7 @@ constexpr int PACK_SHAPES_END = -1; // lhs_progress x depth_progress x rhs_progress (depth_progress > 1 matrix ops) x pointer to next rhs_progress on the shapes map template -constexpr int SHAPES[SHAPES_COUNT][SHAPES_DIMENSION] = {{1,1,1,SHAPES_POINTER_END},{4,1,4,0}}; +constexpr int SHAPES[SHAPES_COUNT][SHAPES_DIMENSION] = {{1,1,1,SHAPES_POINTER_END},{4,1,1,0},{1,1,4,1},{4,1,4,1}}; // d1progress x d2progress template @@ -694,8 +694,7 @@ template[IDX][PACK_SHAPES_POINTER]> pmc; - - inline Index getPosition(Index pos, Index d2Size) + EIGEN_STRONG_INLINE Index getPosition(Index pos, Index d2Size) { constexpr auto d1Progress = PACK_SHAPES[IDX][0]; Index v = (pos / d1Progress) * d1Progress; @@ -706,7 +705,7 @@ struct PackMapCalculator template struct PackMapCalculator { - inline Index getPosition(Index, Index) { return Index(0); } + EIGEN_STRONG_INLINE Index getPosition(Index, Index) { return Index(0); } }; template @@ -719,41 +718,87 @@ struct PackMap PackMap(const Scalar *base, Index d2Size) : pBase(base), pCur(base), d2Size(d2Size) {} - inline void resetCur() { pCur = pBase; } - inline void moveTo(Index pos) - { - Index inc = pmc.getPosition(pos, d2Size); - std::cout << isLhs << " MOVE_TO " << pos << " " << inc << std::endl; - pCur = pBase + inc; - } - inline void advance(int progress) { pCur += progress; } + EIGEN_STRONG_INLINE void resetCur() { pCur = pBase; } + EIGEN_STRONG_INLINE void moveTo(Index p1) { pCur = pBase + pmc.getPosition(p1, d2Size); } + EIGEN_STRONG_INLINE void advance(int progress) { pCur += progress; } }; -template +template +struct Accumulator +{ + Scalar dt[M][N]; + + EIGEN_STRONG_INLINE void zero() + { + for(auto i = 0; i < M; i++) + { + for(auto j = 0; j < N; j++) + { + dt[i][j] = Scalar(0); + } + } + } + + EIGEN_STRONG_INLINE void scale(ResScalar alpha) + { + for(auto i = 0; i < M; i++) + { + for(auto j = 0; j < N; j++) + { + dt[i][j] *= alpha; + } + } + } + + EIGEN_STRONG_INLINE void store(const DataMapper& dest, Index row, Index col) + { + for(auto i = 0; i < M; i++) + { + for(auto j = 0; j < N; j++) + { + dest(row + i, col + j) = dt[i][j]; + } + } + } +}; + +template struct MicroKernel { - EIGEN_STRONG_INLINE void operator()(PackMap& lhsPackMap, PackMap& rhsPackMap, Index rowIdx, Index colIdx, Index depthIdx) + EIGEN_STRONG_INLINE void operator()(PackMap& lhsPackMap, + PackMap& rhsPackMap, + Index rowIdx, Index colIdx, Index depthIdx, + Accumulator& acc) { std::cout << "Kernel " << M << " x " << K << " x " << N << " @ " << rowIdx << ", " << depthIdx << ", " << colIdx << std::endl; std::cout << "LHS "; - for(auto i = rowIdx; i < M + rowIdx; i++) + for(auto i = 0; i < M; i++) { - for(auto j = depthIdx; j < K + depthIdx; j++) + for(auto j = 0; j < K; j++) { - std::cout << *lhsPackMap.pCur << " "; - lhsPackMap.advance(1); + std::cout << lhsPackMap.pCur[i*K + j] << " "; } } std::cout << std::endl << "RHS "; - for(auto i = depthIdx; i < K + depthIdx; i++) + for(auto i = 0; i < K; i++) { - for(auto j = colIdx; j < N + colIdx; j++) + for(auto j = 0; j < N; j++) { - std::cout << *rhsPackMap.pCur << " "; - rhsPackMap.advance(1); + std::cout << rhsPackMap.pCur[i*N + j] << " "; } } std::cout << std::endl; + const RhsScalar *pRhs = rhsPackMap.pCur; + for(auto i = 0; i < N; i++) + { + const LhsScalar *pLhs = lhsPackMap.pCur; + for(auto j = 0; j < M; j++) + { + acc.dt[j][i] += pRhs[i]*pLhs[j]; + } + } + lhsPackMap.advance(M*K); + rhsPackMap.advance(K*N); }; }; @@ -770,11 +815,15 @@ struct DepthLoopStruct if(rhsProgress == SHAPES[IDX][SHAPES_RHS_DIMENSION] && lhsProgress == SHAPES[IDX][SHAPES_LHS_DIMENSION]) { - MicroKernel mkt; + MicroKernel mkt; + Accumulator acc; + acc.zero(); for(; depthIdx + depthProgress <= depth; depthIdx+=depthProgress) { - mkt(lhsPackMap, rhsPackMap, rowIdx, colIdx, depthIdx); + mkt(lhsPackMap, rhsPackMap, rowIdx, colIdx, depthIdx, acc); } + acc.scale(alpha); + acc.store(res, rowIdx, colIdx); } depthLS(rowIdx, colIdx, depthIdx, res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB, lhsPackMap, rhsPackMap); } @@ -825,10 +874,8 @@ struct RhsLoopStruct { constexpr auto rhsProgress = SHAPES[IDX][SHAPES_RHS_DIMENSION]; - std::cout << __PRETTY_FUNCTION__ << std::endl; for(;colIdx + rhsProgress <= cols; colIdx+=rhsProgress) { - //rhsPackMap.moveTo(colIdx); LhsLoopStruct lhsLS; lhsLS(0, colIdx, res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB, lhsPackMap, rhsPackMap); } @@ -843,7 +890,7 @@ struct RhsLoopStruct&, PackMap&) {} }; -template +template EIGEN_STRONG_INLINE void gemm(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB, Index rows, Index depth, Index cols, ResScalar alpha, Index strideA, Index strideB, Index offsetA, Index offsetB) { @@ -864,9 +911,9 @@ EIGEN_STRONG_INLINE void gemm(const DataMapper& res, const LhsScalar* blockA, co } std::cout << std::endl; - RhsLoopStruct<0, 0, Index, LhsScalar, RhsScalar, AccScalar, ResScalar, DataMapper, SHAPES_COUNT<0, 0, LhsScalar, RhsScalar>-1> rhsLS; - PackMap<0, 0, Index, LhsScalar, DataMapper, true> lhsPackMap(blockA, depth); - PackMap<0, 0, Index, RhsScalar, DataMapper, false> rhsPackMap(blockB, depth); + RhsLoopStruct-1> rhsLS; + PackMap lhsPackMap(blockA, depth); + PackMap rhsPackMap(blockB, depth); rhsLS(0, res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB, lhsPackMap, rhsPackMap); } @@ -940,7 +987,7 @@ void gebp_kernel(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); + gemm<0, 0, float, float, float, float, Index, DataMapper>(res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB); } } // end namespace internal