mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-02 02:00:37 +08:00
WIP 2
This commit is contained in:
parent
6533187280
commit
09f3e95447
@ -115,6 +115,13 @@ namespace internal {
|
|||||||
lhsPackMap.advance(4); \
|
lhsPackMap.advance(4); \
|
||||||
rhsPackMap.advance(1);
|
rhsPackMap.advance(1);
|
||||||
|
|
||||||
|
#define MICRO_2x1x1() \
|
||||||
|
pLhs = pload<LhsPacket>(lhsPackMap.pCur); \
|
||||||
|
pRhs = pset1<RhsPacket>(*rhsPackMap.pCur); \
|
||||||
|
acc._acc += pRhs*pLhs; \
|
||||||
|
lhsPackMap.advance(2); \
|
||||||
|
rhsPackMap.advance(1);
|
||||||
|
|
||||||
template<int CPU, typename Scalar, typename ResScalar, typename DataMapper>
|
template<int CPU, typename Scalar, typename ResScalar, typename DataMapper>
|
||||||
struct Accumulator<0, CPU, Scalar, ResScalar, DataMapper, 12, 1>
|
struct Accumulator<0, CPU, Scalar, ResScalar, DataMapper, 12, 1>
|
||||||
{
|
{
|
||||||
@ -225,6 +232,38 @@ struct Accumulator<0, CPU, Scalar, ResScalar, DataMapper, 4, 1>
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template<int CPU, typename Scalar, typename ResScalar, typename DataMapper>
|
||||||
|
struct Accumulator<0, CPU, Scalar, ResScalar, DataMapper, 2, 1>
|
||||||
|
{
|
||||||
|
using LinearMapper = typename DataMapper::LinearMapper;
|
||||||
|
using AccPacket = typename packet_traits<Scalar>::half;
|
||||||
|
using ResPacket = typename packet_traits<ResScalar>::half;
|
||||||
|
|
||||||
|
AccPacket _acc;
|
||||||
|
|
||||||
|
EIGEN_STRONG_INLINE void zero()
|
||||||
|
{
|
||||||
|
_acc = pset1<AccPacket>(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int LhsProgress, int DepthProgress, int RhsProgress>
|
||||||
|
EIGEN_STRONG_INLINE void prefetch(const DataMapper&, Index, Index) {}
|
||||||
|
|
||||||
|
template<typename ResPacket_>
|
||||||
|
EIGEN_STRONG_INLINE void scale(ResScalar alpha, const ResPacket_& pAlpha)
|
||||||
|
{
|
||||||
|
_acc *= pAlpha;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename ResPacket_>
|
||||||
|
EIGEN_STRONG_INLINE void store(const DataMapper& dest, Index row, Index col, ResScalar alpha, const ResPacket_& pAlpha)
|
||||||
|
{
|
||||||
|
PacketBlock<ResPacket, 1> block;
|
||||||
|
block.packet[0] = dest.template loadPacket<ResPacket>(row, col) + pAlpha*_acc;
|
||||||
|
dest.template storePacketBlock<AccPacket, 1>(row, col, block);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
template<int CPU, typename Scalar, typename ResScalar, typename DataMapper>
|
template<int CPU, typename Scalar, typename ResScalar, typename DataMapper>
|
||||||
struct Accumulator<0, CPU, Scalar, ResScalar, DataMapper, 1, 4>
|
struct Accumulator<0, CPU, Scalar, ResScalar, DataMapper, 1, 4>
|
||||||
{
|
{
|
||||||
@ -985,6 +1024,28 @@ struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap,
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template<int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator>
|
||||||
|
struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, Accumulator, 2, 1, 1>
|
||||||
|
{
|
||||||
|
EIGEN_STRONG_INLINE void operator()(LhsPackMap& lhsPackMap,
|
||||||
|
RhsPackMap& rhsPackMap,
|
||||||
|
Index rowIdx, Index colIdx, Index depthIdx,
|
||||||
|
Accumulator& acc)
|
||||||
|
{
|
||||||
|
using LhsPacket = typename packet_traits<LhsScalar>::half;
|
||||||
|
using RhsPacket = typename packet_traits<RhsScalar>::half;
|
||||||
|
|
||||||
|
asm __volatile__("#BEGIN_NEON_MICROKERNEL_4x1x1\n\t");
|
||||||
|
|
||||||
|
LhsPacket pLhs;
|
||||||
|
RhsPacket pRhs;
|
||||||
|
|
||||||
|
MICRO_2x1x1();
|
||||||
|
|
||||||
|
asm __volatile__("#END_NEON_MICROKERNEL_4x1x1\n\t");
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
template<int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator>
|
template<int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator>
|
||||||
struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, Accumulator, 1, 1, 4>
|
struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, Accumulator, 1, 1, 4>
|
||||||
{
|
{
|
||||||
|
@ -292,26 +292,6 @@ struct MicroKernel
|
|||||||
Index rowIdx, Index colIdx, Index depthIdx,
|
Index rowIdx, Index colIdx, Index depthIdx,
|
||||||
Accumulator& acc)
|
Accumulator& acc)
|
||||||
{
|
{
|
||||||
#ifdef __DEBUG__
|
|
||||||
std::cout << "Kernel " << M << " x " << K << " x " << N << " @ " << rowIdx << ", " << depthIdx << ", " << colIdx << std::endl;
|
|
||||||
std::cout << "LHS ";
|
|
||||||
for(auto i = 0; i < M; i++)
|
|
||||||
{
|
|
||||||
for(auto j = 0; j < K; j++)
|
|
||||||
{
|
|
||||||
std::cout << lhsPackMap.pCur[i*K + j] << " ";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::cout << std::endl << "RHS ";
|
|
||||||
for(auto i = 0; i < K; i++)
|
|
||||||
{
|
|
||||||
for(auto j = 0; j < N; j++)
|
|
||||||
{
|
|
||||||
std::cout << rhsPackMap.pCur[i*N + j] << " ";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::cout << std::endl;
|
|
||||||
#endif
|
|
||||||
const RhsScalar *pRhs = rhsPackMap.pCur;
|
const RhsScalar *pRhs = rhsPackMap.pCur;
|
||||||
for(auto i = 0; i < N; i++)
|
for(auto i = 0; i < N; i++)
|
||||||
{
|
{
|
||||||
@ -326,26 +306,26 @@ struct MicroKernel
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename ResPacket, typename DataMapper, int RHS_SHAPE_IDX, int LHS_SHAPE_IDX, int IDX>
|
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename ResPacket, typename DataMapper, typename AccumulatorType, int RHS_SHAPE_IDX, int LHS_SHAPE_IDX, int IDX>
|
||||||
struct DepthLoopStruct
|
struct DepthLoopStruct
|
||||||
{
|
{
|
||||||
static constexpr auto PREVIOUS = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_DEP_POINTER];
|
static constexpr auto PREVIOUS = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_DEP_POINTER];
|
||||||
|
|
||||||
DepthLoopStruct<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, ResPacket, DataMapper, RHS_SHAPE_IDX, LHS_SHAPE_IDX, PREVIOUS> depthLS;
|
DepthLoopStruct<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, ResPacket, DataMapper, AccumulatorType, RHS_SHAPE_IDX, LHS_SHAPE_IDX, PREVIOUS> depthLS;
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE void operator()(Index rowIdx, Index colIdx, Index depthIdx, const DataMapper& res,
|
EIGEN_STRONG_INLINE void operator()(Index rowIdx, Index colIdx, Index depthIdx, const DataMapper& res, AccumulatorType& acc,
|
||||||
Index rows, Index depth, Index cols, ResScalar alpha, const ResPacket& pAlpha, LhsPackMap& lhsPackMap, RhsPackMap& rhsPackMap)
|
Index rows, Index depth, Index cols, ResScalar alpha, const ResPacket& pAlpha, LhsPackMap& lhsPackMap, RhsPackMap& rhsPackMap)
|
||||||
{
|
{
|
||||||
constexpr int rhsProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[RHS_SHAPE_IDX][SHAPES_RHS_DIMENSION];
|
constexpr auto rhsProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[RHS_SHAPE_IDX][SHAPES_RHS_DIMENSION];
|
||||||
constexpr int lhsProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[LHS_SHAPE_IDX][SHAPES_LHS_DIMENSION];
|
constexpr auto lhsProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[LHS_SHAPE_IDX][SHAPES_LHS_DIMENSION];
|
||||||
constexpr int depthProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_DEP_DIMENSION];
|
constexpr auto depthProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_DEP_DIMENSION];
|
||||||
|
|
||||||
typedef Accumulator<Architecture, CPU, AccScalar, ResScalar, DataMapper, lhsProgress, rhsProgress> AccumulatorType;
|
//typedef Accumulator<Architecture, CPU, AccScalar, ResScalar, DataMapper, lhsProgress, rhsProgress> AccumulatorType;
|
||||||
|
|
||||||
MicroKernel<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, AccumulatorType, lhsProgress, depthProgress, rhsProgress> mkt;
|
MicroKernel<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, AccumulatorType, lhsProgress, depthProgress, rhsProgress> mkt;
|
||||||
AccumulatorType acc;
|
//AccumulatorType acc;
|
||||||
|
|
||||||
acc.zero();
|
//acc.zero();
|
||||||
|
|
||||||
acc.template prefetch<lhsProgress, depthProgress, rhsProgress>(res, rowIdx, colIdx);
|
acc.template prefetch<lhsProgress, depthProgress, rhsProgress>(res, rowIdx, colIdx);
|
||||||
|
|
||||||
@ -354,18 +334,41 @@ struct DepthLoopStruct
|
|||||||
|
|
||||||
for(; depthIdx + depthProgress <= depth; depthIdx+=depthProgress)
|
for(; depthIdx + depthProgress <= depth; depthIdx+=depthProgress)
|
||||||
{
|
{
|
||||||
|
#ifdef __DEBUG__
|
||||||
|
auto M = lhsProgress;
|
||||||
|
auto K = depthProgress;
|
||||||
|
auto N = rhsProgress;
|
||||||
|
std::cout << "Kernel " << M << " x " << K << " x " << N << " @ " << rowIdx << ", " << depthIdx << ", " << colIdx << std::endl;
|
||||||
|
std::cout << "LHS ";
|
||||||
|
for(auto i = 0; i < M; i++)
|
||||||
|
{
|
||||||
|
for(auto j = 0; j < K; j++)
|
||||||
|
{
|
||||||
|
std::cout << lhsPackMap.pCur[i*K + j] << " ";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::cout << std::endl << "RHS ";
|
||||||
|
for(auto i = 0; i < K; i++)
|
||||||
|
{
|
||||||
|
for(auto j = 0; j < N; j++)
|
||||||
|
{
|
||||||
|
std::cout << rhsPackMap.pCur[i*N + j] << " ";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
#endif
|
||||||
mkt(lhsPackMap, rhsPackMap, rowIdx, colIdx, depthIdx, acc);
|
mkt(lhsPackMap, rhsPackMap, rowIdx, colIdx, depthIdx, acc);
|
||||||
}
|
}
|
||||||
acc.store(res, rowIdx, colIdx, alpha, pAlpha);
|
//acc.store(res, rowIdx, colIdx, alpha, pAlpha);
|
||||||
|
|
||||||
depthLS(rowIdx, colIdx, depthIdx, res, rows, depth, cols, alpha, pAlpha, lhsPackMap, rhsPackMap);
|
depthLS(rowIdx, colIdx, depthIdx, res, acc, rows, depth, cols, alpha, pAlpha, lhsPackMap, rhsPackMap);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename ResPacket, typename DataMapper, int RHS_SHAPE_IDX, int LHS_SHAPE_IDX>
|
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename ResPacket, typename DataMapper, typename AccumulatorType, int RHS_SHAPE_IDX, int LHS_SHAPE_IDX>
|
||||||
struct DepthLoopStruct<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, ResPacket, DataMapper, RHS_SHAPE_IDX, LHS_SHAPE_IDX, -1>
|
struct DepthLoopStruct<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, ResPacket, DataMapper, AccumulatorType, RHS_SHAPE_IDX, LHS_SHAPE_IDX, -1>
|
||||||
{
|
{
|
||||||
EIGEN_STRONG_INLINE void operator()(Index, Index, Index, const DataMapper&,
|
EIGEN_STRONG_INLINE void operator()(Index, Index, Index, const DataMapper&, AccumulatorType&,
|
||||||
Index, Index, Index, ResScalar, const ResPacket&, LhsPackMap&, RhsPackMap&) {}
|
Index, Index, Index, ResScalar, const ResPacket&, LhsPackMap&, RhsPackMap&) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -380,15 +383,22 @@ struct LhsLoopStruct
|
|||||||
{
|
{
|
||||||
constexpr auto lhsProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_LHS_DIMENSION];
|
constexpr auto lhsProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_LHS_DIMENSION];
|
||||||
constexpr auto rhsProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_RHS_DIMENSION];
|
constexpr auto rhsProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_RHS_DIMENSION];
|
||||||
DepthLoopStruct<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, ResPacket, DataMapper, RHS_SHAPE_IDX, IDX, IDX> depthLS;
|
|
||||||
|
typedef Accumulator<Architecture, CPU, AccScalar, ResScalar, DataMapper, lhsProgress, rhsProgress> AccumulatorType;
|
||||||
|
|
||||||
|
DepthLoopStruct<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, ResPacket, DataMapper, AccumulatorType, RHS_SHAPE_IDX, IDX, IDX> depthLS;
|
||||||
|
|
||||||
//rhsPackMap.resetCur();
|
//rhsPackMap.resetCur();
|
||||||
for(;rowIdx + lhsProgress <= rows; rowIdx+=lhsProgress)
|
for(;rowIdx + lhsProgress <= rows; rowIdx+=lhsProgress)
|
||||||
{
|
{
|
||||||
rhsPackMap.resetCur();
|
rhsPackMap.resetCur();
|
||||||
|
AccumulatorType acc;
|
||||||
|
acc.zero();
|
||||||
//lhsPackMap.moveTo(rowIdx);
|
//lhsPackMap.moveTo(rowIdx);
|
||||||
//rhsPackMap.moveTo(colIdx);
|
//rhsPackMap.moveTo(colIdx);
|
||||||
|
|
||||||
depthLS(rowIdx, colIdx, 0, res, rows, depth, cols, alpha, pAlpha, lhsPackMap, rhsPackMap);
|
depthLS(rowIdx, colIdx, 0, res, acc, rows, depth, cols, alpha, pAlpha, lhsPackMap, rhsPackMap);
|
||||||
|
acc.store(res, rowIdx, colIdx, alpha, pAlpha);
|
||||||
}
|
}
|
||||||
lhsLS(rowIdx, colIdx, res, rows, depth, cols, alpha, pAlpha, lhsPackMap, rhsPackMap);
|
lhsLS(rowIdx, colIdx, res, rows, depth, cols, alpha, pAlpha, lhsPackMap, rhsPackMap);
|
||||||
}
|
}
|
||||||
|
@ -28,9 +28,14 @@ int main(int argc, char* argv[])
|
|||||||
for(auto i = 0; i < 2; i++)
|
for(auto i = 0; i < 2; i++)
|
||||||
C = A*B;
|
C = A*B;
|
||||||
|
|
||||||
|
#ifdef __DEBUG_SHOW_INPUTS__
|
||||||
std::cout << A << std::endl;
|
std::cout << A << std::endl;
|
||||||
std::cout << B << std::endl;
|
std::cout << B << std::endl;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __DEBUG_SHOW_RESULT__
|
||||||
std::cout << C << std::endl;
|
std::cout << C << std::endl;
|
||||||
|
#endif
|
||||||
|
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
|
|
||||||
@ -50,8 +55,9 @@ int main(int argc, char* argv[])
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#ifdef __DEBUG_SHOW_RESULT__
|
||||||
std::cout << D << std::endl;
|
std::cout << D << std::endl;
|
||||||
|
#endif
|
||||||
#else
|
#else
|
||||||
if(argc < 5)
|
if(argc < 5)
|
||||||
{
|
{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user