mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-05 19:46:07 +08:00
WIP
This commit is contained in:
parent
ca0d3f92d7
commit
43ce8e9d2d
@ -14,6 +14,76 @@ namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<int CPU, typename Scalar, typename ResScalar, typename DataMapper>
|
||||
struct Accumulator<0, CPU, Scalar, ResScalar, DataMapper, 4, 4>
|
||||
{
|
||||
using LinearMapper = typename DataMapper::LinearMapper;
|
||||
using AccPacket = typename packet_traits<Scalar>::type;
|
||||
using ResPacket = typename packet_traits<ResScalar>::type;
|
||||
|
||||
PacketBlock<AccPacket, 4> _acc;
|
||||
|
||||
EIGEN_STRONG_INLINE void zero()
|
||||
{
|
||||
_acc.packet[0] = pset1<AccPacket>(0);
|
||||
_acc.packet[1] = pset1<AccPacket>(0);
|
||||
_acc.packet[2] = pset1<AccPacket>(0);
|
||||
_acc.packet[3] = pset1<AccPacket>(0);
|
||||
}
|
||||
|
||||
template<typename ResPacket_>
|
||||
EIGEN_STRONG_INLINE void scale(ResScalar alpha, const ResPacket_& pAlpha)
|
||||
{
|
||||
_acc.packet[0] *= pAlpha;
|
||||
_acc.packet[1] *= pAlpha;
|
||||
_acc.packet[2] *= pAlpha;
|
||||
_acc.packet[3] *= pAlpha;
|
||||
}
|
||||
|
||||
EIGEN_STRONG_INLINE void store(const DataMapper& dest, Index row, Index col)
|
||||
{
|
||||
LinearMapper r0 = dest.getLinearMapper(row, col + 0);
|
||||
LinearMapper r1 = dest.getLinearMapper(row, col + 1);
|
||||
LinearMapper r2 = dest.getLinearMapper(row, col + 2);
|
||||
LinearMapper r3 = dest.getLinearMapper(row, col + 3);
|
||||
|
||||
r0.storePacket(0, r0.template loadPacket<ResPacket>(0) + _acc.packet[0]);
|
||||
r1.storePacket(0, r1.template loadPacket<ResPacket>(0) + _acc.packet[1]);
|
||||
r2.storePacket(0, r2.template loadPacket<ResPacket>(0) + _acc.packet[2]);
|
||||
r3.storePacket(0, r3.template loadPacket<ResPacket>(0) + _acc.packet[3]);
|
||||
}
|
||||
};
|
||||
|
||||
template<int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator>
|
||||
struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, Accumulator, 4, 1, 4>
|
||||
{
|
||||
EIGEN_STRONG_INLINE void operator()(LhsPackMap& lhsPackMap,
|
||||
RhsPackMap& rhsPackMap,
|
||||
Index rowIdx, Index colIdx, Index depthIdx,
|
||||
Accumulator& acc)
|
||||
{
|
||||
using LhsPacket = typename packet_traits<LhsScalar>::type;
|
||||
using RhsPacket = typename packet_traits<RhsScalar>::type;
|
||||
|
||||
asm __volatile__("#BEGIN_NEON_MICROKERNEL_3x1\n\t");
|
||||
LhsPacket pLhs = pload<LhsPacket>(lhsPackMap.pCur);
|
||||
RhsPacket pRhs = pload<RhsPacket>(rhsPackMap.pCur);
|
||||
RhsPacket pRhs0 = pset1<RhsPacket>(pRhs[0]);
|
||||
RhsPacket pRhs1 = pset1<RhsPacket>(pRhs[1]);
|
||||
RhsPacket pRhs2 = pset1<RhsPacket>(pRhs[2]);
|
||||
RhsPacket pRhs3 = pset1<RhsPacket>(pRhs[3]);
|
||||
|
||||
acc._acc.packet[0] += pLhs*pRhs0;
|
||||
acc._acc.packet[1] += pLhs*pRhs1;
|
||||
acc._acc.packet[2] += pLhs*pRhs2;
|
||||
acc._acc.packet[3] += pLhs*pRhs3;
|
||||
|
||||
lhsPackMap.advance(4*1);
|
||||
rhsPackMap.advance(1*4);
|
||||
asm __volatile__("#END_NEON_MICROKERNEL_3x1\n\t");
|
||||
};
|
||||
};
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
@ -584,8 +584,8 @@ constexpr int SHAPES[SHAPES_COUNT<Architecture, CPU, LhsScalar,RhsScalar>][SHAPE
|
||||
template<int Architecture, int CPU, typename Scalar, bool isLhs>
|
||||
constexpr int PACK_SHAPES[PACK_SHAPES_COUNT<Architecture, CPU, Scalar, isLhs>][PACK_SHAPES_DIMENSION] = {{1,1,PACK_SHAPES_END},{4,1,0}};
|
||||
|
||||
// The generic `isLhs == false` partial specialization of PACK_SHAPES is kept
// disabled on purpose:
//  - its initializer is the same as the one of the primary template, so it
//    adds nothing;
//  - a specialization of the form <Architecture, CPU, Scalar, false> is
//    neither more nor less specialized than one of the form
//    <0, CPU, Scalar, isLhs>, so having both makes PACK_SHAPES<0, CPU, Scalar, false>
//    ambiguous.
//template<int Architecture, int CPU, typename Scalar>
//constexpr int PACK_SHAPES<Architecture, CPU, Scalar, false>[PACK_SHAPES_COUNT<Architecture, CPU, Scalar, false>][PACK_SHAPES_DIMENSION] = {{1,1,PACK_SHAPES_END},{4,1,0}};
|
||||
|
||||
template<int Architecture, int CPU, typename Index, typename Scalar, bool isLhs, typename DataMapper, bool Conjugate, bool PanelMode, int StorageOrder, int M, int N>
|
||||
struct PackingOperator
|
||||
|
@ -14,6 +14,104 @@ namespace Eigen {
|
||||
|
||||
namespace internal {
|
||||
|
||||
template<int CPU, typename Scalar, bool isLhs>
|
||||
constexpr int PACK_SHAPES_COUNT<0, CPU, Scalar, isLhs> = 3;
|
||||
|
||||
template<int CPU, typename Scalar, bool isLhs>
|
||||
constexpr int PACK_SHAPES<0, CPU, Scalar, isLhs>[PACK_SHAPES_COUNT<0, CPU, Scalar, isLhs>][PACK_SHAPES_DIMENSION] = {{1,1,PACK_SHAPES_END},{4,1,0},{4,4,0}};
|
||||
|
||||
template<int CPU, typename Index, typename Scalar, bool isLhs, typename DataMapper, bool Conjugate, bool PanelMode, int StorageOrder>
|
||||
struct PackingOperator<0, CPU, Index, Scalar, isLhs, DataMapper, Conjugate, PanelMode, StorageOrder, 4, 4>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Scalar* operator()(Index d1Idx, Index d2Idx, Scalar *block, const DataMapper& data)
|
||||
{
|
||||
using Packet = typename packet_traits<Scalar>::type;
|
||||
constexpr int vectorSize = packet_traits<Scalar>::size;
|
||||
|
||||
Scalar *c = block;
|
||||
|
||||
if(!isLhs)
|
||||
{
|
||||
int tD = d1Idx;
|
||||
d1Idx = d2Idx;
|
||||
d2Idx = tD;
|
||||
}
|
||||
|
||||
if(isLhs && StorageOrder == ColMajor || !isLhs && StorageOrder == RowMajor)
|
||||
{
|
||||
Packet p0 = data.template loadPacket<Packet>(d1Idx, d2Idx + 0);
|
||||
Packet p1 = data.template loadPacket<Packet>(d1Idx, d2Idx + 1);
|
||||
Packet p2 = data.template loadPacket<Packet>(d1Idx, d2Idx + 2);
|
||||
Packet p3 = data.template loadPacket<Packet>(d1Idx, d2Idx + 3);
|
||||
|
||||
pstore<Scalar>(c + 0*vectorSize, p0);
|
||||
pstore<Scalar>(c + 1*vectorSize, p1);
|
||||
pstore<Scalar>(c + 2*vectorSize, p2);
|
||||
pstore<Scalar>(c + 3*vectorSize, p3);
|
||||
c+=4*vectorSize;
|
||||
} else {
|
||||
PacketBlock<Packet, 4> pblock;
|
||||
|
||||
pblock.packet[0] = data.template loadPacket<Packet>(d1Idx, d2Idx + 0);
|
||||
pblock.packet[1] = data.template loadPacket<Packet>(d1Idx, d2Idx + 1);
|
||||
pblock.packet[2] = data.template loadPacket<Packet>(d1Idx, d2Idx + 2);
|
||||
pblock.packet[3] = data.template loadPacket<Packet>(d1Idx, d2Idx + 3);
|
||||
|
||||
ptranspose(pblock);
|
||||
|
||||
pstore<Scalar>(c + 0*vectorSize, pblock.packet[0]);
|
||||
pstore<Scalar>(c + 1*vectorSize, pblock.packet[1]);
|
||||
pstore<Scalar>(c + 2*vectorSize, pblock.packet[2]);
|
||||
pstore<Scalar>(c + 3*vectorSize, pblock.packet[3]);
|
||||
c+=4*vectorSize;
|
||||
}
|
||||
return c;
|
||||
}
|
||||
};
|
||||
|
||||
template<int CPU, typename Index, typename Scalar, bool isLhs, typename DataMapper, bool Conjugate, bool PanelMode, int StorageOrder>
|
||||
struct PackingOperator<0, CPU, Index, Scalar, isLhs, DataMapper, Conjugate, PanelMode, StorageOrder, 4, 1>
|
||||
{
|
||||
EIGEN_STRONG_INLINE Scalar* operator()(Index d1Idx, Index d2Idx, Scalar *block, const DataMapper& data)
|
||||
{
|
||||
using Packet = typename packet_traits<Scalar>::type;
|
||||
Scalar *c = block;
|
||||
if(isLhs && StorageOrder == ColMajor)
|
||||
{
|
||||
Packet p = data.template loadPacket<Packet>(d1Idx, d2Idx);
|
||||
pstore<Scalar>(c, p);
|
||||
c+=4;
|
||||
} else if(!isLhs && StorageOrder == RowMajor) {
|
||||
Packet p = data.template loadPacket<Packet>(d2Idx, d1Idx);
|
||||
pstore<Scalar>(c, p);
|
||||
c+=4;
|
||||
} else {
|
||||
if(isLhs)
|
||||
{
|
||||
*c = data(d1Idx + 0, d2Idx);
|
||||
c++;
|
||||
*c = data(d1Idx + 1, d2Idx);
|
||||
c++;
|
||||
*c = data(d1Idx + 2, d2Idx);
|
||||
c++;
|
||||
*c = data(d1Idx + 3, d2Idx);
|
||||
c++;
|
||||
} else {
|
||||
*c = data(d2Idx, d1Idx + 0);
|
||||
c++;
|
||||
*c = data(d2Idx, d1Idx + 1);
|
||||
c++;
|
||||
*c = data(d2Idx, d1Idx + 2);
|
||||
c++;
|
||||
*c = data(d2Idx, d1Idx + 3);
|
||||
c++;
|
||||
}
|
||||
}
|
||||
return c;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
Loading…
x
Reference in New Issue
Block a user