mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-14 20:56:00 +08:00
WIP - Vector
This commit is contained in:
parent
70c0363c28
commit
54f80f442d
@ -61,100 +61,100 @@ namespace internal {
|
|||||||
// }
|
// }
|
||||||
// };
|
// };
|
||||||
|
|
||||||
// template<int CPU, typename Scalar, typename ResScalar, typename DataMapper>
|
template<int CPU, typename Scalar, typename ResScalar, typename DataMapper>
|
||||||
// struct Accumulator<0, CPU, Scalar, ResScalar, DataMapper, 4, 1>
|
struct Accumulator<0, CPU, Scalar, ResScalar, DataMapper, 4, 1>
|
||||||
// {
|
{
|
||||||
// using LinearMapper = typename DataMapper::LinearMapper;
|
using LinearMapper = typename DataMapper::LinearMapper;
|
||||||
// using AccPacket = typename packet_traits<Scalar>::type;
|
using AccPacket = typename packet_traits<Scalar>::type;
|
||||||
// using ResPacket = typename packet_traits<ResScalar>::type;
|
using ResPacket = typename packet_traits<ResScalar>::type;
|
||||||
|
|
||||||
// AccPacket _acc;
|
AccPacket _acc;
|
||||||
|
|
||||||
// EIGEN_STRONG_INLINE void zero()
|
EIGEN_STRONG_INLINE void zero()
|
||||||
// {
|
{
|
||||||
// _acc = pset1<AccPacket>(0);
|
_acc = pset1<AccPacket>(0);
|
||||||
// }
|
}
|
||||||
|
|
||||||
// template<typename ResPacket_>
|
template<typename ResPacket_>
|
||||||
// EIGEN_STRONG_INLINE void scale(ResScalar alpha, const ResPacket_& pAlpha)
|
EIGEN_STRONG_INLINE void scale(ResScalar alpha, const ResPacket_& pAlpha)
|
||||||
// {
|
{
|
||||||
// _acc *= pAlpha;
|
_acc *= pAlpha;
|
||||||
// }
|
}
|
||||||
|
|
||||||
// EIGEN_STRONG_INLINE void store(const DataMapper& dest, Index row, Index col)
|
EIGEN_STRONG_INLINE void store(const DataMapper& dest, Index row, Index col)
|
||||||
// {
|
{
|
||||||
// PacketBlock<ResPacket, 1> block;
|
PacketBlock<ResPacket, 1> block;
|
||||||
// block.packet[0] = dest.template loadPacket<ResPacket>(row, col) + _acc;
|
block.packet[0] = dest.template loadPacket<ResPacket>(row, col) + _acc;
|
||||||
// dest.template storePacketBlock<AccPacket, 1>(row, col, block);
|
dest.template storePacketBlock<AccPacket, 1>(row, col, block);
|
||||||
// }
|
}
|
||||||
// };
|
};
|
||||||
|
|
||||||
// template<int CPU, typename Scalar, typename ResScalar, typename DataMapper>
|
template<int CPU, typename Scalar, typename ResScalar, typename DataMapper>
|
||||||
// struct Accumulator<0, CPU, Scalar, ResScalar, DataMapper, 1, 4>
|
struct Accumulator<0, CPU, Scalar, ResScalar, DataMapper, 1, 4>
|
||||||
// {
|
{
|
||||||
// using LinearMapper = typename DataMapper::LinearMapper;
|
using LinearMapper = typename DataMapper::LinearMapper;
|
||||||
// using AccPacket = typename packet_traits<Scalar>::type;
|
using AccPacket = typename packet_traits<Scalar>::type;
|
||||||
// using ResPacket = typename packet_traits<ResScalar>::type;
|
using ResPacket = typename packet_traits<ResScalar>::type;
|
||||||
|
|
||||||
// AccPacket _acc;
|
AccPacket _acc;
|
||||||
|
|
||||||
// EIGEN_STRONG_INLINE void zero()
|
EIGEN_STRONG_INLINE void zero()
|
||||||
// {
|
{
|
||||||
// _acc = pset1<AccPacket>(0);
|
_acc = pset1<AccPacket>(0);
|
||||||
// }
|
}
|
||||||
|
|
||||||
// template<typename ResPacket_>
|
template<typename ResPacket_>
|
||||||
// EIGEN_STRONG_INLINE void scale(ResScalar alpha, const ResPacket_& pAlpha)
|
EIGEN_STRONG_INLINE void scale(ResScalar alpha, const ResPacket_& pAlpha)
|
||||||
// {
|
{
|
||||||
// _acc *= pAlpha;
|
_acc *= pAlpha;
|
||||||
// }
|
}
|
||||||
|
|
||||||
// EIGEN_STRONG_INLINE void store(const DataMapper& dest, Index row, Index col)
|
EIGEN_STRONG_INLINE void store(const DataMapper& dest, Index row, Index col)
|
||||||
// {
|
{
|
||||||
// ResPacket r = dest.template gatherPacket<ResPacket>(row, col) + _acc;
|
ResPacket r = dest.template gatherPacket<ResPacket>(row, col) + _acc;
|
||||||
// dest.template scatterPacket<ResPacket>(row, col, r);
|
dest.template scatterPacket<ResPacket>(row, col, r);
|
||||||
// }
|
}
|
||||||
// };
|
};
|
||||||
|
|
||||||
// template<int CPU, typename Scalar, typename ResScalar, typename DataMapper>
|
template<int CPU, typename Scalar, typename ResScalar, typename DataMapper>
|
||||||
// struct Accumulator<0, CPU, Scalar, ResScalar, DataMapper, 4, 4>
|
struct Accumulator<0, CPU, Scalar, ResScalar, DataMapper, 4, 4>
|
||||||
// {
|
{
|
||||||
// using LinearMapper = typename DataMapper::LinearMapper;
|
using LinearMapper = typename DataMapper::LinearMapper;
|
||||||
// using AccPacket = typename packet_traits<Scalar>::type;
|
using AccPacket = typename packet_traits<Scalar>::type;
|
||||||
// using ResPacket = typename packet_traits<ResScalar>::type;
|
using ResPacket = typename packet_traits<ResScalar>::type;
|
||||||
|
|
||||||
// PacketBlock<AccPacket, 4> _acc;
|
PacketBlock<AccPacket, 4> _acc;
|
||||||
|
|
||||||
// EIGEN_STRONG_INLINE void zero()
|
EIGEN_STRONG_INLINE void zero()
|
||||||
// {
|
{
|
||||||
// _acc.packet[0] = pset1<AccPacket>(0);
|
_acc.packet[0] = pset1<AccPacket>(0);
|
||||||
// _acc.packet[1] = pset1<AccPacket>(0);
|
_acc.packet[1] = pset1<AccPacket>(0);
|
||||||
// _acc.packet[2] = pset1<AccPacket>(0);
|
_acc.packet[2] = pset1<AccPacket>(0);
|
||||||
// _acc.packet[3] = pset1<AccPacket>(0);
|
_acc.packet[3] = pset1<AccPacket>(0);
|
||||||
// }
|
}
|
||||||
|
|
||||||
// template<typename ResPacket_>
|
template<typename ResPacket_>
|
||||||
// EIGEN_STRONG_INLINE void scale(ResScalar alpha, const ResPacket_& pAlpha)
|
EIGEN_STRONG_INLINE void scale(ResScalar alpha, const ResPacket_& pAlpha)
|
||||||
// {
|
{
|
||||||
// _acc.packet[0] *= pAlpha;
|
_acc.packet[0] *= pAlpha;
|
||||||
// _acc.packet[1] *= pAlpha;
|
_acc.packet[1] *= pAlpha;
|
||||||
// _acc.packet[2] *= pAlpha;
|
_acc.packet[2] *= pAlpha;
|
||||||
// _acc.packet[3] *= pAlpha;
|
_acc.packet[3] *= pAlpha;
|
||||||
// }
|
}
|
||||||
|
|
||||||
// EIGEN_STRONG_INLINE void store(const DataMapper& dest, Index row, Index col)
|
EIGEN_STRONG_INLINE void store(const DataMapper& dest, Index row, Index col)
|
||||||
// {
|
{
|
||||||
// LinearMapper r0 = dest.getLinearMapper(row, col + 0);
|
LinearMapper r0 = dest.getLinearMapper(row, col + 0);
|
||||||
// LinearMapper r1 = dest.getLinearMapper(row, col + 1);
|
LinearMapper r1 = dest.getLinearMapper(row, col + 1);
|
||||||
// LinearMapper r2 = dest.getLinearMapper(row, col + 2);
|
LinearMapper r2 = dest.getLinearMapper(row, col + 2);
|
||||||
// LinearMapper r3 = dest.getLinearMapper(row, col + 3);
|
LinearMapper r3 = dest.getLinearMapper(row, col + 3);
|
||||||
|
|
||||||
// r0.storePacket(0, r0.template loadPacket<ResPacket>(0) + _acc.packet[0]);
|
r0.storePacket(0, r0.template loadPacket<ResPacket>(0) + _acc.packet[0]);
|
||||||
// r1.storePacket(0, r1.template loadPacket<ResPacket>(0) + _acc.packet[1]);
|
r1.storePacket(0, r1.template loadPacket<ResPacket>(0) + _acc.packet[1]);
|
||||||
// r2.storePacket(0, r2.template loadPacket<ResPacket>(0) + _acc.packet[2]);
|
r2.storePacket(0, r2.template loadPacket<ResPacket>(0) + _acc.packet[2]);
|
||||||
// r3.storePacket(0, r3.template loadPacket<ResPacket>(0) + _acc.packet[3]);
|
r3.storePacket(0, r3.template loadPacket<ResPacket>(0) + _acc.packet[3]);
|
||||||
// }
|
}
|
||||||
// };
|
};
|
||||||
|
|
||||||
// template<int CPU, typename Scalar, typename ResScalar, typename DataMapper>
|
// template<int CPU, typename Scalar, typename ResScalar, typename DataMapper>
|
||||||
// struct Accumulator<0, CPU, Scalar, ResScalar, DataMapper, 8, 4>
|
// struct Accumulator<0, CPU, Scalar, ResScalar, DataMapper, 8, 4>
|
||||||
@ -316,19 +316,19 @@ namespace internal {
|
|||||||
// };
|
// };
|
||||||
// };
|
// };
|
||||||
|
|
||||||
// MICRO_4x1x4: body of one step of the 4x1x4 micro kernel.
//
// The macro expands to a plain sequence of statements (it is not wrapped in a
// block), so it must only be used where a statement sequence is valid.
// It relies on these names being declared in the enclosing scope:
//   LhsPacket, RhsPacket    packet types
//   pLhs                    LhsPacket scratch variable
//   pRhs, pRhs0 .. pRhs3    RhsPacket scratch variables
//   lhsPackMap, rhsPackMap  objects exposing pCur and advance()
//   acc                     accumulator whose _acc.packet[0..3] are updated
//
// Steps: load one packet from each pack map, broadcast entries 0..3 of the
// rhs packet with pset1, add lhs * broadcast(i) into acc._acc.packet[i], then
// advance both pack maps by 4.
#define MICRO_4x1x4()                          \
  pLhs = pload<LhsPacket>(lhsPackMap.pCur);    \
  pRhs = pload<RhsPacket>(rhsPackMap.pCur);    \
  pRhs0 = pset1<RhsPacket>(pRhs[0]);           \
  pRhs1 = pset1<RhsPacket>(pRhs[1]);           \
  pRhs2 = pset1<RhsPacket>(pRhs[2]);           \
  pRhs3 = pset1<RhsPacket>(pRhs[3]);           \
  acc._acc.packet[0] += pLhs*pRhs0;            \
  acc._acc.packet[1] += pLhs*pRhs1;            \
  acc._acc.packet[2] += pLhs*pRhs2;            \
  acc._acc.packet[3] += pLhs*pRhs3;            \
  lhsPackMap.advance(4*1);                     \
  rhsPackMap.advance(1*4);
|
||||||
|
|
||||||
// template<int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator>
|
// template<int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator>
|
||||||
// struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, Accumulator, 4, 8, 4>
|
// struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, Accumulator, 4, 8, 4>
|
||||||
@ -381,27 +381,27 @@ namespace internal {
|
|||||||
// };
|
// };
|
||||||
// };
|
// };
|
||||||
|
|
||||||
// template<int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator>
|
template<int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator>
|
||||||
// struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, Accumulator, 4, 1, 4>
|
struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, Accumulator, 4, 1, 4>
|
||||||
// {
|
{
|
||||||
// EIGEN_STRONG_INLINE void operator()(LhsPackMap& lhsPackMap,
|
EIGEN_STRONG_INLINE void operator()(LhsPackMap& lhsPackMap,
|
||||||
// RhsPackMap& rhsPackMap,
|
RhsPackMap& rhsPackMap,
|
||||||
// Index rowIdx, Index colIdx, Index depthIdx,
|
Index rowIdx, Index colIdx, Index depthIdx,
|
||||||
// Accumulator& acc)
|
Accumulator& acc)
|
||||||
// {
|
{
|
||||||
// using LhsPacket = typename packet_traits<LhsScalar>::type;
|
using LhsPacket = typename packet_traits<LhsScalar>::type;
|
||||||
// using RhsPacket = typename packet_traits<RhsScalar>::type;
|
using RhsPacket = typename packet_traits<RhsScalar>::type;
|
||||||
|
|
||||||
// asm __volatile__("#BEGIN_NEON_MICROKERNEL_4x1x4\n\t");
|
asm __volatile__("#BEGIN_NEON_MICROKERNEL_4x1x4\n\t");
|
||||||
|
|
||||||
// LhsPacket pLhs;
|
LhsPacket pLhs;
|
||||||
// RhsPacket pRhs, pRhs0, pRhs1, pRhs2, pRhs3;
|
RhsPacket pRhs, pRhs0, pRhs1, pRhs2, pRhs3;
|
||||||
|
|
||||||
// MICRO_4x1x4();
|
MICRO_4x1x4();
|
||||||
|
|
||||||
// asm __volatile__("#END_NEON_MICROKERNEL_4x1x4\n\t");
|
asm __volatile__("#END_NEON_MICROKERNEL_4x1x4\n\t");
|
||||||
// };
|
};
|
||||||
// };
|
};
|
||||||
|
|
||||||
// template<int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator>
|
// template<int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator>
|
||||||
// struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, Accumulator, 8, 1, 1>
|
// struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, Accumulator, 8, 1, 1>
|
||||||
@ -430,54 +430,54 @@ namespace internal {
|
|||||||
// };
|
// };
|
||||||
// };
|
// };
|
||||||
|
|
||||||
// template<int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator>
|
template<int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator>
|
||||||
// struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, Accumulator, 4, 1, 1>
|
struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, Accumulator, 4, 1, 1>
|
||||||
// {
|
{
|
||||||
// EIGEN_STRONG_INLINE void operator()(LhsPackMap& lhsPackMap,
|
EIGEN_STRONG_INLINE void operator()(LhsPackMap& lhsPackMap,
|
||||||
// RhsPackMap& rhsPackMap,
|
RhsPackMap& rhsPackMap,
|
||||||
// Index rowIdx, Index colIdx, Index depthIdx,
|
Index rowIdx, Index colIdx, Index depthIdx,
|
||||||
// Accumulator& acc)
|
Accumulator& acc)
|
||||||
// {
|
{
|
||||||
// using LhsPacket = typename packet_traits<LhsScalar>::type;
|
using LhsPacket = typename packet_traits<LhsScalar>::type;
|
||||||
// using RhsPacket = typename packet_traits<RhsScalar>::type;
|
using RhsPacket = typename packet_traits<RhsScalar>::type;
|
||||||
|
|
||||||
// asm __volatile__("#BEGIN_NEON_MICROKERNEL_4x1x1\n\t");
|
asm __volatile__("#BEGIN_NEON_MICROKERNEL_4x1x1\n\t");
|
||||||
|
|
||||||
// LhsPacket pLhs = pload<LhsPacket>(lhsPackMap.pCur);
|
LhsPacket pLhs = pload<LhsPacket>(lhsPackMap.pCur);
|
||||||
// RhsPacket pRhs = pset1<RhsPacket>(*rhsPackMap.pCur);
|
RhsPacket pRhs = pset1<RhsPacket>(*rhsPackMap.pCur);
|
||||||
|
|
||||||
// //acc._acc += pRhs*pLhs;
|
//acc._acc += pRhs*pLhs;
|
||||||
// acc._acc = pmadd(pRhs, pLhs, acc._acc);
|
acc._acc = pmadd(pRhs, pLhs, acc._acc);
|
||||||
|
|
||||||
// lhsPackMap.advance(4*1);
|
lhsPackMap.advance(4*1);
|
||||||
// rhsPackMap.advance(1);
|
rhsPackMap.advance(1);
|
||||||
// asm __volatile__("#END_NEON_MICROKERNEL_4x1x1\n\t");
|
asm __volatile__("#END_NEON_MICROKERNEL_4x1x1\n\t");
|
||||||
// };
|
};
|
||||||
// };
|
};
|
||||||
|
|
||||||
// template<int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator>
|
template<int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator>
|
||||||
// struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, Accumulator, 1, 1, 4>
|
struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, Accumulator, 1, 1, 4>
|
||||||
// {
|
{
|
||||||
// EIGEN_STRONG_INLINE void operator()(LhsPackMap& lhsPackMap,
|
EIGEN_STRONG_INLINE void operator()(LhsPackMap& lhsPackMap,
|
||||||
// RhsPackMap& rhsPackMap,
|
RhsPackMap& rhsPackMap,
|
||||||
// Index rowIdx, Index colIdx, Index depthIdx,
|
Index rowIdx, Index colIdx, Index depthIdx,
|
||||||
// Accumulator& acc)
|
Accumulator& acc)
|
||||||
// {
|
{
|
||||||
// using RhsPacket = typename packet_traits<RhsScalar>::type;
|
using RhsPacket = typename packet_traits<RhsScalar>::type;
|
||||||
// using LhsPacket = typename packet_traits<LhsScalar>::type;
|
using LhsPacket = typename packet_traits<LhsScalar>::type;
|
||||||
|
|
||||||
// asm __volatile__("#BEGIN_NEON_MICROKERNEL_1x1x4\n\t");
|
asm __volatile__("#BEGIN_NEON_MICROKERNEL_1x1x4\n\t");
|
||||||
|
|
||||||
// LhsPacket pLhs = pset1<LhsPacket>(*lhsPackMap.pCur);
|
LhsPacket pLhs = pset1<LhsPacket>(*lhsPackMap.pCur);
|
||||||
// RhsPacket pRhs = pload<RhsPacket>(rhsPackMap.pCur);
|
RhsPacket pRhs = pload<RhsPacket>(rhsPackMap.pCur);
|
||||||
|
|
||||||
// acc._acc += pLhs*pRhs;
|
acc._acc += pLhs*pRhs;
|
||||||
|
|
||||||
// lhsPackMap.advance(1);
|
lhsPackMap.advance(1);
|
||||||
// rhsPackMap.advance(4*1);
|
rhsPackMap.advance(4*1);
|
||||||
// asm __volatile__("#END_NEON_MICROKERNEL_1x1x4\n\t");
|
asm __volatile__("#END_NEON_MICROKERNEL_1x1x4\n\t");
|
||||||
// };
|
};
|
||||||
// };
|
};
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
Loading…
x
Reference in New Issue
Block a user