WIP - Vector

This commit is contained in:
Everton Constantino 2021-05-10 20:06:34 +00:00
parent 70c0363c28
commit 54f80f442d

View File

@ -61,100 +61,100 @@ namespace internal {
// } // }
// }; // };
// template<int CPU, typename Scalar, typename ResScalar, typename DataMapper> template<int CPU, typename Scalar, typename ResScalar, typename DataMapper>
// struct Accumulator<0, CPU, Scalar, ResScalar, DataMapper, 4, 1> struct Accumulator<0, CPU, Scalar, ResScalar, DataMapper, 4, 1>
// { {
// using LinearMapper = typename DataMapper::LinearMapper; using LinearMapper = typename DataMapper::LinearMapper;
// using AccPacket = typename packet_traits<Scalar>::type; using AccPacket = typename packet_traits<Scalar>::type;
// using ResPacket = typename packet_traits<ResScalar>::type; using ResPacket = typename packet_traits<ResScalar>::type;
// AccPacket _acc; AccPacket _acc;
// EIGEN_STRONG_INLINE void zero() EIGEN_STRONG_INLINE void zero()
// { {
// _acc = pset1<AccPacket>(0); _acc = pset1<AccPacket>(0);
// } }
// template<typename ResPacket_> template<typename ResPacket_>
// EIGEN_STRONG_INLINE void scale(ResScalar alpha, const ResPacket_& pAlpha) EIGEN_STRONG_INLINE void scale(ResScalar alpha, const ResPacket_& pAlpha)
// { {
// _acc *= pAlpha; _acc *= pAlpha;
// } }
// EIGEN_STRONG_INLINE void store(const DataMapper& dest, Index row, Index col) EIGEN_STRONG_INLINE void store(const DataMapper& dest, Index row, Index col)
// { {
// PacketBlock<ResPacket, 1> block; PacketBlock<ResPacket, 1> block;
// block.packet[0] = dest.template loadPacket<ResPacket>(row, col) + _acc; block.packet[0] = dest.template loadPacket<ResPacket>(row, col) + _acc;
// dest.template storePacketBlock<AccPacket, 1>(row, col, block); dest.template storePacketBlock<AccPacket, 1>(row, col, block);
// } }
// }; };
// template<int CPU, typename Scalar, typename ResScalar, typename DataMapper> template<int CPU, typename Scalar, typename ResScalar, typename DataMapper>
// struct Accumulator<0, CPU, Scalar, ResScalar, DataMapper, 1, 4> struct Accumulator<0, CPU, Scalar, ResScalar, DataMapper, 1, 4>
// { {
// using LinearMapper = typename DataMapper::LinearMapper; using LinearMapper = typename DataMapper::LinearMapper;
// using AccPacket = typename packet_traits<Scalar>::type; using AccPacket = typename packet_traits<Scalar>::type;
// using ResPacket = typename packet_traits<ResScalar>::type; using ResPacket = typename packet_traits<ResScalar>::type;
// AccPacket _acc; AccPacket _acc;
// EIGEN_STRONG_INLINE void zero() EIGEN_STRONG_INLINE void zero()
// { {
// _acc = pset1<AccPacket>(0); _acc = pset1<AccPacket>(0);
// } }
// template<typename ResPacket_> template<typename ResPacket_>
// EIGEN_STRONG_INLINE void scale(ResScalar alpha, const ResPacket_& pAlpha) EIGEN_STRONG_INLINE void scale(ResScalar alpha, const ResPacket_& pAlpha)
// { {
// _acc *= pAlpha; _acc *= pAlpha;
// } }
// EIGEN_STRONG_INLINE void store(const DataMapper& dest, Index row, Index col) EIGEN_STRONG_INLINE void store(const DataMapper& dest, Index row, Index col)
// { {
// ResPacket r = dest.template gatherPacket<ResPacket>(row, col) + _acc; ResPacket r = dest.template gatherPacket<ResPacket>(row, col) + _acc;
// dest.template scatterPacket<ResPacket>(row, col, r); dest.template scatterPacket<ResPacket>(row, col, r);
// } }
// }; };
// template<int CPU, typename Scalar, typename ResScalar, typename DataMapper> template<int CPU, typename Scalar, typename ResScalar, typename DataMapper>
// struct Accumulator<0, CPU, Scalar, ResScalar, DataMapper, 4, 4> struct Accumulator<0, CPU, Scalar, ResScalar, DataMapper, 4, 4>
// { {
// using LinearMapper = typename DataMapper::LinearMapper; using LinearMapper = typename DataMapper::LinearMapper;
// using AccPacket = typename packet_traits<Scalar>::type; using AccPacket = typename packet_traits<Scalar>::type;
// using ResPacket = typename packet_traits<ResScalar>::type; using ResPacket = typename packet_traits<ResScalar>::type;
// PacketBlock<AccPacket, 4> _acc; PacketBlock<AccPacket, 4> _acc;
// EIGEN_STRONG_INLINE void zero() EIGEN_STRONG_INLINE void zero()
// { {
// _acc.packet[0] = pset1<AccPacket>(0); _acc.packet[0] = pset1<AccPacket>(0);
// _acc.packet[1] = pset1<AccPacket>(0); _acc.packet[1] = pset1<AccPacket>(0);
// _acc.packet[2] = pset1<AccPacket>(0); _acc.packet[2] = pset1<AccPacket>(0);
// _acc.packet[3] = pset1<AccPacket>(0); _acc.packet[3] = pset1<AccPacket>(0);
// } }
// template<typename ResPacket_> template<typename ResPacket_>
// EIGEN_STRONG_INLINE void scale(ResScalar alpha, const ResPacket_& pAlpha) EIGEN_STRONG_INLINE void scale(ResScalar alpha, const ResPacket_& pAlpha)
// { {
// _acc.packet[0] *= pAlpha; _acc.packet[0] *= pAlpha;
// _acc.packet[1] *= pAlpha; _acc.packet[1] *= pAlpha;
// _acc.packet[2] *= pAlpha; _acc.packet[2] *= pAlpha;
// _acc.packet[3] *= pAlpha; _acc.packet[3] *= pAlpha;
// } }
// EIGEN_STRONG_INLINE void store(const DataMapper& dest, Index row, Index col) EIGEN_STRONG_INLINE void store(const DataMapper& dest, Index row, Index col)
// { {
// LinearMapper r0 = dest.getLinearMapper(row, col + 0); LinearMapper r0 = dest.getLinearMapper(row, col + 0);
// LinearMapper r1 = dest.getLinearMapper(row, col + 1); LinearMapper r1 = dest.getLinearMapper(row, col + 1);
// LinearMapper r2 = dest.getLinearMapper(row, col + 2); LinearMapper r2 = dest.getLinearMapper(row, col + 2);
// LinearMapper r3 = dest.getLinearMapper(row, col + 3); LinearMapper r3 = dest.getLinearMapper(row, col + 3);
// r0.storePacket(0, r0.template loadPacket<ResPacket>(0) + _acc.packet[0]); r0.storePacket(0, r0.template loadPacket<ResPacket>(0) + _acc.packet[0]);
// r1.storePacket(0, r1.template loadPacket<ResPacket>(0) + _acc.packet[1]); r1.storePacket(0, r1.template loadPacket<ResPacket>(0) + _acc.packet[1]);
// r2.storePacket(0, r2.template loadPacket<ResPacket>(0) + _acc.packet[2]); r2.storePacket(0, r2.template loadPacket<ResPacket>(0) + _acc.packet[2]);
// r3.storePacket(0, r3.template loadPacket<ResPacket>(0) + _acc.packet[3]); r3.storePacket(0, r3.template loadPacket<ResPacket>(0) + _acc.packet[3]);
// } }
// }; };
// template<int CPU, typename Scalar, typename ResScalar, typename DataMapper> // template<int CPU, typename Scalar, typename ResScalar, typename DataMapper>
// struct Accumulator<0, CPU, Scalar, ResScalar, DataMapper, 8, 4> // struct Accumulator<0, CPU, Scalar, ResScalar, DataMapper, 8, 4>
@ -316,19 +316,19 @@ namespace internal {
// }; // };
// }; // };
// #define MICRO_4x1x4() \ #define MICRO_4x1x4() \
// pLhs = pload<LhsPacket>(lhsPackMap.pCur); \ pLhs = pload<LhsPacket>(lhsPackMap.pCur); \
// pRhs = pload<RhsPacket>(rhsPackMap.pCur); \ pRhs = pload<RhsPacket>(rhsPackMap.pCur); \
// pRhs0 = pset1<RhsPacket>(pRhs[0]); \ pRhs0 = pset1<RhsPacket>(pRhs[0]); \
// pRhs1 = pset1<RhsPacket>(pRhs[1]); \ pRhs1 = pset1<RhsPacket>(pRhs[1]); \
// pRhs2 = pset1<RhsPacket>(pRhs[2]); \ pRhs2 = pset1<RhsPacket>(pRhs[2]); \
// pRhs3 = pset1<RhsPacket>(pRhs[3]); \ pRhs3 = pset1<RhsPacket>(pRhs[3]); \
// acc._acc.packet[0] += pLhs*pRhs0; \ acc._acc.packet[0] += pLhs*pRhs0; \
// acc._acc.packet[1] += pLhs*pRhs1; \ acc._acc.packet[1] += pLhs*pRhs1; \
// acc._acc.packet[2] += pLhs*pRhs2; \ acc._acc.packet[2] += pLhs*pRhs2; \
// acc._acc.packet[3] += pLhs*pRhs3; \ acc._acc.packet[3] += pLhs*pRhs3; \
// lhsPackMap.advance(4*1); \ lhsPackMap.advance(4*1); \
// rhsPackMap.advance(1*4); rhsPackMap.advance(1*4);
// template<int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator> // template<int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator>
// struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, Accumulator, 4, 8, 4> // struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, Accumulator, 4, 8, 4>
@ -381,27 +381,27 @@ namespace internal {
// }; // };
// }; // };
// template<int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator> template<int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator>
// struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, Accumulator, 4, 1, 4> struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, Accumulator, 4, 1, 4>
// { {
// EIGEN_STRONG_INLINE void operator()(LhsPackMap& lhsPackMap, EIGEN_STRONG_INLINE void operator()(LhsPackMap& lhsPackMap,
// RhsPackMap& rhsPackMap, RhsPackMap& rhsPackMap,
// Index rowIdx, Index colIdx, Index depthIdx, Index rowIdx, Index colIdx, Index depthIdx,
// Accumulator& acc) Accumulator& acc)
// { {
// using LhsPacket = typename packet_traits<LhsScalar>::type; using LhsPacket = typename packet_traits<LhsScalar>::type;
// using RhsPacket = typename packet_traits<RhsScalar>::type; using RhsPacket = typename packet_traits<RhsScalar>::type;
// asm __volatile__("#BEGIN_NEON_MICROKERNEL_4x1x4\n\t"); asm __volatile__("#BEGIN_NEON_MICROKERNEL_4x1x4\n\t");
// LhsPacket pLhs; LhsPacket pLhs;
// RhsPacket pRhs, pRhs0, pRhs1, pRhs2, pRhs3; RhsPacket pRhs, pRhs0, pRhs1, pRhs2, pRhs3;
// MICRO_4x1x4(); MICRO_4x1x4();
// asm __volatile__("#END_NEON_MICROKERNEL_4x1x4\n\t"); asm __volatile__("#END_NEON_MICROKERNEL_4x1x4\n\t");
// }; };
// }; };
// template<int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator> // template<int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator>
// struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, Accumulator, 8, 1, 1> // struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, Accumulator, 8, 1, 1>
@ -430,54 +430,54 @@ namespace internal {
// }; // };
// }; // };
// template<int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator> template<int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator>
// struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, Accumulator, 4, 1, 1> struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, Accumulator, 4, 1, 1>
// { {
// EIGEN_STRONG_INLINE void operator()(LhsPackMap& lhsPackMap, EIGEN_STRONG_INLINE void operator()(LhsPackMap& lhsPackMap,
// RhsPackMap& rhsPackMap, RhsPackMap& rhsPackMap,
// Index rowIdx, Index colIdx, Index depthIdx, Index rowIdx, Index colIdx, Index depthIdx,
// Accumulator& acc) Accumulator& acc)
// { {
// using LhsPacket = typename packet_traits<LhsScalar>::type; using LhsPacket = typename packet_traits<LhsScalar>::type;
// using RhsPacket = typename packet_traits<RhsScalar>::type; using RhsPacket = typename packet_traits<RhsScalar>::type;
// asm __volatile__("#BEGIN_NEON_MICROKERNEL_4x1x1\n\t"); asm __volatile__("#BEGIN_NEON_MICROKERNEL_4x1x1\n\t");
// LhsPacket pLhs = pload<LhsPacket>(lhsPackMap.pCur); LhsPacket pLhs = pload<LhsPacket>(lhsPackMap.pCur);
// RhsPacket pRhs = pset1<RhsPacket>(*rhsPackMap.pCur); RhsPacket pRhs = pset1<RhsPacket>(*rhsPackMap.pCur);
// //acc._acc += pRhs*pLhs; //acc._acc += pRhs*pLhs;
// acc._acc = pmadd(pRhs, pLhs, acc._acc); acc._acc = pmadd(pRhs, pLhs, acc._acc);
// lhsPackMap.advance(4*1); lhsPackMap.advance(4*1);
// rhsPackMap.advance(1); rhsPackMap.advance(1);
// asm __volatile__("#END_NEON_MICROKERNEL_4x1x1\n\t"); asm __volatile__("#END_NEON_MICROKERNEL_4x1x1\n\t");
// }; };
// }; };
// template<int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator> template<int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator>
// struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, Accumulator, 1, 1, 4> struct MicroKernel<0, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, Accumulator, 1, 1, 4>
// { {
// EIGEN_STRONG_INLINE void operator()(LhsPackMap& lhsPackMap, EIGEN_STRONG_INLINE void operator()(LhsPackMap& lhsPackMap,
// RhsPackMap& rhsPackMap, RhsPackMap& rhsPackMap,
// Index rowIdx, Index colIdx, Index depthIdx, Index rowIdx, Index colIdx, Index depthIdx,
// Accumulator& acc) Accumulator& acc)
// { {
// using RhsPacket = typename packet_traits<RhsScalar>::type; using RhsPacket = typename packet_traits<RhsScalar>::type;
// using LhsPacket = typename packet_traits<LhsScalar>::type; using LhsPacket = typename packet_traits<LhsScalar>::type;
// asm __volatile__("#BEGIN_NEON_MICROKERNEL_1x1x4\n\t"); asm __volatile__("#BEGIN_NEON_MICROKERNEL_1x1x4\n\t");
// LhsPacket pLhs = pset1<LhsPacket>(*lhsPackMap.pCur); LhsPacket pLhs = pset1<LhsPacket>(*lhsPackMap.pCur);
// RhsPacket pRhs = pload<RhsPacket>(rhsPackMap.pCur); RhsPacket pRhs = pload<RhsPacket>(rhsPackMap.pCur);
// acc._acc += pLhs*pRhs; acc._acc += pLhs*pRhs;
// lhsPackMap.advance(1); lhsPackMap.advance(1);
// rhsPackMap.advance(4*1); rhsPackMap.advance(4*1);
// asm __volatile__("#END_NEON_MICROKERNEL_1x1x4\n\t"); asm __volatile__("#END_NEON_MICROKERNEL_1x1x4\n\t");
// }; };
// }; };
} // end namespace internal } // end namespace internal
} // end namespace Eigen } // end namespace Eigen