mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-01 17:50:40 +08:00
WIP
This commit is contained in:
parent
5bffe09624
commit
ca0d3f92d7
@ -351,6 +351,8 @@ using std::ptrdiff_t;
|
|||||||
#elif defined EIGEN_VECTORIZE_NEON
|
#elif defined EIGEN_VECTORIZE_NEON
|
||||||
#include "src/Core/arch/NEON/GeneralBlockPanelKernel.h"
|
#include "src/Core/arch/NEON/GeneralBlockPanelKernel.h"
|
||||||
#include "src/Core/arch/NEON/MatrixProduct.h"
|
#include "src/Core/arch/NEON/MatrixProduct.h"
|
||||||
|
#include "src/Core/arch/NEON/PackingOps.h"
|
||||||
|
#include "src/Core/arch/NEON/Kernels.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "src/Core/BooleanRedux.h"
|
#include "src/Core/BooleanRedux.h"
|
||||||
|
21
Eigen/src/Core/arch/NEON/Kernels.h
Normal file
21
Eigen/src/Core/arch/NEON/Kernels.h
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
// This file is part of Eigen, a lightweight C++ template library
|
||||||
|
// for linear algebra.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2021 Everton Constantino (everton.constantino@hotmail.com)
|
||||||
|
//
|
||||||
|
// This Source Code Form is subject to the terms of the Mozilla
|
||||||
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||||
|
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
|
||||||
|
#ifndef EIGEN_KERNELS_NEON_H
|
||||||
|
#define EIGEN_KERNELS_NEON_H
|
||||||
|
|
||||||
|
namespace Eigen {
|
||||||
|
|
||||||
|
namespace internal {
|
||||||
|
|
||||||
|
} // end namespace internal
|
||||||
|
|
||||||
|
} // end namespace Eigen
|
||||||
|
|
||||||
|
#endif // EIGEN_KERNELS_NEON_H
|
@ -592,7 +592,9 @@ struct PackingOperator
|
|||||||
{
|
{
|
||||||
EIGEN_STRONG_INLINE Scalar* operator()(Index d1Idx, Index d2Idx, Scalar *block, const DataMapper& data)
|
EIGEN_STRONG_INLINE Scalar* operator()(Index d1Idx, Index d2Idx, Scalar *block, const DataMapper& data)
|
||||||
{
|
{
|
||||||
|
#ifdef __DEBUG__
|
||||||
std::cout << M << "x" << N << " ( " << d1Idx << ", " << d2Idx <<") -> ( " << d1Idx + M << ", " << d2Idx + N << ") ";
|
std::cout << M << "x" << N << " ( " << d1Idx << ", " << d2Idx <<") -> ( " << d1Idx + M << ", " << d2Idx + N << ") ";
|
||||||
|
#endif
|
||||||
Scalar *c = block;
|
Scalar *c = block;
|
||||||
for(auto i = 0; i < M; i++)
|
for(auto i = 0; i < M; i++)
|
||||||
for(auto j = 0; j < N; j++)
|
for(auto j = 0; j < N; j++)
|
||||||
@ -601,10 +603,14 @@ struct PackingOperator
|
|||||||
*c = data(d1Idx + i, d2Idx + j);
|
*c = data(d1Idx + i, d2Idx + j);
|
||||||
else
|
else
|
||||||
*c = data(d2Idx + j, d1Idx + i);
|
*c = data(d2Idx + j, d1Idx + i);
|
||||||
|
#ifdef __DEBUG__
|
||||||
std::cout << *c << " ";
|
std::cout << *c << " ";
|
||||||
|
#endif
|
||||||
c++;
|
c++;
|
||||||
}
|
}
|
||||||
|
#ifdef __DEBUG__
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
|
#endif
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -713,10 +719,12 @@ struct PackMap
|
|||||||
{
|
{
|
||||||
const Scalar *pBase;
|
const Scalar *pBase;
|
||||||
const Scalar *pCur;
|
const Scalar *pCur;
|
||||||
|
Index stride;
|
||||||
|
Index offset;
|
||||||
Index d2Size;
|
Index d2Size;
|
||||||
PackMapCalculator<Architecture, CPU, Index, Scalar, DataMapper, isLhs, PACK_SHAPES_COUNT<Architecture, CPU, Scalar, isLhs>-1> pmc;
|
PackMapCalculator<Architecture, CPU, Index, Scalar, DataMapper, isLhs, PACK_SHAPES_COUNT<Architecture, CPU, Scalar, isLhs>-1> pmc;
|
||||||
|
|
||||||
PackMap(const Scalar *base, Index d2Size) : pBase(base), pCur(base), d2Size(d2Size) {}
|
PackMap(const Scalar *base, Index d2Size, Index stride, Index offset) : pBase(base), pCur(base), d2Size(d2Size), stride(stride), offset(offset) {}
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE void resetCur() { pCur = pBase; }
|
EIGEN_STRONG_INLINE void resetCur() { pCur = pBase; }
|
||||||
EIGEN_STRONG_INLINE void moveTo(Index p1) { pCur = pBase + pmc.getPosition(p1, d2Size); }
|
EIGEN_STRONG_INLINE void moveTo(Index p1) { pCur = pBase + pmc.getPosition(p1, d2Size); }
|
||||||
@ -739,7 +747,8 @@ struct Accumulator
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE void scale(ResScalar alpha)
|
template<typename ResPacket>
|
||||||
|
EIGEN_STRONG_INLINE void scale(ResScalar alpha, const ResPacket& pAlpha)
|
||||||
{
|
{
|
||||||
for(auto i = 0; i < M; i++)
|
for(auto i = 0; i < M; i++)
|
||||||
{
|
{
|
||||||
@ -762,14 +771,15 @@ struct Accumulator
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename RhsScalar, typename AccScalar, typename ResScalar, typename DataMapper, int SHAPE_IDX, int M, int K, int N>
|
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename Accumulator, int M, int K, int N>
|
||||||
struct MicroKernel
|
struct MicroKernel
|
||||||
{
|
{
|
||||||
EIGEN_STRONG_INLINE void operator()(PackMap<Architecture, CPU, Index, LhsScalar, DataMapper, true>& lhsPackMap,
|
EIGEN_STRONG_INLINE void operator()(LhsPackMap& lhsPackMap,
|
||||||
PackMap<Architecture, CPU, Index, RhsScalar, DataMapper, false>& rhsPackMap,
|
RhsPackMap& rhsPackMap,
|
||||||
Index rowIdx, Index colIdx, Index depthIdx,
|
Index rowIdx, Index colIdx, Index depthIdx,
|
||||||
Accumulator<Architecture, CPU, AccScalar, ResScalar, DataMapper, M, N>& acc)
|
Accumulator& acc)
|
||||||
{
|
{
|
||||||
|
#ifdef __DEBUG__
|
||||||
std::cout << "Kernel " << M << " x " << K << " x " << N << " @ " << rowIdx << ", " << depthIdx << ", " << colIdx << std::endl;
|
std::cout << "Kernel " << M << " x " << K << " x " << N << " @ " << rowIdx << ", " << depthIdx << ", " << colIdx << std::endl;
|
||||||
std::cout << "LHS ";
|
std::cout << "LHS ";
|
||||||
for(auto i = 0; i < M; i++)
|
for(auto i = 0; i < M; i++)
|
||||||
@ -788,6 +798,7 @@ struct MicroKernel
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
|
#endif
|
||||||
const RhsScalar *pRhs = rhsPackMap.pCur;
|
const RhsScalar *pRhs = rhsPackMap.pCur;
|
||||||
for(auto i = 0; i < N; i++)
|
for(auto i = 0; i < N; i++)
|
||||||
{
|
{
|
||||||
@ -802,98 +813,104 @@ struct MicroKernel
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename RhsScalar, typename AccScalar, typename ResScalar, typename DataMapper, int RHS_SHAPE_IDX, int LHS_SHAPE_IDX, int IDX>
|
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename ResPacket, typename DataMapper, int RHS_SHAPE_IDX, int LHS_SHAPE_IDX, int IDX>
|
||||||
struct DepthLoopStruct
|
struct DepthLoopStruct
|
||||||
{
|
{
|
||||||
DepthLoopStruct<Architecture, CPU, Index, LhsScalar, RhsScalar, AccScalar, ResScalar, DataMapper, RHS_SHAPE_IDX, LHS_SHAPE_IDX, IDX-1> depthLS;
|
DepthLoopStruct<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, ResPacket, DataMapper, RHS_SHAPE_IDX, LHS_SHAPE_IDX, IDX-1> depthLS;
|
||||||
EIGEN_STRONG_INLINE void operator()(Index rowIdx, Index colIdx, Index depthIdx, const DataMapper& res, const LhsScalar* blockA, const RhsScalar*blockB,
|
EIGEN_STRONG_INLINE void operator()(Index rowIdx, Index colIdx, Index depthIdx, const DataMapper& res,
|
||||||
Index rows, Index depth, Index cols, ResScalar alpha, Index strideA, Index strideB, Index offsetA, Index offsetB, PackMap<Architecture, CPU, Index, LhsScalar, DataMapper, true>& lhsPackMap, PackMap<Architecture, CPU, Index, RhsScalar, DataMapper, false>& rhsPackMap)
|
Index rows, Index depth, Index cols, ResScalar alpha, const ResPacket& pAlpha, LhsPackMap& lhsPackMap, RhsPackMap& rhsPackMap)
|
||||||
{
|
{
|
||||||
constexpr auto rhsProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[RHS_SHAPE_IDX][SHAPES_RHS_DIMENSION];
|
constexpr auto rhsProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[RHS_SHAPE_IDX][SHAPES_RHS_DIMENSION];
|
||||||
constexpr auto lhsProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[LHS_SHAPE_IDX][SHAPES_LHS_DIMENSION];
|
constexpr auto lhsProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[LHS_SHAPE_IDX][SHAPES_LHS_DIMENSION];
|
||||||
constexpr auto depthProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_DEP_DIMENSION];
|
constexpr auto depthProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_DEP_DIMENSION];
|
||||||
|
typedef Accumulator<Architecture, CPU, AccScalar, ResScalar, DataMapper, lhsProgress, rhsProgress> AccumulatorType;
|
||||||
|
|
||||||
if(rhsProgress == SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_RHS_DIMENSION] && lhsProgress == SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_LHS_DIMENSION])
|
if(rhsProgress == SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_RHS_DIMENSION] && lhsProgress == SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_LHS_DIMENSION])
|
||||||
{
|
{
|
||||||
MicroKernel<Architecture, CPU, Index, LhsScalar, RhsScalar, AccScalar, ResScalar, DataMapper, IDX, lhsProgress, depthProgress, rhsProgress> mkt;
|
MicroKernel<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, AccumulatorType, lhsProgress, depthProgress, rhsProgress> mkt;
|
||||||
Accumulator<Architecture, CPU, AccScalar, ResScalar, DataMapper, lhsProgress, rhsProgress> acc;
|
AccumulatorType acc;
|
||||||
acc.zero();
|
acc.zero();
|
||||||
for(; depthIdx + depthProgress <= depth; depthIdx+=depthProgress)
|
for(; depthIdx + depthProgress <= depth; depthIdx+=depthProgress)
|
||||||
{
|
{
|
||||||
mkt(lhsPackMap, rhsPackMap, rowIdx, colIdx, depthIdx, acc);
|
mkt(lhsPackMap, rhsPackMap, rowIdx, colIdx, depthIdx, acc);
|
||||||
}
|
}
|
||||||
acc.scale(alpha);
|
acc.scale(alpha, pAlpha);
|
||||||
acc.store(res, rowIdx, colIdx);
|
acc.store(res, rowIdx, colIdx);
|
||||||
}
|
}
|
||||||
depthLS(rowIdx, colIdx, depthIdx, res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB, lhsPackMap, rhsPackMap);
|
depthLS(rowIdx, colIdx, depthIdx, res, rows, depth, cols, alpha, pAlpha, lhsPackMap, rhsPackMap);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename RhsScalar, typename AccScalar, typename ResScalar, typename DataMapper, int RHS_SHAPE_IDX, int LHS_SHAPE_IDX>
|
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename ResPacket, typename DataMapper, int RHS_SHAPE_IDX, int LHS_SHAPE_IDX>
|
||||||
struct DepthLoopStruct<Architecture, CPU, Index, LhsScalar, RhsScalar, AccScalar, ResScalar, DataMapper, RHS_SHAPE_IDX, LHS_SHAPE_IDX, -1>
|
struct DepthLoopStruct<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, ResPacket, DataMapper, RHS_SHAPE_IDX, LHS_SHAPE_IDX, -1>
|
||||||
{
|
{
|
||||||
EIGEN_STRONG_INLINE void operator()(Index, Index, Index, const DataMapper&, const LhsScalar*, const RhsScalar*,
|
EIGEN_STRONG_INLINE void operator()(Index, Index, Index, const DataMapper&,
|
||||||
Index, Index, Index, ResScalar, Index, Index, Index, Index, PackMap<Architecture, CPU, Index, LhsScalar, DataMapper, true>&, PackMap<Architecture, CPU, Index, RhsScalar, DataMapper, false>&) {}
|
Index, Index, Index, ResScalar, const ResPacket&, LhsPackMap&, RhsPackMap&) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename RhsScalar, typename AccScalar, typename ResScalar, typename DataMapper, int RHS_SHAPE_IDX, int IDX>
|
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename ResPacket, typename DataMapper, int RHS_SHAPE_IDX, int IDX>
|
||||||
struct LhsLoopStruct
|
struct LhsLoopStruct
|
||||||
{
|
{
|
||||||
LhsLoopStruct<Architecture, CPU, Index, LhsScalar, RhsScalar, AccScalar, ResScalar, DataMapper, RHS_SHAPE_IDX, IDX-1> lhsLS;
|
LhsLoopStruct<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, ResPacket, DataMapper, RHS_SHAPE_IDX, IDX-1> lhsLS;
|
||||||
EIGEN_STRONG_INLINE void operator()(Index rowIdx, int colIdx, const DataMapper& res, const LhsScalar* blockA, const RhsScalar*blockB,
|
EIGEN_STRONG_INLINE void operator()(Index rowIdx, int colIdx, const DataMapper& res,
|
||||||
Index rows, Index depth, Index cols, ResScalar alpha, Index strideA, Index strideB, Index offsetA, Index offsetB, PackMap<Architecture, CPU, Index, LhsScalar, DataMapper, true>& lhsPackMap, PackMap<Architecture, CPU, Index, RhsScalar, DataMapper, false>& rhsPackMap)
|
Index rows, Index depth, Index cols, ResScalar alpha, const ResPacket& pAlpha, LhsPackMap& lhsPackMap, RhsPackMap& rhsPackMap)
|
||||||
{
|
{
|
||||||
constexpr auto lhsProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_LHS_DIMENSION];
|
constexpr auto lhsProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_LHS_DIMENSION];
|
||||||
|
|
||||||
DepthLoopStruct<Architecture, CPU, Index, LhsScalar, RhsScalar, AccScalar, ResScalar, DataMapper, RHS_SHAPE_IDX, IDX, IDX> depthLS;
|
DepthLoopStruct<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, ResPacket, DataMapper, RHS_SHAPE_IDX, IDX, IDX> depthLS;
|
||||||
for(;rowIdx + lhsProgress <= rows; rowIdx+=lhsProgress)
|
for(;rowIdx + lhsProgress <= rows; rowIdx+=lhsProgress)
|
||||||
{
|
{
|
||||||
lhsPackMap.moveTo(rowIdx);
|
lhsPackMap.moveTo(rowIdx);
|
||||||
rhsPackMap.moveTo(colIdx);
|
rhsPackMap.moveTo(colIdx);
|
||||||
depthLS(rowIdx, colIdx, 0, res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB, lhsPackMap, rhsPackMap);
|
depthLS(rowIdx, colIdx, 0, res, rows, depth, cols, alpha, pAlpha, lhsPackMap, rhsPackMap);
|
||||||
}
|
}
|
||||||
lhsLS(rowIdx, colIdx, res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB, lhsPackMap, rhsPackMap);
|
lhsLS(rowIdx, colIdx, res, rows, depth, cols, alpha, pAlpha, lhsPackMap, rhsPackMap);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename RhsScalar, typename AccScalar, typename ResScalar, typename DataMapper, int RHS_SHAPE_IDX>
|
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename ResPacket, typename DataMapper, int RHS_SHAPE_IDX>
|
||||||
struct LhsLoopStruct<Architecture, CPU, Index, LhsScalar, RhsScalar, AccScalar, ResScalar, DataMapper, RHS_SHAPE_IDX, -1>
|
struct LhsLoopStruct<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, ResPacket, DataMapper, RHS_SHAPE_IDX, -1>
|
||||||
{
|
{
|
||||||
EIGEN_STRONG_INLINE void operator()(Index, Index, const DataMapper&, const LhsScalar*, const RhsScalar*,
|
EIGEN_STRONG_INLINE void operator()(Index, Index, const DataMapper&,
|
||||||
Index, Index, Index, ResScalar, Index, Index, Index, Index, PackMap<Architecture, CPU, Index, LhsScalar, DataMapper, true>&, PackMap<Architecture, CPU, Index, RhsScalar, DataMapper, false>&) {}
|
Index, Index, Index, ResScalar, const ResPacket&, LhsPackMap&, RhsPackMap&) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename RhsScalar, typename AccScalar, typename ResScalar, typename DataMapper, int IDX>
|
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename ResPacket, typename DataMapper, int IDX>
|
||||||
struct RhsLoopStruct
|
struct RhsLoopStruct
|
||||||
{
|
{
|
||||||
static constexpr auto PREVIOUS = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_POINTER];
|
static constexpr auto PREVIOUS = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_POINTER];
|
||||||
RhsLoopStruct<Architecture, CPU, Index, LhsScalar, RhsScalar, AccScalar, ResScalar, DataMapper, PREVIOUS> rhsLS;
|
RhsLoopStruct<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, ResPacket, DataMapper, PREVIOUS> rhsLS;
|
||||||
|
|
||||||
EIGEN_STRONG_INLINE void operator()(Index colIdx, const DataMapper& res, const LhsScalar* blockA, const RhsScalar*blockB,
|
EIGEN_STRONG_INLINE void operator()(Index colIdx, const DataMapper& res,
|
||||||
Index rows, Index depth, Index cols, ResScalar alpha, Index strideA, Index strideB, Index offsetA, Index offsetB, PackMap<Architecture, CPU, Index, LhsScalar, DataMapper, true>& lhsPackMap, PackMap<Architecture, CPU, Index, RhsScalar, DataMapper, false>& rhsPackMap)
|
Index rows, Index depth, Index cols, ResScalar alpha, const ResPacket& pAlpha, LhsPackMap& lhsPackMap, RhsPackMap& rhsPackMap)
|
||||||
{
|
{
|
||||||
constexpr auto rhsProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_RHS_DIMENSION];
|
constexpr auto rhsProgress = SHAPES<Architecture, CPU, LhsScalar, RhsScalar>[IDX][SHAPES_RHS_DIMENSION];
|
||||||
|
|
||||||
for(;colIdx + rhsProgress <= cols; colIdx+=rhsProgress)
|
for(;colIdx + rhsProgress <= cols; colIdx+=rhsProgress)
|
||||||
{
|
{
|
||||||
LhsLoopStruct<Architecture, CPU, Index, LhsScalar, RhsScalar, AccScalar, ResScalar, DataMapper, IDX, IDX> lhsLS;
|
LhsLoopStruct<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, ResPacket, DataMapper, IDX, IDX> lhsLS;
|
||||||
lhsLS(0, colIdx, res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB, lhsPackMap, rhsPackMap);
|
lhsLS(0, colIdx, res, rows, depth, cols, alpha, pAlpha, lhsPackMap, rhsPackMap);
|
||||||
}
|
}
|
||||||
rhsLS(colIdx, res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB, lhsPackMap, rhsPackMap);
|
rhsLS(colIdx, res, rows, depth, cols, alpha, pAlpha, lhsPackMap, rhsPackMap);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename RhsScalar, typename AccScalar, typename ResScalar, typename DataMapper>
|
template<int Architecture, int CPU, typename Index, typename LhsScalar, typename LhsPackMap, typename RhsScalar, typename RhsPackMap, typename AccScalar, typename ResScalar, typename ResPacket, typename DataMapper>
|
||||||
struct RhsLoopStruct<Architecture, CPU, Index, LhsScalar, RhsScalar, AccScalar, ResScalar, DataMapper, -1>
|
struct RhsLoopStruct<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, ResPacket, DataMapper, -1>
|
||||||
{
|
{
|
||||||
EIGEN_STRONG_INLINE void operator()(Index colIdx, const DataMapper&, const LhsScalar*, const RhsScalar*,
|
EIGEN_STRONG_INLINE void operator()(Index colIdx, const DataMapper&,
|
||||||
Index, Index, Index, ResScalar, Index, Index, Index, Index, PackMap<Architecture, CPU, Index, LhsScalar, DataMapper, true>&, PackMap<Architecture, CPU, Index, RhsScalar, DataMapper, false>&) {}
|
Index, Index, Index, ResScalar, const ResPacket&, LhsPackMap&, RhsPackMap&) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<int Architecture, int CPU, typename ResScalar, typename AccScalar, typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper>
|
template<int Architecture, int CPU, typename ResScalar, typename AccScalar, typename LhsScalar, typename RhsScalar, typename Index, typename DataMapper>
|
||||||
EIGEN_STRONG_INLINE void gemm(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB,
|
EIGEN_STRONG_INLINE void gemm(const DataMapper& res, const LhsScalar* blockA, const RhsScalar* blockB,
|
||||||
Index rows, Index depth, Index cols, ResScalar alpha, Index strideA, Index strideB, Index offsetA, Index offsetB)
|
Index rows, Index depth, Index cols, ResScalar alpha, Index strideA, Index strideB, Index offsetA, Index offsetB)
|
||||||
{
|
{
|
||||||
|
using ResPacket = typename unpacket_traits<ResScalar>::type;
|
||||||
|
typedef PackMap<Architecture, CPU, Index, LhsScalar, DataMapper, true> LhsPackMap;
|
||||||
|
typedef PackMap<Architecture, CPU, Index, RhsScalar, DataMapper, false> RhsPackMap;
|
||||||
|
|
||||||
|
#ifdef __DEBUG__
|
||||||
std::cout << "blockA" << std::endl;
|
std::cout << "blockA" << std::endl;
|
||||||
for(auto i = 0; i < rows*depth; i++)
|
for(auto i = 0; i < rows*depth; i++)
|
||||||
{
|
{
|
||||||
@ -910,11 +927,14 @@ EIGEN_STRONG_INLINE void gemm(const DataMapper& res, const LhsScalar* blockA, co
|
|||||||
std::cout << blockB[i] << " ";
|
std::cout << blockB[i] << " ";
|
||||||
}
|
}
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
|
#endif
|
||||||
|
RhsLoopStruct<Architecture, CPU, Index, LhsScalar, LhsPackMap, RhsScalar, RhsPackMap, AccScalar, ResScalar, ResPacket, DataMapper, SHAPES_COUNT<0, 0, LhsScalar, RhsScalar>-1> rhsLS;
|
||||||
|
LhsPackMap lhsPackMap(blockA, depth, strideA, offsetA);
|
||||||
|
RhsPackMap rhsPackMap(blockB, depth, strideB, offsetB);
|
||||||
|
|
||||||
RhsLoopStruct<Architecture, CPU, Index, LhsScalar, RhsScalar, AccScalar, ResScalar, DataMapper, SHAPES_COUNT<0, 0, LhsScalar, RhsScalar>-1> rhsLS;
|
ResPacket pAlpha = pset1<ResPacket>(alpha);
|
||||||
PackMap<Architecture, CPU, Index, LhsScalar, DataMapper, true> lhsPackMap(blockA, depth);
|
|
||||||
PackMap<Architecture, CPU, Index, RhsScalar, DataMapper, false> rhsPackMap(blockB, depth);
|
rhsLS(0, res, rows, depth, cols, alpha, pAlpha, lhsPackMap, rhsPackMap);
|
||||||
rhsLS(0, res, blockA, blockB, rows, depth, cols, alpha, strideA, strideB, offsetA, offsetB, lhsPackMap, rhsPackMap);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
|
template<typename Index, typename DataMapper, int nr, bool Conjugate, bool PanelMode>
|
||||||
|
21
Eigen/src/Core/arch/NEON/PackingOps.h
Normal file
21
Eigen/src/Core/arch/NEON/PackingOps.h
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
// This file is part of Eigen, a lightweight C++ template library
|
||||||
|
// for linear algebra.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2021 Everton Constantino (everton.constantino@hotmail.com)
|
||||||
|
//
|
||||||
|
// This Source Code Form is subject to the terms of the Mozilla
|
||||||
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||||
|
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
|
||||||
|
#ifndef EIGEN_PACKING_OPS_NEON_H
|
||||||
|
#define EIGEN_PACKING_OPS_NEON_H
|
||||||
|
|
||||||
|
namespace Eigen {
|
||||||
|
|
||||||
|
namespace internal {
|
||||||
|
|
||||||
|
} // end namespace internal
|
||||||
|
|
||||||
|
} // end namespace Eigen
|
||||||
|
|
||||||
|
#endif // EIGEN_PACKING_OPS_NEON_H
|
Loading…
x
Reference in New Issue
Block a user