mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-14 04:35:57 +08:00
Enable runtime stack alignment in gemm_blocking_space.
This commit is contained in:
parent
d4f5efc51a
commit
becd89df29
@@ -2066,7 +2066,7 @@ EIGEN_DONT_INLINE void gemm_pack_rhs<Scalar, Index, DataMapper, nr, ColMajor, Co
|
|||||||
const LinearMapper dm3 = rhs.getLinearMapper(0, j2 + 3);
|
const LinearMapper dm3 = rhs.getLinearMapper(0, j2 + 3);
|
||||||
|
|
||||||
Index k=0;
|
Index k=0;
|
||||||
if((PacketSize%4)==0) // TODO enbale vectorized transposition for PacketSize==2 ??
|
if((PacketSize%4)==0) // TODO enable vectorized transposition for PacketSize==2 ??
|
||||||
{
|
{
|
||||||
for(; k<peeled_k; k+=PacketSize) {
|
for(; k<peeled_k; k+=PacketSize) {
|
||||||
PacketBlock<Packet,(PacketSize%4)==0?4:PacketSize> kernel;
|
PacketBlock<Packet,(PacketSize%4)==0?4:PacketSize> kernel;
|
||||||
|
@@ -293,8 +293,13 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
|
|||||||
SizeB = ActualCols * MaxDepth
|
SizeB = ActualCols * MaxDepth
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES
|
||||||
EIGEN_ALIGN_MAX LhsScalar m_staticA[SizeA];
|
EIGEN_ALIGN_MAX LhsScalar m_staticA[SizeA];
|
||||||
EIGEN_ALIGN_MAX RhsScalar m_staticB[SizeB];
|
EIGEN_ALIGN_MAX RhsScalar m_staticB[SizeB];
|
||||||
|
#else
|
||||||
|
EIGEN_ALIGN_MAX char m_staticA[SizeA * sizeof(LhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES-1];
|
||||||
|
EIGEN_ALIGN_MAX char m_staticB[SizeB * sizeof(RhsScalar) + EIGEN_DEFAULT_ALIGN_BYTES-1];
|
||||||
|
#endif
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
@@ -303,8 +308,13 @@ class gemm_blocking_space<StorageOrder,_LhsScalar,_RhsScalar,MaxRows, MaxCols, M
|
|||||||
this->m_mc = ActualRows;
|
this->m_mc = ActualRows;
|
||||||
this->m_nc = ActualCols;
|
this->m_nc = ActualCols;
|
||||||
this->m_kc = MaxDepth;
|
this->m_kc = MaxDepth;
|
||||||
|
#if EIGEN_MAX_STATIC_ALIGN_BYTES >= EIGEN_DEFAULT_ALIGN_BYTES
|
||||||
this->m_blockA = m_staticA;
|
this->m_blockA = m_staticA;
|
||||||
this->m_blockB = m_staticB;
|
this->m_blockB = m_staticB;
|
||||||
|
#else
|
||||||
|
this->m_blockA = reinterpret_cast<LhsScalar*>((std::size_t(m_staticA) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
|
||||||
|
this->m_blockB = reinterpret_cast<RhsScalar*>((std::size_t(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void initParallel(Index, Index, Index, Index)
|
void initParallel(Index, Index, Index, Index)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user