Mirror of https://gitlab.com/libeigen/eigen.git (synced 2025-08-12 19:59:05 +08:00)
Fix compiler bugs for GCC 10 & 11 for Power GEMM
This commit is contained in:
parent b02c384ef4
commit 44ba7a0da3
@@ -138,7 +138,7 @@ EIGEN_ALWAYS_INLINE void ploadRhsMMA(const double* rhs, __vector_pair& rhsV)
                     reinterpret_cast<__vector unsigned char>(ploadRhs<Packet2d>(rhs + (sizeof(Packet2d) / sizeof(double)))),
                     reinterpret_cast<__vector unsigned char>(ploadRhs<Packet2d>(rhs)));
#else
  __asm__ ("lxvp %x0,%1" : "=wa" (rhsV) : "Y" (*rhs));
  rhsV = *reinterpret_cast<__vector_pair *>(const_cast<double *>(rhs));
#endif
}
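For context, the hunk above puts the inline `lxvp` assembly (with its "Y" operand constraint) side by side with a plain dereference through a `__vector_pair` pointer; given the commit title, the assembly form appears to be the one being replaced because it misbehaves under GCC 10 and 11. The standalone sketch below, which is not Eigen code and only compiles when targeting POWER10 (e.g. `g++ -O2 -mcpu=power10`), shows the same two load idioms; the names `load_pair_cast` and `load_pair_asm` and the small test harness are illustrative assumptions.

```cpp
// Hypothetical standalone sketch (not Eigen code) of the two __vector_pair
// load idioms shown in the hunk above. A __vector_pair holds 32 bytes, i.e.
// four doubles. POWER10 target required:  g++ -O2 -mcpu=power10 sketch.cpp
#include <cstdio>
#include <cstring>

// Plain-cast form kept in the #else branch: let the compiler emit the paired load.
static inline void load_pair_cast(const double* p, __vector_pair& v) {
  v = *reinterpret_cast<__vector_pair *>(const_cast<double *>(p));
}

// Inline-assembly form with the "Y" constraint, as it appears in the hunk.
static inline void load_pair_asm(const double* p, __vector_pair& v) {
  __asm__ ("lxvp %x0,%1" : "=wa" (v) : "Y" (*p));
}

int main() {
  double data[4] = {1.0, 2.0, 3.0, 4.0};
  __vector_pair v;
  load_pair_cast(data, v);   // or load_pair_asm(data, v);

  double out[4];
  std::memcpy(out, &v, sizeof(out));  // copy the 32-byte pair back out to inspect it
  std::printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);
  return 0;
}
```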
@@ -147,6 +147,10 @@ EIGEN_ALWAYS_INLINE void ploadLhsMMA(const double* lhs, __vector_pair& lhsV)
  ploadRhsMMA(lhs, lhsV);
}

#if (EIGEN_COMP_LLVM || (__GNUC__ >= 11))
#define VECTOR_PAIR_LOADS_LHS
#endif

// PEEL_MMA loop factor.
#define PEEL_MMA 7
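The hunk above only enables the paired-LHS load path where the compiler is expected to handle `__vector_pair` loads correctly: Clang/LLVM, or GCC 11 and newer. Below is a minimal sketch of how such a gate behaves; `EIGEN_COMP_LLVM` is Eigen's LLVM/Clang detection macro and is approximated here with `__clang__` so the snippet stands alone, which is an assumption for this illustration only.

```cpp
// Minimal sketch of the compiler-version feature gate added above.
// Assumption: __clang__ stands in for Eigen's EIGEN_COMP_LLVM macro.
#include <cstdio>

#if (defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 11)))
#define VECTOR_PAIR_LOADS_LHS
#endif

int main() {
#ifdef VECTOR_PAIR_LOADS_LHS
  std::puts("paired LHS loads enabled (Clang or GCC >= 11)");
#else
  std::puts("paired LHS loads disabled (older GCC): fall back to single loads");
#endif
  return 0;
}
```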
@@ -162,6 +166,7 @@ EIGEN_ALWAYS_INLINE void ploadLhsMMA(const double* lhs, __vector_pair& lhsV)
    pgerMMA<Packet, type, false>(&accZero##iter, rhsV##peel, lhsV##iter); \
  }

#ifdef VECTOR_PAIR_LOADS_LHS
#define MICRO_MMA_WORK_TWO(iter, type, peel) \
  if (unroll_factor > iter) { \
    pgerMMA<Packet, type, false>(&accZero##iter, rhsV##peel, lhsV2##iter.packet[peel & 1]); \
@@ -185,6 +190,7 @@ EIGEN_ALWAYS_INLINE void ploadLhsMMA(const double* lhs, __vector_pair& lhsV)
  }

#define MICRO_MMA_LOAD_TWO(iter) MICRO_MMA_LOAD1_TWO(lhs_ptr, iter)
#endif

#define MICRO_MMA_TYPE_PEEL(funcw, funcl, type, peel) \
  if (PEEL_MMA > peel) { \
@@ -196,6 +202,14 @@ EIGEN_ALWAYS_INLINE void ploadLhsMMA(const double* lhs, __vector_pair& lhsV)
    EIGEN_UNUSED_VARIABLE(rhsV##peel); \
  }

#ifndef VECTOR_PAIR_LOADS_LHS
#define MICRO_MMA_UNROLL_TYPE_PEEL(funcw, funcl, type) \
  type rhsV0, rhsV1, rhsV2, rhsV3, rhsV4, rhsV5, rhsV6, rhsV7; \
  MICRO_MMA_TYPE_PEEL(funcw,funcl,type,0) MICRO_MMA_TYPE_PEEL(funcw,funcl,type,1) \
  MICRO_MMA_TYPE_PEEL(funcw,funcl,type,2) MICRO_MMA_TYPE_PEEL(funcw,funcl,type,3) \
  MICRO_MMA_TYPE_PEEL(funcw,funcl,type,4) MICRO_MMA_TYPE_PEEL(funcw,funcl,type,5) \
  MICRO_MMA_TYPE_PEEL(funcw,funcl,type,6) MICRO_MMA_TYPE_PEEL(funcw,funcl,type,7)
#else
#define MICRO_MMA_TYPE_PEEL2(funcw1, funcl1, funcw2, funcl2, type, peel1, peel2) \
  if (PEEL_MMA > peel2) { \
    PacketBlock<Packet,2> lhsV20, lhsV21, lhsV22, lhsV23, lhsV24, lhsV25, lhsV26, lhsV27; \
@@ -215,6 +229,7 @@ EIGEN_ALWAYS_INLINE void ploadLhsMMA(const double* lhs, __vector_pair& lhsV)
  MICRO_MMA_TYPE_PEEL2(funcw1,funcl1,funcw2,funcl2,type,2,3) \
  MICRO_MMA_TYPE_PEEL2(funcw1,funcl1,funcw2,funcl2,type,4,5) \
  MICRO_MMA_TYPE_PEEL2(funcw1,funcl1,funcw2,funcl2,type,6,7)
#endif

#define MICRO_MMA_UNROLL_TYPE_ONE(funcw, funcl, type) \
  type rhsV0; \
@@ -224,11 +239,15 @@ EIGEN_ALWAYS_INLINE void ploadLhsMMA(const double* lhs, __vector_pair& lhsV)
  MICRO_MMA_TYPE(MICRO_MMA_WORK_ONE, MICRO_LOAD_ONE, RhsPacket) \
  rhs_ptr += (accRows * size);

#ifndef VECTOR_PAIR_LOADS_LHS
#define MICRO_MMA_ONE_PEEL MICRO_MMA_UNROLL_TYPE(MICRO_MMA_UNROLL_TYPE_PEEL, PEEL_MMA)
#else
#define MICRO_MMA_UNROLL_TYPE2(MICRO_MMA_TYPE, size) \
  MICRO_MMA_TYPE(MICRO_MMA_WORK_ONE, MICRO_LOAD_ONE, MICRO_MMA_WORK_TWO, MICRO_MMA_LOAD_TWO, RhsPacket) \
  MICRO_MMA_TYPE(MICRO_MMA_WORK_ONE, MICRO_LOAD_ONE, MICRO_MMA_WORK_TWO, MICRO_MMA_LOAD_TWO, RhsPacket) \
  rhs_ptr += (accRows * size);

#define MICRO_MMA_ONE_PEEL MICRO_MMA_UNROLL_TYPE2(MICRO_MMA_UNROLL_TYPE_PEEL2, PEEL_MMA)
#endif

#define MICRO_MMA_ONE MICRO_MMA_UNROLL_TYPE(MICRO_MMA_UNROLL_TYPE_ONE, 1)
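The real-arithmetic macros end here; the hunks below apply the same change to the complex GEMM path. As a portable toy sketch (an assumption: plain doubles stand in for Packet2d and the MMA accumulators, and `kPeel` stands in for PEEL_MMA), here is the two-at-a-time peeling idea behind MICRO_MMA_TYPE_PEEL2, MICRO_MMA_WORK_TWO and MICRO_MMA_LOAD_TWO: one wide load supplies the LHS for two consecutive peel steps, and `peel & 1` selects which half of the loaded pair each step consumes.

```cpp
// Portable toy sketch of pairwise loop peeling (not Power-specific code).
#include <array>
#include <cstdio>

constexpr int kPeel = 8;  // mirrors the eight rhsV0..rhsV7 slots in the macros

int main() {
  double lhs[kPeel], rhs[kPeel], acc = 0.0;
  for (int i = 0; i < kPeel; ++i) { lhs[i] = i + 1; rhs[i] = 0.5 * (i + 1); }

  // Two peel steps per iteration: one "paired" load, then two work steps.
  for (int peel = 0; peel < kPeel; peel += 2) {
    std::array<double, 2> lhsV2 = {lhs[peel], lhs[peel + 1]};  // LOAD_TWO analogue
    for (int step = peel; step < peel + 2; ++step) {
      acc += lhsV2[step & 1] * rhs[step];                      // WORK_TWO analogue
    }
  }
  std::printf("acc = %g\n", acc);
  return 0;
}
```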
@@ -406,6 +425,7 @@ void gemmMMA(const DataMapper& res, const Scalar* blockA, const Scalar* blockB,
    pgercMMA<Packet, type, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal##iter, &accImag##iter, lhsV##iter, lhsVi##iter, rhsV##peel, rhsVi##peel); \
  }

#ifdef VECTOR_PAIR_LOADS_LHS
#define MICRO_COMPLEX_MMA_WORK_TWO(iter, type, peel) \
  if (unroll_factor > iter) { \
    pgercMMA<Packet, type, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal##iter, &accImag##iter, lhsV2##iter.packet[peel & 1], lhsVi2##iter.packet[peel & 1], rhsV##peel, rhsVi##peel); \
@@ -428,6 +448,7 @@ void gemmMMA(const DataMapper& res, const Scalar* blockA, const Scalar* blockB,
  MICRO_MMA_LOAD1_TWO(lhs_ptr_real, iter)

#define MICRO_COMPLEX_MMA_LOAD_TWO(iter) MICRO_COMPLEX_MMA_LOAD1_TWO(lhs_ptr, iter)
#endif

#define MICRO_COMPLEX_MMA_TYPE_PEEL(funcw, funcl, type, peel) \
  if (PEEL_COMPLEX_MMA > peel) { \
@@ -446,6 +467,13 @@ void gemmMMA(const DataMapper& res, const Scalar* blockA, const Scalar* blockB,
    EIGEN_UNUSED_VARIABLE(rhsVi##peel); \
  }

#ifndef VECTOR_PAIR_LOADS_LHS
#define MICRO_COMPLEX_MMA_UNROLL_TYPE_PEEL(funcw, funcl, type) \
  type rhsV0, rhsV1, rhsV2, rhsV3; \
  type rhsVi0, rhsVi1, rhsVi2, rhsVi3; \
  MICRO_COMPLEX_MMA_TYPE_PEEL(funcw,funcl,type,0) MICRO_COMPLEX_MMA_TYPE_PEEL(funcw,funcl,type,1) \
  MICRO_COMPLEX_MMA_TYPE_PEEL(funcw,funcl,type,2) MICRO_COMPLEX_MMA_TYPE_PEEL(funcw,funcl,type,3)
#else
#define MICRO_COMPLEX_MMA_TYPE_PEEL2(funcw1, funcl1, funcw2, funcl2, type, peel1, peel2) \
  if (PEEL_COMPLEX_MMA > peel2) { \
    PacketBlock<Packet,2> lhsV20, lhsV21, lhsV22, lhsV23; \
@@ -473,6 +501,7 @@ void gemmMMA(const DataMapper& res, const Scalar* blockA, const Scalar* blockB,
  type rhsVi0, rhsVi1, rhsVi2, rhsVi3; \
  MICRO_COMPLEX_MMA_TYPE_PEEL2(funcw1,funcl1,funcw2,funcl2,type,0,1) \
  MICRO_COMPLEX_MMA_TYPE_PEEL2(funcw1,funcl1,funcw2,funcl2,type,2,3)
#endif

#define MICRO_COMPLEX_MMA_UNROLL_TYPE_ONE(funcw, funcl, type) \
  type rhsV0, rhsVi0; \
@@ -483,12 +512,16 @@ void gemmMMA(const DataMapper& res, const Scalar* blockA, const Scalar* blockB,
  rhs_ptr_real += (accRows * size); \
  if(!RhsIsReal) rhs_ptr_imag += (accRows * size);

#ifndef VECTOR_PAIR_LOADS_LHS
#define MICRO_COMPLEX_MMA_ONE_PEEL MICRO_COMPLEX_MMA_UNROLL_TYPE(MICRO_COMPLEX_MMA_UNROLL_TYPE_PEEL, PEEL_COMPLEX_MMA)
#else
#define MICRO_COMPLEX_MMA_UNROLL_TYPE2(MICRO_COMPLEX_MMA_TYPE, size) \
  MICRO_COMPLEX_MMA_TYPE(MICRO_COMPLEX_MMA_WORK_ONE, MICRO_COMPLEX_LOAD_ONE, MICRO_COMPLEX_MMA_WORK_TWO, MICRO_COMPLEX_MMA_LOAD_TWO, RhsPacket) \
  MICRO_COMPLEX_MMA_TYPE(MICRO_COMPLEX_MMA_WORK_ONE, MICRO_COMPLEX_LOAD_ONE, MICRO_COMPLEX_MMA_WORK_TWO, MICRO_COMPLEX_MMA_LOAD_TWO, RhsPacket) \
  rhs_ptr_real += (accRows * size); \
  if(!RhsIsReal) rhs_ptr_imag += (accRows * size);

#define MICRO_COMPLEX_MMA_ONE_PEEL MICRO_COMPLEX_MMA_UNROLL_TYPE2(MICRO_COMPLEX_MMA_UNROLL_TYPE_PEEL2, PEEL_COMPLEX_MMA)
#endif

#define MICRO_COMPLEX_MMA_ONE MICRO_COMPLEX_MMA_UNROLL_TYPE(MICRO_COMPLEX_MMA_UNROLL_TYPE_ONE, 1)