Fix compiler bugs for GCC 10 & 11 for Power GEMM

This commit is contained in: (branch list not captured)
Authored by Chip Kerchner on 2022-04-20 15:59:00 +00:00; committed by Rasmus Munk Larsen
parent b02c384ef4
commit 44ba7a0da3

View File

@ -138,7 +138,7 @@ EIGEN_ALWAYS_INLINE void ploadRhsMMA(const double* rhs, __vector_pair& rhsV)
reinterpret_cast<__vector unsigned char>(ploadRhs<Packet2d>(rhs + (sizeof(Packet2d) / sizeof(double)))),
reinterpret_cast<__vector unsigned char>(ploadRhs<Packet2d>(rhs)));
#else
__asm__ ("lxvp %x0,%1" : "=wa" (rhsV) : "Y" (*rhs));
rhsV = *reinterpret_cast<__vector_pair *>(const_cast<double *>(rhs));
#endif
}
@ -147,6 +147,10 @@ EIGEN_ALWAYS_INLINE void ploadLhsMMA(const double* lhs, __vector_pair& lhsV)
ploadRhsMMA(lhs, lhsV);
}
#if (EIGEN_COMP_LLVM || (__GNUC__ >= 11))
#define VECTOR_PAIR_LOADS_LHS
#endif
// PEEL_MMA loop factor.
#define PEEL_MMA 7
@ -162,6 +166,7 @@ EIGEN_ALWAYS_INLINE void ploadLhsMMA(const double* lhs, __vector_pair& lhsV)
pgerMMA<Packet, type, false>(&accZero##iter, rhsV##peel, lhsV##iter); \
}
#ifdef VECTOR_PAIR_LOADS_LHS
#define MICRO_MMA_WORK_TWO(iter, type, peel) \
if (unroll_factor > iter) { \
pgerMMA<Packet, type, false>(&accZero##iter, rhsV##peel, lhsV2##iter.packet[peel & 1]); \
@ -185,6 +190,7 @@ EIGEN_ALWAYS_INLINE void ploadLhsMMA(const double* lhs, __vector_pair& lhsV)
}
#define MICRO_MMA_LOAD_TWO(iter) MICRO_MMA_LOAD1_TWO(lhs_ptr, iter)
#endif
#define MICRO_MMA_TYPE_PEEL(funcw, funcl, type, peel) \
if (PEEL_MMA > peel) { \
@ -196,6 +202,14 @@ EIGEN_ALWAYS_INLINE void ploadLhsMMA(const double* lhs, __vector_pair& lhsV)
EIGEN_UNUSED_VARIABLE(rhsV##peel); \
}
#ifndef VECTOR_PAIR_LOADS_LHS
#define MICRO_MMA_UNROLL_TYPE_PEEL(funcw, funcl, type) \
type rhsV0, rhsV1, rhsV2, rhsV3, rhsV4, rhsV5, rhsV6, rhsV7; \
MICRO_MMA_TYPE_PEEL(funcw,funcl,type,0) MICRO_MMA_TYPE_PEEL(funcw,funcl,type,1) \
MICRO_MMA_TYPE_PEEL(funcw,funcl,type,2) MICRO_MMA_TYPE_PEEL(funcw,funcl,type,3) \
MICRO_MMA_TYPE_PEEL(funcw,funcl,type,4) MICRO_MMA_TYPE_PEEL(funcw,funcl,type,5) \
MICRO_MMA_TYPE_PEEL(funcw,funcl,type,6) MICRO_MMA_TYPE_PEEL(funcw,funcl,type,7)
#else
#define MICRO_MMA_TYPE_PEEL2(funcw1, funcl1, funcw2, funcl2, type, peel1, peel2) \
if (PEEL_MMA > peel2) { \
PacketBlock<Packet,2> lhsV20, lhsV21, lhsV22, lhsV23, lhsV24, lhsV25, lhsV26, lhsV27; \
@ -215,6 +229,7 @@ EIGEN_ALWAYS_INLINE void ploadLhsMMA(const double* lhs, __vector_pair& lhsV)
MICRO_MMA_TYPE_PEEL2(funcw1,funcl1,funcw2,funcl2,type,2,3) \
MICRO_MMA_TYPE_PEEL2(funcw1,funcl1,funcw2,funcl2,type,4,5) \
MICRO_MMA_TYPE_PEEL2(funcw1,funcl1,funcw2,funcl2,type,6,7)
#endif
#define MICRO_MMA_UNROLL_TYPE_ONE(funcw, funcl, type) \
type rhsV0; \
@ -224,11 +239,15 @@ EIGEN_ALWAYS_INLINE void ploadLhsMMA(const double* lhs, __vector_pair& lhsV)
MICRO_MMA_TYPE(MICRO_MMA_WORK_ONE, MICRO_LOAD_ONE, RhsPacket) \
rhs_ptr += (accRows * size);
#ifndef VECTOR_PAIR_LOADS_LHS
#define MICRO_MMA_ONE_PEEL MICRO_MMA_UNROLL_TYPE(MICRO_MMA_UNROLL_TYPE_PEEL, PEEL_MMA)
#else
#define MICRO_MMA_UNROLL_TYPE2(MICRO_MMA_TYPE, size) \
MICRO_MMA_TYPE(MICRO_MMA_WORK_ONE, MICRO_LOAD_ONE, MICRO_MMA_WORK_TWO, MICRO_MMA_LOAD_TWO, RhsPacket) \
MICRO_MMA_TYPE(MICRO_MMA_WORK_ONE, MICRO_LOAD_ONE, MICRO_MMA_WORK_TWO, MICRO_MMA_LOAD_TWO, RhsPacket) \
rhs_ptr += (accRows * size);
#define MICRO_MMA_ONE_PEEL MICRO_MMA_UNROLL_TYPE2(MICRO_MMA_UNROLL_TYPE_PEEL2, PEEL_MMA)
#endif
#define MICRO_MMA_ONE MICRO_MMA_UNROLL_TYPE(MICRO_MMA_UNROLL_TYPE_ONE, 1)
@ -406,6 +425,7 @@ void gemmMMA(const DataMapper& res, const Scalar* blockA, const Scalar* blockB,
pgercMMA<Packet, type, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal##iter, &accImag##iter, lhsV##iter, lhsVi##iter, rhsV##peel, rhsVi##peel); \
}
#ifdef VECTOR_PAIR_LOADS_LHS
#define MICRO_COMPLEX_MMA_WORK_TWO(iter, type, peel) \
if (unroll_factor > iter) { \
pgercMMA<Packet, type, ConjugateLhs, ConjugateRhs, LhsIsReal, RhsIsReal>(&accReal##iter, &accImag##iter, lhsV2##iter.packet[peel & 1], lhsVi2##iter.packet[peel & 1], rhsV##peel, rhsVi##peel); \
@ -428,6 +448,7 @@ void gemmMMA(const DataMapper& res, const Scalar* blockA, const Scalar* blockB,
MICRO_MMA_LOAD1_TWO(lhs_ptr_real, iter)
#define MICRO_COMPLEX_MMA_LOAD_TWO(iter) MICRO_COMPLEX_MMA_LOAD1_TWO(lhs_ptr, iter)
#endif
#define MICRO_COMPLEX_MMA_TYPE_PEEL(funcw, funcl, type, peel) \
if (PEEL_COMPLEX_MMA > peel) { \
@ -446,6 +467,13 @@ void gemmMMA(const DataMapper& res, const Scalar* blockA, const Scalar* blockB,
EIGEN_UNUSED_VARIABLE(rhsVi##peel); \
}
#ifndef VECTOR_PAIR_LOADS_LHS
#define MICRO_COMPLEX_MMA_UNROLL_TYPE_PEEL(funcw, funcl, type) \
type rhsV0, rhsV1, rhsV2, rhsV3; \
type rhsVi0, rhsVi1, rhsVi2, rhsVi3; \
MICRO_COMPLEX_MMA_TYPE_PEEL(funcw,funcl,type,0) MICRO_COMPLEX_MMA_TYPE_PEEL(funcw,funcl,type,1) \
MICRO_COMPLEX_MMA_TYPE_PEEL(funcw,funcl,type,2) MICRO_COMPLEX_MMA_TYPE_PEEL(funcw,funcl,type,3)
#else
#define MICRO_COMPLEX_MMA_TYPE_PEEL2(funcw1, funcl1, funcw2, funcl2, type, peel1, peel2) \
if (PEEL_COMPLEX_MMA > peel2) { \
PacketBlock<Packet,2> lhsV20, lhsV21, lhsV22, lhsV23; \
@ -473,6 +501,7 @@ void gemmMMA(const DataMapper& res, const Scalar* blockA, const Scalar* blockB,
type rhsVi0, rhsVi1, rhsVi2, rhsVi3; \
MICRO_COMPLEX_MMA_TYPE_PEEL2(funcw1,funcl1,funcw2,funcl2,type,0,1) \
MICRO_COMPLEX_MMA_TYPE_PEEL2(funcw1,funcl1,funcw2,funcl2,type,2,3)
#endif
#define MICRO_COMPLEX_MMA_UNROLL_TYPE_ONE(funcw, funcl, type) \
type rhsV0, rhsVi0; \
@ -483,12 +512,16 @@ void gemmMMA(const DataMapper& res, const Scalar* blockA, const Scalar* blockB,
rhs_ptr_real += (accRows * size); \
if(!RhsIsReal) rhs_ptr_imag += (accRows * size);
#ifndef VECTOR_PAIR_LOADS_LHS
#define MICRO_COMPLEX_MMA_ONE_PEEL MICRO_COMPLEX_MMA_UNROLL_TYPE(MICRO_COMPLEX_MMA_UNROLL_TYPE_PEEL, PEEL_COMPLEX_MMA)
#else
#define MICRO_COMPLEX_MMA_UNROLL_TYPE2(MICRO_COMPLEX_MMA_TYPE, size) \
MICRO_COMPLEX_MMA_TYPE(MICRO_COMPLEX_MMA_WORK_ONE, MICRO_COMPLEX_LOAD_ONE, MICRO_COMPLEX_MMA_WORK_TWO, MICRO_COMPLEX_MMA_LOAD_TWO, RhsPacket) \
MICRO_COMPLEX_MMA_TYPE(MICRO_COMPLEX_MMA_WORK_ONE, MICRO_COMPLEX_LOAD_ONE, MICRO_COMPLEX_MMA_WORK_TWO, MICRO_COMPLEX_MMA_LOAD_TWO, RhsPacket) \
rhs_ptr_real += (accRows * size); \
if(!RhsIsReal) rhs_ptr_imag += (accRows * size);
#define MICRO_COMPLEX_MMA_ONE_PEEL MICRO_COMPLEX_MMA_UNROLL_TYPE2(MICRO_COMPLEX_MMA_UNROLL_TYPE_PEEL2, PEEL_COMPLEX_MMA)
#endif
#define MICRO_COMPLEX_MMA_ONE MICRO_COMPLEX_MMA_UNROLL_TYPE(MICRO_COMPLEX_MMA_UNROLL_TYPE_ONE, 1)