Cleanup compiler warnings, etc from recent changes in GEMM & GEMV for PowerPC
parent cec0005c74
commit cb5ca1c901
@@ -193,7 +193,7 @@ EIGEN_STRONG_INLINE void symm_pack_complex_lhs_helper(std::complex<Scalar>* bloc
 const_blas_data_mapper<std::complex<Scalar>, Index, StorageOrder> lhs(_lhs, lhsStride);
 const Index vectorSize = quad_traits<Scalar>::vectorsize;
 const Index vectorDelta = vectorSize * depth;
-Scalar* blockAf = (Scalar *)(blockA);
+Scalar* blockAf = reinterpret_cast<Scalar *>(blockA);

 Index rir = 0, rii, j = 0;
 for(; j + vectorSize <= rows; j+=vectorSize)
@@ -1269,8 +1269,8 @@ const static Packet4i mask43 = { -1, -1, -1, 0 };

 const static Packet2l mask21 = { -1, 0 };

-template<typename Packet>
-EIGEN_ALWAYS_INLINE Packet bmask(const int remaining_rows)
+template<typename Packet, typename Index>
+EIGEN_ALWAYS_INLINE Packet bmask(const Index remaining_rows)
 {
 if (remaining_rows == 0) {
 return pset1<Packet>(float(0.0)); // Not used
@@ -1284,7 +1284,7 @@ EIGEN_ALWAYS_INLINE Packet bmask(const int remaining_rows)
 }

 template<>
-EIGEN_ALWAYS_INLINE Packet2d bmask<Packet2d>(const int remaining_rows)
+EIGEN_ALWAYS_INLINE Packet2d bmask<Packet2d,Index>(const Index remaining_rows)
 {
 if (remaining_rows == 0) {
 return pset1<Packet2d>(double(0.0)); // Not used
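The bmask hunks above drop the plain int parameter in favour of the caller's Index type, so call sites no longer need the (const int)(remaining_rows) cast that later hunks remove. A minimal standalone sketch of the same pattern, with made-up names (make_mask, MyPacket) standing in for Eigen's real types:

#include <cstddef>
#include <iostream>

using Index = std::ptrdiff_t;      // stand-in for Eigen's Index
struct MyPacket { float v[4]; };   // stand-in for a SIMD packet type

// Templating on the index type lets the caller pass 'remaining_rows' through
// unchanged instead of narrowing it to int at every call site.
template <typename Packet, typename IndexType>
Packet make_mask(IndexType remaining_rows) {
  Packet p{};
  for (IndexType i = 0; i < remaining_rows && i < 4; ++i) p.v[i] = 1.0f;
  return p;
}

int main() {
  Index remaining_rows = 3;
  // No '(const int)' cast needed any more.
  MyPacket m = make_mask<MyPacket>(remaining_rows);
  std::cout << m.v[0] << m.v[1] << m.v[2] << m.v[3] << "\n";  // 1110
}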
@@ -1748,7 +1748,7 @@ EIGEN_STRONG_INLINE void gemm(const DataMapper& res, const Scalar* blockA, const
 if( strideB == -1 ) strideB = depth;

 const Packet pAlpha = pset1<Packet>(alpha);
-const Packet pMask = bmask<Packet>((const int)(remaining_rows));
+const Packet pMask = bmask<Packet>(remaining_rows);

 Index col = 0;
 for(; col + accRows <= cols; col += accRows)
@@ -2208,7 +2208,7 @@ EIGEN_STRONG_INLINE void gemm_complex(const DataMapper& res, const LhsScalar* bl

 const Packet pAlphaReal = pset1<Packet>(alpha.real());
 const Packet pAlphaImag = pset1<Packet>(alpha.imag());
-const Packet pMask = bmask<Packet>((const int)(remaining_rows));
+const Packet pMask = bmask<Packet>(remaining_rows);

 const Scalar* blockA = (Scalar *) blockAc;
 const Scalar* blockB = (Scalar *) blockBc;
@@ -44,8 +44,8 @@ EIGEN_STRONG_INLINE void gemm_extra_cols(
 const Packet& pAlpha,
 const Packet& pMask);

-template<typename Packet>
-EIGEN_ALWAYS_INLINE Packet bmask(const int remaining_rows);
+template<typename Packet, typename Index>
+EIGEN_ALWAYS_INLINE Packet bmask(const Index remaining_rows);

 template<typename Scalar, typename Packet, typename Packetc, typename DataMapper, typename Index, const Index accRows, const Index accCols, bool ConjugateLhs, bool ConjugateRhs, bool LhsIsReal, bool RhsIsReal>
 EIGEN_ALWAYS_INLINE void gemm_complex_extra_row(
@@ -87,7 +87,7 @@ template<typename Scalar, typename Packet>
 EIGEN_ALWAYS_INLINE Packet ploadLhs(const Scalar* lhs);

 template<typename DataMapper, typename Packet, typename Index, const Index accCols, int StorageOrder, bool Complex, int N>
-EIGEN_ALWAYS_INLINE void bload(PacketBlock<Packet,N>& acc, const DataMapper& res, Index row, Index col);
+EIGEN_ALWAYS_INLINE void bload(PacketBlock<Packet,N*(Complex?2:1)>& acc, const DataMapper& res, Index row, Index col);

 template<typename Packet, int N>
 EIGEN_ALWAYS_INLINE void bscale(PacketBlock<Packet,N>& acc, PacketBlock<Packet,N>& accZ, const Packet& pAlpha);
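The bload declaration above folds the complex-vs-real factor into the non-type template argument: complex accumulators hold twice as many packets (real and imaginary parts), and N*(Complex?2:1) lets one declaration cover both cases. A standalone sketch of the idea, with Block and Packet4f as hypothetical stand-ins for Eigen's PacketBlock and packet types:

#include <iostream>

// Stand-in for Eigen's PacketBlock: a fixed-size group of N packets.
template <typename Packet, int N>
struct Block {
  Packet packet[N];
};

struct Packet4f { float v[4]; };

// The block size is computed directly in the template argument list,
// mirroring the 'N*(Complex?2:1)' change in the bload declaration.
template <typename Packet, int N, bool Complex>
void load_block(Block<Packet, N*(Complex ? 2 : 1)>& acc) {
  for (auto& p : acc.packet) p = Packet{};  // zero-fill as a placeholder for real loads
}

int main() {
  Block<Packet4f, 2> real_acc;     // N = 2, Complex = false
  Block<Packet4f, 4> complex_acc;  // N = 2, Complex = true -> 4 packets
  load_block<Packet4f, 2, false>(real_acc);
  load_block<Packet4f, 2, true>(complex_acc);
  std::cout << sizeof(real_acc) << " " << sizeof(complex_acc) << "\n";  // 32 64
}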
@@ -11,7 +11,9 @@
 #ifndef EIGEN_MATRIX_PRODUCT_MMA_ALTIVEC_H
 #define EIGEN_MATRIX_PRODUCT_MMA_ALTIVEC_H

+#if !EIGEN_COMP_LLVM
 #pragma GCC target("cpu=power10,htm")
+#endif

 #ifdef __has_builtin
 #if !__has_builtin(__builtin_vsx_assemble_pair)
@@ -80,7 +82,7 @@ EIGEN_ALWAYS_INLINE void pgerMMA(__vector_quad* acc, const RhsPacket& a, const L
 template<typename LhsPacket, typename RhsPacket, bool NegativeAccumulate>
 EIGEN_ALWAYS_INLINE void pgerMMA(__vector_quad* acc, const PacketBlock<Packet2d,2>& a, const Packet2d& b)
 {
-__vector_pair* a0 = (__vector_pair *)(&a.packet[0]);
+__vector_pair* a0 = reinterpret_cast<__vector_pair *>(const_cast<Packet2d *>(&a.packet[0]));
 if(NegativeAccumulate)
 {
 __builtin_mma_xvf64gernp(acc, *a0, (__vector unsigned char)b);
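The pgerMMA hunk replaces a single C-style cast with reinterpret_cast plus const_cast: the C-style cast quietly combined the type reinterpretation with dropping const, which stricter warning levels flag. A small sketch of the distinction, using hypothetical stand-in types (Packet2d here is a plain struct, VectorPair stands in for __vector_pair), and only comparing addresses rather than dereferencing:

#include <iostream>

struct Packet2d   { double v[2]; };  // stand-in for the VSX packet type
struct VectorPair { double v[4]; };  // stand-in for __vector_pair

int main() {
  const Packet2d block[2] = {{{1.0, 2.0}}, {{3.0, 4.0}}};

  // Old style: one C-style cast reinterprets the type and strips const in one go.
  VectorPair* a_old = (VectorPair*)(&block[0]);

  // New style: each conversion is spelled out, so removing const is visible
  // in the source instead of being hidden inside the C-style cast.
  VectorPair* a_new =
      reinterpret_cast<VectorPair*>(const_cast<Packet2d*>(&block[0]));

  std::cout << (a_old == a_new) << "\n";  // 1: same address, clearer intent
}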
@@ -133,8 +135,8 @@ EIGEN_ALWAYS_INLINE void ploadRhsMMA(const Scalar* rhs, Packet& rhsV)
 template<>
 EIGEN_ALWAYS_INLINE void ploadRhsMMA<double, PacketBlock<Packet2d, 2> >(const double* rhs, PacketBlock<Packet2d, 2>& rhsV)
 {
-rhsV.packet[0] = ploadRhs<double, Packet2d>((const double *)((Packet2d *)rhs ));
-rhsV.packet[1] = ploadRhs<double, Packet2d>((const double *)(((Packet2d *)rhs) + 1));
+rhsV.packet[0] = ploadRhs<double, Packet2d>(rhs);
+rhsV.packet[1] = ploadRhs<double, Packet2d>(rhs + (sizeof(Packet2d) / sizeof(double)));
 }

 template<>
@@ -142,8 +144,8 @@ EIGEN_ALWAYS_INLINE void ploadRhsMMA<double, __vector_pair>(const double* rhs, _
 {
 #if EIGEN_COMP_LLVM
 __builtin_vsx_assemble_pair(&rhsV,
-(__vector unsigned char)(ploadRhs<double, Packet2d>((const double *)(((Packet2d *)rhs) + 1))),
-(__vector unsigned char)(ploadRhs<double, Packet2d>((const double *)((Packet2d *)rhs ))));
+reinterpret_cast<__vector unsigned char>(ploadRhs<double, Packet2d>(rhs + (sizeof(Packet2d) / sizeof(double)))),
+reinterpret_cast<__vector unsigned char>(ploadRhs<double, Packet2d>(rhs)));
 #else
 __asm__ ("lxvp %x0,%1" : "=wa" (rhsV) : "Y" (*rhs));
 #endif
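Both spellings in the ploadRhsMMA hunks address the doubles that sit one Packet2d past rhs; the new form keeps the pointer arithmetic in units of double, so rhs never has to be cast through an unrelated pointer type. A sketch of the equivalence, with Packet2d modelled as a plain 16-byte struct rather than the actual VSX vector type:

#include <iostream>

struct Packet2d { double v[2]; };  // stand-in for the 16-byte VSX packet

int main() {
  double rhs[4] = {1.0, 2.0, 3.0, 4.0};

  // Old form: step in units of Packet2d, then cast back to const double*.
  const double* second_old = (const double*)(((Packet2d*)rhs) + 1);

  // New form: stay in units of double; sizeof(Packet2d)/sizeof(double) == 2,
  // so this reaches the same element without the intermediate pointer type.
  const double* second_new = rhs + (sizeof(Packet2d) / sizeof(double));

  std::cout << (second_old == second_new) << " " << *second_new << "\n";  // 1 3
}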
@@ -360,7 +362,7 @@ void gemmMMA(const DataMapper& res, const Scalar* blockA, const Scalar* blockB,
 if( strideB == -1 ) strideB = depth;

 const Packet pAlpha = pset1<Packet>(alpha);
-const Packet pMask = bmask<Packet>((const int)(remaining_rows));
+const Packet pMask = bmask<Packet>(remaining_rows);

 Index col = 0;
 for(; col + accRows <= cols; col += accRows)
@@ -595,7 +597,7 @@ void gemm_complexMMA(const DataMapper& res, const LhsScalar* blockAc, const RhsS

 const Packet pAlphaReal = pset1<Packet>(alpha.real());
 const Packet pAlphaImag = pset1<Packet>(alpha.imag());
-const Packet pMask = bmask<Packet>((const int)(remaining_rows));
+const Packet pMask = bmask<Packet>(remaining_rows);

 const Scalar* blockA = (Scalar *) blockAc;
 const Scalar* blockB = (Scalar *) blockBc;
@@ -613,7 +615,9 @@ void gemm_complexMMA(const DataMapper& res, const LhsScalar* blockAc, const RhsS
 #undef advanceRows
 #undef advanceCols

+#if !EIGEN_COMP_LLVM
 #pragma GCC reset_options
+#endif
 } // end namespace internal

 } // end namespace Eigen
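The two MMA hunks above wrap the GCC-specific pragma pair (#pragma GCC target("cpu=power10,htm") ... #pragma GCC reset_options) in #if !EIGEN_COMP_LLVM, so LLVM-based compilers, which do not handle these pragmas the same way, no longer see them. A generic sketch of the guard pattern; the pragma here is a portable optimize hint rather than the POWER-specific target pragma, purely so the example compiles anywhere:

// Rough stand-in for '#if !EIGEN_COMP_LLVM': GCC but not Clang.
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC push_options
#pragma GCC optimize("unroll-loops")   // GCC-only pragma, guarded away from Clang
#endif

int sum(const int* data, int n) {
  int s = 0;
  for (int i = 0; i < n; ++i) s += data[i];  // candidate for the unroll hint above
  return s;
}

#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC pop_options
#endif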
@@ -167,7 +167,7 @@ EIGEN_ALWAYS_INLINE void storeMaddData(ResScalar* res, ResScalar& alpha, ResScal
 if (GEMV_GETN(N) > iter1) { \
 if (GEMV_IS_FLOAT) { \
 LhsPacket h[2]; \
-__builtin_vsx_disassemble_pair((void*)(h), &b##iter2); \
+__builtin_vsx_disassemble_pair(reinterpret_cast<void*>(h), &b##iter2); \
 pger_vecMMA_acc<LhsPacket, RhsPacket, true>(&e##iter2, a0, h[0]); \
 pger_vecMMA_acc<LhsPacket, RhsPacket, true>(&e##iter3, a0, h[1]); \
 } else { \
@@ -302,6 +302,8 @@ EIGEN_ALWAYS_INLINE void storeMaddData(ResScalar* res, ResScalar& alpha, ResScal
 #define GEMV_INIT(iter, N) \
 if (N > iter) { \
 c##iter = pset1<ResPacket>(ResScalar(0)); \
+} else { \
+EIGEN_UNUSED_VARIABLE(c##iter); \
 }

 #ifdef EIGEN_POWER_USE_GEMV_PREFETCH
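The GEMV_INIT change is the recurring warning fix in this commit: the macro is expanded for a fixed maximum unroll count, so when N is smaller some accumulators are never touched, and the new else branch marks them with EIGEN_UNUSED_VARIABLE so warnings such as -Wunused-variable or -Wmaybe-uninitialized stay quiet. A standalone sketch of the pattern, with ACC_INIT and MARK_UNUSED as hypothetical stand-ins for GEMV_INIT and EIGEN_UNUSED_VARIABLE:

#include <iostream>

// Stand-in for EIGEN_UNUSED_VARIABLE: reference the name without reading it.
#define MARK_UNUSED(x) (void)(x)

// Same shape as GEMV_INIT: initialize the iter-th accumulator only when the
// unrolled count N actually reaches it, otherwise tell the compiler the
// variable is intentionally untouched.
#define ACC_INIT(iter, N)  \
  if ((N) > (iter)) {      \
    c##iter = 0.0f;        \
  } else {                 \
    MARK_UNUSED(c##iter);  \
  }

int main() {
  float c0, c1, c2, c3;
  const int N = 2;   // only c0 and c1 are live for this problem size
  ACC_INIT(0, N)
  ACC_INIT(1, N)
  ACC_INIT(2, N)     // expands to the else branch: no warning, no work
  ACC_INIT(3, N)
  c0 += 1.0f; c1 += 2.0f;
  std::cout << c0 + c1 << "\n";  // 3
}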
@@ -407,9 +409,11 @@ EIGEN_STRONG_INLINE void gemv_col(
 RhsPacketSize = Traits::RhsPacketSize,
 };

+#ifndef GCC_ONE_VECTORPAIR_BUG
 const Index n8 = rows - 8 * ResPacketSize + 1;
 const Index n4 = rows - 4 * ResPacketSize + 1;
 const Index n2 = rows - 2 * ResPacketSize + 1;
+#endif
 const Index n1 = rows - 1 * ResPacketSize + 1;
 #ifdef EIGEN_POWER_USE_GEMV_PREFETCH
 const Index prefetch_dist = 64 * LhsPacketSize;
@@ -1289,10 +1293,10 @@ EIGEN_ALWAYS_INLINE void gemv_mult_complex_MMA(LhsType& a0, RhsType* b, __vector
 gemv_mult_complex_real_MMA<ScalarPacket, LhsPacket, SLhsPacket, RhsScalar, ResPacket, ConjugateLhs, ConjugateRhs, StorageOrder>(a0, b, c0); \
 }

-GEMV_MULT_COMPLEX_REAL_MMA(Packet2cf, float);
-GEMV_MULT_COMPLEX_REAL_MMA(Packet1cd, double);
-GEMV_MULT_COMPLEX_REAL_MMA(__vector_pair, float);
-GEMV_MULT_COMPLEX_REAL_MMA(__vector_pair, double);
+GEMV_MULT_COMPLEX_REAL_MMA(Packet2cf, float)
+GEMV_MULT_COMPLEX_REAL_MMA(Packet1cd, double)
+GEMV_MULT_COMPLEX_REAL_MMA(__vector_pair, float)
+GEMV_MULT_COMPLEX_REAL_MMA(__vector_pair, double)

 /** \internal disassemble MMA accumulator results into packets */
 template <typename Scalar, typename ScalarPacket, typename LhsPacket, typename RhsPacket, bool ConjugateLhs, bool ConjugateRhs>
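The dropped semicolons above follow from GEMV_MULT_COMPLEX_REAL_MMA evidently expanding to a complete definition that already ends in '}': a trailing ';' after the invocation is then an empty declaration at namespace scope, which pedantic builds warn about (e.g. -Wextra-semi). A minimal sketch of the same situation with a made-up macro:

#include <iostream>

// A macro that, like GEMV_MULT_COMPLEX_REAL_MMA, expands to a complete
// function definition ending in '}'. Writing ';' after the invocation would
// add a stray empty declaration that pedantic warning levels complain about.
#define DEFINE_SCALE_FN(name, T) \
  T name(T x) { return x * T(2); }

DEFINE_SCALE_FN(scale_f, float)   // no trailing ';' needed
DEFINE_SCALE_FN(scale_d, double)

int main() {
  std::cout << scale_f(1.5f) << " " << scale_d(2.5) << "\n";  // 3 5
}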
@@ -1439,7 +1443,7 @@ EIGEN_ALWAYS_INLINE void disassembleResults(__vector_quad* c0, PacketBlock<Scala
 if (GEMV_GETN_COMPLEX(N) > iter1) { \
 if (GEMV_IS_COMPLEX_FLOAT) { \
 PLhsPacket g[2]; \
-__builtin_vsx_disassemble_pair((void*)(g), &a##iter2); \
+__builtin_vsx_disassemble_pair(reinterpret_cast<void*>(g), &a##iter2); \
 gemv_mult_complex_MMA<ScalarPacket, LhsScalar, PLhsPacket, PLhsPacket, RhsScalar, RhsPacket, ResPacket, ConjugateLhs, ConjugateRhs, ColMajor>(g[0], b, &e0##iter2); \
 gemv_mult_complex_MMA<ScalarPacket, LhsScalar, PLhsPacket, PLhsPacket, RhsScalar, RhsPacket, ResPacket, ConjugateLhs, ConjugateRhs, ColMajor>(g[1], b, &e0##iter3); \
 } else { \
@@ -1525,12 +1529,17 @@ EIGEN_ALWAYS_INLINE void disassembleResults(__vector_quad* c0, PacketBlock<Scala
 if (N > iter) { \
 c0##iter = pset_zero<PResPacket>(); \
 c1##iter = pset_init<ResPacket, LhsPacket, RhsPacket>(c1##iter); \
+} else { \
+EIGEN_UNUSED_VARIABLE(c0##iter); \
+EIGEN_UNUSED_VARIABLE(c1##iter); \
 }

 #define GEMV_WORK_COL_COMPLEX(iter, N) \
 if (N > iter) { \
 f##iter = GEMV_LOADPACKET_COL_COMPLEX(iter); \
 gemv_mult_complex<ScalarPacket, PLhsPacket, RhsScalar, RhsPacket, PResPacket, ResPacket, ConjugateLhs, ConjugateRhs, ColMajor>(f##iter, b, c0##iter, c1##iter); \
+} else { \
+EIGEN_UNUSED_VARIABLE(f##iter); \
 }

 #define GEMV_STORE_COL_COMPLEX(iter, N) \
@@ -1616,9 +1625,11 @@ EIGEN_STRONG_INLINE void gemv_complex_col(
 const Index prefetch_dist = 64 * LhsPacketSize;
 #endif

+#ifndef GCC_ONE_VECTORPAIR_BUG
 const Index n8 = rows - 8 * ResPacketSize + 1;
 const Index n4 = rows - 4 * ResPacketSize + 1;
 const Index n2 = rows - 2 * ResPacketSize + 1;
+#endif
 const Index n1 = rows - 1 * ResPacketSize + 1;

 // TODO: improve the following heuristic:
@@ -1661,10 +1672,10 @@ EIGEN_STRONG_INLINE void gemv_complex_col(
 {
 GEMV_PROCESS_COL_COMPLEX(2)
 }
+if (i < n1)
 #else
 while (i < n1)
 #endif
-if (i < n1)
 {
 GEMV_PROCESS_COL_COMPLEX_ONE(1)
 }
@@ -1861,11 +1872,15 @@ EIGEN_ALWAYS_INLINE ScalarBlock<ResScalar, 2> predux_complex(ResPacket& a, ResPa
 } else { \
 cc##iter1 = predux_real<ResScalar, ResPacket>(&c##iter1); \
 } \
+} else { \
+EIGEN_UNUSED_VARIABLE(cc##iter1); \
 }
 #else
 #define GEMV_INIT_ROW(iter, N) \
 if (N > iter) { \
 c##iter = pset1<ResPacket>(ResScalar(0)); \
+} else { \
+EIGEN_UNUSED_VARIABLE(c##iter); \
 }

 #define GEMV_WORK_ROW(iter, N) \
@@ -1876,6 +1891,8 @@ EIGEN_ALWAYS_INLINE ScalarBlock<ResScalar, 2> predux_complex(ResPacket& a, ResPa
 #define GEMV_PREDUX2(iter1, iter2, iter3, N) \
 if (N > iter1) { \
 cc##iter1 = predux_real<ResScalar, ResPacket>(c##iter2, c##iter3); \
+} else { \
+EIGEN_UNUSED_VARIABLE(cc##iter1); \
 }
 #endif

@@ -1933,9 +1950,11 @@ EIGEN_STRONG_INLINE void gemv_row(

 // TODO: fine tune the following heuristic. The rationale is that if the matrix is very large,
 // processing 8 rows at once might be counter productive wrt cache.
+#ifndef GCC_ONE_VECTORPAIR_BUG
 const Index n8 = lhs.stride() * sizeof(LhsScalar) > 32000 ? (rows - 7) : (rows - 7);
 const Index n4 = rows - 3;
 const Index n2 = rows - 1;
+#endif

 // TODO: for padded aligned inputs, we could enable aligned reads
 enum {
@@ -1952,8 +1971,8 @@ EIGEN_STRONG_INLINE void gemv_row(
 #else
 ResPacket c0, c1, c2, c3, c4, c5, c6, c7;
 #endif
-ScalarBlock<ResScalar, 2> cc0, cc1, cc2, cc3;
 #ifndef GCC_ONE_VECTORPAIR_BUG
+ScalarBlock<ResScalar, 2> cc0, cc1, cc2, cc3;
 GEMV_PROCESS_ROW(8)
 GEMV_PROCESS_ROW(4)
 GEMV_PROCESS_ROW(2)
@@ -2061,6 +2080,8 @@ EIGEN_ALWAYS_INLINE ScalarBlock<ResScalar, 2> predux_complex(PResPacket& a0, PRe
 } else { \
 cc##iter1 = predux_complex<ResScalar, ScalarPacket, LhsPacket, RhsPacket, ConjugateLhs, ConjugateRhs>(&e0##iter1); \
 } \
+} else { \
+EIGEN_UNUSED_VARIABLE(cc##iter1); \
 }

 #define GEMV_PROCESS_ROW_COMPLEX_SINGLE_MMA(N) \
@@ -2084,6 +2105,8 @@ EIGEN_ALWAYS_INLINE ScalarBlock<ResScalar, 2> predux_complex(PResPacket& a0, PRe
 #define GEMV_PREDUX4_COMPLEX(iter1, iter2, iter3, N) \
 if (N > iter1) { \
 cc##iter1 = predux_complex<ResScalar, PResPacket, ResPacket, LhsPacket, RhsPacket>(c0##iter2, c0##iter3, c1##iter2, c1##iter3); \
+} else { \
+EIGEN_UNUSED_VARIABLE(cc##iter1); \
 }

 #define GEMV_MULT_COMPLEX(iter1, iter2, iter3, N) \
@@ -2133,9 +2156,11 @@ EIGEN_ALWAYS_INLINE ScalarBlock<ResScalar, 2> predux_complex(PResPacket& a0, PRe
 lhs.template load<LhsPacket, LhsAlignment>(i + (iter), j)

 #define GEMV_INIT_COMPLEX_OLD(iter, N) \
+EIGEN_UNUSED_VARIABLE(c0##iter); \
 if (N > iter) { \
 c1##iter = pset_zero<ResPacket>(); \
-EIGEN_UNUSED_VARIABLE(c0##iter); \
+} else { \
+EIGEN_UNUSED_VARIABLE(c1##iter); \
 }

 #define GEMV_WORK_ROW_COMPLEX_OLD(iter, N) \
@@ -2148,6 +2173,8 @@ EIGEN_ALWAYS_INLINE ScalarBlock<ResScalar, 2> predux_complex(PResPacket& a0, PRe
 if (N > iter1) { \
 cc##iter1.scalar[0] = predux(c1##iter2); \
 cc##iter1.scalar[1] = predux(c1##iter3); \
+} else { \
+EIGEN_UNUSED_VARIABLE(cc##iter1); \
 }

 #define GEMV_PROCESS_ROW_COMPLEX_SINGLE_OLD(N) \
@@ -2237,9 +2264,11 @@ EIGEN_STRONG_INLINE void gemv_complex_row(

 // TODO: fine tune the following heuristic. The rationale is that if the matrix is very large,
 // processing 8 rows at once might be counter productive wrt cache.
+#ifndef GCC_ONE_VECTORPAIR_BUG
 const Index n8 = lhs.stride() * sizeof(LhsScalar) > 32000 ? (rows - 7) : (rows - 7);
 const Index n4 = rows - 3;
 const Index n2 = rows - 1;
+#endif

 // TODO: for padded aligned inputs, we could enable aligned reads
 enum {
@@ -2258,12 +2287,12 @@ EIGEN_STRONG_INLINE void gemv_complex_row(
 GEMV_UNUSED_EXTRA(1, c0)
 GEMV_UNUSED_EXTRA(1, c1)
 #endif
-ScalarBlock<ResScalar, 2> cc0, cc1, cc2, cc3;
 ResScalar dd0;
-#if !defined(GCC_ONE_VECTORPAIR_BUG) && defined(USE_GEMV_MMA)
+#ifndef GCC_ONE_VECTORPAIR_BUG
+ScalarBlock<ResScalar, 2> cc0, cc1, cc2, cc3;
+#ifdef USE_GEMV_MMA
 if (!GEMV_IS_COMPLEX_COMPLEX)
 #endif
-#ifndef GCC_ONE_VECTORPAIR_BUG
 {
 GEMV_PROCESS_ROW_COMPLEX(8)
 }