Mirror of https://gitlab.com/libeigen/eigen.git
Partial Packet support for GEMM real-only (PowerPC). Also fixes compilation warnings & errors under some conditions in the new API.
This commit is contained in:
parent 5a1c7807e6
commit ce60a7be83
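Background: "partial packets" let the PowerPC GEMM kernels load and store just the first n lanes of a vector register when a block has fewer rows than a full packet, instead of masking or taking a scalar fallback for the remaining rows. A standalone sketch of the idea — illustrative names only, not Eigen code (on Power9 each memcpy below corresponds to a single length-controlled vector instruction):

#include <cstdio>
#include <cstring>

struct Packet4fSketch { float v[4]; };   // stand-in for a 4-lane float packet

// Load only the first n lanes; the remaining lanes stay zero.
static Packet4fSketch load_partial(const float* from, int n) {
  Packet4fSketch p = {};
  std::memcpy(p.v, from, n * sizeof(float));
  return p;
}

// Store only the first n lanes, so a tail of fewer than 4 rows
// never writes past the end of the output matrix.
static void store_partial(float* to, const Packet4fSketch& p, int n) {
  std::memcpy(to, p.v, n * sizeof(float));
}

int main() {
  float src[3] = {1.f, 2.f, 3.f};        // a 3-row tail of a column
  float dst[3] = {};
  store_partial(dst, load_partial(src, 3), 3);
  std::printf("%g %g %g\n", dst[0], dst[1], dst[2]);
}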
@@ -1159,6 +1159,29 @@ EIGEN_ALWAYS_INLINE void bstore(PacketBlock<Packet,N>& acc, const DataMapper& re
   }
 }
 
+#ifdef USE_PARTIAL_PACKETS
+template<typename DataMapper, typename Packet, const Index accCols, bool Complex, Index N, bool full>
+EIGEN_ALWAYS_INLINE void bload_partial(PacketBlock<Packet,N*(Complex?2:1)>& acc, const DataMapper& res, Index row, Index elements)
+{
+  for (Index M = 0; M < N; M++) {
+    acc.packet[M] = res.template loadPacketPartial<Packet>(row, M, elements);
+  }
+  if (Complex && full) {
+    for (Index M = 0; M < N; M++) {
+      acc.packet[M+N] = res.template loadPacketPartial<Packet>(row + accCols, M, elements);
+    }
+  }
+}
+
+template<typename DataMapper, typename Packet, Index N>
+EIGEN_ALWAYS_INLINE void bstore_partial(PacketBlock<Packet,N>& acc, const DataMapper& res, Index row, Index elements)
+{
+  for (Index M = 0; M < N; M++) {
+    res.template storePacketPartial<Packet>(row, M, acc.packet[M], elements);
+  }
+}
+#endif
+
 #ifdef _ARCH_PWR10
 #define USE_P10_AND_PVIPR2_0 (EIGEN_COMP_LLVM || (__GNUC__ >= 11))
 #else
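Note: the bload_partial/bstore_partial pair added above mirrors the full-width bload/bstore, but forwards an elements count to the result mapper so each per-column packet load or store touches only the first `elements` rows. A minimal standalone sketch of that loop shape over a toy column-major mapper — all names here are illustrative stand-ins, not Eigen's API:

#include <cstring>

struct PacketSketch { float v[4]; };             // stand-in for a 4-lane packet

// Toy column-major result mapper: base pointer plus column stride.
struct MapperSketch {
  float* data;
  int stride;
  PacketSketch loadPartial(int row, int col, int elements) const {
    PacketSketch p = {};
    std::memcpy(p.v, data + col * stride + row, elements * sizeof(float));
    return p;
  }
  void storePartial(int row, int col, const PacketSketch& p, int elements) const {
    std::memcpy(data + col * stride + row, p.v, elements * sizeof(float));
  }
};

// Same shape as bload_partial above: one partial packet per accumulator
// column, each limited to the first `elements` rows.
template<int N>
void bload_partial_sketch(PacketSketch (&acc)[N], const MapperSketch& res, int row, int elements) {
  for (int M = 0; M < N; M++) acc[M] = res.loadPartial(row, M, elements);
}

int main() {
  float storage[8] = {};                         // 2 columns x 4 rows, column-major
  MapperSketch res{storage, 4};
  PacketSketch acc[2];
  bload_partial_sketch<2>(acc, res, /*row=*/0, /*elements=*/3);  // 3-row tail
}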
@@ -1199,6 +1222,14 @@ EIGEN_ALWAYS_INLINE Packet2d bmask<Packet2d>(const Index remaining_rows)
 #endif
 }
 
+template<typename Packet, int N>
+EIGEN_ALWAYS_INLINE void bscale(PacketBlock<Packet,N>& acc, PacketBlock<Packet,N>& accZ, const Packet& pAlpha)
+{
+  for (int M = 0; M < N; M++) {
+    acc.packet[M] = pmadd<Packet>(pAlpha, accZ.packet[M], acc.packet[M]);
+  }
+}
+
 // Scale the PacketBlock vectors by alpha.
 template<typename Packet, int N, bool mask>
 EIGEN_ALWAYS_INLINE void bscale(PacketBlock<Packet,N>& acc, PacketBlock<Packet,N>& accZ, const Packet& pAlpha, const Packet& pMask)
@@ -1209,9 +1240,7 @@ EIGEN_ALWAYS_INLINE void bscale(PacketBlock<Packet,N>& acc, PacketBlock<Packet,N
     EIGEN_UNUSED_VARIABLE(pMask);
   }
 
-  for (int M = 0; M < N; M++) {
-    acc.packet[M] = pmadd<Packet>(pAlpha, accZ.packet[M], acc.packet[M]);
-  }
+  bscale<Packet, N>(acc, accZ, pAlpha);
 }
 
 template<typename Packet, int N, bool real>
@@ -1461,6 +1490,13 @@ EIGEN_ALWAYS_INLINE void gemm_unrolled_row_iteration(
     MICRO_EXTRA_ROW<Scalar, Packet, accRows, remaining_rows>(lhs_ptr, rhs_ptr0, rhs_ptr1, rhs_ptr2, accZero0);
   }
 
+#ifdef USE_PARTIAL_PACKETS
+  EIGEN_UNUSED_VARIABLE(rows);
+  EIGEN_UNUSED_VARIABLE(pMask);
+  bload_partial<DataMapper, Packet, 0, false, accRows>(acc, res, row, remaining_rows);
+  bscale<Packet,accRows>(acc, accZero0, pAlpha);
+  bstore_partial<DataMapper, Packet, accRows>(acc, res, row, remaining_rows);
+#else
   bload<DataMapper, Packet, 0, ColMajor, false, accRows>(acc, res, row, 0);
   if ((accRows == 1) || (rows >= accCols))
   {
@@ -1474,6 +1510,7 @@ EIGEN_ALWAYS_INLINE void gemm_unrolled_row_iteration(
       }
     }
   }
+#endif
 }
 
 #define MICRO_EXTRA(MICRO_EXTRA_UNROLL, value, is_col) \
@@ -1565,16 +1602,35 @@ EIGEN_ALWAYS_INLINE void gemm_extra_row(
 
 #define MICRO_PREFETCH MICRO_UNROLL(MICRO_PREFETCH_ONE)
 
+#ifdef USE_PARTIAL_PACKETS
+#define MICRO_STORE_ONE(iter) \
+  if (unroll_factor > iter) { \
+    if (MICRO_NORMAL_PARTIAL(iter)) { \
+      bload<DataMapper, Packet, 0, ColMajor, false, accRows>(acc, res, row + iter*accCols, 0); \
+      bscale<Packet,accRows>(acc, accZero##iter, pAlpha); \
+      bstore<DataMapper, Packet, accRows>(acc, res, row + iter*accCols); \
+    } else { \
+      bload_partial<DataMapper, Packet, 0, false, accRows>(acc, res, row + iter*accCols, accCols2); \
+      bscale<Packet,accRows>(acc, accZero##iter, pAlpha); \
+      bstore_partial<DataMapper, Packet, accRows>(acc, res, row + iter*accCols, accCols2); \
+    } \
+  }
+#else
 #define MICRO_STORE_ONE(iter) \
   if (unroll_factor > iter) { \
     bload<DataMapper, Packet, 0, ColMajor, false, accRows>(acc, res, row + iter*accCols, 0); \
     bscale<Packet,accRows,!(MICRO_NORMAL(iter))>(acc, accZero##iter, pAlpha, pMask); \
     bstore<DataMapper, Packet, accRows>(acc, res, row + iter*accCols); \
   }
+#endif
 
 #define MICRO_STORE MICRO_UNROLL(MICRO_STORE_ONE)
 
+#ifdef USE_PARTIAL_PACKETS
+template<int unroll_factor, typename Scalar, typename Packet, typename DataMapper, const Index accRows, const Index accCols, bool full>
+#else
 template<int unroll_factor, typename Scalar, typename Packet, typename DataMapper, const Index accRows, const Index accCols, const Index accCols2>
+#endif
 EIGEN_ALWAYS_INLINE void gemm_unrolled_iteration(
   const DataMapper& res,
   const Scalar* lhs_base,
@@ -1585,7 +1641,12 @@ EIGEN_ALWAYS_INLINE void gemm_unrolled_iteration(
   Index strideB,
   Index& row,
   const Packet& pAlpha,
-  const Packet& pMask)
+#ifdef USE_PARTIAL_PACKETS
+  Index accCols2
+#else
+  const Packet& pMask
+#endif
+  )
 {
   const Scalar* rhs_ptr0 = rhs_base, * rhs_ptr1 = NULL, * rhs_ptr2 = NULL;
   const Scalar* lhs_ptr0 = NULL, * lhs_ptr1 = NULL, * lhs_ptr2 = NULL, * lhs_ptr3 = NULL, * lhs_ptr4 = NULL, * lhs_ptr5 = NULL, * lhs_ptr6 = NULL, * lhs_ptr7 = NULL;
@@ -1612,9 +1673,15 @@ EIGEN_ALWAYS_INLINE void gemm_unrolled_iteration(
   MICRO_UPDATE
 }
 
+#ifdef USE_PARTIAL_PACKETS
+#define MICRO_UNROLL_ITER2(N, M) \
+  gemm_unrolled_iteration<N + ((M) ? 1 : 0), Scalar, Packet, DataMapper, accRows, accCols, !M>(res3, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, pAlpha, M ? remaining_rows : accCols); \
+  if (M) return;
+#else
 #define MICRO_UNROLL_ITER2(N, M) \
   gemm_unrolled_iteration<N + ((M) ? 1 : 0), Scalar, Packet, DataMapper, accRows, accCols, M ? M : accCols>(res3, lhs_base, rhs_base, depth, strideA, offsetA, strideB, row, pAlpha, pMask); \
   if (M) return;
+#endif
 
 template<typename Scalar, typename Packet, typename DataMapper, const Index accRows, const Index accCols>
 EIGEN_ALWAYS_INLINE void gemm_cols(
@@ -2094,22 +2161,22 @@ EIGEN_ALWAYS_INLINE void gemm_complex_cols(
   switch( (rows-row)/accCols ) {
 #if MAX_COMPLEX_UNROLL > 4
     case 4:
-      MICRO_UNROLL_ITER(MICRO_COMPLEX_UNROLL_ITER2, 4)
+      MICRO_COMPLEX_UNROLL_ITER(MICRO_COMPLEX_UNROLL_ITER2, 4)
       break;
 #endif
 #if MAX_COMPLEX_UNROLL > 3
     case 3:
-      MICRO_UNROLL_ITER(MICRO_COMPLEX_UNROLL_ITER2, 3)
+      MICRO_COMPLEX_UNROLL_ITER(MICRO_COMPLEX_UNROLL_ITER2, 3)
       break;
 #endif
 #if MAX_COMPLEX_UNROLL > 2
     case 2:
-      MICRO_UNROLL_ITER(MICRO_COMPLEX_UNROLL_ITER2, 2)
+      MICRO_COMPLEX_UNROLL_ITER(MICRO_COMPLEX_UNROLL_ITER2, 2)
       break;
 #endif
 #if MAX_COMPLEX_UNROLL > 1
     case 1:
-      MICRO_UNROLL_ITER(MICRO_COMPLEX_UNROLL_ITER2, 1)
+      MICRO_COMPLEX_UNROLL_ITER(MICRO_COMPLEX_UNROLL_ITER2, 1)
       break;
 #endif
     default:
@@ -5,6 +5,10 @@
 #define EIGEN_POWER_PREFETCH(p)
 #endif
 
+#ifdef _ARCH_PWR9
+#define USE_PARTIAL_PACKETS
+#endif
+
 #include "../../InternalHeaderCheck.h"
 
 namespace Eigen {
@@ -89,6 +93,17 @@ EIGEN_ALWAYS_INLINE void bload(PacketBlock<Packet,N*(Complex?2:1)>& acc, const D
 template<typename DataMapper, typename Packet, int N>
 EIGEN_ALWAYS_INLINE void bstore(PacketBlock<Packet,N>& acc, const DataMapper& res, Index row);
 
+#ifdef USE_PARTIAL_PACKETS
+template<typename DataMapper, typename Packet, const Index accCols, bool Complex, Index N, bool full = true>
+EIGEN_ALWAYS_INLINE void bload_partial(PacketBlock<Packet,N*(Complex?2:1)>& acc, const DataMapper& res, Index row, Index elements);
+
+template<typename DataMapper, typename Packet, Index N>
+EIGEN_ALWAYS_INLINE void bstore_partial(PacketBlock<Packet,N>& acc, const DataMapper& res, Index row, Index elements);
+#endif
+
+template<typename Packet, int N>
+EIGEN_ALWAYS_INLINE void bscale(PacketBlock<Packet,N>& acc, PacketBlock<Packet,N>& accZ, const Packet& pAlpha);
+
 template<typename Packet, int N, bool mask>
 EIGEN_ALWAYS_INLINE void bscale(PacketBlock<Packet,N>& acc, PacketBlock<Packet,N>& accZ, const Packet& pAlpha, const Packet& pMask);
 
@@ -101,7 +116,7 @@ EIGEN_ALWAYS_INLINE void bcouple(PacketBlock<Packet,N>& taccReal, PacketBlock<Pa
 #define MICRO_NORMAL(iter) \
   (accCols == accCols2) || (unroll_factor != (iter + 1))
 
-#define MICRO_UNROLL_ITER(func, N) \
+#define MICRO_UNROLL_ITER1(func, N) \
   switch (remaining_rows) { \
     default: \
       func(N, 0) \
@@ -121,6 +136,22 @@ EIGEN_ALWAYS_INLINE void bcouple(PacketBlock<Packet,N>& taccReal, PacketBlock<Pa
       break; \
   }
 
+#ifdef USE_PARTIAL_PACKETS
+#define MICRO_UNROLL_ITER(func, N) \
+  if (remaining_rows) { \
+    func(N, true); \
+  } else { \
+    func(N, false); \
+  }
+
+#define MICRO_NORMAL_PARTIAL(iter) \
+  full || (unroll_factor != (iter + 1))
+#else
+#define MICRO_UNROLL_ITER(func, N) MICRO_UNROLL_ITER1(func, N)
+#endif
+
+#define MICRO_COMPLEX_UNROLL_ITER(func, N) MICRO_UNROLL_ITER1(func, N)
+
 #define MICRO_NORMAL_COLS(iter, a, b) ((MICRO_NORMAL(iter)) ? a : b)
 
 #define MICRO_LOAD1(lhs_ptr, iter) \
@@ -161,9 +192,15 @@ EIGEN_ALWAYS_INLINE void bcouple(PacketBlock<Packet,N>& taccReal, PacketBlock<Pa
 
 #define MICRO_COMPLEX_PREFETCH_ONE(iter) MICRO_PREFETCH1(lhs_ptr_real, iter)
 
+#ifdef USE_PARTIAL_PACKETS
+#define MICRO_UPDATE_MASK
+#else
+#define MICRO_UPDATE_MASK EIGEN_UNUSED_VARIABLE(pMask);
+#endif
+
 #define MICRO_UPDATE \
   if (accCols == accCols2) { \
-    EIGEN_UNUSED_VARIABLE(pMask); \
+    MICRO_UPDATE_MASK \
     EIGEN_UNUSED_VARIABLE(offsetA); \
     row += unroll_factor*accCols; \
   }
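Note: MICRO_UNROLL_ITER now reduces the old per-remaining-rows switch (kept as MICRO_UNROLL_ITER1 for the complex kernels) to a single boolean dispatch: the runtime question "is there a partial tail?" selects a compile-time bool template argument, so the full-width path is instantiated without any masking code. A minimal sketch of that dispatch idiom, with hypothetical names:

#include <cstdio>

// The bool template parameter plays the role of M in MICRO_UNROLL_ITER2:
// it is known at compile time inside the instantiated iteration.
template<bool partial>
void unrolled_iteration_sketch(int rows) {
  if (partial) std::printf("partial-packet stores for %d tail row(s)\n", rows);
  else         std::printf("full-width stores\n");
}

// Mirrors: if (remaining_rows) { func(N, true); } else { func(N, false); }
void dispatch_sketch(int remaining_rows) {
  if (remaining_rows) unrolled_iteration_sketch<true>(remaining_rows);
  else                unrolled_iteration_sketch<false>(0);
}

int main() {
  dispatch_sketch(0);   // full block: no tail
  dispatch_sketch(3);   // 3 leftover rows: partial path
}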
@@ -39,18 +39,34 @@ EIGEN_ALWAYS_INLINE void bsetzeroMMA(__vector_quad* acc)
   __builtin_mma_xxsetaccz(acc);
 }
 
+#ifdef USE_PARTIAL_PACKETS
+template<typename DataMapper, typename Packet, bool full>
+EIGEN_ALWAYS_INLINE void storeAccumulator(Index i, const DataMapper& data, const Packet& alpha, const Index elements, __vector_quad* acc)
+#else
 template<typename DataMapper, typename Packet, const Index accCols, const Index accCols2>
 EIGEN_ALWAYS_INLINE void storeAccumulator(Index i, const DataMapper& data, const Packet& alpha, const Packet& pMask, __vector_quad* acc)
+#endif
 {
   PacketBlock<Packet, 4> result;
   __builtin_mma_disassemble_acc(&result.packet, acc);
 
   PacketBlock<Packet, 4> tRes;
+#ifdef USE_PARTIAL_PACKETS
+  if (full) {
+    EIGEN_UNUSED_VARIABLE(elements);
+    bload<DataMapper, Packet, 0, ColMajor, false, 4>(tRes, data, i, 0);
+    bscale<Packet, 4>(tRes, result, alpha);
+    bstore<DataMapper, Packet, 4>(tRes, data, i);
+  } else {
+    bload_partial<DataMapper, Packet, 0, false, 4>(tRes, data, i, elements);
+    bscale<Packet, 4>(tRes, result, alpha);
+    bstore_partial<DataMapper, Packet, 4>(tRes, data, i, elements);
+  }
+#else
   bload<DataMapper, Packet, 0, ColMajor, false, 4>(tRes, data, i, 0);
-
   bscale<Packet, 4, (accCols != accCols2)>(tRes, result, alpha, pMask);
-
   bstore<DataMapper, Packet, 4>(tRes, data, i);
+#endif
 }
 
 template<typename DataMapper, typename Packet, typename Packetc, const Index accCols, const Index accCols2>
@@ -270,14 +286,25 @@ EIGEN_ALWAYS_INLINE void ploadLhsMMA(const double* lhs, __vector_pair& lhsV)
 
 #define MICRO_MMA_PREFETCH MICRO_MMA_UNROLL(MICRO_PREFETCH_ONE)
 
+#ifdef USE_PARTIAL_PACKETS
+#define MICRO_MMA_STORE_ONE(iter) \
+  if (unroll_factor > iter) { \
+    storeAccumulator<DataMapper, Packet, MICRO_NORMAL_PARTIAL(iter)>(row + iter*accCols, res, pAlpha, accCols2, &accZero##iter); \
+  }
+#else
 #define MICRO_MMA_STORE_ONE(iter) \
   if (unroll_factor > iter) { \
     storeAccumulator<DataMapper, Packet, accCols, (unroll_factor != (iter + 1)) ? accCols : accCols2>(row + iter*accCols, res, pAlpha, pMask, &accZero##iter); \
   }
+#endif
 
 #define MICRO_MMA_STORE MICRO_MMA_UNROLL(MICRO_MMA_STORE_ONE)
 
+#ifdef USE_PARTIAL_PACKETS
+template<int unroll_factor, typename Scalar, typename Packet, typename RhsPacket, typename DataMapper, const Index accRows, const Index accCols, bool full>
+#else
 template<int unroll_factor, typename Scalar, typename Packet, typename RhsPacket, typename DataMapper, const Index accRows, const Index accCols, const Index accCols2>
+#endif
 EIGEN_ALWAYS_INLINE void gemm_unrolled_MMA_iteration(
   const DataMapper& res,
   const Scalar* lhs_base,
@@ -287,7 +314,12 @@ EIGEN_ALWAYS_INLINE void gemm_unrolled_MMA_iteration(
   Index offsetA,
   Index& row,
   const Packet& pAlpha,
-  const Packet& pMask)
+#ifdef USE_PARTIAL_PACKETS
+  Index accCols2
+#else
+  const Packet& pMask
+#endif
+  )
 {
   const Scalar* rhs_ptr = rhs_base;
   const Scalar* lhs_ptr0 = NULL, * lhs_ptr1 = NULL, * lhs_ptr2 = NULL, * lhs_ptr3 = NULL, * lhs_ptr4 = NULL, * lhs_ptr5 = NULL, * lhs_ptr6 = NULL, * lhs_ptr7 = NULL;
@@ -312,9 +344,15 @@ EIGEN_ALWAYS_INLINE void gemm_unrolled_MMA_iteration(
   MICRO_UPDATE
 }
 
+#ifdef USE_PARTIAL_PACKETS
+#define MICRO_MMA_UNROLL_ITER2(N, M) \
+  gemm_unrolled_MMA_iteration<N + (M ? 1 : 0), Scalar, Packet, RhsPacket, DataMapper, accRows, accCols, !M>(res3, lhs_base, rhs_base, depth, strideA, offsetA, row, pAlpha, M ? remaining_rows : accCols); \
+  if (M) return;
+#else
 #define MICRO_MMA_UNROLL_ITER2(N, M) \
   gemm_unrolled_MMA_iteration<N + (M ? 1 : 0), Scalar, Packet, RhsPacket, DataMapper, accRows, accCols, M ? M : accCols>(res3, lhs_base, rhs_base, depth, strideA, offsetA, row, pAlpha, pMask); \
   if (M) return;
+#endif
 
 template<typename Scalar, typename Packet, typename RhsPacket, typename DataMapper, const Index accRows, const Index accCols>
 EIGEN_ALWAYS_INLINE void gemmMMA_cols(
@@ -643,22 +681,22 @@ EIGEN_ALWAYS_INLINE void gemmMMA_complex_cols(
   switch( (rows-row)/accCols ) {
 #if MAX_COMPLEX_MMA_UNROLL > 4
     case 4:
-      MICRO_UNROLL_ITER(MICRO_COMPLEX_MMA_UNROLL_ITER2, 4)
+      MICRO_COMPLEX_UNROLL_ITER(MICRO_COMPLEX_MMA_UNROLL_ITER2, 4)
       break;
 #endif
 #if MAX_COMPLEX_MMA_UNROLL > 3
     case 3:
-      MICRO_UNROLL_ITER(MICRO_COMPLEX_MMA_UNROLL_ITER2, 3)
+      MICRO_COMPLEX_UNROLL_ITER(MICRO_COMPLEX_MMA_UNROLL_ITER2, 3)
       break;
 #endif
 #if MAX_COMPLEX_MMA_UNROLL > 2
     case 2:
-      MICRO_UNROLL_ITER(MICRO_COMPLEX_MMA_UNROLL_ITER2, 2)
+      MICRO_COMPLEX_UNROLL_ITER(MICRO_COMPLEX_MMA_UNROLL_ITER2, 2)
       break;
 #endif
 #if MAX_COMPLEX_MMA_UNROLL > 1
     case 1:
-      MICRO_UNROLL_ITER(MICRO_COMPLEX_MMA_UNROLL_ITER2, 1)
+      MICRO_COMPLEX_UNROLL_ITER(MICRO_COMPLEX_MMA_UNROLL_ITER2, 1)
       break;
 #endif
     default:
@@ -513,6 +513,7 @@ EIGEN_ALWAYS_INLINE Packet pload_partial_common(const __UNPACK_TYPE__(Packet)* f
   eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet");
   const Index size = sizeof(__UNPACK_TYPE__(Packet));
 #ifdef _ARCH_PWR9
+  EIGEN_UNUSED_VARIABLE(packet_size);
   EIGEN_DEBUG_ALIGNED_LOAD
   EIGEN_UNUSED_VARIABLE(from);
   Packet load = vec_xl_len(const_cast<__UNPACK_TYPE__(Packet)*>(from), n * size);
@@ -645,6 +646,7 @@ template<typename Packet> EIGEN_ALWAYS_INLINE void pstore_partial_common(__UNPAC
   eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet");
   const Index size = sizeof(__UNPACK_TYPE__(Packet));
 #ifdef _ARCH_PWR9
+  EIGEN_UNUSED_VARIABLE(packet_size);
   EIGEN_UNUSED_VARIABLE(to);
   EIGEN_DEBUG_ALIGNED_STORE
   Packet store = from;
@@ -1215,6 +1217,7 @@ template<typename Packet> EIGEN_ALWAYS_INLINE Packet ploadu_partial_common(const
   eigen_assert(n <= packet_size && "number of elements will read past end of packet");
   const Index size = sizeof(__UNPACK_TYPE__(Packet));
 #ifdef _ARCH_PWR9
+  EIGEN_UNUSED_VARIABLE(packet_size);
   EIGEN_DEBUG_ALIGNED_LOAD
   EIGEN_DEBUG_UNALIGNED_LOAD
   return vec_xl_len(const_cast<__UNPACK_TYPE__(Packet)*>(from), n * size);
@@ -1402,6 +1405,7 @@ template<typename Packet> EIGEN_ALWAYS_INLINE void pstoreu_partial_common(__UNPA
   eigen_assert(n <= packet_size && "number of elements will write past end of packet");
   const Index size = sizeof(__UNPACK_TYPE__(Packet));
 #ifdef _ARCH_PWR9
+  EIGEN_UNUSED_VARIABLE(packet_size);
   EIGEN_DEBUG_UNALIGNED_STORE
   vec_xst_len(from, to, n * size);
 #else
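Note: on Power9 the partial load/store primitives map to the ISA 3.0 length-controlled instructions. vec_xl_len and vec_xst_len take their length argument in bytes, which is why the code above passes n * size rather than the element count n. A guarded sketch, assuming a Power9 toolchain (e.g. gcc -mcpu=power9):

#ifdef _ARCH_PWR9
#include <altivec.h>

// Reads exactly n * sizeof(float) bytes; the remaining lanes are zero-filled.
static __vector float load_n_floats(const float* from, int n) {
  return vec_xl_len(const_cast<float*>(from), n * sizeof(float));
}

// Writes exactly n * sizeof(float) bytes; memory past the n-th float is untouched.
static void store_n_floats(float* to, __vector float v, int n) {
  vec_xst_len(v, to, n * sizeof(float));
}

int main() {
  float src[3] = {1.f, 2.f, 3.f}, dst[3] = {};
  store_n_floats(dst, load_n_floats(src, 3), 3);
}
#else
int main() {}   // nothing to demonstrate on non-Power targets
#endif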
@@ -292,7 +292,7 @@ public:
   }
 
   template<typename PacketType>
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacketPartial(Index i, Index n, Index /*offset*/) const {
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacketPartial(Index i, Index n, Index /*offset*/ = 0) const {
     return pgather_partial<Scalar,PacketType>(m_data + i*m_incr.value(), m_incr.value(), n);
   }
 
@@ -302,7 +302,7 @@ public:
   }
 
   template<typename PacketType>
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketPartial(Index i, const PacketType &p, Index n, Index /*offset*/) const {
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketPartial(Index i, const PacketType &p, Index n, Index /*offset*/ = 0) const {
     pscatter_partial<Scalar, PacketType>(m_data + i*m_incr.value(), p, m_incr.value(), n);
   }
 
@@ -343,7 +343,7 @@ public:
   }
 
   template<typename PacketType>
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacketPartial(Index i, Index j, Index n, Index /*offset*/) const {
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketType loadPacketPartial(Index i, Index j, Index n, Index /*offset*/ = 0) const {
     return pgather_partial<Scalar,PacketType>(&operator()(i, j),m_incr.value(),n);
   }
 
@@ -358,7 +358,7 @@ public:
   }
 
   template<typename PacketType>
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketPartial(Index i, Index j, const PacketType &p, Index n, Index /*offset*/) const {
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void storePacketPartial(Index i, Index j, const PacketType &p, Index n, Index /*offset*/ = 0) const {
     pscatter_partial<Scalar, PacketType>(&operator()(i, j), p, m_incr.value(), n);
   }
 
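Note: the blas_data_mapper changes above only add a default of 0 for the (ignored) offset parameter, so the new partial-packet call sites such as res.template loadPacketPartial<Packet>(row, M, elements) can omit it — likely one of the compilation errors the commit title refers to. A tiny sketch of why the defaulted trailing argument keeps both old and new callers compiling, with an illustrative mapper:

// Both the old explicit-offset call sites and the new ones that omit
// the unused offset compile against the same signature.
struct MapperSketch {
  float loadPacketPartial(int i, int n, int offset = 0) const {
    (void)offset;                          // ignored, as in blas_data_mapper
    return float(i + n);
  }
};

int main() {
  MapperSketch m;
  float a = m.loadPacketPartial(0, 2, 0);  // old style: offset spelled out
  float b = m.loadPacketPartial(0, 2);     // new style: offset defaulted
  return (a == b) ? 0 : 1;
}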