mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-13 20:26:03 +08:00
Add optional offset parameter to ploadu_partial and pstoreu_partial
This commit is contained in:
parent
44c20bbbe3
commit
211c5dfc67
@ -730,14 +730,14 @@ ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
|
||||
/** \internal \returns a packet holding n elements read from \a *from, placed starting at packet position \a offset (un-aligned load)
|
||||
* All elements outside the loaded range (before \a offset or after the last element loaded) will be initialized with zero */
|
||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||
ploadu_partial(const typename unpacket_traits<Packet>::type* from, const Index n)
|
||||
ploadu_partial(const typename unpacket_traits<Packet>::type* from, const Index n, const Index offset = 0)
|
||||
{
|
||||
const Index packet_size = unpacket_traits<Packet>::size;
|
||||
eigen_assert(n <= packet_size && "number of elements will read past end of packet");
|
||||
eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet");
|
||||
typedef typename unpacket_traits<Packet>::type Scalar;
|
||||
EIGEN_ALIGN_MAX Scalar elements[packet_size] = { Scalar(0) };
|
||||
for (Index i = 0; i < numext::mini(n,packet_size); i++) {
|
||||
elements[i] = from[i];
|
||||
for (Index i = offset; i < numext::mini(n+offset,packet_size); i++) {
|
||||
elements[i] = from[i-offset];
|
||||
}
|
||||
return pload<Packet>(elements);
|
||||
}
|
||||
@ -855,14 +855,14 @@ template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu
|
||||
{ (*to) = from; }
|
||||
|
||||
/** \internal copy n elements of the packet \a from, starting at packet element \a offset, to \a *to (un-aligned store) */
|
||||
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu_partial(Scalar* to, const Packet& from, const Index n)
|
||||
template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0)
|
||||
{
|
||||
const Index packet_size = unpacket_traits<Packet>::size;
|
||||
eigen_assert(n <= packet_size && "number of elements will write past end of packet");
|
||||
eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet");
|
||||
EIGEN_ALIGN_MAX Scalar elements[packet_size];
|
||||
pstore<Scalar>(elements, from);
|
||||
for (Index i = 0; i < numext::mini(n,packet_size); i++) {
|
||||
to[i] = elements[i];
|
||||
for (Index i = 0; i < numext::mini(n,packet_size-offset); i++) {
|
||||
to[i] = elements[i + offset];
|
||||
}
|
||||
}
|
||||
|
||||
@ -1201,7 +1201,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_partial(const typename unpac
|
||||
if(Alignment >= unpacket_traits<Packet>::alignment)
|
||||
return pload_partial<Packet>(from, n, offset);
|
||||
else
|
||||
return ploadu_partial<Packet>(from, n);
|
||||
return ploadu_partial<Packet>(from, n, offset);
|
||||
}
|
||||
|
||||
/** \internal copy the packet \a from to \a *to.
|
||||
@ -1223,7 +1223,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret_partial(Scalar* to, const Pac
|
||||
if(Alignment >= unpacket_traits<Packet>::alignment)
|
||||
pstore_partial(to, from, n, offset);
|
||||
else
|
||||
pstoreu_partial(to, from, n);
|
||||
pstoreu_partial(to, from, n, offset);
|
||||
}
|
||||
|
||||
/** \internal \returns a packet version of \a *from.
|
||||
|
@ -136,16 +136,16 @@ template<> EIGEN_ALWAYS_INLINE Packet2cf pload_partial<Packet2cf>(const std::com
|
||||
{
|
||||
return Packet2cf(pload_partial<Packet4f>((const float *) from, n * 2, offset * 2));
|
||||
}
|
||||
template<> EIGEN_ALWAYS_INLINE Packet2cf ploadu_partial<Packet2cf>(const std::complex<float>* from, const Index n)
|
||||
template<> EIGEN_ALWAYS_INLINE Packet2cf ploadu_partial<Packet2cf>(const std::complex<float>* from, const Index n, const Index offset)
|
||||
{
|
||||
return Packet2cf(ploadu_partial<Packet4f>((const float*) from, n * 2));
|
||||
return Packet2cf(ploadu_partial<Packet4f>((const float*) from, n * 2, offset * 2));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstore((float*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { pstoreu((float*)to, from.v); }
|
||||
template<> EIGEN_ALWAYS_INLINE void pstore_partial <std::complex<float> >(std::complex<float> * to, const Packet2cf& from, const Index n, const Index offset) { pstore_partial((float*)to, from.v, n * 2, offset * 2); }
|
||||
template<> EIGEN_ALWAYS_INLINE void pstoreu_partial<std::complex<float> >(std::complex<float> * to, const Packet2cf& from, const Index n) { pstoreu_partial((float*)to, from.v, n * 2); }
|
||||
template<> EIGEN_ALWAYS_INLINE void pstoreu_partial<std::complex<float> >(std::complex<float> * to, const Packet2cf& from, const Index n, const Index offset) { pstoreu_partial((float*)to, from.v, n * 2, offset * 2); }
|
||||
|
||||
EIGEN_STRONG_INLINE Packet2cf pload2(const std::complex<float>& from0, const std::complex<float>& from1)
|
||||
{
|
||||
@ -382,14 +382,14 @@ template<> EIGEN_ALWAYS_INLINE Packet1cd pload_partial<Packet1cd>(const std::com
|
||||
{
|
||||
return Packet1cd(pload_partial<Packet2d>((const double*)from, n * 2, offset * 2));
|
||||
}
|
||||
template<> EIGEN_ALWAYS_INLINE Packet1cd ploadu_partial<Packet1cd>(const std::complex<double>* from, const Index n)
|
||||
template<> EIGEN_ALWAYS_INLINE Packet1cd ploadu_partial<Packet1cd>(const std::complex<double>* from, const Index n, const Index offset)
|
||||
{
|
||||
return Packet1cd(ploadu_partial<Packet2d>((const double*)from, n * 2));
|
||||
return Packet1cd(ploadu_partial<Packet2d>((const double*)from, n * 2, offset * 2));
|
||||
}
|
||||
template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { pstore((double*)to, from.v); }
|
||||
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { pstoreu((double*)to, from.v); }
|
||||
template<> EIGEN_ALWAYS_INLINE void pstore_partial <std::complex<double> >(std::complex<double> * to, const Packet1cd& from, const Index n, const Index offset) { pstore_partial((double*)to, from.v, n * 2, offset * 2); }
|
||||
template<> EIGEN_ALWAYS_INLINE void pstoreu_partial<std::complex<double> >(std::complex<double> * to, const Packet1cd& from, const Index n) { pstoreu_partial((double*)to, from.v, n * 2); }
|
||||
template<> EIGEN_ALWAYS_INLINE void pstoreu_partial<std::complex<double> >(std::complex<double> * to, const Packet1cd& from, const Index n, const Index offset) { pstoreu_partial((double*)to, from.v, n * 2, offset * 2); }
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
|
||||
{ /* here we really have to use unaligned loads :( */ return ploadu<Packet1cd>(&from); }
|
||||
|
@ -1260,26 +1260,36 @@ template<> EIGEN_STRONG_INLINE Packet16uc ploadu<Packet16uc>(const unsigned char
|
||||
return ploadu_common<Packet16uc>(from);
|
||||
}
|
||||
|
||||
template<typename Packet> EIGEN_ALWAYS_INLINE Packet ploadu_partial_common(const __UNPACK_TYPE__(Packet)* from, const Index n)
|
||||
template<typename Packet> EIGEN_ALWAYS_INLINE Packet ploadu_partial_common(const __UNPACK_TYPE__(Packet)* from, const Index n, const Index offset)
|
||||
{
|
||||
const Index packet_size = unpacket_traits<Packet>::size;
|
||||
eigen_internal_assert(n <= packet_size && "number of elements will read past end of packet");
|
||||
eigen_internal_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet");
|
||||
const Index size = sizeof(__UNPACK_TYPE__(Packet));
|
||||
#ifdef _ARCH_PWR9
|
||||
EIGEN_UNUSED_VARIABLE(packet_size);
|
||||
EIGEN_DEBUG_ALIGNED_LOAD
|
||||
EIGEN_DEBUG_UNALIGNED_LOAD
|
||||
return vec_xl_len(const_cast<__UNPACK_TYPE__(Packet)*>(from), n * size);
|
||||
Packet load = vec_xl_len(const_cast<__UNPACK_TYPE__(Packet)*>(from), n * size);
|
||||
if (offset) {
|
||||
Packet16uc shift = pset1<Packet16uc>(offset * 8 * size);
|
||||
#ifdef _BIG_ENDIAN
|
||||
load = Packet(vec_sro(Packet16uc(load), shift));
|
||||
#else
|
||||
load = Packet(vec_slo(Packet16uc(load), shift));
|
||||
#endif
|
||||
}
|
||||
return load;
|
||||
#else
|
||||
if (n) {
|
||||
EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) load[packet_size];
|
||||
unsigned char* load2 = reinterpret_cast<unsigned char *>(load + offset);
|
||||
unsigned char* from2 = reinterpret_cast<unsigned char *>(const_cast<__UNPACK_TYPE__(Packet)*>(from));
|
||||
Index n2 = n * size;
|
||||
if (16 <= n2) {
|
||||
return ploadu<Packet>(from);
|
||||
pstoreu(load2, ploadu<Packet16uc>(from2));
|
||||
} else {
|
||||
memcpy((void *)load2, (void *)from2, n2);
|
||||
}
|
||||
EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) load[packet_size];
|
||||
unsigned char* load2 = reinterpret_cast<unsigned char *>(load);
|
||||
unsigned char* from2 = reinterpret_cast<unsigned char *>(const_cast<__UNPACK_TYPE__(Packet)*>(from));
|
||||
memcpy((void *)load2, (void *)from2, n2);
|
||||
return pload_ignore<Packet>(load);
|
||||
} else {
|
||||
return Packet(pset1<Packet16uc>(0));
|
||||
@ -1287,33 +1297,33 @@ template<typename Packet> EIGEN_ALWAYS_INLINE Packet ploadu_partial_common(const
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_ALWAYS_INLINE Packet4f ploadu_partial<Packet4f>(const float* from, const Index n)
|
||||
template<> EIGEN_ALWAYS_INLINE Packet4f ploadu_partial<Packet4f>(const float* from, const Index n, const Index offset)
|
||||
{
|
||||
return ploadu_partial_common<Packet4f>(from, n);
|
||||
return ploadu_partial_common<Packet4f>(from, n, offset);
|
||||
}
|
||||
template<> EIGEN_ALWAYS_INLINE Packet4i ploadu_partial<Packet4i>(const int* from, const Index n)
|
||||
template<> EIGEN_ALWAYS_INLINE Packet4i ploadu_partial<Packet4i>(const int* from, const Index n, const Index offset)
|
||||
{
|
||||
return ploadu_partial_common<Packet4i>(from, n);
|
||||
return ploadu_partial_common<Packet4i>(from, n, offset);
|
||||
}
|
||||
template<> EIGEN_ALWAYS_INLINE Packet8s ploadu_partial<Packet8s>(const short int* from, const Index n)
|
||||
template<> EIGEN_ALWAYS_INLINE Packet8s ploadu_partial<Packet8s>(const short int* from, const Index n, const Index offset)
|
||||
{
|
||||
return ploadu_partial_common<Packet8s>(from, n);
|
||||
return ploadu_partial_common<Packet8s>(from, n, offset);
|
||||
}
|
||||
template<> EIGEN_ALWAYS_INLINE Packet8us ploadu_partial<Packet8us>(const unsigned short int* from, const Index n)
|
||||
template<> EIGEN_ALWAYS_INLINE Packet8us ploadu_partial<Packet8us>(const unsigned short int* from, const Index n, const Index offset)
|
||||
{
|
||||
return ploadu_partial_common<Packet8us>(from, n);
|
||||
return ploadu_partial_common<Packet8us>(from, n, offset);
|
||||
}
|
||||
template<> EIGEN_ALWAYS_INLINE Packet8bf ploadu_partial<Packet8bf>(const bfloat16* from, const Index n)
|
||||
template<> EIGEN_ALWAYS_INLINE Packet8bf ploadu_partial<Packet8bf>(const bfloat16* from, const Index n, const Index offset)
|
||||
{
|
||||
return ploadu_partial_common<Packet8us>(reinterpret_cast<const unsigned short int*>(from), n);
|
||||
return ploadu_partial_common<Packet8us>(reinterpret_cast<const unsigned short int*>(from), n, offset);
|
||||
}
|
||||
template<> EIGEN_ALWAYS_INLINE Packet16c ploadu_partial<Packet16c>(const signed char* from, const Index n)
|
||||
template<> EIGEN_ALWAYS_INLINE Packet16c ploadu_partial<Packet16c>(const signed char* from, const Index n, const Index offset)
|
||||
{
|
||||
return ploadu_partial_common<Packet16c>(from, n);
|
||||
return ploadu_partial_common<Packet16c>(from, n, offset);
|
||||
}
|
||||
template<> EIGEN_ALWAYS_INLINE Packet16uc ploadu_partial<Packet16uc>(const unsigned char* from, const Index n)
|
||||
template<> EIGEN_ALWAYS_INLINE Packet16uc ploadu_partial<Packet16uc>(const unsigned char* from, const Index n, const Index offset)
|
||||
{
|
||||
return ploadu_partial_common<Packet16uc>(from, n);
|
||||
return ploadu_partial_common<Packet16uc>(from, n, offset);
|
||||
}
|
||||
|
||||
template<typename Packet> EIGEN_STRONG_INLINE Packet ploaddup_common(const __UNPACK_TYPE__(Packet)* from)
|
||||
@ -1436,57 +1446,67 @@ template<> EIGEN_STRONG_INLINE void pstoreu<unsigned char>(unsigned char* t
|
||||
pstoreu_common<Packet16uc>(to, from);
|
||||
}
|
||||
|
||||
template<typename Packet> EIGEN_ALWAYS_INLINE void pstoreu_partial_common(__UNPACK_TYPE__(Packet)* to, const Packet& from, const Index n)
|
||||
template<typename Packet> EIGEN_ALWAYS_INLINE void pstoreu_partial_common(__UNPACK_TYPE__(Packet)* to, const Packet& from, const Index n, const Index offset)
|
||||
{
|
||||
const Index packet_size = unpacket_traits<Packet>::size;
|
||||
eigen_internal_assert(n <= packet_size && "number of elements will write past end of packet");
|
||||
eigen_internal_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet");
|
||||
const Index size = sizeof(__UNPACK_TYPE__(Packet));
|
||||
#ifdef _ARCH_PWR9
|
||||
EIGEN_UNUSED_VARIABLE(packet_size);
|
||||
EIGEN_DEBUG_UNALIGNED_STORE
|
||||
vec_xst_len(from, to, n * size);
|
||||
Packet store = from;
|
||||
if (offset) {
|
||||
Packet16uc shift = pset1<Packet16uc>(offset * 8 * size);
|
||||
#ifdef _BIG_ENDIAN
|
||||
store = Packet(vec_slo(Packet16uc(store), shift));
|
||||
#else
|
||||
store = Packet(vec_sro(Packet16uc(store), shift));
|
||||
#endif
|
||||
}
|
||||
vec_xst_len(store, to, n * size);
|
||||
#else
|
||||
if (n) {
|
||||
Index n2 = n * size;
|
||||
if (16 <= n2) {
|
||||
pstoreu(to, from);
|
||||
}
|
||||
EIGEN_ALIGN16 __UNPACK_TYPE__(Packet) store[packet_size];
|
||||
pstore(store, from);
|
||||
unsigned char* store2 = reinterpret_cast<unsigned char *>(store);
|
||||
unsigned char* store2 = reinterpret_cast<unsigned char *>(store + offset);
|
||||
unsigned char* to2 = reinterpret_cast<unsigned char *>(to);
|
||||
memcpy((void *)to2, (void *)store2, n2);
|
||||
Index n2 = n * size;
|
||||
if (16 <= n2) {
|
||||
pstoreu(to2, ploadu<Packet16uc>(store2));
|
||||
} else {
|
||||
memcpy((void *)to2, (void *)store2, n2);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
template<> EIGEN_ALWAYS_INLINE void pstoreu_partial<float>(float* to, const Packet4f& from, const Index n)
|
||||
template<> EIGEN_ALWAYS_INLINE void pstoreu_partial<float>(float* to, const Packet4f& from, const Index n, const Index offset)
|
||||
{
|
||||
pstoreu_partial_common<Packet4f>(to, from, n);
|
||||
pstoreu_partial_common<Packet4f>(to, from, n, offset);
|
||||
}
|
||||
template<> EIGEN_ALWAYS_INLINE void pstoreu_partial<int>(int* to, const Packet4i& from, const Index n)
|
||||
template<> EIGEN_ALWAYS_INLINE void pstoreu_partial<int>(int* to, const Packet4i& from, const Index n, const Index offset)
|
||||
{
|
||||
pstoreu_partial_common<Packet4i>(to, from, n);
|
||||
pstoreu_partial_common<Packet4i>(to, from, n, offset);
|
||||
}
|
||||
template<> EIGEN_ALWAYS_INLINE void pstoreu_partial<short int>(short int* to, const Packet8s& from, const Index n)
|
||||
template<> EIGEN_ALWAYS_INLINE void pstoreu_partial<short int>(short int* to, const Packet8s& from, const Index n, const Index offset)
|
||||
{
|
||||
pstoreu_partial_common<Packet8s>(to, from, n);
|
||||
pstoreu_partial_common<Packet8s>(to, from, n, offset);
|
||||
}
|
||||
template<> EIGEN_ALWAYS_INLINE void pstoreu_partial<unsigned short int>(unsigned short int* to, const Packet8us& from, const Index n)
|
||||
template<> EIGEN_ALWAYS_INLINE void pstoreu_partial<unsigned short int>(unsigned short int* to, const Packet8us& from, const Index n, const Index offset)
|
||||
{
|
||||
pstoreu_partial_common<Packet8us>(to, from, n);
|
||||
pstoreu_partial_common<Packet8us>(to, from, n, offset);
|
||||
}
|
||||
template<> EIGEN_ALWAYS_INLINE void pstoreu_partial<bfloat16>(bfloat16* to, const Packet8bf& from, const Index n)
|
||||
template<> EIGEN_ALWAYS_INLINE void pstoreu_partial<bfloat16>(bfloat16* to, const Packet8bf& from, const Index n, const Index offset)
|
||||
{
|
||||
pstoreu_partial_common<Packet8us>(reinterpret_cast<unsigned short int*>(to), from, n);
|
||||
pstoreu_partial_common<Packet8us>(reinterpret_cast<unsigned short int*>(to), from, n, offset);
|
||||
}
|
||||
template<> EIGEN_ALWAYS_INLINE void pstoreu_partial<signed char>(signed char* to, const Packet16c& from, const Index n)
|
||||
template<> EIGEN_ALWAYS_INLINE void pstoreu_partial<signed char>(signed char* to, const Packet16c& from, const Index n, const Index offset)
|
||||
{
|
||||
pstoreu_partial_common<Packet16c>(to, from, n);
|
||||
pstoreu_partial_common<Packet16c>(to, from, n, offset);
|
||||
}
|
||||
template<> EIGEN_ALWAYS_INLINE void pstoreu_partial<unsigned char>(unsigned char* to, const Packet16uc& from, const Index n)
|
||||
template<> EIGEN_ALWAYS_INLINE void pstoreu_partial<unsigned char>(unsigned char* to, const Packet16uc& from, const Index n, const Index offset)
|
||||
{
|
||||
pstoreu_partial_common<Packet16uc>(to, from, n);
|
||||
pstoreu_partial_common<Packet16uc>(to, from, n, offset);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_PPC_PREFETCH(addr); }
|
||||
@ -2953,9 +2973,9 @@ template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from)
|
||||
return vec_xl(0, const_cast<double*>(from));
|
||||
}
|
||||
|
||||
template<> EIGEN_ALWAYS_INLINE Packet2d ploadu_partial<Packet2d>(const double* from, const Index n)
|
||||
template<> EIGEN_ALWAYS_INLINE Packet2d ploadu_partial<Packet2d>(const double* from, const Index n, const Index offset)
|
||||
{
|
||||
return ploadu_partial_common<Packet2d>(from, n);
|
||||
return ploadu_partial_common<Packet2d>(from, n, offset);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
|
||||
@ -2972,9 +2992,9 @@ template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d&
|
||||
vec_xst(from, 0, to);
|
||||
}
|
||||
|
||||
template<> EIGEN_ALWAYS_INLINE void pstoreu_partial<double>(double* to, const Packet2d& from, const Index n)
|
||||
template<> EIGEN_ALWAYS_INLINE void pstoreu_partial<double>(double* to, const Packet2d& from, const Index n, const Index offset)
|
||||
{
|
||||
pstoreu_partial_common<Packet2d>(to, from, n);
|
||||
pstoreu_partial_common<Packet2d>(to, from, n, offset);
|
||||
}
|
||||
|
||||
template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_PPC_PREFETCH(addr); }
|
||||
|
Loading…
x
Reference in New Issue
Block a user