mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-20 22:52:51 +08:00
Optimize TensorBlockCopyOp
This commit is contained in:
parent
9f33e71e9d
commit
e95696acb3
@ -144,24 +144,88 @@ class TensorBlock {
|
|||||||
|
|
||||||
template <typename Scalar, typename StorageIndex>
|
template <typename Scalar, typename StorageIndex>
|
||||||
struct TensorBlockCopyOp {
|
struct TensorBlockCopyOp {
|
||||||
|
|
||||||
|
typedef typename packet_traits<Scalar>::type Packet;
|
||||||
|
enum {
|
||||||
|
Vectorizable = internal::packet_traits<Scalar>::Vectorizable,
|
||||||
|
PacketSize = internal::packet_traits<Scalar>::size
|
||||||
|
};
|
||||||
|
|
||||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void Run(
|
||||||
const StorageIndex num_coeff_to_copy, const StorageIndex dst_index,
|
const StorageIndex num_coeff_to_copy, const StorageIndex dst_index,
|
||||||
const StorageIndex dst_stride, Scalar* EIGEN_RESTRICT dst_data,
|
const StorageIndex dst_stride, Scalar* EIGEN_RESTRICT dst_data,
|
||||||
const StorageIndex src_index, const StorageIndex src_stride,
|
const StorageIndex src_index, const StorageIndex src_stride,
|
||||||
const Scalar* EIGEN_RESTRICT src_data) {
|
const Scalar* EIGEN_RESTRICT src_data) {
|
||||||
const Scalar* src_base = &src_data[src_index];
|
const Scalar* src = &src_data[src_index];
|
||||||
Scalar* dst_base = &dst_data[dst_index];
|
Scalar* dst = &dst_data[dst_index];
|
||||||
|
|
||||||
typedef const Array<Scalar, Dynamic, 1> Src;
|
if (!Vectorizable) {
|
||||||
typedef Array<Scalar, Dynamic, 1> Dst;
|
for (Index i = 0; i < num_coeff_to_copy; ++i) {
|
||||||
|
dst[i * dst_stride] = src[i * src_stride];
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
typedef Map<Src, 0, InnerStride<> > SrcMap;
|
if (src_stride == 1) {
|
||||||
typedef Map<Dst, 0, InnerStride<> > DstMap;
|
const Index vectorized_size = (num_coeff_to_copy / PacketSize) * PacketSize;
|
||||||
|
if (dst_stride == 1) {
|
||||||
const SrcMap src(src_base, num_coeff_to_copy, InnerStride<>(src_stride));
|
// LINEAR
|
||||||
DstMap dst(dst_base, num_coeff_to_copy, InnerStride<>(dst_stride));
|
for (Index i = 0; i < vectorized_size; i += PacketSize) {
|
||||||
|
Packet p = internal::ploadu<Packet>(src + i);
|
||||||
dst = src;
|
internal::pstoreu<Scalar, Packet>(dst + i, p);
|
||||||
|
}
|
||||||
|
for (Index i = vectorized_size; i < num_coeff_to_copy; ++i) {
|
||||||
|
dst[i] = src[i];
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// SCATTER
|
||||||
|
for (Index i = 0; i < vectorized_size; i += PacketSize) {
|
||||||
|
Packet p = internal::ploadu<Packet>(src + i);
|
||||||
|
internal::pscatter<Scalar, Packet>(dst + i * dst_stride, p, dst_stride);
|
||||||
|
}
|
||||||
|
for (Index i = vectorized_size; i < num_coeff_to_copy; ++i) {
|
||||||
|
dst[i * dst_stride] = src[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (src_stride == 0) {
|
||||||
|
const Index vectorized_size = (num_coeff_to_copy / PacketSize) * PacketSize;
|
||||||
|
if (dst_stride == 1) {
|
||||||
|
// LINEAR
|
||||||
|
for (Index i = 0; i < vectorized_size; i += PacketSize) {
|
||||||
|
Packet p = internal::pload1<Packet>(src);
|
||||||
|
internal::pstoreu<Scalar, Packet>(dst + i, p);
|
||||||
|
}
|
||||||
|
for (Index i = vectorized_size; i < num_coeff_to_copy; ++i) {
|
||||||
|
dst[i] = *src;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// SCATTER
|
||||||
|
for (Index i = 0; i < vectorized_size; i += PacketSize) {
|
||||||
|
Packet p = internal::pload1<Packet>(src);
|
||||||
|
internal::pscatter<Scalar, Packet>(dst + i * dst_stride, p, dst_stride);
|
||||||
|
}
|
||||||
|
for (Index i = vectorized_size; i < num_coeff_to_copy; ++i) {
|
||||||
|
dst[i * dst_stride] = *src;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (dst_stride == 1) {
|
||||||
|
// GATHER
|
||||||
|
const Index vectorized_size = (num_coeff_to_copy / PacketSize) * PacketSize;
|
||||||
|
for (Index i = 0; i < vectorized_size; i += PacketSize) {
|
||||||
|
Packet p = internal::pgather<Scalar, Packet>(src + i * src_stride, src_stride);
|
||||||
|
internal::pstoreu<Scalar, Packet>(dst + i, p);
|
||||||
|
}
|
||||||
|
for (Index i = vectorized_size; i < num_coeff_to_copy; ++i) {
|
||||||
|
dst[i] = src[i * src_stride];
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// RANDOM
|
||||||
|
for (Index i = 0; i < num_coeff_to_copy; ++i) {
|
||||||
|
dst[i * dst_stride] = src[i * src_stride];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user