mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-02 17:59:46 +08:00
masked load/store framework
This commit is contained in:
parent
cebe09110c
commit
28c3b26d53
@ -136,6 +136,8 @@ struct copy_using_evaluator_traits {
|
|||||||
: Traversal == SliceVectorizedTraversal ? (MayUnrollInner ? InnerUnrolling : NoUnrolling)
|
: Traversal == SliceVectorizedTraversal ? (MayUnrollInner ? InnerUnrolling : NoUnrolling)
|
||||||
#endif
|
#endif
|
||||||
: NoUnrolling;
|
: NoUnrolling;
|
||||||
|
static constexpr bool UsePacketSegment =
|
||||||
|
enable_packet_segment<Src>::value && enable_packet_segment<Dst>::value && has_packet_segment<PacketType>::value;
|
||||||
|
|
||||||
#ifdef EIGEN_DEBUG_ASSIGN
|
#ifdef EIGEN_DEBUG_ASSIGN
|
||||||
static void debug() {
|
static void debug() {
|
||||||
@ -273,6 +275,33 @@ struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlign
|
|||||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename Kernel, int Start, int Stop, int SrcAlignment, int DstAlignment, bool UsePacketSegment>
|
||||||
|
struct copy_using_evaluator_innervec_segment {
|
||||||
|
using PacketType = typename Kernel::PacketType;
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel, Index outer) {
|
||||||
|
kernel.template assignPacketSegmentByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Start, 0,
|
||||||
|
Stop - Start);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Kernel, int Start, int Stop, int SrcAlignment, int DstAlignment>
|
||||||
|
struct copy_using_evaluator_innervec_segment<Kernel, Start, Stop, SrcAlignment, DstAlignment,
|
||||||
|
/*UsePacketSegment*/ false>
|
||||||
|
: copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, Start, Stop> {};
|
||||||
|
|
||||||
|
template <typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
|
||||||
|
struct copy_using_evaluator_innervec_segment<Kernel, Stop, Stop, SrcAlignment, DstAlignment,
|
||||||
|
/*UsePacketSegment*/ true> {
|
||||||
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
|
||||||
|
struct copy_using_evaluator_innervec_segment<Kernel, Stop, Stop, SrcAlignment, DstAlignment,
|
||||||
|
/*UsePacketSegment*/ false> {
|
||||||
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {}
|
||||||
|
};
|
||||||
|
|
||||||
/***************************************************************************
|
/***************************************************************************
|
||||||
* Part 3 : implementation of all cases
|
* Part 3 : implementation of all cases
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
@ -353,28 +382,48 @@ struct dense_assignment_loop_impl<Kernel, DefaultTraversal, InnerUnrolling> {
|
|||||||
// The goal of unaligned_dense_assignment_loop is simply to factorize the handling
|
// The goal of unaligned_dense_assignment_loop is simply to factorize the handling
|
||||||
// of the non vectorizable beginning and ending parts
|
// of the non vectorizable beginning and ending parts
|
||||||
|
|
||||||
template <bool IsAligned = false>
|
template <typename PacketType, int DstAlignment, int SrcAlignment, bool UsePacketSegment, bool Skip>
|
||||||
struct unaligned_dense_assignment_loop {
|
struct unaligned_dense_assignment_loop {
|
||||||
// if IsAligned = true, then do nothing
|
// if Skip == true, then do nothing
|
||||||
template <typename Kernel>
|
template <typename Kernel>
|
||||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel&, Index, Index) {}
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& /*kernel*/, Index /*start*/,
|
||||||
|
Index /*end*/) {}
|
||||||
|
template <typename Kernel>
|
||||||
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& /*kernel*/, Index /*outer*/,
|
||||||
|
Index /*innerStart*/, Index /*innerEnd*/) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <>
|
template <typename PacketType, int DstAlignment, int SrcAlignment>
|
||||||
struct unaligned_dense_assignment_loop<false> {
|
struct unaligned_dense_assignment_loop<PacketType, DstAlignment, SrcAlignment, /*UsePacketSegment*/ true,
|
||||||
// MSVC must not inline this functions. If it does, it fails to optimize the
|
/*Skip*/ false> {
|
||||||
// packet access path.
|
|
||||||
// FIXME check which version exhibits this issue
|
|
||||||
#if EIGEN_COMP_MSVC
|
|
||||||
template <typename Kernel>
|
template <typename Kernel>
|
||||||
static EIGEN_DONT_INLINE void run(Kernel& kernel, Index start, Index end)
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel, Index start, Index end) {
|
||||||
#else
|
Index count = end - start;
|
||||||
|
eigen_assert(count <= unpacket_traits<PacketType>::size);
|
||||||
|
if (count > 0) kernel.template assignPacketSegment<DstAlignment, SrcAlignment, PacketType>(start, 0, count);
|
||||||
|
}
|
||||||
template <typename Kernel>
|
template <typename Kernel>
|
||||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel, Index start, Index end)
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel, Index outer, Index start,
|
||||||
#endif
|
Index end) {
|
||||||
{
|
Index count = end - start;
|
||||||
|
eigen_assert(count <= unpacket_traits<PacketType>::size);
|
||||||
|
if (count > 0)
|
||||||
|
kernel.template assignPacketSegmentByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, start, 0, count);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename PacketType, int DstAlignment, int SrcAlignment>
|
||||||
|
struct unaligned_dense_assignment_loop<PacketType, DstAlignment, SrcAlignment, /*UsePacketSegment*/ false,
|
||||||
|
/*Skip*/ false> {
|
||||||
|
template <typename Kernel>
|
||||||
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel, Index start, Index end) {
|
||||||
for (Index index = start; index < end; ++index) kernel.assignCoeff(index);
|
for (Index index = start; index < end; ++index) kernel.assignCoeff(index);
|
||||||
}
|
}
|
||||||
|
template <typename Kernel>
|
||||||
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel, Index outer, Index innerStart,
|
||||||
|
Index innerEnd) {
|
||||||
|
for (Index inner = innerStart; inner < innerEnd; ++inner) kernel.assignCoeffByOuterInner(outer, inner);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Kernel, int Index_, int Stop>
|
template <typename Kernel, int Index_, int Stop>
|
||||||
@ -395,28 +444,60 @@ struct copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, Stop, Stop> {
|
|||||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel&) {}
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel&) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename Kernel, int Index_, int Stop, bool UsePacketSegment>
|
||||||
|
struct copy_using_evaluator_linearvec_segment {
|
||||||
|
using PacketType = typename Kernel::PacketType;
|
||||||
|
static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment;
|
||||||
|
static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment;
|
||||||
|
|
||||||
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) {
|
||||||
|
kernel.template assignPacketSegment<DstAlignment, SrcAlignment, PacketType>(Index_, 0, Stop - Index_);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Kernel, int Index_, int Stop>
|
||||||
|
struct copy_using_evaluator_linearvec_segment<Kernel, Index_, Stop, /*UsePacketSegment*/ false>
|
||||||
|
: copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, Index_, Stop> {};
|
||||||
|
|
||||||
|
template <typename Kernel, int Stop>
|
||||||
|
struct copy_using_evaluator_linearvec_segment<Kernel, Stop, Stop, /*UsePacketSegment*/ true> {
|
||||||
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel&) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Kernel, int Stop>
|
||||||
|
struct copy_using_evaluator_linearvec_segment<Kernel, Stop, Stop, /*UsePacketSegment*/ false> {
|
||||||
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel&) {}
|
||||||
|
};
|
||||||
|
|
||||||
template <typename Kernel>
|
template <typename Kernel>
|
||||||
struct dense_assignment_loop_impl<Kernel, LinearVectorizedTraversal, NoUnrolling> {
|
struct dense_assignment_loop_impl<Kernel, LinearVectorizedTraversal, NoUnrolling> {
|
||||||
using Scalar = typename Kernel::Scalar;
|
using Scalar = typename Kernel::Scalar;
|
||||||
using PacketType = typename Kernel::PacketType;
|
using PacketType = typename Kernel::PacketType;
|
||||||
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
||||||
static constexpr int RequestedAlignment = Kernel::AssignmentTraits::LinearRequiredAlignment;
|
|
||||||
static constexpr bool DstIsAligned = Kernel::AssignmentTraits::DstAlignment >= RequestedAlignment;
|
|
||||||
static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
|
static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
|
||||||
static constexpr int DstAlignment =
|
static constexpr int DstAlignment = plain_enum_max(Kernel::AssignmentTraits::DstAlignment, alignof(Scalar));
|
||||||
packet_traits<Scalar>::AlignedOnScalar ? RequestedAlignment : Kernel::AssignmentTraits::DstAlignment;
|
static constexpr int RequestedAlignment = unpacket_traits<PacketType>::alignment;
|
||||||
|
static constexpr bool Alignable =
|
||||||
|
(DstAlignment >= RequestedAlignment) || ((RequestedAlignment - DstAlignment) % sizeof(Scalar) == 0);
|
||||||
|
static constexpr int Alignment = Alignable ? RequestedAlignment : DstAlignment;
|
||||||
|
static constexpr bool DstIsAligned = DstAlignment >= Alignment;
|
||||||
|
static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;
|
||||||
|
|
||||||
|
using head_loop =
|
||||||
|
unaligned_dense_assignment_loop<PacketType, DstAlignment, SrcAlignment, UsePacketSegment, DstIsAligned>;
|
||||||
|
using tail_loop = unaligned_dense_assignment_loop<PacketType, Alignment, SrcAlignment, UsePacketSegment, false>;
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
|
||||||
const Index size = kernel.size();
|
const Index size = kernel.size();
|
||||||
const Index alignedStart = DstIsAligned ? 0 : first_aligned<RequestedAlignment>(kernel.dstDataPtr(), size);
|
const Index alignedStart = DstIsAligned ? 0 : first_aligned<Alignment>(kernel.dstDataPtr(), size);
|
||||||
const Index alignedEnd = alignedStart + numext::round_down(size - alignedStart, PacketSize);
|
const Index alignedEnd = alignedStart + numext::round_down(size - alignedStart, PacketSize);
|
||||||
|
|
||||||
unaligned_dense_assignment_loop<DstIsAligned>::run(kernel, 0, alignedStart);
|
head_loop::run(kernel, 0, alignedStart);
|
||||||
|
|
||||||
for (Index index = alignedStart; index < alignedEnd; index += PacketSize)
|
for (Index index = alignedStart; index < alignedEnd; index += PacketSize)
|
||||||
kernel.template assignPacket<DstAlignment, SrcAlignment, PacketType>(index);
|
kernel.template assignPacket<Alignment, SrcAlignment, PacketType>(index);
|
||||||
|
|
||||||
unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
|
tail_loop::run(kernel, alignedEnd, size);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -426,10 +507,11 @@ struct dense_assignment_loop_impl<Kernel, LinearVectorizedTraversal, CompleteUnr
|
|||||||
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
||||||
static constexpr int Size = Kernel::AssignmentTraits::SizeAtCompileTime;
|
static constexpr int Size = Kernel::AssignmentTraits::SizeAtCompileTime;
|
||||||
static constexpr int AlignedSize = numext::round_down(Size, PacketSize);
|
static constexpr int AlignedSize = numext::round_down(Size, PacketSize);
|
||||||
|
static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
|
||||||
copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, 0, AlignedSize>::run(kernel);
|
copy_using_evaluator_linearvec_CompleteUnrolling<Kernel, 0, AlignedSize>::run(kernel);
|
||||||
copy_using_evaluator_LinearTraversal_CompleteUnrolling<Kernel, AlignedSize, Size>::run(kernel);
|
copy_using_evaluator_linearvec_segment<Kernel, AlignedSize, Size, UsePacketSegment>::run(kernel);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -505,35 +587,35 @@ struct dense_assignment_loop_impl<Kernel, SliceVectorizedTraversal, NoUnrolling>
|
|||||||
using Scalar = typename Kernel::Scalar;
|
using Scalar = typename Kernel::Scalar;
|
||||||
using PacketType = typename Kernel::PacketType;
|
using PacketType = typename Kernel::PacketType;
|
||||||
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
||||||
static constexpr int RequestedAlignment = Kernel::AssignmentTraits::InnerRequiredAlignment;
|
static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment;
|
||||||
|
static constexpr int DstAlignment = plain_enum_max(Kernel::AssignmentTraits::DstAlignment, alignof(Scalar));
|
||||||
|
static constexpr int RequestedAlignment = unpacket_traits<PacketType>::alignment;
|
||||||
static constexpr bool Alignable =
|
static constexpr bool Alignable =
|
||||||
packet_traits<Scalar>::AlignedOnScalar || Kernel::AssignmentTraits::DstAlignment >= sizeof(Scalar);
|
(DstAlignment >= RequestedAlignment) || ((RequestedAlignment - DstAlignment) % sizeof(Scalar) == 0);
|
||||||
static constexpr bool DstIsAligned = Kernel::AssignmentTraits::DstAlignment >= RequestedAlignment;
|
static constexpr int Alignment = Alignable ? RequestedAlignment : DstAlignment;
|
||||||
static constexpr int DstAlignment = Alignable ? RequestedAlignment : Kernel::AssignmentTraits::DstAlignment;
|
static constexpr bool DstIsAligned = DstAlignment >= Alignment;
|
||||||
|
static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;
|
||||||
|
|
||||||
|
using head_loop = unaligned_dense_assignment_loop<PacketType, DstAlignment, Unaligned, UsePacketSegment, !Alignable>;
|
||||||
|
using tail_loop = unaligned_dense_assignment_loop<PacketType, Alignment, Unaligned, UsePacketSegment, false>;
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
|
||||||
const Scalar* dst_ptr = kernel.dstDataPtr();
|
const Scalar* dst_ptr = kernel.dstDataPtr();
|
||||||
if ((!DstIsAligned) && (std::uintptr_t(dst_ptr) % sizeof(Scalar)) > 0) {
|
|
||||||
// the pointer is not aligned-on scalar, so alignment is not possible
|
|
||||||
return dense_assignment_loop<Kernel, DefaultTraversal, NoUnrolling>::run(kernel);
|
|
||||||
}
|
|
||||||
const Index innerSize = kernel.innerSize();
|
const Index innerSize = kernel.innerSize();
|
||||||
const Index outerSize = kernel.outerSize();
|
const Index outerSize = kernel.outerSize();
|
||||||
const Index alignedStep = Alignable ? (PacketSize - kernel.outerStride() % PacketSize) % PacketSize : 0;
|
const Index alignedStep = Alignable ? (PacketSize - kernel.outerStride() % PacketSize) % PacketSize : 0;
|
||||||
Index alignedStart =
|
Index alignedStart = ((!Alignable) || DstIsAligned) ? 0 : internal::first_aligned<Alignment>(dst_ptr, innerSize);
|
||||||
((!Alignable) || DstIsAligned) ? 0 : internal::first_aligned<RequestedAlignment>(dst_ptr, innerSize);
|
|
||||||
|
|
||||||
for (Index outer = 0; outer < outerSize; ++outer) {
|
for (Index outer = 0; outer < outerSize; ++outer) {
|
||||||
const Index alignedEnd = alignedStart + numext::round_down(innerSize - alignedStart, PacketSize);
|
const Index alignedEnd = alignedStart + numext::round_down(innerSize - alignedStart, PacketSize);
|
||||||
// do the non-vectorizable part of the assignment
|
|
||||||
for (Index inner = 0; inner < alignedStart; ++inner) kernel.assignCoeffByOuterInner(outer, inner);
|
head_loop::run(kernel, outer, 0, alignedStart);
|
||||||
|
|
||||||
// do the vectorizable part of the assignment
|
// do the vectorizable part of the assignment
|
||||||
for (Index inner = alignedStart; inner < alignedEnd; inner += PacketSize)
|
for (Index inner = alignedStart; inner < alignedEnd; inner += PacketSize)
|
||||||
kernel.template assignPacketByOuterInner<DstAlignment, Unaligned, PacketType>(outer, inner);
|
kernel.template assignPacketByOuterInner<Alignment, Unaligned, PacketType>(outer, inner);
|
||||||
|
|
||||||
// do the non-vectorizable part of the assignment
|
tail_loop::run(kernel, outer, alignedEnd, innerSize);
|
||||||
for (Index inner = alignedEnd; inner < innerSize; ++inner) kernel.assignCoeffByOuterInner(outer, inner);
|
|
||||||
|
|
||||||
alignedStart = numext::mini((alignedStart + alignedStep) % PacketSize, innerSize);
|
alignedStart = numext::mini((alignedStart + alignedStep) % PacketSize, innerSize);
|
||||||
}
|
}
|
||||||
@ -547,11 +629,16 @@ struct dense_assignment_loop_impl<Kernel, SliceVectorizedTraversal, InnerUnrolli
|
|||||||
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
||||||
static constexpr int InnerSize = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
|
static constexpr int InnerSize = Kernel::AssignmentTraits::InnerSizeAtCompileTime;
|
||||||
static constexpr int VectorizableSize = numext::round_down(InnerSize, PacketSize);
|
static constexpr int VectorizableSize = numext::round_down(InnerSize, PacketSize);
|
||||||
|
static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;
|
||||||
|
|
||||||
|
using packet_loop = copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, VectorizableSize, Unaligned, Unaligned>;
|
||||||
|
using packet_segment_loop = copy_using_evaluator_innervec_segment<Kernel, VectorizableSize, InnerSize, Unaligned,
|
||||||
|
Unaligned, UsePacketSegment>;
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE EIGEN_CONSTEXPR void run(Kernel& kernel) {
|
||||||
for (Index outer = 0; outer < kernel.outerSize(); ++outer) {
|
for (Index outer = 0; outer < kernel.outerSize(); ++outer) {
|
||||||
copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, VectorizableSize, 0, 0>::run(kernel, outer);
|
packet_loop::run(kernel, outer);
|
||||||
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, VectorizableSize, InnerSize>::run(kernel, outer);
|
packet_segment_loop::run(kernel, outer);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -635,6 +722,27 @@ class generic_dense_assignment_kernel {
|
|||||||
assignPacket<StoreMode, LoadMode, Packet>(row, col);
|
assignPacket<StoreMode, LoadMode, Packet>(row, col);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <int StoreMode, int LoadMode, typename Packet>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketSegment(Index row, Index col, Index begin, Index count) {
|
||||||
|
m_functor.template assignPacketSegment<StoreMode>(
|
||||||
|
&m_dst.coeffRef(row, col), m_src.template packetSegment<LoadMode, Packet>(row, col, begin, count), begin,
|
||||||
|
count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int StoreMode, int LoadMode, typename Packet>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketSegment(Index index, Index begin, Index count) {
|
||||||
|
m_functor.template assignPacketSegment<StoreMode>(
|
||||||
|
&m_dst.coeffRef(index), m_src.template packetSegment<LoadMode, Packet>(index, begin, count), begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int StoreMode, int LoadMode, typename Packet>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketSegmentByOuterInner(Index outer, Index inner, Index begin,
|
||||||
|
Index count) {
|
||||||
|
Index row = rowIndexByOuterInner(outer, inner);
|
||||||
|
Index col = colIndexByOuterInner(outer, inner);
|
||||||
|
assignPacketSegment<StoreMode, LoadMode, Packet>(row, col, begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index rowIndexByOuterInner(Index outer, Index inner) {
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index rowIndexByOuterInner(Index outer, Index inner) {
|
||||||
typedef typename DstEvaluatorType::ExpressionTraits Traits;
|
typedef typename DstEvaluatorType::ExpressionTraits Traits;
|
||||||
return int(Traits::RowsAtCompileTime) == 1 ? 0
|
return int(Traits::RowsAtCompileTime) == 1 ? 0
|
||||||
|
@ -198,19 +198,13 @@ struct evaluator<PlainObjectBase<Derived>> : evaluator_base<Derived> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType coeff(Index row, Index col) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType coeff(Index row, Index col) const {
|
||||||
if (IsRowMajor)
|
return coeff(getIndex(row, col));
|
||||||
return m_d.data[row * m_d.outerStride() + col];
|
|
||||||
else
|
|
||||||
return m_d.data[row + col * m_d.outerStride()];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType coeff(Index index) const { return m_d.data[index]; }
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType coeff(Index index) const { return m_d.data[index]; }
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index row, Index col) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index row, Index col) {
|
||||||
if (IsRowMajor)
|
return coeffRef(getIndex(row, col));
|
||||||
return const_cast<Scalar*>(m_d.data)[row * m_d.outerStride() + col];
|
|
||||||
else
|
|
||||||
return const_cast<Scalar*>(m_d.data)[row + col * m_d.outerStride()];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index index) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index index) {
|
||||||
@ -219,10 +213,7 @@ struct evaluator<PlainObjectBase<Derived>> : evaluator_base<Derived> {
|
|||||||
|
|
||||||
template <int LoadMode, typename PacketType>
|
template <int LoadMode, typename PacketType>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||||
if (IsRowMajor)
|
return packet<LoadMode, PacketType>(getIndex(row, col));
|
||||||
return ploadt<PacketType, LoadMode>(m_d.data + row * m_d.outerStride() + col);
|
|
||||||
else
|
|
||||||
return ploadt<PacketType, LoadMode>(m_d.data + row + col * m_d.outerStride());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int LoadMode, typename PacketType>
|
template <int LoadMode, typename PacketType>
|
||||||
@ -232,19 +223,43 @@ struct evaluator<PlainObjectBase<Derived>> : evaluator_base<Derived> {
|
|||||||
|
|
||||||
template <int StoreMode, typename PacketType>
|
template <int StoreMode, typename PacketType>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) {
|
||||||
if (IsRowMajor)
|
writePacket<StoreMode, PacketType>(getIndex(row, col), x);
|
||||||
return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_d.data) + row * m_d.outerStride() + col, x);
|
|
||||||
else
|
|
||||||
return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_d.data) + row + col * m_d.outerStride(), x);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int StoreMode, typename PacketType>
|
template <int StoreMode, typename PacketType>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) {
|
||||||
return pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_d.data) + index, x);
|
pstoret<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_d.data) + index, x);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const {
|
||||||
|
return packetSegment<LoadMode, PacketType>(getIndex(row, col), begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const {
|
||||||
|
return ploadtSegment<PacketType, LoadMode>(m_d.data + index, begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int StoreMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index row, Index col, const PacketType& x, Index begin,
|
||||||
|
Index count) {
|
||||||
|
writePacketSegment<StoreMode, PacketType>(getIndex(row, col), x, begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int StoreMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index index, const PacketType& x, Index begin,
|
||||||
|
Index count) {
|
||||||
|
pstoretSegment<Scalar, PacketType, StoreMode>(const_cast<Scalar*>(m_d.data) + index, x, begin, count);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
plainobjectbase_evaluator_data<Scalar, OuterStrideAtCompileTime> m_d;
|
plainobjectbase_evaluator_data<Scalar, OuterStrideAtCompileTime> m_d;
|
||||||
|
|
||||||
|
private:
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getIndex(Index row, Index col) const {
|
||||||
|
return IsRowMajor ? row * m_d.outerStride() + col : row + col * m_d.outerStride();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
|
template <typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
|
||||||
@ -318,6 +333,28 @@ struct unary_evaluator<Transpose<ArgType>, IndexBased> : evaluator_base<Transpos
|
|||||||
m_argImpl.template writePacket<StoreMode, PacketType>(index, x);
|
m_argImpl.template writePacket<StoreMode, PacketType>(index, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const {
|
||||||
|
return m_argImpl.template packetSegment<LoadMode, PacketType>(col, row, begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const {
|
||||||
|
return m_argImpl.template packetSegment<LoadMode, PacketType>(index, begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int StoreMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index row, Index col, const PacketType& x, Index begin,
|
||||||
|
Index count) {
|
||||||
|
m_argImpl.template writePacketSegment<StoreMode, PacketType>(col, row, x, begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int StoreMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index index, const PacketType& x, Index begin,
|
||||||
|
Index count) {
|
||||||
|
m_argImpl.template writePacketSegment<StoreMode, PacketType>(index, x, begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
evaluator<ArgType> m_argImpl;
|
evaluator<ArgType> m_argImpl;
|
||||||
};
|
};
|
||||||
@ -464,10 +501,10 @@ template <typename NullaryOp, typename PlainObjectType>
|
|||||||
struct evaluator<CwiseNullaryOp<NullaryOp, PlainObjectType>>
|
struct evaluator<CwiseNullaryOp<NullaryOp, PlainObjectType>>
|
||||||
: evaluator_base<CwiseNullaryOp<NullaryOp, PlainObjectType>> {
|
: evaluator_base<CwiseNullaryOp<NullaryOp, PlainObjectType>> {
|
||||||
typedef CwiseNullaryOp<NullaryOp, PlainObjectType> XprType;
|
typedef CwiseNullaryOp<NullaryOp, PlainObjectType> XprType;
|
||||||
typedef internal::remove_all_t<PlainObjectType> PlainObjectTypeCleaned;
|
typedef remove_all_t<PlainObjectType> PlainObjectTypeCleaned;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
CoeffReadCost = internal::functor_traits<NullaryOp>::Cost,
|
CoeffReadCost = functor_traits<NullaryOp>::Cost,
|
||||||
|
|
||||||
Flags = (evaluator<PlainObjectTypeCleaned>::Flags &
|
Flags = (evaluator<PlainObjectTypeCleaned>::Flags &
|
||||||
(HereditaryBits | (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0) |
|
(HereditaryBits | (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0) |
|
||||||
@ -502,9 +539,21 @@ struct evaluator<CwiseNullaryOp<NullaryOp, PlainObjectType>>
|
|||||||
return m_wrapper.template packetOp<PacketType>(m_functor, index);
|
return m_wrapper.template packetOp<PacketType>(m_functor, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType, typename IndexType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(IndexType row, IndexType col, Index /*begin*/,
|
||||||
|
Index /*count*/) const {
|
||||||
|
return packet<LoadMode, PacketType, IndexType>(row, col);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType, typename IndexType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(IndexType index, Index /*begin*/,
|
||||||
|
Index /*count*/) const {
|
||||||
|
return packet<LoadMode, PacketType, IndexType>(index);
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
const NullaryOp m_functor;
|
const NullaryOp m_functor;
|
||||||
const internal::nullary_wrapper<CoeffReturnType, NullaryOp> m_wrapper;
|
const nullary_wrapper<CoeffReturnType, NullaryOp> m_wrapper;
|
||||||
};
|
};
|
||||||
|
|
||||||
// -------------------- CwiseUnaryOp --------------------
|
// -------------------- CwiseUnaryOp --------------------
|
||||||
@ -546,6 +595,16 @@ struct unary_evaluator<CwiseUnaryOp<UnaryOp, ArgType>, IndexBased> : evaluator_b
|
|||||||
return m_d.func().packetOp(m_d.argImpl.template packet<LoadMode, PacketType>(index));
|
return m_d.func().packetOp(m_d.argImpl.template packet<LoadMode, PacketType>(index));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const {
|
||||||
|
return m_d.func().packetOp(m_d.argImpl.template packetSegment<LoadMode, PacketType>(row, col, begin, count));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const {
|
||||||
|
return m_d.func().packetOp(m_d.argImpl.template packetSegment<LoadMode, PacketType>(index, begin, count));
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
// this helper permits to completely eliminate the functor if it is empty
|
// this helper permits to completely eliminate the functor if it is empty
|
||||||
struct Data {
|
struct Data {
|
||||||
@ -600,16 +659,11 @@ struct unary_evaluator<CwiseUnaryOp<core_cast_op<SrcType, DstType>, ArgType>, In
|
|||||||
template <typename DstPacketType>
|
template <typename DstPacketType>
|
||||||
using SrcPacketArgs8 = std::enable_if_t<(unpacket_traits<DstPacketType>::size) == (8 * SrcPacketSize), bool>;
|
using SrcPacketArgs8 = std::enable_if_t<(unpacket_traits<DstPacketType>::size) == (8 * SrcPacketSize), bool>;
|
||||||
|
|
||||||
template <bool UseRowMajor = IsRowMajor, std::enable_if_t<UseRowMajor, bool> = true>
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool check_array_bounds(Index row, Index col, Index begin, Index count) const {
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool check_array_bounds(Index, Index col, Index packetSize) const {
|
return IsRowMajor ? (col + count + begin <= cols()) : (row + count + begin <= rows());
|
||||||
return col + packetSize <= cols();
|
|
||||||
}
|
}
|
||||||
template <bool UseRowMajor = IsRowMajor, std::enable_if_t<!UseRowMajor, bool> = true>
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool check_array_bounds(Index index, Index begin, Index count) const {
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool check_array_bounds(Index row, Index, Index packetSize) const {
|
return index + count + begin <= size();
|
||||||
return row + packetSize <= rows();
|
|
||||||
}
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool check_array_bounds(Index index, Index packetSize) const {
|
|
||||||
return index + packetSize <= size();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE SrcType srcCoeff(Index row, Index col, Index offset) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE SrcType srcCoeff(Index row, Index col, Index offset) const {
|
||||||
@ -632,43 +686,86 @@ struct unary_evaluator<CwiseUnaryOp<core_cast_op<SrcType, DstType>, ArgType>, In
|
|||||||
template <int LoadMode, typename PacketType = SrcPacketType>
|
template <int LoadMode, typename PacketType = SrcPacketType>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType srcPacket(Index row, Index col, Index offset) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType srcPacket(Index row, Index col, Index offset) const {
|
||||||
constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
||||||
Index actualRow = IsRowMajor ? row : row + (offset * PacketSize);
|
Index packetOffset = offset * PacketSize;
|
||||||
Index actualCol = IsRowMajor ? col + (offset * PacketSize) : col;
|
Index actualRow = IsRowMajor ? row : row + packetOffset;
|
||||||
eigen_assert(check_array_bounds(actualRow, actualCol, PacketSize) && "Array index out of bounds");
|
Index actualCol = IsRowMajor ? col + packetOffset : col;
|
||||||
|
eigen_assert(check_array_bounds(actualRow, actualCol, 0, PacketSize) && "Array index out of bounds");
|
||||||
return m_argImpl.template packet<LoadMode, PacketType>(actualRow, actualCol);
|
return m_argImpl.template packet<LoadMode, PacketType>(actualRow, actualCol);
|
||||||
}
|
}
|
||||||
template <int LoadMode, typename PacketType = SrcPacketType>
|
template <int LoadMode, typename PacketType = SrcPacketType>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType srcPacket(Index index, Index offset) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType srcPacket(Index index, Index offset) const {
|
||||||
constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
||||||
Index actualIndex = index + (offset * PacketSize);
|
Index packetOffset = offset * PacketSize;
|
||||||
eigen_assert(check_array_bounds(actualIndex, PacketSize) && "Array index out of bounds");
|
Index actualIndex = index + packetOffset;
|
||||||
|
eigen_assert(check_array_bounds(actualIndex, 0, PacketSize) && "Array index out of bounds");
|
||||||
return m_argImpl.template packet<LoadMode, PacketType>(actualIndex);
|
return m_argImpl.template packet<LoadMode, PacketType>(actualIndex);
|
||||||
}
|
}
|
||||||
|
template <int LoadMode, typename PacketType = SrcPacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType srcPacketSegment(Index row, Index col, Index begin, Index count,
|
||||||
|
Index offset) const {
|
||||||
|
constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
||||||
|
Index packetOffset = offset * PacketSize;
|
||||||
|
Index actualRow = IsRowMajor ? row : row + packetOffset;
|
||||||
|
Index actualCol = IsRowMajor ? col + packetOffset : col;
|
||||||
|
eigen_assert(check_array_bounds(actualRow, actualCol, 0, count) && "Array index out of bounds");
|
||||||
|
return m_argImpl.template packetSegment<LoadMode, PacketType>(actualRow, actualCol, begin, count);
|
||||||
|
}
|
||||||
|
template <int LoadMode, typename PacketType = SrcPacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType srcPacketSegment(Index index, Index begin, Index count,
|
||||||
|
Index offset) const {
|
||||||
|
constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
||||||
|
Index packetOffset = offset * PacketSize;
|
||||||
|
Index actualIndex = index + packetOffset + begin;
|
||||||
|
eigen_assert(check_array_bounds(actualIndex, 0, count) && "Array index out of bounds");
|
||||||
|
return m_argImpl.template packetSegment<LoadMode, PacketType>(actualIndex, begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int NumPackets, int LoadMode, typename PacketType = SrcPacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketBlock<PacketType, NumPackets> srcPacketSegmentHelper(Index row, Index col,
|
||||||
|
Index begin,
|
||||||
|
Index count) const {
|
||||||
|
constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode);
|
||||||
|
PacketBlock<PacketType, NumPackets> packets;
|
||||||
|
Index offset = begin / SrcPacketSize;
|
||||||
|
Index actualBegin = begin % SrcPacketSize;
|
||||||
|
for (; offset < NumPackets; offset++) {
|
||||||
|
Index actualCount = numext::mini(SrcPacketSize - actualBegin, count);
|
||||||
|
packets.packet[offset] = srcPacketSegment<SrcLoadMode>(row, col, actualBegin, actualCount, offset);
|
||||||
|
if (count == actualCount) break;
|
||||||
|
actualBegin = 0;
|
||||||
|
count -= actualCount;
|
||||||
|
}
|
||||||
|
return packets;
|
||||||
|
}
|
||||||
|
template <int NumPackets, int LoadMode, typename PacketType = SrcPacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketBlock<PacketType, NumPackets> srcPacketSegmentHelper(Index index,
|
||||||
|
Index begin,
|
||||||
|
Index count) const {
|
||||||
|
constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode);
|
||||||
|
PacketBlock<PacketType, NumPackets> packets;
|
||||||
|
Index offset = begin / SrcPacketSize;
|
||||||
|
Index actualBegin = begin % SrcPacketSize;
|
||||||
|
for (; offset < NumPackets; offset++) {
|
||||||
|
Index actualCount = numext::mini(SrcPacketSize - actualBegin, count);
|
||||||
|
packets.packet[offset] = srcPacketSegment<SrcLoadMode>(index, actualBegin, actualCount, offset);
|
||||||
|
if (count == actualCount) break;
|
||||||
|
actualBegin = 0;
|
||||||
|
count -= actualCount;
|
||||||
|
}
|
||||||
|
return packets;
|
||||||
|
}
|
||||||
|
|
||||||
// There is no source packet type with equal or fewer elements than DstPacketType.
|
// There is no source packet type with equal or fewer elements than DstPacketType.
|
||||||
// This is problematic as the evaluation loop may attempt to access data outside the bounds of the array.
|
// This is problematic as the evaluation loop may attempt to access data outside the bounds of the array.
|
||||||
// For example, consider the cast utilizing pcast<Packet4f,Packet2d> with an array of size 4: {0.0f,1.0f,2.0f,3.0f}.
|
// For example, consider the cast utilizing pcast<Packet4f,Packet2d> with an array of size 4: {0.0f,1.0f,2.0f,3.0f}.
|
||||||
// The first iteration of the evaluation loop will load 16 bytes: {0.0f,1.0f,2.0f,3.0f} and cast to {0.0,1.0}, which
|
// The first iteration of the evaluation loop will load 16 bytes: {0.0f,1.0f,2.0f,3.0f} and cast to {0.0,1.0}, which
|
||||||
// is acceptable. The second iteration will load 16 bytes: {2.0f,3.0f,?,?}, which is outside the bounds of the array.
|
// is acceptable. The second iteration will load 16 bytes: {2.0f,3.0f,?,?}, which is outside the bounds of the array.
|
||||||
|
|
||||||
// Instead, perform runtime check to determine if the load would access data outside the bounds of the array.
|
|
||||||
// If not, perform full load. Otherwise, revert to a scalar loop to perform a partial load.
|
|
||||||
// In either case, perform a vectorized cast of the source packet.
|
|
||||||
template <int LoadMode, typename DstPacketType, AltSrcScalarOp<DstPacketType> = true>
|
template <int LoadMode, typename DstPacketType, AltSrcScalarOp<DstPacketType> = true>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const {
|
||||||
constexpr int DstPacketSize = unpacket_traits<DstPacketType>::size;
|
constexpr int DstPacketSize = unpacket_traits<DstPacketType>::size;
|
||||||
constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType);
|
constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType);
|
||||||
constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode);
|
constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode);
|
||||||
SrcPacketType src;
|
return pcast<SrcPacketType, DstPacketType>(srcPacketSegment<SrcLoadMode>(row, col, 0, DstPacketSize, 0));
|
||||||
if (EIGEN_PREDICT_TRUE(check_array_bounds(row, col, SrcPacketSize))) {
|
|
||||||
src = srcPacket<SrcLoadMode>(row, col, 0);
|
|
||||||
} else {
|
|
||||||
Array<SrcType, SrcPacketSize, 1> srcArray;
|
|
||||||
for (size_t k = 0; k < DstPacketSize; k++) srcArray[k] = srcCoeff(row, col, k);
|
|
||||||
for (size_t k = DstPacketSize; k < SrcPacketSize; k++) srcArray[k] = SrcType(0);
|
|
||||||
src = pload<SrcPacketType>(srcArray.data());
|
|
||||||
}
|
|
||||||
return pcast<SrcPacketType, DstPacketType>(src);
|
|
||||||
}
|
}
|
||||||
// Use the source packet type with the same size as DstPacketType, if it exists
|
// Use the source packet type with the same size as DstPacketType, if it exists
|
||||||
template <int LoadMode, typename DstPacketType, SrcPacketArgs1<DstPacketType> = true>
|
template <int LoadMode, typename DstPacketType, SrcPacketArgs1<DstPacketType> = true>
|
||||||
@ -704,22 +801,67 @@ struct unary_evaluator<CwiseUnaryOp<core_cast_op<SrcType, DstType>, ArgType>, In
|
|||||||
srcPacket<SrcLoadMode>(row, col, 6), srcPacket<SrcLoadMode>(row, col, 7));
|
srcPacket<SrcLoadMode>(row, col, 6), srcPacket<SrcLoadMode>(row, col, 7));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// packetSegment variants
|
||||||
|
template <int LoadMode, typename DstPacketType, AltSrcScalarOp<DstPacketType> = true>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index row, Index col, Index begin,
|
||||||
|
Index count) const {
|
||||||
|
constexpr int DstPacketSize = unpacket_traits<DstPacketType>::size;
|
||||||
|
constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType);
|
||||||
|
constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode);
|
||||||
|
return pcast<SrcPacketType, DstPacketType>(srcPacketSegment<SrcLoadMode>(row, col, begin, count, 0));
|
||||||
|
}
|
||||||
|
// Use the source packet type with the same size as DstPacketType, if it exists
|
||||||
|
template <int LoadMode, typename DstPacketType, SrcPacketArgs1<DstPacketType> = true>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index row, Index col, Index begin,
|
||||||
|
Index count) const {
|
||||||
|
constexpr int DstPacketSize = unpacket_traits<DstPacketType>::size;
|
||||||
|
using SizedSrcPacketType = typename find_packet_by_size<SrcType, DstPacketSize>::type;
|
||||||
|
constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType);
|
||||||
|
constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode);
|
||||||
|
return pcast<SizedSrcPacketType, DstPacketType>(
|
||||||
|
srcPacketSegment<SrcLoadMode, SizedSrcPacketType>(row, col, begin, count, 0));
|
||||||
|
}
|
||||||
|
// unpacket_traits<DstPacketType>::size == 2 * SrcPacketSize
|
||||||
|
template <int LoadMode, typename DstPacketType, SrcPacketArgs2<DstPacketType> = true>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index row, Index col, Index begin,
|
||||||
|
Index count) const {
|
||||||
|
constexpr int NumPackets = 2;
|
||||||
|
constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode);
|
||||||
|
PacketBlock<SrcPacketType, NumPackets> packets =
|
||||||
|
srcPacketSegmentHelper<NumPackets, SrcLoadMode>(row, col, begin, count);
|
||||||
|
return pcast<SrcPacketType, DstPacketType>(packets.packet[0], packets.packet[1]);
|
||||||
|
}
|
||||||
|
// unpacket_traits<DstPacketType>::size == 4 * SrcPacketSize
|
||||||
|
template <int LoadMode, typename DstPacketType, SrcPacketArgs4<DstPacketType> = true>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index row, Index col, Index begin,
|
||||||
|
Index count) const {
|
||||||
|
constexpr int NumPackets = 4;
|
||||||
|
constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode);
|
||||||
|
PacketBlock<SrcPacketType, NumPackets> packets =
|
||||||
|
srcPacketSegmentHelper<NumPackets, SrcLoadMode>(row, col, begin, count);
|
||||||
|
return pcast<SrcPacketType, DstPacketType>(packets.packet[0], packets.packet[1], packets.packet[2],
|
||||||
|
packets.packet[3]);
|
||||||
|
}
|
||||||
|
// unpacket_traits<DstPacketType>::size == 8 * SrcPacketSize
|
||||||
|
template <int LoadMode, typename DstPacketType, SrcPacketArgs8<DstPacketType> = true>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index row, Index col, Index begin,
|
||||||
|
Index count) const {
|
||||||
|
constexpr int NumPackets = 8;
|
||||||
|
constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode);
|
||||||
|
PacketBlock<SrcPacketType, NumPackets> packets =
|
||||||
|
srcPacketSegmentHelper<NumPackets, SrcLoadMode>(row, col, begin, count);
|
||||||
|
return pcast<SrcPacketType, DstPacketType>(packets.packet[0], packets.packet[1], packets.packet[2],
|
||||||
|
packets.packet[3], packets.packet[4], packets.packet[5],
|
||||||
|
packets.packet[6], packets.packet[7]);
|
||||||
|
}
|
||||||
|
|
||||||
// Analogous routines for linear access.
|
// Analogous routines for linear access.
|
||||||
template <int LoadMode, typename DstPacketType, AltSrcScalarOp<DstPacketType> = true>
|
template <int LoadMode, typename DstPacketType, AltSrcScalarOp<DstPacketType> = true>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index index) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index index) const {
|
||||||
constexpr int DstPacketSize = unpacket_traits<DstPacketType>::size;
|
constexpr int DstPacketSize = unpacket_traits<DstPacketType>::size;
|
||||||
constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType);
|
constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType);
|
||||||
constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode);
|
constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode);
|
||||||
SrcPacketType src;
|
return pcast<SrcPacketType, DstPacketType>(srcPacketSegment<SrcLoadMode>(index, 0, DstPacketSize, 0));
|
||||||
if (EIGEN_PREDICT_TRUE(check_array_bounds(index, SrcPacketSize))) {
|
|
||||||
src = srcPacket<SrcLoadMode>(index, 0);
|
|
||||||
} else {
|
|
||||||
Array<SrcType, SrcPacketSize, 1> srcArray;
|
|
||||||
for (size_t k = 0; k < DstPacketSize; k++) srcArray[k] = srcCoeff(index, k);
|
|
||||||
for (size_t k = DstPacketSize; k < SrcPacketSize; k++) srcArray[k] = SrcType(0);
|
|
||||||
src = pload<SrcPacketType>(srcArray.data());
|
|
||||||
}
|
|
||||||
return pcast<SrcPacketType, DstPacketType>(src);
|
|
||||||
}
|
}
|
||||||
template <int LoadMode, typename DstPacketType, SrcPacketArgs1<DstPacketType> = true>
|
template <int LoadMode, typename DstPacketType, SrcPacketArgs1<DstPacketType> = true>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index index) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index index) const {
|
||||||
@ -749,6 +891,55 @@ struct unary_evaluator<CwiseUnaryOp<core_cast_op<SrcType, DstType>, ArgType>, In
|
|||||||
srcPacket<SrcLoadMode>(index, 6), srcPacket<SrcLoadMode>(index, 7));
|
srcPacket<SrcLoadMode>(index, 6), srcPacket<SrcLoadMode>(index, 7));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// packetSegment variants
|
||||||
|
template <int LoadMode, typename DstPacketType, AltSrcScalarOp<DstPacketType> = true>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index index, Index begin, Index count) const {
|
||||||
|
constexpr int DstPacketSize = unpacket_traits<DstPacketType>::size;
|
||||||
|
constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType);
|
||||||
|
constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode);
|
||||||
|
return pcast<SrcPacketType, DstPacketType>(srcPacketSegment<SrcLoadMode>(index, begin, count, 0));
|
||||||
|
}
|
||||||
|
// Use the source packet type with the same size as DstPacketType, if it exists
|
||||||
|
template <int LoadMode, typename DstPacketType, SrcPacketArgs1<DstPacketType> = true>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index index, Index begin, Index count) const {
|
||||||
|
constexpr int DstPacketSize = unpacket_traits<DstPacketType>::size;
|
||||||
|
using SizedSrcPacketType = typename find_packet_by_size<SrcType, DstPacketSize>::type;
|
||||||
|
constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType);
|
||||||
|
constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode);
|
||||||
|
return pcast<SizedSrcPacketType, DstPacketType>(
|
||||||
|
srcPacketSegment<SrcLoadMode, SizedSrcPacketType>(index, begin, count, 0));
|
||||||
|
}
|
||||||
|
// unpacket_traits<DstPacketType>::size == 2 * SrcPacketSize
|
||||||
|
template <int LoadMode, typename DstPacketType, SrcPacketArgs2<DstPacketType> = true>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index index, Index begin, Index count) const {
|
||||||
|
constexpr int NumPackets = 2;
|
||||||
|
constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode);
|
||||||
|
PacketBlock<SrcPacketType, NumPackets> packets =
|
||||||
|
srcPacketSegmentHelper<NumPackets, SrcLoadMode>(index, begin, count);
|
||||||
|
return pcast<SrcPacketType, DstPacketType>(packets.packet[0], packets.packet[1]);
|
||||||
|
}
|
||||||
|
// unpacket_traits<DstPacketType>::size == 4 * SrcPacketSize
|
||||||
|
template <int LoadMode, typename DstPacketType, SrcPacketArgs4<DstPacketType> = true>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index index, Index begin, Index count) const {
|
||||||
|
constexpr int NumPackets = 4;
|
||||||
|
constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode);
|
||||||
|
PacketBlock<SrcPacketType, NumPackets> packets =
|
||||||
|
srcPacketSegmentHelper<NumPackets, SrcLoadMode>(index, begin, count);
|
||||||
|
return pcast<SrcPacketType, DstPacketType>(packets.packet[0], packets.packet[1], packets.packet[2],
|
||||||
|
packets.packet[3]);
|
||||||
|
}
|
||||||
|
// unpacket_traits<DstPacketType>::size == 8 * SrcPacketSize
|
||||||
|
template <int LoadMode, typename DstPacketType, SrcPacketArgs8<DstPacketType> = true>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index index, Index begin, Index count) const {
|
||||||
|
constexpr int NumPackets = 8;
|
||||||
|
constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode);
|
||||||
|
PacketBlock<SrcPacketType, NumPackets> packets =
|
||||||
|
srcPacketSegmentHelper<NumPackets, SrcLoadMode>(index, begin, count);
|
||||||
|
return pcast<SrcPacketType, DstPacketType>(packets.packet[0], packets.packet[1], packets.packet[2],
|
||||||
|
packets.packet[3], packets.packet[4], packets.packet[5],
|
||||||
|
packets.packet[6], packets.packet[7]);
|
||||||
|
}
|
||||||
|
|
||||||
constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_rows; }
|
constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_rows; }
|
||||||
constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_cols; }
|
constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_cols; }
|
||||||
constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_rows * m_cols; }
|
constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_rows * m_cols; }
|
||||||
@ -826,6 +1017,20 @@ struct ternary_evaluator<CwiseTernaryOp<TernaryOp, Arg1, Arg2, Arg3>, IndexBased
|
|||||||
m_d.arg3Impl.template packet<LoadMode, PacketType>(index));
|
m_d.arg3Impl.template packet<LoadMode, PacketType>(index));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const {
|
||||||
|
return m_d.func().packetOp(m_d.arg1Impl.template packetSegment<LoadMode, PacketType>(row, col, begin, count),
|
||||||
|
m_d.arg2Impl.template packetSegment<LoadMode, PacketType>(row, col, begin, count),
|
||||||
|
m_d.arg3Impl.template packetSegment<LoadMode, PacketType>(row, col, begin, count));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const {
|
||||||
|
return m_d.func().packetOp(m_d.arg1Impl.template packetSegment<LoadMode, PacketType>(index, begin, count),
|
||||||
|
m_d.arg2Impl.template packetSegment<LoadMode, PacketType>(index, begin, count),
|
||||||
|
m_d.arg3Impl.template packetSegment<LoadMode, PacketType>(index, begin, count));
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
// this helper permits to completely eliminate the functor if it is empty
|
// this helper permits to completely eliminate the functor if it is empty
|
||||||
struct Data {
|
struct Data {
|
||||||
@ -922,6 +1127,18 @@ struct binary_evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs>, IndexBased, IndexBase
|
|||||||
m_d.rhsImpl.template packet<LoadMode, PacketType>(index));
|
m_d.rhsImpl.template packet<LoadMode, PacketType>(index));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const {
|
||||||
|
return m_d.func().packetOp(m_d.lhsImpl.template packetSegment<LoadMode, PacketType>(row, col, begin, count),
|
||||||
|
m_d.rhsImpl.template packetSegment<LoadMode, PacketType>(row, col, begin, count));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const {
|
||||||
|
return m_d.func().packetOp(m_d.lhsImpl.template packetSegment<LoadMode, PacketType>(index, begin, count),
|
||||||
|
m_d.rhsImpl.template packetSegment<LoadMode, PacketType>(index, begin, count));
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
// this helper permits to completely eliminate the functor if it is empty
|
// this helper permits to completely eliminate the functor if it is empty
|
||||||
struct Data {
|
struct Data {
|
||||||
@ -1013,7 +1230,7 @@ struct mapbase_evaluator : evaluator_base<Derived> {
|
|||||||
m_innerStride(map.innerStride()),
|
m_innerStride(map.innerStride()),
|
||||||
m_outerStride(map.outerStride()) {
|
m_outerStride(map.outerStride()) {
|
||||||
EIGEN_STATIC_ASSERT(check_implication((evaluator<Derived>::Flags & PacketAccessBit) != 0,
|
EIGEN_STATIC_ASSERT(check_implication((evaluator<Derived>::Flags & PacketAccessBit) != 0,
|
||||||
internal::inner_stride_at_compile_time<Derived>::ret == 1),
|
inner_stride_at_compile_time<Derived>::ret == 1),
|
||||||
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
|
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
|
||||||
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
|
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
|
||||||
}
|
}
|
||||||
@ -1035,23 +1252,47 @@ struct mapbase_evaluator : evaluator_base<Derived> {
|
|||||||
template <int LoadMode, typename PacketType>
|
template <int LoadMode, typename PacketType>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||||
PointerType ptr = m_data + row * rowStride() + col * colStride();
|
PointerType ptr = m_data + row * rowStride() + col * colStride();
|
||||||
return internal::ploadt<PacketType, LoadMode>(ptr);
|
return ploadt<PacketType, LoadMode>(ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int LoadMode, typename PacketType>
|
template <int LoadMode, typename PacketType>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
||||||
return internal::ploadt<PacketType, LoadMode>(m_data + index * m_innerStride.value());
|
return ploadt<PacketType, LoadMode>(m_data + index * m_innerStride.value());
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int StoreMode, typename PacketType>
|
template <int StoreMode, typename PacketType>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) {
|
||||||
PointerType ptr = m_data + row * rowStride() + col * colStride();
|
PointerType ptr = m_data + row * rowStride() + col * colStride();
|
||||||
return internal::pstoret<Scalar, PacketType, StoreMode>(ptr, x);
|
pstoret<Scalar, PacketType, StoreMode>(ptr, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int StoreMode, typename PacketType>
|
template <int StoreMode, typename PacketType>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) {
|
||||||
internal::pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_innerStride.value(), x);
|
pstoret<Scalar, PacketType, StoreMode>(m_data + index * m_innerStride.value(), x);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const {
|
||||||
|
PointerType ptr = m_data + row * rowStride() + col * colStride();
|
||||||
|
return ploadtSegment<PacketType, LoadMode>(ptr, begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const {
|
||||||
|
return ploadtSegment<PacketType, LoadMode>(m_data + index * m_innerStride.value(), begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int StoreMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index row, Index col, const PacketType& x, Index begin,
|
||||||
|
Index count) {
|
||||||
|
PointerType ptr = m_data + row * rowStride() + col * colStride();
|
||||||
|
pstoretSegment<Scalar, PacketType, StoreMode>(ptr, x, begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int StoreMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index index, const PacketType& x, Index begin,
|
||||||
|
Index count) {
|
||||||
|
pstoretSegment<Scalar, PacketType, StoreMode>(m_data + index * m_innerStride.value(), x, begin, count);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
@ -1063,8 +1304,8 @@ struct mapbase_evaluator : evaluator_base<Derived> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
PointerType m_data;
|
PointerType m_data;
|
||||||
const internal::variable_if_dynamic<Index, XprType::InnerStrideAtCompileTime> m_innerStride;
|
const variable_if_dynamic<Index, XprType::InnerStrideAtCompileTime> m_innerStride;
|
||||||
const internal::variable_if_dynamic<Index, XprType::OuterStrideAtCompileTime> m_outerStride;
|
const variable_if_dynamic<Index, XprType::OuterStrideAtCompileTime> m_outerStride;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename PlainObjectType, int MapOptions, typename StrideType>
|
template <typename PlainObjectType, int MapOptions, typename StrideType>
|
||||||
@ -1117,7 +1358,7 @@ struct evaluator<Ref<PlainObjectType, RefOptions, StrideType>>
|
|||||||
// -------------------- Block --------------------
|
// -------------------- Block --------------------
|
||||||
|
|
||||||
template <typename ArgType, int BlockRows, int BlockCols, bool InnerPanel,
|
template <typename ArgType, int BlockRows, int BlockCols, bool InnerPanel,
|
||||||
bool HasDirectAccess = internal::has_direct_access<ArgType>::ret>
|
bool HasDirectAccess = has_direct_access<ArgType>::ret>
|
||||||
struct block_evaluator;
|
struct block_evaluator;
|
||||||
|
|
||||||
template <typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
|
template <typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
|
||||||
@ -1246,6 +1487,39 @@ struct unary_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>, IndexBa
|
|||||||
x);
|
x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const {
|
||||||
|
return m_argImpl.template packetSegment<LoadMode, PacketType>(m_startRow.value() + row, m_startCol.value() + col,
|
||||||
|
begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const {
|
||||||
|
if (ForwardLinearAccess)
|
||||||
|
return m_argImpl.template packetSegment<LoadMode, PacketType>(m_linear_offset.value() + index, begin, count);
|
||||||
|
else
|
||||||
|
return packetSegment<LoadMode, PacketType>(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0,
|
||||||
|
begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int StoreMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index row, Index col, const PacketType& x, Index begin,
|
||||||
|
Index count) {
|
||||||
|
return m_argImpl.template writePacketSegment<StoreMode, PacketType>(m_startRow.value() + row,
|
||||||
|
m_startCol.value() + col, x, begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int StoreMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index index, const PacketType& x, Index begin,
|
||||||
|
Index count) {
|
||||||
|
if (ForwardLinearAccess)
|
||||||
|
return m_argImpl.template writePacketSegment<StoreMode, PacketType>(m_linear_offset.value() + index, x, begin,
|
||||||
|
count);
|
||||||
|
else
|
||||||
|
return writePacketSegment<StoreMode, PacketType>(RowsAtCompileTime == 1 ? 0 : index,
|
||||||
|
RowsAtCompileTime == 1 ? index : 0, x, begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType
|
||||||
linear_coeff_impl(Index index, internal::true_type /* ForwardLinearAccess */) const {
|
linear_coeff_impl(Index index, internal::true_type /* ForwardLinearAccess */) const {
|
||||||
@ -1341,8 +1615,8 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor>>
|
|||||||
typedef Replicate<ArgType, RowFactor, ColFactor> XprType;
|
typedef Replicate<ArgType, RowFactor, ColFactor> XprType;
|
||||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||||
enum { Factor = (RowFactor == Dynamic || ColFactor == Dynamic) ? Dynamic : RowFactor * ColFactor };
|
enum { Factor = (RowFactor == Dynamic || ColFactor == Dynamic) ? Dynamic : RowFactor * ColFactor };
|
||||||
typedef typename internal::nested_eval<ArgType, Factor>::type ArgTypeNested;
|
typedef typename nested_eval<ArgType, Factor>::type ArgTypeNested;
|
||||||
typedef internal::remove_all_t<ArgTypeNested> ArgTypeNestedCleaned;
|
typedef remove_all_t<ArgTypeNested> ArgTypeNestedCleaned;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost,
|
CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost,
|
||||||
@ -1361,19 +1635,15 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor>>
|
|||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const {
|
||||||
// try to avoid using modulo; this is a pure optimization strategy
|
// try to avoid using modulo; this is a pure optimization strategy
|
||||||
const Index actual_row = internal::traits<XprType>::RowsAtCompileTime == 1 ? 0
|
const Index actual_row = traits<XprType>::RowsAtCompileTime == 1 ? 0 : RowFactor == 1 ? row : row % m_rows.value();
|
||||||
: RowFactor == 1 ? row
|
const Index actual_col = traits<XprType>::ColsAtCompileTime == 1 ? 0 : ColFactor == 1 ? col : col % m_cols.value();
|
||||||
: row % m_rows.value();
|
|
||||||
const Index actual_col = internal::traits<XprType>::ColsAtCompileTime == 1 ? 0
|
|
||||||
: ColFactor == 1 ? col
|
|
||||||
: col % m_cols.value();
|
|
||||||
|
|
||||||
return m_argImpl.coeff(actual_row, actual_col);
|
return m_argImpl.coeff(actual_row, actual_col);
|
||||||
}
|
}
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const {
|
||||||
// try to avoid using modulo; this is a pure optimization strategy
|
// try to avoid using modulo; this is a pure optimization strategy
|
||||||
const Index actual_index = internal::traits<XprType>::RowsAtCompileTime == 1
|
const Index actual_index = traits<XprType>::RowsAtCompileTime == 1
|
||||||
? (ColFactor == 1 ? index : index % m_cols.value())
|
? (ColFactor == 1 ? index : index % m_cols.value())
|
||||||
: (RowFactor == 1 ? index : index % m_rows.value());
|
: (RowFactor == 1 ? index : index % m_rows.value());
|
||||||
|
|
||||||
@ -1382,25 +1652,38 @@ struct unary_evaluator<Replicate<ArgType, RowFactor, ColFactor>>
|
|||||||
|
|
||||||
template <int LoadMode, typename PacketType>
|
template <int LoadMode, typename PacketType>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||||
const Index actual_row = internal::traits<XprType>::RowsAtCompileTime == 1 ? 0
|
const Index actual_row = traits<XprType>::RowsAtCompileTime == 1 ? 0 : RowFactor == 1 ? row : row % m_rows.value();
|
||||||
: RowFactor == 1 ? row
|
const Index actual_col = traits<XprType>::ColsAtCompileTime == 1 ? 0 : ColFactor == 1 ? col : col % m_cols.value();
|
||||||
: row % m_rows.value();
|
|
||||||
const Index actual_col = internal::traits<XprType>::ColsAtCompileTime == 1 ? 0
|
|
||||||
: ColFactor == 1 ? col
|
|
||||||
: col % m_cols.value();
|
|
||||||
|
|
||||||
return m_argImpl.template packet<LoadMode, PacketType>(actual_row, actual_col);
|
return m_argImpl.template packet<LoadMode, PacketType>(actual_row, actual_col);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int LoadMode, typename PacketType>
|
template <int LoadMode, typename PacketType>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
||||||
const Index actual_index = internal::traits<XprType>::RowsAtCompileTime == 1
|
const Index actual_index = traits<XprType>::RowsAtCompileTime == 1
|
||||||
? (ColFactor == 1 ? index : index % m_cols.value())
|
? (ColFactor == 1 ? index : index % m_cols.value())
|
||||||
: (RowFactor == 1 ? index : index % m_rows.value());
|
: (RowFactor == 1 ? index : index % m_rows.value());
|
||||||
|
|
||||||
return m_argImpl.template packet<LoadMode, PacketType>(actual_index);
|
return m_argImpl.template packet<LoadMode, PacketType>(actual_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const {
|
||||||
|
const Index actual_row = traits<XprType>::RowsAtCompileTime == 1 ? 0 : RowFactor == 1 ? row : row % m_rows.value();
|
||||||
|
const Index actual_col = traits<XprType>::ColsAtCompileTime == 1 ? 0 : ColFactor == 1 ? col : col % m_cols.value();
|
||||||
|
|
||||||
|
return m_argImpl.template packetSegment<LoadMode, PacketType>(actual_row, actual_col, begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const {
|
||||||
|
const Index actual_index = traits<XprType>::RowsAtCompileTime == 1
|
||||||
|
? (ColFactor == 1 ? index : index % m_cols.value())
|
||||||
|
: (RowFactor == 1 ? index : index % m_rows.value());
|
||||||
|
|
||||||
|
return m_argImpl.template packetSegment<LoadMode, PacketType>(actual_index, begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
const ArgTypeNested m_arg;
|
const ArgTypeNested m_arg;
|
||||||
evaluator<ArgTypeNestedCleaned> m_argImpl;
|
evaluator<ArgTypeNestedCleaned> m_argImpl;
|
||||||
@ -1457,6 +1740,28 @@ struct evaluator_wrapper_base : evaluator_base<XprType> {
|
|||||||
m_argImpl.template writePacket<StoreMode>(index, x);
|
m_argImpl.template writePacket<StoreMode>(index, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const {
|
||||||
|
return m_argImpl.template packetSegment<LoadMode, PacketType>(row, col, begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const {
|
||||||
|
return m_argImpl.template packetSegment<LoadMode, PacketType>(index, begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int StoreMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index row, Index col, const PacketType& x, Index begin,
|
||||||
|
Index count) {
|
||||||
|
m_argImpl.template writePacketSegment<StoreMode>(row, col, x, begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int StoreMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index index, const PacketType& x, Index begin,
|
||||||
|
Index count) {
|
||||||
|
m_argImpl.template writePacketSegment<StoreMode>(index, x, begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
evaluator<ArgType> m_argImpl;
|
evaluator<ArgType> m_argImpl;
|
||||||
};
|
};
|
||||||
@ -1536,41 +1841,97 @@ struct unary_evaluator<Reverse<ArgType, Direction>> : evaluator_base<Reverse<Arg
|
|||||||
|
|
||||||
template <int LoadMode, typename PacketType>
|
template <int LoadMode, typename PacketType>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||||
enum {
|
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
||||||
PacketSize = unpacket_traits<PacketType>::size,
|
static constexpr int OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1;
|
||||||
OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1,
|
static constexpr int OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1;
|
||||||
OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1
|
using reverse_packet = reverse_packet_cond<PacketType, ReversePacket>;
|
||||||
};
|
|
||||||
typedef internal::reverse_packet_cond<PacketType, ReversePacket> reverse_packet;
|
Index actualRow = ReverseRow ? m_rows.value() - row - OffsetRow : row;
|
||||||
return reverse_packet::run(m_argImpl.template packet<LoadMode, PacketType>(
|
Index actualCol = ReverseCol ? m_cols.value() - col - OffsetCol : col;
|
||||||
ReverseRow ? m_rows.value() - row - OffsetRow : row, ReverseCol ? m_cols.value() - col - OffsetCol : col));
|
|
||||||
|
return reverse_packet::run(m_argImpl.template packet<LoadMode, PacketType>(actualRow, actualCol));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int LoadMode, typename PacketType>
|
template <int LoadMode, typename PacketType>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const {
|
||||||
enum { PacketSize = unpacket_traits<PacketType>::size };
|
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
||||||
return preverse(
|
|
||||||
m_argImpl.template packet<LoadMode, PacketType>(m_rows.value() * m_cols.value() - index - PacketSize));
|
Index actualIndex = m_rows.value() * m_cols.value() - index - PacketSize;
|
||||||
|
|
||||||
|
return preverse(m_argImpl.template packet<LoadMode, PacketType>(actualIndex));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int LoadMode, typename PacketType>
|
template <int LoadMode, typename PacketType>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) {
|
||||||
// FIXME we could factorize some code with packet(i,j)
|
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
||||||
enum {
|
static constexpr int OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1;
|
||||||
PacketSize = unpacket_traits<PacketType>::size,
|
static constexpr int OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1;
|
||||||
OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1,
|
using reverse_packet = reverse_packet_cond<PacketType, ReversePacket>;
|
||||||
OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1
|
|
||||||
};
|
Index actualRow = ReverseRow ? m_rows.value() - row - OffsetRow : row;
|
||||||
typedef internal::reverse_packet_cond<PacketType, ReversePacket> reverse_packet;
|
Index actualCol = ReverseCol ? m_cols.value() - col - OffsetCol : col;
|
||||||
m_argImpl.template writePacket<LoadMode>(ReverseRow ? m_rows.value() - row - OffsetRow : row,
|
|
||||||
ReverseCol ? m_cols.value() - col - OffsetCol : col,
|
m_argImpl.template writePacket<LoadMode>(actualRow, actualCol, reverse_packet::run(x));
|
||||||
reverse_packet::run(x));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int LoadMode, typename PacketType>
|
template <int LoadMode, typename PacketType>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) {
|
||||||
enum { PacketSize = unpacket_traits<PacketType>::size };
|
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
||||||
m_argImpl.template writePacket<LoadMode>(m_rows.value() * m_cols.value() - index - PacketSize, preverse(x));
|
|
||||||
|
Index actualIndex = m_rows.value() * m_cols.value() - index - PacketSize;
|
||||||
|
|
||||||
|
m_argImpl.template writePacket<LoadMode>(actualIndex, preverse(x));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const {
|
||||||
|
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
||||||
|
static constexpr int OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1;
|
||||||
|
static constexpr int OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1;
|
||||||
|
using reverse_packet = reverse_packet_cond<PacketType, ReversePacket>;
|
||||||
|
|
||||||
|
Index actualRow = ReverseRow ? m_rows.value() - row - OffsetRow : row;
|
||||||
|
Index actualCol = ReverseCol ? m_cols.value() - col - OffsetCol : col;
|
||||||
|
Index actualBegin = ReversePacket ? (PacketSize - count - begin) : begin;
|
||||||
|
|
||||||
|
return reverse_packet::run(
|
||||||
|
m_argImpl.template packetSegment<LoadMode, PacketType>(actualRow, actualCol, actualBegin, count));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const {
|
||||||
|
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
||||||
|
|
||||||
|
Index actualIndex = m_rows.value() * m_cols.value() - index - PacketSize;
|
||||||
|
Index actualBegin = PacketSize - count - begin;
|
||||||
|
|
||||||
|
return preverse(m_argImpl.template packetSegment<LoadMode, PacketType>(actualIndex, actualBegin, count));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index row, Index col, const PacketType& x, Index begin,
|
||||||
|
Index count) {
|
||||||
|
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
||||||
|
static constexpr int OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1;
|
||||||
|
static constexpr int OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1;
|
||||||
|
using reverse_packet = reverse_packet_cond<PacketType, ReversePacket>;
|
||||||
|
|
||||||
|
Index actualRow = ReverseRow ? m_rows.value() - row - OffsetRow : row;
|
||||||
|
Index actualCol = ReverseCol ? m_cols.value() - col - OffsetCol : col;
|
||||||
|
Index actualBegin = ReversePacket ? (PacketSize - count - begin) : begin;
|
||||||
|
|
||||||
|
m_argImpl.template writePacketSegment<LoadMode>(actualRow, actualCol, reverse_packet::run(x), actualBegin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index index, const PacketType& x, Index begin,
|
||||||
|
Index count) {
|
||||||
|
static constexpr int PacketSize = unpacket_traits<PacketType>::size;
|
||||||
|
|
||||||
|
Index actualIndex = m_rows.value() * m_cols.value() - index - PacketSize;
|
||||||
|
Index actualBegin = PacketSize - count - begin;
|
||||||
|
|
||||||
|
m_argImpl.template writePacketSegment<LoadMode>(actualIndex, preverse(x), actualBegin, count);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
@ -1621,7 +1982,7 @@ struct evaluator<Diagonal<ArgType, DiagIndex>> : evaluator_base<Diagonal<ArgType
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
evaluator<ArgType> m_argImpl;
|
evaluator<ArgType> m_argImpl;
|
||||||
const internal::variable_if_dynamicindex<Index, XprType::DiagIndex> m_index;
|
const variable_if_dynamicindex<Index, XprType::DiagIndex> m_index;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index rowOffset() const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EIGEN_CONSTEXPR Index rowOffset() const {
|
||||||
|
@ -1562,6 +1562,72 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) {
|
|||||||
return (Packet)pand(result, peven_mask(result)); // atan2 0 atan2 0 ...
|
return (Packet)pand(result, peven_mask(result)); // atan2 0 atan2 0 ...
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** \internal \returns a packet populated with values in the range [begin, begin + count). Elements
|
||||||
|
* outside this range are not defined. \a *from does not need to be aligned, and can be null if \a count is zero.*/
|
||||||
|
template <typename Packet>
|
||||||
|
EIGEN_DEVICE_FUNC inline Packet ploaduSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
|
||||||
|
Index count) {
|
||||||
|
using Scalar = typename unpacket_traits<Packet>::type;
|
||||||
|
constexpr Index PacketSize = unpacket_traits<Packet>::size;
|
||||||
|
eigen_assert((begin >= 0 && count >= 0 && begin + count <= PacketSize) && "invalid range");
|
||||||
|
Scalar aux[PacketSize];
|
||||||
|
smart_copy(from + begin, from + begin + count, aux + begin);
|
||||||
|
return ploadu<Packet>(aux);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \internal \returns a packet populated with values in the range [begin, begin + count). Elements
|
||||||
|
* outside this range are not defined. \a *from must be aligned, and cannot be null.*/
|
||||||
|
template <typename Packet>
|
||||||
|
EIGEN_DEVICE_FUNC inline Packet ploadSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
|
||||||
|
Index count) {
|
||||||
|
return ploaduSegment<Packet>(from, begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \internal copy the packet \a from in the range [begin, begin + count) to \a *to.
|
||||||
|
Elements outside of the range [begin, begin + count) are not defined. \a *to does not need to be aligned, and can be
|
||||||
|
null if \a count is zero.*/
|
||||||
|
template <typename Scalar, typename Packet>
|
||||||
|
EIGEN_DEVICE_FUNC inline void pstoreuSegment(Scalar* to, const Packet& from, Index begin, Index count) {
|
||||||
|
constexpr Index PacketSize = unpacket_traits<Packet>::size;
|
||||||
|
eigen_assert((begin >= 0 && count >= 0 && begin + count <= PacketSize) && "invalid range");
|
||||||
|
Scalar aux[PacketSize];
|
||||||
|
pstoreu<Scalar, Packet>(aux, from);
|
||||||
|
smart_copy(aux + begin, aux + begin + count, to + begin);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \internal copy the packet \a from in the range [begin, begin + count) to \a *to.
|
||||||
|
Elements outside of the range [begin, begin + count) are not defined. \a *to must be aligned, and cannot be
|
||||||
|
null.*/
|
||||||
|
template <typename Scalar, typename Packet>
|
||||||
|
EIGEN_DEVICE_FUNC inline void pstoreSegment(Scalar* to, const Packet& from, Index begin, Index count) {
|
||||||
|
return pstoreuSegment(to, from, begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \internal \returns a packet populated with values in the range [begin, begin + count). Elements
|
||||||
|
* outside this range are not defined.*/
|
||||||
|
template <typename Packet, int Alignment>
|
||||||
|
EIGEN_DEVICE_FUNC inline Packet ploadtSegment(const typename unpacket_traits<Packet>::type* from, Index begin,
|
||||||
|
Index count) {
|
||||||
|
constexpr int RequiredAlignment = unpacket_traits<Packet>::alignment;
|
||||||
|
if (Alignment >= RequiredAlignment) {
|
||||||
|
return ploadSegment<Packet>(from, begin, count);
|
||||||
|
} else {
|
||||||
|
return ploaduSegment<Packet>(from, begin, count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** \internal copy the packet \a from in the range [begin, begin + count) to \a *to.
|
||||||
|
Elements outside of the range [begin, begin + count) are not defined.*/
|
||||||
|
template <typename Scalar, typename Packet, int Alignment>
|
||||||
|
EIGEN_DEVICE_FUNC inline void pstoretSegment(Scalar* to, const Packet& from, Index begin, Index count) {
|
||||||
|
constexpr int RequiredAlignment = unpacket_traits<Packet>::alignment;
|
||||||
|
if (Alignment >= RequiredAlignment) {
|
||||||
|
pstoreSegment<Scalar, Packet>(to, from, begin, count);
|
||||||
|
} else {
|
||||||
|
pstoreuSegment<Scalar, Packet>(to, from, begin, count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#ifndef EIGEN_NO_IO
|
#ifndef EIGEN_NO_IO
|
||||||
|
|
||||||
template <typename Packet>
|
template <typename Packet>
|
||||||
|
@ -283,7 +283,7 @@ void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs& lhs, cons
|
|||||||
template <typename Lhs, typename Rhs>
|
template <typename Lhs, typename Rhs>
|
||||||
struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, OuterProduct> {
|
struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, OuterProduct> {
|
||||||
template <typename T>
|
template <typename T>
|
||||||
struct is_row_major : std::conditional_t<(int(T::Flags) & RowMajorBit), internal::true_type, internal::false_type> {};
|
struct is_row_major : bool_constant<(int(T::Flags) & RowMajorBit)> {};
|
||||||
typedef typename Product<Lhs, Rhs>::Scalar Scalar;
|
typedef typename Product<Lhs, Rhs>::Scalar Scalar;
|
||||||
|
|
||||||
// TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
|
// TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
|
||||||
@ -445,7 +445,7 @@ struct generic_product_impl<Lhs, Rhs, DenseShape, DenseShape, CoeffBasedProductM
|
|||||||
|
|
||||||
eval_dynamic_impl(dst, blas_traits<Lhs>::extract(lhs).template conjugateIf<ConjLhs>(),
|
eval_dynamic_impl(dst, blas_traits<Lhs>::extract(lhs).template conjugateIf<ConjLhs>(),
|
||||||
blas_traits<Rhs>::extract(rhs).template conjugateIf<ConjRhs>(), func, actualAlpha,
|
blas_traits<Rhs>::extract(rhs).template conjugateIf<ConjRhs>(), func, actualAlpha,
|
||||||
std::conditional_t<HasScalarFactor, true_type, false_type>());
|
bool_constant<HasScalarFactor>());
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
@ -635,6 +635,24 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|||||||
return packet<LoadMode, PacketType>(row, col);
|
return packet<LoadMode, PacketType>(row, col);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetSegment(Index row, Index col, Index begin,
|
||||||
|
Index count) const {
|
||||||
|
PacketType res;
|
||||||
|
typedef etor_product_packet_impl<bool(int(Flags) & RowMajorBit) ? RowMajor : ColMajor,
|
||||||
|
Unroll ? int(InnerSize) : Dynamic, LhsEtorType, RhsEtorType, PacketType, LoadMode>
|
||||||
|
PacketImpl;
|
||||||
|
PacketImpl::run_segment(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res, begin, count);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetSegment(Index index, Index begin, Index count) const {
|
||||||
|
const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? 0 : index;
|
||||||
|
const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? index : 0;
|
||||||
|
return packetSegment<LoadMode, PacketType>(row, col, begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
add_const_on_value_type_t<LhsNested> m_lhs;
|
add_const_on_value_type_t<LhsNested> m_lhs;
|
||||||
add_const_on_value_type_t<RhsNested> m_rhs;
|
add_const_on_value_type_t<RhsNested> m_rhs;
|
||||||
@ -670,6 +688,13 @@ struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
|
|||||||
res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex - 1))),
|
res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex - 1))),
|
||||||
rhs.template packet<LoadMode, Packet>(Index(UnrollingIndex - 1), col), res);
|
rhs.template packet<LoadMode, Packet>(Index(UnrollingIndex - 1), col), res);
|
||||||
}
|
}
|
||||||
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
|
||||||
|
Index innerDim, Packet& res, Index begin, Index count) {
|
||||||
|
etor_product_packet_impl<RowMajor, UnrollingIndex - 1, Lhs, Rhs, Packet, LoadMode>::run_segment(
|
||||||
|
row, col, lhs, rhs, innerDim, res, begin, count);
|
||||||
|
res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex - 1))),
|
||||||
|
rhs.template packetSegment<LoadMode, Packet>(Index(UnrollingIndex - 1), col, begin, count), res);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
template <int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||||
@ -681,6 +706,13 @@ struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, Load
|
|||||||
res = pmadd(lhs.template packet<LoadMode, Packet>(row, Index(UnrollingIndex - 1)),
|
res = pmadd(lhs.template packet<LoadMode, Packet>(row, Index(UnrollingIndex - 1)),
|
||||||
pset1<Packet>(rhs.coeff(Index(UnrollingIndex - 1), col)), res);
|
pset1<Packet>(rhs.coeff(Index(UnrollingIndex - 1), col)), res);
|
||||||
}
|
}
|
||||||
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
|
||||||
|
Index innerDim, Packet& res, Index begin, Index count) {
|
||||||
|
etor_product_packet_impl<ColMajor, UnrollingIndex - 1, Lhs, Rhs, Packet, LoadMode>::run_segment(
|
||||||
|
row, col, lhs, rhs, innerDim, res, begin, count);
|
||||||
|
res = pmadd(lhs.template packetSegment<LoadMode, Packet>(row, Index(UnrollingIndex - 1), begin, count),
|
||||||
|
pset1<Packet>(rhs.coeff(Index(UnrollingIndex - 1), col)), res);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||||
@ -689,6 +721,12 @@ struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode> {
|
|||||||
Index /*innerDim*/, Packet& res) {
|
Index /*innerDim*/, Packet& res) {
|
||||||
res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))), rhs.template packet<LoadMode, Packet>(Index(0), col));
|
res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))), rhs.template packet<LoadMode, Packet>(Index(0), col));
|
||||||
}
|
}
|
||||||
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
|
||||||
|
Index /*innerDim*/, Packet& res, Index begin,
|
||||||
|
Index count) {
|
||||||
|
res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),
|
||||||
|
rhs.template packetSegment<LoadMode, Packet>(Index(0), col, begin, count));
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||||
@ -697,6 +735,12 @@ struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode> {
|
|||||||
Index /*innerDim*/, Packet& res) {
|
Index /*innerDim*/, Packet& res) {
|
||||||
res = pmul(lhs.template packet<LoadMode, Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
|
res = pmul(lhs.template packet<LoadMode, Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
|
||||||
}
|
}
|
||||||
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
|
||||||
|
Index /*innerDim*/, Packet& res, Index begin,
|
||||||
|
Index count) {
|
||||||
|
res = pmul(lhs.template packetSegment<LoadMode, Packet>(row, Index(0), begin, count),
|
||||||
|
pset1<Packet>(rhs.coeff(Index(0), col)));
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||||
@ -705,6 +749,11 @@ struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode> {
|
|||||||
const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res) {
|
const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res) {
|
||||||
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
||||||
}
|
}
|
||||||
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/,
|
||||||
|
const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res,
|
||||||
|
Index /*begin*/, Index /*count*/) {
|
||||||
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||||
@ -713,6 +762,11 @@ struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode> {
|
|||||||
const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res) {
|
const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res) {
|
||||||
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
||||||
}
|
}
|
||||||
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/,
|
||||||
|
const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res,
|
||||||
|
Index /*begin*/, Index /*count*/) {
|
||||||
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||||
@ -723,6 +777,13 @@ struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> {
|
|||||||
for (Index i = 0; i < innerDim; ++i)
|
for (Index i = 0; i < innerDim; ++i)
|
||||||
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode, Packet>(i, col), res);
|
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode, Packet>(i, col), res);
|
||||||
}
|
}
|
||||||
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
|
||||||
|
Index innerDim, Packet& res, Index begin, Index count) {
|
||||||
|
res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
|
||||||
|
for (Index i = 0; i < innerDim; ++i)
|
||||||
|
res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packetSegment<LoadMode, Packet>(i, col, begin, count),
|
||||||
|
res);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
template <typename Lhs, typename Rhs, typename Packet, int LoadMode>
|
||||||
@ -733,6 +794,13 @@ struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode> {
|
|||||||
for (Index i = 0; i < innerDim; ++i)
|
for (Index i = 0; i < innerDim; ++i)
|
||||||
res = pmadd(lhs.template packet<LoadMode, Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
|
res = pmadd(lhs.template packet<LoadMode, Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
|
||||||
}
|
}
|
||||||
|
// Segment variant of run(): accumulates, over the inner dimension, the product of a partial lhs packet
// with one broadcast rhs coefficient. `begin` and `count` are forwarded unchanged to packetSegment.
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs,
                                                              Index innerDim, Packet& res, Index begin, Index count) {
  // Start from a zero accumulator.
  res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
  for (Index k = 0; k < innerDim; ++k) {
    const Packet lhsSegment = lhs.template packetSegment<LoadMode, Packet>(row, k, begin, count);
    const Packet rhsBroadcast = pset1<Packet>(rhs.coeff(k, col));
    res = pmadd(lhsSegment, rhsBroadcast, res);
  }
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/***************************************************************************
|
/***************************************************************************
|
||||||
@ -871,6 +939,26 @@ struct diagonal_product_evaluator_base : evaluator_base<Derived> {
|
|||||||
m_diagImpl.template packet<DiagonalPacketLoadMode, PacketType>(id));
|
m_diagImpl.template packet<DiagonalPacketLoadMode, PacketType>(id));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_STRONG_INLINE PacketType packet_segment_impl(Index row, Index col, Index id, Index begin, Index count,
|
||||||
|
internal::true_type) const {
|
||||||
|
return internal::pmul(m_matImpl.template packetSegment<LoadMode, PacketType>(row, col, begin, count),
|
||||||
|
internal::pset1<PacketType>(m_diagImpl.coeff(id)));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_STRONG_INLINE PacketType packet_segment_impl(Index row, Index col, Index id, Index begin, Index count,
|
||||||
|
internal::false_type) const {
|
||||||
|
enum {
|
||||||
|
InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
|
||||||
|
DiagonalPacketLoadMode = plain_enum_min(
|
||||||
|
LoadMode,
|
||||||
|
((InnerSize % 16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!!
|
||||||
|
};
|
||||||
|
return internal::pmul(m_matImpl.template packetSegment<LoadMode, PacketType>(row, col, begin, count),
|
||||||
|
m_diagImpl.template packetSegment<DiagonalPacketLoadMode, PacketType>(id, begin, count));
|
||||||
|
}
|
||||||
|
|
||||||
evaluator<DiagonalType> m_diagImpl;
|
evaluator<DiagonalType> m_diagImpl;
|
||||||
evaluator<MatrixType> m_matImpl;
|
evaluator<MatrixType> m_matImpl;
|
||||||
};
|
};
|
||||||
@ -892,7 +980,8 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
|
|||||||
typedef typename XprType::PlainObject PlainObject;
|
typedef typename XprType::PlainObject PlainObject;
|
||||||
typedef typename Lhs::DiagonalVectorType DiagonalType;
|
typedef typename Lhs::DiagonalVectorType DiagonalType;
|
||||||
|
|
||||||
enum { StorageOrder = Base::StorageOrder_ };
|
static constexpr int StorageOrder = Base::StorageOrder_;
|
||||||
|
using IsRowMajor_t = bool_constant<StorageOrder == RowMajor>;
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) {}
|
EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) {}
|
||||||
|
|
||||||
@ -905,8 +994,7 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
|
|||||||
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||||
// FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.
|
// FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.
|
||||||
// See also similar calls below.
|
// See also similar calls below.
|
||||||
return this->template packet_impl<LoadMode, PacketType>(
|
return this->template packet_impl<LoadMode, PacketType>(row, col, row, IsRowMajor_t());
|
||||||
row, col, row, std::conditional_t<int(StorageOrder) == RowMajor, internal::true_type, internal::false_type>());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int LoadMode, typename PacketType>
|
template <int LoadMode, typename PacketType>
|
||||||
@ -914,6 +1002,19 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
|
|||||||
return packet<LoadMode, PacketType>(int(StorageOrder) == ColMajor ? idx : 0,
|
return packet<LoadMode, PacketType>(int(StorageOrder) == ColMajor ? idx : 0,
|
||||||
int(StorageOrder) == ColMajor ? 0 : idx);
|
int(StorageOrder) == ColMajor ? 0 : idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const {
|
||||||
|
// FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.
|
||||||
|
// See also similar calls below.
|
||||||
|
return this->template packet_segment_impl<LoadMode, PacketType>(row, col, row, begin, count, IsRowMajor_t());
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_STRONG_INLINE PacketType packetSegment(Index idx, Index begin, Index count) const {
|
||||||
|
return packetSegment<LoadMode, PacketType>(StorageOrder == ColMajor ? idx : 0, StorageOrder == ColMajor ? 0 : idx,
|
||||||
|
begin, count);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -933,7 +1034,8 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
|
|||||||
typedef Product<Lhs, Rhs, ProductKind> XprType;
|
typedef Product<Lhs, Rhs, ProductKind> XprType;
|
||||||
typedef typename XprType::PlainObject PlainObject;
|
typedef typename XprType::PlainObject PlainObject;
|
||||||
|
|
||||||
enum { StorageOrder = Base::StorageOrder_ };
|
static constexpr int StorageOrder = Base::StorageOrder_;
|
||||||
|
using IsColMajor_t = bool_constant<StorageOrder == ColMajor>;
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal()) {}
|
EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal()) {}
|
||||||
|
|
||||||
@ -944,14 +1046,23 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape,
|
|||||||
#ifndef EIGEN_GPUCC
|
#ifndef EIGEN_GPUCC
|
||||||
template <int LoadMode, typename PacketType>
|
template <int LoadMode, typename PacketType>
|
||||||
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const {
|
||||||
return this->template packet_impl<LoadMode, PacketType>(
|
return this->template packet_impl<LoadMode, PacketType>(row, col, col, IsColMajor_t());
|
||||||
row, col, col, std::conditional_t<int(StorageOrder) == ColMajor, internal::true_type, internal::false_type>());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <int LoadMode, typename PacketType>
|
template <int LoadMode, typename PacketType>
|
||||||
EIGEN_STRONG_INLINE PacketType packet(Index idx) const {
|
EIGEN_STRONG_INLINE PacketType packet(Index idx) const {
|
||||||
return packet<LoadMode, PacketType>(int(StorageOrder) == ColMajor ? idx : 0,
|
return packet<LoadMode, PacketType>(StorageOrder == ColMajor ? idx : 0, StorageOrder == ColMajor ? 0 : idx);
|
||||||
int(StorageOrder) == ColMajor ? 0 : idx);
|
}
|
||||||
|
|
||||||
|
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const {
|
||||||
|
return this->template packet_segment_impl<LoadMode, PacketType>(row, col, col, begin, count, IsColMajor_t());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Linear-index overload of packetSegment: `idx` is used as the row when StorageOrder is ColMajor and
// as the column otherwise, with the other coordinate set to 0. `begin` and `count` are forwarded
// unchanged to the (row, col) overload.
template <int LoadMode, typename PacketType>
|
||||||
|
EIGEN_STRONG_INLINE PacketType packetSegment(Index idx, Index begin, Index count) const {
|
||||||
|
return packetSegment<LoadMode, PacketType>(StorageOrder == ColMajor ? idx : 0, StorageOrder == ColMajor ? 0 : idx,
|
||||||
|
begin, count);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
@ -65,6 +65,31 @@ class generic_dense_assignment_kernel<DstEvaluatorTypeT, SrcEvaluatorTypeT,
|
|||||||
Index col = Base::colIndexByOuterInner(outer, inner);
|
Index col = Base::colIndexByOuterInner(outer, inner);
|
||||||
assignPacket<StoreMode, LoadMode, PacketType>(row, col);
|
assignPacket<StoreMode, LoadMode, PacketType>(row, col);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <int StoreMode, int LoadMode, typename PacketType>
|
||||||
|
EIGEN_STRONG_INLINE void assignPacketSegment(Index row, Index col, Index begin, Index count) {
|
||||||
|
PacketType tmp = m_src.template packetSegment<LoadMode, PacketType>(row, col, begin, count);
|
||||||
|
const_cast<SrcEvaluatorTypeT &>(m_src).template writePacketSegment<LoadMode>(
|
||||||
|
row, col, m_dst.template packetSegment<StoreMode, PacketType>(row, col, begin, count), begin, count);
|
||||||
|
m_dst.template writePacketSegment<StoreMode>(row, col, tmp, begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int StoreMode, int LoadMode, typename PacketType>
|
||||||
|
EIGEN_STRONG_INLINE void assignPacketSegment(Index index, Index begin, Index count) {
|
||||||
|
PacketType tmp = m_src.template packetSegment<LoadMode, PacketType>(index, begin, count);
|
||||||
|
const_cast<SrcEvaluatorTypeT &>(m_src).template writePacketSegment<LoadMode>(
|
||||||
|
index, m_dst.template packetSegment<StoreMode, PacketType>(index, begin, count), begin, count);
|
||||||
|
m_dst.template writePacketSegment<StoreMode>(index, tmp, begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I
|
||||||
|
// mean no CRTP (Gael)
|
||||||
|
template <int StoreMode, int LoadMode, typename PacketType>
|
||||||
|
EIGEN_STRONG_INLINE void assignPacketSegmentByOuterInner(Index outer, Index inner, Index begin, Index count) {
|
||||||
|
Index row = Base::rowIndexByOuterInner(outer, inner);
|
||||||
|
Index col = Base::colIndexByOuterInner(outer, inner);
|
||||||
|
assignPacketSegment<StoreMode, LoadMode, PacketType>(row, col, begin, count);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace internal
|
} // namespace internal
|
||||||
|
@ -36,6 +36,10 @@ template <typename MatrixType, typename MemberOp, int Direction>
|
|||||||
class PartialReduxExpr;
|
class PartialReduxExpr;
|
||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
|
// Opts PartialReduxExpr out of packet-segment evaluation: enable_packet_segment<...>::value is false
// for every PartialReduxExpr instantiation.
template <typename ArgType, typename MemberOp, int Direction>
|
||||||
|
struct enable_packet_segment<PartialReduxExpr<ArgType, MemberOp, Direction>> : std::false_type {};
|
||||||
|
|
||||||
template <typename MatrixType, typename MemberOp, int Direction>
|
template <typename MatrixType, typename MemberOp, int Direction>
|
||||||
struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> > : traits<MatrixType> {
|
struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> > : traits<MatrixType> {
|
||||||
typedef typename MemberOp::result_type Scalar;
|
typedef typename MemberOp::result_type Scalar;
|
||||||
|
@ -507,6 +507,57 @@ EIGEN_STRONG_INLINE Packet2cd pnmsub(const Packet2cd& a, const Packet2cd& b, con
|
|||||||
return pnegate(pmadd(a, b, c));
|
return pnegate(pmadd(a, b, c));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*---------------- load/store segment support ----------------*/
|
||||||
|
|
||||||
|
/*---------------- std::complex<float> ----------------*/
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct has_packet_segment<Packet2cf> : std::true_type {};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct has_packet_segment<Packet4cf> : std::true_type {};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline Packet2cf ploaduSegment<Packet2cf>(const std::complex<float>* from, Index begin, Index count) {
|
||||||
|
return (Packet2cf)_mm_maskload_ps(&numext::real_ref(*from), segment_mask_2x64(begin, count));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline void pstoreuSegment<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index begin,
|
||||||
|
Index count) {
|
||||||
|
_mm_maskstore_ps(&numext::real_ref(*to), segment_mask_2x64(begin, count), from.v);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline Packet4cf ploaduSegment<Packet4cf>(const std::complex<float>* from, Index begin, Index count) {
|
||||||
|
return (Packet4cf)_mm256_maskload_ps(&numext::real_ref(*from), segment_mask_4x64(begin, count));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline void pstoreuSegment<std::complex<float>, Packet4cf>(std::complex<float>* to, const Packet4cf& from, Index begin,
|
||||||
|
Index count) {
|
||||||
|
_mm256_maskstore_ps(&numext::real_ref(*to), segment_mask_4x64(begin, count), from.v);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*---------------- std::complex<double> ----------------*/
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct has_packet_segment<Packet2cd> : std::true_type {};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline Packet2cd ploaduSegment<Packet2cd>(const std::complex<double>* from, Index begin, Index count) {
|
||||||
|
return (Packet2cd)_mm256_maskload_pd(&numext::real_ref(*from), segment_mask_4x64(2 * begin, 2 * count));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline void pstoreuSegment<std::complex<double>, Packet2cd>(std::complex<double>* to, const Packet2cd& from,
|
||||||
|
Index begin, Index count) {
|
||||||
|
_mm256_maskstore_pd(&numext::real_ref(*to), segment_mask_4x64(2 * begin, 2 * count), from.v);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*---------------- end load/store segment support ----------------*/
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
@ -2938,6 +2938,258 @@ EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet8bf, 4>& kernel) {
|
|||||||
kernel.packet[3] = _mm_unpackhi_epi32(ab_47, cd_47);
|
kernel.packet[3] = _mm_unpackhi_epi32(ab_47, cd_47);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*---------------- load/store segment support ----------------*/
|
||||||
|
|
||||||
|
// returns a mask of 8-bit elements (at most 4) that are all 1's in the range [begin, begin + count) and 0 elsewhere.
|
||||||
|
inline __m128i segment_mask_4x8(Index begin, Index count) {
|
||||||
|
eigen_assert(begin >= 0 && begin + count <= 4);
|
||||||
|
long long mask = 1;
|
||||||
|
mask <<= CHAR_BIT * count;
|
||||||
|
mask--;
|
||||||
|
mask <<= CHAR_BIT * begin;
|
||||||
|
#if defined(_WIN32) && !defined(_WIN64)
|
||||||
|
return _mm_loadl_epi64(reinterpret_cast<const __m128i*>(&mask));
|
||||||
|
#else
|
||||||
|
return _mm_cvtsi64_si128(mask);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// returns a mask of 8-bit elements (at most 8) that are all 1's in the range [begin, begin + count) and 0 elsewhere.
|
||||||
|
inline __m128i segment_mask_8x8(Index begin, Index count) {
|
||||||
|
eigen_assert(begin >= 0 && begin + count <= 8);
|
||||||
|
long long mask = 1;
|
||||||
|
// avoid UB when count == 8
|
||||||
|
mask <<= (CHAR_BIT / 2) * count;
|
||||||
|
mask <<= (CHAR_BIT / 2) * count;
|
||||||
|
mask--;
|
||||||
|
mask <<= CHAR_BIT * begin;
|
||||||
|
#if defined(_WIN32) && !defined(_WIN64)
|
||||||
|
return _mm_loadl_epi64(reinterpret_cast<const __m128i*>(&mask));
|
||||||
|
#else
|
||||||
|
return _mm_cvtsi64_si128(mask);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// returns a mask of 32-bit elements (at most 4) that are all 1's in the range [begin, begin + count) and 0 elsewhere.
|
||||||
|
inline __m128i segment_mask_4x32(Index begin, Index count) {
|
||||||
|
eigen_assert(begin >= 0 && begin + count <= 4);
|
||||||
|
return _mm_cvtepi8_epi32(segment_mask_4x8(begin, count));
|
||||||
|
}
|
||||||
|
|
||||||
|
// returns a mask of 64-bit elements (at most 2) that are all 1's in the range [begin, begin + count) and 0 elsewhere.
|
||||||
|
inline __m128i segment_mask_2x64(Index begin, Index count) {
|
||||||
|
eigen_assert(begin >= 0 && begin + count <= 2);
|
||||||
|
return _mm_cvtepi8_epi64(segment_mask_4x8(begin, count));
|
||||||
|
}
|
||||||
|
|
||||||
|
// returns a mask of 32-bit elements (at most 8) that are all 1's in the range [begin, begin + count) and 0 elsewhere.
|
||||||
|
inline __m256i segment_mask_8x32(Index begin, Index count) {
|
||||||
|
__m128i mask_epi8 = segment_mask_8x8(begin, count);
|
||||||
|
#ifdef EIGEN_VECTORIZE_AVX2
|
||||||
|
__m256i mask_epi32 = _mm256_cvtepi8_epi32(mask_epi8);
|
||||||
|
#else
|
||||||
|
__m128i mask_epi32_lo = _mm_cvtepi8_epi32(mask_epi8);
|
||||||
|
__m128i mask_epi32_hi = _mm_cvtepi8_epi32(_mm_srli_epi64(mask_epi8, 32));
|
||||||
|
__m256i mask_epi32 = _mm256_insertf128_si256(_mm256_castsi128_si256(mask_epi32_lo), mask_epi32_hi, 1);
|
||||||
|
#endif
|
||||||
|
return mask_epi32;
|
||||||
|
}
|
||||||
|
|
||||||
|
// returns a mask of 64-bit elements (at most 4) that are all 1's in the range [begin, begin + count) and 0 elsewhere.
|
||||||
|
inline __m256i segment_mask_4x64(Index begin, Index count) {
|
||||||
|
__m128i mask_epi8 = segment_mask_4x8(begin, count);
|
||||||
|
#ifdef EIGEN_VECTORIZE_AVX2
|
||||||
|
__m256i mask_epi64 = _mm256_cvtepi8_epi64(mask_epi8);
|
||||||
|
#else
|
||||||
|
__m128i mask_epi64_lo = _mm_cvtepi8_epi64(mask_epi8);
|
||||||
|
__m128i mask_epi64_hi = _mm_cvtepi8_epi64(_mm_srli_epi64(mask_epi8, 16));
|
||||||
|
__m256i mask_epi64 = _mm256_insertf128_si256(_mm256_castsi128_si256(mask_epi64_lo), mask_epi64_hi, 1);
|
||||||
|
#endif
|
||||||
|
return mask_epi64;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*---------------- float ----------------*/
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct has_packet_segment<Packet4f> : std::true_type {};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct has_packet_segment<Packet8f> : std::true_type {};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline Packet4f ploaduSegment<Packet4f>(const float* from, Index begin, Index count) {
|
||||||
|
return _mm_maskload_ps(from, segment_mask_4x32(begin, count));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline void pstoreuSegment<float, Packet4f>(float* to, const Packet4f& from, Index begin, Index count) {
|
||||||
|
_mm_maskstore_ps(to, segment_mask_4x32(begin, count), from);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline Packet8f ploaduSegment<Packet8f>(const float* from, Index begin, Index count) {
|
||||||
|
return _mm256_maskload_ps(from, segment_mask_8x32(begin, count));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline void pstoreuSegment<float, Packet8f>(float* to, const Packet8f& from, Index begin, Index count) {
|
||||||
|
_mm256_maskstore_ps(to, segment_mask_8x32(begin, count), from);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*---------------- int32 ----------------*/
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct has_packet_segment<Packet4i> : std::true_type {};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct has_packet_segment<Packet8i> : std::true_type {};
|
||||||
|
|
||||||
|
#ifdef EIGEN_VECTORIZE_AVX2
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline Packet4i ploaduSegment<Packet4i>(const int* from, Index begin, Index count) {
|
||||||
|
return _mm_maskload_epi32(from, segment_mask_4x32(begin, count));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline void pstoreuSegment<int, Packet4i>(int* to, const Packet4i& from, Index begin, Index count) {
|
||||||
|
_mm_maskstore_epi32(to, segment_mask_4x32(begin, count), from);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline Packet8i ploaduSegment<Packet8i>(const int* from, Index begin, Index count) {
|
||||||
|
return _mm256_maskload_epi32(from, segment_mask_8x32(begin, count));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline void pstoreuSegment<int, Packet8i>(int* to, const Packet8i& from, Index begin, Index count) {
|
||||||
|
_mm256_maskstore_epi32(to, segment_mask_8x32(begin, count), from);
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline Packet4i ploaduSegment<Packet4i>(const int* from, Index begin, Index count) {
|
||||||
|
return _mm_castps_si128(ploaduSegment<Packet4f>(reinterpret_cast<const float*>(from), begin, count));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline void pstoreuSegment<int, Packet4i>(int* to, const Packet4i& from, Index begin, Index count) {
|
||||||
|
pstoreuSegment<float, Packet4f>(reinterpret_cast<float*>(to), _mm_castsi128_ps(from), begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline Packet8i ploaduSegment<Packet8i>(const int* from, Index begin, Index count) {
|
||||||
|
return _mm256_castps_si256(ploaduSegment<Packet8f>(reinterpret_cast<const float*>(from), begin, count));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline void pstoreuSegment<int, Packet8i>(int* to, const Packet8i& from, Index begin, Index count) {
|
||||||
|
pstoreuSegment<float, Packet8f>(reinterpret_cast<float*>(to), _mm256_castsi256_ps(from), begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*---------------- uint32 ----------------*/
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct has_packet_segment<Packet4ui> : std::true_type {};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct has_packet_segment<Packet8ui> : std::true_type {};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline Packet4ui ploaduSegment<Packet4ui>(const uint32_t* from, Index begin, Index count) {
|
||||||
|
return Packet4ui(ploaduSegment<Packet4i>(reinterpret_cast<const int*>(from), begin, count));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline void pstoreuSegment<uint32_t, Packet4ui>(uint32_t* to, const Packet4ui& from, Index begin, Index count) {
|
||||||
|
pstoreuSegment<int, Packet4i>(reinterpret_cast<int*>(to), Packet4i(from), begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline Packet8ui ploaduSegment<Packet8ui>(const uint32_t* from, Index begin, Index count) {
|
||||||
|
return Packet8ui(ploaduSegment<Packet8i>(reinterpret_cast<const int*>(from), begin, count));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline void pstoreuSegment<uint32_t, Packet8ui>(uint32_t* to, const Packet8ui& from, Index begin, Index count) {
|
||||||
|
pstoreuSegment<int, Packet8i>(reinterpret_cast<int*>(to), Packet8i(from), begin, count);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*---------------- double ----------------*/
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct has_packet_segment<Packet2d> : std::true_type {};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct has_packet_segment<Packet4d> : std::true_type {};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline Packet2d ploaduSegment<Packet2d>(const double* from, Index begin, Index count) {
|
||||||
|
return _mm_maskload_pd(from, segment_mask_2x64(begin, count));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline void pstoreuSegment<double, Packet2d>(double* to, const Packet2d& from, Index begin, Index count) {
|
||||||
|
_mm_maskstore_pd(to, segment_mask_2x64(begin, count), from);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline Packet4d ploaduSegment<Packet4d>(const double* from, Index begin, Index count) {
|
||||||
|
return _mm256_maskload_pd(from, segment_mask_4x64(begin, count));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline void pstoreuSegment<double, Packet4d>(double* to, const Packet4d& from, Index begin, Index count) {
|
||||||
|
_mm256_maskstore_pd(to, segment_mask_4x64(begin, count), from);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef EIGEN_VECTORIZE_AVX2
|
||||||
|
|
||||||
|
/*---------------- int64_t ----------------*/
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct has_packet_segment<Packet2l> : std::true_type {};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct has_packet_segment<Packet4l> : std::true_type {};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline Packet2l ploaduSegment<Packet2l>(const int64_t* from, Index begin, Index count) {
|
||||||
|
return _mm_maskload_epi64(reinterpret_cast<const long long*>(from), segment_mask_2x64(begin, count));
|
||||||
|
}
|
||||||
|
template <>
|
||||||
|
inline void pstoreuSegment<int64_t, Packet2l>(int64_t* to, const Packet2l& from, Index begin, Index count) {
|
||||||
|
_mm_maskstore_epi64(reinterpret_cast<long long*>(to), segment_mask_2x64(begin, count), from);
|
||||||
|
}
|
||||||
|
template <>
|
||||||
|
inline Packet4l ploaduSegment<Packet4l>(const int64_t* from, Index begin, Index count) {
|
||||||
|
return _mm256_maskload_epi64(reinterpret_cast<const long long*>(from), segment_mask_4x64(begin, count));
|
||||||
|
}
|
||||||
|
template <>
|
||||||
|
inline void pstoreuSegment<int64_t, Packet4l>(int64_t* to, const Packet4l& from, Index begin, Index count) {
|
||||||
|
_mm256_maskstore_epi64(reinterpret_cast<long long*>(to), segment_mask_4x64(begin, count), from);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*---------------- uint64_t ----------------*/
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct has_packet_segment<Packet4ul> : std::true_type {};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline Packet4ul ploaduSegment<Packet4ul>(const uint64_t* from, Index begin, Index count) {
|
||||||
|
return Packet4ul(ploaduSegment<Packet4l>(reinterpret_cast<const int64_t*>(from), begin, count));
|
||||||
|
}
|
||||||
|
template <>
|
||||||
|
inline void pstoreuSegment<uint64_t, Packet4ul>(uint64_t* to, const Packet4ul& from, Index begin, Index count) {
|
||||||
|
pstoreuSegment<int64_t, Packet4l>(reinterpret_cast<int64_t*>(to), Packet4l(from), begin, count);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*---------------- end load/store segment support ----------------*/
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
@ -29,6 +29,11 @@ struct assign_op {
|
|||||||
EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const {
|
EIGEN_STRONG_INLINE void assignPacket(DstScalar* a, const Packet& b) const {
|
||||||
pstoret<DstScalar, Packet, Alignment>(a, b);
|
pstoret<DstScalar, Packet, Alignment>(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Segment counterpart of assignPacket: stores packet `b` at `a` through pstoretSegment, forwarding
// `begin` and `count` unchanged.
// NOTE(review): presumably only the elements in [begin, begin + count) are written - confirm
// against pstoretSegment.
template <int Alignment, typename Packet>
|
||||||
|
EIGEN_STRONG_INLINE void assignPacketSegment(DstScalar* a, const Packet& b, Index begin, Index count) const {
|
||||||
|
pstoretSegment<DstScalar, Packet, Alignment>(a, b, begin, count);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Empty overload for void type (used by PermutationMatrix)
|
// Empty overload for void type (used by PermutationMatrix)
|
||||||
@ -60,6 +65,12 @@ struct compound_assign_op {
|
|||||||
assign_op<DstScalar, DstScalar>().template assignPacket<Alignment, Packet>(
|
assign_op<DstScalar, DstScalar>().template assignPacket<Alignment, Packet>(
|
||||||
a, Func().packetOp(ploadt<Packet, Alignment>(a), b));
|
a, Func().packetOp(ploadt<Packet, Alignment>(a), b));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <int Alignment, typename Packet>
|
||||||
|
EIGEN_STRONG_INLINE void assignPacketSegment(DstScalar* a, const Packet& b, Index begin, Index count) const {
|
||||||
|
assign_op<DstScalar, DstScalar>().template assignPacketSegment<Alignment, Packet>(
|
||||||
|
a, Func().packetOp(ploadtSegment<Packet, Alignment>(a, begin, count), b), begin, count);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename DstScalar, typename SrcScalar, typename Func>
|
template <typename DstScalar, typename SrcScalar, typename Func>
|
||||||
|
@ -438,7 +438,6 @@ struct scalar_quotient_op : binary_op_base<LhsScalar, RhsScalar> {
|
|||||||
}
|
}
|
||||||
template <typename Packet>
|
template <typename Packet>
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const {
|
||||||
maybe_raise_div_by_zero<Packet>::run(b);
|
|
||||||
return internal::pdiv(a, b);
|
return internal::pdiv(a, b);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -28,7 +28,7 @@ struct scalar_constant_op {
|
|||||||
const Scalar m_other;
|
const Scalar m_other;
|
||||||
};
|
};
|
||||||
template <typename Scalar>
|
template <typename Scalar>
|
||||||
struct functor_traits<scalar_constant_op<Scalar> > {
|
struct functor_traits<scalar_constant_op<Scalar>> {
|
||||||
enum {
|
enum {
|
||||||
Cost = 0 /* as the constant value should be loaded in register only once for the whole expression */,
|
Cost = 0 /* as the constant value should be loaded in register only once for the whole expression */,
|
||||||
PacketAccess = packet_traits<Scalar>::Vectorizable,
|
PacketAccess = packet_traits<Scalar>::Vectorizable,
|
||||||
@ -56,7 +56,7 @@ struct scalar_identity_op {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
template <typename Scalar>
|
template <typename Scalar>
|
||||||
struct functor_traits<scalar_identity_op<Scalar> > {
|
struct functor_traits<scalar_identity_op<Scalar>> {
|
||||||
enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true };
|
enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true };
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -86,18 +86,19 @@ struct linspaced_op_impl<Scalar, /*IsInteger*/ false> {
|
|||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const {
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(IndexType i) const {
|
||||||
// Principle:
|
// Principle:
|
||||||
// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
|
// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) )
|
||||||
|
Packet low = pset1<Packet>(m_low);
|
||||||
|
Packet high = pset1<Packet>(m_high);
|
||||||
|
Packet step = pset1<Packet>(m_step);
|
||||||
if (m_flip) {
|
if (m_flip) {
|
||||||
Packet pi = plset<Packet>(Scalar(i - m_size1));
|
Packet pi = plset<Packet>(Scalar(i - m_size1));
|
||||||
Packet res = padd(pset1<Packet>(m_high), pmul(pset1<Packet>(m_step), pi));
|
Packet res = pmadd(step, pi, high);
|
||||||
if (EIGEN_PREDICT_TRUE(i != 0)) return res;
|
Packet mask = pcmp_lt(pzero(res), plset<Packet>(Scalar(i)));
|
||||||
Packet mask = pcmp_lt(pset1<Packet>(0), plset<Packet>(0));
|
return pselect<Packet>(mask, res, low);
|
||||||
return pselect<Packet>(mask, res, pset1<Packet>(m_low));
|
|
||||||
} else {
|
} else {
|
||||||
Packet pi = plset<Packet>(Scalar(i));
|
Packet pi = plset<Packet>(Scalar(i));
|
||||||
Packet res = padd(pset1<Packet>(m_low), pmul(pset1<Packet>(m_step), pi));
|
Packet res = pmadd(step, pi, low);
|
||||||
if (EIGEN_PREDICT_TRUE(i != m_size1 - unpacket_traits<Packet>::size + 1)) return res;
|
Packet mask = pcmp_lt(pi, pset1<Packet>(Scalar(m_size1)));
|
||||||
Packet mask = pcmp_lt(plset<Packet>(0), pset1<Packet>(unpacket_traits<Packet>::size - 1));
|
return pselect<Packet>(mask, res, high);
|
||||||
return pselect<Packet>(mask, res, pset1<Packet>(m_high));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -139,7 +140,7 @@ struct linspaced_op_impl<Scalar, /*IsInteger*/ true> {
|
|||||||
template <typename Scalar>
|
template <typename Scalar>
|
||||||
struct linspaced_op;
|
struct linspaced_op;
|
||||||
template <typename Scalar>
|
template <typename Scalar>
|
||||||
struct functor_traits<linspaced_op<Scalar> > {
|
struct functor_traits<linspaced_op<Scalar>> {
|
||||||
enum {
|
enum {
|
||||||
Cost = 1,
|
Cost = 1,
|
||||||
PacketAccess = (!NumTraits<Scalar>::IsInteger) && packet_traits<Scalar>::HasSetLinear,
|
PacketAccess = (!NumTraits<Scalar>::IsInteger) && packet_traits<Scalar>::HasSetLinear,
|
||||||
@ -192,7 +193,7 @@ struct equalspaced_op {
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <typename Scalar>
|
template <typename Scalar>
|
||||||
struct functor_traits<equalspaced_op<Scalar> > {
|
struct functor_traits<equalspaced_op<Scalar>> {
|
||||||
enum {
|
enum {
|
||||||
Cost = NumTraits<Scalar>::AddCost + NumTraits<Scalar>::MulCost,
|
Cost = NumTraits<Scalar>::AddCost + NumTraits<Scalar>::MulCost,
|
||||||
PacketAccess =
|
PacketAccess =
|
||||||
|
@ -514,6 +514,12 @@ template <typename Xpr>
|
|||||||
struct eigen_memset_helper;
|
struct eigen_memset_helper;
|
||||||
template <typename Xpr, bool use_memset = eigen_memset_helper<Xpr>::value>
|
template <typename Xpr, bool use_memset = eigen_memset_helper<Xpr>::value>
|
||||||
struct eigen_zero_impl;
|
struct eigen_zero_impl;
|
||||||
|
|
||||||
|
template <typename Packet>
|
||||||
|
struct has_packet_segment : std::false_type {};
|
||||||
|
|
||||||
|
template <typename Xpr>
|
||||||
|
struct enable_packet_segment : std::true_type {};
|
||||||
} // namespace internal
|
} // namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
@ -90,12 +90,8 @@ namespace internal {
|
|||||||
* we however don't want to add a dependency to Boost.
|
* we however don't want to add a dependency to Boost.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
struct true_type {
|
using std::false_type;
|
||||||
enum { value = 1 };
|
using std::true_type;
|
||||||
};
|
|
||||||
struct false_type {
|
|
||||||
enum { value = 0 };
|
|
||||||
};
|
|
||||||
|
|
||||||
template <bool Condition>
|
template <bool Condition>
|
||||||
struct bool_constant;
|
struct bool_constant;
|
||||||
|
@ -996,6 +996,27 @@ struct is_matrix_base_xpr : std::is_base_of<MatrixBase<remove_all_t<XprType>>, r
|
|||||||
template <typename XprType>
|
template <typename XprType>
|
||||||
struct is_permutation_base_xpr : std::is_base_of<PermutationBase<remove_all_t<XprType>>, remove_all_t<XprType>> {};
|
struct is_permutation_base_xpr : std::is_base_of<PermutationBase<remove_all_t<XprType>>, remove_all_t<XprType>> {};
|
||||||
|
|
||||||
|
/*---------------- load/store segment support ----------------*/
|
||||||
|
|
||||||
|
// recursively traverse unary, binary, and ternary expressions to determine if packet segments are supported
|
||||||
|
|
||||||
|
template <typename Func, typename Xpr>
|
||||||
|
struct enable_packet_segment<CwiseNullaryOp<Func, Xpr>> : enable_packet_segment<remove_all_t<Xpr>> {};
|
||||||
|
|
||||||
|
template <typename Func, typename Xpr>
|
||||||
|
struct enable_packet_segment<CwiseUnaryOp<Func, Xpr>> : enable_packet_segment<remove_all_t<Xpr>> {};
|
||||||
|
|
||||||
|
template <typename Func, typename LhsXpr, typename RhsXpr>
|
||||||
|
struct enable_packet_segment<CwiseBinaryOp<Func, LhsXpr, RhsXpr>>
|
||||||
|
: bool_constant<enable_packet_segment<remove_all_t<LhsXpr>>::value &&
|
||||||
|
enable_packet_segment<remove_all_t<RhsXpr>>::value> {};
|
||||||
|
|
||||||
|
template <typename Func, typename LhsXpr, typename MidXpr, typename RhsXpr>
|
||||||
|
struct enable_packet_segment<CwiseTernaryOp<Func, LhsXpr, MidXpr, RhsXpr>>
|
||||||
|
: bool_constant<enable_packet_segment<remove_all_t<LhsXpr>>::value &&
|
||||||
|
enable_packet_segment<remove_all_t<MidXpr>>::value &&
|
||||||
|
enable_packet_segment<remove_all_t<RhsXpr>>::value> {};
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
/** \class ScalarBinaryOpTraits
|
/** \class ScalarBinaryOpTraits
|
||||||
|
@ -308,19 +308,24 @@ struct dense_assignment_loop_with_device<Kernel, CoreThreadPoolDevice, LinearVec
|
|||||||
this->template assignPacket<DstAlignment, SrcAlignment, PacketType>(index);
|
this->template assignPacket<DstAlignment, SrcAlignment, PacketType>(index);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment;
|
||||||
|
using head_loop =
|
||||||
|
unaligned_dense_assignment_loop<PacketType, DstAlignment, SrcAlignment, UsePacketSegment, DstIsAligned>;
|
||||||
|
using tail_loop = unaligned_dense_assignment_loop<PacketType, DstAlignment, SrcAlignment, UsePacketSegment, false>;
|
||||||
|
|
||||||
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel& kernel, CoreThreadPoolDevice& device) {
|
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Kernel& kernel, CoreThreadPoolDevice& device) {
|
||||||
const Index size = kernel.size();
|
const Index size = kernel.size();
|
||||||
const Index alignedStart =
|
const Index alignedStart =
|
||||||
DstIsAligned ? 0 : internal::first_aligned<RequestedAlignment>(kernel.dstDataPtr(), size);
|
DstIsAligned ? 0 : internal::first_aligned<RequestedAlignment>(kernel.dstDataPtr(), size);
|
||||||
const Index alignedEnd = alignedStart + numext::round_down(size - alignedStart, PacketSize);
|
const Index alignedEnd = alignedStart + numext::round_down(size - alignedStart, PacketSize);
|
||||||
|
|
||||||
unaligned_dense_assignment_loop<DstIsAligned != 0>::run(kernel, 0, alignedStart);
|
head_loop::run(kernel, 0, alignedStart);
|
||||||
|
|
||||||
constexpr float cost = static_cast<float>(XprEvaluationCost);
|
constexpr float cost = static_cast<float>(XprEvaluationCost);
|
||||||
AssignmentFunctor functor(kernel);
|
AssignmentFunctor functor(kernel);
|
||||||
device.template parallelFor<AssignmentFunctor, PacketSize>(alignedStart, alignedEnd, functor, cost);
|
device.template parallelFor<AssignmentFunctor, PacketSize>(alignedStart, alignedEnd, functor, cost);
|
||||||
|
|
||||||
unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size);
|
tail_loop::run(kernel, alignedEnd, size);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -186,6 +186,7 @@ ei_add_test(mixingtypes)
|
|||||||
ei_add_test(float_conversion)
|
ei_add_test(float_conversion)
|
||||||
ei_add_test(io)
|
ei_add_test(io)
|
||||||
ei_add_test(packetmath "-DEIGEN_FAST_MATH=1")
|
ei_add_test(packetmath "-DEIGEN_FAST_MATH=1")
|
||||||
|
ei_add_test(packet_segment)
|
||||||
ei_add_test(vectorization_logic)
|
ei_add_test(vectorization_logic)
|
||||||
ei_add_test(basicstuff)
|
ei_add_test(basicstuff)
|
||||||
ei_add_test(constexpr)
|
ei_add_test(constexpr)
|
||||||
|
168
test/packet_segment.cpp
Normal file
168
test/packet_segment.cpp
Normal file
@ -0,0 +1,168 @@
|
|||||||
|
// This file is part of Eigen, a lightweight C++ template library
|
||||||
|
// for linear algebra.
|
||||||
|
//
|
||||||
|
// Copyright (C) 2025 The Eigen Authors
|
||||||
|
//
|
||||||
|
// This Source Code Form is subject to the terms of the Mozilla
|
||||||
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
||||||
|
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
|
||||||
|
#include "main.h"
|
||||||
|
|
||||||
|
template <typename Scalar, typename Packet>
|
||||||
|
void verify_data(const Scalar* data_in, const Scalar* data_out, const Packet& a, Index begin, Index count) {
|
||||||
|
constexpr int PacketSize = internal::unpacket_traits<Packet>::size;
|
||||||
|
bool ok = true;
|
||||||
|
for (Index i = begin; i < begin + count; i++) {
|
||||||
|
ok = ok && numext::equal_strict(data_in[i], data_out[i]);
|
||||||
|
}
|
||||||
|
if (!ok) {
|
||||||
|
std::cout << "begin: " << begin << ", count: " << count << "\n";
|
||||||
|
std::cout << "Scalar type: " << type_name(Scalar()) << " x " << PacketSize << "\n";
|
||||||
|
std::cout << "data in: {";
|
||||||
|
for (Index i = 0; i < PacketSize; i++) {
|
||||||
|
if (i > 0) std::cout << ",";
|
||||||
|
if (i < begin || i >= begin + count) {
|
||||||
|
std::cout << "MASK";
|
||||||
|
} else {
|
||||||
|
std::cout << data_in[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::cout << "}\n";
|
||||||
|
std::cout << "data out: {";
|
||||||
|
for (Index i = 0; i < PacketSize; i++) {
|
||||||
|
if (i > 0) std::cout << ",";
|
||||||
|
if (i < begin || i >= begin + count) {
|
||||||
|
std::cout << "MASK";
|
||||||
|
} else {
|
||||||
|
std::cout << data_out[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::cout << "}\n";
|
||||||
|
std::cout << "packet: ";
|
||||||
|
std::cout << internal::postream(a) << "\n";
|
||||||
|
}
|
||||||
|
VERIFY(ok);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Scalar, int PacketSize, bool Run = internal::find_packet_by_size<Scalar, PacketSize>::value>
|
||||||
|
struct packet_segment_test_impl {
|
||||||
|
using Packet = typename internal::find_packet_by_size<Scalar, PacketSize>::type;
|
||||||
|
static void test_unaligned() {
|
||||||
|
// test loading a packet segment from unaligned memory that includes unallocated memory
|
||||||
|
|
||||||
|
// | X X X X | * * * X | X X X X |
|
||||||
|
// begin -> { X | * * * } <- begin + count
|
||||||
|
|
||||||
|
VectorX<Scalar> data_in(PacketSize), data_out(PacketSize);
|
||||||
|
data_in.setRandom();
|
||||||
|
data_out.setRandom();
|
||||||
|
|
||||||
|
Scalar* unaligned_data_in = data_in.data() - 1;
|
||||||
|
Scalar* unaligned_data_out = data_out.data() - 1;
|
||||||
|
|
||||||
|
Index begin = 1;
|
||||||
|
Index count = PacketSize - 1;
|
||||||
|
|
||||||
|
Packet a = internal::ploaduSegment<Packet>(unaligned_data_in, begin, count);
|
||||||
|
internal::pstoreuSegment<Scalar, Packet>(unaligned_data_out, a, begin, count);
|
||||||
|
|
||||||
|
verify_data(unaligned_data_in, unaligned_data_out, a, begin, count);
|
||||||
|
|
||||||
|
// test loading the entire packet
|
||||||
|
|
||||||
|
data_in.setRandom();
|
||||||
|
data_out.setRandom();
|
||||||
|
|
||||||
|
unaligned_data_in = data_in.data();
|
||||||
|
unaligned_data_out = data_out.data();
|
||||||
|
|
||||||
|
begin = 0;
|
||||||
|
count = PacketSize;
|
||||||
|
|
||||||
|
Packet b = internal::ploaduSegment<Packet>(unaligned_data_in, begin, count);
|
||||||
|
internal::pstoreuSegment<Scalar, Packet>(unaligned_data_out, b, begin, count);
|
||||||
|
|
||||||
|
verify_data(unaligned_data_in, unaligned_data_out, b, begin, count);
|
||||||
|
|
||||||
|
// test loading an empty packet segment in unallocated memory
|
||||||
|
count = 0;
|
||||||
|
|
||||||
|
for (begin = 0; begin < PacketSize; begin++) {
|
||||||
|
data_in.setRandom();
|
||||||
|
data_out = data_in;
|
||||||
|
Packet c = internal::ploaduSegment<Packet>(data_in.data(), begin, count);
|
||||||
|
internal::pstoreuSegment<Scalar, Packet>(data_out.data(), c, begin, count);
|
||||||
|
// verify that ploaduSegment / pstoreuSegment did nothing
|
||||||
|
VERIFY_IS_CWISE_EQUAL(data_in, data_out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
static void test_aligned() {
|
||||||
|
// test loading a packet segment from aligned memory that includes unallocated memory
|
||||||
|
|
||||||
|
// | X X X X | * * * X | X X X X |
|
||||||
|
// begin -> { * * * X } <- begin + count
|
||||||
|
|
||||||
|
VectorX<Scalar> data_in(PacketSize - 1), data_out(PacketSize - 1);
|
||||||
|
data_in.setRandom();
|
||||||
|
data_out.setRandom();
|
||||||
|
|
||||||
|
Scalar* aligned_data_in = data_in.data();
|
||||||
|
Scalar* aligned_data_out = data_out.data();
|
||||||
|
|
||||||
|
Index begin = 0;
|
||||||
|
Index count = PacketSize - 1;
|
||||||
|
|
||||||
|
Packet b = internal::ploadSegment<Packet>(aligned_data_in, begin, count);
|
||||||
|
internal::pstoreSegment<Scalar, Packet>(aligned_data_out, b, begin, count);
|
||||||
|
|
||||||
|
verify_data(aligned_data_in, aligned_data_out, b, begin, count);
|
||||||
|
}
|
||||||
|
static void run() {
|
||||||
|
test_unaligned();
|
||||||
|
test_aligned();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Scalar, int PacketSize>
|
||||||
|
struct packet_segment_test_impl<Scalar, PacketSize, false> {
|
||||||
|
static void run() {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Scalar, int PacketSize>
|
||||||
|
struct packet_segment_test_driver {
|
||||||
|
static void run() {
|
||||||
|
packet_segment_test_impl<Scalar, PacketSize>::run();
|
||||||
|
packet_segment_test_driver<Scalar, PacketSize / 2>::run();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Scalar>
|
||||||
|
struct packet_segment_test_driver<Scalar, 1> {
|
||||||
|
static void run() {}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename Scalar>
|
||||||
|
void test_packet_segment() {
|
||||||
|
packet_segment_test_driver<Scalar, internal::packet_traits<Scalar>::size>::run();
|
||||||
|
}
|
||||||
|
|
||||||
|
EIGEN_DECLARE_TEST(packet_segment) {
|
||||||
|
for (int i = 0; i < g_repeat; i++) {
|
||||||
|
test_packet_segment<bool>();
|
||||||
|
test_packet_segment<int8_t>();
|
||||||
|
test_packet_segment<uint8_t>();
|
||||||
|
test_packet_segment<int16_t>();
|
||||||
|
test_packet_segment<uint16_t>();
|
||||||
|
test_packet_segment<int32_t>();
|
||||||
|
test_packet_segment<uint32_t>();
|
||||||
|
test_packet_segment<int64_t>();
|
||||||
|
test_packet_segment<uint64_t>();
|
||||||
|
test_packet_segment<bfloat16>();
|
||||||
|
test_packet_segment<half>();
|
||||||
|
test_packet_segment<float>();
|
||||||
|
test_packet_segment<double>();
|
||||||
|
test_packet_segment<std::complex<float>>();
|
||||||
|
test_packet_segment<std::complex<double>>();
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user