mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 11:49:02 +08:00
Enable slice vectorization with inner unrolling when unaligned vectorization is allowed. For instance, this makes it possible to vectorize 5x5 matrices (including products).
This commit is contained in:
parent
5fbe7aa604
commit
4057f9b1fc
@ -88,10 +88,11 @@ private:
|
|||||||
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
|
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
|
||||||
so it's only good for large enough sizes. */
|
so it's only good for large enough sizes. */
|
||||||
MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess)
|
MaySliceVectorize = bool(MightVectorize) && bool(DstHasDirectAccess)
|
||||||
&& (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=3*InnerPacketSize)
|
&& (int(InnerMaxSize)==Dynamic || int(InnerMaxSize)>=(EIGEN_UNALIGNED_VECTORIZE?InnerPacketSize:(3*InnerPacketSize)))
|
||||||
/* slice vectorization can be slow, so we only want it if the slices are big, which is
|
/* slice vectorization can be slow, so we only want it if the slices are big, which is
|
||||||
indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
|
indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block
|
||||||
in a fixed-size matrix */
|
in a fixed-size matrix
|
||||||
|
However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */
|
||||||
};
|
};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@ -136,6 +137,11 @@ public:
|
|||||||
: int(Traversal) == int(LinearTraversal)
|
: int(Traversal) == int(LinearTraversal)
|
||||||
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
|
? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling)
|
||||||
: int(NoUnrolling) )
|
: int(NoUnrolling) )
|
||||||
|
#if EIGEN_UNALIGNED_VECTORIZE
|
||||||
|
: int(Traversal) == int(SliceVectorizedTraversal)
|
||||||
|
? ( bool(MayUnrollInner) ? int(InnerUnrolling)
|
||||||
|
: int(NoUnrolling) )
|
||||||
|
#endif
|
||||||
: int(NoUnrolling)
|
: int(NoUnrolling)
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -277,24 +283,20 @@ struct copy_using_evaluator_innervec_CompleteUnrolling<Kernel, Stop, Stop>
|
|||||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel&) { }
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Kernel, int Index_, int Stop>
|
template<typename Kernel, int Index_, int Stop, int SrcAlignment, int DstAlignment>
|
||||||
struct copy_using_evaluator_innervec_InnerUnrolling
|
struct copy_using_evaluator_innervec_InnerUnrolling
|
||||||
{
|
{
|
||||||
typedef typename Kernel::PacketType PacketType;
|
typedef typename Kernel::PacketType PacketType;
|
||||||
enum {
|
|
||||||
SrcAlignment = Kernel::AssignmentTraits::SrcAlignment,
|
|
||||||
DstAlignment = Kernel::AssignmentTraits::DstAlignment
|
|
||||||
};
|
|
||||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel, Index outer)
|
||||||
{
|
{
|
||||||
kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
|
kernel.template assignPacketByOuterInner<DstAlignment, SrcAlignment, PacketType>(outer, Index_);
|
||||||
enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
|
enum { NextIndex = Index_ + unpacket_traits<PacketType>::size };
|
||||||
copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop>::run(kernel, outer);
|
copy_using_evaluator_innervec_InnerUnrolling<Kernel, NextIndex, Stop, SrcAlignment, DstAlignment>::run(kernel, outer);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename Kernel, int Stop>
|
template<typename Kernel, int Stop, int SrcAlignment, int DstAlignment>
|
||||||
struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop>
|
struct copy_using_evaluator_innervec_InnerUnrolling<Kernel, Stop, Stop, SrcAlignment, DstAlignment>
|
||||||
{
|
{
|
||||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &, Index) { }
|
||||||
};
|
};
|
||||||
@ -423,9 +425,10 @@ struct dense_assignment_loop<Kernel, LinearVectorizedTraversal, CompleteUnrollin
|
|||||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||||
{
|
{
|
||||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||||
|
typedef typename Kernel::PacketType PacketType;
|
||||||
|
|
||||||
enum { size = DstXprType::SizeAtCompileTime,
|
enum { size = DstXprType::SizeAtCompileTime,
|
||||||
packetSize = packet_traits<typename Kernel::Scalar>::size,
|
packetSize =unpacket_traits<PacketType>::size,
|
||||||
alignedSize = (size/packetSize)*packetSize };
|
alignedSize = (size/packetSize)*packetSize };
|
||||||
|
|
||||||
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
|
copy_using_evaluator_innervec_CompleteUnrolling<Kernel, 0, alignedSize>::run(kernel);
|
||||||
@ -472,9 +475,11 @@ struct dense_assignment_loop<Kernel, InnerVectorizedTraversal, InnerUnrolling>
|
|||||||
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel &kernel)
|
||||||
{
|
{
|
||||||
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||||
|
typedef typename Kernel::AssignmentTraits Traits;
|
||||||
const Index outerSize = kernel.outerSize();
|
const Index outerSize = kernel.outerSize();
|
||||||
for(Index outer = 0; outer < outerSize; ++outer)
|
for(Index outer = 0; outer < outerSize; ++outer)
|
||||||
copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime>::run(kernel, outer);
|
copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, DstXprType::InnerSizeAtCompileTime,
|
||||||
|
Traits::SrcAlignment, Traits::DstAlignment>::run(kernel, outer);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -554,6 +559,29 @@ struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, NoUnrolling>
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if EIGEN_UNALIGNED_VECTORIZE
|
||||||
|
// Specialization of dense_assignment_loop for SliceVectorizedTraversal combined
// with InnerUnrolling. It is only compiled when EIGEN_UNALIGNED_VECTORIZE is
// non-zero (see the enclosing #if).
//
// run() loops over the outer dimension at runtime. For each outer index, the
// inner dimension is fully unrolled at compile time in two parts:
//   - inner indices [0, vectorizableSize) are assigned packet by packet;
//   - inner indices [vectorizableSize, size) are assigned through the
//     DefaultTraversal inner unroller.
// Here `size` is the destination's compile-time inner size and
// `vectorizableSize` is `size` rounded down to a multiple of the packet size.
template<typename Kernel>
|
||||||
|
struct dense_assignment_loop<Kernel, SliceVectorizedTraversal, InnerUnrolling>
|
||||||
|
{
|
||||||
|
EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel)
|
||||||
|
{
|
||||||
|
typedef typename Kernel::DstEvaluatorType::XprType DstXprType;
|
||||||
|
typedef typename Kernel::PacketType PacketType;
|

||||||
|
enum { size = DstXprType::InnerSizeAtCompileTime,
|
||||||
|
packetSize =unpacket_traits<PacketType>::size,
|
||||||
|
vectorizableSize = (size/packetSize)*packetSize };
|
||||||
|

||||||
|
for(Index outer = 0; outer < kernel.outerSize(); ++outer)
|
||||||
|
{
|
||||||
|
// Packet part: the two trailing 0 arguments are the SrcAlignment and
|
// DstAlignment template parameters (presumably meaning "Unaligned" -- confirm
|
// against the alignment enum).
copy_using_evaluator_innervec_InnerUnrolling<Kernel, 0, vectorizableSize, 0, 0>::run(kernel, outer);
|
||||||
|
// Remainder part: inner indices from vectorizableSize up to size.
copy_using_evaluator_DefaultTraversal_InnerUnrolling<Kernel, vectorizableSize, size>::run(kernel, outer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/***************************************************************************
|
/***************************************************************************
|
||||||
* Part 4 : Generic dense assignment kernel
|
* Part 4 : Generic dense assignment kernel
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
Loading…
x
Reference in New Issue
Block a user