mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-13 12:19:12 +08:00
Add "slice vectorization" of redux (e.g. m.block().minCoeff() is now
vectorized).
This commit is contained in:
parent
c087373968
commit
8aa5aa269a
@ -39,16 +39,24 @@ struct ei_redux_traits
|
|||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
enum {
|
enum {
|
||||||
PacketSize = ei_packet_traits<typename Derived::Scalar>::size
|
PacketSize = ei_packet_traits<typename Derived::Scalar>::size,
|
||||||
|
InnerMaxSize = int(Derived::Flags)&RowMajorBit
|
||||||
|
? Derived::MaxColsAtCompileTime
|
||||||
|
: Derived::MaxRowsAtCompileTime
|
||||||
|
};
|
||||||
|
|
||||||
|
enum {
|
||||||
|
MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit)
|
||||||
|
&& (ei_functor_traits<Func>::PacketAccess),
|
||||||
|
MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit),
|
||||||
|
MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize
|
||||||
};
|
};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
enum {
|
enum {
|
||||||
Vectorization = (int(Derived::Flags)&ActualPacketAccessBit)
|
Vectorization = int(MayLinearVectorize) ? int(LinearVectorization)
|
||||||
&& (int(Derived::Flags)&LinearAccessBit)
|
: int(MaySliceVectorize) ? int(SliceVectorization)
|
||||||
&& (ei_functor_traits<Func>::PacketAccess)
|
: int(NoVectorization)
|
||||||
? LinearVectorization
|
|
||||||
: NoVectorization
|
|
||||||
};
|
};
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -227,6 +235,45 @@ struct ei_redux_impl<Func, Derived, LinearVectorization, NoUnrolling>
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template<typename Func, typename Derived>
|
||||||
|
struct ei_redux_impl<Func, Derived, SliceVectorization, NoUnrolling>
|
||||||
|
{
|
||||||
|
typedef typename Derived::Scalar Scalar;
|
||||||
|
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
|
||||||
|
|
||||||
|
static Scalar run(const Derived& mat, const Func& func)
|
||||||
|
{
|
||||||
|
const int innerSize = mat.innerSize();
|
||||||
|
const int outerSize = mat.outerSize();
|
||||||
|
enum {
|
||||||
|
packetSize = ei_packet_traits<Scalar>::size,
|
||||||
|
isRowMajor = Derived::Flags&RowMajorBit?1:0
|
||||||
|
};
|
||||||
|
const int packetedInnerSize = ((innerSize)/packetSize)*packetSize;
|
||||||
|
Scalar res;
|
||||||
|
if(packetedInnerSize)
|
||||||
|
{
|
||||||
|
PacketScalar packet_res = mat.template packet<Unaligned>(0,0);
|
||||||
|
for(int j=0; j<outerSize; ++j)
|
||||||
|
for(int i=0; i<packetedInnerSize; i+=int(packetSize))
|
||||||
|
packet_res = func.packetOp(packet_res, mat.template packet<Unaligned>
|
||||||
|
(isRowMajor?j:i, isRowMajor?i:j));
|
||||||
|
|
||||||
|
res = func.predux(packet_res);
|
||||||
|
for(int j=0; j<outerSize; ++j)
|
||||||
|
for(int i=packetedInnerSize; i<innerSize; ++i)
|
||||||
|
res = func(res, mat.coeff(isRowMajor?j:i, isRowMajor?i:j));
|
||||||
|
}
|
||||||
|
else // too small to vectorize anything.
|
||||||
|
// since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize.
|
||||||
|
{
|
||||||
|
res = ei_redux_impl<Func, Derived, NoVectorization, NoUnrolling>::run(mat, func);
|
||||||
|
}
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
template<typename Func, typename Derived>
|
template<typename Func, typename Derived>
|
||||||
struct ei_redux_impl<Func, Derived, LinearVectorization, CompleteUnrolling>
|
struct ei_redux_impl<Func, Derived, LinearVectorization, CompleteUnrolling>
|
||||||
{
|
{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user