From 8aa5aa269a6b7a5f4237940f4de55bfa15f88791 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Mon, 9 Mar 2009 23:16:39 +0000 Subject: [PATCH] add "slice vectorization" of redux (eg. m.block().minCoeff() is now vectorized) --- Eigen/src/Core/Redux.h | 61 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 54 insertions(+), 7 deletions(-) diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index 889a12d30..24ecac7d2 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -39,18 +39,26 @@ struct ei_redux_traits { private: enum { - PacketSize = ei_packet_traits::size + PacketSize = ei_packet_traits::size, + InnerMaxSize = int(Derived::Flags)&RowMajorBit + ? Derived::MaxColsAtCompileTime + : Derived::MaxRowsAtCompileTime + }; + + enum { + MightVectorize = (int(Derived::Flags)&ActualPacketAccessBit) + && (ei_functor_traits::PacketAccess), + MayLinearVectorize = MightVectorize && (int(Derived::Flags)&LinearAccessBit), + MaySliceVectorize = MightVectorize && int(InnerMaxSize)>=3*PacketSize }; public: enum { - Vectorization = (int(Derived::Flags)&ActualPacketAccessBit) - && (int(Derived::Flags)&LinearAccessBit) - && (ei_functor_traits::PacketAccess) - ? LinearVectorization - : NoVectorization + Vectorization = int(MayLinearVectorize) ? int(LinearVectorization) + : int(MaySliceVectorize) ? int(SliceVectorization) + : int(NoVectorization) }; - + private: enum { Cost = Derived::SizeAtCompileTime * Derived::CoeffReadCost @@ -227,6 +235,45 @@ struct ei_redux_impl } }; +template +struct ei_redux_impl +{ + typedef typename Derived::Scalar Scalar; + typedef typename ei_packet_traits::type PacketScalar; + + static Scalar run(const Derived& mat, const Func& func) + { + const int innerSize = mat.innerSize(); + const int outerSize = mat.outerSize(); + enum { + packetSize = ei_packet_traits::size, + isRowMajor = Derived::Flags&RowMajorBit?1:0 + }; + const int packetedInnerSize = ((innerSize)/packetSize)*packetSize; + Scalar res; + if(packetedInnerSize) + { + PacketScalar packet_res = mat.template packet(0,0); + for(int j=0; j + (isRowMajor?j:i, isRowMajor?i:j)); + + res = func.predux(packet_res); + for(int j=0; j::run(mat, func); + } + + return res; + } +}; + template struct ei_redux_impl {