diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 886b0aeba..c49c2a50f 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -29,10 +29,15 @@ // This implementation is based on Assign.h -// copy_using_evaluator_traits is based on assign_traits - namespace internal { +/*************************************************************************** +* Part 1 : the logic deciding a strategy for traversal and unrolling * +***************************************************************************/ + +// copy_using_evaluator_traits is based on assign_traits +// (actually, it's identical) + template struct copy_using_evaluator_traits { @@ -101,15 +106,15 @@ public: enum { Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal)) ? ( - int(MayUnrollCompletely) ? int(NoUnrolling) // int(CompleteUnrolling) - : int(MayUnrollInner) ? int(NoUnrolling) // int(InnerUnrolling) + int(MayUnrollCompletely) ? int(CompleteUnrolling) + : int(MayUnrollInner) ? int(InnerUnrolling) : int(NoUnrolling) ) : int(Traversal) == int(LinearVectorizedTraversal) - ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(NoUnrolling) // int(CompleteUnrolling) + ? ( bool(MayUnrollCompletely) && bool(DstIsAligned) ? int(CompleteUnrolling) : int(NoUnrolling) ) : int(Traversal) == int(LinearTraversal) - ? ( bool(MayUnrollCompletely) ? int(NoUnrolling) // int(CompleteUnrolling) + ? ( bool(MayUnrollCompletely) ? int(CompleteUnrolling) : int(NoUnrolling) ) : int(NoUnrolling) }; @@ -138,6 +143,175 @@ public: #endif }; +/*************************************************************************** +* Part 2 : meta-unrollers +***************************************************************************/ + +// TODO:`Ideally, we want to use only the evaluator objects here, not the expression objects +// However, we need to access .rowIndexByOuterInner() which is in the expression object + +/************************ +*** Default traversal *** +************************/ + +template +struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling +{ + enum { + outer = Index / DstXprType::InnerSizeAtCompileTime, + inner = Index % DstXprType::InnerSizeAtCompileTime + }; + + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + + EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, + SrcEvaluatorType &srcEvaluator, + const DstXprType &dst) + { + // TODO: Use copyCoeffByOuterInner ? + typename DstXprType::Index row = dst.rowIndexByOuterInner(outer, inner); + typename DstXprType::Index col = dst.colIndexByOuterInner(outer, inner); + dstEvaluator.coeffRef(row, col) = srcEvaluator.coeff(row, col); + copy_using_evaluator_DefaultTraversal_CompleteUnrolling + ::run(dstEvaluator, srcEvaluator, dst); + } +}; + +template +struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling +{ + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, const DstXprType&) { } +}; + +template +struct copy_using_evaluator_DefaultTraversal_InnerUnrolling +{ + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + + EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, + SrcEvaluatorType &srcEvaluator, + const DstXprType &dst, + int outer) + { + // TODO: Use copyCoeffByOuterInner ? + typename DstXprType::Index row = dst.rowIndexByOuterInner(outer, Index); + typename DstXprType::Index col = dst.colIndexByOuterInner(outer, Index); + dstEvaluator.coeffRef(row, col) = srcEvaluator.coeff(row, col); + copy_using_evaluator_DefaultTraversal_InnerUnrolling + ::run(dstEvaluator, srcEvaluator, dst, outer); + } +}; + +template +struct copy_using_evaluator_DefaultTraversal_InnerUnrolling +{ + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, const DstXprType&, int) { } +}; + +/*********************** +*** Linear traversal *** +***********************/ + +template +struct copy_using_evaluator_LinearTraversal_CompleteUnrolling +{ + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + + EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, + SrcEvaluatorType &srcEvaluator, + const DstXprType &dst) + { + // use copyCoeff ? + dstEvaluator.coeffRef(Index) = srcEvaluator.coeff(Index); + copy_using_evaluator_LinearTraversal_CompleteUnrolling + ::run(dstEvaluator, srcEvaluator, dst); + } +}; + +template +struct copy_using_evaluator_LinearTraversal_CompleteUnrolling +{ + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, const DstXprType&) { } +}; + +/************************** +*** Inner vectorization *** +**************************/ + +template +struct copy_using_evaluator_innervec_CompleteUnrolling +{ + enum { + outer = Index / DstXprType::InnerSizeAtCompileTime, + inner = Index % DstXprType::InnerSizeAtCompileTime, + JointAlignment = copy_using_evaluator_traits::JointAlignment + }; + + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + + EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, + SrcEvaluatorType &srcEvaluator, + const DstXprType &dst) + { + // TODO: Use copyPacketByOuterInner ? + typename DstXprType::Index row = dst.rowIndexByOuterInner(outer, inner); + typename DstXprType::Index col = dst.colIndexByOuterInner(outer, inner); + dstEvaluator.template writePacket(row, col, srcEvaluator.template packet(row, col)); + copy_using_evaluator_innervec_CompleteUnrolling::size, Stop>::run(dstEvaluator, srcEvaluator, dst); + } +}; + +template +struct copy_using_evaluator_innervec_CompleteUnrolling +{ + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, const DstXprType&) { } +}; + +template +struct copy_using_evaluator_innervec_InnerUnrolling +{ + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + + EIGEN_STRONG_INLINE static void run(DstEvaluatorType &dstEvaluator, + SrcEvaluatorType &srcEvaluator, + const DstXprType &dst, + int outer) + { + // TODO: Use copyPacketByOuterInner ? + typename DstXprType::Index row = dst.rowIndexByOuterInner(outer, Index); + typename DstXprType::Index col = dst.colIndexByOuterInner(outer, Index); + dstEvaluator.template writePacket(row, col, srcEvaluator.template packet(row, col)); + copy_using_evaluator_innervec_InnerUnrolling::size, Stop>::run(dstEvaluator, srcEvaluator, dst, outer); + } +}; + +template +struct copy_using_evaluator_innervec_InnerUnrolling +{ + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + EIGEN_STRONG_INLINE static void run(DstEvaluatorType&, SrcEvaluatorType&, const DstXprType&, int) { } +}; + +/*************************************************************************** +* Part 3 : implementation of all cases +***************************************************************************/ + // copy_using_evaluator_impl is based on assign_impl template +struct copy_using_evaluator_impl +{ + EIGEN_STRONG_INLINE static void run(const DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst.const_cast_derived()); + SrcEvaluatorType srcEvaluator(src); + + copy_using_evaluator_DefaultTraversal_CompleteUnrolling + ::run(dstEvaluator, srcEvaluator, dst); + } +}; + +template +struct copy_using_evaluator_impl +{ + typedef typename DstXprType::Index Index; + EIGEN_STRONG_INLINE static void run(const DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst.const_cast_derived()); + SrcEvaluatorType srcEvaluator(src); + + const Index outerSize = dst.outerSize(); + for(Index outer = 0; outer < outerSize; ++outer) + copy_using_evaluator_DefaultTraversal_InnerUnrolling + ::run(dstEvaluator, srcEvaluator, dst, outer); + } +}; + /*************************** *** Linear vectorization *** ***************************/ @@ -239,6 +448,29 @@ struct copy_using_evaluator_impl +struct copy_using_evaluator_impl +{ + typedef typename DstXprType::Index Index; + EIGEN_STRONG_INLINE static void run(const DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst.const_cast_derived()); + SrcEvaluatorType srcEvaluator(src); + + enum { size = DstXprType::SizeAtCompileTime, + packetSize = packet_traits::size, + alignedSize = (size/packetSize)*packetSize }; + + copy_using_evaluator_innervec_CompleteUnrolling + ::run(dstEvaluator, srcEvaluator, dst); + copy_using_evaluator_DefaultTraversal_CompleteUnrolling + ::run(dstEvaluator, srcEvaluator, dst); + } +}; + /************************** *** Inner vectorization *** **************************/ @@ -260,6 +492,7 @@ struct copy_using_evaluator_impl::size; for(Index outer = 0; outer < outerSize; ++outer) for(Index inner = 0; inner < innerSize; inner+=packetSize) { + // TODO: Use copyPacketByOuterInner ? Index row = dst.rowIndexByOuterInner(outer, inner); Index col = dst.colIndexByOuterInner(outer, inner); dstEvaluator.template writePacket(row, col, srcEvaluator.template packet(row, col)); @@ -267,6 +500,41 @@ struct copy_using_evaluator_impl +struct copy_using_evaluator_impl +{ + EIGEN_STRONG_INLINE static void run(const DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst.const_cast_derived()); + SrcEvaluatorType srcEvaluator(src); + + copy_using_evaluator_innervec_CompleteUnrolling + ::run(dstEvaluator, srcEvaluator, dst); + } +}; + +template +struct copy_using_evaluator_impl +{ + typedef typename DstXprType::Index Index; + EIGEN_STRONG_INLINE static void run(const DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst.const_cast_derived()); + SrcEvaluatorType srcEvaluator(src); + + const Index outerSize = dst.outerSize(); + for(Index outer = 0; outer < outerSize; ++outer) + copy_using_evaluator_innervec_InnerUnrolling + ::run(dstEvaluator, srcEvaluator, dst, outer); + } +}; + /*********************** *** Linear traversal *** ***********************/ @@ -289,6 +557,22 @@ struct copy_using_evaluator_impl +struct copy_using_evaluator_impl +{ + EIGEN_STRONG_INLINE static void run(const DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst.const_cast_derived()); + SrcEvaluatorType srcEvaluator(src); + + copy_using_evaluator_LinearTraversal_CompleteUnrolling + ::run(dstEvaluator, srcEvaluator, dst); + } +}; + /************************** *** Slice vectorization *** ***************************/ @@ -348,6 +632,10 @@ struct copy_using_evaluator_impl diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index db6faca10..6b08c78a0 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -219,6 +219,12 @@ struct evaluator_impl > return m_functor(index); } + template + PacketScalar packet(Index row, Index col) const + { + return m_functor.packetOp(row, col); + } + template PacketScalar packet(Index index) const { diff --git a/test/evaluators.cpp b/test/evaluators.cpp index aa57e4ad5..4c55736eb 100644 --- a/test/evaluators.cpp +++ b/test/evaluators.cpp @@ -1,4 +1,3 @@ - #define EIGEN_ENABLE_EVALUATORS #include "main.h"