From af9851d1d73b3aae5804f62d2d9e690be33bc3a5 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 7 Nov 2013 12:03:12 +0100 Subject: [PATCH] bug #99: move the creation of the evaluator to a central place, and make generic_dense_assignment_kernel hold the destination and source evaluators --- Eigen/src/Core/AssignEvaluator.h | 559 +++++++++++++---------------- Eigen/src/Core/ProductEvaluators.h | 2 +- 2 files changed, 254 insertions(+), 307 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index adea7ef13..5b5d29ca9 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -139,9 +139,10 @@ public: *** Default traversal *** ************************/ -template +template struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling { + typedef typename Kernel::DstEvaluatorType DstEvaluatorType; typedef typename DstEvaluatorType::XprType DstXprType; enum { @@ -149,126 +150,101 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling inner = Index % DstXprType::InnerSizeAtCompileTime }; - static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, - SrcEvaluatorType &srcEvaluator, - const Kernel &kernel - ) + static EIGEN_STRONG_INLINE void run(Kernel &kernel) { - kernel.assignCoeffByOuterInner(outer, inner, dstEvaluator, srcEvaluator); - copy_using_evaluator_DefaultTraversal_CompleteUnrolling - - ::run(dstEvaluator, srcEvaluator, kernel); + kernel.assignCoeffByOuterInner(outer, inner); + copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); } }; -template -struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling +template +struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling { - static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, const Kernel&) { } + static EIGEN_STRONG_INLINE void run(Kernel&) { } }; -template +template struct copy_using_evaluator_DefaultTraversal_InnerUnrolling { - static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, - SrcEvaluatorType &srcEvaluator, - const Kernel &kernel, - int outer) + static EIGEN_STRONG_INLINE void run(Kernel &kernel, int outer) { - kernel.assignCoeffByOuterInner(outer, Index, dstEvaluator, srcEvaluator); - copy_using_evaluator_DefaultTraversal_InnerUnrolling - - ::run(dstEvaluator, srcEvaluator, kernel, outer); + kernel.assignCoeffByOuterInner(outer, Index); + copy_using_evaluator_DefaultTraversal_InnerUnrolling::run(kernel, outer); } }; -template -struct copy_using_evaluator_DefaultTraversal_InnerUnrolling +template +struct copy_using_evaluator_DefaultTraversal_InnerUnrolling { - static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, const Kernel&, int) { } + static EIGEN_STRONG_INLINE void run(Kernel&, int) { } }; /*********************** *** Linear traversal *** ***********************/ -template +template struct copy_using_evaluator_LinearTraversal_CompleteUnrolling { - static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, - SrcEvaluatorType &srcEvaluator, - const Kernel& kernel - ) + static EIGEN_STRONG_INLINE void run(Kernel& kernel) { - kernel.assignCoeff(Index, dstEvaluator, srcEvaluator); - copy_using_evaluator_LinearTraversal_CompleteUnrolling - - ::run(dstEvaluator, srcEvaluator, kernel); + kernel.assignCoeff(Index); + copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); } }; -template -struct copy_using_evaluator_LinearTraversal_CompleteUnrolling +template +struct copy_using_evaluator_LinearTraversal_CompleteUnrolling { - static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, const Kernel&) { } + static EIGEN_STRONG_INLINE void run(Kernel&) { } }; /************************** *** Inner vectorization *** **************************/ -template +template struct copy_using_evaluator_innervec_CompleteUnrolling { + typedef typename Kernel::DstEvaluatorType DstEvaluatorType; typedef typename DstEvaluatorType::XprType DstXprType; - typedef typename SrcEvaluatorType::XprType SrcXprType; enum { outer = Index / DstXprType::InnerSizeAtCompileTime, inner = Index % DstXprType::InnerSizeAtCompileTime, - JointAlignment = copy_using_evaluator_traits::JointAlignment + JointAlignment = Kernel::AssignmentTraits::JointAlignment }; - static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, - SrcEvaluatorType &srcEvaluator, - const Kernel &kernel - ) + static EIGEN_STRONG_INLINE void run(Kernel &kernel) { - kernel.template assignPacketByOuterInner(outer, inner, dstEvaluator, srcEvaluator); + kernel.template assignPacketByOuterInner(outer, inner); enum { NextIndex = Index + packet_traits::size }; - copy_using_evaluator_innervec_CompleteUnrolling - - ::run(dstEvaluator, srcEvaluator, kernel); + copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); } }; -template -struct copy_using_evaluator_innervec_CompleteUnrolling +template +struct copy_using_evaluator_innervec_CompleteUnrolling { - static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, const Kernel&) { } + static EIGEN_STRONG_INLINE void run(Kernel&) { } }; -template +template struct copy_using_evaluator_innervec_InnerUnrolling { - static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, - SrcEvaluatorType &srcEvaluator, - const Kernel &kernel, - int outer) + static EIGEN_STRONG_INLINE void run(Kernel &kernel, int outer) { - kernel.template assignPacketByOuterInner(outer, Index, dstEvaluator, srcEvaluator); - typedef typename DstEvaluatorType::XprType DstXprType; + kernel.template assignPacketByOuterInner(outer, Index); + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; enum { NextIndex = Index + packet_traits::size }; - copy_using_evaluator_innervec_InnerUnrolling - - ::run(dstEvaluator, srcEvaluator, kernel, outer); + copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); } }; -template -struct copy_using_evaluator_innervec_InnerUnrolling +template +struct copy_using_evaluator_innervec_InnerUnrolling { - static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, const Kernel &, int) { } + static EIGEN_STRONG_INLINE void run(Kernel &, int) { } }; /*************************************************************************** @@ -277,69 +253,51 @@ struct copy_using_evaluator_innervec_InnerUnrolling::Traversal, - int Unrolling = copy_using_evaluator_traits::Unrolling> +template struct dense_assignment_loop; /************************ *** Default traversal *** ************************/ -template -struct dense_assignment_loop +template +struct dense_assignment_loop { - static void run(DstXprType& dst, const SrcXprType& src, const Kernel &kernel) + static void run(Kernel &kernel) { - typedef typename evaluator::type DstEvaluatorType; - typedef typename evaluator::type SrcEvaluatorType; - typedef typename DstXprType::Index Index; - - DstEvaluatorType dstEvaluator(dst); - SrcEvaluatorType srcEvaluator(src); - - for(Index outer = 0; outer < dst.outerSize(); ++outer) { - for(Index inner = 0; inner < dst.innerSize(); ++inner) { - kernel.assignCoeffByOuterInner(outer, inner, dstEvaluator, srcEvaluator); + typedef typename Kernel::Index Index; + + for(Index outer = 0; outer < kernel.outerSize(); ++outer) { + for(Index inner = 0; inner < kernel.innerSize(); ++inner) { + kernel.assignCoeffByOuterInner(outer, inner); } } } }; -template -struct dense_assignment_loop +template +struct dense_assignment_loop { - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) + static EIGEN_STRONG_INLINE void run(Kernel &kernel) { - typedef typename evaluator::type DstEvaluatorType; - typedef typename evaluator::type SrcEvaluatorType; - - DstEvaluatorType dstEvaluator(dst); - SrcEvaluatorType srcEvaluator(src); - - copy_using_evaluator_DefaultTraversal_CompleteUnrolling - - ::run(dstEvaluator, srcEvaluator, kernel); + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); } }; -template -struct dense_assignment_loop +template +struct dense_assignment_loop { - typedef typename DstXprType::Index Index; - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) + typedef typename Kernel::Index Index; + static EIGEN_STRONG_INLINE void run(Kernel &kernel) { - typedef typename evaluator::type DstEvaluatorType; - typedef typename evaluator::type SrcEvaluatorType; + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; - DstEvaluatorType dstEvaluator(dst); - SrcEvaluatorType srcEvaluator(src); - - const Index outerSize = dst.outerSize(); + const Index outerSize = kernel.outerSize(); for(Index outer = 0; outer < outerSize; ++outer) - copy_using_evaluator_DefaultTraversal_InnerUnrolling - - ::run(dstEvaluator, srcEvaluator, kernel, outer); + copy_using_evaluator_DefaultTraversal_InnerUnrolling::run(kernel, outer); } }; @@ -355,9 +313,8 @@ template struct unaligned_dense_assignment_loop { // if IsAligned = true, then do nothing - template - static EIGEN_STRONG_INLINE void run(const SrcEvaluatorType&, DstEvaluatorType&, const Kernel&, - typename SrcEvaluatorType::Index, typename SrcEvaluatorType::Index) {} + template + static EIGEN_STRONG_INLINE void run(Kernel&, typename Kernel::Index, typename Kernel::Index) {} }; template <> @@ -367,80 +324,63 @@ struct unaligned_dense_assignment_loop // packet access path. // FIXME check which version exhibits this issue #ifdef _MSC_VER - template - static EIGEN_DONT_INLINE void run(DstEvaluatorType &dstEvaluator, - const SrcEvaluatorType &srcEvaluator, - const Kernel &kernel, - typename DstEvaluatorType::Index start, - typename DstEvaluatorType::Index end) + template + static EIGEN_DONT_INLINE void run(Kernel &kernel, + typename Kernel::Index start, + typename Kernel::Index end) #else - template - static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, - const SrcEvaluatorType &srcEvaluator, - const Kernel &kernel, - typename DstEvaluatorType::Index start, - typename DstEvaluatorType::Index end) + template + static EIGEN_STRONG_INLINE void run(Kernel &kernel, + typename Kernel::Index start, + typename Kernel::Index end) #endif { - for (typename DstEvaluatorType::Index index = start; index < end; ++index) - kernel.assignCoeff(index, dstEvaluator, srcEvaluator); + for (typename Kernel::Index index = start; index < end; ++index) + kernel.assignCoeff(index); } }; -template -struct dense_assignment_loop +template +struct dense_assignment_loop { - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) + static EIGEN_STRONG_INLINE void run(Kernel &kernel) { - typedef typename evaluator::type DstEvaluatorType; - typedef typename evaluator::type SrcEvaluatorType; - typedef typename DstXprType::Index Index; + typedef typename Kernel::Index Index; - DstEvaluatorType dstEvaluator(dst); - SrcEvaluatorType srcEvaluator(src); - - const Index size = dst.size(); - typedef packet_traits PacketTraits; + const Index size = kernel.size(); + typedef packet_traits PacketTraits; enum { packetSize = PacketTraits::size, - dstIsAligned = int(copy_using_evaluator_traits::DstIsAligned), + dstIsAligned = int(Kernel::AssignmentTraits::DstIsAligned), dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : dstIsAligned, - srcAlignment = copy_using_evaluator_traits::JointAlignment + srcAlignment = Kernel::AssignmentTraits::JointAlignment }; - const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned(&dstEvaluator.coeffRef(0), size); + const Index alignedStart = dstIsAligned ? 0 : internal::first_aligned(&kernel.dstEvaluator().coeffRef(0), size); const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; - unaligned_dense_assignment_loop::run(dstEvaluator, srcEvaluator, kernel, 0, alignedStart); + unaligned_dense_assignment_loop::run(kernel, 0, alignedStart); for(Index index = alignedStart; index < alignedEnd; index += packetSize) - kernel.template assignPacket(index, dstEvaluator, srcEvaluator); + kernel.template assignPacket(index); - unaligned_dense_assignment_loop<>::run(dstEvaluator, srcEvaluator, kernel, alignedEnd, size); + unaligned_dense_assignment_loop<>::run(kernel, alignedEnd, size); } }; -template -struct dense_assignment_loop +template +struct dense_assignment_loop { - typedef typename DstXprType::Index Index; - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) + typedef typename Kernel::Index Index; + static EIGEN_STRONG_INLINE void run(Kernel &kernel) { - typedef typename evaluator::type DstEvaluatorType; - typedef typename evaluator::type SrcEvaluatorType; - - DstEvaluatorType dstEvaluator(dst); - SrcEvaluatorType srcEvaluator(src); - + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + enum { size = DstXprType::SizeAtCompileTime, - packetSize = packet_traits::size, + packetSize = packet_traits::size, alignedSize = (size/packetSize)*packetSize }; - copy_using_evaluator_innervec_CompleteUnrolling - - ::run(dstEvaluator, srcEvaluator, kernel); - copy_using_evaluator_DefaultTraversal_CompleteUnrolling - - ::run(dstEvaluator, srcEvaluator, kernel); + copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); + copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); } }; @@ -448,61 +388,42 @@ struct dense_assignment_loop -struct dense_assignment_loop +template +struct dense_assignment_loop { - static inline void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) + static inline void run(Kernel &kernel) { - typedef typename evaluator::type DstEvaluatorType; - typedef typename evaluator::type SrcEvaluatorType; - typedef typename DstXprType::Index Index; + typedef typename Kernel::Index Index; - DstEvaluatorType dstEvaluator(dst); - SrcEvaluatorType srcEvaluator(src); - - const Index innerSize = dst.innerSize(); - const Index outerSize = dst.outerSize(); - const Index packetSize = packet_traits::size; + const Index innerSize = kernel.innerSize(); + const Index outerSize = kernel.outerSize(); + const Index packetSize = packet_traits::size; for(Index outer = 0; outer < outerSize; ++outer) for(Index inner = 0; inner < innerSize; inner+=packetSize) - kernel.template assignPacketByOuterInner(outer, inner, dstEvaluator, srcEvaluator); + kernel.template assignPacketByOuterInner(outer, inner); } }; -template -struct dense_assignment_loop +template +struct dense_assignment_loop { - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) + static EIGEN_STRONG_INLINE void run(Kernel &kernel) { - typedef typename evaluator::type DstEvaluatorType; - typedef typename evaluator::type SrcEvaluatorType; - - DstEvaluatorType dstEvaluator(dst); - SrcEvaluatorType srcEvaluator(src); - - copy_using_evaluator_innervec_CompleteUnrolling - - ::run(dstEvaluator, srcEvaluator, kernel); + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); } }; -template -struct dense_assignment_loop +template +struct dense_assignment_loop { - typedef typename DstXprType::Index Index; - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) + typedef typename Kernel::Index Index; + static EIGEN_STRONG_INLINE void run(Kernel &kernel) { - typedef typename evaluator::type DstEvaluatorType; - typedef typename evaluator::type SrcEvaluatorType; - - DstEvaluatorType dstEvaluator(dst); - SrcEvaluatorType srcEvaluator(src); - - const Index outerSize = dst.outerSize(); + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + const Index outerSize = kernel.outerSize(); for(Index outer = 0; outer < outerSize; ++outer) - copy_using_evaluator_innervec_InnerUnrolling - - ::run(dstEvaluator, srcEvaluator, kernel, outer); + copy_using_evaluator_innervec_InnerUnrolling::run(kernel, outer); } }; @@ -510,38 +431,25 @@ struct dense_assignment_loop -struct dense_assignment_loop +template +struct dense_assignment_loop { - static inline void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) + static inline void run(Kernel &kernel) { - typedef typename evaluator::type DstEvaluatorType; - typedef typename evaluator::type SrcEvaluatorType; - typedef typename DstXprType::Index Index; - - DstEvaluatorType dstEvaluator(dst); - SrcEvaluatorType srcEvaluator(src); - - const Index size = dst.size(); + typedef typename Kernel::Index Index; + const Index size = kernel.size(); for(Index i = 0; i < size; ++i) - kernel.assignCoeff(i, dstEvaluator, srcEvaluator); + kernel.assignCoeff(i); } }; -template -struct dense_assignment_loop +template +struct dense_assignment_loop { - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) + static EIGEN_STRONG_INLINE void run(Kernel &kernel) { - typedef typename evaluator::type DstEvaluatorType; - typedef typename evaluator::type SrcEvaluatorType; - - DstEvaluatorType dstEvaluator(dst); - SrcEvaluatorType srcEvaluator(src); - - copy_using_evaluator_LinearTraversal_CompleteUnrolling - - ::run(dstEvaluator, srcEvaluator, kernel); + typedef typename Kernel::DstEvaluatorType::XprType DstXprType; + copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); } }; @@ -549,45 +457,39 @@ struct dense_assignment_loop -struct dense_assignment_loop +template +struct dense_assignment_loop { - static inline void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) + static inline void run(Kernel &kernel) { - typedef typename evaluator::type DstEvaluatorType; - typedef typename evaluator::type SrcEvaluatorType; - typedef typename DstXprType::Index Index; - - DstEvaluatorType dstEvaluator(dst); - SrcEvaluatorType srcEvaluator(src); - - typedef packet_traits PacketTraits; + typedef typename Kernel::Index Index; + typedef packet_traits PacketTraits; enum { packetSize = PacketTraits::size, alignable = PacketTraits::AlignedOnScalar, - dstAlignment = alignable ? Aligned : int(copy_using_evaluator_traits::DstIsAligned) + dstAlignment = alignable ? Aligned : int(Kernel::AssignmentTraits::DstIsAligned) }; const Index packetAlignedMask = packetSize - 1; - const Index innerSize = dst.innerSize(); - const Index outerSize = dst.outerSize(); - const Index alignedStep = alignable ? (packetSize - dst.outerStride() % packetSize) & packetAlignedMask : 0; - Index alignedStart = ((!alignable) || copy_using_evaluator_traits::DstIsAligned) ? 0 - : internal::first_aligned(&dstEvaluator.coeffRef(0,0), innerSize); + const Index innerSize = kernel.innerSize(); + const Index outerSize = kernel.outerSize(); + const Index alignedStep = alignable ? (packetSize - kernel.outerStride() % packetSize) & packetAlignedMask : 0; + Index alignedStart = ((!alignable) || Kernel::AssignmentTraits::DstIsAligned) ? 0 + : internal::first_aligned(&kernel.dstEvaluator().coeffRef(0,0), innerSize); for(Index outer = 0; outer < outerSize; ++outer) { const Index alignedEnd = alignedStart + ((innerSize-alignedStart) & ~packetAlignedMask); // do the non-vectorizable part of the assignment for(Index inner = 0; inner(outer, inner, dstEvaluator, srcEvaluator); + kernel.template assignPacketByOuterInner(outer, inner); // do the non-vectorizable part of the assignment for(Index inner = alignedEnd; inner((alignedStart+alignedStep)%packetSize, innerSize); } @@ -599,21 +501,15 @@ struct dense_assignment_loop -struct dense_assignment_loop +// Indeed, what to do with the kernel's functor?? +template +struct dense_assignment_loop { - static inline void run(DstXprType &dst, const SrcXprType &src, const Kernel &/*kernel*/) + static inline void run(Kernel & kernel) { - typedef typename evaluator::type DstEvaluatorType; - typedef typename evaluator::type SrcEvaluatorType; - - DstEvaluatorType dstEvaluator(dst); - SrcEvaluatorType srcEvaluator(src); - // Evaluate rhs in temporary to prevent aliasing problems in a = a * a; // TODO: Do not pass the xpr object to evalTo() (Jitse) - srcEvaluator.evalTo(dstEvaluator, dst); + kernel.srcEvaluator().evalTo(kernel.dstEvaluator(), kernel.dstExpression()); } }; @@ -623,94 +519,123 @@ struct dense_assignment_loop -struct generic_dense_assignment_kernel +template +class generic_dense_assignment_kernel { - const Functor &m_functor; - generic_dense_assignment_kernel(const Functor &func) : m_functor(func) {} +protected: + typedef typename DstEvaluatorTypeT::XprType DstXprType; + typedef typename SrcEvaluatorTypeT::XprType SrcXprType; +public: - template - void assignCoeff(typename DstEvaluatorType::Index row, typename DstEvaluatorType::Index col, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + typedef DstEvaluatorTypeT DstEvaluatorType; + typedef SrcEvaluatorTypeT SrcEvaluatorType; + typedef typename DstEvaluatorType::Scalar Scalar; + typedef typename DstEvaluatorType::Index Index; + typedef copy_using_evaluator_traits AssignmentTraits; + + + generic_dense_assignment_kernel(DstEvaluatorType &dst, const SrcEvaluatorType &src, const Functor &func, DstXprType& dstExpr) + : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr) + {} + + Index size() const { return m_dstExpr.size(); } + Index innerSize() const { return m_dstExpr.innerSize(); } + Index outerSize() const { return m_dstExpr.outerSize(); } + Index outerStride() const { return m_dstExpr.outerStride(); } + + // TODO get rid of this one: + DstXprType& dstExpression() const { return m_dstExpr; } + + DstEvaluatorType& dstEvaluator() { return m_dst; } + const SrcEvaluatorType& srcEvaluator() const { return m_src; } + + void assignCoeff(Index row, Index col) { - m_functor.assignCoeff(dst.coeffRef(row,col), src.coeff(row,col)); + m_functor.assignCoeff(m_dst.coeffRef(row,col), m_src.coeff(row,col)); } - template - void assignCoeff(typename DstEvaluatorType::Index index, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + void assignCoeff(Index index) { - m_functor.assignCoeff(dst.coeffRef(index), src.coeff(index)); + m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index)); } - template - void assignCoeffByOuterInner(typename DstEvaluatorType::Index outer, typename DstEvaluatorType::Index inner, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + void assignCoeffByOuterInner(Index outer, Index inner) { - typedef typename DstEvaluatorType::Index Index; - Index row = rowIndexByOuterInner(outer, inner); - Index col = colIndexByOuterInner(outer, inner); - assignCoeff(row, col, dst, src); + Index row = rowIndexByOuterInner(outer, inner); + Index col = colIndexByOuterInner(outer, inner); + assignCoeff(row, col); } - template - void assignPacket(typename DstEvaluatorType::Index row, typename DstEvaluatorType::Index col, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + template + void assignPacket(Index row, Index col) { - m_functor.assignPacket(&dst.coeffRef(row,col), src.template packet(row,col)); + m_functor.assignPacket(&m_dst.coeffRef(row,col), m_src.template packet(row,col)); } - template - void assignPacket(typename DstEvaluatorType::Index index, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + template + void assignPacket(Index index) { - m_functor.assignPacket(&dst.coeffRef(index), src.template packet(index)); + m_functor.assignPacket(&m_dst.coeffRef(index), m_src.template packet(index)); } - template - void assignPacketByOuterInner(typename DstEvaluatorType::Index outer, typename DstEvaluatorType::Index inner, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + template + void assignPacketByOuterInner(Index outer, Index inner) { - typedef typename DstEvaluatorType::Index Index; - Index row = rowIndexByOuterInner(outer, inner); - Index col = colIndexByOuterInner(outer, inner); - assignPacket(row, col, dst, src); + Index row = rowIndexByOuterInner(outer, inner); + Index col = colIndexByOuterInner(outer, inner); + assignPacket(row, col); } - template static Index rowIndexByOuterInner(Index outer, Index inner) { - typedef typename EvaluatorType::ExpressionTraits Traits; + typedef typename DstEvaluatorType::ExpressionTraits Traits; return int(Traits::RowsAtCompileTime) == 1 ? 0 : int(Traits::ColsAtCompileTime) == 1 ? inner : int(Traits::Flags)&RowMajorBit ? outer : inner; } - template static Index colIndexByOuterInner(Index outer, Index inner) { - typedef typename EvaluatorType::ExpressionTraits Traits; + typedef typename DstEvaluatorType::ExpressionTraits Traits; return int(Traits::ColsAtCompileTime) == 1 ? 0 : int(Traits::RowsAtCompileTime) == 1 ? inner : int(Traits::Flags)&RowMajorBit ? inner : outer; } + +protected: + DstEvaluatorType& m_dst; + const SrcEvaluatorType& m_src; + const Functor &m_functor; + // TODO find a way to avoid the needs of the original expression + DstXprType& m_dstExpr; }; template void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func) { #ifdef EIGEN_DEBUG_ASSIGN + // TODO these traits should be computed from information provided by the evaluators internal::copy_using_evaluator_traits::debug(); #endif eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); - typedef generic_dense_assignment_kernel Kernel; - Kernel kernel(func); + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst); + SrcEvaluatorType srcEvaluator(src); + + typedef generic_dense_assignment_kernel Kernel; + Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived()); - dense_assignment_loop::run(const_cast(dst), src, kernel); + dense_assignment_loop::run(kernel); } template @@ -804,41 +729,63 @@ const DstXprType& noalias_copy_using_evaluator(const EigenBase& dst, // Arrays or Matrices. (Jitse) // Overload default assignPacket behavior for swapping them -template -struct swap_kernel : generic_dense_assignment_kernel > +template +class swap_kernel : public generic_dense_assignment_kernel > { - typedef generic_dense_assignment_kernel > Base; + typedef generic_dense_assignment_kernel > Base; + typedef typename DstEvaluatorTypeT::PacketScalar PacketScalar; + using Base::m_dst; + using Base::m_src; using Base::m_functor; - swap_kernel() : Base(swap_assign_op()) {} - template - void assignPacket(typename DstEvaluatorType::Index row, typename DstEvaluatorType::Index col, DstEvaluatorType &dst, const SrcEvaluatorType &src) const +public: + typedef typename Base::Scalar Scalar; + typedef typename Base::Index Index; + typedef typename Base::DstXprType DstXprType; + + swap_kernel(DstEvaluatorTypeT &dst, const SrcEvaluatorTypeT &src, DstXprType& dstExpr) + : Base(dst, src, swap_assign_op(), dstExpr) + {} + + template + void assignPacket(Index row, Index col) { - m_functor.template swapPacket(&dst.coeffRef(row,col), &const_cast(src).coeffRef(row,col)); + m_functor.template swapPacket(&m_dst.coeffRef(row,col), &const_cast(m_src).coeffRef(row,col)); } - template - void assignPacket(typename DstEvaluatorType::Index index, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + template + void assignPacket(Index index) { - m_functor.template swapPacket(&dst.coeffRef(index), &const_cast(src).coeffRef(index)); + m_functor.template swapPacket(&m_dst.coeffRef(index), &const_cast(m_src).coeffRef(index)); } // TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael) - template - void assignPacketByOuterInner(typename DstEvaluatorType::Index outer, typename DstEvaluatorType::Index inner, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + template + void assignPacketByOuterInner(Index outer, Index inner) { - typedef typename DstEvaluatorType::Index Index; - Index row = Base::template rowIndexByOuterInner(outer, inner); - Index col = Base::template colIndexByOuterInner(outer, inner); - assignPacket(row, col, dst, src); + Index row = Base::rowIndexByOuterInner(outer, inner); + Index col = Base::colIndexByOuterInner(outer, inner); + assignPacket(row, col); } }; template void swap_using_evaluator(const DstXprType& dst, const SrcXprType& src) { - typedef swap_kernel kernel; - dense_assignment_loop::run(const_cast(dst), src, kernel()); + // TODO there is too much redundancy with call_dense_assignment_loop + + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + + DstEvaluatorType dstEvaluator(dst); + SrcEvaluatorType srcEvaluator(src); + + typedef swap_kernel Kernel; + Kernel kernel(dstEvaluator, srcEvaluator, dst.const_cast_derived()); + + dense_assignment_loop::run(kernel); } // Based on MatrixBase::operator+= (in CwiseBinaryOp.h) diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 8aed51022..855914f2e 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -95,7 +95,7 @@ struct product_evaluator_dispatcher, GeneralProduct - void evalTo(DstEvaluatorType /* not used */, DstXprType& dst) + void evalTo(DstEvaluatorType /* not used */, DstXprType& dst) const { dst.resize(m_xpr.rows(), m_xpr.cols()); GeneralProduct(m_xpr.lhs(), m_xpr.rhs()).evalTo(dst);