From 8edc9647345445c2b3e8b9ff1c70c625f43755c6 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 6 Nov 2013 18:17:59 +0100 Subject: [PATCH] bug #99: refactor assignment and compound assignment mechanism through "assignment functors" and "assignment kernels". The former is very low level and generic. The latter abstracts the former for dense expressions. This refactoring makes it possible to get rid of the very ugly SwapWrapper and SelfCwiseBinaryOp classes. In the future, this will also make it possible to simplify all these evaluation loops and perhaps to reuse them for reductions. It will also make it possible to specialize operations like expr1 += expr2 outside Eigen, and to do so for any kind of expression (dense, sparse, tensor, etc.) --- Eigen/Core | 1 + Eigen/src/Core/AssignEvaluator.h | 463 ++++++++++++------- Eigen/src/Core/CoreEvaluators.h | 12 +- Eigen/src/Core/functors/AssignmentFunctors.h | 167 +++++++ test/evaluator_common.h | 0 5 files changed, 478 insertions(+), 165 deletions(-) create mode 100644 Eigen/src/Core/functors/AssignmentFunctors.h create mode 100644 test/evaluator_common.h diff --git a/Eigen/Core b/Eigen/Core index bf2d3a908..722a49030 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -383,6 +383,7 @@ using std::ptrdiff_t; #include "src/Core/ArrayWrapper.h" #ifdef EIGEN_ENABLE_EVALUATORS +#include "src/Core/functors/AssignmentFunctors.h" #include "src/Core/Product.h" #include "src/Core/CoreEvaluators.h" #include "src/Core/AssignEvaluator.h" diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 5f2fb9d38..adea7ef13 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -2,7 +2,7 @@ // for linear algebra.
// // Copyright (C) 2011 Benoit Jacob -// Copyright (C) 2011 Gael Guennebaud +// Copyright (C) 2011-2013 Gael Guennebaud // Copyright (C) 2011-2012 Jitse Niesen // // This Source Code Form is subject to the terms of the Mozilla @@ -94,7 +94,7 @@ public: enum { Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal)) ? ( - int(MayUnrollCompletely) ? int(CompleteUnrolling) + int(MayUnrollCompletely) ? int(CompleteUnrolling) : int(MayUnrollInner) ? int(InnerUnrolling) : int(NoUnrolling) ) @@ -139,7 +139,7 @@ public: *** Default traversal *** ************************/ -template +template struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling { typedef typename DstEvaluatorType::XprType DstXprType; @@ -150,69 +150,74 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling }; static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, - SrcEvaluatorType &srcEvaluator) + SrcEvaluatorType &srcEvaluator, + const Kernel &kernel + ) { - dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator); + kernel.assignCoeffByOuterInner(outer, inner, dstEvaluator, srcEvaluator); copy_using_evaluator_DefaultTraversal_CompleteUnrolling - - ::run(dstEvaluator, srcEvaluator); + + ::run(dstEvaluator, srcEvaluator, kernel); } }; -template -struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling +template +struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling { - static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { } + static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, const Kernel&) { } }; -template +template struct copy_using_evaluator_DefaultTraversal_InnerUnrolling { static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, - SrcEvaluatorType &srcEvaluator, + SrcEvaluatorType &srcEvaluator, + const Kernel &kernel, int outer) { - dstEvaluator.copyCoeffByOuterInner(outer, Index, srcEvaluator); + kernel.assignCoeffByOuterInner(outer, Index, 
dstEvaluator, srcEvaluator); copy_using_evaluator_DefaultTraversal_InnerUnrolling - - ::run(dstEvaluator, srcEvaluator, outer); + + ::run(dstEvaluator, srcEvaluator, kernel, outer); } }; -template -struct copy_using_evaluator_DefaultTraversal_InnerUnrolling +template +struct copy_using_evaluator_DefaultTraversal_InnerUnrolling { - static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, int) { } + static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, const Kernel&, int) { } }; /*********************** *** Linear traversal *** ***********************/ -template +template struct copy_using_evaluator_LinearTraversal_CompleteUnrolling { static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, - SrcEvaluatorType &srcEvaluator) + SrcEvaluatorType &srcEvaluator, + const Kernel& kernel + ) { - dstEvaluator.copyCoeff(Index, srcEvaluator); + kernel.assignCoeff(Index, dstEvaluator, srcEvaluator); copy_using_evaluator_LinearTraversal_CompleteUnrolling - - ::run(dstEvaluator, srcEvaluator); + + ::run(dstEvaluator, srcEvaluator, kernel); } }; -template -struct copy_using_evaluator_LinearTraversal_CompleteUnrolling +template +struct copy_using_evaluator_LinearTraversal_CompleteUnrolling { - static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { } + static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, const Kernel&) { } }; /************************** *** Inner vectorization *** **************************/ -template +template struct copy_using_evaluator_innervec_CompleteUnrolling { typedef typename DstEvaluatorType::XprType DstXprType; @@ -225,63 +230,66 @@ struct copy_using_evaluator_innervec_CompleteUnrolling }; static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, - SrcEvaluatorType &srcEvaluator) + SrcEvaluatorType &srcEvaluator, + const Kernel &kernel + ) { - dstEvaluator.template copyPacketByOuterInner(outer, inner, srcEvaluator); + kernel.template 
assignPacketByOuterInner(outer, inner, dstEvaluator, srcEvaluator); enum { NextIndex = Index + packet_traits::size }; copy_using_evaluator_innervec_CompleteUnrolling - - ::run(dstEvaluator, srcEvaluator); + + ::run(dstEvaluator, srcEvaluator, kernel); } }; -template -struct copy_using_evaluator_innervec_CompleteUnrolling +template +struct copy_using_evaluator_innervec_CompleteUnrolling { - static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { } + static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, const Kernel&) { } }; -template +template struct copy_using_evaluator_innervec_InnerUnrolling { static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, - SrcEvaluatorType &srcEvaluator, + SrcEvaluatorType &srcEvaluator, + const Kernel &kernel, int outer) { - dstEvaluator.template copyPacketByOuterInner(outer, Index, srcEvaluator); + kernel.template assignPacketByOuterInner(outer, Index, dstEvaluator, srcEvaluator); typedef typename DstEvaluatorType::XprType DstXprType; enum { NextIndex = Index + packet_traits::size }; copy_using_evaluator_innervec_InnerUnrolling - - ::run(dstEvaluator, srcEvaluator, outer); + + ::run(dstEvaluator, srcEvaluator, kernel, outer); } }; -template -struct copy_using_evaluator_innervec_InnerUnrolling +template +struct copy_using_evaluator_innervec_InnerUnrolling { - static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, int) { } + static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, const Kernel &, int) { } }; /*************************************************************************** * Part 3 : implementation of all cases ***************************************************************************/ -// copy_using_evaluator_impl is based on assign_impl +// dense_assignment_loop is based on assign_impl -template::Traversal, int Unrolling = copy_using_evaluator_traits::Unrolling> -struct copy_using_evaluator_impl; +struct dense_assignment_loop; 
/************************ *** Default traversal *** ************************/ -template -struct copy_using_evaluator_impl +template +struct dense_assignment_loop { - static void run(DstXprType& dst, const SrcXprType& src) + static void run(DstXprType& dst, const SrcXprType& src, const Kernel &kernel) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; @@ -292,16 +300,16 @@ struct copy_using_evaluator_impl -struct copy_using_evaluator_impl +template +struct dense_assignment_loop { - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; @@ -310,16 +318,16 @@ struct copy_using_evaluator_impl - ::run(dstEvaluator, srcEvaluator); + + ::run(dstEvaluator, srcEvaluator, kernel); } }; -template -struct copy_using_evaluator_impl +template +struct dense_assignment_loop { typedef typename DstXprType::Index Index; - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; @@ -330,8 +338,8 @@ struct copy_using_evaluator_impl - ::run(dstEvaluator, srcEvaluator, outer); + + ::run(dstEvaluator, srcEvaluator, kernel, outer); } }; @@ -339,43 +347,50 @@ struct copy_using_evaluator_impl -struct unaligned_copy_using_evaluator_impl +struct unaligned_dense_assignment_loop { // if IsAligned = true, then do nothing - template - static EIGEN_STRONG_INLINE void run(const SrcEvaluatorType&, DstEvaluatorType&, + template + static EIGEN_STRONG_INLINE void run(const SrcEvaluatorType&, DstEvaluatorType&, const Kernel&, typename SrcEvaluatorType::Index, typename SrcEvaluatorType::Index) {} }; template <> 
-struct unaligned_copy_using_evaluator_impl +struct unaligned_dense_assignment_loop { // MSVC must not inline this functions. If it does, it fails to optimize the // packet access path. + // FIXME check which version exhibits this issue #ifdef _MSC_VER - template + template static EIGEN_DONT_INLINE void run(DstEvaluatorType &dstEvaluator, const SrcEvaluatorType &srcEvaluator, + const Kernel &kernel, typename DstEvaluatorType::Index start, typename DstEvaluatorType::Index end) #else - template + template static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, const SrcEvaluatorType &srcEvaluator, + const Kernel &kernel, typename DstEvaluatorType::Index start, typename DstEvaluatorType::Index end) #endif { for (typename DstEvaluatorType::Index index = start; index < end; ++index) - dstEvaluator.copyCoeff(index, srcEvaluator); + kernel.assignCoeff(index, dstEvaluator, srcEvaluator); } }; -template -struct copy_using_evaluator_impl +template +struct dense_assignment_loop { - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; @@ -395,22 +410,20 @@ struct copy_using_evaluator_impl::run(dstEvaluator, srcEvaluator, 0, alignedStart); + unaligned_dense_assignment_loop::run(dstEvaluator, srcEvaluator, kernel, 0, alignedStart); for(Index index = alignedStart; index < alignedEnd; index += packetSize) - { - dstEvaluator.template copyPacket(index, srcEvaluator); - } + kernel.template assignPacket(index, dstEvaluator, srcEvaluator); - unaligned_copy_using_evaluator_impl<>::run(dstEvaluator, srcEvaluator, alignedEnd, size); + unaligned_dense_assignment_loop<>::run(dstEvaluator, srcEvaluator, kernel, alignedEnd, size); } }; -template -struct copy_using_evaluator_impl +template +struct dense_assignment_loop { typedef typename DstXprType::Index 
Index; - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; @@ -423,11 +436,11 @@ struct copy_using_evaluator_impl - ::run(dstEvaluator, srcEvaluator); + + ::run(dstEvaluator, srcEvaluator, kernel); copy_using_evaluator_DefaultTraversal_CompleteUnrolling - - ::run(dstEvaluator, srcEvaluator); + + ::run(dstEvaluator, srcEvaluator, kernel); } }; @@ -435,10 +448,10 @@ struct copy_using_evaluator_impl -struct copy_using_evaluator_impl +template +struct dense_assignment_loop { - static inline void run(DstXprType &dst, const SrcXprType &src) + static inline void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; @@ -451,16 +464,15 @@ struct copy_using_evaluator_impl::size; for(Index outer = 0; outer < outerSize; ++outer) - for(Index inner = 0; inner < innerSize; inner+=packetSize) { - dstEvaluator.template copyPacketByOuterInner(outer, inner, srcEvaluator); - } + for(Index inner = 0; inner < innerSize; inner+=packetSize) + kernel.template assignPacketByOuterInner(outer, inner, dstEvaluator, srcEvaluator); } }; -template -struct copy_using_evaluator_impl +template +struct dense_assignment_loop { - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; @@ -469,16 +481,16 @@ struct copy_using_evaluator_impl - ::run(dstEvaluator, srcEvaluator); + + ::run(dstEvaluator, srcEvaluator, kernel); } }; -template -struct copy_using_evaluator_impl +template +struct dense_assignment_loop { typedef typename 
DstXprType::Index Index; - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; @@ -489,8 +501,8 @@ struct copy_using_evaluator_impl - ::run(dstEvaluator, srcEvaluator, outer); + + ::run(dstEvaluator, srcEvaluator, kernel, outer); } }; @@ -498,10 +510,10 @@ struct copy_using_evaluator_impl -struct copy_using_evaluator_impl +template +struct dense_assignment_loop { - static inline void run(DstXprType &dst, const SrcXprType &src) + static inline void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; @@ -512,14 +524,14 @@ struct copy_using_evaluator_impl -struct copy_using_evaluator_impl +template +struct dense_assignment_loop { - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; @@ -528,8 +540,8 @@ struct copy_using_evaluator_impl - ::run(dstEvaluator, srcEvaluator); + + ::run(dstEvaluator, srcEvaluator, kernel); } }; @@ -537,10 +549,10 @@ struct copy_using_evaluator_impl -struct copy_using_evaluator_impl +template +struct dense_assignment_loop { - static inline void run(DstXprType &dst, const SrcXprType &src) + static inline void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; @@ -566,19 +578,16 @@ struct copy_using_evaluator_impl(outer, inner, srcEvaluator); - } + for(Index inner = alignedStart; inner(outer, inner, dstEvaluator, srcEvaluator); // do the non-vectorizable part of 
the assignment - for(Index inner = alignedEnd; inner((alignedStart+alignedStep)%packetSize, innerSize); } @@ -589,10 +598,12 @@ struct copy_using_evaluator_impl -struct copy_using_evaluator_impl +// TODO: this 'AllAtOnceTraversal' should be dropped or caught earlier (Gael) +// Indeed, what to do with the kernel?? +template +struct dense_assignment_loop { - static inline void run(DstXprType &dst, const SrcXprType &src) + static inline void run(DstXprType &dst, const SrcXprType &src, const Kernel &/*kernel*/) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; @@ -601,23 +612,126 @@ struct copy_using_evaluator_impl +struct generic_dense_assignment_kernel +{ + const Functor &m_functor; + generic_dense_assignment_kernel(const Functor &func) : m_functor(func) {} + + template + void assignCoeff(typename DstEvaluatorType::Index row, typename DstEvaluatorType::Index col, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + m_functor.assignCoeff(dst.coeffRef(row,col), src.coeff(row,col)); + } + + template + void assignCoeff(typename DstEvaluatorType::Index index, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + m_functor.assignCoeff(dst.coeffRef(index), src.coeff(index)); + } + + template + void assignCoeffByOuterInner(typename DstEvaluatorType::Index outer, typename DstEvaluatorType::Index inner, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + typedef typename DstEvaluatorType::Index Index; + Index row = rowIndexByOuterInner(outer, inner); + Index col = colIndexByOuterInner(outer, inner); + assignCoeff(row, col, dst, src); + } + + + template + void assignPacket(typename DstEvaluatorType::Index row, typename DstEvaluatorType::Index col, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + m_functor.assignPacket(&dst.coeffRef(row,col), src.template packet(row,col)); + } + + template + void assignPacket(typename DstEvaluatorType::Index index, DstEvaluatorType &dst, const 
SrcEvaluatorType &src) const + { + m_functor.assignPacket(&dst.coeffRef(index), src.template packet(index)); + } + + template + void assignPacketByOuterInner(typename DstEvaluatorType::Index outer, typename DstEvaluatorType::Index inner, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + typedef typename DstEvaluatorType::Index Index; + Index row = rowIndexByOuterInner(outer, inner); + Index col = colIndexByOuterInner(outer, inner); + assignPacket(row, col, dst, src); + } + + template + static Index rowIndexByOuterInner(Index outer, Index inner) + { + typedef typename EvaluatorType::ExpressionTraits Traits; + return int(Traits::RowsAtCompileTime) == 1 ? 0 + : int(Traits::ColsAtCompileTime) == 1 ? inner + : int(Traits::Flags)&RowMajorBit ? outer + : inner; + } + + template + static Index colIndexByOuterInner(Index outer, Index inner) + { + typedef typename EvaluatorType::ExpressionTraits Traits; + return int(Traits::ColsAtCompileTime) == 1 ? 0 + : int(Traits::RowsAtCompileTime) == 1 ? inner + : int(Traits::Flags)&RowMajorBit ? inner + : outer; + } +}; + +template +void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func) +{ +#ifdef EIGEN_DEBUG_ASSIGN + internal::copy_using_evaluator_traits::debug(); +#endif + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + + typedef generic_dense_assignment_kernel Kernel; + Kernel kernel(func); + + dense_assignment_loop::run(const_cast(dst), src, kernel); +} + +template +void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src) +{ + call_dense_assignment_loop(dst, src, internal::assign_op()); +} + +/*************************************************************************** +* Part 5 : Entry points ***************************************************************************/ // Based on DenseBase::LazyAssign() +// The following functions are just for testing and they are meant to be moved to operator= and the likes. 
template class StorageBase, typename SrcXprType> EIGEN_STRONG_INLINE const DstXprType& copy_using_evaluator(const NoAlias& dst, const EigenBase& src) { - return noalias_copy_using_evaluator(dst.expression(), src.derived()); + return noalias_copy_using_evaluator(dst.expression(), src.derived(), internal::assign_op()); } template::AssumeAliasing> @@ -641,57 +755,90 @@ struct AddEvalIfAssumingAliasing } }; +template +EIGEN_STRONG_INLINE +const DstXprType& copy_using_evaluator(const EigenBase& dst, const EigenBase& src, const Functor &func) +{ + return noalias_copy_using_evaluator(dst.const_cast_derived(), + AddEvalIfAssumingAliasing::run(src.derived()), + func + ); +} + +// this mimics operator= template EIGEN_STRONG_INLINE const DstXprType& copy_using_evaluator(const EigenBase& dst, const EigenBase& src) { - return noalias_copy_using_evaluator(dst.const_cast_derived(), - AddEvalIfAssumingAliasing::run(src.derived())); + return copy_using_evaluator(dst.const_cast_derived(), src.derived(), internal::assign_op()); } -template +template EIGEN_STRONG_INLINE -const DstXprType& noalias_copy_using_evaluator(const PlainObjectBase& dst, const EigenBase& src) +const DstXprType& noalias_copy_using_evaluator(const PlainObjectBase& dst, const EigenBase& src, const Functor &func) { #ifdef EIGEN_DEBUG_ASSIGN internal::copy_using_evaluator_traits::debug(); #endif #ifdef EIGEN_NO_AUTOMATIC_RESIZING eigen_assert((dst.size()==0 || (IsVectorAtCompileTime ? (dst.size() == src.size()) - : (dst.rows() == src.rows() && dst.cols() == src.cols()))) - && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined"); + : (dst.rows() == src.rows() && dst.cols() == src.cols()))) + && "Size mismatch. 
Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined"); #else dst.const_cast_derived().resizeLike(src.derived()); #endif - return copy_using_evaluator_without_resizing(dst.const_cast_derived(), src.derived()); + call_dense_assignment_loop(dst.const_cast_derived(), src.derived(), func); + return dst.derived(); } -template +template EIGEN_STRONG_INLINE -const DstXprType& noalias_copy_using_evaluator(const EigenBase& dst, const EigenBase& src) +const DstXprType& noalias_copy_using_evaluator(const EigenBase& dst, const EigenBase& src, const Functor &func) { - return copy_using_evaluator_without_resizing(dst.const_cast_derived(), src.derived()); -} - -template -const DstXprType& copy_using_evaluator_without_resizing(const DstXprType& dst, const SrcXprType& src) -{ -#ifdef EIGEN_DEBUG_ASSIGN - internal::copy_using_evaluator_traits::debug(); -#endif - eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); - copy_using_evaluator_impl::run(const_cast(dst), src); - return dst; + call_dense_assignment_loop(dst.const_cast_derived(), src.derived(), func); + return dst.derived(); } // Based on DenseBase::swap() -// TODO: Chech whether we need to do something special for swapping two -// Arrays or Matrices. +// TODO: Check whether we need to do something special for swapping two +// Arrays or Matrices. 
(Jitse) +// Overload default assignPacket behavior for swapping them +template +struct swap_kernel : generic_dense_assignment_kernel > +{ + typedef generic_dense_assignment_kernel > Base; + using Base::m_functor; + swap_kernel() : Base(swap_assign_op()) {} + + template + void assignPacket(typename DstEvaluatorType::Index row, typename DstEvaluatorType::Index col, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + m_functor.template swapPacket(&dst.coeffRef(row,col), &const_cast(src).coeffRef(row,col)); + } + + template + void assignPacket(typename DstEvaluatorType::Index index, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + m_functor.template swapPacket(&dst.coeffRef(index), &const_cast(src).coeffRef(index)); + } + + // TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael) + template + void assignPacketByOuterInner(typename DstEvaluatorType::Index outer, typename DstEvaluatorType::Index inner, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + typedef typename DstEvaluatorType::Index Index; + Index row = Base::template rowIndexByOuterInner(outer, inner); + Index col = Base::template colIndexByOuterInner(outer, inner); + assignPacket(row, col, dst, src); + } +}; + template void swap_using_evaluator(const DstXprType& dst, const SrcXprType& src) { - copy_using_evaluator(SwapWrapper(const_cast(dst)), src); + typedef swap_kernel kernel; + dense_assignment_loop::run(const_cast(dst), src, kernel()); } // Based on MatrixBase::operator+= (in CwiseBinaryOp.h) @@ -699,8 +846,7 @@ template void add_assign_using_evaluator(const MatrixBase& dst, const MatrixBase& src) { typedef typename DstXprType::Scalar Scalar; - SelfCwiseBinaryOp, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); - copy_using_evaluator(tmp, src.derived()); + copy_using_evaluator(dst.derived(), src.derived(), add_assign_op()); } // Based on ArrayBase::operator+= @@ -708,42 +854,37 @@ 
template void add_assign_using_evaluator(const ArrayBase& dst, const ArrayBase& src) { typedef typename DstXprType::Scalar Scalar; - SelfCwiseBinaryOp, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); - copy_using_evaluator(tmp, src.derived()); + copy_using_evaluator(dst.derived(), src.derived(), add_assign_op()); } -// TODO: Add add_assign_using_evaluator for EigenBase ? +// TODO: Add add_assign_using_evaluator for EigenBase ? (Jitse) template void subtract_assign_using_evaluator(const MatrixBase& dst, const MatrixBase& src) { typedef typename DstXprType::Scalar Scalar; - SelfCwiseBinaryOp, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); - copy_using_evaluator(tmp, src.derived()); + copy_using_evaluator(dst.derived(), src.derived(), sub_assign_op()); } template void subtract_assign_using_evaluator(const ArrayBase& dst, const ArrayBase& src) { typedef typename DstXprType::Scalar Scalar; - SelfCwiseBinaryOp, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); - copy_using_evaluator(tmp, src.derived()); + copy_using_evaluator(dst.derived(), src.derived(), sub_assign_op()); } template void multiply_assign_using_evaluator(const ArrayBase& dst, const ArrayBase& src) { typedef typename DstXprType::Scalar Scalar; - SelfCwiseBinaryOp, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); - copy_using_evaluator(tmp, src.derived()); + copy_using_evaluator(dst.derived(), src.derived(), mul_assign_op()); } template void divide_assign_using_evaluator(const ArrayBase& dst, const ArrayBase& src) { typedef typename DstXprType::Scalar Scalar; - SelfCwiseBinaryOp, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); - copy_using_evaluator(tmp, src.derived()); + copy_using_evaluator(dst.derived(), src.derived(), div_assign_op()); } diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 3240ec6ed..082c00df4 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -78,6 +78,8 @@ template struct 
evaluator_impl_base { typedef typename ExpressionType::Index Index; + // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices. + typedef traits ExpressionTraits; template void copyCoeff(Index row, Index col, const OtherEvaluatorType& other) @@ -307,15 +309,17 @@ struct evaluator_impl > evaluator_impl(const XprType& xpr) : m_result(xpr.rows(), xpr.cols()), m_resultImpl(m_result) - { - copy_using_evaluator_without_resizing(m_result, xpr.arg()); + { + // TODO we should simply do m_result(xpr.arg()); + call_dense_assignment_loop(m_result, xpr.arg()); } // This constructor is used when nesting an EvalTo evaluator in another evaluator evaluator_impl(const ArgType& arg) : m_result(arg.rows(), arg.cols()), m_resultImpl(m_result) - { - copy_using_evaluator_without_resizing(m_result, arg); + { + // TODO we should simply do m_result(xpr.arg()); + call_dense_assignment_loop(m_result, arg); } typedef typename PlainObject::Index Index; diff --git a/Eigen/src/Core/functors/AssignmentFunctors.h b/Eigen/src/Core/functors/AssignmentFunctors.h new file mode 100644 index 000000000..ae264aa64 --- /dev/null +++ b/Eigen/src/Core/functors/AssignmentFunctors.h @@ -0,0 +1,167 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_ASSIGNMENT_FUNCTORS_H +#define EIGEN_ASSIGNMENT_FUNCTORS_H + +namespace Eigen { + +namespace internal { + +/** \internal + * \brief Template functor for scalar/packet assignment + * + */ +template struct assign_op { + + EIGEN_EMPTY_STRUCT_CTOR(assign_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a = b; } + + template + EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const + { internal::pstoret(a,b); } +}; +template +struct functor_traits > { + enum { + Cost = NumTraits::ReadCost, + PacketAccess = packet_traits::IsVectorized + }; +}; + +/** \internal + * \brief Template functor for scalar/packet assignment with addition + * + */ +template struct add_assign_op { + + EIGEN_EMPTY_STRUCT_CTOR(add_assign_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a += b; } + + template + EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const + { internal::pstoret(a,internal::padd(internal::ploadt(a),b)); } +}; +template +struct functor_traits > { + enum { + Cost = NumTraits::ReadCost + NumTraits::AddCost, + PacketAccess = packet_traits::HasAdd + }; +}; + +/** \internal + * \brief Template functor for scalar/packet assignment with subtraction + * + */ +template struct sub_assign_op { + + EIGEN_EMPTY_STRUCT_CTOR(sub_assign_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a -= b; } + + template + EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const + { internal::pstoret(a,internal::psub(internal::ploadt(a),b)); } +}; +template +struct functor_traits > { + enum { + Cost = NumTraits::ReadCost + NumTraits::AddCost, + PacketAccess = packet_traits::HasAdd + }; +}; + +/** \internal + * \brief Template functor for scalar/packet assignment with multiplication + * + */ +template struct mul_assign_op { + + EIGEN_EMPTY_STRUCT_CTOR(mul_assign_op) + EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a *= b; } + + template + EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const + { internal::pstoret(a,internal::pmul(internal::ploadt(a),b)); } +}; +template +struct functor_traits > { + enum { + Cost = NumTraits::ReadCost + NumTraits::MulCost, + PacketAccess = packet_traits::HasMul + }; +}; + +/** \internal + * \brief Template functor for scalar/packet assignment with division + * + */ +template struct div_assign_op { + + EIGEN_EMPTY_STRUCT_CTOR(div_assign_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a /= b; } + + template + EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const + { internal::pstoret(a,internal::pdiv(internal::ploadt(a),b)); } +}; +template +struct functor_traits > { + enum { + Cost = NumTraits::ReadCost + NumTraits::MulCost, + PacketAccess = packet_traits::HasMul + }; +}; + + +/** \internal + * \brief Template functor for scalar/packet assignment with swapping + * + * It works as follows. For a non-vectorized evaluation loop, we have: + * for(i) func(A.coeffRef(i), B.coeff(i)); + * where B is a SwapWrapper expression. The trick is to make SwapWrapper::coeff behave like a non-const coeffRef. + * Actually, SwapWrapper might not even be needed: even if B is a plain expression, since it has to be writable + * B.coeff already returns a const reference to the underlying scalar value. + * + * The case of a vectorized loop is trickier: + * for(i,j) func.assignPacket(&A.coeffRef(i,j), B.packet(i,j)); + * Here, B must be a SwapWrapper whose packet function actually returns a proxy object holding a Scalar*, + * the actual alignment and Packet type.
+ * + */ +template struct swap_assign_op { + + EIGEN_EMPTY_STRUCT_CTOR(swap_assign_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const + { + using std::swap; + swap(a,const_cast(b)); + } + + template + EIGEN_STRONG_INLINE void swapPacket(Scalar* a, Scalar* b) const + { + Packet tmp = internal::ploadt(b); + internal::pstoret(b, internal::ploadt(a)); + internal::pstoret(a, tmp); + } +}; +template +struct functor_traits > { + enum { + Cost = 3 * NumTraits::ReadCost, + PacketAccess = packet_traits::IsVectorized + }; +}; + +} // namespace internal + +} // namespace Eigen + +#endif // EIGEN_ASSIGNMENT_FUNCTORS_H diff --git a/test/evaluator_common.h b/test/evaluator_common.h new file mode 100644 index 000000000..e69de29bb