diff --git a/Eigen/Core b/Eigen/Core index bf2d3a908..722a49030 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -383,6 +383,7 @@ using std::ptrdiff_t; #include "src/Core/ArrayWrapper.h" #ifdef EIGEN_ENABLE_EVALUATORS +#include "src/Core/functors/AssignmentFunctors.h" #include "src/Core/Product.h" #include "src/Core/CoreEvaluators.h" #include "src/Core/AssignEvaluator.h" diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 5f2fb9d38..adea7ef13 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -2,7 +2,7 @@ // for linear algebra. // // Copyright (C) 2011 Benoit Jacob -// Copyright (C) 2011 Gael Guennebaud +// Copyright (C) 2011-2013 Gael Guennebaud // Copyright (C) 2011-2012 Jitse Niesen // // This Source Code Form is subject to the terms of the Mozilla @@ -94,7 +94,7 @@ public: enum { Unrolling = (int(Traversal) == int(InnerVectorizedTraversal) || int(Traversal) == int(DefaultTraversal)) ? ( - int(MayUnrollCompletely) ? int(CompleteUnrolling) + int(MayUnrollCompletely) ? int(CompleteUnrolling) : int(MayUnrollInner) ? 
int(InnerUnrolling) : int(NoUnrolling) ) @@ -139,7 +139,7 @@ public: *** Default traversal *** ************************/ -template +template struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling { typedef typename DstEvaluatorType::XprType DstXprType; @@ -150,69 +150,74 @@ struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling }; static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, - SrcEvaluatorType &srcEvaluator) + SrcEvaluatorType &srcEvaluator, + const Kernel &kernel + ) { - dstEvaluator.copyCoeffByOuterInner(outer, inner, srcEvaluator); + kernel.assignCoeffByOuterInner(outer, inner, dstEvaluator, srcEvaluator); copy_using_evaluator_DefaultTraversal_CompleteUnrolling - - ::run(dstEvaluator, srcEvaluator); + + ::run(dstEvaluator, srcEvaluator, kernel); } }; -template -struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling +template +struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling { - static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { } + static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, const Kernel&) { } }; -template +template struct copy_using_evaluator_DefaultTraversal_InnerUnrolling { static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, - SrcEvaluatorType &srcEvaluator, + SrcEvaluatorType &srcEvaluator, + const Kernel &kernel, int outer) { - dstEvaluator.copyCoeffByOuterInner(outer, Index, srcEvaluator); + kernel.assignCoeffByOuterInner(outer, Index, dstEvaluator, srcEvaluator); copy_using_evaluator_DefaultTraversal_InnerUnrolling - - ::run(dstEvaluator, srcEvaluator, outer); + + ::run(dstEvaluator, srcEvaluator, kernel, outer); } }; -template -struct copy_using_evaluator_DefaultTraversal_InnerUnrolling +template +struct copy_using_evaluator_DefaultTraversal_InnerUnrolling { - static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, int) { } + static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, const 
Kernel&, int) { } }; /*********************** *** Linear traversal *** ***********************/ -template +template struct copy_using_evaluator_LinearTraversal_CompleteUnrolling { static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, - SrcEvaluatorType &srcEvaluator) + SrcEvaluatorType &srcEvaluator, + const Kernel& kernel + ) { - dstEvaluator.copyCoeff(Index, srcEvaluator); + kernel.assignCoeff(Index, dstEvaluator, srcEvaluator); copy_using_evaluator_LinearTraversal_CompleteUnrolling - - ::run(dstEvaluator, srcEvaluator); + + ::run(dstEvaluator, srcEvaluator, kernel); } }; -template -struct copy_using_evaluator_LinearTraversal_CompleteUnrolling +template +struct copy_using_evaluator_LinearTraversal_CompleteUnrolling { - static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { } + static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, const Kernel&) { } }; /************************** *** Inner vectorization *** **************************/ -template +template struct copy_using_evaluator_innervec_CompleteUnrolling { typedef typename DstEvaluatorType::XprType DstXprType; @@ -225,63 +230,66 @@ struct copy_using_evaluator_innervec_CompleteUnrolling }; static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, - SrcEvaluatorType &srcEvaluator) + SrcEvaluatorType &srcEvaluator, + const Kernel &kernel + ) { - dstEvaluator.template copyPacketByOuterInner(outer, inner, srcEvaluator); + kernel.template assignPacketByOuterInner(outer, inner, dstEvaluator, srcEvaluator); enum { NextIndex = Index + packet_traits::size }; copy_using_evaluator_innervec_CompleteUnrolling - - ::run(dstEvaluator, srcEvaluator); + + ::run(dstEvaluator, srcEvaluator, kernel); } }; -template -struct copy_using_evaluator_innervec_CompleteUnrolling +template +struct copy_using_evaluator_innervec_CompleteUnrolling { - static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&) { } + static EIGEN_STRONG_INLINE void 
run(DstEvaluatorType&, SrcEvaluatorType&, const Kernel&) { } }; -template +template struct copy_using_evaluator_innervec_InnerUnrolling { static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, - SrcEvaluatorType &srcEvaluator, + SrcEvaluatorType &srcEvaluator, + const Kernel &kernel, int outer) { - dstEvaluator.template copyPacketByOuterInner(outer, Index, srcEvaluator); + kernel.template assignPacketByOuterInner(outer, Index, dstEvaluator, srcEvaluator); typedef typename DstEvaluatorType::XprType DstXprType; enum { NextIndex = Index + packet_traits::size }; copy_using_evaluator_innervec_InnerUnrolling - - ::run(dstEvaluator, srcEvaluator, outer); + + ::run(dstEvaluator, srcEvaluator, kernel, outer); } }; -template -struct copy_using_evaluator_innervec_InnerUnrolling +template +struct copy_using_evaluator_innervec_InnerUnrolling { - static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, int) { } + static EIGEN_STRONG_INLINE void run(DstEvaluatorType&, SrcEvaluatorType&, const Kernel &, int) { } }; /*************************************************************************** * Part 3 : implementation of all cases ***************************************************************************/ -// copy_using_evaluator_impl is based on assign_impl +// dense_assignment_loop is based on assign_impl -template::Traversal, int Unrolling = copy_using_evaluator_traits::Unrolling> -struct copy_using_evaluator_impl; +struct dense_assignment_loop; /************************ *** Default traversal *** ************************/ -template -struct copy_using_evaluator_impl +template +struct dense_assignment_loop { - static void run(DstXprType& dst, const SrcXprType& src) + static void run(DstXprType& dst, const SrcXprType& src, const Kernel &kernel) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; @@ -292,16 +300,16 @@ struct copy_using_evaluator_impl -struct copy_using_evaluator_impl +template +struct 
dense_assignment_loop { - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; @@ -310,16 +318,16 @@ struct copy_using_evaluator_impl - ::run(dstEvaluator, srcEvaluator); + + ::run(dstEvaluator, srcEvaluator, kernel); } }; -template -struct copy_using_evaluator_impl +template +struct dense_assignment_loop { typedef typename DstXprType::Index Index; - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; @@ -330,8 +338,8 @@ struct copy_using_evaluator_impl - ::run(dstEvaluator, srcEvaluator, outer); + + ::run(dstEvaluator, srcEvaluator, kernel, outer); } }; @@ -339,43 +347,50 @@ struct copy_using_evaluator_impl -struct unaligned_copy_using_evaluator_impl +struct unaligned_dense_assignment_loop { // if IsAligned = true, then do nothing - template - static EIGEN_STRONG_INLINE void run(const SrcEvaluatorType&, DstEvaluatorType&, + template + static EIGEN_STRONG_INLINE void run(const SrcEvaluatorType&, DstEvaluatorType&, const Kernel&, typename SrcEvaluatorType::Index, typename SrcEvaluatorType::Index) {} }; template <> -struct unaligned_copy_using_evaluator_impl +struct unaligned_dense_assignment_loop { // MSVC must not inline this functions. If it does, it fails to optimize the // packet access path. 
+ // FIXME check which version exhibits this issue #ifdef _MSC_VER - template + template static EIGEN_DONT_INLINE void run(DstEvaluatorType &dstEvaluator, const SrcEvaluatorType &srcEvaluator, + const Kernel &kernel, typename DstEvaluatorType::Index start, typename DstEvaluatorType::Index end) #else - template + template static EIGEN_STRONG_INLINE void run(DstEvaluatorType &dstEvaluator, const SrcEvaluatorType &srcEvaluator, + const Kernel &kernel, typename DstEvaluatorType::Index start, typename DstEvaluatorType::Index end) #endif { for (typename DstEvaluatorType::Index index = start; index < end; ++index) - dstEvaluator.copyCoeff(index, srcEvaluator); + kernel.assignCoeff(index, dstEvaluator, srcEvaluator); } }; -template -struct copy_using_evaluator_impl +template +struct dense_assignment_loop { - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; @@ -395,22 +410,20 @@ struct copy_using_evaluator_impl::run(dstEvaluator, srcEvaluator, 0, alignedStart); + unaligned_dense_assignment_loop::run(dstEvaluator, srcEvaluator, kernel, 0, alignedStart); for(Index index = alignedStart; index < alignedEnd; index += packetSize) - { - dstEvaluator.template copyPacket(index, srcEvaluator); - } + kernel.template assignPacket(index, dstEvaluator, srcEvaluator); - unaligned_copy_using_evaluator_impl<>::run(dstEvaluator, srcEvaluator, alignedEnd, size); + unaligned_dense_assignment_loop<>::run(dstEvaluator, srcEvaluator, kernel, alignedEnd, size); } }; -template -struct copy_using_evaluator_impl +template +struct dense_assignment_loop { typedef typename DstXprType::Index Index; - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { 
typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; @@ -423,11 +436,11 @@ struct copy_using_evaluator_impl - ::run(dstEvaluator, srcEvaluator); + + ::run(dstEvaluator, srcEvaluator, kernel); copy_using_evaluator_DefaultTraversal_CompleteUnrolling - - ::run(dstEvaluator, srcEvaluator); + + ::run(dstEvaluator, srcEvaluator, kernel); } }; @@ -435,10 +448,10 @@ struct copy_using_evaluator_impl -struct copy_using_evaluator_impl +template +struct dense_assignment_loop { - static inline void run(DstXprType &dst, const SrcXprType &src) + static inline void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; @@ -451,16 +464,15 @@ struct copy_using_evaluator_impl::size; for(Index outer = 0; outer < outerSize; ++outer) - for(Index inner = 0; inner < innerSize; inner+=packetSize) { - dstEvaluator.template copyPacketByOuterInner(outer, inner, srcEvaluator); - } + for(Index inner = 0; inner < innerSize; inner+=packetSize) + kernel.template assignPacketByOuterInner(outer, inner, dstEvaluator, srcEvaluator); } }; -template -struct copy_using_evaluator_impl +template +struct dense_assignment_loop { - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; @@ -469,16 +481,16 @@ struct copy_using_evaluator_impl - ::run(dstEvaluator, srcEvaluator); + + ::run(dstEvaluator, srcEvaluator, kernel); } }; -template -struct copy_using_evaluator_impl +template +struct dense_assignment_loop { typedef typename DstXprType::Index Index; - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) 
{ typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; @@ -489,8 +501,8 @@ struct copy_using_evaluator_impl - ::run(dstEvaluator, srcEvaluator, outer); + + ::run(dstEvaluator, srcEvaluator, kernel, outer); } }; @@ -498,10 +510,10 @@ struct copy_using_evaluator_impl -struct copy_using_evaluator_impl +template +struct dense_assignment_loop { - static inline void run(DstXprType &dst, const SrcXprType &src) + static inline void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; @@ -512,14 +524,14 @@ struct copy_using_evaluator_impl -struct copy_using_evaluator_impl +template +struct dense_assignment_loop { - static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src) + static EIGEN_STRONG_INLINE void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; @@ -528,8 +540,8 @@ struct copy_using_evaluator_impl - ::run(dstEvaluator, srcEvaluator); + + ::run(dstEvaluator, srcEvaluator, kernel); } }; @@ -537,10 +549,10 @@ struct copy_using_evaluator_impl -struct copy_using_evaluator_impl +template +struct dense_assignment_loop { - static inline void run(DstXprType &dst, const SrcXprType &src) + static inline void run(DstXprType &dst, const SrcXprType &src, const Kernel &kernel) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; @@ -566,19 +578,16 @@ struct copy_using_evaluator_impl(outer, inner, srcEvaluator); - } + for(Index inner = alignedStart; inner(outer, inner, dstEvaluator, srcEvaluator); // do the non-vectorizable part of the assignment - for(Index inner = alignedEnd; inner((alignedStart+alignedStep)%packetSize, innerSize); } @@ -589,10 +598,12 @@ struct copy_using_evaluator_impl -struct copy_using_evaluator_impl +// TODO: 
this 'AllAtOnceTraversal' should be dropped or caught earlier (Gael) +// Indeed, what to do with the kernel?? +template +struct dense_assignment_loop { - static inline void run(DstXprType &dst, const SrcXprType &src) + static inline void run(DstXprType &dst, const SrcXprType &src, const Kernel &/*kernel*/) { typedef typename evaluator::type DstEvaluatorType; typedef typename evaluator::type SrcEvaluatorType; @@ -601,23 +612,126 @@ struct copy_using_evaluator_impl +struct generic_dense_assignment_kernel +{ + const Functor &m_functor; + generic_dense_assignment_kernel(const Functor &func) : m_functor(func) {} + + template + void assignCoeff(typename DstEvaluatorType::Index row, typename DstEvaluatorType::Index col, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + m_functor.assignCoeff(dst.coeffRef(row,col), src.coeff(row,col)); + } + + template + void assignCoeff(typename DstEvaluatorType::Index index, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + m_functor.assignCoeff(dst.coeffRef(index), src.coeff(index)); + } + + template + void assignCoeffByOuterInner(typename DstEvaluatorType::Index outer, typename DstEvaluatorType::Index inner, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + typedef typename DstEvaluatorType::Index Index; + Index row = rowIndexByOuterInner(outer, inner); + Index col = colIndexByOuterInner(outer, inner); + assignCoeff(row, col, dst, src); + } + + + template + void assignPacket(typename DstEvaluatorType::Index row, typename DstEvaluatorType::Index col, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + m_functor.assignPacket(&dst.coeffRef(row,col), src.template packet(row,col)); + } + + template + void assignPacket(typename DstEvaluatorType::Index index, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + m_functor.assignPacket(&dst.coeffRef(index), src.template packet(index)); + } + + template + void assignPacketByOuterInner(typename DstEvaluatorType::Index outer, typename 
DstEvaluatorType::Index inner, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + typedef typename DstEvaluatorType::Index Index; + Index row = rowIndexByOuterInner(outer, inner); + Index col = colIndexByOuterInner(outer, inner); + assignPacket(row, col, dst, src); + } + + template + static Index rowIndexByOuterInner(Index outer, Index inner) + { + typedef typename EvaluatorType::ExpressionTraits Traits; + return int(Traits::RowsAtCompileTime) == 1 ? 0 + : int(Traits::ColsAtCompileTime) == 1 ? inner + : int(Traits::Flags)&RowMajorBit ? outer + : inner; + } + + template + static Index colIndexByOuterInner(Index outer, Index inner) + { + typedef typename EvaluatorType::ExpressionTraits Traits; + return int(Traits::ColsAtCompileTime) == 1 ? 0 + : int(Traits::RowsAtCompileTime) == 1 ? inner + : int(Traits::Flags)&RowMajorBit ? inner + : outer; + } +}; + +template +void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src, const Functor &func) +{ +#ifdef EIGEN_DEBUG_ASSIGN + internal::copy_using_evaluator_traits::debug(); +#endif + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + + typedef generic_dense_assignment_kernel Kernel; + Kernel kernel(func); + + dense_assignment_loop::run(const_cast(dst), src, kernel); +} + +template +void call_dense_assignment_loop(const DstXprType& dst, const SrcXprType& src) +{ + call_dense_assignment_loop(dst, src, internal::assign_op()); +} + +/*************************************************************************** +* Part 5 : Entry points ***************************************************************************/ // Based on DenseBase::LazyAssign() +// The following functions are just for testing and they are meant to be moved to operator= and the likes. 
template class StorageBase, typename SrcXprType> EIGEN_STRONG_INLINE const DstXprType& copy_using_evaluator(const NoAlias& dst, const EigenBase& src) { - return noalias_copy_using_evaluator(dst.expression(), src.derived()); + return noalias_copy_using_evaluator(dst.expression(), src.derived(), internal::assign_op()); } template::AssumeAliasing> @@ -641,57 +755,90 @@ struct AddEvalIfAssumingAliasing } }; +template +EIGEN_STRONG_INLINE +const DstXprType& copy_using_evaluator(const EigenBase& dst, const EigenBase& src, const Functor &func) +{ + return noalias_copy_using_evaluator(dst.const_cast_derived(), + AddEvalIfAssumingAliasing::run(src.derived()), + func + ); +} + +// this mimics operator= template EIGEN_STRONG_INLINE const DstXprType& copy_using_evaluator(const EigenBase& dst, const EigenBase& src) { - return noalias_copy_using_evaluator(dst.const_cast_derived(), - AddEvalIfAssumingAliasing::run(src.derived())); + return copy_using_evaluator(dst.const_cast_derived(), src.derived(), internal::assign_op()); } -template +template EIGEN_STRONG_INLINE -const DstXprType& noalias_copy_using_evaluator(const PlainObjectBase& dst, const EigenBase& src) +const DstXprType& noalias_copy_using_evaluator(const PlainObjectBase& dst, const EigenBase& src, const Functor &func) { #ifdef EIGEN_DEBUG_ASSIGN internal::copy_using_evaluator_traits::debug(); #endif #ifdef EIGEN_NO_AUTOMATIC_RESIZING eigen_assert((dst.size()==0 || (IsVectorAtCompileTime ? (dst.size() == src.size()) - : (dst.rows() == src.rows() && dst.cols() == src.cols()))) - && "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined"); + : (dst.rows() == src.rows() && dst.cols() == src.cols()))) + && "Size mismatch. 
Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined"); #else dst.const_cast_derived().resizeLike(src.derived()); #endif - return copy_using_evaluator_without_resizing(dst.const_cast_derived(), src.derived()); + call_dense_assignment_loop(dst.const_cast_derived(), src.derived(), func); + return dst.derived(); } -template +template EIGEN_STRONG_INLINE -const DstXprType& noalias_copy_using_evaluator(const EigenBase& dst, const EigenBase& src) +const DstXprType& noalias_copy_using_evaluator(const EigenBase& dst, const EigenBase& src, const Functor &func) { - return copy_using_evaluator_without_resizing(dst.const_cast_derived(), src.derived()); -} - -template -const DstXprType& copy_using_evaluator_without_resizing(const DstXprType& dst, const SrcXprType& src) -{ -#ifdef EIGEN_DEBUG_ASSIGN - internal::copy_using_evaluator_traits::debug(); -#endif - eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); - copy_using_evaluator_impl::run(const_cast(dst), src); - return dst; + call_dense_assignment_loop(dst.const_cast_derived(), src.derived(), func); + return dst.derived(); } // Based on DenseBase::swap() -// TODO: Chech whether we need to do something special for swapping two -// Arrays or Matrices. +// TODO: Check whether we need to do something special for swapping two +// Arrays or Matrices. 
(Jitse) +// Overload default assignPacket behavior for swapping them +template +struct swap_kernel : generic_dense_assignment_kernel > +{ + typedef generic_dense_assignment_kernel > Base; + using Base::m_functor; + swap_kernel() : Base(swap_assign_op()) {} + + template + void assignPacket(typename DstEvaluatorType::Index row, typename DstEvaluatorType::Index col, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + m_functor.template swapPacket(&dst.coeffRef(row,col), &const_cast(src).coeffRef(row,col)); + } + + template + void assignPacket(typename DstEvaluatorType::Index index, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + m_functor.template swapPacket(&dst.coeffRef(index), &const_cast(src).coeffRef(index)); + } + + // TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I mean no CRTP (Gael) + template + void assignPacketByOuterInner(typename DstEvaluatorType::Index outer, typename DstEvaluatorType::Index inner, DstEvaluatorType &dst, const SrcEvaluatorType &src) const + { + typedef typename DstEvaluatorType::Index Index; + Index row = Base::template rowIndexByOuterInner(outer, inner); + Index col = Base::template colIndexByOuterInner(outer, inner); + assignPacket(row, col, dst, src); + } +}; + template void swap_using_evaluator(const DstXprType& dst, const SrcXprType& src) { - copy_using_evaluator(SwapWrapper(const_cast(dst)), src); + typedef swap_kernel kernel; + dense_assignment_loop::run(const_cast(dst), src, kernel()); } // Based on MatrixBase::operator+= (in CwiseBinaryOp.h) @@ -699,8 +846,7 @@ template void add_assign_using_evaluator(const MatrixBase& dst, const MatrixBase& src) { typedef typename DstXprType::Scalar Scalar; - SelfCwiseBinaryOp, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); - copy_using_evaluator(tmp, src.derived()); + copy_using_evaluator(dst.derived(), src.derived(), add_assign_op()); } // Based on ArrayBase::operator+= @@ -708,42 +854,37 @@ 
template void add_assign_using_evaluator(const ArrayBase& dst, const ArrayBase& src) { typedef typename DstXprType::Scalar Scalar; - SelfCwiseBinaryOp, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); - copy_using_evaluator(tmp, src.derived()); + copy_using_evaluator(dst.derived(), src.derived(), add_assign_op()); } -// TODO: Add add_assign_using_evaluator for EigenBase ? +// TODO: Add add_assign_using_evaluator for EigenBase ? (Jitse) template void subtract_assign_using_evaluator(const MatrixBase& dst, const MatrixBase& src) { typedef typename DstXprType::Scalar Scalar; - SelfCwiseBinaryOp, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); - copy_using_evaluator(tmp, src.derived()); + copy_using_evaluator(dst.derived(), src.derived(), sub_assign_op()); } template void subtract_assign_using_evaluator(const ArrayBase& dst, const ArrayBase& src) { typedef typename DstXprType::Scalar Scalar; - SelfCwiseBinaryOp, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); - copy_using_evaluator(tmp, src.derived()); + copy_using_evaluator(dst.derived(), src.derived(), sub_assign_op()); } template void multiply_assign_using_evaluator(const ArrayBase& dst, const ArrayBase& src) { typedef typename DstXprType::Scalar Scalar; - SelfCwiseBinaryOp, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); - copy_using_evaluator(tmp, src.derived()); + copy_using_evaluator(dst.derived(), src.derived(), mul_assign_op()); } template void divide_assign_using_evaluator(const ArrayBase& dst, const ArrayBase& src) { typedef typename DstXprType::Scalar Scalar; - SelfCwiseBinaryOp, DstXprType, SrcXprType> tmp(dst.const_cast_derived()); - copy_using_evaluator(tmp, src.derived()); + copy_using_evaluator(dst.derived(), src.derived(), div_assign_op()); } diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 3240ec6ed..082c00df4 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -78,6 +78,8 @@ template struct 
evaluator_impl_base { typedef typename ExpressionType::Index Index; + // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle outer,inner indices. + typedef traits ExpressionTraits; template void copyCoeff(Index row, Index col, const OtherEvaluatorType& other) @@ -307,15 +309,17 @@ struct evaluator_impl > evaluator_impl(const XprType& xpr) : m_result(xpr.rows(), xpr.cols()), m_resultImpl(m_result) - { - copy_using_evaluator_without_resizing(m_result, xpr.arg()); + { + // TODO we should simply do m_result(xpr.arg()); + call_dense_assignment_loop(m_result, xpr.arg()); } // This constructor is used when nesting an EvalTo evaluator in another evaluator evaluator_impl(const ArgType& arg) : m_result(arg.rows(), arg.cols()), m_resultImpl(m_result) - { - copy_using_evaluator_without_resizing(m_result, arg); + { + // TODO we should simply do m_result(xpr.arg()); + call_dense_assignment_loop(m_result, arg); } typedef typename PlainObject::Index Index; diff --git a/Eigen/src/Core/functors/AssignmentFunctors.h b/Eigen/src/Core/functors/AssignmentFunctors.h new file mode 100644 index 000000000..ae264aa64 --- /dev/null +++ b/Eigen/src/Core/functors/AssignmentFunctors.h @@ -0,0 +1,167 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_ASSIGNMENT_FUNCTORS_H +#define EIGEN_ASSIGNMENT_FUNCTORS_H + +namespace Eigen { + +namespace internal { + +/** \internal + * \brief Template functor for scalar/packet assignment + * + */ +template struct assign_op { + + EIGEN_EMPTY_STRUCT_CTOR(assign_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a = b; } + + template + EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const + { internal::pstoret(a,b); } +}; +template +struct functor_traits > { + enum { + Cost = NumTraits::ReadCost, + PacketAccess = packet_traits::IsVectorized + }; +}; + +/** \internal + * \brief Template functor for scalar/packet assignment with addition + * + */ +template struct add_assign_op { + + EIGEN_EMPTY_STRUCT_CTOR(add_assign_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a += b; } + + template + EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const + { internal::pstoret(a,internal::padd(internal::ploadt(a),b)); } +}; +template +struct functor_traits > { + enum { + Cost = NumTraits::ReadCost + NumTraits::AddCost, + PacketAccess = packet_traits::HasAdd + }; +}; + +/** \internal + * \brief Template functor for scalar/packet assignment with subtraction + * + */ +template struct sub_assign_op { + + EIGEN_EMPTY_STRUCT_CTOR(sub_assign_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a -= b; } + + template + EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const + { internal::pstoret(a,internal::psub(internal::ploadt(a),b)); } +}; +template +struct functor_traits > { + enum { + Cost = NumTraits::ReadCost + NumTraits::AddCost, + PacketAccess = packet_traits::HasAdd + }; +}; + +/** \internal + * \brief Template functor for scalar/packet assignment with multiplication + * + */ +template struct mul_assign_op { + + EIGEN_EMPTY_STRUCT_CTOR(mul_assign_op) + EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a *= b; } + + template + EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const + { internal::pstoret(a,internal::pmul(internal::ploadt(a),b)); } +}; +template +struct functor_traits > { + enum { + Cost = NumTraits::ReadCost + NumTraits::MulCost, + PacketAccess = packet_traits::HasMul + }; +}; + +/** \internal + * \brief Template functor for scalar/packet assignment with dividing + * + */ +template struct div_assign_op { + + EIGEN_EMPTY_STRUCT_CTOR(div_assign_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const { a /= b; } + + template + EIGEN_STRONG_INLINE void assignPacket(Scalar* a, const Packet& b) const + { internal::pstoret(a,internal::pdiv(internal::ploadt(a),b)); } +}; +template +struct functor_traits > { + enum { + Cost = NumTraits::ReadCost + NumTraits::MulCost, + PacketAccess = packet_traits::HasMul + }; +}; + + +/** \internal + * \brief Template functor for scalar/packet assignment with swapping + * + * It works as follows. For a non-vectorized evaluation loop, we have: + * for(i) func(A.coeffRef(i), B.coeff(i)); + * where B is a SwapWrapper expression. The trick is to make SwapWrapper::coeff behave like a non-const coeffRef. + * Actually, SwapWrapper might not even be needed: even if B is a plain expression, since it has to be writable, + * B.coeff already returns a const reference to the underlying scalar value. + * + * The case of a vectorized loop is more tricky: + * for(i,j) func.assignPacket(&A.coeffRef(i,j), B.packet(i,j)); + * Here, B must be a SwapWrapper whose packet function actually returns a proxy object holding a Scalar*, + * the actual alignment and Packet type. 
+ * + */ +template struct swap_assign_op { + + EIGEN_EMPTY_STRUCT_CTOR(swap_assign_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Scalar& a, const Scalar& b) const + { + using std::swap; + swap(a,const_cast(b)); + } + + template + EIGEN_STRONG_INLINE void swapPacket(Scalar* a, Scalar* b) const + { + Packet tmp = internal::ploadt(b); + internal::pstoret(b, internal::ploadt(a)); + internal::pstoret(a, tmp); + } +}; +template +struct functor_traits > { + enum { + Cost = 3 * NumTraits::ReadCost, + PacketAccess = packet_traits::IsVectorized + }; +}; + +} // namespace internal + +} // namespace Eigen + +#endif // EIGEN_ASSIGNMENT_FUNCTORS_H diff --git a/test/evaluator_common.h b/test/evaluator_common.h new file mode 100644 index 000000000..e69de29bb