From 5c204d1ff7b7b57bba2ef6e5701597d000e63842 Mon Sep 17 00:00:00 2001 From: Jitse Niesen Date: Fri, 25 Mar 2011 16:30:41 +0000 Subject: [PATCH] Evaluators: Implement LinearVectorizedTraversal, packet ops in evaluators. --- Eigen/src/Core/AssignEvaluator.h | 106 +++++++++++++++++++++++++------ Eigen/src/Core/CoreEvaluators.h | 65 ++++++++++++++++++- test/evaluators.cpp | 11 ++++ 3 files changed, 159 insertions(+), 23 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 4210b046f..7001abb40 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -77,7 +77,7 @@ private: public: enum { Traversal = int(MayInnerVectorize) ? int(DefaultTraversal) // int(InnerVectorizedTraversal) - : int(MayLinearVectorize) ? int(DefaultTraversal) // int(LinearVectorizedTraversal) + : int(MayLinearVectorize) ? int(LinearVectorizedTraversal) : int(MaySliceVectorize) ? int(DefaultTraversal) // int(SliceVectorizedTraversal) : int(MayLinearize) ? int(DefaultTraversal) // int(LinearTraversal) : int(DefaultTraversal), @@ -140,40 +140,104 @@ public: // copy_using_evaluator_impl is based on assign_impl -template::Traversal, - int Unrolling = copy_using_evaluator_traits::Unrolling> +template::Traversal, + int Unrolling = copy_using_evaluator_traits::Unrolling> struct copy_using_evaluator_impl; -template -struct copy_using_evaluator_impl +template +struct copy_using_evaluator_impl { - static void run(const LhsXprType& lhs, const RhsXprType& rhs) + static void run(const DstXprType& dst, const SrcXprType& src) { - typedef typename evaluator::type LhsEvaluatorType; - typedef typename evaluator::type RhsEvaluatorType; - typedef typename LhsXprType::Index Index; + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + typedef typename DstXprType::Index Index; - LhsEvaluatorType lhsEvaluator(lhs.const_cast_derived()); - RhsEvaluatorType rhsEvaluator(rhs); + DstEvaluatorType dstEvaluator(dst.const_cast_derived()); + SrcEvaluatorType srcEvaluator(src); - for(Index outer = 0; outer < lhs.outerSize(); ++outer) { - for(Index inner = 0; inner < lhs.innerSize(); ++inner) { - Index row = lhs.rowIndexByOuterInner(outer, inner); - Index col = lhs.colIndexByOuterInner(outer, inner); - lhsEvaluator.coeffRef(row, col) = rhsEvaluator.coeff(row, col); + for(Index outer = 0; outer < dst.outerSize(); ++outer) { + for(Index inner = 0; inner < dst.innerSize(); ++inner) { + Index row = dst.rowIndexByOuterInner(outer, inner); + Index col = dst.colIndexByOuterInner(outer, inner); + dstEvaluator.coeffRef(row, col) = srcEvaluator.coeff(row, col); // TODO: use copyCoeff ? } } } }; +template +struct unaligned_copy_using_evaluator_impl +{ + template + static EIGEN_STRONG_INLINE void run(const SrcEvaluatorType&, DstEvaluatorType&, + typename SrcEvaluatorType::Index, typename SrcEvaluatorType::Index) {} +}; + +// TODO: check why no ... ???? + +template <> +struct unaligned_copy_using_evaluator_impl +{ + // MSVC must not inline this functions. If it does, it fails to optimize the + // packet access path. +#ifdef _MSC_VER + template + static EIGEN_DONT_INLINE void run(const SrcEvaluatorType& src, DstEvaluatorType& dst, + typename SrcEvaluatorType::Index start, typename SrcEvaluatorType::Index end) +#else + template + static EIGEN_STRONG_INLINE void run(const SrcEvaluatorType& src, DstEvaluatorType& dst, + typename SrcEvaluatorType::Index start, typename SrcEvaluatorType::Index end) +#endif + { + for (typename SrcEvaluatorType::Index index = start; index < end; ++index) + dst.copyCoeff(index, src); + } +}; + +template +struct copy_using_evaluator_impl +{ + EIGEN_STRONG_INLINE static void run(const DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + typedef typename DstXprType::Index Index; + + DstEvaluatorType dstEvaluator(dst.const_cast_derived()); + SrcEvaluatorType srcEvaluator(src); + + const Index size = dst.size(); + typedef packet_traits PacketTraits; + enum { + packetSize = PacketTraits::size, + dstIsAligned = int(copy_using_evaluator_traits::DstIsAligned), + dstAlignment = PacketTraits::AlignedOnScalar ? Aligned : dstIsAligned, + srcAlignment = copy_using_evaluator_traits::JointAlignment + }; + const Index alignedStart = dstIsAligned ? 0 : first_aligned(&dst.coeffRef(0), size); + const Index alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize; + + unaligned_copy_using_evaluator_impl::run(src,dst.const_cast_derived(),0,alignedStart); + + for(Index index = alignedStart; index < alignedEnd; index += packetSize) + { + dstEvaluator.template writePacket(index, srcEvaluator.template packet(index)); + } + + unaligned_copy_using_evaluator_impl<>::run(src,dst.const_cast_derived(),alignedEnd,size); + } +}; + // Based on DenseBase::LazyAssign() -template -const LhsXprType& copy_using_evaluator(const LhsXprType& lhs, const RhsXprType& rhs) +template +const DstXprType& copy_using_evaluator(const DstXprType& dst, const SrcXprType& src) { - copy_using_evaluator_impl::run(lhs, rhs); - return lhs; + copy_using_evaluator_impl::run(dst, src); + return dst; } } // namespace internal diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 43fc53ccd..5666daae9 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -45,6 +45,7 @@ struct evaluator typedef evaluator_impl type; }; +// -------------------- Transpose -------------------- template struct evaluator_impl > @@ -64,10 +65,17 @@ struct evaluator_impl > return m_argImpl.coeffRef(j, i); } + template + const typename ExpressionType::PacketScalar packet(Index index) const + { + return m_argImpl.template packet(index); + } + protected: typename evaluator::type m_argImpl; }; +// -------------------- Matrix -------------------- template struct evaluator_impl > @@ -88,10 +96,27 @@ struct evaluator_impl > return m_matrix.const_cast_derived().coeffRef(i, j); } + template + typename MatrixType::PacketReturnType packet(Index index) const + { + // eigen_internal_assert(index >= 0 && index < size()); + return m_matrix.template packet(index); + } + + template + void writePacket(Index index, const typename MatrixType::PacketScalar& x) + { + // eigen_internal_assert(index >= 0 && index < size()); + m_matrix.const_cast_derived().template writePacket(index, x); + } + protected: const MatrixType &m_matrix; }; +// -------------------- Array -------------------- + +// TODO: should be sharing code with Matrix case template struct evaluator_impl > @@ -117,10 +142,25 @@ struct evaluator_impl > return m_array.const_cast_derived().coeffRef(i, j); } + template + typename ArrayType::PacketReturnType packet(Index index) const + { + // eigen_internal_assert(index >= 0 && index < size()); + return m_array.template packet(index); + } + + template + void writePacket(Index index, const typename ArrayType::PacketScalar& x) + { + // eigen_internal_assert(index >= 0 && index < size()); + m_array.const_cast_derived().template writePacket(index, x); + } + protected: const ArrayType &m_array; }; +// -------------------- CwiseNullaryOp -------------------- template struct evaluator_impl > @@ -136,10 +176,17 @@ struct evaluator_impl > return m_nullaryOp.coeff(i, j); } + template + typename NullaryOpType::PacketScalar packet(Index index) const + { + return m_nullaryOp.template packet(index); + } + protected: const NullaryOpType& m_nullaryOp; }; +// -------------------- CwiseUnaryOp -------------------- template struct evaluator_impl > @@ -155,11 +202,18 @@ struct evaluator_impl > return m_unaryOp.functor()(m_argImpl.coeff(i, j)); } + template + typename UnaryOpType::PacketScalar packet(Index index) const + { + return m_unaryOp.functor().packetOp(m_argImpl.template packet(index)); + } + protected: const UnaryOpType& m_unaryOp; typename evaluator::type m_argImpl; }; +// -------------------- CwiseBinaryOp -------------------- template struct evaluator_impl > @@ -172,7 +226,14 @@ struct evaluator_impl > typename BinaryOpType::CoeffReturnType coeff(Index i, Index j) const { - return m_binaryOp.functor()(m_lhsImpl.coeff(i, j),m_rhsImpl.coeff(i, j)); + return m_binaryOp.functor()(m_lhsImpl.coeff(i, j), m_rhsImpl.coeff(i, j)); + } + + template + typename BinaryOpType::PacketScalar packet(Index index) const + { + return m_binaryOp.functor().packetOp(m_lhsImpl.template packet(index), + m_rhsImpl.template packet(index)); } protected: @@ -181,7 +242,7 @@ protected: typename evaluator::type m_rhsImpl; }; -// product +// -------------------- Product -------------------- template struct evaluator_impl > : public evaluator::PlainObject>::type diff --git a/test/evaluators.cpp b/test/evaluators.cpp index 636529e68..0e703360b 100644 --- a/test/evaluators.cpp +++ b/test/evaluators.cpp @@ -78,4 +78,15 @@ void test_evaluators() // test CwiseBinaryOp VERIFY_IS_APPROX_EVALUATOR(v2, v + Vector2d::Ones()); VERIFY_IS_APPROX_EVALUATOR(w, (v + Vector2d::Ones()).transpose().cwiseProduct(RowVector2d::Constant(3))); + + // dynamic matrices and arrays + MatrixXd mat1(6,6), mat2(6,6); + VERIFY_IS_APPROX_EVALUATOR(mat1, MatrixXd::Identity(6,6)); + VERIFY_IS_APPROX_EVALUATOR(mat2, mat1); + copy_using_evaluator(mat2.transpose(), mat1); + VERIFY_IS_APPROX(mat2.transpose(), mat1); + + ArrayXXd arr1(6,6), arr2(6,6); + VERIFY_IS_APPROX_EVALUATOR(arr1, ArrayXXd::Constant(6,6, 3.0)); + VERIFY_IS_APPROX_EVALUATOR(arr2, arr1); }