From 1b17a674dd409ea55cea4079ba9b8db18778e012 Mon Sep 17 00:00:00 2001 From: Jitse Niesen Date: Sun, 27 Mar 2011 13:49:15 +0100 Subject: [PATCH] Evaluators: Implement inner vectorization. The implementation is minimal (I only wrote the functions called by the unit test) and ugly (lots of copy and pasting). --- Eigen/src/Core/AssignEvaluator.h | 36 +++++++++++++- Eigen/src/Core/CoreEvaluators.h | 80 ++++++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 7001abb40..2d61e7ff6 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -76,7 +76,7 @@ private: public: enum { - Traversal = int(MayInnerVectorize) ? int(DefaultTraversal) // int(InnerVectorizedTraversal) + Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal) : int(MayLinearVectorize) ? int(LinearVectorizedTraversal) : int(MaySliceVectorize) ? int(DefaultTraversal) // int(SliceVectorizedTraversal) : int(MayLinearize) ? int(DefaultTraversal) // int(LinearTraversal) @@ -145,6 +145,10 @@ template::Unrolling> struct copy_using_evaluator_impl; +/************************ +*** Default traversal *** +************************/ + template struct copy_using_evaluator_impl { @@ -167,6 +171,10 @@ struct copy_using_evaluator_impl struct unaligned_copy_using_evaluator_impl { @@ -231,6 +239,32 @@ struct copy_using_evaluator_impl +struct copy_using_evaluator_impl +{ + inline static void run(const DstXprType &dst, const SrcXprType &src) + { + typedef typename evaluator::type DstEvaluatorType; + typedef typename evaluator::type SrcEvaluatorType; + typedef typename DstXprType::Index Index; + + DstEvaluatorType dstEvaluator(dst.const_cast_derived()); + SrcEvaluatorType srcEvaluator(src); + + const Index innerSize = dst.innerSize(); + const Index outerSize = dst.outerSize(); + const Index packetSize = packet_traits::size; + for(Index outer = 0; outer < outerSize; ++outer) + for(Index inner = 0; inner < innerSize; inner+=packetSize) + dstEvaluator.template writePacketByOuterInner(outer, inner, srcEvaluator.template packetByOuterInner(outer, inner)); + } +}; + + // Based on DenseBase::LazyAssign() template diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 5666daae9..c06d9303e 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -71,6 +71,27 @@ struct evaluator_impl > return m_argImpl.template packet(index); } + // TODO: Difference between PacketScalar and PacketReturnType? + // TODO: Get this function by inheriting from DenseCoeffBase? + template + const typename ExpressionType::PacketScalar packetByOuterInner(Index outer, Index inner) const + { + return m_argImpl.template packetByOuterInner(outer, inner); + } + +// TODO: Is this function needed? +// template +// void writePacket(Index index, const typename ExpressionType::PacketScalar& x) +// { +// m_argImpl.template writePacket(index, x); +// } + + template + void writePacketByOuterInner(Index outer, Index inner, const typename ExpressionType::PacketScalar& x) + { + m_argImpl.template writePacketByOuterInner(outer, inner, x); + } + protected: typename evaluator::type m_argImpl; }; @@ -86,6 +107,16 @@ struct evaluator_impl > typedef typename MatrixType::Index Index; + Index colIndexByOuterInner(Index outer, Index inner) const + { + return m_matrix.colIndexByOuterInner(outer, inner); + } + + Index rowIndexByOuterInner(Index outer, Index inner) const + { + return m_matrix.rowIndexByOuterInner(outer, inner); + } + typename MatrixType::CoeffReturnType coeff(Index i, Index j) const { return m_matrix.coeff(i, j); @@ -103,6 +134,18 @@ struct evaluator_impl > return m_matrix.template packet(index); } + template + typename MatrixType::PacketReturnType packet(Index row, Index col) const + { + return m_matrix.template packet(row, col); + } + + template + typename MatrixType::PacketReturnType packetByOuterInner(Index outer, Index inner) const + { + return m_matrix.template packetByOuterInner(outer, inner); + } + template void writePacket(Index index, const typename MatrixType::PacketScalar& x) { @@ -110,6 +153,12 @@ struct evaluator_impl > m_matrix.const_cast_derived().template writePacket(index, x); } + template + void writePacketByOuterInner(Index outer, Index inner, const typename MatrixType::PacketScalar& x) + { + m_matrix.const_cast_derived().template writePacketByOuterInner(outer, inner, x); + } + protected: const MatrixType &m_matrix; }; @@ -149,6 +198,18 @@ struct evaluator_impl > return m_array.template packet(index); } + template + typename ArrayType::PacketReturnType packet(Index row, Index col) const + { + return m_array.template packet(row, col); + } + + template + typename ArrayType::PacketReturnType packetByOuterInner(Index outer, Index inner) const + { + return m_array.template packetByOuterInner(outer, inner); + } + template void writePacket(Index index, const typename ArrayType::PacketScalar& x) { @@ -156,6 +217,12 @@ struct evaluator_impl > m_array.const_cast_derived().template writePacket(index, x); } + template + void writePacketByOuterInner(Index outer, Index inner, const typename ArrayType::PacketScalar& x) + { + m_array.const_cast_derived().template writePacketByOuterInner(outer, inner, x); + } + protected: const ArrayType &m_array; }; @@ -208,6 +275,19 @@ struct evaluator_impl > return m_unaryOp.functor().packetOp(m_argImpl.template packet(index)); } + template + typename UnaryOpType::PacketScalar packet(Index row, Index col) const + { + return m_unaryOp.functor().packetOp(m_argImpl.template packet(row, col)); + } + + template + typename UnaryOpType::PacketScalar packetByOuterInner(Index outer, Index inner) const + { + return packet(m_argImpl.rowIndexByOuterInner(outer, inner), + m_argImpl.colIndexByOuterInner(outer, inner)); + } + protected: const UnaryOpType& m_unaryOp; typename evaluator::type m_argImpl;