From 8dd3b716e39d4b4b472b948de1af20838bf17493 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 12 Mar 2014 13:34:11 +0100 Subject: [PATCH] Move evaluation related flags from traits to evaluator and fix evaluators of MapBase and Replicate --- Eigen/src/Core/AssignEvaluator.h | 15 +- Eigen/src/Core/Block.h | 11 + Eigen/src/Core/CoreEvaluators.h | 231 +++++++++++++----- Eigen/src/Core/CwiseBinaryOp.h | 8 +- Eigen/src/Core/CwiseNullaryOp.h | 7 +- Eigen/src/Core/CwiseUnaryOp.h | 7 +- Eigen/src/Core/CwiseUnaryView.h | 4 +- Eigen/src/Core/Diagonal.h | 5 +- Eigen/src/Core/DiagonalMatrix.h | 1 + Eigen/src/Core/DiagonalProduct.h | 8 +- Eigen/src/Core/Map.h | 7 +- Eigen/src/Core/MapBase.h | 9 +- Eigen/src/Core/Product.h | 27 +- Eigen/src/Core/ProductEvaluators.h | 111 +++++++-- Eigen/src/Core/Redux.h | 11 + Eigen/src/Core/Replicate.h | 7 +- Eigen/src/Core/Reverse.h | 7 +- Eigen/src/Core/Select.h | 5 +- Eigen/src/Core/Transpose.h | 7 +- Eigen/src/Core/VectorwiseOp.h | 4 + .../Core/products/TriangularMatrixVector.h | 2 +- Eigen/src/Core/util/ForwardDeclarations.h | 12 + Eigen/src/Core/util/XprHelper.h | 52 ++++ 23 files changed, 433 insertions(+), 125 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 2ea1cc126..05816094c 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -28,11 +28,10 @@ template struct copy_using_evaluator_traits { typedef typename DstEvaluator::XprType Dst; - typedef typename SrcEvaluator::XprType Src; - // TODO, we should get these flags from the evaluators + enum { - DstFlags = Dst::Flags, - SrcFlags = Src::Flags + DstFlags = DstEvaluator::Flags, + SrcFlags = SrcEvaluator::Flags }; public: @@ -56,7 +55,9 @@ private: }; enum { - StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)), + DstIsRowMajor = DstEvaluator::Flags&RowMajorBit, + SrcIsRowMajor = SrcEvaluator::Flags&RowMajorBit, + StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)), MightVectorize = StorageOrdersAgree && (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit) && (functor_traits::PacketAccess), @@ -596,7 +597,7 @@ public: typedef typename DstEvaluatorType::ExpressionTraits Traits; return int(Traits::RowsAtCompileTime) == 1 ? 0 : int(Traits::ColsAtCompileTime) == 1 ? inner - : int(Traits::Flags)&RowMajorBit ? outer + : int(DstEvaluatorType::Flags)&RowMajorBit ? outer : inner; } @@ -605,7 +606,7 @@ public: typedef typename DstEvaluatorType::ExpressionTraits Traits; return int(Traits::ColsAtCompileTime) == 1 ? 0 : int(Traits::RowsAtCompileTime) == 1 ? inner - : int(Traits::Flags)&RowMajorBit ? inner + : int(DstEvaluatorType::Flags)&RowMajorBit ? inner : outer; } diff --git a/Eigen/src/Core/Block.h b/Eigen/src/Core/Block.h index 31cd5c72c..d92797a98 100644 --- a/Eigen/src/Core/Block.h +++ b/Eigen/src/Core/Block.h @@ -68,6 +68,7 @@ struct traits > : traits::MaxColsAtCompileTime), + XprTypeIsRowMajor = (int(traits::Flags)&RowMajorBit) != 0, IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 @@ -80,6 +81,10 @@ struct traits > : traits::ret) : int(inner_stride_at_compile_time::ret), + // IsAligned is needed by MapBase's assertions + // We can sefely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator + IsAligned = 0, +#ifndef EIGEN_TEST_EVALUATORS MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits::size) == 0) && (InnerStrideAtCompileTime == 1) ? PacketAccessBit : 0, @@ -92,6 +97,12 @@ struct traits > : traits::value ? LvalueBit : 0, + FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0, + Flags = (traits::Flags & DirectAccessBit) | FlagsLvalueBit | FlagsRowMajorBit // FIXME DirectAccessBit should not be handled by expressions +#endif }; }; diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h index 33c89c2d4..a5de3593c 100644 --- a/Eigen/src/Core/CoreEvaluators.h +++ b/Eigen/src/Core/CoreEvaluators.h @@ -136,7 +136,9 @@ struct evaluator > RowsAtCompileTime = PlainObjectType::RowsAtCompileTime, ColsAtCompileTime = PlainObjectType::ColsAtCompileTime, - CoeffReadCost = NumTraits::ReadCost + CoeffReadCost = NumTraits::ReadCost, + Flags = compute_matrix_evaluator_flags< Scalar,Derived::RowsAtCompileTime,Derived::ColsAtCompileTime, + Derived::Options,Derived::MaxRowsAtCompileTime,Derived::MaxColsAtCompileTime>::ret }; evaluator() @@ -323,7 +325,8 @@ struct evaluator > typedef Transpose XprType; enum { - CoeffReadCost = evaluator::CoeffReadCost + CoeffReadCost = evaluator::CoeffReadCost, + Flags = evaluator::Flags ^ RowMajorBit }; evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {} @@ -389,9 +392,16 @@ struct evaluator > : evaluator_base > { typedef CwiseNullaryOp XprType; + typedef typename internal::remove_all::type PlainObjectTypeCleaned; enum { - CoeffReadCost = internal::functor_traits::Cost + CoeffReadCost = internal::functor_traits::Cost, + + Flags = (evaluator::Flags + & ( HereditaryBits + | (functor_has_linear_access::ret ? LinearAccessBit : 0) + | (functor_traits::PacketAccess ? PacketAccessBit : 0))) + | (functor_traits::IsRepeatable ? 0 : EvalBeforeNestingBit) // FIXME EvalBeforeNestingBit should be needed anymore }; evaluator(const XprType& n) @@ -437,7 +447,11 @@ struct evaluator > typedef CwiseUnaryOp XprType; enum { - CoeffReadCost = evaluator::CoeffReadCost + functor_traits::Cost + CoeffReadCost = evaluator::CoeffReadCost + functor_traits::Cost, + + Flags = evaluator::Flags & ( + HereditaryBits | LinearAccessBit | AlignedBit + | (functor_traits::PacketAccess ? PacketAccessBit : 0)) }; evaluator(const XprType& op) @@ -485,7 +499,22 @@ struct evaluator > typedef CwiseBinaryOp XprType; enum { - CoeffReadCost = evaluator::CoeffReadCost + evaluator::CoeffReadCost + functor_traits::Cost + CoeffReadCost = evaluator::CoeffReadCost + evaluator::CoeffReadCost + functor_traits::Cost, + + LhsFlags = evaluator::Flags, + RhsFlags = evaluator::Flags, + SameType = is_same::value, + StorageOrdersAgree = (int(LhsFlags)&RowMajorBit)==(int(RhsFlags)&RowMajorBit), + Flags0 = (int(LhsFlags) | int(RhsFlags)) & ( + HereditaryBits + | (int(LhsFlags) & int(RhsFlags) & + ( AlignedBit + | (StorageOrdersAgree ? LinearAccessBit : 0) + | (functor_traits::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0) + ) + ) + ), + Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit) }; evaluator(const XprType& xpr) @@ -537,7 +566,9 @@ struct evaluator > typedef CwiseUnaryView XprType; enum { - CoeffReadCost = evaluator::CoeffReadCost + functor_traits::Cost + CoeffReadCost = evaluator::CoeffReadCost + functor_traits::Cost, + + Flags = (evaluator::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)) }; evaluator(const XprType& op) @@ -576,12 +607,15 @@ protected: // -------------------- Map -------------------- -template -struct evaluator > - : evaluator_base +// FIXME perhaps the PlainObjectType could be provided by Derived::PlainObject ? +// but that might complicate template specialization +template +struct mapbase_evaluator; + +template +struct mapbase_evaluator : evaluator_base { - typedef MapBase MapType; - typedef Derived XprType; + typedef Derived XprType; typedef typename XprType::PointerType PointerType; typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; @@ -590,81 +624,103 @@ struct evaluator > typedef typename XprType::PacketReturnType PacketReturnType; enum { - RowsAtCompileTime = XprType::RowsAtCompileTime, + IsRowMajor = XprType::RowsAtCompileTime, + ColsAtCompileTime = XprType::ColsAtCompileTime, CoeffReadCost = NumTraits::ReadCost }; - evaluator(const XprType& map) + mapbase_evaluator(const XprType& map) : m_data(const_cast(map.data())), - m_rowStride(map.rowStride()), - m_colStride(map.colStride()) - { } + m_xpr(map) + { + EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator::Flags&PacketAccessBit, internal::inner_stride_at_compile_time::ret==1), + PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); + } CoeffReturnType coeff(Index row, Index col) const - { - return m_data[col * m_colStride + row * m_rowStride]; + { + return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; } CoeffReturnType coeff(Index index) const - { - return coeff(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + { + return m_data[index * m_xpr.innerStride()]; } Scalar& coeffRef(Index row, Index col) - { - return m_data[col * m_colStride + row * m_rowStride]; + { + return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()]; } Scalar& coeffRef(Index index) - { - return coeffRef(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + { + return m_data[index * m_xpr.innerStride()]; } template PacketReturnType packet(Index row, Index col) const - { - PointerType ptr = m_data + row * m_rowStride + col * m_colStride; + { + PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); return internal::ploadt(ptr); } template PacketReturnType packet(Index index) const - { - return packet(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0); + { + return internal::ploadt(m_data + index * m_xpr.innerStride()); } template void writePacket(Index row, Index col, const PacketScalar& x) - { - PointerType ptr = m_data + row * m_rowStride + col * m_colStride; + { + PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride(); return internal::pstoret(ptr, x); } template void writePacket(Index index, const PacketScalar& x) - { - return writePacket(RowsAtCompileTime == 1 ? 0 : index, - RowsAtCompileTime == 1 ? index : 0, - x); + { + internal::pstoret(m_data + index * m_xpr.innerStride(), x); } protected: PointerType m_data; - int m_rowStride; - int m_colStride; + const XprType& m_xpr; }; template struct evaluator > - : public evaluator > > + : public mapbase_evaluator, PlainObjectType> { typedef Map XprType; + typedef typename XprType::Scalar Scalar; + + enum { + InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 + ? int(PlainObjectType::InnerStrideAtCompileTime) + : int(StrideType::InnerStrideAtCompileTime), + OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 + ? int(PlainObjectType::OuterStrideAtCompileTime) + : int(StrideType::OuterStrideAtCompileTime), + HasNoInnerStride = InnerStrideAtCompileTime == 1, + HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, + HasNoStride = HasNoInnerStride && HasNoOuterStride, + IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned), + IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, + KeepsPacketAccess = bool(HasNoInnerStride) + && ( bool(IsDynamicSize) + || HasNoOuterStride + || ( OuterStrideAtCompileTime!=Dynamic + && ((static_cast(sizeof(Scalar))*OuterStrideAtCompileTime)%16)==0 ) ), + Flags0 = evaluator::Flags, + Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit), + Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime)) + ? int(Flags1) : int(Flags1 & ~LinearAccessBit), + Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit) + }; evaluator(const XprType& map) - : evaluator >(map) + : mapbase_evaluator(map) { } }; @@ -672,12 +728,16 @@ struct evaluator > template struct evaluator > - : public evaluator > > + : public mapbase_evaluator, PlainObjectType> { typedef Ref XprType; + + enum { + Flags = evaluator >::Flags + }; - evaluator(const XprType& map) - : evaluator >(map) + evaluator(const XprType& ref) + : mapbase_evaluator(ref) { } }; @@ -691,8 +751,39 @@ struct evaluator > : block_evaluator { typedef Block XprType; + typedef typename XprType::Scalar Scalar; + enum { - CoeffReadCost = evaluator::CoeffReadCost + CoeffReadCost = evaluator::CoeffReadCost, + + RowsAtCompileTime = traits::RowsAtCompileTime, + ColsAtCompileTime = traits::ColsAtCompileTime, + MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = traits::MaxColsAtCompileTime, + + XprTypeIsRowMajor = (int(traits::Flags)&RowMajorBit) != 0, + IsRowMajor = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? 1 + : (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0 + : XprTypeIsRowMajor, + HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor), + InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime), + InnerStrideAtCompileTime = HasSameStorageOrderAsXprType + ? int(inner_stride_at_compile_time::ret) + : int(outer_stride_at_compile_time::ret), + OuterStrideAtCompileTime = HasSameStorageOrderAsXprType + ? int(outer_stride_at_compile_time::ret) + : int(inner_stride_at_compile_time::ret), + MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits::size) == 0) + && (InnerStrideAtCompileTime == 1) + ? PacketAccessBit : 0, + MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % 16) == 0)) ? AlignedBit : 0, + FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0, + FlagsRowMajorBit = XprType::Flags&RowMajorBit, + Flags0 = traits::Flags & ( (HereditaryBits & ~RowMajorBit) | + DirectAccessBit | + MaskPacketAccessBit | + MaskAlignedBit), + Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit }; typedef block_evaluator block_evaluator_type; evaluator(const XprType& block) : block_evaluator_type(block) {} @@ -778,18 +869,23 @@ protected: template struct block_evaluator - : evaluator > > + : mapbase_evaluator, + typename Block::PlainObject> { typedef Block XprType; block_evaluator(const XprType& block) - : evaluator >(block) - { } + : mapbase_evaluator(block) + { + // FIXME this should be an internal assertion + eigen_assert(EIGEN_IMPLIES(evaluator::Flags&AlignedBit, (size_t(block.data()) % 16) == 0) && "data is not aligned"); + } }; // -------------------- Select -------------------- +// TODO enable vectorization for Select template struct evaluator > : evaluator_base > @@ -798,7 +894,9 @@ struct evaluator > enum { CoeffReadCost = evaluator::CoeffReadCost + EIGEN_SIZE_MAX(evaluator::CoeffReadCost, - evaluator::CoeffReadCost) + evaluator::CoeffReadCost), + + Flags = (unsigned int)evaluator::Flags & evaluator::Flags & HereditaryBits }; evaluator(const XprType& select) @@ -850,7 +948,9 @@ struct evaluator > typedef typename internal::remove_all::type ArgTypeNestedCleaned; enum { - CoeffReadCost = evaluator::CoeffReadCost + CoeffReadCost = evaluator::CoeffReadCost, + + Flags = (evaluator::Flags & HereditaryBits & ~RowMajorBit) | (traits::Flags & RowMajorBit) }; evaluator(const XprType& replicate) @@ -858,7 +958,7 @@ struct evaluator > m_argImpl(m_arg), m_rows(replicate.nestedExpression().rows()), m_cols(replicate.nestedExpression().cols()) - { } + {} CoeffReturnType coeff(Index row, Index col) const { @@ -907,17 +1007,19 @@ struct evaluator > typedef PartialReduxExpr XprType; typedef typename XprType::Scalar InputScalar; enum { - TraversalSize = Direction==Vertical ? XprType::RowsAtCompileTime : XprType::ColsAtCompileTime + TraversalSize = Direction==Vertical ? ArgType::RowsAtCompileTime : XprType::ColsAtCompileTime }; typedef typename MemberOp::template Cost CostOpType; enum { CoeffReadCost = TraversalSize==Dynamic ? Dynamic - : TraversalSize * evaluator::CoeffReadCost + int(CostOpType::value) + : TraversalSize * evaluator::CoeffReadCost + int(CostOpType::value), + + Flags = (traits::Flags&RowMajorBit) | (evaluator::Flags&HereditaryBits) }; evaluator(const XprType expr) : m_expr(expr) - { } + {} typedef typename XprType::Index Index; typedef typename XprType::CoeffReturnType CoeffReturnType; @@ -948,7 +1050,8 @@ struct evaluator_wrapper_base { typedef typename remove_all::type ArgType; enum { - CoeffReadCost = evaluator::CoeffReadCost + CoeffReadCost = evaluator::CoeffReadCost, + Flags = evaluator::Flags }; evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {} @@ -1058,7 +1161,15 @@ struct evaluator > || ((Direction == Vertical) && IsColMajor) || ((Direction == Horizontal) && IsRowMajor), - CoeffReadCost = evaluator::CoeffReadCost + CoeffReadCost = evaluator::CoeffReadCost, + + // let's enable LinearAccess only with vectorization because of the product overhead + // FIXME enable DirectAccess with negative strides? + Flags0 = evaluator::Flags, + LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) ) + ? LinearAccessBit : 0, + + Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess) }; typedef internal::reverse_packet_cond reverse_packet; @@ -1071,7 +1182,7 @@ struct evaluator > CoeffReturnType coeff(Index row, Index col) const { return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row, - ReverseCol ? m_cols.value() - col - 1 : col); + ReverseCol ? m_cols.value() - col - 1 : col); } CoeffReturnType coeff(Index index) const @@ -1082,7 +1193,7 @@ struct evaluator > Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row, - ReverseCol ? m_cols.value() - col - 1 : col); + ReverseCol ? m_cols.value() - col - 1 : col); } Scalar& coeffRef(Index index) @@ -1138,7 +1249,9 @@ struct evaluator > typedef Diagonal XprType; enum { - CoeffReadCost = evaluator::CoeffReadCost + CoeffReadCost = evaluator::CoeffReadCost, + + Flags = (unsigned int)evaluator::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit }; evaluator(const XprType& diagonal) diff --git a/Eigen/src/Core/CwiseBinaryOp.h b/Eigen/src/Core/CwiseBinaryOp.h index 105e7fb11..07861dbc9 100644 --- a/Eigen/src/Core/CwiseBinaryOp.h +++ b/Eigen/src/Core/CwiseBinaryOp.h @@ -65,6 +65,7 @@ struct traits > typedef typename remove_reference::type _LhsNested; typedef typename remove_reference::type _RhsNested; enum { +#ifndef EIGEN_TEST_EVALUATORS LhsFlags = _LhsNested::Flags, RhsFlags = _RhsNested::Flags, SameType = is_same::value, @@ -78,12 +79,13 @@ struct traits > ) ) ), - Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit) -#ifndef EIGEN_TEST_EVALUATORS - , + Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit), + LhsCoeffReadCost = _LhsNested::CoeffReadCost, RhsCoeffReadCost = _RhsNested::CoeffReadCost, CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + functor_traits::Cost +#else + Flags = _LhsNested::Flags & RowMajorBit #endif }; }; diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index 560e03f12..f9f127cc2 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -35,14 +35,15 @@ template struct traits > : traits { enum { +#ifndef EIGEN_TEST_EVALUATORS Flags = (traits::Flags & ( HereditaryBits | (functor_has_linear_access::ret ? LinearAccessBit : 0) | (functor_traits::PacketAccess ? PacketAccessBit : 0))) - | (functor_traits::IsRepeatable ? 0 : EvalBeforeNestingBit) -#ifndef EIGEN_TEST_EVALUATORS - , + | (functor_traits::IsRepeatable ? 0 : EvalBeforeNestingBit), CoeffReadCost = functor_traits::Cost +#else + Flags = traits::Flags & RowMajorBit #endif }; }; diff --git a/Eigen/src/Core/CwiseUnaryOp.h b/Eigen/src/Core/CwiseUnaryOp.h index 25da52ab7..af05a9108 100644 --- a/Eigen/src/Core/CwiseUnaryOp.h +++ b/Eigen/src/Core/CwiseUnaryOp.h @@ -44,12 +44,13 @@ struct traits > typedef typename XprType::Nested XprTypeNested; typedef typename remove_reference::type _XprTypeNested; enum { +#ifndef EIGEN_TEST_EVALUATORS Flags = _XprTypeNested::Flags & ( HereditaryBits | LinearAccessBit | AlignedBit - | (functor_traits::PacketAccess ? PacketAccessBit : 0)) -#ifndef EIGEN_TEST_EVALUATORS - , + | (functor_traits::PacketAccess ? PacketAccessBit : 0)), CoeffReadCost = _XprTypeNested::CoeffReadCost + functor_traits::Cost +#else + Flags = _XprTypeNested::Flags & RowMajorBit #endif }; }; diff --git a/Eigen/src/Core/CwiseUnaryView.h b/Eigen/src/Core/CwiseUnaryView.h index a0bd80fb9..9cdebb8e7 100644 --- a/Eigen/src/Core/CwiseUnaryView.h +++ b/Eigen/src/Core/CwiseUnaryView.h @@ -37,9 +37,11 @@ struct traits > typedef typename MatrixType::Nested MatrixTypeNested; typedef typename remove_all::type _MatrixTypeNested; enum { - Flags = (traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)), #ifndef EIGEN_TEST_EVALUATORS + Flags = (traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)), CoeffReadCost = traits<_MatrixTypeNested>::CoeffReadCost + functor_traits::Cost, +#else + Flags = traits<_MatrixTypeNested>::Flags & (RowMajorBit | LvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions #endif MatrixTypeInnerStride = inner_stride_at_compile_time::ret, // need to cast the sizeof's from size_t to int explicitly, otherwise: diff --git a/Eigen/src/Core/Diagonal.h b/Eigen/src/Core/Diagonal.h index 02ab04980..3ff6a3e66 100644 --- a/Eigen/src/Core/Diagonal.h +++ b/Eigen/src/Core/Diagonal.h @@ -51,10 +51,13 @@ struct traits > : (EIGEN_PLAIN_ENUM_MIN(MatrixType::MaxRowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0), MatrixType::MaxColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))), MaxColsAtCompileTime = 1, +#ifndef EIGEN_TEST_EVALUATORS MaskLvalueBit = is_lvalue::value ? LvalueBit : 0, Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, -#ifndef EIGEN_TEST_EVALUATORS CoeffReadCost = _MatrixTypeNested::CoeffReadCost, +#else + MaskLvalueBit = is_lvalue::value ? LvalueBit : 0, + Flags = (unsigned int)_MatrixTypeNested::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions #endif MatrixTypeOuterStride = outer_stride_at_compile_time::ret, InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride+1, diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h index 784e4b1ce..ba0042ba4 100644 --- a/Eigen/src/Core/DiagonalMatrix.h +++ b/Eigen/src/Core/DiagonalMatrix.h @@ -275,6 +275,7 @@ struct traits > typedef typename DiagonalVectorType::Scalar Scalar; typedef typename DiagonalVectorType::Index Index; typedef typename DiagonalVectorType::StorageKind StorageKind; + typedef typename traits::XprKind XprKind; enum { RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, diff --git a/Eigen/src/Core/DiagonalProduct.h b/Eigen/src/Core/DiagonalProduct.h index 840b70dbb..c6dafdddc 100644 --- a/Eigen/src/Core/DiagonalProduct.h +++ b/Eigen/src/Core/DiagonalProduct.h @@ -26,6 +26,7 @@ struct traits > MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, +#ifndef EIGEN_TEST_EVALUATORS _StorageOrder = MatrixType::Flags & RowMajorBit ? RowMajor : ColMajor, _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft) ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)), @@ -34,11 +35,10 @@ struct traits > //_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))), _Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))), _LinearAccessMask = (RowsAtCompileTime==1 || ColsAtCompileTime==1) ? LinearAccessBit : 0, - - Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit //(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit), -#ifndef EIGEN_TEST_EVALUATORS - , + Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit, //(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit), CoeffReadCost = NumTraits::MulCost + MatrixType::CoeffReadCost + DiagonalType::DiagonalVectorType::CoeffReadCost +#else + Flags = RowMajorBit & (unsigned int)(MatrixType::Flags) #endif }; }; diff --git a/Eigen/src/Core/Map.h b/Eigen/src/Core/Map.h index 8ea13cfb7..23bbb46bf 100644 --- a/Eigen/src/Core/Map.h +++ b/Eigen/src/Core/Map.h @@ -79,10 +79,11 @@ struct traits > OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 ? int(PlainObjectType::OuterStrideAtCompileTime) : int(StrideType::OuterStrideAtCompileTime), + IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned), +#ifndef EIGEN_TEST_EVALUATORS HasNoInnerStride = InnerStrideAtCompileTime == 1, HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, HasNoStride = HasNoInnerStride && HasNoOuterStride, - IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned), IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic, KeepsPacketAccess = bool(HasNoInnerStride) && ( bool(IsDynamicSize) @@ -95,6 +96,10 @@ struct traits > ? int(Flags1) : int(Flags1 & ~LinearAccessBit), Flags3 = is_lvalue::value ? int(Flags2) : (int(Flags2) & ~LvalueBit), Flags = KeepsPacketAccess ? int(Flags3) : (int(Flags3) & ~PacketAccessBit) +#else + Flags0 = TraitsBase::Flags & (~NestByRefBit), + Flags = is_lvalue::value ? int(Flags0) : (int(Flags0) & ~LvalueBit) +#endif }; private: enum { Options }; // Expressions don't have Options diff --git a/Eigen/src/Core/MapBase.h b/Eigen/src/Core/MapBase.h index ffa1371c2..de1424b09 100644 --- a/Eigen/src/Core/MapBase.h +++ b/Eigen/src/Core/MapBase.h @@ -161,11 +161,16 @@ template class MapBase EIGEN_DEVICE_FUNC void checkSanity() const { +#ifndef EIGEN_TEST_EVALUATORS + // moved to evaluator EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits::Flags&PacketAccessBit, internal::inner_stride_at_compile_time::ret==1), PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); - eigen_assert(EIGEN_IMPLIES(internal::traits::Flags&AlignedBit, (size_t(m_data) % 16) == 0) - && "data is not aligned"); + eigen_assert(EIGEN_IMPLIES(internal::traits::Flags&AlignedBit, (size_t(m_data) % 16) == 0) && "data is not aligned"); +#else + eigen_assert(EIGEN_IMPLIES(internal::traits::IsAligned, (size_t(m_data) % 16) == 0) && "data is not aligned"); +#endif + } PointerType m_data; diff --git a/Eigen/src/Core/Product.h b/Eigen/src/Core/Product.h index cac90bc1f..453180049 100644 --- a/Eigen/src/Core/Product.h +++ b/Eigen/src/Core/Product.h @@ -33,14 +33,29 @@ template class Pro namespace internal { template struct traits > - : traits > -{ - // We want A+B*C to be of type Product and not Product - // TODO: This flag should eventually go in a separate evaluator traits class +{ + typedef typename remove_all::type LhsCleaned; + typedef typename remove_all::type RhsCleaned; + + typedef MatrixXpr XprKind; + + typedef typename scalar_product_traits::ReturnType Scalar; + typedef typename promote_storage_type::StorageKind, + typename traits::StorageKind>::ret StorageKind; + typedef typename promote_index_type::Index, + typename traits::Index>::type Index; + enum { - Flags = traits >::Flags & ~(EvalBeforeNestingBit | DirectAccessBit) + RowsAtCompileTime = LhsCleaned::RowsAtCompileTime, + ColsAtCompileTime = RhsCleaned::ColsAtCompileTime, + MaxRowsAtCompileTime = LhsCleaned::MaxRowsAtCompileTime, + MaxColsAtCompileTime = RhsCleaned::MaxColsAtCompileTime, + + // The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator. + Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0) }; }; + } // end namespace internal @@ -59,8 +74,6 @@ class Product : public ProductImpl<_Lhs,_Rhs,Option, typename internal::promote_storage_type::ret>::Base Base; EIGEN_GENERIC_PUBLIC_INTERFACE(Product) - - typedef typename internal::nested::type LhsNested; typedef typename internal::nested::type RhsNested; diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index 7ebf31696..1159c2f44 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -17,19 +17,6 @@ namespace Eigen { namespace internal { -/** \internal - * \class product_evaluator - * Products need their own evaluator with more template arguments allowing for - * easier partial template specializations. - */ -template< typename T, - int ProductTag = internal::product_type::ret, - typename LhsShape = typename evaluator_traits::Shape, - typename RhsShape = typename evaluator_traits::Shape, - typename LhsScalar = typename T::Lhs::Scalar, - typename RhsScalar = typename T::Rhs::Scalar - > struct product_evaluator; - /** \internal * Evaluator of a product expression. * Since products require special treatments to handle all possible cases, @@ -119,6 +106,18 @@ struct product_evaluator, ProductTag, DenseSha : m_result(xpr.rows(), xpr.cols()) { ::new (static_cast(this)) Base(m_result); + +// FIXME shall we handle nested_eval here? +// typedef typename internal::nested_eval::type LhsNested; +// typedef typename internal::nested_eval::type RhsNested; +// typedef typename internal::remove_all::type LhsNestedCleaned; +// typedef typename internal::remove_all::type RhsNestedCleaned; +// +// const LhsNested lhs(xpr.lhs()); +// const RhsNested rhs(xpr.rhs()); +// +// generic_product_impl::evalTo(m_result, lhs, rhs); + generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); } @@ -133,6 +132,7 @@ struct Assignment, internal::assign_ typedef Product SrcXprType; static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { + // FIXME shall we handle nested_eval here? generic_product_impl::evalTo(dst, src.lhs(), src.rhs()); } }; @@ -144,6 +144,7 @@ struct Assignment, internal::add_ass typedef Product SrcXprType; static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op &) { + // FIXME shall we handle nested_eval here? generic_product_impl::addTo(dst, src.lhs(), src.rhs()); } }; @@ -155,6 +156,7 @@ struct Assignment, internal::sub_ass typedef Product SrcXprType; static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op &) { + // FIXME shall we handle nested_eval here? generic_product_impl::subTo(dst, src.lhs(), src.rhs()); } }; @@ -368,7 +370,6 @@ struct product_evaluator, ProductTag, DenseShape, : evaluator_base > { typedef Product XprType; - typedef CoeffBasedProduct CoeffBasedProductType; typedef typename XprType::Index Index; typedef typename XprType::Scalar Scalar; typedef typename XprType::CoeffReturnType CoeffReturnType; @@ -396,9 +397,13 @@ struct product_evaluator, ProductTag, DenseShape, typedef typename evaluator::type RhsEtorType; enum { - RowsAtCompileTime = traits::RowsAtCompileTime, + RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime, + ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime, + InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime), + MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime, + MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime, + PacketSize = packet_traits::size, - InnerSize = traits::InnerSize, LhsCoeffReadCost = LhsEtorType::CoeffReadCost, RhsCoeffReadCost = RhsEtorType::CoeffReadCost, @@ -407,8 +412,51 @@ struct product_evaluator, ProductTag, DenseShape, + (InnerSize - 1) * NumTraits::AddCost, Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT, - CanVectorizeInner = traits::CanVectorizeInner, - Flags = traits::Flags + + LhsFlags = LhsEtorType::Flags, + RhsFlags = RhsEtorType::Flags, + + LhsRowMajor = LhsFlags & RowMajorBit, + RhsRowMajor = RhsFlags & RowMajorBit, + + SameType = is_same::value, + + CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit) + && (ColsAtCompileTime == Dynamic + || ( (ColsAtCompileTime % packet_traits::size) == 0 + && (RhsFlags&AlignedBit) + ) + ), + + CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) + && (RowsAtCompileTime == Dynamic + || ( (RowsAtCompileTime % packet_traits::size) == 0 + && (LhsFlags&AlignedBit) + ) + ), + + EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 + : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0 + : (RhsRowMajor && !CanVectorizeLhs), + + Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit) + | (EvalToRowMajor ? RowMajorBit : 0) + | (CanVectorizeLhs ? (LhsFlags & AlignedBit) : 0) + | (CanVectorizeRhs ? (RhsFlags & AlignedBit) : 0) + // TODO enable vectorization for mixed types + | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0), + + /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside + * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner + * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect + * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. + */ + CanVectorizeInner = SameType + && LhsRowMajor + && (!RhsRowMajor) + && (LhsFlags & RhsFlags & ActualPacketAccessBit) + && (LhsFlags & RhsFlags & AlignedBit) + && (InnerSize % packet_traits::size == 0) }; const CoeffReturnType coeff(Index row, Index col) const @@ -689,7 +737,7 @@ protected: * Diagonal products ***************************************************************************/ -template +template struct diagonal_product_evaluator_base : evaluator_base { @@ -698,7 +746,20 @@ struct diagonal_product_evaluator_base typedef typename internal::packet_traits::type PacketScalar; public: enum { - CoeffReadCost = NumTraits::MulCost + evaluator::CoeffReadCost + evaluator::CoeffReadCost + CoeffReadCost = NumTraits::MulCost + evaluator::CoeffReadCost + evaluator::CoeffReadCost, + + MatrixFlags = evaluator::Flags, + DiagFlags = evaluator::Flags, + _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor, + _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft) + ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)), + _SameTypes = is_same::value, + // FIXME currently we need same types, but in the future the next rule should be the one + //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))), + _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))), + _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0, + Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit + //(int(MatrixFlags)&int(DiagFlags)&AlignedBit), }; diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag) @@ -724,7 +785,7 @@ protected: { enum { InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, - DiagonalPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagonalType::Flags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned) + DiagonalPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagFlags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned) }; return internal::pmul(m_matImpl.template packet(row, col), m_diagImpl.template packet(id)); @@ -737,9 +798,9 @@ protected: // diagonal * dense template struct product_evaluator, ProductTag, DiagonalShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar> - : diagonal_product_evaluator_base > + : diagonal_product_evaluator_base, OnTheLeft> { - typedef diagonal_product_evaluator_base > Base; + typedef diagonal_product_evaluator_base, OnTheLeft> Base; using Base::m_diagImpl; using Base::m_matImpl; using Base::coeff; @@ -783,9 +844,9 @@ struct product_evaluator, ProductTag, DiagonalSha // dense * diagonal template struct product_evaluator, ProductTag, DenseShape, DiagonalShape, typename Lhs::Scalar, typename Rhs::Scalar> - : diagonal_product_evaluator_base > + : diagonal_product_evaluator_base, OnTheRight> { - typedef diagonal_product_evaluator_base > Base; + typedef diagonal_product_evaluator_base, OnTheRight> Base; using Base::m_diagImpl; using Base::m_matImpl; using Base::coeff; diff --git a/Eigen/src/Core/Redux.h b/Eigen/src/Core/Redux.h index 41290323f..6c8c58e95 100644 --- a/Eigen/src/Core/Redux.h +++ b/Eigen/src/Core/Redux.h @@ -389,8 +389,19 @@ DenseBase::redux(const Func& func) const eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix"); #ifdef EIGEN_TEST_EVALUATORS + // FIXME, eval_nest should be handled by redux_evaluator, however: + // - it is currently difficult to provide the right Flags since they are still handled by the expressions + // - handling it here might reduce the number of template instantiations +// typedef typename internal::nested_eval::type ThisNested; +// typedef typename internal::remove_all::type ThisNestedCleaned; +// typedef typename internal::redux_evaluator ThisEvaluator; +// +// ThisNested thisNested(derived()); +// ThisEvaluator thisEval(thisNested); + typedef typename internal::redux_evaluator ThisEvaluator; ThisEvaluator thisEval(derived()); + return internal::redux_impl::run(thisEval, func); #else diff --git a/Eigen/src/Core/Replicate.h b/Eigen/src/Core/Replicate.h index 1e640d8aa..2dff03ea3 100644 --- a/Eigen/src/Core/Replicate.h +++ b/Eigen/src/Core/Replicate.h @@ -53,10 +53,13 @@ struct traits > IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1 : MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0 : (MatrixType::Flags & RowMajorBit) ? 1 : 0, - Flags = (_MatrixTypeNested::Flags & HereditaryBits & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0) + #ifndef EIGEN_TEST_EVALUATORS - , + Flags = (_MatrixTypeNested::Flags & HereditaryBits & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0), CoeffReadCost = _MatrixTypeNested::CoeffReadCost +#else + // FIXME enable DirectAccess with negative strides? + Flags = IsRowMajor ? RowMajorBit : 0 #endif }; }; diff --git a/Eigen/src/Core/Reverse.h b/Eigen/src/Core/Reverse.h index 495b44cc4..4969bb4fc 100644 --- a/Eigen/src/Core/Reverse.h +++ b/Eigen/src/Core/Reverse.h @@ -45,14 +45,15 @@ struct traits > MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, +#ifndef EIGEN_TEST_EVALUATORS // let's enable LinearAccess only with vectorization because of the product overhead LinearAccess = ( (Direction==BothDirections) && (int(_MatrixTypeNested::Flags)&PacketAccessBit) ) ? LinearAccessBit : 0, - Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | LvalueBit | PacketAccessBit | LinearAccess) -#ifndef EIGEN_TEST_EVALUATORS - , + Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | LvalueBit | PacketAccessBit | LinearAccess), CoeffReadCost = _MatrixTypeNested::CoeffReadCost +#else + Flags = _MatrixTypeNested::Flags & (RowMajorBit | LvalueBit) #endif }; }; diff --git a/Eigen/src/Core/Select.h b/Eigen/src/Core/Select.h index abcba2d15..d4fd88e62 100644 --- a/Eigen/src/Core/Select.h +++ b/Eigen/src/Core/Select.h @@ -43,12 +43,13 @@ struct traits > ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime, - Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & HereditaryBits #ifndef EIGEN_TEST_EVALUATORS - , + Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & HereditaryBits, CoeffReadCost = traits::type>::CoeffReadCost + EIGEN_SIZE_MAX(traits::type>::CoeffReadCost, traits::type>::CoeffReadCost) +#else + Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & RowMajorBit #endif }; }; diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h index 98f9e888f..11b0e45a8 100644 --- a/Eigen/src/Core/Transpose.h +++ b/Eigen/src/Core/Transpose.h @@ -41,12 +41,17 @@ struct traits > : traits ColsAtCompileTime = MatrixType::RowsAtCompileTime, MaxRowsAtCompileTime = MatrixType::MaxColsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxRowsAtCompileTime, +#ifndef EIGEN_TEST_EVALUATORS FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, Flags0 = MatrixTypeNestedPlain::Flags & ~(LvalueBit | NestByRefBit), Flags1 = Flags0 | FlagsLvalueBit, Flags = Flags1 ^ RowMajorBit, -#ifndef EIGEN_TEST_EVALUATORS CoeffReadCost = MatrixTypeNestedPlain::CoeffReadCost, +#else + FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, + Flags0 = MatrixTypeNestedPlain::Flags & ~(LvalueBit | NestByRefBit), + Flags1 = Flags0 | FlagsLvalueBit, + Flags = Flags1 ^ RowMajorBit, #endif InnerStrideAtCompileTime = inner_stride_at_compile_time::ret, OuterStrideAtCompileTime = outer_stride_at_compile_time::ret diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h index 702d0006d..672b9662f 100644 --- a/Eigen/src/Core/VectorwiseOp.h +++ b/Eigen/src/Core/VectorwiseOp.h @@ -48,8 +48,12 @@ struct traits > ColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::ColsAtCompileTime, MaxRowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::MaxRowsAtCompileTime, MaxColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::MaxColsAtCompileTime, +#ifndef EIGEN_TEST_EVALUATORS Flags0 = (unsigned int)_MatrixTypeNested::Flags & HereditaryBits, Flags = (Flags0 & ~RowMajorBit) | (RowsAtCompileTime == 1 ? RowMajorBit : 0), +#else + Flags = RowsAtCompileTime == 1 ? RowMajorBit : 0, +#endif TraversalSize = Direction==Vertical ? MatrixType::RowsAtCompileTime : MatrixType::ColsAtCompileTime }; #ifndef EIGEN_TEST_EVALUATORS diff --git a/Eigen/src/Core/products/TriangularMatrixVector.h b/Eigen/src/Core/products/TriangularMatrixVector.h index eed7f4258..771613b11 100644 --- a/Eigen/src/Core/products/TriangularMatrixVector.h +++ b/Eigen/src/Core/products/TriangularMatrixVector.h @@ -259,7 +259,7 @@ template struct trmv_selector typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; typedef internal::blas_traits RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; - + typedef Map, Aligned> MappedDest; typename internal::add_const_on_value_type::type actualLhs = LhsBlasTraits::extract(lhs); diff --git a/Eigen/src/Core/util/ForwardDeclarations.h b/Eigen/src/Core/util/ForwardDeclarations.h index 975fdbf2a..092ba758e 100644 --- a/Eigen/src/Core/util/ForwardDeclarations.h +++ b/Eigen/src/Core/util/ForwardDeclarations.h @@ -157,6 +157,18 @@ template struct product_type; +/** \internal + * \class product_evaluator + * Products need their own evaluator with more template arguments allowing for + * easier partial template specializations. + */ +template< typename T, + int ProductTag = internal::product_type::ret, + typename LhsShape = typename evaluator_traits::Shape, + typename RhsShape = typename evaluator_traits::Shape, + typename LhsScalar = typename T::Lhs::Scalar, + typename RhsScalar = typename T::Rhs::Scalar + > struct product_evaluator; } template type; }; +#ifndef EIGEN_TEST_EVALUATORS template class compute_matrix_flags { @@ -158,6 +159,57 @@ class compute_matrix_flags enum { ret = LinearAccessBit | LvalueBit | DirectAccessBit | NestByRefBit | packet_access_bit | row_major_bit | aligned_bit }; }; +#else // EIGEN_TEST_EVALUATORS + +template +class compute_matrix_flags +{ + enum { row_major_bit = Options&RowMajor ? RowMajorBit : 0 }; + public: + // FIXME currently we still have to handle DirectAccessBit at the expression level to handle DenseCoeffsBase<> + // and then propagate this information to the evaluator's flags. + // However, I (Gael) think that DirectAccessBit should only matter at the evaluation stage. + enum { ret = DirectAccessBit | LvalueBit | NestByRefBit | row_major_bit }; +}; +#endif + +#ifdef EIGEN_ENABLE_EVALUATORS +template +class compute_matrix_evaluator_flags +{ + enum { + row_major_bit = Options&RowMajor ? RowMajorBit : 0, + is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic, + + aligned_bit = + ( + ((Options&DontAlign)==0) + && ( +#if EIGEN_ALIGN_STATICALLY + ((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % 16) == 0)) +#else + 0 +#endif + + || + +#if EIGEN_ALIGN + is_dynamic_size_storage +#else + 0 +#endif + + ) + ) ? AlignedBit : 0, + packet_access_bit = packet_traits::Vectorizable && aligned_bit ? PacketAccessBit : 0 + }; + + public: + enum { ret = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit | aligned_bit }; +}; + +#endif // EIGEN_ENABLE_EVALUATORS + template struct size_at_compile_time { enum { ret = (_Rows==Dynamic || _Cols==Dynamic) ? Dynamic : _Rows * _Cols };