Move evaluation related flags from traits to evaluator and fix evaluators of MapBase and Replicate

This commit is contained in:
Gael Guennebaud 2014-03-12 13:34:11 +01:00
parent 7eefdb948c
commit 8dd3b716e3
23 changed files with 433 additions and 125 deletions

View File

@ -28,11 +28,10 @@ template <typename DstEvaluator, typename SrcEvaluator, typename AssignFunc>
struct copy_using_evaluator_traits
{
typedef typename DstEvaluator::XprType Dst;
typedef typename SrcEvaluator::XprType Src;
// TODO, we should get these flags from the evaluators
enum {
DstFlags = Dst::Flags,
SrcFlags = Src::Flags
DstFlags = DstEvaluator::Flags,
SrcFlags = SrcEvaluator::Flags
};
public:
@ -56,7 +55,9 @@ private:
};
enum {
StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
DstIsRowMajor = DstEvaluator::Flags&RowMajorBit,
SrcIsRowMajor = SrcEvaluator::Flags&RowMajorBit,
StorageOrdersAgree = (int(DstIsRowMajor) == int(SrcIsRowMajor)),
MightVectorize = StorageOrdersAgree
&& (int(DstFlags) & int(SrcFlags) & ActualPacketAccessBit)
&& (functor_traits<AssignFunc>::PacketAccess),
@ -596,7 +597,7 @@ public:
typedef typename DstEvaluatorType::ExpressionTraits Traits;
return int(Traits::RowsAtCompileTime) == 1 ? 0
: int(Traits::ColsAtCompileTime) == 1 ? inner
: int(Traits::Flags)&RowMajorBit ? outer
: int(DstEvaluatorType::Flags)&RowMajorBit ? outer
: inner;
}
@ -605,7 +606,7 @@ public:
typedef typename DstEvaluatorType::ExpressionTraits Traits;
return int(Traits::ColsAtCompileTime) == 1 ? 0
: int(Traits::RowsAtCompileTime) == 1 ? inner
: int(Traits::Flags)&RowMajorBit ? inner
: int(DstEvaluatorType::Flags)&RowMajorBit ? inner
: outer;
}

View File

@ -68,6 +68,7 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
MaxColsAtCompileTime = BlockCols==0 ? 0
: ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime)
: int(traits<XprType>::MaxColsAtCompileTime),
XprTypeIsRowMajor = (int(traits<XprType>::Flags)&RowMajorBit) != 0,
IsRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
@ -80,6 +81,10 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
OuterStrideAtCompileTime = HasSameStorageOrderAsXprType
? int(outer_stride_at_compile_time<XprType>::ret)
: int(inner_stride_at_compile_time<XprType>::ret),
// IsAligned is needed by MapBase's assertions
// We can safely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the respective evaluator
IsAligned = 0,
#ifndef EIGEN_TEST_EVALUATORS
MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
&& (InnerStrideAtCompileTime == 1)
? PacketAccessBit : 0,
@ -92,6 +97,12 @@ struct traits<Block<XprType, BlockRows, BlockCols, InnerPanel> > : traits<XprTyp
MaskPacketAccessBit |
MaskAlignedBit),
Flags = Flags0 | FlagsLinearAccessBit | FlagsLvalueBit | FlagsRowMajorBit
#else
// FIXME, this traits is rather specialized for dense object...
FlagsLvalueBit = is_lvalue<XprType>::value ? LvalueBit : 0,
FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0,
Flags = (traits<XprType>::Flags & DirectAccessBit) | FlagsLvalueBit | FlagsRowMajorBit // FIXME DirectAccessBit should not be handled by expressions
#endif
};
};

View File

@ -136,7 +136,9 @@ struct evaluator<PlainObjectBase<Derived> >
RowsAtCompileTime = PlainObjectType::RowsAtCompileTime,
ColsAtCompileTime = PlainObjectType::ColsAtCompileTime,
CoeffReadCost = NumTraits<Scalar>::ReadCost
CoeffReadCost = NumTraits<Scalar>::ReadCost,
Flags = compute_matrix_evaluator_flags< Scalar,Derived::RowsAtCompileTime,Derived::ColsAtCompileTime,
Derived::Options,Derived::MaxRowsAtCompileTime,Derived::MaxColsAtCompileTime>::ret
};
evaluator()
@ -323,7 +325,8 @@ struct evaluator<Transpose<ArgType> >
typedef Transpose<ArgType> XprType;
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
Flags = evaluator<ArgType>::Flags ^ RowMajorBit
};
evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {}
@ -389,9 +392,16 @@ struct evaluator<CwiseNullaryOp<NullaryOp,PlainObjectType> >
: evaluator_base<CwiseNullaryOp<NullaryOp,PlainObjectType> >
{
typedef CwiseNullaryOp<NullaryOp,PlainObjectType> XprType;
typedef typename internal::remove_all<PlainObjectType>::type PlainObjectTypeCleaned;
enum {
CoeffReadCost = internal::functor_traits<NullaryOp>::Cost
CoeffReadCost = internal::functor_traits<NullaryOp>::Cost,
// Keep only the hereditary bits of the plain object's evaluator flags, plus linear/packet
// access when the nullary functor supports them; non-repeatable functors request evaluation before nesting.
Flags = (evaluator<PlainObjectTypeCleaned>::Flags
& ( HereditaryBits
| (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0)
| (functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0)))
| (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit) // FIXME EvalBeforeNestingBit should not be needed anymore
};
evaluator(const XprType& n)
@ -437,7 +447,11 @@ struct evaluator<CwiseUnaryOp<UnaryOp, ArgType> >
typedef CwiseUnaryOp<UnaryOp, ArgType> XprType;
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost
CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
Flags = evaluator<ArgType>::Flags & (
HereditaryBits | LinearAccessBit | AlignedBit
| (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0))
};
evaluator(const XprType& op)
@ -485,7 +499,22 @@ struct evaluator<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
typedef CwiseBinaryOp<BinaryOp, Lhs, Rhs> XprType;
enum {
CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost
CoeffReadCost = evaluator<Lhs>::CoeffReadCost + evaluator<Rhs>::CoeffReadCost + functor_traits<BinaryOp>::Cost,
LhsFlags = evaluator<Lhs>::Flags,
RhsFlags = evaluator<Rhs>::Flags,
SameType = is_same<typename Lhs::Scalar,typename Rhs::Scalar>::value,
StorageOrdersAgree = (int(LhsFlags)&RowMajorBit)==(int(RhsFlags)&RowMajorBit),
Flags0 = (int(LhsFlags) | int(RhsFlags)) & (
HereditaryBits
| (int(LhsFlags) & int(RhsFlags) &
( AlignedBit
| (StorageOrdersAgree ? LinearAccessBit : 0)
| (functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)
)
)
),
Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit)
};
evaluator(const XprType& xpr)
@ -537,7 +566,9 @@ struct evaluator<CwiseUnaryView<UnaryOp, ArgType> >
typedef CwiseUnaryView<UnaryOp, ArgType> XprType;
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost
CoeffReadCost = evaluator<ArgType>::CoeffReadCost + functor_traits<UnaryOp>::Cost,
Flags = (evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit))
};
evaluator(const XprType& op)
@ -576,12 +607,15 @@ protected:
// -------------------- Map --------------------
template<typename Derived, int AccessorsType>
struct evaluator<MapBase<Derived, AccessorsType> >
: evaluator_base<Derived>
// FIXME perhaps the PlainObjectType could be provided by Derived::PlainObject ?
// but that might complicate template specialization
template<typename Derived, typename PlainObjectType>
struct mapbase_evaluator;
template<typename Derived, typename PlainObjectType>
struct mapbase_evaluator : evaluator_base<Derived>
{
typedef MapBase<Derived, AccessorsType> MapType;
typedef Derived XprType;
typedef Derived XprType;
typedef typename XprType::PointerType PointerType;
typedef typename XprType::Index Index;
typedef typename XprType::Scalar Scalar;
@ -590,81 +624,103 @@ struct evaluator<MapBase<Derived, AccessorsType> >
typedef typename XprType::PacketReturnType PacketReturnType;
enum {
RowsAtCompileTime = XprType::RowsAtCompileTime,
// Storage-order flag of the mapped expression: it must mirror XprType::IsRowMajor, not the row count.
IsRowMajor = XprType::IsRowMajor,
ColsAtCompileTime = XprType::ColsAtCompileTime,
CoeffReadCost = NumTraits<Scalar>::ReadCost
};
evaluator(const XprType& map)
mapbase_evaluator(const XprType& map)
: m_data(const_cast<PointerType>(map.data())),
m_rowStride(map.rowStride()),
m_colStride(map.colStride())
{ }
m_xpr(map)
{
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(evaluator<Derived>::Flags&PacketAccessBit, internal::inner_stride_at_compile_time<Derived>::ret==1),
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
}
CoeffReturnType coeff(Index row, Index col) const
{
return m_data[col * m_colStride + row * m_rowStride];
{
return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()];
}
CoeffReturnType coeff(Index index) const
{
return coeff(RowsAtCompileTime == 1 ? 0 : index,
RowsAtCompileTime == 1 ? index : 0);
{
return m_data[index * m_xpr.innerStride()];
}
Scalar& coeffRef(Index row, Index col)
{
return m_data[col * m_colStride + row * m_rowStride];
{
return m_data[col * m_xpr.colStride() + row * m_xpr.rowStride()];
}
Scalar& coeffRef(Index index)
{
return coeffRef(RowsAtCompileTime == 1 ? 0 : index,
RowsAtCompileTime == 1 ? index : 0);
{
return m_data[index * m_xpr.innerStride()];
}
template<int LoadMode>
PacketReturnType packet(Index row, Index col) const
{
PointerType ptr = m_data + row * m_rowStride + col * m_colStride;
{
PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
return internal::ploadt<PacketScalar, LoadMode>(ptr);
}
template<int LoadMode>
PacketReturnType packet(Index index) const
{
return packet<LoadMode>(RowsAtCompileTime == 1 ? 0 : index,
RowsAtCompileTime == 1 ? index : 0);
{
return internal::ploadt<PacketScalar, LoadMode>(m_data + index * m_xpr.innerStride());
}
template<int StoreMode>
void writePacket(Index row, Index col, const PacketScalar& x)
{
PointerType ptr = m_data + row * m_rowStride + col * m_colStride;
{
PointerType ptr = m_data + row * m_xpr.rowStride() + col * m_xpr.colStride();
return internal::pstoret<Scalar, PacketScalar, StoreMode>(ptr, x);
}
template<int StoreMode>
void writePacket(Index index, const PacketScalar& x)
{
return writePacket<StoreMode>(RowsAtCompileTime == 1 ? 0 : index,
RowsAtCompileTime == 1 ? index : 0,
x);
{
internal::pstoret<Scalar, PacketScalar, StoreMode>(m_data + index * m_xpr.innerStride(), x);
}
protected:
PointerType m_data;
int m_rowStride;
int m_colStride;
const XprType& m_xpr;
};
// Evaluator of a Map expression.
// The evaluation flags are computed here, at the evaluator level: they start from
// evaluator<PlainObjectType>::Flags and are then adjusted for alignment, linear access
// and packet access according to MapOptions and StrideType.
// NOTE(review): this listing shows two base-class clauses and two constructor initializers
// back to back (evaluator<MapBase<...>> and mapbase_evaluator<...>); presumably only the
// mapbase_evaluator variant is live -- confirm against the actual file.
template<typename PlainObjectType, int MapOptions, typename StrideType>
struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
: public evaluator<MapBase<Map<PlainObjectType, MapOptions, StrideType> > >
: public mapbase_evaluator<Map<PlainObjectType, MapOptions, StrideType>, PlainObjectType>
{
typedef Map<PlainObjectType, MapOptions, StrideType> XprType;
typedef typename XprType::Scalar Scalar;
enum {
// A compile-time stride of 0 in StrideType falls back to the stride of PlainObjectType.
InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0
? int(PlainObjectType::InnerStrideAtCompileTime)
: int(StrideType::InnerStrideAtCompileTime),
OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
? int(PlainObjectType::OuterStrideAtCompileTime)
: int(StrideType::OuterStrideAtCompileTime),
HasNoInnerStride = InnerStrideAtCompileTime == 1,
HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
HasNoStride = HasNoInnerStride && HasNoOuterStride,
// Alignment is only claimed when EIGEN_ALIGN is enabled and the Aligned option was requested.
IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
// Packet access needs a unit inner stride and, in addition, either a dynamic size, no custom
// outer stride, or a fixed outer stride whose size in bytes is a multiple of 16.
KeepsPacketAccess = bool(HasNoInnerStride)
&& ( bool(IsDynamicSize)
|| HasNoOuterStride
|| ( OuterStrideAtCompileTime!=Dynamic
&& ((static_cast<int>(sizeof(Scalar))*OuterStrideAtCompileTime)%16)==0 ) ),
// Flags0: flags of the plain object's evaluator.
Flags0 = evaluator<PlainObjectType>::Flags,
// Flags1: AlignedBit forced on or off according to IsAligned.
Flags1 = IsAligned ? (int(Flags0) | AlignedBit) : (int(Flags0) & ~AlignedBit),
// Flags2: LinearAccessBit is dropped unless there is no stride at all or the object is a vector.
Flags2 = (bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime))
? int(Flags1) : int(Flags1 & ~LinearAccessBit),
// Final flags: PacketAccessBit is dropped when KeepsPacketAccess is false.
Flags = KeepsPacketAccess ? int(Flags2) : (int(Flags2) & ~PacketAccessBit)
};
// Forwards the map expression to the base evaluator.
evaluator(const XprType& map)
: evaluator<MapBase<XprType> >(map)
: mapbase_evaluator<XprType, PlainObjectType>(map)
{ }
};
@ -672,12 +728,16 @@ struct evaluator<Map<PlainObjectType, MapOptions, StrideType> >
// Evaluator of a Ref expression.
// Its Flags are taken verbatim from the evaluator of the Map type built with the same
// PlainObjectType, options and stride.
// NOTE(review): two base-class clauses and two constructor heads appear back to back in this
// listing (evaluator<MapBase<...>> and mapbase_evaluator<...>); presumably only the
// mapbase_evaluator variant is live -- confirm against the actual file.
template<typename PlainObjectType, int RefOptions, typename StrideType>
struct evaluator<Ref<PlainObjectType, RefOptions, StrideType> >
: public evaluator<MapBase<Ref<PlainObjectType, RefOptions, StrideType> > >
: public mapbase_evaluator<Ref<PlainObjectType, RefOptions, StrideType>, PlainObjectType>
{
typedef Ref<PlainObjectType, RefOptions, StrideType> XprType;
enum {
// Same evaluation flags as the equivalent Map.
Flags = evaluator<Map<PlainObjectType, RefOptions, StrideType> >::Flags
};
// Forwards the Ref expression to the base evaluator.
evaluator(const XprType& map)
: evaluator<MapBase<XprType> >(map)
evaluator(const XprType& ref)
: mapbase_evaluator<XprType, PlainObjectType>(ref)
{ }
};
@ -691,8 +751,39 @@ struct evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel> >
: block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel>
{
typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
typedef typename XprType::Scalar Scalar;
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
// Sizes must be those of the block itself (XprType), not of the nested argument:
// they drive IsRowMajor, InnerSize and the linear/packet access masks computed below.
RowsAtCompileTime = traits<XprType>::RowsAtCompileTime,
ColsAtCompileTime = traits<XprType>::ColsAtCompileTime,
MaxRowsAtCompileTime = traits<XprType>::MaxRowsAtCompileTime,
MaxColsAtCompileTime = traits<XprType>::MaxColsAtCompileTime,
XprTypeIsRowMajor = (int(traits<ArgType>::Flags)&RowMajorBit) != 0,
IsRowMajor = (MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1) ? 1
: (MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1) ? 0
: XprTypeIsRowMajor,
HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor),
InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime),
InnerStrideAtCompileTime = HasSameStorageOrderAsXprType
? int(inner_stride_at_compile_time<XprType>::ret)
: int(outer_stride_at_compile_time<XprType>::ret),
OuterStrideAtCompileTime = HasSameStorageOrderAsXprType
? int(outer_stride_at_compile_time<XprType>::ret)
: int(inner_stride_at_compile_time<XprType>::ret),
MaskPacketAccessBit = (InnerSize == Dynamic || (InnerSize % packet_traits<Scalar>::size) == 0)
&& (InnerStrideAtCompileTime == 1)
? PacketAccessBit : 0,
MaskAlignedBit = (InnerPanel && (OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % 16) == 0)) ? AlignedBit : 0,
FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0,
FlagsRowMajorBit = XprType::Flags&RowMajorBit,
Flags0 = traits<XprType>::Flags & ( (HereditaryBits & ~RowMajorBit) |
DirectAccessBit |
MaskPacketAccessBit |
MaskAlignedBit),
Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit
};
typedef block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel> block_evaluator_type;
evaluator(const XprType& block) : block_evaluator_type(block) {}
@ -778,18 +869,23 @@ protected:
// Specialization of block_evaluator for blocks with direct access (HasDirectAccess == true).
// It reuses the pointer/stride based mapbase_evaluator, parameterized by the Block type and
// the Block's PlainObject type.
// NOTE(review): two base-class clauses and two constructor initializer/body pairs appear back to
// back in this listing (evaluator<MapBase<...>> and mapbase_evaluator<...>); presumably only the
// mapbase_evaluator variant is live -- confirm against the actual file.
template<typename ArgType, int BlockRows, int BlockCols, bool InnerPanel>
struct block_evaluator<ArgType, BlockRows, BlockCols, InnerPanel, /* HasDirectAccess */ true>
: evaluator<MapBase<Block<ArgType, BlockRows, BlockCols, InnerPanel> > >
: mapbase_evaluator<Block<ArgType, BlockRows, BlockCols, InnerPanel>,
typename Block<ArgType, BlockRows, BlockCols, InnerPanel>::PlainObject>
{
typedef Block<ArgType, BlockRows, BlockCols, InnerPanel> XprType;
// Forwards the block to the base evaluator, then checks alignment at run time.
block_evaluator(const XprType& block)
: evaluator<MapBase<XprType> >(block)
{ }
: mapbase_evaluator<XprType, typename XprType::PlainObject>(block)
{
// FIXME this should be an internal assertion
// If the evaluator flags claim AlignedBit, the block's data pointer must be a multiple of 16.
eigen_assert(EIGEN_IMPLIES(evaluator<XprType>::Flags&AlignedBit, (size_t(block.data()) % 16) == 0) && "data is not aligned");
}
};
// -------------------- Select --------------------
// TODO enable vectorization for Select
template<typename ConditionMatrixType, typename ThenMatrixType, typename ElseMatrixType>
struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
: evaluator_base<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
@ -798,7 +894,9 @@ struct evaluator<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
enum {
CoeffReadCost = evaluator<ConditionMatrixType>::CoeffReadCost
+ EIGEN_SIZE_MAX(evaluator<ThenMatrixType>::CoeffReadCost,
evaluator<ElseMatrixType>::CoeffReadCost)
evaluator<ElseMatrixType>::CoeffReadCost),
Flags = (unsigned int)evaluator<ThenMatrixType>::Flags & evaluator<ElseMatrixType>::Flags & HereditaryBits
};
evaluator(const XprType& select)
@ -850,7 +948,9 @@ struct evaluator<Replicate<ArgType, RowFactor, ColFactor> >
typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned;
enum {
CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost
CoeffReadCost = evaluator<ArgTypeNestedCleaned>::CoeffReadCost,
Flags = (evaluator<ArgTypeNestedCleaned>::Flags & HereditaryBits & ~RowMajorBit) | (traits<XprType>::Flags & RowMajorBit)
};
evaluator(const XprType& replicate)
@ -858,7 +958,7 @@ struct evaluator<Replicate<ArgType, RowFactor, ColFactor> >
m_argImpl(m_arg),
m_rows(replicate.nestedExpression().rows()),
m_cols(replicate.nestedExpression().cols())
{ }
{}
CoeffReturnType coeff(Index row, Index col) const
{
@ -907,17 +1007,19 @@ struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType;
typedef typename XprType::Scalar InputScalar;
enum {
TraversalSize = Direction==Vertical ? XprType::RowsAtCompileTime : XprType::ColsAtCompileTime
// Number of coefficients reduced per output entry: the argument's rows for a
// vertical reduction, the argument's columns for a horizontal one.
TraversalSize = Direction==Vertical ? ArgType::RowsAtCompileTime : ArgType::ColsAtCompileTime
};
typedef typename MemberOp::template Cost<InputScalar,int(TraversalSize)> CostOpType;
enum {
CoeffReadCost = TraversalSize==Dynamic ? Dynamic
: TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value)
: TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value),
// Storage order comes from the reduction expression itself; the argument contributes
// only its other hereditary bits, so its own RowMajorBit must be masked out.
Flags = (traits<XprType>::Flags&RowMajorBit) | (evaluator<ArgType>::Flags&HereditaryBits&~RowMajorBit)
};
evaluator(const XprType expr)
: m_expr(expr)
{ }
{}
typedef typename XprType::Index Index;
typedef typename XprType::CoeffReturnType CoeffReturnType;
@ -948,7 +1050,8 @@ struct evaluator_wrapper_base
{
typedef typename remove_all<typename XprType::NestedExpressionType>::type ArgType;
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
Flags = evaluator<ArgType>::Flags
};
evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {}
@ -1058,7 +1161,15 @@ struct evaluator<Reverse<ArgType, Direction> >
|| ((Direction == Vertical) && IsColMajor)
|| ((Direction == Horizontal) && IsRowMajor),
CoeffReadCost = evaluator<ArgType>::CoeffReadCost
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
// let's enable LinearAccess only with vectorization because of the product overhead
// FIXME enable DirectAccess with negative strides?
Flags0 = evaluator<ArgType>::Flags,
LinearAccess = ( (Direction==BothDirections) && (int(Flags0)&PacketAccessBit) )
? LinearAccessBit : 0,
Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess)
};
typedef internal::reverse_packet_cond<PacketScalar,ReversePacket> reverse_packet;
@ -1071,7 +1182,7 @@ struct evaluator<Reverse<ArgType, Direction> >
CoeffReturnType coeff(Index row, Index col) const
{
return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row,
ReverseCol ? m_cols.value() - col - 1 : col);
ReverseCol ? m_cols.value() - col - 1 : col);
}
CoeffReturnType coeff(Index index) const
@ -1082,7 +1193,7 @@ struct evaluator<Reverse<ArgType, Direction> >
Scalar& coeffRef(Index row, Index col)
{
return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row,
ReverseCol ? m_cols.value() - col - 1 : col);
ReverseCol ? m_cols.value() - col - 1 : col);
}
Scalar& coeffRef(Index index)
@ -1138,7 +1249,9 @@ struct evaluator<Diagonal<ArgType, DiagIndex> >
typedef Diagonal<ArgType, DiagIndex> XprType;
enum {
CoeffReadCost = evaluator<ArgType>::CoeffReadCost
CoeffReadCost = evaluator<ArgType>::CoeffReadCost,
Flags = (unsigned int)evaluator<ArgType>::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit) & ~RowMajorBit
};
evaluator(const XprType& diagonal)

View File

@ -65,6 +65,7 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
typedef typename remove_reference<LhsNested>::type _LhsNested;
typedef typename remove_reference<RhsNested>::type _RhsNested;
enum {
#ifndef EIGEN_TEST_EVALUATORS
LhsFlags = _LhsNested::Flags,
RhsFlags = _RhsNested::Flags,
SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
@ -78,12 +79,13 @@ struct traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
)
)
),
Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit)
#ifndef EIGEN_TEST_EVALUATORS
,
Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit),
LhsCoeffReadCost = _LhsNested::CoeffReadCost,
RhsCoeffReadCost = _RhsNested::CoeffReadCost,
CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + functor_traits<BinaryOp>::Cost
#else
Flags = _LhsNested::Flags & RowMajorBit
#endif
};
};

View File

@ -35,14 +35,15 @@ template<typename NullaryOp, typename PlainObjectType>
struct traits<CwiseNullaryOp<NullaryOp, PlainObjectType> > : traits<PlainObjectType>
{
enum {
#ifndef EIGEN_TEST_EVALUATORS
Flags = (traits<PlainObjectType>::Flags
& ( HereditaryBits
| (functor_has_linear_access<NullaryOp>::ret ? LinearAccessBit : 0)
| (functor_traits<NullaryOp>::PacketAccess ? PacketAccessBit : 0)))
| (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit)
#ifndef EIGEN_TEST_EVALUATORS
,
| (functor_traits<NullaryOp>::IsRepeatable ? 0 : EvalBeforeNestingBit),
CoeffReadCost = functor_traits<NullaryOp>::Cost
#else
Flags = traits<PlainObjectType>::Flags & RowMajorBit
#endif
};
};

View File

@ -44,12 +44,13 @@ struct traits<CwiseUnaryOp<UnaryOp, XprType> >
typedef typename XprType::Nested XprTypeNested;
typedef typename remove_reference<XprTypeNested>::type _XprTypeNested;
enum {
#ifndef EIGEN_TEST_EVALUATORS
Flags = _XprTypeNested::Flags & (
HereditaryBits | LinearAccessBit | AlignedBit
| (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0))
#ifndef EIGEN_TEST_EVALUATORS
,
| (functor_traits<UnaryOp>::PacketAccess ? PacketAccessBit : 0)),
CoeffReadCost = _XprTypeNested::CoeffReadCost + functor_traits<UnaryOp>::Cost
#else
Flags = _XprTypeNested::Flags & RowMajorBit
#endif
};
};

View File

@ -37,9 +37,11 @@ struct traits<CwiseUnaryView<ViewOp, MatrixType> >
typedef typename MatrixType::Nested MatrixTypeNested;
typedef typename remove_all<MatrixTypeNested>::type _MatrixTypeNested;
enum {
Flags = (traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)),
#ifndef EIGEN_TEST_EVALUATORS
Flags = (traits<_MatrixTypeNested>::Flags & (HereditaryBits | LvalueBit | LinearAccessBit | DirectAccessBit)),
CoeffReadCost = traits<_MatrixTypeNested>::CoeffReadCost + functor_traits<ViewOp>::Cost,
#else
Flags = traits<_MatrixTypeNested>::Flags & (RowMajorBit | LvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions
#endif
MatrixTypeInnerStride = inner_stride_at_compile_time<MatrixType>::ret,
// need to cast the sizeof's from size_t to int explicitly, otherwise:

View File

@ -51,10 +51,13 @@ struct traits<Diagonal<MatrixType,DiagIndex> >
: (EIGEN_PLAIN_ENUM_MIN(MatrixType::MaxRowsAtCompileTime - EIGEN_PLAIN_ENUM_MAX(-DiagIndex, 0),
MatrixType::MaxColsAtCompileTime - EIGEN_PLAIN_ENUM_MAX( DiagIndex, 0))),
MaxColsAtCompileTime = 1,
#ifndef EIGEN_TEST_EVALUATORS
MaskLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
Flags = (unsigned int)_MatrixTypeNested::Flags & (HereditaryBits | LinearAccessBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit,
#ifndef EIGEN_TEST_EVALUATORS
CoeffReadCost = _MatrixTypeNested::CoeffReadCost,
#else
MaskLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
Flags = (unsigned int)_MatrixTypeNested::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions
#endif
MatrixTypeOuterStride = outer_stride_at_compile_time<MatrixType>::ret,
InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride+1,

View File

@ -275,6 +275,7 @@ struct traits<DiagonalWrapper<_DiagonalVectorType> >
typedef typename DiagonalVectorType::Scalar Scalar;
typedef typename DiagonalVectorType::Index Index;
typedef typename DiagonalVectorType::StorageKind StorageKind;
typedef typename traits<DiagonalVectorType>::XprKind XprKind;
enum {
RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,
ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime,

View File

@ -26,6 +26,7 @@ struct traits<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
#ifndef EIGEN_TEST_EVALUATORS
_StorageOrder = MatrixType::Flags & RowMajorBit ? RowMajor : ColMajor,
_ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
@ -34,11 +35,10 @@ struct traits<DiagonalProduct<MatrixType, DiagonalType, ProductOrder> >
//_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))),
_Vectorizable = bool(int(MatrixType::Flags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagonalType::DiagonalVectorType::Flags)&PacketAccessBit))),
_LinearAccessMask = (RowsAtCompileTime==1 || ColsAtCompileTime==1) ? LinearAccessBit : 0,
Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit //(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit),
#ifndef EIGEN_TEST_EVALUATORS
,
Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixType::Flags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit, //(int(MatrixType::Flags)&int(DiagonalType::DiagonalVectorType::Flags)&AlignedBit),
CoeffReadCost = NumTraits<Scalar>::MulCost + MatrixType::CoeffReadCost + DiagonalType::DiagonalVectorType::CoeffReadCost
#else
Flags = RowMajorBit & (unsigned int)(MatrixType::Flags)
#endif
};
};

View File

@ -79,10 +79,11 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0
? int(PlainObjectType::OuterStrideAtCompileTime)
: int(StrideType::OuterStrideAtCompileTime),
IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
#ifndef EIGEN_TEST_EVALUATORS
HasNoInnerStride = InnerStrideAtCompileTime == 1,
HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0,
HasNoStride = HasNoInnerStride && HasNoOuterStride,
IsAligned = bool(EIGEN_ALIGN) && ((int(MapOptions)&Aligned)==Aligned),
IsDynamicSize = PlainObjectType::SizeAtCompileTime==Dynamic,
KeepsPacketAccess = bool(HasNoInnerStride)
&& ( bool(IsDynamicSize)
@ -95,6 +96,10 @@ struct traits<Map<PlainObjectType, MapOptions, StrideType> >
? int(Flags1) : int(Flags1 & ~LinearAccessBit),
Flags3 = is_lvalue<PlainObjectType>::value ? int(Flags2) : (int(Flags2) & ~LvalueBit),
Flags = KeepsPacketAccess ? int(Flags3) : (int(Flags3) & ~PacketAccessBit)
#else
Flags0 = TraitsBase::Flags & (~NestByRefBit),
Flags = is_lvalue<PlainObjectType>::value ? int(Flags0) : (int(Flags0) & ~LvalueBit)
#endif
};
private:
enum { Options }; // Expressions don't have Options

View File

@ -161,11 +161,16 @@ template<typename Derived> class MapBase<Derived, ReadOnlyAccessors>
EIGEN_DEVICE_FUNC
void checkSanity() const
{
#ifndef EIGEN_TEST_EVALUATORS
// moved to evaluator
EIGEN_STATIC_ASSERT(EIGEN_IMPLIES(internal::traits<Derived>::Flags&PacketAccessBit,
internal::inner_stride_at_compile_time<Derived>::ret==1),
PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1);
eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % 16) == 0)
&& "data is not aligned");
eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::Flags&AlignedBit, (size_t(m_data) % 16) == 0) && "data is not aligned");
#else
eigen_assert(EIGEN_IMPLIES(internal::traits<Derived>::IsAligned, (size_t(m_data) % 16) == 0) && "data is not aligned");
#endif
}
PointerType m_data;

View File

@ -33,14 +33,29 @@ template<typename Lhs, typename Rhs, int Option, typename StorageKind> class Pro
namespace internal {
// Traits of the Product expression.
// Scalar, storage kind and index types are derived from the cleaned Lhs/Rhs types; the row sizes
// come from the Lhs and the column sizes from the Rhs. The only flag set here is RowMajorBit,
// and only when MaxRowsAtCompileTime is 1.
// NOTE(review): this listing contains what looks like both the previous definition (deriving from
// traits<CoeffBasedProduct<...>> and masking its Flags) and the new stand-alone one, hence the
// duplicated base clause, opening brace and Flags enumerator -- confirm against the actual file.
template<typename Lhs, typename Rhs, int Option>
struct traits<Product<Lhs, Rhs, Option> >
: traits<CoeffBasedProduct<Lhs, Rhs, NestByRefBit> >
{
// We want A+B*C to be of type Product<Matrix, Sum> and not Product<Matrix, Matrix>
// TODO: This flag should eventually go in a separate evaluator traits class
{
typedef typename remove_all<Lhs>::type LhsCleaned;
typedef typename remove_all<Rhs>::type RhsCleaned;
typedef MatrixXpr XprKind;
typedef typename scalar_product_traits<typename LhsCleaned::Scalar, typename RhsCleaned::Scalar>::ReturnType Scalar;
typedef typename promote_storage_type<typename traits<LhsCleaned>::StorageKind,
typename traits<RhsCleaned>::StorageKind>::ret StorageKind;
typedef typename promote_index_type<typename traits<LhsCleaned>::Index,
typename traits<RhsCleaned>::Index>::type Index;
enum {
Flags = traits<CoeffBasedProduct<Lhs, Rhs, NestByRefBit> >::Flags & ~(EvalBeforeNestingBit | DirectAccessBit)
// Result dimensions: rows of the left operand, columns of the right operand.
RowsAtCompileTime = LhsCleaned::RowsAtCompileTime,
ColsAtCompileTime = RhsCleaned::ColsAtCompileTime,
MaxRowsAtCompileTime = LhsCleaned::MaxRowsAtCompileTime,
MaxColsAtCompileTime = RhsCleaned::MaxColsAtCompileTime,
// The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator.
Flags = (MaxRowsAtCompileTime==1 ? RowMajorBit : 0)
};
};
} // end namespace internal
@ -59,8 +74,6 @@ class Product : public ProductImpl<_Lhs,_Rhs,Option,
typename internal::promote_storage_type<typename Lhs::StorageKind,
typename Rhs::StorageKind>::ret>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE(Product)
typedef typename internal::nested<Lhs>::type LhsNested;
typedef typename internal::nested<Rhs>::type RhsNested;

View File

@ -17,19 +17,6 @@ namespace Eigen {
namespace internal {
/** \internal
* \class product_evaluator
* Products need their own evaluator with more template arguments allowing for
* easier partial template specializations.
*/
template< typename T,
int ProductTag = internal::product_type<typename T::Lhs,typename T::Rhs>::ret,
typename LhsShape = typename evaluator_traits<typename T::Lhs>::Shape,
typename RhsShape = typename evaluator_traits<typename T::Rhs>::Shape,
typename LhsScalar = typename T::Lhs::Scalar,
typename RhsScalar = typename T::Rhs::Scalar
> struct product_evaluator;
/** \internal
* Evaluator of a product expression.
* Since products require special treatments to handle all possible cases,
@ -119,6 +106,18 @@ struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, ProductTag, DenseSha
: m_result(xpr.rows(), xpr.cols())
{
::new (static_cast<Base*>(this)) Base(m_result);
// FIXME shall we handle nested_eval here?
// typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
// typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
// typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
// typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
//
// const LhsNested lhs(xpr.lhs());
// const RhsNested rhs(xpr.rhs());
//
// generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);
generic_product_impl<Lhs, Rhs>::evalTo(m_result, xpr.lhs(), xpr.rhs());
}
@ -133,6 +132,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::assign_
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar> &)
{
// FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
}
};
@ -144,6 +144,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::add_ass
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
static void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar> &)
{
// FIXME shall we handle nested_eval here?
generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
}
};
@ -155,6 +156,7 @@ struct Assignment<DstXprType, Product<Lhs,Rhs,DefaultProduct>, internal::sub_ass
typedef Product<Lhs,Rhs,DefaultProduct> SrcXprType;
/** \internal
  * Compound "-=" assignment of a product: forwards both operands of \a src,
  * together with the destination \a dst, to generic_product_impl<Lhs, Rhs>::subTo.
  * The sub_assign_op functor argument only selects this overload; it is not used.
  */
static void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar> &)
{
  typedef generic_product_impl<Lhs, Rhs> ProductImpl;
  // FIXME shall we handle nested_eval here?
  ProductImpl::subTo(dst, src.lhs(), src.rhs());
}
};
@ -368,7 +370,6 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
: evaluator_base<Product<Lhs, Rhs, LazyProduct> >
{
typedef Product<Lhs, Rhs, LazyProduct> XprType;
typedef CoeffBasedProduct<Lhs, Rhs, 0> CoeffBasedProductType;
typedef typename XprType::Index Index;
typedef typename XprType::Scalar Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
@ -396,9 +397,13 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
typedef typename evaluator<RhsNestedCleaned>::type RhsEtorType;
enum {
RowsAtCompileTime = traits<CoeffBasedProductType>::RowsAtCompileTime,
RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime,
ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime,
InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),
MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime,
MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime,
PacketSize = packet_traits<Scalar>::size,
InnerSize = traits<CoeffBasedProductType>::InnerSize,
LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
@ -407,8 +412,51 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
+ (InnerSize - 1) * NumTraits<Scalar>::AddCost,
Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
CanVectorizeInner = traits<CoeffBasedProductType>::CanVectorizeInner,
Flags = traits<CoeffBasedProductType>::Flags
LhsFlags = LhsEtorType::Flags,
RhsFlags = RhsEtorType::Flags,
LhsRowMajor = LhsFlags & RowMajorBit,
RhsRowMajor = RhsFlags & RowMajorBit,
SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
&& (ColsAtCompileTime == Dynamic
|| ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
&& (RhsFlags&AlignedBit)
)
),
CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
&& (RowsAtCompileTime == Dynamic
|| ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
&& (LhsFlags&AlignedBit)
)
),
EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
: (RhsRowMajor && !CanVectorizeLhs),
Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
| (EvalToRowMajor ? RowMajorBit : 0)
| (CanVectorizeLhs ? (LhsFlags & AlignedBit) : 0)
| (CanVectorizeRhs ? (RhsFlags & AlignedBit) : 0)
// TODO enable vectorization for mixed types
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
/* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
* of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
* loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
* the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
*/
CanVectorizeInner = SameType
&& LhsRowMajor
&& (!RhsRowMajor)
&& (LhsFlags & RhsFlags & ActualPacketAccessBit)
&& (LhsFlags & RhsFlags & AlignedBit)
&& (InnerSize % packet_traits<Scalar>::size == 0)
};
const CoeffReturnType coeff(Index row, Index col) const
@ -689,7 +737,7 @@ protected:
* Diagonal products
***************************************************************************/
template<typename MatrixType, typename DiagonalType, typename Derived>
template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
struct diagonal_product_evaluator_base
: evaluator_base<Derived>
{
@ -698,7 +746,20 @@ struct diagonal_product_evaluator_base
typedef typename internal::packet_traits<Scalar>::type PacketScalar;
public:
enum {
CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost
CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
MatrixFlags = evaluator<MatrixType>::Flags,
DiagFlags = evaluator<DiagonalType>::Flags,
_StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
_ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
_SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
// FIXME currently we need same types, but in the future the next rule should be the one
//_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
_LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0) | AlignedBit
//(int(MatrixFlags)&int(DiagFlags)&AlignedBit),
};
diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
@ -724,7 +785,7 @@ protected:
{
enum {
InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
DiagonalPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagonalType::Flags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned)
DiagonalPacketLoadMode = (LoadMode == Aligned && (((InnerSize%16) == 0) || (int(DiagFlags)&AlignedBit)==AlignedBit) ? Aligned : Unaligned)
};
return internal::pmul(m_matImpl.template packet<LoadMode>(row, col),
m_diagImpl.template packet<DiagonalPacketLoadMode>(id));
@ -737,9 +798,9 @@ protected:
// diagonal * dense
template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalShape, DenseShape, typename Lhs::Scalar, typename Rhs::Scalar>
: diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct> >
: diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft>
{
typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct> > Base;
typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft> Base;
using Base::m_diagImpl;
using Base::m_matImpl;
using Base::coeff;
@ -783,9 +844,9 @@ struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalSha
// dense * diagonal
template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, DiagonalShape, typename Lhs::Scalar, typename Rhs::Scalar>
: diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct> >
: diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight>
{
typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct> > Base;
typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight> Base;
using Base::m_diagImpl;
using Base::m_matImpl;
using Base::coeff;

View File

@ -389,8 +389,19 @@ DenseBase<Derived>::redux(const Func& func) const
eigen_assert(this->rows()>0 && this->cols()>0 && "you are using an empty matrix");
#ifdef EIGEN_TEST_EVALUATORS
// FIXME, eval_nest should be handled by redux_evaluator, however:
// - it is currently difficult to provide the right Flags since they are still handled by the expressions
// - handling it here might reduce the number of template instantiations
// typedef typename internal::nested_eval<Derived,1>::type ThisNested;
// typedef typename internal::remove_all<ThisNested>::type ThisNestedCleaned;
// typedef typename internal::redux_evaluator<ThisNestedCleaned> ThisEvaluator;
//
// ThisNested thisNested(derived());
// ThisEvaluator thisEval(thisNested);
typedef typename internal::redux_evaluator<Derived> ThisEvaluator;
ThisEvaluator thisEval(derived());
return internal::redux_impl<Func, ThisEvaluator>::run(thisEval, func);
#else

View File

@ -53,10 +53,13 @@ struct traits<Replicate<MatrixType,RowFactor,ColFactor> >
IsRowMajor = MaxRowsAtCompileTime==1 && MaxColsAtCompileTime!=1 ? 1
: MaxColsAtCompileTime==1 && MaxRowsAtCompileTime!=1 ? 0
: (MatrixType::Flags & RowMajorBit) ? 1 : 0,
Flags = (_MatrixTypeNested::Flags & HereditaryBits & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0)
#ifndef EIGEN_TEST_EVALUATORS
,
Flags = (_MatrixTypeNested::Flags & HereditaryBits & ~RowMajorBit) | (IsRowMajor ? RowMajorBit : 0),
CoeffReadCost = _MatrixTypeNested::CoeffReadCost
#else
// FIXME enable DirectAccess with negative strides?
Flags = IsRowMajor ? RowMajorBit : 0
#endif
};
};

View File

@ -45,14 +45,15 @@ struct traits<Reverse<MatrixType, Direction> >
MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime,
#ifndef EIGEN_TEST_EVALUATORS
// let's enable LinearAccess only with vectorization because of the product overhead
LinearAccess = ( (Direction==BothDirections) && (int(_MatrixTypeNested::Flags)&PacketAccessBit) )
? LinearAccessBit : 0,
Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | LvalueBit | PacketAccessBit | LinearAccess)
#ifndef EIGEN_TEST_EVALUATORS
,
Flags = int(_MatrixTypeNested::Flags) & (HereditaryBits | LvalueBit | PacketAccessBit | LinearAccess),
CoeffReadCost = _MatrixTypeNested::CoeffReadCost
#else
Flags = _MatrixTypeNested::Flags & (RowMajorBit | LvalueBit)
#endif
};
};

View File

@ -43,12 +43,13 @@ struct traits<Select<ConditionMatrixType, ThenMatrixType, ElseMatrixType> >
ColsAtCompileTime = ConditionMatrixType::ColsAtCompileTime,
MaxRowsAtCompileTime = ConditionMatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = ConditionMatrixType::MaxColsAtCompileTime,
Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & HereditaryBits
#ifndef EIGEN_TEST_EVALUATORS
,
Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & HereditaryBits,
CoeffReadCost = traits<typename remove_all<ConditionMatrixNested>::type>::CoeffReadCost
+ EIGEN_SIZE_MAX(traits<typename remove_all<ThenMatrixNested>::type>::CoeffReadCost,
traits<typename remove_all<ElseMatrixNested>::type>::CoeffReadCost)
#else
Flags = (unsigned int)ThenMatrixType::Flags & ElseMatrixType::Flags & RowMajorBit
#endif
};
};

View File

@ -41,12 +41,17 @@ struct traits<Transpose<MatrixType> > : traits<MatrixType>
ColsAtCompileTime = MatrixType::RowsAtCompileTime,
MaxRowsAtCompileTime = MatrixType::MaxColsAtCompileTime,
MaxColsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
#ifndef EIGEN_TEST_EVALUATORS
FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
Flags0 = MatrixTypeNestedPlain::Flags & ~(LvalueBit | NestByRefBit),
Flags1 = Flags0 | FlagsLvalueBit,
Flags = Flags1 ^ RowMajorBit,
#ifndef EIGEN_TEST_EVALUATORS
CoeffReadCost = MatrixTypeNestedPlain::CoeffReadCost,
#else
FlagsLvalueBit = is_lvalue<MatrixType>::value ? LvalueBit : 0,
Flags0 = MatrixTypeNestedPlain::Flags & ~(LvalueBit | NestByRefBit),
Flags1 = Flags0 | FlagsLvalueBit,
Flags = Flags1 ^ RowMajorBit,
#endif
InnerStrideAtCompileTime = inner_stride_at_compile_time<MatrixType>::ret,
OuterStrideAtCompileTime = outer_stride_at_compile_time<MatrixType>::ret

View File

@ -48,8 +48,12 @@ struct traits<PartialReduxExpr<MatrixType, MemberOp, Direction> >
ColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::ColsAtCompileTime,
MaxRowsAtCompileTime = Direction==Vertical ? 1 : MatrixType::MaxRowsAtCompileTime,
MaxColsAtCompileTime = Direction==Horizontal ? 1 : MatrixType::MaxColsAtCompileTime,
#ifndef EIGEN_TEST_EVALUATORS
Flags0 = (unsigned int)_MatrixTypeNested::Flags & HereditaryBits,
Flags = (Flags0 & ~RowMajorBit) | (RowsAtCompileTime == 1 ? RowMajorBit : 0),
#else
Flags = RowsAtCompileTime == 1 ? RowMajorBit : 0,
#endif
TraversalSize = Direction==Vertical ? MatrixType::RowsAtCompileTime : MatrixType::ColsAtCompileTime
};
#ifndef EIGEN_TEST_EVALUATORS

View File

@ -259,7 +259,7 @@ template<int Mode> struct trmv_selector<Mode,ColMajor>
typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
typedef internal::blas_traits<Rhs> RhsBlasTraits;
typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
typedef Map<Matrix<ResScalar,Dynamic,1>, Aligned> MappedDest;
typename internal::add_const_on_value_type<ActualLhsType>::type actualLhs = LhsBlasTraits::extract(lhs);

View File

@ -157,6 +157,18 @@ template<typename _Scalar, int Rows=Dynamic, int Cols=Dynamic, int Supers=Dynami
namespace internal {
template<typename Lhs, typename Rhs> struct product_type;
/** \internal
  * \class product_evaluator
  * Products need their own evaluator with more template arguments allowing for
  * easier partial template specializations.
  *
  * Only \c T has to be supplied explicitly; every other parameter has a default
  * that is derived from \c T::Lhs and \c T::Rhs.
  *
  * \tparam T          the product expression type; must expose nested \c Lhs and \c Rhs types
  * \tparam ProductTag defaults to \c product_type<T::Lhs,T::Rhs>::ret
  * \tparam LhsShape   defaults to \c evaluator_traits<T::Lhs>::Shape
  * \tparam RhsShape   defaults to \c evaluator_traits<T::Rhs>::Shape
  * \tparam LhsScalar  defaults to \c T::Lhs::Scalar
  * \tparam RhsScalar  defaults to \c T::Rhs::Scalar
  */
template< typename T,
int ProductTag = internal::product_type<typename T::Lhs,typename T::Rhs>::ret,
typename LhsShape = typename evaluator_traits<typename T::Lhs>::Shape,
typename RhsShape = typename evaluator_traits<typename T::Rhs>::Shape,
typename LhsScalar = typename T::Lhs::Scalar,
typename RhsScalar = typename T::Rhs::Scalar
> struct product_evaluator;
}
template<typename Lhs, typename Rhs,

View File

@ -124,6 +124,7 @@ template<typename _Scalar, int _Rows, int _Cols,
typedef Matrix<_Scalar, _Rows, _Cols, Options, _MaxRows, _MaxCols> type;
};
#ifndef EIGEN_TEST_EVALUATORS
template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
class compute_matrix_flags
{
@ -158,6 +159,57 @@ class compute_matrix_flags
enum { ret = LinearAccessBit | LvalueBit | DirectAccessBit | NestByRefBit | packet_access_bit | row_major_bit | aligned_bit };
};
#else // EIGEN_TEST_EVALUATORS
/** \internal
  * Expression-level flags of a plain matrix/array type.
  *
  * The result \c ret is the union of DirectAccessBit, LvalueBit and NestByRefBit,
  * plus RowMajorBit when \c Options requests row-major storage. The remaining
  * template parameters do not take part in the computation.
  */
template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
class compute_matrix_flags
{
  private:
    // RowMajorBit if the RowMajor option is set, nothing otherwise
    enum {
      row_major_bit = (Options & RowMajor) ? RowMajorBit : 0
    };

  public:
    // FIXME (Gael): DirectAccessBit still has to be handled at the expression level
    // because DenseCoeffsBase<> depends on it; the information is then propagated
    // to the evaluator's flags. Ideally DirectAccessBit should only matter at the
    // evaluation stage.
    enum {
      ret = row_major_bit | NestByRefBit | LvalueBit | DirectAccessBit
    };
};
#endif
#ifdef EIGEN_ENABLE_EVALUATORS
// Computes, at compile time, the flag set exposed as `ret` for a plain
// matrix/array type described by the template parameters.
// Rows and Cols are accepted but not used by the computation below.
template<typename Scalar, int Rows, int Cols, int Options, int MaxRows, int MaxCols>
class compute_matrix_evaluator_flags
{
enum {
// RowMajorBit when the RowMajor option is set, 0 otherwise
row_major_bit = Options&RowMajor ? RowMajorBit : 0,
// true as soon as one of the maximal dimensions is Dynamic
is_dynamic_size_storage = MaxRows==Dynamic || MaxCols==Dynamic,
// AlignedBit is granted only if DontAlign is absent from Options AND one of
// the two preprocessor-selected alternatives below holds.
aligned_bit =
(
((Options&DontAlign)==0)
&& (
#if EIGEN_ALIGN_STATICALLY
// fixed-size storage whose total byte size is a multiple of 16
((!is_dynamic_size_storage) && (((MaxCols*MaxRows*int(sizeof(Scalar))) % 16) == 0))
#else
// EIGEN_ALIGN_STATICALLY is off: this alternative never holds
0
#endif
||
#if EIGEN_ALIGN
// dynamic-size storage qualifies whenever EIGEN_ALIGN is on
is_dynamic_size_storage
#else
// EIGEN_ALIGN is off: this alternative never holds
0
#endif
)
) ? AlignedBit : 0,
// packet access requires both a vectorizable scalar type and the aligned bit
packet_access_bit = packet_traits<Scalar>::Vectorizable && aligned_bit ? PacketAccessBit : 0
};
public:
// LinearAccessBit and DirectAccessBit are always set; the other three bits are
// the conditional ones computed above.
enum { ret = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit | aligned_bit };
};
#endif // EIGEN_ENABLE_EVALUATORS
template<int _Rows, int _Cols> struct size_at_compile_time
{
enum { ret = (_Rows==Dynamic || _Cols==Dynamic) ? Dynamic : _Rows * _Cols };