fix bugs, old and new:

* old bug: in CwiseBinaryOp, LinearAccessBit was set even when the two sides had different storage orders; now it is only set if both sides have the same storage order.
* new bug: in Assign.h, linear traversal was considered even when the two sides had different storage orders; now it is only considered if both sides have the same storage order.
This commit is contained in:
Benoit Jacob 2009-11-18 17:20:39 -05:00
parent 11fa2ae2c6
commit 40865fa28c
2 changed files with 15 additions and 8 deletions

View File

@ -28,7 +28,7 @@
#define EIGEN_ASSIGN_H #define EIGEN_ASSIGN_H
/*************************************************************************** /***************************************************************************
* Part 1 : the logic deciding a strategy for vectorization and unrolling * Part 1 : the logic deciding a strategy for traversal and unrolling *
***************************************************************************/ ***************************************************************************/
template <typename Derived, typename OtherDerived> template <typename Derived, typename OtherDerived>
@ -53,11 +53,12 @@ private:
}; };
enum { enum {
MightVectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit) StorageOrdersAgree = (int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit),
&& ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)), MightVectorize = StorageOrdersAgree
&& (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0 MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
&& int(DstIsAligned) && int(SrcIsAligned), && int(DstIsAligned) && int(SrcIsAligned),
MayLinearize = (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit), MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
MayLinearVectorize = MightVectorize && MayLinearize MayLinearVectorize = MightVectorize && MayLinearize
&& (DstIsAligned || InnerMaxSize == Dynamic), && (DstIsAligned || InnerMaxSize == Dynamic),
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll, /* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
@ -73,7 +74,7 @@ public:
Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal) Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
: int(MayLinearVectorize) ? int(LinearVectorizedTraversal) : int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
: int(MaySliceVectorize) ? int(SliceVectorizedTraversal) : int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
// : int(MayLinearize) ? int(LinearTraversal) : int(MayLinearize) ? int(LinearTraversal)
: int(DefaultTraversal), : int(DefaultTraversal),
Vectorized = int(Traversal) == InnerVectorizedTraversal Vectorized = int(Traversal) == InnerVectorizedTraversal
|| int(Traversal) == LinearVectorizedTraversal || int(Traversal) == LinearVectorizedTraversal
@ -110,6 +111,7 @@ public:
EIGEN_DEBUG_VAR(InnerSize) EIGEN_DEBUG_VAR(InnerSize)
EIGEN_DEBUG_VAR(InnerMaxSize) EIGEN_DEBUG_VAR(InnerMaxSize)
EIGEN_DEBUG_VAR(PacketSize) EIGEN_DEBUG_VAR(PacketSize)
EIGEN_DEBUG_VAR(StorageOrdersAgree)
EIGEN_DEBUG_VAR(MightVectorize) EIGEN_DEBUG_VAR(MightVectorize)
EIGEN_DEBUG_VAR(MayInnerVectorize) EIGEN_DEBUG_VAR(MayInnerVectorize)
EIGEN_DEBUG_VAR(MayLinearVectorize) EIGEN_DEBUG_VAR(MayLinearVectorize)

View File

@ -67,11 +67,16 @@ struct ei_traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
ColsAtCompileTime = Lhs::ColsAtCompileTime, ColsAtCompileTime = Lhs::ColsAtCompileTime,
MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime, MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
MaxColsAtCompileTime = Lhs::MaxColsAtCompileTime, MaxColsAtCompileTime = Lhs::MaxColsAtCompileTime,
StorageOrdersAgree = (int(Lhs::Flags)&RowMajorBit)==(int(Rhs::Flags)&RowMajorBit),
Flags = (int(LhsFlags) | int(RhsFlags)) & ( Flags = (int(LhsFlags) | int(RhsFlags)) & (
HereditaryBits HereditaryBits
| (int(LhsFlags) & int(RhsFlags) & (LinearAccessBit | AlignedBit)) | (int(LhsFlags) & int(RhsFlags) &
| (ei_functor_traits<BinaryOp>::PacketAccess && ((int(LhsFlags) & RowMajorBit)==(int(RhsFlags) & RowMajorBit)) ( AlignedBit
? (int(LhsFlags) & int(RhsFlags) & PacketAccessBit) : 0)), | (StorageOrdersAgree ? LinearAccessBit : 0)
| (ei_functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree ? PacketAccessBit : 0)
)
)
),
CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + ei_functor_traits<BinaryOp>::Cost CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + ei_functor_traits<BinaryOp>::Cost
}; };
}; };