mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-22 09:39:34 +08:00
fix bugs, old and new:
* old bug: in CwiseBinaryOp: only set the LinearAccessBit if both sides have the same storage order. * new bug: in Assign.h, only consider linear traversal if both sides have the same storage order.
This commit is contained in:
parent
11fa2ae2c6
commit
40865fa28c
@ -28,7 +28,7 @@
|
||||
#define EIGEN_ASSIGN_H
|
||||
|
||||
/***************************************************************************
|
||||
* Part 1 : the logic deciding a strategy for vectorization and unrolling
|
||||
* Part 1 : the logic deciding a strategy for traversal and unrolling *
|
||||
***************************************************************************/
|
||||
|
||||
template <typename Derived, typename OtherDerived>
|
||||
@ -53,11 +53,12 @@ private:
|
||||
};
|
||||
|
||||
enum {
|
||||
MightVectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit)
|
||||
&& ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)),
|
||||
StorageOrdersAgree = (int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit),
|
||||
MightVectorize = StorageOrdersAgree
|
||||
&& (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
|
||||
MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
|
||||
&& int(DstIsAligned) && int(SrcIsAligned),
|
||||
MayLinearize = (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
|
||||
MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
|
||||
MayLinearVectorize = MightVectorize && MayLinearize
|
||||
&& (DstIsAligned || InnerMaxSize == Dynamic),
|
||||
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
|
||||
@ -73,7 +74,7 @@ public:
|
||||
Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
|
||||
: int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
|
||||
: int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
|
||||
// : int(MayLinearize) ? int(LinearTraversal)
|
||||
: int(MayLinearize) ? int(LinearTraversal)
|
||||
: int(DefaultTraversal),
|
||||
Vectorized = int(Traversal) == InnerVectorizedTraversal
|
||||
|| int(Traversal) == LinearVectorizedTraversal
|
||||
@ -110,6 +111,7 @@ public:
|
||||
EIGEN_DEBUG_VAR(InnerSize)
|
||||
EIGEN_DEBUG_VAR(InnerMaxSize)
|
||||
EIGEN_DEBUG_VAR(PacketSize)
|
||||
EIGEN_DEBUG_VAR(StorageOrdersAgree)
|
||||
EIGEN_DEBUG_VAR(MightVectorize)
|
||||
EIGEN_DEBUG_VAR(MayInnerVectorize)
|
||||
EIGEN_DEBUG_VAR(MayLinearVectorize)
|
||||
|
@ -67,11 +67,16 @@ struct ei_traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
|
||||
ColsAtCompileTime = Lhs::ColsAtCompileTime,
|
||||
MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
|
||||
MaxColsAtCompileTime = Lhs::MaxColsAtCompileTime,
|
||||
StorageOrdersAgree = (int(Lhs::Flags)&RowMajorBit)==(int(Rhs::Flags)&RowMajorBit),
|
||||
Flags = (int(LhsFlags) | int(RhsFlags)) & (
|
||||
HereditaryBits
|
||||
| (int(LhsFlags) & int(RhsFlags) & (LinearAccessBit | AlignedBit))
|
||||
| (ei_functor_traits<BinaryOp>::PacketAccess && ((int(LhsFlags) & RowMajorBit)==(int(RhsFlags) & RowMajorBit))
|
||||
? (int(LhsFlags) & int(RhsFlags) & PacketAccessBit) : 0)),
|
||||
| (int(LhsFlags) & int(RhsFlags) &
|
||||
( AlignedBit
|
||||
| (StorageOrdersAgree ? LinearAccessBit : 0)
|
||||
| (ei_functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree ? PacketAccessBit : 0)
|
||||
)
|
||||
)
|
||||
),
|
||||
CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + ei_functor_traits<BinaryOp>::Cost
|
||||
};
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user