mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-23 10:09:36 +08:00
fix bugs, old and new:
* old bug: in CwiseBinaryOp, only set the LinearAccessBit if both sides have the same storage order.
* new bug: in Assign.h, only consider linear traversal if both sides have the same storage order.
This commit is contained in:
parent
11fa2ae2c6
commit
40865fa28c
@ -28,7 +28,7 @@
|
|||||||
#define EIGEN_ASSIGN_H
|
#define EIGEN_ASSIGN_H
|
||||||
|
|
||||||
/***************************************************************************
|
/***************************************************************************
|
||||||
* Part 1 : the logic deciding a strategy for vectorization and unrolling
|
* Part 1 : the logic deciding a strategy for traversal and unrolling *
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
template <typename Derived, typename OtherDerived>
|
template <typename Derived, typename OtherDerived>
|
||||||
@ -53,11 +53,12 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
MightVectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit)
|
StorageOrdersAgree = (int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit),
|
||||||
&& ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)),
|
MightVectorize = StorageOrdersAgree
|
||||||
|
&& (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit),
|
||||||
MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
|
MayInnerVectorize = MightVectorize && int(InnerSize)!=Dynamic && int(InnerSize)%int(PacketSize)==0
|
||||||
&& int(DstIsAligned) && int(SrcIsAligned),
|
&& int(DstIsAligned) && int(SrcIsAligned),
|
||||||
MayLinearize = (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
|
MayLinearize = StorageOrdersAgree && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
|
||||||
MayLinearVectorize = MightVectorize && MayLinearize
|
MayLinearVectorize = MightVectorize && MayLinearize
|
||||||
&& (DstIsAligned || InnerMaxSize == Dynamic),
|
&& (DstIsAligned || InnerMaxSize == Dynamic),
|
||||||
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
|
/* If the destination isn't aligned, we have to do runtime checks and we don't unroll,
|
||||||
@ -73,7 +74,7 @@ public:
|
|||||||
Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
|
Traversal = int(MayInnerVectorize) ? int(InnerVectorizedTraversal)
|
||||||
: int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
|
: int(MayLinearVectorize) ? int(LinearVectorizedTraversal)
|
||||||
: int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
|
: int(MaySliceVectorize) ? int(SliceVectorizedTraversal)
|
||||||
// : int(MayLinearize) ? int(LinearTraversal)
|
: int(MayLinearize) ? int(LinearTraversal)
|
||||||
: int(DefaultTraversal),
|
: int(DefaultTraversal),
|
||||||
Vectorized = int(Traversal) == InnerVectorizedTraversal
|
Vectorized = int(Traversal) == InnerVectorizedTraversal
|
||||||
|| int(Traversal) == LinearVectorizedTraversal
|
|| int(Traversal) == LinearVectorizedTraversal
|
||||||
@ -110,6 +111,7 @@ public:
|
|||||||
EIGEN_DEBUG_VAR(InnerSize)
|
EIGEN_DEBUG_VAR(InnerSize)
|
||||||
EIGEN_DEBUG_VAR(InnerMaxSize)
|
EIGEN_DEBUG_VAR(InnerMaxSize)
|
||||||
EIGEN_DEBUG_VAR(PacketSize)
|
EIGEN_DEBUG_VAR(PacketSize)
|
||||||
|
EIGEN_DEBUG_VAR(StorageOrdersAgree)
|
||||||
EIGEN_DEBUG_VAR(MightVectorize)
|
EIGEN_DEBUG_VAR(MightVectorize)
|
||||||
EIGEN_DEBUG_VAR(MayInnerVectorize)
|
EIGEN_DEBUG_VAR(MayInnerVectorize)
|
||||||
EIGEN_DEBUG_VAR(MayLinearVectorize)
|
EIGEN_DEBUG_VAR(MayLinearVectorize)
|
||||||
|
@ -67,11 +67,16 @@ struct ei_traits<CwiseBinaryOp<BinaryOp, Lhs, Rhs> >
|
|||||||
ColsAtCompileTime = Lhs::ColsAtCompileTime,
|
ColsAtCompileTime = Lhs::ColsAtCompileTime,
|
||||||
MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
|
MaxRowsAtCompileTime = Lhs::MaxRowsAtCompileTime,
|
||||||
MaxColsAtCompileTime = Lhs::MaxColsAtCompileTime,
|
MaxColsAtCompileTime = Lhs::MaxColsAtCompileTime,
|
||||||
|
StorageOrdersAgree = (int(Lhs::Flags)&RowMajorBit)==(int(Rhs::Flags)&RowMajorBit),
|
||||||
Flags = (int(LhsFlags) | int(RhsFlags)) & (
|
Flags = (int(LhsFlags) | int(RhsFlags)) & (
|
||||||
HereditaryBits
|
HereditaryBits
|
||||||
| (int(LhsFlags) & int(RhsFlags) & (LinearAccessBit | AlignedBit))
|
| (int(LhsFlags) & int(RhsFlags) &
|
||||||
| (ei_functor_traits<BinaryOp>::PacketAccess && ((int(LhsFlags) & RowMajorBit)==(int(RhsFlags) & RowMajorBit))
|
( AlignedBit
|
||||||
? (int(LhsFlags) & int(RhsFlags) & PacketAccessBit) : 0)),
|
| (StorageOrdersAgree ? LinearAccessBit : 0)
|
||||||
|
| (ei_functor_traits<BinaryOp>::PacketAccess && StorageOrdersAgree ? PacketAccessBit : 0)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
),
|
||||||
CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + ei_functor_traits<BinaryOp>::Cost
|
CoeffReadCost = LhsCoeffReadCost + RhsCoeffReadCost + ei_functor_traits<BinaryOp>::Cost
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user