mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-05-07 03:39:04 +08:00
Fix Alignment in coeff-based product, and enable unaligned vectorization
This commit is contained in:
parent
03ad4fc504
commit
543bd28a24
@ -447,7 +447,7 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|||||||
MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime,
|
MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime,
|
||||||
|
|
||||||
PacketSize = packet_traits<Scalar>::size,
|
PacketSize = packet_traits<Scalar>::size,
|
||||||
|
|
||||||
LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
|
LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
|
||||||
RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
|
RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
|
||||||
CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
|
CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
|
||||||
@ -463,19 +463,16 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|||||||
LhsAlignment = LhsEtorType::Alignment,
|
LhsAlignment = LhsEtorType::Alignment,
|
||||||
RhsAlignment = RhsEtorType::Alignment,
|
RhsAlignment = RhsEtorType::Alignment,
|
||||||
|
|
||||||
LhsIsAligned = int(LhsAlignment) >= int(unpacket_traits<PacketScalar>::alignment),
|
|
||||||
RhsIsAligned = int(RhsAlignment) >= int(unpacket_traits<PacketScalar>::alignment),
|
|
||||||
|
|
||||||
LhsRowMajor = LhsFlags & RowMajorBit,
|
LhsRowMajor = LhsFlags & RowMajorBit,
|
||||||
RhsRowMajor = RhsFlags & RowMajorBit,
|
RhsRowMajor = RhsFlags & RowMajorBit,
|
||||||
|
|
||||||
SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
|
SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
|
||||||
|
|
||||||
CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
|
CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
|
||||||
&& (ColsAtCompileTime == Dynamic || ( (ColsAtCompileTime % PacketSize) == 0 && RhsIsAligned ) ),
|
&& (ColsAtCompileTime == Dynamic || ((ColsAtCompileTime % PacketSize) == 0) ),
|
||||||
|
|
||||||
CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
|
CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
|
||||||
&& (RowsAtCompileTime == Dynamic || ( (RowsAtCompileTime % PacketSize) == 0 && LhsIsAligned ) ),
|
&& (RowsAtCompileTime == Dynamic || ((RowsAtCompileTime % PacketSize) == 0) ),
|
||||||
|
|
||||||
EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
|
EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
|
||||||
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
|
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
|
||||||
@ -487,10 +484,13 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|||||||
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
|
| (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
|
||||||
| (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),
|
| (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),
|
||||||
|
|
||||||
Alignment = CanVectorizeLhs ? LhsAlignment
|
LhsOuterStrideBytes = LhsNestedCleaned::OuterStrideAtCompileTime * sizeof(typename LhsNestedCleaned::Scalar),
|
||||||
: CanVectorizeRhs ? RhsAlignment
|
RhsOuterStrideBytes = RhsNestedCleaned::OuterStrideAtCompileTime * sizeof(typename RhsNestedCleaned::Scalar),
|
||||||
|
|
||||||
|
Alignment = CanVectorizeLhs ? (LhsOuterStrideBytes<0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment)
|
||||||
|
: CanVectorizeRhs ? (RhsOuterStrideBytes<0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment)
|
||||||
: 0,
|
: 0,
|
||||||
|
|
||||||
/* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
|
/* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
|
||||||
* of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
|
* of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
|
||||||
* loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
|
* loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
|
||||||
@ -500,7 +500,6 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|||||||
&& LhsRowMajor
|
&& LhsRowMajor
|
||||||
&& (!RhsRowMajor)
|
&& (!RhsRowMajor)
|
||||||
&& (LhsFlags & RhsFlags & ActualPacketAccessBit)
|
&& (LhsFlags & RhsFlags & ActualPacketAccessBit)
|
||||||
&& (LhsIsAligned && RhsIsAligned)
|
|
||||||
&& (InnerSize % packet_traits<Scalar>::size == 0)
|
&& (InnerSize % packet_traits<Scalar>::size == 0)
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -524,10 +523,9 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|||||||
const PacketType packet(Index row, Index col) const
|
const PacketType packet(Index row, Index col) const
|
||||||
{
|
{
|
||||||
PacketType res;
|
PacketType res;
|
||||||
typedef etor_product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
|
typedef etor_product_packet_impl<bool(int(Flags)&RowMajorBit) ? RowMajor : ColMajor,
|
||||||
Unroll ? InnerSize : Dynamic,
|
Unroll ? int(InnerSize) : Dynamic,
|
||||||
LhsEtorType, RhsEtorType, PacketType, LoadMode> PacketImpl;
|
LhsEtorType, RhsEtorType, PacketType, LoadMode> PacketImpl;
|
||||||
|
|
||||||
PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
|
PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user