mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-08-12 11:49:02 +08:00
Enable the use of half-packet in coeff-based product.
For instance, Matrix4f*Vector4f is now vectorized again when using AVX.
This commit is contained in:
parent
e3a184785c
commit
b67c983291
@ -410,8 +410,6 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|||||||
typedef Product<Lhs, Rhs, LazyProduct> XprType;
|
typedef Product<Lhs, Rhs, LazyProduct> XprType;
|
||||||
typedef typename XprType::Scalar Scalar;
|
typedef typename XprType::Scalar Scalar;
|
||||||
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
typedef typename XprType::CoeffReturnType CoeffReturnType;
|
||||||
typedef typename XprType::PacketScalar PacketScalar;
|
|
||||||
typedef typename XprType::PacketReturnType PacketReturnType;
|
|
||||||
|
|
||||||
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
|
||||||
explicit product_evaluator(const XprType& xpr)
|
explicit product_evaluator(const XprType& xpr)
|
||||||
@ -443,9 +441,13 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|||||||
ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime,
|
ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime,
|
||||||
InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),
|
InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),
|
||||||
MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime,
|
MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime,
|
||||||
MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime,
|
MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime
|
||||||
|
};
|
||||||
|
|
||||||
PacketSize = packet_traits<Scalar>::size,
|
typedef typename find_best_packet<Scalar,RowsAtCompileTime>::type LhsVecPacketType;
|
||||||
|
typedef typename find_best_packet<Scalar,ColsAtCompileTime>::type RhsVecPacketType;
|
||||||
|
|
||||||
|
enum {
|
||||||
|
|
||||||
LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
|
LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
|
||||||
RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
|
RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
|
||||||
@ -459,19 +461,23 @@ struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape,
|
|||||||
LhsFlags = LhsEtorType::Flags,
|
LhsFlags = LhsEtorType::Flags,
|
||||||
RhsFlags = RhsEtorType::Flags,
|
RhsFlags = RhsEtorType::Flags,
|
||||||
|
|
||||||
LhsAlignment = LhsEtorType::Alignment,
|
|
||||||
RhsAlignment = RhsEtorType::Alignment,
|
|
||||||
|
|
||||||
LhsRowMajor = LhsFlags & RowMajorBit,
|
LhsRowMajor = LhsFlags & RowMajorBit,
|
||||||
RhsRowMajor = RhsFlags & RowMajorBit,
|
RhsRowMajor = RhsFlags & RowMajorBit,
|
||||||
|
|
||||||
|
LhsVecPacketSize = unpacket_traits<LhsVecPacketType>::size,
|
||||||
|
RhsVecPacketSize = unpacket_traits<RhsVecPacketType>::size,
|
||||||
|
|
||||||
|
//
|
||||||
|
LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))),
|
||||||
|
RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))),
|
||||||
|
|
||||||
SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
|
SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
|
||||||
|
|
||||||
CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
|
CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
|
||||||
&& (ColsAtCompileTime == Dynamic || ((ColsAtCompileTime % PacketSize) == 0) ),
|
&& (ColsAtCompileTime == Dynamic || ((ColsAtCompileTime % RhsVecPacketSize) == 0) ),
|
||||||
|
|
||||||
CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
|
CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
|
||||||
&& (RowsAtCompileTime == Dynamic || ((RowsAtCompileTime % PacketSize) == 0) ),
|
&& (RowsAtCompileTime == Dynamic || ((RowsAtCompileTime % LhsVecPacketSize) == 0) ),
|
||||||
|
|
||||||
EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
|
EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
|
||||||
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
|
: (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
|
||||||
|
Loading…
x
Reference in New Issue
Block a user