mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
* do the ActualPacketAccessBit change as discussed on list
* add comment in Product.h about CanVectorizeInner
* fix typo in test/product.cpp
This commit is contained in:
parent
8463b7d3f4
commit
a9d319d44f
@ -46,7 +46,7 @@ private:
|
||||
};
|
||||
|
||||
enum {
|
||||
MightVectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & PacketAccessBit)
|
||||
MightVectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit)
|
||||
&& ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)),
|
||||
MayInnerVectorize = MightVectorize && InnerSize!=Dynamic && int(InnerSize)%int(PacketSize)==0,
|
||||
MayLinearVectorize = MightVectorize && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
|
||||
|
@ -34,7 +34,7 @@ struct ei_dot_traits
|
||||
{
|
||||
public:
|
||||
enum {
|
||||
Vectorization = (int(Derived1::Flags)&int(Derived2::Flags)&PacketAccessBit)
|
||||
Vectorization = (int(Derived1::Flags)&int(Derived2::Flags)&ActualPacketAccessBit)
|
||||
&& (int(Derived1::Flags)&int(Derived2::Flags)&LinearAccessBit)
|
||||
? LinearVectorization
|
||||
: NoVectorization
|
||||
|
@ -25,12 +25,8 @@
|
||||
#ifndef EIGEN_IO_H
|
||||
#define EIGEN_IO_H
|
||||
|
||||
/** \relates MatrixBase
|
||||
*
|
||||
* Outputs the matrix, laid out as an array as usual, to the given stream.
|
||||
*/
|
||||
template<typename Derived>
|
||||
std::ostream & operator <<
|
||||
std::ostream & ei_print_matrix
|
||||
(std::ostream & s,
|
||||
const MatrixBase<Derived> & m)
|
||||
{
|
||||
@ -45,4 +41,16 @@ std::ostream & operator <<
|
||||
return s;
|
||||
}
|
||||
|
||||
/** \relates MatrixBase
|
||||
*
|
||||
* Outputs the matrix, laid out as an array as usual, to the given stream.
|
||||
*/
|
||||
template<typename Derived>
|
||||
std::ostream & operator <<
|
||||
(std::ostream & s,
|
||||
const MatrixBase<Derived> & m)
|
||||
{
|
||||
return ei_print_matrix(s, m.eval());
|
||||
}
|
||||
|
||||
#endif // EIGEN_IO_H
|
||||
|
@ -141,9 +141,6 @@ struct ei_traits<Product<LhsNested, RhsNested, ProductMode> >
|
||||
CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
|
||||
&& (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
|
||||
|
||||
CanVectorizeInner = LhsRowMajor && (!RhsRowMajor) && (LhsFlags & PacketAccessBit) && (RhsFlags & PacketAccessBit)
|
||||
&& (InnerSize!=Dynamic) && (InnerSize % ei_packet_traits<Scalar>::size == 0),
|
||||
|
||||
EvalToRowMajor = RhsRowMajor && (ProductMode==(int)CacheFriendlyProduct ? LhsRowMajor : (!CanVectorizeLhs)),
|
||||
|
||||
RemovedBits = ~((EvalToRowMajor ? 0 : RowMajorBit)
|
||||
@ -156,7 +153,15 @@ struct ei_traits<Product<LhsNested, RhsNested, ProductMode> >
|
||||
|
||||
CoeffReadCost = InnerSize == Dynamic ? Dynamic
|
||||
: InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
|
||||
+ (InnerSize - 1) * NumTraits<Scalar>::AddCost
|
||||
+ (InnerSize - 1) * NumTraits<Scalar>::AddCost,
|
||||
|
||||
/* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
|
||||
* of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
|
||||
* loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
|
||||
* the Flags, it is safe to make this value depend on ActualPacketAccessBit, as doing so doesn't affect the ABI.
|
||||
*/
|
||||
CanVectorizeInner = LhsRowMajor && (!RhsRowMajor) && (LhsFlags & RhsFlags & ActualPacketAccessBit)
|
||||
&& (InnerSize!=Dynamic) && (InnerSize % ei_packet_traits<Scalar>::size == 0)
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -35,7 +35,7 @@ struct ei_sum_traits
|
||||
{
|
||||
public:
|
||||
enum {
|
||||
Vectorization = (int(Derived::Flags)&PacketAccessBit)
|
||||
Vectorization = (int(Derived::Flags)&ActualPacketAccessBit)
|
||||
&& (int(Derived::Flags)&LinearAccessBit)
|
||||
? LinearVectorization
|
||||
: NoVectorization
|
||||
|
@ -56,7 +56,6 @@ const unsigned int EvalBeforeNestingBit = 0x2;
|
||||
* means the expression should be evaluated before any assignment */
|
||||
const unsigned int EvalBeforeAssigningBit = 0x4;
|
||||
|
||||
#ifdef EIGEN_VECTORIZE
|
||||
/** \ingroup flags
|
||||
*
|
||||
* Short version: means the expression might be vectorized
|
||||
@ -70,12 +69,23 @@ const unsigned int EvalBeforeAssigningBit = 0x4;
|
||||
* on the total size, so it might not be possible to access the few last coeffs
|
||||
* by packets.
|
||||
*
|
||||
* \note If vectorization is not enabled (EIGEN_VECTORIZE is not defined) this constant
|
||||
* is set to the value 0.
|
||||
* \note This bit can be set regardless of whether vectorization is actually enabled.
|
||||
* To check for actual vectorizability, see \a ActualPacketAccessBit.
|
||||
*/
|
||||
const unsigned int PacketAccessBit = 0x8;
|
||||
|
||||
#ifdef EIGEN_VECTORIZE
|
||||
/** \ingroup flags
|
||||
*
|
||||
* If vectorization is enabled (EIGEN_VECTORIZE is defined) this constant
|
||||
* is set to the value \a PacketAccessBit.
|
||||
*
|
||||
* If vectorization is not enabled (EIGEN_VECTORIZE is not defined) this constant
|
||||
* is set to the value 0.
|
||||
*/
|
||||
const unsigned int ActualPacketAccessBit = PacketAccessBit;
|
||||
#else
|
||||
const unsigned int PacketAccessBit = 0x0;
|
||||
const unsigned int ActualPacketAccessBit = 0x0;
|
||||
#endif
|
||||
|
||||
/** \ingroup flags
|
||||
|
@ -33,7 +33,7 @@ template<typename MatrixType> void product(const MatrixType& m)
|
||||
typedef typename MatrixType::Scalar Scalar;
|
||||
typedef typename NumTraits<Scalar>::FloatingPoint FloatingPoint;
|
||||
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
|
||||
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime> RowSquareMatrixType;
|
||||
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> RowSquareMatrixType;
|
||||
typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, MatrixType::ColsAtCompileTime> ColSquareMatrixType;
|
||||
|
||||
int rows = m.rows();
|
||||
|
Loading…
x
Reference in New Issue
Block a user