mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-06-04 18:54:00 +08:00
* do the ActualPacketAccessBit change as discussed on list
* add comment in Product.h about CanVectorizeInner
* fix typo in test/product.cpp
This commit is contained in:
parent
8463b7d3f4
commit
a9d319d44f
@ -46,7 +46,7 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
MightVectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & PacketAccessBit)
|
MightVectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit)
|
||||||
&& ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)),
|
&& ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)),
|
||||||
MayInnerVectorize = MightVectorize && InnerSize!=Dynamic && int(InnerSize)%int(PacketSize)==0,
|
MayInnerVectorize = MightVectorize && InnerSize!=Dynamic && int(InnerSize)%int(PacketSize)==0,
|
||||||
MayLinearVectorize = MightVectorize && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
|
MayLinearVectorize = MightVectorize && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
|
||||||
|
@ -34,7 +34,7 @@ struct ei_dot_traits
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
enum {
|
enum {
|
||||||
Vectorization = (int(Derived1::Flags)&int(Derived2::Flags)&PacketAccessBit)
|
Vectorization = (int(Derived1::Flags)&int(Derived2::Flags)&ActualPacketAccessBit)
|
||||||
&& (int(Derived1::Flags)&int(Derived2::Flags)&LinearAccessBit)
|
&& (int(Derived1::Flags)&int(Derived2::Flags)&LinearAccessBit)
|
||||||
? LinearVectorization
|
? LinearVectorization
|
||||||
: NoVectorization
|
: NoVectorization
|
||||||
|
@ -25,12 +25,8 @@
|
|||||||
#ifndef EIGEN_IO_H
|
#ifndef EIGEN_IO_H
|
||||||
#define EIGEN_IO_H
|
#define EIGEN_IO_H
|
||||||
|
|
||||||
/** \relates MatrixBase
|
|
||||||
*
|
|
||||||
* Outputs the matrix, laid out as an array as usual, to the given stream.
|
|
||||||
*/
|
|
||||||
template<typename Derived>
|
template<typename Derived>
|
||||||
std::ostream & operator <<
|
std::ostream & ei_print_matrix
|
||||||
(std::ostream & s,
|
(std::ostream & s,
|
||||||
const MatrixBase<Derived> & m)
|
const MatrixBase<Derived> & m)
|
||||||
{
|
{
|
||||||
@ -45,4 +41,16 @@ std::ostream & operator <<
|
|||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** \relates MatrixBase
|
||||||
|
*
|
||||||
|
* Outputs the matrix, laid out as an array as usual, to the given stream.
|
||||||
|
*/
|
||||||
|
template<typename Derived>
|
||||||
|
std::ostream & operator <<
|
||||||
|
(std::ostream & s,
|
||||||
|
const MatrixBase<Derived> & m)
|
||||||
|
{
|
||||||
|
return ei_print_matrix(s, m.eval());
|
||||||
|
}
|
||||||
|
|
||||||
#endif // EIGEN_IO_H
|
#endif // EIGEN_IO_H
|
||||||
|
@ -141,9 +141,6 @@ struct ei_traits<Product<LhsNested, RhsNested, ProductMode> >
|
|||||||
CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
|
CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
|
||||||
&& (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
|
&& (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),
|
||||||
|
|
||||||
CanVectorizeInner = LhsRowMajor && (!RhsRowMajor) && (LhsFlags & PacketAccessBit) && (RhsFlags & PacketAccessBit)
|
|
||||||
&& (InnerSize!=Dynamic) && (InnerSize % ei_packet_traits<Scalar>::size == 0),
|
|
||||||
|
|
||||||
EvalToRowMajor = RhsRowMajor && (ProductMode==(int)CacheFriendlyProduct ? LhsRowMajor : (!CanVectorizeLhs)),
|
EvalToRowMajor = RhsRowMajor && (ProductMode==(int)CacheFriendlyProduct ? LhsRowMajor : (!CanVectorizeLhs)),
|
||||||
|
|
||||||
RemovedBits = ~((EvalToRowMajor ? 0 : RowMajorBit)
|
RemovedBits = ~((EvalToRowMajor ? 0 : RowMajorBit)
|
||||||
@ -156,7 +153,15 @@ struct ei_traits<Product<LhsNested, RhsNested, ProductMode> >
|
|||||||
|
|
||||||
CoeffReadCost = InnerSize == Dynamic ? Dynamic
|
CoeffReadCost = InnerSize == Dynamic ? Dynamic
|
||||||
: InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
|
: InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
|
||||||
+ (InnerSize - 1) * NumTraits<Scalar>::AddCost
|
+ (InnerSize - 1) * NumTraits<Scalar>::AddCost,
|
||||||
|
|
||||||
|
/* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
|
||||||
|
* of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
|
||||||
|
* loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
|
||||||
|
* the Flags, it is safe to make this value depend on ActualPacketAccessBit, since doing so doesn't affect the ABI.
|
||||||
|
*/
|
||||||
|
CanVectorizeInner = LhsRowMajor && (!RhsRowMajor) && (LhsFlags & RhsFlags & ActualPacketAccessBit)
|
||||||
|
&& (InnerSize!=Dynamic) && (InnerSize % ei_packet_traits<Scalar>::size == 0)
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -35,7 +35,7 @@ struct ei_sum_traits
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
enum {
|
enum {
|
||||||
Vectorization = (int(Derived::Flags)&PacketAccessBit)
|
Vectorization = (int(Derived::Flags)&ActualPacketAccessBit)
|
||||||
&& (int(Derived::Flags)&LinearAccessBit)
|
&& (int(Derived::Flags)&LinearAccessBit)
|
||||||
? LinearVectorization
|
? LinearVectorization
|
||||||
: NoVectorization
|
: NoVectorization
|
||||||
|
@ -56,7 +56,6 @@ const unsigned int EvalBeforeNestingBit = 0x2;
|
|||||||
* means the expression should be evaluated before any assignment */
|
* means the expression should be evaluated before any assignment */
|
||||||
const unsigned int EvalBeforeAssigningBit = 0x4;
|
const unsigned int EvalBeforeAssigningBit = 0x4;
|
||||||
|
|
||||||
#ifdef EIGEN_VECTORIZE
|
|
||||||
/** \ingroup flags
|
/** \ingroup flags
|
||||||
*
|
*
|
||||||
* Short version: means the expression might be vectorized
|
* Short version: means the expression might be vectorized
|
||||||
@ -70,12 +69,23 @@ const unsigned int EvalBeforeAssigningBit = 0x4;
|
|||||||
* on the total size, so it might not be possible to access the few last coeffs
|
* on the total size, so it might not be possible to access the few last coeffs
|
||||||
* by packets.
|
* by packets.
|
||||||
*
|
*
|
||||||
* \note If vectorization is not enabled (EIGEN_VECTORIZE is not defined) this constant
|
* \note This bit can be set regardless of whether vectorization is actually enabled.
|
||||||
* is set to the value 0.
|
* To check for actual vectorizability, see \a ActualPacketAccessBit.
|
||||||
*/
|
*/
|
||||||
const unsigned int PacketAccessBit = 0x8;
|
const unsigned int PacketAccessBit = 0x8;
|
||||||
|
|
||||||
|
#ifdef EIGEN_VECTORIZE
|
||||||
|
/** \ingroup flags
|
||||||
|
*
|
||||||
|
* If vectorization is enabled (EIGEN_VECTORIZE is defined) this constant
|
||||||
|
* is set to the value \a PacketAccessBit.
|
||||||
|
*
|
||||||
|
* If vectorization is not enabled (EIGEN_VECTORIZE is not defined) this constant
|
||||||
|
* is set to the value 0.
|
||||||
|
*/
|
||||||
|
const unsigned int ActualPacketAccessBit = PacketAccessBit;
|
||||||
#else
|
#else
|
||||||
const unsigned int PacketAccessBit = 0x0;
|
const unsigned int ActualPacketAccessBit = 0x0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/** \ingroup flags
|
/** \ingroup flags
|
||||||
|
@ -33,7 +33,7 @@ template<typename MatrixType> void product(const MatrixType& m)
|
|||||||
typedef typename MatrixType::Scalar Scalar;
|
typedef typename MatrixType::Scalar Scalar;
|
||||||
typedef typename NumTraits<Scalar>::FloatingPoint FloatingPoint;
|
typedef typename NumTraits<Scalar>::FloatingPoint FloatingPoint;
|
||||||
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
|
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
|
||||||
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime> RowSquareMatrixType;
|
typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> RowSquareMatrixType;
|
||||||
typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, MatrixType::ColsAtCompileTime> ColSquareMatrixType;
|
typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, MatrixType::ColsAtCompileTime> ColSquareMatrixType;
|
||||||
|
|
||||||
int rows = m.rows();
|
int rows = m.rows();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user