* do the ActualPacketAccessBit change as discussed on list

* add comment in Product.h about CanVectorizeInner
* fix typo in test/product.cpp
2025-07-31 09:12:02 +08:00 · 2008-07-04 12:43:55 +00:00 · 2008-07-04 12:43:55 +00:00 · a9d319d44f
commit a9d319d44f
parent 8463b7d3f4
7 changed files with 40 additions and 17 deletions
--- a/Eigen/src/Core/Assign.h
+++ b/Eigen/src/Core/Assign.h
@@ -46,7 +46,7 @@ private:
  };

  enum {
-    MightVectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & PacketAccessBit)
+    MightVectorize = (int(Derived::Flags) & int(OtherDerived::Flags) & ActualPacketAccessBit)
             && ((int(Derived::Flags)&RowMajorBit)==(int(OtherDerived::Flags)&RowMajorBit)),
    MayInnerVectorize = MightVectorize && InnerSize!=Dynamic && int(InnerSize)%int(PacketSize)==0,
    MayLinearVectorize = MightVectorize && (int(Derived::Flags) & int(OtherDerived::Flags) & LinearAccessBit),
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@@ -34,7 +34,7 @@ struct ei_dot_traits
 {
 public:
  enum {
-    Vectorization = (int(Derived1::Flags)&int(Derived2::Flags)&PacketAccessBit)
+    Vectorization = (int(Derived1::Flags)&int(Derived2::Flags)&ActualPacketAccessBit)
                 && (int(Derived1::Flags)&int(Derived2::Flags)&LinearAccessBit)
                  ? LinearVectorization
                  : NoVectorization
--- a/Eigen/src/Core/IO.h
+++ b/Eigen/src/Core/IO.h
@@ -25,12 +25,8 @@
 #ifndef EIGEN_IO_H
 #define EIGEN_IO_H

-/** \relates MatrixBase
-  *
-  * Outputs the matrix, laid out as an array as usual, to the given stream.
-  */
 template<typename Derived>
-std::ostream & operator <<
+std::ostream & ei_print_matrix
 (std::ostream & s,
 const MatrixBase<Derived> & m)
 {
@@ -45,4 +41,16 @@ std::ostream & operator <<
  return s;
 }

+/** \relates MatrixBase
+  *
+  * Outputs the matrix, laid out as an array as usual, to the given stream.
+  */
+template<typename Derived>
+std::ostream & operator <<
+(std::ostream & s,
+ const MatrixBase<Derived> & m)
+{
+  return ei_print_matrix(s, m.eval());
+}
+
 #endif // EIGEN_IO_H
--- a/Eigen/src/Core/Product.h
+++ b/Eigen/src/Core/Product.h
@@ -141,9 +141,6 @@ struct ei_traits<Product<LhsNested, RhsNested, ProductMode> >
    CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
                    && (RowsAtCompileTime % ei_packet_traits<Scalar>::size == 0),

-    CanVectorizeInner = LhsRowMajor && (!RhsRowMajor) && (LhsFlags & PacketAccessBit) && (RhsFlags & PacketAccessBit)
-                      && (InnerSize!=Dynamic) && (InnerSize % ei_packet_traits<Scalar>::size == 0),
-
    EvalToRowMajor = RhsRowMajor && (ProductMode==(int)CacheFriendlyProduct ? LhsRowMajor : (!CanVectorizeLhs)),

    RemovedBits = ~((EvalToRowMajor ? 0 : RowMajorBit)
@@ -156,7 +153,15 @@ struct ei_traits<Product<LhsNested, RhsNested, ProductMode> >

    CoeffReadCost = InnerSize == Dynamic ? Dynamic
                  : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
-                    + (InnerSize - 1) * NumTraits<Scalar>::AddCost
+                    + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
+
+    /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
+     * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
+     * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
+     * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
+     */
+    CanVectorizeInner = LhsRowMajor && (!RhsRowMajor) && (LhsFlags & RhsFlags & ActualPacketAccessBit)
+                      && (InnerSize!=Dynamic) && (InnerSize % ei_packet_traits<Scalar>::size == 0)
  };
 };

--- a/Eigen/src/Core/Sum.h
+++ b/Eigen/src/Core/Sum.h
@@ -35,7 +35,7 @@ struct ei_sum_traits
 {
 public:
  enum {
-    Vectorization = (int(Derived::Flags)&PacketAccessBit)
+    Vectorization = (int(Derived::Flags)&ActualPacketAccessBit)
                 && (int(Derived::Flags)&LinearAccessBit)
                  ? LinearVectorization
                  : NoVectorization
--- a/Eigen/src/Core/util/Constants.h
+++ b/Eigen/src/Core/util/Constants.h
@@ -56,7 +56,6 @@ const unsigned int EvalBeforeNestingBit = 0x2;
  * means the expression should be evaluated before any assignement */
 const unsigned int EvalBeforeAssigningBit = 0x4;

-#ifdef EIGEN_VECTORIZE
 /** \ingroup flags
  *
  * Short version: means the expression might be vectorized
@@ -70,12 +69,23 @@ const unsigned int EvalBeforeAssigningBit = 0x4;
  * on the total size, so it might not be possible to access the few last coeffs
  * by packets.
  *
-  * \note If vectorization is not enabled (EIGEN_VECTORIZE is not defined) this constant
-  * is set to the value 0.
+  * \note This bit can be set regardless of whether vectorization is actually enabled.
+  *       To check for actual vectorizability, see \a ActualPacketAccessBit.
  */
 const unsigned int PacketAccessBit = 0x8;
+
+#ifdef EIGEN_VECTORIZE
+/** \ingroup flags
+  *
+  * If vectorization is enabled (EIGEN_VECTORIZE is defined) this constant
+  * is set to the value \a PacketAccessBit.
+  *
+  * If vectorization is not enabled (EIGEN_VECTORIZE is not defined) this constant
+  * is set to the value 0.
+  */
+const unsigned int ActualPacketAccessBit = PacketAccessBit;
 #else
-const unsigned int PacketAccessBit = 0x0;
+const unsigned int ActualPacketAccessBit = 0x0;
 #endif

 /** \ingroup flags
--- a/test/product.cpp
+++ b/test/product.cpp
@@ -33,7 +33,7 @@ template<typename MatrixType> void product(const MatrixType& m)
  typedef typename MatrixType::Scalar Scalar;
  typedef typename NumTraits<Scalar>::FloatingPoint FloatingPoint;
  typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, 1> VectorType;
-  typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::ColsAtCompileTime> RowSquareMatrixType;
+  typedef Matrix<Scalar, MatrixType::RowsAtCompileTime, MatrixType::RowsAtCompileTime> RowSquareMatrixType;
  typedef Matrix<Scalar, MatrixType::ColsAtCompileTime, MatrixType::ColsAtCompileTime> ColSquareMatrixType;

  int rows = m.rows();