Added an ei_linspaced_op to create linearly spaced vectors.

Added setLinSpaced/LinSpaced functionality to DenseBase.
Improved vectorized assignment - overcomes MSVC optimization issues.
CwiseNullaryOp now requires functors to offer 1D and 2D operators.
Adapted existing functors to the new CwiseNullaryOp requirements.
Added ei_plset to create packets as [a, a+1, ..., a+packet_size-1].
Added more nullary unit tests.
Hauke Heibel 2010-01-26 19:42:17 +01:00
parent afb9bf6281
commit 4365a48748
13 changed files with 252 additions and 19 deletions
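
For context, a minimal usage sketch of the API this commit introduces. The names (LinSpaced, Sequential, setLinSpaced) and the (low, high, size) argument order are taken from the diffs below; the surrounding boilerplate (Eigen/Core include, printing) is assumed and not part of the commit.

#include <Eigen/Core>
#include <iostream>
using namespace Eigen;

int main()
{
  // Random-access version: LinSpaced(low, high, size). With low = 0, high = 1 and
  // size = 5 the step is (1 - 0)/(5 - 1) = 0.25, giving 0, 0.25, 0.5, 0.75, 1.
  VectorXd a = VectorXd::LinSpaced(0.0, 1.0, 5);

  // Sequential version: same values, but the functor assumes the coefficients are
  // visited in order a(0), a(1), ..., a(size-1), which vectorizes better.
  VectorXd b = VectorXd::LinSpaced(Sequential, 0.0, 1.0, 5);

  // In-place variant; the dynamic vector is resized to 'size'.
  VectorXf v;
  v.setLinSpaced(0.5f, 1.5f, 5);  // 0.5, 0.75, 1.0, 1.25, 1.5

  std::cout << a.transpose() << "\n" << b.transpose() << "\n"
            << v.transpose() << std::endl;
  return 0;
}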

View File

@ -27,7 +27,7 @@
template<typename Scalar> struct ei_scalar_random_op {
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_random_op)
inline const Scalar operator() (int, int) const { return ei_random<Scalar>(); }
inline const Scalar operator() (int, int = 0) const { return ei_random<Scalar>(); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_random_op<Scalar> >

View File

@ -378,6 +378,31 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolli
*** Linear vectorization ***
***************************/
template <bool IsAligned = false>
struct ei_unaligned_assign_impl
{
template <typename Derived, typename OtherDerived>
static EIGEN_STRONG_INLINE void run(const Derived& src, OtherDerived& dst, int start, int end) {}
};
template <>
struct ei_unaligned_assign_impl<false>
{
// MSVC must not inline this function. If it does, it fails to optimize the
// packet access path.
#ifdef _MSC_VER
template <typename Derived, typename OtherDerived>
static EIGEN_DONT_INLINE void run(const Derived& src, OtherDerived& dst, int start, int end)
#else
template <typename Derived, typename OtherDerived>
static EIGEN_STRONG_INLINE void run(const Derived& src, OtherDerived& dst, int start, int end)
#endif
{
for (int index = start; index < end; ++index)
dst.copyCoeff(index, src);
}
};
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
{
@ -389,16 +414,14 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling
: ei_first_aligned(&dst.coeffRef(0), size);
const int alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
for(int index = 0; index < alignedStart; ++index)
dst.copyCoeff(index, src);
ei_unaligned_assign_impl<ei_assign_traits<Derived1,Derived2>::DstIsAligned!=0>::run(src,dst,0,alignedStart);
for(int index = alignedStart; index < alignedEnd; index += packetSize)
{
dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::SrcAlignment>(index, src);
}
for(int index = alignedEnd; index < size; ++index)
dst.copyCoeff(index, src);
ei_unaligned_assign_impl<>::run(src,dst,alignedEnd,size);
}
};
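
The MSVC work-around above relies on a bool-template dispatch: when the destination is statically known to be aligned, the scalar prologue resolves to the empty primary template and disappears; otherwise the scalar loop is moved into a function MSVC is told not to inline, so it cannot spoil the optimization of the packet loop. A self-contained sketch of the same pattern (plain float arrays for illustration, not Eigen code):

#include <cstdio>
#include <vector>

#ifdef _MSC_VER
  #define SKETCH_DONT_INLINE __declspec(noinline)
#else
  #define SKETCH_DONT_INLINE __attribute__((noinline))
#endif

// Primary template: destination known to be aligned, the scalar prologue is a
// no-op and the compiler removes the call entirely.
template <bool IsAligned = false>
struct unaligned_copy
{
  static inline void run(const float*, float*, int, int) {}
};

// "Maybe unaligned" case: the scalar loop lives in a non-inlined function so it
// cannot interfere with how the caller's vectorized main loop is optimized.
template <>
struct unaligned_copy<false>
{
  static SKETCH_DONT_INLINE void run(const float* src, float* dst, int start, int end)
  {
    for (int i = start; i < end; ++i)
      dst[i] = src[i];
  }
};

int main()
{
  std::vector<float> src(16, 1.f), dst(16, 0.f);
  unaligned_copy<false>::run(src.data(), dst.data(), 0, 3);   // unaligned head
  // ... the packet loop over the aligned middle part would go here ...
  unaligned_copy<false>::run(src.data(), dst.data(), 14, 16); // unaligned tail
  std::printf("%g %g\n", dst[0], dst[15]);
  return 0;
}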

View File

@ -80,23 +80,20 @@ class CwiseNullaryOp : ei_no_assignment_operator,
}
template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet(int, int) const
EIGEN_STRONG_INLINE PacketScalar packet(int row, int col) const
{
return m_functor.packetOp();
return m_functor.packetOp(row, col);
}
EIGEN_STRONG_INLINE const Scalar coeff(int index) const
{
if(RowsAtCompileTime == 1)
return m_functor(0, index);
else
return m_functor(index, 0);
return m_functor(index);
}
template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet(int) const
EIGEN_STRONG_INLINE PacketScalar packet(int index) const
{
return m_functor.packetOp();
return m_functor.packetOp(index);
}
protected:
@ -228,6 +225,49 @@ DenseBase<Derived>::Constant(const Scalar& value)
return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, ei_scalar_constant_op<Scalar>(value));
}
/**
* \brief Sets a linearly spaced vector.
*
* The function generates 'size' equally spaced values in the closed interval [low,high].
* This particular version of LinSpaced() uses sequential access, i.e. vector access is
* assumed to be a(0), a(1), ..., a(size-1). This assumption allows for better vectorization
* and yields faster code than the random access version.
*
* \only_for_vectors
*
* Example: \include DenseBase_LinSpaced_seq.cpp
* Output: \verbinclude DenseBase_LinSpaced_seq.out
*
* \sa setLinSpaced(Scalar,Scalar,int), LinSpaced(Scalar,Scalar,int), CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturnType
DenseBase<Derived>::LinSpaced(Sequential_t, Scalar low, Scalar high, int size)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return NullaryExpr(size, ei_linspaced_op<Scalar,false>(low,high,size));
}
/**
* \brief Sets a linearly spaced vector.
*
* The function generates 'size' equally spaced values in the closed interval [low,high].
*
* \only_for_vectors
*
* Example: \include DenseBase_LinSpaced.cpp
* Output: \verbinclude DenseBase_LinSpaced.out
*
* \sa setLinSpaced(Scalar,Scalar,int), LinSpaced(Sequential_t,Scalar,Scalar,int), CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
DenseBase<Derived>::LinSpaced(Scalar low, Scalar high, int size)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return NullaryExpr(size, ei_linspaced_op<Scalar,true>(low,high,size));
}
/** \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */
template<typename Derived>
bool DenseBase<Derived>::isApproxToConstant
@ -305,6 +345,24 @@ DenseStorageBase<Derived,_Base,_Options>::setConstant(int rows, int cols, const
return setConstant(value);
}
/**
* \brief Sets a linearly spaced vector.
*
* The function generates 'size' equally spaced values in the closed interval [low,high].
*
* \only_for_vectors
*
* Example: \include DenseBase_setLinSpaced.cpp
* Output: \verbinclude DenseBase_setLinSpaced.out
*
* \sa CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Scalar low, Scalar high, int size)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return derived() = Derived::NullaryExpr(size, ei_linspaced_op<Scalar,false>(low,high,size));
}
// zero:

View File

@ -192,11 +192,15 @@ template<typename Derived> class DenseBase
/** \internal Represents a matrix with all coefficients equal to one another*/
typedef CwiseNullaryOp<ei_scalar_constant_op<Scalar>,Derived> ConstantReturnType;
/** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. */
typedef CwiseNullaryOp<ei_linspaced_op<Scalar,false>,Derived> SequentialLinSpacedReturnType;
/** \internal Represents a vector with linearly spaced coefficients that allows random access. */
typedef CwiseNullaryOp<ei_linspaced_op<Scalar,true>,Derived> RandomAccessLinSpacedReturnType;
/** \internal the return type of MatrixBase::eigenvalues() */
typedef Matrix<typename NumTraits<typename ei_traits<Derived>::Scalar>::Real, ei_traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
/** \internal expression tyepe of a column */
/** \internal expression type of a column */
typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, 1> ColXpr;
/** \internal expression tyepe of a column */
/** \internal expression type of a row */
typedef Block<Derived, 1, ei_traits<Derived>::ColsAtCompileTime> RowXpr;
#endif // not EIGEN_PARSED_BY_DOXYGEN
@ -343,6 +347,11 @@ template<typename Derived> class DenseBase
static const ConstantReturnType
Constant(const Scalar& value);
static const SequentialLinSpacedReturnType
LinSpaced(Sequential_t, Scalar low, Scalar high, int size);
static const RandomAccessLinSpacedReturnType
LinSpaced(Scalar low, Scalar high, int size);
template<typename CustomNullaryOp>
static const CwiseNullaryOp<CustomNullaryOp, Derived>
NullaryExpr(int rows, int cols, const CustomNullaryOp& func);
@ -362,11 +371,11 @@ template<typename Derived> class DenseBase
void fill(const Scalar& value);
Derived& setConstant(const Scalar& value);
Derived& setLinSpaced(Scalar low, Scalar high, int size);
Derived& setZero();
Derived& setOnes();
Derived& setRandom();
template<typename OtherDerived>
bool isApprox(const DenseBase<OtherDerived>& other,
RealScalar prec = dummy_precision<Scalar>()) const;

View File

@ -437,7 +437,7 @@ struct ei_scalar_constant_op {
EIGEN_STRONG_INLINE ei_scalar_constant_op(const ei_scalar_constant_op& other) : m_other(other.m_other) { }
EIGEN_STRONG_INLINE ei_scalar_constant_op(const Scalar& other) : m_other(other) { }
EIGEN_STRONG_INLINE const Scalar operator() (int, int = 0) const { return m_other; }
EIGEN_STRONG_INLINE const PacketScalar packetOp() const { return ei_pset1(m_other); }
EIGEN_STRONG_INLINE const PacketScalar packetOp(int, int = 0) const { return ei_pset1(m_other); }
const Scalar m_other;
};
template<typename Scalar>
@ -452,6 +452,75 @@ template<typename Scalar>
struct ei_functor_traits<ei_scalar_identity_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
template <typename Scalar, bool RandomAccess> struct ei_linspaced_op_impl;
// linear access for packet ops:
// 1) initialization
// base = [low, ..., low] + ([step, ..., step] * [-size, ..., -1])
// 2) each step
// base += [size*step, ..., size*step]
template <typename Scalar>
struct ei_linspaced_op_impl<Scalar,false>
{
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
ei_linspaced_op_impl(Scalar low, Scalar step) :
m_low(low), m_step(step),
m_packetStep(ei_pset1(ei_packet_traits<Scalar>::size*step)),
m_base(ei_padd(ei_pset1(low),ei_pmul(ei_pset1(step),ei_plset<Scalar>(-ei_packet_traits<Scalar>::size)))) {}
EIGEN_STRONG_INLINE const Scalar operator() (int i) const { return m_low+i*m_step; }
EIGEN_STRONG_INLINE const PacketScalar packetOp(int) const { return m_base = ei_padd(m_base,m_packetStep); }
const Scalar m_low;
const Scalar m_step;
const PacketScalar m_packetStep;
mutable PacketScalar m_base;
};
// random access for packet ops:
// 1) each step
// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size-1] ) )
template <typename Scalar>
struct ei_linspaced_op_impl<Scalar,true>
{
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
ei_linspaced_op_impl(Scalar low, Scalar step) :
m_low(low), m_step(step),
m_lowPacket(ei_pset1(m_low)), m_stepPacket(ei_pset1(m_step)), m_interPacket(ei_plset<Scalar>(0)) {}
EIGEN_STRONG_INLINE const Scalar operator() (int i) const { return m_low+i*m_step; }
EIGEN_STRONG_INLINE const PacketScalar packetOp(int i) const
{ return ei_padd(m_lowPacket, ei_pmul(m_stepPacket, ei_padd(ei_pset1<Scalar>(i),m_interPacket))); }
const Scalar m_low;
const Scalar m_step;
const PacketScalar m_lowPacket;
const PacketScalar m_stepPacket;
const PacketScalar m_interPacket;
};
// ----- Linspace functor ----------------------------------------------------------------
// Forward declaration (we default to random access, which does not really give
// us a speed gain when using packet access, but it allows the functor to be used
// in nested expressions).
template <typename Scalar, bool RandomAccess = true> struct ei_linspaced_op;
template <typename Scalar, bool RandomAccess> struct ei_functor_traits< ei_linspaced_op<Scalar,RandomAccess> >
{ enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::size>1, IsRepeatable = true }; };
template <typename Scalar, bool RandomAccess> struct ei_linspaced_op
{
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
ei_linspaced_op(Scalar low, Scalar high, int num_steps) : impl(low, (high-low)/(num_steps-1)) {}
EIGEN_STRONG_INLINE const Scalar operator() (int i, int = 0) const { return impl(i); }
EIGEN_STRONG_INLINE const PacketScalar packetOp(int i, int = 0) const { return impl.packetOp(i); }
// This proxy object handles the required temporaries and the different
// implementations (random vs. sequential access), as well as the correct
// piping to size 2/4 packet operations.
const ei_linspaced_op_impl<Scalar,RandomAccess> impl;
};
// allow to add new functors and specializations of ei_functor_traits from outside Eigen.
// this macro is really needed because ei_functor_traits must be specialized after it is declared but before it is used...
#ifdef EIGEN_FUNCTORS_PLUGIN
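
To make the packet arithmetic described in the comments above concrete, here is a scalar simulation of both strategies (illustration only, assuming a 4-wide packet, low = 0 and step = 1; no SIMD involved and no Eigen code):

#include <cstdio>

int main()
{
  const int   packet_size = 4;
  const float low = 0.f, step = 1.f;

  // Sequential access: base starts at low + step * [-4, -3, -2, -1]; every call
  // first adds [4*step, ..., 4*step] and then returns the packet.
  float base[4];
  for (int k = 0; k < packet_size; ++k) base[k] = low + step * (k - packet_size);
  for (int call = 0; call < 2; ++call) {
    for (int k = 0; k < packet_size; ++k) base[k] += packet_size * step;
    std::printf("sequential call %d: %g %g %g %g\n",
                call, base[0], base[1], base[2], base[3]);
  }
  // -> call 0: 0 1 2 3, call 1: 4 5 6 7

  // Random access: packetOp(i) = low + step * (i + [0, 1, 2, 3]), independent of
  // any previous call, so it also works when the expression is not traversed linearly.
  for (int i = 0; i < 8; i += packet_size) {
    float p[4];
    for (int k = 0; k < packet_size; ++k) p[k] = low + step * (i + k);
    std::printf("random access packetOp(%d): %g %g %g %g\n", i, p[0], p[1], p[2], p[3]);
  }
  return 0;
}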

View File

@ -157,6 +157,10 @@ ei_ploadu(const Scalar* from) { return *from; }
template<typename Scalar> inline typename ei_packet_traits<Scalar>::type
ei_pset1(const Scalar& a) { return a; }
/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
template<typename Scalar> inline typename ei_packet_traits<Scalar>::type
ei_plset(const Scalar& a) { return a; }
/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
template<typename Scalar, typename Packet> inline void ei_pstore(Scalar* to, const Packet& from)
{ (*to) = from; }

View File

@ -63,7 +63,7 @@ template<typename MatrixType> class Transpose
public:
typedef typename TransposeImpl<MatrixType,typename ei_traits<MatrixType>::StorageType>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE_NEW(Transpose)
EIGEN_GENERIC_PUBLIC_INTERFACE_NEW(Transpose)
inline Transpose(const MatrixType& matrix) : m_matrix(matrix) {}

View File

@ -91,6 +91,10 @@ template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<double>(const double& from) { r
#endif
template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<int>(const int& from) { return _mm_set1_epi32(from); }
template<> EIGEN_STRONG_INLINE Packet4f ei_plset<float>(const float& a) { return _mm_add_ps(ei_pset1(a), _mm_set_ps(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet2d ei_plset<double>(const double& a) { return _mm_add_pd(ei_pset1(a),_mm_set_pd(1,0)); }
template<> EIGEN_STRONG_INLINE Packet4i ei_plset<int>(const int& a) { return _mm_add_epi32(ei_pset1(a),_mm_set_epi32(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet4f ei_padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); }
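
To see what the new ei_plset specializations compute, a small standalone SSE sketch (not from the commit; it repeats the Packet4f arithmetic above and assumes an SSE2-capable compiler):

#include <emmintrin.h>  // SSE2 intrinsics
#include <cstdio>

int main()
{
  // Same arithmetic as the Packet4f specialization above:
  // plset(a) = set1(a) + (0,1,2,3), i.e. coefficients a, a+1, a+2, a+3.
  const float a = 2.5f;
  __m128 p = _mm_add_ps(_mm_set1_ps(a), _mm_set_ps(3.f, 2.f, 1.f, 0.f));

  float out[4];
  _mm_storeu_ps(out, p);
  std::printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // 2.5 3.5 4.5 5.5
  return 0;
}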

View File

@ -224,6 +224,11 @@ namespace {
EIGEN_UNUSED NoChange_t NoChange;
}
struct Sequential_t {};
namespace {
EIGEN_UNUSED Sequential_t Sequential;
}
struct Default_t {};
namespace {
EIGEN_UNUSED Default_t Default;

View File

@ -0,0 +1,2 @@
cout << VectorXi::LinSpaced(7,10,4) << endl;
cout << VectorXd::LinSpaced(0.0,1.0,5) << endl;

View File

@ -0,0 +1,2 @@
cout << VectorXi::LinSpaced(Sequential,7,10,4).transpose() << endl;
cout << VectorXd::LinSpaced(Sequential,0.0,1.0,5).transpose() << endl;

View File

@ -0,0 +1,3 @@
VectorXf v;
v.setLinSpaced(0.5f,1.5f,5).transpose();
cout << v << endl;

View File

@ -46,6 +46,54 @@ bool equalsIdentity(const MatrixType& A)
return offDiagOK && diagOK;
}
template<typename VectorType>
void testVectorType(const VectorType& base)
{
typedef typename ei_traits<VectorType>::Scalar Scalar;
Scalar low = ei_random(-500,500);
Scalar high = ei_random(-500,500);
if (low>high) std::swap(low,high);
const int size = base.size();
const Scalar step = (high-low)/(size-1);
// check whether the result yields what we expect it to do
VectorType m(base);
m.setLinSpaced(low,high,size);
VectorType n(size);
for (int i=0; i<size; ++i)
n(i) = low+i*step;
VERIFY( (m-n).norm() < std::numeric_limits<Scalar>::epsilon()*10e3 );
// random access version
m = VectorType::LinSpaced(low,high,size);
VERIFY( (m-n).norm() < std::numeric_limits<Scalar>::epsilon()*10e3 );
// These guys sometimes fail! This is not good. Any ideas how to fix them!?
//VERIFY( m(m.size()-1) == high );
//VERIFY( m(0) == low );
// sequential access version
m = VectorType::LinSpaced(Sequential,low,high,size);
VERIFY( (m-n).norm() < std::numeric_limits<Scalar>::epsilon()*10e3 );
// These guys sometimes fail! This is not good. Any ideas how to fix them!?
//VERIFY( m(m.size()-1) == high );
//VERIFY( m(0) == low );
// check whether everything works with row and col major vectors
Matrix<Scalar,Dynamic,1> row_vector(size);
Matrix<Scalar,1,Dynamic> col_vector(size);
row_vector.setLinSpaced(low,high,size);
col_vector.setLinSpaced(low,high,size);
VERIFY( (row_vector-col_vector.transpose()).norm() < 1e-10 );
Matrix<Scalar,Dynamic,1> size_changer(size+50);
size_changer.setLinSpaced(low,high,size);
VERIFY( size_changer.size() == size );
}
template<typename MatrixType>
void testMatrixType(const MatrixType& m)
{
@ -63,4 +111,10 @@ void test_nullary()
CALL_SUBTEST_1( testMatrixType(Matrix2d()) );
CALL_SUBTEST_2( testMatrixType(MatrixXcf(50,50)) );
CALL_SUBTEST_3( testMatrixType(MatrixXf(5,7)) );
CALL_SUBTEST_4( testVectorType(VectorXd(51)) );
CALL_SUBTEST_5( testVectorType(VectorXd(41)) );
CALL_SUBTEST_6( testVectorType(Vector3d()) );
CALL_SUBTEST_7( testVectorType(VectorXf(51)) );
CALL_SUBTEST_8( testVectorType(VectorXf(41)) );
CALL_SUBTEST_9( testVectorType(Vector3f()) );
}
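
Regarding the commented-out endpoint checks in testVectorType above: the coefficients are produced as low + i*step with step = (high-low)/(size-1), so the last value is a floating-point reconstruction of high that can differ from it in the final bits. A minimal demonstration of the effect (not part of the test suite; the chosen low/high are arbitrary):

#include <cstdio>

int main()
{
  // step = (high - low)/(size - 1); the last generated coefficient is
  // low + (size - 1)*step, which need not be bit-identical to high.
  const float low = -3.f, high = 7.1f;
  const int   size = 51;
  const float step = (high - low) / float(size - 1);
  const float last = low + float(size - 1) * step;
  std::printf("high = %.9g  last = %.9g  diff = %.9g\n", high, last, high - last);
  return 0;
}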