diff --git a/Eigen/src/Array/Random.h b/Eigen/src/Array/Random.h index 97ca0fba3..e1f4aa7ca 100644 --- a/Eigen/src/Array/Random.h +++ b/Eigen/src/Array/Random.h @@ -27,7 +27,7 @@ template struct ei_scalar_random_op { EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_random_op) - inline const Scalar operator() (int, int) const { return ei_random(); } + inline const Scalar operator() (int, int = 0) const { return ei_random(); } }; template struct ei_functor_traits > diff --git a/Eigen/src/Core/Assign.h b/Eigen/src/Core/Assign.h index e5c17b3f4..41653098f 100644 --- a/Eigen/src/Core/Assign.h +++ b/Eigen/src/Core/Assign.h @@ -378,6 +378,31 @@ struct ei_assign_impl +struct ei_unaligned_assign_impl +{ + template + static EIGEN_STRONG_INLINE void run(const Derived& src, OtherDerived& dst, int start, int end) {} +}; + +template <> +struct ei_unaligned_assign_impl +{ + // MSVC must not inline this functions. If it does, it fails to optimize the + // packet access path. +#ifdef _MSC_VER + template + static EIGEN_DONT_INLINE void run(const Derived& src, OtherDerived& dst, int start, int end) +#else + template + static EIGEN_STRONG_INLINE void run(const Derived& src, OtherDerived& dst, int start, int end) +#endif + { + for (int index = start; index < end; ++index) + dst.copyCoeff(index, src); + } +}; + template struct ei_assign_impl { @@ -389,16 +414,14 @@ struct ei_assign_impl::DstIsAligned!=0>::run(src,dst,0,alignedStart); + for(int index = alignedStart; index < alignedEnd; index += packetSize) { dst.template copyPacket::SrcAlignment>(index, src); } - for(int index = alignedEnd; index < size; ++index) - dst.copyCoeff(index, src); + ei_unaligned_assign_impl<>::run(src,dst,alignedEnd,size); } }; diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index c5d5cd97c..f4dd0b695 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -80,23 +80,20 @@ class CwiseNullaryOp : ei_no_assignment_operator, } template - EIGEN_STRONG_INLINE PacketScalar packet(int, int) const + EIGEN_STRONG_INLINE PacketScalar packet(int row, int col) const { - return m_functor.packetOp(); + return m_functor.packetOp(row, col); } EIGEN_STRONG_INLINE const Scalar coeff(int index) const { - if(RowsAtCompileTime == 1) - return m_functor(0, index); - else - return m_functor(index, 0); + return m_functor(index); } template - EIGEN_STRONG_INLINE PacketScalar packet(int) const + EIGEN_STRONG_INLINE PacketScalar packet(int index) const { - return m_functor.packetOp(); + return m_functor.packetOp(index); } protected: @@ -228,6 +225,49 @@ DenseBase::Constant(const Scalar& value) return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, ei_scalar_constant_op(value)); } +/** + * \brief Sets a linearly space vector. + * + * The function generates 'size' equally spaced values in the closed interval [low,high]. + * This particular version of LinSpaced() uses sequential access, i.e. vector access is + * assumed to be a(0), a(1), ..., a(size). This assumption allows for better vectorization + * and yields faster code than the random access version. + * + * \only_for_vectors + * + * Example: \include DenseBase_LinSpaced_seq.cpp + * Output: \verbinclude DenseBase_LinSpaced_seq.out + * + * \sa setLinSpaced(Scalar,Scalar,int), LinSpaced(Scalar,Scalar,int), CwiseNullaryOp + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::SequentialLinSpacedReturnType +DenseBase::LinSpaced(Sequential_t, Scalar low, Scalar high, int size) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return NullaryExpr(size, ei_linspaced_op(low,high,size)); +} + +/** + * \brief Sets a linearly space vector. + * + * The function generates 'size' equally spaced values in the closed interval [low,high]. + * + * \only_for_vectors + * + * Example: \include DenseBase_LinSpaced.cpp + * Output: \verbinclude DenseBase_LinSpaced.out + * + * \sa setLinSpaced(Scalar,Scalar,int), LinSpaced(Sequential_t,Scalar,Scalar,int), CwiseNullaryOp + */ +template +EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType +DenseBase::LinSpaced(Scalar low, Scalar high, int size) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return NullaryExpr(size, ei_linspaced_op(low,high,size)); +} + /** \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */ template bool DenseBase::isApproxToConstant @@ -305,6 +345,24 @@ DenseStorageBase::setConstant(int rows, int cols, const return setConstant(value); } +/** + * \brief Sets a linearly space vector. + * + * The function generates 'size' equally spaced values in the closed interval [low,high]. + * + * \only_for_vectors + * + * Example: \include DenseBase_setLinSpaced.cpp + * Output: \verbinclude DenseBase_setLinSpaced.out + * + * \sa CwiseNullaryOp + */ +template +EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(Scalar low, Scalar high, int size) +{ + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return derived() = Derived::NullaryExpr(size, ei_linspaced_op(low,high,size)); +} // zero: diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index 8ea0f3ddf..e0a3a04af 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -192,11 +192,15 @@ template class DenseBase /** \internal Represents a matrix with all coefficients equal to one another*/ typedef CwiseNullaryOp,Derived> ConstantReturnType; + /** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. */ + typedef CwiseNullaryOp,Derived> SequentialLinSpacedReturnType; + /** \internal Represents a vector with linearly spaced coefficients that allows random access. */ + typedef CwiseNullaryOp,Derived> RandomAccessLinSpacedReturnType; /** \internal the return type of MatrixBase::eigenvalues() */ typedef Matrix::Scalar>::Real, ei_traits::ColsAtCompileTime, 1> EigenvaluesReturnType; - /** \internal expression tyepe of a column */ + /** \internal expression type of a column */ typedef Block::RowsAtCompileTime, 1> ColXpr; - /** \internal expression tyepe of a column */ + /** \internal expression type of a column */ typedef Block::ColsAtCompileTime> RowXpr; #endif // not EIGEN_PARSED_BY_DOXYGEN @@ -343,6 +347,11 @@ template class DenseBase static const ConstantReturnType Constant(const Scalar& value); + static const SequentialLinSpacedReturnType + LinSpaced(Sequential_t, Scalar low, Scalar high, int size); + static const RandomAccessLinSpacedReturnType + LinSpaced(Scalar low, Scalar high, int size); + template static const CwiseNullaryOp NullaryExpr(int rows, int cols, const CustomNullaryOp& func); @@ -362,11 +371,11 @@ template class DenseBase void fill(const Scalar& value); Derived& setConstant(const Scalar& value); + Derived& setLinSpaced(Scalar low, Scalar high, int size); Derived& setZero(); Derived& setOnes(); Derived& setRandom(); - template bool isApprox(const DenseBase& other, RealScalar prec = dummy_precision()) const; diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h index aa3eba5cc..31d0cff70 100644 --- a/Eigen/src/Core/Functors.h +++ b/Eigen/src/Core/Functors.h @@ -437,7 +437,7 @@ struct ei_scalar_constant_op { EIGEN_STRONG_INLINE ei_scalar_constant_op(const ei_scalar_constant_op& other) : m_other(other.m_other) { } EIGEN_STRONG_INLINE ei_scalar_constant_op(const Scalar& other) : m_other(other) { } EIGEN_STRONG_INLINE const Scalar operator() (int, int = 0) const { return m_other; } - EIGEN_STRONG_INLINE const PacketScalar packetOp() const { return ei_pset1(m_other); } + EIGEN_STRONG_INLINE const PacketScalar packetOp(int, int = 0) const { return ei_pset1(m_other); } const Scalar m_other; }; template @@ -452,6 +452,75 @@ template struct ei_functor_traits > { enum { Cost = NumTraits::AddCost, PacketAccess = false, IsRepeatable = true }; }; +template struct ei_linspaced_op_impl; + +// linear access for packet ops: +// 1) initialization +// base = [low, ..., low] + ([step, ..., step] * [-size, ..., 0]) +// 2) each step +// base += [size*step, ..., size*step] +template +struct ei_linspaced_op_impl +{ + typedef typename ei_packet_traits::type PacketScalar; + + ei_linspaced_op_impl(Scalar low, Scalar step) : + m_low(low), m_step(step), + m_packetStep(ei_pset1(ei_packet_traits::size*step)), + m_base(ei_padd(ei_pset1(low),ei_pmul(ei_pset1(step),ei_plset(-ei_packet_traits::size)))) {} + + EIGEN_STRONG_INLINE const Scalar operator() (int i) const { return m_low+i*m_step; } + EIGEN_STRONG_INLINE const PacketScalar packetOp(int) const { return m_base = ei_padd(m_base,m_packetStep); } + + const Scalar m_low; + const Scalar m_step; + const PacketScalar m_packetStep; + mutable PacketScalar m_base; +}; + +// random access for packet ops: +// 1) each step +// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size] ) ) +template +struct ei_linspaced_op_impl +{ + typedef typename ei_packet_traits::type PacketScalar; + + ei_linspaced_op_impl(Scalar low, Scalar step) : + m_low(low), m_step(step), + m_lowPacket(ei_pset1(m_low)), m_stepPacket(ei_pset1(m_step)), m_interPacket(ei_plset(0)) {} + + EIGEN_STRONG_INLINE const Scalar operator() (int i) const { return m_low+i*m_step; } + EIGEN_STRONG_INLINE const PacketScalar packetOp(int i) const + { return ei_padd(m_lowPacket, ei_pmul(m_stepPacket, ei_padd(ei_pset1(i),m_interPacket))); } + + const Scalar m_low; + const Scalar m_step; + const PacketScalar m_lowPacket; + const PacketScalar m_stepPacket; + const PacketScalar m_interPacket; +}; + +// ----- Linspace functor ---------------------------------------------------------------- + +// Forward declaration (we default to random access which does not really give +// us a speed gain when using packet access but it allows to use the functor in +// nested expressions). +template struct ei_linspaced_op; +template struct ei_functor_traits< ei_linspaced_op > +{ enum { Cost = 1, PacketAccess = ei_packet_traits::size>1, IsRepeatable = true }; }; +template struct ei_linspaced_op +{ + typedef typename ei_packet_traits::type PacketScalar; + ei_linspaced_op(Scalar low, Scalar high, int num_steps) : impl(low, (high-low)/(num_steps-1)) {} + EIGEN_STRONG_INLINE const Scalar operator() (int i, int = 0) const { return impl(i); } + EIGEN_STRONG_INLINE const PacketScalar packetOp(int i, int = 0) const { return impl.packetOp(i); } + // This proxy object handles the actual required temporaries, the different + // implementations (random vs. sequential access) as well as the piping + // correct piping to size 2/4 packet operations. + const ei_linspaced_op_impl impl; +}; + // allow to add new functors and specializations of ei_functor_traits from outside Eigen. // this macro is really needed because ei_functor_traits must be specialized after it is declared but before it is used... #ifdef EIGEN_FUNCTORS_PLUGIN diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index ae1720eca..08981f89d 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -157,6 +157,10 @@ ei_ploadu(const Scalar* from) { return *from; } template inline typename ei_packet_traits::type ei_pset1(const Scalar& a) { return a; } +/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */ +template inline typename ei_packet_traits::type +ei_plset(const Scalar& a) { return a; } + /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */ template inline void ei_pstore(Scalar* to, const Packet& from) { (*to) = from; } diff --git a/Eigen/src/Core/Transpose.h b/Eigen/src/Core/Transpose.h index 1113792fd..143b39033 100644 --- a/Eigen/src/Core/Transpose.h +++ b/Eigen/src/Core/Transpose.h @@ -63,7 +63,7 @@ template class Transpose public: typedef typename TransposeImpl::StorageType>::Base Base; - EIGEN_GENERIC_PUBLIC_INTERFACE_NEW(Transpose) + EIGEN_GENERIC_PUBLIC_INTERFACE_NEW(Transpose) inline Transpose(const MatrixType& matrix) : m_matrix(matrix) {} diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index f65801137..a5a56f759 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -91,6 +91,10 @@ template<> EIGEN_STRONG_INLINE Packet2d ei_pset1(const double& from) { r #endif template<> EIGEN_STRONG_INLINE Packet4i ei_pset1(const int& from) { return _mm_set1_epi32(from); } +template<> EIGEN_STRONG_INLINE Packet4f ei_plset(const float& a) { return _mm_add_ps(ei_pset1(a), _mm_set_ps(3,2,1,0)); } +template<> EIGEN_STRONG_INLINE Packet2d ei_plset(const double& a) { return _mm_add_pd(ei_pset1(a),_mm_set_pd(1,0)); } +template<> EIGEN_STRONG_INLINE Packet4i ei_plset(const int& a) { return _mm_add_epi32(ei_pset1(a),_mm_set_epi32(3,2,1,0)); } + template<> EIGEN_STRONG_INLINE Packet4f ei_padd(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet2d ei_padd(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); } template<> EIGEN_STRONG_INLINE Packet4i ei_padd(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); } diff --git a/Eigen/src/Core/util/Constants.h b/Eigen/src/Core/util/Constants.h index 01c1aeb2f..6ae103e66 100644 --- a/Eigen/src/Core/util/Constants.h +++ b/Eigen/src/Core/util/Constants.h @@ -224,6 +224,11 @@ namespace { EIGEN_UNUSED NoChange_t NoChange; } +struct Sequential_t {}; +namespace { + EIGEN_UNUSED Sequential_t Sequential; +} + struct Default_t {}; namespace { EIGEN_UNUSED Default_t Default; diff --git a/doc/snippets/DenseBase_LinSpaced.cpp b/doc/snippets/DenseBase_LinSpaced.cpp new file mode 100644 index 000000000..540709adc --- /dev/null +++ b/doc/snippets/DenseBase_LinSpaced.cpp @@ -0,0 +1,2 @@ +cout << VectorXi::LinSpaced(7,10,4) << endl; +cout << VectorXd::LinSpaced(0.0,1.0,5) << endl; diff --git a/doc/snippets/DenseBase_LinSpaced_seq.cpp b/doc/snippets/DenseBase_LinSpaced_seq.cpp new file mode 100644 index 000000000..73873c4e9 --- /dev/null +++ b/doc/snippets/DenseBase_LinSpaced_seq.cpp @@ -0,0 +1,2 @@ +cout << VectorXi::LinSpaced(Sequential,7,10,4).transpose() << endl; +cout << VectorXd::LinSpaced(Sequential,0.0,1.0,5).transpose() << endl; diff --git a/doc/snippets/DenseBase_setLinSpaced.cpp b/doc/snippets/DenseBase_setLinSpaced.cpp new file mode 100644 index 000000000..a8ea73407 --- /dev/null +++ b/doc/snippets/DenseBase_setLinSpaced.cpp @@ -0,0 +1,3 @@ +VectorXf v; +v.setLinSpaced(0.5f,1.5f,5).transpose(); +cout << v << endl; diff --git a/test/nullary.cpp b/test/nullary.cpp index a7f6c60a7..240365529 100644 --- a/test/nullary.cpp +++ b/test/nullary.cpp @@ -46,6 +46,54 @@ bool equalsIdentity(const MatrixType& A) return offDiagOK && diagOK; } +template +void testVectorType(const VectorType& base) +{ + typedef typename ei_traits::Scalar Scalar; + Scalar low = ei_random(-500,500); + Scalar high = ei_random(-500,500); + if (low>high) std::swap(low,high); + const int size = base.size(); + const Scalar step = (high-low)/(size-1); + + // check whether the result yields what we expect it to do + VectorType m(base); + m.setLinSpaced(low,high,size); + + VectorType n(size); + for (int i=0; i::epsilon()*10e3 ); + + // random access version + m = VectorType::LinSpaced(low,high,size); + VERIFY( (m-n).norm() < std::numeric_limits::epsilon()*10e3 ); + + // These guys sometimes fail! This is not good. Any ideas how to fix them!? + //VERIFY( m(m.size()-1) == high ); + //VERIFY( m(0) == low ); + + // sequential access version + m = VectorType::LinSpaced(Sequential,low,high,size); + VERIFY( (m-n).norm() < std::numeric_limits::epsilon()*10e3 ); + + // These guys sometimes fail! This is not good. Any ideas how to fix them!? + //VERIFY( m(m.size()-1) == high ); + //VERIFY( m(0) == low ); + + // check whether everything works with row and col major vectors + Matrix row_vector(size); + Matrix col_vector(size); + row_vector.setLinSpaced(low,high,size); + col_vector.setLinSpaced(low,high,size); + VERIFY( (row_vector-col_vector.transpose()).norm() < 1e-10 ); + + Matrix size_changer(size+50); + size_changer.setLinSpaced(low,high,size); + VERIFY( size_changer.size() == size ); +} + template void testMatrixType(const MatrixType& m) { @@ -63,4 +111,10 @@ void test_nullary() CALL_SUBTEST_1( testMatrixType(Matrix2d()) ); CALL_SUBTEST_2( testMatrixType(MatrixXcf(50,50)) ); CALL_SUBTEST_3( testMatrixType(MatrixXf(5,7)) ); + CALL_SUBTEST_4( testVectorType(VectorXd(51)) ); + CALL_SUBTEST_5( testVectorType(VectorXd(41)) ); + CALL_SUBTEST_6( testVectorType(Vector3d()) ); + CALL_SUBTEST_7( testVectorType(VectorXf(51)) ); + CALL_SUBTEST_8( testVectorType(VectorXf(41)) ); + CALL_SUBTEST_9( testVectorType(Vector3f()) ); }