Added an ei_linspaced_op to create linearly spaced vectors.

Added setLinSpaced/LinSpaced functionality to DenseBase.
Improved vectorized assignment - overcomes MSVC optimization issues.
CwiseNullaryOp now requires functors to offer 1D and 2D operators.
Adapted existing functors to the new CwiseNullaryOp requirements.
Added ei_plset to create packets as [a, a+1, ..., a+packet_size-1].
Added more nullary unit tests.
Hauke Heibel 2010-01-26 19:42:17 +01:00
parent afb9bf6281
commit 4365a48748
13 changed files with 252 additions and 19 deletions
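
For context, a minimal usage sketch of the API this commit introduces. The names (LinSpaced, Sequential, setLinSpaced) and the (low, high, size) argument order are taken from the diffs below; the surrounding boilerplate (Eigen/Core include, printing) is assumed and not part of the commit.

#include <Eigen/Core>
#include <iostream>
using namespace Eigen;

int main()
{
  // Random-access version: LinSpaced(low, high, size). With low = 0, high = 1 and
  // size = 5 the step is (1 - 0)/(5 - 1) = 0.25, giving 0, 0.25, 0.5, 0.75, 1.
  VectorXd a = VectorXd::LinSpaced(0.0, 1.0, 5);

  // Sequential version: same values, but the functor assumes the coefficients are
  // visited in order a(0), a(1), ..., a(size-1), which vectorizes better.
  VectorXd b = VectorXd::LinSpaced(Sequential, 0.0, 1.0, 5);

  // In-place variant; the dynamic vector is resized to 'size'.
  VectorXf v;
  v.setLinSpaced(0.5f, 1.5f, 5);  // 0.5, 0.75, 1.0, 1.25, 1.5

  std::cout << a.transpose() << "\n" << b.transpose() << "\n"
            << v.transpose() << std::endl;
  return 0;
}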

View File

@ -27,7 +27,7 @@
template<typename Scalar> struct ei_scalar_random_op {
EIGEN_EMPTY_STRUCT_CTOR(ei_scalar_random_op)
inline const Scalar operator() (int, int) const { return ei_random<Scalar>(); }
inline const Scalar operator() (int, int = 0) const { return ei_random<Scalar>(); }
};
template<typename Scalar>
struct ei_functor_traits<ei_scalar_random_op<Scalar> >

View File

@ -378,6 +378,31 @@ struct ei_assign_impl<Derived1, Derived2, InnerVectorizedTraversal, InnerUnrolli
*** Linear vectorization ***
***************************/
template <bool IsAligned = false>
struct ei_unaligned_assign_impl
{
template <typename Derived, typename OtherDerived>
static EIGEN_STRONG_INLINE void run(const Derived& src, OtherDerived& dst, int start, int end) {}
};
template <>
struct ei_unaligned_assign_impl<false>
{
// MSVC must not inline this function. If it does, it fails to optimize the
// packet access path.
#ifdef _MSC_VER
template <typename Derived, typename OtherDerived>
static EIGEN_DONT_INLINE void run(const Derived& src, OtherDerived& dst, int start, int end)
#else
template <typename Derived, typename OtherDerived>
static EIGEN_STRONG_INLINE void run(const Derived& src, OtherDerived& dst, int start, int end)
#endif
{
for (int index = start; index < end; ++index)
dst.copyCoeff(index, src);
}
};
template<typename Derived1, typename Derived2>
struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling>
{
@ -389,16 +414,14 @@ struct ei_assign_impl<Derived1, Derived2, LinearVectorizedTraversal, NoUnrolling
: ei_first_aligned(&dst.coeffRef(0), size);
const int alignedEnd = alignedStart + ((size-alignedStart)/packetSize)*packetSize;
for(int index = 0; index < alignedStart; ++index)
dst.copyCoeff(index, src);
ei_unaligned_assign_impl<ei_assign_traits<Derived1,Derived2>::DstIsAligned!=0>::run(src,dst,0,alignedStart);
for(int index = alignedStart; index < alignedEnd; index += packetSize)
{
dst.template copyPacket<Derived2, Aligned, ei_assign_traits<Derived1,Derived2>::SrcAlignment>(index, src);
}
for(int index = alignedEnd; index < size; ++index)
dst.copyCoeff(index, src);
ei_unaligned_assign_impl<>::run(src,dst,alignedEnd,size);
}
};
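
The MSVC work-around above relies on a bool-template dispatch: when the destination is statically known to be aligned, the scalar prologue resolves to the empty primary template and disappears; otherwise the scalar loop is moved into a function MSVC is told not to inline, so it cannot spoil the optimization of the packet loop. A self-contained sketch of the same pattern (plain float arrays for illustration, not Eigen code):

#include <cstdio>
#include <vector>

#ifdef _MSC_VER
  #define SKETCH_DONT_INLINE __declspec(noinline)
#else
  #define SKETCH_DONT_INLINE __attribute__((noinline))
#endif

// Primary template: destination known to be aligned, the scalar prologue is a
// no-op and the compiler removes the call entirely.
template <bool IsAligned = false>
struct unaligned_copy
{
  static inline void run(const float*, float*, int, int) {}
};

// "Maybe unaligned" case: the scalar loop lives in a non-inlined function so it
// cannot interfere with how the caller's vectorized main loop is optimized.
template <>
struct unaligned_copy<false>
{
  static SKETCH_DONT_INLINE void run(const float* src, float* dst, int start, int end)
  {
    for (int i = start; i < end; ++i)
      dst[i] = src[i];
  }
};

int main()
{
  std::vector<float> src(16, 1.f), dst(16, 0.f);
  unaligned_copy<false>::run(src.data(), dst.data(), 0, 3);   // unaligned head
  // ... the packet loop over the aligned middle part would go here ...
  unaligned_copy<false>::run(src.data(), dst.data(), 14, 16); // unaligned tail
  std::printf("%g %g\n", dst[0], dst[15]);
  return 0;
}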

View File

@ -80,23 +80,20 @@ class CwiseNullaryOp : ei_no_assignment_operator,
}
template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet(int, int) const
EIGEN_STRONG_INLINE PacketScalar packet(int row, int col) const
{
return m_functor.packetOp();
return m_functor.packetOp(row, col);
}
EIGEN_STRONG_INLINE const Scalar coeff(int index) const
{
if(RowsAtCompileTime == 1)
return m_functor(0, index);
else
return m_functor(index, 0);
return m_functor(index);
}
template<int LoadMode>
EIGEN_STRONG_INLINE PacketScalar packet(int) const
EIGEN_STRONG_INLINE PacketScalar packet(int index) const
{
return m_functor.packetOp();
return m_functor.packetOp(index);
}
protected:
@ -228,6 +225,49 @@ DenseBase<Derived>::Constant(const Scalar& value)
return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, ei_scalar_constant_op<Scalar>(value));
}
/**
* \brief Sets a linearly spaced vector.
*
* The function generates 'size' equally spaced values in the closed interval [low,high].
* This particular version of LinSpaced() uses sequential access, i.e. vector access is
* assumed to be a(0), a(1), ..., a(size-1). This assumption allows for better vectorization
* and yields faster code than the random access version.
*
* \only_for_vectors
*
* Example: \include DenseBase_LinSpaced_seq.cpp
* Output: \verbinclude DenseBase_LinSpaced_seq.out
*
* \sa setLinSpaced(Scalar,Scalar,int), LinSpaced(Scalar,Scalar,int), CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::SequentialLinSpacedReturnType
DenseBase<Derived>::LinSpaced(Sequential_t, Scalar low, Scalar high, int size)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return NullaryExpr(size, ei_linspaced_op<Scalar,false>(low,high,size));
}
/**
* \brief Sets a linearly spaced vector.
*
* The function generates 'size' equally spaced values in the closed interval [low,high].
*
* \only_for_vectors
*
* Example: \include DenseBase_LinSpaced.cpp
* Output: \verbinclude DenseBase_LinSpaced.out
*
* \sa setLinSpaced(Scalar,Scalar,int), LinSpaced(Sequential_t,Scalar,Scalar,int), CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE const typename DenseBase<Derived>::RandomAccessLinSpacedReturnType
DenseBase<Derived>::LinSpaced(Scalar low, Scalar high, int size)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return NullaryExpr(size, ei_linspaced_op<Scalar,true>(low,high,size));
}
/** \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */
template<typename Derived>
bool DenseBase<Derived>::isApproxToConstant
@ -305,6 +345,24 @@ DenseStorageBase<Derived,_Base,_Options>::setConstant(int rows, int cols, const
return setConstant(value);
}
/**
* \brief Sets a linearly spaced vector.
*
* The function generates 'size' equally spaced values in the closed interval [low,high].
*
* \only_for_vectors
*
* Example: \include DenseBase_setLinSpaced.cpp
* Output: \verbinclude DenseBase_setLinSpaced.out
*
* \sa CwiseNullaryOp
*/
template<typename Derived>
EIGEN_STRONG_INLINE Derived& DenseBase<Derived>::setLinSpaced(Scalar low, Scalar high, int size)
{
EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived)
return derived() = Derived::NullaryExpr(size, ei_linspaced_op<Scalar,false>(low,high,size));
}
// zero:

View File

@ -192,11 +192,15 @@ template<typename Derived> class DenseBase
/** \internal Represents a matrix with all coefficients equal to one another*/
typedef CwiseNullaryOp<ei_scalar_constant_op<Scalar>,Derived> ConstantReturnType;
/** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. */
typedef CwiseNullaryOp<ei_linspaced_op<Scalar,false>,Derived> SequentialLinSpacedReturnType;
/** \internal Represents a vector with linearly spaced coefficients that allows random access. */
typedef CwiseNullaryOp<ei_linspaced_op<Scalar,true>,Derived> RandomAccessLinSpacedReturnType;
/** \internal the return type of MatrixBase::eigenvalues() */
typedef Matrix<typename NumTraits<typename ei_traits<Derived>::Scalar>::Real, ei_traits<Derived>::ColsAtCompileTime, 1> EigenvaluesReturnType;
/** \internal expression tyepe of a column */
/** \internal expression type of a column */
typedef Block<Derived, ei_traits<Derived>::RowsAtCompileTime, 1> ColXpr;
/** \internal expression tyepe of a column */
/** \internal expression type of a row */
typedef Block<Derived, 1, ei_traits<Derived>::ColsAtCompileTime> RowXpr;
#endif // not EIGEN_PARSED_BY_DOXYGEN
@ -343,6 +347,11 @@ template<typename Derived> class DenseBase
static const ConstantReturnType
Constant(const Scalar& value);
static const SequentialLinSpacedReturnType
LinSpaced(Sequential_t, Scalar low, Scalar high, int size);
static const RandomAccessLinSpacedReturnType
LinSpaced(Scalar low, Scalar high, int size);
template<typename CustomNullaryOp>
static const CwiseNullaryOp<CustomNullaryOp, Derived>
NullaryExpr(int rows, int cols, const CustomNullaryOp& func);
@ -362,11 +371,11 @@ template<typename Derived> class DenseBase
void fill(const Scalar& value);
Derived& setConstant(const Scalar& value);
Derived& setLinSpaced(Scalar low, Scalar high, int size);
Derived& setZero();
Derived& setOnes();
Derived& setRandom();
template<typename OtherDerived>
bool isApprox(const DenseBase<OtherDerived>& other,
RealScalar prec = dummy_precision<Scalar>()) const;

View File

@ -437,7 +437,7 @@ struct ei_scalar_constant_op {
EIGEN_STRONG_INLINE ei_scalar_constant_op(const ei_scalar_constant_op& other) : m_other(other.m_other) { }
EIGEN_STRONG_INLINE ei_scalar_constant_op(const Scalar& other) : m_other(other) { }
EIGEN_STRONG_INLINE const Scalar operator() (int, int = 0) const { return m_other; }
EIGEN_STRONG_INLINE const PacketScalar packetOp() const { return ei_pset1(m_other); }
EIGEN_STRONG_INLINE const PacketScalar packetOp(int, int = 0) const { return ei_pset1(m_other); }
const Scalar m_other;
};
template<typename Scalar>
@ -452,6 +452,75 @@ template<typename Scalar>
struct ei_functor_traits<ei_scalar_identity_op<Scalar> >
{ enum { Cost = NumTraits<Scalar>::AddCost, PacketAccess = false, IsRepeatable = true }; };
template <typename Scalar, bool RandomAccess> struct ei_linspaced_op_impl;
// linear access for packet ops:
// 1) initialization
// base = [low, ..., low] + ([step, ..., step] * [-size, ..., -1])
// 2) each step
// base += [size*step, ..., size*step]
template <typename Scalar>
struct ei_linspaced_op_impl<Scalar,false>
{
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
ei_linspaced_op_impl(Scalar low, Scalar step) :
m_low(low), m_step(step),
m_packetStep(ei_pset1(ei_packet_traits<Scalar>::size*step)),
m_base(ei_padd(ei_pset1(low),ei_pmul(ei_pset1(step),ei_plset<Scalar>(-ei_packet_traits<Scalar>::size)))) {}
EIGEN_STRONG_INLINE const Scalar operator() (int i) const { return m_low+i*m_step; }
EIGEN_STRONG_INLINE const PacketScalar packetOp(int) const { return m_base = ei_padd(m_base,m_packetStep); }
const Scalar m_low;
const Scalar m_step;
const PacketScalar m_packetStep;
mutable PacketScalar m_base;
};
// random access for packet ops:
// 1) each step
// [low, ..., low] + ( [step, ..., step] * ( [i, ..., i] + [0, ..., size-1] ) )
template <typename Scalar>
struct ei_linspaced_op_impl<Scalar,true>
{
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
ei_linspaced_op_impl(Scalar low, Scalar step) :
m_low(low), m_step(step),
m_lowPacket(ei_pset1(m_low)), m_stepPacket(ei_pset1(m_step)), m_interPacket(ei_plset<Scalar>(0)) {}
EIGEN_STRONG_INLINE const Scalar operator() (int i) const { return m_low+i*m_step; }
EIGEN_STRONG_INLINE const PacketScalar packetOp(int i) const
{ return ei_padd(m_lowPacket, ei_pmul(m_stepPacket, ei_padd(ei_pset1<Scalar>(i),m_interPacket))); }
const Scalar m_low;
const Scalar m_step;
const PacketScalar m_lowPacket;
const PacketScalar m_stepPacket;
const PacketScalar m_interPacket;
};
// ----- Linspace functor ----------------------------------------------------------------
// Forward declaration (we default to random access, which does not really give
// us a speed gain when using packet access, but it allows the functor to be used
// in nested expressions).
template <typename Scalar, bool RandomAccess = true> struct ei_linspaced_op;
template <typename Scalar, bool RandomAccess> struct ei_functor_traits< ei_linspaced_op<Scalar,RandomAccess> >
{ enum { Cost = 1, PacketAccess = ei_packet_traits<Scalar>::size>1, IsRepeatable = true }; };
template <typename Scalar, bool RandomAccess> struct ei_linspaced_op
{
typedef typename ei_packet_traits<Scalar>::type PacketScalar;
ei_linspaced_op(Scalar low, Scalar high, int num_steps) : impl(low, (high-low)/(num_steps-1)) {}
EIGEN_STRONG_INLINE const Scalar operator() (int i, int = 0) const { return impl(i); }
EIGEN_STRONG_INLINE const PacketScalar packetOp(int i, int = 0) const { return impl.packetOp(i); }
// This proxy object handles the required temporaries and the different
// implementations (random vs. sequential access), as well as the correct
// piping to size 2/4 packet operations.
const ei_linspaced_op_impl<Scalar,RandomAccess> impl;
};
// allow to add new functors and specializations of ei_functor_traits from outside Eigen.
// this macro is really needed because ei_functor_traits must be specialized after it is declared but before it is used...
#ifdef EIGEN_FUNCTORS_PLUGIN
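
To make the packet arithmetic described in the comments above concrete, here is a scalar simulation of both strategies (illustration only, assuming a 4-wide packet, low = 0 and step = 1; no SIMD involved and no Eigen code):

#include <cstdio>

int main()
{
  const int   packet_size = 4;
  const float low = 0.f, step = 1.f;

  // Sequential access: base starts at low + step * [-4, -3, -2, -1]; every call
  // first adds [4*step, ..., 4*step] and then returns the packet.
  float base[4];
  for (int k = 0; k < packet_size; ++k) base[k] = low + step * (k - packet_size);
  for (int call = 0; call < 2; ++call) {
    for (int k = 0; k < packet_size; ++k) base[k] += packet_size * step;
    std::printf("sequential call %d: %g %g %g %g\n",
                call, base[0], base[1], base[2], base[3]);
  }
  // -> call 0: 0 1 2 3, call 1: 4 5 6 7

  // Random access: packetOp(i) = low + step * (i + [0, 1, 2, 3]), independent of
  // any previous call, so it also works when the expression is not traversed linearly.
  for (int i = 0; i < 8; i += packet_size) {
    float p[4];
    for (int k = 0; k < packet_size; ++k) p[k] = low + step * (i + k);
    std::printf("random access packetOp(%d): %g %g %g %g\n", i, p[0], p[1], p[2], p[3]);
  }
  return 0;
}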

View File

@ -157,6 +157,10 @@ ei_ploadu(const Scalar* from) { return *from; }
template<typename Scalar> inline typename ei_packet_traits<Scalar>::type
ei_pset1(const Scalar& a) { return a; }
/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */
template<typename Scalar> inline typename ei_packet_traits<Scalar>::type
ei_plset(const Scalar& a) { return a; }
/** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */
template<typename Scalar, typename Packet> inline void ei_pstore(Scalar* to, const Packet& from)
{ (*to) = from; }

View File

@ -63,7 +63,7 @@ template<typename MatrixType> class Transpose
public:
typedef typename TransposeImpl<MatrixType,typename ei_traits<MatrixType>::StorageType>::Base Base;
EIGEN_GENERIC_PUBLIC_INTERFACE_NEW(Transpose)
EIGEN_GENERIC_PUBLIC_INTERFACE_NEW(Transpose)
inline Transpose(const MatrixType& matrix) : m_matrix(matrix) {}

View File

@ -91,6 +91,10 @@ template<> EIGEN_STRONG_INLINE Packet2d ei_pset1<double>(const double& from) { r
#endif
template<> EIGEN_STRONG_INLINE Packet4i ei_pset1<int>(const int& from) { return _mm_set1_epi32(from); }
template<> EIGEN_STRONG_INLINE Packet4f ei_plset<float>(const float& a) { return _mm_add_ps(ei_pset1(a), _mm_set_ps(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet2d ei_plset<double>(const double& a) { return _mm_add_pd(ei_pset1(a),_mm_set_pd(1,0)); }
template<> EIGEN_STRONG_INLINE Packet4i ei_plset<int>(const int& a) { return _mm_add_epi32(ei_pset1(a),_mm_set_epi32(3,2,1,0)); }
template<> EIGEN_STRONG_INLINE Packet4f ei_padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); }
template<> EIGEN_STRONG_INLINE Packet2d ei_padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); }
template<> EIGEN_STRONG_INLINE Packet4i ei_padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_add_epi32(a,b); }
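
To see what the new ei_plset specializations compute, a small standalone SSE sketch (not from the commit; it repeats the Packet4f arithmetic above and assumes an SSE2-capable compiler):

#include <emmintrin.h>  // SSE2 intrinsics
#include <cstdio>

int main()
{
  // Same arithmetic as the Packet4f specialization above:
  // plset(a) = set1(a) + (0,1,2,3), i.e. coefficients a, a+1, a+2, a+3.
  const float a = 2.5f;
  __m128 p = _mm_add_ps(_mm_set1_ps(a), _mm_set_ps(3.f, 2.f, 1.f, 0.f));

  float out[4];
  _mm_storeu_ps(out, p);
  std::printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // 2.5 3.5 4.5 5.5
  return 0;
}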

View File

@ -224,6 +224,11 @@ namespace {
EIGEN_UNUSED NoChange_t NoChange;
}
struct Sequential_t {};
namespace {
EIGEN_UNUSED Sequential_t Sequential;
}
struct Default_t {};
namespace {
EIGEN_UNUSED Default_t Default;

View File

@ -0,0 +1,2 @@
cout << VectorXi::LinSpaced(7,10,4) << endl;
cout << VectorXd::LinSpaced(0.0,1.0,5) << endl;

View File

@ -0,0 +1,2 @@
cout << VectorXi::LinSpaced(Sequential,7,10,4).transpose() << endl;
cout << VectorXd::LinSpaced(Sequential,0.0,1.0,5).transpose() << endl;

View File

@ -0,0 +1,3 @@
VectorXf v;
v.setLinSpaced(0.5f,1.5f,5).transpose();
cout << v << endl;

View File

@ -46,6 +46,54 @@ bool equalsIdentity(const MatrixType& A)
return offDiagOK && diagOK;
}
template<typename VectorType>
void testVectorType(const VectorType& base)
{
typedef typename ei_traits<VectorType>::Scalar Scalar;
Scalar low = ei_random(-500,500);
Scalar high = ei_random(-500,500);
if (low>high) std::swap(low,high);
const int size = base.size();
const Scalar step = (high-low)/(size-1);
// check whether the result yields what we expect it to do
VectorType m(base);
m.setLinSpaced(low,high,size);
VectorType n(size);
for (int i=0; i<size; ++i)
n(i) = low+i*step;
VERIFY( (m-n).norm() < std::numeric_limits<Scalar>::epsilon()*10e3 );
// random access version
m = VectorType::LinSpaced(low,high,size);
VERIFY( (m-n).norm() < std::numeric_limits<Scalar>::epsilon()*10e3 );
// These guys sometimes fail! This is not good. Any ideas how to fix them!?
//VERIFY( m(m.size()-1) == high );
//VERIFY( m(0) == low );
// sequential access version
m = VectorType::LinSpaced(Sequential,low,high,size);
VERIFY( (m-n).norm() < std::numeric_limits<Scalar>::epsilon()*10e3 );
// These guys sometimes fail! This is not good. Any ideas how to fix them!?
//VERIFY( m(m.size()-1) == high );
//VERIFY( m(0) == low );
// check whether everything works with row and col major vectors
Matrix<Scalar,Dynamic,1> row_vector(size);
Matrix<Scalar,1,Dynamic> col_vector(size);
row_vector.setLinSpaced(low,high,size);
col_vector.setLinSpaced(low,high,size);
VERIFY( (row_vector-col_vector.transpose()).norm() < 1e-10 );
Matrix<Scalar,Dynamic,1> size_changer(size+50);
size_changer.setLinSpaced(low,high,size);
VERIFY( size_changer.size() == size );
}
template<typename MatrixType>
void testMatrixType(const MatrixType& m)
{
@ -63,4 +111,10 @@ void test_nullary()
CALL_SUBTEST_1( testMatrixType(Matrix2d()) );
CALL_SUBTEST_2( testMatrixType(MatrixXcf(50,50)) );
CALL_SUBTEST_3( testMatrixType(MatrixXf(5,7)) );
CALL_SUBTEST_4( testVectorType(VectorXd(51)) );
CALL_SUBTEST_5( testVectorType(VectorXd(41)) );
CALL_SUBTEST_6( testVectorType(Vector3d()) );
CALL_SUBTEST_7( testVectorType(VectorXf(51)) );
CALL_SUBTEST_8( testVectorType(VectorXf(41)) );
CALL_SUBTEST_9( testVectorType(Vector3f()) );
}
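
Regarding the commented-out endpoint checks in testVectorType above: the coefficients are produced as low + i*step with step = (high-low)/(size-1), so the last value is a floating-point reconstruction of high that can differ from it in the final bits. A minimal demonstration of the effect (not part of the test suite; the chosen low/high are arbitrary):

#include <cstdio>

int main()
{
  // step = (high - low)/(size - 1); the last generated coefficient is
  // low + (size - 1)*step, which need not be bit-identical to high.
  const float low = -3.f, high = 7.1f;
  const int   size = 51;
  const float step = (high - low) / float(size - 1);
  const float last = low + float(size - 1) * step;
  std::printf("high = %.9g  last = %.9g  diff = %.9g\n", high, last, high - last);
  return 0;
}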