From 6245591349bdf013bc421e6887d98164cdd15cd0 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Fri, 7 Aug 2015 19:27:59 +0200 Subject: [PATCH] Fix prototype of plset and generalize linspace functor. --- Eigen/src/Core/CwiseNullaryOp.h | 10 +++++----- Eigen/src/Core/DenseBase.h | 4 ++-- Eigen/src/Core/GenericPacketMath.h | 4 ++-- Eigen/src/Core/arch/AVX/PacketMath.h | 4 ++-- Eigen/src/Core/arch/AltiVec/PacketMath.h | 6 +++--- Eigen/src/Core/arch/CUDA/PacketMath.h | 4 ++-- Eigen/src/Core/arch/NEON/PacketMath.h | 6 +++--- Eigen/src/Core/arch/SSE/PacketMath.h | 8 +++----- Eigen/src/Core/functors/NullaryFunctors.h | 14 +++++++------- test/packetmath.cpp | 2 +- 10 files changed, 30 insertions(+), 32 deletions(-) diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index 395861c0c..2bc6933d9 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -245,7 +245,7 @@ EIGEN_STRONG_INLINE const typename DenseBase::SequentialLinSpacedReturn DenseBase::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return DenseBase::NullaryExpr(size, internal::linspaced_op(low,high,size)); + return DenseBase::NullaryExpr(size, internal::linspaced_op(low,high,size)); } /** @@ -258,7 +258,7 @@ DenseBase::LinSpaced(Sequential_t, const Scalar& low, const Scalar& hig { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) - return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op(low,high,Derived::SizeAtCompileTime)); + return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op(low,high,Derived::SizeAtCompileTime)); } /** @@ -279,7 +279,7 @@ EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedRetu DenseBase::LinSpaced(Index size, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return DenseBase::NullaryExpr(size, internal::linspaced_op(low,high,size)); + return DenseBase::NullaryExpr(size, internal::linspaced_op(low,high,size)); } /** @@ -292,7 +292,7 @@ DenseBase::LinSpaced(const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) - return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op(low,high,Derived::SizeAtCompileTime)); + return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::linspaced_op(low,high,Derived::SizeAtCompileTime)); } /** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */ @@ -391,7 +391,7 @@ template EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(Index newSize, const Scalar& low, const Scalar& high) { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) - return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op(low,high,newSize)); + return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op(low,high,newSize)); } /** diff --git a/Eigen/src/Core/DenseBase.h b/Eigen/src/Core/DenseBase.h index dc22a128e..6051f01ac 100644 --- a/Eigen/src/Core/DenseBase.h +++ b/Eigen/src/Core/DenseBase.h @@ -271,9 +271,9 @@ template class DenseBase /** \internal Represents a matrix with all coefficients equal to one another*/ typedef CwiseNullaryOp,PlainObject> ConstantReturnType; /** \internal Represents a vector with linearly spaced coefficients that allows sequential access only. */ - typedef CwiseNullaryOp,PlainObject> SequentialLinSpacedReturnType; + typedef CwiseNullaryOp,PlainObject> SequentialLinSpacedReturnType; /** \internal Represents a vector with linearly spaced coefficients that allows random access. */ - typedef CwiseNullaryOp,PlainObject> RandomAccessLinSpacedReturnType; + typedef CwiseNullaryOp,PlainObject> RandomAccessLinSpacedReturnType; /** \internal the return type of MatrixBase::eigenvalues() */ typedef Matrix::Scalar>::Real, internal::traits::ColsAtCompileTime, 1> EigenvaluesReturnType; diff --git a/Eigen/src/Core/GenericPacketMath.h b/Eigen/src/Core/GenericPacketMath.h index c656f61bf..c767757b4 100644 --- a/Eigen/src/Core/GenericPacketMath.h +++ b/Eigen/src/Core/GenericPacketMath.h @@ -261,8 +261,8 @@ inline void pbroadcast2(const typename unpacket_traits::type *a, } /** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */ -template inline typename packet_traits::type -plset(const Scalar& a) { return a; } +template inline Packet +plset(const typename unpacket_traits::type& a) { return a; } /** \internal copy the packet \a from to \a *to, \a to must be 16 bytes aligned */ template EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from) diff --git a/Eigen/src/Core/arch/AVX/PacketMath.h b/Eigen/src/Core/arch/AVX/PacketMath.h index 9f275a380..32c121ab6 100644 --- a/Eigen/src/Core/arch/AVX/PacketMath.h +++ b/Eigen/src/Core/arch/AVX/PacketMath.h @@ -111,8 +111,8 @@ template<> EIGEN_STRONG_INLINE Packet8i pset1(const int& from) { re template<> EIGEN_STRONG_INLINE Packet8f pload1(const float* from) { return _mm256_broadcast_ss(from); } template<> EIGEN_STRONG_INLINE Packet4d pload1(const double* from) { return _mm256_broadcast_sd(from); } -template<> EIGEN_STRONG_INLINE Packet8f plset(const float& a) { return _mm256_add_ps(_mm256_set1_ps(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); } -template<> EIGEN_STRONG_INLINE Packet4d plset(const double& a) { return _mm256_add_pd(_mm256_set1_pd(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); } +template<> EIGEN_STRONG_INLINE Packet8f plset(const float& a) { return _mm256_add_ps(_mm256_set1_ps(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); } +template<> EIGEN_STRONG_INLINE Packet4d plset(const double& a) { return _mm256_add_pd(_mm256_set1_pd(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); } template<> EIGEN_STRONG_INLINE Packet8f padd(const Packet8f& a, const Packet8f& b) { return _mm256_add_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet4d padd(const Packet4d& a, const Packet4d& b) { return _mm256_add_pd(a,b); } diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index 7ab777009..485a16558 100755 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -289,8 +289,8 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter(int* to, const to[3*stride] = ai[3]; } -template<> EIGEN_STRONG_INLINE Packet4f plset(const float& a) { return vec_add(pset1(a), p4f_COUNTDOWN); } -template<> EIGEN_STRONG_INLINE Packet4i plset(const int& a) { return vec_add(pset1(a), p4i_COUNTDOWN); } +template<> EIGEN_STRONG_INLINE Packet4f plset(const float& a) { return vec_add(pset1(a), p4f_COUNTDOWN); } +template<> EIGEN_STRONG_INLINE Packet4i plset(const int& a) { return vec_add(pset1(a), p4i_COUNTDOWN); } template<> EIGEN_STRONG_INLINE Packet4f padd(const Packet4f& a, const Packet4f& b) { return vec_add(a,b); } template<> EIGEN_STRONG_INLINE Packet4i padd(const Packet4i& a, const Packet4i& b) { return vec_add(a,b); } @@ -807,7 +807,7 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter(double* to, to[0*stride] = af[0]; to[1*stride] = af[1]; } -template<> EIGEN_STRONG_INLINE Packet2d plset(const double& a) { return vec_add(pset1(a), p2d_COUNTDOWN); } +template<> EIGEN_STRONG_INLINE Packet2d plset(const double& a) { return vec_add(pset1(a), p2d_COUNTDOWN); } template<> EIGEN_STRONG_INLINE Packet2d padd(const Packet2d& a, const Packet2d& b) { return vec_add(a,b); } diff --git a/Eigen/src/Core/arch/CUDA/PacketMath.h b/Eigen/src/Core/arch/CUDA/PacketMath.h index 8b767a4e7..195debd0d 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMath.h +++ b/Eigen/src/Core/arch/CUDA/PacketMath.h @@ -76,10 +76,10 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 pset1(const do } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset(const float& a) { +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 plset(const float& a) { return make_float4(a, a+1, a+2, a+3); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plset(const double& a) { +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double2 plset(const double& a) { return make_double2(a, a+1); } diff --git a/Eigen/src/Core/arch/NEON/PacketMath.h b/Eigen/src/Core/arch/NEON/PacketMath.h index a72f91f21..fc4c0d03a 100644 --- a/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/Eigen/src/Core/arch/NEON/PacketMath.h @@ -120,12 +120,12 @@ template<> struct unpacket_traits { typedef int type; enum {size=4, template<> EIGEN_STRONG_INLINE Packet4f pset1(const float& from) { return vdupq_n_f32(from); } template<> EIGEN_STRONG_INLINE Packet4i pset1(const int& from) { return vdupq_n_s32(from); } -template<> EIGEN_STRONG_INLINE Packet4f plset(const float& a) +template<> EIGEN_STRONG_INLINE Packet4f plset(const float& a) { Packet4f countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3); return vaddq_f32(pset1(a), countdown); } -template<> EIGEN_STRONG_INLINE Packet4i plset(const int& a) +template<> EIGEN_STRONG_INLINE Packet4i plset(const int& a) { Packet4i countdown = EIGEN_INIT_NEON_PACKET4(0, 1, 2, 3); return vaddq_s32(pset1(a), countdown); @@ -574,7 +574,7 @@ template<> struct unpacket_traits { typedef double type; enum {size=2 template<> EIGEN_STRONG_INLINE Packet2d pset1(const double& from) { return vdupq_n_f64(from); } -template<> EIGEN_STRONG_INLINE Packet2d plset(const double& a) +template<> EIGEN_STRONG_INLINE Packet2d plset(const double& a) { Packet2d countdown = EIGEN_INIT_NEON_PACKET2(0, 1); return vaddq_f64(pset1(a), countdown); diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index d5e852799..7eb7278af 100755 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -172,11 +172,9 @@ template<> EIGEN_STRONG_INLINE Packet4f pload1(const float *from) { } #endif -#ifndef EIGEN_VECTORIZE_AVX -template<> EIGEN_STRONG_INLINE Packet4f plset(const float& a) { return _mm_add_ps(pset1(a), _mm_set_ps(3,2,1,0)); } -template<> EIGEN_STRONG_INLINE Packet2d plset(const double& a) { return _mm_add_pd(pset1(a),_mm_set_pd(1,0)); } -#endif -template<> EIGEN_STRONG_INLINE Packet4i plset(const int& a) { return _mm_add_epi32(pset1(a),_mm_set_epi32(3,2,1,0)); } +template<> EIGEN_STRONG_INLINE Packet4f plset(const float& a) { return _mm_add_ps(pset1(a), _mm_set_ps(3,2,1,0)); } +template<> EIGEN_STRONG_INLINE Packet2d plset(const double& a) { return _mm_add_pd(pset1(a),_mm_set_pd(1,0)); } +template<> EIGEN_STRONG_INLINE Packet4i plset(const int& a) { return _mm_add_epi32(pset1(a),_mm_set_epi32(3,2,1,0)); } template<> EIGEN_STRONG_INLINE Packet4f padd(const Packet4f& a, const Packet4f& b) { return _mm_add_ps(a,b); } template<> EIGEN_STRONG_INLINE Packet2d padd(const Packet2d& a, const Packet2d& b) { return _mm_add_pd(a,b); } diff --git a/Eigen/src/Core/functors/NullaryFunctors.h b/Eigen/src/Core/functors/NullaryFunctors.h index 76a3445ce..130f20868 100644 --- a/Eigen/src/Core/functors/NullaryFunctors.h +++ b/Eigen/src/Core/functors/NullaryFunctors.h @@ -53,8 +53,8 @@ struct linspaced_op_impl { linspaced_op_impl(const Scalar& low, const Scalar& step) : m_low(low), m_step(step), - m_packetStep(pset1(packet_traits::size*step)), - m_base(padd(pset1(low), pmul(pset1(step),plset(-packet_traits::size)))) {} + m_packetStep(pset1(unpacket_traits::size*step)), + m_base(padd(pset1(low), pmul(pset1(step),plset(-unpacket_traits::size)))) {} template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const @@ -80,7 +80,7 @@ struct linspaced_op_impl { linspaced_op_impl(const Scalar& low, const Scalar& step) : m_low(low), m_step(step), - m_lowPacket(pset1(m_low)), m_stepPacket(pset1(m_step)), m_interPacket(plset(0)) {} + m_lowPacket(pset1(m_low)), m_stepPacket(pset1(m_step)), m_interPacket(plset(0)) {} template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (Index i) const { return m_low+i*m_step; } @@ -101,10 +101,10 @@ struct linspaced_op_impl // Forward declaration (we default to random access which does not really give // us a speed gain when using packet access but it allows to use the functor in // nested expressions). -template struct linspaced_op; -template struct functor_traits< linspaced_op > +template struct linspaced_op; +template struct functor_traits< linspaced_op > { enum { Cost = 1, PacketAccess = packet_traits::HasSetLinear, IsRepeatable = true }; }; -template struct linspaced_op +template struct linspaced_op { linspaced_op(const Scalar& low, const Scalar& high, Index num_steps) : impl((num_steps==1 ? high : low), (num_steps==1 ? Scalar() : (high-low)/Scalar(num_steps-1))) {} @@ -136,7 +136,7 @@ template struct linspaced_op // implementations (random vs. sequential access) as well as the // correct piping to size 2/4 packet operations. // TODO find a way to make the packet type configurable - const linspaced_op_impl::type,RandomAccess> impl; + const linspaced_op_impl impl; }; // all functors allow linear access, except scalar_identity_op. So we fix here a quick meta diff --git a/test/packetmath.cpp b/test/packetmath.cpp index 3cf82eae0..b211e194a 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -414,7 +414,7 @@ template void packetmath_notcomplex() for (int i=0; i(data1[0])); VERIFY(areApprox(ref, data2, PacketSize) && "internal::plset"); }