From c9560df4a0c274eb5011f0596682a3cf3274363e Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Mon, 23 Jun 2008 22:00:18 +0000 Subject: [PATCH] * add ei_pdiv intrinsic, make quotient functor vectorizable * add vdw benchmark from Tim's real-world use case --- Eigen/src/Core/DummyPacketMath.h | 3 + Eigen/src/Core/Functors.h | 18 +++-- Eigen/src/Core/Sum.h | 2 +- Eigen/src/Core/arch/AltiVec/PacketMath.h | 2 + Eigen/src/Core/arch/SSE/PacketMath.h | 3 + bench/vdw_new.cpp | 84 ++++++++++++++++++++++++ 6 files changed, 105 insertions(+), 7 deletions(-) create mode 100644 bench/vdw_new.cpp diff --git a/Eigen/src/Core/DummyPacketMath.h b/Eigen/src/Core/DummyPacketMath.h index b7d418a01..9de204df3 100644 --- a/Eigen/src/Core/DummyPacketMath.h +++ b/Eigen/src/Core/DummyPacketMath.h @@ -38,6 +38,9 @@ template inline Scalar ei_psub(const Scalar& a, const Scalar& /** \internal \returns a * b (coeff-wise) */ template inline Scalar ei_pmul(const Scalar& a, const Scalar& b) { return a * b; } +/** \internal \returns a / b (coeff-wise) */ +template inline Scalar ei_pdiv(const Scalar& a, const Scalar& b) { return a / b; } + /** \internal \returns a * b - c (coeff-wise) */ template inline Scalar ei_pmadd(const Scalar& a, const Scalar& b, const Scalar& c) { return ei_padd(ei_pmul(a, b),c); } diff --git a/Eigen/src/Core/Functors.h b/Eigen/src/Core/Functors.h index cda47fa25..7a2fe3945 100644 --- a/Eigen/src/Core/Functors.h +++ b/Eigen/src/Core/Functors.h @@ -131,12 +131,18 @@ struct ei_functor_traits > { * \sa class CwiseBinaryOp, MatrixBase::cwiseQuotient() */ template struct ei_scalar_quotient_op EIGEN_EMPTY_STRUCT { - inline const Scalar operator() (const Scalar& a, const Scalar& b) const { return a / b; } + inline const Scalar operator() (const Scalar& a, const Scalar& b) const { return a / b; } + template + inline const PacketScalar packetOp(const PacketScalar& a, const PacketScalar& b) const + { return ei_pdiv(a,b); } }; template -struct ei_functor_traits > -{ enum { Cost = 2 * NumTraits::MulCost, PacketAccess = false }; }; - +struct ei_functor_traits > { + enum { + Cost = 2 * NumTraits::MulCost, + PacketAccess = ei_packet_traits::size>1 + }; +}; // unary functors: @@ -179,7 +185,7 @@ template struct ei_scalar_abs2_op EIGEN_EMPTY_STRUCT { }; template struct ei_functor_traits > -{ enum { Cost = NumTraits::MulCost, PacketAccess = NumTraits::IsComplex==false && int(ei_packet_traits::size)>1 }; }; +{ enum { Cost = NumTraits::MulCost, PacketAccess = int(ei_packet_traits::size)>1 }; }; /** \internal * \brief Template functor to compute the conjugate of a complex value @@ -272,7 +278,7 @@ struct ei_functor_traits > * \brief Template functor to divide a scalar by a fixed other one * * This functor is used to implement the quotient of a matrix by - * a scalar where the scalar type is not a floating point type. + * a scalar where the scalar type is not necessarily a floating point type. * * \sa class CwiseUnaryOp, MatrixBase::operator/ */ diff --git a/Eigen/src/Core/Sum.h b/Eigen/src/Core/Sum.h index d638f0979..8b4b021b8 100644 --- a/Eigen/src/Core/Sum.h +++ b/Eigen/src/Core/Sum.h @@ -216,7 +216,7 @@ struct ei_sum_impl if(alignedSize == size) return res; } else // too small to vectorize anything. - // since this is dynamic-size hence inefficient anyway, don't try to optimize. + // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. { res = Scalar(0); } diff --git a/Eigen/src/Core/arch/AltiVec/PacketMath.h b/Eigen/src/Core/arch/AltiVec/PacketMath.h index b2627ae4b..35c43eb12 100644 --- a/Eigen/src/Core/arch/AltiVec/PacketMath.h +++ b/Eigen/src/Core/arch/AltiVec/PacketMath.h @@ -59,6 +59,8 @@ inline vector int ei_pmul(const vector int a, const vector int b) return vec_add( lowProduct, highProduct ); } +inline vector float ei_pdiv(const vector float a, const vector float b) { return vec_div(a,b); } + inline vector float ei_pmadd(const vector float a, const vector float b, const vector float c) { return vec_madd(a, b, c); } inline vector float ei_pmin(const vector float a, const vector float b) { return vec_min(a,b); } diff --git a/Eigen/src/Core/arch/SSE/PacketMath.h b/Eigen/src/Core/arch/SSE/PacketMath.h index 03fa6bce5..bfec50f1b 100644 --- a/Eigen/src/Core/arch/SSE/PacketMath.h +++ b/Eigen/src/Core/arch/SSE/PacketMath.h @@ -55,6 +55,9 @@ template<> inline __m128i ei_pmul(const __m128i& a, const __m128i& b) _mm_setr_epi32(0xffffffff,0,0xffffffff,0)), 4)); } +template<> inline __m128 ei_pdiv(const __m128& a, const __m128& b) { return _mm_div_ps(a,b); } +template<> inline __m128d ei_pdiv(const __m128d& a, const __m128d& b) { return _mm_div_pd(a,b); } + // for some weird raisons, it has to be overloaded for packet integer template<> inline __m128i ei_pmadd(const __m128i& a, const __m128i& b, const __m128i& c) { return ei_padd(ei_pmul(a,b), c); } diff --git a/bench/vdw_new.cpp b/bench/vdw_new.cpp new file mode 100644 index 000000000..b1237c4c7 --- /dev/null +++ b/bench/vdw_new.cpp @@ -0,0 +1,84 @@ +#include + +namespace Eigen { + +template struct pow12_op EIGEN_EMPTY_STRUCT { + inline const Scalar operator() (const Scalar& a) const + { + Scalar b = a*a*a; + Scalar c = b*b; + return c*c; + } + template + inline const PacketScalar packetOp(const PacketScalar& a) const + { + PacketScalar b = ei_pmul(a, ei_pmul(a, a)); + PacketScalar c = ei_pmul(b, b); + return ei_pmul(c, c); + } +}; + +template +struct ei_functor_traits > +{ + enum { + Cost = 4 * NumTraits::MulCost, + PacketAccess = int(ei_packet_traits::size) > 1 + }; +}; + +} // namespace Eigen + +using Eigen::pow12_op; +USING_PART_OF_NAMESPACE_EIGEN + +#ifndef SCALAR +#define SCALAR float +#endif + +#ifndef SIZE +#define SIZE 10000 +#endif + +#ifndef REPEAT +#define REPEAT 10000 +#endif + +typedef Matrix Vec; + +using namespace std; + +SCALAR E_VDW(const Vec &interactions1, const Vec &interactions2) +{ + return interactions2 + .cwiseQuotient(interactions1) + .cwise(pow12_op()) + .sum(); +} + +int main() +{ + // + // 1 2 3 4 ... (interactions) + // ka . . . . ... + // rab . . . . ... + // energy . . . . ... + // ... ... ... ... ... ... + // (variables + // for + // interaction) + // + Vec interactions1(SIZE), interactions2(SIZE); // SIZE is the number of vdw interactions in our system + // SetupCalculations() + SCALAR rab = 1.0; + interactions1.setConstant(2.4); + interactions2.setConstant(rab); + + // Energy() + SCALAR energy = 0.0; + for (unsigned int i = 0; i