From 8dca9f97e38970b1f7fed6cb508c58d8ff39d526 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 21 Jan 2016 20:18:51 +0100 Subject: [PATCH 1/3] Add numext::sqrt function to enable custom optimized implementation. This changeset add two specializations for float/double on SSE. Those are mostly usefull with GCC for which std::sqrt add an extra and costly check on the result of _mm_sqrt_*. Clang does not add this burden. In this changeset, only DenseBase::norm() makes use of it. --- Eigen/src/Core/Dot.h | 3 +-- Eigen/src/Core/MathFunctions.h | 20 ++++++++++++++++++-- Eigen/src/Core/arch/SSE/MathFunctions.h | 22 ++++++++++++++++++++++ 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index c5040c67b..ce42854cd 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -99,8 +99,7 @@ EIGEN_STRONG_INLINE typename NumTraits::Scala template inline typename NumTraits::Scalar>::Real MatrixBase::norm() const { - EIGEN_USING_STD_MATH(sqrt) - return sqrt(squaredNorm()); + return numext::sqrt(squaredNorm()); } /** \returns an expression of the quotient of *this by its own norm. diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 4d5e1acb8..1c7b28a4b 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -954,8 +954,8 @@ T (ceil)(const T& x) return ceil(x); } -// Log base 2 for 32 bits positive integers. -// Conveniently returns 0 for x==0. +/** Log base 2 for 32 bits positive integers. + * Conveniently returns 0 for x==0. */ inline int log2(int x) { eigen_assert(x>=0); @@ -969,6 +969,22 @@ inline int log2(int x) return table[(v * 0x07C4ACDDU) >> 27]; } +/** \returns the square root of \a x. + * + * It is essentially equivalent to \code using std::sqrt; return sqrt(x); \endcode, + * but slightly faster for float/double and some compilers (e.g., gcc), thanks to + * specializations when SSE is enabled. + * + * It's usage is justified in performance critical functions, like norm/normalize. + */ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T sqrt(const T &x) +{ + EIGEN_USING_STD_MATH(sqrt); + return sqrt(x); +} + } // end namespace numext namespace internal { diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h index 3b8b7303f..0dd52f96e 100644 --- a/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -518,6 +518,28 @@ Packet2d prsqrt(const Packet2d& x) { } // end namespace internal +namespace numext { + +template<> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float sqrt(const float &x) +{ + return internal::pfirst(_mm_sqrt_ss(_mm_set_ss(x))); +} + +template<> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double sqrt(const double &x) +{ +#if EIGEN_COMP_GNUC + return internal::pfirst(__builtin_ia32_sqrtsd(_mm_set_sd(x))); +#else + return internal::pfirst(_mm_sqrt_pd(_mm_set_sd(x))); +#endif +} + +} // end namespace numex + } // end namespace Eigen #endif // EIGEN_MATH_FUNCTIONS_SSE_H From 7cae8918c019feabf6c143c430d0cd82c74aeec3 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 21 Jan 2016 20:30:32 +0100 Subject: [PATCH 2/3] Fix compilation on old gcc+AVX --- Eigen/src/Core/arch/SSE/MathFunctions.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h index 0dd52f96e..74f6abc37 100644 --- a/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -524,7 +524,7 @@ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float sqrt(const float &x) { - return internal::pfirst(_mm_sqrt_ss(_mm_set_ss(x))); + return internal::pfirst(internal::Packet4f(_mm_sqrt_ss(_mm_set_ss(x)))); } template<> @@ -532,9 +532,9 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double sqrt(const double &x) { #if EIGEN_COMP_GNUC - return internal::pfirst(__builtin_ia32_sqrtsd(_mm_set_sd(x))); + return internal::pfirst(internal::Packet2d(__builtin_ia32_sqrtsd(_mm_set_sd(x)))); #else - return internal::pfirst(_mm_sqrt_pd(_mm_set_sd(x))); + return internal::pfirst(internal::Packet2d(_mm_sqrt_pd(_mm_set_sd(x)))); #endif } From ee37eb4eed09fe35be2acc3699e80f49a44ea99a Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 21 Jan 2016 20:43:42 +0100 Subject: [PATCH 3/3] bug #977: avoid division by 0 in normalize() and normalized(). --- Eigen/src/Core/Dot.h | 19 ++++++++++++++++--- test/adjoint.cpp | 9 +++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index ce42854cd..221fc3224 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -102,7 +102,10 @@ inline typename NumTraits::Scalar>::Real Matr return numext::sqrt(squaredNorm()); } -/** \returns an expression of the quotient of *this by its own norm. +/** \returns an expression of the quotient of \c *this by its own norm. + * + * \warning If the input vector is too small (i.e., this->norm()==0), + * then this function returns a copy of the input. * * \only_for_vectors * @@ -114,19 +117,29 @@ MatrixBase::normalized() const { typedef typename internal::nested_eval::type _Nested; _Nested n(derived()); - return n / n.norm(); + RealScalar z = n.squaredNorm(); + // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU + if(z>RealScalar(0)) + return n / numext::sqrt(z); + else + return n; } /** Normalizes the vector, i.e. divides it by its own norm. * * \only_for_vectors * + * \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged. + * * \sa norm(), normalized() */ template inline void MatrixBase::normalize() { - *this /= norm(); + RealScalar z = squaredNorm(); + // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU + if(z>RealScalar(0)) + derived() /= numext::sqrt(z); } //---------- implementation of other norms ---------- diff --git a/test/adjoint.cpp b/test/adjoint.cpp index 3b2a53c91..b1e69c2e5 100644 --- a/test/adjoint.cpp +++ b/test/adjoint.cpp @@ -42,6 +42,15 @@ template<> struct adjoint_specific { VERIFY_IS_APPROX(v1, v1.norm() * v3); VERIFY_IS_APPROX(v3, v1.normalized()); VERIFY_IS_APPROX(v3.norm(), RealScalar(1)); + + // check null inputs + VERIFY_IS_APPROX((v1*0).normalized(), (v1*0)); + RealScalar very_small = (std::numeric_limits::min)(); + VERIFY( (v1*very_small).norm() == 0 ); + VERIFY_IS_APPROX((v1*very_small).normalized(), (v1*very_small)); + v3 = v1*very_small; + v3.normalize(); + VERIFY_IS_APPROX(v3, (v1*very_small)); // check compatibility of dot and adjoint ref = NumTraits::IsInteger ? 0 : (std::max)((std::max)(v1.norm(),v2.norm()),(std::max)((square * v2).norm(),(square.adjoint() * v1).norm()));