Pulled latest updates from trunk

2025-09-27 08:43:14 +08:00 · 2016-01-21 17:17:56 -08:00 · 2016-01-21 17:17:56 -08:00 · 7b68cf2e0f
commit 7b68cf2e0f
parent c33479324c ee37eb4eed
4 changed files with 66 additions and 7 deletions
--- a/Eigen/src/Core/Dot.h
+++ b/Eigen/src/Core/Dot.h
@ -99,11 +99,13 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
 template<typename Derived>
 inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
 {
-  EIGEN_USING_STD_MATH(sqrt)
-  return sqrt(squaredNorm());
+  return numext::sqrt(squaredNorm());
 }

-/** \returns an expression of the quotient of *this by its own norm.
+/** \returns an expression of the quotient of \c *this by its own norm.
+  *
+  * \warning If the input vector is too small (i.e., this->norm()==0),
+  *          then this function returns a copy of the input.
  *
  * \only_for_vectors
  *
@ -115,19 +117,29 @@ MatrixBase<Derived>::normalized() const
 {
  typedef typename internal::nested_eval<Derived,2>::type _Nested;
  _Nested n(derived());
-  return n / n.norm();
+  RealScalar z = n.squaredNorm();
+  // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
+  if(z>RealScalar(0))
+    return n / numext::sqrt(z);
+  else
+    return n;
 }

 /** Normalizes the vector, i.e. divides it by its own norm.
  *
  * \only_for_vectors
  *
+  * \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged.
+  *
  * \sa norm(), normalized()
  */
 template<typename Derived>
 inline void MatrixBase<Derived>::normalize()
 {
-  *this /= norm();
+  RealScalar z = squaredNorm();
+  // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU
+  if(z>RealScalar(0))
+    derived() /= numext::sqrt(z);
 }

 //---------- implementation of other norms ----------
--- a/Eigen/src/Core/MathFunctions.h
+++ b/Eigen/src/Core/MathFunctions.h
@ -954,8 +954,8 @@ T (ceil)(const T& x)
  return ceil(x);
 }

-// Log base 2 for 32 bits positive integers.
-// Conveniently returns 0 for x==0.
+/** Log base 2 for 32 bits positive integers.
+  * Conveniently returns 0 for x==0. */
 inline int log2(int x)
 {
  eigen_assert(x>=0);
@ -969,6 +969,22 @@ inline int log2(int x)
  return table[(v * 0x07C4ACDDU) >> 27];
 }

+/** \returns the square root of \a x.
+  *
+  * It is essentially equivalent to \code using std::sqrt; return sqrt(x); \endcode,
+  * but slightly faster for float/double and some compilers (e.g., gcc), thanks to
+  * specializations when SSE is enabled.
+  *
+  * It's usage is justified in performance critical functions, like norm/normalize.
+  */
+template<typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+T sqrt(const T &x)
+{
+  EIGEN_USING_STD_MATH(sqrt);
+  return sqrt(x);
+}
+
 } // end namespace numext

 namespace internal {
--- a/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/Eigen/src/Core/arch/SSE/MathFunctions.h
@ -518,6 +518,28 @@ Packet2d prsqrt<Packet2d>(const Packet2d& x) {

 } // end namespace internal

+namespace numext {
+
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+float sqrt(const float &x)
+{
+  return internal::pfirst(internal::Packet4f(_mm_sqrt_ss(_mm_set_ss(x))));
+}
+
+template<>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+double sqrt(const double &x)
+{
+#if EIGEN_COMP_GNUC
+  return internal::pfirst(internal::Packet2d(__builtin_ia32_sqrtsd(_mm_set_sd(x))));
+#else
+  return internal::pfirst(internal::Packet2d(_mm_sqrt_pd(_mm_set_sd(x))));
+#endif
+}
+
+} // end namespace numex
+
 } // end namespace Eigen

 #endif // EIGEN_MATH_FUNCTIONS_SSE_H
--- a/test/adjoint.cpp
+++ b/test/adjoint.cpp
@ -42,6 +42,15 @@ template<> struct adjoint_specific<false> {
    VERIFY_IS_APPROX(v1, v1.norm() * v3);
    VERIFY_IS_APPROX(v3, v1.normalized());
    VERIFY_IS_APPROX(v3.norm(), RealScalar(1));
+
+    // check null inputs
+    VERIFY_IS_APPROX((v1*0).normalized(), (v1*0));
+    RealScalar very_small = (std::numeric_limits<RealScalar>::min)();
+    VERIFY( (v1*very_small).norm() == 0 );
+    VERIFY_IS_APPROX((v1*very_small).normalized(), (v1*very_small));
+    v3 = v1*very_small;
+    v3.normalize();
+    VERIFY_IS_APPROX(v3, (v1*very_small));
    
    // check compatibility of dot and adjoint
    ref = NumTraits<Scalar>::IsInteger ? 0 : (std::max)((std::max)(v1.norm(),v2.norm()),(std::max)((square * v2).norm(),(square.adjoint() * v1).norm()));