diff --git a/Eigen/src/Core/Dot.h b/Eigen/src/Core/Dot.h index c5040c67b..ce42854cd 100644 --- a/Eigen/src/Core/Dot.h +++ b/Eigen/src/Core/Dot.h @@ -99,8 +99,7 @@ EIGEN_STRONG_INLINE typename NumTraits::Scala template inline typename NumTraits::Scalar>::Real MatrixBase::norm() const { - EIGEN_USING_STD_MATH(sqrt) - return sqrt(squaredNorm()); + return numext::sqrt(squaredNorm()); } /** \returns an expression of the quotient of *this by its own norm. diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 4d5e1acb8..1c7b28a4b 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -954,8 +954,8 @@ T (ceil)(const T& x) return ceil(x); } -// Log base 2 for 32 bits positive integers. -// Conveniently returns 0 for x==0. +/** Log base 2 for 32-bit non-negative integers. + * Conveniently returns 0 for x==0. */ inline int log2(int x) { eigen_assert(x>=0); @@ -969,6 +969,22 @@ inline int log2(int x) return table[(v * 0x07C4ACDDU) >> 27]; } +/** \returns the square root of \a x. + * + * It is essentially equivalent to \code using std::sqrt; return sqrt(x); \endcode, + * but slightly faster for float/double and some compilers (e.g., gcc), thanks to + * specializations when SSE is enabled. + * + * Its usage is justified in performance-critical functions, like norm/normalize. 
+ */ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +T sqrt(const T &x) +{ + EIGEN_USING_STD_MATH(sqrt); + return sqrt(x); +} + } // end namespace numext namespace internal { diff --git a/Eigen/src/Core/arch/SSE/MathFunctions.h b/Eigen/src/Core/arch/SSE/MathFunctions.h index 3b8b7303f..0dd52f96e 100644 --- a/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -518,6 +518,28 @@ Packet2d prsqrt(const Packet2d& x) { } // end namespace internal +namespace numext { + +template<> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +float sqrt(const float &x) +{ + return internal::pfirst(_mm_sqrt_ss(_mm_set_ss(x))); +} + +template<> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +double sqrt(const double &x) +{ +#if EIGEN_COMP_GNUC + return internal::pfirst(__builtin_ia32_sqrtsd(_mm_set_sd(x))); +#else + return internal::pfirst(_mm_sqrt_pd(_mm_set_sd(x))); +#endif +} + +} // end namespace numext + } // end namespace Eigen #endif // EIGEN_MATH_FUNCTIONS_SSE_H