Add numext::sqrt function to enable custom optimized implementation.

This changeset adds two specializations for float/double on SSE. Those
are mostly useful with GCC, for which std::sqrt adds an extra and costly
check on the result of _mm_sqrt_*. Clang does not add this burden.

In this changeset, only MatrixBase::norm() makes use of it.
This commit is contained in:
Gael Guennebaud 2016-01-21 20:18:51 +01:00
parent 34340458cb
commit 8dca9f97e3
3 changed files with 41 additions and 4 deletions

View File

@ -99,8 +99,7 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
template<typename Derived>
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
{
  // The norm is the square root of squaredNorm(). The root is taken through
  // numext::sqrt rather than an unqualified sqrt so that optimized
  // specializations of numext::sqrt (e.g. the SSE float/double ones) are used
  // when they are available. The former std::sqrt-based statements have been
  // dropped: left in place they returned first and made this call unreachable.
  return numext::sqrt(squaredNorm());
}
/** \returns an expression of the quotient of *this by its own norm.

View File

@ -954,8 +954,8 @@ T (ceil)(const T& x)
return ceil(x);
}
// Log base 2 for 32 bits positive integers.
// Conveniently returns 0 for x==0.
/** Log base 2 for 32 bits positive integers.
* Conveniently returns 0 for x==0. */
inline int log2(int x)
{
eigen_assert(x>=0);
@ -969,6 +969,22 @@ inline int log2(int x)
return table[(v * 0x07C4ACDDU) >> 27];
}
/** \returns the square root of \a x.
  *
  * Generic implementation: it behaves like
  * \code using std::sqrt; return sqrt(x); \endcode
  * Specializations of this function may be provided elsewhere to replace the
  * generic path for specific scalar types (for instance for float/double when
  * SSE is enabled), which can be slightly faster with some compilers such as gcc.
  *
  * Its use is justified in performance critical functions, like norm/normalize.
  */
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T sqrt(const T &x)
{
  // Bring the standard sqrt into scope, then call it unqualified.
  EIGEN_USING_STD_MATH(sqrt)
  return sqrt(x);
}
} // end namespace numext
namespace internal {

View File

@ -518,6 +518,28 @@ Packet2d prsqrt<Packet2d>(const Packet2d& x) {
} // end namespace internal
namespace numext {
/** SSE specialization of numext::sqrt for \c float.
  *
  * \returns the square root of \a x, computed with the scalar SSE
  * square-root intrinsic instead of going through std::sqrt.
  */
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float sqrt(const float &x)
{
  // Place x in the lowest lane of an SSE register (upper lanes are zeroed).
  const __m128 lowLane = _mm_set_ss(x);
  // Scalar square root of the lowest lane only.
  const __m128 root = _mm_sqrt_ss(lowLane);
  // Read the lowest lane back as a plain float.
  return internal::pfirst(root);
}
/** SSE specialization of numext::sqrt for \c double.
  *
  * \returns the square root of \a x, computed directly with SSE2 instructions
  * instead of going through std::sqrt.
  */
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double sqrt(const double &x)
{
  // __builtin_ia32_sqrtsd is a GCC-internal builtin. Compilers that merely
  // advertise GNUC compatibility (Clang, ICC) are not guaranteed to provide it,
  // so they are excluded explicitly and take the portable intrinsic below.
  // NOTE(review): assumes EIGEN_COMP_GNUC is also set for such GNUC-compatible
  // compilers -- confirm against the macro's definition.
#if EIGEN_COMP_GNUC && !defined(__clang__) && !defined(__INTEL_COMPILER)
  return internal::pfirst(__builtin_ia32_sqrtsd(_mm_set_sd(x)));
#else
  // Portable path: x sits in the low lane (high lane zeroed); the packed
  // square root is taken and only the low lane is read back.
  return internal::pfirst(_mm_sqrt_pd(_mm_set_sd(x)));
#endif
}
} // end namespace numext
} // end namespace Eigen
#endif // EIGEN_MATH_FUNCTIONS_SSE_H