mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-05-03 09:14:11 +08:00
Add numext::sqrt function to enable custom optimized implementation.
This changeset add two specializations for float/double on SSE. Those are mostly usefull with GCC for which std::sqrt add an extra and costly check on the result of _mm_sqrt_*. Clang does not add this burden. In this changeset, only DenseBase::norm() makes use of it.
This commit is contained in:
parent
34340458cb
commit
8dca9f97e3
@ -99,8 +99,7 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
|
||||
template<typename Derived>
|
||||
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
|
||||
{
|
||||
EIGEN_USING_STD_MATH(sqrt)
|
||||
return sqrt(squaredNorm());
|
||||
return numext::sqrt(squaredNorm());
|
||||
}
|
||||
|
||||
/** \returns an expression of the quotient of *this by its own norm.
|
||||
|
@ -954,8 +954,8 @@ T (ceil)(const T& x)
|
||||
return ceil(x);
|
||||
}
|
||||
|
||||
// Log base 2 for 32 bits positive integers.
|
||||
// Conveniently returns 0 for x==0.
|
||||
/** Log base 2 for 32 bits positive integers.
|
||||
* Conveniently returns 0 for x==0. */
|
||||
inline int log2(int x)
|
||||
{
|
||||
eigen_assert(x>=0);
|
||||
@ -969,6 +969,22 @@ inline int log2(int x)
|
||||
return table[(v * 0x07C4ACDDU) >> 27];
|
||||
}
|
||||
|
||||
/** \returns the square root of \a x.
|
||||
*
|
||||
* It is essentially equivalent to \code using std::sqrt; return sqrt(x); \endcode,
|
||||
* but slightly faster for float/double and some compilers (e.g., gcc), thanks to
|
||||
* specializations when SSE is enabled.
|
||||
*
|
||||
* It's usage is justified in performance critical functions, like norm/normalize.
|
||||
*/
|
||||
template<typename T>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
T sqrt(const T &x)
|
||||
{
|
||||
EIGEN_USING_STD_MATH(sqrt);
|
||||
return sqrt(x);
|
||||
}
|
||||
|
||||
} // end namespace numext
|
||||
|
||||
namespace internal {
|
||||
|
@ -518,6 +518,28 @@ Packet2d prsqrt<Packet2d>(const Packet2d& x) {
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
namespace numext {
|
||||
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
float sqrt(const float &x)
|
||||
{
|
||||
return internal::pfirst(_mm_sqrt_ss(_mm_set_ss(x)));
|
||||
}
|
||||
|
||||
template<>
|
||||
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
|
||||
double sqrt(const double &x)
|
||||
{
|
||||
#if EIGEN_COMP_GNUC
|
||||
return internal::pfirst(__builtin_ia32_sqrtsd(_mm_set_sd(x)));
|
||||
#else
|
||||
return internal::pfirst(_mm_sqrt_pd(_mm_set_sd(x)));
|
||||
#endif
|
||||
}
|
||||
|
||||
} // end namespace numex
|
||||
|
||||
} // end namespace Eigen
|
||||
|
||||
#endif // EIGEN_MATH_FUNCTIONS_SSE_H
|
||||
|
Loading…
x
Reference in New Issue
Block a user