Pulled latest updates from trunk

This commit is contained in:
Benoit Steiner 2016-01-21 17:17:56 -08:00
commit 7b68cf2e0f
4 changed files with 66 additions and 7 deletions

View File

@ -99,11 +99,13 @@ EIGEN_STRONG_INLINE typename NumTraits<typename internal::traits<Derived>::Scala
template<typename Derived> template<typename Derived>
inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const inline typename NumTraits<typename internal::traits<Derived>::Scalar>::Real MatrixBase<Derived>::norm() const
{ {
EIGEN_USING_STD_MATH(sqrt) return numext::sqrt(squaredNorm());
return sqrt(squaredNorm());
} }
/** \returns an expression of the quotient of *this by its own norm. /** \returns an expression of the quotient of \c *this by its own norm.
*
* \warning If the input vector is too small (i.e., this->norm()==0),
* then this function returns a copy of the input.
* *
* \only_for_vectors * \only_for_vectors
* *
@ -115,19 +117,29 @@ MatrixBase<Derived>::normalized() const
{ {
typedef typename internal::nested_eval<Derived,2>::type _Nested; typedef typename internal::nested_eval<Derived,2>::type _Nested;
_Nested n(derived()); _Nested n(derived());
return n / n.norm(); RealScalar z = n.squaredNorm();
// NOTE: extensive benchmarking showed that this conditional does not impact performance, at least on recent x86 CPUs
if(z>RealScalar(0))
return n / numext::sqrt(z);
else
return n;
} }
/** Normalizes the vector, i.e. divides it by its own norm. /** Normalizes the vector, i.e. divides it by its own norm.
* *
* \only_for_vectors * \only_for_vectors
* *
* \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged.
*
* \sa norm(), normalized() * \sa norm(), normalized()
*/ */
template<typename Derived> template<typename Derived>
inline void MatrixBase<Derived>::normalize() inline void MatrixBase<Derived>::normalize()
{ {
*this /= norm(); RealScalar z = squaredNorm();
// NOTE: extensive benchmarking showed that this conditional does not impact performance, at least on recent x86 CPUs
if(z>RealScalar(0))
derived() /= numext::sqrt(z);
} }
//---------- implementation of other norms ---------- //---------- implementation of other norms ----------

View File

@ -954,8 +954,8 @@ T (ceil)(const T& x)
return ceil(x); return ceil(x);
} }
// Log base 2 for 32 bits positive integers. /** Log base 2 for 32 bits positive integers.
// Conveniently returns 0 for x==0. * Conveniently returns 0 for x==0. */
inline int log2(int x) inline int log2(int x)
{ {
eigen_assert(x>=0); eigen_assert(x>=0);
@ -969,6 +969,22 @@ inline int log2(int x)
return table[(v * 0x07C4ACDDU) >> 27]; return table[(v * 0x07C4ACDDU) >> 27];
} }
/** \returns the square root of \a x.
  *
  * Behaves essentially like \code using std::sqrt; return sqrt(x); \endcode.
  * For float/double it can be slightly faster with some compilers (e.g., gcc),
  * thanks to dedicated specializations when SSE is enabled.
  *
  * Its usage is justified in performance critical functions, like norm/normalize.
  */
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
T sqrt(const T &x)
{
  // Make the standard sqrt visible for the unqualified call below.
  EIGEN_USING_STD_MATH(sqrt);
  T root = sqrt(x);
  return root;
}
} // end namespace numext } // end namespace numext
namespace internal { namespace internal {

View File

@ -518,6 +518,28 @@ Packet2d prsqrt<Packet2d>(const Packet2d& x) {
} // end namespace internal } // end namespace internal
namespace numext {
/** Specialization of numext::sqrt for \c float built on the scalar SSE
  * square-root intrinsic (\c _mm_sqrt_ss).
  */
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
float sqrt(const float &x)
{
  // Load x into a packet, take the scalar square root, then read the first coefficient back.
  const internal::Packet4f root(_mm_sqrt_ss(_mm_set_ss(x)));
  return internal::pfirst(root);
}
/** Specialization of numext::sqrt for \c double built on SSE2 intrinsics.
  *
  * \a x is loaded with \c _mm_set_sd, the square root is taken on the packet,
  * and the first coefficient is read back with internal::pfirst().
  */
template<>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
double sqrt(const double &x)
{
#if EIGEN_COMP_GNUC_STRICT
  // __builtin_ia32_sqrtsd is a GCC-internal builtin, so it is only used when the
  // compiler really is GCC. GCC-compatible compilers (clang, ICC, ...) are not
  // guaranteed to provide it and take the portable branch instead.
  // NOTE(review): if EIGEN_COMP_GNUC_STRICT is not defined in this tree, the
  // preprocessor treats it as 0 and the portable branch is used everywhere.
  return internal::pfirst(internal::Packet2d(__builtin_ia32_sqrtsd(_mm_set_sd(x))));
#else
  // Portable path: packed square root; only the first coefficient is consumed.
  return internal::pfirst(internal::Packet2d(_mm_sqrt_pd(_mm_set_sd(x))));
#endif
}
} // end namespace numext
} // end namespace Eigen } // end namespace Eigen
#endif // EIGEN_MATH_FUNCTIONS_SSE_H #endif // EIGEN_MATH_FUNCTIONS_SSE_H

View File

@ -43,6 +43,15 @@ template<> struct adjoint_specific<false> {
VERIFY_IS_APPROX(v3, v1.normalized()); VERIFY_IS_APPROX(v3, v1.normalized());
VERIFY_IS_APPROX(v3.norm(), RealScalar(1)); VERIFY_IS_APPROX(v3.norm(), RealScalar(1));
// check null inputs
VERIFY_IS_APPROX((v1*0).normalized(), (v1*0));
RealScalar very_small = (std::numeric_limits<RealScalar>::min)();
VERIFY( (v1*very_small).norm() == 0 );
VERIFY_IS_APPROX((v1*very_small).normalized(), (v1*very_small));
v3 = v1*very_small;
v3.normalize();
VERIFY_IS_APPROX(v3, (v1*very_small));
// check compatibility of dot and adjoint // check compatibility of dot and adjoint
ref = NumTraits<Scalar>::IsInteger ? 0 : (std::max)((std::max)(v1.norm(),v2.norm()),(std::max)((square * v2).norm(),(square.adjoint() * v1).norm())); ref = NumTraits<Scalar>::IsInteger ? 0 : (std::max)((std::max)(v1.norm(),v2.norm()),(std::max)((square * v2).norm(),(square.adjoint() * v1).norm()));
VERIFY(internal::isMuchSmallerThan(abs(v1.dot(square * v2) - (square.adjoint() * v1).dot(v2)), ref, test_precision<Scalar>())); VERIFY(internal::isMuchSmallerThan(abs(v1.dot(square * v2) - (square.adjoint() * v1).dot(v2)), ref, test_precision<Scalar>()));