mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-07-09 14:41:49 +08:00
Fix bug #642: add vectorization of sqrt for doubles, and make sqrt really safe if EIGEN_FAST_MATH is disabled
(grafted from d4dd6aaed2c70b5e32541e96b4864b90dc07c614 and c47010e3d221396cde2aad6b3250fa698a930e28 )
This commit is contained in:
parent
f9149f9ba0
commit
2b50ade6ca
@ -442,8 +442,11 @@ Packet4f pcos<Packet4f>(const Packet4f& _x)
|
||||
return _mm_xor_ps(y, sign_bit);
|
||||
}
|
||||
|
||||
#if EIGEN_FAST_MATH
|
||||
|
||||
// This is based on Quake3's fast inverse square root.
|
||||
// For detail see here: http://www.beyond3d.com/content/articles/8/
|
||||
// It lacks 1 bit (or 2 bits in some rare cases) of precision, and does not handle negative, +inf, or denormalized numbers correctly.
|
||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||
Packet4f psqrt<Packet4f>(const Packet4f& _x)
|
||||
{
|
||||
@ -457,6 +460,14 @@ Packet4f psqrt<Packet4f>(const Packet4f& _x)
|
||||
return pmul(_x,x);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// Single-precision packet square root used when EIGEN_FAST_MATH is disabled:
// forwards directly to the SSE _mm_sqrt_ps intrinsic rather than the
// inverse-square-root approximation selected in the EIGEN_FAST_MATH branch.
template<> EIGEN_STRONG_INLINE
Packet4f psqrt<Packet4f>(const Packet4f& x)
{
  return _mm_sqrt_ps(x);
}
|
||||
|
||||
#endif
|
||||
|
||||
// Double-precision packet square root: forwards to the SSE2 _mm_sqrt_pd
// intrinsic. Defined outside the EIGEN_FAST_MATH conditional, so the same
// implementation is used whether or not fast math is enabled.
template<> EIGEN_STRONG_INLINE
Packet2d psqrt<Packet2d>(const Packet2d& x)
{
  return _mm_sqrt_pd(x);
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
} // end namespace Eigen
|
||||
|
@ -83,7 +83,8 @@ template<> struct packet_traits<double> : default_packet_traits
|
||||
size=2,
|
||||
|
||||
HasDiv = 1,
|
||||
HasExp = 1
|
||||
HasExp = 1,
|
||||
HasSqrt = 1
|
||||
};
|
||||
};
|
||||
template<> struct packet_traits<int> : default_packet_traits
|
||||
|
@ -64,9 +64,9 @@ run time. However, these assertions do cost time and can thus be turned off.
|
||||
\c EIGEN_DONT_ALIGN is defined.
|
||||
- \b EIGEN_DONT_VECTORIZE - disables explicit vectorization when defined. Not defined by default, unless
|
||||
alignment is disabled by %Eigen's platform test or the user defining \c EIGEN_DONT_ALIGN.
|
||||
- \b EIGEN_FAST_MATH - enables some optimizations which might affect the accuracy of the result. The only
|
||||
optimization this currently includes is single precision sin() and cos() in the presence of SSE
|
||||
vectorization. Defined by default.
|
||||
- \b EIGEN_FAST_MATH - enables some optimizations which might affect the accuracy of the result. This currently
|
||||
enables the SSE vectorization of sin() and cos(), and speeds up sqrt() for single precision. Defined to 1 by default.
|
||||
Define it to 0 to disable.
|
||||
- \b EIGEN_UNROLLING_LIMIT - defines the size of a loop to enable meta unrolling. Set it to zero to disable
|
||||
unrolling. The size of a loop here is expressed in %Eigen's own notion of "number of FLOPS", it does not
|
||||
correspond to the number of iterations or the number of instructions. The default value is 100.
|
||||
|
Loading…
x
Reference in New Issue
Block a user