mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-07-09 22:51:51 +08:00
Fix bug #642: add vectorization of sqrt for doubles, and make sqrt really safe if EIGEN_FAST_MATH is disabled
(grafted from d4dd6aaed2c70b5e32541e96b4864b90dc07c614 and c47010e3d221396cde2aad6b3250fa698a930e28 )
This commit is contained in:
parent
f9149f9ba0
commit
2b50ade6ca
@ -442,8 +442,11 @@ Packet4f pcos<Packet4f>(const Packet4f& _x)
|
|||||||
return _mm_xor_ps(y, sign_bit);
|
return _mm_xor_ps(y, sign_bit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if EIGEN_FAST_MATH
|
||||||
|
|
||||||
// This is based on Quake3's fast inverse square root.
|
// This is based on Quake3's fast inverse square root.
|
||||||
// For detail see here: http://www.beyond3d.com/content/articles/8/
|
// For detail see here: http://www.beyond3d.com/content/articles/8/
|
||||||
|
// It lacks 1 bit (or 2 bits in some rare cases) of precision, and does not handle negative, +inf, or denormalized numbers correctly.
|
||||||
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
|
||||||
Packet4f psqrt<Packet4f>(const Packet4f& _x)
|
Packet4f psqrt<Packet4f>(const Packet4f& _x)
|
||||||
{
|
{
|
||||||
@ -457,6 +460,14 @@ Packet4f psqrt<Packet4f>(const Packet4f& _x)
|
|||||||
return pmul(_x,x);
|
return pmul(_x,x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
// Exact single-precision square root, selected when EIGEN_FAST_MATH is disabled:
// forwards the whole packet to the SSE _mm_sqrt_ps intrinsic instead of the
// approximate Quake3-style routine used in the fast-math branch.
template<> EIGEN_STRONG_INLINE
Packet4f psqrt<Packet4f>(const Packet4f& x)
{
  return _mm_sqrt_ps(x);
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Double-precision packet square root: forwards the packet to the
// _mm_sqrt_pd intrinsic. Defined outside the EIGEN_FAST_MATH conditional,
// so it is available in both configurations.
template<> EIGEN_STRONG_INLINE
Packet2d psqrt<Packet2d>(const Packet2d& x)
{
  return _mm_sqrt_pd(x);
}
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
@ -83,7 +83,8 @@ template<> struct packet_traits<double> : default_packet_traits
|
|||||||
size=2,
|
size=2,
|
||||||
|
|
||||||
HasDiv = 1,
|
HasDiv = 1,
|
||||||
HasExp = 1
|
HasExp = 1,
|
||||||
|
HasSqrt = 1
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
template<> struct packet_traits<int> : default_packet_traits
|
template<> struct packet_traits<int> : default_packet_traits
|
||||||
|
@ -64,9 +64,9 @@ run time. However, these assertions do cost time and can thus be turned off.
|
|||||||
\c EIGEN_DONT_ALIGN is defined.
|
\c EIGEN_DONT_ALIGN is defined.
|
||||||
- \b EIGEN_DONT_VECTORIZE - disables explicit vectorization when defined. Not defined by default, unless
|
- \b EIGEN_DONT_VECTORIZE - disables explicit vectorization when defined. Not defined by default, unless
|
||||||
alignment is disabled by %Eigen's platform test or the user defining \c EIGEN_DONT_ALIGN.
|
alignment is disabled by %Eigen's platform test or the user defining \c EIGEN_DONT_ALIGN.
|
||||||
- \b EIGEN_FAST_MATH - enables some optimizations which might affect the accuracy of the result. The only
|
- \b EIGEN_FAST_MATH - enables some optimizations which might affect the accuracy of the result. This currently
|
||||||
optimization this currently includes is single precision sin() and cos() in the present of SSE
|
enables the SSE vectorization of sin() and cos(), and speeds up sqrt() for single precision. Defined to 1 by default.
|
||||||
vectorization. Defined by default.
|
Define it to 0 to disable.
|
||||||
- \b EIGEN_UNROLLING_LIMIT - defines the size of a loop to enable meta unrolling. Set it to zero to disable
|
- \b EIGEN_UNROLLING_LIMIT - defines the size of a loop to enable meta unrolling. Set it to zero to disable
|
||||||
unrolling. The size of a loop here is expressed in %Eigen's own notion of "number of FLOPS", it does not
|
unrolling. The size of a loop here is expressed in %Eigen's own notion of "number of FLOPS", it does not
|
||||||
correspond to the number of iterations or the number of instructions. The default value is 100.
|
correspond to the number of iterations or the number of instructions. The default value is 100.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user