* fix the binary bloat issue, Rohit's idea was the good one

* a few doc fixes (the alloc routines do return 0 on error), plus the version number update in CMakeLists that had been forgotten
This commit is contained in:
Benoit Jacob 2009-04-06 13:33:42 +00:00
parent 38f501a596
commit 502bf4a81d
3 changed files with 88 additions and 58 deletions

View File

@ -1,5 +1,5 @@
project(Eigen)
set(EIGEN_VERSION_NUMBER "2.0.51-unstable")
set(EIGEN_VERSION_NUMBER "2.0.52-unstable")
#if the svnversion program is absent, this will leave the SVN_REVISION string empty,
#but won't stop CMake.

View File

@ -52,38 +52,34 @@
#ifndef EIGEN_MATH_FUNCTIONS_SSE_H
#define EIGEN_MATH_FUNCTIONS_SSE_H
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0);
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5);
/* the smallest non denormalized float number */
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);
_EIGEN_DECLARE_CONST_Packet4i(1, 1);
_EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
_EIGEN_DECLARE_CONST_Packet4i(2, 2);
_EIGEN_DECLARE_CONST_Packet4i(4, 4);
_EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
/* natural logarithm computed for 4 simultaneous float
return NaN for x <= 0
*/
_EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375);
template<> EIGEN_DONT_INLINE Packet4f ei_plog(Packet4f x)
{
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0);
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5);
_EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inv_mant_mask, ~0x7f800000);
/* the smallest non denormalized float number */
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(min_norm_pos, 0x00800000);
/* natural logarithm computed for 4 floats simultaneously;
returns NaN for x <= 0
*/
_EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292E-2);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, - 1.1514610310E-1);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740E-1);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, - 1.2420140846E-1);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, + 1.4249322787E-1);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, - 1.6668057665E-1);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, + 2.0000714765E-1);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, - 2.4999993993E-1);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, + 3.3333331174E-1);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4);
_EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375);
Packet4i emm0;
Packet4f invalid_mask = _mm_cmple_ps(x, _mm_setzero_ps());
@ -134,22 +130,27 @@ template<> EIGEN_DONT_INLINE Packet4f ei_plog(Packet4f x)
return _mm_or_ps(x, invalid_mask); // negative arg will be NAN
}
_EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647949f);
_EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1);
template<> EIGEN_DONT_INLINE Packet4f ei_pexp(Packet4f x)
{
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0);
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5);
_EIGEN_DECLARE_CONST_Packet4i(0x7f, 0x7f);
_EIGEN_DECLARE_CONST_Packet4f(exp_hi, 88.3762626647949f);
_EIGEN_DECLARE_CONST_Packet4f(exp_lo, -88.3762626647949f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500E-4);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507E-3);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073E-3);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894E-2);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1);
Packet4f tmp = _mm_setzero_ps(), fx;
Packet4i emm0;
@ -202,19 +203,29 @@ template<> EIGEN_DONT_INLINE Packet4f ei_pexp(Packet4f x)
surprising but correct result.
*/
_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625);
_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4);
_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8);
_EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4);
_EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3);
_EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1);
_EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005);
_EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003);
_EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002);
_EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516); // 4 / M_PI
template<> EIGEN_DONT_INLINE Packet4f ei_psin(Packet4f x)
{
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0);
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5);
_EIGEN_DECLARE_CONST_Packet4i(1, 1);
_EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
_EIGEN_DECLARE_CONST_Packet4i(2, 2);
_EIGEN_DECLARE_CONST_Packet4i(4, 4);
_EIGEN_DECLARE_CONST_Packet4f_FROM_INT(sign_mask, 0x80000000);
_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625);
_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4);
_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8);
_EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4);
_EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3);
_EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1);
_EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005);
_EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003);
_EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002);
_EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516); // 4 / M_PI
Packet4f xmm1, xmm2 = _mm_setzero_ps(), xmm3, sign_bit, y;
Packet4i emm0, emm2;
@ -291,6 +302,25 @@ template<> EIGEN_DONT_INLINE Packet4f ei_psin(Packet4f x)
/* almost the same as ei_psin */
template<> Packet4f ei_pcos(Packet4f x)
{
_EIGEN_DECLARE_CONST_Packet4f(1 , 1.0);
_EIGEN_DECLARE_CONST_Packet4f(half, 0.5);
_EIGEN_DECLARE_CONST_Packet4i(1, 1);
_EIGEN_DECLARE_CONST_Packet4i(not1, ~1);
_EIGEN_DECLARE_CONST_Packet4i(2, 2);
_EIGEN_DECLARE_CONST_Packet4i(4, 4);
_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1,-0.78515625);
_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4);
_EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8);
_EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891E-4);
_EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736E-3);
_EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611E-1);
_EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948E-005);
_EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765E-003);
_EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002);
_EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516); // 4 / M_PI
Packet4f xmm1, xmm2 = _mm_setzero_ps(), xmm3, y;
Packet4i emm0, emm2;

View File

@ -65,7 +65,7 @@ inline void ei_handmade_aligned_free(void *ptr)
}
/** \internal allocates \a size bytes. The returned pointer is guaranteed to have 16 bytes alignment.
* On allocation error, the returned pointer is undefined, but if exceptions are enabled then a std::bad_alloc is thrown.
* On allocation error, a null pointer is returned, unless exceptions are enabled, in which case a std::bad_alloc is thrown instead.
*/
inline void* ei_aligned_malloc(size_t size)
{
@ -98,7 +98,7 @@ inline void* ei_aligned_malloc(size_t size)
}
/** allocates \a size bytes. If Align is true, then the returned ptr is 16-byte-aligned.
* On allocation error, the returned pointer is undefined, but if exceptions are enabled then a std::bad_alloc is thrown.
* On allocation error, a null pointer is returned, unless exceptions are enabled, in which case a std::bad_alloc is thrown instead.
*/
template<bool Align> inline void* ei_conditional_aligned_malloc(size_t size)
{