mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-09-13 01:43:13 +08:00
Fix ldexp implementations.
The previous implementations produced garbage values if the exponent did not fit within the exponent bits. See #2131 for a complete discussion, and !375 for other possible implementations. Here we implement the 4-factor version. See `pldexp_impl` in `GenericPacketMathFunctions.h` for a full description. The SSE `pcmp*` methods were moved down since `pcmp_le<Packet4i>` requires `por`. Left as a "TODO" is to delegate to a faster version if we know the exponent does fit within the exponent bits. Fixes #2131.
This commit is contained in:
parent
7eb07da538
commit
4cb563a01e
@ -453,7 +453,7 @@ EIGEN_DEVICE_FUNC inline Packet pfrexp(const Packet& a, Packet& exponent) {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** \internal \returns a * 2^exponent
|
/** \internal \returns a * 2^((int)exponent)
|
||||||
* See https://en.cppreference.com/w/cpp/numeric/math/ldexp
|
* See https://en.cppreference.com/w/cpp/numeric/math/ldexp
|
||||||
*/
|
*/
|
||||||
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
|
||||||
@ -905,28 +905,6 @@ pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& th
|
|||||||
return ifPacket.select[0] ? thenPacket : elsePacket;
|
return ifPacket.select[0] ? thenPacket : elsePacket;
|
||||||
}
|
}
|
||||||
|
|
||||||
/***************************************************************************
|
|
||||||
* Some generic implementations to be used by implementors
|
|
||||||
***************************************************************************/
|
|
||||||
|
|
||||||
/** Default implementation of pfrexp for float.
|
|
||||||
* It is expected to be called by implementers of template<> pfrexp.
|
|
||||||
*/
|
|
||||||
template<typename Packet> EIGEN_STRONG_INLINE Packet
|
|
||||||
pfrexp_float(const Packet& a, Packet& exponent);
|
|
||||||
|
|
||||||
/** Default implementation of pldexp for float.
|
|
||||||
* It is expected to be called by implementers of template<> pldexp.
|
|
||||||
*/
|
|
||||||
template<typename Packet> EIGEN_STRONG_INLINE Packet
|
|
||||||
pldexp_float(Packet a, Packet exponent);
|
|
||||||
|
|
||||||
/** Default implementation of pldexp for double.
|
|
||||||
* It is expected to be called by implementers of template<> pldexp.
|
|
||||||
*/
|
|
||||||
template<typename Packet> EIGEN_STRONG_INLINE Packet
|
|
||||||
pldexp_double(Packet a, Packet exponent);
|
|
||||||
|
|
||||||
} // end namespace internal
|
} // end namespace internal
|
||||||
|
|
||||||
} // end namespace Eigen
|
} // end namespace Eigen
|
||||||
|
@ -273,6 +273,15 @@ template<> EIGEN_STRONG_INLINE Packet8i padd<Packet8i>(const Packet8i& a, const
|
|||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8f psub<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_sub_ps(a,b); }
|
template<> EIGEN_STRONG_INLINE Packet8f psub<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_sub_ps(a,b); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet4d psub<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_sub_pd(a,b); }
|
template<> EIGEN_STRONG_INLINE Packet4d psub<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_sub_pd(a,b); }
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet8i psub<Packet8i>(const Packet8i& a, const Packet8i& b) {
|
||||||
|
#ifdef EIGEN_VECTORIZE_AVX2
|
||||||
|
return _mm256_sub_epi32(a,b);
|
||||||
|
#else
|
||||||
|
__m128i lo = _mm_sub_epi32(_mm256_extractf128_si256(a, 0), _mm256_extractf128_si256(b, 0));
|
||||||
|
__m128i hi = _mm_sub_epi32(_mm256_extractf128_si256(a, 1), _mm256_extractf128_si256(b, 1));
|
||||||
|
return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8f pnegate(const Packet8f& a)
|
template<> EIGEN_STRONG_INLINE Packet8f pnegate(const Packet8f& a)
|
||||||
{
|
{
|
||||||
@ -379,6 +388,7 @@ template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const
|
|||||||
return _mm256_min_pd(b,a);
|
return _mm256_min_pd(b,a);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) {
|
template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) {
|
||||||
#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63
|
#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63
|
||||||
// See pmin above
|
// See pmin above
|
||||||
@ -756,17 +766,29 @@ template<> EIGEN_STRONG_INLINE Packet8f pldexp<Packet8f>(const Packet8f& a, cons
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4d pldexp<Packet4d>(const Packet4d& a, const Packet4d& exponent) {
|
template<> EIGEN_STRONG_INLINE Packet4d pldexp<Packet4d>(const Packet4d& a, const Packet4d& exponent) {
|
||||||
// Build e=2^n by constructing the exponents in a 128-bit vector and
|
// Clamp exponent to [-2099, 2099]
|
||||||
// shifting them to where they belong in double-precision values.
|
const Packet4d max_exponent = pset1<Packet4d>(2099.0);
|
||||||
Packet4i cst_1023 = pset1<Packet4i>(1023);
|
const Packet4i e = _mm256_cvtpd_epi32(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent));
|
||||||
__m128i emm0 = _mm256_cvtpd_epi32(exponent);
|
|
||||||
emm0 = _mm_add_epi32(emm0, cst_1023);
|
// Split 2^e into four factors and multiply.
|
||||||
emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(3, 1, 2, 0));
|
const Packet4i bias = pset1<Packet4i>(1023);
|
||||||
__m128i lo = _mm_slli_epi64(emm0, 52);
|
Packet4i b = parithmetic_shift_right<2>(e); // floor(e/4)
|
||||||
__m128i hi = _mm_slli_epi64(_mm_srli_epi64(emm0, 32), 52);
|
|
||||||
__m256i e = _mm256_insertf128_si256(_mm256_setzero_si256(), lo, 0);
|
// 2^b
|
||||||
e = _mm256_insertf128_si256(e, hi, 1);
|
Packet4i hi = vec4i_swizzle1(padd(b, bias), 0, 2, 1, 3);
|
||||||
return pmul(a,_mm256_castsi256_pd(e));
|
Packet4i lo = _mm_slli_epi64(hi, 52);
|
||||||
|
hi = _mm_slli_epi64(_mm_srli_epi64(hi, 32), 52);
|
||||||
|
Packet4d c = _mm256_castsi256_pd(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1));
|
||||||
|
Packet4d out = pmul(pmul(pmul(a, c), c), c); // a * 2^(3b)
|
||||||
|
|
||||||
|
// 2^(e - 3b)
|
||||||
|
b = psub(psub(psub(e, b), b), b); // e - 3b
|
||||||
|
hi = vec4i_swizzle1(padd(b, bias), 0, 2, 1, 3);
|
||||||
|
lo = _mm_slli_epi64(hi, 52);
|
||||||
|
hi = _mm_slli_epi64(_mm_srli_epi64(hi, 32), 52);
|
||||||
|
c = _mm256_castsi256_pd(_mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1));
|
||||||
|
out = pmul(out, c); // a * 2^e
|
||||||
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE float predux<Packet8f>(const Packet8f& a)
|
template<> EIGEN_STRONG_INLINE float predux<Packet8f>(const Packet8f& a)
|
||||||
|
@ -917,16 +917,29 @@ template<> EIGEN_STRONG_INLINE Packet16f pldexp<Packet16f>(const Packet16f& a, c
|
|||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet8d pldexp<Packet8d>(const Packet8d& a, const Packet8d& exponent) {
|
template<> EIGEN_STRONG_INLINE Packet8d pldexp<Packet8d>(const Packet8d& a, const Packet8d& exponent) {
|
||||||
// Build e=2^n by constructing the exponents in a 256-bit vector and
|
// Clamp exponent to [-2099, 2099]
|
||||||
// shifting them to where they belong in double-precision values.
|
const Packet8d max_exponent = pset1<Packet8d>(2099.0);
|
||||||
Packet8i cst_1023 = pset1<Packet8i>(1023);
|
const Packet8i e = _mm512_cvtpd_epi32(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent));
|
||||||
__m256i emm0 = _mm512_cvtpd_epi32(exponent);
|
|
||||||
emm0 = _mm256_add_epi32(emm0, cst_1023);
|
// Split 2^e into four factors and multiply.
|
||||||
emm0 = _mm256_shuffle_epi32(emm0, _MM_SHUFFLE(3, 1, 2, 0));
|
const Packet8i bias = pset1<Packet8i>(1023);
|
||||||
__m256i lo = _mm256_slli_epi64(emm0, 52);
|
Packet8i b = parithmetic_shift_right<2>(e); // floor(e/4)
|
||||||
__m256i hi = _mm256_slli_epi64(_mm256_srli_epi64(emm0, 32), 52);
|
|
||||||
__m512d b = _mm512_castsi512_pd(_mm512_inserti64x4(_mm512_castsi256_si512(lo), hi, 1));
|
// 2^b
|
||||||
return pmul(a, b);
|
Packet8i hi = _mm256_shuffle_epi32(padd(b, bias), _MM_SHUFFLE(3, 1, 2, 0));
|
||||||
|
Packet8i lo = _mm256_slli_epi64(hi, 52);
|
||||||
|
hi = _mm256_slli_epi64(_mm256_srli_epi64(hi, 32), 52);
|
||||||
|
Packet8d c = _mm512_castsi512_pd(_mm512_inserti64x4(_mm512_castsi256_si512(lo), hi, 1));
|
||||||
|
Packet8d out = pmul(pmul(pmul(a, c), c), c); // a * 2^(3b)
|
||||||
|
|
||||||
|
// 2^(e - 3b)
|
||||||
|
b = psub(psub(psub(e, b), b), b); // e - 3b
|
||||||
|
hi = _mm256_shuffle_epi32(padd(b, bias), _MM_SHUFFLE(3, 1, 2, 0));
|
||||||
|
lo = _mm256_slli_epi64(hi, 52);
|
||||||
|
hi = _mm256_slli_epi64(_mm256_srli_epi64(hi, 32), 52);
|
||||||
|
c = _mm512_castsi512_pd(_mm512_inserti64x4(_mm512_castsi256_si512(lo), hi, 1));
|
||||||
|
out = pmul(out, c); // a * 2^e
|
||||||
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef EIGEN_VECTORIZE_AVX512DQ
|
#ifdef EIGEN_VECTORIZE_AVX512DQ
|
||||||
|
@ -2452,38 +2452,134 @@ static inline Packet2l ConvertToPacket2l(const Packet2d& x) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2d pldexp<Packet2d>(const Packet2d& a, const Packet2d& exponent) {
|
|
||||||
|
|
||||||
// build 2^n
|
|
||||||
Packet2l emm0 = ConvertToPacket2l(exponent);
|
|
||||||
|
|
||||||
#ifdef __POWER8_VECTOR__
|
// Packet2l shifts.
|
||||||
const Packet2l p2l_1023 = { 1023, 1023 };
|
// For POWER8 we simply use vec_sr/l.
|
||||||
const Packet2ul p2ul_52 = { 52, 52 };
|
//
|
||||||
emm0 = vec_add(emm0, p2l_1023);
|
// Things are more complicated for POWER7. There is actually a
|
||||||
emm0 = vec_sl(emm0, p2ul_52);
|
// vec_xxsxdi intrinsic but it is not supported by some gcc versions.
|
||||||
|
// So we need to shift by N % 32 and rearrage bytes.
|
||||||
|
#ifdef __POWER8_VECTOR__
|
||||||
|
|
||||||
|
template<int N>
|
||||||
|
EIGEN_STRONG_INLINE Packet2l plogical_shift_left(const Packet2l& a) {
|
||||||
|
const Packet2ul shift = { N, N };
|
||||||
|
return vec_sl(a, shift);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int N>
|
||||||
|
EIGEN_STRONG_INLINE Packet2l plogical_shift_right(const Packet2l& a) {
|
||||||
|
const Packet2ul shift = { N, N };
|
||||||
|
return vec_sr(a, shift);
|
||||||
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
// Code is a bit complex for POWER7. There is actually a
|
|
||||||
// vec_xxsldi intrinsic but it is not supported by some gcc versions.
|
|
||||||
// So we shift (52-32) bits and do a word swap with zeros.
|
|
||||||
const Packet4i p4i_1023 = pset1<Packet4i>(1023);
|
|
||||||
const Packet4i p4i_20 = pset1<Packet4i>(20); // 52 - 32
|
|
||||||
|
|
||||||
Packet4i emm04i = reinterpret_cast<Packet4i>(emm0);
|
// Shifts [A, B, C, D] to [B, 0, D, 0].
|
||||||
emm04i = vec_add(emm04i, p4i_1023);
|
// Used to implement left shifts for Packet2l.
|
||||||
emm04i = vec_sl(emm04i, reinterpret_cast<Packet4ui>(p4i_20));
|
EIGEN_ALWAYS_INLINE Packet4i shift_even_left(const Packet4i& a) {
|
||||||
static const Packet16uc perm = {
|
static const Packet16uc perm = {
|
||||||
0x14, 0x15, 0x16, 0x17, 0x00, 0x01, 0x02, 0x03,
|
0x14, 0x15, 0x16, 0x17, 0x00, 0x01, 0x02, 0x03,
|
||||||
0x1c, 0x1d, 0x1e, 0x1f, 0x08, 0x09, 0x0a, 0x0b };
|
0x1c, 0x1d, 0x1e, 0x1f, 0x08, 0x09, 0x0a, 0x0b };
|
||||||
#ifdef _BIG_ENDIAN
|
#ifdef _BIG_ENDIAN
|
||||||
emm0 = reinterpret_cast<Packet2l>(vec_perm(p4i_ZERO, emm04i, perm));
|
return vec_perm(p4i_ZERO, a, perm);
|
||||||
#else
|
#else
|
||||||
emm0 = reinterpret_cast<Packet2l>(vec_perm(emm04i, p4i_ZERO, perm));
|
return vec_perm(a, p4i_ZERO, perm);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shifts [A, B, C, D] to [0, A, 0, C].
|
||||||
|
// Used to implement right shifts for Packet2l.
|
||||||
|
EIGEN_ALWAYS_INLINE Packet4i shift_odd_right(const Packet4i& a) {
|
||||||
|
static const Packet16uc perm = {
|
||||||
|
0x04, 0x05, 0x06, 0x07, 0x10, 0x11, 0x12, 0x13,
|
||||||
|
0x0c, 0x0d, 0x0e, 0x0f, 0x18, 0x19, 0x1a, 0x1b };
|
||||||
|
#ifdef _BIG_ENDIAN
|
||||||
|
return vec_perm(p4i_ZERO, a, perm);
|
||||||
|
#else
|
||||||
|
return vec_perm(a, p4i_ZERO, perm);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int N, typename EnableIf = void>
|
||||||
|
struct plogical_shift_left_impl;
|
||||||
|
|
||||||
|
template<int N>
|
||||||
|
struct plogical_shift_left_impl<N, typename enable_if<(N < 32) && (N >= 0)>::type> {
|
||||||
|
static EIGEN_STRONG_INLINE Packet2l run(const Packet2l& a) {
|
||||||
|
static const unsigned n = static_cast<unsigned>(N);
|
||||||
|
const Packet4ui shift = {n, n, n, n};
|
||||||
|
const Packet4i ai = reinterpret_cast<Packet4i>(a);
|
||||||
|
static const unsigned m = static_cast<unsigned>(32 - N);
|
||||||
|
const Packet4ui shift_right = {m, m, m, m};
|
||||||
|
const Packet4i out_hi = vec_sl(ai, shift);
|
||||||
|
const Packet4i out_lo = shift_even_left(vec_sr(ai, shift_right));
|
||||||
|
return reinterpret_cast<Packet2l>(por<Packet4i>(out_hi, out_lo));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<int N>
|
||||||
|
struct plogical_shift_left_impl<N, typename enable_if<(N >= 32)>::type> {
|
||||||
|
static EIGEN_STRONG_INLINE Packet2l run(const Packet2l& a) {
|
||||||
|
static const unsigned m = static_cast<unsigned>(N - 32);
|
||||||
|
const Packet4ui shift = {m, m, m, m};
|
||||||
|
const Packet4i ai = reinterpret_cast<Packet4i>(a);
|
||||||
|
return reinterpret_cast<Packet2l>(shift_even_left(vec_sl(ai, shift)));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<int N>
|
||||||
|
EIGEN_STRONG_INLINE Packet2l plogical_shift_left(const Packet2l& a) {
|
||||||
|
return plogical_shift_left_impl<N>::run(a);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int N, typename EnableIf = void>
|
||||||
|
struct plogical_shift_right_impl;
|
||||||
|
|
||||||
|
template<int N>
|
||||||
|
struct plogical_shift_right_impl<N, typename enable_if<(N < 32) && (N >= 0)>::type> {
|
||||||
|
static EIGEN_STRONG_INLINE Packet2l run(const Packet2l& a) {
|
||||||
|
static const unsigned n = static_cast<unsigned>(N);
|
||||||
|
const Packet4ui shift = {n, n, n, n};
|
||||||
|
const Packet4i ai = reinterpret_cast<Packet4i>(a);
|
||||||
|
static const unsigned m = static_cast<unsigned>(32 - N);
|
||||||
|
const Packet4ui shift_left = {m, m, m, m};
|
||||||
|
const Packet4i out_lo = vec_sr(ai, shift);
|
||||||
|
const Packet4i out_hi = shift_odd_right(vec_sl(ai, shift_left));
|
||||||
|
return reinterpret_cast<Packet2l>(por<Packet4i>(out_hi, out_lo));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<int N>
|
||||||
|
struct plogical_shift_right_impl<N, typename enable_if<(N >= 32)>::type> {
|
||||||
|
static EIGEN_STRONG_INLINE Packet2l run(const Packet2l& a) {
|
||||||
|
static const unsigned m = static_cast<unsigned>(N - 32);
|
||||||
|
const Packet4ui shift = {m, m, m, m};
|
||||||
|
const Packet4i ai = reinterpret_cast<Packet4i>(a);
|
||||||
|
return reinterpret_cast<Packet2l>(shift_odd_right(vec_sr(ai, shift)));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<int N>
|
||||||
|
EIGEN_STRONG_INLINE Packet2l plogical_shift_right(const Packet2l& a) {
|
||||||
|
return plogical_shift_right_impl<N>::run(a);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
template<> EIGEN_STRONG_INLINE Packet2d pldexp<Packet2d>(const Packet2d& a, const Packet2d& exponent) {
|
||||||
|
// Clamp exponent to [-2099, 2099]
|
||||||
|
const Packet2d max_exponent = pset1<Packet2d>(2099.0);
|
||||||
|
const Packet2l e = ConvertToPacket2l(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent));
|
||||||
|
|
||||||
return pmul(a, reinterpret_cast<Packet2d>(emm0));
|
// Split 2^e into four factors and multiply:
|
||||||
|
const Packet2l bias = { 1023, 1023 };
|
||||||
|
Packet2l b = plogical_shift_right<2>(e); // floor(e/4)
|
||||||
|
Packet2d c = reinterpret_cast<Packet2d>(plogical_shift_left<52>(b + bias));
|
||||||
|
Packet2d out = pmul(pmul(pmul(a, c), c), c); // a * 2^(3b)
|
||||||
|
b = psub(psub(psub(e, b), b), b); // e - 3b
|
||||||
|
c = reinterpret_cast<Packet2d>(plogical_shift_left<52>(b + bias)); // 2^(e - 3b)
|
||||||
|
out = pmul(out, c); // a * 2^e
|
||||||
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2d pfrexp<Packet2d> (const Packet2d& a, Packet2d& exponent) {
|
template<> EIGEN_STRONG_INLINE Packet2d pfrexp<Packet2d> (const Packet2d& a, Packet2d& exponent) {
|
||||||
|
@ -40,30 +40,99 @@ pfrexp_double(const Packet& a, Packet& exponent) {
|
|||||||
typedef typename unpacket_traits<Packet>::integer_packet PacketI;
|
typedef typename unpacket_traits<Packet>::integer_packet PacketI;
|
||||||
const Packet cst_1022d = pset1<Packet>(1022.0);
|
const Packet cst_1022d = pset1<Packet>(1022.0);
|
||||||
const Packet cst_half = pset1<Packet>(0.5);
|
const Packet cst_half = pset1<Packet>(0.5);
|
||||||
const Packet cst_inv_mant_mask = pset1frombits<Packet>(static_cast<uint64_t>(~0x7ff0000000000000ull));
|
const Packet cst_inv_mant_mask = pset1frombits<Packet, uint64_t>(static_cast<uint64_t>(~0x7ff0000000000000ull));
|
||||||
exponent = psub(pcast<PacketI,Packet>(plogical_shift_right<52>(preinterpret<PacketI>(pabs<Packet>(a)))), cst_1022d);
|
exponent = psub(pcast<PacketI,Packet>(plogical_shift_right<52>(preinterpret<PacketI>(pabs<Packet>(a)))), cst_1022d);
|
||||||
return por(pand(a, cst_inv_mant_mask), cst_half);
|
return por(pand(a, cst_inv_mant_mask), cst_half);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Packet> EIGEN_STRONG_INLINE Packet
|
// Safely applies ldexp, correctly handles overflows, underflows and denormals.
|
||||||
pldexp_float(Packet a, Packet exponent)
|
// Assumes IEEE floating point format.
|
||||||
{
|
template<typename Packet>
|
||||||
|
struct pldexp_impl {
|
||||||
typedef typename unpacket_traits<Packet>::integer_packet PacketI;
|
typedef typename unpacket_traits<Packet>::integer_packet PacketI;
|
||||||
const Packet cst_127 = pset1<Packet>(127.f);
|
typedef typename unpacket_traits<Packet>::type Scalar;
|
||||||
// return a * 2^exponent
|
typedef typename unpacket_traits<PacketI>::type ScalarI;
|
||||||
PacketI ei = pcast<Packet,PacketI>(padd(exponent, cst_127));
|
enum {
|
||||||
return pmul(a, preinterpret<Packet>(plogical_shift_left<23>(ei)));
|
TotalBits = sizeof(Scalar) * CHAR_BIT,
|
||||||
}
|
MantissaBits = std::numeric_limits<Scalar>::digits - 1,
|
||||||
|
ExponentBits = int(TotalBits) - int(MantissaBits) - 1
|
||||||
|
};
|
||||||
|
|
||||||
|
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
|
||||||
|
Packet run(const Packet& a, const Packet& exponent) {
|
||||||
|
// We want to return a * 2^exponent, allowing for all possible integer
|
||||||
|
// exponents without overflowing or underflowing in intermediate
|
||||||
|
// computations.
|
||||||
|
//
|
||||||
|
// Since 'a' and the output can be denormal, the maximum range of 'exponent'
|
||||||
|
// to consider for a float is:
|
||||||
|
// -255-23 -> 255+23
|
||||||
|
// Below -278 any finite float 'a' will become zero, and above +278 any
|
||||||
|
// finite float will become inf, including when 'a' is the smallest possible
|
||||||
|
// denormal.
|
||||||
|
//
|
||||||
|
// Unfortunately, 2^(278) cannot be represented using either one or two
|
||||||
|
// finite normal floats, so we must split the scale factor into at least
|
||||||
|
// three parts. It turns out to be faster to split 'exponent' into four
|
||||||
|
// factors, since [exponent>>2] is much faster to compute that [exponent/3].
|
||||||
|
//
|
||||||
|
// Set e = min(max(exponent, -278), 278);
|
||||||
|
// b = floor(e/4);
|
||||||
|
// out = ((((a * 2^(b)) * 2^(b)) * 2^(b)) * 2^(e-3*b))
|
||||||
|
//
|
||||||
|
// This will avoid any intermediate overflows and correctly handle 0, inf,
|
||||||
|
// NaN cases.
|
||||||
|
const Packet max_exponent = pset1<Packet>(Scalar( (ScalarI(1)<<int(ExponentBits)) + ScalarI(MantissaBits) - ScalarI(1))); // 278
|
||||||
|
const PacketI bias = pset1<PacketI>((ScalarI(1)<<(int(ExponentBits)-1)) - ScalarI(1)); // 127
|
||||||
|
const PacketI e = pcast<Packet, PacketI>(pmin(pmax(exponent, pnegate(max_exponent)), max_exponent));
|
||||||
|
PacketI b = parithmetic_shift_right<2>(e); // floor(e/4);
|
||||||
|
Packet c = preinterpret<Packet>(plogical_shift_left<int(MantissaBits)>(padd(b, bias))); // 2^b
|
||||||
|
Packet out = pmul(pmul(pmul(a, c), c), c); // a * 2^(3b)
|
||||||
|
b = psub(psub(psub(e, b), b), b); // e - 3b
|
||||||
|
c = preinterpret<Packet>(plogical_shift_left<int(MantissaBits)>(padd(b, bias))); // 2^(e-3*b)
|
||||||
|
out = pmul(out, c);
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Explicitly multiplies
|
||||||
|
// a * (2^e)
|
||||||
|
// clamping e to the range
|
||||||
|
// [std::numeric_limits<Scalar>::min_exponent-2, std::numeric_limits<Scalar>::max_exponent]
|
||||||
|
//
|
||||||
|
// This is approx 7x faster than pldexp_impl, but will prematurely over/underflow
|
||||||
|
// if 2^e doesn't fit into a normal floating-point Scalar.
|
||||||
|
//
|
||||||
|
// Assumes IEEE floating point format
|
||||||
|
template<typename Packet>
|
||||||
|
struct pldexp_fast_impl {
|
||||||
|
typedef typename unpacket_traits<Packet>::integer_packet PacketI;
|
||||||
|
typedef typename unpacket_traits<Packet>::type Scalar;
|
||||||
|
typedef typename unpacket_traits<PacketI>::type ScalarI;
|
||||||
|
enum {
|
||||||
|
TotalBits = sizeof(Scalar) * CHAR_BIT,
|
||||||
|
MantissaBits = std::numeric_limits<Scalar>::digits - 1,
|
||||||
|
ExponentBits = int(TotalBits) - int(MantissaBits) - 1
|
||||||
|
};
|
||||||
|
|
||||||
|
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
|
||||||
|
Packet run(const Packet& a, const Packet& exponent) {
|
||||||
|
const Packet bias = pset1<Packet>(Scalar((ScalarI(1)<<(int(ExponentBits)-1)) - ScalarI(1))); // 127
|
||||||
|
const Packet limit = pset1<Packet>(Scalar((ScalarI(1)<<int(ExponentBits)) - ScalarI(1))); // 255
|
||||||
|
// restrict biased exponent between 0 and 255 for float.
|
||||||
|
const PacketI e = pcast<Packet, PacketI>(pmin(pmax(padd(exponent, bias), pzero(limit)), limit)); // exponent + 127
|
||||||
|
// return a * (2^e)
|
||||||
|
return pmul(a, preinterpret<Packet>(plogical_shift_left<int(MantissaBits)>(e)));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
template<typename Packet> EIGEN_STRONG_INLINE Packet
|
template<typename Packet> EIGEN_STRONG_INLINE Packet
|
||||||
pldexp_double(Packet a, Packet exponent)
|
pldexp_float(const Packet& a, const Packet& exponent)
|
||||||
{
|
{ return pldexp_impl<Packet>::run(a, exponent); }
|
||||||
typedef typename unpacket_traits<Packet>::integer_packet PacketI;
|
|
||||||
const Packet cst_1023 = pset1<Packet>(1023.0);
|
template<typename Packet> EIGEN_STRONG_INLINE Packet
|
||||||
// return a * 2^exponent
|
pldexp_double(const Packet& a, const Packet& exponent)
|
||||||
PacketI ei = pcast<Packet,PacketI>(padd(exponent, cst_1023));
|
{ return pldexp_impl<Packet>::run(a, exponent); }
|
||||||
return pmul(a, preinterpret<Packet>(plogical_shift_left<52>(ei)));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Natural or base 2 logarithm.
|
// Natural or base 2 logarithm.
|
||||||
// Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C =log(2)
|
// Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C =log(2)
|
||||||
@ -394,6 +463,7 @@ Packet pexp_float(const Packet _x)
|
|||||||
y = pmadd(y, r2, y2);
|
y = pmadd(y, r2, y2);
|
||||||
|
|
||||||
// Return 2^m * exp(r).
|
// Return 2^m * exp(r).
|
||||||
|
// TODO: replace pldexp with faster implementation since y in [-1, 1).
|
||||||
return pmax(pldexp(y,m), _x);
|
return pmax(pldexp(y,m), _x);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -462,6 +532,7 @@ Packet pexp_double(const Packet _x)
|
|||||||
|
|
||||||
// Construct the result 2^n * exp(g) = e * x. The max is used to catch
|
// Construct the result 2^n * exp(g) = e * x. The max is used to catch
|
||||||
// non-finite values in the input.
|
// non-finite values in the input.
|
||||||
|
// TODO: replace pldexp with faster implementation since x in [-1, 1).
|
||||||
return pmax(pldexp(x,fx), _x);
|
return pmax(pldexp(x,fx), _x);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -897,6 +968,8 @@ Packet generic_pow_impl(const Packet& x, const Packet& y) {
|
|||||||
// Note: I experimented with using Dekker's algorithms for the
|
// Note: I experimented with using Dekker's algorithms for the
|
||||||
// multiplication by ln(2) here, but did not see any difference.
|
// multiplication by ln(2) here, but did not see any difference.
|
||||||
Packet e_r = pexp(pmul(pset1<Packet>(Scalar(EIGEN_LN2)), r_z));
|
Packet e_r = pexp(pmul(pset1<Packet>(Scalar(EIGEN_LN2)), r_z));
|
||||||
|
// TODO: investigate bounds of e_r and n_z, potentially using faster
|
||||||
|
// implementation of ldexp.
|
||||||
return pldexp(e_r, n_z);
|
return pldexp(e_r, n_z);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -909,6 +982,7 @@ Packet generic_pow(const Packet& x, const Packet& y) {
|
|||||||
const Packet cst_pos_inf = pset1<Packet>(NumTraits<Scalar>::infinity());
|
const Packet cst_pos_inf = pset1<Packet>(NumTraits<Scalar>::infinity());
|
||||||
const Packet cst_zero = pset1<Packet>(Scalar(0));
|
const Packet cst_zero = pset1<Packet>(Scalar(0));
|
||||||
const Packet cst_one = pset1<Packet>(Scalar(1));
|
const Packet cst_one = pset1<Packet>(Scalar(1));
|
||||||
|
const Packet cst_half = pset1<Packet>(Scalar(0.5));
|
||||||
const Packet cst_nan = pset1<Packet>(NumTraits<Scalar>::quiet_NaN());
|
const Packet cst_nan = pset1<Packet>(NumTraits<Scalar>::quiet_NaN());
|
||||||
|
|
||||||
Packet abs_x = pabs(x);
|
Packet abs_x = pabs(x);
|
||||||
@ -937,7 +1011,7 @@ Packet generic_pow(const Packet& x, const Packet& y) {
|
|||||||
|
|
||||||
// Predicates for whether y is integer and/or even.
|
// Predicates for whether y is integer and/or even.
|
||||||
Packet y_is_int = pcmp_eq(pfloor(y), y);
|
Packet y_is_int = pcmp_eq(pfloor(y), y);
|
||||||
Packet y_div_2 = pldexp(y, pset1<Packet>(Scalar(-1)));
|
Packet y_div_2 = pmul(y, cst_half);
|
||||||
Packet y_is_even = pcmp_eq(pround(y_div_2), y_div_2);
|
Packet y_is_even = pcmp_eq(pround(y_div_2), y_div_2);
|
||||||
|
|
||||||
// Predicates encoding special cases for the value of pow(x,y)
|
// Predicates encoding special cases for the value of pow(x,y)
|
||||||
|
@ -21,14 +21,33 @@ namespace internal {
|
|||||||
template<typename Packet, int N> EIGEN_DEVICE_FUNC inline Packet
|
template<typename Packet, int N> EIGEN_DEVICE_FUNC inline Packet
|
||||||
pset(const typename unpacket_traits<Packet>::type (&a)[N] /* a */);
|
pset(const typename unpacket_traits<Packet>::type (&a)[N] /* a */);
|
||||||
|
|
||||||
|
/***************************************************************************
|
||||||
|
* Some generic implementations to be used by implementors
|
||||||
|
***************************************************************************/
|
||||||
|
|
||||||
|
/** Default implementation of pfrexp for float.
|
||||||
|
* It is expected to be called by implementers of template<> pfrexp.
|
||||||
|
*/
|
||||||
template<typename Packet> EIGEN_STRONG_INLINE Packet
|
template<typename Packet> EIGEN_STRONG_INLINE Packet
|
||||||
pfrexp_float(const Packet& a, Packet& exponent);
|
pfrexp_float(const Packet& a, Packet& exponent);
|
||||||
|
|
||||||
|
/** Default implementation of pfrexp for double.
|
||||||
|
* It is expected to be called by implementers of template<> pfrexp.
|
||||||
|
*/
|
||||||
template<typename Packet> EIGEN_STRONG_INLINE Packet
|
template<typename Packet> EIGEN_STRONG_INLINE Packet
|
||||||
pfrexp_double(const Packet& a, Packet& exponent);
|
pfrexp_double(const Packet& a, Packet& exponent);
|
||||||
|
|
||||||
|
/** Default implementation of pldexp for float.
|
||||||
|
* It is expected to be called by implementers of template<> pldexp.
|
||||||
|
*/
|
||||||
template<typename Packet> EIGEN_STRONG_INLINE Packet
|
template<typename Packet> EIGEN_STRONG_INLINE Packet
|
||||||
pldexp_float(Packet a, Packet exponent);
|
pldexp_float(const Packet& a, const Packet& exponent);
|
||||||
|
|
||||||
|
/** Default implementation of pldexp for double.
|
||||||
|
* It is expected to be called by implementers of template<> pldexp.
|
||||||
|
*/
|
||||||
|
template<typename Packet> EIGEN_STRONG_INLINE Packet
|
||||||
|
pldexp_double(const Packet& a, const Packet& exponent);
|
||||||
|
|
||||||
/** \internal \returns log(x) for single precision float */
|
/** \internal \returns log(x) for single precision float */
|
||||||
template <typename Packet>
|
template <typename Packet>
|
||||||
|
@ -399,21 +399,6 @@ template<> EIGEN_DEVICE_FUNC inline Packet16b pselect(const Packet16b& mask, con
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f& a, const Packet4f& b) { return _mm_cmple_ps(a,b); }
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f& a, const Packet4f& b) { return _mm_cmplt_ps(a,b); }
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) { return _mm_cmpnge_ps(a,b); }
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f pcmp_eq(const Packet4f& a, const Packet4f& b) { return _mm_cmpeq_ps(a,b); }
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2d pcmp_le(const Packet2d& a, const Packet2d& b) { return _mm_cmple_pd(a,b); }
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2d pcmp_lt(const Packet2d& a, const Packet2d& b) { return _mm_cmplt_pd(a,b); }
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2d pcmp_lt_or_nan(const Packet2d& a, const Packet2d& b) { return _mm_cmpnge_pd(a,b); }
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2d pcmp_eq(const Packet2d& a, const Packet2d& b) { return _mm_cmpeq_pd(a,b); }
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4i pcmp_lt(const Packet4i& a, const Packet4i& b) { return _mm_cmplt_epi32(a,b); }
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4i& b) { return _mm_cmpeq_epi32(a,b); }
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet16b pcmp_eq(const Packet16b& a, const Packet16b& b) { return _mm_cmpeq_epi8(a,b); }
|
|
||||||
|
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4i ptrue<Packet4i>(const Packet4i& a) { return _mm_cmpeq_epi32(a, a); }
|
template<> EIGEN_STRONG_INLINE Packet4i ptrue<Packet4i>(const Packet4i& a) { return _mm_cmpeq_epi32(a, a); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet16b ptrue<Packet16b>(const Packet16b& a) { return _mm_cmpeq_epi8(a, a); }
|
template<> EIGEN_STRONG_INLINE Packet16b ptrue<Packet16b>(const Packet16b& a) { return _mm_cmpeq_epi8(a, a); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f
|
template<> EIGEN_STRONG_INLINE Packet4f
|
||||||
@ -447,6 +432,21 @@ template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, con
|
|||||||
template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(b,a); }
|
template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return _mm_andnot_pd(b,a); }
|
||||||
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(b,a); }
|
template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return _mm_andnot_si128(b,a); }
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f& a, const Packet4f& b) { return _mm_cmple_ps(a,b); }
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f& a, const Packet4f& b) { return _mm_cmplt_ps(a,b); }
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f& a, const Packet4f& b) { return _mm_cmpnge_ps(a,b); }
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet4f pcmp_eq(const Packet4f& a, const Packet4f& b) { return _mm_cmpeq_ps(a,b); }
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet2d pcmp_le(const Packet2d& a, const Packet2d& b) { return _mm_cmple_pd(a,b); }
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet2d pcmp_lt(const Packet2d& a, const Packet2d& b) { return _mm_cmplt_pd(a,b); }
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet2d pcmp_lt_or_nan(const Packet2d& a, const Packet2d& b) { return _mm_cmpnge_pd(a,b); }
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet2d pcmp_eq(const Packet2d& a, const Packet2d& b) { return _mm_cmpeq_pd(a,b); }
|
||||||
|
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet4i pcmp_lt(const Packet4i& a, const Packet4i& b) { return _mm_cmplt_epi32(a,b); }
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet4i pcmp_eq(const Packet4i& a, const Packet4i& b) { return _mm_cmpeq_epi32(a,b); }
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet16b pcmp_eq(const Packet16b& a, const Packet16b& b) { return _mm_cmpeq_epi8(a,b); }
|
||||||
|
template<> EIGEN_STRONG_INLINE Packet4i pcmp_le(const Packet4i& a, const Packet4i& b) { return por(pcmp_lt(a,b), pcmp_eq(a,b)); }
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) {
|
template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) {
|
||||||
#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63
|
#if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63
|
||||||
// There appears to be a bug in GCC, by which the optimizer may
|
// There appears to be a bug in GCC, by which the optimizer may
|
||||||
@ -907,13 +907,22 @@ template<> EIGEN_STRONG_INLINE Packet4f pldexp<Packet4f>(const Packet4f& a, cons
|
|||||||
// We specialize pldexp here, since the generic implementation uses Packet2l, which is not well
|
// We specialize pldexp here, since the generic implementation uses Packet2l, which is not well
|
||||||
// supported by SSE, and has more range than is needed for exponents.
|
// supported by SSE, and has more range than is needed for exponents.
|
||||||
template<> EIGEN_STRONG_INLINE Packet2d pldexp<Packet2d>(const Packet2d& a, const Packet2d& exponent) {
|
template<> EIGEN_STRONG_INLINE Packet2d pldexp<Packet2d>(const Packet2d& a, const Packet2d& exponent) {
|
||||||
const Packet2d cst_1023 = pset1<Packet2d>(1023.0);
|
// Clamp exponent to [-2099, 2099]
|
||||||
// Add exponent offset.
|
const Packet2d max_exponent = pset1<Packet2d>(2099.0);
|
||||||
Packet4i ei = _mm_cvtpd_epi32(padd(exponent, cst_1023));
|
const Packet2d e = pmin(pmax(exponent, pnegate(max_exponent)), max_exponent);
|
||||||
// Convert to exponents to int64 and swizzle to the low-order 32 bits.
|
|
||||||
ei = vec4i_swizzle1(ei, 0, 3, 1, 3);
|
// Convert e to integer and swizzle to low-order bits.
|
||||||
// return a * 2^exponent
|
const Packet4i ei = vec4i_swizzle1(_mm_cvtpd_epi32(e), 0, 3, 1, 3);
|
||||||
return pmul(a, _mm_castsi128_pd(_mm_slli_epi64(ei, 52)));
|
|
||||||
|
// Split 2^e into four factors and multiply:
|
||||||
|
const Packet4i bias = _mm_set_epi32(0, 1023, 0, 1023);
|
||||||
|
Packet4i b = parithmetic_shift_right<2>(ei); // floor(e/4)
|
||||||
|
Packet2d c = _mm_castsi128_pd(_mm_slli_epi64(padd(b, bias), 52)); // 2^b
|
||||||
|
Packet2d out = pmul(pmul(pmul(a, c), c), c); // a * 2^(3b)
|
||||||
|
b = psub(psub(psub(ei, b), b), b); // e - 3b
|
||||||
|
c = _mm_castsi128_pd(_mm_slli_epi64(padd(b, bias), 52)); // 2^(e - 3b)
|
||||||
|
out = pmul(out, c); // a * 2^e
|
||||||
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
// with AVX, the default implementations based on pload1 are faster
|
// with AVX, the default implementations based on pload1 are faster
|
||||||
|
@ -573,10 +573,41 @@ void packetmath_real() {
|
|||||||
data2[i] = Scalar(internal::random<double>(-1, 1));
|
data2[i] = Scalar(internal::random<double>(-1, 1));
|
||||||
}
|
}
|
||||||
for (int i = 0; i < PacketSize; ++i) {
|
for (int i = 0; i < PacketSize; ++i) {
|
||||||
data1[i+PacketSize] = Scalar(internal::random<int>(0, 4));
|
data1[i+PacketSize] = Scalar(internal::random<int>(-4, 4));
|
||||||
data2[i+PacketSize] = Scalar(internal::random<double>(0, 4));
|
data2[i+PacketSize] = Scalar(internal::random<double>(-4, 4));
|
||||||
}
|
}
|
||||||
CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp);
|
CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp);
|
||||||
|
if (PacketTraits::HasExp) {
|
||||||
|
data1[0] = Scalar(-1);
|
||||||
|
// underflow to zero
|
||||||
|
data1[PacketSize] = Scalar(std::numeric_limits<Scalar>::min_exponent-10);
|
||||||
|
CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp);
|
||||||
|
// overflow to inf
|
||||||
|
data1[PacketSize] = Scalar(std::numeric_limits<Scalar>::max_exponent+10);
|
||||||
|
CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp);
|
||||||
|
// NaN stays NaN
|
||||||
|
data1[0] = NumTraits<Scalar>::quiet_NaN();
|
||||||
|
CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp);
|
||||||
|
VERIFY((numext::isnan)(data2[0]));
|
||||||
|
// inf stays inf
|
||||||
|
data1[0] = NumTraits<Scalar>::infinity();
|
||||||
|
data1[PacketSize] = Scalar(std::numeric_limits<Scalar>::min_exponent-10);
|
||||||
|
CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp);
|
||||||
|
// zero stays zero
|
||||||
|
data1[0] = Scalar(0);
|
||||||
|
data1[PacketSize] = Scalar(std::numeric_limits<Scalar>::max_exponent+10);
|
||||||
|
CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp);
|
||||||
|
// Small number big exponent.
|
||||||
|
data1[0] = Scalar(std::ldexp(Scalar(1.0), std::numeric_limits<Scalar>::min_exponent-1));
|
||||||
|
data1[PacketSize] = Scalar(-std::numeric_limits<Scalar>::min_exponent
|
||||||
|
+std::numeric_limits<Scalar>::max_exponent);
|
||||||
|
CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp);
|
||||||
|
// Big number small exponent.
|
||||||
|
data1[0] = Scalar(std::ldexp(Scalar(1.0), std::numeric_limits<Scalar>::max_exponent-1));
|
||||||
|
data1[PacketSize] = Scalar(+std::numeric_limits<Scalar>::min_exponent
|
||||||
|
-std::numeric_limits<Scalar>::max_exponent);
|
||||||
|
CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp);
|
||||||
|
}
|
||||||
|
|
||||||
for (int i = 0; i < size; ++i) {
|
for (int i = 0; i < size; ++i) {
|
||||||
data1[i] = Scalar(internal::random<double>(-1, 1) * std::pow(10., internal::random<double>(-6, 6)));
|
data1[i] = Scalar(internal::random<double>(-1, 1) * std::pow(10., internal::random<double>(-6, 6)));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user