mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-04-22 09:39:34 +08:00
Revert "Fix rint for SSE/NEON."
This reverts commit e72dfeb8b9fa5662831b5d0bb9d132521f9173dd
This commit is contained in:
parent
e72dfeb8b9
commit
9a663973b4
@ -3207,34 +3207,20 @@ template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a)
|
|||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f print(const Packet4f& a) {
|
template<> EIGEN_STRONG_INLINE Packet4f print(const Packet4f& a) {
|
||||||
// Adds and subtracts signum(a) * 2^23 to force rounding.
|
// Adds and subtracts signum(a) * 2^23 to force rounding.
|
||||||
const Packet4f limit = pset1<Packet4f>(static_cast<float>(1<<23));
|
const Packet4f offset =
|
||||||
const Packet4f abs_a = pabs(a);
|
pselect(pcmp_lt(a, pzero(a)),
|
||||||
// Inline asm to prevent the compiler from optimizing away the
|
pset1<Packet4f>(-static_cast<float>(1<<23)),
|
||||||
// addition and subtraction.
|
pset1<Packet4f>(+static_cast<float>(1<<23)));
|
||||||
// Packet4f r = psub(padd(abs_a, limit), limit);
|
return psub(padd(a, offset), offset);
|
||||||
Packet4f r = abs_a;
|
|
||||||
__asm__ ("vadd.f32 %[r], %[r], %[limit]\n\t"
|
|
||||||
"vsub.f32 %[r], %[r], %[limit]" : [r] "+x" (r) : [limit] "x" (limit));
|
|
||||||
// If greater than limit, simply return a. Otherwise, account for sign.
|
|
||||||
r = pselect(pcmp_lt(abs_a, limit),
|
|
||||||
pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a);
|
|
||||||
return r;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2f print(const Packet2f& a) {
|
template<> EIGEN_STRONG_INLINE Packet2f print(const Packet2f& a) {
|
||||||
// Adds and subtracts signum(a) * 2^23 to force rounding.
|
// Adds and subtracts signum(a) * 2^23 to force rounding.
|
||||||
const Packet2f limit = pset1<Packet2f>(static_cast<float>(1<<23));
|
const Packet2f offset =
|
||||||
const Packet2f abs_a = pabs(a);
|
pselect(pcmp_lt(a, pzero(a)),
|
||||||
// Inline asm to prevent the compiler from optimizing away the
|
pset1<Packet2f>(-static_cast<float>(1<<23)),
|
||||||
// addition and subtraction.
|
pset1<Packet2f>(+static_cast<float>(1<<23)));
|
||||||
// Packet4f r = psub(padd(abs_a, limit), limit);
|
return psub(padd(a, offset), offset);
|
||||||
Packet2f r = abs_a;
|
|
||||||
__asm__ ("vadd.f32 %[r], %[r], %[limit]\n\t"
|
|
||||||
"vsub.f32 %[r], %[r], %[limit]" : [r] "+x" (r) : [limit] "x" (limit));
|
|
||||||
// If greater than limit, simply return a. Otherwise, account for sign.
|
|
||||||
r = pselect(pcmp_lt(abs_a, limit),
|
|
||||||
pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a);
|
|
||||||
return r;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)
|
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)
|
||||||
|
@ -646,35 +646,20 @@ template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { re
|
|||||||
#else
|
#else
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f print(const Packet4f& a) {
|
template<> EIGEN_STRONG_INLINE Packet4f print(const Packet4f& a) {
|
||||||
// Adds and subtracts signum(a) * 2^23 to force rounding.
|
// Adds and subtracts signum(a) * 2^23 to force rounding.
|
||||||
const Packet4f limit = pset1<Packet4f>(static_cast<float>(1<<23));
|
const Packet4f offset =
|
||||||
const Packet4f abs_a = pabs(a);
|
pselect(pcmp_lt(a, pzero(a)),
|
||||||
// Inline asm to prevent the compiler from optimizing away the
|
pset1<Packet4f>(-static_cast<float>(1<<23)),
|
||||||
// addition and subtraction.
|
pset1<Packet4f>(+static_cast<float>(1<<23)));
|
||||||
// Packet4f r = psub(padd(abs_a, limit), limit);
|
return psub(padd(a, offset), offset);
|
||||||
Packet4f r = abs_a;
|
|
||||||
__asm__ ("addps %[limit], %[r]\n\t"
|
|
||||||
"subps %[limit], %[r]" : [r] "+x" (r) : [limit] "x" (limit));
|
|
||||||
// If greater than limit, simply return a. Otherwise, account for sign.
|
|
||||||
r = pselect(pcmp_lt(abs_a, limit),
|
|
||||||
pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a);
|
|
||||||
return r;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet2d print(const Packet2d& a) {
|
template<> EIGEN_STRONG_INLINE Packet2d print(const Packet2d& a) {
|
||||||
// Adds and subtracts signum(a) * 2^52 to force rounding.
|
// Adds and subtracts signum(a) * 2^52 to force rounding.
|
||||||
const Packet2d limit = pset1<Packet2d>(static_cast<double>(1ull<<52));
|
const Packet2d offset =
|
||||||
const Packet2d abs_a = pabs(a);
|
pselect(pcmp_lt(a, pzero(a)),
|
||||||
// Inline asm to prevent the compiler from optimizing away the
|
pset1<Packet2d>(-static_cast<double>(1ull<<52)),
|
||||||
// addition and subtraction.
|
pset1<Packet2d>(+static_cast<double>(1ull<<52)));
|
||||||
// Packet2d r = psub(padd(abs_a, limit), limit);
|
return psub(padd(a, offset), offset);
|
||||||
Packet2d r = abs_a;
|
|
||||||
asm("addpd %[limit], %[r] \n\t"
|
|
||||||
"subpd %[limit], %[r]" : [r] "+x" (r) : [limit] "x" (limit));
|
|
||||||
|
|
||||||
// If greater than limit, simply return a. Otherwise, account for sign.
|
|
||||||
r = pselect(pcmp_lt(abs_a, limit),
|
|
||||||
pselect(pcmp_lt(a, pzero(a)), pnegate(r), r), a);
|
|
||||||
return r;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)
|
template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)
|
||||||
|
@ -543,10 +543,10 @@ void packetmath_real() {
|
|||||||
CHECK_CWISE1_IF(PacketTraits::HasCos, std::cos, internal::pcos);
|
CHECK_CWISE1_IF(PacketTraits::HasCos, std::cos, internal::pcos);
|
||||||
CHECK_CWISE1_IF(PacketTraits::HasTan, std::tan, internal::ptan);
|
CHECK_CWISE1_IF(PacketTraits::HasTan, std::tan, internal::ptan);
|
||||||
|
|
||||||
CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::round, internal::pround);
|
CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround);
|
||||||
CHECK_CWISE1_EXACT_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil);
|
CHECK_CWISE1_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil);
|
||||||
CHECK_CWISE1_EXACT_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor);
|
CHECK_CWISE1_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor);
|
||||||
CHECK_CWISE1_EXACT_IF(PacketTraits::HasRint, numext::rint, internal::print);
|
CHECK_CWISE1_IF(PacketTraits::HasRint, numext::rint, internal::print);
|
||||||
|
|
||||||
// Rounding edge cases.
|
// Rounding edge cases.
|
||||||
if (PacketTraits::HasRound || PacketTraits::HasCeil || PacketTraits::HasFloor || PacketTraits::HasRint) {
|
if (PacketTraits::HasRound || PacketTraits::HasCeil || PacketTraits::HasFloor || PacketTraits::HasRint) {
|
||||||
@ -583,10 +583,10 @@ void packetmath_real() {
|
|||||||
|
|
||||||
for (size_t k=0; k<values.size(); ++k) {
|
for (size_t k=0; k<values.size(); ++k) {
|
||||||
data1[0] = values[k];
|
data1[0] = values[k];
|
||||||
CHECK_CWISE1_EXACT_IF(PacketTraits::HasRound, numext::round, internal::pround);
|
CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround);
|
||||||
CHECK_CWISE1_EXACT_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil);
|
CHECK_CWISE1_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil);
|
||||||
CHECK_CWISE1_EXACT_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor);
|
CHECK_CWISE1_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor);
|
||||||
CHECK_CWISE1_EXACT_IF(PacketTraits::HasRint, numext::rint, internal::print);
|
CHECK_CWISE1_IF(PacketTraits::HasRint, numext::rint, internal::print);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -644,7 +644,7 @@ void packetmath_real() {
|
|||||||
if (PacketTraits::HasExp) {
|
if (PacketTraits::HasExp) {
|
||||||
data1[0] = Scalar(-1);
|
data1[0] = Scalar(-1);
|
||||||
// underflow to zero
|
// underflow to zero
|
||||||
data1[PacketSize] = Scalar(std::numeric_limits<Scalar>::min_exponent-55);
|
data1[PacketSize] = Scalar(std::numeric_limits<Scalar>::min_exponent-10);
|
||||||
CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp);
|
CHECK_CWISE2_IF(PacketTraits::HasExp, REF_LDEXP, internal::pldexp);
|
||||||
// overflow to inf
|
// overflow to inf
|
||||||
data1[PacketSize] = Scalar(std::numeric_limits<Scalar>::max_exponent+10);
|
data1[PacketSize] = Scalar(std::numeric_limits<Scalar>::max_exponent+10);
|
||||||
|
@ -108,23 +108,6 @@ template<typename Scalar> bool areApprox(const Scalar* a, const Scalar* b, int s
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename Scalar> bool areEqual(const Scalar* a, const Scalar* b, int size)
|
|
||||||
{
|
|
||||||
for (int i=0; i<size; ++i)
|
|
||||||
{
|
|
||||||
if (a[i] != b[i])
|
|
||||||
{
|
|
||||||
if((numext::isnan)(a[i]) && (numext::isnan)(b[i]))
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
std::cout << "ref: [" << Map<const Matrix<Scalar,1,Dynamic> >(a,size) << "]" << " != vec: [" << Map<const Matrix<Scalar,1,Dynamic> >(b,size) << "]\n";
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define CHECK_CWISE1(REFOP, POP) { \
|
#define CHECK_CWISE1(REFOP, POP) { \
|
||||||
for (int i=0; i<PacketSize; ++i) \
|
for (int i=0; i<PacketSize; ++i) \
|
||||||
ref[i] = REFOP(data1[i]); \
|
ref[i] = REFOP(data1[i]); \
|
||||||
@ -195,14 +178,6 @@ struct packet_helper<false,Packet>
|
|||||||
VERIFY(test::areApprox(ref, data2, PacketSize) && #POP); \
|
VERIFY(test::areApprox(ref, data2, PacketSize) && #POP); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define CHECK_CWISE1_EXACT_IF(COND, REFOP, POP) if(COND) { \
|
|
||||||
test::packet_helper<COND,Packet> h; \
|
|
||||||
for (int i=0; i<PacketSize; ++i) \
|
|
||||||
ref[i] = Scalar(REFOP(data1[i])); \
|
|
||||||
h.store(data2, POP(h.load(data1))); \
|
|
||||||
VERIFY(test::areEqual(ref, data2, PacketSize) && #POP); \
|
|
||||||
}
|
|
||||||
|
|
||||||
#define CHECK_CWISE2_IF(COND, REFOP, POP) if(COND) { \
|
#define CHECK_CWISE2_IF(COND, REFOP, POP) if(COND) { \
|
||||||
test::packet_helper<COND,Packet> h; \
|
test::packet_helper<COND,Packet> h; \
|
||||||
for (int i=0; i<PacketSize; ++i) \
|
for (int i=0; i<PacketSize; ++i) \
|
||||||
|
Loading…
x
Reference in New Issue
Block a user