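Correct pnegate for floating-point zero.

The AVX and AVX512 pnegate implementations computed 0 - a, which yields +0
when a is +0. They now flip the sign bit with an XOR, so pnegate(+0) is -0.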

(cherry picked from commit 8588d8c74b42eedde578af01605ecc90189bc329)
2025-07-15 01:21:48 +08:00 · 2022-11-15 18:07:23 +00:00 · 2022-11-15 18:07:23 +00:00 · 5547205092
commit 5547205092
parent 771e91860b
4 changed files with 21 additions and 19 deletions
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@ -285,11 +285,13 @@ template<> EIGEN_STRONG_INLINE Packet8i psub<Packet8i>(const Packet8i& a, const

 template<> EIGEN_STRONG_INLINE Packet8f pnegate(const Packet8f& a)
 {
-  return _mm256_sub_ps(_mm256_set1_ps(0.0),a);
+  const Packet8f mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x80000000));
+  return _mm256_xor_ps(a, mask);
 }
 template<> EIGEN_STRONG_INLINE Packet4d pnegate(const Packet4d& a)
 {
-  return _mm256_sub_pd(_mm256_set1_pd(0.0),a);
+  const Packet4d mask = _mm256_castsi256_pd(_mm256_set1_epi64x(0x8000000000000000ULL));
+  return _mm256_xor_pd(a, mask);
 }

 template<> EIGEN_STRONG_INLINE Packet8f pconj(const Packet8f& a) { return a; }
--- a/Eigen/src/Core/arch/AVX512/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX512/PacketMath.h
@ -296,11 +296,13 @@ EIGEN_STRONG_INLINE Packet16i psub<Packet16i>(const Packet16i& a,

 template <>
 EIGEN_STRONG_INLINE Packet16f pnegate(const Packet16f& a) {
-  return _mm512_sub_ps(_mm512_set1_ps(0.0), a);
+  const __m512i mask = _mm512_set1_epi32(0x80000000);
+  return _mm512_castsi512_ps(_mm512_xor_epi32(_mm512_castps_si512(a), mask));
 }
 template <>
 EIGEN_STRONG_INLINE Packet8d pnegate(const Packet8d& a) {
-  return _mm512_sub_pd(_mm512_set1_pd(0.0), a);
+  const __m512i mask = _mm512_set1_epi64(0x8000000000000000ULL);
+  return _mm512_castsi512_pd(_mm512_xor_epi64(_mm512_castpd_si512(a), mask));
 }

 template <>
--- a/Eigen/src/Core/util/Meta.h
+++ b/Eigen/src/Core/util/Meta.h
@ -189,21 +189,9 @@ template<> struct make_unsigned<signed int>       { typedef unsigned int type; }
 template<> struct make_unsigned<unsigned int>     { typedef unsigned int type; };
 template<> struct make_unsigned<signed long>      { typedef unsigned long type; };
 template<> struct make_unsigned<unsigned long>    { typedef unsigned long type; };
-#if EIGEN_COMP_MSVC
-template<> struct make_unsigned<signed __int64>   { typedef unsigned __int64 type; };
-template<> struct make_unsigned<unsigned __int64> { typedef unsigned __int64 type; };
-#endif
-
-// Some platforms define int64_t as `long long` even for C++03, where
-// `long long` is not guaranteed by the standard. In this case we are missing
-// the definition for make_unsigned. If we just define it, we run into issues
-// where `long long` doesn't exist in some compilers for C++03. We therefore add
-// the specialization for these platforms only.
-#if EIGEN_OS_MAC || EIGEN_COMP_MINGW
 template<> struct make_unsigned<unsigned long long> { typedef unsigned long long type; };
 template<> struct make_unsigned<long long>          { typedef unsigned long long type; };
 #endif
-#endif

 template <typename T> struct add_const { typedef const T type; };
 template <typename T> struct add_const<T&> { typedef T& type; };
--- a/test/packetmath.cpp
+++ b/test/packetmath.cpp
@ -637,9 +637,19 @@ void packetmath_real() {
  const int PacketSize = internal::unpacket_traits<Packet>::size;

  const int size = PacketSize * 4;
-  EIGEN_ALIGN_MAX Scalar data1[PacketSize * 4];
-  EIGEN_ALIGN_MAX Scalar data2[PacketSize * 4];
-  EIGEN_ALIGN_MAX Scalar ref[PacketSize * 4];
+  EIGEN_ALIGN_MAX Scalar data1[PacketSize * 4] = {};
+  EIGEN_ALIGN_MAX Scalar data2[PacketSize * 4] = {};
+  EIGEN_ALIGN_MAX Scalar ref[PacketSize * 4] = {};
+  
+  // Negating +0 must yield -0 (only the sign bit set). Note: Scalar(-0) is +0.
+  if (PacketTraits::HasNegate) {
+    test::packet_helper<PacketTraits::HasNegate,Packet> h;
+    data1[0] = Scalar(-0);
+    h.store(data2, internal::pnegate(h.load(data1)));
+    typedef typename internal::make_unsigned<typename internal::make_integer<Scalar>::type>::type Bits;
+    Bits bits = numext::bit_cast<Bits>(data2[0]);
+    VERIFY_IS_EQUAL(bits, static_cast<Bits>(Bits(1)<<(sizeof(Scalar)*CHAR_BIT - 1)));
+  }

  for (int i = 0; i < size; ++i) {
    data1[i] = Scalar(internal::random<double>(0, 1) * std::pow(10., internal::random<double>(-6, 6)));