Fix plog(+inf) with AVX512 so that it returns +inf

2025-10-13 08:31:28 +08:00 · 2019-01-09 16:53:37 +01:00 · 2019-01-09 16:53:37 +01:00 · 3492a1ca74
commit 3492a1ca74
parent 47810cf5b7
1 changed file with 10 additions and 3 deletions
--- a/Eigen/src/Core/arch/AVX512/MathFunctions.h
+++ b/Eigen/src/Core/arch/AVX512/MathFunctions.h
@@ -47,6 +47,7 @@ plog<Packet16f>(const Packet16f& _x) {
  // The smallest non denormalized float number.
  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(min_norm_pos, 0x00800000);
  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(minus_inf, 0xff800000);
+  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(pos_inf, 0x7f800000);
  _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(nan, 0x7fc00000);

  // Polynomial coefficients.
@@ -116,9 +117,15 @@ plog<Packet16f>(const Packet16f& _x) {
  x = padd(x, y);
  x = padd(x, y2);

-  // Filter out invalid inputs, i.e. negative arg will be NAN, 0 will be -INF.
+  __mmask16 pos_inf_mask = _mm512_cmp_ps_mask(_x,p16f_pos_inf,_CMP_EQ_OQ);
+  // Filter out invalid inputs, i.e.:
+  //  - negative arg will be NAN,
+  //  - 0 will be -INF.
+  //  - +INF will be +INF
  return _mm512_mask_blend_ps(iszero_mask,
-                              _mm512_mask_blend_ps(invalid_mask, x, p16f_nan),
+            _mm512_mask_blend_ps(invalid_mask,
+              _mm512_mask_blend_ps(pos_inf_mask,x,p16f_pos_inf),
+              p16f_nan),
            p16f_minus_inf);
 }
 #endif