From a1a96fafde4d8d6e30a2677b669a31526b3da78d Mon Sep 17 00:00:00 2001
From: Charles Schlosser <cs.schlosser@gmail.com>
Date: Mon, 8 Jan 2024 23:28:41 +0000
Subject: [PATCH] Clean up stableNorm

---
 Eigen/src/Core/StableNorm.h | 67 ++++---------------------------------
 test/stable_norm.cpp        |  2 +-
 2 files changed, 8 insertions(+), 61 deletions(-)
diff --git a/Eigen/src/Core/StableNorm.h b/Eigen/src/Core/StableNorm.h
index 6513120e0..1ec71403c 100644
--- a/Eigen/src/Core/StableNorm.h
+++ b/Eigen/src/Core/StableNorm.h
@@ -46,78 +46,25 @@ inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& sc
     ssq += (bl * invScale).squaredNorm();
 }
 
-template <typename VectorType, typename RealScalar>
-void stable_norm_impl_inner_step(const VectorType& vec, RealScalar& ssq, RealScalar& scale, RealScalar& invScale) {
-  typedef typename VectorType::Scalar Scalar;
-  const Index blockSize = 4096;
-
-  typedef typename internal::nested_eval<VectorType, 2>::type VectorTypeCopy;
-  typedef internal::remove_all_t<VectorTypeCopy> VectorTypeCopyClean;
-  const VectorTypeCopy copy(vec);
-
-  enum {
-    CanAlign =
-        ((int(VectorTypeCopyClean::Flags) & DirectAccessBit) ||
-         (int(internal::evaluator<VectorTypeCopyClean>::Alignment) > 0)  // FIXME Alignment)>0 might not be enough
-         ) &&
-        (blockSize * sizeof(Scalar) * 2 < EIGEN_STACK_ALLOCATION_LIMIT) &&
-        (EIGEN_MAX_STATIC_ALIGN_BYTES >
-         0)  // if we cannot allocate on the stack, then let's not bother about this optimization
-  };
-  typedef std::conditional_t<
-      CanAlign,
-      Ref<const Matrix<Scalar, Dynamic, 1, 0, blockSize, 1>, internal::evaluator<VectorTypeCopyClean>::Alignment>,
-      typename VectorTypeCopyClean::ConstSegmentReturnType>
-      SegmentWrapper;
-  Index n = vec.size();
-
-  Index bi = internal::first_default_aligned(copy);
-  if (bi > 0) internal::stable_norm_kernel(copy.head(bi), ssq, scale, invScale);
-  for (; bi < n; bi += blockSize)
-    internal::stable_norm_kernel(SegmentWrapper(copy.segment(bi, numext::mini(blockSize, n - bi))), ssq, scale,
-                                 invScale);
-}
-
-template <typename VectorType>
-typename VectorType::RealScalar stable_norm_impl(const VectorType& vec,
-                                                 std::enable_if_t<VectorType::IsVectorAtCompileTime>* = 0) {
-  using std::abs;
-  using std::sqrt;
-
-  Index n = vec.size();
-
-  if (n == 1) return abs(vec.coeff(0));
-
-  typedef typename VectorType::RealScalar RealScalar;
-  RealScalar scale(0);
-  RealScalar invScale(1);
-  RealScalar ssq(0);  // sum of squares
-
-  stable_norm_impl_inner_step(vec, ssq, scale, invScale);
-
-  return scale * sqrt(ssq);
-}
-
 template <typename MatrixType>
-typename MatrixType::RealScalar stable_norm_impl(const MatrixType& mat,
-                                                 std::enable_if_t<!MatrixType::IsVectorAtCompileTime>* = 0) {
-  using std::sqrt;
+typename MatrixType::RealScalar stable_norm_impl(const MatrixType& mat) {
+  using numext::sqrt;
 
   typedef typename MatrixType::RealScalar RealScalar;
   RealScalar scale(0);
   RealScalar invScale(1);
   RealScalar ssq(0);  // sum of squares
 
-  for (Index j = 0; j < mat.outerSize(); ++j) stable_norm_impl_inner_step(mat.innerVector(j), ssq, scale, invScale);
+  stable_norm_kernel(mat, ssq, scale, invScale);
   return scale * sqrt(ssq);
 }
 
 template <typename Derived>
 inline typename NumTraits<typename traits<Derived>::Scalar>::Real blueNorm_impl(const EigenBase<Derived>& _vec) {
   typedef typename Derived::RealScalar RealScalar;
-  using std::abs;
-  using std::pow;
-  using std::sqrt;
+  using numext::abs;
+  using numext::pow;
+  using numext::sqrt;
 
   // This program calculates the machine-dependent constants
   // bl, b2, slm, s2m, relerr overfl
@@ -140,7 +87,7 @@ inline typename NumTraits<typename traits<Derived>::Scalar>::Real blueNorm_impl(
       RealScalar(pow(RealScalar(ibeta), RealScalar((2 - iemin) / 2)));  // scaling factor for lower range
   static const RealScalar s2m =
       RealScalar(pow(RealScalar(ibeta), RealScalar(-((iemax + it) / 2))));  // scaling factor for upper range
-  static const RealScalar eps = RealScalar(pow(double(ibeta), 1 - it));
+  static const RealScalar eps = RealScalar(pow(double(ibeta), double(1 - it)));
   static const RealScalar relerr = sqrt(eps);  // tolerance for neglecting asml
 
   const Derived& vec(_vec.derived());
diff --git a/test/stable_norm.cpp b/test/stable_norm.cpp
index 130284a53..c16e44d8d 100644
--- a/test/stable_norm.cpp
+++ b/test/stable_norm.cpp
@@ -220,7 +220,7 @@ void test_hypot() {
   while (numext::abs2(factor) < RealScalar(1e-4)) factor = internal::random<Scalar>();
   Scalar small = factor * ((std::numeric_limits<RealScalar>::min)() * RealScalar(1e4));
 
-  Scalar one(1), zero(0), sqrt2(std::sqrt(2)), nan(std::numeric_limits<RealScalar>::quiet_NaN());
+  Scalar one(1), zero(0), sqrt2(std::sqrt(Scalar(2))), nan(std::numeric_limits<RealScalar>::quiet_NaN());
 
   Scalar a = internal::random<Scalar>(-1, 1);
   Scalar b = internal::random<Scalar>(-1, 1);