diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
index bd2ac8308..7634bf72f 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
@@ -301,12 +301,9 @@ struct digamma_impl {
     This implementation works on both scalars and Ts.
 */
 template <typename T>
-EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_fast_erf_float(const T& a_x) {
-  // Clamp the inputs to the range [-4, 4] since anything outside
-  // this range is +/-1.0f in single-precision.
-  const T plus_4 = pset1<T>(4.f);
-  const T minus_4 = pset1<T>(-4.f);
-  const T x = pmax(pmin(a_x, plus_4), minus_4);
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_fast_erf_float(const T& x) {
+  const float kErfInvOneMinusHalfULP = 3.832506856900711f;
+  const T clamp = pcmp_le(pset1<T>(kErfInvOneMinusHalfULP), pabs(x));
   // The monomial coefficients of the numerator polynomial (odd).
   const T alpha_1 = pset1<T>(-1.60960333262415e-02f);
   const T alpha_3 = pset1<T>(-2.95459980854025e-03f);
@@ -342,7 +339,8 @@ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T generic_fast_erf_float(const T& a_x) {
   q = pmadd(x2, q, beta_0);
 
   // Divide the numerator by the denominator.
-  return pdiv(p, q);
+  const T sign = pselect(pcmp_le(x, pset1<T>(0.0f)), pset1<T>(-1.0f), pset1<T>(1.0f));
+  return pselect(clamp, sign, pdiv(p, q));
 }
 
 template <typename T>