A few small fixes to a) prevent throwing in ctors and dtors of the threading code, and b) support matrix exponential on platforms with 113 bits of mantissa for long doubles.

Rasmus Munk Larsen 2018-11-09 14:15:32 -08:00
parent 784a3f13cf
commit 93f9988a7e
5 changed files with 23 additions and 23 deletions
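Context for fix (a): eigen_assert can be redefined by client code (Eigen's own test suite redefines it to throw so that assertions can be tested), while eigen_plain_assert always remains a plain, non-throwing check. A throwing assert is unsafe in a constructor or destructor because destructors are implicitly noexcept in C++11, so an escaping exception calls std::terminate. A minimal sketch of the distinction, using hypothetical macro names rather than Eigen's actual definitions:

#include <cassert>
#include <stdexcept>

// Hypothetical stand-ins for the two assert flavors (not Eigen's real macros).
// A user-redefined eigen_assert may throw; eigen_plain_assert never does.
#define THROWING_ASSERT(x) \
  do { if (!(x)) throw std::runtime_error("assert failed: " #x); } while (0)
#define PLAIN_ASSERT(x) assert(x)

struct Guard {
  ~Guard() noexcept {
    // Destructors are implicitly noexcept: if THROWING_ASSERT fired here,
    // the exception would escape the dtor and std::terminate would be called.
    PLAIN_ASSERT(true);
  }
};

int main() {
  Guard g;
  return 0;
}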

unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h

@@ -18,18 +18,18 @@ namespace Eigen {
 class Barrier {
  public:
   Barrier(unsigned int count) : state_(count << 1), notified_(false) {
-    eigen_assert(((count << 1) >> 1) == count);
+    eigen_plain_assert(((count << 1) >> 1) == count);
   }
   ~Barrier() { eigen_plain_assert((state_ >> 1) == 0); }

   void Notify() {
     unsigned int v = state_.fetch_sub(2, std::memory_order_acq_rel) - 2;
     if (v != 1) {
-      eigen_assert(((v + 2) & ~1) != 0);
+      eigen_plain_assert(((v + 2) & ~1) != 0);
       return;  // either count has not dropped to 0, or waiter is not waiting
     }
     std::unique_lock<std::mutex> l(mu_);
-    eigen_assert(!notified_);
+    eigen_plain_assert(!notified_);
     notified_ = true;
     cv_.notify_all();
   }
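The asserts above encode the Barrier state layout: the pending count lives in the high bits of state_ and the low bit flags a blocked waiter, which is why the constructor checks that count survives the left shift. A small sketch of that packing (my reading of the asserts, not Eigen's code verbatim):

#include <cassert>
#include <cstdio>

int main() {
  unsigned int count = 5;
  assert(((count << 1) >> 1) == count);  // the ctor check: count must survive the shift
  unsigned int state = count << 1;       // high bits: notifications still pending
  state |= 1;                            // low bit: a waiter is blocked on the cv
  std::printf("pending = %u, waiter flag = %u\n", state >> 1, state & 1u);
  return 0;
}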

unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h

@@ -51,7 +51,7 @@ class EventCount {
   class Waiter;

   EventCount(MaxSizeVector<Waiter>& waiters) : waiters_(waiters) {
-    eigen_assert(waiters.size() < (1 << kWaiterBits) - 1);
+    eigen_plain_assert(waiters.size() < (1 << kWaiterBits) - 1);
     // Initialize epoch to something close to overflow to test overflow.
     state_ = kStackMask | (kEpochMask - kEpochInc * waiters.size() * 2);
   }
@@ -88,7 +88,7 @@ class EventCount {
       // We've already been notified.
       if (int64_t((state & kEpochMask) - epoch) > 0) return;
       // Remove this thread from prewait counter and add it to the waiter list.
-      eigen_assert((state & kWaiterMask) != 0);
+      eigen_plain_assert((state & kWaiterMask) != 0);
       uint64_t newstate = state - kWaiterInc + kEpochInc;
       newstate = (newstate & ~kStackMask) | (w - &waiters_[0]);
       if ((state & kStackMask) == kStackMask)
@@ -119,7 +119,7 @@ class EventCount {
       // We've already been notified.
       if (int64_t((state & kEpochMask) - epoch) > 0) return;
       // Remove this thread from prewait counter.
-      eigen_assert((state & kWaiterMask) != 0);
+      eigen_plain_assert((state & kWaiterMask) != 0);
       if (state_.compare_exchange_weak(state, state - kWaiterInc + kEpochInc,
                                        std::memory_order_relaxed))
         return;
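Both hunks sit next to the epoch test int64_t((state & kEpochMask) - epoch) > 0, which orders epochs correctly even across counter wraparound (the constructor above deliberately starts the epoch near the overflow point to exercise this). A self-contained illustration of that wraparound-safe comparison, simplified to a full 64-bit counter rather than EventCount's masked field:

#include <cstdint>
#include <cstdio>

// Unsigned subtraction reinterpreted as signed: positive iff `current`
// is ahead of `snapshot`, even if the counter wrapped in between.
static bool epoch_advanced(uint64_t current, uint64_t snapshot) {
  return int64_t(current - snapshot) > 0;
}

int main() {
  uint64_t near_max = UINT64_MAX - 1;
  std::printf("%d\n", epoch_advanced(near_max + 3, near_max));  // 1: wrapped, still ordered
  std::printf("%d\n", epoch_advanced(near_max, near_max));      // 0: no progress
  return 0;
}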

unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h

@@ -42,7 +42,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     // indices as (t + coprime) % num_threads, we will cover all threads without
     // repetitions (effectively getting a pseudo-random permutation of thread
     // indices).
-    eigen_assert(num_threads_ < kMaxThreads);
+    eigen_plain_assert(num_threads_ < kMaxThreads);
     for (int i = 1; i <= num_threads_; ++i) {
       all_coprimes_.emplace_back(i);
       ComputeCoprimes(i, &all_coprimes_.back());
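The comment in this hunk relies on a standard number-theory fact: if the step is coprime to num_threads, then t, t+step, t+2*step, ... modulo num_threads enumerates every index exactly once. A quick standalone demonstration (C++17 for std::gcd):

#include <cstdio>
#include <numeric>  // std::gcd

int main() {
  const unsigned num_threads = 10;
  const unsigned coprime = 3;                     // gcd(3, 10) == 1
  if (std::gcd(coprime, num_threads) != 1) return 1;
  unsigned t = 7;                                 // arbitrary starting thread
  for (unsigned i = 0; i < num_threads; ++i) {
    std::printf("%u ", t);                        // visits each of 0..9 exactly once
    t = (t + coprime) % num_threads;
  }
  std::printf("\n");                              // prints: 7 0 3 6 9 2 5 8 1 4
  return 0;
}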
@@ -85,7 +85,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
   }

   void SetStealPartitions(const std::vector<std::pair<unsigned, unsigned>>& partitions) {
-    eigen_assert(partitions.size() == static_cast<std::size_t>(num_threads_));
+    eigen_plain_assert(partitions.size() == static_cast<std::size_t>(num_threads_));
     // Pass this information to each thread queue.
     for (int i = 0; i < num_threads_; i++) {
@@ -112,11 +112,11 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     } else {
       // A free-standing thread (or worker of another pool), push onto a random
       // queue.
-      eigen_assert(start < limit);
-      eigen_assert(limit <= num_threads_);
+      eigen_plain_assert(start < limit);
+      eigen_plain_assert(limit <= num_threads_);
       int num_queues = limit - start;
       int rnd = Rand(&pt->rand) % num_queues;
-      eigen_assert(start + rnd < limit);
+      eigen_plain_assert(start + rnd < limit);
       Queue& q = thread_data_[start + rnd].queue;
       t = q.PushBack(std::move(t));
     }
@@ -182,9 +182,9 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
   }

   void AssertBounds(int start, int end) {
-    eigen_assert(start >= 0);
-    eigen_assert(start < end);  // non-zero sized partition
-    eigen_assert(end <= num_threads_);
+    eigen_plain_assert(start >= 0);
+    eigen_plain_assert(start < end);  // non-zero sized partition
+    eigen_plain_assert(end <= num_threads_);
   }

   inline void SetStealPartition(size_t i, unsigned val) {
@@ -253,7 +253,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
 #ifndef EIGEN_THREAD_LOCAL
     std::unique_ptr<PerThread> new_pt(new PerThread());
     per_thread_map_mutex_.lock();
-    eigen_assert(per_thread_map_.emplace(GlobalThreadIdHash(), std::move(new_pt)).second);
+    eigen_plain_assert(per_thread_map_.emplace(GlobalThreadIdHash(), std::move(new_pt)).second);
     per_thread_map_mutex_.unlock();
     init_barrier_->Notify();
     init_barrier_->Wait();
@@ -337,7 +337,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     unsigned inc = all_coprimes_[size - 1][r % all_coprimes_[size - 1].size()];

     for (unsigned i = 0; i < size; i++) {
-      eigen_assert(start + victim < limit);
+      eigen_plain_assert(start + victim < limit);
       Task t = thread_data_[start + victim].queue.PopBack();
       if (t.f) {
         return t;
@@ -371,7 +371,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
   // time to exit (returns false). Can optionally return a task to execute in t
   // (in such case t.f != nullptr on return).
   bool WaitForWork(EventCount::Waiter* waiter, Task* t) {
-    eigen_assert(!t->f);
+    eigen_plain_assert(!t->f);
     // We already did best-effort emptiness check in Steal, so prepare for
     // blocking.
     ec_.Prewait(waiter);
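The Prewait call above is the first half of a two-phase wait: a worker announces its intent to sleep, re-checks the queues, and only then commits to blocking, so a task pushed in between is never missed. A condensed mutex/condvar analogy of that protocol (EventCount itself is lock-free; this shows only the shape of the idea, not its implementation):

#include <atomic>
#include <condition_variable>
#include <mutex>

struct TwoPhaseWait {
  std::mutex mu;
  std::condition_variable cv;
  std::atomic<int> prewaiters{0};

  void Prewait() { prewaiters.fetch_add(1); }     // phase 1: announce intent

  template <typename HasWork>
  void CommitWait(HasWork has_work) {             // phase 2: re-check, then block
    std::unique_lock<std::mutex> l(mu);
    while (!has_work()) cv.wait(l);
    prewaiters.fetch_sub(1);
  }

  void Notify() {                                 // producers wake announced waiters
    if (prewaiters.load() > 0) {
      std::lock_guard<std::mutex> l(mu);
      cv.notify_one();
    }
  }
};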

unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h

@@ -39,9 +39,9 @@ class RunQueue {
  public:
   RunQueue() : front_(0), back_(0) {
     // require power-of-two for fast masking
-    eigen_assert((kSize & (kSize - 1)) == 0);
-    eigen_assert(kSize > 2);            // why would you do this?
-    eigen_assert(kSize <= (64 << 10));  // leave enough space for counter
+    eigen_plain_assert((kSize & (kSize - 1)) == 0);
+    eigen_plain_assert(kSize > 2);            // why would you do this?
+    eigen_plain_assert(kSize <= (64 << 10));  // leave enough space for counter
     for (unsigned i = 0; i < kSize; i++)
       array_[i].state.store(kEmpty, std::memory_order_relaxed);
   }
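The power-of-two requirement asserted above is what makes index wrap-around cheap: index & (kSize - 1) replaces an integer modulo. A tiny check that the two are equivalent exactly when kSize is a power of two:

#include <cassert>
#include <cstdio>
#include <initializer_list>

int main() {
  const unsigned kSize = 1024;
  assert((kSize & (kSize - 1)) == 0);             // the ctor's power-of-two check
  for (unsigned index : {0u, 1023u, 1024u, 2500u}) {
    assert((index & (kSize - 1)) == index % kSize);
    std::printf("%u wraps to %u\n", index, index & (kSize - 1));
  }
  return 0;
}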
@@ -137,7 +137,7 @@ class RunQueue {
     } else {
       // Note: no need to store temporal kBusy, we exclusively own these
       // elements.
-      eigen_assert(s == kReady);
+      eigen_plain_assert(s == kReady);
     }
     result->push_back(std::move(e->w));
     e->state.store(kEmpty, std::memory_order_release);

unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h

@@ -314,7 +314,7 @@ struct matrix_exp_computeUV<MatrixType, long double>
     matrix_exp_pade17(A, U, V);
   }
-#elif LDBL_MANT_DIG <= 112  // quadruple precision
+#elif LDBL_MANT_DIG <= 113  // quadruple precision

   if (l1norm < 1.639394610288918690547467954466970e-005L) {
     matrix_exp_pade3(arg, U, V);
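The off-by-one matters because IEEE-754 binary128 carries a 113-bit significand (112 stored bits plus the implicit leading bit), so quadruple-precision platforms report LDBL_MANT_DIG == 113 and the old "<= 112" guard skipped them entirely. A small probe of the values seen in practice:

#include <cfloat>
#include <cstdio>

int main() {
  // Typical values: 53 when long double == double, 64 for x86 80-bit
  // extended precision, 106 for PowerPC double-double, 113 for binary128.
  std::printf("LDBL_MANT_DIG = %d\n", LDBL_MANT_DIG);
#if LDBL_MANT_DIG <= 113
  std::printf("long double matrix exponential path is compiled in\n");
#endif
  return 0;
}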
@@ -347,7 +347,7 @@ struct matrix_exp_computeUV<MatrixType, long double>

 template<typename T> struct is_exp_known_type : false_type {};
 template<> struct is_exp_known_type<float> : true_type {};
 template<> struct is_exp_known_type<double> : true_type {};
-#if LDBL_MANT_DIG <= 112
+#if LDBL_MANT_DIG <= 113
 template<> struct is_exp_known_type<long double> : true_type {};
 #endif
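is_exp_known_type is a compile-time boolean trait: scalar types with hand-tuned Pade coefficients specialize it to true_type, and the corrected guard now also admits binary128 long doubles. A minimal re-creation of the pattern outside Eigen (shortened names, standalone false_type/true_type; not Eigen's internals verbatim):

#include <cstdio>

struct false_type { static const bool value = false; };
struct true_type  { static const bool value = true;  };

// Default: unknown scalar types take the generic fallback path.
template<typename T> struct exp_known : false_type {};
template<> struct exp_known<float>  : true_type {};
template<> struct exp_known<double> : true_type {};

template<typename T>
void compute_exp() {
  if (exp_known<T>::value)
    std::printf("specialized Pade-based path\n");
  else
    std::printf("generic fallback path\n");
}

int main() {
  compute_exp<double>();  // specialized
  compute_exp<int>();     // generic
  return 0;
}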