A few small fixes to a) prevent throwing in ctors and dtors of the threading code, and b) support matrix exponential on platforms with 113 bits of mantissa for long doubles.

Rasmus Munk Larsen 2018-11-09 14:15:32 -08:00
parent 784a3f13cf
commit 93f9988a7e
5 changed files with 23 additions and 23 deletions
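Context for fix (a): eigen_assert can be redefined by client code (Eigen's own test suite redefines it to throw so that assertions can be tested), while eigen_plain_assert always remains a plain, non-throwing check. A throwing assert is unsafe in a constructor or destructor because destructors are implicitly noexcept in C++11, so an escaping exception calls std::terminate. A minimal sketch of the distinction, using hypothetical macro names rather than Eigen's actual definitions:

#include <cassert>
#include <stdexcept>

// Hypothetical stand-ins for the two assert flavors (not Eigen's real macros).
// A user-redefined eigen_assert may throw; eigen_plain_assert never does.
#define THROWING_ASSERT(x) \
  do { if (!(x)) throw std::runtime_error("assert failed: " #x); } while (0)
#define PLAIN_ASSERT(x) assert(x)

struct Guard {
  ~Guard() noexcept {
    // Destructors are implicitly noexcept: if THROWING_ASSERT fired here,
    // the exception would escape the dtor and std::terminate would be called.
    PLAIN_ASSERT(true);
  }
};

int main() {
  Guard g;
  return 0;
}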

unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h

@@ -18,18 +18,18 @@ namespace Eigen {
 class Barrier {
  public:
   Barrier(unsigned int count) : state_(count << 1), notified_(false) {
-    eigen_assert(((count << 1) >> 1) == count);
+    eigen_plain_assert(((count << 1) >> 1) == count);
   }
   ~Barrier() { eigen_plain_assert((state_ >> 1) == 0); }

   void Notify() {
     unsigned int v = state_.fetch_sub(2, std::memory_order_acq_rel) - 2;
     if (v != 1) {
-      eigen_assert(((v + 2) & ~1) != 0);
+      eigen_plain_assert(((v + 2) & ~1) != 0);
       return;  // either count has not dropped to 0, or waiter is not waiting
     }
     std::unique_lock<std::mutex> l(mu_);
-    eigen_assert(!notified_);
+    eigen_plain_assert(!notified_);
     notified_ = true;
     cv_.notify_all();
   }
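The asserts above encode the Barrier state layout: the pending count lives in the high bits of state_ and the low bit flags a blocked waiter, which is why the constructor checks that count survives the left shift. A small sketch of that packing (my reading of the asserts, not Eigen's code verbatim):

#include <cassert>
#include <cstdio>

int main() {
  unsigned int count = 5;
  assert(((count << 1) >> 1) == count);  // the ctor check: count must survive the shift
  unsigned int state = count << 1;       // high bits: notifications still pending
  state |= 1;                            // low bit: a waiter is blocked on the cv
  std::printf("pending = %u, waiter flag = %u\n", state >> 1, state & 1u);
  return 0;
}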

unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h

@@ -51,7 +51,7 @@ class EventCount {
   class Waiter;

   EventCount(MaxSizeVector<Waiter>& waiters) : waiters_(waiters) {
-    eigen_assert(waiters.size() < (1 << kWaiterBits) - 1);
+    eigen_plain_assert(waiters.size() < (1 << kWaiterBits) - 1);
     // Initialize epoch to something close to overflow to test overflow.
     state_ = kStackMask | (kEpochMask - kEpochInc * waiters.size() * 2);
   }
@@ -88,7 +88,7 @@ class EventCount {
       // We've already been notified.
       if (int64_t((state & kEpochMask) - epoch) > 0) return;
       // Remove this thread from prewait counter and add it to the waiter list.
-      eigen_assert((state & kWaiterMask) != 0);
+      eigen_plain_assert((state & kWaiterMask) != 0);
       uint64_t newstate = state - kWaiterInc + kEpochInc;
       newstate = (newstate & ~kStackMask) | (w - &waiters_[0]);
       if ((state & kStackMask) == kStackMask)
@@ -119,7 +119,7 @@ class EventCount {
       // We've already been notified.
       if (int64_t((state & kEpochMask) - epoch) > 0) return;
       // Remove this thread from prewait counter.
-      eigen_assert((state & kWaiterMask) != 0);
+      eigen_plain_assert((state & kWaiterMask) != 0);
       if (state_.compare_exchange_weak(state, state - kWaiterInc + kEpochInc,
                                        std::memory_order_relaxed))
         return;
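Both hunks sit next to the epoch test int64_t((state & kEpochMask) - epoch) > 0, which orders epochs correctly even across counter wraparound (the constructor above deliberately starts the epoch near the overflow point to exercise this). A self-contained illustration of that wraparound-safe comparison, simplified to a full 64-bit counter rather than EventCount's masked field:

#include <cstdint>
#include <cstdio>

// Unsigned subtraction reinterpreted as signed: positive iff `current`
// is ahead of `snapshot`, even if the counter wrapped in between.
static bool epoch_advanced(uint64_t current, uint64_t snapshot) {
  return int64_t(current - snapshot) > 0;
}

int main() {
  uint64_t near_max = UINT64_MAX - 1;
  std::printf("%d\n", epoch_advanced(near_max + 3, near_max));  // 1: wrapped, still ordered
  std::printf("%d\n", epoch_advanced(near_max, near_max));      // 0: no progress
  return 0;
}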

unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h

@@ -42,7 +42,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     // indices as (t + coprime) % num_threads, we will cover all threads without
     // repetitions (effectively getting a pseudo-random permutation of thread
     // indices).
-    eigen_assert(num_threads_ < kMaxThreads);
+    eigen_plain_assert(num_threads_ < kMaxThreads);
     for (int i = 1; i <= num_threads_; ++i) {
       all_coprimes_.emplace_back(i);
       ComputeCoprimes(i, &all_coprimes_.back());
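The comment in this hunk relies on a standard number-theory fact: if the step is coprime to num_threads, then t, t+step, t+2*step, ... modulo num_threads enumerates every index exactly once. A quick standalone demonstration (C++17 for std::gcd):

#include <cstdio>
#include <numeric>  // std::gcd

int main() {
  const unsigned num_threads = 10;
  const unsigned coprime = 3;                     // gcd(3, 10) == 1
  if (std::gcd(coprime, num_threads) != 1) return 1;
  unsigned t = 7;                                 // arbitrary starting thread
  for (unsigned i = 0; i < num_threads; ++i) {
    std::printf("%u ", t);                        // visits each of 0..9 exactly once
    t = (t + coprime) % num_threads;
  }
  std::printf("\n");                              // prints: 7 0 3 6 9 2 5 8 1 4
  return 0;
}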
@@ -85,7 +85,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
   }

   void SetStealPartitions(const std::vector<std::pair<unsigned, unsigned>>& partitions) {
-    eigen_assert(partitions.size() == static_cast<std::size_t>(num_threads_));
+    eigen_plain_assert(partitions.size() == static_cast<std::size_t>(num_threads_));
     // Pass this information to each thread queue.
     for (int i = 0; i < num_threads_; i++) {
@@ -112,11 +112,11 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     } else {
       // A free-standing thread (or worker of another pool), push onto a random
       // queue.
-      eigen_assert(start < limit);
-      eigen_assert(limit <= num_threads_);
+      eigen_plain_assert(start < limit);
+      eigen_plain_assert(limit <= num_threads_);
       int num_queues = limit - start;
       int rnd = Rand(&pt->rand) % num_queues;
-      eigen_assert(start + rnd < limit);
+      eigen_plain_assert(start + rnd < limit);
       Queue& q = thread_data_[start + rnd].queue;
       t = q.PushBack(std::move(t));
     }
@@ -182,9 +182,9 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
   }

   void AssertBounds(int start, int end) {
-    eigen_assert(start >= 0);
-    eigen_assert(start < end);  // non-zero sized partition
-    eigen_assert(end <= num_threads_);
+    eigen_plain_assert(start >= 0);
+    eigen_plain_assert(start < end);  // non-zero sized partition
+    eigen_plain_assert(end <= num_threads_);
   }

   inline void SetStealPartition(size_t i, unsigned val) {
@@ -253,7 +253,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
 #ifndef EIGEN_THREAD_LOCAL
     std::unique_ptr<PerThread> new_pt(new PerThread());
     per_thread_map_mutex_.lock();
-    eigen_assert(per_thread_map_.emplace(GlobalThreadIdHash(), std::move(new_pt)).second);
+    eigen_plain_assert(per_thread_map_.emplace(GlobalThreadIdHash(), std::move(new_pt)).second);
     per_thread_map_mutex_.unlock();
     init_barrier_->Notify();
     init_barrier_->Wait();
@@ -337,7 +337,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     unsigned inc = all_coprimes_[size - 1][r % all_coprimes_[size - 1].size()];

     for (unsigned i = 0; i < size; i++) {
-      eigen_assert(start + victim < limit);
+      eigen_plain_assert(start + victim < limit);
       Task t = thread_data_[start + victim].queue.PopBack();
       if (t.f) {
         return t;
@@ -371,7 +371,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
   // time to exit (returns false). Can optionally return a task to execute in t
   // (in such case t.f != nullptr on return).
   bool WaitForWork(EventCount::Waiter* waiter, Task* t) {
-    eigen_assert(!t->f);
+    eigen_plain_assert(!t->f);
     // We already did best-effort emptiness check in Steal, so prepare for
     // blocking.
     ec_.Prewait(waiter);
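The Prewait call above is the first half of a two-phase wait: a worker announces its intent to sleep, re-checks the queues, and only then commits to blocking, so a task pushed in between is never missed. A condensed mutex/condvar analogy of that protocol (EventCount itself is lock-free; this shows only the shape of the idea, not its implementation):

#include <atomic>
#include <condition_variable>
#include <mutex>

struct TwoPhaseWait {
  std::mutex mu;
  std::condition_variable cv;
  std::atomic<int> prewaiters{0};

  void Prewait() { prewaiters.fetch_add(1); }     // phase 1: announce intent

  template <typename HasWork>
  void CommitWait(HasWork has_work) {             // phase 2: re-check, then block
    std::unique_lock<std::mutex> l(mu);
    while (!has_work()) cv.wait(l);
    prewaiters.fetch_sub(1);
  }

  void Notify() {                                 // producers wake announced waiters
    if (prewaiters.load() > 0) {
      std::lock_guard<std::mutex> l(mu);
      cv.notify_one();
    }
  }
};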

unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h

@@ -39,9 +39,9 @@ class RunQueue {
  public:
   RunQueue() : front_(0), back_(0) {
     // require power-of-two for fast masking
-    eigen_assert((kSize & (kSize - 1)) == 0);
-    eigen_assert(kSize > 2);            // why would you do this?
-    eigen_assert(kSize <= (64 << 10));  // leave enough space for counter
+    eigen_plain_assert((kSize & (kSize - 1)) == 0);
+    eigen_plain_assert(kSize > 2);            // why would you do this?
+    eigen_plain_assert(kSize <= (64 << 10));  // leave enough space for counter
     for (unsigned i = 0; i < kSize; i++)
       array_[i].state.store(kEmpty, std::memory_order_relaxed);
   }
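The power-of-two requirement asserted above is what makes index wrap-around cheap: index & (kSize - 1) replaces an integer modulo. A tiny check that the two are equivalent exactly when kSize is a power of two:

#include <cassert>
#include <cstdio>
#include <initializer_list>

int main() {
  const unsigned kSize = 1024;
  assert((kSize & (kSize - 1)) == 0);             // the ctor's power-of-two check
  for (unsigned index : {0u, 1023u, 1024u, 2500u}) {
    assert((index & (kSize - 1)) == index % kSize);
    std::printf("%u wraps to %u\n", index, index & (kSize - 1));
  }
  return 0;
}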
@@ -137,7 +137,7 @@ class RunQueue {
     } else {
       // Note: no need to store temporal kBusy, we exclusively own these
       // elements.
-      eigen_assert(s == kReady);
+      eigen_plain_assert(s == kReady);
     }
     result->push_back(std::move(e->w));
     e->state.store(kEmpty, std::memory_order_release);

unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h

@@ -314,7 +314,7 @@ struct matrix_exp_computeUV<MatrixType, long double>
     matrix_exp_pade17(A, U, V);
   }
-#elif LDBL_MANT_DIG <= 112  // quadruple precision
+#elif LDBL_MANT_DIG <= 113  // quadruple precision

   if (l1norm < 1.639394610288918690547467954466970e-005L) {
     matrix_exp_pade3(arg, U, V);
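The off-by-one matters because IEEE-754 binary128 carries a 113-bit significand (112 stored bits plus the implicit leading bit), so quadruple-precision platforms report LDBL_MANT_DIG == 113 and the old "<= 112" guard skipped them entirely. A small probe of the values seen in practice:

#include <cfloat>
#include <cstdio>

int main() {
  // Typical values: 53 when long double == double, 64 for x86 80-bit
  // extended precision, 106 for PowerPC double-double, 113 for binary128.
  std::printf("LDBL_MANT_DIG = %d\n", LDBL_MANT_DIG);
#if LDBL_MANT_DIG <= 113
  std::printf("long double matrix exponential path is compiled in\n");
#endif
  return 0;
}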
@@ -347,7 +347,7 @@ struct matrix_exp_computeUV<MatrixType, long double>

 template<typename T> struct is_exp_known_type : false_type {};
 template<> struct is_exp_known_type<float> : true_type {};
 template<> struct is_exp_known_type<double> : true_type {};
-#if LDBL_MANT_DIG <= 112
+#if LDBL_MANT_DIG <= 113
 template<> struct is_exp_known_type<long double> : true_type {};
 #endif
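is_exp_known_type is a compile-time boolean trait: scalar types with hand-tuned Pade coefficients specialize it to true_type, and the corrected guard now also admits binary128 long doubles. A minimal re-creation of the pattern outside Eigen (shortened names, standalone false_type/true_type; not Eigen's internals verbatim):

#include <cstdio>

struct false_type { static const bool value = false; };
struct true_type  { static const bool value = true;  };

// Default: unknown scalar types take the generic fallback path.
template<typename T> struct exp_known : false_type {};
template<> struct exp_known<float>  : true_type {};
template<> struct exp_known<double> : true_type {};

template<typename T>
void compute_exp() {
  if (exp_known<T>::value)
    std::printf("specialized Pade-based path\n");
  else
    std::printf("generic fallback path\n");
}

int main() {
  compute_exp<double>();  // specialized
  compute_exp<int>();     // generic
  return 0;
}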