A few small fixes: a) avoid throwing in ctors and dtors of the threading code, and b) support the matrix exponential on platforms where long double has a 113-bit mantissa (quadruple precision).

Rasmus Munk Larsen 2018-11-09 14:15:32 -08:00
parent 784a3f13cf
commit 93f9988a7e
5 changed files with 23 additions and 23 deletions
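
Background on fix (a): eigen_assert is a configurable macro that users (and Eigen's own test suite) may redefine, commonly to throw an exception so assertion failures can be tested, while eigen_plain_assert always maps to the plain C assert. An exception escaping a constructor leaves the object half-built, and destructors are implicitly noexcept since C++11, so a throwing assert in a destructor calls std::terminate. A minimal sketch of the hazard, using hypothetical macro names rather than Eigen's actual definitions:

#include <cassert>
#include <stdexcept>

// Hypothetical stand-ins: a user-redefined throwing assert versus a plain one.
#define throwing_assert(x) \
  do { if (!(x)) throw std::logic_error("assertion failed: " #x); } while (0)
#define plain_assert(x) assert(x)

struct Latch {
  bool released = false;
  ~Latch() {
    // Destructors are noexcept by default: if throwing_assert fired here, the
    // exception could not propagate and std::terminate would run. plain_assert
    // aborts with a diagnostic instead, which is the sane debug-check behavior.
    plain_assert(released);
  }
};

int main() {
  Latch l;
  l.released = true;  // satisfy the destructor's invariant
  return 0;
}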

unsupported/Eigen/CXX11/src/ThreadPool/Barrier.h

@@ -18,18 +18,18 @@ namespace Eigen {
 class Barrier {
  public:
   Barrier(unsigned int count) : state_(count << 1), notified_(false) {
-    eigen_assert(((count << 1) >> 1) == count);
+    eigen_plain_assert(((count << 1) >> 1) == count);
   }
   ~Barrier() { eigen_plain_assert((state_ >> 1) == 0); }
   void Notify() {
     unsigned int v = state_.fetch_sub(2, std::memory_order_acq_rel) - 2;
     if (v != 1) {
-      eigen_assert(((v + 2) & ~1) != 0);
+      eigen_plain_assert(((v + 2) & ~1) != 0);
       return;  // either count has not dropped to 0, or waiter is not waiting
     }
     std::unique_lock<std::mutex> l(mu_);
-    eigen_assert(!notified_);
+    eigen_plain_assert(!notified_);
     notified_ = true;
     cv_.notify_all();
   }

unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h

@@ -51,7 +51,7 @@ class EventCount {
   class Waiter;
   EventCount(MaxSizeVector<Waiter>& waiters) : waiters_(waiters) {
-    eigen_assert(waiters.size() < (1 << kWaiterBits) - 1);
+    eigen_plain_assert(waiters.size() < (1 << kWaiterBits) - 1);
     // Initialize epoch to something close to overflow to test overflow.
     state_ = kStackMask | (kEpochMask - kEpochInc * waiters.size() * 2);
   }
@@ -88,7 +88,7 @@ class EventCount {
       // We've already been notified.
       if (int64_t((state & kEpochMask) - epoch) > 0) return;
       // Remove this thread from prewait counter and add it to the waiter list.
-      eigen_assert((state & kWaiterMask) != 0);
+      eigen_plain_assert((state & kWaiterMask) != 0);
       uint64_t newstate = state - kWaiterInc + kEpochInc;
       newstate = (newstate & ~kStackMask) | (w - &waiters_[0]);
       if ((state & kStackMask) == kStackMask)
@@ -119,7 +119,7 @@ class EventCount {
       // We've already been notified.
       if (int64_t((state & kEpochMask) - epoch) > 0) return;
       // Remove this thread from prewait counter.
-      eigen_assert((state & kWaiterMask) != 0);
+      eigen_plain_assert((state & kWaiterMask) != 0);
       if (state_.compare_exchange_weak(state, state - kWaiterInc + kEpochInc,
                                        std::memory_order_relaxed))
         return;
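
An aside on the int64_t((state & kEpochMask) - epoch) > 0 test seen above: casting the unsigned difference to a signed type keeps the ordering test correct even after the epoch counter wraps around. A minimal sketch, simplified to a full 64-bit counter (EventCount actually masks the epoch field out of a packed state word):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t epoch = UINT64_MAX - 1;     // counter about to wrap
  uint64_t state = epoch + 3;          // three increments later, past zero
  // Unsigned subtraction wraps, and the signed cast recovers the ordering:
  assert(int64_t(state - epoch) > 0);  // state is "newer" than epoch
  assert(int64_t(epoch - state) < 0);  // and the reverse is "older"
  return 0;
}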

unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h

@@ -42,7 +42,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     // indices as (t + coprime) % num_threads, we will cover all threads without
     // repetitions (effectively getting a presudo-random permutation of thread
     // indices).
-    eigen_assert(num_threads_ < kMaxThreads);
+    eigen_plain_assert(num_threads_ < kMaxThreads);
     for (int i = 1; i <= num_threads_; ++i) {
       all_coprimes_.emplace_back(i);
       ComputeCoprimes(i, &all_coprimes_.back());
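
The comment in this hunk is the whole trick behind the pool's victim selection; a tiny standalone illustration (not part of the patch) of why a stride coprime to the thread count visits every index exactly once:

#include <cstdio>

int main() {
  const unsigned num_threads = 10;
  const unsigned coprime = 3;    // gcd(3, 10) == 1
  unsigned victim = 7;           // arbitrary starting thread
  for (unsigned i = 0; i < num_threads; i++) {
    std::printf("%u ", victim);  // prints 7 0 3 6 9 2 5 8 1 4: each index once
    victim = (victim + coprime) % num_threads;
  }
  std::printf("\n");
  return 0;
}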
@@ -85,7 +85,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
   }
   void SetStealPartitions(const std::vector<std::pair<unsigned, unsigned>>& partitions) {
-    eigen_assert(partitions.size() == static_cast<std::size_t>(num_threads_));
+    eigen_plain_assert(partitions.size() == static_cast<std::size_t>(num_threads_));
     // Pass this information to each thread queue.
     for (int i = 0; i < num_threads_; i++) {
@@ -112,11 +112,11 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     } else {
       // A free-standing thread (or worker of another pool), push onto a random
       // queue.
-      eigen_assert(start < limit);
-      eigen_assert(limit <= num_threads_);
+      eigen_plain_assert(start < limit);
+      eigen_plain_assert(limit <= num_threads_);
       int num_queues = limit - start;
       int rnd = Rand(&pt->rand) % num_queues;
-      eigen_assert(start + rnd < limit);
+      eigen_plain_assert(start + rnd < limit);
       Queue& q = thread_data_[start + rnd].queue;
       t = q.PushBack(std::move(t));
     }
@@ -182,9 +182,9 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
   }
   void AssertBounds(int start, int end) {
-    eigen_assert(start >= 0);
-    eigen_assert(start < end);  // non-zero sized partition
-    eigen_assert(end <= num_threads_);
+    eigen_plain_assert(start >= 0);
+    eigen_plain_assert(start < end);  // non-zero sized partition
+    eigen_plain_assert(end <= num_threads_);
   }
   inline void SetStealPartition(size_t i, unsigned val) {
@@ -253,7 +253,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
 #ifndef EIGEN_THREAD_LOCAL
     std::unique_ptr<PerThread> new_pt(new PerThread());
     per_thread_map_mutex_.lock();
-    eigen_assert(per_thread_map_.emplace(GlobalThreadIdHash(), std::move(new_pt)).second);
+    eigen_plain_assert(per_thread_map_.emplace(GlobalThreadIdHash(), std::move(new_pt)).second);
     per_thread_map_mutex_.unlock();
     init_barrier_->Notify();
     init_barrier_->Wait();
@@ -337,7 +337,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
     unsigned inc = all_coprimes_[size - 1][r % all_coprimes_[size - 1].size()];
     for (unsigned i = 0; i < size; i++) {
-      eigen_assert(start + victim < limit);
+      eigen_plain_assert(start + victim < limit);
       Task t = thread_data_[start + victim].queue.PopBack();
       if (t.f) {
         return t;
@@ -371,7 +371,7 @@ class ThreadPoolTempl : public Eigen::ThreadPoolInterface {
   // time to exit (returns false). Can optionally return a task to execute in t
   // (in such case t.f != nullptr on return).
   bool WaitForWork(EventCount::Waiter* waiter, Task* t) {
-    eigen_assert(!t->f);
+    eigen_plain_assert(!t->f);
     // We already did best-effort emptiness check in Steal, so prepare for
     // blocking.
     ec_.Prewait(waiter);

unsupported/Eigen/CXX11/src/ThreadPool/RunQueue.h

@@ -39,9 +39,9 @@ class RunQueue {
  public:
   RunQueue() : front_(0), back_(0) {
     // require power-of-two for fast masking
-    eigen_assert((kSize & (kSize - 1)) == 0);
-    eigen_assert(kSize > 2);            // why would you do this?
-    eigen_assert(kSize <= (64 << 10));  // leave enough space for counter
+    eigen_plain_assert((kSize & (kSize - 1)) == 0);
+    eigen_plain_assert(kSize > 2);            // why would you do this?
+    eigen_plain_assert(kSize <= (64 << 10));  // leave enough space for counter
     for (unsigned i = 0; i < kSize; i++)
       array_[i].state.store(kEmpty, std::memory_order_relaxed);
   }
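
The power-of-two check in the constructor above exists because a power-of-two capacity lets the queue turn a modulo into a bitwise AND; a small self-contained demonstration of that identity (illustrative, not Eigen code):

#include <cassert>

int main() {
  const unsigned kSize = 1024;               // power of two
  assert((kSize & (kSize - 1)) == 0);        // the same check RunQueue makes
  for (unsigned x = 0; x < 100000; x += 37)
    assert(x % kSize == (x & (kSize - 1)));  // masking matches modulo
  return 0;
}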
@@ -137,7 +137,7 @@ class RunQueue {
     } else {
       // Note: no need to store temporal kBusy, we exclusively own these
       // elements.
-      eigen_assert(s == kReady);
+      eigen_plain_assert(s == kReady);
     }
     result->push_back(std::move(e->w));
     e->state.store(kEmpty, std::memory_order_release);

unsupported/Eigen/src/MatrixFunctions/MatrixExponential.h

@@ -314,7 +314,7 @@ struct matrix_exp_computeUV<MatrixType, long double>
     matrix_exp_pade17(A, U, V);
   }
-#elif LDBL_MANT_DIG <= 112  // quadruple precision
+#elif LDBL_MANT_DIG <= 113  // quadruple precision
   if (l1norm < 1.639394610288918690547467954466970e-005L) {
     matrix_exp_pade3(arg, U, V);
@@ -347,7 +347,7 @@ struct matrix_exp_computeUV<MatrixType, long double>
 template<typename T> struct is_exp_known_type : false_type {};
 template<> struct is_exp_known_type<float> : true_type {};
 template<> struct is_exp_known_type<double> : true_type {};
-#if LDBL_MANT_DIG <= 112
+#if LDBL_MANT_DIG <= 113
 template<> struct is_exp_known_type<long double> : true_type {};
 #endif
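
Background on fix (b): IEEE-754 binary128 (quadruple precision) carries a 113-bit significand, so the old LDBL_MANT_DIG <= 112 guard excluded exactly the quad-precision long double it was meant to cover (the adjacent comment already said "quadruple precision"). A quick probe of a platform's long double format, for reference:

#include <cfloat>
#include <cstdio>

int main() {
  // Typical values: 53 = plain double, 64 = x87 80-bit extended,
  // 113 = IEEE binary128 quadruple precision.
  std::printf("LDBL_MANT_DIG = %d\n", LDBL_MANT_DIG);
#if LDBL_MANT_DIG <= 113
  std::printf("long double matrix exponential path: enabled\n");
#else
  std::printf("long double matrix exponential path: disabled\n");
#endif
  return 0;
}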