Mirror of https://gitlab.com/libeigen/eigen.git
Avoid atomic false sharing in RunQueue
commit fd98cc49f1 (parent 0b646f3f36)
@@ -38,6 +38,15 @@
 #define EIGEN_ALIGNOF(x) alignof(x)
 #endif

+// Align to the boundary that avoids false sharing.
+// https://en.cppreference.com/w/cpp/thread/hardware_destructive_interference_size
+#ifdef __cpp_lib_hardware_interference_size
+#define EIGEN_ALIGN_TO_AVOID_FALSE_SHARING EIGEN_ALIGN_TO_BOUNDARY(std::hardware_destructive_interference_size)
+#else
+// Overalign for the cache line size of 128 bytes (Apple M1)
+#define EIGEN_ALIGN_TO_AVOID_FALSE_SHARING EIGEN_ALIGN_TO_BOUNDARY(128)
+#endif
+
 // If the user explicitly disable vectorization, then we also disable alignment
 #if defined(EIGEN_DONT_VECTORIZE)
 #if defined(EIGEN_GPUCC)
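For reference, the fallback pattern the new macro encodes can be reproduced in a few lines of standalone C++17. This is only a sketch, not Eigen code: the constant name kFalseSharingBoundary is invented here, and plain alignas stands in for EIGEN_ALIGN_TO_BOUNDARY.

// Sketch of the same fallback: prefer the standard constant when the library
// reports it, otherwise assume a 128-byte cache line (e.g. Apple M1).
#include <atomic>
#include <cstddef>
#include <new>

#ifdef __cpp_lib_hardware_interference_size
inline constexpr std::size_t kFalseSharingBoundary = std::hardware_destructive_interference_size;
#else
inline constexpr std::size_t kFalseSharingBoundary = 128;
#endif

struct Counters {
  // Each counter gets its own cache line, so writers on different threads
  // do not invalidate each other's lines.
  alignas(kFalseSharingBoundary) std::atomic<unsigned> produced{0};
  alignas(kFalseSharingBoundary) std::atomic<unsigned> consumed{0};
};

static_assert(alignof(Counters) >= kFalseSharingBoundary, "over-aligned as intended");

Aligning each concurrently written member to this boundary keeps writers on different threads from invalidating each other's cache lines, which is what the EventCount and RunQueue changes below rely on.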
@@ -57,6 +57,9 @@ class EventCount {
     eigen_plain_assert(waiters.size() < (1 << kWaiterBits) - 1);
   }

+  EventCount(const EventCount&) = delete;
+  void operator=(const EventCount&) = delete;
+
   ~EventCount() {
     // Ensure there are no waiters.
     eigen_plain_assert(state_.load() == kStackMask);
@@ -155,22 +158,6 @@ class EventCount {
     }
   }

-  class Waiter {
-    friend class EventCount;
-    // Align to 128 byte boundary to prevent false sharing with other Waiter
-    // objects in the same vector.
-    EIGEN_ALIGN_TO_BOUNDARY(128) std::atomic<uint64_t> next;
-    EIGEN_MUTEX mu;
-    EIGEN_CONDVAR cv;
-    uint64_t epoch = 0;
-    unsigned state = kNotSignaled;
-    enum {
-      kNotSignaled,
-      kWaiting,
-      kSignaled,
-    };
-  };
-
  private:
   // State_ layout:
   // - low kWaiterBits is a stack of waiters committed wait
@@ -192,9 +179,25 @@ class EventCount {
   static const uint64_t kEpochBits = 64 - kEpochShift;
   static const uint64_t kEpochMask = ((1ull << kEpochBits) - 1) << kEpochShift;
   static const uint64_t kEpochInc = 1ull << kEpochShift;
-  std::atomic<uint64_t> state_;
-  MaxSizeVector<Waiter>& waiters_;

+ public:
+  class Waiter {
+    friend class EventCount;
+
+    enum State {
+      kNotSignaled,
+      kWaiting,
+      kSignaled,
+    };
+
+    EIGEN_ALIGN_TO_AVOID_FALSE_SHARING std::atomic<uint64_t> next{kStackMask};
+    EIGEN_MUTEX mu;
+    EIGEN_CONDVAR cv;
+    uint64_t epoch{0};
+    unsigned state{kNotSignaled};
+  };
+
+ private:
   static void CheckState(uint64_t state, bool waiter = false) {
     static_assert(kEpochBits >= 20, "not enough bits to prevent ABA problem");
     const uint64_t waiters = (state & kWaiterMask) >> kWaiterShift;
@@ -229,8 +232,8 @@ class EventCount {
     }
   }

-  EventCount(const EventCount&) = delete;
-  void operator=(const EventCount&) = delete;
+  std::atomic<uint64_t> state_;
+  MaxSizeVector<Waiter>& waiters_;
 };

 } // namespace Eigen
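The Waiter change keeps the intent of the old hard-coded EIGEN_ALIGN_TO_BOUNDARY(128) but routes it through the new macro. A rough standalone sketch (not Eigen code; PaddedWaiter and kCacheLine are invented names, and 128 is the same assumption as the #else branch above) of why per-element alignment matters when waiters sit contiguously in a vector:

// One over-aligned struct per thread: the alignas on the first member
// over-aligns the whole struct, so array elements start on distinct lines.
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <vector>

constexpr std::size_t kCacheLine = 128;  // assumed boundary, as in the fallback above

struct PaddedWaiter {
  alignas(kCacheLine) std::atomic<uint64_t> next{0};
  uint64_t epoch{0};
  unsigned state{0};
};

static_assert(sizeof(PaddedWaiter) % kCacheLine == 0,
              "array elements start on distinct cache lines");

int main() {
  std::vector<PaddedWaiter> waiters(8);  // one per thread; no false sharing between them
  waiters[0].next.store(1, std::memory_order_relaxed);
  return 0;
}

Because alignas on a member over-aligns the whole struct, sizeof is rounded up to the boundary and neighbouring elements of waiters_ can no longer land on the same cache line.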
@@ -154,16 +154,18 @@ class RunQueue {
  private:
   static const unsigned kMask = kSize - 1;
   static const unsigned kMask2 = (kSize << 1) - 1;
-  struct Elem {
-    std::atomic<uint8_t> state;
-    Work w;
-  };
-  enum {
+
+  enum State {
     kEmpty,
     kBusy,
     kReady,
   };
-  EIGEN_MUTEX mutex_;
+
+  struct Elem {
+    std::atomic<uint8_t> state;
+    Work w;
+  };
+
   // Low log(kSize) + 1 bits in front_ and back_ contain rolling index of
   // front/back, respectively. The remaining bits contain modification counters
   // that are incremented on Push operations. This allows us to (1) distinguish
@@ -171,9 +173,11 @@ class RunQueue {
   // position, these conditions would be indistinguishable); (2) obtain
   // consistent snapshot of front_/back_ for Size operation using the
   // modification counters.
-  std::atomic<unsigned> front_;
-  std::atomic<unsigned> back_;
-  Elem array_[kSize];
+  EIGEN_ALIGN_TO_AVOID_FALSE_SHARING std::atomic<unsigned> front_;
+  EIGEN_ALIGN_TO_AVOID_FALSE_SHARING std::atomic<unsigned> back_;
+  EIGEN_MUTEX mutex_; // guards `PushBack` and `PopBack` (accesses `back_`)
+
+  EIGEN_ALIGN_TO_AVOID_FALSE_SHARING Elem array_[kSize];

   // SizeOrNotEmpty returns current queue size; if NeedSizeEstimate is false,
   // only whether the size is 0 is guaranteed to be correct.
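The comments above describe how front_ and back_ pack a rolling position and a modification counter into one unsigned value. A small illustrative sketch of that encoding, assuming kSize is a power of two (this is not Eigen's implementation, just the bit layout):

// Low log2(kSize)+1 bits: rolling index over 2*kSize positions.
// Remaining high bits: modification counter bumped on Push.
#include <cassert>

constexpr unsigned kSize = 8;                  // power of two, as RunQueue requires
constexpr unsigned kMask = kSize - 1;          // slot index within array_
constexpr unsigned kMask2 = (kSize << 1) - 1;  // rolling index over 2*kSize positions

constexpr unsigned Slot(unsigned v) { return v & kMask; }
constexpr unsigned Rolling(unsigned v) { return v & kMask2; }
constexpr unsigned Counter(unsigned v) { return v & ~kMask2; }

int main() {
  unsigned a = 3;                     // some front_ value pointing at slot 3
  unsigned b = a + (kMask2 + 1) * 5;  // same slot, but five counter increments later
  assert(Slot(a) == Slot(b) && Rolling(a) == Rolling(b));
  assert(Counter(a) != Counter(b));   // distinguishes "same position" from "wrapped around"
  return 0;
}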
@@ -208,12 +212,12 @@ class RunQueue {
   EIGEN_ALWAYS_INLINE unsigned CalculateSize(unsigned front, unsigned back) const {
     int size = (front & kMask2) - (back & kMask2);
     // Fix overflow.
-    if (size < 0) size += 2 * kSize;
+    if (EIGEN_PREDICT_FALSE(size < 0)) size += 2 * kSize;
     // Order of modification in push/pop is crafted to make the queue look
     // larger than it is during concurrent modifications. E.g. push can
     // increment size before the corresponding pop has decremented it.
     // So the computed size can be up to kSize + 1, fix it.
-    if (size > static_cast<int>(kSize)) size = kSize;
+    if (EIGEN_PREDICT_FALSE(size > static_cast<int>(kSize))) size = kSize;
     return static_cast<unsigned>(size);
   }
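The two EIGEN_PREDICT_FALSE hints mark the wrap-around and clamping branches as unlikely; in Eigen such hints typically expand to a compiler branch-prediction intrinsic such as __builtin_expect where available. A simplified standalone sketch of the same arithmetic (not the Eigen implementation; the explicit int casts are added here for clarity):

// Rolling indices live modulo 2*kSize, so the raw difference can go negative
// on wrap-around and can briefly exceed kSize while a push/pop is in flight.
#include <cassert>

constexpr unsigned kSize = 8;
constexpr unsigned kMask2 = (kSize << 1) - 1;

unsigned CalculateSizeSketch(unsigned front, unsigned back) {
  int size = static_cast<int>(front & kMask2) - static_cast<int>(back & kMask2);
  if (size < 0) size += 2 * kSize;                   // undo modulo-2*kSize wrap-around
  if (size > static_cast<int>(kSize)) size = kSize;  // clamp transient over-count
  return static_cast<unsigned>(size);
}

int main() {
  assert(CalculateSizeSketch(5, 2) == 3);   // plain case
  assert(CalculateSizeSketch(1, 15) == 2);  // front wrapped past 2*kSize
  return 0;
}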