SpinLockMutex small cleanup; improved and extended benchmark #3706
@@ -82,8 +82,7 @@ class SpinLockMutex
    */
   bool try_lock() noexcept
   {
-    return !flag_.load(std::memory_order_relaxed) &&
-           !flag_.exchange(true, std::memory_order_acquire);
+    return !flag_.exchange(true, std::memory_order_acquire);
   }

   /**
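For context on what changed here: the old try_lock() was a test-and-test-and-set (TTAS), the new one is a plain test-and-set (TAS). A minimal standalone sketch of the two shapes (the free-function form is for illustration only; in the PR these are members of SpinLockMutex):

```cpp
#include <atomic>

// New version: plain test-and-set. The exchange is a read-modify-write,
// so it pulls the cache line in exclusive mode even when the lock is
// already held and the call is going to fail.
bool try_lock_tas(std::atomic<bool> &flag) noexcept
{
  return !flag.exchange(true, std::memory_order_acquire);
}

// Old version: test-and-test-and-set. The relaxed load lets a contended
// call fail with a shared read, deferring the exchange to the case
// where the lock actually looks free.
bool try_lock_ttas(std::atomic<bool> &flag) noexcept
{
  return !flag.load(std::memory_order_relaxed) &&
         !flag.exchange(true, std::memory_order_acquire);
}
```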
@@ -95,13 +94,9 @@ class SpinLockMutex
    */
   void lock() noexcept
   {
-    for (;;)
+    // Try once
+    while (!try_lock())
     {
-      // Try once
-      if (!flag_.exchange(true, std::memory_order_acquire))
-      {
-        return;
-      }
       // Spin-Fast (goal ~10ns)
       for (std::size_t i = 0; i < SPINLOCK_FAST_ITERATIONS; ++i)
       {

Review comment on lines +94 to +98:

This seems less efficient under contention. Calling try_lock() in every iteration causes repeated atomic exchanges and cache invalidations. The previous version avoided that by doing the initial exchange outside the spin loop.
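One way to address this concern while still expressing lock() in terms of try_lock() is to spin read-only between attempts. A sketch assuming the flag_ member shown above, with the PR's spin-fast/yield tiers omitted for brevity:

```cpp
void lock() noexcept
{
  // Exchange only on entry and whenever the flag has been seen clear.
  while (!try_lock())
  {
    // Spin read-only: a relaxed load keeps the cache line in a shared
    // state instead of bouncing it between cores on every iteration.
    while (flag_.load(std::memory_order_relaxed))
    {
      // A pause/yield hint would go here, as in the existing tiers.
    }
  }
}
```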
@@ -3,7 +3,7 @@
 #include <benchmark/benchmark.h>
 #include <atomic>
 #include <cstdint>
 #include <mutex>
 #include <thread>
 #include <vector>
@@ -27,8 +27,8 @@ constexpr int TightLoopLocks = 10000;
 //
 // lock: A lambda denoting how to lock. Accepts a reference to `SpinLockType`.
 // unlock: A lambda denoting how to unlock. Accepts a reference to `SpinLockType`.
-template <typename SpinLockType, typename LockF, typename UnlockF>
-inline void SpinThrash(benchmark::State &s, SpinLockType &spinlock, LockF lock, UnlockF unlock)
+template <typename LockF, typename UnlockF>
+void SpinThrash(benchmark::State &s, LockF lock, UnlockF unlock)
 {
   auto num_threads = s.range(0);
   // Value we will increment, fighting over a spinlock.
@@ -49,9 +49,9 @@ inline void SpinThrash(benchmark::State &s, SpinLockType &spinlock, LockF lock,
       // to ensure maximum thread contention.
       for (int i = 0; i < TightLoopLocks; i++)
       {
-        lock(spinlock);
+        lock();
         value++;
-        unlock(spinlock);
+        unlock();
       }
     });
 }
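The registration code is outside the hunks shown. For illustration, a harness like this is typically wired up in Google Benchmark with the thread count passed via the range argument that SpinThrash reads as s.range(0); the 1..8 range below is an assumption, not taken from the PR:

```cpp
// s.range(0) is read inside SpinThrash as the number of contending
// threads, so each benchmark runs across a range of thread counts.
// UseRealTime() measures wall time, which matters because the harness
// spawns its own threads.
BENCHMARK(BM_SpinLockThrashing)->RangeMultiplier(2)->Range(1, 8)->UseRealTime();
BENCHMARK(BM_NaiveSpinLockThrashing)->RangeMultiplier(2)->Range(1, 8)->UseRealTime();

BENCHMARK_MAIN();
```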
@@ -63,35 +63,35 @@ inline void SpinThrash(benchmark::State &s, SpinLockType &spinlock, LockF lock,
 }

 // Benchmark of full spin-lock implementation.
-static void BM_SpinLockThrashing(benchmark::State &s)
+void BM_SpinLockThrashing(benchmark::State &s)
 {
   SpinLockMutex spinlock;
-  SpinThrash(s, spinlock, [](SpinLockMutex &m) { m.lock(); }, [](SpinLockMutex &m) { m.unlock(); });
+  SpinThrash(s, [&] { spinlock.lock(); }, [&] { spinlock.unlock(); });
 }

 // Naive `while(try_lock()) {}` implementation of lock.
-static void BM_NaiveSpinLockThrashing(benchmark::State &s)
+void BM_NaiveSpinLockThrashing(benchmark::State &s)
 {
   SpinLockMutex spinlock;
   SpinThrash(
-      s, spinlock,
-      [](SpinLockMutex &m) {
-        while (!m.try_lock())
+      s,
+      [&] {
+        while (!spinlock.try_lock())
         {
           // Left this comment to keep the same format on old and new versions of clang-format
         }
       },
-      [](SpinLockMutex &m) { m.unlock(); });
+      [&] { spinlock.unlock(); });
 }

 // Simple `while(try_lock()) { yield-processor }`
-static void BM_ProcYieldSpinLockThrashing(benchmark::State &s)
+void BM_ProcYieldSpinLockThrashing(benchmark::State &s)
 {
   SpinLockMutex spinlock;
-  SpinThrash<SpinLockMutex>(
-      s, spinlock,
-      [](SpinLockMutex &m) {
-        while (!m.try_lock())
+  SpinThrash(
+      s,
+      [&] {
+        while (!spinlock.try_lock())
         {
 #if defined(_MSC_VER)
           YieldProcessor();
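The _MSC_VER branch above pairs YieldProcessor() with compiler-specific fallbacks further down the file. A self-contained sketch of that pattern (the helper name cpu_relax is hypothetical):

```cpp
#include <thread>
#if defined(_MSC_VER)
#  include <windows.h>    // YieldProcessor
#elif defined(__i386__) || defined(__x86_64__)
#  include <immintrin.h>  // _mm_pause
#endif

// Hint to the core that we are in a spin-wait loop. Much cheaper than
// std::this_thread::yield(), which involves the scheduler.
inline void cpu_relax() noexcept
{
#if defined(_MSC_VER)
  YieldProcessor();
#elif defined(__i386__) || defined(__x86_64__)
  _mm_pause();
#elif defined(__aarch64__) || defined(__arm__)
  asm volatile("yield" ::: "memory");
#else
  std::this_thread::yield();  // last resort on unknown targets
#endif
}
```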
@@ -108,33 +108,33 @@ static void BM_ProcYieldSpinLockThrashing(benchmark::State &s)
 #endif
         }
       },
-      [](SpinLockMutex &m) { m.unlock(); });
+      [&] { spinlock.unlock(); });
 }

 // SpinLock thrashing with thread::yield().
-static void BM_ThreadYieldSpinLockThrashing(benchmark::State &s)
+void BM_ThreadYieldSpinLockThrashing(benchmark::State &s)
 {
 #if defined(__cpp_lib_atomic_value_initialization) && \
     __cpp_lib_atomic_value_initialization >= 201911L
   std::atomic_flag mutex{};
 #else
-  std::atomic_flag mutex = ATOMIC_FLAG_INIT;
+  alignas(8) std::atomic_flag mutex = ATOMIC_FLAG_INIT;

Suggested change:
-  alignas(8) std::atomic_flag mutex = ATOMIC_FLAG_INIT;
+  std::atomic_flag mutex = ATOMIC_FLAG_INIT;
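If the intent of alignas(8) is to keep the flag from sharing a cache line with neighbouring data, 8 bytes is smaller than any common cache line. A sketch of the usual alternative, not part of this PR:

```cpp
#include <atomic>
#include <cstddef>
#include <new>  // std::hardware_destructive_interference_size

#if defined(__cpp_lib_hardware_interference_size)
constexpr std::size_t kCacheLineSize = std::hardware_destructive_interference_size;
#else
constexpr std::size_t kCacheLineSize = 64;  // assumed typical cache-line size
#endif

// Padding the flag out to its own cache line is the usual way to rule
// out false sharing; alignas(8) is smaller than any common line size.
alignas(kCacheLineSize) std::atomic_flag mutex = ATOMIC_FLAG_INIT;
```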
Review comment:

How is this more efficient? A load(std::memory_order_relaxed) would avoid a memory fence and waiting for cache synchronization. That said, the whole idea of a spin lock in user mode seems dubious, especially in high-contention scenarios.
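On the last point, the refactored SpinThrash makes a blocking baseline easy to add for comparison. A hypothetical sketch reusing the helper from this file (BM_MutexThrashing does not appear in the diff):

```cpp
#include <mutex>

// Hypothetical baseline, not part of this PR: the same contention
// pattern through std::mutex, which can block in the kernel under
// contention instead of burning CPU in user mode.
void BM_MutexThrashing(benchmark::State &s)
{
  std::mutex mu;
  SpinThrash(s, [&] { mu.lock(); }, [&] { mu.unlock(); });
}
```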