diff --git a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp index f62b5b445e6e..fc9bb317ea87 100644 --- a/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp +++ b/libs/core/synchronization/include/hpx/synchronization/shared_mutex.hpp @@ -77,20 +77,18 @@ namespace hpx::detail { bool set_state(shared_state& s1, shared_state& s) noexcept { ++s.data.tag; - return s1.value == state.load(std::memory_order_relaxed).value && - state.compare_exchange_strong(s1, s, std::memory_order_release); + return state.compare_exchange_strong( + s1, s, std::memory_order_acq_rel, std::memory_order_acquire); } bool set_state(shared_state& s1, shared_state& s, std::unique_lock& lk) noexcept { - if (s1.value != state.load(std::memory_order_relaxed).value) - return false; - ++s.data.tag; lk = std::unique_lock(state_change); - if (state.compare_exchange_strong(s1, s, std::memory_order_release)) + if (state.compare_exchange_strong(s1, s, std::memory_order_acq_rel, + std::memory_order_acquire)) return true; lk.unlock(); @@ -121,9 +119,9 @@ namespace hpx::detail { bool try_lock_shared() { + auto s = state.load(std::memory_order_acquire); while (true) { - auto s = state.load(std::memory_order_acquire); if (s.data.exclusive || s.data.exclusive_waiting_blocked) { return false; @@ -136,15 +134,37 @@ namespace hpx::detail { { break; } + s = s1; } return true; } + bool try_unlock_shared_fast() + { + auto s = state.load(std::memory_order_acquire); + while (true) + { + if (s.data.exclusive || s.data.exclusive_waiting_blocked || + s.data.upgrade || s.data.shared_count <= 1) + { + return false; + } + + auto s1 = s; + --s.data.shared_count; + if (set_state(s1, s)) + { + return true; + } + s = s1; + } + } + void unlock_shared() { + auto s = state.load(std::memory_order_acquire); while (true) { - auto s = state.load(std::memory_order_acquire); auto s1 = s; if (--s.data.shared_count == 0) @@ 
-184,14 +204,15 @@ namespace hpx::detail { { break; } + s = s1; } } void lock() { + auto s = state.load(std::memory_order_acquire); while (true) { - auto s = state.load(std::memory_order_acquire); while (s.data.shared_count != 0 || s.data.exclusive) { auto s1 = s; @@ -214,14 +235,15 @@ namespace hpx::detail { { break; } + s = s1; } } bool try_lock() { + auto s = state.load(std::memory_order_acquire); while (true) { - auto s = state.load(std::memory_order_acquire); if (s.data.shared_count || s.data.exclusive) { return false; @@ -234,15 +256,16 @@ namespace hpx::detail { { break; } + s = s1; } return true; } void unlock() { + auto s = state.load(std::memory_order_acquire); while (true) { - auto s = state.load(std::memory_order_acquire); auto s1 = s; s.data.exclusive = false; @@ -255,6 +278,7 @@ namespace hpx::detail { release_waiters(lk); break; } + s = s1; } } @@ -287,9 +311,9 @@ namespace hpx::detail { bool try_lock_upgrade() { + auto s = state.load(std::memory_order_acquire); while (true) { - auto s = state.load(std::memory_order_acquire); if (s.data.exclusive || s.data.exclusive_waiting_blocked || s.data.upgrade) { @@ -304,15 +328,16 @@ namespace hpx::detail { { break; } + s = s1; } return true; } void unlock_upgrade() { + auto s = state.load(std::memory_order_acquire); while (true) { - auto s = state.load(std::memory_order_acquire); auto s1 = s; bool release = false; @@ -337,6 +362,7 @@ namespace hpx::detail { { break; } + s = s1; } } @@ -384,9 +410,9 @@ namespace hpx::detail { void unlock_and_lock_upgrade() { + auto s = state.load(std::memory_order_acquire); while (true) { - auto s = state.load(std::memory_order_acquire); auto s1 = s; s.data.exclusive = false; @@ -401,14 +427,15 @@ namespace hpx::detail { release_waiters(lk); break; } + s = s1; } } void unlock_and_lock_shared() { + auto s = state.load(std::memory_order_acquire); while (true) { - auto s = state.load(std::memory_order_acquire); auto s1 = s; s.data.exclusive = false; @@ -422,14 +449,15 @@ 
namespace hpx::detail { release_waiters(lk); break; } + s = s1; } } bool try_unlock_shared_and_lock() { + auto s = state.load(std::memory_order_acquire); while (true) { - auto s = state.load(std::memory_order_acquire); if (s.data.exclusive || s.data.exclusive_waiting_blocked || s.data.upgrade || s.data.shared_count != 1) { @@ -444,15 +472,16 @@ namespace hpx::detail { { break; } + s = s1; } return true; } void unlock_upgrade_and_lock_shared() { + auto s = state.load(std::memory_order_acquire); while (true) { - auto s = state.load(std::memory_order_acquire); auto s1 = s; s.data.exclusive_waiting_blocked = false; @@ -465,6 +494,7 @@ namespace hpx::detail { release_waiters(lk); break; } + s = s1; } } @@ -510,6 +540,8 @@ namespace hpx::detail { void lock_shared() { auto data = data_; + if (data->try_lock_shared()) + return; data->lock_shared(); } @@ -522,6 +554,8 @@ namespace hpx::detail { void unlock_shared() { auto data = data_; + if (data->try_unlock_shared_fast()) + return; data->unlock_shared(); } diff --git a/tests/performance/local/CMakeLists.txt b/tests/performance/local/CMakeLists.txt index a920c07421d6..f5ac538e28a1 100644 --- a/tests/performance/local/CMakeLists.txt +++ b/tests/performance/local/CMakeLists.txt @@ -25,6 +25,7 @@ set(benchmarks skynet wait_all_timings benchmark_stealing + shared_mutex_overhead ) set(timed_task_spawn_SOURCES activate_counters.cpp) @@ -144,6 +145,7 @@ set(print_heterogeneous_payloads_PARAMETERS NO_HPX_MAIN) set(skynet_PARAMETERS NO_HPX_MAIN) set(timed_task_spawn_PARAMETERS NO_HPX_MAIN) set(benchmark_stealing_PARAMETERS NO_HPX_MAIN) +set(shared_mutex_overhead_PARAMETERS NO_HPX_MAIN) set(hpx_tls_overhead_PARAMETERS NO_HPX_MAIN) set(native_tls_overhead_PARAMETERS NO_HPX_MAIN) set(coroutines_call_overhead_PARAMETERS NO_HPX_MAIN) diff --git a/tests/performance/local/shared_mutex_overhead.cpp b/tests/performance/local/shared_mutex_overhead.cpp new file mode 100644 index 000000000000..cb06faa497c7 --- /dev/null +++ 
b/tests/performance/local/shared_mutex_overhead.cpp @@ -0,0 +1,75 @@ +// (C) Copyright 2026 Arpit Khandelwal +// +// SPDX-License-Identifier: BSL-1.0 +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +std::uint64_t num_iterations = 100000; +std::uint64_t reader_threads = 4; + +hpx::shared_mutex mtx; + +void reader() +{ + for (std::uint64_t i = 0; i < num_iterations; ++i) + { + std::shared_lock l(mtx); + } +} + +int hpx_main(hpx::program_options::variables_map& vm) +{ + num_iterations = vm["iterations"].as(); + reader_threads = hpx::get_num_worker_threads(); + + std::cout << "Starting benchmark with " << reader_threads << " threads..." + << std::endl; + + std::vector> futures; + futures.reserve(reader_threads); + + hpx::chrono::high_resolution_timer walltime; + + for (std::uint64_t i = 0; i < reader_threads; ++i) + { + futures.push_back(hpx::async(&reader)); + } + + hpx::wait_all(futures); + + double const duration = walltime.elapsed(); + + std::cout << "Total time: " << duration << " seconds" << std::endl; + std::cout << "Average time per reader thread: " << duration / reader_threads + << " seconds" << std::endl; + + hpx::util::print_cdash_timing("SharedMutexOverhead", duration); + + return hpx::local::finalize(); +} + +int main(int argc, char* argv[]) +{ + hpx::program_options::options_description cmdline( + "usage: " HPX_APPLICATION_STRING " [options]"); + + cmdline.add_options()("iterations", + hpx::program_options::value()->default_value(100000), + "number of iterations per thread"); + + hpx::local::init_params init_args; + init_args.desc_cmdline = cmdline; + + return hpx::local::init(hpx_main, argc, argv, init_args); +}