Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,9 @@ namespace hpx::detail {

bool try_lock_shared()
{
auto s = state.load(std::memory_order_acquire);
while (true)
{
auto s = state.load(std::memory_order_acquire);
if (s.data.exclusive || s.data.exclusive_waiting_blocked)
{
return false;
Expand All @@ -136,15 +136,37 @@ namespace hpx::detail {
{
break;
}
s = state.load(std::memory_order_acquire);
Comment thread
hkaiser marked this conversation as resolved.
Outdated
}
Comment thread
hkaiser marked this conversation as resolved.
return true;
}
Comment thread
hkaiser marked this conversation as resolved.

void unlock_shared()
bool try_unlock_shared_fast()
{
while (true)
{
auto s = state.load(std::memory_order_acquire);
Comment thread
hkaiser marked this conversation as resolved.
Outdated
if (s.data.exclusive || s.data.exclusive_waiting_blocked ||
s.data.upgrade || s.data.shared_count <= 1)
{
return false;
}

auto s1 = s;
--s.data.shared_count;
if (set_state(s1, s))
{
return true;
}
s = s1;
}
}

void unlock_shared()
{
auto s = state.load(std::memory_order_acquire);
while (true)
{
auto s1 = s;

if (--s.data.shared_count == 0)
Expand Down Expand Up @@ -184,14 +206,15 @@ namespace hpx::detail {
{
break;
}
s = state.load(std::memory_order_acquire);
}
}

void lock()
{
auto s = state.load(std::memory_order_acquire);
while (true)
{
auto s = state.load(std::memory_order_acquire);
while (s.data.shared_count != 0 || s.data.exclusive)
{
auto s1 = s;
Expand All @@ -214,14 +237,15 @@ namespace hpx::detail {
{
break;
}
s = state.load(std::memory_order_acquire);
Comment thread
hkaiser marked this conversation as resolved.
Outdated
}
}

bool try_lock()
{
auto s = state.load(std::memory_order_acquire);
while (true)
{
auto s = state.load(std::memory_order_acquire);
if (s.data.shared_count || s.data.exclusive)
{
return false;
Expand All @@ -234,15 +258,16 @@ namespace hpx::detail {
{
break;
}
s = state.load(std::memory_order_acquire);
}
return true;
}

void unlock()
{
auto s = state.load(std::memory_order_acquire);
while (true)
{
auto s = state.load(std::memory_order_acquire);
auto s1 = s;

s.data.exclusive = false;
Expand All @@ -255,6 +280,7 @@ namespace hpx::detail {
release_waiters(lk);
break;
}
s = state.load(std::memory_order_acquire);
Comment thread
hkaiser marked this conversation as resolved.
Outdated
}
}

Expand Down Expand Up @@ -287,9 +313,9 @@ namespace hpx::detail {

bool try_lock_upgrade()
{
auto s = state.load(std::memory_order_acquire);
while (true)
{
auto s = state.load(std::memory_order_acquire);
if (s.data.exclusive || s.data.exclusive_waiting_blocked ||
s.data.upgrade)
{
Expand All @@ -304,15 +330,16 @@ namespace hpx::detail {
{
break;
}
s = state.load(std::memory_order_acquire);
}
return true;
}

void unlock_upgrade()
{
auto s = state.load(std::memory_order_acquire);
while (true)
{
auto s = state.load(std::memory_order_acquire);
auto s1 = s;

bool release = false;
Expand All @@ -337,6 +364,7 @@ namespace hpx::detail {
{
break;
}
s = state.load(std::memory_order_acquire);
}
}

Expand Down Expand Up @@ -384,9 +412,9 @@ namespace hpx::detail {

void unlock_and_lock_upgrade()
{
auto s = state.load(std::memory_order_acquire);
while (true)
{
auto s = state.load(std::memory_order_acquire);
auto s1 = s;

s.data.exclusive = false;
Expand All @@ -401,14 +429,15 @@ namespace hpx::detail {
release_waiters(lk);
break;
}
s = state.load(std::memory_order_acquire);
Comment thread
hkaiser marked this conversation as resolved.
Outdated
}
}

void unlock_and_lock_shared()
{
auto s = state.load(std::memory_order_acquire);
while (true)
{
auto s = state.load(std::memory_order_acquire);
auto s1 = s;

s.data.exclusive = false;
Expand All @@ -422,14 +451,15 @@ namespace hpx::detail {
release_waiters(lk);
break;
}
s = state.load(std::memory_order_acquire);
}
}

bool try_unlock_shared_and_lock()
{
auto s = state.load(std::memory_order_acquire);
while (true)
{
auto s = state.load(std::memory_order_acquire);
if (s.data.exclusive || s.data.exclusive_waiting_blocked ||
s.data.upgrade || s.data.shared_count != 1)
{
Expand All @@ -444,15 +474,16 @@ namespace hpx::detail {
{
break;
}
s = state.load(std::memory_order_acquire);
}
return true;
}

void unlock_upgrade_and_lock_shared()
{
auto s = state.load(std::memory_order_acquire);
while (true)
{
auto s = state.load(std::memory_order_acquire);
auto s1 = s;

s.data.exclusive_waiting_blocked = false;
Expand All @@ -465,6 +496,7 @@ namespace hpx::detail {
release_waiters(lk);
break;
}
s = state.load(std::memory_order_acquire);
}
}

Expand Down Expand Up @@ -510,6 +542,8 @@ namespace hpx::detail {
void lock_shared()
{
auto data = data_;
if (data->try_lock_shared())
return;
data->lock_shared();
}

Expand All @@ -522,6 +556,8 @@ namespace hpx::detail {
void unlock_shared()
{
auto data = data_;
if (data->try_unlock_shared_fast())
return;
Comment on lines +557 to +558
Copy link

Copilot AI Apr 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shared_mutex::unlock_shared now attempts try_unlock_shared_fast() and on failure immediately calls unlock_shared(), which re-loads the atomic state and repeats much of the decision logic. For common cases where the shared count is 1 (or upgrade/exclusive-wait flags are set), this adds an extra atomic load/branching to every unlock. Consider folding the fast path into shared_mutex_data::unlock_shared() (single state load) or otherwise structuring it to avoid double-reading the state on the fallback path.

Suggested change
if (data->try_unlock_shared_fast())
return;

Copilot uses AI. Check for mistakes.
data->unlock_shared();
}

Expand Down
2 changes: 2 additions & 0 deletions tests/performance/local/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ set(benchmarks
skynet
wait_all_timings
benchmark_stealing
shared_mutex_overhead
)

set(timed_task_spawn_SOURCES activate_counters.cpp)
Expand Down Expand Up @@ -144,6 +145,7 @@ set(print_heterogeneous_payloads_PARAMETERS NO_HPX_MAIN)
set(skynet_PARAMETERS NO_HPX_MAIN)
set(timed_task_spawn_PARAMETERS NO_HPX_MAIN)
set(benchmark_stealing_PARAMETERS NO_HPX_MAIN)
set(shared_mutex_overhead_PARAMETERS NO_HPX_MAIN)
set(hpx_tls_overhead_PARAMETERS NO_HPX_MAIN)
Comment thread
arpittkhandelwal marked this conversation as resolved.
set(native_tls_overhead_PARAMETERS NO_HPX_MAIN)
set(coroutines_call_overhead_PARAMETERS NO_HPX_MAIN)
Expand Down
75 changes: 75 additions & 0 deletions tests/performance/local/shared_mutex_overhead.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// (C) Copyright 2026 Arpit Khandelwal
//
// SPDX-License-Identifier: BSL-1.0
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <hpx/config.hpp>
#include <hpx/hpx.hpp>
#include <hpx/hpx_init.hpp>
#include <hpx/include/util.hpp>
#include <hpx/modules/testing.hpp>
#include <hpx/synchronization/shared_mutex.hpp>

#include <cstdint>
#include <iostream>
Copy link

Copilot AI Apr 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

std::shared_lock is used but the file does not include the standard header that defines it. hpx/synchronization/shared_mutex.hpp only includes <mutex>, so this may fail to compile on standard libraries that don't transitively include <shared_mutex>. Add an explicit #include <shared_mutex> (or otherwise include the header that provides std::shared_lock).

Suggested change
#include <iostream>
#include <iostream>
#include <shared_mutex>

Copilot uses AI. Check for mistakes.
#include <vector>

std::uint64_t num_iterations = 100000;
std::uint64_t reader_threads = 4;

hpx::shared_mutex mtx;

void reader()
{
for (std::uint64_t i = 0; i < num_iterations; ++i)
{
std::shared_lock<hpx::shared_mutex> l(mtx);
}
Comment on lines +24 to +28
Copy link

Copilot AI Apr 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

std::shared_lock is used but the file does not include the standard header that defines it. This will fail to compile on standard library implementations where shared_lock is only provided by <shared_mutex> (it is not guaranteed to be available via the HPX headers included here). Add the appropriate standard include (or switch to an HPX-provided lock type if that’s the intended API).

Copilot uses AI. Check for mistakes.
}

int hpx_main(hpx::program_options::variables_map& vm)
{
num_iterations = vm["iterations"].as<std::uint64_t>();
reader_threads = hpx::get_num_worker_threads();

std::cout << "Starting benchmark with " << reader_threads << " threads..."
<< std::endl;

std::vector<hpx::future<void>> futures;
futures.reserve(reader_threads);

hpx::chrono::high_resolution_timer walltime;

for (std::uint64_t i = 0; i < reader_threads; ++i)
{
futures.push_back(hpx::async(&reader));
}

hpx::wait_all(futures);

double const duration = walltime.elapsed();

std::cout << "Total time: " << duration << " seconds" << std::endl;
std::cout << "Average time per reader thread: " << duration / reader_threads
<< " seconds" << std::endl;

hpx::util::print_cdash_timing("SharedMutexOverhead", duration);

return hpx::local::finalize();
}

int main(int argc, char* argv[])
{
hpx::program_options::options_description cmdline(
"usage: " HPX_APPLICATION_STRING " [options]");

cmdline.add_options()("iterations",
hpx::program_options::value<std::uint64_t>()->default_value(100000),
"number of iterations per thread");

hpx::local::init_params init_args;
init_args.desc_cmdline = cmdline;

return hpx::local::init(hpx_main, argc, argv, init_args);
}
Loading