From 7bdc0a172b2840d89c426abe9cce38038b511a69 Mon Sep 17 00:00:00 2001 From: Eddy Ashton Date: Thu, 7 Nov 2024 14:29:33 +0000 Subject: [PATCH 01/13] Try running all e2e tests in TSAN --- CMakeLists.txt | 664 ++++++++++++++++++++++++------------------------- 1 file changed, 329 insertions(+), 335 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 65992691c8b3..ff26c9245524 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1000,8 +1000,9 @@ if(BUILD_TESTS) ) endif() + # Picobench benchmarks if(NOT TSAN) - # Picobench benchmarks + # Skip performance benchmarks when TASN is enabled - some are vvv slow add_picobench(map_bench SRCS src/ds/test/map_bench.cpp) add_picobench(logger_bench SRCS src/ds/test/logger_bench.cpp) add_picobench(json_bench SRCS src/ds/test/json_bench.cpp) @@ -1025,402 +1026,395 @@ if(BUILD_TESTS) ) add_picobench(merkle_bench SRCS src/node/test/merkle_bench.cpp) add_picobench(hash_bench SRCS src/ds/test/hash_bench.cpp) + endif() - if(LONG_TESTS) - set(ADDITIONAL_RECOVERY_ARGS --with-load) - - add_e2e_test( - NAME recovery_test_cft_api_0 - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/recovery.py - ADDITIONAL_ARGS ${ADDITIONAL_RECOVERY_ARGS} --gov-api-version "classic" - ) - endif() + if(LONG_TESTS) + set(ADDITIONAL_RECOVERY_ARGS --with-load) add_e2e_test( - NAME recovery_test_cft_api_1 + NAME recovery_test_cft_api_0 PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/recovery.py - ADDITIONAL_ARGS ${ADDITIONAL_RECOVERY_ARGS} --gov-api-version - "2024-07-01" + ADDITIONAL_ARGS ${ADDITIONAL_RECOVERY_ARGS} --gov-api-version "classic" ) + endif() - add_e2e_test( - NAME recovery_test_suite - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_suite.py - LABEL suite - ADDITIONAL_ARGS - --test-duration - 150 - --test-suite - rekey_recovery - --test-suite - membership_recovery - --jinja-templates-path - ${CMAKE_SOURCE_DIR}/samples/templates - ) + add_e2e_test( + NAME recovery_test_cft_api_1 + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/recovery.py + ADDITIONAL_ARGS ${ADDITIONAL_RECOVERY_ARGS} --gov-api-version "2024-07-01" + ) - add_e2e_test( - NAME reconfiguration_test_suite - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_suite.py - LABEL suite - ADDITIONAL_ARGS - --test-duration 200 --test-suite reconfiguration --jinja-templates-path - ${CMAKE_SOURCE_DIR}/samples/templates - ) + add_e2e_test( + NAME recovery_test_suite + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_suite.py + LABEL suite + ADDITIONAL_ARGS + --test-duration + 150 + --test-suite + rekey_recovery + --test-suite + membership_recovery + --jinja-templates-path + ${CMAKE_SOURCE_DIR}/samples/templates + ) - if(LONG_TESTS) - add_e2e_test( - NAME regression_test_suite - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_suite.py - LABEL suite - ADDITIONAL_ARGS --test-duration 200 --test-suite regression_5236 - ) - endif() + add_e2e_test( + NAME reconfiguration_test_suite + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_suite.py + LABEL suite + ADDITIONAL_ARGS + --test-duration 200 --test-suite reconfiguration --jinja-templates-path + ${CMAKE_SOURCE_DIR}/samples/templates + ) + if(LONG_TESTS) add_e2e_test( - NAME full_test_suite + NAME regression_test_suite PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_suite.py LABEL suite - ADDITIONAL_ARGS - --oe-binary - ${OE_BINDIR} - --ledger-recovery-timeout - 20 - --test-duration - 200 - --test-suite - all - --jinja-templates-path - ${CMAKE_SOURCE_DIR}/samples/templates + ADDITIONAL_ARGS --test-duration 200 --test-suite regression_5236 ) + endif() - add_e2e_test( - NAME committable_suffix_test - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/committable.py - ADDITIONAL_ARGS --sig-ms-interval 100 - ) + add_e2e_test( + NAME full_test_suite + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_suite.py + LABEL suite + ADDITIONAL_ARGS + --oe-binary + ${OE_BINDIR} + --ledger-recovery-timeout + 20 + --test-duration + 200 + --test-suite + all + --jinja-templates-path + ${CMAKE_SOURCE_DIR}/samples/templates + ) - add_e2e_test( - NAME commit_latency - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/commit_latency.py - LABEL perf - ) + add_e2e_test( + NAME committable_suffix_test + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/committable.py + ADDITIONAL_ARGS --sig-ms-interval 100 + ) - add_e2e_test( - NAME js_batched_stress_test - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_batched.py - ADDITIONAL_ARGS - --js-app-bundle - ${CMAKE_SOURCE_DIR}/src/apps/batched - --election-timeout-ms - 10000 # Larger election timeout as recording large JS applications may - # trigger leadership changes - ) + add_e2e_test( + NAME commit_latency + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/commit_latency.py + LABEL perf + ) - add_e2e_test( - NAME modules_test - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/js-modules/modules.py - ADDITIONAL_ARGS - --package - libjs_generic - --election-timeout-ms - 10000 # Larger election timeout as recording - # large JS applications may trigger leadership changes - ) + add_e2e_test( + NAME js_batched_stress_test + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_batched.py + ADDITIONAL_ARGS + --js-app-bundle + ${CMAKE_SOURCE_DIR}/src/apps/batched + --election-timeout-ms + 10000 # Larger election timeout as recording large JS applications may + # trigger leadership changes + ) - add_e2e_test( - NAME auth - PYTHON_SCRIPT - ${CMAKE_SOURCE_DIR}/tests/js-custom-authorization/custom_authorization.py - ADDITIONAL_ARGS --package libjs_generic --js-app-bundle - ${CMAKE_SOURCE_DIR}/tests - ) + add_e2e_test( + NAME modules_test + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/js-modules/modules.py + ADDITIONAL_ARGS + --package libjs_generic --election-timeout-ms + 10000 # Larger election timeout as recording + # large JS applications may trigger leadership changes + ) - add_e2e_test( - NAME launch_host_process_test - PYTHON_SCRIPT - ${CMAKE_SOURCE_DIR}/tests/js-launch-host-process/host_process.py - ADDITIONAL_ARGS --js-app-bundle - ${CMAKE_SOURCE_DIR}/tests/js-launch-host-process - ) - - set(CONSTITUTION_ARGS - --constitution - ${CCF_DIR}/samples/constitutions/default/actions.js - --constitution - ${CCF_DIR}/samples/constitutions/test/test_actions.js - --constitution - ${CCF_DIR}/samples/constitutions/default/validate.js - --constitution - ${CCF_DIR}/samples/constitutions/test/resolve.js - --constitution - ${CCF_DIR}/samples/constitutions/default/apply.js - ) + add_e2e_test( + NAME auth + PYTHON_SCRIPT + ${CMAKE_SOURCE_DIR}/tests/js-custom-authorization/custom_authorization.py + ADDITIONAL_ARGS --package libjs_generic --js-app-bundle + ${CMAKE_SOURCE_DIR}/tests + ) - add_e2e_test( - NAME governance_test - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/governance.py - CONSTITUTION ${CONSTITUTION_ARGS} - ADDITIONAL_ARGS - --oe-binary ${OE_BINDIR} --initial-operator-count 1 - --jinja-templates-path ${CMAKE_SOURCE_DIR}/samples/templates - ) + add_e2e_test( + NAME launch_host_process_test + PYTHON_SCRIPT + ${CMAKE_SOURCE_DIR}/tests/js-launch-host-process/host_process.py + ADDITIONAL_ARGS --js-app-bundle + ${CMAKE_SOURCE_DIR}/tests/js-launch-host-process + ) - add_e2e_test( - NAME jwt_test - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/jwt_test.py - LABEL snp_flaky - ) + set(CONSTITUTION_ARGS + --constitution + ${CCF_DIR}/samples/constitutions/default/actions.js + --constitution + ${CCF_DIR}/samples/constitutions/test/test_actions.js + --constitution + ${CCF_DIR}/samples/constitutions/default/validate.js + --constitution + ${CCF_DIR}/samples/constitutions/test/resolve.js + --constitution + ${CCF_DIR}/samples/constitutions/default/apply.js + ) - add_e2e_test( - NAME code_update_test - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/code_update.py - ADDITIONAL_ARGS --oe-binary ${OE_BINDIR} --js-app-bundle - ${CMAKE_SOURCE_DIR}/samples/apps/logging/js - ) - - if(BUILD_TPCC) - include(${CMAKE_CURRENT_SOURCE_DIR}/src/apps/tpcc/tpcc.cmake) - endif() - - if(CLIENT_PROTOCOLS_TEST) - add_custom_command( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/testssl/testssl.sh - COMMAND - rm -rf ${CMAKE_CURRENT_BINARY_DIR}/testssl && git clone --depth 1 - --branch v3.0.7 --single-branch - https://github.com/drwetter/testssl.sh - ${CMAKE_CURRENT_BINARY_DIR}/testssl - ) - add_custom_target( - testssl ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/testssl/testssl.sh - ) - endif() + add_e2e_test( + NAME governance_test + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/governance.py + CONSTITUTION ${CONSTITUTION_ARGS} + ADDITIONAL_ARGS + --oe-binary ${OE_BINDIR} --initial-operator-count 1 + --jinja-templates-path ${CMAKE_SOURCE_DIR}/samples/templates + ) - add_e2e_test( - NAME e2e_logging_cft - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_logging.py - ADDITIONAL_ARGS --js-app-bundle - ${CMAKE_SOURCE_DIR}/samples/apps/logging/js - ) + add_e2e_test( + NAME jwt_test + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/jwt_test.py + LABEL snp_flaky + ) - set(RBAC_CONSTITUTION_ARGS - --constitution - ${CCF_DIR}/samples/constitutions/default/actions.js - --constitution - ${CCF_DIR}/samples/constitutions/roles/set_role_definition.js - --constitution - ${CCF_DIR}/samples/constitutions/default/validate.js - --constitution - ${CCF_DIR}/samples/constitutions/default/resolve.js - --constitution - ${CCF_DIR}/samples/constitutions/default/apply.js - ) + add_e2e_test( + NAME code_update_test + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/code_update.py + ADDITIONAL_ARGS --oe-binary ${OE_BINDIR} --js-app-bundle + ${CMAKE_SOURCE_DIR}/samples/apps/logging/js + ) - add_e2e_test( - NAME programmability - CONSTITUTION ${RBAC_CONSTITUTION_ARGS} - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/programmability.py + if(BUILD_TPCC) + include(${CMAKE_CURRENT_SOURCE_DIR}/src/apps/tpcc/tpcc.cmake) + endif() + + if(CLIENT_PROTOCOLS_TEST) + add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/testssl/testssl.sh + COMMAND + rm -rf ${CMAKE_CURRENT_BINARY_DIR}/testssl && git clone --depth 1 + --branch v3.0.7 --single-branch https://github.com/drwetter/testssl.sh + ${CMAKE_CURRENT_BINARY_DIR}/testssl ) + add_custom_target( + testssl ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/testssl/testssl.sh + ) + endif() + + add_e2e_test( + NAME e2e_logging_cft + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_logging.py + ADDITIONAL_ARGS --js-app-bundle ${CMAKE_SOURCE_DIR}/samples/apps/logging/js + ) - # This test uses large requests (so too slow for SAN) - if(NOT SAN) - add_e2e_test( - NAME e2e_limits PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/limits.py - ) - endif() + set(RBAC_CONSTITUTION_ARGS + --constitution + ${CCF_DIR}/samples/constitutions/default/actions.js + --constitution + ${CCF_DIR}/samples/constitutions/roles/set_role_definition.js + --constitution + ${CCF_DIR}/samples/constitutions/default/validate.js + --constitution + ${CCF_DIR}/samples/constitutions/default/resolve.js + --constitution + ${CCF_DIR}/samples/constitutions/default/apply.js + ) - add_e2e_test( - NAME e2e_redirects - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/redirects.py - ADDITIONAL_ARGS --js-app-bundle - ${CMAKE_SOURCE_DIR}/samples/apps/logging/js - ) + add_e2e_test( + NAME programmability + CONSTITUTION ${RBAC_CONSTITUTION_ARGS} + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/programmability.py + ) + # This test uses large requests (so too slow for SAN) + if(NOT SAN) add_e2e_test( - NAME e2e_logging_http2 - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_logging.py - ADDITIONAL_ARGS --js-app-bundle - ${CMAKE_SOURCE_DIR}/samples/apps/logging/js --http2 + NAME e2e_limits PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/limits.py ) + endif() + + add_e2e_test( + NAME e2e_redirects + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/redirects.py + ADDITIONAL_ARGS --js-app-bundle ${CMAKE_SOURCE_DIR}/samples/apps/logging/js + ) - if(LONG_TESTS) - add_e2e_test( - NAME membership_api_0 - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/membership.py - ADDITIONAL_ARGS --gov-api-version "classic" - ) - endif() + add_e2e_test( + NAME e2e_logging_http2 + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_logging.py + ADDITIONAL_ARGS --js-app-bundle ${CMAKE_SOURCE_DIR}/samples/apps/logging/js + --http2 + ) + if(LONG_TESTS) add_e2e_test( - NAME membership_api_1 + NAME membership_api_0 PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/membership.py - ADDITIONAL_ARGS --gov-api-version "2024-07-01" + ADDITIONAL_ARGS --gov-api-version "classic" ) + endif() - set(PARTITIONS_TEST_ARGS - # Higher snapshot interval as the test currently assumes that no - # transactions - # are emitted while partitions are up. To be removed when - # https://github.com/microsoft/CCF/issues/2577 is implemented - --snapshot-tx-interval 10000 - ) + add_e2e_test( + NAME membership_api_1 + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/membership.py + ADDITIONAL_ARGS --gov-api-version "2024-07-01" + ) - add_e2e_test( - NAME partitions_cft - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/partitions_test.py - LABEL partitions - CONFIGURATIONS partitions - ADDITIONAL_ARGS ${PARTITIONS_TEST_ARGS} - ) + set(PARTITIONS_TEST_ARGS + # Higher snapshot interval as the test currently assumes that no + # transactions + # are emitted while partitions are up. To be removed when + # https://github.com/microsoft/CCF/issues/2577 is implemented + --snapshot-tx-interval 10000 + ) - add_e2e_test( - NAME connections_cft - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/connections.py - ) + add_e2e_test( + NAME partitions_cft + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/partitions_test.py + LABEL partitions + CONFIGURATIONS partitions + ADDITIONAL_ARGS ${PARTITIONS_TEST_ARGS} + ) - add_e2e_test( - NAME consistency_trace_validation - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/consistency_trace_validation.py - LABEL snp_flaky - ) + add_e2e_test( + NAME connections_cft PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/connections.py + ) + + add_e2e_test( + NAME consistency_trace_validation + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/consistency_trace_validation.py + LABEL snp_flaky + ) + + add_e2e_test( + NAME fuzz_test PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/fuzzing.py + ) + if(CLIENT_PROTOCOLS_TEST) add_e2e_test( - NAME fuzz_test PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/fuzzing.py + NAME client_protocols + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/client_protocols.py + LABEL protocolstest ) + endif() - if(CLIENT_PROTOCOLS_TEST) - add_e2e_test( - NAME client_protocols - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/client_protocols.py - LABEL protocolstest - ) - endif() + add_e2e_test( + NAME schema_test_cft + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/schema.py + ADDITIONAL_ARGS + --schema-dir + ${CMAKE_SOURCE_DIR}/doc/schemas + --ledger-tutorial + ${CMAKE_SOURCE_DIR}/python/ledger_tutorial.py + --config-samples-dir + ${CMAKE_SOURCE_DIR}/samples/config + --config-file-1x + ${CMAKE_SOURCE_DIR}/python/config_1_x.ini + ) - add_e2e_test( - NAME schema_test_cft - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/schema.py - ADDITIONAL_ARGS - --schema-dir - ${CMAKE_SOURCE_DIR}/doc/schemas - --ledger-tutorial - ${CMAKE_SOURCE_DIR}/python/ledger_tutorial.py - --config-samples-dir - ${CMAKE_SOURCE_DIR}/samples/config - --config-file-1x - ${CMAKE_SOURCE_DIR}/python/config_1_x.ini - ) - - list(APPEND LTS_TEST_ARGS --oe-binary ${OE_BINDIR} --ccf-version - ${CCF_VERSION} - ) - if(LONG_TESTS) - list(APPEND LTS_TEST_ARGS --check-ledger-compatibility) - endif() - - if(NOT SAN) - add_e2e_test( - NAME lts_compatibility - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/lts_compatibility.py - LABEL e2e - ADDITIONAL_ARGS ${LTS_TEST_ARGS} - ) - set_property( - TEST lts_compatibility - APPEND - PROPERTY ENVIRONMENT "LTS_COMPAT_GOV_CLIENT=1" - ) - endif() - - if(LONG_TESTS) - set(ROTATION_TEST_ARGS --rotation-retirements 10) - endif() + list(APPEND LTS_TEST_ARGS --oe-binary ${OE_BINDIR} --ccf-version + ${CCF_VERSION} + ) + if(LONG_TESTS) + list(APPEND LTS_TEST_ARGS --check-ledger-compatibility) + endif() + if(NOT SAN) add_e2e_test( - NAME rotation_test_cft - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/rotation.py - LABEL rotation - ADDITIONAL_ARGS ${ROTATION_TEST_ARGS} + NAME lts_compatibility + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/lts_compatibility.py + LABEL e2e + ADDITIONAL_ARGS ${LTS_TEST_ARGS} ) - - set(RECONFIG_TEST_ARGS --ccf-version ${CCF_VERSION}) - add_e2e_test( - NAME reconfiguration_test_cft - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/reconfiguration.py - ADDITIONAL_ARGS ${RECONFIG_TEST_ARGS} + set_property( + TEST lts_compatibility + APPEND + PROPERTY ENVIRONMENT "LTS_COMPAT_GOV_CLIENT=1" ) + endif() + + if(LONG_TESTS) + set(ROTATION_TEST_ARGS --rotation-retirements 10) + endif() + add_e2e_test( + NAME rotation_test_cft + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/rotation.py + LABEL rotation + ADDITIONAL_ARGS ${ROTATION_TEST_ARGS} + ) + + set(RECONFIG_TEST_ARGS --ccf-version ${CCF_VERSION}) + add_e2e_test( + NAME reconfiguration_test_cft + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/reconfiguration.py + ADDITIONAL_ARGS ${RECONFIG_TEST_ARGS} + ) + + add_e2e_test( + NAME election_test PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/election.py + ) + + if(LONG_TESTS) add_e2e_test( - NAME election_test PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/election.py + NAME acme_endorsement_test + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/acme_endorsement.py + LABEL ACME ) + endif() - if(LONG_TESTS) - add_e2e_test( - NAME acme_endorsement_test - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/acme_endorsement.py - LABEL ACME - ) - endif() + add_piccolo_test( + NAME pi_ls + PYTHON_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/tests/infra/piccolo_driver.py + CLIENT_BIN ./submit PERF_LABEL "Logging" + ADDITIONAL_ARGS --package "samples/apps/logging/liblogging" + --max-writes-ahead 1000 --repetitions 10000 + ) - add_piccolo_test( - NAME pi_ls - PYTHON_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/tests/infra/piccolo_driver.py - CLIENT_BIN ./submit PERF_LABEL "Logging" - ADDITIONAL_ARGS --package "samples/apps/logging/liblogging" - --max-writes-ahead 1000 --repetitions 10000 - ) + add_piccolo_test( + NAME pi_basic + PYTHON_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/tests/infra/basicperf.py + CLIENT_BIN ./submit PERF_LABEL "Basic" + ADDITIONAL_ARGS --package "samples/apps/basic/libbasic" --client-def + "1,write,100000,primary" + ) - add_piccolo_test( - NAME pi_basic - PYTHON_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/tests/infra/basicperf.py - CLIENT_BIN ./submit PERF_LABEL "Basic" - ADDITIONAL_ARGS --package "samples/apps/basic/libbasic" --client-def - "1,write,100000,primary" - ) + add_piccolo_test( + NAME pi_basic_js + PYTHON_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/tests/infra/basicperf.py + CLIENT_BIN ./submit PERF_LABEL "Basic JS" + ADDITIONAL_ARGS --js-app-bundle ${CMAKE_SOURCE_DIR}/samples/apps/basic/js + --client-def "1,write,100000,primary" + ) + if(WORKER_THREADS) add_piccolo_test( - NAME pi_basic_js + NAME pi_basic_mt PYTHON_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/tests/infra/basicperf.py - CLIENT_BIN ./submit PERF_LABEL "Basic JS" - ADDITIONAL_ARGS --js-app-bundle ${CMAKE_SOURCE_DIR}/samples/apps/basic/js - --client-def "1,write,100000,primary" + CLIENT_BIN ./submit PERF_LABEL "Basic Multi-Threaded" + ADDITIONAL_ARGS --package "samples/apps/basic/libbasic" --client-def + "${WORKER_THREADS},write,100000,primary" ) + endif() - if(WORKER_THREADS) - add_piccolo_test( - NAME pi_basic_mt - PYTHON_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/tests/infra/basicperf.py - CLIENT_BIN ./submit PERF_LABEL "Basic Multi-Threaded" - ADDITIONAL_ARGS --package "samples/apps/basic/libbasic" --client-def - "${WORKER_THREADS},write,100000,primary" - ) - endif() - - add_piccolo_test( - NAME pi_ls_jwt - PYTHON_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/tests/infra/piccolo_driver.py - CLIENT_BIN ./submit PERF_LABEL "Logging JWT" - ADDITIONAL_ARGS - --package - "samples/apps/logging/liblogging" - --max-writes-ahead - 1000 - --repetitions - 1000 - --use-jwt - ) + add_piccolo_test( + NAME pi_ls_jwt + PYTHON_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/tests/infra/piccolo_driver.py + CLIENT_BIN ./submit PERF_LABEL "Logging JWT" + ADDITIONAL_ARGS + --package + "samples/apps/logging/liblogging" + --max-writes-ahead + 1000 + --repetitions + 1000 + --use-jwt + ) - add_e2e_test( - NAME historical_query_perf_test - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/historical_query_perf.py - LABEL perf PERF_LABEL "Historical Queries" - CONFIGURATIONS perf - ) + add_e2e_test( + NAME historical_query_perf_test + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/historical_query_perf.py + LABEL perf PERF_LABEL "Historical Queries" + CONFIGURATIONS perf + ) - add_e2e_test( - NAME historical_query_cache_test - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/historical_query_cache.py - ) - endif() + add_e2e_test( + NAME historical_query_cache_test + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/historical_query_cache.py + ) endif() # Generate and install CMake export file for consumers using CMake From 9ec5904a60a079c8423e8dc4872bf9c00d7acbc2 Mon Sep 17 00:00:00 2001 From: Eddy Ashton Date: Thu, 7 Nov 2024 15:29:26 +0000 Subject: [PATCH 02/13] Speculative first fix --- src/enclave/rpc_sessions.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/enclave/rpc_sessions.h b/src/enclave/rpc_sessions.h index b1df9b9a3b68..3c7a445756f2 100644 --- a/src/enclave/rpc_sessions.h +++ b/src/enclave/rpc_sessions.h @@ -616,15 +616,15 @@ namespace ccf disp, ::tcp::tcp_inbound, [this](const uint8_t* data, size_t size) { auto id = serialized::peek(data, size); - auto search = sessions.find(id); - if (search == sessions.end()) + auto session = find_session(id); + if (session == nullptr) { LOG_DEBUG_FMT( "Ignoring tls_inbound for unknown or refused session: {}", id); return; } - search->second.second->handle_incoming_data({data, size}); + session->handle_incoming_data({data, size}); }); DISPATCHER_SET_MESSAGE_HANDLER( @@ -644,6 +644,7 @@ namespace ccf disp, udp::udp_inbound, [this](const uint8_t* data, size_t size) { auto id = serialized::peek(data, size); + // TODO: Take lock when accessing sessions auto search = sessions.find(id); if (search == sessions.end()) { From 4c984fec5095c388483c036a584e34d3340120ba Mon Sep 17 00:00:00 2001 From: Eddy Ashton Date: Fri, 8 Nov 2024 12:20:11 +0000 Subject: [PATCH 03/13] Working on a fresh suppressions file --- tsan_env_suppressions | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tsan_env_suppressions b/tsan_env_suppressions index 665ce4df0a27..efede4b5cd4a 100644 --- a/tsan_env_suppressions +++ b/tsan_env_suppressions @@ -1,12 +1,19 @@ # For ThreadSanitizerSuppressions + # https://github.com/google/sanitizers/wiki/ThreadSanitizerSuppressions # Awkward usages of '*' in this file like '/ds/*ring_buffer.h' are necessary to handle the cases where tsan thinks + # src/ds/ring_buffer.h as src/ds/test/../ring_buffer.h for example # For partitions_test -deadlock:*/store.h -deadlock:*/untyped_map.h + +# deadlock:\*/store.h + +# deadlock:\*/untyped_map.h # For governance_test -race:*/node/*rpc/*frontend.h + +# race:*/node/*rpc/\*frontend.h + +deadlock:enclave_create_node From 77ee72c77cdeeef17cacfa37ea1f6d5eaa3cee19 Mon Sep 17 00:00:00 2001 From: Eddy Ashton Date: Fri, 8 Nov 2024 12:20:20 +0000 Subject: [PATCH 04/13] Delay taking a lock --- src/node/history.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/node/history.h b/src/node/history.h index 2f8962d3091e..111cd5f7f3e7 100644 --- a/src/node/history.h +++ b/src/node/history.h @@ -674,7 +674,6 @@ namespace ccf bool init_from_snapshot( const std::vector& hash_at_snapshot) override { - std::lock_guard guard(state_lock); // The history can be initialised after a snapshot has been applied by // deserialising the tree in the signatures table and then applying the // hash of the transaction at which the snapshot was taken @@ -688,6 +687,9 @@ namespace ccf return false; } + // Delay taking this lock until _after_ the read above, to avoid lock + // inversions + std::lock_guard guard(state_lock); CCF_ASSERT_FMT( !replicated_state_tree.in_range(1), "Tree is not empty before initialising from snapshot"); From e30639f0ca96350540dcc0297fe2205e5498504b Mon Sep 17 00:00:00 2001 From: Eddy Ashton Date: Fri, 8 Nov 2024 12:20:39 +0000 Subject: [PATCH 05/13] Comment on unnecessary lock --- src/node/node_state.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/node/node_state.h b/src/node/node_state.h index 6b97f9939e94..8c66f98ac65a 100644 --- a/src/node/node_state.h +++ b/src/node/node_state.h @@ -464,7 +464,13 @@ namespace ccf StartupConfig&& config_, std::vector&& startup_snapshot_) { + // This lock is currently unnecessary - this function executes while the + // node is single-threaded. Taking this lock also raises a potential lock + // cycle in TSAN. That is currently suppressed, but in future it may be + // simpler to extract this function to where it is more clearly and + // robustly single-threaded. std::lock_guard guard(lock); + sm.expect(NodeStartupState::initialized); start_type = start_type_; From 7eed111b741fbd70d1a408c265b87f154020ce5c Mon Sep 17 00:00:00 2001 From: Eddy Ashton Date: Fri, 8 Nov 2024 12:32:05 +0000 Subject: [PATCH 06/13] More juggling of lock scopes --- src/node/node_state.h | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/src/node/node_state.h b/src/node/node_state.h index 8c66f98ac65a..14a28b71f82d 100644 --- a/src/node/node_state.h +++ b/src/node/node_state.h @@ -1193,7 +1193,7 @@ namespace ccf // Trigger a snapshot (at next signature) to ensure we have a working // snapshot signed by the current (now new) service identity, in case // we need to recover soon again. - trigger_snapshot(tx); + NodeState::trigger_snapshot_impl(tx); if (tx.commit() != ccf::kv::CommitResult::SUCCESS) { @@ -1322,7 +1322,14 @@ namespace ccf ccf::kv::CommittableTx::Flag::LEDGER_CHUNK_AT_NEXT_SIGNATURE); } + // Virtual override for dispatch purposes, but actual implementation is + // stateless and static void trigger_snapshot(ccf::kv::Tx& tx) override + { + trigger_snapshot_impl(tx); + } + + static void trigger_snapshot_impl(ccf::kv::Tx& tx) { auto committable_tx = static_cast(&tx); if (committable_tx == nullptr) @@ -1420,11 +1427,14 @@ namespace ccf AppMessage::launch_host_process, to_host, json, input); } + // transition_service_to_open is a virtual override for dispatch purposes, + // but that actual implementation is almost stateless, so marked as static + // to make that clear void transition_service_to_open( ccf::kv::Tx& tx, AbstractGovernanceEffects::ServiceIdentities identities) override { - std::lock_guard guard(lock); + std::unique_lock guard(lock); auto service = tx.rw(Tables::SERVICE); auto service_info = service->get(); @@ -1496,6 +1506,13 @@ namespace ccf } else if (is_part_of_network()) { + auto ident = network.identity->get_key_pair(); + + // Nothing after this relies on state, so the lock can be dropped, + // allowing the Tx interactions to run without triggering lock + // inversions + guard.unlock(); + // Otherwise, if the node is part of the network. Open the network // straight away. Recovery shares are allocated to each recovery // member. @@ -1510,9 +1527,8 @@ namespace ccf } InternalTablesAccess::open_service(tx); - InternalTablesAccess::endorse_previous_identity( - tx, *network.identity->get_key_pair()); - trigger_snapshot(tx); + InternalTablesAccess::endorse_previous_identity(tx, *ident); + NodeState::trigger_snapshot_impl(tx); return; } else From 16e5fd00bff166c104ce4db7b7e19bcc81a3c2bf Mon Sep 17 00:00:00 2001 From: Eddy Ashton Date: Fri, 8 Nov 2024 15:13:28 +0000 Subject: [PATCH 07/13] Stop locking all of the node state in hooks --- src/node/node_state.h | 92 +++++++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 33 deletions(-) diff --git a/src/node/node_state.h b/src/node/node_state.h index 14a28b71f82d..c24d57c2c17e 100644 --- a/src/node/node_state.h +++ b/src/node/node_state.h @@ -86,8 +86,6 @@ namespace ccf std::shared_ptr node_sign_kp; NodeId self; std::shared_ptr node_encrypt_kp; - ccf::crypto::Pem self_signed_node_cert; - std::optional endorsed_node_cert = std::nullopt; QuoteInfo quote_info; pal::PlatformAttestationMeasurement node_measurement; StartupConfig config; @@ -96,6 +94,12 @@ namespace ccf std::shared_ptr quote_endorsements_client = nullptr; + // These certs are protected by their own mutex. They are updated within + // hooks where it is unsafe to lock the entire-state mutex. + pal::Mutex certs_lock; + ccf::crypto::Pem self_signed_node_cert; + std::optional endorsed_node_cert = std::nullopt; + std::atomic stop_noticed = false; struct NodeStateMsg @@ -479,12 +483,15 @@ namespace ccf subject_alt_names = get_subject_alternative_names(); js::register_class_ids(); - self_signed_node_cert = create_self_signed_cert( - node_sign_kp, - config.node_certificate.subject_name, - subject_alt_names, - config.startup_host_time, - config.node_certificate.initial_validity_days); + { + std::lock_guard certs_guard(certs_lock); + self_signed_node_cert = create_self_signed_cert( + node_sign_kp, + config.node_certificate.subject_name, + subject_alt_names, + config.startup_host_time, + config.node_certificate.initial_validity_days); + } accept_node_tls_connections(); open_frontend(ActorsType::nodes); @@ -572,11 +579,15 @@ namespace ccf auto network_ca = std::make_shared<::tls::CA>(std::string( config.join.service_cert.begin(), config.join.service_cert.end())); - auto join_client_cert = std::make_unique<::tls::Cert>( - network_ca, - self_signed_node_cert, - node_sign_kp->private_key_pem(), - config.join.target_rpc_address); + std::unique_ptr<::tls::Cert> join_client_cert; + { + std::lock_guard certs_guard(certs_lock); + join_client_cert = std::make_unique<::tls::Cert>( + network_ca, + self_signed_node_cert, + node_sign_kp->private_key_pem(), + config.join.target_rpc_address); + } // Create RPC client and connect to remote node // Note: For now, assume that target node accepts same application @@ -847,15 +858,19 @@ namespace ccf "auto-refresh"); return; } - jwt_key_auto_refresh = std::make_shared( - config.jwt.key_refresh_interval.count_s(), - network, - consensus, - rpcsessions, - rpc_map, - node_sign_kp, - self_signed_node_cert); - jwt_key_auto_refresh->start(); + + { + std::lock_guard certs_guard(certs_lock); + jwt_key_auto_refresh = std::make_shared( + config.jwt.key_refresh_interval.count_s(), + network, + consensus, + rpcsessions, + rpc_map, + node_sign_kp, + self_signed_node_cert); + jwt_key_auto_refresh->start(); + } network.tables->set_map_hook( network.jwt_issuers.get_name(), @@ -1767,7 +1782,7 @@ namespace ccf ccf::crypto::Pem get_self_signed_certificate() override { - std::lock_guard guard(lock); + std::lock_guard certs_guard(certs_lock); return self_signed_node_cert; } @@ -1827,6 +1842,7 @@ namespace ccf { // Accept TLS connections, presenting self-signed (i.e. non-endorsed) // node certificate. + // NB: Assumes certs_lock already held rpcsessions->set_node_cert( self_signed_node_cert, node_sign_kp->private_key_pem()); LOG_INFO_FMT("Node TLS connections now accepted"); @@ -1836,6 +1852,7 @@ namespace ccf { // Accept TLS connections, presenting node certificate signed by network // certificate + // NB: Assumes certs_lock already held CCF_ASSERT_FMT( endorsed_node_cert.has_value(), "Node certificate should be endorsed before accepting endorsed " @@ -1993,8 +2010,12 @@ namespace ccf bool send_create_request(const std::vector& packed) { - auto node_session = std::make_shared( - InvalidSessionId, self_signed_node_cert.raw()); + std::shared_ptr node_session; + { + std::lock_guard certs_guard(certs_lock); + node_session = std::make_shared( + InvalidSessionId, self_signed_node_cert.raw()); + } auto ctx = make_rpc_context(node_session, packed); std::shared_ptr search = @@ -2245,11 +2266,13 @@ namespace ccf "Could not find endorsed node certificate for {}", self)); } - std::lock_guard guard(lock); - + std::lock_guard certs_guard(certs_lock); endorsed_node_cert = endorsed_certificate.value(); history->set_endorsed_certificate(endorsed_node_cert.value()); n2n_channels->set_endorsed_node_cert(endorsed_node_cert.value()); + + // Stop iterating once this node's cert has been found + break; } return ccf::kv::ConsensusHookPtr(nullptr); @@ -2274,8 +2297,7 @@ namespace ccf "Could not find endorsed node certificate for {}", self)); } - std::lock_guard guard(lock); - + std::lock_guard certs_guard(certs_lock); accept_network_tls_connections(); if (is_member_frontend_open_unsafe()) @@ -2478,8 +2500,12 @@ namespace ccf auto shared_state = std::make_shared(self); - auto node_client = std::make_shared( - rpc_map, node_sign_kp, self_signed_node_cert, endorsed_node_cert); + std::shared_ptr node_client; + { + std::lock_guard certs_guard(certs_lock); + node_client = std::make_shared( + rpc_map, node_sign_kp, self_signed_node_cert, endorsed_node_cert); + } ccf::kv::MembershipState membership_state = ccf::kv::MembershipState::Active; @@ -2676,8 +2702,8 @@ namespace ccf std::optional client_cert_key = std::nullopt; if (authenticate_as_node_client_certificate) { - client_cert = - endorsed_node_cert ? *endorsed_node_cert : self_signed_node_cert; + std::lock_guard certs_guard(certs_lock); + client_cert = endorsed_node_cert.value_or(self_signed_node_cert); client_cert_key = node_sign_kp->private_key_pem(); } From 7e6fc66fc84a74d234ae85f08c7495ed455f77ec Mon Sep 17 00:00:00 2001 From: Eddy Ashton Date: Fri, 8 Nov 2024 15:16:44 +0000 Subject: [PATCH 08/13] Install TSAN for pebble --- .github/workflows/long-test.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/long-test.yml b/.github/workflows/long-test.yml index 36fd0aafe1a5..1cf7b5730143 100644 --- a/.github/workflows/long-test.yml +++ b/.github/workflows/long-test.yml @@ -85,6 +85,13 @@ jobs: with: fetch-depth: 0 + - name: "Install deps" + run: | + sudo apt-get -y update + sudo apt install ansible -y + cd getting_started/setup_vm + ansible-playbook ccf-extended-testing.yml + - name: "Build" run: | git config --global --add safe.directory /__w/CCF/CCF From 0e85753510a8a75a3b7175d2aeec4b422bd93389 Mon Sep 17 00:00:00 2001 From: Eddy Ashton Date: Fri, 8 Nov 2024 15:58:46 +0000 Subject: [PATCH 09/13] Remove unused history --- include/ccf/endpoint_registry.h | 3 --- src/endpoints/endpoint_registry.cpp | 5 ----- 2 files changed, 8 deletions(-) diff --git a/include/ccf/endpoint_registry.h b/include/ccf/endpoint_registry.h index ab991ee0785b..d5b729030019 100644 --- a/include/ccf/endpoint_registry.h +++ b/include/ccf/endpoint_registry.h @@ -162,7 +162,6 @@ namespace ccf::endpoints templated_endpoints; ccf::kv::Consensus* consensus = nullptr; - ccf::kv::TxHistory* history = nullptr; public: EndpointRegistry(const std::string& method_prefix_) : @@ -280,8 +279,6 @@ namespace ccf::endpoints void set_consensus(ccf::kv::Consensus* c); - void set_history(ccf::kv::TxHistory* h); - // Override these methods to log or report request metrics. virtual void handle_event_request_completed( const ccf::endpoints::RequestCompletedEvent& event) diff --git a/src/endpoints/endpoint_registry.cpp b/src/endpoints/endpoint_registry.cpp index e6db940d55ba..a6818c11dff2 100644 --- a/src/endpoints/endpoint_registry.cpp +++ b/src/endpoints/endpoint_registry.cpp @@ -582,9 +582,4 @@ namespace ccf::endpoints { consensus = c; } - - void EndpointRegistry::set_history(ccf::kv::TxHistory* h) - { - history = h; - } } From e21050152cc469fe21c3bcc7ee6d4726ff52bc2d Mon Sep 17 00:00:00 2001 From: Eddy Ashton Date: Fri, 8 Nov 2024 15:59:05 +0000 Subject: [PATCH 10/13] Remove unnecessary history member - fetch on-demand --- src/node/rpc/frontend.h | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/src/node/rpc/frontend.h b/src/node/rpc/frontend.h index be375c8c7fe2..a23159a92ed8 100644 --- a/src/node/rpc/frontend.h +++ b/src/node/rpc/frontend.h @@ -44,7 +44,6 @@ namespace ccf ccf::kv::Consensus* consensus; std::shared_ptr cmd_forwarder; - ccf::kv::TxHistory* history; size_t sig_tx_interval = 5000; std::chrono::milliseconds sig_ms_interval = std::chrono::milliseconds(1000); @@ -64,12 +63,6 @@ namespace ccf } } - void update_history() - { - history = tables.get_history().get(); - endpoints.set_history(history); - } - endpoints::EndpointDefinitionPtr find_endpoint( std::shared_ptr ctx, ccf::kv::CommittableTx& tx) { @@ -651,7 +644,6 @@ namespace ccf } ++attempts; - update_history(); endpoint = find_endpoint(ctx, *tx_p); if (endpoint == nullptr) @@ -815,11 +807,13 @@ namespace ccf } } - if ( - consensus != nullptr && consensus->can_replicate() && - history != nullptr) + if (consensus != nullptr && consensus->can_replicate()) { - history->try_emit_signature(); + auto history = tables.get_history().get(); + if (history != nullptr) + { + history->try_emit_signature(); + } } return; @@ -917,8 +911,7 @@ namespace ccf tables(tables_), endpoints(handlers_), node_context(node_context_), - consensus(nullptr), - history(nullptr) + consensus(nullptr) {} void set_sig_intervals( @@ -953,7 +946,7 @@ namespace ccf { if (endpoints.request_needs_root(ctx)) { - update_history(); + auto history = tables.get_history().get(); if (history) { // Warning: Retrieving the current TxID and root from the history From ce8c9bba4c4c39a5d28706aa8ad93275820edc6e Mon Sep 17 00:00:00 2001 From: Eddy Ashton Date: Fri, 8 Nov 2024 17:26:25 +0000 Subject: [PATCH 11/13] More mutex shuffling --- include/ccf/indexing/indexer_interface.h | 4 ++++ include/ccf/indexing/strategy.h | 4 ++++ src/indexing/indexer.h | 3 +++ src/kv/store.h | 3 +-- src/node/ledger_secrets.h | 12 ++++++------ 5 files changed, 18 insertions(+), 8 deletions(-) diff --git a/include/ccf/indexing/indexer_interface.h b/include/ccf/indexing/indexer_interface.h index add86f3963d3..49f306e5eac6 100644 --- a/include/ccf/indexing/indexer_interface.h +++ b/include/ccf/indexing/indexer_interface.h @@ -4,6 +4,7 @@ #include "ccf/indexing/strategy.h" #include "ccf/node_subsystem_interface.h" +#include "ccf/pal/locking.h" #include #include @@ -19,6 +20,7 @@ namespace ccf::indexing class IndexingStrategies : public ccf::AbstractNodeSubSystem { protected: + ccf::pal::Mutex lock; std::set strategies; public: @@ -36,11 +38,13 @@ namespace ccf::indexing throw std::logic_error("Tried to install null strategy"); } + std::lock_guard guard(lock); return strategies.insert(strategy).second; } void uninstall_strategy(const StrategyPtr& strategy) { + std::lock_guard guard(lock); if (strategy == nullptr || strategies.find(strategy) == strategies.end()) { throw std::logic_error("Strategy doesn't exist"); diff --git a/include/ccf/indexing/strategy.h b/include/ccf/indexing/strategy.h index 1d29483adb64..5f8c95f9b5a8 100644 --- a/include/ccf/indexing/strategy.h +++ b/include/ccf/indexing/strategy.h @@ -3,6 +3,7 @@ #pragma once #include "ccf/kv/read_only_store.h" +#include "ccf/pal/locking.h" #include "ccf/tx_id.h" #include @@ -68,6 +69,7 @@ namespace ccf::indexing class LazyStrategy : public Base { protected: + ccf::pal::Mutex lock; ccf::SeqNo max_requested_seqno = 0; public: @@ -78,6 +80,7 @@ namespace ccf::indexing const auto base = Base::next_requested(); if (base.has_value()) { + std::lock_guard guard(lock); if (*base <= max_requested_seqno) { return base; @@ -89,6 +92,7 @@ namespace ccf::indexing void extend_index_to(ccf::TxID to_txid) { + std::lock_guard guard(lock); if (to_txid.seqno > max_requested_seqno) { max_requested_seqno = to_txid.seqno; diff --git a/src/indexing/indexer.h b/src/indexing/indexer.h index a98e583495b5..bf1b6dff2064 100644 --- a/src/indexing/indexer.h +++ b/src/indexing/indexer.h @@ -68,6 +68,9 @@ namespace ccf::indexing update_commit(newly_committed); std::optional min_requested = std::nullopt; + + std::lock_guard guard(lock); + for (auto& strategy : strategies) { strategy->tick(); diff --git a/src/kv/store.h b/src/kv/store.h index e1789e4ef663..08519ac6d0db 100644 --- a/src/kv/store.h +++ b/src/kv/store.h @@ -1255,8 +1255,7 @@ namespace ccf::kv ReservedTx create_reserved_tx(const TxID& tx_id) { - // version_lock should already been acquired in case term_of_last_version - // is incremented. + std::lock_guard vguard(version_lock); return ReservedTx(this, term_of_last_version, tx_id, rollback_count); } diff --git a/src/node/ledger_secrets.h b/src/node/ledger_secrets.h index c30223d876d9..a4b1d3fead37 100644 --- a/src/node/ledger_secrets.h +++ b/src/node/ledger_secrets.h @@ -168,10 +168,10 @@ namespace ccf VersionedLedgerSecret get_latest(ccf::kv::ReadOnlyTx& tx) { - std::lock_guard guard(lock); - take_dependency_on_secrets(tx); + std::lock_guard guard(lock); + if (ledger_secrets.empty()) { throw std::logic_error( @@ -184,10 +184,10 @@ namespace ccf std::pair> get_latest_and_penultimate(ccf::kv::ReadOnlyTx& tx) { - std::lock_guard guard(lock); - take_dependency_on_secrets(tx); + std::lock_guard guard(lock); + if (ledger_secrets.empty()) { throw std::logic_error( @@ -207,10 +207,10 @@ namespace ccf ccf::kv::ReadOnlyTx& tx, std::optional up_to = std::nullopt) { - std::lock_guard guard(lock); - take_dependency_on_secrets(tx); + std::lock_guard guard(lock); + if (!up_to.has_value()) { return ledger_secrets; From 367ba21670f77f4cdaf167198613c863ba7fb0ba Mon Sep 17 00:00:00 2001 From: Eddy Ashton Date: Tue, 12 Nov 2024 13:13:39 +0000 Subject: [PATCH 12/13] Protect snapshotter state --- src/node/snapshotter.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/node/snapshotter.h b/src/node/snapshotter.h index cabc8f371384..bf045a39ec2e 100644 --- a/src/node/snapshotter.h +++ b/src/node/snapshotter.h @@ -114,6 +114,7 @@ namespace ccf std::unique_ptr snapshot, uint32_t generation_count) { + std::unique_lock guard(lock); if (pending_snapshots.size() >= max_pending_snapshots_count) { LOG_FAIL_FMT( @@ -127,11 +128,7 @@ namespace ccf auto serialised_snapshot = store->serialise_snapshot(std::move(snapshot)); auto serialised_snapshot_size = serialised_snapshot.size(); - - auto tx = store->create_tx(); - auto evidence = tx.rw(Tables::SNAPSHOT_EVIDENCE); auto snapshot_hash = ccf::crypto::Sha256Hash(serialised_snapshot); - evidence->put({snapshot_hash, snapshot_version}); ccf::ClaimsDigest cd; cd.set(std::move(snapshot_hash)); @@ -155,6 +152,11 @@ namespace ccf pending_snapshots[generation_count] = {}; pending_snapshots[generation_count].version = snapshot_version; + // Temporarily unlock, while interacting with KV. + guard.unlock(); + auto tx = store->create_tx(); + auto evidence = tx.wo(Tables::SNAPSHOT_EVIDENCE); + evidence->put({snapshot_hash, snapshot_version}); auto rc = tx.commit(cd, false, nullptr, capture_ws_digest_and_commit_evidence); if (rc != ccf::kv::CommitResult::SUCCESS) @@ -165,6 +167,7 @@ namespace ccf rc); return; } + guard.lock(); auto evidence_version = tx.commit_version(); From 9a4df406d688462731d0866efc2f0a605884e70a Mon Sep 17 00:00:00 2001 From: Eddy Ashton Date: Tue, 12 Nov 2024 15:29:52 +0000 Subject: [PATCH 13/13] Update CMakeLists.txt Co-authored-by: Amaury Chamayou --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ff26c9245524..cc788fd9408e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1002,7 +1002,7 @@ if(BUILD_TESTS) # Picobench benchmarks if(NOT TSAN) - # Skip performance benchmarks when TASN is enabled - some are vvv slow + # Skip performance benchmarks when TSAN is enabled - some are vvv slow add_picobench(map_bench SRCS src/ds/test/map_bench.cpp) add_picobench(logger_bench SRCS src/ds/test/logger_bench.cpp) add_picobench(json_bench SRCS src/ds/test/json_bench.cpp)