diff --git a/.github/workflows/long-test.yml b/.github/workflows/long-test.yml index 91940977a3bc..09ba403bce11 100644 --- a/.github/workflows/long-test.yml +++ b/.github/workflows/long-test.yml @@ -86,6 +86,13 @@ jobs: with: fetch-depth: 0 + - name: "Install deps" + run: | + sudo apt-get -y update + sudo apt install ansible -y + cd getting_started/setup_vm + ansible-playbook ccf-extended-testing.yml + - name: "Build" run: | git config --global --add safe.directory /__w/CCF/CCF diff --git a/CMakeLists.txt b/CMakeLists.txt index 6640fec8ca50..03758ebbb156 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -950,8 +950,9 @@ if(BUILD_TESTS) ) endif() + # Picobench benchmarks if(NOT TSAN) - # Picobench benchmarks + # Skip performance benchmarks when TSAN is enabled - some are vvv slow add_picobench(map_bench SRCS src/ds/test/map_bench.cpp) add_picobench(logger_bench SRCS src/ds/test/logger_bench.cpp) add_picobench(json_bench SRCS src/ds/test/json_bench.cpp) @@ -975,407 +976,395 @@ if(BUILD_TESTS) ) add_picobench(merkle_bench SRCS src/node/test/merkle_bench.cpp) add_picobench(hash_bench SRCS src/ds/test/hash_bench.cpp) + endif() - if(LONG_TESTS) - set(ADDITIONAL_RECOVERY_ARGS --with-load) - - add_e2e_test( - NAME recovery_test_cft_api_0 - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/recovery.py - ADDITIONAL_ARGS ${ADDITIONAL_RECOVERY_ARGS} --gov-api-version "classic" - ) - endif() + if(LONG_TESTS) + set(ADDITIONAL_RECOVERY_ARGS --with-load) add_e2e_test( - NAME recovery_test_cft_api_1 + NAME recovery_test_cft_api_0 PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/recovery.py - ADDITIONAL_ARGS ${ADDITIONAL_RECOVERY_ARGS} --gov-api-version - "2024-07-01" + ADDITIONAL_ARGS ${ADDITIONAL_RECOVERY_ARGS} --gov-api-version "classic" ) + endif() - add_e2e_test( - NAME recovery_test_suite - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_suite.py - LABEL suite - ADDITIONAL_ARGS - --test-duration - 150 - --test-suite - rekey_recovery - --test-suite - membership_recovery - --jinja-templates-path - ${CMAKE_SOURCE_DIR}/samples/templates - ) + add_e2e_test( + NAME recovery_test_cft_api_1 + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/recovery.py + ADDITIONAL_ARGS ${ADDITIONAL_RECOVERY_ARGS} --gov-api-version "2024-07-01" + ) - add_e2e_test( - NAME reconfiguration_test_suite - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_suite.py - LABEL suite - ADDITIONAL_ARGS - --test-duration 200 --test-suite reconfiguration --jinja-templates-path - ${CMAKE_SOURCE_DIR}/samples/templates - ) + add_e2e_test( + NAME recovery_test_suite + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_suite.py + LABEL suite + ADDITIONAL_ARGS + --test-duration + 150 + --test-suite + rekey_recovery + --test-suite + membership_recovery + --jinja-templates-path + ${CMAKE_SOURCE_DIR}/samples/templates + ) - if(LONG_TESTS) - add_e2e_test( - NAME regression_test_suite - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_suite.py - LABEL suite - ADDITIONAL_ARGS --test-duration 200 --test-suite regression_5236 - ) - endif() + add_e2e_test( + NAME reconfiguration_test_suite + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_suite.py + LABEL suite + ADDITIONAL_ARGS + --test-duration 200 --test-suite reconfiguration --jinja-templates-path + ${CMAKE_SOURCE_DIR}/samples/templates + ) + if(LONG_TESTS) add_e2e_test( - NAME full_test_suite + NAME regression_test_suite PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_suite.py LABEL suite - ADDITIONAL_ARGS - --oe-binary - ${OE_BINDIR} - --ledger-recovery-timeout - 20 - --test-duration - 200 - --test-suite - all - --jinja-templates-path - ${CMAKE_SOURCE_DIR}/samples/templates + ADDITIONAL_ARGS --test-duration 200 --test-suite regression_5236 ) + endif() - add_e2e_test( - NAME committable_suffix_test - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/committable.py - ADDITIONAL_ARGS --sig-ms-interval 100 - ) + add_e2e_test( + NAME full_test_suite + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_suite.py + LABEL suite + ADDITIONAL_ARGS + --oe-binary + ${OE_BINDIR} + --ledger-recovery-timeout + 20 + --test-duration + 200 + --test-suite + all + --jinja-templates-path + ${CMAKE_SOURCE_DIR}/samples/templates + ) - add_e2e_test( - NAME commit_latency - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/commit_latency.py - LABEL perf - ) + add_e2e_test( + NAME committable_suffix_test + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/committable.py + ADDITIONAL_ARGS --sig-ms-interval 100 + ) - add_e2e_test( - NAME js_batched_stress_test - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_batched.py - ADDITIONAL_ARGS - --js-app-bundle - ${CMAKE_SOURCE_DIR}/src/apps/batched - --election-timeout-ms - 10000 # Larger election timeout as recording large JS applications may - # trigger leadership changes - ) + add_e2e_test( + NAME commit_latency + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/commit_latency.py + LABEL perf + ) - add_e2e_test( - NAME modules_test - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/js-modules/modules.py - ADDITIONAL_ARGS - --package - libjs_generic - --election-timeout-ms - 10000 # Larger election timeout as recording - # large JS applications may trigger leadership changes - ) + add_e2e_test( + NAME js_batched_stress_test + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_batched.py + ADDITIONAL_ARGS + --js-app-bundle + ${CMAKE_SOURCE_DIR}/src/apps/batched + --election-timeout-ms + 10000 # Larger election timeout as recording large JS applications may + # trigger leadership changes + ) - add_e2e_test( - NAME auth - PYTHON_SCRIPT - ${CMAKE_SOURCE_DIR}/tests/js-custom-authorization/custom_authorization.py - ADDITIONAL_ARGS --package libjs_generic --js-app-bundle - ${CMAKE_SOURCE_DIR}/tests - ) + add_e2e_test( + NAME modules_test + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/js-modules/modules.py + ADDITIONAL_ARGS + --package libjs_generic --election-timeout-ms + 10000 # Larger election timeout as recording + # large JS applications may trigger leadership changes + ) - add_e2e_test( - NAME launch_host_process_test - PYTHON_SCRIPT - ${CMAKE_SOURCE_DIR}/tests/js-launch-host-process/host_process.py - ADDITIONAL_ARGS --js-app-bundle - ${CMAKE_SOURCE_DIR}/tests/js-launch-host-process - ) + add_e2e_test( + NAME auth + PYTHON_SCRIPT + ${CMAKE_SOURCE_DIR}/tests/js-custom-authorization/custom_authorization.py + ADDITIONAL_ARGS --package libjs_generic --js-app-bundle + ${CMAKE_SOURCE_DIR}/tests + ) - set(CONSTITUTION_ARGS - --constitution - ${CCF_DIR}/samples/constitutions/default/actions.js - --constitution - ${CCF_DIR}/samples/constitutions/test/test_actions.js - --constitution - ${CCF_DIR}/samples/constitutions/default/validate.js - --constitution - ${CCF_DIR}/samples/constitutions/test/resolve.js - --constitution - ${CCF_DIR}/samples/constitutions/default/apply.js - ) + add_e2e_test( + NAME launch_host_process_test + PYTHON_SCRIPT + ${CMAKE_SOURCE_DIR}/tests/js-launch-host-process/host_process.py + ADDITIONAL_ARGS --js-app-bundle + ${CMAKE_SOURCE_DIR}/tests/js-launch-host-process + ) - add_e2e_test( - NAME governance_test - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/governance.py - CONSTITUTION ${CONSTITUTION_ARGS} - ADDITIONAL_ARGS - --oe-binary ${OE_BINDIR} --initial-operator-count 1 - --jinja-templates-path ${CMAKE_SOURCE_DIR}/samples/templates - ) + set(CONSTITUTION_ARGS + --constitution + ${CCF_DIR}/samples/constitutions/default/actions.js + --constitution + ${CCF_DIR}/samples/constitutions/test/test_actions.js + --constitution + ${CCF_DIR}/samples/constitutions/default/validate.js + --constitution + ${CCF_DIR}/samples/constitutions/test/resolve.js + --constitution + ${CCF_DIR}/samples/constitutions/default/apply.js + ) - add_e2e_test( - NAME jwt_test - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/jwt_test.py - LABEL snp_flaky - ) + add_e2e_test( + NAME governance_test + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/governance.py + CONSTITUTION ${CONSTITUTION_ARGS} + ADDITIONAL_ARGS + --oe-binary ${OE_BINDIR} --initial-operator-count 1 + --jinja-templates-path ${CMAKE_SOURCE_DIR}/samples/templates + ) - add_e2e_test( - NAME code_update_test - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/code_update.py - ADDITIONAL_ARGS --oe-binary ${OE_BINDIR} --js-app-bundle - ${CMAKE_SOURCE_DIR}/samples/apps/logging/js - ) + add_e2e_test( + NAME jwt_test + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/jwt_test.py + LABEL snp_flaky + ) + + add_e2e_test( + NAME code_update_test + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/code_update.py + ADDITIONAL_ARGS --oe-binary ${OE_BINDIR} --js-app-bundle + ${CMAKE_SOURCE_DIR}/samples/apps/logging/js + ) - if(BUILD_TPCC) - include(${CMAKE_CURRENT_SOURCE_DIR}/src/apps/tpcc/tpcc.cmake) - endif() - - if(CLIENT_PROTOCOLS_TEST) - add_custom_command( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/testssl/testssl.sh - COMMAND - rm -rf ${CMAKE_CURRENT_BINARY_DIR}/testssl && git clone --depth 1 - --branch v3.0.7 --single-branch - https://github.com/drwetter/testssl.sh - ${CMAKE_CURRENT_BINARY_DIR}/testssl - ) - add_custom_target( - testssl ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/testssl/testssl.sh - ) - endif() + if(BUILD_TPCC) + include(${CMAKE_CURRENT_SOURCE_DIR}/src/apps/tpcc/tpcc.cmake) + endif() - add_e2e_test( - NAME e2e_logging_cft - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_logging.py - ADDITIONAL_ARGS --js-app-bundle - ${CMAKE_SOURCE_DIR}/samples/apps/logging/js + if(CLIENT_PROTOCOLS_TEST) + add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/testssl/testssl.sh + COMMAND + rm -rf ${CMAKE_CURRENT_BINARY_DIR}/testssl && git clone --depth 1 + --branch v3.0.7 --single-branch https://github.com/drwetter/testssl.sh + ${CMAKE_CURRENT_BINARY_DIR}/testssl ) - - set(RBAC_CONSTITUTION_ARGS - --constitution - ${CCF_DIR}/samples/constitutions/default/actions.js - --constitution - ${CCF_DIR}/samples/constitutions/roles/set_role_definition.js - --constitution - ${CCF_DIR}/samples/constitutions/default/validate.js - --constitution - ${CCF_DIR}/samples/constitutions/default/resolve.js - --constitution - ${CCF_DIR}/samples/constitutions/default/apply.js + add_custom_target( + testssl ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/testssl/testssl.sh ) + endif() - add_e2e_test( - NAME programmability - CONSTITUTION ${RBAC_CONSTITUTION_ARGS} - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/programmability.py - ) + add_e2e_test( + NAME e2e_logging_cft + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_logging.py + ADDITIONAL_ARGS --js-app-bundle ${CMAKE_SOURCE_DIR}/samples/apps/logging/js + ) - # This test uses large requests (so too slow for SAN) - if(NOT SAN) - add_e2e_test( - NAME e2e_limits PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/limits.py - ) - endif() + set(RBAC_CONSTITUTION_ARGS + --constitution + ${CCF_DIR}/samples/constitutions/default/actions.js + --constitution + ${CCF_DIR}/samples/constitutions/roles/set_role_definition.js + --constitution + ${CCF_DIR}/samples/constitutions/default/validate.js + --constitution + ${CCF_DIR}/samples/constitutions/default/resolve.js + --constitution + ${CCF_DIR}/samples/constitutions/default/apply.js + ) - add_e2e_test( - NAME e2e_redirects - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/redirects.py - ADDITIONAL_ARGS --js-app-bundle - ${CMAKE_SOURCE_DIR}/samples/apps/logging/js - ) + add_e2e_test( + NAME programmability + CONSTITUTION ${RBAC_CONSTITUTION_ARGS} + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/programmability.py + ) + # This test uses large requests (so too slow for SAN) + if(NOT SAN) add_e2e_test( - NAME e2e_logging_http2 - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_logging.py - ADDITIONAL_ARGS --js-app-bundle - ${CMAKE_SOURCE_DIR}/samples/apps/logging/js --http2 + NAME e2e_limits PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/limits.py ) + endif() - if(LONG_TESTS) - add_e2e_test( - NAME membership_api_0 - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/membership.py - ADDITIONAL_ARGS --gov-api-version "classic" - ) - endif() + add_e2e_test( + NAME e2e_redirects + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/redirects.py + ADDITIONAL_ARGS --js-app-bundle ${CMAKE_SOURCE_DIR}/samples/apps/logging/js + ) + add_e2e_test( + NAME e2e_logging_http2 + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/e2e_logging.py + ADDITIONAL_ARGS --js-app-bundle ${CMAKE_SOURCE_DIR}/samples/apps/logging/js + --http2 + ) + + if(LONG_TESTS) add_e2e_test( - NAME membership_api_1 + NAME membership_api_0 PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/membership.py - ADDITIONAL_ARGS --gov-api-version "2024-07-01" + ADDITIONAL_ARGS --gov-api-version "classic" ) + endif() - set(PARTITIONS_TEST_ARGS - # Higher snapshot interval as the test currently assumes that no - # transactions - # are emitted while partitions are up. To be removed when - # https://github.com/microsoft/CCF/issues/2577 is implemented - --snapshot-tx-interval 10000 - ) + add_e2e_test( + NAME membership_api_1 + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/membership.py + ADDITIONAL_ARGS --gov-api-version "2024-07-01" + ) - add_e2e_test( - NAME partitions_cft - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/partitions_test.py - LABEL partitions - CONFIGURATIONS partitions - ADDITIONAL_ARGS ${PARTITIONS_TEST_ARGS} - ) + set(PARTITIONS_TEST_ARGS + # Higher snapshot interval as the test currently assumes that no + # transactions + # are emitted while partitions are up. To be removed when + # https://github.com/microsoft/CCF/issues/2577 is implemented + --snapshot-tx-interval 10000 + ) - add_e2e_test( - NAME connections_cft - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/connections.py - ) + add_e2e_test( + NAME partitions_cft + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/partitions_test.py + LABEL partitions + CONFIGURATIONS partitions + ADDITIONAL_ARGS ${PARTITIONS_TEST_ARGS} + ) - add_e2e_test( - NAME consistency_trace_validation - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/consistency_trace_validation.py - LABEL snp_flaky - ) + add_e2e_test( + NAME connections_cft PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/connections.py + ) - add_e2e_test( - NAME fuzz_test PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/fuzzing.py - ) + add_e2e_test( + NAME consistency_trace_validation + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/consistency_trace_validation.py + LABEL snp_flaky + ) - if(CLIENT_PROTOCOLS_TEST) - add_e2e_test( - NAME client_protocols - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/client_protocols.py - LABEL protocolstest - ) - endif() + add_e2e_test( + NAME fuzz_test PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/fuzzing.py + ) + if(CLIENT_PROTOCOLS_TEST) add_e2e_test( - NAME schema_test_cft - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/schema.py - ADDITIONAL_ARGS - --schema-dir - ${CMAKE_SOURCE_DIR}/doc/schemas - --ledger-tutorial - ${CMAKE_SOURCE_DIR}/python/ledger_tutorial.py - --config-samples-dir - ${CMAKE_SOURCE_DIR}/samples/config - --config-file-1x - ${CMAKE_SOURCE_DIR}/python/config_1_x.ini + NAME client_protocols + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/client_protocols.py + LABEL protocolstest ) + endif() - list(APPEND LTS_TEST_ARGS --oe-binary ${OE_BINDIR} --ccf-version - ${CCF_VERSION} - ) - if(LONG_TESTS) - list(APPEND LTS_TEST_ARGS --check-ledger-compatibility) - endif() - - if(NOT SAN) - add_e2e_test( - NAME lts_compatibility - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/lts_compatibility.py - LABEL e2e - ADDITIONAL_ARGS ${LTS_TEST_ARGS} - ) - set_property( - TEST lts_compatibility - APPEND - PROPERTY ENVIRONMENT "LTS_COMPAT_GOV_CLIENT=1" - ) - set_property( - TEST lts_compatibility - APPEND - PROPERTY LABELS lts_compatibility - ) - endif() - - if(LONG_TESTS) - set(ROTATION_TEST_ARGS --rotation-retirements 10) - endif() + add_e2e_test( + NAME schema_test_cft + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/schema.py + ADDITIONAL_ARGS + --schema-dir + ${CMAKE_SOURCE_DIR}/doc/schemas + --ledger-tutorial + ${CMAKE_SOURCE_DIR}/python/ledger_tutorial.py + --config-samples-dir + ${CMAKE_SOURCE_DIR}/samples/config + --config-file-1x + ${CMAKE_SOURCE_DIR}/python/config_1_x.ini + ) - add_e2e_test( - NAME rotation_test_cft - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/rotation.py - LABEL rotation - ADDITIONAL_ARGS ${ROTATION_TEST_ARGS} - ) + list(APPEND LTS_TEST_ARGS --oe-binary ${OE_BINDIR} --ccf-version + ${CCF_VERSION} + ) + if(LONG_TESTS) + list(APPEND LTS_TEST_ARGS --check-ledger-compatibility) + endif() - set(RECONFIG_TEST_ARGS --ccf-version ${CCF_VERSION}) + if(NOT SAN) add_e2e_test( - NAME reconfiguration_test_cft - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/reconfiguration.py - ADDITIONAL_ARGS ${RECONFIG_TEST_ARGS} + NAME lts_compatibility + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/lts_compatibility.py + LABEL e2e + ADDITIONAL_ARGS ${LTS_TEST_ARGS} + ) + set_property( + TEST lts_compatibility + APPEND + PROPERTY ENVIRONMENT "LTS_COMPAT_GOV_CLIENT=1" ) + endif() + + if(LONG_TESTS) + set(ROTATION_TEST_ARGS --rotation-retirements 10) + endif() + add_e2e_test( + NAME rotation_test_cft + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/rotation.py + LABEL rotation + ADDITIONAL_ARGS ${ROTATION_TEST_ARGS} + ) + + set(RECONFIG_TEST_ARGS --ccf-version ${CCF_VERSION}) + add_e2e_test( + NAME reconfiguration_test_cft + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/reconfiguration.py + ADDITIONAL_ARGS ${RECONFIG_TEST_ARGS} + ) + + add_e2e_test( + NAME election_test PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/election.py + ) + + if(LONG_TESTS) add_e2e_test( - NAME election_test PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/election.py + NAME acme_endorsement_test + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/acme_endorsement.py + LABEL ACME ) + endif() - if(LONG_TESTS) - add_e2e_test( - NAME acme_endorsement_test - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/acme_endorsement.py - LABEL ACME - ) - endif() + add_piccolo_test( + NAME pi_ls + PYTHON_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/tests/infra/piccolo_driver.py + CLIENT_BIN ./submit PERF_LABEL "Logging" + ADDITIONAL_ARGS --package "samples/apps/logging/liblogging" + --max-writes-ahead 1000 --repetitions 10000 + ) - add_piccolo_test( - NAME pi_ls - PYTHON_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/tests/infra/piccolo_driver.py - CLIENT_BIN ./submit PERF_LABEL "Logging" - ADDITIONAL_ARGS --package "samples/apps/logging/liblogging" - --max-writes-ahead 1000 --repetitions 10000 - ) + add_piccolo_test( + NAME pi_basic + PYTHON_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/tests/infra/basicperf.py + CLIENT_BIN ./submit PERF_LABEL "Basic" + ADDITIONAL_ARGS --package "samples/apps/basic/libbasic" --client-def + "1,write,100000,primary" + ) - add_piccolo_test( - NAME pi_basic - PYTHON_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/tests/infra/basicperf.py - CLIENT_BIN ./submit PERF_LABEL "Basic" - ADDITIONAL_ARGS --package "samples/apps/basic/libbasic" --client-def - "1,write,100000,primary" - ) + add_piccolo_test( + NAME pi_basic_js + PYTHON_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/tests/infra/basicperf.py + CLIENT_BIN ./submit PERF_LABEL "Basic JS" + ADDITIONAL_ARGS --js-app-bundle ${CMAKE_SOURCE_DIR}/samples/apps/basic/js + --client-def "1,write,100000,primary" + ) + if(WORKER_THREADS) add_piccolo_test( - NAME pi_basic_js + NAME pi_basic_mt PYTHON_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/tests/infra/basicperf.py - CLIENT_BIN ./submit PERF_LABEL "Basic JS" - ADDITIONAL_ARGS --js-app-bundle ${CMAKE_SOURCE_DIR}/samples/apps/basic/js - --client-def "1,write,100000,primary" + CLIENT_BIN ./submit PERF_LABEL "Basic Multi-Threaded" + ADDITIONAL_ARGS --package "samples/apps/basic/libbasic" --client-def + "${WORKER_THREADS},write,100000,primary" ) + endif() - if(WORKER_THREADS) - add_piccolo_test( - NAME pi_basic_mt - PYTHON_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/tests/infra/basicperf.py - CLIENT_BIN ./submit PERF_LABEL "Basic Multi-Threaded" - ADDITIONAL_ARGS --package "samples/apps/basic/libbasic" --client-def - "${WORKER_THREADS},write,100000,primary" - ) - endif() - - add_piccolo_test( - NAME pi_ls_jwt - PYTHON_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/tests/infra/piccolo_driver.py - CLIENT_BIN ./submit PERF_LABEL "Logging JWT" - ADDITIONAL_ARGS - --package - "samples/apps/logging/liblogging" - --max-writes-ahead - 1000 - --repetitions - 1000 - --use-jwt - ) + add_piccolo_test( + NAME pi_ls_jwt + PYTHON_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/tests/infra/piccolo_driver.py + CLIENT_BIN ./submit PERF_LABEL "Logging JWT" + ADDITIONAL_ARGS + --package + "samples/apps/logging/liblogging" + --max-writes-ahead + 1000 + --repetitions + 1000 + --use-jwt + ) - add_e2e_test( - NAME historical_query_perf_test - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/historical_query_perf.py - LABEL perf PERF_LABEL "Historical Queries" - CONFIGURATIONS perf - ) + add_e2e_test( + NAME historical_query_perf_test + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/historical_query_perf.py + LABEL perf PERF_LABEL "Historical Queries" + CONFIGURATIONS perf + ) - add_e2e_test( - NAME historical_query_cache_test - PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/historical_query_cache.py - ) - endif() + add_e2e_test( + NAME historical_query_cache_test + PYTHON_SCRIPT ${CMAKE_SOURCE_DIR}/tests/historical_query_cache.py + ) endif() # Generate and install CMake export file for consumers using CMake diff --git a/include/ccf/endpoint_registry.h b/include/ccf/endpoint_registry.h index ab991ee0785b..d5b729030019 100644 --- a/include/ccf/endpoint_registry.h +++ b/include/ccf/endpoint_registry.h @@ -162,7 +162,6 @@ namespace ccf::endpoints templated_endpoints; ccf::kv::Consensus* consensus = nullptr; - ccf::kv::TxHistory* history = nullptr; public: EndpointRegistry(const std::string& method_prefix_) : @@ -280,8 +279,6 @@ namespace ccf::endpoints void set_consensus(ccf::kv::Consensus* c); - void set_history(ccf::kv::TxHistory* h); - // Override these methods to log or report request metrics. virtual void handle_event_request_completed( const ccf::endpoints::RequestCompletedEvent& event) diff --git a/include/ccf/indexing/indexer_interface.h b/include/ccf/indexing/indexer_interface.h index add86f3963d3..49f306e5eac6 100644 --- a/include/ccf/indexing/indexer_interface.h +++ b/include/ccf/indexing/indexer_interface.h @@ -4,6 +4,7 @@ #include "ccf/indexing/strategy.h" #include "ccf/node_subsystem_interface.h" +#include "ccf/pal/locking.h" #include #include @@ -19,6 +20,7 @@ namespace ccf::indexing class IndexingStrategies : public ccf::AbstractNodeSubSystem { protected: + ccf::pal::Mutex lock; std::set strategies; public: @@ -36,11 +38,13 @@ namespace ccf::indexing throw std::logic_error("Tried to install null strategy"); } + std::lock_guard guard(lock); return strategies.insert(strategy).second; } void uninstall_strategy(const StrategyPtr& strategy) { + std::lock_guard guard(lock); if (strategy == nullptr || strategies.find(strategy) == strategies.end()) { throw std::logic_error("Strategy doesn't exist"); diff --git a/include/ccf/indexing/strategy.h b/include/ccf/indexing/strategy.h index 1d29483adb64..5f8c95f9b5a8 100644 --- a/include/ccf/indexing/strategy.h +++ b/include/ccf/indexing/strategy.h @@ -3,6 +3,7 @@ #pragma once #include "ccf/kv/read_only_store.h" +#include "ccf/pal/locking.h" #include "ccf/tx_id.h" #include @@ -68,6 +69,7 @@ namespace ccf::indexing class LazyStrategy : public Base { protected: + ccf::pal::Mutex lock; ccf::SeqNo max_requested_seqno = 0; public: @@ -78,6 +80,7 @@ namespace ccf::indexing const auto base = Base::next_requested(); if (base.has_value()) { + std::lock_guard guard(lock); if (*base <= max_requested_seqno) { return base; @@ -89,6 +92,7 @@ namespace ccf::indexing void extend_index_to(ccf::TxID to_txid) { + std::lock_guard guard(lock); if (to_txid.seqno > max_requested_seqno) { max_requested_seqno = to_txid.seqno; diff --git a/src/endpoints/endpoint_registry.cpp b/src/endpoints/endpoint_registry.cpp index e6db940d55ba..a6818c11dff2 100644 --- a/src/endpoints/endpoint_registry.cpp +++ b/src/endpoints/endpoint_registry.cpp @@ -582,9 +582,4 @@ namespace ccf::endpoints { consensus = c; } - - void EndpointRegistry::set_history(ccf::kv::TxHistory* h) - { - history = h; - } } diff --git a/src/indexing/indexer.h b/src/indexing/indexer.h index a98e583495b5..bf1b6dff2064 100644 --- a/src/indexing/indexer.h +++ b/src/indexing/indexer.h @@ -68,6 +68,9 @@ namespace ccf::indexing update_commit(newly_committed); std::optional min_requested = std::nullopt; + + std::lock_guard guard(lock); + for (auto& strategy : strategies) { strategy->tick(); diff --git a/src/node/history.h b/src/node/history.h index 0dcd0fd420a7..8ec8c1853e2e 100644 --- a/src/node/history.h +++ b/src/node/history.h @@ -739,7 +739,6 @@ namespace ccf // Delay taking this lock until _after_ the read above, to avoid lock // inversions std::lock_guard guard(state_lock); - CCF_ASSERT_FMT( !replicated_state_tree.in_range(1), "Tree is not empty before initialising from snapshot"); diff --git a/src/node/ledger_secrets.h b/src/node/ledger_secrets.h index 318f37086156..4cce97cbd4f4 100644 --- a/src/node/ledger_secrets.h +++ b/src/node/ledger_secrets.h @@ -169,10 +169,10 @@ namespace ccf VersionedLedgerSecret get_latest(ccf::kv::ReadOnlyTx& tx) { - std::lock_guard guard(lock); - take_dependency_on_secrets(tx); + std::lock_guard guard(lock); + if (ledger_secrets.empty()) { throw std::logic_error( @@ -185,10 +185,10 @@ namespace ccf std::pair> get_latest_and_penultimate(ccf::kv::ReadOnlyTx& tx) { - std::lock_guard guard(lock); - take_dependency_on_secrets(tx); + std::lock_guard guard(lock); + if (ledger_secrets.empty()) { throw std::logic_error( @@ -208,10 +208,10 @@ namespace ccf ccf::kv::ReadOnlyTx& tx, std::optional up_to = std::nullopt) { - std::lock_guard guard(lock); - take_dependency_on_secrets(tx); + std::lock_guard guard(lock); + if (!up_to.has_value()) { return ledger_secrets; diff --git a/src/node/node_state.h b/src/node/node_state.h index 2e2249efd75b..1363c8786850 100644 --- a/src/node/node_state.h +++ b/src/node/node_state.h @@ -87,8 +87,6 @@ namespace ccf std::shared_ptr node_sign_kp; NodeId self; std::shared_ptr node_encrypt_kp; - ccf::crypto::Pem self_signed_node_cert; - std::optional endorsed_node_cert = std::nullopt; QuoteInfo quote_info; pal::PlatformAttestationMeasurement node_measurement; ccf::StartupConfig config; @@ -97,6 +95,12 @@ namespace ccf std::shared_ptr quote_endorsements_client = nullptr; + // These certs are protected by their own mutex. They are updated within + // hooks where it is unsafe to lock the entire-state mutex. + pal::Mutex certs_lock; + ccf::crypto::Pem self_signed_node_cert; + std::optional endorsed_node_cert = std::nullopt; + std::atomic stop_noticed = false; struct NodeStateMsg @@ -465,7 +469,13 @@ namespace ccf ccf::StartupConfig&& config_, std::vector&& startup_snapshot_) { + // This lock is currently unnecessary - this function executes while the + // node is single-threaded. Taking this lock also raises a potential lock + // cycle in TSAN. That is currently suppressed, but in future it may be + // simpler to extract this function to where it is more clearly and + // robustly single-threaded. std::lock_guard guard(lock); + sm.expect(NodeStartupState::initialized); start_type = start_type_; @@ -474,12 +484,15 @@ namespace ccf subject_alt_names = get_subject_alternative_names(); js::register_class_ids(); - self_signed_node_cert = create_self_signed_cert( - node_sign_kp, - config.node_certificate.subject_name, - subject_alt_names, - config.startup_host_time, - config.node_certificate.initial_validity_days); + { + std::lock_guard certs_guard(certs_lock); + self_signed_node_cert = create_self_signed_cert( + node_sign_kp, + config.node_certificate.subject_name, + subject_alt_names, + config.startup_host_time, + config.node_certificate.initial_validity_days); + } accept_node_tls_connections(); open_frontend(ActorsType::nodes); @@ -566,11 +579,15 @@ namespace ccf auto network_ca = std::make_shared<::tls::CA>(std::string( config.join.service_cert.begin(), config.join.service_cert.end())); - auto join_client_cert = std::make_unique<::tls::Cert>( - network_ca, - self_signed_node_cert, - node_sign_kp->private_key_pem(), - config.join.target_rpc_address); + std::unique_ptr<::tls::Cert> join_client_cert; + { + std::lock_guard certs_guard(certs_lock); + join_client_cert = std::make_unique<::tls::Cert>( + network_ca, + self_signed_node_cert, + node_sign_kp->private_key_pem(), + config.join.target_rpc_address); + } // Create RPC client and connect to remote node // Note: For now, assume that target node accepts same application @@ -848,15 +865,19 @@ namespace ccf "auto-refresh"); return; } - jwt_key_auto_refresh = std::make_shared( - config.jwt.key_refresh_interval.count_s(), - network, - consensus, - rpcsessions, - rpc_map, - node_sign_kp, - self_signed_node_cert); - jwt_key_auto_refresh->start(); + + { + std::lock_guard certs_guard(certs_lock); + jwt_key_auto_refresh = std::make_shared( + config.jwt.key_refresh_interval.count_s(), + network, + consensus, + rpcsessions, + rpc_map, + node_sign_kp, + self_signed_node_cert); + jwt_key_auto_refresh->start(); + } network.tables->set_map_hook( network.jwt_issuers.get_name(), @@ -1230,7 +1251,7 @@ namespace ccf // Trigger a snapshot (at next signature) to ensure we have a working // snapshot signed by the current (now new) service identity, in case // we need to recover soon again. - trigger_snapshot(tx); + NodeState::trigger_snapshot_impl(tx); if (tx.commit() != ccf::kv::CommitResult::SUCCESS) { @@ -1359,7 +1380,14 @@ namespace ccf ccf::kv::CommittableTx::Flag::LEDGER_CHUNK_AT_NEXT_SIGNATURE); } + // Virtual override for dispatch purposes, but actual implementation is + // stateless and static void trigger_snapshot(ccf::kv::Tx& tx) override + { + trigger_snapshot_impl(tx); + } + + static void trigger_snapshot_impl(ccf::kv::Tx& tx) { auto committable_tx = static_cast(&tx); if (committable_tx == nullptr) @@ -1457,11 +1485,14 @@ namespace ccf AppMessage::launch_host_process, to_host, json, input); } + // transition_service_to_open is a virtual override for dispatch purposes, + // but that actual implementation is almost stateless, so marked as static + // to make that clear void transition_service_to_open( ccf::kv::Tx& tx, AbstractGovernanceEffects::ServiceIdentities identities) override { - std::lock_guard guard(lock); + std::unique_lock guard(lock); auto service = tx.rw(Tables::SERVICE); auto service_info = service->get(); @@ -1533,6 +1564,13 @@ namespace ccf } else if (is_part_of_network()) { + auto ident = network.identity->get_key_pair(); + + // Nothing after this relies on state, so the lock can be dropped, + // allowing the Tx interactions to run without triggering lock + // inversions + guard.unlock(); + // Otherwise, if the node is part of the network. Open the network // straight away. Recovery shares are allocated to each recovery // member. @@ -1547,9 +1585,8 @@ namespace ccf } InternalTablesAccess::open_service(tx); - InternalTablesAccess::endorse_previous_identity( - tx, *network.identity->get_key_pair()); - trigger_snapshot(tx); + InternalTablesAccess::endorse_previous_identity(tx, *ident); + NodeState::trigger_snapshot_impl(tx); return; } else @@ -1788,7 +1825,7 @@ namespace ccf ccf::crypto::Pem get_self_signed_certificate() override { - std::lock_guard guard(lock); + std::lock_guard certs_guard(certs_lock); return self_signed_node_cert; } @@ -1860,6 +1897,7 @@ namespace ccf { // Accept TLS connections, presenting self-signed (i.e. non-endorsed) // node certificate. + // NB: Assumes certs_lock already held rpcsessions->set_node_cert( self_signed_node_cert, node_sign_kp->private_key_pem()); LOG_INFO_FMT("Node TLS connections now accepted"); @@ -1869,6 +1907,7 @@ namespace ccf { // Accept TLS connections, presenting node certificate signed by network // certificate + // NB: Assumes certs_lock already held CCF_ASSERT_FMT( endorsed_node_cert.has_value(), "Node certificate should be endorsed before accepting endorsed " @@ -2028,8 +2067,12 @@ namespace ccf bool send_create_request(const std::vector& packed) { - auto node_session = std::make_shared( - InvalidSessionId, self_signed_node_cert.raw()); + std::shared_ptr node_session; + { + std::lock_guard certs_guard(certs_lock); + node_session = std::make_shared( + InvalidSessionId, self_signed_node_cert.raw()); + } auto ctx = make_rpc_context(node_session, packed); std::shared_ptr search = @@ -2290,7 +2333,7 @@ namespace ccf "Could not find endorsed node certificate for {}", self)); } - std::lock_guard guard(lock); + std::lock_guard certs_guard(certs_lock); if (endorsed_node_cert.has_value()) { @@ -2305,6 +2348,9 @@ namespace ccf endorsed_node_cert->str()); history->set_endorsed_certificate(endorsed_node_cert.value()); n2n_channels->set_endorsed_node_cert(endorsed_node_cert.value()); + + // Stop iterating once this node's cert has been found + break; } return ccf::kv::ConsensusHookPtr(nullptr); @@ -2340,7 +2386,7 @@ namespace ccf "Could not find endorsed node certificate for {}", self)); } - std::lock_guard guard(lock); + std::lock_guard certs_guard(certs_lock); LOG_INFO_FMT("[global] Accepting network connections"); accept_network_tls_connections(); @@ -2568,8 +2614,12 @@ namespace ccf auto shared_state = std::make_shared(self); - auto node_client = std::make_shared( - rpc_map, node_sign_kp, self_signed_node_cert, endorsed_node_cert); + std::shared_ptr node_client; + { + std::lock_guard certs_guard(certs_lock); + node_client = std::make_shared( + rpc_map, node_sign_kp, self_signed_node_cert, endorsed_node_cert); + } ccf::kv::MembershipState membership_state = ccf::kv::MembershipState::Active; @@ -2766,8 +2816,8 @@ namespace ccf std::optional client_cert_key = std::nullopt; if (authenticate_as_node_client_certificate) { - client_cert = - endorsed_node_cert ? *endorsed_node_cert : self_signed_node_cert; + std::lock_guard certs_guard(certs_lock); + client_cert = endorsed_node_cert.value_or(self_signed_node_cert); client_cert_key = node_sign_kp->private_key_pem(); } diff --git a/src/node/rpc/frontend.h b/src/node/rpc/frontend.h index 67064d98073c..cce474705dd4 100644 --- a/src/node/rpc/frontend.h +++ b/src/node/rpc/frontend.h @@ -44,7 +44,6 @@ namespace ccf ccf::kv::Consensus* consensus; std::shared_ptr cmd_forwarder; - ccf::kv::TxHistory* history; size_t sig_tx_interval = 5000; std::chrono::milliseconds sig_ms_interval = std::chrono::milliseconds(1000); @@ -64,12 +63,6 @@ namespace ccf } } - void update_history() - { - history = tables.get_history().get(); - endpoints.set_history(history); - } - endpoints::EndpointDefinitionPtr find_endpoint( std::shared_ptr ctx, ccf::kv::CommittableTx& tx) { @@ -651,7 +644,6 @@ namespace ccf } ++attempts; - update_history(); endpoint = find_endpoint(ctx, *tx_p); if (endpoint == nullptr) @@ -821,11 +813,13 @@ namespace ccf } } - if ( - consensus != nullptr && consensus->can_replicate() && - history != nullptr) + if (consensus != nullptr && consensus->can_replicate()) { - history->try_emit_signature(); + auto history = tables.get_history().get(); + if (history != nullptr) + { + history->try_emit_signature(); + } } return; @@ -923,8 +917,7 @@ namespace ccf tables(tables_), endpoints(handlers_), node_context(node_context_), - consensus(nullptr), - history(nullptr) + consensus(nullptr) {} void set_sig_intervals( @@ -963,7 +956,7 @@ namespace ccf { if (endpoints.request_needs_root(ctx)) { - update_history(); + auto history = tables.get_history().get(); if (history) { // Warning: Retrieving the current TxID and root from the history diff --git a/src/node/snapshotter.h b/src/node/snapshotter.h index 69d59fef124c..76130d886187 100644 --- a/src/node/snapshotter.h +++ b/src/node/snapshotter.h @@ -114,6 +114,7 @@ namespace ccf std::unique_ptr snapshot, uint32_t generation_count) { + std::unique_lock guard(lock); if (pending_snapshots.size() >= max_pending_snapshots_count) { LOG_FAIL_FMT( @@ -127,11 +128,7 @@ namespace ccf auto serialised_snapshot = store->serialise_snapshot(std::move(snapshot)); auto serialised_snapshot_size = serialised_snapshot.size(); - - auto tx = store->create_tx(); - auto evidence = tx.rw(Tables::SNAPSHOT_EVIDENCE); auto snapshot_hash = ccf::crypto::Sha256Hash(serialised_snapshot); - evidence->put({snapshot_hash, snapshot_version}); ccf::ClaimsDigest cd; cd.set(std::move(snapshot_hash)); @@ -155,6 +152,11 @@ namespace ccf pending_snapshots[generation_count] = {}; pending_snapshots[generation_count].version = snapshot_version; + // Temporarily unlock, while interacting with KV. + guard.unlock(); + auto tx = store->create_tx(); + auto evidence = tx.wo(Tables::SNAPSHOT_EVIDENCE); + evidence->put({snapshot_hash, snapshot_version}); auto rc = tx.commit(cd, false, nullptr, capture_ws_digest_and_commit_evidence); if (rc != ccf::kv::CommitResult::SUCCESS) @@ -165,6 +167,7 @@ namespace ccf rc); return; } + guard.lock(); auto evidence_version = tx.commit_version(); diff --git a/tsan_env_suppressions b/tsan_env_suppressions index 665ce4df0a27..efede4b5cd4a 100644 --- a/tsan_env_suppressions +++ b/tsan_env_suppressions @@ -1,12 +1,19 @@ # For ThreadSanitizerSuppressions + # https://github.com/google/sanitizers/wiki/ThreadSanitizerSuppressions # Awkward usages of '*' in this file like '/ds/*ring_buffer.h' are necessary to handle the cases where tsan thinks + # src/ds/ring_buffer.h as src/ds/test/../ring_buffer.h for example # For partitions_test -deadlock:*/store.h -deadlock:*/untyped_map.h + +# deadlock:\*/store.h + +# deadlock:\*/untyped_map.h # For governance_test -race:*/node/*rpc/*frontend.h + +# race:*/node/*rpc/\*frontend.h + +deadlock:enclave_create_node