Skip to content

Commit b08e007

Browse files
committed
fix(test): rework reload_nginx
This change comes after a deep investigation into why certain jobs were failing when upgrading to the latest dd-trace-cpp version. The root cause turned out to be increased contention in the default curl HTTP client due to telemetry refactoring. This caused NGINX to take significantly longer to shut down, which in turn led to timeouts during reload_nginx. While investigating, I also realized the logic in `reload_nginx` was flawed. It was only checking whether a worker process was running, rather than verifying that the old worker (by PID) had exited. Since NGINX spawns a new worker and sends a SIGQUIT to the old one during reloads, our previous check was resulting in false positives. I've updated the logic to explicitly wait for the old worker PID to terminate and confirm the new worker is up before proceeding. Additionally, while fixing this, I encountered issues with some AppSec tests. These tests were intentionally causing the worker to fail during initialization (e.g. by using invalid config paths), but this left NGINX in a broken state for subsequent tests. To resolve this, the tests for `datadog_appsec_http_blocked_template_json`, `datadog_appsec_http_blocked_template_html`, and `datadog_appsec_ruleset_file` now validate the existence of their required files during the configuration process instead of at worker startup.
1 parent 4e94d09 commit b08e007

File tree

9 files changed

+216
-182
lines changed

9 files changed

+216
-182
lines changed

src/security/directives.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ constexpr directive appsec_directives[] = {
3333
{
3434
"datadog_appsec_ruleset_file",
3535
NGX_HTTP_MAIN_CONF | NGX_CONF_TAKE1,
36-
ngx_conf_set_str_slot,
36+
ngx_conf_set_path_slot,
3737
NGX_HTTP_MAIN_CONF_OFFSET,
3838
offsetof(datadog_main_conf_t, appsec_ruleset_file),
3939
nullptr,
@@ -42,7 +42,7 @@ constexpr directive appsec_directives[] = {
4242
{
4343
"datadog_appsec_http_blocked_template_json",
4444
NGX_HTTP_MAIN_CONF | NGX_CONF_TAKE1,
45-
ngx_conf_set_str_slot,
45+
ngx_conf_set_path_slot,
4646
NGX_HTTP_MAIN_CONF_OFFSET,
4747
offsetof(datadog_main_conf_t, appsec_http_blocked_template_json),
4848
nullptr,
@@ -51,7 +51,7 @@ constexpr directive appsec_directives[] = {
5151
{
5252
"datadog_appsec_http_blocked_template_html",
5353
NGX_HTTP_MAIN_CONF | NGX_CONF_TAKE1,
54-
ngx_conf_set_str_slot,
54+
ngx_conf_set_path_slot,
5555
NGX_HTTP_MAIN_CONF_OFFSET,
5656
offsetof(datadog_main_conf_t, appsec_http_blocked_template_html),
5757
nullptr,

src/security/library.cpp

+9-6
Original file line numberDiff line numberDiff line change
@@ -491,23 +491,26 @@ FinalizedConfigSettings::FinalizedConfigSettings(
491491
: enable_status::DISABLED;
492492
}
493493

494-
if (ngx_conf.appsec_ruleset_file.len > 0) {
495-
ruleset_file_ = to_string_view(ngx_conf.appsec_ruleset_file);
494+
if (ngx_conf.appsec_ruleset_file != nullptr &&
495+
ngx_conf.appsec_ruleset_file->name.len > 0) {
496+
ruleset_file_ = to_string_view(ngx_conf.appsec_ruleset_file->name);
496497
} else {
497498
ruleset_file_ = get_env_str(evs, "DD_APPSEC_RULES"sv).value_or("");
498499
}
499500

500-
if (ngx_conf.appsec_http_blocked_template_json.len > 0) {
501+
if (ngx_conf.appsec_http_blocked_template_json != nullptr &&
502+
ngx_conf.appsec_http_blocked_template_json->name.len > 0) {
501503
blocked_template_json_ =
502-
to_string_view(ngx_conf.appsec_http_blocked_template_json);
504+
to_string_view(ngx_conf.appsec_http_blocked_template_json->name);
503505
} else {
504506
blocked_template_json_ =
505507
get_env_str(evs, "DD_APPSEC_HTTP_BLOCKED_TEMPLATE_JSON"sv).value_or("");
506508
}
507509

508-
if (ngx_conf.appsec_http_blocked_template_html.len > 0) {
510+
if (ngx_conf.appsec_http_blocked_template_html != nullptr &&
511+
ngx_conf.appsec_http_blocked_template_html->name.len > 0) {
509512
blocked_template_html_ =
510-
to_string_view(ngx_conf.appsec_http_blocked_template_html);
513+
to_string_view(ngx_conf.appsec_http_blocked_template_html->name);
511514
} else {
512515
blocked_template_html_ =
513516
get_env_str(evs, "DD_APPSEC_HTTP_BLOCKED_TEMPLATE_HTML"sv).value_or("");

src/tracing_library.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,9 @@ dd::Expected<dd::Tracer> TracingLibrary::make_tracer(
7373
#ifdef WITH_WAF
7474
const bool appsec_fully_disabled = (nginx_conf.appsec_enabled == 0);
7575
if (!appsec_fully_disabled) {
76-
const bool has_custom_ruleset = (nginx_conf.appsec_ruleset_file.len > 0);
76+
const bool has_custom_ruleset =
77+
(nginx_conf.appsec_ruleset_file != nullptr &&
78+
nginx_conf.appsec_ruleset_file->len > 0);
7779
const bool appsec_enabling_explicit =
7880
(nginx_conf.appsec_enabled != NGX_CONF_UNSET);
7981
security::register_with_remote_cfg(

test/cases/orchestration.py

+31-11
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,21 @@ def quit_signal_handler(signum, frame):
2929
faulthandler.dump_traceback()
3030

3131

32+
def wait_until(predicate_func, timeout_seconds):
33+
"""Wait until `predicate_func` is True."""
34+
before = time.monotonic()
35+
while True:
36+
if predicate_func():
37+
break
38+
39+
now = time.monotonic()
40+
if now - before >= timeout_seconds:
41+
raise Exception(
42+
f"{timeout_seconds} seconds timeout exceeded while waiting for nginx workers to stop. {now - before} seconds elapsed."
43+
)
44+
time.sleep(0.5)
45+
46+
3247
signal.signal(signal.SIGQUIT, quit_signal_handler)
3348

3449
# Since we override the environment variables of child processes,
@@ -788,17 +803,22 @@ def reload_nginx(self, wait_for_workers_to_terminate=True):
788803
# The polling interval was chosen based on a system where:
789804
# - nginx_worker_pids ran in ~0.05 seconds
790805
# - the workers terminated after ~6 seconds
791-
poll_period_seconds = 0.5
792-
timeout_seconds = 10
793-
before = time.monotonic()
794-
while old_worker_pids & nginx_worker_pids(nginx_container,
795-
self.verbose):
796-
now = time.monotonic()
797-
if now - before >= timeout_seconds:
798-
raise Exception(
799-
f"{timeout_seconds} seconds timeout exceeded while waiting for nginx workers to stop. {now - before} seconds elapsed."
800-
)
801-
time.sleep(poll_period_seconds)
806+
def old_worker_stops(worker_pid):
807+
_worker_pid = worker_pid
808+
809+
def check():
810+
pids = nginx_worker_pids(nginx_container, self.verbose)
811+
return _worker_pid not in pids
812+
813+
return check
814+
815+
wait_until(old_worker_stops(old_worker_pids), timeout_seconds=10)
816+
817+
def new_worker_starts():
818+
pids = nginx_worker_pids(nginx_container, self.verbose)
819+
return len(pids) == 1
820+
821+
wait_until(new_worker_starts, timeout_seconds=10)
802822

803823
def nginx_replace_config(self, nginx_conf_text, file_name):
804824
"""Replace nginx's config and reload nginx.

0 commit comments

Comments
 (0)