Skip to content

Commit 45ce45e

Browse files
authored
fix(test): rework reload_nginx (#197)
* fix(test): rework `reload_nginx` This change comes after a deep investigation into why certain jobs were failing when upgrading to the latest dd-trace-cpp version. The root cause turned out to be increased contention in the default curl HTTP client due to telemetry refactoring. This caused NGINX to take significantly longer to shut down, which in turn led to timeouts during reload_nginx. While investigating, I've updated the logic to explicitly wait for the old worker PID to terminate and confirm the new worker is up before proceeding. Additionally, while fixing this, I encountered issues with some AppSec tests. These tests were intentionally causing the worker to fail during initialization (e.g. by using invalid config paths), but this left NGINX in a broken state for subsequent tests. To resolve this, the tests for `datadog_appsec_http_blocked_template_json`, `datadog_appsec_http_blocked_template_html`, and `datadog_appsec_ruleset_file` now validate the existence of their required files during the configuration process instead of at worker startup.
1 parent 5f45ba8 commit 45ce45e

File tree

8 files changed

+147
-80
lines changed

8 files changed

+147
-80
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ target_compile_definitions(ngx_http_datadog_objs PUBLIC DD_NGINX_FLAVOR="${NGINX
137137
target_sources(ngx_http_datadog_objs
138138
PRIVATE
139139
src/common/variable.cpp
140+
src/common/directives.cpp
140141
src/array_util.cpp
141142
src/datadog_conf.cpp
142143
src/datadog_conf_handler.cpp

src/common/directives.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#include "common/directives.h"
2+
3+
#include <cassert>
4+
#include <filesystem>
5+
6+
#include "string_util.h"
7+
8+
namespace datadog::common {
9+
10+
char *check_file_exists(ngx_conf_t *cf, void *post, void *data) {
11+
assert(data != nullptr);
12+
ngx_str_t *s = (ngx_str_t *)data;
13+
if (!std::filesystem::exists(nginx::to_string_view(*s))) {
14+
ngx_conf_log_error(NGX_LOG_ERR, cf, 0, "Failed to open file: \"%V\"", s);
15+
return static_cast<char *>(NGX_CONF_ERROR);
16+
}
17+
return NGX_CONF_OK;
18+
}
19+
20+
} // namespace datadog::common

src/common/directives.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#pragma once
2+
3+
extern "C" {
4+
#include <ngx_core.h>
5+
}
6+
7+
namespace datadog::common {
8+
9+
/// Checks if the file specified in the configuration exists.
10+
///
11+
/// This function is typically used as a post-processing callback for NGINX
12+
/// configuration It verifies that the file specified in a configuration
13+
/// directive actually exists on the filesystem.
14+
char *check_file_exists(ngx_conf_t *cf, void *post, void *data);
15+
16+
/// Post handler for checking a filepath exists.
17+
static ngx_conf_post_t ngx_conf_post_file_exists = {check_file_exists};
18+
19+
} // namespace datadog::common

src/security/directives.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#pragma once
22

3+
#include "common/directives.h"
34
#include "datadog_conf.h"
45
#include "datadog_directive.h"
56

@@ -36,7 +37,7 @@ constexpr directive appsec_directives[] = {
3637
ngx_conf_set_str_slot,
3738
NGX_HTTP_MAIN_CONF_OFFSET,
3839
offsetof(datadog_main_conf_t, appsec_ruleset_file),
39-
nullptr,
40+
&datadog::common::ngx_conf_post_file_exists,
4041
},
4142

4243
{
@@ -45,7 +46,7 @@ constexpr directive appsec_directives[] = {
4546
ngx_conf_set_str_slot,
4647
NGX_HTTP_MAIN_CONF_OFFSET,
4748
offsetof(datadog_main_conf_t, appsec_http_blocked_template_json),
48-
nullptr,
49+
&datadog::common::ngx_conf_post_file_exists,
4950
},
5051

5152
{
@@ -54,7 +55,7 @@ constexpr directive appsec_directives[] = {
5455
ngx_conf_set_str_slot,
5556
NGX_HTTP_MAIN_CONF_OFFSET,
5657
offsetof(datadog_main_conf_t, appsec_http_blocked_template_html),
57-
nullptr,
58+
&datadog::common::ngx_conf_post_file_exists,
5859
},
5960

6061
{

test/cases/orchestration.py

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,21 @@ def quit_signal_handler(signum, frame):
2929
faulthandler.dump_traceback()
3030

3131

32+
def wait_until(predicate_func, timeout_seconds):
33+
"""Wait until `predicate_func` is True."""
34+
before = time.monotonic()
35+
while True:
36+
if predicate_func():
37+
break
38+
39+
now = time.monotonic()
40+
if now - before >= timeout_seconds:
41+
raise Exception(
42+
f"{timeout_seconds} seconds timeout exceeded while waiting for nginx workers to stop. {now - before} seconds elapsed."
43+
)
44+
time.sleep(0.5)
45+
46+
3247
signal.signal(signal.SIGQUIT, quit_signal_handler)
3348

3449
# Since we override the environment variables of child processes,
@@ -816,17 +831,24 @@ def reload_nginx(self, wait_for_workers_to_terminate=True):
816831
# The polling interval was chosen based on a system where:
817832
# - nginx_worker_pids ran in ~0.05 seconds
818833
# - the workers terminated after ~6 seconds
819-
poll_period_seconds = 0.5
820-
timeout_seconds = 10
821-
before = time.monotonic()
822-
while old_worker_pids & nginx_worker_pids(nginx_container,
823-
self.verbose):
824-
now = time.monotonic()
825-
if now - before >= timeout_seconds:
826-
raise Exception(
827-
f"{timeout_seconds} seconds timeout exceeded while waiting for nginx workers to stop. {now - before} seconds elapsed."
828-
)
829-
time.sleep(poll_period_seconds)
834+
def old_worker_stops(worker_pid):
835+
_worker_pid = worker_pid
836+
837+
def check():
838+
pids = nginx_worker_pids(nginx_container, self.verbose)
839+
res = _worker_pid.intersection(pids)
840+
# print(f"_worker_pid={_worker_pid}, pids={pids}, cond={res}")
841+
return len(res) == 0
842+
843+
return check
844+
845+
wait_until(old_worker_stops(old_worker_pids), timeout_seconds=10)
846+
847+
def new_worker_starts():
848+
pids = nginx_worker_pids(nginx_container, self.verbose)
849+
return len(pids) == 1
850+
851+
wait_until(new_worker_starts, timeout_seconds=10)
830852

831853
def nginx_replace_config(self, nginx_conf_text, file_name):
832854
"""Replace nginx's config and reload nginx.

test/cases/sec_config/test_sec_config.py

Lines changed: 68 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,20 @@ class TestSecConfig(case.TestCase):
88
requires_waf = True
99

1010
def apply_config(self, conf_name):
11-
conf_path = Path(__file__).parent / f'./conf/http_{conf_name}.conf'
11+
conf_path = Path(__file__).parent / f"./conf/http_{conf_name}.conf"
1212
conf_text = conf_path.read_text()
1313
status, log_lines = self.orch.nginx_replace_config(
1414
conf_text, conf_path.name)
1515
self.assertEqual(0, status, log_lines)
1616

17+
def _test_config(self, conf_name):
18+
conf_path = Path(__file__).parent / f"./conf/http_{conf_name}.conf"
19+
conf_text = conf_path.read_text()
20+
return self.orch.nginx_replace_config(conf_text, conf_path.name)
21+
1722
def get_appsec_data(self):
1823
self.orch.reload_nginx()
19-
log_lines = self.orch.sync_service('agent')
24+
log_lines = self.orch.sync_service("agent")
2025
entries = [
2126
entry for entry in (formats.parse_trace(line)
2227
for line in log_lines) if entry is not None
@@ -25,141 +30,140 @@ def get_appsec_data(self):
2530
for entry in entries:
2631
for trace in entry:
2732
for span in trace:
28-
if span.get('meta', {}).get('_dd.appsec.json'):
29-
return json.loads(span['meta']['_dd.appsec.json'])
30-
self.failureException('No _dd.appsec.json found in traces')
33+
if span.get("meta", {}).get("_dd.appsec.json"):
34+
return json.loads(span["meta"]["_dd.appsec.json"])
35+
self.failureException("No _dd.appsec.json found in traces")
3136

3237
def test_custom_templates(self):
33-
templ_json_path = Path(__file__).parent / './conf/templ.json'
34-
templ_html_path = Path(__file__).parent / './conf/templ.html'
35-
self.orch.nginx_replace_file('/tmp/templ.json',
38+
templ_json_path = Path(__file__).parent / "./conf/templ.json"
39+
templ_html_path = Path(__file__).parent / "./conf/templ.html"
40+
self.orch.nginx_replace_file("/tmp/templ.json",
3641
templ_json_path.read_text())
37-
self.orch.nginx_replace_file('/tmp/templ.html',
42+
self.orch.nginx_replace_file("/tmp/templ.html",
3843
templ_html_path.read_text())
3944

40-
self.apply_config('custom_blocking_templates')
45+
self.apply_config("custom_blocking_templates")
4146

4247
headers = {
43-
'User-Agent': 'dd-test-scanner-log-block',
44-
'Accept': 'text/html'
48+
"User-Agent": "dd-test-scanner-log-block",
49+
"Accept": "text/html"
4550
}
4651
status, headers, body = self.orch.send_nginx_http_request(
47-
'/http', 80, headers)
52+
"/http", 80, headers)
4853
self.assertEqual(status, 403)
4954
# find content-type header:
5055
ct = next((v for k, v in headers if k.lower() == "content-type"), None)
51-
self.assertEqual(ct, 'text/html;charset=utf-8')
52-
self.assertTrue('My custom blocking response' in body)
56+
self.assertEqual(ct, "text/html;charset=utf-8")
57+
self.assertTrue("My custom blocking response" in body)
5358

5459
headers = {
55-
'User-Agent': 'dd-test-scanner-log-block',
56-
'Accept': 'text/json'
60+
"User-Agent": "dd-test-scanner-log-block",
61+
"Accept": "text/json"
5762
}
5863
status, headers, body = self.orch.send_nginx_http_request(
59-
'/http', 80, headers)
64+
"/http", 80, headers)
6065
self.assertEqual(status, 403)
6166
ct = next((v for k, v in headers if k.lower() == "content-type"), None)
62-
self.assertEqual(ct, 'application/json')
67+
self.assertEqual(ct, "application/json")
6368
self.assertEqual(
6469
body,
6570
'{"error": "blocked", "details": "my custom json response"}\n')
6671

6772
def test_appsec_fully_disabled(self):
68-
self.apply_config('appsec_fully_disabled')
73+
self.apply_config("appsec_fully_disabled")
6974

7075
headers = {
71-
'User-Agent': 'dd-test-scanner-log-block',
72-
'Accept': 'text/json'
76+
"User-Agent": "dd-test-scanner-log-block",
77+
"Accept": "text/json"
7378
}
74-
status, _, _ = self.orch.send_nginx_http_request('/', 80, headers)
79+
status, _, _ = self.orch.send_nginx_http_request("/", 80, headers)
7580
self.assertEqual(status, 200)
7681

7782
def test_bad_custom_template(self):
78-
self.apply_config('bad_template_file')
79-
80-
msg = self.orch.wait_for_log_message(
81-
'nginx',
82-
'.*Initialising security library failed.*',
83-
timeout_secs=5)
83+
# We can't afford to shutdown workers
84+
status, log_lines = self._test_config("bad_template_file")
85+
self.assertNotEqual(0, status, log_lines)
8486
self.assertTrue(
85-
'Failed to open file: /file/that/does/not/exist' in msg)
87+
any('Failed to open file: "/file/that/does/not/exist"' in line
88+
for line in log_lines))
8689

8790
def test_bad_rules_file(self):
88-
self.apply_config('bad_rules_file')
89-
90-
msg = self.orch.wait_for_log_message(
91-
'nginx',
92-
'.*Initialising security library failed.*',
93-
timeout_secs=5)
94-
self.assertTrue('Failed to open file: /bad/rules/file' in msg)
91+
status, log_lines = self._test_config("bad_rules_file")
92+
self.assertNotEqual(0, status, log_lines)
93+
self.assertTrue(
94+
any('Failed to open file: "/bad/rules/file' in line
95+
for line in log_lines))
9596

9697
def test_bad_pool_name(self):
97-
conf_path = Path(__file__).parent / 'conf/http_bad_thread_pool.conf'
98-
conf_text = conf_path.read_text()
99-
status, log_lines = self.orch.nginx_replace_config(
100-
conf_text, conf_path.name)
98+
status, log_lines = self._test_config("bad_thread_pool")
10199
self.assertNotEqual(0, status, log_lines)
102100

103101
self.assertTrue(
104102
any('datadog_waf_thread_pool_name: "bad_thread_pool" not found' in
105103
line for line in log_lines))
106104

107105
def test_multiple_pools(self):
108-
self.apply_config('multiple_thread_pools')
106+
self.apply_config("multiple_thread_pools")
109107

110-
headers = {'User-Agent': 'dd-test-scanner-log-block'}
108+
headers = {"User-Agent": "dd-test-scanner-log-block"}
111109
status, _, _ = self.orch.send_nginx_http_request(
112-
'/http/a', 80, headers)
110+
"/http/a", 80, headers)
113111
self.assertEqual(status, 403)
114112

115-
headers = {'User-Agent': 'dd-test-scanner-log-block'}
113+
headers = {"User-Agent": "dd-test-scanner-log-block"}
116114
status, _, _ = self.orch.send_nginx_http_request(
117-
'/local/', 80, headers)
115+
"/local/", 80, headers)
118116
self.assertEqual(status, 403)
119117

120-
headers = {'User-Agent': 'dd-test-scanner-log-block'}
118+
headers = {"User-Agent": "dd-test-scanner-log-block"}
121119
status, _, _ = self.orch.send_nginx_http_request(
122-
'/unmonitored/index.html', 80, headers)
120+
"/unmonitored/index.html", 80, headers)
123121
self.assertEqual(status, 200)
124122

125123
def test_custom_obfuscation(self):
126-
waf_path = Path(__file__).parent / './conf/waf.json'
124+
waf_path = Path(__file__).parent / "./conf/waf.json"
127125
waf_text = waf_path.read_text()
128-
self.orch.nginx_replace_file('/tmp/waf.json', waf_text)
126+
self.orch.nginx_replace_file("/tmp/waf.json", waf_text)
129127

130-
self.apply_config('custom_obfuscation')
128+
self.apply_config("custom_obfuscation")
131129

132130
# Redaction by key
133131
# datadog_appsec_obfuscation_key_regex my.special.key;
134132
status, _, _ = self.orch.send_nginx_http_request(
135-
'/http/?my_special_key=matched+value', 80)
133+
"/http/?my_special_key=matched+value", 80)
136134
appsec_data = self.get_appsec_data()
137135
self.assertEqual(
138-
appsec_data['triggers'][0]['rule_matches'][0]['parameters'][0]
139-
['value'], '<Redacted>')
136+
appsec_data["triggers"][0]["rule_matches"][0]["parameters"][0]
137+
["value"],
138+
"<Redacted>",
139+
)
140140

141141
# Redaction by value
142142
# datadog_appsec_obfuscation_value_regex \Az.*;
143143
status, _, _ = self.orch.send_nginx_http_request(
144-
'/http/?the+key=z_matched+value', 80)
144+
"/http/?the+key=z_matched+value", 80)
145145
appsec_data = self.get_appsec_data()
146146
self.assertEqual(
147-
appsec_data['triggers'][0]['rule_matches'][0]['parameters'][0]
148-
['value'], '<Redacted>')
147+
appsec_data["triggers"][0]["rule_matches"][0]["parameters"][0]
148+
["value"],
149+
"<Redacted>",
150+
)
149151

150152
def test_no_obfuscation(self):
151-
waf_path = Path(__file__).parent / './conf/waf.json'
153+
waf_path = Path(__file__).parent / "./conf/waf.json"
152154
waf_text = waf_path.read_text()
153-
self.orch.nginx_replace_file('/tmp/waf.json', waf_text)
155+
self.orch.nginx_replace_file("/tmp/waf.json", waf_text)
154156

155-
self.apply_config('no_obfuscation')
157+
self.apply_config("no_obfuscation")
156158

157-
self.orch.sync_service('agent')
159+
self.orch.sync_service("agent")
158160

159161
# No redaction by key
160162
status, _, _ = self.orch.send_nginx_http_request(
161-
'/http/?password=matched+value', 80)
163+
"/http/?password=matched+value", 80)
162164
appsec_data = self.get_appsec_data()
163165
self.assertEqual(
164-
appsec_data['triggers'][0]['rule_matches'][0]['parameters'][0]
165-
['value'], 'matched value')
166+
appsec_data["triggers"][0]["rule_matches"][0]["parameters"][0]
167+
["value"],
168+
"matched value",
169+
)

test/cases/smoke/conf/minimal.conf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# nginx config, and "index.html".
44
load_module /datadog-tests/ngx_http_datadog_module.so;
55

6-
worker_processes 0;
6+
worker_processes 1;
77

88
events {
99
worker_connections 1024;

test/cases/smoke/conf/nginx.conf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# nginx config, and "index.html".
44
load_module /datadog-tests/ngx_http_datadog_module.so;
55

6-
worker_processes 0;
6+
worker_processes 1;
77

88
events {
99
worker_connections 1024;

0 commit comments

Comments
 (0)