
Commit 3222c42

tests: parametrize benchmark tests
In the previous implementation, the timeout value had to be adjusted every time a benchmark test was added. By parametrizing the benchmark tests, the time required for each test becomes predictable, eliminating the need to adjust the timeout value.

Signed-off-by: Tomoya Iwata <[email protected]>
1 parent 70ac154 commit 3222c42
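
How the parametrization works: pytest evaluates the argument list of @pytest.mark.parametrize at collection time, so get_executables() runs once and every discovered benchmark binary becomes its own test case, each with its own 600-second budget from @pytest.mark.timeout. A minimal sketch of the pattern, with hypothetical names and assuming the pytest-timeout plugin this suite already uses:

import pytest


def collect_items():
    # Hypothetical stand-in for get_executables(): runs once at collection
    # time, and each returned value becomes a separate test case.
    return ["bench_a", "bench_b"]


@pytest.mark.timeout(600)  # the budget applies to each generated case individually
@pytest.mark.parametrize("item", collect_items())
def test_one_item(item):
    # Stand-in for running one benchmark binary; no single test has to fit
    # every benchmark inside the timeout anymore.
    assert item.startswith("bench_")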

File tree

1 file changed (+86, -49 lines)


tests/integration_tests/performance/test_benchmarks.py (+86, -49)

@@ -5,6 +5,7 @@
 import json
 import logging
 import platform
+import re
 import shutil
 from pathlib import Path
 
@@ -17,78 +18,114 @@
 LOGGER = logging.getLogger(__name__)
 
 
+def get_executables():
+    """
+    Get a list of binaries for benchmarking
+    """
+
+    # Passing --message-format json to cargo tells it to print its log in a json format. At the end, instead of the
+    # usual "placed executable <...> at <...>" we'll get a json object with an 'executable' key, from which we
+    # extract the path to the compiled benchmark binary.
+    _, stdout, _ = cargo(
+        "bench",
+        f"--all --quiet --target {platform.machine()}-unknown-linux-musl --message-format json --no-run",
+    )
+
+    executables = []
+    for line in stdout.split("\n"):
+        if line:
+            msg = json.loads(line)
+            executable = msg.get("executable")
+            if executable:
+                executables.append(executable)
+
+    return executables
+
+
 @pytest.mark.no_block_pr
-@pytest.mark.timeout(900)
-def test_no_regression_relative_to_target_branch():
+@pytest.mark.timeout(600)
+@pytest.mark.parametrize("executable", get_executables())
+def test_no_regression_relative_to_target_branch(executable):
     """
     Run the microbenchmarks in this repository, comparing results from pull
     request target branch against what's achieved on HEAD
     """
+    run_criterion = get_run_criterion(executable)
+    compare_results = get_compare_results(executable)
     git_ab_test(run_criterion, compare_results)
 
 
-def run_criterion(firecracker_checkout: Path, is_a: bool) -> Path:
+def get_run_criterion(executable):
     """
-    Executes all benchmarks by running "cargo bench --no-run", finding the executables, and running them pinned to some CPU
+    Get a function that executes the specified benchmark, running it pinned to some CPU
     """
-    baseline_name = "a_baseline" if is_a else "b_baseline"
-
-    with contextlib.chdir(firecracker_checkout):
-        # Passing --message-format json to cargo tells it to print its log in a json format. At the end, instead of the
-        # usual "placed executable <...> at <...>" we'll get a json object with an 'executable' key, from which we
-        # extract the path to the compiled benchmark binary.
-        _, stdout, _ = cargo(
-            "bench",
-            f"--all --quiet --target {platform.machine()}-unknown-linux-musl --message-format json --no-run",
-        )
 
-        executables = []
-        for line in stdout.split("\n"):
-            if line:
-                msg = json.loads(line)
-                executable = msg.get("executable")
-                if executable:
-                    executables.append(executable)
+    def _run_criterion(firecracker_checkout: Path, is_a: bool) -> Path:
+        baseline_name = "a_baseline" if is_a else "b_baseline"
 
-        for executable in executables:
+        with contextlib.chdir(firecracker_checkout):
             utils.check_output(
                 f"CARGO_TARGET_DIR=build/cargo_target taskset -c 1 {executable} --bench --save-baseline {baseline_name}"
             )
 
-    return firecracker_checkout / "build" / "cargo_target" / "criterion"
+        return firecracker_checkout / "build" / "cargo_target" / "criterion"
+
+    return _run_criterion
+
 
+def get_compare_results(executable):
+    """
+    Get a function that compares the two recorded criterion baselines for regressions, assuming that "A" is the baseline from main
+    """
 
-def compare_results(location_a_baselines: Path, location_b_baselines: Path):
-    """Compares the two recorded criterion baselines for regressions, assuming that "A" is the baseline from main"""
-    for benchmark in location_b_baselines.glob("*"):
-        data = json.loads(
-            (benchmark / "b_baseline" / "estimates.json").read_text("utf-8")
+    def _compare_results(location_a_baselines: Path, location_b_baselines: Path):
+
+        list_result = utils.check_output(
+            f"CARGO_TARGET_DIR=build/cargo_target {executable} --bench --list"
         )
 
-        average_ns = data["mean"]["point_estimate"]
+        # Format a string like `page_fault #2: benchmark` into one like `page_fault_2`,
+        # because criterion creates a directory like `page_fault_2` under `cargo_target/criterion/`.
+        bench_marks = [
+            re.sub(r"\s#(?P<sub_id>[1-9]+)", r"_\g<sub_id>", i.split(":")[0])
+            for i in list_result.stdout.split("\n")
+            if i.endswith(": benchmark")
+        ]
+
+        for benchmark in bench_marks:
+            data = json.loads(
+                (
+                    location_b_baselines / benchmark / "b_baseline" / "estimates.json"
+                ).read_text("utf-8")
+            )
 
-        LOGGER.info("%s mean: %iµs", benchmark.name, average_ns / 1000)
+            average_ns = data["mean"]["point_estimate"]
 
-    # Assumption: location_b_baseline = cargo_target of current working directory. So just copy the a_baselines here
-    # to do the comparison
-    for benchmark in location_a_baselines.glob("*"):
-        shutil.copytree(
-            benchmark / "a_baseline",
-            location_b_baselines / benchmark.name / "a_baseline",
+            LOGGER.info("%s mean: %iµs", benchmark, average_ns / 1000)
+
+        # Assumption: location_b_baselines = cargo_target of the current working directory.
+        # So just copy the a_baselines here to do the comparison.
+
+        for benchmark in bench_marks:
+            shutil.copytree(
+                location_a_baselines / benchmark / "a_baseline",
+                location_b_baselines / benchmark / "a_baseline",
+            )
+
+        bench_result = utils.check_output(
+            f"CARGO_TARGET_DIR=build/cargo_target {executable} --bench --baseline a_baseline --load-baseline b_baseline",
+            True,
+            Path.cwd().parent,
        )
 
-    _, stdout, _ = cargo(
-        "bench",
-        f"--all --target {platform.machine()}-unknown-linux-musl",
-        "--baseline a_baseline --load-baseline b_baseline",
-    )
+        regressions_only = "\n\n".join(
+            result
+            for result in bench_result.stdout.split("\n\n")
+            if "Performance has regressed." in result
+        )
 
-    regressions_only = "\n\n".join(
-        result
-        for result in stdout.split("\n\n")
-        if "Performance has regressed." in result
-    )
+        # If this string is anywhere in stdout, then at least one of our benchmarks
+        # is now performing worse with the PR changes.
+        assert not regressions_only, "\n" + regressions_only
 
-    # If this string is anywhere in stdout, then at least one of our benchmarks
-    # is now performing worse with the PR changes.
-    assert not regressions_only, "\n" + regressions_only
+    return _compare_results
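
For reference, a self-contained sketch (not part of the commit, using made-up sample data) of the two parsing steps the new helpers rely on: pulling executable paths out of cargo's --message-format json output, and normalizing criterion benchmark IDs such as page_fault #2 into the page_fault_2 directory names that criterion creates under cargo_target/criterion/:

import json
import re

# Trimmed stand-ins for lines of `cargo bench --message-format json --no-run`
# output; real messages carry many more fields.
sample_cargo_lines = [
    '{"reason":"compiler-artifact","executable":"/tmp/build/benches-1a2b3c"}',
    '{"reason":"compiler-artifact","executable":null}',
    '{"reason":"build-finished"}',
]

executables = []
for line in sample_cargo_lines:
    if line:
        msg = json.loads(line)
        executable = msg.get("executable")
        if executable:
            executables.append(executable)

assert executables == ["/tmp/build/benches-1a2b3c"]

# Stand-ins for `<executable> --bench --list` output: criterion suffixes a
# repeated benchmark name with ` #N` when listing, but names the baseline
# directory `<name>_N`.
sample_list_lines = [
    "page_fault: benchmark",
    "page_fault #2: benchmark",
    "queue_pop: benchmark",
    "",
]

bench_marks = [
    re.sub(r"\s#(?P<sub_id>[1-9]+)", r"_\g<sub_id>", i.split(":")[0])
    for i in sample_list_lines
    if i.endswith(": benchmark")
]

assert bench_marks == ["page_fault", "page_fault_2", "queue_pop"]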
