5 | 5 | import json
6 | 6 | import logging
7 | 7 | import platform
| 8 | +import re
8 | 9 | import shutil
9 | 10 | from pathlib import Path
10 | 11 |
17 | 18 | LOGGER = logging.getLogger(__name__)
18 | 19 |
19 | 20 |
| 21 | +def get_executables():
| 22 | +    """
| 23 | +    Get a list of binaries for benchmarking
| 24 | +    """
| 25 | +
| 26 | +    # Passing --message-format json to cargo tells it to print its log in a json format. At the end, instead of the
| 27 | +    # usual "placed executable <...> at <...>" we'll get a json object with an 'executable' key, from which we
| 28 | +    # extract the path to the compiled benchmark binary.
| 29 | +    _, stdout, _ = cargo(
| 30 | +        "bench",
| 31 | +        f"--all --quiet --target {platform.machine()}-unknown-linux-musl --message-format json --no-run",
| 32 | +    )
| 33 | +
| 34 | +    executables = []
| 35 | +    for line in stdout.split("\n"):
| 36 | +        if line:
| 37 | +            msg = json.loads(line)
| 38 | +            executable = msg.get("executable")
| 39 | +            if executable:
| 40 | +                executables.append(executable)
| 41 | +
| 42 | +    return executables
| 43 | +
| 44 | +
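For reference, the JSON lines this loop consumes look roughly like the sketch below. Only the 'executable' key is relied upon by get_executables(); the 'reason' field and the paths are illustrative stand-ins rather than captured cargo output.

import json

# Hand-written stand-ins for two lines of `cargo ... --message-format json` output.
sample_stdout = "\n".join(
    [
        '{"reason": "compiler-artifact", "executable": null}',
        '{"reason": "compiler-artifact", "executable": "/tmp/build/cargo_target/release/deps/benchmarks-0123abcd"}',
    ]
)

executables = [
    msg["executable"]
    for msg in map(json.loads, sample_stdout.split("\n"))
    if msg.get("executable")
]

assert executables == ["/tmp/build/cargo_target/release/deps/benchmarks-0123abcd"]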
20 | 45 | @pytest.mark.no_block_pr
21 | | -@pytest.mark.timeout(900)
22 | | -def test_no_regression_relative_to_target_branch():
| 46 | +@pytest.mark.timeout(600)
| 47 | +@pytest.mark.parametrize("executable", get_executables())
| 48 | +def test_no_regression_relative_to_target_branch(executable):
23 | 49 | """
|
24 | 50 | Run the microbenchmarks in this repository, comparing results from pull
|
25 | 51 | request target branch against what's achieved on HEAD
|
26 | 52 | """
|
| 53 | + run_criterion = get_run_criterion(executable) |
| 54 | + compare_results = get_compare_results(executable) |
27 | 55 | git_ab_test(run_criterion, compare_results)
28 | 56 |
29 | 57 |
30 | | -def run_criterion(firecracker_checkout: Path, is_a: bool) -> Path:
| 58 | +def get_run_criterion(executable):
31 | 59 |     """
32 | | -    Executes all benchmarks by running "cargo bench --no-run", finding the executables, and running them pinned to some CPU
| 60 | +    Get a function that runs the specified benchmark executable pinned to some CPU
33 | 61 |     """
34 | | -    baseline_name = "a_baseline" if is_a else "b_baseline"
35 | | -
36 | | -    with contextlib.chdir(firecracker_checkout):
37 | | -        # Passing --message-format json to cargo tells it to print its log in a json format. At the end, instead of the
38 | | -        # usual "placed executable <...> at <...>" we'll get a json object with an 'executable' key, from which we
39 | | -        # extract the path to the compiled benchmark binary.
40 | | -        _, stdout, _ = cargo(
41 | | -            "bench",
42 | | -            f"--all --quiet --target {platform.machine()}-unknown-linux-musl --message-format json --no-run",
43 | | -        )
44 | 62 |
45 | | -        executables = []
46 | | -        for line in stdout.split("\n"):
47 | | -            if line:
48 | | -                msg = json.loads(line)
49 | | -                executable = msg.get("executable")
50 | | -                if executable:
51 | | -                    executables.append(executable)
| 63 | +    def _run_criterion(firecracker_checkout: Path, is_a: bool) -> Path:
| 64 | +        baseline_name = "a_baseline" if is_a else "b_baseline"
52 | 65 |
53 | | -        for executable in executables:
| 66 | +        with contextlib.chdir(firecracker_checkout):
54 | 67 |             utils.check_output(
55 | 68 |                 f"CARGO_TARGET_DIR=build/cargo_target taskset -c 1 {executable} --bench --save-baseline {baseline_name}"
56 | 69 |             )
57 | 70 |
58 | | -    return firecracker_checkout / "build" / "cargo_target" / "criterion"
| 71 | +        return firecracker_checkout / "build" / "cargo_target" / "criterion"
| 72 | +
| 73 | +    return _run_criterion
| 74 | +
59 | 75 |
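get_run_criterion above (and get_compare_results below) return closures so that git_ab_test can keep a two-callable interface while each parametrized test case binds a single benchmark binary. Below is a minimal sketch of the contract implied by the signatures, using a simplified stand-in for the real helper (which presumably also prepares the two checkouts from the target branch and HEAD):

from pathlib import Path


def simplified_ab_test(run_criterion, compare_results, checkout_a: Path, checkout_b: Path):
    # Record a criterion baseline once per checkout: "a_baseline" for the PR's
    # target branch, "b_baseline" for HEAD, then hand both criterion directories
    # to the comparator.
    location_a_baselines = run_criterion(checkout_a, True)
    location_b_baselines = run_criterion(checkout_b, False)
    compare_results(location_a_baselines, location_b_baselines)

Each parametrized case would then amount to simplified_ab_test(get_run_criterion(executable), get_compare_results(executable), checkout_a, checkout_b).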
| 76 | +def get_compare_results(executable):
| 77 | +    """
| 78 | +    Get a function that compares the two recorded criterion baselines for regressions, assuming that "A" is the baseline from main
| 79 | +    """
60 | 80 |
61 | | -def compare_results(location_a_baselines: Path, location_b_baselines: Path):
62 | | -    """Compares the two recorded criterion baselines for regressions, assuming that "A" is the baseline from main"""
63 | | -    for benchmark in location_b_baselines.glob("*"):
64 | | -        data = json.loads(
65 | | -            (benchmark / "b_baseline" / "estimates.json").read_text("utf-8")
| 81 | +    def _compare_results(location_a_baselines: Path, location_b_baselines: Path):
| 82 | +
| 83 | +        list_result = utils.check_output(
| 84 | +            f"CARGO_TARGET_DIR=build/cargo_target {executable} --bench --list"
66 | 85 |         )
67 | 86 |
68 | | -        average_ns = data["mean"]["point_estimate"]
| 87 | +        # Format a string like `page_fault #2: benchmark` into a string like `page_fault_2`,
| 88 | +        # because that is the directory criterion creates under `cargo_target/criterion/`.
| 89 | +        bench_marks = [
| 90 | +            re.sub(r"\s#(?P<sub_id>[1-9]+)", r"_\g<sub_id>", i.split(":")[0])
| 91 | +            for i in list_result.stdout.split("\n")
| 92 | +            if i.endswith(": benchmark")
| 93 | +        ]
| 94 | +
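For reference, the renaming this comprehension performs on criterion's `--bench --list` output behaves as in the sketch below; the listed benchmark ids are illustrative:

import re

listing = [
    "page_fault: benchmark",
    "page_fault #2: benchmark",
    "block_request #10: benchmark",
]

names = [
    re.sub(r"\s#(?P<sub_id>[1-9]+)", r"_\g<sub_id>", line.split(":")[0])
    for line in listing
    if line.endswith(": benchmark")
]

# ` #N` suffixes become `_N`, matching criterion's per-benchmark directory names.
assert names == ["page_fault", "page_fault_2", "block_request_10"]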
| 95 | +        for benchmark in bench_marks:
| 96 | +            data = json.loads(
| 97 | +                (
| 98 | +                    location_b_baselines / benchmark / "b_baseline" / "estimates.json"
| 99 | +                ).read_text("utf-8")
| 100 | +            )
69 | 101 |
70 | | -        LOGGER.info("%s mean: %iµs", benchmark.name, average_ns / 1000)
| 102 | +            average_ns = data["mean"]["point_estimate"]
71 | 103 |
72 | | -    # Assumption: location_b_baseline = cargo_target of current working directory. So just copy the a_baselines here
73 | | -    # to do the comparison
74 | | -    for benchmark in location_a_baselines.glob("*"):
75 | | -        shutil.copytree(
76 | | -            benchmark / "a_baseline",
77 | | -            location_b_baselines / benchmark.name / "a_baseline",
| 104 | +            LOGGER.info("%s mean: %iµs", benchmark, average_ns / 1000)
| 105 | +
| 106 | +        # Assumption: location_b_baseline = cargo_target of current working directory. So just copy the a_baselines here
| 107 | +        # to do the comparison
| 108 | +
| 109 | +        for benchmark in bench_marks:
| 110 | +            shutil.copytree(
| 111 | +                location_a_baselines / benchmark / "a_baseline",
| 112 | +                location_b_baselines / benchmark / "a_baseline",
| 113 | +            )
| 114 | +
| 115 | +        bench_result = utils.check_output(
| 116 | +            f"CARGO_TARGET_DIR=build/cargo_target {executable} --bench --baseline a_baseline --load-baseline b_baseline",
| 117 | +            True,
| 118 | +            Path.cwd().parent,
78 | 119 |         )
79 | 120 |
80 | | -    _, stdout, _ = cargo(
81 | | -        "bench",
82 | | -        f"--all --target {platform.machine()}-unknown-linux-musl",
83 | | -        "--baseline a_baseline --load-baseline b_baseline",
84 | | -    )
| 121 | +        regressions_only = "\n\n".join(
| 122 | +            result
| 123 | +            for result in bench_result.stdout.split("\n\n")
| 124 | +            if "Performance has regressed." in result
| 125 | +        )
85 | 126 |
86 | | -    regressions_only = "\n\n".join(
87 | | -        result
88 | | -        for result in stdout.split("\n\n")
89 | | -        if "Performance has regressed." in result
90 | | -    )
| 127 | +        # If this string is anywhere in stdout, then at least one of our benchmarks
| 128 | +        # is now performing worse with the PR changes.
| 129 | +        assert not regressions_only, "\n" + regressions_only
91 | 130 |
92 | | -    # If this string is anywhere in stdout, then at least one of our benchmarks
93 | | -    # is now performing worse with the PR changes.
94 | | -    assert not regressions_only, "\n" + regressions_only
| 131 | +    return _compare_results
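The closing filter and assert operate on criterion's comparison report, whose per-benchmark blocks are separated by blank lines. A small sketch with made-up report text (the layout mimics, but is not, criterion's exact output, and `block_read` is an illustrative benchmark name):

# Two blank-line-separated report blocks, one unchanged and one regressed.
report = "\n\n".join(
    [
        "page_fault    time: [4.1 µs 4.2 µs 4.3 µs]\n"
        "              No change in performance detected.",
        "block_read    time: [10.5 µs 10.7 µs 10.9 µs]\n"
        "              Performance has regressed.",
    ]
)

regressions_only = "\n\n".join(
    block for block in report.split("\n\n") if "Performance has regressed." in block
)

# Only the regressed block survives, so the assert in _compare_results would fire.
assert "block_read" in regressions_only and "page_fault" not in regressions_only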