
Commit b5c69cc

Merge pull request #17 from lapp0/introduce-asv-ci-workflow-883
Introduce asv ci workflow 883
2 parents 7863f8e + 0154457, commit b5c69cc

9 files changed: +168 -74 lines
New workflow file ("Benchmark PR")

Lines changed: 72 additions & 0 deletions

```yaml
name: Benchmark PR

on:
  pull_request:
    branches: [main]

permissions:
  contents: read  # Read access for repository contents
  pull-requests: write  # Write access for pull requests

env:
  PYTHON_VERSION: "3.10"
  WORKING_DIR: ${{ github.workspace }}/benchmarks

jobs:
  benchmark-pr:
    runs-on: ubuntu-latest

    defaults:
      run:
        working-directory: ${{ env.WORKING_DIR }}

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install asv virtualenv lf-asv-formatter

      - name: Create ASV machine config file
        run: asv machine --machine gh-runner --yes

      - name: Save comparison of PR against main branch
        run: |
          # Prepare main branch for comparison
          git remote add upstream https://github.com/${{ github.repository }}.git
          git fetch upstream main

          # Run benchmarks, writing comment contents to ./output
          asv continuous upstream/main HEAD \
            --factor 1.1 --sort ratio --split --interleave-rounds -a repeat=3
          asv compare upstream/main HEAD --factor 1.1 --sort ratio --split | tee output
          python -m lf_asv_formatter --asv_version "$(asv --version)"
          printf "Benchmark Suite Results:\n\n" >> comment_body
          cat output >> comment_body

      # from https://github.com/hombit/load_ztfdr_for_tape/blob/9acf7c83/.github/workflows/asv-pr.yml
      - name: Find benchmarks comment
        uses: peter-evans/find-comment@v2
        id: find-comment
        with:
          issue-number: ${{ github.event.pull_request.number }}
          comment-author: 'github-actions[bot]'
          body-includes: Benchmark Suite Results

      - name: Create or update benchmarks comment
        uses: peter-evans/create-or-update-comment@v3
        with:
          comment-id: ${{ steps.find-comment.outputs.comment-id }}
          issue-number: ${{ github.event.pull_request.number }}
          body-path: ${{ env.WORKING_DIR }}/comment_body
          edit-mode: replace
```
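The comparison step can be reproduced locally before pushing. A minimal sketch, assuming `asv` and `virtualenv` are installed, the repository has an `upstream` remote with `main` fetched, and the script runs from the repository root (the machine name `local` is arbitrary, not part of this PR):

```python
# Local sketch of the workflow's benchmark-comparison step (assumptions above).
import subprocess

def asv(*args):
    # Run an asv command from the benchmarks/ directory, raising on failure.
    subprocess.run(["asv", *args], cwd="benchmarks", check=True)

asv("machine", "--machine", "local", "--yes")
asv("continuous", "upstream/main", "HEAD",
    "--factor", "1.1", "--sort", "ratio", "--split")
asv("compare", "upstream/main", "HEAD", "--factor", "1.1", "--sort", "ratio")
```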

.gitignore

Lines changed: 1 addition & 0 deletions

```diff
@@ -6,3 +6,4 @@ docs/build
 .idea/
 *.gguf
 .venv
+benchmarks/results
```

benchmarks/__init__.py

Whitespace-only changes (an empty `__init__.py` makes `benchmarks/` a package, so the bench modules can use relative imports like `from .common import ...`).

benchmarks/asv.conf.json

Lines changed: 20 additions & 0 deletions

```json
{
    "version": 1,
    "project": "Outlines",
    "project_url": "https://outlines-dev.github.io/outlines/",
    "repo": "..",
    "branches": [
        "HEAD"
    ],
    "build_command": [
        "pip install .[test]",
        "python -m build --wheel -o {build_cache_dir} {build_dir}"
    ],
    "environment_type": "virtualenv",
    "show_commit_url": "https://github.com/lapp0/outlines/commit/",
    "benchmark_dir": ".",
    "env_dir": "env",
    "results_dir": "results",
    "html_dir": "html",
    "build_cache_size": 8
}
```
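Since `benchmark_dir` is `"."`, asv collects suites from the `bench_*.py` modules in this directory purely by naming convention. A minimal sketch of those conventions, using a hypothetical `bench_example.py` that is not part of this PR:

```python
# Hypothetical benchmarks/bench_example.py showing the asv naming conventions
# the real bench_* modules below rely on.

def time_squares():
    # Top-level callables named time_* are timed benchmarks.
    [i * i for i in range(1000)]


class ExampleSuite:
    # Each entry in params yields a separate benchmark variant.
    params = ["abc", "abcdefghij"]

    def setup(self, text):
        # setup() runs before every measurement and is excluded from timings.
        self.parts = list(text)

    def time_join(self, text):
        "".join(self.parts)

    def peakmem_join(self, text):
        # peakmem_* methods record peak memory usage instead of wall time.
        "".join(self.parts)
```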

tests/benchmark/test_benchmark_json_schema.py renamed to benchmarks/bench_json_schema.py

Lines changed: 14 additions & 24 deletions

```diff
@@ -1,12 +1,12 @@
-import pytest
-
 import outlines
 
 outlines.disable_cache()
 
 from outlines.fsm.guide import RegexGuide  # noqa: E402
 from outlines.fsm.json_schema import build_regex_from_schema  # noqa: E402
 
+from .common import ensure_numba_compiled, setup_tokenizer  # noqa: E402
+
 simple_schema = """{
     "$defs": {
         "Armor": {
@@ -63,30 +63,20 @@
     "required": ["id", "work", "recording_artists"]
 }"""
 
-
 schemas = dict(simple_schema=simple_schema, complex_schema=complex_schema)
 
 
-@pytest.mark.parametrize("schema_name", schemas.keys())
-def test_benchmark_json_schema_to_regex(benchmark, ensure_numba_compiled, schema_name):
-    """Benchmark convert json schema to regex"""
-    schema = schemas[schema_name]
-    benchmark.pedantic(
-        build_regex_from_schema,
-        args=(schema,),
-        rounds=8,
-    )
+class JsonSchemaBenchmark:
+    params = schemas.keys()
+
+    def setup(self, schema_name):
+        self.tokenizer = setup_tokenizer()
+        self.schema = schemas[schema_name]
+        ensure_numba_compiled(self.tokenizer)
 
+    def time_json_schema_to_regex(self, schema_name):
+        build_regex_from_schema(self.schema)
 
-@pytest.mark.parametrize("schema_name", schemas.keys())
-def test_benchmark_json_schema_to_fsm(
-    benchmark, tokenizer, ensure_numba_compiled, schema_name
-):
-    """Benchmark compile json schema as FSM"""
-    schema = schemas[schema_name]
-    regex = build_regex_from_schema(schema)
-    benchmark.pedantic(
-        RegexGuide,
-        args=(regex, tokenizer),
-        rounds=8,
-    )
+    def time_json_schema_to_fsm(self, schema_name):
+        regex = build_regex_from_schema(self.schema)
+        RegexGuide(regex, self.tokenizer)
```
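Because these suites are ordinary Python classes, they can be smoke-tested without the asv runner; a quick sketch, assuming the package's `[test]` extras are installed and the `gpt2` tokenizer can be downloaded:

```python
# Drive the benchmark class by hand to check it runs (not a timing run).
from benchmarks.bench_json_schema import JsonSchemaBenchmark

bench = JsonSchemaBenchmark()
bench.setup("simple_schema")                      # tokenizer + numba warm-up
bench.time_json_schema_to_regex("simple_schema")  # schema -> regex
bench.time_json_schema_to_fsm("simple_schema")    # regex -> RegexGuide FSM
```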

benchmarks/bench_numba_compile.py

Lines changed: 36 additions & 0 deletions

```python
import importlib

import interegular
import numba

import outlines

from .common import setup_tokenizer

outlines.disable_cache()


class NumbaCompileBenchmark:
    def setup(self):
        from outlines.fsm import regex

        self.tokenizer = setup_tokenizer()
        self.regex = regex
        original_njit = numba.njit

        def mock_njit(*args, **kwargs):
            kwargs["cache"] = False
            return original_njit(*args, **kwargs)

        self.original_njit = original_njit
        numba.njit = mock_njit
        importlib.reload(self.regex)
        self.regex_pattern, _ = self.regex.make_deterministic_fsm(
            interegular.parse_pattern("a").to_fsm().reduce()
        )

    def teardown(self):
        numba.njit = self.original_njit

    def time_compile_numba(self):
        self.regex.create_fsm_index_tokenizer(self.regex_pattern, self.tokenizer)
```
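The `mock_njit` patch above exists because `numba.njit(cache=True)` would serve compiled machine code from the on-disk cache, so every round after the first would measure a cache load rather than compilation. The pattern in isolation, with a hypothetical `add_one` function standing in for the reloaded module:

```python
# Cache-busting pattern distilled: override cache=True before decoration so
# each benchmark round pays the full JIT compilation cost.
import numba

original_njit = numba.njit

def njit_no_cache(*args, **kwargs):
    kwargs["cache"] = False  # drop any cache=True requested at the call site
    return original_njit(*args, **kwargs)

numba.njit = njit_no_cache
try:
    @numba.njit(cache=True)  # the override above silently disables caching
    def add_one(x):
        return x + 1

    add_one(1)  # first call triggers a fresh compile
finally:
    numba.njit = original_njit  # restore, mirroring teardown() above
```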
Lines changed: 24 additions & 13 deletions

```diff
@@ -1,7 +1,7 @@
-import pytest
-
 import outlines
 
+from .common import ensure_numba_compiled, setup_tokenizer
+
 outlines.disable_cache()
 
 from outlines.fsm.guide import RegexGuide  # noqa: E402
@@ -19,14 +19,25 @@
 }
 
 
-@pytest.mark.parametrize("regex_name", regex_samples.keys())
-def test_benchmark_regex_to_fsm(
-    benchmark, tokenizer, ensure_numba_compiled, regex_name
-):
-    """Benchmark converting regex to FSM"""
-    regex_str = regex_samples[regex_name]
-    benchmark.pedantic(
-        RegexGuide,
-        args=(regex_str, tokenizer),
-        rounds=8,
-    )
+class RegexGuideBenchmark:
+    params = regex_samples.keys()
+
+    def setup(self, pattern_name):
+        self.tokenizer = setup_tokenizer()
+        ensure_numba_compiled(self.tokenizer)
+        self.pattern = regex_samples[pattern_name]
+
+    def time_regex_to_guide(self, pattern_name):
+        RegexGuide(self.pattern, self.tokenizer)
+
+
+class MemoryRegexGuideBenchmark:
+    params = ["simple_phone", "complex_span_constrained_relation_extraction"]
+
+    def setup(self, pattern_name):
+        self.tokenizer = setup_tokenizer()
+        ensure_numba_compiled(self.tokenizer)
+        self.pattern = regex_samples[pattern_name]
+
+    def peakmem_regex_to_guide(self, pattern_name):
+        RegexGuide(self.pattern, self.tokenizer)
```
Lines changed: 1 addition & 4 deletions

```diff
@@ -1,17 +1,14 @@
-import pytest
 from transformers import AutoTokenizer
 
 from outlines.fsm.guide import RegexGuide
 from outlines.models.transformers import TransformerTokenizer
 
 
-@pytest.fixture
-def tokenizer():
+def setup_tokenizer():
     tokenizer = AutoTokenizer.from_pretrained("gpt2")
     return TransformerTokenizer(tokenizer)
 
 
-@pytest.fixture
 def ensure_numba_compiled(tokenizer):
     RegexGuide("a", tokenizer)
     return True
```

tests/benchmark/test_benchmark_numba_compile.py

Lines changed: 0 additions & 33 deletions

This file was deleted (its contents were ported to benchmarks/bench_numba_compile.py above).
