Commit af440aa

Convert tests/benchmarks/ pytest-benchmark benchmarks into benchmarks/ asv benchmarks
1 parent a1bcb3e commit af440aa

File tree

5 files changed (+60, -74 lines)

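These benchmarks follow asv's (airspeed velocity) naming conventions: asv imports the modules in benchmarks/, collects the classes it finds, times any method whose name starts with time_, and calls setup/teardown around each measurement. A minimal sketch of that convention, using a hypothetical class and workload that are not part of this commit:

    # Hypothetical asv benchmark illustrating the conventions used below:
    # methods named `time_*` are timed; `setup` runs before each measurement.
    class ExampleBenchmark:
        def setup(self):
            self.values = list(range(10_000))  # hypothetical workload

        def time_sum_values(self):
            sum(self.values)

With an asv.conf.json in place, these run via `asv run`, and `asv continuous` can compare two revisions to flag regressions.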

tests/benchmark/test_benchmark_json_schema.py renamed to benchmarks/bench_json_schema.py

Lines changed: 13 additions & 24 deletions
@@ -1,12 +1,12 @@
-import pytest
-
 import outlines
 
 outlines.disable_cache()
 
 from outlines.fsm.guide import RegexGuide  # noqa: E402
 from outlines.fsm.json_schema import build_regex_from_schema  # noqa: E402
 
+from .common import ensure_numba_compiled, setup_tokenizer  # noqa: E402
+
 simple_schema = """{
     "$defs": {
         "Armor": {
@@ -63,30 +63,19 @@
     "required": ["id", "work", "recording_artists"]
 }"""
 
-
 schemas = dict(simple_schema=simple_schema, complex_schema=complex_schema)
 
 
-@pytest.mark.parametrize("schema_name", schemas.keys())
-def test_benchmark_json_schema_to_regex(benchmark, ensure_numba_compiled, schema_name):
-    """Benchmark convert json schema to regex"""
-    schema = schemas[schema_name]
-    benchmark.pedantic(
-        build_regex_from_schema,
-        args=(schema,),
-        rounds=8,
-    )
+class JsonSchemaBenchmark:
+    def setup(self):
+        self.tokenizer = setup_tokenizer()
+        ensure_numba_compiled(self.tokenizer)
 
+    def time_json_schema_to_regex(self):
+        for schema_name, schema in schemas.items():
+            build_regex_from_schema(schema)
 
-@pytest.mark.parametrize("schema_name", schemas.keys())
-def test_benchmark_json_schema_to_fsm(
-    benchmark, tokenizer, ensure_numba_compiled, schema_name
-):
-    """Benchmark compile json schema as FSM"""
-    schema = schemas[schema_name]
-    regex = build_regex_from_schema(schema)
-    benchmark.pedantic(
-        RegexGuide,
-        args=(regex, tokenizer),
-        rounds=8,
-    )
+    def time_json_schema_to_fsm(self):
+        for schema_name, schema in schemas.items():
+            regex = build_regex_from_schema(schema)
+            RegexGuide(regex, self.tokenizer)

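Note that the @pytest.mark.parametrize cases become a plain loop inside each time_* method, so asv reports one aggregate time over all schemas rather than one result per schema. If per-schema numbers were wanted, asv's params/param_names attributes could restore the split. A sketch under the assumption that the schemas dict and imports from the file above are in scope; the class and method names here are hypothetical:

    # Hypothetical parametrized variant: asv passes each entry of `params`
    # to setup() and to the timed method, reporting one result per schema.
    class JsonSchemaToRegexBenchmark:
        params = ["simple_schema", "complex_schema"]
        param_names = ["schema_name"]

        def setup(self, schema_name):
            self.schema = schemas[schema_name]

        def time_build_regex(self, schema_name):
            build_regex_from_schema(self.schema)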
benchmarks/bench_numba_compile.py

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
+import importlib
+
+import interegular
+import numba
+
+import outlines
+
+from .common import setup_tokenizer
+
+outlines.disable_cache()
+
+
+class NumbaCompileBenchmark:
+    def setup(self):
+        from outlines.fsm import regex
+
+        self.tokenizer = setup_tokenizer()
+        self.regex = regex
+        original_njit = numba.njit
+
+        def mock_njit(*args, **kwargs):
+            kwargs["cache"] = False
+            return original_njit(*args, **kwargs)
+
+        self.original_njit = original_njit
+        numba.njit = mock_njit
+        importlib.reload(self.regex)
+        self.regex_pattern, _ = self.regex.make_deterministic_fsm(
+            interegular.parse_pattern("a").to_fsm().reduce()
+        )
+
+    def teardown(self):
+        numba.njit = self.original_njit
+
+    def time_compile_numba(self):
+        self.regex.create_fsm_index_tokenizer(self.regex_pattern, self.tokenizer)
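The mock_njit wrapper matters because numba's cache=True would serve compiled machine code from an on-disk cache, so every run after the first would measure a cache hit instead of compilation; forcing cache=False and reloading outlines.fsm.regex makes each run pay the full JIT cost. A self-contained sketch of that effect (the function and timing here are illustrative, not from the commit):

    import time

    import numba

    @numba.njit(cache=False)  # no on-disk cache: every process compiles afresh
    def add_one(x):
        return x + 1

    start = time.perf_counter()
    add_one(1)  # first call triggers JIT compilation, the cost being benchmarked
    print(f"compile + first call: {time.perf_counter() - start:.3f}s")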
Lines changed: 10 additions & 13 deletions
@@ -1,7 +1,7 @@
-import pytest
-
 import outlines
 
+from .common import ensure_numba_compiled, setup_tokenizer
+
 outlines.disable_cache()
 
 from outlines.fsm.guide import RegexGuide  # noqa: E402
@@ -19,14 +19,11 @@
 }
 
 
-@pytest.mark.parametrize("regex_name", regex_samples.keys())
-def test_benchmark_regex_to_fsm(
-    benchmark, tokenizer, ensure_numba_compiled, regex_name
-):
-    """Benchmark converting regex to FSM"""
-    regex_str = regex_samples[regex_name]
-    benchmark.pedantic(
-        RegexGuide,
-        args=(regex_str, tokenizer),
-        rounds=8,
-    )
+class RegexFsmBenchmark:
+    def setup(self):
+        self.tokenizer = setup_tokenizer()
+        ensure_numba_compiled(self.tokenizer)
+
+    def time_regex_to_fsm(self):
+        for regex_name, regex_str in regex_samples.items():
+            RegexGuide(regex_str, self.tokenizer)
benchmarks/common.py

Lines changed: 1 addition & 4 deletions
@@ -1,17 +1,14 @@
-import pytest
 from transformers import AutoTokenizer
 
 from outlines.fsm.guide import RegexGuide
 from outlines.models.transformers import TransformerTokenizer
 
 
-@pytest.fixture
-def tokenizer():
+def setup_tokenizer():
     tokenizer = AutoTokenizer.from_pretrained("gpt2")
     return TransformerTokenizer(tokenizer)
 
 
-@pytest.fixture
 def ensure_numba_compiled(tokenizer):
     RegexGuide("a", tokenizer)
     return True
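Since asv has no fixture injection, the former tokenizer fixture becomes a plain setup_tokenizer() helper that benchmarks call explicitly, and ensure_numba_compiled builds a trivial guide for the regex "a" to warm numba's JIT so the first timed iteration excludes compilation. A sketch of the resulting call pattern (the class name is hypothetical; the helpers are the ones above):

    # Hypothetical benchmark showing how the helpers replace pytest fixtures:
    class SomeBenchmark:
        def setup(self):
            self.tokenizer = setup_tokenizer()     # was the `tokenizer` fixture
            ensure_numba_compiled(self.tokenizer)  # JIT warm-up, kept out of timing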

tests/benchmark/test_benchmark_numba_compile.py

Lines changed: 0 additions & 33 deletions
This file was deleted; its benchmark was rewritten as benchmarks/bench_numba_compile.py above.
