Skip to content

Commit 90f7144

Browse files
committed
Use more performant fsm backend
1 parent 6035e86 commit 90f7144

14 files changed: 24 additions and 2,173 deletions.

.github/workflows/tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ jobs:
2222
runs-on: ubuntu-latest
2323
strategy:
2424
matrix:
25-
python-version: ["3.8", "3.10"]
25+
python-version: ["3.10"]
2626
steps:
2727
- uses: actions/checkout@v3
2828
- name: Set up Python ${{ matrix.python-version }}

benchmarks/bench_cfg_guide.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
77
from outlines.fsm.guide import CFGGuide
88
from outlines.models.transformers import TransformerTokenizer
99

10-
from .common import ensure_numba_compiled
11-
1210
random.seed(42)
1311

1412

@@ -30,9 +28,6 @@ class CFGGuideBenchmark:
3028

3129
def setup(self, grammar_name):
3230
self.tokenizer = get_tiny_tokenizer()
33-
ensure_numba_compiled(
34-
self.tokenizer
35-
) # numba not currently used, but will be in the future
3631
self.prebuilt_cfg_guide = CFGGuide(
3732
benched_grammars[grammar_name], self.tokenizer
3833
)

benchmarks/bench_json_schema.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from outlines.fsm.guide import RegexGuide
33
from outlines.fsm.json_schema import build_regex_from_schema
44

5-
from .common import ensure_numba_compiled, setup_tokenizer # noqa: E402
5+
from .common import setup_tokenizer # noqa: E402
66

77
simple_schema = """{
88
"$defs": {
@@ -69,7 +69,6 @@ class JsonSchemaBenchmark:
6969
def setup(self, schema_name):
7070
self.tokenizer = setup_tokenizer()
7171
self.schema = schemas[schema_name]
72-
ensure_numba_compiled(self.tokenizer)
7372

7473
@cache_disabled()
7574
def time_json_schema_to_regex(self, schema_name):

benchmarks/bench_numba_compile.py

Lines changed: 0 additions & 34 deletions
This file was deleted.

benchmarks/bench_regex_guide.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from outlines.caching import cache_disabled
22
from outlines.fsm.guide import RegexGuide
33

4-
from .common import ensure_numba_compiled, setup_tokenizer
4+
from .common import setup_tokenizer
55

66
regex_samples = {
77
"email": r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",
@@ -21,7 +21,6 @@ class RegexGuideBenchmark:
2121

2222
def setup(self, pattern_name):
2323
self.tokenizer = setup_tokenizer()
24-
ensure_numba_compiled(self.tokenizer)
2524
self.pattern = regex_samples[pattern_name]
2625

2726
@cache_disabled()
@@ -34,7 +33,6 @@ class MemoryRegexGuideBenchmark:
3433

3534
def setup(self, pattern_name):
3635
self.tokenizer = setup_tokenizer()
37-
ensure_numba_compiled(self.tokenizer)
3836
self.pattern = regex_samples[pattern_name]
3937

4038
@cache_disabled()

benchmarks/common.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,8 @@
11
from transformers import AutoTokenizer
22

3-
from outlines.fsm.guide import RegexGuide
43
from outlines.models.transformers import TransformerTokenizer
54

65

76
def setup_tokenizer():
87
tokenizer = AutoTokenizer.from_pretrained("gpt2")
98
return TransformerTokenizer(tokenizer)
10-
11-
12-
def ensure_numba_compiled(tokenizer):
13-
RegexGuide("a", tokenizer)
14-
return True

outlines/fsm/fsm.py

Lines changed: 0 additions & 47 deletions
This file was deleted.

0 commit comments

Comments (0)