Commit e5f52b1

Use more performant fsm backend
1 parent 6035e86 commit e5f52b1

14 files changed: +10 −2221 lines

benchmarks/bench_cfg_guide.py

Lines changed: 0 additions & 5 deletions
@@ -7,8 +7,6 @@
 from outlines.fsm.guide import CFGGuide
 from outlines.models.transformers import TransformerTokenizer
 
-from .common import ensure_numba_compiled
-
 random.seed(42)
 
 
@@ -30,9 +28,6 @@ class CFGGuideBenchmark:
 
     def setup(self, grammar_name):
         self.tokenizer = get_tiny_tokenizer()
-        ensure_numba_compiled(
-            self.tokenizer
-        )  # numba not currently used, but will be in the future
         self.prebuilt_cfg_guide = CFGGuide(
             benched_grammars[grammar_name], self.tokenizer
         )

benchmarks/bench_json_schema.py

Lines changed: 1 addition & 2 deletions
@@ -2,7 +2,7 @@
 from outlines.fsm.guide import RegexGuide
 from outlines.fsm.json_schema import build_regex_from_schema
 
-from .common import ensure_numba_compiled, setup_tokenizer  # noqa: E402
+from .common import setup_tokenizer  # noqa: E402
 
 simple_schema = """{
     "$defs": {
@@ -69,7 +69,6 @@ class JsonSchemaBenchmark:
     def setup(self, schema_name):
         self.tokenizer = setup_tokenizer()
         self.schema = schemas[schema_name]
-        ensure_numba_compiled(self.tokenizer)
 
     @cache_disabled()
     def time_json_schema_to_regex(self, schema_name):

benchmarks/bench_numba_compile.py

Lines changed: 0 additions & 34 deletions
This file was deleted.

benchmarks/bench_regex_guide.py

Lines changed: 0 additions & 42 deletions
This file was deleted.

benchmarks/common.py

Lines changed: 0 additions & 6 deletions
@@ -1,14 +1,8 @@
 from transformers import AutoTokenizer
 
-from outlines.fsm.guide import RegexGuide
 from outlines.models.transformers import TransformerTokenizer
 
 
 def setup_tokenizer():
     tokenizer = AutoTokenizer.from_pretrained("gpt2")
     return TransformerTokenizer(tokenizer)
-
-
-def ensure_numba_compiled(tokenizer):
-    RegexGuide("a", tokenizer)
-    return True
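
Context on the deletion above: ensure_numba_compiled built a throwaway RegexGuide("a", tokenizer) purely to force numba to JIT-compile the FSM routines during setup, keeping compilation time out of the timed benchmark runs. The compiled outlines_core backend has no JIT to warm, so the helper can go. A minimal, hypothetical sketch of that warm-up pattern (illustrative only, not project code; assumes numba is installed):

import numba

@numba.njit(cache=True)
def _hot_loop(n):
    # Stand-in for the numba-compiled FSM routines the old backend used.
    total = 0
    for i in range(n):
        total += i
    return total

class TimingBenchmark:
    def setup(self):
        # Call once so JIT compilation happens outside the timed region.
        _hot_loop(1)

    def time_hot_loop(self):
        _hot_loop(1_000_000)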

outlines/fsm/fsm.py

Lines changed: 0 additions & 47 deletions
This file was deleted.

outlines/fsm/guide.py

Lines changed: 5 additions & 5 deletions
@@ -20,16 +20,16 @@
 import torch
 from lark.indenter import DedentError
 from lark.lexer import UnexpectedCharacters, UnexpectedToken
-
-from outlines import grammars
-from outlines.caching import cache
-from outlines.fsm.parsing import PartialLark, PartialParserState
-from outlines.fsm.regex import (
+from outlines_core.fsm.regex import (
     create_fsm_index_tokenizer,
     make_byte_level_fsm,
     make_deterministic_fsm,
 )
 
+from outlines import grammars
+from outlines.caching import cache
+from outlines.fsm.parsing import PartialLark, PartialParserState
+
 if TYPE_CHECKING:
     from outlines.models.tokenizer import Tokenizer
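The functions now imported from outlines_core.fsm.regex replace same-named ones from outlines.fsm.regex, so the index construction in this module is unchanged apart from the backend. A rough sketch of that pipeline, with signatures assumed from the pre-change outlines module (exact outlines_core details may differ):

import interegular
from outlines_core.fsm.regex import (
    create_fsm_index_tokenizer,
    make_byte_level_fsm,
    make_deterministic_fsm,
)

def build_index(regex_string, tokenizer):
    # Compile the regex to a character-level FSM, lower it to bytes so
    # multi-byte UTF-8 tokens can be matched, then determinize it.
    char_fsm = interegular.parse_pattern(regex_string).to_fsm().reduce()
    byte_fsm = make_byte_level_fsm(char_fsm, keep_utf8=True)
    regex_fsm, _ = make_deterministic_fsm(byte_fsm)
    # Map each FSM state to the vocabulary tokens admissible from it;
    # this mapping drives guided generation.
    states_to_token_maps, empty_token_ids = create_fsm_index_tokenizer(
        regex_fsm, tokenizer
    )
    return states_to_token_maps, empty_token_ids

Since the replacement backend ships precompiled routines, building this index no longer pays a numba JIT cost on first use, which is presumably the "more performant" in the commit message.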
outlines/fsm/parsing.py

Lines changed: 1 addition & 2 deletions
@@ -34,8 +34,7 @@
 )
 from lark.parsers.lalr_interactive_parser import InteractiveParser
 from lark.parsers.lalr_parser import LALR_Parser, ParseConf, ParserState, _Parser
-
-from outlines.fsm.regex import (
+from outlines_core.fsm.regex import (
     fsm_union,
     get_sub_fsms_from_seq,
     get_token_transition_keys,
