Skip to content

Commit 58a0948

Browse files
committed
Revert "Add CFG to vllm serving"
1 parent fde61a8 commit 58a0948

File tree

3 files changed

+36
-81
lines changed

3 files changed

+36
-81
lines changed

docs/reference/vllm.md

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,8 @@ You can then query the model in shell by passing a prompt and either
2424

2525
1. a [JSON Schema][jsonschema]{:target="_blank"} specification or
2626
2. a [Regex][regex]{:target="_blank"} pattern
27-
2. an EBNF grammar
2827

29-
with the `schema`, `regex` of `cfg` parameters, respectively, to the `/generate` endpoint. If both are specified, the schema will be used. If neither is specified, the generated text will be unconstrained.
28+
with the `schema` or `regex` parameters, respectively, to the `/generate` endpoint. If both are specified, the schema will be used. If neither is specified, the generated text will be unconstrained.
3029

3130
For example, to generate a string that matches the schema `{"type": "string"}` (any string):
3231

@@ -48,16 +47,6 @@ curl http://127.0.0.1:8000/generate \
4847
}'
4948
```
5049

51-
To generate a string that matches the grammar `<grammar>`:
52-
53-
```bash
54-
curl http://127.0.0.1:8000/generate \
55-
-d '{
56-
"prompt": "What is Pi? Give me the first 15 digits: ",
57-
"cfg": <grammar>
58-
}'
59-
```
60-
6150
Instead of `curl`, you can also use the [requests][requests]{:target="_blank"} library from another python program.
6251

6352
Please consult the [vLLM documentation][vllm]{:target="_blank"} for details on additional request parameters. You can also [read the code](https://github.com/outlines-dev/outlines/blob/main/outlines/serve/serve.py) in case you need to customize the solution to your needs.

outlines/serve/serve.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
from vllm.utils import random_uuid
2626

2727
from .vllm import (
28-
CFGLogitsProcessor,
2928
JSONLogitsProcessor,
3029
RegexLogitsProcessor,
3130
_patched_apply_logits_processors,
@@ -66,13 +65,10 @@ async def generate(request: Request) -> Response:
6665

6766
json_schema = request_dict.pop("schema", None)
6867
regex_string = request_dict.pop("regex", None)
69-
cfg_string = request_dict.pop("cfg", None)
7068
if json_schema is not None:
7169
logits_processors = [JSONLogitsProcessor(json_schema, engine.engine)]
7270
elif regex_string is not None:
7371
logits_processors = [RegexLogitsProcessor(regex_string, engine.engine)]
74-
elif cfg_string is not None:
75-
logits_processors = [CFGLogitsProcessor(cfg_string, engine.engine)]
7672
else:
7773
logits_processors = []
7874

outlines/serve/vllm.py

Lines changed: 35 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -2,50 +2,14 @@
22
import json
33
import math
44
from collections import defaultdict
5-
from typing import Callable, DefaultDict, List
5+
from typing import DefaultDict, List
66

77
import torch
88

9-
from outlines.fsm.fsm import CFGFSM, FSM, RegexFSM
9+
from outlines.fsm.fsm import RegexFSM
1010
from outlines.fsm.json_schema import build_regex_from_object
1111

1212

13-
def _adapt_tokenizer(tokenizer):
14-
"""Adapt vLLM's tokenizer to use to compile the FSM.
15-
16-
The API of Outlines tokenizers is slightly different to that of
17-
`transformers`. In addition we need to handle the missing spaces to
18-
Llama's tokenizer to be able to compile FSMs for this model.
19-
20-
"""
21-
tokenizer.vocabulary = tokenizer.get_vocab()
22-
tokenizer.special_tokens = set(tokenizer.all_special_tokens)
23-
24-
def convert_token_to_string(token: str) -> str:
25-
from transformers.file_utils import SPIECE_UNDERLINE
26-
27-
string = tokenizer.convert_tokens_to_string([token])
28-
29-
# A hack to handle missing spaces to HF's Llama tokenizers
30-
if token.startswith(SPIECE_UNDERLINE) or token == "<0x20>":
31-
return " " + string
32-
33-
return string
34-
35-
def change_decoder(
36-
decoder: Callable[[List[int]], str]
37-
) -> Callable[[List[int]], List[str]]:
38-
def new_decoder(inp_tokens: List[int]) -> List[str]:
39-
return [decoder(inp_tokens)]
40-
41-
return new_decoder
42-
43-
tokenizer.convert_token_to_string = convert_token_to_string
44-
tokenizer.decode = change_decoder(tokenizer.decode)
45-
46-
return tokenizer
47-
48-
4913
def _patched_apply_logits_processors(
5014
logits,
5115
sampling_metadata,
@@ -75,9 +39,21 @@ def _patched_apply_logits_processors(
7539
return logits
7640

7741

78-
class FSMLogitsProcessor:
79-
def __init__(self):
80-
fsm = FSM()
42+
class RegexLogitsProcessor:
43+
def __init__(self, regex_string, llm):
44+
"""Compile the FSM that drives the regex-guided generation.
45+
46+
Parameters
47+
----------
48+
regex_string
49+
A string that represents a regular expression
50+
llm
51+
An instance of `vllm.LLM`
52+
53+
"""
54+
tokenizer = self.adapt_tokenizer(llm.tokenizer)
55+
56+
fsm = RegexFSM(regex_string, tokenizer)
8157
self.fsm = fsm
8258

8359
def __call__(
@@ -101,37 +77,31 @@ def __call__(
10177

10278
return biased_scores
10379

80+
def adapt_tokenizer(self, tokenizer):
81+
"""Adapt vLLM's tokenizer to use to compile the FSM.
10482
105-
class RegexLogitsProcessor(FSMLogitsProcessor):
106-
def __init__(self, regex_string, llm):
107-
"""Compile the FSM that drives the regex-guided generation.
108-
109-
Parameters
110-
----------
111-
regex_string
112-
A string that represents a regular expression
113-
llm
114-
An instance of `vllm.LLM`
83+
The API of Outlines tokenizers is slightly different to that of
84+
`transformers`. In addition we need to handle the missing spaces to
85+
Llama's tokenizer to be able to compile FSMs for this model.
11586
11687
"""
117-
fsm = RegexFSM(regex_string, llm.tokenizer)
118-
self.fsm = fsm
88+
tokenizer.vocabulary = tokenizer.get_vocab()
89+
tokenizer.special_tokens = set(tokenizer.all_special_tokens)
11990

91+
def convert_token_to_string(token: str) -> str:
92+
from transformers.file_utils import SPIECE_UNDERLINE
12093

121-
class CFGLogitsProcessor(FSMLogitsProcessor):
122-
def __init__(self, cfg_string, llm):
123-
"""Compile the FSM that drives the cfg-guided generation.
94+
string = tokenizer.convert_tokens_to_string([token])
12495

125-
Parameters
126-
----------
127-
regex_string
128-
A string that represents a regular expression
129-
llm
130-
An instance of `vllm.LLM`
96+
# A hack to handle missing spaces to HF's Llama tokenizers
97+
if token.startswith(SPIECE_UNDERLINE) or token == "<0x20>":
98+
return " " + string
13199

132-
"""
133-
fsm = CFGFSM(cfg_string, llm.tokenizer)
134-
self.fsm = fsm
100+
return string
101+
102+
tokenizer.convert_token_to_string = convert_token_to_string
103+
104+
return tokenizer
135105

136106

137107
class JSONLogitsProcessor(RegexLogitsProcessor):

0 commit comments

Comments (0)