Skip to content

Commit dd0492f

Browse files
committed
Expect vllm.LLMEngine as processor's argument
1 parent 32047ab commit dd0492f

File tree

3 files changed

+72
-35
lines changed

3 files changed

+72
-35
lines changed

docs/reference/vllm.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,11 @@ You can then query the model in shell by passing a prompt and either
2828
1. a [JSON Schema][jsonschema]{:target="_blank"} specification or
2929
2. a [Regex][regex]{:target="_blank"} pattern
3030

31+
with the `schema`, `regex` or `cfg` parameters, respectively, to the `/generate` endpoint. If several are specified, the schema will be used. If none is specified, the generated text will be unconstrained.
3236
3337
For example, to generate a string that matches the schema `{"type": "string"}` (any string):
3438

examples/vllm_integration.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ class User(BaseModel):
1414

1515

1616
llm = vllm.LLM(model="gpt2")
17-
logits_processor = JSONLogitsProcessor(User, llm)
17+
logits_processor = JSONLogitsProcessor(User, llm.llm_engine)
1818
result = llm.generate(
1919
["A prompt", "Another prompt"],
2020
sampling_params=vllm.SamplingParams(

outlines/serve/vllm.py

Lines changed: 67 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@
22
import json
33
import math
44
from collections import defaultdict
5-
from typing import DefaultDict, List
5+
from typing import DefaultDict, List, Callable
66

77
import torch
8+
from vllm import LLMEngine
89

9-
from outlines.fsm.fsm import RegexFSM
10+
from outlines.fsm.fsm import RegexFSM, CFGFSM, FSM
1011
from outlines.fsm.json_schema import build_regex_from_object
1112

1213

@@ -39,21 +40,45 @@ def _patched_apply_logits_processors(
3940
return logits
4041

4142

42-
class RegexLogitsProcessor:
43-
def __init__(self, regex_string, llm):
44-
"""Compile the FSM that drives the regex-guided generation.
43+
def _adapt_tokenizer(tokenizer):
44+
"""Adapt vLLM's tokenizer to use to compile the FSM.
4545
46-
Parameters
47-
----------
48-
regex_string
49-
A string that represents a regular expression
50-
llm
51-
An instance of `vllm.LLM`
46+
The API of Outlines tokenizers is slightly different to that of
47+
`transformers`. In addition we need to handle the missing spaces in
48+
Llama's tokenizer's output to be able to compile FSMs for this model.
49+
50+
"""
51+
tokenizer.vocabulary = tokenizer.get_vocab()
52+
tokenizer.special_tokens = set(tokenizer.all_special_tokens)
53+
54+
def convert_token_to_string(token: str) -> str:
55+
from transformers.file_utils import SPIECE_UNDERLINE
56+
57+
string = tokenizer.convert_tokens_to_string([token])
58+
59+
# A hack to handle missing spaces in HF's Llama tokenizers
60+
if token.startswith(SPIECE_UNDERLINE) or token == "<0x20>":
61+
return " " + string
62+
63+
return string
64+
65+
def change_decoder(
66+
decoder: Callable[[List[int]], str]
67+
) -> Callable[[List[int]], List[str]]:
68+
def new_decoder(inp_tokens: List[int]) -> List[str]:
69+
return [decoder(inp_tokens)]
70+
71+
return new_decoder
72+
73+
tokenizer.convert_token_to_string = convert_token_to_string
74+
tokenizer.decode = change_decoder(tokenizer.decode)
75+
76+
return tokenizer
5277

53-
"""
54-
tokenizer = self.adapt_tokenizer(llm.tokenizer)
5578

56-
fsm = RegexFSM(regex_string, tokenizer)
79+
class FSMLogitsProcessor:
80+
def __init__(self):
81+
fsm = FSM()
5782
self.fsm = fsm
5883

5984
def __call__(
@@ -77,43 +102,51 @@ def __call__(
77102

78103
return biased_scores
79104

80-
def adapt_tokenizer(self, tokenizer):
81-
"""Adapt vLLM's tokenizer to use to compile the FSM.
82105

83-
The API of Outlines tokenizers is slightly different to that of
84-
`transformers`. In addition we need to handle the missing spaces to
85-
Llama's tokenizer to be able to compile FSMs for this model.
86-
87-
"""
88-
tokenizer.vocabulary = tokenizer.get_vocab()
89-
tokenizer.special_tokens = set(tokenizer.all_special_tokens)
106+
class RegexLogitsProcessor(FSMLogitsProcessor):
107+
def __init__(self, regex_string, llm: LLMEngine):
108+
"""Compile the FSM that drives the regex-guided generation.
90109
91-
def convert_token_to_string(token: str) -> str:
92-
from transformers.file_utils import SPIECE_UNDERLINE
110+
Parameters
111+
----------
112+
regex_string
113+
A string that represents a regular expression
114+
llm
115+
An instance of `vllm.LLMEngine`
93116
94-
string = tokenizer.convert_tokens_to_string([token])
117+
"""
118+
adapted_tokenizer = _adapt_tokenizer(llm.tokenizer)
119+
fsm = RegexFSM(regex_string, adapted_tokenizer)
120+
self.fsm = fsm
95121

96-
# A hack to handle missing spaces to HF's Llama tokenizers
97-
if token.startswith(SPIECE_UNDERLINE) or token == "<0x20>":
98-
return " " + string
99122

100-
return string
123+
class CFGLogitsProcessor(FSMLogitsProcessor):
124+
def __init__(self, cfg_string, llm: LLMEngine):
125+
"""Compile the FSM that drives the cfg-guided generation.
101126
102-
tokenizer.convert_token_to_string = convert_token_to_string
127+
Parameters
128+
----------
129+
cfg_string
130+
A string that represents a context-free grammar
131+
llm
132+
An instance of `vllm.LLMEngine`
103133
104-
return tokenizer
134+
"""
135+
adapted_tokenizer = _adapt_tokenizer(llm.tokenizer)
136+
fsm = CFGFSM(cfg_string, adapted_tokenizer)
137+
self.fsm = fsm
105138

106139

107140
class JSONLogitsProcessor(RegexLogitsProcessor):
108-
def __init__(self, schema, llm):
141+
def __init__(self, schema, llm: LLMEngine):
109142
"""Compile the FSM that drives the JSON-guided generation.
110143
111144
Parameters
112145
----------
113146
schema
114147
A JSON schema that encodes the structure we want the model to generate
115148
llm
116-
An instance of `vllm.LLM`
149+
An instance of `vllm.LLMEngine`
117150
118151
"""
119152
if isinstance(schema, dict):

0 commit comments

Comments
 (0)