Commit fdf7397

Save the vllm tokenizer adapted state
1 parent c2599c9 commit fdf7397

2 files changed (+12 −3 lines)


docs/reference/vllm.md (1 addition & 1 deletion)

````diff
@@ -50,7 +50,7 @@ curl http://127.0.0.1:8000/generate \
 }'
 ```

-To generate a string that matches the grammar `<grammar>`:
+To generate a string that matches a given grammar `<grammar>`:

 ```bash
 curl http://127.0.0.1:8000/generate \
````

outlines/serve/vllm.py (11 additions & 2 deletions)

```diff
@@ -44,10 +44,16 @@ def _adapt_tokenizer(tokenizer):
     """Adapt vLLM's tokenizer to use to compile the FSM.

     The API of Outlines tokenizers is slightly different to that of
-    `transformers`. In addition we need to handle the missing spaces to
-    Llama's tokenizer to be able to compile FSMs for this model.
+    `transformers`. The decoder of Outlines returns a list whereas
+    vLLM's decoder returns a str. To sync the vLLM decoder with the
+    Outlines internal API, the decoder should be adapted. In addition
+    we need to handle the missing spaces in Llama's tokenizer to be
+    able to compile FSMs for this model.

     """
+    if getattr(tokenizer, "_outlines_adapted", False):
+        return tokenizer
+
     tokenizer.vocabulary = tokenizer.get_vocab()
     tokenizer.special_tokens = set(tokenizer.all_special_tokens)

@@ -65,13 +71,16 @@ def convert_token_to_string(token: str) -> str:
     def change_decoder(
         decoder: Callable[[List[int]], str]
     ) -> Callable[[List[int]], List[str]]:
+        """Sync vLLM's decoder with the Outlines expectations by returning a list."""
+
         def new_decoder(inp_tokens: List[int]) -> List[str]:
             return [decoder(inp_tokens)]

         return new_decoder

     tokenizer.convert_token_to_string = convert_token_to_string
     tokenizer.decode = change_decoder(tokenizer.decode)
+    setattr(tokenizer, "_outlines_adapted", True)

     return tokenizer
```
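The change above combines two patterns: wrapping `decode` so it returns a one-element list, and an `_outlines_adapted` flag that makes re-adaptation a no-op (without it, adapting twice would wrap `decode` twice and yield a nested list). A minimal sketch of that pattern in isolation, using a hypothetical `DummyTokenizer` stand-in rather than vLLM's real tokenizer class:

```python
from typing import Callable, List


class DummyTokenizer:
    """Stand-in for a vLLM tokenizer: `decode` maps token ids to a str."""

    def decode(self, inp_tokens: List[int]) -> str:
        return " ".join(str(t) for t in inp_tokens)


def adapt(tokenizer):
    # Guard: a second call returns the already-adapted object unchanged,
    # so `decode` is never wrapped twice.
    if getattr(tokenizer, "_outlines_adapted", False):
        return tokenizer

    def change_decoder(
        decoder: Callable[[List[int]], str]
    ) -> Callable[[List[int]], List[str]]:
        # Wrap a str-returning decoder so it returns a one-element list.
        def new_decoder(inp_tokens: List[int]) -> List[str]:
            return [decoder(inp_tokens)]

        return new_decoder

    tokenizer.decode = change_decoder(tokenizer.decode)
    tokenizer._outlines_adapted = True
    return tokenizer


tok = adapt(DummyTokenizer())
tok = adapt(tok)  # second call is a no-op thanks to the guard
print(tok.decode([1, 2]))  # ['1 2'], a list, not '1 2'
```

Note that `change_decoder` receives the bound method `tokenizer.decode`, and the wrapper is assigned back as a plain instance attribute, shadowing the class method.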
