Commit cb16b16

Fix llamacpp caching by making LlamaCppTokenizer pickleable
1 parent: 538f77a

File tree: 2 files changed (+18, -0 lines)

outlines/integrations/llamacpp.py (10 additions, 0 deletions)

```diff
@@ -66,6 +66,16 @@ def __init__(self, model: "Llama"):
     def convert_token_to_string(self, token: str) -> str:
         return token
 
+    def __getstate__(self):
+        """Allow tokenizer to be used as hash key by excluding self.decode"""
+        return (
+            self.vocabulary.items(),
+            self.eos_token_id,
+            self.eos_token,
+            self.pad_token_id,
+            sorted(self.special_tokens),
+        )
+
 
 class LogitsProcessor:
     """Bias LlamaCpp generation using a finite state machine.
```

tests/generate/test_integration_llamacpp.py (8 additions, 0 deletions)

```diff
@@ -279,3 +279,11 @@ def test_llama_cpp_pre_tokenizer_remains_broken():
     model = models.llamacpp(repo, model_path)
     with pytest.raises(RuntimeError):
         generate.choice(model, ["skirt", "dress", "pen", "jacket"])
+
+
+def test_create_states_mapping_llamacpp_tokenizer_regression(model):
+    """Minimal reproducer for #922, error passing llamacpp tokenizer to create_states_mapping"""
+    from outlines.fsm.guide import create_states_mapping
+    from outlines.integrations.llamacpp import LlamaCppTokenizer
+
+    create_states_mapping("a", LlamaCppTokenizer(model.model))
```
