|
1 | 1 | import pytest
|
2 | 2 |
|
| 3 | +import llama_cpp |
| 4 | +import transformers |
| 5 | + |
| 6 | +import outlines |
3 | 7 | from outlines.processors.guide import CFGGuide, Generate, RegexGuide, StopAtEOSGuide, Write
|
| 8 | +from outlines import caching |
| 9 | + |
| 10 | +try: |
| 11 | + import mlx_lm |
| 12 | + HAS_MLX = True |
| 13 | +except ImportError: |
| 14 | + HAS_MLX = False |
| 15 | + |
| 16 | +try: |
| 17 | + import vllm |
| 18 | + HAS_VLLM = True |
| 19 | +except ImportError: |
| 20 | + HAS_VLLM = False |
4 | 21 |
|
5 | 22 |
|
6 | 23 | def assert_expected_tensor_ids(tensor, ids):
|
@@ -181,6 +198,45 @@ def convert_token_to_string(self, token):
|
181 | 198 | assert fsm.is_final_state(state)
|
182 | 199 |
|
183 | 200 |
|
def test_regex_guide_caching():
    """RegexGuide.from_regex is cached per (regex, tokenizer): the first
    call for each tokenizer misses, every repeat call hits."""
    assert caching._caching_enabled

    cache = caching.get_cache()
    # Turn on stat tracking and zero the (hits, misses) counters.
    cache.stats(enable=True, reset=True)

    pattern = r"[0-9]{3}"

    hf_model = transformers.AutoModelForCausalLM.from_pretrained("erwanf/gpt2-mini")
    hf_tokenizer = transformers.AutoTokenizer.from_pretrained("erwanf/gpt2-mini")
    llama_model = llama_cpp.Llama.from_pretrained(
        repo_id="M4-ai/TinyMistral-248M-v2-Instruct-GGUF",
        filename="TinyMistral-248M-v2-Instruct.Q4_K_M.gguf",
    )

    models = [
        outlines.from_transformers(hf_model, hf_tokenizer),
        outlines.from_llamacpp(llama_model),
    ]
    # Optional backends are only exercised when their packages import cleanly.
    if HAS_MLX:
        models.append(
            outlines.from_mlxlm(*mlx_lm.load("mlx-community/SmolLM-135M-Instruct-4bit"))
        )
    if HAS_VLLM:
        models.append(outlines.from_vllm(vllm.LLM("erwanf/gpt2-mini")))

    for index, model in enumerate(models):
        # An unseen tokenizer must add exactly one miss...
        RegexGuide.from_regex(pattern, model.tokenizer)
        assert cache.stats(enable=True, reset=False) == (index, index + 1)

        # ...and the identical repeat call must add exactly one hit.
        RegexGuide.from_regex(pattern, model.tokenizer)
        assert cache.stats(enable=True, reset=False) == (index + 1, index + 1)
| 239 | + |
184 | 240 | def test_cfg():
|
185 | 241 | class MockTokenizer:
|
186 | 242 | vocabulary = {"{": 1, "}": 2, "[": 3, "]": 4, "eos": 5}
|
|
0 commit comments