From 0b9b1f5670a7672663647abe9342909946dadb3d Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 20 Sep 2024 21:12:21 -0700 Subject: [PATCH 01/54] make a guidance_metho decorator --- guidance/_guidance.py | 52 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 0f847c0de..3b8a40498 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -17,6 +17,58 @@ def guidance( """Decorator used to define guidance grammars""" return _decorator(f, stateless=stateless, cache=cache, dedent=dedent, model=model) +def guidance_method( + f = None, + *, + stateless = False, + cache = False, + dedent = True, + model = Model, +): + # if we are not yet being used as a decorator, then save the args + if f is None: + return functools.partial( + GuidanceMethod, stateless=stateless, cache=cache, dedent=dedent, model=model + ) + return GuidanceMethod(f, stateless=stateless, cache=cache, dedent=dedent, model=model) + +class GuidanceMethod: + def __init__( + self, + f, + *, + stateless = False, + cache = False, + dedent = True, + model = Model, + ): + self.f = f + self.stateless = stateless + self.cache = cache + self.model = model + self._owner = None + + def __call__(self, *args, **kwargs): + if self._owner is None: + raise TypeError(f"GuidanceMethod must decorate a method, not a function. Got: {self.f!r}") + raise TypeError(f"GuidanceMethod must be bound to an instance. Did you mean to instantiate a {self._owner.__name__!r} object?") + + def __get__(self, instance, owner=None, /): + if instance is None: + self._owner = owner + return self + + return _decorator( + # Bind the function to the instance before passing it to the decorator + # in order to handle the `self` argument properly + self.f.__get__(instance, owner), + stateless=self.stateless, + cache=self.cache, + # Source code rewriting does scary things + dedent=False, + model=self.model, + ) + _null_grammar = string("") From 943a80142d84b0c809771ba877e251c2a7717bcf Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 20 Sep 2024 21:13:25 -0700 Subject: [PATCH 02/54] handle partial one level up --- guidance/_guidance.py | 115 ++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 59 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 3b8a40498..00db5811d 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -15,6 +15,13 @@ def guidance( model = Model, ): """Decorator used to define guidance grammars""" + # if we are not yet being used as a decorator, then save the args + + if f is None: + return functools.partial( + _decorator, stateless=stateless, cache=cache, dedent=dedent, model=model + ) + return _decorator(f, stateless=stateless, cache=cache, dedent=dedent, model=model) def guidance_method( @@ -74,74 +81,64 @@ def __get__(self, instance, owner=None, /): def _decorator(f, *, stateless, cache, dedent, model): + # this strips out indentation in multiline strings that aligns with the current python indentation + if dedent is True or dedent == "python": + f = strip_multiline_string_indents(f) - # if we are not yet being used as a decorator, then save the args - if f is None: - return functools.partial( - _decorator, stateless=stateless, cache=cache, dedent=dedent, model=model - ) - - # if we are being used as a decorator then return the decorated function - else: + # we cache if requested + if cache: + f = functools.cache(f) - # this strips out indentation in multiline strings that aligns with the current python indentation - if dedent is True or dedent == "python": - f = strip_multiline_string_indents(f) + @functools.wraps(f) + def wrapped(*args, **kwargs): - # we cache if requested - if cache: - f = functools.cache(f) + # make a stateless grammar if we can + if stateless is True or ( + callable(stateless) and stateless(*args, **kwargs) + ): - @functools.wraps(f) - def wrapped(*args, **kwargs): + # if we have a (deferred) reference set, then we must be in a recursive definition and so we return the reference + reference = getattr(f, "_self_call_reference_", None) + if reference is not None: + return reference - # make a stateless grammar if we can - if stateless is True or ( - callable(stateless) and stateless(*args, **kwargs) - ): + # otherwise we call the function to generate the grammar + else: - # if we have a (deferred) reference set, then we must be in a recursive definition and so we return the reference - reference = getattr(f, "_self_call_reference_", None) - if reference is not None: - return reference + # set a DeferredReference for recursive calls (only if we don't have arguments that might make caching a bad idea) + no_args = len(args) + len(kwargs) == 0 + if no_args: + f._self_call_reference_ = DeferredReference() - # otherwise we call the function to generate the grammar + try: + # call the function to get the grammar node + node = f(_null_grammar, *args, **kwargs) + except: + raise else: - - # set a DeferredReference for recursive calls (only if we don't have arguments that might make caching a bad idea) - no_args = len(args) + len(kwargs) == 0 + if not isinstance(node, (Terminal, str)): + node.name = f.__name__ + # set the reference value with our generated node if no_args: - f._self_call_reference_ = DeferredReference() - - try: - # call the function to get the grammar node - node = f(_null_grammar, *args, **kwargs) - except: - raise - else: - if not isinstance(node, (Terminal, str)): - node.name = f.__name__ - # set the reference value with our generated node - if no_args: - f._self_call_reference_.value = node - finally: - if no_args: - del f._self_call_reference_ - - return node - - # otherwise must be stateful (which means we can't be inside a select() call) - else: - return RawFunction(f, args, kwargs) + f._self_call_reference_.value = node + finally: + if no_args: + del f._self_call_reference_ + + return node + + # otherwise must be stateful (which means we can't be inside a select() call) + else: + return RawFunction(f, args, kwargs) - # Remove the first argument from the wrapped function - signature = inspect.signature(f) - params = list(signature.parameters.values()) - params.pop(0) - wrapped.__signature__ = signature.replace(parameters=params) + # Remove the first argument from the wrapped function + signature = inspect.signature(f) + params = list(signature.parameters.values()) + params.pop(0) + wrapped.__signature__ = signature.replace(parameters=params) - # attach this as a method of the model class (if given) - # if model is not None: - # setattr(model, f.__name__, f) + # attach this as a method of the model class (if given) + # if model is not None: + # setattr(model, f.__name__, f) - return wrapped + return wrapped From 1e67aafb7fb49740c01f00e26c86ddade0b25732 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 20 Sep 2024 21:16:36 -0700 Subject: [PATCH 03/54] dedent up the stack so we can do it for methods too --- guidance/_guidance.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 00db5811d..8519c3b1f 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -19,10 +19,14 @@ def guidance( if f is None: return functools.partial( - _decorator, stateless=stateless, cache=cache, dedent=dedent, model=model + _decorator, stateless=stateless, cache=cache, model=model ) - return _decorator(f, stateless=stateless, cache=cache, dedent=dedent, model=model) + # this strips out indentation in multiline strings that aligns with the current python indentation + if dedent is True or dedent == "python": + f = strip_multiline_string_indents(f) + + return _decorator(f, stateless=stateless, cache=cache, model=model) def guidance_method( f = None, @@ -35,9 +39,14 @@ def guidance_method( # if we are not yet being used as a decorator, then save the args if f is None: return functools.partial( - GuidanceMethod, stateless=stateless, cache=cache, dedent=dedent, model=model + GuidanceMethod, stateless=stateless, cache=cache, model=model ) - return GuidanceMethod(f, stateless=stateless, cache=cache, dedent=dedent, model=model) + + # this strips out indentation in multiline strings that aligns with the current python indentation + if dedent is True or dedent == "python": + f = strip_multiline_string_indents(f) + + return GuidanceMethod(f, stateless=stateless, cache=cache, model=model) class GuidanceMethod: def __init__( @@ -46,7 +55,6 @@ def __init__( *, stateless = False, cache = False, - dedent = True, model = Model, ): self.f = f @@ -71,8 +79,6 @@ def __get__(self, instance, owner=None, /): self.f.__get__(instance, owner), stateless=self.stateless, cache=self.cache, - # Source code rewriting does scary things - dedent=False, model=self.model, ) @@ -80,11 +86,7 @@ def __get__(self, instance, owner=None, /): _null_grammar = string("") -def _decorator(f, *, stateless, cache, dedent, model): - # this strips out indentation in multiline strings that aligns with the current python indentation - if dedent is True or dedent == "python": - f = strip_multiline_string_indents(f) - +def _decorator(f, *, stateless, cache, model): # we cache if requested if cache: f = functools.cache(f) From 19882a27813f5b685af2699bb8d821edd3cb0a67 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 20 Sep 2024 21:18:22 -0700 Subject: [PATCH 04/54] cache up the stack so we capture the self arg in guidance_method --- guidance/_guidance.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 8519c3b1f..9f6a8f059 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -26,7 +26,11 @@ def guidance( if dedent is True or dedent == "python": f = strip_multiline_string_indents(f) - return _decorator(f, stateless=stateless, cache=cache, model=model) + # we cache if requested + if cache: + f = functools.cache(f) + + return _decorator(f, stateless=stateless, model=model) def guidance_method( f = None, @@ -39,14 +43,18 @@ def guidance_method( # if we are not yet being used as a decorator, then save the args if f is None: return functools.partial( - GuidanceMethod, stateless=stateless, cache=cache, model=model + GuidanceMethod, stateless=stateless, model=model ) # this strips out indentation in multiline strings that aligns with the current python indentation if dedent is True or dedent == "python": f = strip_multiline_string_indents(f) - return GuidanceMethod(f, stateless=stateless, cache=cache, model=model) + # we cache if requested + if cache: + f = functools.cache(f) + + return GuidanceMethod(f, stateless=stateless, model=model) class GuidanceMethod: def __init__( @@ -54,12 +62,10 @@ def __init__( f, *, stateless = False, - cache = False, model = Model, ): self.f = f self.stateless = stateless - self.cache = cache self.model = model self._owner = None @@ -78,7 +84,6 @@ def __get__(self, instance, owner=None, /): # in order to handle the `self` argument properly self.f.__get__(instance, owner), stateless=self.stateless, - cache=self.cache, model=self.model, ) @@ -86,11 +91,7 @@ def __get__(self, instance, owner=None, /): _null_grammar = string("") -def _decorator(f, *, stateless, cache, model): - # we cache if requested - if cache: - f = functools.cache(f) - +def _decorator(f, *, stateless, model): @functools.wraps(f) def wrapped(*args, **kwargs): From eaa875f9ebf52503a466f1ad53dd0236fed42720 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Mon, 23 Sep 2024 09:36:10 -0700 Subject: [PATCH 05/54] add failing guidance_method tests --- tests/unit/test_decorator.py | 43 ++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/tests/unit/test_decorator.py b/tests/unit/test_decorator.py index 62042f814..f94ecd104 100644 --- a/tests/unit/test_decorator.py +++ b/tests/unit/test_decorator.py @@ -147,3 +147,46 @@ def raises(lm): # improper handling may not raise and may instead return # a Placeholder grammar node raises() + + +class TestGuidanceMethod: + class MyClassWithoutHash: + def __init__(self, x): + self.x = x + + @guidance.guidance_method(stateless=True, cache=True, dedent=False) + def method(self, lm): + lm += str(self.x) + return lm + + class MyClassWithHash: + def __init__(self, x): + self.x = x + + @guidance.guidance_method(stateless=True, cache=True, dedent=False) + def method(self, lm): + lm += str(self.x) + return lm + + def __hash__(self): + return hash(self.x) + + def test_guidance_method_caches(self): + obj1 = self.MyClassWithoutHash(1) + + lm = guidance.models.Mock() + result = lm + obj1.method() + assert str(result) == "1" + obj1.x = 2 + result = lm + obj1.method() + assert str(result) == "1" + + def test_guidance_method_caches_with_hash(self): + obj1 = self.MyClassWithHash(1) + + lm = guidance.models.Mock() + result = lm + obj1.method() + assert str(result) == "1" + obj1.x = 2 + result = lm + obj1.method() + assert str(result) == "2" \ No newline at end of file From 657dbc0254e5b6038523850ba53caa13f5cc865f Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Mon, 23 Sep 2024 09:42:12 -0700 Subject: [PATCH 06/54] drop cache arg --- guidance/_guidance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 9f6a8f059..bcf706dc6 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -19,7 +19,7 @@ def guidance( if f is None: return functools.partial( - _decorator, stateless=stateless, cache=cache, model=model + _decorator, stateless=stateless, model=model ) # this strips out indentation in multiline strings that aligns with the current python indentation From 3edf627deda9a7e96418664652d0a2fa68ed71ce Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Mon, 23 Sep 2024 09:47:51 -0700 Subject: [PATCH 07/54] export guidance_method --- guidance/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guidance/__init__.py b/guidance/__init__.py index 9175b3dc0..8415e5ec3 100644 --- a/guidance/__init__.py +++ b/guidance/__init__.py @@ -4,7 +4,7 @@ import types from . import models -from ._guidance import guidance +from ._guidance import guidance, guidance_method from ._grammar import ( RawFunction, From 00b13bd96aa35db8af6e08ad6f1e865c6967ea47 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Mon, 23 Sep 2024 09:48:29 -0700 Subject: [PATCH 08/54] move partial to where caching and dedenting happens... --- guidance/_guidance.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index bcf706dc6..c1dabf71a 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -19,7 +19,7 @@ def guidance( if f is None: return functools.partial( - _decorator, stateless=stateless, model=model + guidance, stateless=stateless, cache=cache, dedent=dedent, model=model, ) # this strips out indentation in multiline strings that aligns with the current python indentation @@ -43,7 +43,7 @@ def guidance_method( # if we are not yet being used as a decorator, then save the args if f is None: return functools.partial( - GuidanceMethod, stateless=stateless, model=model + guidance_method, stateless=stateless, cache=cache, dedent=dedent, model=model, ) # this strips out indentation in multiline strings that aligns with the current python indentation From 14e478bacf5564ca48b6f56cddcaf3eec9232d76 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Mon, 23 Sep 2024 09:50:00 -0700 Subject: [PATCH 09/54] move reference to closure since MethodType has slots --- guidance/_guidance.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index c1dabf71a..aead46479 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -92,8 +92,10 @@ def __get__(self, instance, owner=None, /): def _decorator(f, *, stateless, model): + reference = None @functools.wraps(f) def wrapped(*args, **kwargs): + nonlocal reference # make a stateless grammar if we can if stateless is True or ( @@ -101,7 +103,6 @@ def wrapped(*args, **kwargs): ): # if we have a (deferred) reference set, then we must be in a recursive definition and so we return the reference - reference = getattr(f, "_self_call_reference_", None) if reference is not None: return reference @@ -111,7 +112,7 @@ def wrapped(*args, **kwargs): # set a DeferredReference for recursive calls (only if we don't have arguments that might make caching a bad idea) no_args = len(args) + len(kwargs) == 0 if no_args: - f._self_call_reference_ = DeferredReference() + reference = DeferredReference() try: # call the function to get the grammar node @@ -123,10 +124,10 @@ def wrapped(*args, **kwargs): node.name = f.__name__ # set the reference value with our generated node if no_args: - f._self_call_reference_.value = node + reference.value = node finally: if no_args: - del f._self_call_reference_ + reference = None return node From c66c1c0de41cb287b4ad25a01d7089dc0771d946 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Mon, 23 Sep 2024 10:17:00 -0700 Subject: [PATCH 10/54] remove guidance_method by making consolidated GuidanceDecorator class --- guidance/__init__.py | 2 +- guidance/_guidance.py | 31 +++++-------------------------- tests/unit/test_decorator.py | 4 ++-- 3 files changed, 8 insertions(+), 29 deletions(-) diff --git a/guidance/__init__.py b/guidance/__init__.py index 8415e5ec3..9175b3dc0 100644 --- a/guidance/__init__.py +++ b/guidance/__init__.py @@ -4,7 +4,7 @@ import types from . import models -from ._guidance import guidance, guidance_method +from ._guidance import guidance from ._grammar import ( RawFunction, diff --git a/guidance/_guidance.py b/guidance/_guidance.py index aead46479..408e1bd1e 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -30,33 +30,10 @@ def guidance( if cache: f = functools.cache(f) - return _decorator(f, stateless=stateless, model=model) + return GuidanceDecorator(f, stateless=stateless, model=model) -def guidance_method( - f = None, - *, - stateless = False, - cache = False, - dedent = True, - model = Model, -): - # if we are not yet being used as a decorator, then save the args - if f is None: - return functools.partial( - guidance_method, stateless=stateless, cache=cache, dedent=dedent, model=model, - ) - - # this strips out indentation in multiline strings that aligns with the current python indentation - if dedent is True or dedent == "python": - f = strip_multiline_string_indents(f) - - # we cache if requested - if cache: - f = functools.cache(f) - - return GuidanceMethod(f, stateless=stateless, model=model) -class GuidanceMethod: +class GuidanceDecorator: def __init__( self, f, @@ -71,7 +48,9 @@ def __init__( def __call__(self, *args, **kwargs): if self._owner is None: - raise TypeError(f"GuidanceMethod must decorate a method, not a function. Got: {self.f!r}") + # Function case + decorated = _decorator(self.f, stateless=self.stateless, model=self.model) + return decorated(*args, **kwargs) raise TypeError(f"GuidanceMethod must be bound to an instance. Did you mean to instantiate a {self._owner.__name__!r} object?") def __get__(self, instance, owner=None, /): diff --git a/tests/unit/test_decorator.py b/tests/unit/test_decorator.py index f94ecd104..9729fdd19 100644 --- a/tests/unit/test_decorator.py +++ b/tests/unit/test_decorator.py @@ -154,7 +154,7 @@ class MyClassWithoutHash: def __init__(self, x): self.x = x - @guidance.guidance_method(stateless=True, cache=True, dedent=False) + @guidance(stateless=True, cache=True, dedent=False) def method(self, lm): lm += str(self.x) return lm @@ -163,7 +163,7 @@ class MyClassWithHash: def __init__(self, x): self.x = x - @guidance.guidance_method(stateless=True, cache=True, dedent=False) + @guidance(stateless=True, cache=True, dedent=False) def method(self, lm): lm += str(self.x) return lm From f969a5050c5f0bcf205cae88bc35125d93b74eb6 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Mon, 23 Sep 2024 10:44:52 -0700 Subject: [PATCH 11/54] add separate GuidanceMethod subclass --- guidance/_guidance.py | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 408e1bd1e..d5313d321 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -30,10 +30,10 @@ def guidance( if cache: f = functools.cache(f) - return GuidanceDecorator(f, stateless=stateless, model=model) + return GuidanceFunction(f, stateless=stateless, model=model) -class GuidanceDecorator: +class GuidanceFunction: def __init__( self, f, @@ -44,29 +44,39 @@ def __init__( self.f = f self.stateless = stateless self.model = model - self._owner = None def __call__(self, *args, **kwargs): - if self._owner is None: - # Function case - decorated = _decorator(self.f, stateless=self.stateless, model=self.model) - return decorated(*args, **kwargs) - raise TypeError(f"GuidanceMethod must be bound to an instance. Did you mean to instantiate a {self._owner.__name__!r} object?") + decorated = _decorator(self.f, stateless=self.stateless, model=self.model) + return decorated(*args, **kwargs) def __get__(self, instance, owner=None, /): if instance is None: - self._owner = owner return self - return _decorator( - # Bind the function to the instance before passing it to the decorator - # in order to handle the `self` argument properly - self.f.__get__(instance, owner), + return GuidanceMethod( + self.f, stateless=self.stateless, model=self.model, + instance=instance, + owner=owner, ) +class GuidanceMethod(GuidanceFunction): + def __init__(self, f, *, stateless=False, model=Model, instance, owner): + super().__init__( + f.__get__(instance, owner), + stateless=stateless, + model=model + ) + # Save the instance and owner for introspection + self._instance = instance + self._owner = owner + + def __get__(self, instance, owner=None, /): + raise AttributeError("GuidanceMethod is already bound to an instance") + + _null_grammar = string("") From c4a5ee9b8e8d09d3caadcc13810ee2e17693c4d6 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Mon, 23 Sep 2024 10:46:43 -0700 Subject: [PATCH 12/54] cache wrapper so we can do recursion --- guidance/_guidance.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index d5313d321..02b926eb6 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -44,10 +44,13 @@ def __init__( self.f = f self.stateless = stateless self.model = model + self._wrapper = None def __call__(self, *args, **kwargs): - decorated = _decorator(self.f, stateless=self.stateless, model=self.model) - return decorated(*args, **kwargs) + # "Cache" the wrapped function + if self._wrapper is None: + self._wrapper = _decorator(self.f, stateless=self.stateless, model=self.model) + return self._wrapper(*args, **kwargs) def __get__(self, instance, owner=None, /): if instance is None: From 364c49fe729d6c3a945a4188a46cbd6134956b2d Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Mon, 23 Sep 2024 10:48:51 -0700 Subject: [PATCH 13/54] wraps --- guidance/_guidance.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 02b926eb6..616694149 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -41,6 +41,8 @@ def __init__( stateless = False, model = Model, ): + # Update self with the wrapped function's metadata + functools.wraps(f)(self) self.f = f self.stateless = stateless self.model = model @@ -85,7 +87,6 @@ def __get__(self, instance, owner=None, /): def _decorator(f, *, stateless, model): reference = None - @functools.wraps(f) def wrapped(*args, **kwargs): nonlocal reference From 866f1c07c231492d7aac60d761a165ae88c26d06 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Mon, 23 Sep 2024 11:06:56 -0700 Subject: [PATCH 14/54] modify signature of GuidanceFunction, not wrapped function --- guidance/_guidance.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 616694149..887e71d38 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -42,7 +42,13 @@ def __init__( model = Model, ): # Update self with the wrapped function's metadata - functools.wraps(f)(self) + functools.update_wrapper(self, f) + # Remove the first argument from the wrapped function + signature = inspect.signature(f) + params = list(signature.parameters.values()) + params.pop(0) + self.__signature__ = signature.replace(parameters=params) + self.f = f self.stateless = stateless self.model = model @@ -128,12 +134,6 @@ def wrapped(*args, **kwargs): else: return RawFunction(f, args, kwargs) - # Remove the first argument from the wrapped function - signature = inspect.signature(f) - params = list(signature.parameters.values()) - params.pop(0) - wrapped.__signature__ = signature.replace(parameters=params) - # attach this as a method of the model class (if given) # if model is not None: # setattr(model, f.__name__, f) From fa884133873c407f4b7845a90ad8d8b204f130f3 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Mon, 23 Sep 2024 11:38:24 -0700 Subject: [PATCH 15/54] Make method caching tests more extensive --- tests/unit/test_decorator.py | 121 ++++++++++++++++++++++++----------- 1 file changed, 83 insertions(+), 38 deletions(-) diff --git a/tests/unit/test_decorator.py b/tests/unit/test_decorator.py index 9729fdd19..07a144ca0 100644 --- a/tests/unit/test_decorator.py +++ b/tests/unit/test_decorator.py @@ -149,44 +149,89 @@ def raises(lm): raises() -class TestGuidanceMethod: - class MyClassWithoutHash: - def __init__(self, x): - self.x = x - - @guidance(stateless=True, cache=True, dedent=False) - def method(self, lm): - lm += str(self.x) - return lm - - class MyClassWithHash: - def __init__(self, x): - self.x = x - - @guidance(stateless=True, cache=True, dedent=False) - def method(self, lm): - lm += str(self.x) - return lm - - def __hash__(self): - return hash(self.x) - - def test_guidance_method_caches(self): - obj1 = self.MyClassWithoutHash(1) +class TestGuidanceMethodCache: + class MyClass: + def __init__(self, prefix: str, suffix: str): + self.prefix = prefix + self.suffix = suffix + self.delimiter = "\n" + def __hash__(self): + # Intentionally leave out self.delimiter so we can mess with it later + return hash((self.prefix, self.suffix)) + + @guidance(stateless=True, cache=True) + def cached_method(self, lm, middle: str): + return lm + self.delimiter.join([self.prefix, middle, self.suffix]) + + @guidance(stateless=True, cache=False) + def uncached_method(self, lm, middle: str): + return lm + self.delimiter.join([self.prefix, middle, self.suffix]) + + def test_guidance_method_cache(self): + obj = self.MyClass("You are a helpful AI. Do what the user asks:", "Thank you.") + grammar1 = obj.cached_method("Computer, tell me a joke.") + grammar2 = obj.cached_method("Computer, tell me a joke.") + assert grammar1 is grammar2 + + def test_miss_cache_when_instance_hash_changes(self): + obj = self.MyClass("You are a helpful AI. Do what the user asks:", "Thank you.") + grammar1 = obj.cached_method("Computer, tell me a joke.") + obj.suffix = "Thanks!" + grammar2 = obj.cached_method("Computer, tell me a joke.") + assert grammar1 is not grammar2 lm = guidance.models.Mock() - result = lm + obj1.method() - assert str(result) == "1" - obj1.x = 2 - result = lm + obj1.method() - assert str(result) == "1" - - def test_guidance_method_caches_with_hash(self): - obj1 = self.MyClassWithHash(1) - + assert ( + str(lm + grammar1) + == "You are a helpful AI. Do what the user asks:\nComputer, tell me a joke.\nThank you." + ) + assert ( + str(lm + grammar2) + == "You are a helpful AI. Do what the user asks:\nComputer, tell me a joke.\nThanks!" + ) + + def test_hit_cache_when_instance_hash_does_not_change(self): + """ + Note: this is a bit of a "gotcha" when using `cache=True` since users may expect changing the instance's attributes + will change the grammar. They _must_ implement __hash__ to ensure that the grammar is recalculated when the hash changes. + """ + obj = self.MyClass("You are a helpful AI. Do what the user asks:", "Thank you.") + grammar1 = obj.cached_method("Computer, tell me a joke.") + obj.delimiter = "\t" + grammar2 = obj.cached_method("Computer, tell me a joke.") + assert grammar1 is grammar2 + lm = guidance.models.Mock() + assert ( + str(lm + grammar1) + == "You are a helpful AI. Do what the user asks:\nComputer, tell me a joke.\nThank you." + ) + # Note that the delimiter is still the same as the first call since the hash didn't change + assert ( + str(lm + grammar2) + == "You are a helpful AI. Do what the user asks:\nComputer, tell me a joke.\nThank you." + ) + + def test_guidance_method_no_cache(self): + obj = self.MyClass("You are a helpful AI. Do what the user asks:", "Thank you.") + grammar1 = obj.uncached_method("Computer, tell me a joke.") + grammar2 = obj.uncached_method("Computer, tell me a joke.") + assert grammar1 is not grammar2 + lm = guidance.models.Mock() + assert str(lm + grammar1) == str(lm + grammar2) + + def test_guidance_method_no_cache_changes_when_instance_changes(self): + obj = self.MyClass("You are a helpful AI. Do what the user asks:", "Thank you.") + grammar1 = obj.uncached_method("Computer, tell me a joke.") + obj.delimiter = "\t" + grammar2 = obj.uncached_method("Computer, tell me a joke.") + assert grammar1 is not grammar2 lm = guidance.models.Mock() - result = lm + obj1.method() - assert str(result) == "1" - obj1.x = 2 - result = lm + obj1.method() - assert str(result) == "2" \ No newline at end of file + assert ( + str(lm + grammar1) + == "You are a helpful AI. Do what the user asks:\nComputer, tell me a joke.\nThank you." + ) + # Note that the delimiter is still the same as the first call since the hash didn't change + assert ( + str(lm + grammar2) + == "You are a helpful AI. Do what the user asks:\tComputer, tell me a joke.\tThank you." + ) From 67d541eb201ae6292ddf2a7f1ac46c7d3fba5817 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Mon, 23 Sep 2024 11:39:10 -0700 Subject: [PATCH 16/54] Test that dedenting works with methods --- tests/unit/test_decorator.py | 107 +++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/tests/unit/test_decorator.py b/tests/unit/test_decorator.py index 07a144ca0..82151bcbd 100644 --- a/tests/unit/test_decorator.py +++ b/tests/unit/test_decorator.py @@ -235,3 +235,110 @@ def test_guidance_method_no_cache_changes_when_instance_changes(self): str(lm + grammar2) == "You are a helpful AI. Do what the user asks:\tComputer, tell me a joke.\tThank you." ) + + +class TestGuidanceMethodDedent: + def test_dedent_basic(self): + class MyClass: + @guidance(stateless=True, dedent=True) + def dedent_method(self, lm): + lm += f"""\ + {{ + "name": "{1+1}", + "age": "{gen('name', stop='"', max_tokens=1)}", + }}""" + return lm + + obj = MyClass() + grammar = obj.dedent_method() + lm = guidance.models.Mock() + result = lm + grammar + assert str(result).startswith("{") + + def test_basic_multiline_fstring(self): + class MyClass: + @guidance(stateless=True, dedent=True) + def dedent_method(self, lm): + lm += f"""\ + {{ + "name": "{'har' + 'sha'}", + "age": "{314}", + }}""" + return lm + + obj = MyClass() + grammar = obj.dedent_method() + lm = guidance.models.Mock() + result = lm + grammar + assert str(result) == '{\n "name": "harsha",\n "age": "314",\n}' + + def test_mixed_content(self): + class MyClass: + @guidance(stateless=True, dedent=True) + def dedent_method(self, lm): + s = "Regular string\n" + s += f"""\ + {{ + "name": "{'har' + 'sha'}", + "age": "{314}", + }}""" + lm += s + return lm + + obj = MyClass() + grammar = obj.dedent_method() + lm = guidance.models.Mock() + result = lm + grammar + assert str(result) == 'Regular string\n{\n "name": "harsha",\n "age": "314",\n}' + + def test_non_fstring_multiline(self): + class MyClass: + @guidance(stateless=True, dedent=True) + def dedent_method(self, lm): + s = """\ + Line 1 + Line 2 + Line 3 + """ + lm += s + return lm + + obj = MyClass() + grammar = obj.dedent_method() + lm = guidance.models.Mock() + result = lm + grammar + assert str(result) == "Line 1\nLine 2\nLine 3\n" + + def test_empty_strings(self): + class MyClass: + @guidance(stateless=True, dedent=True) + def dedent_method(self, lm): + s = f"""\ + {""}""" + lm += s + return lm + + obj = MyClass() + grammar = obj.dedent_method() + lm = guidance.models.Mock() + result = lm + grammar + assert str(result) == "" + + def test_inconsistent_indentation(self): + class MyClass: + @guidance(stateless=True, dedent=True) + def dedent_method(self, lm): + s = f"""\ + {{ + "name": "{'har' + 'sha'}", + "age": "{314}", + "weapon": "{'sword'}" + }}""" + lm += s + return lm + + obj = MyClass() + grammar = obj.dedent_method() + lm = guidance.models.Mock() + result = lm + grammar + assert str(result) == '{\n"name": "harsha",\n "age": "314",\n"weapon": "sword"\n}' From 0be7ea50cc05b5a38f35360380e6bd3d41045311 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Mon, 23 Sep 2024 11:41:27 -0700 Subject: [PATCH 17/54] Update comment --- tests/unit/test_decorator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_decorator.py b/tests/unit/test_decorator.py index 82151bcbd..c030ad893 100644 --- a/tests/unit/test_decorator.py +++ b/tests/unit/test_decorator.py @@ -230,7 +230,7 @@ def test_guidance_method_no_cache_changes_when_instance_changes(self): str(lm + grammar1) == "You are a helpful AI. Do what the user asks:\nComputer, tell me a joke.\nThank you." ) - # Note that the delimiter is still the same as the first call since the hash didn't change + # Note that the delimiter actually changes because the instance changed and we're not calling the cached method assert ( str(lm + grammar2) == "You are a helpful AI. Do what the user asks:\tComputer, tell me a joke.\tThank you." From 41b1be50060da3c5a837d89cab3163a6ca0e47d0 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Mon, 23 Sep 2024 12:08:16 -0700 Subject: [PATCH 18/54] reprs --- guidance/_guidance.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 887e71d38..802cbfe0e 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -72,6 +72,9 @@ def __get__(self, instance, owner=None, /): owner=owner, ) + def __repr__(self): + return f"" + class GuidanceMethod(GuidanceFunction): def __init__(self, f, *, stateless=False, model=Model, instance, owner): @@ -87,6 +90,9 @@ def __init__(self, f, *, stateless=False, model=Model, instance, owner): def __get__(self, instance, owner=None, /): raise AttributeError("GuidanceMethod is already bound to an instance") + def __repr__(self): + return f"" + _null_grammar = string("") From 91ff5cf1b2941407cb236c10d3ad198cfa93e2cc Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Mon, 23 Sep 2024 12:08:57 -0700 Subject: [PATCH 19/54] copy more metadata with strip_multiline_string_indents --- guidance/_utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/guidance/_utils.py b/guidance/_utils.py index 093873da9..5d3a90635 100644 --- a/guidance/_utils.py +++ b/guidance/_utils.py @@ -113,6 +113,10 @@ def strip_multiline_string_indents(f): closure=f.__closure__, ) new_f.__kwdefaults__ = f.__kwdefaults__ + new_f.__qualname__ = f.__qualname__ + new_f.__annotations__ = f.__annotations__ + new_f.__doc__ = f.__doc__ + new_f.__module__ = f.__module__ return new_f class CaptureEvents: From 90fb9efe9d89461c47089ab9b0f5d846a7bf0d86 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Mon, 23 Sep 2024 16:02:36 -0700 Subject: [PATCH 20/54] add explicit tests for recursion issues (failing for method) --- tests/unit/test_decorator.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/unit/test_decorator.py b/tests/unit/test_decorator.py index c030ad893..036b5c358 100644 --- a/tests/unit/test_decorator.py +++ b/tests/unit/test_decorator.py @@ -342,3 +342,19 @@ def dedent_method(self, lm): lm = guidance.models.Mock() result = lm + grammar assert str(result) == '{\n"name": "harsha",\n "age": "314",\n"weapon": "sword"\n}' + +class TestGuidanceRecursion: + class MyClass: + @guidance(stateless=True, dedent=False) + def recursive(self, lm): + return lm + guidance.select(["a", self.recursive()]) + + def test_method_recursion(self): + assert self.MyClass().recursive() is not None + + def test_function_recursion(self): + @guidance(stateless=True, dedent=False) + def recursive(lm): + return lm + guidance.select(["a", recursive()]) + + assert recursive() is not None From 40417f286e22181d092781a785b9aeb163f67cfe Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Mon, 23 Sep 2024 16:02:44 -0700 Subject: [PATCH 21/54] fix method recursion --- guidance/_guidance.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 802cbfe0e..a1b085bf8 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -55,11 +55,13 @@ def __init__( self._wrapper = None def __call__(self, *args, **kwargs): - # "Cache" the wrapped function + # "Cache" the wrapped function (needed for recursive calls) if self._wrapper is None: self._wrapper = _decorator(self.f, stateless=self.stateless, model=self.model) return self._wrapper(*args, **kwargs) + # Cache the bound method (needed for recursive calls) + @functools.cache def __get__(self, instance, owner=None, /): if instance is None: return self From 556b4da456256bc8c502d9845d7f53b497125bff Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 24 Sep 2024 09:54:57 -0700 Subject: [PATCH 22/54] move cache down into __init__ so we cache either functions or bound methods (no unbound methods) --- guidance/_guidance.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index c826f919f..636c64e04 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -27,11 +27,7 @@ def guidance( if dedent is True or dedent == "python": f = strip_multiline_string_indents(f) - # we cache if requested - if cache: - f = functools.cache(f) - - return GuidanceFunction(f, stateless=stateless, model=model) + return GuidanceFunction(f, stateless=stateless, cache=cache, model=model) class GuidanceFunction: @@ -40,8 +36,13 @@ def __init__( f, *, stateless = False, + cache = False, model = Model, ): + # we cache the function itself if requested + if cache: + f = functools.cache(f) + # Update self with the wrapped function's metadata functools.update_wrapper(self, f) # Remove the first argument from the wrapped function @@ -80,11 +81,12 @@ def __repr__(self): class GuidanceMethod(GuidanceFunction): - def __init__(self, f, *, stateless=False, model=Model, instance, owner): + def __init__(self, f, *, stateless=False, cache=False, model=Model, instance, owner): super().__init__( f.__get__(instance, owner), stateless=stateless, - model=model + cache=cache, + model=model, ) # Save the instance and owner for introspection self._instance = instance From f48e8df48b8bcb62a6c55d3866d5214d027bce5c Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 24 Sep 2024 10:25:02 -0700 Subject: [PATCH 23/54] Use a weak cache for GuidanceFunction.__get__ --- guidance/_guidance.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 636c64e04..082895de7 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -1,6 +1,8 @@ import functools import inspect import threading +from typing import Any +import weakref from ._grammar import DeferredReference, RawFunction, Terminal, string from ._utils import strip_multiline_string_indents @@ -55,6 +57,7 @@ def __init__( self.stateless = stateless self.model = model self._wrapper = None + self._methods: weakref.WeakKeyDictionary[Any, GuidanceMethod] = weakref.WeakKeyDictionary() def __call__(self, *args, **kwargs): # "Cache" the wrapped function (needed for recursive calls) @@ -62,19 +65,25 @@ def __call__(self, *args, **kwargs): self._wrapper = _decorator(self.f, stateless=self.stateless, model=self.model) return self._wrapper(*args, **kwargs) - # Cache the bound method (needed for recursive calls) - @functools.cache def __get__(self, instance, owner=None, /): + """ + Return a GuidanceMethod bound to the instance. GuidanceMethods are cached in a WeakKeyDictionary on a per-instance basis. + """ if instance is None: return self - return GuidanceMethod( - self.f, - stateless=self.stateless, - model=self.model, - instance=instance, - owner=owner, - ) + # On cache miss, create a new GuidanceMethod + if instance not in self._methods: + method = GuidanceMethod( + self.f, + stateless=self.stateless, + model=self.model, + instance=instance, + owner=owner, + ) + self._methods[instance] = method + + return self._methods[instance] def __repr__(self): return f"" From c4fba1269c839ae9b63b41a21ec76b19dfa475d4 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 24 Sep 2024 10:25:53 -0700 Subject: [PATCH 24/54] Add tests for GuidanceMethod garbage collection --- tests/unit/test_decorator.py | 39 ++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/unit/test_decorator.py b/tests/unit/test_decorator.py index 036b5c358..d2816a152 100644 --- a/tests/unit/test_decorator.py +++ b/tests/unit/test_decorator.py @@ -1,4 +1,7 @@ import pytest +import weakref +import gc + import guidance from guidance import gen @@ -358,3 +361,39 @@ def recursive(lm): return lm + guidance.select(["a", recursive()]) assert recursive() is not None + +class TestMethodGarbageCollection: + class MyClass: + @guidance(cache=True) + def cached_method(self, lm): + return lm + + @guidance(cache=False) + def uncached_method(self, lm): + return lm + + def test_garbage_collection_cached_method(self): + obj = self.MyClass() + # Create a weak reference to the object + obj_ref = weakref.ref(obj) + # Call the cached method + _ = obj.cached_method() + # Delete the hard ref to the obj + del obj + # Run garbage collection + gc.collect() + # Check if the object was garbage collected + assert obj_ref() is None + + def test_garbage_collection_uncached_method(self): + obj = self.MyClass() + # Create a weak reference to the object + obj_ref = weakref.ref(obj) + # Call the uncached method + _ = obj.uncached_method() + # Delete the hard ref to the obj + del obj + # Run garbage collection + gc.collect() + # Check if the object was garbage collected + assert obj_ref() is None From 0a42cc9c63b704f5bdc488b116535ce2518c2487 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 24 Sep 2024 14:42:52 -0700 Subject: [PATCH 25/54] Move cache back up --- guidance/_guidance.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 082895de7..dc881ec33 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -29,7 +29,11 @@ def guidance( if dedent is True or dedent == "python": f = strip_multiline_string_indents(f) - return GuidanceFunction(f, stateless=stateless, cache=cache, model=model) + # we cache the function itself if requested + if cache: + f = functools.cache(f) + + return GuidanceFunction(f, stateless=stateless, model=model) class GuidanceFunction: @@ -38,13 +42,8 @@ def __init__( f, *, stateless = False, - cache = False, model = Model, ): - # we cache the function itself if requested - if cache: - f = functools.cache(f) - # Update self with the wrapped function's metadata functools.update_wrapper(self, f) # Remove the first argument from the wrapped function @@ -90,11 +89,10 @@ def __repr__(self): class GuidanceMethod(GuidanceFunction): - def __init__(self, f, *, stateless=False, cache=False, model=Model, instance, owner): + def __init__(self, f, *, stateless=False, model=Model, instance, owner): super().__init__( f.__get__(instance, owner), stateless=stateless, - cache=cache, model=model, ) # Save the instance and owner for introspection From ffa354dc33f302fcb6022a8b83b130630c176d60 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 24 Sep 2024 14:51:12 -0700 Subject: [PATCH 26/54] Clunky fix to get gc working --- guidance/_guidance.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index dc881ec33..b4d84db34 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -90,20 +90,29 @@ def __repr__(self): class GuidanceMethod(GuidanceFunction): def __init__(self, f, *, stateless=False, model=Model, instance, owner): + bound_f = f.__get__(instance, owner) + _weak_bound_f = weakref.WeakMethod(bound_f) + # Copy metadata from bound function (has signature without self arg) + @functools.wraps(bound_f) + def weak_bound_f(*args, **kwargs): + return _weak_bound_f()(*args, **kwargs) + # Replace the bound __wrapped__ function with the original function (which doesn't have hard references to the instance) + weak_bound_f.__wrapped__ = f + super().__init__( - f.__get__(instance, owner), + weak_bound_f, stateless=stateless, model=model, ) # Save the instance and owner for introspection - self._instance = instance - self._owner = owner + self._instance = weakref.ref(instance) + self._owner = weakref.ref(owner) def __get__(self, instance, owner=None, /): raise AttributeError("GuidanceMethod is already bound to an instance") def __repr__(self): - return f"" + return f"" _null_grammar = string("") From 737919103a045bdb8bccffda89812adfaf6cd104 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 24 Sep 2024 15:08:04 -0700 Subject: [PATCH 27/54] Clean up a bit --- guidance/_guidance.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index b4d84db34..5670bcf50 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -90,17 +90,8 @@ def __repr__(self): class GuidanceMethod(GuidanceFunction): def __init__(self, f, *, stateless=False, model=Model, instance, owner): - bound_f = f.__get__(instance, owner) - _weak_bound_f = weakref.WeakMethod(bound_f) - # Copy metadata from bound function (has signature without self arg) - @functools.wraps(bound_f) - def weak_bound_f(*args, **kwargs): - return _weak_bound_f()(*args, **kwargs) - # Replace the bound __wrapped__ function with the original function (which doesn't have hard references to the instance) - weak_bound_f.__wrapped__ = f - super().__init__( - weak_bound_f, + f, stateless=stateless, model=model, ) @@ -108,6 +99,17 @@ def weak_bound_f(*args, **kwargs): self._instance = weakref.ref(instance) self._owner = weakref.ref(owner) + def __call__(self, *args, **kwargs): + # "Cache" the wrapped function (needed for recursive calls) + if self._wrapper is None: + def weak_bound_f(*args, **kwargs): + if self._instance() is None: + raise ReferenceError("Weak reference to instance is dead") + bound_f = self.f.__get__(self._instance(), self._owner()) + return bound_f(*args, **kwargs) + self._wrapper = _decorator(weak_bound_f, stateless=self.stateless, model=self.model) + return self._wrapper(*args, **kwargs) + def __get__(self, instance, owner=None, /): raise AttributeError("GuidanceMethod is already bound to an instance") From 98f80b41b41f47eb4f196b80901a94a4666d9289 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 24 Sep 2024 15:54:45 -0700 Subject: [PATCH 28/54] Add tests to make sure we got the signature right --- tests/unit/test_decorator.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/unit/test_decorator.py b/tests/unit/test_decorator.py index d2816a152..4ffb08a3e 100644 --- a/tests/unit/test_decorator.py +++ b/tests/unit/test_decorator.py @@ -397,3 +397,20 @@ def test_garbage_collection_uncached_method(self): gc.collect() # Check if the object was garbage collected assert obj_ref() is None + + +class TestSignature: + @guidance(stateless=True) + def func(lm, a, b=1, *, c, d=2): + return lm + + class MyClass: + @guidance(stateless=True) + def method(self, lm, a, b=1, *, c, d=2): + return lm + + def test_function_signature(self): + assert list(self.func.__signature__.parameters.keys()) == ["a", "b", "c", "d"] + + def test_method_signature(self): + assert list(self.MyClass().method.__signature__.parameters.keys()) == ["a", "b", "c", "d"] From acc939c0126e450e0a1da56b6ccec57274100b1e Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 24 Sep 2024 16:17:56 -0700 Subject: [PATCH 29/54] Cleaner --- guidance/_guidance.py | 15 ++------------- guidance/_utils.py | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 5670bcf50..05d979b07 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -5,7 +5,7 @@ import weakref from ._grammar import DeferredReference, RawFunction, Terminal, string -from ._utils import strip_multiline_string_indents +from ._utils import strip_multiline_string_indents, make_weak_bound_method from .models import Model @@ -91,7 +91,7 @@ def __repr__(self): class GuidanceMethod(GuidanceFunction): def __init__(self, f, *, stateless=False, model=Model, instance, owner): super().__init__( - f, + make_weak_bound_method(f, instance), stateless=stateless, model=model, ) @@ -99,17 +99,6 @@ def __init__(self, f, *, stateless=False, model=Model, instance, owner): self._instance = weakref.ref(instance) self._owner = weakref.ref(owner) - def __call__(self, *args, **kwargs): - # "Cache" the wrapped function (needed for recursive calls) - if self._wrapper is None: - def weak_bound_f(*args, **kwargs): - if self._instance() is None: - raise ReferenceError("Weak reference to instance is dead") - bound_f = self.f.__get__(self._instance(), self._owner()) - return bound_f(*args, **kwargs) - self._wrapper = _decorator(weak_bound_f, stateless=self.stateless, model=self.model) - return self._wrapper(*args, **kwargs) - def __get__(self, instance, owner=None, /): raise AttributeError("GuidanceMethod is already bound to an instance") diff --git a/guidance/_utils.py b/guidance/_utils.py index 5d3a90635..808c7b749 100644 --- a/guidance/_utils.py +++ b/guidance/_utils.py @@ -6,6 +6,8 @@ import sys import textwrap import types +import weakref +import functools import numpy as np @@ -119,6 +121,19 @@ def strip_multiline_string_indents(f): new_f.__module__ = f.__module__ return new_f +def make_weak_bound_method(f, instance): + weak_instance = weakref.ref(instance) + + @functools.wraps(f) # ish + def weak_bound_f(*args, **kwargs): + instance = weak_instance() + if instance is None: + raise ReferenceError("Weak reference to instance is dead") + bound_f = types.MethodType(f, instance) + return bound_f(*args, **kwargs) + + return weak_bound_f + class CaptureEvents: """Creates a scope where all the events are captured in a queue. From 1fd4595dc6effdb4dcb816fb6c45093382934ef3 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 24 Sep 2024 16:27:30 -0700 Subject: [PATCH 30/54] signature_pop helper util --- guidance/_guidance.py | 9 +++------ guidance/_utils.py | 7 +++++++ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 05d979b07..7cfa6662c 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -5,7 +5,7 @@ import weakref from ._grammar import DeferredReference, RawFunction, Terminal, string -from ._utils import strip_multiline_string_indents, make_weak_bound_method +from ._utils import strip_multiline_string_indents, make_weak_bound_method, signature_pop from .models import Model @@ -46,11 +46,8 @@ def __init__( ): # Update self with the wrapped function's metadata functools.update_wrapper(self, f) - # Remove the first argument from the wrapped function - signature = inspect.signature(f) - params = list(signature.parameters.values()) - params.pop(0) - self.__signature__ = signature.replace(parameters=params) + # Remove the first argument from the wrapped function since we're going to drop the `lm` argument + self.__signature__ = signature_pop(inspect.signature(f), 0) self.f = f self.stateless = stateless diff --git a/guidance/_utils.py b/guidance/_utils.py index 808c7b749..259d7ec6d 100644 --- a/guidance/_utils.py +++ b/guidance/_utils.py @@ -132,8 +132,15 @@ def weak_bound_f(*args, **kwargs): bound_f = types.MethodType(f, instance) return bound_f(*args, **kwargs) + # remove the first argument from the wrapped function since it is now bound + weak_bound_f.__signature__ = signature_pop(inspect.signature(f), 0) return weak_bound_f +def signature_pop(signature, index): + params = list(signature.parameters.values()) + params.pop(index) + return signature.replace(parameters=params) + class CaptureEvents: """Creates a scope where all the events are captured in a queue. From 12961837023e017bcde0d75d0117aea232a22853 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 24 Sep 2024 16:27:42 -0700 Subject: [PATCH 31/54] improve signature tests --- tests/unit/test_decorator.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/tests/unit/test_decorator.py b/tests/unit/test_decorator.py index 4ffb08a3e..d6f936472 100644 --- a/tests/unit/test_decorator.py +++ b/tests/unit/test_decorator.py @@ -1,6 +1,7 @@ import pytest import weakref import gc +import inspect import guidance from guidance import gen @@ -400,17 +401,20 @@ def test_garbage_collection_uncached_method(self): class TestSignature: - @guidance(stateless=True) - def func(lm, a, b=1, *, c, d=2): - return lm - - class MyClass: + def test_function_signature(self): + def func(a, b=1, *, c, d=2): + pass @guidance(stateless=True) - def method(self, lm, a, b=1, *, c, d=2): + def guidance_func(lm, a, b=1, *, c, d=2): return lm - - def test_function_signature(self): - assert list(self.func.__signature__.parameters.keys()) == ["a", "b", "c", "d"] + assert inspect.signature(guidance_func) == inspect.signature(func) def test_method_signature(self): - assert list(self.MyClass().method.__signature__.parameters.keys()) == ["a", "b", "c", "d"] + class MyClass: + def method(self, a, b=1, *, c, d=2): + pass + @guidance(stateless=True) + def guidance_method(self, lm, a, b=1, *, c, d=2): + pass + obj = MyClass() + assert inspect.signature(obj.guidance_method) == inspect.signature(obj.method) From 2d0f98d0b4854e231278aee5b7ef0920c458e45e Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 24 Sep 2024 16:29:17 -0700 Subject: [PATCH 32/54] drop unused owner --- guidance/_guidance.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 7cfa6662c..6e664e507 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -75,7 +75,6 @@ def __get__(self, instance, owner=None, /): stateless=self.stateless, model=self.model, instance=instance, - owner=owner, ) self._methods[instance] = method @@ -86,7 +85,7 @@ def __repr__(self): class GuidanceMethod(GuidanceFunction): - def __init__(self, f, *, stateless=False, model=Model, instance, owner): + def __init__(self, f, *, stateless=False, model=Model, instance): super().__init__( make_weak_bound_method(f, instance), stateless=stateless, @@ -94,7 +93,6 @@ def __init__(self, f, *, stateless=False, model=Model, instance, owner): ) # Save the instance and owner for introspection self._instance = weakref.ref(instance) - self._owner = weakref.ref(owner) def __get__(self, instance, owner=None, /): raise AttributeError("GuidanceMethod is already bound to an instance") From e26cb76b36cf7bfdda36d58b2f66e4f0ed339ab5 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 24 Sep 2024 16:31:50 -0700 Subject: [PATCH 33/54] additional cache miss test --- tests/unit/test_decorator.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/unit/test_decorator.py b/tests/unit/test_decorator.py index d6f936472..a46b8a081 100644 --- a/tests/unit/test_decorator.py +++ b/tests/unit/test_decorator.py @@ -178,6 +178,21 @@ def test_guidance_method_cache(self): grammar2 = obj.cached_method("Computer, tell me a joke.") assert grammar1 is grammar2 + def test_miss_cache_when_args_change(self): + obj = self.MyClass("You are a helpful AI. Do what the user asks:", "Thank you.") + grammar1 = obj.cached_method("Computer, tell me a joke.") + grammar2 = obj.cached_method("Computer, tell me a riddle.") + assert grammar1 is not grammar2 + lm = guidance.models.Mock() + assert ( + str(lm + grammar1) + == "You are a helpful AI. Do what the user asks:\nComputer, tell me a joke.\nThank you." + ) + assert ( + str(lm + grammar2) + == "You are a helpful AI. Do what the user asks:\nComputer, tell me a riddle.\nThank you." + ) + def test_miss_cache_when_instance_hash_changes(self): obj = self.MyClass("You are a helpful AI. Do what the user asks:", "Thank you.") grammar1 = obj.cached_method("Computer, tell me a joke.") From bafa37f09d323d67727f7f1167b7c6c734efae5e Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 24 Sep 2024 17:30:23 -0700 Subject: [PATCH 34/54] add failing test (good, I was confused about why this worked) --- tests/unit/test_decorator.py | 45 ++++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_decorator.py b/tests/unit/test_decorator.py index a46b8a081..5b8055e24 100644 --- a/tests/unit/test_decorator.py +++ b/tests/unit/test_decorator.py @@ -380,13 +380,19 @@ def recursive(lm): class TestMethodGarbageCollection: class MyClass: + def __init__(self, thing: str = "Hello"): + self.thing = thing + + def __hash__(self): + return hash(self.thing) + @guidance(cache=True) - def cached_method(self, lm): - return lm + def cached_method(self, lm, *args): + return lm + self.thing @guidance(cache=False) - def uncached_method(self, lm): - return lm + def uncached_method(self, lm, *args): + return lm + self.thing def test_garbage_collection_cached_method(self): obj = self.MyClass() @@ -401,6 +407,37 @@ def test_garbage_collection_cached_method(self): # Check if the object was garbage collected assert obj_ref() is None + def test_garbage_collection_cached_method_2(self): + # Not sure why I couldn't trigger the issue in the other tests... + class MyClass: + def __init__(self, prefix: str, suffix: str): + self.prefix = prefix + self.suffix = suffix + self.delimiter = "\n" + + # def __hash__(self): + # # Intentionally leave out self.delimiter so we can mess with it later + # return hash((self.prefix, self.suffix)) + + @guidance(stateless=True, cache=True) + def cached_method(self, lm, middle: str): + return lm + self.delimiter.join([self.prefix, middle, self.suffix]) + obj = MyClass("You are a helpful AI. Do what the user asks:", "Thank you.") + + # Create a weak reference to the object + obj_ref = weakref.ref(obj) + + # Call the cached method + grammar1 = obj.cached_method("Computer, tell me a joke.") + obj.suffix = "Thanks!" + grammar2 = obj.cached_method("Computer, tell me a joke.") + # Delete the hard ref to the obj + del obj + # Run garbage collection + gc.collect() + # Check if the object was garbage collected + assert obj_ref() is None + def test_garbage_collection_uncached_method(self): obj = self.MyClass() # Create a weak reference to the object From 71aa3d879ac6ab9afe16c67e7942cb105b29694e Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 24 Sep 2024 17:54:21 -0700 Subject: [PATCH 35/54] Move cache back down but be more careful this time --- guidance/_guidance.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 6e664e507..97722998c 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -29,11 +29,7 @@ def guidance( if dedent is True or dedent == "python": f = strip_multiline_string_indents(f) - # we cache the function itself if requested - if cache: - f = functools.cache(f) - - return GuidanceFunction(f, stateless=stateless, model=model) + return GuidanceFunction(f, stateless=stateless, cache=cache, model=model) class GuidanceFunction: @@ -42,8 +38,14 @@ def __init__( f, *, stateless = False, + cache = False, model = Model, ): + # we cache the function itself if requested + # do this before updating the wrapper so we can maintain the __wrapped__ chain + if cache: + f = functools.cache(f) + # Update self with the wrapped function's metadata functools.update_wrapper(self, f) # Remove the first argument from the wrapped function since we're going to drop the `lm` argument @@ -51,6 +53,7 @@ def __init__( self.f = f self.stateless = stateless + self.cache = cache self.model = model self._wrapper = None self._methods: weakref.WeakKeyDictionary[Any, GuidanceMethod] = weakref.WeakKeyDictionary() @@ -71,8 +74,10 @@ def __get__(self, instance, owner=None, /): # On cache miss, create a new GuidanceMethod if instance not in self._methods: method = GuidanceMethod( - self.f, + # Don't cache twice (in particular, we need to cache a weak_bound_method version downstairs) + self.f if not self.cache else self.f.__wrapped__, stateless=self.stateless, + cache=self.cache, model=self.model, instance=instance, ) @@ -85,10 +90,11 @@ def __repr__(self): class GuidanceMethod(GuidanceFunction): - def __init__(self, f, *, stateless=False, model=Model, instance): + def __init__(self, f, *, stateless=False, cache=False, model=Model, instance): super().__init__( make_weak_bound_method(f, instance), stateless=stateless, + cache=cache, model=model, ) # Save the instance and owner for introspection From 64e95e1ff98fe95e965c8e8a33ae532934bd192a Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 24 Sep 2024 17:55:41 -0700 Subject: [PATCH 36/54] Make sure to invalidate the cache when the instance's hash changes --- guidance/_guidance.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 97722998c..241b2e1de 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -57,6 +57,7 @@ def __init__( self.model = model self._wrapper = None self._methods: weakref.WeakKeyDictionary[Any, GuidanceMethod] = weakref.WeakKeyDictionary() + self._method_hashes: weakref.WeakKeyDictionary[Any, int] = weakref.WeakKeyDictionary() def __call__(self, *args, **kwargs): # "Cache" the wrapped function (needed for recursive calls) @@ -71,8 +72,8 @@ def __get__(self, instance, owner=None, /): if instance is None: return self - # On cache miss, create a new GuidanceMethod - if instance not in self._methods: + # On cache miss, create a new GuidanceMethod. Make sure that changing the hash of the instance invalidates the cache. + if instance not in self._methods or hash(instance) != self._method_hashes.get(instance): method = GuidanceMethod( # Don't cache twice (in particular, we need to cache a weak_bound_method version downstairs) self.f if not self.cache else self.f.__wrapped__, @@ -82,6 +83,7 @@ def __get__(self, instance, owner=None, /): instance=instance, ) self._methods[instance] = method + self._method_hashes[instance] = hash(instance) return self._methods[instance] From 480479e5ab0af4735e80985f4519f0579a8f4997 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Tue, 24 Sep 2024 17:55:56 -0700 Subject: [PATCH 37/54] Something terrifying is going on --- guidance/_guidance.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 241b2e1de..f8e150a2c 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -84,6 +84,8 @@ def __get__(self, instance, owner=None, /): ) self._methods[instance] = method self._method_hashes[instance] = hash(instance) + # Why do I need this? Am I the only reference to the instance?? + return method return self._methods[instance] From 5c5dcbfe6a5c82f65f03e0a562388252d3ebc3c1 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 25 Sep 2024 09:44:28 -0700 Subject: [PATCH 38/54] Only the return was needed. Still terrifying. --- guidance/_guidance.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index f8e150a2c..8ad0d23f3 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -57,7 +57,6 @@ def __init__( self.model = model self._wrapper = None self._methods: weakref.WeakKeyDictionary[Any, GuidanceMethod] = weakref.WeakKeyDictionary() - self._method_hashes: weakref.WeakKeyDictionary[Any, int] = weakref.WeakKeyDictionary() def __call__(self, *args, **kwargs): # "Cache" the wrapped function (needed for recursive calls) @@ -73,7 +72,7 @@ def __get__(self, instance, owner=None, /): return self # On cache miss, create a new GuidanceMethod. Make sure that changing the hash of the instance invalidates the cache. - if instance not in self._methods or hash(instance) != self._method_hashes.get(instance): + if instance not in self._methods: method = GuidanceMethod( # Don't cache twice (in particular, we need to cache a weak_bound_method version downstairs) self.f if not self.cache else self.f.__wrapped__, @@ -83,7 +82,6 @@ def __get__(self, instance, owner=None, /): instance=instance, ) self._methods[instance] = method - self._method_hashes[instance] = hash(instance) # Why do I need this? Am I the only reference to the instance?? return method From 7410338dfb6400eff586cc8032fd602467dd21c6 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 25 Sep 2024 10:43:33 -0700 Subject: [PATCH 39/54] Resolve existential terror --- guidance/_guidance.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 8ad0d23f3..ac51962f1 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -56,7 +56,7 @@ def __init__( self.cache = cache self.model = model self._wrapper = None - self._methods: weakref.WeakKeyDictionary[Any, GuidanceMethod] = weakref.WeakKeyDictionary() + self._methods: dict[Any, GuidanceMethod] = {} def __call__(self, *args, **kwargs): # "Cache" the wrapped function (needed for recursive calls) @@ -71,8 +71,11 @@ def __get__(self, instance, owner=None, /): if instance is None: return self - # On cache miss, create a new GuidanceMethod. Make sure that changing the hash of the instance invalidates the cache. - if instance not in self._methods: + # Cache needs to be sensitive to both the instance's identity and its hash + key = (hash(instance), id(instance)) + + # On cache miss, create a new GuidanceMethod. + if key not in self._methods: method = GuidanceMethod( # Don't cache twice (in particular, we need to cache a weak_bound_method version downstairs) self.f if not self.cache else self.f.__wrapped__, @@ -81,11 +84,8 @@ def __get__(self, instance, owner=None, /): model=self.model, instance=instance, ) - self._methods[instance] = method - # Why do I need this? Am I the only reference to the instance?? - return method - - return self._methods[instance] + self._methods[key] = method + return self._methods[key] def __repr__(self): return f"" From 0a06899094f9f911408e20aad2aa42971bba8587 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 25 Sep 2024 10:45:53 -0700 Subject: [PATCH 40/54] Use WeakMethod --- guidance/_utils.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/guidance/_utils.py b/guidance/_utils.py index 259d7ec6d..913eb326a 100644 --- a/guidance/_utils.py +++ b/guidance/_utils.py @@ -122,15 +122,15 @@ def strip_multiline_string_indents(f): return new_f def make_weak_bound_method(f, instance): - weak_instance = weakref.ref(instance) - + weak_method = weakref.WeakMethod( + types.MethodType(f, instance) + ) @functools.wraps(f) # ish def weak_bound_f(*args, **kwargs): - instance = weak_instance() - if instance is None: - raise ReferenceError("Weak reference to instance is dead") - bound_f = types.MethodType(f, instance) - return bound_f(*args, **kwargs) + method = weak_method() + if method is None: + raise ReferenceError(f"Weak reference to method is dead: {f}") + return method(*args, **kwargs) # remove the first argument from the wrapped function since it is now bound weak_bound_f.__signature__ = signature_pop(inspect.signature(f), 0) From 486cbe3c36542bbd93d7fc4784810488ed350498 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 25 Sep 2024 10:46:29 -0700 Subject: [PATCH 41/54] Fix comment --- guidance/_guidance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index ac51962f1..3c1b865fe 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -99,7 +99,7 @@ def __init__(self, f, *, stateless=False, cache=False, model=Model, instance): cache=cache, model=model, ) - # Save the instance and owner for introspection + # Save the instance for introspection self._instance = weakref.ref(instance) def __get__(self, instance, owner=None, /): From 308a7c821d659cbf2e5d138c1af54e81b1ff4b6c Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 25 Sep 2024 11:25:39 -0700 Subject: [PATCH 42/54] make sure our _methods dict acts like a WeakKeyDictionary --- guidance/_guidance.py | 3 +++ tests/unit/test_decorator.py | 17 +++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 3c1b865fe..3bc4fe179 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -85,6 +85,9 @@ def __get__(self, instance, owner=None, /): instance=instance, ) self._methods[key] = method + # Ensure the method is removed from the cache when the instance is deleted + weakref.finalize(instance, self._methods.pop, key) + return self._methods[key] def __repr__(self): diff --git a/tests/unit/test_decorator.py b/tests/unit/test_decorator.py index 5b8055e24..232f5fe03 100644 --- a/tests/unit/test_decorator.py +++ b/tests/unit/test_decorator.py @@ -452,6 +452,23 @@ def test_garbage_collection_uncached_method(self): assert obj_ref() is None + def test_deleting_instance_lets_method_be_garbage_collected(self): + obj = self.MyClass() + # Create a weak reference to the object + obj_ref = weakref.ref(obj) + # Create a weak reference to the cached method + meth_ref = weakref.ref(obj.cached_method) + # Quick sanity check (real methods need weakref.WeakMethod, but we're a bit different) + gc.collect() + assert meth_ref() is not None + # Delete the hard ref to the obj + del obj + # Run garbage collection + gc.collect() + # Check if the object was garbage collected + assert meth_ref() is None + + class TestSignature: def test_function_signature(self): def func(a, b=1, *, c, d=2): From 42078b17a54377d45018313076b4c00ce39262af Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 25 Sep 2024 11:42:08 -0700 Subject: [PATCH 43/54] Remove extra test case (found and fixed why I couldn't replicate issue with other tests before) --- tests/unit/test_decorator.py | 34 ++-------------------------------- 1 file changed, 2 insertions(+), 32 deletions(-) diff --git a/tests/unit/test_decorator.py b/tests/unit/test_decorator.py index 232f5fe03..011256a25 100644 --- a/tests/unit/test_decorator.py +++ b/tests/unit/test_decorator.py @@ -386,11 +386,11 @@ def __init__(self, thing: str = "Hello"): def __hash__(self): return hash(self.thing) - @guidance(cache=True) + @guidance(stateless=True, cache=True) def cached_method(self, lm, *args): return lm + self.thing - @guidance(cache=False) + @guidance(stateless=True, cache=False) def uncached_method(self, lm, *args): return lm + self.thing @@ -407,36 +407,6 @@ def test_garbage_collection_cached_method(self): # Check if the object was garbage collected assert obj_ref() is None - def test_garbage_collection_cached_method_2(self): - # Not sure why I couldn't trigger the issue in the other tests... - class MyClass: - def __init__(self, prefix: str, suffix: str): - self.prefix = prefix - self.suffix = suffix - self.delimiter = "\n" - - # def __hash__(self): - # # Intentionally leave out self.delimiter so we can mess with it later - # return hash((self.prefix, self.suffix)) - - @guidance(stateless=True, cache=True) - def cached_method(self, lm, middle: str): - return lm + self.delimiter.join([self.prefix, middle, self.suffix]) - obj = MyClass("You are a helpful AI. Do what the user asks:", "Thank you.") - - # Create a weak reference to the object - obj_ref = weakref.ref(obj) - - # Call the cached method - grammar1 = obj.cached_method("Computer, tell me a joke.") - obj.suffix = "Thanks!" - grammar2 = obj.cached_method("Computer, tell me a joke.") - # Delete the hard ref to the obj - del obj - # Run garbage collection - gc.collect() - # Check if the object was garbage collected - assert obj_ref() is None def test_garbage_collection_uncached_method(self): obj = self.MyClass() From 307a371499204cccc669a1e21991513c7c6faec7 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 25 Sep 2024 12:14:06 -0700 Subject: [PATCH 44/54] xfail because reference cycles are hard --- tests/unit/test_decorator.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/unit/test_decorator.py b/tests/unit/test_decorator.py index 011256a25..873846de2 100644 --- a/tests/unit/test_decorator.py +++ b/tests/unit/test_decorator.py @@ -439,6 +439,16 @@ def test_deleting_instance_lets_method_be_garbage_collected(self): assert meth_ref() is None + def test_deleting_instance_does_not_break_method(self): + # Reference to method but not instance + method = self.MyClass().cached_method + gc.collect() + try: + method() + except ReferenceError: + pytest.xfail(reason="This test is expected to fail due to the way weakrefs are handled in Python.") + + class TestSignature: def test_function_signature(self): def func(a, b=1, *, c, d=2): From d294cfa35347daed8952c477b5fbd8b918bbe5de Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 25 Sep 2024 14:00:09 -0700 Subject: [PATCH 45/54] Solve so many problems by taking heavy inspiration from WeakMethod --- guidance/_guidance.py | 76 +++++++++++++++++------------------- tests/unit/test_decorator.py | 8 ++-- 2 files changed, 38 insertions(+), 46 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 3bc4fe179..64ab4f9a6 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -41,28 +41,20 @@ def __init__( cache = False, model = Model, ): - # we cache the function itself if requested - # do this before updating the wrapper so we can maintain the __wrapped__ chain - if cache: - f = functools.cache(f) - - # Update self with the wrapped function's metadata - functools.update_wrapper(self, f) - # Remove the first argument from the wrapped function since we're going to drop the `lm` argument - self.__signature__ = signature_pop(inspect.signature(f), 0) - self.f = f self.stateless = stateless self.cache = cache self.model = model - self._wrapper = None + self._impl = _decorator(f, stateless=stateless, cache=cache, model=model) self._methods: dict[Any, GuidanceMethod] = {} + # Update self with the wrapped function's metadata + functools.update_wrapper(self, self._impl) + # Pretend to be one level of wrapping higher than we are + self.__wrapped__ = self._impl.__wrapped__ + def __call__(self, *args, **kwargs): - # "Cache" the wrapped function (needed for recursive calls) - if self._wrapper is None: - self._wrapper = _decorator(self.f, stateless=self.stateless, model=self.model) - return self._wrapper(*args, **kwargs) + return self._impl(*args, **kwargs) def __get__(self, instance, owner=None, /): """ @@ -75,38 +67,31 @@ def __get__(self, instance, owner=None, /): key = (hash(instance), id(instance)) # On cache miss, create a new GuidanceMethod. - if key not in self._methods: - method = GuidanceMethod( - # Don't cache twice (in particular, we need to cache a weak_bound_method version downstairs) - self.f if not self.cache else self.f.__wrapped__, - stateless=self.stateless, - cache=self.cache, - model=self.model, - instance=instance, - ) - self._methods[key] = method + try: + impl = self._methods[key] + except KeyError: + weak_method = make_weak_bound_method(self.f, instance) + impl = _decorator(weak_method, stateless=self.stateless, cache=self.cache, model=self.model) + self._methods[key] = impl # Ensure the method is removed from the cache when the instance is deleted - weakref.finalize(instance, self._methods.pop, key) - - return self._methods[key] + # weakref.finalize(instance, self._methods.pop, key) + return GuidanceMethod(impl, instance) def __repr__(self): return f"" +class GuidanceMethod: + def __init__(self, impl, instance): + self.__self__ = instance + self.__func__ = impl -class GuidanceMethod(GuidanceFunction): - def __init__(self, f, *, stateless=False, cache=False, model=Model, instance): - super().__init__( - make_weak_bound_method(f, instance), - stateless=stateless, - cache=cache, - model=model, - ) - # Save the instance for introspection - self._instance = weakref.ref(instance) + # Update self with the wrapped function's metadata + functools.update_wrapper(self, impl) + # Pretend to be one level of wrapping higher than we are + self.__wrapped__ = impl.__wrapped__ - def __get__(self, instance, owner=None, /): - raise AttributeError("GuidanceMethod is already bound to an instance") + def __call__(self, *args, **kwargs): + return self.__func__(*args, **kwargs) def __repr__(self): return f"" @@ -115,11 +100,17 @@ def __repr__(self): _null_grammar = string("") -def _decorator(f, *, stateless, model): +def _decorator(f, *, stateless, cache, model): + # we cache the function itself if requested + # do this before updating the wrapper so we can maintain the __wrapped__ chain + if cache: + f = functools.cache(f) + # Use thread local to store the reference to the grammar node for recursive calls # Otherwise, shared state between threads may otherwise trick us into thinking we are in a recursive call thread_local = threading.local() + @functools.wraps(f) def wrapped(*args, **kwargs): # make a stateless grammar if we can @@ -160,6 +151,9 @@ def wrapped(*args, **kwargs): # otherwise must be stateful (which means we can't be inside a select() call) else: return RawFunction(f, args, kwargs) + + # Remove the first argument from the wrapped function since we're going to drop the `lm` argument + wrapped.__signature__ = signature_pop(inspect.signature(f), 0) # attach this as a method of the model class (if given) # if model is not None: diff --git a/tests/unit/test_decorator.py b/tests/unit/test_decorator.py index 873846de2..0413a4914 100644 --- a/tests/unit/test_decorator.py +++ b/tests/unit/test_decorator.py @@ -427,7 +427,7 @@ def test_deleting_instance_lets_method_be_garbage_collected(self): # Create a weak reference to the object obj_ref = weakref.ref(obj) # Create a weak reference to the cached method - meth_ref = weakref.ref(obj.cached_method) + meth_ref = weakref.WeakMethod(obj.cached_method) # Quick sanity check (real methods need weakref.WeakMethod, but we're a bit different) gc.collect() assert meth_ref() is not None @@ -443,10 +443,8 @@ def test_deleting_instance_does_not_break_method(self): # Reference to method but not instance method = self.MyClass().cached_method gc.collect() - try: - method() - except ReferenceError: - pytest.xfail(reason="This test is expected to fail due to the way weakrefs are handled in Python.") + # Will raise a ReferenceError if the method is broken + method() class TestSignature: From 05e259af4e59dfdf7b2a7b2b84fb70e5769089d2 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 25 Sep 2024 14:26:55 -0700 Subject: [PATCH 46/54] Move impl cache to GuidanceMethod class --- guidance/_guidance.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 64ab4f9a6..e026f54d6 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -62,26 +62,16 @@ def __get__(self, instance, owner=None, /): """ if instance is None: return self - - # Cache needs to be sensitive to both the instance's identity and its hash - key = (hash(instance), id(instance)) - - # On cache miss, create a new GuidanceMethod. - try: - impl = self._methods[key] - except KeyError: - weak_method = make_weak_bound_method(self.f, instance) - impl = _decorator(weak_method, stateless=self.stateless, cache=self.cache, model=self.model) - self._methods[key] = impl - # Ensure the method is removed from the cache when the instance is deleted - # weakref.finalize(instance, self._methods.pop, key) - return GuidanceMethod(impl, instance) + return GuidanceMethod.from_guidance_function(self, instance) def __repr__(self): return f"" class GuidanceMethod: + impl_cache = {} def __init__(self, impl, instance): + # Make object that looks like a method. Note we keep a hard reference to the instance + # to keep it (and therefore our cached impl) alive as long as we are alive self.__self__ = instance self.__func__ = impl @@ -90,6 +80,20 @@ def __init__(self, impl, instance): # Pretend to be one level of wrapping higher than we are self.__wrapped__ = impl.__wrapped__ + @classmethod + def from_guidance_function(cls, guidance_function: GuidanceFunction, instance: Any) -> "GuidanceMethod": + key = (guidance_function.f, hash(instance), id(instance)) + try: + impl = cls.impl_cache[key] + except KeyError: + # Make a weak bound method to prevent the instance from being kept alive by the cache + weak_method = make_weak_bound_method(guidance_function.f, instance) + impl = _decorator(weak_method, stateless=guidance_function.stateless, cache=guidance_function.cache, model=guidance_function.model) + cls.impl_cache[key] = impl + # Clean up the cache when the instance is deleted + weakref.finalize(weak_method, cls.impl_cache.pop, key) + return cls(impl, instance) + def __call__(self, *args, **kwargs): return self.__func__(*args, **kwargs) From e2bcd2ca521d56c04fee28813608038830b918ae Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 25 Sep 2024 14:27:06 -0700 Subject: [PATCH 47/54] Fix repr --- guidance/_guidance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index e026f54d6..cd2d256f6 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -98,7 +98,7 @@ def __call__(self, *args, **kwargs): return self.__func__(*args, **kwargs) def __repr__(self): - return f"" + return f"" _null_grammar = string("") From fd9ff5ffd93a5d7eac5d9b63496a4033c1a82001 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 25 Sep 2024 14:29:02 -0700 Subject: [PATCH 48/54] comment --- guidance/_guidance.py | 1 + 1 file changed, 1 insertion(+) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index cd2d256f6..88d150bea 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -82,6 +82,7 @@ def __init__(self, impl, instance): @classmethod def from_guidance_function(cls, guidance_function: GuidanceFunction, instance: Any) -> "GuidanceMethod": + # Use instance hash in addition to identity to make sure we miss the cache if the instance is meaningfully mutated key = (guidance_function.f, hash(instance), id(instance)) try: impl = cls.impl_cache[key] From 1fcf4a54eb05f7ae82ef844a699b12171c54a03f Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 25 Sep 2024 15:51:11 -0700 Subject: [PATCH 49/54] fix finalizer --- guidance/_guidance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 88d150bea..548669e67 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -92,7 +92,7 @@ def from_guidance_function(cls, guidance_function: GuidanceFunction, instance: A impl = _decorator(weak_method, stateless=guidance_function.stateless, cache=guidance_function.cache, model=guidance_function.model) cls.impl_cache[key] = impl # Clean up the cache when the instance is deleted - weakref.finalize(weak_method, cls.impl_cache.pop, key) + weakref.finalize(instance, cls.impl_cache.pop, key) return cls(impl, instance) def __call__(self, *args, **kwargs): From bf132ebacc2fd7eab49a64884c1c17d90ae7419a Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 25 Sep 2024 15:52:32 -0700 Subject: [PATCH 50/54] more informative error in make_weak_bound_method; no need for weak ref to func --- guidance/_utils.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/guidance/_utils.py b/guidance/_utils.py index 913eb326a..623c81771 100644 --- a/guidance/_utils.py +++ b/guidance/_utils.py @@ -122,14 +122,14 @@ def strip_multiline_string_indents(f): return new_f def make_weak_bound_method(f, instance): - weak_method = weakref.WeakMethod( - types.MethodType(f, instance) - ) + instance_ref = weakref.ref(instance) + instance_repr = repr(instance) @functools.wraps(f) # ish def weak_bound_f(*args, **kwargs): - method = weak_method() - if method is None: - raise ReferenceError(f"Weak reference to method is dead: {f}") + instance = instance_ref() + if instance is None: + raise ReferenceError(f"Lost reference to {instance_repr} and cannot bind {f} to it.") + method = types.MethodType(f, instance) return method(*args, **kwargs) # remove the first argument from the wrapped function since it is now bound From ddd7faced15154181ad41430d60ebe9ed1ad5b86 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 25 Sep 2024 15:56:34 -0700 Subject: [PATCH 51/54] comment --- guidance/_guidance.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 548669e67..6d165ac17 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -70,8 +70,8 @@ def __repr__(self): class GuidanceMethod: impl_cache = {} def __init__(self, impl, instance): - # Make object that looks like a method. Note we keep a hard reference to the instance - # to keep it (and therefore our cached impl) alive as long as we are alive + # Make object that looks like a method (__self__ and __func__) in order to be able to better support weak referencing via weakref.WeakMethod + # Note we keep a hard reference to the instance to keep it (and therefore our cached impl) alive as long as we are alive self.__self__ = instance self.__func__ = impl From 769bca07acb759126906e5d3afaac83821885b51 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Thu, 26 Sep 2024 08:50:32 -0700 Subject: [PATCH 52/54] words --- guidance/_guidance.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 6d165ac17..0674709b8 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -50,7 +50,7 @@ def __init__( # Update self with the wrapped function's metadata functools.update_wrapper(self, self._impl) - # Pretend to be one level of wrapping higher than we are + # Pretend to be one level of wrapping lower than we are self.__wrapped__ = self._impl.__wrapped__ def __call__(self, *args, **kwargs): @@ -77,7 +77,7 @@ def __init__(self, impl, instance): # Update self with the wrapped function's metadata functools.update_wrapper(self, impl) - # Pretend to be one level of wrapping higher than we are + # Pretend to be one level of wrapping lower than we are self.__wrapped__ = impl.__wrapped__ @classmethod From ef2d7c20df1da9408a31410056d943b15e8556b8 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 27 Sep 2024 11:42:31 -0700 Subject: [PATCH 53/54] fix comment --- tests/unit/test_decorator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_decorator.py b/tests/unit/test_decorator.py index 0413a4914..9a549bda7 100644 --- a/tests/unit/test_decorator.py +++ b/tests/unit/test_decorator.py @@ -428,7 +428,7 @@ def test_deleting_instance_lets_method_be_garbage_collected(self): obj_ref = weakref.ref(obj) # Create a weak reference to the cached method meth_ref = weakref.WeakMethod(obj.cached_method) - # Quick sanity check (real methods need weakref.WeakMethod, but we're a bit different) + # Quick sanity check that the weak reference is working gc.collect() assert meth_ref() is not None # Delete the hard ref to the obj From 07d58c16467bed60bb189c490f197ae4f0d39a80 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Fri, 27 Sep 2024 11:52:13 -0700 Subject: [PATCH 54/54] comment --- guidance/_guidance.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/guidance/_guidance.py b/guidance/_guidance.py index 0674709b8..08612d983 100644 --- a/guidance/_guidance.py +++ b/guidance/_guidance.py @@ -58,7 +58,7 @@ def __call__(self, *args, **kwargs): def __get__(self, instance, owner=None, /): """ - Return a GuidanceMethod bound to the instance. GuidanceMethods are cached in a WeakKeyDictionary on a per-instance basis. + Return a GuidanceMethod bound to the instance. """ if instance is None: return self @@ -82,7 +82,12 @@ def __init__(self, impl, instance): @classmethod def from_guidance_function(cls, guidance_function: GuidanceFunction, instance: Any) -> "GuidanceMethod": - # Use instance hash in addition to identity to make sure we miss the cache if the instance is meaningfully mutated + # We can't directly use a weakref.WeakKeyDictionary because those don't really work when the key objects + # are allowed to change their hash value. + + # Instead use instance hash in addition to identity to make sure we miss the cache if the instance is meaningfully mutated. + # This should be safe because an id will only be reused after the original object is garbage collected, at which point we + # should have removed the cache entry (since we use weakref.finalize to remove the cache entry when the instance is deleted). key = (guidance_function.f, hash(instance), id(instance)) try: impl = cls.impl_cache[key]