From 386cdd3a52f80a6734eb81e9f788fd8f9ca6285f Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Fri, 26 Apr 2024 09:07:27 -0400 Subject: [PATCH 01/29] Starting to think about what we need for AzureAI Studio --- aais_example.py | 60 +++++++++++++++++++++++++++++ tests/models/test_azureai_openai.py | 39 ++++++++----------- tests/models/test_azureai_studio.py | 34 ++++++++++++++++ tests/models/test_model.py | 2 + tests/utils.py | 6 +++ 5 files changed, 119 insertions(+), 22 deletions(-) create mode 100644 aais_example.py create mode 100644 tests/models/test_azureai_studio.py diff --git a/aais_example.py b/aais_example.py new file mode 100644 index 000000000..a8ffafbe7 --- /dev/null +++ b/aais_example.py @@ -0,0 +1,60 @@ +import urllib.request +import json +import os +import ssl + +def allowSelfSignedHttps(allowed): + # bypass the server certificate verification on client side + if allowed and not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None): + ssl._create_default_https_context = ssl._create_unverified_context + +allowSelfSignedHttps(True) # this line is needed if you use self-signed certificate in your scoring service. + +# Request data goes here +# The example below assumes JSON formatting which may be updated +# depending on the format your endpoint expects. +# More information can be found here: +# https://docs.microsoft.com/azure/machine-learning/how-to-deploy-advanced-entry-script +data = { + "input_data": { + "input_string": [ + { + "role": "user", + "content": "I am going to Gomorrah, give me a list of 10 places to visit" + } + ], + "parameters": { + "temperature": 0.7, + "top_p": 0.9, + "do_sample": True, + "max_new_tokens": 1000 + } + } +} + + +body = str.encode(json.dumps(data)) + +url = 'https://guidance-build-azureai-mo-qahti.eastus2.inference.ml.azure.com/score' +# Replace this with the primary/secondary key, AMLToken, or Microsoft Entra ID token for the endpoint +api_key = 'jnR8Q7wKkyHaV3xJwXVTT8rTN7MMT3SR' +if not api_key: + raise Exception("A key should be provided to invoke the endpoint") + +# The azureml-model-deployment header will force the request to go to a specific deployment. 
+# Remove this header to have the request observe the endpoint traffic rules
+headers = {'Content-Type':'application/json', 'Authorization':('Bearer '+ api_key), 'azureml-model-deployment': 'phi-3-mini-4k-instruct-2' }
+
+req = urllib.request.Request(url, body, headers)
+
+try:
+    response = urllib.request.urlopen(req)
+
+    result = response.read()
+    print(result)
+except urllib.error.HTTPError as error:
+    print("The request failed with status code: " + str(error.code))
+
+    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
+    print(error.info())
+    print(error.read().decode("utf8", 'ignore'))
diff --git a/tests/models/test_azureai_openai.py b/tests/models/test_azureai_openai.py
index 38ff6d8ed..6e2b2fd6d 100644
--- a/tests/models/test_azureai_openai.py
+++ b/tests/models/test_azureai_openai.py
@@ -7,23 +7,18 @@
 
 from guidance import assistant, gen, models, system, user
 
+from utils import env_or_fail
+
 # Everything in here needs credentials to work
 # Mark is configured in pyproject.toml
 pytestmark = pytest.mark.needs_credentials
 
 
-def _env_or_fail(var_name: str) -> str:
-    env_value = os.getenv(var_name, None)
-
-    assert env_value is not None, f"Env '{var_name}' not found"
-
-    return env_value
-
-
 def test_azureai_openai_chat_smoke(rate_limiter):
-    azureai_endpoint = _env_or_fail("AZUREAI_CHAT_ENDPOINT")
-    azureai_key = _env_or_fail("AZUREAI_CHAT_KEY")
-    model = _env_or_fail("AZUREAI_CHAT_MODEL")
+    azureai_endpoint = env_or_fail("AZUREAI_CHAT_ENDPOINT")
+    azureai_key = env_or_fail("AZUREAI_CHAT_KEY")
+    model = env_or_fail("AZUREAI_CHAT_MODEL")
 
     lm = models.AzureOpenAI(
         model=model, azure_endpoint=azureai_endpoint, api_key=azureai_key
@@ -45,9 +40,9 @@ def test_azureai_openai_chat_smoke(rate_limiter):
 
 
 def test_azureai_openai_chat_alt_args(rate_limiter):
-    azureai_endpoint = _env_or_fail("AZUREAI_CHAT_ENDPOINT")
-    azureai_key = _env_or_fail("AZUREAI_CHAT_KEY")
-    model = _env_or_fail("AZUREAI_CHAT_MODEL")
+    azureai_endpoint = env_or_fail("AZUREAI_CHAT_ENDPOINT")
+    azureai_key = env_or_fail("AZUREAI_CHAT_KEY")
+    model = env_or_fail("AZUREAI_CHAT_MODEL")
 
     parsed_url = urlparse(azureai_endpoint)
     parsed_query = parse_qs(parsed_url.query)
@@ -78,9 +73,9 @@ def test_azureai_openai_chat_alt_args(rate_limiter):
 
 
 def test_azureai_openai_completion_smoke(rate_limiter):
-    azureai_endpoint = _env_or_fail("AZUREAI_COMPLETION_ENDPOINT")
-    azureai_key = _env_or_fail("AZUREAI_COMPLETION_KEY")
-    model = _env_or_fail("AZUREAI_COMPLETION_MODEL")
+    azureai_endpoint = env_or_fail("AZUREAI_COMPLETION_ENDPOINT")
+    azureai_key = env_or_fail("AZUREAI_COMPLETION_KEY")
+    model = env_or_fail("AZUREAI_COMPLETION_MODEL")
 
     lm = models.AzureOpenAI(
         model=model, azure_endpoint=azureai_endpoint, api_key=azureai_key
@@ -93,9 +88,9 @@ def test_azureai_openai_completion_smoke(rate_limiter):
 
 
 def test_azureai_openai_completion_alt_args(rate_limiter):
-    azureai_endpoint = _env_or_fail("AZUREAI_COMPLETION_ENDPOINT")
-    azureai_key = _env_or_fail("AZUREAI_COMPLETION_KEY")
-    model = _env_or_fail("AZUREAI_COMPLETION_MODEL")
+    azureai_endpoint = env_or_fail("AZUREAI_COMPLETION_ENDPOINT")
+    azureai_key = env_or_fail("AZUREAI_COMPLETION_KEY")
+    model = env_or_fail("AZUREAI_COMPLETION_MODEL")
 
     parsed_url = urlparse(azureai_endpoint)
     parsed_query = parse_qs(parsed_url.query)
@@ -118,9 +113,9 @@ def test_azureai_openai_completion_alt_args(rate_limiter):
 
 
 def test_azureai_openai_chat_loop(rate_limiter):
-    azureai_endpoint = _env_or_fail("AZUREAI_CHAT_ENDPOINT")
-    azureai_key =
_env_or_fail("AZUREAI_CHAT_KEY") - model = _env_or_fail("AZUREAI_CHAT_MODEL") + azureai_endpoint = env_or_fail("AZUREAI_CHAT_ENDPOINT") + azureai_key = env_or_fail("AZUREAI_CHAT_KEY") + model = env_or_fail("AZUREAI_CHAT_MODEL") lm = models.AzureOpenAI( model=model, azure_endpoint=azureai_endpoint, api_key=azureai_key diff --git a/tests/models/test_azureai_studio.py b/tests/models/test_azureai_studio.py new file mode 100644 index 000000000..72920e643 --- /dev/null +++ b/tests/models/test_azureai_studio.py @@ -0,0 +1,34 @@ +import pytest + +import pytest + +from guidance import assistant, gen, models, system, user + +from utils import env_or_fail + +# Everything in here needs credentials to work +# Mark is configured in pyproject.toml +pytestmark = pytest.mark.needs_credentials + + +def test_azureai_openai_chat_smoke(rate_limiter): + azureai_studio_endpoint = env_or_fail("AZURE_AI_STUDIO_ENDPOINT") + azureai_studio_key = env_or_fail("AZUREAI_CHAT_KEY") + + lm = models.AzureOpenAI( + model=model, azure_endpoint=azureai_endpoint, api_key=azureai_key + ) + assert isinstance(lm, models.AzureOpenAIChat) + + with system(): + lm += "You are a math wiz." + + with user(): + lm += "What is 1 + 1?" + + with assistant(): + lm += gen(max_tokens=10, name="text") + lm += "Pick a number: " + + print(str(lm)) + assert len(lm["text"]) > 0 diff --git a/tests/models/test_model.py b/tests/models/test_model.py index 1dc4de113..c6f1478a2 100644 --- a/tests/models/test_model.py +++ b/tests/models/test_model.py @@ -6,6 +6,8 @@ def test_fstring(selected_model): lm = selected_model + print(f"{dir(lm.engine.tokenizer)=}") + assert hasattr(lm.engine.tokenizer,"sp_model") lm += f'this is a test {select(["item1", "item2"])}' assert str(lm) in ["this is a test item1", "this is a test item2"] diff --git a/tests/utils.py b/tests/utils.py index bf15ee59d..9a674e8bf 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -8,6 +8,12 @@ opanai_model_cache = {} +def env_or_fail(var_name: str) -> str: + env_value = os.getenv(var_name, None) + + assert env_value is not None, f"Env '{var_name}' not found" + + return env_value def get_model(model_name, caching=False, **kwargs): """Get an LLM by name.""" From 176201cc5c5b3c9142c793a77f412c80971a0452 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Fri, 26 Apr 2024 09:38:26 -0400 Subject: [PATCH 02/29] Getting to the initially desired failure --- guidance/models/__init__.py | 1 + guidance/models/_azureai_studio.py | 43 +++++++++++++++++++++++++++++ tests/models/test_azureai_openai.py | 2 +- tests/models/test_azureai_studio.py | 11 ++++---- 4 files changed, 51 insertions(+), 6 deletions(-) create mode 100644 guidance/models/_azureai_studio.py diff --git a/guidance/models/__init__.py b/guidance/models/__init__.py index 2599a98ec..c1d2e6767 100644 --- a/guidance/models/__init__.py +++ b/guidance/models/__init__.py @@ -19,6 +19,7 @@ AzureOpenAICompletion, AzureOpenAIInstruct, ) +from ._azureai_studio import AzureAIStudioChat from ._openai import OpenAI, OpenAIChat, OpenAIInstruct, OpenAICompletion from ._lite_llm import LiteLLM, LiteLLMChat, LiteLLMInstruct, LiteLLMCompletion from ._cohere import Cohere, CohereCompletion, CohereInstruct diff --git a/guidance/models/_azureai_studio.py b/guidance/models/_azureai_studio.py new file mode 100644 index 000000000..4ec99cb02 --- /dev/null +++ b/guidance/models/_azureai_studio.py @@ -0,0 +1,43 @@ +from ._model import Chat +from ._grammarless import GrammarlessEngine, Grammarless + + +class AzureAIStudioChatEngine(GrammarlessEngine): + 
def __init__( + self, + *, + tokenizer, + max_streaming_tokens: int, + timeout: float, + compute_log_probs: bool, + azureai_studio_endpoint: str, + azureai_studio_key: str, + ): + self._endpoint = azureai_studio_endpoint + self._api_key = azureai_studio_key + + super().__init__(tokenizer, max_streaming_tokens, timeout, compute_log_probs) + + +class AzureAIStudioChat(Grammarless, Chat): + def __init__( + self, + azureai_studio_endpoint: str, + azureai_studio_key: str, + tokenizer=None, + echo: bool = True, + max_streaming_tokens: int = 1000, + timeout: float = 0.5, + compute_log_probs: bool = False, + ): + super().__init__( + AzureAIStudioChatEngine( + azureai_studio_endpoint=azureai_studio_endpoint, + azureai_studio_key=azureai_studio_key, + tokenizer=tokenizer, + max_streaming_tokens=max_streaming_tokens, + timeout=timeout, + compute_log_probs=compute_log_probs, + ), + echo=echo, + ) diff --git a/tests/models/test_azureai_openai.py b/tests/models/test_azureai_openai.py index 6e2b2fd6d..c2bc818d7 100644 --- a/tests/models/test_azureai_openai.py +++ b/tests/models/test_azureai_openai.py @@ -7,7 +7,7 @@ from guidance import assistant, gen, models, system, user -from utils import env_or_fail +from ..utils import env_or_fail # Everything in here needs credentials to work # Mark is configured in pyproject.toml diff --git a/tests/models/test_azureai_studio.py b/tests/models/test_azureai_studio.py index 72920e643..0b8e8cc36 100644 --- a/tests/models/test_azureai_studio.py +++ b/tests/models/test_azureai_studio.py @@ -4,7 +4,7 @@ from guidance import assistant, gen, models, system, user -from utils import env_or_fail +from ..utils import env_or_fail # Everything in here needs credentials to work # Mark is configured in pyproject.toml @@ -13,12 +13,13 @@ def test_azureai_openai_chat_smoke(rate_limiter): azureai_studio_endpoint = env_or_fail("AZURE_AI_STUDIO_ENDPOINT") - azureai_studio_key = env_or_fail("AZUREAI_CHAT_KEY") + azureai_studio_key = env_or_fail("AZURE_AI_STUDIO_KEY") - lm = models.AzureOpenAI( - model=model, azure_endpoint=azureai_endpoint, api_key=azureai_key + lm = models.AzureAIStudioChat( + azureai_studio_endpoint=azureai_studio_endpoint, + azureai_studio_key=azureai_studio_key, ) - assert isinstance(lm, models.AzureOpenAIChat) + assert isinstance(lm, models.AzureAIStudioChat) with system(): lm += "You are a math wiz." From beac0cc0c3aa3aa5892050e9792521121392c3e5 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Fri, 26 Apr 2024 09:52:52 -0400 Subject: [PATCH 03/29] Very rough draft.... 
--- guidance/models/_azureai_studio.py | 94 ++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/guidance/models/_azureai_studio.py b/guidance/models/_azureai_studio.py index 4ec99cb02..73ac6bbda 100644 --- a/guidance/models/_azureai_studio.py +++ b/guidance/models/_azureai_studio.py @@ -1,3 +1,6 @@ +import json +import urllib.request + from ._model import Chat from ._grammarless import GrammarlessEngine, Grammarless @@ -11,18 +14,108 @@ def __init__( timeout: float, compute_log_probs: bool, azureai_studio_endpoint: str, + azureai_model_deployment: str, azureai_studio_key: str, ): self._endpoint = azureai_studio_endpoint + self._deployment = azureai_model_deployment self._api_key = azureai_studio_key super().__init__(tokenizer, max_streaming_tokens, timeout, compute_log_probs) + def _generator(self, prompt, temperature: float): + # Initial parts of this straight up copied from OpenAIChatEngine + + # find the role tags + pos = 0 + role_end = b"<|im_end|>" + messages = [] + found = True + while found: + + # find the role text blocks + found = False + for role_name, start_bytes in ( + ("system", b"<|im_start|>system\n"), + ("user", b"<|im_start|>user\n"), + ("assistant", b"<|im_start|>assistant\n"), + ): + if prompt[pos:].startswith(start_bytes): + pos += len(start_bytes) + end_pos = prompt[pos:].find(role_end) + if end_pos < 0: + assert ( + role_name == "assistant" + ), "Bad chat format! Last role before gen needs to be assistant!" + break + btext = prompt[pos : pos + end_pos] + pos += end_pos + len(role_end) + messages.append( + {"role": role_name, "content": btext.decode("utf8")} + ) + found = True + break + + # Add nice exception if no role tags were used in the prompt. + # TODO: Move this somewhere more general for all chat models? + if messages == []: + raise ValueError( + f"The model is a Chat-based model and requires role tags in the prompt! \ + Make sure you are using guidance context managers like `with system():`, `with user():` and `with assistant():` \ + to appropriately format your guidance program for this type of model." + ) + + # Update shared data state + self._reset_shared_data(prompt[:pos], temperature) + + # Use cache only when temperature is 0 + if temperature == 0: + cache_key = self._hash_prompt(prompt) + + # Check if the result is already in the cache + if cache_key in self.cache: + for chunk in self.cache[cache_key]: + yield chunk + return + + # Now switch to the example code from AzureAI Studio + + # Prepare for the API call (this might be model specific....) 
+ parameters = dict(temperature=temperature) + payload = dict(input_data=dict(input_string=messages, parameters=parameters)) + + headers = { + "Content-Type": "application/json", + "Authorization": ("Bearer " + self._api_key), + "azureml-model-deployment": self._deployment, + } + + body = str.encode(json.dumps(payload)) + + req = urllib.request.Request(self._endpoint, body, headers) + + response = urllib.request.urlopen(req) + result = json.loads(response.read()) + + # Now back to OpenAIChatEngine + if temperature == 0: + cached_results = [] + + yield result["output"] + + if temperature == 0: + cached_results.append(result["output"]) + + # Cache the results after the generator is exhausted + if temperature == 0: + self.cache[cache_key] = cached_results + class AzureAIStudioChat(Grammarless, Chat): def __init__( self, azureai_studio_endpoint: str, + azureai_studio_deployment: str, azureai_studio_key: str, tokenizer=None, echo: bool = True, @@ -33,6 +126,7 @@ def __init__( super().__init__( AzureAIStudioChatEngine( azureai_studio_endpoint=azureai_studio_endpoint, + azureai_model_deployment=azureai_studio_deployment, azureai_studio_key=azureai_studio_key, tokenizer=tokenizer, max_streaming_tokens=max_streaming_tokens, From 3d90baaa3b000b4c293f47febd585057c9cf1c6e Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Fri, 26 Apr 2024 09:57:07 -0400 Subject: [PATCH 04/29] Inching along --- guidance/models/_azureai_studio.py | 5 +++++ tests/models/test_azureai_studio.py | 2 ++ 2 files changed, 7 insertions(+) diff --git a/guidance/models/_azureai_studio.py b/guidance/models/_azureai_studio.py index 73ac6bbda..880379022 100644 --- a/guidance/models/_azureai_studio.py +++ b/guidance/models/_azureai_studio.py @@ -1,3 +1,4 @@ +import hashlib import json import urllib.request @@ -23,6 +24,10 @@ def __init__( super().__init__(tokenizer, max_streaming_tokens, timeout, compute_log_probs) + def _hash_prompt(self, prompt): + # Copied from OpenAIChatEngine + return hashlib.sha256(f"{prompt}".encode()).hexdigest() + def _generator(self, prompt, temperature: float): # Initial parts of this straight up copied from OpenAIChatEngine diff --git a/tests/models/test_azureai_studio.py b/tests/models/test_azureai_studio.py index 0b8e8cc36..ab51d6450 100644 --- a/tests/models/test_azureai_studio.py +++ b/tests/models/test_azureai_studio.py @@ -13,10 +13,12 @@ def test_azureai_openai_chat_smoke(rate_limiter): azureai_studio_endpoint = env_or_fail("AZURE_AI_STUDIO_ENDPOINT") + azureai_studio_deployment = env_or_fail("AZURE_AI_STUDIO_DEPLOYMENT") azureai_studio_key = env_or_fail("AZURE_AI_STUDIO_KEY") lm = models.AzureAIStudioChat( azureai_studio_endpoint=azureai_studio_endpoint, + azureai_studio_deployment=azureai_studio_deployment, azureai_studio_key=azureai_studio_key, ) assert isinstance(lm, models.AzureAIStudioChat) From 32bc793af3fa16350f3d1a5d7967fd5cda5a2425 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Fri, 26 Apr 2024 10:06:04 -0400 Subject: [PATCH 05/29] Trying to get things working :-/ --- guidance/models/_azureai_studio.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/guidance/models/_azureai_studio.py b/guidance/models/_azureai_studio.py index 880379022..db6fc414c 100644 --- a/guidance/models/_azureai_studio.py +++ b/guidance/models/_azureai_studio.py @@ -1,7 +1,11 @@ import hashlib import json +import pathlib import urllib.request +import diskcache as dc +import platformdirs + from ._model import Chat from ._grammarless import 
GrammarlessEngine, Grammarless @@ -22,6 +26,12 @@ def __init__( self._deployment = azureai_model_deployment self._api_key = azureai_studio_key + path = ( + pathlib.Path(platformdirs.user_cache_dir("guidance")) + / "azureaistudio.tokens" + ) + self.cache = dc.Cache(path) + super().__init__(tokenizer, max_streaming_tokens, timeout, compute_log_probs) def _hash_prompt(self, prompt): @@ -31,6 +41,11 @@ def _hash_prompt(self, prompt): def _generator(self, prompt, temperature: float): # Initial parts of this straight up copied from OpenAIChatEngine + # The next loop (or one like it) appears in several places, + # and quite possibly belongs in a library function or superclass + # That said, I'm not _completely sure that there aren't subtle + # differences between the various versions + # find the role tags pos = 0 role_end = b"<|im_end|>" @@ -84,6 +99,7 @@ def _generator(self, prompt, temperature: float): return # Now switch to the example code from AzureAI Studio + # Might want to rewrite this to the requests package # Prepare for the API call (this might be model specific....) parameters = dict(temperature=temperature) @@ -102,14 +118,17 @@ def _generator(self, prompt, temperature: float): response = urllib.request.urlopen(req) result = json.loads(response.read()) - # Now back to OpenAIChatEngine + # Now back to OpenAIChatEngine, with slight modifications since + # this isn't a streaming API if temperature == 0: cached_results = [] - yield result["output"] + encoded_chunk = result["output"].encode("utf8") + + yield encoded_chunk if temperature == 0: - cached_results.append(result["output"]) + cached_results.append(encoded_chunk) # Cache the results after the generator is exhausted if temperature == 0: From 7840cfd427e5b66d9c8a0b41cbc4d499734daeec Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Fri, 26 Apr 2024 11:10:48 -0400 Subject: [PATCH 06/29] Didn't mean to check that in --- aais_example.py | 60 ------------------------------------------------- 1 file changed, 60 deletions(-) delete mode 100644 aais_example.py diff --git a/aais_example.py b/aais_example.py deleted file mode 100644 index a8ffafbe7..000000000 --- a/aais_example.py +++ /dev/null @@ -1,60 +0,0 @@ -import urllib.request -import json -import os -import ssl - -def allowSelfSignedHttps(allowed): - # bypass the server certificate verification on client side - if allowed and not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None): - ssl._create_default_https_context = ssl._create_unverified_context - -allowSelfSignedHttps(True) # this line is needed if you use self-signed certificate in your scoring service. - -# Request data goes here -# The example below assumes JSON formatting which may be updated -# depending on the format your endpoint expects. 
-# More information can be found here:
-# https://docs.microsoft.com/azure/machine-learning/how-to-deploy-advanced-entry-script
-data = {
-    "input_data": {
-        "input_string": [
-            {
-                "role": "user",
-                "content": "I am going to Gomorrah, give me a list of 10 places to visit"
-            }
-        ],
-        "parameters": {
-            "temperature": 0.7,
-            "top_p": 0.9,
-            "do_sample": True,
-            "max_new_tokens": 1000
-        }
-    }
-}
-
-
-body = str.encode(json.dumps(data))
-
-url = 'https://guidance-build-azureai-mo-qahti.eastus2.inference.ml.azure.com/score'
-# Replace this with the primary/secondary key, AMLToken, or Microsoft Entra ID token for the endpoint
-api_key = 'jnR8Q7wKkyHaV3xJwXVTT8rTN7MMT3SR'
-if not api_key:
-    raise Exception("A key should be provided to invoke the endpoint")
-
-# The azureml-model-deployment header will force the request to go to a specific deployment.
-# Remove this header to have the request observe the endpoint traffic rules
-headers = {'Content-Type':'application/json', 'Authorization':('Bearer '+ api_key), 'azureml-model-deployment': 'phi-3-mini-4k-instruct-2' }
-
-req = urllib.request.Request(url, body, headers)
-
-try:
-    response = urllib.request.urlopen(req)
-
-    result = response.read()
-    print(result)
-except urllib.error.HTTPError as error:
-    print("The request failed with status code: " + str(error.code))
-
-    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
-    print(error.info())
-    print(error.read().decode("utf8", 'ignore'))

From 04e45c749cd8199696d8e679bd18dca978fa56a1 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)"
Date: Fri, 26 Apr 2024 11:19:12 -0400
Subject: [PATCH 07/29] Erroneous addition

---
 tests/models/test_model.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/models/test_model.py b/tests/models/test_model.py
index c6f1478a2..1dc4de113 100644
--- a/tests/models/test_model.py
+++ b/tests/models/test_model.py
@@ -6,8 +6,6 @@
 
 def test_fstring(selected_model):
     lm = selected_model
-    print(f"{dir(lm.engine.tokenizer)=}")
-    assert hasattr(lm.engine.tokenizer,"sp_model")
 
     lm += f'this is a test {select(["item1", "item2"])}'
     assert str(lm) in ["this is a test item1", "this is a test item2"]

From 25ecccfb2640abc8c3d7c171202b6bb65c1b0a20 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)"
Date: Mon, 29 Apr 2024 08:55:08 -0400
Subject: [PATCH 08/29] Switch to requests

---
 guidance/models/_azureai_studio.py | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/guidance/models/_azureai_studio.py b/guidance/models/_azureai_studio.py
index db6fc414c..97b2f31b1 100644
--- a/guidance/models/_azureai_studio.py
+++ b/guidance/models/_azureai_studio.py
@@ -1,10 +1,9 @@
 import hashlib
-import json
 import pathlib
-import urllib.request
 
 import diskcache as dc
 import platformdirs
+import requests
 
 from ._model import Chat
 from ._grammarless import GrammarlessEngine, Grammarless
@@ -98,9 +97,6 @@ def _generator(self, prompt, temperature: float):
             yield chunk
             return
 
-        # Now switch to the example code from AzureAI Studio
-        # Might want to rewrite this to the requests package
-
         # Prepare for the API call (this might be model specific....)
parameters = dict(temperature=temperature) payload = dict(input_data=dict(input_string=messages, parameters=parameters)) @@ -111,12 +107,13 @@ def _generator(self, prompt, temperature: float): "azureml-model-deployment": self._deployment, } - body = str.encode(json.dumps(payload)) - - req = urllib.request.Request(self._endpoint, body, headers) + response = requests.post( + self._endpoint, + json=payload, + headers=headers, + ) - response = urllib.request.urlopen(req) - result = json.loads(response.read()) + result = response.json() # Now back to OpenAIChatEngine, with slight modifications since # this isn't a streaming API From 1265346714ae9364f2350d199d213cfb104ed3a0 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Mon, 29 Apr 2024 10:40:06 -0400 Subject: [PATCH 09/29] Make sure that cache is unique to endpoint/deployment --- guidance/models/_azureai_studio.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/guidance/models/_azureai_studio.py b/guidance/models/_azureai_studio.py index 97b2f31b1..e90f2430f 100644 --- a/guidance/models/_azureai_studio.py +++ b/guidance/models/_azureai_studio.py @@ -25,9 +25,13 @@ def __init__( self._deployment = azureai_model_deployment self._api_key = azureai_studio_key + # There is a cache... better make sure it's specific + # to the endpoint and deployment + deployment_id = self._hash_prompt(self._endpoint + self._deployment) + path = ( pathlib.Path(platformdirs.user_cache_dir("guidance")) - / "azureaistudio.tokens" + / f"azureaistudio.tokens.{deployment_id}" ) self.cache = dc.Cache(path) @@ -44,7 +48,7 @@ def _generator(self, prompt, temperature: float): # and quite possibly belongs in a library function or superclass # That said, I'm not _completely sure that there aren't subtle # differences between the various versions - + # find the role tags pos = 0 role_end = b"<|im_end|>" From f348880bb712b651031d48f7fe2ca8d7f970892c Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Mon, 29 Apr 2024 10:40:27 -0400 Subject: [PATCH 10/29] Starting to test mistral too.... not fully working yet --- tests/models/test_azureai_studio.py | 37 +++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/tests/models/test_azureai_studio.py b/tests/models/test_azureai_studio.py index ab51d6450..49d0298e9 100644 --- a/tests/models/test_azureai_studio.py +++ b/tests/models/test_azureai_studio.py @@ -11,10 +11,10 @@ pytestmark = pytest.mark.needs_credentials -def test_azureai_openai_chat_smoke(rate_limiter): - azureai_studio_endpoint = env_or_fail("AZURE_AI_STUDIO_ENDPOINT") - azureai_studio_deployment = env_or_fail("AZURE_AI_STUDIO_DEPLOYMENT") - azureai_studio_key = env_or_fail("AZURE_AI_STUDIO_KEY") +def test_azureai_phi3_chat_smoke(rate_limiter): + azureai_studio_endpoint = env_or_fail("AZURE_AI_STUDIO_PHI3_ENDPOINT") + azureai_studio_deployment = env_or_fail("AZURE_AI_STUDIO_PHI3_DEPLOYMENT") + azureai_studio_key = env_or_fail("AZURE_AI_STUDIO_PHI3_KEY") lm = models.AzureAIStudioChat( azureai_studio_endpoint=azureai_studio_endpoint, @@ -30,8 +30,35 @@ def test_azureai_openai_chat_smoke(rate_limiter): lm += "What is 1 + 1?" 
with assistant(): - lm += gen(max_tokens=10, name="text") + lm += gen(max_tokens=10, name="text", temperature=0.5) lm += "Pick a number: " print(str(lm)) assert len(lm["text"]) > 0 + + +def test_azureai_mistral_chat_smoke(rate_limiter): + azureai_studio_endpoint = env_or_fail("AZURE_AI_STUDIO_MISTRAL_CHAT_ENDPOINT") + azureai_studio_deployment = env_or_fail("AZURE_AI_STUDIO_MISTRAL_CHAT_DEPLOYMENT") + azureai_studio_key = env_or_fail("AZURE_AI_STUDIO_MISTRAL_CHAT_KEY") + + lm = models.AzureAIStudioChat( + azureai_studio_endpoint=azureai_studio_endpoint, + azureai_studio_deployment=azureai_studio_deployment, + azureai_studio_key=azureai_studio_key, + ) + assert isinstance(lm, models.AzureAIStudioChat) + lm.engine.cache.clear() + + with system(): + lm += "You are a math wiz." + + with user(): + lm += "What is 1 + 1?" + + with assistant(): + lm += gen(max_tokens=10, name="text", temperature=0.5) + lm += "Pick a number: " + + print(str(lm)) + assert len(lm["text"]) < 0 From 0fc4727afa71811d55816c1a56bc5353617d4b56 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Mon, 29 Apr 2024 11:00:31 -0400 Subject: [PATCH 11/29] Get the Mistral test working --- guidance/models/_azureai_studio.py | 2 +- tests/models/test_azureai_studio.py | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/guidance/models/_azureai_studio.py b/guidance/models/_azureai_studio.py index e90f2430f..54ebbd4f1 100644 --- a/guidance/models/_azureai_studio.py +++ b/guidance/models/_azureai_studio.py @@ -48,7 +48,7 @@ def _generator(self, prompt, temperature: float): # and quite possibly belongs in a library function or superclass # That said, I'm not _completely sure that there aren't subtle # differences between the various versions - + # find the role tags pos = 0 role_end = b"<|im_end|>" diff --git a/tests/models/test_azureai_studio.py b/tests/models/test_azureai_studio.py index 49d0298e9..6fa6bfec6 100644 --- a/tests/models/test_azureai_studio.py +++ b/tests/models/test_azureai_studio.py @@ -50,15 +50,16 @@ def test_azureai_mistral_chat_smoke(rate_limiter): assert isinstance(lm, models.AzureAIStudioChat) lm.engine.cache.clear() - with system(): - lm += "You are a math wiz." + # No "system" role for Mistral? + # with system(): + # lm += "You are a math wiz." with user(): lm += "What is 1 + 1?" 
with assistant(): - lm += gen(max_tokens=10, name="text", temperature=0.5) - lm += "Pick a number: " + lm += gen(max_tokens=15, name="text", temperature=0.5) + lm += "\nPick a number: " print(str(lm)) - assert len(lm["text"]) < 0 + assert len(lm["text"]) > 0 From 2be7f58c34fd1a650d73de0bf1d8933129c5d804 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Mon, 29 Apr 2024 11:24:57 -0400 Subject: [PATCH 12/29] Add LLama3 --- tests/models/test_azureai_studio.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/models/test_azureai_studio.py b/tests/models/test_azureai_studio.py index 6fa6bfec6..45ba1bc3f 100644 --- a/tests/models/test_azureai_studio.py +++ b/tests/models/test_azureai_studio.py @@ -63,3 +63,29 @@ def test_azureai_mistral_chat_smoke(rate_limiter): print(str(lm)) assert len(lm["text"]) > 0 + + +def test_azureai_llama3_chat_smoke(rate_limiter): + azureai_studio_endpoint = env_or_fail("AZURE_AI_STUDIO_LLAMA3_CHAT_ENDPOINT") + azureai_studio_deployment = env_or_fail("AZURE_AI_STUDIO_LLAMA3_CHAT_DEPLOYMENT") + azureai_studio_key = env_or_fail("AZURE_AI_STUDIO_LLAMA3_CHAT_KEY") + + lm = models.AzureAIStudioChat( + azureai_studio_endpoint=azureai_studio_endpoint, + azureai_studio_deployment=azureai_studio_deployment, + azureai_studio_key=azureai_studio_key, + ) + assert isinstance(lm, models.AzureAIStudioChat) + + with system(): + lm += "You are a math wiz." + + with user(): + lm += "What is 1 + 1?" + + with assistant(): + lm += gen(max_tokens=10, name="text", temperature=0.5) + lm += "Pick a number: " + + print(str(lm)) + assert len(lm["text"]) > 0 From 3bcb48ea6ae3e553230a73606505c478adf384da Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Tue, 30 Apr 2024 08:25:51 -0400 Subject: [PATCH 13/29] Expand the endpoint configuration --- .github/workflows/ci_tests.yml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml index de28e840c..8cd45f378 100644 --- a/.github/workflows/ci_tests.yml +++ b/.github/workflows/ci_tests.yml @@ -58,13 +58,23 @@ jobs: python -c "import torch; assert torch.cuda.is_available()" - name: Test with pytest env: - # Configure endpoints + # Configure endpoints for Azure OpenAI AZUREAI_CHAT_ENDPOINT: ${{ secrets.AZUREAI_CHAT_ENDPOINT }} AZUREAI_CHAT_KEY: ${{ secrets.AZUREAI_CHAT_KEY }} AZUREAI_CHAT_MODEL: ${{ secrets.AZUREAI_CHAT_MODEL }} AZUREAI_COMPLETION_ENDPOINT: ${{ secrets.AZUREAI_COMPLETION_ENDPOINT }} AZUREAI_COMPLETION_KEY: ${{ secrets.AZUREAI_COMPLETION_KEY }} AZUREAI_COMPLETION_MODEL: ${{ secrets.AZUREAI_COMPLETION_MODEL }} + # Configure endpoints for Azure AI Studio + AZURE_AI_STUDIO_PHI3_ENDPOINT: ${{ vars.AZURE_AI_STUDIO_PHI3_ENDPOINT }} + AZURE_AI_STUDIO_PHI3_DEPLOYMENT: ${{ vars.AZURE_AI_STUDIO_PHI3_DEPLOYMENT }} + AZURE_AI_STUDIO_PHI3_KEY: ${{ secrets.AZURE_AI_STUDIO_PHI3_KEY }} + AZURE_AI_STUDIO_MISTRAL_CHAT_ENDPOINT: ${{ vars.AZURE_AI_STUDIO_MISTRAL_CHAT_ENDPOINT }} + AZURE_AI_STUDIO_MISTRAL_CHAT_DEPLOYMENT: ${{ vars.AZURE_AI_STUDIO_MISTRAL_CHAT_DEPLOYMENT }} + AZURE_AI_STUDIO_MISTRAL_CHAT_KEY: ${{ secrets.AZURE_AI_STUDIO_MISTRAL_CHAT_KEY }} + AZURE_AI_STUDIO_LLAMA3_CHAT_ENDPOINT: ${{ vars.AZURE_AI_STUDIO_LLAMA3_CHAT_ENDPOINT }} + AZURE_AI_STUDIO_LLAMA3_CHAT_DEPLOYMENT: ${{ vars.AZURE_AI_STUDIO_LLAMA3_CHAT_DEPLOYMENT }} + AZURE_AI_STUDIO_LLAMA3_CHAT_KEY: ${{ secrets.AZURE_AI_STUDIO_LLAMA3_CHAT_KEY }} run: | pytest --cov=guidance --cov-report=xml --cov-report=term-missing \ -m needs_credentials \ 
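
For reference, the tests wired up above exercise the new model class roughly as follows. This is a minimal usage sketch based on the tests in this series, assuming the AZURE_AI_STUDIO_PHI3_* variables from the workflow are set in the environment:

import os

from guidance import assistant, gen, models, system, user

# Endpoint details come from the environment variables configured above
lm = models.AzureAIStudioChat(
    azureai_studio_endpoint=os.environ["AZURE_AI_STUDIO_PHI3_ENDPOINT"],
    azureai_studio_deployment=os.environ["AZURE_AI_STUDIO_PHI3_DEPLOYMENT"],
    azureai_studio_key=os.environ["AZURE_AI_STUDIO_PHI3_KEY"],
)

# Build the chat transcript with guidance's role context managers,
# then let the endpoint generate the assistant turn
with system():
    lm += "You are a math wiz."
with user():
    lm += "What is 1 + 1?"
with assistant():
    lm += gen(max_tokens=10, name="text")

print(lm["text"])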
From 9e46101b240efe3985227652af59bf1f6abd38b1 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)"
Date: Wed, 1 May 2024 09:02:16 -0400
Subject: [PATCH 14/29] Add option to clear cache on instantiating model

---
 guidance/models/_azureai_studio.py  | 5 +++++
 tests/models/test_azureai_studio.py | 3 +++
 2 files changed, 8 insertions(+)

diff --git a/guidance/models/_azureai_studio.py b/guidance/models/_azureai_studio.py
index 54ebbd4f1..dc1e938c4 100644
--- a/guidance/models/_azureai_studio.py
+++ b/guidance/models/_azureai_studio.py
@@ -20,6 +20,7 @@ def __init__(
         azureai_studio_endpoint: str,
         azureai_model_deployment: str,
         azureai_studio_key: str,
+        clear_cache: bool,
     ):
         self._endpoint = azureai_studio_endpoint
         self._deployment = azureai_model_deployment
@@ -34,6 +35,8 @@ def __init__(
             / f"azureaistudio.tokens.{deployment_id}"
         )
         self.cache = dc.Cache(path)
+        if clear_cache:
+            self.cache.clear()
 
         super().__init__(tokenizer, max_streaming_tokens, timeout, compute_log_probs)
 
@@ -147,6 +150,7 @@ def __init__(
         max_streaming_tokens: int = 1000,
         timeout: float = 0.5,
         compute_log_probs: bool = False,
+        clear_cache: bool = False,
     ):
         super().__init__(
             AzureAIStudioChatEngine(
@@ -157,6 +161,7 @@ def __init__(
                 max_streaming_tokens=max_streaming_tokens,
                 timeout=timeout,
                 compute_log_probs=compute_log_probs,
+                clear_cache=False,
             ),
             echo=echo,
         )
diff --git a/tests/models/test_azureai_studio.py b/tests/models/test_azureai_studio.py
index 45ba1bc3f..102f1889a 100644
--- a/tests/models/test_azureai_studio.py
+++ b/tests/models/test_azureai_studio.py
@@ -20,6 +20,7 @@ def test_azureai_phi3_chat_smoke(rate_limiter):
         azureai_studio_endpoint=azureai_studio_endpoint,
         azureai_studio_deployment=azureai_studio_deployment,
         azureai_studio_key=azureai_studio_key,
+        clear_cache=True,
     )
     assert isinstance(lm, models.AzureAIStudioChat)
 
@@ -46,6 +47,7 @@ def test_azureai_mistral_chat_smoke(rate_limiter):
         azureai_studio_endpoint=azureai_studio_endpoint,
         azureai_studio_deployment=azureai_studio_deployment,
         azureai_studio_key=azureai_studio_key,
+        clear_cache=True,
    )
     assert isinstance(lm, models.AzureAIStudioChat)
     lm.engine.cache.clear()
@@ -74,6 +76,7 @@ def test_azureai_llama3_chat_smoke(rate_limiter):
         azureai_studio_endpoint=azureai_studio_endpoint,
         azureai_studio_deployment=azureai_studio_deployment,
         azureai_studio_key=azureai_studio_key,
+        clear_cache=True,
     )
     assert isinstance(lm, models.AzureAIStudioChat)

From 0a7cc81a56368fe33238ea0d9cd1db7648f4d29d Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)"
Date: Wed, 1 May 2024 09:32:21 -0400
Subject: [PATCH 15/29] Some more experimenting

---
 tests/models/test_azureai_studio.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/tests/models/test_azureai_studio.py b/tests/models/test_azureai_studio.py
index 102f1889a..bb6f0e83a 100644
--- a/tests/models/test_azureai_studio.py
+++ b/tests/models/test_azureai_studio.py
@@ -36,6 +36,7 @@ def test_azureai_phi3_chat_smoke(rate_limiter):
 
     print(str(lm))
     assert len(lm["text"]) > 0
+    assert str(lm).endswith("Pick a number: <|im_end|>")
 
 
 def test_azureai_mistral_chat_smoke(rate_limiter):
@@ -65,6 +66,7 @@ def test_azureai_mistral_chat_smoke(rate_limiter):
 
     print(str(lm))
     assert len(lm["text"]) > 0
+    assert str(lm).endswith("Pick a number: <|im_end|>")
 
 
 def test_azureai_llama3_chat_smoke(rate_limiter):
@@ -86,9 +88,24 @@ def test_azureai_llama3_chat_smoke(rate_limiter):
     with user():
         lm += "What is 1 + 1?"
 
+    with assistant():
+        lm += "2"
+
+    with user():
+        lm += "What is 2 + 3?"
+ with assistant(): lm += gen(max_tokens=10, name="text", temperature=0.5) lm += "Pick a number: " - print(str(lm)) assert len(lm["text"]) > 0 + assert str(lm).endswith("Pick a number: <|im_end|>") + + with user(): + lm += "I pick 10. Can you pick a number between 0 and 20?" + + with assistant(): + lm += gen(max_tokens=2, name="number") + + print(str(lm)) + assert len(lm["number"]) < 0 From 1584d9ff5d1d47048252140cccfeee2b94094d6f Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Wed, 1 May 2024 09:58:47 -0400 Subject: [PATCH 16/29] Want some parallel Azure OpenAI tests --- tests/models/test_azureai_openai.py | 75 +++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/tests/models/test_azureai_openai.py b/tests/models/test_azureai_openai.py index c2bc818d7..1de6e5182 100644 --- a/tests/models/test_azureai_openai.py +++ b/tests/models/test_azureai_openai.py @@ -37,6 +37,81 @@ def test_azureai_openai_chat_smoke(rate_limiter): print(str(lm)) assert len(lm["text"]) > 0 + assert str(lm).endswith("Pick a number: <|im_end|>") + + +def test_azureai_openai_chat_longer_1(rate_limiter): + azureai_endpoint = _env_or_fail("AZUREAI_CHAT_ENDPOINT") + azureai_key = _env_or_fail("AZUREAI_CHAT_KEY") + model = _env_or_fail("AZUREAI_CHAT_MODEL") + + lm = models.AzureOpenAI( + model=model, azure_endpoint=azureai_endpoint, api_key=azureai_key + ) + assert isinstance(lm, models.AzureOpenAIChat) + + with system(): + lm += "You are a math wiz." + + with user(): + lm += "What is 1 + 1?" + + with assistant(): + lm += gen(max_tokens=10, name="text") + lm += "Pick a number: " + + print(str(lm)) + assert len(lm["text"]) > 0 + assert str(lm).endswith("Pick a number: <|im_end|>") + + with user(): + lm += "10. Now you pick a number between 0 and 20" + + with assistant(): + lm += gen(max_tokens=2, name="number") + + print(str(lm)) + assert len(lm["number"]) > 0 + + +def test_azureai_openai_chat_longer_2(rate_limiter): + azureai_endpoint = _env_or_fail("AZUREAI_CHAT_ENDPOINT") + azureai_key = _env_or_fail("AZUREAI_CHAT_KEY") + model = _env_or_fail("AZUREAI_CHAT_MODEL") + + lm = models.AzureOpenAI( + model=model, azure_endpoint=azureai_endpoint, api_key=azureai_key + ) + assert isinstance(lm, models.AzureOpenAIChat) + + with system(): + lm += "You are a math wiz." + + with user(): + lm += "What is 1 + 1?" + + with assistant(): + lm += "2" + + with user(): + lm += "What is 2 + 3?" + + with assistant(): + lm += gen(max_tokens=10, name="text") + lm += "Pick a number: " + + print(str(lm)) + assert len(lm["text"]) > 0 + assert str(lm).endswith("Pick a number: <|im_end|>") + + with user(): + lm += "10. 
Now you pick a number between 0 and 20" + + with assistant(): + lm += gen(max_tokens=2, name="number") + + print(str(lm)) + assert len(lm["number"]) < 0 def test_azureai_openai_chat_alt_args(rate_limiter): From 60b23c899740c86b29535d8f53ecb3fd78ec62e8 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Wed, 1 May 2024 10:08:32 -0400 Subject: [PATCH 17/29] Copy/paste error --- tests/models/test_azureai_openai.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/models/test_azureai_openai.py b/tests/models/test_azureai_openai.py index 1de6e5182..11c3d91d7 100644 --- a/tests/models/test_azureai_openai.py +++ b/tests/models/test_azureai_openai.py @@ -41,9 +41,9 @@ def test_azureai_openai_chat_smoke(rate_limiter): def test_azureai_openai_chat_longer_1(rate_limiter): - azureai_endpoint = _env_or_fail("AZUREAI_CHAT_ENDPOINT") - azureai_key = _env_or_fail("AZUREAI_CHAT_KEY") - model = _env_or_fail("AZUREAI_CHAT_MODEL") + azureai_endpoint = env_or_fail("AZUREAI_CHAT_ENDPOINT") + azureai_key = env_or_fail("AZUREAI_CHAT_KEY") + model = env_or_fail("AZUREAI_CHAT_MODEL") lm = models.AzureOpenAI( model=model, azure_endpoint=azureai_endpoint, api_key=azureai_key @@ -75,9 +75,9 @@ def test_azureai_openai_chat_longer_1(rate_limiter): def test_azureai_openai_chat_longer_2(rate_limiter): - azureai_endpoint = _env_or_fail("AZUREAI_CHAT_ENDPOINT") - azureai_key = _env_or_fail("AZUREAI_CHAT_KEY") - model = _env_or_fail("AZUREAI_CHAT_MODEL") + azureai_endpoint = env_or_fail("AZUREAI_CHAT_ENDPOINT") + azureai_key = env_or_fail("AZUREAI_CHAT_KEY") + model = env_or_fail("AZUREAI_CHAT_MODEL") lm = models.AzureOpenAI( model=model, azure_endpoint=azureai_endpoint, api_key=azureai_key From 10fc9ba7d7fcdf676f2eb627be62ba76cb7f2b76 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Wed, 1 May 2024 10:14:44 -0400 Subject: [PATCH 18/29] Change test to passing --- tests/models/test_azureai_openai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/test_azureai_openai.py b/tests/models/test_azureai_openai.py index 11c3d91d7..e64f5d040 100644 --- a/tests/models/test_azureai_openai.py +++ b/tests/models/test_azureai_openai.py @@ -111,7 +111,7 @@ def test_azureai_openai_chat_longer_2(rate_limiter): lm += gen(max_tokens=2, name="number") print(str(lm)) - assert len(lm["number"]) < 0 + assert len(lm["number"]) > 0 def test_azureai_openai_chat_alt_args(rate_limiter): From b68e9d716a252eb5bb6c2774ee6f2f1e7aaee773 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Wed, 1 May 2024 10:18:28 -0400 Subject: [PATCH 19/29] Expand Azure AI Studio testing --- tests/models/test_azureai_studio.py | 90 +++++++++++++++++++---------- 1 file changed, 59 insertions(+), 31 deletions(-) diff --git a/tests/models/test_azureai_studio.py b/tests/models/test_azureai_studio.py index bb6f0e83a..33bf5a4c4 100644 --- a/tests/models/test_azureai_studio.py +++ b/tests/models/test_azureai_studio.py @@ -11,10 +11,23 @@ pytestmark = pytest.mark.needs_credentials -def test_azureai_phi3_chat_smoke(rate_limiter): - azureai_studio_endpoint = env_or_fail("AZURE_AI_STUDIO_PHI3_ENDPOINT") - azureai_studio_deployment = env_or_fail("AZURE_AI_STUDIO_PHI3_DEPLOYMENT") - azureai_studio_key = env_or_fail("AZURE_AI_STUDIO_PHI3_KEY") +def _get_chat_model(model_name: str): + if model_name == "phi3": + azureai_studio_endpoint = env_or_fail("AZURE_AI_STUDIO_PHI3_ENDPOINT") + azureai_studio_deployment = env_or_fail("AZURE_AI_STUDIO_PHI3_DEPLOYMENT") + azureai_studio_key = 
env_or_fail("AZURE_AI_STUDIO_PHI3_KEY") + elif model_name == "mistral": + azureai_studio_endpoint = env_or_fail("AZURE_AI_STUDIO_MISTRAL_CHAT_ENDPOINT") + azureai_studio_deployment = env_or_fail( + "AZURE_AI_STUDIO_MISTRAL_CHAT_DEPLOYMENT" + ) + azureai_studio_key = env_or_fail("AZURE_AI_STUDIO_MISTRAL_CHAT_KEY") + elif model_name == "llama3": + azureai_studio_endpoint = env_or_fail("AZURE_AI_STUDIO_LLAMA3_CHAT_ENDPOINT") + azureai_studio_deployment = env_or_fail( + "AZURE_AI_STUDIO_LLAMA3_CHAT_DEPLOYMENT" + ) + azureai_studio_key = env_or_fail("AZURE_AI_STUDIO_LLAMA3_CHAT_KEY") lm = models.AzureAIStudioChat( azureai_studio_endpoint=azureai_studio_endpoint, @@ -23,6 +36,12 @@ def test_azureai_phi3_chat_smoke(rate_limiter): clear_cache=True, ) assert isinstance(lm, models.AzureAIStudioChat) + return lm + + +@pytest.mark.parametrize("chat_model_name", ["phi3", "llama3"]) +def test_azureai_chat_smoke(rate_limiter, chat_model_name: str): + lm = _get_chat_model(chat_model_name) with system(): lm += "You are a math wiz." @@ -40,18 +59,7 @@ def test_azureai_phi3_chat_smoke(rate_limiter): def test_azureai_mistral_chat_smoke(rate_limiter): - azureai_studio_endpoint = env_or_fail("AZURE_AI_STUDIO_MISTRAL_CHAT_ENDPOINT") - azureai_studio_deployment = env_or_fail("AZURE_AI_STUDIO_MISTRAL_CHAT_DEPLOYMENT") - azureai_studio_key = env_or_fail("AZURE_AI_STUDIO_MISTRAL_CHAT_KEY") - - lm = models.AzureAIStudioChat( - azureai_studio_endpoint=azureai_studio_endpoint, - azureai_studio_deployment=azureai_studio_deployment, - azureai_studio_key=azureai_studio_key, - clear_cache=True, - ) - assert isinstance(lm, models.AzureAIStudioChat) - lm.engine.cache.clear() + lm = _get_chat_model("mistral") # No "system" role for Mistral? # with system(): @@ -69,18 +77,37 @@ def test_azureai_mistral_chat_smoke(rate_limiter): assert str(lm).endswith("Pick a number: <|im_end|>") -def test_azureai_llama3_chat_smoke(rate_limiter): - azureai_studio_endpoint = env_or_fail("AZURE_AI_STUDIO_LLAMA3_CHAT_ENDPOINT") - azureai_studio_deployment = env_or_fail("AZURE_AI_STUDIO_LLAMA3_CHAT_DEPLOYMENT") - azureai_studio_key = env_or_fail("AZURE_AI_STUDIO_LLAMA3_CHAT_KEY") +@pytest.mark.parametrize("chat_model_name", ["phi3", "llama3"]) +def test_azureai_chat_longer_1(rate_limiter, chat_model_name: str): + lm = _get_chat_model(chat_model_name) - lm = models.AzureAIStudioChat( - azureai_studio_endpoint=azureai_studio_endpoint, - azureai_studio_deployment=azureai_studio_deployment, - azureai_studio_key=azureai_studio_key, - clear_cache=True, - ) - assert isinstance(lm, models.AzureAIStudioChat) + with system(): + lm += "You are a math wiz." + + with user(): + lm += "What is 1 + 1?" + + with assistant(): + lm += gen(max_tokens=10, name="text") + lm += "Pick a number: " + + print(str(lm)) + assert len(lm["text"]) > 0 + assert str(lm).endswith("Pick a number: <|im_end|>") + + with user(): + lm += "10. Now you pick a number between 0 and 20" + + with assistant(): + lm += gen(max_tokens=2, name="number") + + print(str(lm)) + assert len(lm["number"]) > 0 + + +@pytest.mark.parametrize("chat_model_name", ["phi3", "llama3"]) +def test_azureai_chat_longer_2(rate_limiter, chat_model_name: str): + lm = _get_chat_model(chat_model_name) with system(): lm += "You are a math wiz." @@ -90,22 +117,23 @@ def test_azureai_llama3_chat_smoke(rate_limiter): with assistant(): lm += "2" - + with user(): lm += "What is 2 + 3?" 
with assistant(): - lm += gen(max_tokens=10, name="text", temperature=0.5) + lm += gen(max_tokens=10, name="text") lm += "Pick a number: " + print(str(lm)) assert len(lm["text"]) > 0 assert str(lm).endswith("Pick a number: <|im_end|>") with user(): - lm += "I pick 10. Can you pick a number between 0 and 20?" + lm += "10. Now you pick a number between 0 and 20" with assistant(): lm += gen(max_tokens=2, name="number") print(str(lm)) - assert len(lm["number"]) < 0 + assert len(lm["number"]) > 0 From 9a4c1a87d7b0152e1f84353c68220876c8b36849 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Wed, 1 May 2024 10:36:04 -0400 Subject: [PATCH 20/29] Refactor tests --- tests/models/test_azureai_studio.py | 68 ++++++++++------------------- 1 file changed, 24 insertions(+), 44 deletions(-) diff --git a/tests/models/test_azureai_studio.py b/tests/models/test_azureai_studio.py index 33bf5a4c4..0e0fff587 100644 --- a/tests/models/test_azureai_studio.py +++ b/tests/models/test_azureai_studio.py @@ -10,24 +10,17 @@ # Mark is configured in pyproject.toml pytestmark = pytest.mark.needs_credentials +# How to fill out the environment variables to +# set up the models +_chat_models = {"phi3": "PHI3", "mistral": "MISTRAL_CHAT", "llama3": "LLAMA3_CHAT"} + def _get_chat_model(model_name: str): - if model_name == "phi3": - azureai_studio_endpoint = env_or_fail("AZURE_AI_STUDIO_PHI3_ENDPOINT") - azureai_studio_deployment = env_or_fail("AZURE_AI_STUDIO_PHI3_DEPLOYMENT") - azureai_studio_key = env_or_fail("AZURE_AI_STUDIO_PHI3_KEY") - elif model_name == "mistral": - azureai_studio_endpoint = env_or_fail("AZURE_AI_STUDIO_MISTRAL_CHAT_ENDPOINT") - azureai_studio_deployment = env_or_fail( - "AZURE_AI_STUDIO_MISTRAL_CHAT_DEPLOYMENT" - ) - azureai_studio_key = env_or_fail("AZURE_AI_STUDIO_MISTRAL_CHAT_KEY") - elif model_name == "llama3": - azureai_studio_endpoint = env_or_fail("AZURE_AI_STUDIO_LLAMA3_CHAT_ENDPOINT") - azureai_studio_deployment = env_or_fail( - "AZURE_AI_STUDIO_LLAMA3_CHAT_DEPLOYMENT" - ) - azureai_studio_key = env_or_fail("AZURE_AI_STUDIO_LLAMA3_CHAT_KEY") + env_string = _chat_models[model_name] + + azureai_studio_endpoint = env_or_fail(f"AZURE_AI_STUDIO_{env_string}_ENDPOINT") + azureai_studio_deployment = env_or_fail(f"AZURE_AI_STUDIO_{env_string}_DEPLOYMENT") + azureai_studio_key = env_or_fail(f"AZURE_AI_STUDIO_{env_string}_KEY") lm = models.AzureAIStudioChat( azureai_studio_endpoint=azureai_studio_endpoint, @@ -39,12 +32,14 @@ def _get_chat_model(model_name: str): return lm -@pytest.mark.parametrize("chat_model_name", ["phi3", "llama3"]) +@pytest.mark.parametrize("chat_model_name", _chat_models.keys()) def test_azureai_chat_smoke(rate_limiter, chat_model_name: str): lm = _get_chat_model(chat_model_name) - with system(): - lm += "You are a math wiz." + # This makes me unhappy + if chat_model_name != "mistral": + with system(): + lm += "You are a math wiz." with user(): lm += "What is 1 + 1?" @@ -58,31 +53,14 @@ def test_azureai_chat_smoke(rate_limiter, chat_model_name: str): assert str(lm).endswith("Pick a number: <|im_end|>") -def test_azureai_mistral_chat_smoke(rate_limiter): - lm = _get_chat_model("mistral") - - # No "system" role for Mistral? - # with system(): - # lm += "You are a math wiz." - - with user(): - lm += "What is 1 + 1?" 
- - with assistant(): - lm += gen(max_tokens=15, name="text", temperature=0.5) - lm += "\nPick a number: " - - print(str(lm)) - assert len(lm["text"]) > 0 - assert str(lm).endswith("Pick a number: <|im_end|>") - - -@pytest.mark.parametrize("chat_model_name", ["phi3", "llama3"]) +@pytest.mark.parametrize("chat_model_name", _chat_models.keys()) def test_azureai_chat_longer_1(rate_limiter, chat_model_name: str): lm = _get_chat_model(chat_model_name) - with system(): - lm += "You are a math wiz." + # This makes me unhappy + if chat_model_name != "mistral": + with system(): + lm += "You are a math wiz." with user(): lm += "What is 1 + 1?" @@ -105,12 +83,14 @@ def test_azureai_chat_longer_1(rate_limiter, chat_model_name: str): assert len(lm["number"]) > 0 -@pytest.mark.parametrize("chat_model_name", ["phi3", "llama3"]) +@pytest.mark.parametrize("chat_model_name", _chat_models.keys()) def test_azureai_chat_longer_2(rate_limiter, chat_model_name: str): lm = _get_chat_model(chat_model_name) - with system(): - lm += "You are a math wiz." + # This makes me unhappy + if chat_model_name != "mistral": + with system(): + lm += "You are a math wiz." with user(): lm += "What is 1 + 1?" From c0769b69efed6a73fc0c6a5fb7faee2bb088d048 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Wed, 1 May 2024 10:51:59 -0400 Subject: [PATCH 21/29] Refactor tests --- tests/models/common_chat_testing.py | 78 ++++++++++++++++++++++++++++ tests/models/test_azureai_openai.py | 80 ++--------------------------- tests/models/test_azureai_studio.py | 76 ++------------------------- 3 files changed, 88 insertions(+), 146 deletions(-) create mode 100644 tests/models/common_chat_testing.py diff --git a/tests/models/common_chat_testing.py b/tests/models/common_chat_testing.py new file mode 100644 index 000000000..99c45e860 --- /dev/null +++ b/tests/models/common_chat_testing.py @@ -0,0 +1,78 @@ +from guidance import assistant, gen, models, system, user + + +def smoke_chat(lm: models.Chat, has_system_role: bool = True): + if has_system_role: + with system(): + lm += "You are a math wiz." + + with user(): + lm += "What is 1 + 1?" + + with assistant(): + lm += gen(max_tokens=10, name="text", temperature=0.5) + lm += "Pick a number: " + + print(str(lm)) + assert len(lm["text"]) > 0 + assert str(lm).endswith("Pick a number: <|im_end|>") + + +def longer_chat_1(lm: models.Chat, has_system_role: bool = True): + if has_system_role: + with system(): + lm += "You are a math wiz." + + with user(): + lm += "What is 1 + 1?" + + with assistant(): + lm += gen(max_tokens=10, name="text") + lm += "Pick a number: " + + print(str(lm)) + assert len(lm["text"]) > 0 + assert str(lm).endswith("Pick a number: <|im_end|>") + + with user(): + lm += "10. Now you pick a number between 0 and 20" + + with assistant(): + lm += gen(max_tokens=2, name="number") + + print(str(lm)) + assert len(lm["number"]) > 0 + + +def longer_chat_2(lm: models.Chat, has_system_role: bool = True): + if has_system_role: + with system(): + lm += "You are a math wiz." + + with user(): + lm += "What is 1 + 1?" + + # This is the new part compared to longer_chat_1 + with assistant(): + lm += "2" + + with user(): + lm += "What is 2 + 3?" + + # Resume the previous + with assistant(): + lm += gen(max_tokens=10, name="text") + lm += "Pick a number: " + + print(str(lm)) + assert len(lm["text"]) > 0 + assert str(lm).endswith("Pick a number: <|im_end|>") + + with user(): + lm += "10. 
Now you pick a number between 0 and 20" + + with assistant(): + lm += gen(max_tokens=2, name="number") + + print(str(lm)) + assert len(lm["number"]) > 0 diff --git a/tests/models/test_azureai_openai.py b/tests/models/test_azureai_openai.py index e64f5d040..1a70d759f 100644 --- a/tests/models/test_azureai_openai.py +++ b/tests/models/test_azureai_openai.py @@ -7,6 +7,7 @@ from guidance import assistant, gen, models, system, user +from . import common_chat_testing from ..utils import env_or_fail # Everything in here needs credentials to work @@ -25,19 +26,7 @@ def test_azureai_openai_chat_smoke(rate_limiter): ) assert isinstance(lm, models.AzureOpenAIChat) - with system(): - lm += "You are a math wiz." - - with user(): - lm += "What is 1 + 1?" - - with assistant(): - lm += gen(max_tokens=10, name="text") - lm += "Pick a number: " - - print(str(lm)) - assert len(lm["text"]) > 0 - assert str(lm).endswith("Pick a number: <|im_end|>") + common_chat_testing.smoke_chat(lm) def test_azureai_openai_chat_longer_1(rate_limiter): @@ -50,28 +39,7 @@ def test_azureai_openai_chat_longer_1(rate_limiter): ) assert isinstance(lm, models.AzureOpenAIChat) - with system(): - lm += "You are a math wiz." - - with user(): - lm += "What is 1 + 1?" - - with assistant(): - lm += gen(max_tokens=10, name="text") - lm += "Pick a number: " - - print(str(lm)) - assert len(lm["text"]) > 0 - assert str(lm).endswith("Pick a number: <|im_end|>") - - with user(): - lm += "10. Now you pick a number between 0 and 20" - - with assistant(): - lm += gen(max_tokens=2, name="number") - - print(str(lm)) - assert len(lm["number"]) > 0 + common_chat_testing.longer_chat_1(lm) def test_azureai_openai_chat_longer_2(rate_limiter): @@ -84,34 +52,7 @@ def test_azureai_openai_chat_longer_2(rate_limiter): ) assert isinstance(lm, models.AzureOpenAIChat) - with system(): - lm += "You are a math wiz." - - with user(): - lm += "What is 1 + 1?" - - with assistant(): - lm += "2" - - with user(): - lm += "What is 2 + 3?" - - with assistant(): - lm += gen(max_tokens=10, name="text") - lm += "Pick a number: " - - print(str(lm)) - assert len(lm["text"]) > 0 - assert str(lm).endswith("Pick a number: <|im_end|>") - - with user(): - lm += "10. Now you pick a number between 0 and 20" - - with assistant(): - lm += gen(max_tokens=2, name="number") - - print(str(lm)) - assert len(lm["number"]) > 0 + common_chat_testing.longer_chat_2(lm) def test_azureai_openai_chat_alt_args(rate_limiter): @@ -133,18 +74,7 @@ def test_azureai_openai_chat_alt_args(rate_limiter): azure_deployment=azureai_deployment, ) - with system(): - lm += "You are a math wiz." - - with user(): - lm += "What is 1 + 1?" - - with assistant(): - lm += gen(max_tokens=10, name="text") - lm += "Pick a number: " - - print(str(lm)) - assert len(lm["text"]) > 0 + common_chat_testing.smoke_chat(lm) def test_azureai_openai_completion_smoke(rate_limiter): diff --git a/tests/models/test_azureai_studio.py b/tests/models/test_azureai_studio.py index 0e0fff587..c13dbb2f5 100644 --- a/tests/models/test_azureai_studio.py +++ b/tests/models/test_azureai_studio.py @@ -4,6 +4,8 @@ from guidance import assistant, gen, models, system, user + +from . import common_chat_testing from ..utils import env_or_fail # Everything in here needs credentials to work @@ -36,84 +38,16 @@ def _get_chat_model(model_name: str): def test_azureai_chat_smoke(rate_limiter, chat_model_name: str): lm = _get_chat_model(chat_model_name) - # This makes me unhappy - if chat_model_name != "mistral": - with system(): - lm += "You are a math wiz." 
- - with user(): - lm += "What is 1 + 1?" - - with assistant(): - lm += gen(max_tokens=10, name="text", temperature=0.5) - lm += "Pick a number: " - - print(str(lm)) - assert len(lm["text"]) > 0 - assert str(lm).endswith("Pick a number: <|im_end|>") + common_chat_testing.smoke_chat(lm, chat_model_name != "mistral") @pytest.mark.parametrize("chat_model_name", _chat_models.keys()) def test_azureai_chat_longer_1(rate_limiter, chat_model_name: str): lm = _get_chat_model(chat_model_name) - - # This makes me unhappy - if chat_model_name != "mistral": - with system(): - lm += "You are a math wiz." - - with user(): - lm += "What is 1 + 1?" - - with assistant(): - lm += gen(max_tokens=10, name="text") - lm += "Pick a number: " - - print(str(lm)) - assert len(lm["text"]) > 0 - assert str(lm).endswith("Pick a number: <|im_end|>") - - with user(): - lm += "10. Now you pick a number between 0 and 20" - - with assistant(): - lm += gen(max_tokens=2, name="number") - - print(str(lm)) - assert len(lm["number"]) > 0 + common_chat_testing.longer_chat_1(lm, chat_model_name != "mistral") @pytest.mark.parametrize("chat_model_name", _chat_models.keys()) def test_azureai_chat_longer_2(rate_limiter, chat_model_name: str): lm = _get_chat_model(chat_model_name) - - # This makes me unhappy - if chat_model_name != "mistral": - with system(): - lm += "You are a math wiz." - - with user(): - lm += "What is 1 + 1?" - - with assistant(): - lm += "2" - - with user(): - lm += "What is 2 + 3?" - - with assistant(): - lm += gen(max_tokens=10, name="text") - lm += "Pick a number: " - - print(str(lm)) - assert len(lm["text"]) > 0 - assert str(lm).endswith("Pick a number: <|im_end|>") - - with user(): - lm += "10. Now you pick a number between 0 and 20" - - with assistant(): - lm += gen(max_tokens=2, name="number") - - print(str(lm)) - assert len(lm["number"]) > 0 + common_chat_testing.longer_chat_2(lm, chat_model_name != "mistral") From 9c755d6a0a45064811d1f579636da24979612933 Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Thu, 2 May 2024 06:38:15 -0400 Subject: [PATCH 22/29] Start doc writing --- guidance/models/_azureai_studio.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/guidance/models/_azureai_studio.py b/guidance/models/_azureai_studio.py index dc1e938c4..1bf5a5082 100644 --- a/guidance/models/_azureai_studio.py +++ b/guidance/models/_azureai_studio.py @@ -152,6 +152,10 @@ def __init__( compute_log_probs: bool = False, clear_cache: bool = False, ): + """Create a model object for interacting with Azure AI Studio chat endpoints + + + """ super().__init__( AzureAIStudioChatEngine( azureai_studio_endpoint=azureai_studio_endpoint, @@ -161,7 +165,7 @@ def __init__( max_streaming_tokens=max_streaming_tokens, timeout=timeout, compute_log_probs=compute_log_probs, - clear_cache=False, + clear_cache=clear_cache, ), echo=echo, ) From 21ee13fdfacf3ee148da2f61f25c030da6f41d2a Mon Sep 17 00:00:00 2001 From: "Richard Edgar (Microsoft)" Date: Thu, 2 May 2024 06:43:23 -0400 Subject: [PATCH 23/29] Add some basic docs --- guidance/models/_azureai_studio.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/guidance/models/_azureai_studio.py b/guidance/models/_azureai_studio.py index 1bf5a5082..8ce88ddca 100644 --- a/guidance/models/_azureai_studio.py +++ b/guidance/models/_azureai_studio.py @@ -152,9 +152,25 @@ def __init__( compute_log_probs: bool = False, clear_cache: bool = False, ): - """Create a model object for interacting with Azure AI Studio chat 
From 9c755d6a0a45064811d1f579636da24979612933 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)"
Date: Thu, 2 May 2024 06:38:15 -0400
Subject: [PATCH 22/29] Start doc writing

---
 guidance/models/_azureai_studio.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/guidance/models/_azureai_studio.py b/guidance/models/_azureai_studio.py
index dc1e938c4..1bf5a5082 100644
--- a/guidance/models/_azureai_studio.py
+++ b/guidance/models/_azureai_studio.py
@@ -152,6 +152,10 @@ def __init__(
         compute_log_probs: bool = False,
         clear_cache: bool = False,
     ):
+        """Create a model object for interacting with Azure AI Studio chat endpoints
+
+
+        """
         super().__init__(
             AzureAIStudioChatEngine(
                 azureai_studio_endpoint=azureai_studio_endpoint,
@@ -161,7 +165,7 @@ def __init__(
                 max_streaming_tokens=max_streaming_tokens,
                 timeout=timeout,
                 compute_log_probs=compute_log_probs,
-                clear_cache=False,
+                clear_cache=clear_cache,
             ),
             echo=echo,
         )

From 21ee13fdfacf3ee148da2f61f25c030da6f41d2a Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)"
Date: Thu, 2 May 2024 06:43:23 -0400
Subject: [PATCH 23/29] Add some basic docs

---
 guidance/models/_azureai_studio.py | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/guidance/models/_azureai_studio.py b/guidance/models/_azureai_studio.py
index 1bf5a5082..8ce88ddca 100644
--- a/guidance/models/_azureai_studio.py
+++ b/guidance/models/_azureai_studio.py
@@ -152,9 +152,25 @@ def __init__(
         compute_log_probs: bool = False,
         clear_cache: bool = False,
     ):
-        """Create a model object for interacting with Azure AI Studio chat endpoints
-
-
+        """Create a model object for interacting with Azure AI Studio chat endpoints.
+
+        The required information about the deployed endpoint can
+        be obtained from Azure AI Studio.
+
+        A `diskcache`-based caching system is used to speed up
+        repeated calls when the temperature is specified to be
+        zero.
+
+        Parameters
+        ----------
+        azureai_studio_endpoint : str
+            The HTTPS endpoint deployed by Azure AI Studio
+        azureai_studio_deployment : str
+            The specific model deployed to the endpoint
+        azureai_studio_key : str
+            The key required for access to the API
+        clear_cache : bool
+            Whether to empty the internal cache
         """
         super().__init__(
             AzureAIStudioChatEngine(
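
For orientation, constructing and using the class documented above might look like the following sketch. The exported name `models.AzureAIStudioChat` is inferred from the engine class name and is not confirmed by this excerpt; the environment variable names are placeholders.

import os

from guidance import assistant, gen, models, system, user

# Endpoint details come from the deployment page in Azure AI Studio;
# these environment variable names are illustrative only.
lm = models.AzureAIStudioChat(
    azureai_studio_endpoint=os.environ["AZUREAI_STUDIO_ENDPOINT"],
    azureai_studio_deployment=os.environ["AZUREAI_STUDIO_DEPLOYMENT"],
    azureai_studio_key=os.environ["AZUREAI_STUDIO_KEY"],
)

with system():
    lm += "You are a helpful assistant."

with user():
    lm += "What is 1 + 1?"

with assistant():
    lm += gen(max_tokens=10, name="answer")

print(lm["answer"])
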
From cdf679c7373e41035008bd0cef02c26b089228cf Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)"
Date: Fri, 3 May 2024 09:44:24 -0400
Subject: [PATCH 24/29] Use the new endpoint

---
 guidance/models/_azureai_studio.py | 58 +++++++++++++++++++++---------
 1 file changed, 41 insertions(+), 17 deletions(-)

diff --git a/guidance/models/_azureai_studio.py b/guidance/models/_azureai_studio.py
index 8ce88ddca..53bd56e07 100644
--- a/guidance/models/_azureai_studio.py
+++ b/guidance/models/_azureai_studio.py
@@ -1,7 +1,9 @@
 import hashlib
 import pathlib
+import urllib.parse
 
 import diskcache as dc
+import openai
 import platformdirs
 import requests
 
@@ -22,7 +24,13 @@ def __init__(
         azureai_studio_key: str,
         clear_cache: bool,
     ):
-        self._endpoint = azureai_studio_endpoint
+        endpoint_parts = urllib.parse.urlparse(azureai_studio_endpoint)
+        if endpoint_parts.path == "/score":
+            self._is_openai_compatible = False
+            self._endpoint = azureai_studio_endpoint
+        else:
+            self._is_openai_compatible = True
+            self._endpoint = f"{endpoint_parts.scheme}://{endpoint_parts.hostname}"
         self._deployment = azureai_model_deployment
         self._api_key = azureai_studio_key
 
@@ -104,31 +112,47 @@ def _generator(self, prompt, temperature: float):
                     yield chunk
                 return
 
-        # Prepare for the API call (this might be model specific....)
-        parameters = dict(temperature=temperature)
-        payload = dict(input_data=dict(input_string=messages, parameters=parameters))
+        # Call the actual API and extract the next chunk
+        if self._is_openai_compatible:
+            client = openai.OpenAI(api_key=self._api_key, base_url=self._endpoint)
+            response = client.chat.completions.create(
+                model=self._deployment,
+                messages=messages,
+                # max_tokens=self.max_streaming_tokens,
+                n=1,
+                top_p=1.0,  # TODO: this should be controllable like temp (from the grammar)
+                temperature=temperature,
+                # stream=True,
+            )
 
-        headers = {
-            "Content-Type": "application/json",
-            "Authorization": ("Bearer " + self._api_key),
-            "azureml-model-deployment": self._deployment,
-        }
+            result = response.choices[0]
+            encoded_chunk = result.message.content.encode("utf8")
+        else:
+            parameters = dict(temperature=temperature)
+            payload = dict(
+                input_data=dict(input_string=messages, parameters=parameters)
+            )
 
-        response = requests.post(
-            self._endpoint,
-            json=payload,
-            headers=headers,
-        )
+            headers = {
+                "Content-Type": "application/json",
+                "Authorization": ("Bearer " + self._api_key),
+                "azureml-model-deployment": self._deployment,
+            }
+            response = requests.post(
+                self._endpoint,
+                json=payload,
+                headers=headers,
+            )
+
+            result = response.json()
 
-        result = response.json()
+            encoded_chunk = result["output"].encode("utf8")
 
         # Now back to OpenAIChatEngine, with slight modifications since
         # this isn't a streaming API
         if temperature == 0:
             cached_results = []
 
-        encoded_chunk = result["output"].encode("utf8")
-
         yield encoded_chunk
 
         if temperature == 0:

From 4281d7f0513a1b562871ac507b0c048308e556b2 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)"
Date: Fri, 3 May 2024 10:55:44 -0400
Subject: [PATCH 25/29] Handle optional import

---
 guidance/models/_azureai_studio.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/guidance/models/_azureai_studio.py b/guidance/models/_azureai_studio.py
index 53bd56e07..3d28e7635 100644
--- a/guidance/models/_azureai_studio.py
+++ b/guidance/models/_azureai_studio.py
@@ -3,13 +3,16 @@
 import urllib.parse
 
 import diskcache as dc
-import openai
 import platformdirs
 import requests
 
 from ._model import Chat
 from ._grammarless import GrammarlessEngine, Grammarless
 
+try:
+    import openai
+except ImportError:
+    openai = None
 
 class AzureAIStudioChatEngine(GrammarlessEngine):
     def __init__(

From 7bf3d0792d1d2c1cfcc58752b841157070aa8f23 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)"
Date: Fri, 3 May 2024 10:57:19 -0400
Subject: [PATCH 26/29] OpenAI guard mk II

---
 guidance/models/_azureai_studio.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/guidance/models/_azureai_studio.py b/guidance/models/_azureai_studio.py
index 3d28e7635..9f50c1340 100644
--- a/guidance/models/_azureai_studio.py
+++ b/guidance/models/_azureai_studio.py
@@ -9,10 +9,13 @@
 from ._model import Chat
 from ._grammarless import GrammarlessEngine, Grammarless
 
+
 try:
     import openai
-except ImportError:
-    openai = None
+
+    is_openai = True
+except ModuleNotFoundError:
+    is_openai = False
 
 class AzureAIStudioChatEngine(GrammarlessEngine):
     def __init__(
@@ -32,6 +35,8 @@ def __init__(
             self._is_openai_compatible = False
             self._endpoint = azureai_studio_endpoint
         else:
+            if not is_openai:
+                raise ValueError("Detected OpenAI compatible model; please install openai package")
             self._is_openai_compatible = True
             self._endpoint = f"{endpoint_parts.scheme}://{endpoint_parts.hostname}"
         self._deployment = azureai_model_deployment
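
The branch added in PATCH 24 keys entirely off the URL path: legacy AzureML "score" endpoints keep the full URL for `requests.post`, while anything else is treated as OpenAI-compatible and reduced to scheme plus host for use as an `openai.OpenAI` base URL. A standalone illustration of that test follows; both URLs are made up for the example.

import urllib.parse

for url in [
    "https://example-host.eastus2.inference.ml.azure.com/score",
    "https://example-host.eastus2.inference.ai.azure.com",
]:
    parts = urllib.parse.urlparse(url)
    if parts.path == "/score":
        # Legacy scoring API: post to the full URL
        print("score endpoint:", url)
    else:
        # OpenAI-compatible: strip to scheme://host for openai.OpenAI(base_url=...)
        print("openai-compatible base_url:", f"{parts.scheme}://{parts.hostname}")
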
From 50847c514ad8e22c27285b9002f4ae8fb8138aad Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)"
Date: Fri, 3 May 2024 11:11:40 -0400
Subject: [PATCH 27/29] Small fixes for mypy

---
 guidance/models/_azureai_studio.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/guidance/models/_azureai_studio.py b/guidance/models/_azureai_studio.py
index 9f50c1340..6a8bea201 100644
--- a/guidance/models/_azureai_studio.py
+++ b/guidance/models/_azureai_studio.py
@@ -146,13 +146,13 @@ def _generator(self, prompt, temperature: float):
                 "Authorization": ("Bearer " + self._api_key),
                 "azureml-model-deployment": self._deployment,
             }
-            response = requests.post(
+            response_score = requests.post(
                 self._endpoint,
                 json=payload,
                 headers=headers,
             )
 
-            result = response.json()
+            result = response_score.json()
 
             encoded_chunk = result["output"].encode("utf8")

From 3d73c424f7983059e37b014f20426647fe676ed4 Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)"
Date: Fri, 3 May 2024 11:20:29 -0400
Subject: [PATCH 28/29] One suppression....

---
 guidance/models/_azureai_studio.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/guidance/models/_azureai_studio.py b/guidance/models/_azureai_studio.py
index 6a8bea201..52a731c62 100644
--- a/guidance/models/_azureai_studio.py
+++ b/guidance/models/_azureai_studio.py
@@ -17,6 +17,7 @@
 except ModuleNotFoundError:
     is_openai = False
 
+
 class AzureAIStudioChatEngine(GrammarlessEngine):
     def __init__(
         self,
@@ -36,7 +37,9 @@ def __init__(
             self._endpoint = azureai_studio_endpoint
         else:
             if not is_openai:
-                raise ValueError("Detected OpenAI compatible model; please install openai package")
+                raise ValueError(
+                    "Detected OpenAI compatible model; please install openai package"
+                )
             self._is_openai_compatible = True
             self._endpoint = f"{endpoint_parts.scheme}://{endpoint_parts.hostname}"
         self._deployment = azureai_model_deployment
@@ -125,7 +128,7 @@ def _generator(self, prompt, temperature: float):
             client = openai.OpenAI(api_key=self._api_key, base_url=self._endpoint)
             response = client.chat.completions.create(
                 model=self._deployment,
-                messages=messages,
+                messages=messages,  # type: ignore[arg-type]
                 # max_tokens=self.max_streaming_tokens,
                 n=1,
                 top_p=1.0,  # TODO: this should be controllable like temp (from the grammar)

From 64ec23227ddb1bddfd8e1ccb5b461c71530413cc Mon Sep 17 00:00:00 2001
From: "Richard Edgar (Microsoft)"
Date: Fri, 3 May 2024 11:27:11 -0400
Subject: [PATCH 29/29] More mypy fixing

---
 guidance/models/_azureai_studio.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/guidance/models/_azureai_studio.py b/guidance/models/_azureai_studio.py
index 52a731c62..9eb1aae72 100644
--- a/guidance/models/_azureai_studio.py
+++ b/guidance/models/_azureai_studio.py
@@ -137,7 +137,7 @@ def _generator(self, prompt, temperature: float):
             )
 
             result = response.choices[0]
-            encoded_chunk = result.message.content.encode("utf8")
+            encoded_chunk = result.message.content.encode("utf8")  # type: ignore[union-attr]
         else:
             parameters = dict(temperature=temperature)
             payload = dict(
@@ -155,9 +155,9 @@ def _generator(self, prompt, temperature: float):
                 headers=headers,
            )
 
-            result = response_score.json()
+            result_score = response_score.json()
 
-            encoded_chunk = result["output"].encode("utf8")
+            encoded_chunk = result_score["output"].encode("utf8")
 
         # Now back to OpenAIChatEngine, with slight modifications since
         # this isn't a streaming API
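
The "Now back to OpenAIChatEngine" tail above feeds the single non-streamed chunk through the same temperature-zero cache bookkeeping the streaming engines use. In isolation, that pattern is roughly the following sketch; the cache location, key scheme, and function shape are illustrative assumptions, not the engine's exact code.

import hashlib

import diskcache as dc
import platformdirs


def cached_generate(prompt: str, temperature: float, call_api):
    # Responses are cached (and replayed) only for deterministic calls,
    # i.e. temperature == 0
    cache_dir = platformdirs.user_cache_dir("example_cache")
    with dc.Cache(cache_dir) as cache:
        key = hashlib.sha256(prompt.encode("utf8")).hexdigest()
        if temperature == 0 and key in cache:
            yield cache[key]
            return
        encoded_chunk = call_api(prompt, temperature)
        yield encoded_chunk
        if temperature == 0:
            cache[key] = encoded_chunk
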