diff --git a/llm/default_plugins/openai_models.py b/llm/default_plugins/openai_models.py
index cca0a171..eb2d5e4c 100644
--- a/llm/default_plugins/openai_models.py
+++ b/llm/default_plugins/openai_models.py
@@ -1,6 +1,6 @@
 from llm import EmbeddingModel, Model, hookimpl
 import llm
-from llm.utils import dicts_to_table_string
+from llm.utils import dicts_to_table_string, remove_dict_none_values
 import click
 import datetime
 import httpx
@@ -312,7 +312,7 @@ def execute(self, prompt, stream, response, conversation=None):
                 content = chunk.choices[0].delta.content
                 if content is not None:
                     yield content
-            response.response_json = combine_chunks(chunks)
+            response.response_json = remove_dict_none_values(combine_chunks(chunks))
         else:
             completion = client.chat.completions.create(
                 model=self.model_name or self.model_id,
@@ -320,13 +320,13 @@ def execute(self, prompt, stream, response, conversation=None):
                 stream=False,
                 **kwargs,
             )
-            response.response_json = completion.dict()
+            response.response_json = remove_dict_none_values(completion.dict())
             yield completion.choices[0].message.content
 
     def get_client(self):
         kwargs = {}
         if self.api_base:
-            kwargs["api_base"] = self.api_base
+            kwargs["base_url"] = self.api_base
         if self.api_type:
             kwargs["api_type"] = self.api_type
         if self.api_version:
@@ -396,7 +396,9 @@ def execute(self, prompt, stream, response, conversation=None):
                 content = chunk.choices[0].text
                 if content is not None:
                     yield content
-            response.response_json = combine_chunks(chunks)
+            combined = combine_chunks(chunks)
+            cleaned = remove_dict_none_values(combined)
+            response.response_json = cleaned
         else:
             completion = client.completions.create(
                 model=self.model_name or self.model_id,
@@ -404,8 +406,8 @@ def execute(self, prompt, stream, response, conversation=None):
                 stream=False,
                 **kwargs,
             )
-            response.response_json = completion.dict()
-            yield completion.choices[0]["text"]
+            response.response_json = remove_dict_none_values(completion.dict())
+            yield completion.choices[0].text
 
 
 def not_nulls(data) -> dict:
@@ -416,7 +418,6 @@ def combine_chunks(chunks: List) -> dict:
     content = ""
     role = None
     finish_reason = None
-
     # If any of them have log probability, we're going to persist
     # those later on
     logprobs = []
@@ -449,7 +450,8 @@ def combine_chunks(chunks: List) -> dict:
     if logprobs:
         combined["logprobs"] = logprobs
     for key in ("id", "object", "model", "created", "index"):
-        if key in chunks[0]:
-            combined[key] = chunks[0][key]
+        value = getattr(chunks[0], key, None)
+        if value is not None:
+            combined[key] = value
     return combined
 
diff --git a/llm/utils.py b/llm/utils.py
index cf9a685a..43cc12c1 100644
--- a/llm/utils.py
+++ b/llm/utils.py
@@ -23,3 +23,23 @@ def dicts_to_table_string(
         res.append(" ".join(row))
 
     return res
+
+
+def remove_dict_none_values(d: dict) -> dict:
+    """
+    Recursively remove keys with value of None or value of a dict that is all values of None
+    """
+    if not isinstance(d, dict):
+        return d
+    new_dict = {}
+    for key, value in d.items():
+        if value is not None:
+            if isinstance(value, dict):
+                nested = remove_dict_none_values(value)
+                if nested:
+                    new_dict[key] = nested
+            elif isinstance(value, list):
+                new_dict[key] = [remove_dict_none_values(v) for v in value]
+            else:
+                new_dict[key] = value
+    return new_dict
diff --git a/tests/conftest.py b/tests/conftest.py
index 4ab513d0..9126cab0 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -4,6 +4,7 @@
 import llm
 from llm.plugins import pm
 from pydantic import Field
+from pytest_httpx import HTTPXMock, IteratorStream
 from typing import Optional
 
 
@@ -153,41 +154,44 @@ def mocked_openai_chat(httpx_mock):
     return httpx_mock
 
 
+def stream_events():
+    for delta, finish_reason in (
+        ({"role": "assistant", "content": ""}, None),
+        ({"content": "Hi"}, None),
+        ({"content": "."}, None),
+        ({}, "stop"),
+    ):
+        yield "data: {}\n\n".format(
+            json.dumps(
+                {
+                    "id": "chat-1",
+                    "object": "chat.completion.chunk",
+                    "created": 1695096940,
+                    "model": "gpt-3.5-turbo-0613",
+                    "choices": [
+                        {"index": 0, "delta": delta, "finish_reason": finish_reason}
+                    ],
+                }
+            )
+        ).encode("utf-8")
+    yield "data: [DONE]\n\n".encode("utf-8")
+
+
 @pytest.fixture
-def mocked_openai_chat_stream(requests_mock):
-    def stream_events(*args):
-        for delta, finish_reason in (
-            ({"role": "assistant", "content": ""}, None),
-            ({"content": "Hi"}, None),
-            ({"content": "."}, None),
-            ({}, "stop"),
-        ):
-            yield "data: {}\n\n".format(
-                json.dumps(
-                    {
-                        "id": "chat-1",
-                        "object": "chat.completion.chunk",
-                        "created": 1695096940,
-                        "model": "gpt-3.5-turbo-0613",
-                        "choices": [
-                            {"index": 0, "delta": delta, "finish_reason": finish_reason}
-                        ],
-                    }
-                )
-            ).encode("utf-8")
-        yield "data: [DONE]\n\n".encode("utf-8")
-
-    return requests_mock.post(
-        "https://api.openai.com/v1/chat/completions",
-        content=b"".join(stream_events()),
+def mocked_openai_chat_stream(httpx_mock):
+    httpx_mock.add_response(
+        method="POST",
+        url="https://api.openai.com/v1/chat/completions",
+        stream=IteratorStream(stream_events()),
         headers={"Content-Type": "text/event-stream"},
     )
 
 
 @pytest.fixture
-def mocked_openai_completion(requests_mock):
-    return requests_mock.post(
-        "https://api.openai.com/v1/completions",
+def mocked_openai_completion(httpx_mock):
+    httpx_mock.add_response(
+        method="POST",
+        url="https://api.openai.com/v1/completions",
         json={
             "id": "cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7",
             "object": "text_completion",
@@ -205,10 +209,10 @@
         },
         headers={"Content-Type": "application/json"},
     )
+    return httpx_mock
 
 
-@pytest.fixture
-def mocked_openai_completion_logprobs_stream(requests_mock):
+def stream_completion_events():
     choices_chunks = [
         [
             {
@@ -264,32 +268,37 @@
         ],
     ]
 
-    def stream_events():
-        for choices in choices_chunks:
-            yield "data: {}\n\n".format(
-                json.dumps(
-                    {
-                        "id": "cmpl-80MdSaou7NnPuff5ZyRMysWBmgSPS",
-                        "object": "text_completion",
-                        "created": 1695097702,
-                        "choices": choices,
-                        "model": "gpt-3.5-turbo-instruct",
-                    }
-                )
-            ).encode("utf-8")
-        yield "data: [DONE]\n\n".encode("utf-8")
-
-    return requests_mock.post(
-        "https://api.openai.com/v1/completions",
-        content=b"".join(stream_events()),
+    for choices in choices_chunks:
+        yield "data: {}\n\n".format(
+            json.dumps(
+                {
+                    "id": "cmpl-80MdSaou7NnPuff5ZyRMysWBmgSPS",
+                    "object": "text_completion",
+                    "created": 1695097702,
+                    "choices": choices,
+                    "model": "gpt-3.5-turbo-instruct",
+                }
+            )
+        ).encode("utf-8")
+    yield "data: [DONE]\n\n".encode("utf-8")
+
+
+@pytest.fixture
+def mocked_openai_completion_logprobs_stream(httpx_mock):
+    httpx_mock.add_response(
+        method="POST",
+        url="https://api.openai.com/v1/completions",
+        stream=IteratorStream(stream_completion_events()),
         headers={"Content-Type": "text/event-stream"},
     )
+    return httpx_mock
 
 
 @pytest.fixture
-def mocked_openai_completion_logprobs(requests_mock):
-    return requests_mock.post(
-        "https://api.openai.com/v1/completions",
+def mocked_openai_completion_logprobs(httpx_mock):
+    httpx_mock.add_response(
+        method="POST",
+        url="https://api.openai.com/v1/completions",
         json={
             "id": "cmpl-80MeBfKJutM0uMNJkRrebJLeP3bxL",
             "object": "text_completion",
@@ -316,12 +325,14 @@
         },
         headers={"Content-Type": "application/json"},
     )
+    return httpx_mock
 
 
 @pytest.fixture
-def mocked_localai(requests_mock):
-    requests_mock.post(
-        "http://localai.localhost/chat/completions",
+def mocked_localai(httpx_mock):
+    httpx_mock.add_response(
+        method="POST",
+        url="http://localai.localhost/chat/completions",
         json={
             "model": "orca",
             "usage": {},
@@ -329,8 +340,9 @@
         },
         headers={"Content-Type": "application/json"},
     )
-    requests_mock.post(
-        "http://localai.localhost/completions",
+    httpx_mock.add_response(
+        method="POST",
+        url="http://localai.localhost/completions",
         json={
             "model": "completion-babbage",
             "usage": {},
@@ -338,7 +350,7 @@
         },
         headers={"Content-Type": "application/json"},
     )
-    return requests_mock
+    return httpx_mock
 
 
 @pytest.fixture
diff --git a/tests/test_cli_openai_models.py b/tests/test_cli_openai_models.py
index a47499fe..56acd113 100644
--- a/tests/test_cli_openai_models.py
+++ b/tests/test_cli_openai_models.py
@@ -4,9 +4,10 @@
 
 
 @pytest.fixture
-def mocked_models(requests_mock):
-    requests_mock.get(
-        "https://api.openai.com/v1/models",
+def mocked_models(httpx_mock):
+    httpx_mock.add_response(
+        method="GET",
+        url="https://api.openai.com/v1/models",
         json={
             "data": [
                 {
@@ -25,7 +26,7 @@
         },
         headers={"Content-Type": "application/json"},
     )
-    return requests_mock
+    return httpx_mock
 
 
 def test_openai_models(mocked_models):
@@ -39,7 +40,7 @@
     )
 
 
-def test_openai_options_min_max(mocked_models):
+def test_openai_options_min_max():
     options = {
         "temperature": [0, 2],
         "top_p": [0, 1],
diff --git a/tests/test_llm.py b/tests/test_llm.py
index 3e47cc58..9f8457ab 100644
--- a/tests/test_llm.py
+++ b/tests/test_llm.py
@@ -259,7 +259,6 @@ def test_llm_default_prompt(
     }
     assert json.loads(row["response_json"]) == {
         "model": "gpt-3.5-turbo",
-        "usage": {},
         "choices": [{"message": {"content": "Bob, Alice, Eve"}}],
     }
 
@@ -285,7 +284,6 @@ def test_llm_default_prompt(
             "response": "Bob, Alice, Eve",
             "response_json": {
                 "model": "gpt-3.5-turbo",
-                "usage": {},
                 "choices": [{"message": {"content": "Bob, Alice, Eve"}}],
             },
             # This doesn't have the \n after three names:
@@ -323,7 +321,8 @@ def test_openai_completion(mocked_openai_completion, user_path):
     assert result.output == "\n\nThis is indeed a test\n"
 
     # Should have requested 256 tokens
-    assert json.loads(mocked_openai_completion.last_request.text) == {
+    last_request = mocked_openai_completion.get_requests()[-1]
+    assert json.loads(last_request.content) == {
         "model": "gpt-3.5-turbo-instruct",
         "prompt": "Say this is a test",
         "stream": False,
@@ -393,8 +392,6 @@ def test_openai_completion_logprobs_stream(
     row = rows[0]
     assert json.loads(row["response_json"]) == {
         "content": "\n\nHi.",
-        "role": None,
-        "finish_reason": None,
         "logprobs": [
             {"text": "\n\n", "top_logprobs": [{"\n\n": -0.6, "\n": -1.9}]},
             {"text": "Hi", "top_logprobs": [{"Hi": -1.1, "Hello": -0.7}]},
@@ -481,7 +478,8 @@ def test_openai_localai_configuration(mocked_localai, user_path):
     result = runner.invoke(cli, ["--no-stream", "--model", "orca", prompt])
     assert result.exit_code == 0
     assert result.output == "Bob, Alice, Eve\n"
-    assert json.loads(mocked_localai.last_request.text) == {
+    last_request = mocked_localai.get_requests()[-1]
+    assert json.loads(last_request.content) == {
         "model": "orca-mini-3b",
         "messages": [{"role": "user", "content": "three names \nfor a pet pelican"}],
         "stream": False,
@@ -490,7 +488,8 @@
     result2 = runner.invoke(cli, ["--no-stream", "--model", "completion-babbage", "hi"])
     assert result2.exit_code == 0
     assert result2.output == "Hello\n"
-    assert json.loads(mocked_localai.last_request.text) == {
+    last_request2 = mocked_localai.get_requests()[-1]
+    assert json.loads(last_request2.content) == {
        "model": "babbage",
        "prompt": "hi",
        "stream": False,