diff --git a/llm/default_plugins/openai_models.py b/llm/default_plugins/openai_models.py
index cca0a171..eb2d5e4c 100644
--- a/llm/default_plugins/openai_models.py
+++ b/llm/default_plugins/openai_models.py
@@ -1,6 +1,6 @@
 from llm import EmbeddingModel, Model, hookimpl
 import llm
-from llm.utils import dicts_to_table_string
+from llm.utils import dicts_to_table_string, remove_dict_none_values
 import click
 import datetime
 import httpx
@@ -312,7 +312,7 @@ def execute(self, prompt, stream, response, conversation=None):
                 content = chunk.choices[0].delta.content
                 if content is not None:
                     yield content
-            response.response_json = combine_chunks(chunks)
+            response.response_json = remove_dict_none_values(combine_chunks(chunks))
         else:
             completion = client.chat.completions.create(
                 model=self.model_name or self.model_id,
@@ -320,13 +320,13 @@ def execute(self, prompt, stream, response, conversation=None):
                 stream=False,
                 **kwargs,
             )
-            response.response_json = completion.dict()
+            response.response_json = remove_dict_none_values(completion.dict())
             yield completion.choices[0].message.content
 
     def get_client(self):
         kwargs = {}
         if self.api_base:
-            kwargs["api_base"] = self.api_base
+            kwargs["base_url"] = self.api_base
         if self.api_type:
             kwargs["api_type"] = self.api_type
         if self.api_version:
@@ -396,7 +396,9 @@ def execute(self, prompt, stream, response, conversation=None):
                 content = chunk.choices[0].text
                 if content is not None:
                     yield content
-            response.response_json = combine_chunks(chunks)
+            combined = combine_chunks(chunks)
+            cleaned = remove_dict_none_values(combined)
+            response.response_json = cleaned
         else:
             completion = client.completions.create(
                 model=self.model_name or self.model_id,
@@ -404,8 +406,8 @@ def execute(self, prompt, stream, response, conversation=None):
                 stream=False,
                 **kwargs,
             )
-            response.response_json = completion.dict()
-            yield completion.choices[0]["text"]
+            response.response_json = remove_dict_none_values(completion.dict())
+            yield completion.choices[0].text
 
 
 def not_nulls(data) -> dict:
@@ -416,7 +418,6 @@ def combine_chunks(chunks: List) -> dict:
     content = ""
     role = None
     finish_reason = None
-
     # If any of them have log probability, we're going to persist
     # those later on
     logprobs = []
@@ -449,7 +450,8 @@ def combine_chunks(chunks: List) -> dict:
     if logprobs:
         combined["logprobs"] = logprobs
     for key in ("id", "object", "model", "created", "index"):
-        if key in chunks[0]:
-            combined[key] = chunks[0][key]
+        value = getattr(chunks[0], key, None)
+        if value is not None:
+            combined[key] = value
     return combined
 
diff --git a/llm/utils.py b/llm/utils.py
index cf9a685a..43cc12c1 100644
--- a/llm/utils.py
+++ b/llm/utils.py
@@ -23,3 +23,23 @@ def dicts_to_table_string(
         res.append(" ".join(row))
 
     return res
+
+
+def remove_dict_none_values(d: dict) -> dict:
+    """
+    Recursively remove keys with value of None or value of a dict that is all values of None
+    """
+    if not isinstance(d, dict):
+        return d
+    new_dict = {}
+    for key, value in d.items():
+        if value is not None:
+            if isinstance(value, dict):
+                nested = remove_dict_none_values(value)
+                if nested:
+                    new_dict[key] = nested
+            elif isinstance(value, list):
+                new_dict[key] = [remove_dict_none_values(v) for v in value]
+            else:
+                new_dict[key] = value
+    return new_dict
diff --git a/tests/conftest.py b/tests/conftest.py
index 4ab513d0..9126cab0 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -4,6 +4,7 @@
 import llm
 from llm.plugins import pm
 from pydantic import Field
+from pytest_httpx import HTTPXMock, IteratorStream
 from typing import Optional
 
 
@@ -153,41 +154,44 @@ def mocked_openai_chat(httpx_mock):
     return httpx_mock
 
 
+def stream_events():
+    for delta, finish_reason in (
+        ({"role": "assistant", "content": ""}, None),
+        ({"content": "Hi"}, None),
+        ({"content": "."}, None),
+        ({}, "stop"),
+    ):
+        yield "data: {}\n\n".format(
+            json.dumps(
+                {
+                    "id": "chat-1",
+                    "object": "chat.completion.chunk",
+                    "created": 1695096940,
+                    "model": "gpt-3.5-turbo-0613",
+                    "choices": [
+                        {"index": 0, "delta": delta, "finish_reason": finish_reason}
+                    ],
+                }
+            )
+        ).encode("utf-8")
+    yield "data: [DONE]\n\n".encode("utf-8")
+
+
 @pytest.fixture
-def mocked_openai_chat_stream(requests_mock):
-    def stream_events(*args):
-        for delta, finish_reason in (
-            ({"role": "assistant", "content": ""}, None),
-            ({"content": "Hi"}, None),
-            ({"content": "."}, None),
-            ({}, "stop"),
-        ):
-            yield "data: {}\n\n".format(
-                json.dumps(
-                    {
-                        "id": "chat-1",
-                        "object": "chat.completion.chunk",
-                        "created": 1695096940,
-                        "model": "gpt-3.5-turbo-0613",
-                        "choices": [
-                            {"index": 0, "delta": delta, "finish_reason": finish_reason}
-                        ],
-                    }
-                )
-            ).encode("utf-8")
-        yield "data: [DONE]\n\n".encode("utf-8")
-
-    return requests_mock.post(
-        "https://api.openai.com/v1/chat/completions",
-        content=b"".join(stream_events()),
+def mocked_openai_chat_stream(httpx_mock):
+    httpx_mock.add_response(
+        method="POST",
+        url="https://api.openai.com/v1/chat/completions",
+        stream=IteratorStream(stream_events()),
         headers={"Content-Type": "text/event-stream"},
     )
 
 
 @pytest.fixture
-def mocked_openai_completion(requests_mock):
-    return requests_mock.post(
-        "https://api.openai.com/v1/completions",
+def mocked_openai_completion(httpx_mock):
+    httpx_mock.add_response(
+        method="POST",
+        url="https://api.openai.com/v1/completions",
         json={
             "id": "cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7",
             "object": "text_completion",
@@ -205,10 +209,10 @@
         },
         headers={"Content-Type": "application/json"},
     )
+    return httpx_mock
 
 
-@pytest.fixture
-def mocked_openai_completion_logprobs_stream(requests_mock):
+def stream_completion_events():
     choices_chunks = [
         [
             {
@@ -264,32 +268,37 @@
         ],
     ]
 
-    def stream_events():
-        for choices in choices_chunks:
-            yield "data: {}\n\n".format(
-                json.dumps(
-                    {
-                        "id": "cmpl-80MdSaou7NnPuff5ZyRMysWBmgSPS",
-                        "object": "text_completion",
-                        "created": 1695097702,
-                        "choices": choices,
-                        "model": "gpt-3.5-turbo-instruct",
-                    }
-                )
-            ).encode("utf-8")
-        yield "data: [DONE]\n\n".encode("utf-8")
-
-    return requests_mock.post(
-        "https://api.openai.com/v1/completions",
-        content=b"".join(stream_events()),
+    for choices in choices_chunks:
+        yield "data: {}\n\n".format(
+            json.dumps(
+                {
+                    "id": "cmpl-80MdSaou7NnPuff5ZyRMysWBmgSPS",
+                    "object": "text_completion",
+                    "created": 1695097702,
+                    "choices": choices,
+                    "model": "gpt-3.5-turbo-instruct",
+                }
+            )
+        ).encode("utf-8")
+    yield "data: [DONE]\n\n".encode("utf-8")
+
+
+@pytest.fixture
+def mocked_openai_completion_logprobs_stream(httpx_mock):
+    httpx_mock.add_response(
+        method="POST",
+        url="https://api.openai.com/v1/completions",
+        stream=IteratorStream(stream_completion_events()),
         headers={"Content-Type": "text/event-stream"},
     )
+    return httpx_mock
 
 
 @pytest.fixture
-def mocked_openai_completion_logprobs(requests_mock):
-    return requests_mock.post(
-        "https://api.openai.com/v1/completions",
+def mocked_openai_completion_logprobs(httpx_mock):
+    httpx_mock.add_response(
+        method="POST",
+        url="https://api.openai.com/v1/completions",
         json={
             "id": "cmpl-80MeBfKJutM0uMNJkRrebJLeP3bxL",
             "object": "text_completion",
@@ -316,12 +325,14 @@
         },
         headers={"Content-Type": "application/json"},
     )
+    return httpx_mock
 
 
 @pytest.fixture
-def mocked_localai(requests_mock):
-    requests_mock.post(
-        "http://localai.localhost/chat/completions",
+def mocked_localai(httpx_mock):
+    httpx_mock.add_response(
+        method="POST",
+        url="http://localai.localhost/chat/completions",
         json={
             "model": "orca",
             "usage": {},
@@ -329,8 +340,9 @@
         },
         headers={"Content-Type": "application/json"},
     )
-    requests_mock.post(
-        "http://localai.localhost/completions",
+    httpx_mock.add_response(
+        method="POST",
+        url="http://localai.localhost/completions",
         json={
             "model": "completion-babbage",
             "usage": {},
@@ -338,7 +350,7 @@
         },
         headers={"Content-Type": "application/json"},
     )
-    return requests_mock
+    return httpx_mock
 
 
 @pytest.fixture
diff --git a/tests/test_cli_openai_models.py b/tests/test_cli_openai_models.py
index a47499fe..56acd113 100644
--- a/tests/test_cli_openai_models.py
+++ b/tests/test_cli_openai_models.py
@@ -4,9 +4,10 @@
 
 
 @pytest.fixture
-def mocked_models(requests_mock):
-    requests_mock.get(
-        "https://api.openai.com/v1/models",
+def mocked_models(httpx_mock):
+    httpx_mock.add_response(
+        method="GET",
+        url="https://api.openai.com/v1/models",
         json={
             "data": [
                 {
@@ -25,7 +26,7 @@
         },
         headers={"Content-Type": "application/json"},
     )
-    return requests_mock
+    return httpx_mock
 
 
 def test_openai_models(mocked_models):
@@ -39,7 +40,7 @@
     )
 
 
-def test_openai_options_min_max(mocked_models):
+def test_openai_options_min_max():
     options = {
         "temperature": [0, 2],
         "top_p": [0, 1],
diff --git a/tests/test_llm.py b/tests/test_llm.py
index 3e47cc58..9f8457ab 100644
--- a/tests/test_llm.py
+++ b/tests/test_llm.py
@@ -259,7 +259,6 @@ def test_llm_default_prompt(
     }
     assert json.loads(row["response_json"]) == {
         "model": "gpt-3.5-turbo",
-        "usage": {},
         "choices": [{"message": {"content": "Bob, Alice, Eve"}}],
     }
 
@@ -285,7 +284,6 @@ def test_llm_default_prompt(
             "response": "Bob, Alice, Eve",
             "response_json": {
                 "model": "gpt-3.5-turbo",
-                "usage": {},
                 "choices": [{"message": {"content": "Bob, Alice, Eve"}}],
             },
             # This doesn't have the \n after three names:
@@ -323,7 +321,8 @@ def test_openai_completion(mocked_openai_completion, user_path):
     assert result.output == "\n\nThis is indeed a test\n"
 
     # Should have requested 256 tokens
-    assert json.loads(mocked_openai_completion.last_request.text) == {
+    last_request = mocked_openai_completion.get_requests()[-1]
+    assert json.loads(last_request.content) == {
         "model": "gpt-3.5-turbo-instruct",
         "prompt": "Say this is a test",
         "stream": False,
@@ -393,8 +392,6 @@ def test_openai_completion_logprobs_stream(
     row = rows[0]
     assert json.loads(row["response_json"]) == {
         "content": "\n\nHi.",
-        "role": None,
-        "finish_reason": None,
         "logprobs": [
             {"text": "\n\n", "top_logprobs": [{"\n\n": -0.6, "\n": -1.9}]},
             {"text": "Hi", "top_logprobs": [{"Hi": -1.1, "Hello": -0.7}]},
@@ -481,7 +478,8 @@ def test_openai_localai_configuration(mocked_localai, user_path):
     result = runner.invoke(cli, ["--no-stream", "--model", "orca", prompt])
     assert result.exit_code == 0
     assert result.output == "Bob, Alice, Eve\n"
-    assert json.loads(mocked_localai.last_request.text) == {
+    last_request = mocked_localai.get_requests()[-1]
+    assert json.loads(last_request.content) == {
         "model": "orca-mini-3b",
         "messages": [{"role": "user", "content": "three names \nfor a pet pelican"}],
         "stream": False,
@@ -490,7 +488,8 @@
     result2 = runner.invoke(cli, ["--no-stream", "--model", "completion-babbage", "hi"])
     assert result2.exit_code == 0
     assert result2.output == "Hello\n"
-    assert json.loads(mocked_localai.last_request.text) == {
+    last_request2 = mocked_localai.get_requests()[-1]
+    assert json.loads(last_request2.content) == {
        "model": "babbage",
        "prompt": "hi",
        "stream": False,