From 3206598c80bfb8afe2f53ad017d5d1f1b5e1703a Mon Sep 17 00:00:00 2001
From: David Gray
Date: Wed, 5 Jun 2024 11:20:04 -0400
Subject: [PATCH] fix: re-enable streaming for openai_plugin /v1/completions

This PR fixes a bug where "stream": True was not set in the request body
when using the /v1/completions endpoint with the openai plugin. The shared
request fields (max_tokens, temperature, stream) are now built once, and
each endpoint branch adds only its endpoint-specific fields.
---
 plugins/openai_plugin.py | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/plugins/openai_plugin.py b/plugins/openai_plugin.py
index e2384b22..87efdcf5 100644
--- a/plugins/openai_plugin.py
+++ b/plugins/openai_plugin.py
@@ -128,24 +128,18 @@ def request_http(self, query: dict, user_id: int, test_end_time: float = 0):
     def streaming_request_http(self, query: dict, user_id: int, test_end_time: float):
 
         headers = {"Content-Type": "application/json"}
 
-        if "/v1/chat/completions" in self.host:
-            data = {
-                "messages": [
-                    {"role": "user", "content": query["text"]}
-                ],
+        data = {
             "max_tokens": query["output_tokens"],
             "temperature": 0.1,
             "stream": True,
         }
+        if "/v1/chat/completions" in self.host:
+            data["messages"] = [
+                {"role": "user", "content": query["text"]}
+            ]
         else:
-            data = {
-                "prompt": query["text"],
-                "max_tokens": query["output_tokens"],
-                "min_tokens": query["output_tokens"],
-                "temperature": 0.1,
-                "top_p": 0.9,
-                "seed": 10,
-            }
+            data["prompt"] = query["text"]
+            data["min_tokens"] = query["output_tokens"]
 
         # some runtimes only serve one model, won't check this.
         if self.model_name is not None:
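
Reviewer note (not part of the commit): below is a minimal, standalone sketch
of the restructured request-body logic after this patch. The helper name
build_request_body and the example host strings are illustrative assumptions,
not plugin API; in the plugin itself this logic lives inline in
streaming_request_http.

# Standalone sketch of the request-body construction after this patch.
# build_request_body and the example hosts are hypothetical, for illustration.

def build_request_body(host: str, query: dict) -> dict:
    # Shared fields, including "stream": True, are set once for both endpoints.
    data = {
        "max_tokens": query["output_tokens"],
        "temperature": 0.1,
        "stream": True,
    }
    if "/v1/chat/completions" in host:
        # Chat endpoint: wrap the prompt text in a messages list.
        data["messages"] = [{"role": "user", "content": query["text"]}]
    else:
        # Legacy completions endpoint: plain prompt plus min_tokens.
        data["prompt"] = query["text"]
        data["min_tokens"] = query["output_tokens"]
    return data

# Both endpoints now request streaming, which is the bug this patch fixes:
query = {"text": "hello", "output_tokens": 8}
assert build_request_body("http://localhost:8000/v1/completions", query)["stream"] is True
assert build_request_body("http://localhost:8000/v1/chat/completions", query)["stream"] is True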