From 3206598c80bfb8afe2f53ad017d5d1f1b5e1703a Mon Sep 17 00:00:00 2001
From: David Gray
Date: Wed, 5 Jun 2024 11:20:04 -0400
Subject: [PATCH] fix: re-enable streaming for openai_plugin /v1/completions

This PR fixes a bug where "stream": True was not set in the request body
when using the /v1/completions endpoint with the openai plugin. The shared
request fields (max_tokens, temperature, stream) are now built once, and
each endpoint branch adds only its endpoint-specific fields.
---
 plugins/openai_plugin.py | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/plugins/openai_plugin.py b/plugins/openai_plugin.py
index e2384b22..87efdcf5 100644
--- a/plugins/openai_plugin.py
+++ b/plugins/openai_plugin.py
@@ -128,24 +128,18 @@ def request_http(self, query: dict, user_id: int, test_end_time: float = 0):
     def streaming_request_http(self, query: dict, user_id: int, test_end_time: float):
 
         headers = {"Content-Type": "application/json"}
 
-        if "/v1/chat/completions" in self.host:
-            data = {
-                "messages": [
-                    {"role": "user", "content": query["text"]}
-                ],
+        data = {
             "max_tokens": query["output_tokens"],
             "temperature": 0.1,
             "stream": True,
         }
+        if "/v1/chat/completions" in self.host:
+            data["messages"] = [
+                {"role": "user", "content": query["text"]}
+            ]
         else:
-            data = {
-                "prompt": query["text"],
-                "max_tokens": query["output_tokens"],
-                "min_tokens": query["output_tokens"],
-                "temperature": 0.1,
-                "top_p": 0.9,
-                "seed": 10,
-            }
+            data["prompt"] = query["text"]
+            data["min_tokens"] = query["output_tokens"]
 
         # some runtimes only serve one model, won't check this.
         if self.model_name is not None:
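
Reviewer note (not part of the commit): below is a minimal, standalone sketch
of the restructured request-body logic after this patch. The helper name
build_request_body and the example host strings are illustrative assumptions,
not plugin API; in the plugin itself this logic lives inline in
streaming_request_http.

# Standalone sketch of the request-body construction after this patch.
# build_request_body and the example hosts are hypothetical, for illustration.

def build_request_body(host: str, query: dict) -> dict:
    # Shared fields, including "stream": True, are set once for both endpoints.
    data = {
        "max_tokens": query["output_tokens"],
        "temperature": 0.1,
        "stream": True,
    }
    if "/v1/chat/completions" in host:
        # Chat endpoint: wrap the prompt text in a messages list.
        data["messages"] = [{"role": "user", "content": query["text"]}]
    else:
        # Legacy completions endpoint: plain prompt plus min_tokens.
        data["prompt"] = query["text"]
        data["min_tokens"] = query["output_tokens"]
    return data

# Both endpoints now request streaming, which is the bug this patch fixes:
query = {"text": "hello", "output_tokens": 8}
assert build_request_body("http://localhost:8000/v1/completions", query)["stream"] is True
assert build_request_body("http://localhost:8000/v1/chat/completions", query)["stream"] is True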