Skip to content

Commit

Permalink
initial autodraw
Browse files Browse the repository at this point in the history
  • Loading branch information
Kav-K committed Nov 17, 2023
1 parent b4ee0b6 commit b9ca3e2
Show file tree
Hide file tree
Showing 2 changed files with 223 additions and 58 deletions.
198 changes: 140 additions & 58 deletions models/openai_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,8 @@ async def send_chatgpt_chat_request(
max_tokens_override=None,
stop=None,
custom_api_key=None,
system_prompt_override=None,
respond_json=None,
) -> (
Tuple[dict, bool]
): # The response, and a boolean indicating whether or not the context limit was reached.
Expand All @@ -947,75 +949,93 @@ async def send_chatgpt_chat_request(
messages = []
for number, message in enumerate(prompt_history):
if number == 0:
# If this is the first message, it is the context prompt.
messages.append(
{
"role": "user",
"content": message.text,
}
)
continue

if message.text.startswith(f"\n{bot_name}"):
text = message.text.replace(bot_name, "")
text = text.replace("<|endofstatement|>", "")
messages.append(
{
"role": "assistant",
"content": text,
}
)
else:
try:
if (
message.text.strip()
.lower()
.startswith("this conversation has some context from earlier")
):
raise Exception("This is a context message")
if not system_prompt_override:
# If this is the first message, it is the context prompt.
messages.append(
{
"role": "system",
"content": message.text,
}
)
continue
else:
# When we have a system prompt override, we're trying to do a one-off chatcompletion WITH VISION. So we use the override
# param as the new system prompt and the first message as the user message. This is messy, and I'll clean it up soon
messages.append(
{
"role": "system",
"content": system_prompt_override,
}
)
messages.append(
{
"role": "user",
"name": user_displayname,
"content": message.text,
}
)
continue

try:
if (
message.text.strip()
.lower()
.startswith("this conversation has some context from earlier")
):
raise Exception("This is a context message")

if message.text.startswith(f"\n{bot_name}"):
role = "assistant"
text = message.text.replace(bot_name, "")
text = text.replace("<|endofstatement|>", "")
else:
role = "user"
username = re.search(r"(?<=\n)(.*?)(?=:)", message.text).group()
username_clean = self.cleanse_username(username)
text = message.text.replace(f"{username}:", "")
# Strip whitespace just from the right side of the string
text = text.rstrip()
text = text.replace("<|endofstatement|>", "")

if "-vision" not in model_selection:
if "-vision" not in model_selection:
messages.append(
{
"role": role,
"name": username_clean if role == "user" else bot_name,
"content": text,
}
)

else:
if len(message.image_urls) > 0:
messages.append(
{"role": "user", "name": username_clean, "content": text}
{
"role": role,
"name": username_clean if role == "user" else bot_name,
"content": [
{"type": "text", "text": text},
],
}
)

for image_url in message.image_urls:
image_info = {
"type": "image_url",
"image_url": {"url": image_url, "detail": "high"},
}
messages[-1]["content"].append(image_info)
else:
if len(message.image_urls) > 0:
messages.append(
{
"role": "user",
"name": username_clean,
"content": [
{"type": "text", "text": text},
],
}
)
for image_url in message.image_urls:
image_info = {
"type": "image_url",
"image_url": {"url": image_url, "detail": "high"},
}
messages[-1]["content"].append(image_info)
else:
messages.append(
{
"role": "user",
"name": username_clean,
"content": [
{"type": "text", "text": text},
],
}
)
except Exception:
text = message.text.replace("<|endofstatement|>", "")
messages.append({"role": "system", "content": text})
messages.append(
{
"role": role,
"name": username_clean if role == "user" else bot_name,
"content": [
{"type": "text", "text": text},
],
}
)
except Exception:
text = message.text.replace("<|endofstatement|>", "")
messages.append({"role": "system", "content": text})

print(f"Messages -> {messages}")
async with aiohttp.ClientSession(
Expand All @@ -1038,6 +1058,10 @@ async def send_chatgpt_chat_request(
payload[
"max_tokens"
] = 4096 # TODO Not sure if this needs to be subtracted from a token count..
if respond_json:
# payload["response_format"] = { "type": "json_object" }
# TODO The above needs to be fixed, doesn't work for some reason?
pass

headers = {
"Authorization": f"Bearer {self.openai_key if not custom_api_key else custom_api_key}"
Expand Down Expand Up @@ -1450,6 +1474,64 @@ async def send_image_request(
# Now all the requests are done, we can save the URLs
return await self.save_image_urls_and_return(image_urls, ctx)

@backoff.on_exception(
    backoff.expo,
    aiohttp.ClientResponseError,
    factor=3,
    base=5,
    max_tries=4,
    on_backoff=backoff_handler_http,
)
async def send_image_request_within_conversation(
    self, prompt, quality, image_size, style, custom_api_key=None, num_images=1
) -> List[str]:
    """Generate ``num_images`` DALL-E 3 images in parallel and return their URLs.

    Unlike the sibling ``send_image_request`` path, this variant returns the
    raw URL list directly instead of persisting it first.

    Args:
        prompt: Text prompt forwarded to the images/generations endpoint.
        quality: DALL-E 3 quality setting (e.g. ``"hd"``).
        image_size: Requested image dimensions (e.g. ``"1024x1024"``).
        style: DALL-E 3 style setting (e.g. ``"vivid"``).
        custom_api_key: Optional per-user API key; falls back to the
            instance-level key when not provided.
        num_images: Number of independent generation requests to fire.

    Returns:
        A list of image URLs, aggregated across all completed requests.
    """
    # Record image usage up-front, mirroring the other image-request paths.
    await self.usage_service.update_usage_image(image_size)

    print("Inside a dalle-3 request")

    request_body = {
        "prompt": prompt,
        "quality": quality,
        "style": style,
        "model": "dall-e-3",
        "size": image_size,
    }
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {self.openai_key if not custom_api_key else custom_api_key}",
    }
    # Normalize the org flag (it may arrive as a string such as "True")
    # before deciding whether to attach the organization header.
    self.use_org = "true" in str(self.use_org).lower()
    if self.use_org and self.openai_organization:
        request_headers["OpenAI-Organization"] = self.openai_organization

    # One shared session services every parallel generation request.
    async with aiohttp.ClientSession(
        raise_for_status=True, timeout=aiohttp.ClientTimeout(total=300)
    ) as session:
        pending = [
            self.make_image_request_individual(
                session,
                "https://api.openai.com/v1/images/generations",
                request_body,
                request_headers,
            )
            for _ in range(num_images)
        ]
        # Fan out all requests concurrently and wait for every response.
        responses = await asyncio.gather(*pending)

    # Flatten every URL out of every response payload.
    image_urls = []
    for response in responses:
        print(response)
        image_urls.extend(result["url"] for result in response["data"])

    return image_urls

@backoff.on_exception(
backoff.expo,
aiohttp.ClientResponseError,
Expand Down
83 changes: 83 additions & 0 deletions services/text_service.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import asyncio.exceptions
import datetime
import json
import re
import traceback
from collections import defaultdict
Expand Down Expand Up @@ -848,6 +849,88 @@ async def process_conversation_message(
# increment the conversation counter for the user
converser_cog.conversation_threads[message.channel.id].count += 1

# Determine if we should draw an image and determine what to draw, and handle the drawing itself
# TODO: This should be encapsulated better into some other service or function so we're not cluttering this text service file, this text service file is gross right now..
if "-vision" in model and not converser_cog.pinecone_service:
print("Checking for if the user asked to draw")
draw_check_prompt = """
You will be given a set of conversation items and you will determine if the intent of the user(s) are to draw/create a picture or not, if the intent is to
draw a picture, extract a prompt for the image to draw for use in systems like DALL-E. Respond with JSON after you determine intent to draw or not. In this format:
{
"intent_to_draw": true/false,
"prompt": "prompt to draw"
}
For example, you determined intent to draw a cat sitting on a chair:
{
"intent_to_draw": true,
"prompt": "A cat sitting on a chair"
}
For example, you determined no intent:
{
"intent_to_draw": false,
"prompt": ""
}
Only signify an intent to draw when the user has explicitly asked you to draw, sometimes there may be situations where the user is asking you to brainstorm a prompt
but not neccessarily draw it, if you are unsure, ask the user explicitly.
"""
last_messages = converser_cog.conversation_threads[
message.channel.id
].history[-6:] # Get the last 6 messages to determine context on whether we should draw
last_messages = last_messages[1:]
try:
response = await converser_cog.model.send_chatgpt_chat_request(
last_messages,
"gpt-4-vision-preview",
temp_override=0,
user_displayname=message.author.display_name,
bot_name=BOT_NAME,
system_prompt_override=draw_check_prompt,
respond_json=True,
)
response_text = response["choices"][0]["message"]["content"].strip()
response_text = response_text.replace("```json", "")
response_text = response_text.replace("```", "")
# This validation is only until we figure out what's wrong with the json response mode for vision.
response_json = json.loads(response_text)
if response_json["intent_to_draw"]:
thinking_embed = discord.Embed(
title=f"🤖💬 Drawing...",
color=0x808080,
)

thinking_embed.set_footer(text="This may take a few seconds.")
try:
thinking_message = await message.reply(embed=thinking_embed)
except:
pass
links = await converser_cog.model.send_image_request_within_conversation(
response_json["prompt"],
quality="hd",
image_size="1024x1024",
style="vivid",
)
try:
thinking_message = await thinking_message.delete()
except:
pass

for num, link in enumerate(links):
await message.reply(f"[image{num}]({link})")

converser_cog.conversation_threads[
message.channel.id
].history.append(
EmbeddedConversationItem(
f"\n{BOT_NAME}: [I have just drawn the following images for the user, briefly describe the image and acknowledge that you've drawn it] <|endofstatement|>\n",
0,
image_urls=links,
)
)
except:
traceback.print_exc()

# Send the request to the model
# If conversing, the prompt to send is the history, otherwise, it's just the prompt
if (
Expand Down

0 comments on commit b9ca3e2

Please sign in to comment.