autodraw

Kav-K · Nov 17, 2023 · b84b62a · b84b62a
1 parent b9ca3e2
commit b84b62a
Show file tree

Hide file tree

Showing 5 changed files with 84 additions and 45 deletions.
diff --git a/conversation_starter_pretext_minimal.txt b/conversation_starter_pretext_minimal.txt
@@ -12,4 +12,4 @@ Human: Image Info-Caption: a landscape with a river and trees\nImage Info-QA: ye
 <yourname>: This is a landscape with a river and trees, it is indeed cartoony! <|endofstatement|>
 ...
 
-<yourname> and <username> will be given to you in an actual conversation.
+You speak in a fun, casual, and friendly tone, you're not overly inquisitive, you don't worry about formalities and speak as if you are speaking with a friend or peer.
diff --git a/conversation_starter_pretext_vision.txt b/conversation_starter_pretext_vision.txt
@@ -24,4 +24,6 @@ Human: I'm making a discord bot <|endofstatement|>
 <yourname>: that's pretty hype, I've never made one of those before, what part are you on right now? <|endofstatement|>
 ...
 
+You are able to draw (generate) images. When the user asks you to draw something, let them know enthusiastically that you can do so and work on a prompt with them. The rest will be handled automatically and you will see images that you've drawn appear in your conversation history.
+
 You speak in a fun, casual, and friendly tone, you're not overly inquisitive, you don't worry about formalities and speak as if you are speaking with a friend or peer.
diff --git a/gpt3discord.py b/gpt3discord.py
@@ -34,7 +34,7 @@
 from models.openai_model import Model
 
 
-__version__ = "12.2.9"
+__version__ = "12.3.0"
 
 
 PID_FILE = Path("bot.pid")

diff --git a/models/openai_model.py b/models/openai_model.py
@@ -1,5 +1,6 @@
 import asyncio
 import functools
+import json
 import math
 import os
 import re
@@ -1001,7 +1002,7 @@ async def send_chatgpt_chat_request(
                     messages.append(
                         {
                             "role": role,
-                            "name": username_clean if role == "user" else bot_name,
+                            "name": username_clean if role == "user" else bot_name_clean,
                             "content": text,
                         }
                     )
@@ -1011,7 +1012,7 @@ async def send_chatgpt_chat_request(
                         messages.append(
                             {
                                 "role": role,
-                                "name": username_clean if role == "user" else bot_name,
+                                "name": username_clean if role == "user" else bot_name_clean,
                                 "content": [
                                     {"type": "text", "text": text},
                                 ],
@@ -1027,7 +1028,7 @@ async def send_chatgpt_chat_request(
                         messages.append(
                             {
                                 "role": role,
-                                "name": username_clean if role == "user" else bot_name,
+                                "name": username_clean if role == "user" else bot_name_clean,
                                 "content": [
                                     {"type": "text", "text": text},
                                 ],
@@ -1084,6 +1085,19 @@ async def send_chatgpt_chat_request(
                 )
                 print(f"Response -> {response}")
 
+                # Temporary until we can ensure json response via the API, for some reason upstream pydantic complains when
+                # we pass response_format in the request..
+                if respond_json:
+                    response_text = response["choices"][0]["message"]["content"].strip()
+                    response_text = response_text.replace("```json", "")
+                    response_text = response_text.replace("```", "")
+                    try:
+                        response_text = json.loads(response_text)
+                        return response_text
+                    except Exception:
+                        raise ValueError("Could not decode JSON response from the API")
+
+
                 return response
 
     @backoff.on_exception(

diff --git a/services/text_service.py b/services/text_service.py
@@ -29,6 +29,33 @@ class TextService:
     def __init__(self):
         pass
 
+    @staticmethod
+    async def trigger_thinking(message: discord.Message, is_drawing=None):
+        """Reply to ``message`` with a placeholder embed and start the typing indicator.
+
+        Args:
+            message: The message being answered; the embed is sent as a reply to it.
+            is_drawing: When truthy the embed title reads "Drawing...", otherwise
+                it reads "Thinking...".
+
+        Returns:
+            The placeholder message that was sent, or ``None`` if the reply could
+            not be sent. Hand the result to ``stop_thinking`` to remove the embed.
+        """
+        thinking_embed = discord.Embed(
+            title="🤖🎨 Drawing..." if is_drawing else "🤖💬 Thinking...",
+            color=0x808080,
+        )
+        thinking_embed.set_footer(text="This may take a few seconds.")
+
+        try:
+            thinking_message = await message.reply(embed=thinking_embed)
+        except Exception:
+            # Best-effort: the placeholder is purely cosmetic, so a failed reply
+            # must not abort the conversation flow. ``except Exception`` (rather
+            # than a bare ``except``) lets task cancellation propagate.
+            thinking_message = None
+
+        try:
+            await message.channel.trigger_typing()
+        except Exception:
+            # The typing indicator is cosmetic as well. Do NOT discard
+            # ``thinking_message`` here: if the embed was already sent, the
+            # caller still needs the handle to delete it afterwards.
+            pass
+
+        return thinking_message
+
+    @staticmethod
+    async def stop_thinking(thinking_message: discord.Message):
+        """Delete the placeholder embed previously returned by ``trigger_thinking``.
+
+        Args:
+            thinking_message: The placeholder message to remove. ``None`` is
+                accepted and ignored, since ``trigger_thinking`` returns ``None``
+                when the placeholder could not be sent.
+        """
+        if thinking_message is None:
+            # Nothing was sent, so there is nothing to clean up.
+            return
+
+        try:
+            await thinking_message.delete()
+        except Exception:
+            # Best-effort cleanup: a failed delete must not break the reply flow.
+            # ``except Exception`` (rather than a bare ``except``) lets task
+            # cancellation propagate.
+            pass
+
     @staticmethod
     async def encapsulated_send(
         converser_cog,
@@ -859,28 +886,36 @@ async def process_conversation_message(
                 
                 {
                     "intent_to_draw": true/false,
-                    "prompt": "prompt to draw"
+                    "prompt": "prompt to draw",
+                    "amount": 1
                 }
                 
                 For example, you determined intent to draw a cat sitting on a chair:
                 {
                     "intent_to_draw": true,
-                    "prompt": "A cat sitting on a chair"
+                    "prompt": "A cat sitting on a chair",
+                    "amount": 1
+
                 }
                 For example, you determined no intent:
                 {
                     "intent_to_draw": false,
-                    "prompt": ""
+                    "prompt": "",
+                    "amount": 1
                 }
+                Make sure you use double quotes around all keys and values. Ensure to OMIT trailing commas.
+                As you can see, the default amount should always be one, but a user can draw up to 4 images. Be hesitant to draw more than 3 images.
                 Only signify an intent to draw when the user has explicitly asked you to draw, sometimes there may be situations where the user is asking you to brainstorm a prompt
-                but not neccessarily draw it, if you are unsure, ask the user explicitly.
+                but not neccessarily draw it, if you are unsure, ask the user explicitly. Ensure your JSON strictly confirms, only output the raw json. no other text.
                 """
                 last_messages = converser_cog.conversation_threads[
                     message.channel.id
                 ].history[-6:] # Get the last 6 messages to determine context on whether we should draw
                 last_messages = last_messages[1:]
                 try:
-                    response = await converser_cog.model.send_chatgpt_chat_request(
+                    thinking_message = await TextService.trigger_thinking(message)
+
+                    response_json = await converser_cog.model.send_chatgpt_chat_request(
                         last_messages,
                         "gpt-4-vision-preview",
                         temp_override=0,
@@ -889,46 +924,48 @@ async def process_conversation_message(
                         system_prompt_override=draw_check_prompt,
                         respond_json=True,
                     )
-                    response_text = response["choices"][0]["message"]["content"].strip()
-                    response_text = response_text.replace("```json", "")
-                    response_text = response_text.replace("```", "")
+                    await TextService.stop_thinking(thinking_message)
                     # This validation is only until we figure out what's wrong with the json response mode for vision.
-                    response_json = json.loads(response_text)
                     if response_json["intent_to_draw"]:
-                        thinking_embed = discord.Embed(
-                            title=f"🤖💬 Drawing...",
-                            color=0x808080,
-                        )
+                        thinking_message = await TextService.trigger_thinking(message,is_drawing=True)
 
-                        thinking_embed.set_footer(text="This may take a few seconds.")
-                        try:
-                            thinking_message = await message.reply(embed=thinking_embed)
-                        except:
-                            pass
                         links = await converser_cog.model.send_image_request_within_conversation(
                             response_json["prompt"],
                             quality="hd",
                             image_size="1024x1024",
                             style="vivid",
+                            num_images=response_json["amount"],
                         )
-                        try:
-                            thinking_message = await thinking_message.delete()
-                        except:
-                            pass
+                        await TextService.stop_thinking(thinking_message)
 
+                        image_markdowns = []
                         for num, link in enumerate(links):
-                            await message.reply(f"[image{num}]({link})")
+                            image_markdowns.append(f"[image{num}]({link})")
+                        await message.reply(" ".join(image_markdowns))
 
                         converser_cog.conversation_threads[
                             message.channel.id
                         ].history.append(
                             EmbeddedConversationItem(
-                                f"\n{BOT_NAME}: [I have just drawn the following images for the user, briefly describe the image and acknowledge that you've drawn it] <|endofstatement|>\n",
+                                f"\nYou have just generated images for the user, notify the user about what you've drawn\n",
                                 0,
                                 image_urls=links,
                             )
                         )
                 except:
+                    try:
+                        await message.reply("I encountered an error while trying to draw..")
+                        await thinking_message.delete()
+                        converser_cog.conversation_threads[
+                            message.channel.id
+                        ].history.append(
+                            EmbeddedConversationItem(
+                                f"\nYou just tried to generate an image but the generation failed. Notify the user of this now.>\n",
+                                0,
+                            )
+                        )
+                    except:
+                        pass
                     traceback.print_exc()
 
             # Send the request to the model
@@ -960,21 +997,7 @@ async def process_conversation_message(
             )
 
             # Send an embed that tells the user that the bot is thinking
-            thinking_embed = discord.Embed(
-                title=f"🤖💬 Thinking...",
-                color=0x808080,
-            )
-
-            thinking_embed.set_footer(text="This may take a few seconds.")
-            try:
-                thinking_message = await message.reply(embed=thinking_embed)
-            except:
-                pass
-
-            try:
-                await message.channel.trigger_typing()
-            except Exception:
-                pass
+            thinking_message = await TextService.trigger_thinking(message)
             converser_cog.full_conversation_history[message.channel.id].append(prompt)
 
             if not converser_cog.pinecone_service:
@@ -991,7 +1014,7 @@ async def process_conversation_message(
             )
 
             # Delete the thinking embed
-            await thinking_message.delete()
+            await TextService.stop_thinking(thinking_message)
 
             return True