
Commit

Format Python code with psf/black push
github-actions authored and github-actions committed Jan 9, 2023
1 parent b1d3304 commit 7f1a8e3
Showing 6 changed files with 127 additions and 49 deletions.
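Every hunk below is mechanical reformatting by the psf/black autoformatter, committed automatically by a GitHub Actions workflow; no behavior changes. For readers unfamiliar with black, here is a minimal sketch of the kind of rewriting it applies, using black's Python API (black.format_str and black.Mode are part of black's public API; the sample input is invented for illustration, not taken from this repository):

import black

messy = 'x = {"nums": [str(n) for n in range(1,4+1)], "mode": "top_p"}\n'
pretty = black.format_str(messy, mode=black.Mode(line_length=88))
print(pretty)
# x = {"nums": [str(n) for n in range(1, 4 + 1)], "mode": "top_p"}
# black normalizes spacing around operators and commas, splits any
# statement that exceeds the line length across parenthesized lines,
# and prefers double quotes.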
103 changes: 77 additions & 26 deletions cogs/gpt_3_commands_and_converser.py
@@ -674,7 +674,10 @@ async def on_message(self, message):

     # Send the request to the model
     # If conversing, the prompt to send is the history, otherwise, it's just the prompt
-    if self.pinecone_service or message.channel.id not in self.conversation_threads:
+    if (
+        self.pinecone_service
+        or message.channel.id not in self.conversation_threads
+    ):
         primary_prompt = prompt
     else:
         primary_prompt = "".join(
@@ -724,44 +727,75 @@ async def encapsulated_send(
     new_prompt = prompt.encode("ascii", "ignore").decode()
     prompt_less_author = f"{new_prompt} <|endofstatement|>\n"

-    user_displayname = ctx.user.name if isinstance(ctx, discord.ApplicationContext) else ctx.author.display_name
+    user_displayname = (
+        ctx.user.name
+        if isinstance(ctx, discord.ApplicationContext)
+        else ctx.author.display_name
+    )

-    new_prompt = f"\n'{user_displayname}': {new_prompt} <|endofstatement|>\n"
+    new_prompt = (
+        f"\n'{user_displayname}': {new_prompt} <|endofstatement|>\n"
+    )

     # print("Creating embedding for ", prompt)
     # Print the current timestamp
-    timestamp = int(str(datetime.datetime.now().timestamp()).replace(".", ""))
+    timestamp = int(
+        str(datetime.datetime.now().timestamp()).replace(".", "")
+    )

     starter_conversation_item = EmbeddedConversationItem(
-        str(self.conversation_threads[ctx.channel.id].history[0]), 0)
-    self.conversation_threads[ctx.channel.id].history[0] = starter_conversation_item
+        str(self.conversation_threads[ctx.channel.id].history[0]), 0
+    )
+    self.conversation_threads[ctx.channel.id].history[
+        0
+    ] = starter_conversation_item

     new_prompt_item = EmbeddedConversationItem(new_prompt, timestamp)

-    self.conversation_threads[conversation_id].history.append(new_prompt_item)
+    self.conversation_threads[conversation_id].history.append(
+        new_prompt_item
+    )

     # Create and upsert the embedding for the conversation id, prompt, timestamp
-    embedding = await self.pinecone_service.upsert_conversation_embedding(self.model, conversation_id,
-                                                                          new_prompt, timestamp)
+    embedding = await self.pinecone_service.upsert_conversation_embedding(
+        self.model, conversation_id, new_prompt, timestamp
+    )

-    embedding_prompt_less_author = await self.model.send_embedding_request(prompt_less_author) # Use the version of
+    embedding_prompt_less_author = await self.model.send_embedding_request(
+        prompt_less_author
+    )  # Use the version of
     # the prompt without the author's name for better clarity on retrieval.

     # Now, build the new prompt by getting the X most similar with pinecone
-    similar_prompts = self.pinecone_service.get_n_similar(conversation_id, embedding_prompt_less_author,
-                                                          n=self.model.num_conversation_lookback)
+    similar_prompts = self.pinecone_service.get_n_similar(
+        conversation_id,
+        embedding_prompt_less_author,
+        n=self.model.num_conversation_lookback,
+    )

     # When we are in embeddings mode, only the pre-text is contained in self.conversation_threads[message.channel.id].history, so we
     # can use that as a base to build our new prompt
-    prompt_with_history = [self.conversation_threads[ctx.channel.id].history[0]]
+    prompt_with_history = [
+        self.conversation_threads[ctx.channel.id].history[0]
+    ]

     # Append the similar prompts to the prompt with history
-    prompt_with_history += [EmbeddedConversationItem(prompt, timestamp) for prompt, timestamp in
-                            similar_prompts]
+    prompt_with_history += [
+        EmbeddedConversationItem(prompt, timestamp)
+        for prompt, timestamp in similar_prompts
+    ]

     # iterate UP TO the last X prompts in the history
-    for i in range(1, min(len(self.conversation_threads[ctx.channel.id].history), self.model.num_static_conversation_items)):
-        prompt_with_history.append(self.conversation_threads[ctx.channel.id].history[-i])
+    for i in range(
+        1,
+        min(
+            len(self.conversation_threads[ctx.channel.id].history),
+            self.model.num_static_conversation_items,
+        ),
+    ):
+        prompt_with_history.append(
+            self.conversation_threads[ctx.channel.id].history[-i]
+        )

     # remove duplicates from prompt_with_history
     prompt_with_history = list(dict.fromkeys(prompt_with_history))
@@ -777,7 +811,9 @@ async def encapsulated_send(
         pass
     prompt_with_history.append(new_prompt_item)

-    prompt_with_history = "".join([item.text for item in prompt_with_history])
+    prompt_with_history = "".join(
+        [item.text for item in prompt_with_history]
+    )

     new_prompt = prompt_with_history

@@ -788,7 +824,7 @@ async def encapsulated_send(
         id in self.conversation_threads
         and tokens > self.model.summarize_threshold
         and not from_g_command
-        and not self.pinecone_service # This should only happen if we are not doing summarizations.
+        and not self.pinecone_service  # This should only happen if we are not doing summarizations.
     ):

         # We don't need to worry about the differences between interactions and messages in this block,
@@ -850,27 +886,42 @@ async def encapsulated_send(
     )

     # If the user is conversing, add the GPT response to their conversation history.
-    if id in self.conversation_threads and not from_g_command and not self.pinecone_service:
+    if (
+        id in self.conversation_threads
+        and not from_g_command
+        and not self.pinecone_service
+    ):
         self.conversation_threads[id].history.append(
             "\nGPTie: " + str(response_text) + "<|endofstatement|>\n"
         )

     # Embeddings case!
-    elif id in self.conversation_threads and not from_g_command and self.pinecone_service:
+    elif (
+        id in self.conversation_threads
+        and not from_g_command
+        and self.pinecone_service
+    ):
         conversation_id = id

         # Create an embedding and timestamp for the prompt
-        response_text = "\nGPTie: " + str(response_text) + "<|endofstatement|>\n"
+        response_text = (
+            "\nGPTie: " + str(response_text) + "<|endofstatement|>\n"
+        )

         response_text = response_text.encode("ascii", "ignore").decode()

         # Print the current timestamp
-        timestamp = int(str(datetime.datetime.now().timestamp()).replace(".", ""))
-        self.conversation_threads[conversation_id].history.append(EmbeddedConversationItem(response_text, timestamp))
+        timestamp = int(
+            str(datetime.datetime.now().timestamp()).replace(".", "")
+        )
+        self.conversation_threads[conversation_id].history.append(
+            EmbeddedConversationItem(response_text, timestamp)
+        )

         # Create and upsert the embedding for the conversation id, prompt, timestamp
-        embedding = await self.pinecone_service.upsert_conversation_embedding(self.model, conversation_id,
-                                                                              response_text, timestamp)
+        embedding = await self.pinecone_service.upsert_conversation_embedding(
+            self.model, conversation_id, response_text, timestamp
+        )

     # Cleanse
     response_text = self.cleanse_response(response_text)
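The embeddings branch above is the heart of this file: it stamps each conversation line with an integer timestamp, pulls the most similar stored lines back out of Pinecone, and stitches the pre-text, the retrieved neighbors, and the most recent static history into one prompt, deduplicating along the way. A condensed, dependency-free sketch of that assembly (names simplified; EmbeddedConversationItem here is a stand-in for the class in models/user_model.py, and the sort step that follows in the full method is omitted):

import datetime


class EmbeddedConversationItem:
    def __init__(self, text, timestamp):
        self.text = text
        self.timestamp = timestamp


def make_timestamp() -> int:
    # Same trick as the diff above: dropping the decimal point from the
    # float timestamp yields a sortable integer key.
    return int(str(datetime.datetime.now().timestamp()).replace(".", ""))


def build_prompt(pretext, similar, history, num_static):
    items = [pretext]
    # Neighbors come back from Pinecone as (text, timestamp) pairs.
    items += [EmbeddedConversationItem(t, ts) for t, ts in similar]
    # Walk backwards over up to num_static recent history entries.
    for i in range(1, min(len(history), num_static)):
        items.append(history[-i])
    # dict.fromkeys preserves insertion order, so this deduplicates
    # without reshuffling (it requires the items to be hashable).
    items = list(dict.fromkeys(items))
    return "".join(item.text for item in items)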
2 changes: 1 addition & 1 deletion gpt3discord.py
@@ -40,7 +40,7 @@
 pinecone_service = None
 if PINECONE_TOKEN:
     pinecone.init(api_key=PINECONE_TOKEN, environment="us-west1-gcp")
-    PINECONE_INDEX = "conversation-embeddings" # This will become unfixed later.
+    PINECONE_INDEX = "conversation-embeddings"  # This will become unfixed later.
     pinecone_service = PineconeService(pinecone.Index(PINECONE_INDEX))
     print("Got the pinecone service")

10 changes: 5 additions & 5 deletions models/autocomplete_model.py
@@ -27,17 +27,17 @@ async def get_value(
     ctx: discord.AutocompleteContext,
 ): # Behaves a bit weird if you go back and edit the parameter without typing in a new command
     values = {
-        "max_conversation_length": [str(num) for num in range(1,500,2)],
-        "num_images": [str(num) for num in range(1,4+1)],
+        "max_conversation_length": [str(num) for num in range(1, 500, 2)],
+        "num_images": [str(num) for num in range(1, 4 + 1)],
         "mode": ["temperature", "top_p"],
         "model": ["text-davinci-003", "text-curie-001"],
         "low_usage_mode": ["True", "False"],
         "image_size": ["256x256", "512x512", "1024x1024"],
         "summarize_conversation": ["True", "False"],
         "welcome_message_enabled": ["True", "False"],
-        "num_static_conversation_items": [str(num) for num in range(5,20+1)],
-        "num_conversation_lookback": [str(num) for num in range(5,15+1)],
-        "summarize_threshold": [str(num) for num in range(800, 3500, 50)]
+        "num_static_conversation_items": [str(num) for num in range(5, 20 + 1)],
+        "num_conversation_lookback": [str(num) for num in range(5, 15 + 1)],
+        "summarize_threshold": [str(num) for num in range(800, 3500, 50)],
     }
     if ctx.options["parameter"] in values.keys():
         return [value for value in values[ctx.options["parameter"]]]
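The values table above backs Discord slash-command autocomplete: every candidate is a string because Discord autocomplete replies are strings. Outside of Discord the lookup reduces to a plain dict fetch; a trimmed sketch of the same pattern (the fallback for a parameter missing from the table is not visible in this hunk, so the empty list here is an assumption):

values = {
    "num_images": [str(num) for num in range(1, 4 + 1)],
    "image_size": ["256x256", "512x512", "1024x1024"],
}


def get_value(parameter: str) -> list:
    # Mirrors the cog's callback: all candidates for a known parameter.
    if parameter in values:
        return list(values[parameter])
    return []  # assumed fallback, not shown in the diff


print(get_value("num_images"))  # ['1', '2', '3', '4']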
14 changes: 9 additions & 5 deletions models/openai_model.py
@@ -93,7 +93,9 @@ def num_static_conversation_items(self, value):
     if value < 3:
         raise ValueError("num_static_conversation_items must be >= 3")
     if value > 20:
-        raise ValueError("num_static_conversation_items must be <= 20, this is to ensure reliability and reduce token wastage!")
+        raise ValueError(
+            "num_static_conversation_items must be <= 20, this is to ensure reliability and reduce token wastage!"
+        )
     self._num_static_conversation_items = value

 @property
@@ -106,7 +108,9 @@ def num_conversation_lookback(self, value):
     if value < 3:
         raise ValueError("num_conversation_lookback must be >= 3")
     if value > 15:
-        raise ValueError("num_conversation_lookback must be <= 15, this is to ensure reliability and reduce token wastage!")
+        raise ValueError(
+            "num_conversation_lookback must be <= 15, this is to ensure reliability and reduce token wastage!"
+        )
     self._num_conversation_lookback = value

 @property
@@ -358,7 +362,7 @@ async def send_embedding_request(self, text):
         "Authorization": f"Bearer {self.openai_key}",
     }
     async with session.post(
-        "https://api.openai.com/v1/embeddings", json=payload, headers=headers
+        "https://api.openai.com/v1/embeddings", json=payload, headers=headers
     ) as resp:
         response = await resp.json()

@@ -474,8 +478,8 @@ async def send_request(
         "https://api.openai.com/v1/completions", json=payload, headers=headers
     ) as resp:
         response = await resp.json()
-        #print(f"Payload -> {payload}")
-        #print(f"Response -> {response}")
+        # print(f"Payload -> {payload}")
+        # print(f"Response -> {response}")
         # Parse the total tokens used for this request and response pair from the response
         await self.valid_text_request(response)

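send_embedding_request above posts straight to OpenAI's REST embeddings endpoint with aiohttp rather than going through a client library. A self-contained sketch of that call; the hunk shows only the URL and the Authorization header, so the {"model", "input"} payload shape, the text-embedding-ada-002 model name, and the OPENAI_TOKEN environment variable are all assumptions here:

import os

import aiohttp


async def send_embedding_request(text: str) -> list:
    payload = {"model": "text-embedding-ada-002", "input": text}  # assumed payload
    headers = {"Authorization": f"Bearer {os.environ['OPENAI_TOKEN']}"}  # env var name assumed
    async with aiohttp.ClientSession() as session:
        async with session.post(
            "https://api.openai.com/v1/embeddings", json=payload, headers=headers
        ) as resp:
            response = await resp.json()
    # The vector lives at response["data"][0]["embedding"].
    return response["data"][0]["embedding"]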
44 changes: 34 additions & 10 deletions models/pinecone_service_model.py
@@ -2,42 +2,66 @@


 class PineconeService:
-
     def __init__(self, index: pinecone.Index):
         self.index = index

     def upsert_basic(self, text, embeddings):
         self.index.upsert([(text, embeddings)])

     def get_all_for_conversation(self, conversation_id: int):
-        response = self.index.query(top_k=100, filter={"conversation_id": conversation_id})
+        response = self.index.query(
+            top_k=100, filter={"conversation_id": conversation_id}
+        )
         return response

-    async def upsert_conversation_embedding(self, model, conversation_id: int, text, timestamp):
+    async def upsert_conversation_embedding(
+        self, model, conversation_id: int, text, timestamp
+    ):
         # If the text is > 512 characters, we need to split it up into multiple entries.
         first_embedding = None
         if len(text) > 500:
             # Split the text into 512 character chunks
-            chunks = [text[i:i + 500] for i in range(0, len(text), 500)]
+            chunks = [text[i : i + 500] for i in range(0, len(text), 500)]
             for chunk in chunks:
                 print("The split chunk is ", chunk)

                 # Create an embedding for the split chunk
                 embedding = await model.send_embedding_request(chunk)
                 if not first_embedding:
                     first_embedding = embedding
-                self.index.upsert([(chunk, embedding)], metadata={"conversation_id": conversation_id, "timestamp": timestamp})
+                self.index.upsert(
+                    [(chunk, embedding)],
+                    metadata={
+                        "conversation_id": conversation_id,
+                        "timestamp": timestamp,
+                    },
+                )
             return first_embedding
         else:
             embedding = await model.send_embedding_request(text)
-            self.index.upsert([(text, embedding, {"conversation_id": conversation_id,
-                                                  "timestamp": timestamp})])
+            self.index.upsert(
+                [
+                    (
+                        text,
+                        embedding,
+                        {"conversation_id": conversation_id, "timestamp": timestamp},
+                    )
+                ]
+            )
             return embedding

     def get_n_similar(self, conversation_id: int, embedding, n=10):
-        response = self.index.query(vector=embedding, top_k=n, include_metadata=True, filter={"conversation_id": conversation_id})
+        response = self.index.query(
+            vector=embedding,
+            top_k=n,
+            include_metadata=True,
+            filter={"conversation_id": conversation_id},
+        )
         print(response)
-        relevant_phrases = [(match['id'],match['metadata']['timestamp']) for match in response['matches']]
+        relevant_phrases = [
+            (match["id"], match["metadata"]["timestamp"])
+            for match in response["matches"]
+        ]
         # Sort the relevant phrases based on the timestamp
         relevant_phrases.sort(key=lambda x: x[1])
-        return relevant_phrases
\ No newline at end of file
+        return relevant_phrases
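Two details of PineconeService are worth noting. First, vectors are keyed by the raw text itself (the upsert tuples are (text, embedding, metadata)), which is why get_n_similar can hand back match["id"] directly as the stored phrase. Second, long inputs are split before embedding; the comments say 512 characters while the code splits at 500, a discrepancy black's reformatting preserves. The chunking idiom in isolation:

def chunk_text(text: str, size: int = 500) -> list:
    # text[i : i + size] steps through the string in fixed-size windows;
    # the final chunk simply holds whatever is left over.
    return [text[i : i + size] for i in range(0, len(text), size)]


print([len(c) for c in chunk_text("x" * 1234)])  # [500, 500, 234]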
3 changes: 1 addition & 2 deletions models/user_model.py
@@ -73,6 +73,7 @@ def __repr__(self):
     def __str__(self):
         return self.__repr__()

+
 class EmbeddedConversationItem:
     def __init__(self, text, timestamp):
         self.text = text
@@ -104,5 +105,3 @@ def __ge__(self, other):

     def __ne__(self, other):
         return not self.__eq__(other)
-
-
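EmbeddedConversationItem spells out its rich comparisons (__ne__, __ge__, and the rest) by hand. If, as the surrounding code suggests, the ordering is by timestamp, functools.total_ordering could derive most of them from __eq__ and __lt__; a sketch of that alternative, as an illustration rather than what the repository does:

from functools import total_ordering


@total_ordering
class Item:
    def __init__(self, text, timestamp):
        self.text = text
        self.timestamp = timestamp

    def __eq__(self, other):
        return self.timestamp == other.timestamp

    def __lt__(self, other):
        return self.timestamp < other.timestamp


a, b = Item("a", 1), Item("b", 2)
print(a < b, a >= b, a != b)  # True False True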