
Commit

Format Python code with psf/black push
github-actions authored and github-actions committed Jan 9, 2023
1 parent b1d3304 commit 7f1a8e3
Showing 6 changed files with 127 additions and 49 deletions.
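Every hunk below is mechanical reformatting by the psf/black autoformatter, committed automatically by a GitHub Actions workflow; no behavior changes. For readers unfamiliar with black, here is a minimal sketch of the kind of rewriting it applies, using black's Python API (black.format_str and black.Mode are part of black's public API; the sample input is invented for illustration, not taken from this repository):

import black

messy = 'x = {"nums": [str(n) for n in range(1,4+1)], "mode": "top_p"}\n'
pretty = black.format_str(messy, mode=black.Mode(line_length=88))
print(pretty)
# x = {"nums": [str(n) for n in range(1, 4 + 1)], "mode": "top_p"}
# black normalizes spacing around operators and commas, splits any
# statement that exceeds the line length across parenthesized lines,
# and prefers double quotes.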
103 changes: 77 additions & 26 deletions cogs/gpt_3_commands_and_converser.py
@@ -674,7 +674,10 @@ async def on_message(self, message):

     # Send the request to the model
     # If conversing, the prompt to send is the history, otherwise, it's just the prompt
-    if self.pinecone_service or message.channel.id not in self.conversation_threads:
+    if (
+        self.pinecone_service
+        or message.channel.id not in self.conversation_threads
+    ):
         primary_prompt = prompt
     else:
         primary_prompt = "".join(
@@ -724,44 +727,75 @@ async def encapsulated_send(
     new_prompt = prompt.encode("ascii", "ignore").decode()
     prompt_less_author = f"{new_prompt} <|endofstatement|>\n"

-    user_displayname = ctx.user.name if isinstance(ctx, discord.ApplicationContext) else ctx.author.display_name
+    user_displayname = (
+        ctx.user.name
+        if isinstance(ctx, discord.ApplicationContext)
+        else ctx.author.display_name
+    )

-    new_prompt = f"\n'{user_displayname}': {new_prompt} <|endofstatement|>\n"
+    new_prompt = (
+        f"\n'{user_displayname}': {new_prompt} <|endofstatement|>\n"
+    )

     # print("Creating embedding for ", prompt)
     # Print the current timestamp
-    timestamp = int(str(datetime.datetime.now().timestamp()).replace(".", ""))
+    timestamp = int(
+        str(datetime.datetime.now().timestamp()).replace(".", "")
+    )

     starter_conversation_item = EmbeddedConversationItem(
-        str(self.conversation_threads[ctx.channel.id].history[0]), 0)
-    self.conversation_threads[ctx.channel.id].history[0] = starter_conversation_item
+        str(self.conversation_threads[ctx.channel.id].history[0]), 0
+    )
+    self.conversation_threads[ctx.channel.id].history[
+        0
+    ] = starter_conversation_item

     new_prompt_item = EmbeddedConversationItem(new_prompt, timestamp)

-    self.conversation_threads[conversation_id].history.append(new_prompt_item)
+    self.conversation_threads[conversation_id].history.append(
+        new_prompt_item
+    )

     # Create and upsert the embedding for the conversation id, prompt, timestamp
-    embedding = await self.pinecone_service.upsert_conversation_embedding(self.model, conversation_id,
-                                                                          new_prompt, timestamp)
+    embedding = await self.pinecone_service.upsert_conversation_embedding(
+        self.model, conversation_id, new_prompt, timestamp
+    )

-    embedding_prompt_less_author = await self.model.send_embedding_request(prompt_less_author) # Use the version of
+    embedding_prompt_less_author = await self.model.send_embedding_request(
+        prompt_less_author
+    )  # Use the version of
     # the prompt without the author's name for better clarity on retrieval.

     # Now, build the new prompt by getting the X most similar with pinecone
-    similar_prompts = self.pinecone_service.get_n_similar(conversation_id, embedding_prompt_less_author,
-                                                          n=self.model.num_conversation_lookback)
+    similar_prompts = self.pinecone_service.get_n_similar(
+        conversation_id,
+        embedding_prompt_less_author,
+        n=self.model.num_conversation_lookback,
+    )

     # When we are in embeddings mode, only the pre-text is contained in self.conversation_threads[message.channel.id].history, so we
     # can use that as a base to build our new prompt
-    prompt_with_history = [self.conversation_threads[ctx.channel.id].history[0]]
+    prompt_with_history = [
+        self.conversation_threads[ctx.channel.id].history[0]
+    ]

     # Append the similar prompts to the prompt with history
-    prompt_with_history += [EmbeddedConversationItem(prompt, timestamp) for prompt, timestamp in
-                            similar_prompts]
+    prompt_with_history += [
+        EmbeddedConversationItem(prompt, timestamp)
+        for prompt, timestamp in similar_prompts
+    ]

     # iterate UP TO the last X prompts in the history
-    for i in range(1, min(len(self.conversation_threads[ctx.channel.id].history), self.model.num_static_conversation_items)):
-        prompt_with_history.append(self.conversation_threads[ctx.channel.id].history[-i])
+    for i in range(
+        1,
+        min(
+            len(self.conversation_threads[ctx.channel.id].history),
+            self.model.num_static_conversation_items,
+        ),
+    ):
+        prompt_with_history.append(
+            self.conversation_threads[ctx.channel.id].history[-i]
+        )

     # remove duplicates from prompt_with_history
     prompt_with_history = list(dict.fromkeys(prompt_with_history))
@@ -777,7 +811,9 @@ async def encapsulated_send(
         pass
     prompt_with_history.append(new_prompt_item)

-    prompt_with_history = "".join([item.text for item in prompt_with_history])
+    prompt_with_history = "".join(
+        [item.text for item in prompt_with_history]
+    )

     new_prompt = prompt_with_history

@@ -788,7 +824,7 @@ async def encapsulated_send(
         id in self.conversation_threads
         and tokens > self.model.summarize_threshold
         and not from_g_command
-        and not self.pinecone_service # This should only happen if we are not doing summarizations.
+        and not self.pinecone_service  # This should only happen if we are not doing summarizations.
     ):

         # We don't need to worry about the differences between interactions and messages in this block,
@@ -850,27 +886,42 @@ async def encapsulated_send(
     )

     # If the user is conversing, add the GPT response to their conversation history.
-    if id in self.conversation_threads and not from_g_command and not self.pinecone_service:
+    if (
+        id in self.conversation_threads
+        and not from_g_command
+        and not self.pinecone_service
+    ):
         self.conversation_threads[id].history.append(
             "\nGPTie: " + str(response_text) + "<|endofstatement|>\n"
         )

     # Embeddings case!
-    elif id in self.conversation_threads and not from_g_command and self.pinecone_service:
+    elif (
+        id in self.conversation_threads
+        and not from_g_command
+        and self.pinecone_service
+    ):
         conversation_id = id

         # Create an embedding and timestamp for the prompt
-        response_text = "\nGPTie: " + str(response_text) + "<|endofstatement|>\n"
+        response_text = (
+            "\nGPTie: " + str(response_text) + "<|endofstatement|>\n"
+        )

         response_text = response_text.encode("ascii", "ignore").decode()

         # Print the current timestamp
-        timestamp = int(str(datetime.datetime.now().timestamp()).replace(".", ""))
-        self.conversation_threads[conversation_id].history.append(EmbeddedConversationItem(response_text, timestamp))
+        timestamp = int(
+            str(datetime.datetime.now().timestamp()).replace(".", "")
+        )
+        self.conversation_threads[conversation_id].history.append(
+            EmbeddedConversationItem(response_text, timestamp)
+        )

         # Create and upsert the embedding for the conversation id, prompt, timestamp
-        embedding = await self.pinecone_service.upsert_conversation_embedding(self.model, conversation_id,
-                                                                              response_text, timestamp)
+        embedding = await self.pinecone_service.upsert_conversation_embedding(
+            self.model, conversation_id, response_text, timestamp
+        )

     # Cleanse
     response_text = self.cleanse_response(response_text)
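The embeddings branch above is the heart of this file: it stamps each conversation line with an integer timestamp, pulls the most similar stored lines back out of Pinecone, and stitches the pre-text, the retrieved neighbors, and the most recent static history into one prompt, deduplicating along the way. A condensed, dependency-free sketch of that assembly (names simplified; EmbeddedConversationItem here is a stand-in for the class in models/user_model.py, and the sort step that follows in the full method is omitted):

import datetime


class EmbeddedConversationItem:
    def __init__(self, text, timestamp):
        self.text = text
        self.timestamp = timestamp


def make_timestamp() -> int:
    # Same trick as the diff above: dropping the decimal point from the
    # float timestamp yields a sortable integer key.
    return int(str(datetime.datetime.now().timestamp()).replace(".", ""))


def build_prompt(pretext, similar, history, num_static):
    items = [pretext]
    # Neighbors come back from Pinecone as (text, timestamp) pairs.
    items += [EmbeddedConversationItem(t, ts) for t, ts in similar]
    # Walk backwards over up to num_static recent history entries.
    for i in range(1, min(len(history), num_static)):
        items.append(history[-i])
    # dict.fromkeys preserves insertion order, so this deduplicates
    # without reshuffling (it requires the items to be hashable).
    items = list(dict.fromkeys(items))
    return "".join(item.text for item in items)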
2 changes: 1 addition & 1 deletion gpt3discord.py
@@ -40,7 +40,7 @@
 pinecone_service = None
 if PINECONE_TOKEN:
     pinecone.init(api_key=PINECONE_TOKEN, environment="us-west1-gcp")
-    PINECONE_INDEX = "conversation-embeddings" # This will become unfixed later.
+    PINECONE_INDEX = "conversation-embeddings"  # This will become unfixed later.
     pinecone_service = PineconeService(pinecone.Index(PINECONE_INDEX))
     print("Got the pinecone service")

10 changes: 5 additions & 5 deletions models/autocomplete_model.py
@@ -27,17 +27,17 @@ async def get_value(
     ctx: discord.AutocompleteContext,
 ): # Behaves a bit weird if you go back and edit the parameter without typing in a new command
     values = {
-        "max_conversation_length": [str(num) for num in range(1,500,2)],
-        "num_images": [str(num) for num in range(1,4+1)],
+        "max_conversation_length": [str(num) for num in range(1, 500, 2)],
+        "num_images": [str(num) for num in range(1, 4 + 1)],
         "mode": ["temperature", "top_p"],
         "model": ["text-davinci-003", "text-curie-001"],
         "low_usage_mode": ["True", "False"],
         "image_size": ["256x256", "512x512", "1024x1024"],
         "summarize_conversation": ["True", "False"],
         "welcome_message_enabled": ["True", "False"],
-        "num_static_conversation_items": [str(num) for num in range(5,20+1)],
-        "num_conversation_lookback": [str(num) for num in range(5,15+1)],
-        "summarize_threshold": [str(num) for num in range(800, 3500, 50)]
+        "num_static_conversation_items": [str(num) for num in range(5, 20 + 1)],
+        "num_conversation_lookback": [str(num) for num in range(5, 15 + 1)],
+        "summarize_threshold": [str(num) for num in range(800, 3500, 50)],
     }
     if ctx.options["parameter"] in values.keys():
         return [value for value in values[ctx.options["parameter"]]]
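The values table above backs Discord slash-command autocomplete: every candidate is a string because Discord autocomplete replies are strings. Outside of Discord the lookup reduces to a plain dict fetch; a trimmed sketch of the same pattern (the fallback for a parameter missing from the table is not visible in this hunk, so the empty list here is an assumption):

values = {
    "num_images": [str(num) for num in range(1, 4 + 1)],
    "image_size": ["256x256", "512x512", "1024x1024"],
}


def get_value(parameter: str) -> list:
    # Mirrors the cog's callback: all candidates for a known parameter.
    if parameter in values:
        return list(values[parameter])
    return []  # assumed fallback, not shown in the diff


print(get_value("num_images"))  # ['1', '2', '3', '4']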
14 changes: 9 additions & 5 deletions models/openai_model.py
@@ -93,7 +93,9 @@ def num_static_conversation_items(self, value):
     if value < 3:
         raise ValueError("num_static_conversation_items must be >= 3")
     if value > 20:
-        raise ValueError("num_static_conversation_items must be <= 20, this is to ensure reliability and reduce token wastage!")
+        raise ValueError(
+            "num_static_conversation_items must be <= 20, this is to ensure reliability and reduce token wastage!"
+        )
     self._num_static_conversation_items = value

 @property
@@ -106,7 +108,9 @@ def num_conversation_lookback(self, value):
     if value < 3:
         raise ValueError("num_conversation_lookback must be >= 3")
     if value > 15:
-        raise ValueError("num_conversation_lookback must be <= 15, this is to ensure reliability and reduce token wastage!")
+        raise ValueError(
+            "num_conversation_lookback must be <= 15, this is to ensure reliability and reduce token wastage!"
+        )
     self._num_conversation_lookback = value

 @property
@@ -358,7 +362,7 @@ async def send_embedding_request(self, text):
         "Authorization": f"Bearer {self.openai_key}",
     }
     async with session.post(
-        "https://api.openai.com/v1/embeddings", json=payload, headers=headers
+        "https://api.openai.com/v1/embeddings", json=payload, headers=headers
     ) as resp:
         response = await resp.json()

@@ -474,8 +478,8 @@ async def send_request(
         "https://api.openai.com/v1/completions", json=payload, headers=headers
     ) as resp:
         response = await resp.json()
-        #print(f"Payload -> {payload}")
-        #print(f"Response -> {response}")
+        # print(f"Payload -> {payload}")
+        # print(f"Response -> {response}")
         # Parse the total tokens used for this request and response pair from the response
         await self.valid_text_request(response)

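send_embedding_request above posts straight to OpenAI's REST embeddings endpoint with aiohttp rather than going through a client library. A self-contained sketch of that call; the hunk shows only the URL and the Authorization header, so the {"model", "input"} payload shape, the text-embedding-ada-002 model name, and the OPENAI_TOKEN environment variable are all assumptions here:

import os

import aiohttp


async def send_embedding_request(text: str) -> list:
    payload = {"model": "text-embedding-ada-002", "input": text}  # assumed payload
    headers = {"Authorization": f"Bearer {os.environ['OPENAI_TOKEN']}"}  # env var name assumed
    async with aiohttp.ClientSession() as session:
        async with session.post(
            "https://api.openai.com/v1/embeddings", json=payload, headers=headers
        ) as resp:
            response = await resp.json()
    # The vector lives at response["data"][0]["embedding"].
    return response["data"][0]["embedding"]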
44 changes: 34 additions & 10 deletions models/pinecone_service_model.py
@@ -2,42 +2,66 @@


 class PineconeService:
-
     def __init__(self, index: pinecone.Index):
         self.index = index

     def upsert_basic(self, text, embeddings):
         self.index.upsert([(text, embeddings)])

     def get_all_for_conversation(self, conversation_id: int):
-        response = self.index.query(top_k=100, filter={"conversation_id": conversation_id})
+        response = self.index.query(
+            top_k=100, filter={"conversation_id": conversation_id}
+        )
         return response

-    async def upsert_conversation_embedding(self, model, conversation_id: int, text, timestamp):
+    async def upsert_conversation_embedding(
+        self, model, conversation_id: int, text, timestamp
+    ):
         # If the text is > 512 characters, we need to split it up into multiple entries.
         first_embedding = None
         if len(text) > 500:
             # Split the text into 512 character chunks
-            chunks = [text[i:i + 500] for i in range(0, len(text), 500)]
+            chunks = [text[i : i + 500] for i in range(0, len(text), 500)]
             for chunk in chunks:
                 print("The split chunk is ", chunk)

                 # Create an embedding for the split chunk
                 embedding = await model.send_embedding_request(chunk)
                 if not first_embedding:
                     first_embedding = embedding
-                self.index.upsert([(chunk, embedding)], metadata={"conversation_id": conversation_id, "timestamp": timestamp})
+                self.index.upsert(
+                    [(chunk, embedding)],
+                    metadata={
+                        "conversation_id": conversation_id,
+                        "timestamp": timestamp,
+                    },
+                )
             return first_embedding
         else:
             embedding = await model.send_embedding_request(text)
-            self.index.upsert([(text, embedding, {"conversation_id": conversation_id,
-                                                  "timestamp": timestamp})])
+            self.index.upsert(
+                [
+                    (
+                        text,
+                        embedding,
+                        {"conversation_id": conversation_id, "timestamp": timestamp},
+                    )
+                ]
+            )
             return embedding

     def get_n_similar(self, conversation_id: int, embedding, n=10):
-        response = self.index.query(vector=embedding, top_k=n, include_metadata=True, filter={"conversation_id": conversation_id})
+        response = self.index.query(
+            vector=embedding,
+            top_k=n,
+            include_metadata=True,
+            filter={"conversation_id": conversation_id},
+        )
         print(response)
-        relevant_phrases = [(match['id'],match['metadata']['timestamp']) for match in response['matches']]
+        relevant_phrases = [
+            (match["id"], match["metadata"]["timestamp"])
+            for match in response["matches"]
+        ]
         # Sort the relevant phrases based on the timestamp
         relevant_phrases.sort(key=lambda x: x[1])
-        return relevant_phrases
\ No newline at end of file
+        return relevant_phrases
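Two details of PineconeService are worth noting. First, vectors are keyed by the raw text itself (the upsert tuples are (text, embedding, metadata)), which is why get_n_similar can hand back match["id"] directly as the stored phrase. Second, long inputs are split before embedding; the comments say 512 characters while the code splits at 500, a discrepancy black's reformatting preserves. The chunking idiom in isolation:

def chunk_text(text: str, size: int = 500) -> list:
    # text[i : i + size] steps through the string in fixed-size windows;
    # the final chunk simply holds whatever is left over.
    return [text[i : i + size] for i in range(0, len(text), size)]


print([len(c) for c in chunk_text("x" * 1234)])  # [500, 500, 234]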
3 changes: 1 addition & 2 deletions models/user_model.py
@@ -73,6 +73,7 @@ def __repr__(self):
     def __str__(self):
         return self.__repr__()

+
 class EmbeddedConversationItem:
     def __init__(self, text, timestamp):
         self.text = text
@@ -104,5 +105,3 @@ def __ge__(self, other):

     def __ne__(self, other):
         return not self.__eq__(other)
-
-
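EmbeddedConversationItem spells out its rich comparisons (__ne__, __ge__, and the rest) by hand. If, as the surrounding code suggests, the ordering is by timestamp, functools.total_ordering could derive most of them from __eq__ and __lt__; a sketch of that alternative, as an illustration rather than what the repository does:

from functools import total_ordering


@total_ordering
class Item:
    def __init__(self, text, timestamp):
        self.text = text
        self.timestamp = timestamp

    def __eq__(self, other):
        return self.timestamp == other.timestamp

    def __lt__(self, other):
        return self.timestamp < other.timestamp


a, b = Item("a", 1), Item("b", 2)
print(a < b, a >= b, a != b)  # True False True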