Commit 1decee6: Step4
sasha370 committed Apr 12, 2024 (1 parent: 9ae9213)
Showing 6 changed files with 69 additions and 25 deletions.
14 changes: 14 additions & 0 deletions README.md
@@ -161,6 +161,20 @@ If the above works, you should see Nur's manager command prompt - the same thing

Bear in mind that the Dockerized version uses a shared volume named `nur_shared_content`. This is bootstrapped during the first installation, but will persist until it is manually removed.

## Add a new data model using Alembic


```warp-runnable-command
# Move to the poetry package
cd code/nur/nurai
# Import the model in /nurai/migrations/env.py
# Create a new migration
poetry run alembic revision --autogenerate -m "Initialize the db"
# Apply the migration
poetry run alembic upgrade head
```
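For context on the "Import the model" step: Alembic's `--autogenerate` only detects tables whose models are imported where `env.py` builds its `target_metadata`. A minimal sketch of what that looks like (the `Interaction` model, file paths, and `Base` wiring below are illustrative, not part of this commit):

```python
# nurai/models/interaction.py -- hypothetical model used for illustration
from sqlalchemy import Column, Integer, String
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class Interaction(Base):
    __tablename__ = "interactions"

    id = Column(Integer, primary_key=True)
    question = Column(String, nullable=False)
    answer = Column(String)

# nurai/migrations/env.py (excerpt) -- the import is what makes the table
# visible to `alembic revision --autogenerate`:
#
#   from nurai.models.interaction import Interaction  # noqa: F401
#   target_metadata = Base.metadata
```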


## Network traffic

1. Outgoing to Open AI API (for Embeds and completion)
5 changes: 0 additions & 5 deletions database/page_manager.py
```diff
@@ -57,7 +57,6 @@ def get_page_ids_missing_embeds(self, session):
         Retrieve the page IDs of pages that are missing embeddings.
         :return: A list of page IDs.
         """
-        # with session as session:
         records = session.query(PageData).filter(
             (PageData.lastUpdated > PageData.last_embedded) |
             (PageData.last_embedded.is_(None))
@@ -72,7 +71,6 @@ def get_all_page_data_from_db(self, session, space_key=None):
         :param space_key: Optional; the specific space key to filter pages by.
         :return: Tuple of page_ids (list of page IDs), all_documents (list of document strings), and embeddings (list of embeddings as strings)
         """
-        # with session as session:
         if space_key:
             records = session.query(PageData).filter(PageData.space_key == space_key).all()
         else:
@@ -100,7 +98,6 @@ def add_or_update_embed_vector(self, page_id, embed_vector, session):
             page_id (str): The ID of the page to update.
             embed_vector: The embed vector data to be added or updated, expected to be a list of floats.
         """
-        # with session as session:
         page = session.query(PageData).filter_by(page_id=page_id).first()
 
         if page:
@@ -117,7 +114,6 @@ def find_page(self, page_id, session) -> Optional[PageData]:
         :param page_id: The ID of the page to find.
         :return: The page data if found, or None if not found.
         """
-        # with session as session:
         return session.query(PageData).filter_by(page_id=page_id).first()
 
     def find_pages(self, page_ids, session) -> List[PageData]:
@@ -126,7 +122,6 @@ def find_pages(self, page_ids, session) -> List[PageData]:
         :param page_ids: A list of page IDs to find.
         :return: A list of page data if found, or an empty list if not found.
         """
-        # with session as session:
         return session.query(PageData).filter(PageData.page_id.in_(page_ids)).all()
 
     def format_page_for_llm(self, page: PageData) -> str:
```
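The deleted `# with session as session:` comments were leftovers from when each method opened its own session; after this cleanup the caller owns the session and passes it in. A sketch of the resulting call pattern (`PageManager` as the enclosing class name is an assumption; `get_db_session` comes from `database.database`, as seen in the other files of this commit):

```python
# Hypothetical caller: one session is reused across several queries.
from database.database import get_db_session   # session factory used by main.py
from database.page_manager import PageManager  # assumed name of the enclosing class

manager = PageManager()
with get_db_session() as session:
    page_ids = manager.get_page_ids_missing_embeds(session)
    for page in manager.find_pages(page_ids, session):
        print(manager.format_page_for_llm(page))
```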
32 changes: 32 additions & 0 deletions docker-compose.yml
```diff
@@ -0,0 +1,32 @@
+version: '3.3'
+services:
+  db:
+    image: postgres:12
+    restart: always
+    environment:
+      POSTGRES_DB: 'top-assist'
+      POSTGRES_USER: 'toptal'
+      POSTGRES_PASSWORD: 'toptal'
+    ports:
+      - '5432:5432'
+    expose:
+      - '5432'
+    volumes:
+      - postgres-data:/var/lib/postgresql/data
+  chroma:
+    image: ghcr.io/chroma-core/chroma:latest
+    volumes:
+      - chroma-data:/chroma/.chroma/index
+    ports:
+      - 8001:8000
+    networks:
+      - net
+
+volumes:
+  postgres-data:
+  chroma-data:
+    driver: local
+
+networks:
+  net:
+    driver: bridge
```
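With the services up (`docker compose up -d`), client code reaches them as sketched below; the ports and credentials come straight from the compose file, while the client libraries (`sqlalchemy` with a Postgres driver, `chromadb`) and the snippet itself are assumptions, not code from this commit:

```python
# Connectivity check against the compose services; ports and credentials
# are taken from docker-compose.yml, everything else is illustrative.
import chromadb
from sqlalchemy import create_engine, text

# Postgres is published on host port 5432 with the credentials set above.
engine = create_engine("postgresql://toptal:toptal@localhost:5432/top-assist")
with engine.connect() as conn:
    print(conn.execute(text("SELECT 1")).scalar())  # 1 if the database answers

# Chroma: the compose file maps host port 8001 to the container's port 8000.
chroma = chromadb.HttpClient(host="localhost", port=8001)
print(chroma.heartbeat())  # nanosecond timestamp when the server is reachable
```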
15 changes: 9 additions & 6 deletions interactions/identify_knowledge_gap.py
```diff
@@ -6,7 +6,6 @@
 from configuration import interactions_folder_path, embedding_model_id, knowledge_gap_discussions_channel_id
 from configuration import interaction_retrieval_count, interactions_collection_name
 from configuration import quizz_assistant_id
-from database.database import get_db_session
 from open_ai.embedding.embed_manager import embed_text
 from open_ai.assistants.utility import extract_assistant_response, initiate_client
 from open_ai.assistants.thread_manager import ThreadManager
@@ -213,14 +212,18 @@ def strip_json(assistant_response):
         str: The extracted JSON content as a string. Returns an empty JSON array '[]' if extraction fails.
     """
     try:
-        # Attempt to find the start of the JSON content
-        if assistant_response.find("```json") == 1:
+        json_content = assistant_response
+
+        # Strip markdown quotes if any
+        if "```json" in assistant_response:
             start_index = assistant_response.index("```json") + len("```json")
             end_index = assistant_response.index("```", start_index)
-            assistant_response = assistant_response[start_index:end_index].strip()
+            json_content = json_content[start_index:end_index]
+
+        json_content = json_content.strip()
         # Basic validation to check if it's likely to be JSON
-        if assistant_response.startswith("[") and assistant_response.endswith("]"):
-            return assistant_response
+        if json_content.startswith("[") and json_content.endswith("]"):
+            return json_content
         else:
             logging.error("Extracted content does not appear to be valid JSON.")
             return "[]"
```
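This rewrite also fixes a real bug: the old guard `assistant_response.find(...) == 1` compared against index 1, so it was only true when the fence started at the second character, and fenced responses usually skipped the unwrap step entirely. With the new `in` check, the behaviour is as follows (hypothetical calls, not part of the commit):

```python
# Illustrative inputs for strip_json after this change.
fenced = 'Gaps found:\n```json\n[{"topic": "billing reminders"}]\n```'
print(strip_json(fenced))       # -> [{"topic": "billing reminders"}]
print(strip_json('[1, 2, 3]'))  # bare JSON arrays pass through unchanged
print(strip_json('no json'))    # logs an error and returns '[]'
```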
6 changes: 3 additions & 3 deletions interactions/vectorize_and_store.py
```diff
@@ -122,14 +122,14 @@ def submit_create_interaction_embeds_request(interaction_id):
     post_request(interaction_embeds_endpoint, {"interaction_id": interaction_id}, data_type='Interaction embed')
 
 
-def vectorize_interactions_and_store_in_db(db, retry_limit: int = 3, wait_time: int = 5) -> None:
+def vectorize_interactions_and_store_in_db(session, retry_limit: int = 3, wait_time: int = 5) -> None:
     """
     Vectorize all interactions without embeds and store them in the database,
     with retries for failed attempts.
     """
     for attempt in range(retry_limit):
         # Retrieve interactions that are still missing embeddings.
-        interactions = get_qna_interactions_without_embeds(db)
+        interactions = get_qna_interactions_without_embeds(session)
 
         # If there are no interactions missing embeddings, exit the loop and end the process.
         if not interactions:
@@ -151,7 +151,7 @@ def vectorize_interactions_and_store_in_db(db, retry_limit: int = 3, wait_time:
             time.sleep(wait_time)
 
         # After waiting, retrieve the list of interactions still missing embeddings to see if the list has decreased.
-        interactions = get_qna_interactions_without_embeds(db)
+        interactions = get_qna_interactions_without_embeds(session)
         if not interactions:
             print("All interactions now have embeddings. Process complete.")
             break  # Break out of the loop if there are no more interactions missing embeddings.
```
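`get_qna_interactions_without_embeds` itself is not shown in this commit; given how it is called above, it presumably looks something like this sketch (the `QAInteraction` model and its `embed` column are assumed names):

```python
# Assumed shape of the helper called in the retry loop above.
def get_qna_interactions_without_embeds(session):
    return session.query(QAInteraction).filter(QAInteraction.embed.is_(None)).all()
```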
22 changes: 11 additions & 11 deletions main.py
```diff
@@ -45,36 +45,36 @@ def main_menu():
             print("Loading new documentation space...")
             space_key, space_name = tui_choose_space()
             if space_key and space_name:
-                with get_db_session() as db_session:
-                    import_space(space_key, space_name, db_session)
+                with get_db_session() as session:
+                    import_space(space_key, space_name, session)
                 print()
                 print(f"Space '{space_name}' retrieval and indexing complete.")
 
         elif choice == "2":
             question = ask_question()
             if question:
-                with get_db_session() as db_session:
-                    answer, thread_id = answer_question_with_assistant(question, db_session)
+                with get_db_session() as session:
+                    answer, thread_id = answer_question_with_assistant(question, session)
                 print(f"\nThread ID: {thread_id}\nAnswer: {answer}")
 
         elif choice == "3":
             print("Creating vector db for interactions")
-            with get_db_session() as db_session:
-                vectorize_interactions_and_store_in_db(db_session)
-                VectorInteractionManager().add_to_vector(db_session)
+            with get_db_session() as session:
+                vectorize_interactions_and_store_in_db(session)
+                VectorInteractionManager().add_to_vector(session)
 
         elif choice == "4":
             load_manage_assistants()
 
         elif choice == "5":
             context = input("Enter the context you want to identifying knowledge gaps in\nex:(billing reminders): ")
-            with get_db_session() as db_session:
-                identify_knowledge_gaps(context, db_session)
+            with get_db_session() as session:
+                identify_knowledge_gaps(context, session)
 
         elif choice == "6":
             print("Starting 3D visualization process...")
-            with get_db_session() as db_session:
-                load_confluence_pages_spacial_distribution(db_session)
+            with get_db_session() as session:
+                load_confluence_pages_spacial_distribution(session)
 
         elif choice == "0":
             print("Exiting program.")
```
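After this commit every menu branch follows the same `with get_db_session() as session:` pattern. `database/database.py` is not part of the diff, but for the `with` form to work, `get_db_session` must behave like a context manager, roughly along these lines (a sketch under that assumption; the connection URL reuses the compose credentials):

```python
# Assumed shape of get_db_session in database/database.py: commit on
# success, roll back on error, always close the session.
from contextlib import contextmanager
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

engine = create_engine("postgresql://toptal:toptal@localhost:5432/top-assist")
SessionLocal = sessionmaker(bind=engine)

@contextmanager
def get_db_session():
    session = SessionLocal()
    try:
        yield session
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()
```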
