Remove unused code (#5)

* Remove unused slack_user_oauth_token credential
* Remove unused create_trivia endpoint and related code
* Remove unused vectorization persisted queue
* Avoid embeddings double-generation on space import
* Remove unused ConfluenceClient methods
* Stop creating unused "Nur documentation QnA" space
* Remove unused git/
* Remove unused query_assistant_rag_tool module
* Remove open_ai/assistants/file_manager and related code
toptal · Apr 4, 2024 · 3fb020d · 3fb020d
1 parent c1fe9d7
commit 3fb020d
Show file tree

Hide file tree

Showing 16 changed files with 19 additions and 673 deletions.
diff --git a/api/endpoint.py b/api/endpoint.py
@@ -10,7 +10,6 @@
 from vector.chroma import vectorize_document_and_store_in_db
 from configuration import api_host, api_port
 from interactions.vectorize_and_store import vectorize_interaction_and_store_in_db
-from trivia.trivia_manager import TriviaQuizz
 
 host = os.environ.get("NUR_API_HOST", api_host)
 port = os.environ.get("NUR_API_PORT", api_port)
@@ -44,13 +43,6 @@ class InteractionEmbedRequest(BaseModel):
     interaction_id: str
 
 
-class TriviaRequestEvent(BaseModel):
-    domain: str
-    thread_ts: str
-    channel: str
-    user: str
-
-
 @processor.post("/api/v1/questions")
 def create_question(question_event: QuestionEvent):
     thread = threading.Thread(target=process_question, args=(question_event,))
@@ -99,23 +91,6 @@ def create_interaction_embeds(InteractionEmbedRequest: InteractionEmbedRequest):
     }
 
 
-@processor.post("/api/v1/create_trivia")
-def create_trivia(TriviaRequestEvent: TriviaRequestEvent):
-    """
-    Endpoint to initiate a trivia quizz based on a sepecific domain and share it in specified channel.
-    args: domain, thread_ts, channel, user
-    """
-    # Using retrieve context retrieve interactions where the model failed to find relevant context that are closest to the domain mentioned.
-    # Share the questions on the channel in question and tag the user.
-    # The bot then posts the first question and allows the conversation to go on untill it detects a :check mark: emoji on each question.
-    # The bot will also count the thumbs up provided on each message and keep track of each users thumbs up count.
-    # The bot then creates a confluence page under "Q&A KB" confluence space tagging the top 3 contributors
-
-    thread = threading.Thread(target=TriviaQuizz, args=(TriviaRequestEvent))
-    thread.start()
-    return "STUB TEXT - STILL IN DEVELOPMENT \nmessage: Trivia creation request initiated, processing in background"
-
-
 def main():
     """Entry point for starting the FastAPI application."""
     uvicorn.run("api.endpoint:processor", host=host, port=int(port), reload=True)

diff --git a/configuration.py b/configuration.py
@@ -24,7 +24,6 @@ def get_project_root() -> str:
 chart_folder_path = os.path.join(project_path, "content", "charts")
 sql_file_path = os.path.join(project_path, "content", "database", "confluence_pages_sql.db")
 persist_page_processing_queue_path = os.path.join(project_path, "content", "queues", "confluence_page_processing_queue")
-persist_page_vector_queue_path = os.path.join(project_path, "content", "queues", "confluence_page_vector_queue")
 vector_folder_path = os.path.join(project_path, "content", "vectors", "confluence_pages")
 interactions_folder_path = os.path.join(project_path, "content", "vectors", "confluence_interactions")
 

diff --git a/confluence_integration/confluence_client.py b/confluence_integration/confluence_client.py
@@ -23,12 +23,6 @@ def __init__(self):
         username (str): The username for authentication.
         api_token (str): The API token for authentication.
         """
-        self.initialize_confluence_client()
-
-    def initialize_confluence_client(self):
-        """
-        Initialize the Confluence client.
-        """
         self.confluence = Confluence(
             url=confluence_credentials['base_url'],
             username=confluence_credentials['username'],
@@ -91,33 +85,6 @@ def update_page(self, page_id, title, content):
         clean_content = self.validate_and_coerce_xhtml(content)  # Validate and clean the content
         return self.confluence.update_page(page_id=page_id, title=title, body=clean_content)
 
-    def retrieve_confluence_pages(self, space_key, limit=50):
-        """
-        Retrieve pages from a specified Confluence space using pagination.
-
-        Args:
-        space_key (str): The key of the Confluence space.
-        limit (int): The number of items to retrieve per page.
-
-        Returns:
-        list: A list of page data objects.
-        """
-        # Implementation goes here
-
-    def retrieve_child_items(self, item_id, content_type, limit=50):
-        """
-        Retrieve child items (pages or comments) for a given Confluence item using pagination.
-
-        Args:
-        item_id (str): The ID of the Confluence item (page or comment).
-        content_type (str): Type of content to retrieve ('page' or 'comment').
-        limit (int): The number of items to retrieve per page.
-
-        Returns:
-        list: A list of child item data objects.
-        """
-        # Implementation goes here
-
     def retrieve_space_list(self):
         """
         Retrieve a complete list of available spaces in Confluence using pagination.
@@ -202,31 +169,3 @@ def generate_space_key(space_name):
         # Append a timestamp to the base key
         timestamp = int(time.time())
         return f"{base_key}{timestamp}"
-
-    def retrieve_page_history(self, page_id):
-        """
-        Retrieve the history of a specified Confluence page.
-
-        Args:
-        page_id (str): The ID of the Confluence page.
-
-        Returns:
-        dict: A dictionary containing the history of the page.
-        """
-        # Implementation goes here
-
-    def retrieve_page_content(self, page_id):
-        """
-        Retrieve the content of a specified Confluence page.
-
-        Args:
-        page_id (str): The ID of the Confluence page.
-
-        Returns:
-        str: The content of the page.
-        """
-        # Implementation goes here
-
-
-conf_client = ConfluenceClient()
-conf_client.create_space_if_not_found(space_name="Nur documentation QnA")
diff --git a/confluence_integration/extract_page_content_and_store_processor.py b/confluence_integration/extract_page_content_and_store_processor.py
@@ -6,7 +6,7 @@
 import logging
 from concurrent.futures import ThreadPoolExecutor
 from configuration import api_host, api_port
-from configuration import persist_page_processing_queue_path, persist_page_vector_queue_path
+from configuration import persist_page_processing_queue_path
 from persistqueue import Queue
 from file_system.file_manager import FileManager
 from database.page_manager import store_pages_data, is_page_processed, get_last_updated_timestamp
@@ -113,15 +113,13 @@ def submit_embedding_creation_request(page_id):
 def get_page_content_using_queue(space_key):
     logging.info(f"Starting to process pages for space key: {space_key}")
     process_page_queue = QueueManager(persist_page_processing_queue_path, space_key)
-    vectorization_queue = QueueManager(persist_page_vector_queue_path, space_key)
     file_manager = FileManager()
     page_content_map = {}
     page_processor = PageProcessor(file_manager, space_key)
 
     def process_page_wrapper(page_id):
         logging.info(f"Processing page with ID {page_id}...")
         page_processor.process_page(page_id, page_content_map)
-        vectorization_queue.enqueue_page(page_id)
         process_page_queue.task_done()
         logging.info(f"Page with ID {page_id} processing complete, added for vectorization.")
 
@@ -132,9 +130,6 @@ def process_page_wrapper(page_id):
             executor.submit(process_page_wrapper, page_id)
 
     # After all threads are done, continue with the single-threaded part
-    page_ids = [page_id for page_id in page_content_map.keys()]
-    for page_id in page_ids:
-        submit_embedding_creation_request(page_id)
     logging.info(f"Page content for space key {space_key} processing complete.")
     store_pages_data(space_key, page_content_map)
 

diff --git a/credentials.py b/credentials.py
@@ -10,5 +10,4 @@
 
 # Slack tokens Toptal
 slack_app_level_token = os.environ.get("SLACK_APP_TOKEN")
-slack_bot_user_oauth_token = os.environ.get("SLACK_BOT_TOKEN")
-slack_user_oauth_token = os.environ.get("SLACK_USER_TOKEN")
+slack_bot_user_oauth_token = os.environ.get("SLACK_BOT_TOKEN")
diff --git a/git/__init__.py b/git/__init__.py
diff --git a/git/import_repo_to_file.py b/git/import_repo_to_file.py
diff --git a/open_ai/assistants/assistant_manager.py b/open_ai/assistants/assistant_manager.py
@@ -1,6 +1,5 @@
 # ./oai_assistants/assistant_manager.py
 import json
-from open_ai.assistants.file_manager import FileManager
 from open_ai.assistants.utility import initiate_client
 
 
@@ -43,68 +42,6 @@ def create_assistant(self, model, name, instructions, tools, description=None, m
         )
         return response
 
-    def clean_missing_files_from_assistant(self, assistant_id):
-        """
-        Identifies and removes missing file references from an assistant's configuration.
-
-        This function checks the files associated with the specified assistant and identifies any files
-        that are no longer present in the file system. It then updates the assistant's configuration
-        to remove these missing files.
-
-        Parameters:
-        assistant_id (str): The ID of the assistant to be cleaned.
-
-        Returns:
-        list: A list of file IDs that were identified as missing and removed from the assistant.
-        """
-        assistant = self.load_assistant(assistant_id)
-        assistant_file_ids = assistant.file_ids if assistant.file_ids is not None else []
-
-        # Check if the assistant has any file IDs associated with it
-        if not assistant_file_ids:
-            print("No files are associated with this assistant.")
-            return []
-
-        file_manager = FileManager(initiate_client())
-        all_files = file_manager.list()
-        file_ids = list(all_files.keys())
-
-        missing_files = [file_id for file_id in assistant_file_ids if file_id not in file_ids]
-
-        # Check if there are any missing files
-        if missing_files:
-            for missing_file in missing_files:
-                print(f"Deleting missing file {missing_file} from assistant {assistant_id}.")
-                self.client.update(
-                    assistant_id=assistant_id,
-                    file_ids=[file_id for file_id in assistant_file_ids if file_id != missing_file]
-                )
-        else:
-            print("There are no missing files.")
-
-        return missing_files
-
-    def add_file_to_assistant(self, assistant_id, file_id):
-        """
-        Add a file to an assistant's list of files.
-
-        Args:
-            assistant_id: The ID of the assistant being updated.
-            file_id: The ID of the file to add to the assistant.
-
-        Returns:
-            The updated Assistant object.
-        """
-        assistant = self.client.retrieve(assistant_id)
-        existing_file_ids = assistant.file_ids if assistant.file_ids is not None else []
-        updated_file_ids = existing_file_ids + [file_id]
-
-        response = self.client.update(
-            assistant_id=assistant_id,
-            file_ids=updated_file_ids
-        )
-        return response
-
     def list_assistants(self):
         """
         List all assistants.

diff --git a/open_ai/assistants/file_manager.py b/open_ai/assistants/file_manager.py