Commit

Merge pull request #144 from amosproj/develop
Develop
get4flo authored Jun 12, 2024
2 parents a1b42c1 + 3acad49 commit ec24439
Showing 41 changed files with 1,583 additions and 400 deletions.
32 changes: 21 additions & 11 deletions Makefile
@@ -1,6 +1,6 @@
#Root Makefile

.PHONY: all start stop help backend
.PHONY: all start stop help backend frontend lint format lint-all format-all

all: help

@@ -16,18 +16,28 @@ stop-dev:
backend-%:
@$(MAKE) -C Project/backend $*

# Pass through to frontend Makefile
frontend-%:
@$(MAKE) -C Project/frontend $*

lint:
@$(MAKE) -C Project/backend lint
@$(MAKE) -C Project/frontend lint

format:
@$(MAKE) -C Project/backend format
@$(MAKE) -C Project/frontend format

lint-all: lint
format-all: format

help:
@echo "Usage: make [target]"
@echo "Targets:"
@echo " build-dev - Start both frontend and backend"
@echo " stop-dev - Stop both frontend and backend"
@echo " backend-<cmd> - Run a backend command (e.g., make backend-build-dev)"
@echo " Available backend commands:"
@echo " backend-build-dev - Build the backend development environment"
@echo " backend-stop-dev - Stop the backend development environment"
@echo " backend-lint - Run linter on the backend"
@echo " backend-format - Run formatter on the backend"
@echo " backend-test - Run tests on the backend"
@echo " backend-migrations - Create migration files for the backend"
@echo " backend-migrate - Run migrations for the backend"
@echo " help - Show this help message"
@echo " backend-<cmd> - Run a backend command (e.g., make backend-build-dev)"
@echo " frontend-<cmd> - Run a frontend command (e.g., make frontend-start-dev)"
@echo " lint - Lint both frontend and backend"
@echo " format - Format both frontend and backend"
@echo " help - Show this help message"
2 changes: 1 addition & 1 deletion Project/backend/.env.example
@@ -15,7 +15,7 @@ POSTGRES_PASSWORD=password
POSTGRES_DB=amos
POSTGRES_PORT=5432
POSTGRES_HOST=amos-db
JANUS_PORT=8182


#API Keys
GROQ_API_KEY=API_KEY
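The new JANUS_PORT setting is read from the environment like the other variables. A minimal sketch of picking it up with python-dotenv; the fallback default mirrors .env.example but is an assumption, not code from this PR:

    import os
    from dotenv import load_dotenv

    load_dotenv()  # load the backend .env file into the process environment
    # 8182 mirrors .env.example; the fallback itself is an assumption
    janus_port = int(os.getenv("JANUS_PORT", "8182"))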
110 changes: 90 additions & 20 deletions Project/backend/codebase/graph_creator/gemini.py
@@ -1,9 +1,6 @@
import os
import json
from datetime import datetime
from dotenv import load_dotenv
import google.generativeai as genai
from graph_creator.graph_handler import extract_relation_from_llm_output
from graph_creator.services.json_handler import transform_llm_output_to_dict


@@ -17,6 +14,7 @@ def configure_genai():
raise ValueError("API key not found in environment variables")
genai.configure(api_key=api_key)


def serialize_chat_history(history):
"""
Convert the chat history to a serializable format.
@@ -31,6 +29,7 @@ def serialize_chat_history(history):
serialized_history.append(serialized_entry)
return serialized_history


def extract_entities_and_relations(chunk, genai_client):
"""
Extract entities and relations from a chunk using the Gemini client.
@@ -59,14 +58,17 @@ def extract_entities_and_relations(chunk, genai_client):
"]"
)
USER_PROMPT = f"context: ```{chunk}``` \n\n output: "

chat_session = genai_client.start_chat(history=[])
message = SYS_PROMPT + USER_PROMPT
response = chat_session.send_message(message)

return response.text

def check_for_connecting_relation(chunk, entities_component_1, entities_component_2, genai_client):

def check_for_connecting_relation(
chunk, entities_component_1, entities_component_2, genai_client
):
"""
Check for connecting relation between entities of two components.
"""
@@ -86,44 +88,112 @@ def check_for_connecting_relation(chunk, entities_component_1, entities_componen
"}"
)
USER_PROMPT = f"text chunk: ```{chunk}``` \n\n output: "

chat_session = genai_client.start_chat(history=[])
message = SYS_PROMPT + USER_PROMPT
response = chat_session.send_message(message)

return response.text

def process_chunks(chunks, prompt_template, entities_component_1=None, entities_component_2=None):

def check_for_connecting_relation_(
text_chunk, entities_component_1, entities_component_2
):
"""
Takes a text chunk and two lists of entities (one from each component in the graph)
and tries to extract a connecting relation between any entity of
entities_component_1 and any entity of entities_component_2
Parameters
----------
text_chunk : str
The text chunk to be processed
entities_component_1 : list
List of entities from the first component
entities_component_2 : list
List of entities from the second component
Returns
-------
str
The response of the LLM as a string
"""
configure_genai()
genai_client = genai.GenerativeModel(
model_name="gemini-1.5-pro-latest",
safety_settings=[
{
"category": "HARM_CATEGORY_HARASSMENT",
"threshold": "BLOCK_MEDIUM_AND_ABOVE",
},
{
"category": "HARM_CATEGORY_HATE_SPEECH",
"threshold": "BLOCK_MEDIUM_AND_ABOVE",
},
{
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
"threshold": "BLOCK_MEDIUM_AND_ABOVE",
},
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"threshold": "BLOCK_MEDIUM_AND_ABOVE",
},
],
generation_config={
"temperature": 1,
"top_p": 0.95,
"top_k": 64,
"max_output_tokens": 8192,
"response_mime_type": "text/plain",
},
)

return check_for_connecting_relation(
text_chunk, entities_component_1, entities_component_2, genai_client
)


def process_chunks(chunks):
"""
Process a list of chunks through the generative model.
"""
configure_genai()
genai_client = genai.GenerativeModel(
model_name="gemini-1.5-pro-latest",
safety_settings=[
{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
{"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
{"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
{"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
{
"category": "HARM_CATEGORY_HARASSMENT",
"threshold": "BLOCK_MEDIUM_AND_ABOVE",
},
{
"category": "HARM_CATEGORY_HATE_SPEECH",
"threshold": "BLOCK_MEDIUM_AND_ABOVE",
},
{
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
"threshold": "BLOCK_MEDIUM_AND_ABOVE",
},
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"threshold": "BLOCK_MEDIUM_AND_ABOVE",
},
],
generation_config={
"temperature": 1,
"top_p": 0.95,
"top_k": 64,
"max_output_tokens": 8192,
"response_mime_type": "text/plain",
}
},
)

responses = []

for chunk in chunks:
text_content = chunk["text"]
if entities_component_1 and entities_component_2:
response_json = check_for_connecting_relation(text_content, entities_component_1, entities_component_2, genai_client)
else:
response_json = extract_entities_and_relations(text_content, genai_client)

response_json = extract_entities_and_relations(text_content, genai_client)

responses.append(transform_llm_output_to_dict(response_json))

return responses
return responses
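With this change process_chunks performs plain entity/relation extraction only, while the new check_for_connecting_relation_ wrapper builds its own Gemini client for bridging two graph components. A minimal usage sketch; the sample text and entity lists are made up, and configure_genai() needs a valid Gemini API key in the environment:

    from graph_creator.gemini import check_for_connecting_relation_, process_chunks

    chunks = [{"text": "Alice founded Acme Corp. Acme Corp is based in Berlin."}]

    # one parsed dict per chunk, produced via transform_llm_output_to_dict
    responses = process_chunks(chunks)

    # ask the model for a relation bridging two disconnected components
    relation_json = check_for_connecting_relation_(
        chunks[0]["text"], ["Alice"], ["Acme Corp", "Berlin"]
    )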
48 changes: 17 additions & 31 deletions Project/backend/codebase/graph_creator/graph_creator_main.py
@@ -1,14 +1,9 @@
import json
import mimetypes
import pandas
import tempfile
import shutil

from graph_creator.gemini import process_chunks
from graph_creator.llama3 import process_chunks as groq_process_chunks
from graph_creator.models.graph_job import GraphJob
from graph_creator import pdf_handler
from graph_creator import llm_handler
from graph_creator import graph_handler
from graph_creator.services import netx_graphdb

@@ -24,15 +19,17 @@ def process_file_to_graph(g_job: GraphJob):
None
"""
# extract entities and relations
entities_and_relations = process_file_to_entities_and_relations(g_job.location)
entities_and_relations, chunks = process_file_to_entities_and_relations(
g_job.location
)

#check for error
if entities_and_relations == None:
# check for error
if entities_and_relations is None:
return
#connect graph pieces

# connect graph pieces
uuid = g_job.id
create_and_store_graph(uuid, entities_and_relations)
create_and_store_graph(uuid, entities_and_relations, chunks)


def process_file_to_entities_and_relations(file: str):
@@ -60,21 +57,18 @@ def process_file_to_entities_and_relations(file: str):
{"text": chunk.page_content} for chunk in chunks
] # Assuming chunk has 'page_content' attribute

# Define the prompt template
prompt_template = "Give all valid relation in the given: {text_content}"

# Generate response using LLM
# response_json = process_chunks(text_chunks, prompt_template)
response_json = groq_process_chunks(text_chunks, prompt_template)
response_json = groq_process_chunks(text_chunks)
print(response_json)
except Exception as e:
print(e)
response_json = None
return response_json

return response_json, chunks


def create_and_store_graph(uuid, entities_and_relations):
def create_and_store_graph(uuid, entities_and_relations, chunks):
"""
Create and store a graph based on the given entities and relations.
@@ -85,21 +79,13 @@ def create_and_store_graph(uuid, entities_and_relations):
Returns:
None
"""
# flatten the list by adding attribute chunk_id
flattened_data = []
for j in range(len(entities_and_relations)):
id = j
for i in range(len(entities_and_relations[j])):
entities_and_relations[j][i]["chunk_id"] = str(id)
flattened_data.append(entities_and_relations[j][i])

# convert data to dataframe
df_e_and_r = pandas.DataFrame(flattened_data)
df_e_and_r = graph_handler.build_flattened_dataframe(entities_and_relations)

# combine knowledge graph pieces
combined = graph_handler.connect_with_chunk_proximity(df_e_and_r)

print(combined)
# combined = graph_handler.connect_with_chunk_proximity(df_e_and_r)
for i in range(len(chunks)):
chunks[i] = chunks[i].dict()
combined = graph_handler.connect_with_llm(df_e_and_r, chunks, 30)

# get graph db service
graph_db_service = netx_graphdb.NetXGraphDB()
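Net effect of this file's changes: graph pieces are now merged by an LLM pass over the original chunks (connect_with_llm) instead of chunk proximity, and the chunks travel alongside the extracted relations. Condensed sketch of the new flow; the literal 30 is passed through as-is, and its meaning (e.g., a batch or rate limit) is an inference, not stated in the diff:

    df_e_and_r = graph_handler.build_flattened_dataframe(entities_and_relations)
    chunk_dicts = [chunk.dict() for chunk in chunks]  # chunks behave like LangChain documents here
    combined = graph_handler.connect_with_llm(df_e_and_r, chunk_dicts, 30)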
