Commit

Merge pull request #144 from amosproj/develop
Develop
get4flo authored Jun 12, 2024
2 parents a1b42c1 + 3acad49 commit ec24439
Showing 41 changed files with 1,583 additions and 400 deletions.
32 changes: 21 additions & 11 deletions Makefile
@@ -1,6 +1,6 @@
#Root Makefile

.PHONY: all start stop help backend
.PHONY: all start stop help backend frontend lint format lint-all format-all

all: help

@@ -16,18 +16,28 @@ stop-dev:
backend-%:
@$(MAKE) -C Project/backend $*

# Pass through to frontend Makefile
frontend-%:
@$(MAKE) -C Project/frontend $*

lint:
@$(MAKE) -C Project/backend lint
@$(MAKE) -C Project/frontend lint

format:
@$(MAKE) -C Project/backend format
@$(MAKE) -C Project/frontend format

lint-all: lint
format-all: format

help:
@echo "Usage: make [target]"
@echo "Targets:"
@echo " build-dev - Start both frontend and backend"
@echo " stop-dev - Stop both frontend and backend"
@echo " backend-<cmd> - Run a backend command (e.g., make backend-build-dev)"
@echo " Available backend commands:"
@echo " backend-build-dev - Build the backend development environment"
@echo " backend-stop-dev - Stop the backend development environment"
@echo " backend-lint - Run linter on the backend"
@echo " backend-format - Run formatter on the backend"
@echo " backend-test - Run tests on the backend"
@echo " backend-migrations - Create migration files for the backend"
@echo " backend-migrate - Run migrations for the backend"
@echo " help - Show this help message"
@echo " backend-<cmd> - Run a backend command (e.g., make backend-build-dev)"
@echo " frontend-<cmd> - Run a frontend command (e.g., make frontend-start-dev)"
@echo " lint - Lint both frontend and backend"
@echo " format - Format both frontend and backend"
@echo " help - Show this help message"
2 changes: 1 addition & 1 deletion Project/backend/.env.example
@@ -15,7 +15,7 @@ POSTGRES_PASSWORD=password
POSTGRES_DB=amos
POSTGRES_PORT=5432
POSTGRES_HOST=amos-db
JANUS_PORT=8182


#API Keys
GROQ_API_KEY=API_KEY
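The new JANUS_PORT setting is read from the environment like the other variables. A minimal sketch of picking it up with python-dotenv; the fallback default mirrors .env.example but is an assumption, not code from this PR:

    import os
    from dotenv import load_dotenv

    load_dotenv()  # load the backend .env file into the process environment
    # 8182 mirrors .env.example; the fallback itself is an assumption
    janus_port = int(os.getenv("JANUS_PORT", "8182"))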
110 changes: 90 additions & 20 deletions Project/backend/codebase/graph_creator/gemini.py
@@ -1,9 +1,6 @@
import os
import json
from datetime import datetime
from dotenv import load_dotenv
import google.generativeai as genai
from graph_creator.graph_handler import extract_relation_from_llm_output
from graph_creator.services.json_handler import transform_llm_output_to_dict


@@ -17,6 +14,7 @@ def configure_genai():
raise ValueError("API key not found in environment variables")
genai.configure(api_key=api_key)


def serialize_chat_history(history):
"""
Convert the chat history to a serializable format.
@@ -31,6 +29,7 @@ def serialize_chat_history(history):
serialized_history.append(serialized_entry)
return serialized_history


def extract_entities_and_relations(chunk, genai_client):
"""
Extract entities and relations from a chunk using the Gemini client.
@@ -59,14 +58,17 @@ def extract_entities_and_relations(chunk, genai_client):
"]"
)
USER_PROMPT = f"context: ```{chunk}``` \n\n output: "

chat_session = genai_client.start_chat(history=[])
message = SYS_PROMPT + USER_PROMPT
response = chat_session.send_message(message)

return response.text

def check_for_connecting_relation(chunk, entities_component_1, entities_component_2, genai_client):

def check_for_connecting_relation(
chunk, entities_component_1, entities_component_2, genai_client
):
"""
Check for connecting relation between entities of two components.
"""
@@ -86,44 +88,112 @@ def check_for_connecting_relation(chunk, entities_component_1, entities_componen
"}"
)
USER_PROMPT = f"text chunk: ```{chunk}``` \n\n output: "

chat_session = genai_client.start_chat(history=[])
message = SYS_PROMPT + USER_PROMPT
response = chat_session.send_message(message)

return response.text

def process_chunks(chunks, prompt_template, entities_component_1=None, entities_component_2=None):

def check_for_connecting_relation_(
text_chunk, entities_component_1, entities_component_2
):
"""
Takes a text chunk and two lists of entities (one from each component in the graph)
and tries to extract a connecting relation between any entity of
entities_component_1 and any entity of entities_component_2
Parameters
----------
text_chunk : str
The text chunk to be processed
entities_component_1 : list
List of entities from the first component
entities_component_2 : list
List of entities from the second component
Returns
-------
str
The response of the LLM as a string
"""
configure_genai()
genai_client = genai.GenerativeModel(
model_name="gemini-1.5-pro-latest",
safety_settings=[
{
"category": "HARM_CATEGORY_HARASSMENT",
"threshold": "BLOCK_MEDIUM_AND_ABOVE",
},
{
"category": "HARM_CATEGORY_HATE_SPEECH",
"threshold": "BLOCK_MEDIUM_AND_ABOVE",
},
{
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
"threshold": "BLOCK_MEDIUM_AND_ABOVE",
},
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"threshold": "BLOCK_MEDIUM_AND_ABOVE",
},
],
generation_config={
"temperature": 1,
"top_p": 0.95,
"top_k": 64,
"max_output_tokens": 8192,
"response_mime_type": "text/plain",
},
)

return check_for_connecting_relation(
text_chunk, entities_component_1, entities_component_2, genai_client
)


def process_chunks(chunks):
"""
Process a list of chunks through the generative model.
"""
configure_genai()
genai_client = genai.GenerativeModel(
model_name="gemini-1.5-pro-latest",
safety_settings=[
{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
{"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
{"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
{"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
{
"category": "HARM_CATEGORY_HARASSMENT",
"threshold": "BLOCK_MEDIUM_AND_ABOVE",
},
{
"category": "HARM_CATEGORY_HATE_SPEECH",
"threshold": "BLOCK_MEDIUM_AND_ABOVE",
},
{
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
"threshold": "BLOCK_MEDIUM_AND_ABOVE",
},
{
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
"threshold": "BLOCK_MEDIUM_AND_ABOVE",
},
],
generation_config={
"temperature": 1,
"top_p": 0.95,
"top_k": 64,
"max_output_tokens": 8192,
"response_mime_type": "text/plain",
}
},
)

responses = []

for chunk in chunks:
text_content = chunk["text"]
if entities_component_1 and entities_component_2:
response_json = check_for_connecting_relation(text_content, entities_component_1, entities_component_2, genai_client)
else:
response_json = extract_entities_and_relations(text_content, genai_client)

response_json = extract_entities_and_relations(text_content, genai_client)

responses.append(transform_llm_output_to_dict(response_json))

return responses
return responses
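With this change process_chunks performs plain entity/relation extraction only, while the new check_for_connecting_relation_ wrapper builds its own Gemini client for bridging two graph components. A minimal usage sketch; the sample text and entity lists are made up, and configure_genai() needs a valid Gemini API key in the environment:

    from graph_creator.gemini import check_for_connecting_relation_, process_chunks

    chunks = [{"text": "Alice founded Acme Corp. Acme Corp is based in Berlin."}]

    # one parsed dict per chunk, produced via transform_llm_output_to_dict
    responses = process_chunks(chunks)

    # ask the model for a relation bridging two disconnected components
    relation_json = check_for_connecting_relation_(
        chunks[0]["text"], ["Alice"], ["Acme Corp", "Berlin"]
    )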
48 changes: 17 additions & 31 deletions Project/backend/codebase/graph_creator/graph_creator_main.py
@@ -1,14 +1,9 @@
import json
import mimetypes
import pandas
import tempfile
import shutil

from graph_creator.gemini import process_chunks
from graph_creator.llama3 import process_chunks as groq_process_chunks
from graph_creator.models.graph_job import GraphJob
from graph_creator import pdf_handler
from graph_creator import llm_handler
from graph_creator import graph_handler
from graph_creator.services import netx_graphdb

@@ -24,15 +19,17 @@ def process_file_to_graph(g_job: GraphJob):
None
"""
# extract entities and relations
entities_and_relations = process_file_to_entities_and_relations(g_job.location)
entities_and_relations, chunks = process_file_to_entities_and_relations(
g_job.location
)

#check for error
if entities_and_relations == None:
# check for error
if entities_and_relations is None:
return
#connect graph pieces

# connect graph pieces
uuid = g_job.id
create_and_store_graph(uuid, entities_and_relations)
create_and_store_graph(uuid, entities_and_relations, chunks)


def process_file_to_entities_and_relations(file: str):
@@ -60,21 +57,18 @@ def process_file_to_entities_and_relations(file: str):
{"text": chunk.page_content} for chunk in chunks
] # Assuming chunk has 'page_content' attribute

# Define the prompt template
prompt_template = "Give all valid relation in the given: {text_content}"

# Generate response using LLM
# response_json = process_chunks(text_chunks, prompt_template)
response_json = groq_process_chunks(text_chunks, prompt_template)
response_json = groq_process_chunks(text_chunks)
print(response_json)
except Exception as e:
print(e)
response_json = None
return response_json

return response_json, chunks


def create_and_store_graph(uuid, entities_and_relations):
def create_and_store_graph(uuid, entities_and_relations, chunks):
"""
Create and store a graph based on the given entities and relations.
@@ -85,21 +79,13 @@ def create_and_store_graph(uuid, entities_and_relations):
Returns:
None
"""
# flatten the list by adding attribute chunk_id
flattened_data = []
for j in range(len(entities_and_relations)):
id = j
for i in range(len(entities_and_relations[j])):
entities_and_relations[j][i]["chunk_id"] = str(id)
flattened_data.append(entities_and_relations[j][i])

# convert data to dataframe
df_e_and_r = pandas.DataFrame(flattened_data)
df_e_and_r = graph_handler.build_flattened_dataframe(entities_and_relations)

# combine knowledge graph pieces
combined = graph_handler.connect_with_chunk_proximity(df_e_and_r)

print(combined)
# combined = graph_handler.connect_with_chunk_proximity(df_e_and_r)
for i in range(len(chunks)):
chunks[i] = chunks[i].dict()
combined = graph_handler.connect_with_llm(df_e_and_r, chunks, 30)

# get graph db service
graph_db_service = netx_graphdb.NetXGraphDB()
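Net effect of this file's changes: graph pieces are now merged by an LLM pass over the original chunks (connect_with_llm) instead of chunk proximity, and the chunks travel alongside the extracted relations. Condensed sketch of the new flow; the literal 30 is passed through as-is, and its meaning (e.g., a batch or rate limit) is an inference, not stated in the diff:

    df_e_and_r = graph_handler.build_flattened_dataframe(entities_and_relations)
    chunk_dicts = [chunk.dict() for chunk in chunks]  # chunks behave like LangChain documents here
    combined = graph_handler.connect_with_llm(df_e_and_r, chunk_dicts, 30)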
