Merge sprint-04-release pull request #69 from amosproj/develop

sprint-04-release
amosproj · May 22, 2024 · 0145fdc · 0145fdc
2 parents 5d8de15 + 1bcc135
commit 0145fdc
Show file tree

Hide file tree

Showing 36 changed files with 4,290 additions and 21 deletions.
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
@@ -0,0 +1,25 @@
+## Description
+<!-- Describe the changes made in this pull request -->
+* 
+
+## Related Backlog Item
+<!-- Link to the specific backlog item this pull request addresses. -->
+Issue: #
+
+---
+
+### Context
+<!-- Why are these changes necessary? -->
+* 
+
+
+---
+
+## Checklist:
+- [ ] I have documented my changes
+- [ ] I have tested the changes and they work as expected
+- [ ] I have assigned this PR to someone
+- [ ] I have run `make format` to format my code
+
+### Additional references
+
diff --git a/Project/backend/Makefile b/Project/backend/Makefile
@@ -6,7 +6,7 @@ SHELL := /bin/sh
 PROJECTNAME ?= amos-knowledge-graph
 APP_NAME := $(PROJECTNAME)
 BACKEND_APP_NAME := $(APP_NAME)-backend
-DOCKER_COMPOSE != docker compose 1> /dev/null 2> /dev/null && echo docker compose || echo docker-compose
+# Prefer the `docker compose` plugin; fall back to the standalone `docker-compose` binary when the plugin is missing.
+DOCKER_COMPOSE := $(shell docker compose 1> /dev/null 2> /dev/null && echo docker compose || echo docker-compose)
 
 
 define HELP
@@ -49,7 +49,7 @@ migrate:
 
 build-dev:
 	DOCKER_BUILDKIT=1 COMPOSE_DOCKER_CLI_BUILD=1 $(DOCKER_COMPOSE) -f docker-compose.yml up --build -d
-
+	@cd ../frontend &&  sudo npm install && sudo npm run dev
 
 stop-dev:
 	@$(DOCKER_COMPOSE) -f docker-compose.yml down

diff --git a/Project/backend/codebase/graph_creator/__init__.py b/Project/backend/codebase/graph_creator/__init__.py
diff --git a/Project/backend/codebase/graph_creator/json_to_graphml.py b/Project/backend/codebase/graph_creator/json_to_graphml.py
@@ -0,0 +1,85 @@
+import json
+import networkx as nx
+import pandas as pd
+import logging
+
+
+def json_string_to_graph(json_string):
+    """
+    Build an undirected NetworkX graph from a JSON-encoded list of relations.
+
+    Every relation has to be a dictionary with exactly the keys ``node_1``,
+    ``node_2`` and ``edge``, each mapped to a string. Relations that do not
+    match this shape are logged as errors and skipped.
+
+    Args:
+        json_string (str): JSON text that should decode to a list of relations.
+
+    Returns:
+        nx.Graph: The graph built from the valid relations, or None when the
+        text is not valid JSON or does not decode to a list.
+
+    """
+    try:
+        relations = json.loads(json_string)
+    except json.JSONDecodeError as exc:
+        logging.error("Invalid JSON syntax: %s", exc)
+        return None
+
+    if not isinstance(relations, list):
+        logging.error("JSON does not contain a list")
+        return None
+
+    expected_keys = {'node_1', 'node_2', 'edge'}
+    graph = nx.Graph()
+
+    for relation in relations:
+        if not isinstance(relation, dict):
+            logging.error("Relation is not a dictionary: %s", relation)
+            continue
+
+        # Extra or missing keys both disqualify the relation.
+        if set(relation) != expected_keys:
+            logging.error("Relation does not have exactly two nodes and one edge: %s", relation)
+            continue
+
+        source, target, label = (relation[key] for key in ('node_1', 'node_2', 'edge'))
+        if not all(isinstance(value, str) for value in (source, target, label)):
+            logging.error("Node names and edge label must be strings: %s", relation)
+            continue
+
+        # add_edge creates any endpoint that is not in the graph yet.
+        graph.add_edge(source, target, label=label)
+
+    return graph
+
+
+def graph_to_dfs(graph):
+    """
+    Converts a NetworkX graph to DataFrames for nodes and edges.
+
+    Args:
+        graph (nx.Graph): The NetworkX graph to convert.
+
+    Returns:
+        tuple: ``(nodes_df, edges_df)``. ``nodes_df`` has the single column
+        ``Node``; ``edges_df`` has the columns ``Node_1``, ``Node_2`` and
+        ``Edge``, where ``Edge`` is the edge's ``label`` attribute or None
+        for an edge that carries no label.
+
+    """
+    nodes_df = pd.DataFrame(graph.nodes(), columns=["Node"])
+
+    # .get() keeps edges without a 'label' attribute in the table instead of
+    # aborting the whole conversion with a KeyError.
+    edge_rows = [(u, v, data.get('label')) for u, v, data in graph.edges(data=True)]
+    edges_df = pd.DataFrame(edge_rows, columns=["Node_1", "Node_2", "Edge"])
+
+    return nodes_df, edges_df
+
+
+def graph_to_graphml(graph):
+    """
+    Serializes a NetworkX graph as pretty-printed, UTF-8 GraphML.
+
+    Args:
+        graph (nx.Graph): The NetworkX graph to convert.
+
+    Returns:
+        The result of ``nx.generate_graphml``, passed through unchanged.
+        NOTE(review): NetworkX documents this as a generator of GraphML
+        lines rather than a single str - confirm callers join the lines.
+
+    """
+    graphml_lines = nx.generate_graphml(graph, encoding='utf-8', prettyprint=True)
+    return graphml_lines
diff --git a/Project/backend/codebase/graph_creator/pdf_handler.py b/Project/backend/codebase/graph_creator/pdf_handler.py
@@ -0,0 +1,36 @@
+import os
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_community.document_loaders import PyPDFLoader
+
+
+def process_pdf_into_chunks(filename):
+    """
+    Loads a pdf file and splits its text into overlapping chunks
+
+    Parameters
+    ----------
+    filename : str or os.PathLike
+        Path of the pdf file to be processed; the ``.pdf`` extension is
+        matched case-insensitively
+
+    Returns
+    -------
+    list
+        the chunks returned by ``RecursiveCharacterTextSplitter.split_documents``
+        (chunk_size=1000, chunk_overlap=200); each chunk keeps metadata that
+        maps it back to its pdf page (``splits[0].metadata['page']``)
+
+    Raises
+    ------
+    ValueError
+        If the path is not an existing file, the file name does not end in
+        ``.pdf``, or the loader returns no documents
+    """
+
+    # Normalise path-like objects once so the string checks below work on them.
+    path = os.fspath(filename)
+
+    if not os.path.isfile(path):
+        raise ValueError("Invalid PDF file path.")
+    # Compare case-insensitively so "REPORT.PDF" is accepted as well.
+    if not path.lower().endswith(".pdf"):
+        raise ValueError("File is not a PDF.")
+
+    # load pdf
+    loader = PyPDFLoader(path)
+    docs = loader.load()
+
+    if not docs:
+        raise ValueError("Failed to load PDF documents.")
+
+    # splits text into chunks including metadata for mapping from chunk to pdf page
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+    splits = text_splitter.split_documents(docs)
+
+    return splits
diff --git a/Project/backend/codebase/lifetime.py b/Project/backend/codebase/lifetime.py
@@ -26,7 +26,7 @@ def _setup_db(app: FastAPI) -> None:  # pragma: no cover
 
 
 def register_startup_event(
-        app: FastAPI,
+    app: FastAPI,
 ) -> Callable[[], Awaitable[None]]:  # pragma: no cover
     """
     Actions to run on application startup.
@@ -49,7 +49,7 @@ async def _startup() -> None:  # noqa: WPS430
 
 
 def register_shutdown_event(
-        app: FastAPI,
+    app: FastAPI,
 ) -> Callable[[], Awaitable[None]]:  # pragma: no cover
     """
     Actions to run on application's shutdown.

diff --git a/Project/backend/codebase/migrations/env.py b/Project/backend/codebase/migrations/env.py
@@ -1,6 +1,5 @@
 import os
 from logging.config import fileConfig
-from sys import modules
 
 from sqlalchemy import engine_from_config
 from sqlalchemy import pool
@@ -31,6 +30,7 @@
 # my_important_option = config.get_main_option("my_important_option")
 # ... etc.
 
+
 def get_url():
     user = os.getenv("POSTGRES_USER", "amos")
     password = os.getenv("POSTGRES_PASSWORD", "password")
@@ -82,9 +82,7 @@ def run_migrations_online() -> None:
     )
 
     with connectable.connect() as connection:
-        context.configure(
-            connection=connection, target_metadata=target_metadata
-        )
+        context.configure(connection=connection, target_metadata=target_metadata)
 
         with context.begin_transaction():
             context.run_migrations()

diff --git a/Project/backend/codebase/migrations/versions/ce5e8cc6632d_initial_migrations.py b/Project/backend/codebase/migrations/versions/ce5e8cc6632d_initial_migrations.py
@@ -5,31 +5,38 @@
 Create Date: 2024-05-12 23:49:26.779256
 
 """
+
 from typing import Sequence, Union
 
 from alembic import op
 import sqlalchemy as sa
 
 
 # revision identifiers, used by Alembic.
-revision: str = 'ce5e8cc6632d'
+revision: str = "ce5e8cc6632d"
 down_revision: Union[str, None] = None
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 
 
 def upgrade() -> None:
     # ### commands auto generated by Alembic - please adjust! ###
-    op.create_table('healthcheck',
-    sa.Column('id', sa.Uuid(), nullable=False),
-    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True),
-    sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
-    sa.PrimaryKeyConstraint('id')
+    op.create_table(
+        "healthcheck",
+        sa.Column("id", sa.Uuid(), nullable=False),
+        sa.Column(
+            "created_at",
+            sa.DateTime(timezone=True),
+            server_default=sa.text("now()"),
+            nullable=True,
+        ),
+        sa.Column("updated_at", sa.DateTime(timezone=True), nullable=True),
+        sa.PrimaryKeyConstraint("id"),
     )
     # ### end Alembic commands ###
 
 
 def downgrade() -> None:
     # ### commands auto generated by Alembic - please adjust! ###
-    op.drop_table('healthcheck')
+    op.drop_table("healthcheck")
     # ### end Alembic commands ###
diff --git a/Project/backend/codebase/monitoring/dao/healthcheck_dao.py b/Project/backend/codebase/monitoring/dao/healthcheck_dao.py
@@ -32,8 +32,8 @@ async def get_all_healthchecks(self, limit: int, offset: int) -> List[HealthChec
         return list(raw_checks.scalars().fetchall())
 
     async def get(
-            self,
-            obj_id: uuid.UUID,
+        self,
+        obj_id: uuid.UUID,
     ) -> HealthCheck:
         """
         Get specific healthcheck model.

diff --git a/Project/backend/codebase/monitoring/router.py b/Project/backend/codebase/monitoring/router.py
@@ -1,10 +1,13 @@
 from typing import List
 
 from fastapi import APIRouter, Depends
+from fastapi import UploadFile, File, HTTPException
 
 from monitoring.dao.healthcheck_dao import HealthCheckDAO
 from monitoring.schemas.healthcheck import HealthCheckResponse
 
+import os
+
 router = APIRouter()
 
 
@@ -31,9 +34,9 @@ async def create_check(check_dao: HealthCheckDAO = Depends()) -> {}:
 
 @router.get("/list-checks", response_model=List[HealthCheckResponse])
 async def get_dummy_models(
-        limit: int = 10,
-        offset: int = 0,
-        check_dao: HealthCheckDAO = Depends(),
+    limit: int = 10,
+    offset: int = 0,
+    check_dao: HealthCheckDAO = Depends(),
 ) -> List[HealthCheckResponse]:
     """
     Retrieve all health-check objects from the database.
@@ -48,3 +51,41 @@ async def get_dummy_models(
     """
 
     return await check_dao.get_all_healthchecks(limit=limit, offset=offset)
+
+
+# Endpoint for uploading PDF documents
+@router.post("/upload/")
+async def upload_pdf(file: UploadFile = File(...)):
+    """
+    Uploads a PDF document.
+
+    The file is stored in a "documents" directory next to this module, under
+    the base name of the client-supplied filename.
+
+    Args:
+        file (UploadFile): PDF document to be uploaded.
+
+    Returns:
+        dict: A dictionary containing the filename and status.
+            filename (str): Name under which the file was saved.
+            status (str): Message stating that the upload was successful.
+
+    Raises:
+        HTTPException: If the uploaded file type is not valid (not PDF) or
+            the upload carries no usable file name.
+    """
+
+    # Check if the uploaded file type is correct
+    if file.content_type != "application/pdf":
+        raise HTTPException(status_code=400, detail="Uploaded file is not a PDF.")
+
+    # The filename is client-controlled: keep only its last path component so
+    # names like "../../app.py" or "/etc/passwd" cannot escape the directory.
+    safe_name = os.path.basename((file.filename or "").replace("\\", "/"))
+    if safe_name in ("", ".", ".."):
+        raise HTTPException(
+            status_code=400, detail="Uploaded file has no valid name."
+        )
+
+    # Define the directory for saving files
+    documents_directory = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "documents"
+    )
+
+    # exist_ok avoids a race between checking for and creating the directory
+    os.makedirs(documents_directory, exist_ok=True)
+
+    # Save file in chunks through the async API instead of one blocking read
+    file_path = os.path.join(documents_directory, safe_name)
+    with open(file_path, "wb") as f:
+        while True:
+            chunk = await file.read(1024 * 1024)
+            if not chunk:
+                break
+            f.write(chunk)
+
+    return {"filename": safe_name, "status": "uploaded successfully"}
diff --git a/Project/backend/codebase/monitoring/tests/test_monitoring.py b/Project/backend/codebase/monitoring/tests/test_monitoring.py
@@ -1,3 +1,8 @@
+
+import os
+import pytest
+
+from reportlab.pdfgen import canvas
 from starlette import status
 
 MONITORING_API = "monitoring"
@@ -7,3 +12,36 @@ def test_health_check(client):
     url = client.app.url_path_for("health_check")
     response = client.get(url)
     assert response.status_code == status.HTTP_200_OK
+
+
+
+@pytest.mark.api
+def test_upload_pdf(client):
+    """
+    Uploads a generated PDF and checks the response and the saved copy.
+
+    Both the generated PDF and the uploaded copy are removed afterwards,
+    even when an assertion fails.
+    """
+
+    pdf_file = "test_document.pdf"
+    file_path = os.path.join("monitoring/tests/", pdf_file)
+    saved_file_path = os.path.join("monitoring/documents", pdf_file)
+
+    try:
+        # Create a PDF file for testing
+        pdf_canvas = canvas.Canvas(file_path)
+        pdf_canvas.drawString(100, 750, "This is a test PDF!")
+        pdf_canvas.save()
+
+        # Upload PDF file
+        with open(file_path, "rb") as f:
+            response = client.post(
+                client.app.url_path_for("upload_pdf"),
+                files={"file": (pdf_file, f, "application/pdf")},
+            )
+
+        # Check the response
+        assert response.status_code == 200
+        assert response.json() == {
+            "filename": pdf_file,
+            "status": "uploaded successfully",
+        }
+
+        # Check if the file was saved in the correct directory
+        assert os.path.exists(saved_file_path)
+    finally:
+        # Remove the test files, whether or not the assertions passed
+        for path in (file_path, saved_file_path):
+            if os.path.exists(path):
+                os.remove(path)
+