Merge pull request #69 from amosproj/develop
sprint-04-release
nikolas-rauscher authored May 22, 2024
2 parents 5d8de15 + 1bcc135 commit 0145fdc
Showing 36 changed files with 4,290 additions and 21 deletions.
25 changes: 25 additions & 0 deletions .github/pull_request_template.md
@@ -0,0 +1,25 @@
## Description
<!-- Describe the changes made in this pull request -->
*

## Related Backlog Item
<!-- Link to the specific backlog item this pull request addresses. -->
Issue: #

---

### Context
<!-- Why are these changes necessary? -->
*


---

## Checklist:
- [ ] I have documented my changes
- [ ] I have tested the changes and they work as expected
- [ ] I have assigned this PR to someone
- [ ] I have run `make format` to format my code

### Additional references

4 changes: 2 additions & 2 deletions Project/backend/Makefile
@@ -6,7 +6,7 @@ SHELL := /bin/sh
PROJECTNAME ?= amos-knowledge-graph
APP_NAME := $(PROJECTNAME)
BACKEND_APP_NAME := $(APP_NAME)-backend
DOCKER_COMPOSE != docker compose 1> /dev/null 2> /dev/null && echo docker compose || echo docker-compose
DOCKER_COMPOSE := $(shell docker compose 1> /dev/null 2> /dev/null && echo docker compose || echo docker-compose)


define HELP
@@ -49,7 +49,7 @@ migrate:

build-dev:
DOCKER_BUILDKIT=1 COMPOSE_DOCKER_CLI_BUILD=1 $(DOCKER_COMPOSE) -f docker-compose.yml up --build -d

@cd ../frontend && sudo npm install && sudo npm run dev

stop-dev:
@$(DOCKER_COMPOSE) -f docker-compose.yml down
Empty file.
85 changes: 85 additions & 0 deletions Project/backend/codebase/graph_creator/json_to_graphml.py
@@ -0,0 +1,85 @@
import json
import networkx as nx
import pandas as pd
import logging


def json_string_to_graph(json_string):
"""
Converts a JSON string to a NetworkX graph.
Args:
json_string (str): The JSON string representing the graph.
Returns:
nx.Graph: The NetworkX graph representation of the JSON.
"""
try:
json_object = json.loads(json_string)
except json.JSONDecodeError as e:
logging.error("Invalid JSON syntax: %s", e)
return None

if not isinstance(json_object, list):
logging.error("JSON does not contain a list")
return None

graph = nx.Graph()

for relation in json_object:
if not isinstance(relation, dict):
logging.error("Relation is not a dictionary: %s", relation)
continue

required_keys = {'node_1', 'node_2', 'edge'}
if set(relation.keys()) != required_keys:
logging.error("Relation does not have exactly two nodes and one edge: %s", relation)
continue

node_1 = relation.get('node_1')
node_2 = relation.get('node_2')
edge_label = relation.get('edge')

if not isinstance(node_1, str) or not isinstance(node_2, str) or not isinstance(edge_label, str):
logging.error("Node names and edge label must be strings: %s", relation)
continue

graph.add_node(node_1)
graph.add_node(node_2)
graph.add_edge(node_1, node_2, label=edge_label)

return graph


def graph_to_dfs(graph):
"""
Converts a NetworkX graph to DataFrames for nodes and edges.
Args:
graph (nx.Graph): The NetworkX graph to convert.
Returns:
tuple: A tuple containing the nodes DataFrame and edges DataFrame.
"""
# Create DataFrames for nodes and edges
nodes_df = pd.DataFrame(graph.nodes(), columns=["Node"])
edges_df = pd.DataFrame([(u, v, d['label']) for u, v, d in graph.edges(data=True)],
columns=["Node_1", "Node_2", "Edge"])

return nodes_df, edges_df


def graph_to_graphml(graph):
"""
Converts a NetworkX graph to a GraphML string.
Args:
graph (nx.Graph): The NetworkX graph to convert.
Returns:
str: The GraphML string representation of the graph.
"""
    # generate_graphml yields the document line by line, so join into one string
    return '\n'.join(nx.generate_graphml(graph, encoding='utf-8', prettyprint=True))
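
A minimal usage sketch for these helpers (hypothetical, not part of the commit; the sample relations and the import path are assumptions):

import json

from graph_creator.json_to_graphml import (
    json_string_to_graph,
    graph_to_dfs,
    graph_to_graphml,
)

# Relations in the exact shape the parser expects: the keys
# node_1, node_2, and edge, each with a string value.
relations = [
    {'node_1': 'Alice', 'node_2': 'Bob', 'edge': 'knows'},
    {'node_1': 'Bob', 'node_2': 'Carol', 'edge': 'works with'},
]

graph = json_string_to_graph(json.dumps(relations))
if graph is not None:
    nodes_df, edges_df = graph_to_dfs(graph)
    print(nodes_df)   # one row per node
    print(edges_df)   # one row per (Node_1, Node_2, Edge) triple
    print(graph_to_graphml(graph))  # GraphML serialization of the same graph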
36 changes: 36 additions & 0 deletions Project/backend/codebase/graph_creator/pdf_handler.py
@@ -0,0 +1,36 @@
import os
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader


def process_pdf_into_chunks(filename):
"""
    Takes a PDF file and splits it into overlapping text chunks
    Parameters
    ----------
    filename : str
        The path of the PDF file to be processed
    Returns
    -------
    list
        a list of langchain Document chunks holding the PDF's text together
        with metadata mapping each chunk back to its source page
"""

# load pdf
if not os.path.isfile(filename):
raise ValueError("Invalid PDF file path.")
if not filename.endswith(".pdf"):
raise ValueError("File is not a PDF.")
loader = PyPDFLoader(filename)
docs = loader.load()

if not docs:
raise ValueError("Failed to load PDF documents.")

# splits text into chunks including metadata for mapping from chunk to pdf page (splits[0].metadata['page'])
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

return splits
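
A minimal usage sketch for the chunker (hypothetical; the file path and the import path are assumptions):

from graph_creator.pdf_handler import process_pdf_into_chunks

chunks = process_pdf_into_chunks('documents/example.pdf')
for chunk in chunks[:3]:
    # Each chunk is a langchain Document; metadata['page'] maps it back
    # to the page of the source PDF it was extracted from.
    print(chunk.metadata['page'], chunk.page_content[:80])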
4 changes: 2 additions & 2 deletions Project/backend/codebase/lifetime.py
@@ -26,7 +26,7 @@ def _setup_db(app: FastAPI) -> None:  # pragma: no cover


def register_startup_event(
app: FastAPI,
app: FastAPI,
) -> Callable[[], Awaitable[None]]: # pragma: no cover
"""
Actions to run on application startup.
@@ -49,7 +49,7 @@ async def _startup() -> None:  # noqa: WPS430


def register_shutdown_event(
app: FastAPI,
app: FastAPI,
) -> Callable[[], Awaitable[None]]: # pragma: no cover
"""
Actions to run on application's shutdown.
6 changes: 2 additions & 4 deletions Project/backend/codebase/migrations/env.py
@@ -1,6 +1,5 @@
import os
from logging.config import fileConfig
from sys import modules

from sqlalchemy import engine_from_config
from sqlalchemy import pool
@@ -31,6 +30,7 @@
# my_important_option = config.get_main_option("my_important_option")
# ... etc.


def get_url():
user = os.getenv("POSTGRES_USER", "amos")
password = os.getenv("POSTGRES_PASSWORD", "password")
@@ -82,9 +82,7 @@ def run_migrations_online() -> None:
)

with connectable.connect() as connection:
context.configure(
connection=connection, target_metadata=target_metadata
)
context.configure(connection=connection, target_metadata=target_metadata)

with context.begin_transaction():
context.run_migrations()
@@ -5,31 +5,38 @@
Create Date: 2024-05-12 23:49:26.779256
"""

from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = 'ce5e8cc6632d'
revision: str = "ce5e8cc6632d"
down_revision: Union[str, None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('healthcheck',
sa.Column('id', sa.Uuid(), nullable=False),
sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True),
sa.Column('updated_at', sa.DateTime(timezone=True), nullable=True),
sa.PrimaryKeyConstraint('id')
op.create_table(
"healthcheck",
sa.Column("id", sa.Uuid(), nullable=False),
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=True,
),
sa.Column("updated_at", sa.DateTime(timezone=True), nullable=True),
sa.PrimaryKeyConstraint("id"),
)
# ### end Alembic commands ###


def downgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.drop_table('healthcheck')
op.drop_table("healthcheck")
# ### end Alembic commands ###
4 changes: 2 additions & 2 deletions Project/backend/codebase/monitoring/dao/healthcheck_dao.py
@@ -32,8 +32,8 @@ async def get_all_healthchecks(self, limit: int, offset: int) -> List[HealthChec
return list(raw_checks.scalars().fetchall())

async def get(
self,
obj_id: uuid.UUID,
self,
obj_id: uuid.UUID,
) -> HealthCheck:
"""
Get specific healthcheck model.
47 changes: 44 additions & 3 deletions Project/backend/codebase/monitoring/router.py
@@ -1,10 +1,13 @@
from typing import List

from fastapi import APIRouter, Depends
from fastapi import UploadFile, File, HTTPException

from monitoring.dao.healthcheck_dao import HealthCheckDAO
from monitoring.schemas.healthcheck import HealthCheckResponse

import os

router = APIRouter()


@@ -31,9 +34,9 @@ async def create_check(check_dao: HealthCheckDAO = Depends()) -> {}:

@router.get("/list-checks", response_model=List[HealthCheckResponse])
async def get_dummy_models(
limit: int = 10,
offset: int = 0,
check_dao: HealthCheckDAO = Depends(),
limit: int = 10,
offset: int = 0,
check_dao: HealthCheckDAO = Depends(),
) -> List[HealthCheckResponse]:
"""
Retrieve all health-check objects from the database.
@@ -48,3 +51,41 @@
"""

return await check_dao.get_all_healthchecks(limit=limit, offset=offset)


# Endpoint for uploading PDF documents
@router.post("/upload/")
async def upload_pdf(file: UploadFile = File(...)):
"""
Uploads a PDF document.
Args:
file (UploadFile): PDF document to be uploaded.
Returns:
dict: A dictionary containing the filename and status.
filename (str): Name of the uploaded file.
        status (str): Status message indicating a successful upload.
Raises:
HTTPException: If the uploaded file type is not valid (not PDF).
"""

# Check if the uploaded file type is correct
if file.content_type != "application/pdf":
raise HTTPException(status_code=400, detail="Uploaded file is not a PDF.")

# Define the directory for saving files
documents_directory = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "documents"
)

if not os.path.exists(documents_directory):
os.makedirs(documents_directory)

# Save file
file_path = os.path.join(documents_directory, file.filename)
with open(file_path, "wb") as f:
f.write(file.file.read())

return {"filename": file.filename, "status": "uploaded successfully"}
38 changes: 38 additions & 0 deletions Project/backend/codebase/monitoring/tests/test_monitoring.py
@@ -1,3 +1,8 @@

import os
import pytest

from reportlab.pdfgen import canvas
from starlette import status

MONITORING_API = "monitoring"
@@ -7,3 +12,36 @@ def test_health_check(client):
url = client.app.url_path_for("health_check")
response = client.get(url)
assert response.status_code == status.HTTP_200_OK



@pytest.mark.api
def test_upload_pdf(client):

# Create a PDF file for testing
pdf_file = "test_document.pdf"
file_path = os.path.join("monitoring/tests/", pdf_file)

file = canvas.Canvas(file_path)
file.drawString(100, 750, "This is a test PDF!")
file.save()

# Upload PDF file
with open(file_path, "rb") as f:
response = client.post(
client.app.url_path_for("upload_pdf"),
files={"file": (pdf_file, f, "application/pdf")},
)

# Check the response
assert response.status_code == 200
assert response.json() == {"filename": pdf_file, "status": "uploaded successfully"}

# Check if the file was saved in the correct directory
saved_file_path = os.path.join("monitoring/documents", pdf_file)
assert os.path.exists(saved_file_path)

# Remove the test files
os.remove(file_path)
os.remove(saved_file_path)
