generated from amosproj/amos202Xss0Y-projname
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge sprint-04-release pull request #69 from amosproj/develop
sprint-04-release
- Loading branch information
Showing
36 changed files
with
4,290 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
## Description | ||
<!-- Describe the changes made in this pull request --> | ||
* | ||
|
||
## Related Backlog Item | ||
<!-- Link to the specific backlog item this pull request addresses. --> | ||
Issue: # | ||
|
||
--- | ||
|
||
### Context | ||
<!-- Why are these changes necessary? --> | ||
* | ||
|
||
|
||
--- | ||
|
||
## Checklist: | ||
- [ ] I have documented my changes | ||
- [ ] I have tested the changes and they work as expected | ||
- [ ] I have assigned this PR to someone | ||
- [ ] I have run `make format` to format my code | ||
|
||
### Additional references | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
import json | ||
import networkx as nx | ||
import pandas as pd | ||
import logging | ||
|
||
|
||
def json_string_to_graph(json_string):
    """
    Converts a JSON string to a NetworkX graph.

    The JSON document must be a list of relation objects. Each relation must
    have exactly the keys 'node_1', 'node_2' and 'edge', all with string
    values. Relations that do not match this shape are logged and skipped;
    the remaining relations are still added to the graph.

    Args:
        json_string (str): The JSON string representing the graph.

    Returns:
        nx.Graph or None: The graph built from the valid relations, with the
        edge text stored in the 'label' edge attribute. None is returned when
        the input cannot be parsed as JSON or its top level is not a list.
    """
    try:
        json_object = json.loads(json_string)
    except (json.JSONDecodeError, TypeError) as e:
        # TypeError is raised by json.loads for non-string input (e.g. None);
        # treat it like any other unparsable input instead of crashing.
        logging.error("Invalid JSON syntax: %s", e)
        return None

    if not isinstance(json_object, list):
        logging.error("JSON does not contain a list")
        return None

    required_keys = {'node_1', 'node_2', 'edge'}
    graph = nx.Graph()

    for relation in json_object:
        if not isinstance(relation, dict):
            logging.error("Relation is not a dictionary: %s", relation)
            continue

        if set(relation.keys()) != required_keys:
            logging.error("Relation does not have exactly two nodes and one edge: %s", relation)
            continue

        node_1 = relation['node_1']
        node_2 = relation['node_2']
        edge_label = relation['edge']

        if not all(isinstance(value, str) for value in (node_1, node_2, edge_label)):
            logging.error("Node names and edge label must be strings: %s", relation)
            continue

        # add_edge creates both endpoints when they are missing, so no
        # separate add_node calls are needed.
        graph.add_edge(node_1, node_2, label=edge_label)

    return graph
|
||
|
||
def graph_to_dfs(graph):
    """
    Converts a NetworkX graph to DataFrames for nodes and edges.

    Args:
        graph (nx.Graph): The NetworkX graph to convert.

    Returns:
        tuple: ``(nodes_df, edges_df)``. ``nodes_df`` has the single column
        "Node". ``edges_df`` has the columns "Node_1", "Node_2" and "Edge",
        where "Edge" holds the edge's 'label' attribute, or None for an edge
        that has no such attribute.
    """
    nodes_df = pd.DataFrame(list(graph.nodes()), columns=["Node"])

    # .get() keeps edges that carry no 'label' attribute from raising
    # KeyError; such edges end up with None in the "Edge" column.
    edge_rows = [
        (source, target, attrs.get('label'))
        for source, target, attrs in graph.edges(data=True)
    ]
    edges_df = pd.DataFrame(edge_rows, columns=["Node_1", "Node_2", "Edge"])

    return nodes_df, edges_df
|
||
|
||
def graph_to_graphml(graph):
    """
    Serializes a NetworkX graph to GraphML.

    Args:
        graph (nx.Graph): The NetworkX graph to convert.

    Returns:
        generator of str: The value returned by ``nx.generate_graphml``,
        which yields the pretty-printed GraphML document one line at a time.
        This is not a single string; join the yielded lines with newlines
        when one string is needed.
    """
    return nx.generate_graphml(graph, encoding='utf-8', prettyprint=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
import os | ||
from langchain_text_splitters import RecursiveCharacterTextSplitter | ||
from langchain_community.document_loaders import PyPDFLoader | ||
|
||
|
||
def process_pdf_into_chunks(filename):
    """
    Loads a PDF file and splits its text into overlapping chunks.

    Parameters
    ----------
    filename : str or os.PathLike
        The path of the PDF file to be processed

    Returns
    -------
    list
        the chunks returned by ``RecursiveCharacterTextSplitter.split_documents``
        (configured with chunk_size=1000 and chunk_overlap=200); each chunk
        keeps metadata for mapping it back to its PDF page, e.g.
        ``splits[0].metadata['page']``

    Raises
    ------
    ValueError
        If the path is not an existing file, if it does not end in ".pdf"
        (compared case-insensitively), or if the loader returns no documents
    """
    # Normalise path-like objects so the string checks below work for them.
    path = os.fspath(filename)

    if not os.path.isfile(path):
        raise ValueError("Invalid PDF file path.")
    # Compare case-insensitively so that names such as "REPORT.PDF" pass.
    if not path.lower().endswith(".pdf"):
        raise ValueError("File is not a PDF.")

    docs = PyPDFLoader(path).load()
    if not docs:
        raise ValueError("Failed to load PDF documents.")

    # Split into chunks; the splitter carries each document's metadata over
    # to the chunks created from it.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return text_splitter.split_documents(docs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.