Skip to content

Feature/ted7 199/script updating mapping resources #578

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 108 additions & 0 deletions notebooks/update_mapping_resources.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "c6e159fa",
"metadata": {},
"source": [
"# Update Mapping Resources Notebook\n",
"\n",
"This notebook automates the process of executing SPARQL queries and saving the results in a structured JSON format. The main steps include:\n",
"\n",
"1. Reading SPARQL query files (`*.rq`) from the `ted_sws/resources/queries` directory.\n",
"2. Executing each query against the SPARQL endpoint: `https://publications.europa.eu/webapi/rdf/sparql`.\n",
"3. Formatting the query results as JSON with proper indentation.\n",
"4. Saving the formatted JSON files to the `ted_sws/resources/mapping_files` directory.\n",
"\n",
"This notebook ensures the output directory exists and prints progress messages that report which query is being executed, the HTTP status of its response, and whether its results were saved."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "12f7cab4",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from pathlib import Path\n",
"\n",
"# SPARQL endpoint that the mapping-resource queries are executed against\n",
"endpoint_url = \"https://publications.europa.eu/webapi/rdf/sparql\"\n",
"\n",
"# Indentation width used when serialising query results to JSON\n",
"JSON_IDENT = 2\n",
"\n",
"# The project root is taken to be the parent of the current working directory,\n",
"# so the notebook is expected to be run from a direct sub-folder of the project\n",
"PROJECT_PATH = Path(os.getcwd()).resolve().parent\n",
"TED_SWS_PATH = PROJECT_PATH.joinpath(\"ted_sws\")\n",
"\n",
"# Input (*.rq queries) and output (JSON mapping files) locations\n",
"queries_dir = TED_SWS_PATH.joinpath(\"resources\", \"queries\")\n",
"output_dir = TED_SWS_PATH.joinpath(\"resources\", \"mapping_files\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "61c22bd9",
"metadata": {},
"outputs": [],
"source": [
"import requests\n",
"import json\n",
"\n",
"\n",
"# Client-side HTTP timeouts in seconds: (connect, read). Without them a stalled\n",
"# endpoint would block the notebook indefinitely.\n",
"REQUEST_TIMEOUT = (10, 300)\n",
"\n",
"# Ensure the output directory exists\n",
"output_dir.mkdir(parents=True, exist_ok=True)\n",
"\n",
"# Iterate through all SPARQL query files in the queries directory (sorted so the\n",
"# execution order is reproducible between runs)\n",
"for query_file in sorted(queries_dir.glob(\"*.rq\")):\n",
"    # Read the SPARQL query\n",
"    sparql_query = query_file.read_text(encoding=\"utf-8\")\n",
"\n",
"    # Prepare the request parameters.\n",
"    # NOTE: \"timeout\" below is a query-string parameter sent to the endpoint,\n",
"    # not the HTTP client timeout (that one is REQUEST_TIMEOUT).\n",
"    params = {\n",
"        \"default-graph-uri\": \"\",\n",
"        \"query\": sparql_query,\n",
"        \"format\": \"application/sparql-results+json\",\n",
"        \"timeout\": 0,\n",
"        \"debug\": \"on\"\n",
"    }\n",
"\n",
"    # Execute the query; a network failure is reported and the remaining\n",
"    # queries are still processed instead of aborting the whole batch.\n",
"    print(f\"Executing query: {query_file.name}\")\n",
"    try:\n",
"        response = requests.get(endpoint_url, params=params, timeout=REQUEST_TIMEOUT)\n",
"    except requests.exceptions.RequestException as error:\n",
"        print(f\"Failed to execute query {query_file.name}. Request error: {error}\")\n",
"        continue\n",
"    print(f\"Response status code for query {query_file.name}: {response.status_code}\")\n",
"\n",
"    if response.status_code != 200:\n",
"        print(f\"Failed to execute query {query_file.name}. HTTP Status Code: {response.status_code}\")\n",
"        continue\n",
"\n",
"    # Parse the body BEFORE opening the output file: opening in \"w\" mode\n",
"    # truncates it, so a malformed response would otherwise wipe the\n",
"    # previously saved mapping file.\n",
"    try:\n",
"        json_data = response.json()\n",
"    except ValueError as error:\n",
"        print(f\"Failed to parse results for query {query_file.name}: {error}\")\n",
"        continue\n",
"\n",
"    # Save the formatted result in the output directory\n",
"    output_file = output_dir / f\"{query_file.stem}.json\"\n",
"    with output_file.open(\"w\", encoding=\"utf-8\") as file:\n",
"        json.dump(json_data, file, indent=JSON_IDENT, ensure_ascii=False)\n",
"    print(f\"Saved formatted results for {query_file.name} to {output_file}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.20"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading