OP-TED · duprijil · Apr 23, 2025 · Apr 23, 2025 · May 5, 2025 · May 5, 2025
diff --git a/notebooks/update_mapping_resources.ipynb b/notebooks/update_mapping_resources.ipynb
@@ -0,0 +1,108 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "c6e159fa",
+   "metadata": {},
+   "source": [
+    "# Update Mapping Resources Notebook\n",
+    "\n",
+    "This notebook automates the process of executing SPARQL queries and saving the results in a structured JSON format. The main steps include:\n",
+    "\n",
+    "1. Reading every SPARQL query file (`*.rq`) from the `ted_sws/resources/queries` directory.\n",
+    "2. Executing each query against the SPARQL endpoint: `https://publications.europa.eu/webapi/rdf/sparql`.\n",
+    "3. Formatting the query results as JSON with two-space indentation.\n",
+    "4. Saving each formatted result to the `ted_sws/resources/mapping_files` directory as `<query name>.json`.\n",
+    "\n",
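+    "The notebook creates the output directory if it does not exist and prints progress messages for each query: its name, the HTTP status code of the response, and whether the result was saved. Queries that do not return HTTP 200 are reported and skipped."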
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "12f7cab4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from pathlib import Path\n",
+    "\n",
+    "# Remote SPARQL endpoint that every query is sent to\n",
+    "endpoint_url = \"https://publications.europa.eu/webapi/rdf/sparql\"\n",
+    "\n",
+    "# Indentation width used when the JSON results are written out\n",
+    "JSON_IDENT = 2\n",
+    "\n",
+    "# The project root is taken to be the parent of the current working directory\n",
+    "# NOTE(review): this assumes the kernel is started from the notebooks/ folder - confirm\n",
+    "PROJECT_PATH = Path.cwd().resolve().parent\n",
+    "TED_SWS_PATH = PROJECT_PATH.joinpath(\"ted_sws\")\n",
+    "\n",
+    "# Input (SPARQL query files) and output (JSON mapping files) directories\n",
+    "queries_dir = TED_SWS_PATH.joinpath(\"resources\", \"queries\")\n",
+    "output_dir = TED_SWS_PATH.joinpath(\"resources\", \"mapping_files\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "61c22bd9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import requests\n",
+    "import json\n",
+    "\n",
+    "\n",
+    "# Client-side (connect, read) timeouts in seconds, so a stalled endpoint cannot hang the notebook forever\n",
+    "REQUEST_TIMEOUT = (10, 600)\n",
+    "\n",
+    "# Ensure the output directory exists\n",
+    "output_dir.mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "# Names of the query files that did not produce an updated mapping file\n",
+    "failed_queries = []\n",
+    "\n",
+    "# Process the SPARQL query files in sorted order so that every run handles them in the same sequence\n",
+    "for query_file in sorted(queries_dir.glob(\"*.rq\")):\n",
+    "    # Read the SPARQL query\n",
+    "    sparql_query = query_file.read_text(encoding=\"utf-8\")\n",
+    "\n",
+    "    # Query-string parameters sent to the endpoint; the timeout entry here is a request\n",
+    "    # parameter for the endpoint, not the HTTP client timeout configured above\n",
+    "    params = {\n",
+    "        \"default-graph-uri\": \"\",\n",
+    "        \"query\": sparql_query,\n",
+    "        \"format\": \"application/sparql-results+json\",\n",
+    "        \"timeout\": 0,\n",
+    "        \"debug\": \"on\"\n",
+    "    }\n",
+    "\n",
+    "    # Execute the query; a transport error is reported and the loop moves on,\n",
+    "    # mirroring how non-200 responses are handled below\n",
+    "    print(f\"Executing query: {query_file.name}\")\n",
+    "    try:\n",
+    "        response = requests.get(endpoint_url, params=params, timeout=REQUEST_TIMEOUT)\n",
+    "    except requests.RequestException as error:\n",
+    "        print(f\"Failed to execute query {query_file.name}. Request error: {error}\")\n",
+    "        failed_queries.append(query_file.name)\n",
+    "        continue\n",
+    "    print(f\"Response status code for query {query_file.name}: {response.status_code}\")\n",
+    "\n",
+    "    if response.status_code != 200:\n",
+    "        print(f\"Failed to execute query {query_file.name}. HTTP Status Code: {response.status_code}\")\n",
+    "        failed_queries.append(query_file.name)\n",
+    "        continue\n",
+    "\n",
+    "    # Parse the body BEFORE opening the output file: opening with \"w\" truncates the file,\n",
+    "    # so an unparseable response must not wipe an existing mapping file\n",
+    "    try:\n",
+    "        json_data = response.json()\n",
+    "    except ValueError as error:\n",
+    "        print(f\"Failed to parse JSON response for query {query_file.name}: {error}\")\n",
+    "        failed_queries.append(query_file.name)\n",
+    "        continue\n",
+    "\n",
+    "    # Save the formatted result in the output directory\n",
+    "    output_file = output_dir / f\"{query_file.stem}.json\"\n",
+    "    with output_file.open(\"w\", encoding=\"utf-8\") as file:\n",
+    "        json.dump(json_data, file, indent=JSON_IDENT, ensure_ascii=False)\n",
+    "    print(f\"Saved formatted results for {query_file.name} to {output_file}\")\n",
+    "\n",
+    "# Summarise the failures so they are not lost in the per-query output\n",
+    "if failed_queries:\n",
+    "    print(f\"Queries that did not produce an updated mapping file: {', '.join(failed_queries)}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.20"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}