From 30d447ffb5b7155b99ee54daf0e216c04b89b5d8 Mon Sep 17 00:00:00 2001
From: Alyssa Dai <alyssa.ydai@gmail.com>
Date: Tue, 15 Oct 2024 23:18:53 -0400
Subject: [PATCH] [ENH] Add `/pipelines` router & route for fetching available
 pipeline versions (#350)

* create sparql query

* add new GET route for fetching pipeline versions

* rename graph response unpacking util

* test new endpoint

* refactor pipeline routes into separate router

* update docstring
---
 app/api/crud.py              |  4 ++--
 app/api/routers/pipelines.py | 28 ++++++++++++++++++++++++++
 app/api/utility.py           | 18 +++++++++++++++--
 app/main.py                  |  3 ++-
 tests/test_pipelines.py      | 39 ++++++++++++++++++++++++++++++++++++
 tests/test_utility.py        |  4 ++--
 6 files changed, 89 insertions(+), 7 deletions(-)
 create mode 100644 app/api/routers/pipelines.py
 create mode 100644 tests/test_pipelines.py

diff --git a/app/api/crud.py b/app/api/crud.py
index 6482c97..506df55 100644
--- a/app/api/crud.py
+++ b/app/api/crud.py
@@ -86,7 +86,7 @@ def query_matching_dataset_sizes(dataset_uuids: list) -> dict:
     )
     return {
         ds["dataset_uuid"]: int(ds["total_subjects"])
-        for ds in util.unpack_http_response_json_to_dicts(
+        for ds in util.unpack_graph_response_json_to_dicts(
             matching_dataset_size_results
         )
     }
@@ -159,7 +159,7 @@ async def get(
     # the attribute does not end up in the graph API response or the below resulting processed dataframe.
     # Conforming the columns to a list of expected attributes ensures every subject-session has the same response shape from the node API.
     results_df = pd.DataFrame(
-        util.unpack_http_response_json_to_dicts(results)
+        util.unpack_graph_response_json_to_dicts(results)
     ).reindex(columns=ALL_SUBJECT_ATTRIBUTES)
 
     matching_dataset_sizes = query_matching_dataset_sizes(
diff --git a/app/api/routers/pipelines.py b/app/api/routers/pipelines.py
new file mode 100644
index 0000000..69982d9
--- /dev/null
+++ b/app/api/routers/pipelines.py
@@ -0,0 +1,28 @@
+from fastapi import APIRouter
+from pydantic import constr
+
+from .. import crud
+from .. import utility as util
+from ..models import CONTROLLED_TERM_REGEX
+
+router = APIRouter(prefix="/pipelines", tags=["pipelines"])
+
+
+@router.get("/{pipeline_term}/versions")
+async def get_pipeline_versions(
+    pipeline_term: constr(regex=CONTROLLED_TERM_REGEX),
+):
+    """
+    When a GET request is sent, query the graph for the specified pipeline resource and return a dict keyed on
+    that resource, where the value is a list of the pipeline versions available in the graph for that pipeline.
+    """
+    results = crud.post_query_to_graph(
+        util.create_pipeline_versions_query(pipeline_term)
+    )
+    results_dict = {
+        pipeline_term: [
+            res["pipeline_version"]
+            for res in util.unpack_graph_response_json_to_dicts(results)
+        ]
+    }
+    return results_dict
diff --git a/app/api/utility.py b/app/api/utility.py
index 4b4aee3..41a4f7e 100644
--- a/app/api/utility.py
+++ b/app/api/utility.py
@@ -88,9 +88,9 @@ def create_context() -> str:
     )
 
 
-def unpack_http_response_json_to_dicts(response: dict) -> list[dict]:
+def unpack_graph_response_json_to_dicts(response: dict) -> list[dict]:
     """
-    Reformats a nested dictionary object from a SPARQL query response JSON into a more human-readable list of dictionaries,
+    Reformats a nested dictionary object from a SPARQL query response JSON into a list of dictionaries,
     where the keys are the variables selected in the SPARQL query and the values correspond to the variable values.
     The number of dictionaries should correspond to the number of query matches.
     """
@@ -511,3 +511,17 @@ def create_snomed_term_lookup(output_path: Path):
     term_labels = {term["sctid"]: term["preferred_name"] for term in vocab}
     with open(output_path, "w") as f:
         f.write(json.dumps(term_labels, indent=2))
+
+
+def create_pipeline_versions_query(pipeline: str) -> str:
+    """Create a SPARQL query, preceded by the create_context() output, for the distinct versions of a pipeline available in a graph."""
+    query_string = textwrap.dedent(
+        f"""\
+    SELECT DISTINCT ?pipeline_version
+    WHERE {{
+        ?completed_pipeline a nb:CompletedPipeline;
+            nb:hasPipelineName {pipeline};
+            nb:hasPipelineVersion ?pipeline_version.
+    }}"""
+    )
+    return "\n".join([create_context(), query_string])
diff --git a/app/main.py b/app/main.py
index 5d0ceb3..bd1a029 100644
--- a/app/main.py
+++ b/app/main.py
@@ -12,7 +12,7 @@
 from fastapi.responses import HTMLResponse, ORJSONResponse, RedirectResponse
 
 from .api import utility as util
-from .api.routers import attributes, query
+from .api.routers import attributes, pipelines, query
 from .api.security import check_client_id
 
 app = FastAPI(
@@ -143,6 +143,7 @@ async def cleanup_temp_vocab_dir():
 
 app.include_router(query.router)
 app.include_router(attributes.router)
+app.include_router(pipelines.router)
 
 # Automatically start uvicorn server on execution of main.py
 if __name__ == "__main__":
diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py
new file mode 100644
index 0000000..ac436e0
--- /dev/null
+++ b/tests/test_pipelines.py
@@ -0,0 +1,39 @@
+from app.api import crud
+
+BASE_ROUTE = "/pipelines"
+
+
+def test_get_pipeline_versions_response(
+    test_app, monkeypatch, set_test_credentials
+):
+    """
+    Given a request to /pipelines/{pipeline_term}/versions with a valid pipeline name and a mocked graph response,
+    returns a 200 response with a dict where the key is the pipeline resource and the value is a list of pipeline versions.
+    """
+
+    def mock_post_query_to_graph(query, timeout=5.0):
+        return {
+            "head": {"vars": ["pipeline_version"]},
+            "results": {
+                "bindings": [
+                    {
+                        "pipeline_version": {
+                            "type": "literal",
+                            "value": "23.1.3",
+                        }
+                    },
+                    {
+                        "pipeline_version": {
+                            "type": "literal",
+                            "value": "20.2.7",
+                        }
+                    },
+                ]
+            },
+        }
+
+    monkeypatch.setattr(crud, "post_query_to_graph", mock_post_query_to_graph)
+
+    response = test_app.get(f"{BASE_ROUTE}/np:fmriprep/versions")
+    assert response.status_code == 200
+    assert response.json() == {"np:fmriprep": ["23.1.3", "20.2.7"]}
diff --git a/tests/test_utility.py b/tests/test_utility.py
index 51cdd49..38ea1f4 100644
--- a/tests/test_utility.py
+++ b/tests/test_utility.py
@@ -3,7 +3,7 @@
 from app.api import utility as util
 
 
-def test_unpack_http_response_json_to_dicts():
+def test_unpack_graph_response_json_to_dicts():
     """Test that given a valid httpx JSON response, the function returns a simplified list of dicts with the correct keys and values."""
     mock_response_json = {
         "head": {"vars": ["dataset_uuid", "total_subjects"]},
@@ -46,7 +46,7 @@ def test_unpack_http_response_json_to_dicts():
         },
     }
 
-    assert util.unpack_http_response_json_to_dicts(mock_response_json) == [
+    assert util.unpack_graph_response_json_to_dicts(mock_response_json) == [
         {
             "dataset_uuid": "http://neurobagel.org/vocab/ds1234",
             "total_subjects": "70",