From 40703f468e59ee5d5717753e4ef0e86cf1947d16 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Tue, 1 Oct 2024 19:33:09 -0400 Subject: [PATCH 01/17] Added query parameters for pipeline name and version --- app/api/routers/query.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/api/routers/query.py b/app/api/routers/query.py index f746b7b..e2c0e95 100644 --- a/app/api/routers/query.py +++ b/app/api/routers/query.py @@ -52,6 +52,8 @@ async def get_query( query.min_num_phenotypic_sessions, query.assessment, query.image_modal, + query.pipeline_version, + query.pipeline_name, ) return response From 7278d86789afec1ca9c4df2e8bf5072e6b18b1e2 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Tue, 1 Oct 2024 19:33:41 -0400 Subject: [PATCH 02/17] Updated query model --- app/api/models.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/app/api/models.py b/app/api/models.py index 01c3c49..2b58113 100644 --- a/app/api/models.py +++ b/app/api/models.py @@ -8,6 +8,7 @@ from pydantic import BaseModel, constr, root_validator CONTROLLED_TERM_REGEX = r"^[a-zA-Z]+[:]\S+$" +VERSION_REGEX = r"^\d+\.\d+\.\d+$" class QueryModel(BaseModel): @@ -22,6 +23,8 @@ class QueryModel(BaseModel): min_num_phenotypic_sessions: int = Query(default=None, ge=0) assessment: constr(regex=CONTROLLED_TERM_REGEX) = None image_modal: constr(regex=CONTROLLED_TERM_REGEX) = None + pipeline_version: constr(regex=VERSION_REGEX) = None + pipeline_name: constr(regex=CONTROLLED_TERM_REGEX) = None @root_validator() def check_maxage_ge_minage(cls, values): @@ -67,6 +70,8 @@ class SessionResponse(BaseModel): assessment: list image_modal: list session_file_path: Optional[str] + pipeline_version: list + pipeline_name: list class CohortQueryResponse(BaseModel): @@ -81,6 +86,8 @@ class CohortQueryResponse(BaseModel): num_matching_subjects: int subject_data: Union[list[SessionResponse], str] image_modals: list + pipeline_version: list + pipeline_name: list class DataElementURI(str, Enum): From 7204e0ecbd1a69f47807ddbac93dec0752b48800 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Tue, 1 Oct 2024 19:34:32 -0400 Subject: [PATCH 03/17] Updated SPARQL query template Added nipoppy namespace --- app/api/utility.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/app/api/utility.py b/app/api/utility.py index e09c80a..c706754 100644 --- a/app/api/utility.py +++ b/app/api/utility.py @@ -50,6 +50,7 @@ "ncit": "http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#", "nidm": "http://purl.org/nidash/nidm#", "snomed": "http://purl.bioontology.org/ontology/SNOMEDCT/", + "np": "https://github.com/nipoppy/pipeline-catalog/tree/main/processing", } # Store domains in named tuples @@ -61,6 +62,8 @@ IS_CONTROL = Domain("subject_group", "nb:isSubjectGroup") ASSESSMENT = Domain("assessment", "nb:hasAssessment") IMAGE_MODAL = Domain("image_modal", "nb:hasContrastType") +PIPELINE_VERSION = Domain("pipeline_version", "nb:hasPipelineVersion") +PIPELINE_NAME = Domain("pipeline_name", "nb:hasPipelineName") PROJECT = Domain("project", "nb:hasSamples") @@ -115,6 +118,8 @@ def create_query( min_num_phenotypic_sessions: Optional[int] = None, assessment: Optional[str] = None, image_modal: Optional[str] = None, + pipeline_version: Optional[str] = None, + pipeline_name: Optional[str] = None, ) -> str: """ Creates a SPARQL query using a query template and filters it using the input parameters. @@ -139,6 +144,10 @@ def create_query( Non-imaging assessment completed by subjects, by default None. 
image_modal : str, optional Imaging modality of subject scans, by default None. + pipeline_version : str, optional + Pipeline version of subject scans, by default None. + pipeline_name : str, optional + Pipeline name of subject scans, by default None. Returns ------- @@ -206,10 +215,23 @@ def create_query( "\n" + f"FILTER (?{IMAGE_MODAL.var} = {image_modal})." ) + pipeline_filters = "" + if pipeline_version is not None: + pipeline_filters += ( + "\n" + + f'FILTER (?{PIPELINE_VERSION.var} = "{pipeline_version}").' # Wrap with quotes + ) + + if pipeline_name is not None: + pipeline_filters += ( + "\n" + f"FILTER (?{PIPELINE_NAME.var} = {pipeline_name})." + ) + query_string = textwrap.dedent( f""" SELECT DISTINCT ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?age ?sex - ?diagnosis ?subject_group ?num_matching_phenotypic_sessions ?num_matching_imaging_sessions ?session_id ?session_type ?assessment ?image_modal ?session_file_path + ?diagnosis ?subject_group ?num_matching_phenotypic_sessions ?num_matching_imaging_sessions + ?session_id ?session_type ?assessment ?image_modal ?session_file_path ?pipeline_version ?pipeline_name WHERE {{ ?dataset_uuid a nb:Dataset; nb:hasLabel ?dataset_name; @@ -229,6 +251,13 @@ def create_query( OPTIONAL {{?session nb:hasDiagnosis ?diagnosis.}} OPTIONAL {{?session nb:isSubjectGroup ?subject_group.}} OPTIONAL {{?session nb:hasAssessment ?assessment.}} + + OPTIONAL {{ + ?session nb:hasCompletedPipeline ?pipeline. + ?pipeline nb:hasPipelineVersion ?pipeline_version. + ?pipeline nb:hasPipelineName ?pipeline_name. + {pipeline_filters} + }} {{ SELECT ?subject (count(distinct ?phenotypic_session) as ?num_matching_phenotypic_sessions) WHERE {{ @@ -260,6 +289,7 @@ def create_query( }} """ ) + print(query_string) # The query defined above will return all subject-level attributes from the graph. If RETURN_AGG variable has been set to true, # wrap query in an aggregating statement so data returned from graph include only attributes needed for dataset-level aggregate metadata. From 18172e009b84242092e9521f87f8a59397d3a24d Mon Sep 17 00:00:00 2001 From: rmanaem Date: Tue, 1 Oct 2024 19:34:57 -0400 Subject: [PATCH 04/17] Updated crud.get function --- app/api/crud.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/app/api/crud.py b/app/api/crud.py index 876b82c..c53137b 100644 --- a/app/api/crud.py +++ b/app/api/crud.py @@ -100,6 +100,8 @@ async def get( min_num_phenotypic_sessions: int, assessment: str, image_modal: str, + pipeline_version: str, + pipeline_name: str, ) -> list[CohortQueryResponse]: """ Sends SPARQL queries to the graph API via httpx POST requests for subject-session or dataset metadata @@ -125,6 +127,10 @@ async def get( Non-imaging assessment completed by subjects. image_modal : str Imaging modality of subject scans. + pipeline_version : str + Pipeline version of subject scans. + pipeline_name : str + Pipeline name of subject scans. 
Returns ------- @@ -142,6 +148,8 @@ async def get( min_num_imaging_sessions=min_num_imaging_sessions, assessment=assessment, image_modal=image_modal, + pipeline_version=pipeline_version, + pipeline_name=pipeline_name, ) ) @@ -184,6 +192,8 @@ async def get( "subject_group": "first", "assessment": lambda x: list(x.unique()), "image_modal": lambda x: list(x.unique()), + "pipeline_version": lambda x: list(x.unique()), + "pipeline_name": lambda x: list(x.unique()), "session_file_path": "first", } ) @@ -224,6 +234,16 @@ async def get( group["image_modal"].notna() ].unique() ), + pipeline_version=list( + group["pipeline_version"][ + group["pipeline_version"].notna() + ].unique() + ), + pipeline_name=list( + group["pipeline_name"][ + group["pipeline_name"].notna() + ].unique() + ), ) ) From 165ee588c0de5eaad7605d2bd6ff363487e89553 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Wed, 2 Oct 2024 13:21:06 -0400 Subject: [PATCH 05/17] Updated `query_string` in `create_query` function --- app/api/utility.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/app/api/utility.py b/app/api/utility.py index c706754..0917177 100644 --- a/app/api/utility.py +++ b/app/api/utility.py @@ -215,15 +215,14 @@ def create_query( "\n" + f"FILTER (?{IMAGE_MODAL.var} = {image_modal})." ) - pipeline_filters = "" if pipeline_version is not None: - pipeline_filters += ( + imaging_session_level_filters += ( "\n" + f'FILTER (?{PIPELINE_VERSION.var} = "{pipeline_version}").' # Wrap with quotes ) if pipeline_name is not None: - pipeline_filters += ( + imaging_session_level_filters += ( "\n" + f"FILTER (?{PIPELINE_NAME.var} = {pipeline_name})." ) @@ -253,11 +252,11 @@ def create_query( OPTIONAL {{?session nb:hasAssessment ?assessment.}} OPTIONAL {{ - ?session nb:hasCompletedPipeline ?pipeline. - ?pipeline nb:hasPipelineVersion ?pipeline_version. - ?pipeline nb:hasPipelineName ?pipeline_name. - {pipeline_filters} - }} + ?session nb:hasCompletedPipeline ?pipeline. + ?pipeline nb:hasPipelineVersion ?pipeline_version. + ?pipeline nb:hasPipelineName ?pipeline_name. + }} + {{ SELECT ?subject (count(distinct ?phenotypic_session) as ?num_matching_phenotypic_sessions) WHERE {{ @@ -282,6 +281,11 @@ def create_query( ?imaging_session a nb:ImagingSession; nb:hasAcquisition/nb:hasContrastType ?image_modal. }} + OPTIONAL {{ + ?imaging_session nb:hasCompletedPipeline ?pipeline. + ?pipeline nb:hasPipelineVersion ?pipeline_version. + ?pipeline nb:hasPipelineName ?pipeline_name. + }} {imaging_session_level_filters} }} GROUP BY ?subject }} @@ -289,7 +293,6 @@ def create_query( }} """ ) - print(query_string) # The query defined above will return all subject-level attributes from the graph. If RETURN_AGG variable has been set to true, # wrap query in an aggregating statement so data returned from graph include only attributes needed for dataset-level aggregate metadata. 
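Note on the filters added above: the pipeline query parameters are composed into plain SPARQL FILTER clauses that get interpolated into the query template in app/api/utility.py. The standalone sketch below only mirrors the f-string logic shown in the diff (it is not part of the patch); the Domain field names are illustrative, since the diff itself only relies on the .var attribute.

from collections import namedtuple

# Mirrors the Domain named tuples defined in app/api/utility.py (patch 03);
# the field names here are illustrative.
Domain = namedtuple("Domain", ["var", "pred"])
PIPELINE_NAME = Domain("pipeline_name", "nb:hasPipelineName")
PIPELINE_VERSION = Domain("pipeline_version", "nb:hasPipelineVersion")


def build_pipeline_filters(pipeline_name=None, pipeline_version=None) -> str:
    """Sketch of how the pipeline query parameters become SPARQL FILTER lines."""
    filters = ""
    if pipeline_name is not None:
        # Controlled terms (e.g. "np:freesurfer") are interpolated unquoted.
        filters += f"\nFILTER (?{PIPELINE_NAME.var} = {pipeline_name})."
    if pipeline_version is not None:
        # Version strings are plain literals, so they are wrapped in quotes.
        filters += f'\nFILTER (?{PIPELINE_VERSION.var} = "{pipeline_version}").'
    return filters


print(build_pipeline_filters("np:freesurfer", "7.3.2"))
# FILTER (?pipeline_name = np:freesurfer).
# FILTER (?pipeline_version = "7.3.2").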
From c2097c6492dce9be2cbc5ac2b91aa177e3b5697e Mon Sep 17 00:00:00 2001 From: rmanaem Date: Wed, 2 Oct 2024 13:34:05 -0400 Subject: [PATCH 06/17] Updated the `default_neurobagel_query` --- docs/default_neurobagel_query.rq | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/default_neurobagel_query.rq b/docs/default_neurobagel_query.rq index ae2ff14..132ca6c 100644 --- a/docs/default_neurobagel_query.rq +++ b/docs/default_neurobagel_query.rq @@ -6,7 +6,7 @@ PREFIX nidm: PREFIX snomed: SELECT DISTINCT ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?age ?sex -?diagnosis ?subject_group ?num_matching_phenotypic_sessions ?num_matching_imaging_sessions ?session_id ?session_type ?assessment ?image_modal ?session_file_path +?diagnosis ?subject_group ?num_matching_phenotypic_sessions ?num_matching_imaging_sessions ?session_id ?session_type ?assessment ?image_modal ?session_file_path ?pipeline_name ?pipeline_version WHERE { ?dataset_uuid a nb:Dataset; nb:hasLabel ?dataset_name; @@ -41,6 +41,11 @@ WHERE { } GROUP BY ?subject } + OPTIONAL { + ?session nb:hasCompletedPipeline ?pipeline. + ?pipeline nb:hasPipelineVersion ?pipeline_version. + ?pipeline nb:hasPipelineName ?pipeline_name. + } { SELECT ?subject (count(distinct ?imaging_session) as ?num_matching_imaging_sessions) WHERE { @@ -50,6 +55,11 @@ WHERE { ?imaging_session a nb:ImagingSession; nb:hasAcquisition/nb:hasContrastType ?image_modal. } + OPTIONAL { + ?imaging_session nb:hasCompletedPipeline ?pipeline. + ?pipeline nb:hasPipelineVersion ?pipeline_version. + ?pipeline nb:hasPipelineName ?pipeline_name. + } } GROUP BY ?subject } From d22570f466b681d54f849773ccf131ca15a652dd Mon Sep 17 00:00:00 2001 From: rmanaem Date: Wed, 2 Oct 2024 13:35:49 -0400 Subject: [PATCH 07/17] Refactored `query_string` in `create_query` function --- app/api/utility.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/app/api/utility.py b/app/api/utility.py index 0917177..70b4aee 100644 --- a/app/api/utility.py +++ b/app/api/utility.py @@ -250,13 +250,6 @@ def create_query( OPTIONAL {{?session nb:hasDiagnosis ?diagnosis.}} OPTIONAL {{?session nb:isSubjectGroup ?subject_group.}} OPTIONAL {{?session nb:hasAssessment ?assessment.}} - - OPTIONAL {{ - ?session nb:hasCompletedPipeline ?pipeline. - ?pipeline nb:hasPipelineVersion ?pipeline_version. - ?pipeline nb:hasPipelineName ?pipeline_name. - }} - {{ SELECT ?subject (count(distinct ?phenotypic_session) as ?num_matching_phenotypic_sessions) WHERE {{ @@ -272,6 +265,12 @@ def create_query( {phenotypic_session_level_filters} }} GROUP BY ?subject }} + + OPTIONAL {{ + ?session nb:hasCompletedPipeline ?pipeline. + ?pipeline nb:hasPipelineVersion ?pipeline_version. + ?pipeline nb:hasPipelineName ?pipeline_name. + }} {{ SELECT ?subject (count(distinct ?imaging_session) as ?num_matching_imaging_sessions) WHERE {{ @@ -282,7 +281,7 @@ def create_query( nb:hasAcquisition/nb:hasContrastType ?image_modal. }} OPTIONAL {{ - ?imaging_session nb:hasCompletedPipeline ?pipeline. + ?imaging_session nb:hasCompletedPipeline ?pipeline. ?pipeline nb:hasPipelineVersion ?pipeline_version. ?pipeline nb:hasPipelineName ?pipeline_name. 
}} From 00ffaaa1e1ada5bdd35c4aee7aaddf8cac5be11e Mon Sep 17 00:00:00 2001 From: rmanaem Date: Wed, 2 Oct 2024 14:14:12 -0400 Subject: [PATCH 08/17] Updated test fixtures --- tests/conftest.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index dd92310..18fb019 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -73,6 +73,8 @@ def test_data(): "http://purl.org/nidash/nidm#T1Weighted", "http://purl.org/nidash/nidm#T2Weighted", ], + "pipeline_version": ["7.3.2", "23.1.3"], + "pipeline_name": ["freesurfer", "fmriprep"], }, { "dataset_uuid": "http://neurobagel.org/vocab/67890", @@ -86,6 +88,8 @@ def test_data(): "http://purl.org/nidash/nidm#FlowWeighted", "http://purl.org/nidash/nidm#T1Weighted", ], + "pipeline_version": ["7.3.2"], + "pipeline_name": ["freesurfer"], }, ] @@ -178,6 +182,8 @@ async def _mock_get_with_exception( min_num_phenotypic_sessions, assessment, image_modal, + pipeline_version, + pipeline_name, ): raise request.param @@ -206,6 +212,8 @@ async def _mock_get( min_num_phenotypic_sessions, assessment, image_modal, + pipeline_version, + pipeline_name, ): return request.param @@ -226,6 +234,8 @@ async def _mock_successful_get( min_num_phenotypic_sessions, assessment, image_modal, + pipeline_version, + pipeline_name, ): return test_data From 3b2e46be6fe7996323e9102b330e944a9f033e6e Mon Sep 17 00:00:00 2001 From: rmanaem Date: Wed, 2 Oct 2024 14:15:43 -0400 Subject: [PATCH 09/17] Added tests for `pipeline_version` and `pipeline_name` --- tests/test_query.py | 84 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/tests/test_query.py b/tests/test_query.py index 2e92ec0..3e7a91b 100644 --- a/tests/test_query.py +++ b/tests/test_query.py @@ -477,6 +477,90 @@ def test_get_undefined_prefix_image_modal( assert response.status_code == 500 +@pytest.mark.parametrize("valid_pipeline_version", ["7.3.2", "23.1.3"]) +def test_get_valid_pipeline_version( + test_app, + mock_successful_get, + monkeypatch, + mock_auth_header, + set_mock_verify_token, + valid_pipeline_version, +): + """Given a valid pipeline version, returns a 200 status code and a non-empty list of results.""" + + monkeypatch.setattr(crud, "get", mock_successful_get) + response = test_app.get( + f"{ROUTE}?pipeline_version={valid_pipeline_version}", + headers=mock_auth_header, + ) + assert response.status_code == 200 + assert response.json() != [] + + +@pytest.mark.parametrize("mock_get", [None], indirect=True) +@pytest.mark.parametrize("invalid_pipeline_version", ["latest", "7.2", "23"]) +def test_get_invalid_pipeline_version( + test_app, + mock_get, + monkeypatch, + mock_auth_header, + set_mock_verify_token, + invalid_pipeline_version, +): + """Given an invalid pipeline version, returns a 422 status code.""" + + monkeypatch.setattr(crud, "get", mock_get) + response = test_app.get( + f"{ROUTE}?pipeline_version={invalid_pipeline_version}", + headers=mock_auth_header, + ) + assert response.status_code == 422 + + +@pytest.mark.parametrize( + "valid_pipeline_name", ["np:fmriprep", "np:freesurfer"] +) +def test_get_valid_pipeline_name( + test_app, + mock_successful_get, + monkeypatch, + mock_auth_header, + set_mock_verify_token, + valid_pipeline_name, +): + """Given a valid pipeline name, returns a 200 status code and a non-empty list of results.""" + + monkeypatch.setattr(crud, "get", mock_successful_get) + response = test_app.get( + f"{ROUTE}?pipeline_name={valid_pipeline_name}", + headers=mock_auth_header, + ) + assert 
response.status_code == 200 + assert response.json() != [] + + +@pytest.mark.parametrize("mock_get", [None], indirect=True) +@pytest.mark.parametrize( + "invalid_pipeline_name", ["n2p:coolpipeline", "apple", "some_thing:cool"] +) +def test_get_invalid_pipeline_name( + test_app, + mock_get, + monkeypatch, + mock_auth_header, + set_mock_verify_token, + invalid_pipeline_name, +): + """Given an invalid pipeline name, returns a 422 status code.""" + + monkeypatch.setattr(crud, "get", mock_get) + response = test_app.get( + f"{ROUTE}?pipeline_name={invalid_pipeline_name}", + headers=mock_auth_header, + ) + assert response.status_code == 422 + + def test_aggregate_query_response_structure( test_app, set_test_credentials, From d377c32cb13c6c4152f402b82fb77b74fb1e267a Mon Sep 17 00:00:00 2001 From: rmanaem Date: Thu, 3 Oct 2024 14:35:16 -0400 Subject: [PATCH 10/17] Updated `np` namespace --- app/api/utility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/api/utility.py b/app/api/utility.py index 70b4aee..f17cd48 100644 --- a/app/api/utility.py +++ b/app/api/utility.py @@ -50,7 +50,7 @@ "ncit": "http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#", "nidm": "http://purl.org/nidash/nidm#", "snomed": "http://purl.bioontology.org/ontology/SNOMEDCT/", - "np": "https://github.com/nipoppy/pipeline-catalog/tree/main/processing", + "np": "https://github.com/nipoppy/pipeline-catalog/tree/main/processing/", } # Store domains in named tuples From 8f1ee1f7d205461f6c4d289c9174a3b87fe38574 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Thu, 3 Oct 2024 14:35:49 -0400 Subject: [PATCH 11/17] Refactored the pipeline information representation in the response --- app/api/crud.py | 33 +++++++++++++++++++++++---------- app/api/models.py | 6 ++---- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/app/api/crud.py b/app/api/crud.py index c53137b..5a72ef4 100644 --- a/app/api/crud.py +++ b/app/api/crud.py @@ -16,6 +16,8 @@ "dataset_uuid", "dataset_name", "dataset_portal_uri", + "pipeline_version", + "pipeline_name", ] @@ -212,8 +214,28 @@ async def get( all_nan_columns ].replace({np.nan: None}) + subject_data["pipeline"] = subject_data.apply( + lambda row: { + name: versions + for name, versions in zip( + row["pipeline_name"], row["pipeline_version"] + ) + if pd.notna(name) and pd.notna(versions) + }, + axis=1, + ) + subject_data = list(subject_data.to_dict("records")) + pipeline_info = {} + for name, version in zip( + group["pipeline_name"], group["pipeline_version"] + ): + if pd.notna(name) and pd.notna(version): + if name not in pipeline_info: + pipeline_info[name] = set() + pipeline_info[name].add(version) + response_obj.append( CohortQueryResponse( dataset_uuid=dataset_uuid, @@ -234,16 +256,7 @@ async def get( group["image_modal"].notna() ].unique() ), - pipeline_version=list( - group["pipeline_version"][ - group["pipeline_version"].notna() - ].unique() - ), - pipeline_name=list( - group["pipeline_name"][ - group["pipeline_name"].notna() - ].unique() - ), + pipeline=pipeline_info, ) ) diff --git a/app/api/models.py b/app/api/models.py index 2b58113..6b5aca3 100644 --- a/app/api/models.py +++ b/app/api/models.py @@ -70,8 +70,7 @@ class SessionResponse(BaseModel): assessment: list image_modal: list session_file_path: Optional[str] - pipeline_version: list - pipeline_name: list + pipeline: dict class CohortQueryResponse(BaseModel): @@ -86,8 +85,7 @@ class CohortQueryResponse(BaseModel): num_matching_subjects: int subject_data: Union[list[SessionResponse], str] image_modals: list - 
pipeline_version: list - pipeline_name: list + pipeline: dict class DataElementURI(str, Enum): From 46595c596eac39a4dc04b581a1f18187da147e18 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Thu, 3 Oct 2024 14:37:07 -0400 Subject: [PATCH 12/17] Updated test fixture --- tests/conftest.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 18fb019..78b0a91 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -73,8 +73,7 @@ def test_data(): "http://purl.org/nidash/nidm#T1Weighted", "http://purl.org/nidash/nidm#T2Weighted", ], - "pipeline_version": ["7.3.2", "23.1.3"], - "pipeline_name": ["freesurfer", "fmriprep"], + "pipeline": {"freesurfer": ["7.3.2", "2.8.2"]}, }, { "dataset_uuid": "http://neurobagel.org/vocab/67890", @@ -88,8 +87,10 @@ def test_data(): "http://purl.org/nidash/nidm#FlowWeighted", "http://purl.org/nidash/nidm#T1Weighted", ], - "pipeline_version": ["7.3.2"], - "pipeline_name": ["freesurfer"], + "pipeline": { + "freesurfer": ["7.3.2", "2.1.2"], + "fmriprep": ["23.1.3", "22.1.4"], + }, }, ] From aace3cd3118008aba1774cd7a9d6127ab1f5ef54 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Thu, 3 Oct 2024 14:40:40 -0400 Subject: [PATCH 13/17] Fixed the test --- app/api/crud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/api/crud.py b/app/api/crud.py index 5a72ef4..8f925cc 100644 --- a/app/api/crud.py +++ b/app/api/crud.py @@ -168,6 +168,7 @@ async def get( response_obj = [] dataset_cols = ["dataset_uuid", "dataset_name"] + pipeline_info = {} if not results_df.empty: for (dataset_uuid, dataset_name), group in results_df.groupby( by=dataset_cols @@ -227,7 +228,6 @@ async def get( subject_data = list(subject_data.to_dict("records")) - pipeline_info = {} for name, version in zip( group["pipeline_name"], group["pipeline_version"] ): From 6cc67f8cd056585694aafc500273e6aad44274f9 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Thu, 3 Oct 2024 15:07:48 -0400 Subject: [PATCH 14/17] Fixed the pipeline info at the subject level --- app/api/crud.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/app/api/crud.py b/app/api/crud.py index 8f925cc..04f9055 100644 --- a/app/api/crud.py +++ b/app/api/crud.py @@ -217,11 +217,14 @@ async def get( subject_data["pipeline"] = subject_data.apply( lambda row: { - name: versions - for name, versions in zip( - row["pipeline_name"], row["pipeline_version"] + name: list( + group.loc[ + group["pipeline_name"] == name, + "pipeline_version", + ].unique() ) - if pd.notna(name) and pd.notna(versions) + for name in set(row["pipeline_name"]) + if pd.notna(name) }, axis=1, ) From 8098cd8d4e54a9f64cdf847351d913b80ec81254 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Wed, 9 Oct 2024 08:01:32 -0400 Subject: [PATCH 15/17] Addressed requested changes --- app/api/crud.py | 85 ++++++++++++++++++++++++++-------------- app/api/models.py | 9 +++-- app/api/routers/query.py | 2 +- app/api/utility.py | 42 ++++++++++++-------- tests/conftest.py | 8 ++-- tests/test_query.py | 32 ++++++++++++++- 6 files changed, 123 insertions(+), 55 deletions(-) diff --git a/app/api/crud.py b/app/api/crud.py index 04f9055..32bff06 100644 --- a/app/api/crud.py +++ b/app/api/crud.py @@ -102,8 +102,8 @@ async def get( min_num_phenotypic_sessions: int, assessment: str, image_modal: str, - pipeline_version: str, pipeline_name: str, + pipeline_version: str, ) -> list[CohortQueryResponse]: """ Sends SPARQL queries to the graph API via httpx POST requests for subject-session or dataset metadata @@ 
-129,10 +129,10 @@ async def get( Non-imaging assessment completed by subjects. image_modal : str Imaging modality of subject scans. - pipeline_version : str - Pipeline version of subject scans. pipeline_name : str - Pipeline name of subject scans. + Name of pipeline run on subject scans. + pipeline_version : str + Version of pipeline run on subject scans. Returns ------- @@ -168,7 +168,8 @@ async def get( response_obj = [] dataset_cols = ["dataset_uuid", "dataset_name"] - pipeline_info = {} + dataset_available_pipeline_info = {} + results_df.to_csv("output.csv", index=False) if not results_df.empty: for (dataset_uuid, dataset_name), group in results_df.groupby( by=dataset_cols @@ -195,13 +196,53 @@ async def get( "subject_group": "first", "assessment": lambda x: list(x.unique()), "image_modal": lambda x: list(x.unique()), - "pipeline_version": lambda x: list(x.unique()), - "pipeline_name": lambda x: list(x.unique()), "session_file_path": "first", } ) ) + pipeline_data = ( + group.groupby( + [ + "sub_id", + "session_id", + "session_type", + "pipeline_name", + ] + ) + .agg( + { + "pipeline_version": lambda x: list( + x.dropna().unique() + ) + } + ) + .reset_index() + ) + + pipeline_dict = ( + pipeline_data.groupby( + ["sub_id", "session_id", "session_type"] + ) + .apply( + lambda x: dict( + zip(x["pipeline_name"], x["pipeline_version"]) + ) + ) + .reset_index(name="completed_pipelines") + ) + + subject_data = pd.merge( + subject_data.reset_index(drop=True), + pipeline_dict, + on=["sub_id", "session_id", "session_type"], + how="left", + ) + + subject_data["completed_pipelines"] = subject_data[ + "completed_pipelines" + ].apply(lambda x: x if isinstance(x, dict) else {}) + # TODO: Revisit this as there may be a more elegant solution. # The following code replaces columns with all NaN values with values of None, to ensure they show up in the final JSON as `null`. 
# This is needed as the above .agg() seems to turn NaN into None for object-type columns (which have some non-missing values) @@ -215,29 +256,15 @@ async def get( all_nan_columns ].replace({np.nan: None}) - subject_data["pipeline"] = subject_data.apply( - lambda row: { - name: list( - group.loc[ - group["pipeline_name"] == name, - "pipeline_version", - ].unique() - ) - for name in set(row["pipeline_name"]) - if pd.notna(name) - }, - axis=1, - ) - subject_data = list(subject_data.to_dict("records")) - for name, version in zip( - group["pipeline_name"], group["pipeline_version"] - ): - if pd.notna(name) and pd.notna(version): - if name not in pipeline_info: - pipeline_info[name] = set() - pipeline_info[name].add(version) + dataset_available_pipeline_info = ( + group.groupby("pipeline_name", dropna=True)[ + "pipeline_version" + ] + .apply(lambda x: list(x.dropna().unique())) + .to_dict() + ) response_obj.append( CohortQueryResponse( @@ -259,7 +286,7 @@ async def get( group["image_modal"].notna() ].unique() ), - pipeline=pipeline_info, + available_pipelines=dataset_available_pipeline_info, ) ) diff --git a/app/api/models.py b/app/api/models.py index 6b5aca3..27c6fa4 100644 --- a/app/api/models.py +++ b/app/api/models.py @@ -8,7 +8,7 @@ from pydantic import BaseModel, constr, root_validator CONTROLLED_TERM_REGEX = r"^[a-zA-Z]+[:]\S+$" -VERSION_REGEX = r"^\d+\.\d+\.\d+$" +VERSION_REGEX = r"^([A-Za-z0-9-]+)\.(\d+)\.([A-Za-z0-9-]+)$" class QueryModel(BaseModel): @@ -23,8 +23,9 @@ class QueryModel(BaseModel): min_num_phenotypic_sessions: int = Query(default=None, ge=0) assessment: constr(regex=CONTROLLED_TERM_REGEX) = None image_modal: constr(regex=CONTROLLED_TERM_REGEX) = None - pipeline_version: constr(regex=VERSION_REGEX) = None pipeline_name: constr(regex=CONTROLLED_TERM_REGEX) = None + # TODO: Check back if validating using a regex is too restrictive + pipeline_version: constr(regex=VERSION_REGEX) = None @root_validator() def check_maxage_ge_minage(cls, values): @@ -70,7 +71,7 @@ class SessionResponse(BaseModel): assessment: list image_modal: list session_file_path: Optional[str] - pipeline: dict + completed_pipelines: dict class CohortQueryResponse(BaseModel): @@ -85,7 +86,7 @@ class CohortQueryResponse(BaseModel): num_matching_subjects: int subject_data: Union[list[SessionResponse], str] image_modals: list - pipeline: dict + available_pipelines: dict class DataElementURI(str, Enum): diff --git a/app/api/routers/query.py b/app/api/routers/query.py index e2c0e95..449518c 100644 --- a/app/api/routers/query.py +++ b/app/api/routers/query.py @@ -52,8 +52,8 @@ async def get_query( query.min_num_phenotypic_sessions, query.assessment, query.image_modal, - query.pipeline_version, query.pipeline_name, + query.pipeline_version, ) return response diff --git a/app/api/utility.py b/app/api/utility.py index f17cd48..deebc09 100644 --- a/app/api/utility.py +++ b/app/api/utility.py @@ -62,8 +62,8 @@ IS_CONTROL = Domain("subject_group", "nb:isSubjectGroup") ASSESSMENT = Domain("assessment", "nb:hasAssessment") IMAGE_MODAL = Domain("image_modal", "nb:hasContrastType") -PIPELINE_VERSION = Domain("pipeline_version", "nb:hasPipelineVersion") PIPELINE_NAME = Domain("pipeline_name", "nb:hasPipelineName") +PIPELINE_VERSION = Domain("pipeline_version", "nb:hasPipelineVersion") PROJECT = Domain("project", "nb:hasSamples") @@ -118,8 +118,8 @@ def create_query( min_num_phenotypic_sessions: Optional[int] = None, assessment: Optional[str] = None, image_modal: Optional[str] = None, - pipeline_version: Optional[str] = None, 
pipeline_name: Optional[str] = None, + pipeline_version: Optional[str] = None, ) -> str: """ Creates a SPARQL query using a query template and filters it using the input parameters. @@ -144,10 +144,10 @@ def create_query( Non-imaging assessment completed by subjects, by default None. image_modal : str, optional Imaging modality of subject scans, by default None. - pipeline_version : str, optional - Pipeline version of subject scans, by default None. pipeline_name : str, optional - Pipeline name of subject scans, by default None. + Name of pipeline run on subject scans, by default None. + pipeline_version : str, optional + Version of pipeline run on subject scans, by default None. Returns ------- @@ -212,18 +212,21 @@ def create_query( imaging_session_level_filters = "" if image_modal is not None: imaging_session_level_filters += ( - "\n" + f"FILTER (?{IMAGE_MODAL.var} = {image_modal})." + "\n" + + f"{create_bound_filter(IMAGE_MODAL.var)} && ?{IMAGE_MODAL.var} = {image_modal})." ) - if pipeline_version is not None: + if pipeline_name is not None: imaging_session_level_filters += ( "\n" - + f'FILTER (?{PIPELINE_VERSION.var} = "{pipeline_version}").' # Wrap with quotes + + f"{create_bound_filter(PIPELINE_NAME.var)} && (?{PIPELINE_NAME.var} = {pipeline_name})." ) - if pipeline_name is not None: + # In case a user specified the pipeline version but not the name + if pipeline_version is not None: imaging_session_level_filters += ( - "\n" + f"FILTER (?{PIPELINE_NAME.var} = {pipeline_name})." + "\n" + + f'{create_bound_filter(PIPELINE_VERSION.var)} && ?{PIPELINE_VERSION.var} = "{pipeline_version}").' # Wrap with quotes to avoid workaround implicit conversion ) query_string = textwrap.dedent( @@ -277,13 +280,18 @@ def create_query( ?subject a nb:Subject. OPTIONAL {{ ?subject nb:hasSession ?imaging_session. - ?imaging_session a nb:ImagingSession; - nb:hasAcquisition/nb:hasContrastType ?image_modal. - }} - OPTIONAL {{ - ?imaging_session nb:hasCompletedPipeline ?pipeline. - ?pipeline nb:hasPipelineVersion ?pipeline_version. - ?pipeline nb:hasPipelineName ?pipeline_name. + ?imaging_session a nb:ImagingSession. + + OPTIONAL {{ + ?imaging_session nb:hasAcquisition ?acquisition. + ?acquisition nb:hasContrastType ?image_modal. + }} + + OPTIONAL {{ + ?imaging_session nb:hasCompletedPipeline ?pipeline. + ?pipeline nb:hasPipelineVersion ?pipeline_version; + nb:hasPipelineName ?pipeline_name. 
+ }} }} {imaging_session_level_filters} }} GROUP BY ?subject diff --git a/tests/conftest.py b/tests/conftest.py index 78b0a91..6e1b243 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -73,7 +73,9 @@ def test_data(): "http://purl.org/nidash/nidm#T1Weighted", "http://purl.org/nidash/nidm#T2Weighted", ], - "pipeline": {"freesurfer": ["7.3.2", "2.8.2"]}, + "available_pipelines": { + "freesurfer": ["7.3.2", "2.8.2", "8.7.0-rc"] + }, }, { "dataset_uuid": "http://neurobagel.org/vocab/67890", @@ -87,9 +89,9 @@ def test_data(): "http://purl.org/nidash/nidm#FlowWeighted", "http://purl.org/nidash/nidm#T1Weighted", ], - "pipeline": { + "available_pipelines": { "freesurfer": ["7.3.2", "2.1.2"], - "fmriprep": ["23.1.3", "22.1.4"], + "fmriprep": ["23.1.3", "22.1.4", "v2.0.1"], }, }, ] diff --git a/tests/test_query.py b/tests/test_query.py index 3e7a91b..b9bf087 100644 --- a/tests/test_query.py +++ b/tests/test_query.py @@ -477,7 +477,9 @@ def test_get_undefined_prefix_image_modal( assert response.status_code == 500 -@pytest.mark.parametrize("valid_pipeline_version", ["7.3.2", "23.1.3"]) +@pytest.mark.parametrize( + "valid_pipeline_version", ["7.3.2", "23.1.3", "v2.0.1", "8.7.0-rc"] +) def test_get_valid_pipeline_version( test_app, mock_successful_get, @@ -561,6 +563,34 @@ def test_get_invalid_pipeline_name( assert response.status_code == 422 +@pytest.mark.parametrize( + "valid_pipeline_name", + ["np:fmriprep", "np:fmriprep", "np:freesurfer", "np:freesurfer"], +) +@pytest.mark.parametrize( + "valid_pipeline_version", + ["v2.0.1", "23.1.3", "7.3.2", "8.7.0-rc"], +) +def test_get_valid_pipeline_name_version( + test_app, + mock_successful_get, + monkeypatch, + mock_auth_header, + set_mock_verify_token, + valid_pipeline_name, + valid_pipeline_version, +): + """Given a valid pipeline name and version, returns a 200 status code and a non-empty list of results.""" + + monkeypatch.setattr(crud, "get", mock_successful_get) + response = test_app.get( + f"{ROUTE}?pipeline_name={valid_pipeline_name}&pipeline_version={valid_pipeline_version}", + headers=mock_auth_header, + ) + assert response.status_code == 200 + assert response.json() != [] + + def test_aggregate_query_response_structure( test_app, set_test_credentials, From 3de38414eb15fdac37ace420ac00d7baed35b757 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Wed, 9 Oct 2024 08:06:25 -0400 Subject: [PATCH 16/17] Left over clean up --- app/api/crud.py | 1 - 1 file changed, 1 deletion(-) diff --git a/app/api/crud.py b/app/api/crud.py index 32bff06..85c6845 100644 --- a/app/api/crud.py +++ b/app/api/crud.py @@ -169,7 +169,6 @@ async def get( response_obj = [] dataset_cols = ["dataset_uuid", "dataset_name"] dataset_available_pipeline_info = {} - results_df.to_csv("output.csv", index=False) if not results_df.empty: for (dataset_uuid, dataset_name), group in results_df.groupby( by=dataset_cols From c324a17f3fd5a45219e0f8eb0140fbec99bbed15 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Wed, 9 Oct 2024 13:51:16 -0400 Subject: [PATCH 17/17] Addressed second wave requested changes --- app/api/crud.py | 12 +++++++----- app/api/utility.py | 2 +- tests/test_query.py | 13 +++++++------ 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/app/api/crud.py b/app/api/crud.py index 85c6845..62e8736 100644 --- a/app/api/crud.py +++ b/app/api/crud.py @@ -200,14 +200,15 @@ async def get( ) ) - pipeline_data = ( + pipeline_grouped_data = ( group.groupby( [ "sub_id", "session_id", "session_type", "pipeline_name", - ] + ], + dropna=True, ) .agg( { @@ -219,8 +220,8 @@ async 
def get( .reset_index() ) - pipeline_dict = ( - pipeline_data.groupby( + session_completed_pipeline_data = ( + pipeline_grouped_data.groupby( ["sub_id", "session_id", "session_type"] ) .apply( @@ -233,11 +234,12 @@ async def get( subject_data = pd.merge( subject_data.reset_index(drop=True), - pipeline_dict, + session_completed_pipeline_data, on=["sub_id", "session_id", "session_type"], how="left", ) + # ensure that for sessions missing completed pipeline info, completed_pipelines is still a dict rather than null/nan subject_data["completed_pipelines"] = subject_data[ "completed_pipelines" ].apply(lambda x: x if isinstance(x, dict) else {}) diff --git a/app/api/utility.py b/app/api/utility.py index deebc09..795526b 100644 --- a/app/api/utility.py +++ b/app/api/utility.py @@ -290,7 +290,7 @@ def create_query( OPTIONAL {{ ?imaging_session nb:hasCompletedPipeline ?pipeline. ?pipeline nb:hasPipelineVersion ?pipeline_version; - nb:hasPipelineName ?pipeline_name. + nb:hasPipelineName ?pipeline_name. }} }} {imaging_session_level_filters} diff --git a/tests/test_query.py b/tests/test_query.py index b9bf087..9376156 100644 --- a/tests/test_query.py +++ b/tests/test_query.py @@ -564,12 +564,13 @@ def test_get_invalid_pipeline_name( @pytest.mark.parametrize( - "valid_pipeline_name", - ["np:fmriprep", "np:fmriprep", "np:freesurfer", "np:freesurfer"], -) -@pytest.mark.parametrize( - "valid_pipeline_version", - ["v2.0.1", "23.1.3", "7.3.2", "8.7.0-rc"], + "valid_pipeline_name, valid_pipeline_version", + [ + ("np:fmriprep", "v2.0.1"), + ("np:fmriprep", "23.1.3"), + ("np:freesurfer", "7.3.2"), + ("np:freesurfer", "8.7.0-rc"), + ], ) def test_get_valid_pipeline_name_version( test_app,
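Taken together, these patches let API consumers filter datasets by processing pipeline. A minimal client-side usage sketch follows; the base URL, route path, and token are placeholders, the parameter names come from the updated QueryModel, and the response fields follow the CohortQueryResponse model changed above.

import httpx

# Placeholder deployment details -- substitute real values for an actual instance.
API_URL = "http://localhost:8000/query"
HEADERS = {"Authorization": "Bearer <token>"}

params = {
    "pipeline_name": "np:freesurfer",  # controlled term, must match CONTROLLED_TERM_REGEX
    "pipeline_version": "7.3.2",       # must match VERSION_REGEX
}

response = httpx.get(API_URL, params=params, headers=HEADERS)
response.raise_for_status()

for dataset in response.json():
    # available_pipelines maps each pipeline name found in the dataset to the
    # versions present, e.g. {"freesurfer": ["7.3.2", "2.8.2"]}
    print(dataset["dataset_name"], dataset["available_pipelines"])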