From 40703f468e59ee5d5717753e4ef0e86cf1947d16 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Tue, 1 Oct 2024 19:33:09 -0400 Subject: [PATCH 01/17] Added query parameters for pipeline name and version --- app/api/routers/query.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/api/routers/query.py b/app/api/routers/query.py index f746b7b..e2c0e95 100644 --- a/app/api/routers/query.py +++ b/app/api/routers/query.py @@ -52,6 +52,8 @@ async def get_query( query.min_num_phenotypic_sessions, query.assessment, query.image_modal, + query.pipeline_version, + query.pipeline_name, ) return response From 7278d86789afec1ca9c4df2e8bf5072e6b18b1e2 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Tue, 1 Oct 2024 19:33:41 -0400 Subject: [PATCH 02/17] Updated query model --- app/api/models.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/app/api/models.py b/app/api/models.py index 01c3c49..2b58113 100644 --- a/app/api/models.py +++ b/app/api/models.py @@ -8,6 +8,7 @@ from pydantic import BaseModel, constr, root_validator CONTROLLED_TERM_REGEX = r"^[a-zA-Z]+[:]\S+$" +VERSION_REGEX = r"^\d+\.\d+\.\d+$" class QueryModel(BaseModel): @@ -22,6 +23,8 @@ class QueryModel(BaseModel): min_num_phenotypic_sessions: int = Query(default=None, ge=0) assessment: constr(regex=CONTROLLED_TERM_REGEX) = None image_modal: constr(regex=CONTROLLED_TERM_REGEX) = None + pipeline_version: constr(regex=VERSION_REGEX) = None + pipeline_name: constr(regex=CONTROLLED_TERM_REGEX) = None @root_validator() def check_maxage_ge_minage(cls, values): @@ -67,6 +70,8 @@ class SessionResponse(BaseModel): assessment: list image_modal: list session_file_path: Optional[str] + pipeline_version: list + pipeline_name: list class CohortQueryResponse(BaseModel): @@ -81,6 +86,8 @@ class CohortQueryResponse(BaseModel): num_matching_subjects: int subject_data: Union[list[SessionResponse], str] image_modals: list + pipeline_version: list + pipeline_name: list class DataElementURI(str, Enum): From 7204e0ecbd1a69f47807ddbac93dec0752b48800 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Tue, 1 Oct 2024 19:34:32 -0400 Subject: [PATCH 03/17] Updated SPARQL query template Added nipoppy namespace --- app/api/utility.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/app/api/utility.py b/app/api/utility.py index e09c80a..c706754 100644 --- a/app/api/utility.py +++ b/app/api/utility.py @@ -50,6 +50,7 @@ "ncit": "http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#", "nidm": "http://purl.org/nidash/nidm#", "snomed": "http://purl.bioontology.org/ontology/SNOMEDCT/", + "np": "https://github.com/nipoppy/pipeline-catalog/tree/main/processing", } # Store domains in named tuples @@ -61,6 +62,8 @@ IS_CONTROL = Domain("subject_group", "nb:isSubjectGroup") ASSESSMENT = Domain("assessment", "nb:hasAssessment") IMAGE_MODAL = Domain("image_modal", "nb:hasContrastType") +PIPELINE_VERSION = Domain("pipeline_version", "nb:hasPipelineVersion") +PIPELINE_NAME = Domain("pipeline_name", "nb:hasPipelineName") PROJECT = Domain("project", "nb:hasSamples") @@ -115,6 +118,8 @@ def create_query( min_num_phenotypic_sessions: Optional[int] = None, assessment: Optional[str] = None, image_modal: Optional[str] = None, + pipeline_version: Optional[str] = None, + pipeline_name: Optional[str] = None, ) -> str: """ Creates a SPARQL query using a query template and filters it using the input parameters. @@ -139,6 +144,10 @@ def create_query( Non-imaging assessment completed by subjects, by default None. 
image_modal : str, optional Imaging modality of subject scans, by default None. + pipeline_version : str, optional + Pipeline version of subject scans, by default None. + pipeline_name : str, optional + Pipeline name of subject scans, by default None. Returns ------- @@ -206,10 +215,23 @@ def create_query( "\n" + f"FILTER (?{IMAGE_MODAL.var} = {image_modal})." ) + pipeline_filters = "" + if pipeline_version is not None: + pipeline_filters += ( + "\n" + + f'FILTER (?{PIPELINE_VERSION.var} = "{pipeline_version}").' # Wrap with quotes + ) + + if pipeline_name is not None: + pipeline_filters += ( + "\n" + f"FILTER (?{PIPELINE_NAME.var} = {pipeline_name})." + ) + query_string = textwrap.dedent( f""" SELECT DISTINCT ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?age ?sex - ?diagnosis ?subject_group ?num_matching_phenotypic_sessions ?num_matching_imaging_sessions ?session_id ?session_type ?assessment ?image_modal ?session_file_path + ?diagnosis ?subject_group ?num_matching_phenotypic_sessions ?num_matching_imaging_sessions + ?session_id ?session_type ?assessment ?image_modal ?session_file_path ?pipeline_version ?pipeline_name WHERE {{ ?dataset_uuid a nb:Dataset; nb:hasLabel ?dataset_name; @@ -229,6 +251,13 @@ def create_query( OPTIONAL {{?session nb:hasDiagnosis ?diagnosis.}} OPTIONAL {{?session nb:isSubjectGroup ?subject_group.}} OPTIONAL {{?session nb:hasAssessment ?assessment.}} + + OPTIONAL {{ + ?session nb:hasCompletedPipeline ?pipeline. + ?pipeline nb:hasPipelineVersion ?pipeline_version. + ?pipeline nb:hasPipelineName ?pipeline_name. + {pipeline_filters} + }} {{ SELECT ?subject (count(distinct ?phenotypic_session) as ?num_matching_phenotypic_sessions) WHERE {{ @@ -260,6 +289,7 @@ def create_query( }} """ ) + print(query_string) # The query defined above will return all subject-level attributes from the graph. If RETURN_AGG variable has been set to true, # wrap query in an aggregating statement so data returned from graph include only attributes needed for dataset-level aggregate metadata. From 18172e009b84242092e9521f87f8a59397d3a24d Mon Sep 17 00:00:00 2001 From: rmanaem Date: Tue, 1 Oct 2024 19:34:57 -0400 Subject: [PATCH 04/17] Updated crud.get function --- app/api/crud.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/app/api/crud.py b/app/api/crud.py index 876b82c..c53137b 100644 --- a/app/api/crud.py +++ b/app/api/crud.py @@ -100,6 +100,8 @@ async def get( min_num_phenotypic_sessions: int, assessment: str, image_modal: str, + pipeline_version: str, + pipeline_name: str, ) -> list[CohortQueryResponse]: """ Sends SPARQL queries to the graph API via httpx POST requests for subject-session or dataset metadata @@ -125,6 +127,10 @@ async def get( Non-imaging assessment completed by subjects. image_modal : str Imaging modality of subject scans. + pipeline_version : str + Pipeline version of subject scans. + pipeline_name : str + Pipeline name of subject scans. 
Returns ------- @@ -142,6 +148,8 @@ async def get( min_num_imaging_sessions=min_num_imaging_sessions, assessment=assessment, image_modal=image_modal, + pipeline_version=pipeline_version, + pipeline_name=pipeline_name, ) ) @@ -184,6 +192,8 @@ async def get( "subject_group": "first", "assessment": lambda x: list(x.unique()), "image_modal": lambda x: list(x.unique()), + "pipeline_version": lambda x: list(x.unique()), + "pipeline_name": lambda x: list(x.unique()), "session_file_path": "first", } ) @@ -224,6 +234,16 @@ async def get( group["image_modal"].notna() ].unique() ), + pipeline_version=list( + group["pipeline_version"][ + group["pipeline_version"].notna() + ].unique() + ), + pipeline_name=list( + group["pipeline_name"][ + group["pipeline_name"].notna() + ].unique() + ), ) ) From 165ee588c0de5eaad7605d2bd6ff363487e89553 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Wed, 2 Oct 2024 13:21:06 -0400 Subject: [PATCH 05/17] Updated `query_string` in `create_query` function --- app/api/utility.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/app/api/utility.py b/app/api/utility.py index c706754..0917177 100644 --- a/app/api/utility.py +++ b/app/api/utility.py @@ -215,15 +215,14 @@ def create_query( "\n" + f"FILTER (?{IMAGE_MODAL.var} = {image_modal})." ) - pipeline_filters = "" if pipeline_version is not None: - pipeline_filters += ( + imaging_session_level_filters += ( "\n" + f'FILTER (?{PIPELINE_VERSION.var} = "{pipeline_version}").' # Wrap with quotes ) if pipeline_name is not None: - pipeline_filters += ( + imaging_session_level_filters += ( "\n" + f"FILTER (?{PIPELINE_NAME.var} = {pipeline_name})." ) @@ -253,11 +252,11 @@ def create_query( OPTIONAL {{?session nb:hasAssessment ?assessment.}} OPTIONAL {{ - ?session nb:hasCompletedPipeline ?pipeline. - ?pipeline nb:hasPipelineVersion ?pipeline_version. - ?pipeline nb:hasPipelineName ?pipeline_name. - {pipeline_filters} - }} + ?session nb:hasCompletedPipeline ?pipeline. + ?pipeline nb:hasPipelineVersion ?pipeline_version. + ?pipeline nb:hasPipelineName ?pipeline_name. + }} + {{ SELECT ?subject (count(distinct ?phenotypic_session) as ?num_matching_phenotypic_sessions) WHERE {{ @@ -282,6 +281,11 @@ def create_query( ?imaging_session a nb:ImagingSession; nb:hasAcquisition/nb:hasContrastType ?image_modal. }} + OPTIONAL {{ + ?imaging_session nb:hasCompletedPipeline ?pipeline. + ?pipeline nb:hasPipelineVersion ?pipeline_version. + ?pipeline nb:hasPipelineName ?pipeline_name. + }} {imaging_session_level_filters} }} GROUP BY ?subject }} @@ -289,7 +293,6 @@ def create_query( }} """ ) - print(query_string) # The query defined above will return all subject-level attributes from the graph. If RETURN_AGG variable has been set to true, # wrap query in an aggregating statement so data returned from graph include only attributes needed for dataset-level aggregate metadata. 
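Note on the filters added above: the pipeline query parameters are composed into plain SPARQL FILTER clauses that get interpolated into the query template in app/api/utility.py. The standalone sketch below only mirrors the f-string logic shown in the diff (it is not part of the patch); the Domain field names are illustrative, since the diff itself only relies on the .var attribute.

from collections import namedtuple

# Mirrors the Domain named tuples defined in app/api/utility.py (patch 03);
# the field names here are illustrative.
Domain = namedtuple("Domain", ["var", "pred"])
PIPELINE_NAME = Domain("pipeline_name", "nb:hasPipelineName")
PIPELINE_VERSION = Domain("pipeline_version", "nb:hasPipelineVersion")


def build_pipeline_filters(pipeline_name=None, pipeline_version=None) -> str:
    """Sketch of how the pipeline query parameters become SPARQL FILTER lines."""
    filters = ""
    if pipeline_name is not None:
        # Controlled terms (e.g. "np:freesurfer") are interpolated unquoted.
        filters += f"\nFILTER (?{PIPELINE_NAME.var} = {pipeline_name})."
    if pipeline_version is not None:
        # Version strings are plain literals, so they are wrapped in quotes.
        filters += f'\nFILTER (?{PIPELINE_VERSION.var} = "{pipeline_version}").'
    return filters


print(build_pipeline_filters("np:freesurfer", "7.3.2"))
# FILTER (?pipeline_name = np:freesurfer).
# FILTER (?pipeline_version = "7.3.2").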
From c2097c6492dce9be2cbc5ac2b91aa177e3b5697e Mon Sep 17 00:00:00 2001 From: rmanaem Date: Wed, 2 Oct 2024 13:34:05 -0400 Subject: [PATCH 06/17] Updated the `default_neurobagel_query` --- docs/default_neurobagel_query.rq | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/default_neurobagel_query.rq b/docs/default_neurobagel_query.rq index ae2ff14..132ca6c 100644 --- a/docs/default_neurobagel_query.rq +++ b/docs/default_neurobagel_query.rq @@ -6,7 +6,7 @@ PREFIX nidm: PREFIX snomed: SELECT DISTINCT ?dataset_uuid ?dataset_name ?dataset_portal_uri ?sub_id ?age ?sex -?diagnosis ?subject_group ?num_matching_phenotypic_sessions ?num_matching_imaging_sessions ?session_id ?session_type ?assessment ?image_modal ?session_file_path +?diagnosis ?subject_group ?num_matching_phenotypic_sessions ?num_matching_imaging_sessions ?session_id ?session_type ?assessment ?image_modal ?session_file_path ?pipeline_name ?pipeline_version WHERE { ?dataset_uuid a nb:Dataset; nb:hasLabel ?dataset_name; @@ -41,6 +41,11 @@ WHERE { } GROUP BY ?subject } + OPTIONAL { + ?session nb:hasCompletedPipeline ?pipeline. + ?pipeline nb:hasPipelineVersion ?pipeline_version. + ?pipeline nb:hasPipelineName ?pipeline_name. + } { SELECT ?subject (count(distinct ?imaging_session) as ?num_matching_imaging_sessions) WHERE { @@ -50,6 +55,11 @@ WHERE { ?imaging_session a nb:ImagingSession; nb:hasAcquisition/nb:hasContrastType ?image_modal. } + OPTIONAL { + ?imaging_session nb:hasCompletedPipeline ?pipeline. + ?pipeline nb:hasPipelineVersion ?pipeline_version. + ?pipeline nb:hasPipelineName ?pipeline_name. + } } GROUP BY ?subject } From d22570f466b681d54f849773ccf131ca15a652dd Mon Sep 17 00:00:00 2001 From: rmanaem Date: Wed, 2 Oct 2024 13:35:49 -0400 Subject: [PATCH 07/17] Refactored `query_string` in `create_query` function --- app/api/utility.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/app/api/utility.py b/app/api/utility.py index 0917177..70b4aee 100644 --- a/app/api/utility.py +++ b/app/api/utility.py @@ -250,13 +250,6 @@ def create_query( OPTIONAL {{?session nb:hasDiagnosis ?diagnosis.}} OPTIONAL {{?session nb:isSubjectGroup ?subject_group.}} OPTIONAL {{?session nb:hasAssessment ?assessment.}} - - OPTIONAL {{ - ?session nb:hasCompletedPipeline ?pipeline. - ?pipeline nb:hasPipelineVersion ?pipeline_version. - ?pipeline nb:hasPipelineName ?pipeline_name. - }} - {{ SELECT ?subject (count(distinct ?phenotypic_session) as ?num_matching_phenotypic_sessions) WHERE {{ @@ -272,6 +265,12 @@ def create_query( {phenotypic_session_level_filters} }} GROUP BY ?subject }} + + OPTIONAL {{ + ?session nb:hasCompletedPipeline ?pipeline. + ?pipeline nb:hasPipelineVersion ?pipeline_version. + ?pipeline nb:hasPipelineName ?pipeline_name. + }} {{ SELECT ?subject (count(distinct ?imaging_session) as ?num_matching_imaging_sessions) WHERE {{ @@ -282,7 +281,7 @@ def create_query( nb:hasAcquisition/nb:hasContrastType ?image_modal. }} OPTIONAL {{ - ?imaging_session nb:hasCompletedPipeline ?pipeline. + ?imaging_session nb:hasCompletedPipeline ?pipeline. ?pipeline nb:hasPipelineVersion ?pipeline_version. ?pipeline nb:hasPipelineName ?pipeline_name. 
}} From 00ffaaa1e1ada5bdd35c4aee7aaddf8cac5be11e Mon Sep 17 00:00:00 2001 From: rmanaem Date: Wed, 2 Oct 2024 14:14:12 -0400 Subject: [PATCH 08/17] Updated test fixtures --- tests/conftest.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index dd92310..18fb019 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -73,6 +73,8 @@ def test_data(): "http://purl.org/nidash/nidm#T1Weighted", "http://purl.org/nidash/nidm#T2Weighted", ], + "pipeline_version": ["7.3.2", "23.1.3"], + "pipeline_name": ["freesurfer", "fmriprep"], }, { "dataset_uuid": "http://neurobagel.org/vocab/67890", @@ -86,6 +88,8 @@ def test_data(): "http://purl.org/nidash/nidm#FlowWeighted", "http://purl.org/nidash/nidm#T1Weighted", ], + "pipeline_version": ["7.3.2"], + "pipeline_name": ["freesurfer"], }, ] @@ -178,6 +182,8 @@ async def _mock_get_with_exception( min_num_phenotypic_sessions, assessment, image_modal, + pipeline_version, + pipeline_name, ): raise request.param @@ -206,6 +212,8 @@ async def _mock_get( min_num_phenotypic_sessions, assessment, image_modal, + pipeline_version, + pipeline_name, ): return request.param @@ -226,6 +234,8 @@ async def _mock_successful_get( min_num_phenotypic_sessions, assessment, image_modal, + pipeline_version, + pipeline_name, ): return test_data From 3b2e46be6fe7996323e9102b330e944a9f033e6e Mon Sep 17 00:00:00 2001 From: rmanaem Date: Wed, 2 Oct 2024 14:15:43 -0400 Subject: [PATCH 09/17] Added tests for `pipeline_version` and `pipeline_name` --- tests/test_query.py | 84 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/tests/test_query.py b/tests/test_query.py index 2e92ec0..3e7a91b 100644 --- a/tests/test_query.py +++ b/tests/test_query.py @@ -477,6 +477,90 @@ def test_get_undefined_prefix_image_modal( assert response.status_code == 500 +@pytest.mark.parametrize("valid_pipeline_version", ["7.3.2", "23.1.3"]) +def test_get_valid_pipeline_version( + test_app, + mock_successful_get, + monkeypatch, + mock_auth_header, + set_mock_verify_token, + valid_pipeline_version, +): + """Given a valid pipeline version, returns a 200 status code and a non-empty list of results.""" + + monkeypatch.setattr(crud, "get", mock_successful_get) + response = test_app.get( + f"{ROUTE}?pipeline_version={valid_pipeline_version}", + headers=mock_auth_header, + ) + assert response.status_code == 200 + assert response.json() != [] + + +@pytest.mark.parametrize("mock_get", [None], indirect=True) +@pytest.mark.parametrize("invalid_pipeline_version", ["latest", "7.2", "23"]) +def test_get_invalid_pipeline_version( + test_app, + mock_get, + monkeypatch, + mock_auth_header, + set_mock_verify_token, + invalid_pipeline_version, +): + """Given an invalid pipeline version, returns a 422 status code.""" + + monkeypatch.setattr(crud, "get", mock_get) + response = test_app.get( + f"{ROUTE}?pipeline_version={invalid_pipeline_version}", + headers=mock_auth_header, + ) + assert response.status_code == 422 + + +@pytest.mark.parametrize( + "valid_pipeline_name", ["np:fmriprep", "np:freesurfer"] +) +def test_get_valid_pipeline_name( + test_app, + mock_successful_get, + monkeypatch, + mock_auth_header, + set_mock_verify_token, + valid_pipeline_name, +): + """Given a valid pipeline name, returns a 200 status code and a non-empty list of results.""" + + monkeypatch.setattr(crud, "get", mock_successful_get) + response = test_app.get( + f"{ROUTE}?pipeline_name={valid_pipeline_name}", + headers=mock_auth_header, + ) + assert 
response.status_code == 200 + assert response.json() != [] + + +@pytest.mark.parametrize("mock_get", [None], indirect=True) +@pytest.mark.parametrize( + "invalid_pipeline_name", ["n2p:coolpipeline", "apple", "some_thing:cool"] +) +def test_get_invalid_pipeline_name( + test_app, + mock_get, + monkeypatch, + mock_auth_header, + set_mock_verify_token, + invalid_pipeline_name, +): + """Given an invalid pipeline name, returns a 422 status code.""" + + monkeypatch.setattr(crud, "get", mock_get) + response = test_app.get( + f"{ROUTE}?pipeline_name={invalid_pipeline_name}", + headers=mock_auth_header, + ) + assert response.status_code == 422 + + def test_aggregate_query_response_structure( test_app, set_test_credentials, From d377c32cb13c6c4152f402b82fb77b74fb1e267a Mon Sep 17 00:00:00 2001 From: rmanaem Date: Thu, 3 Oct 2024 14:35:16 -0400 Subject: [PATCH 10/17] Updated `np` namespace --- app/api/utility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/api/utility.py b/app/api/utility.py index 70b4aee..f17cd48 100644 --- a/app/api/utility.py +++ b/app/api/utility.py @@ -50,7 +50,7 @@ "ncit": "http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#", "nidm": "http://purl.org/nidash/nidm#", "snomed": "http://purl.bioontology.org/ontology/SNOMEDCT/", - "np": "https://github.com/nipoppy/pipeline-catalog/tree/main/processing", + "np": "https://github.com/nipoppy/pipeline-catalog/tree/main/processing/", } # Store domains in named tuples From 8f1ee1f7d205461f6c4d289c9174a3b87fe38574 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Thu, 3 Oct 2024 14:35:49 -0400 Subject: [PATCH 11/17] Refactored the pipeline information representation in the response --- app/api/crud.py | 33 +++++++++++++++++++++++---------- app/api/models.py | 6 ++---- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/app/api/crud.py b/app/api/crud.py index c53137b..5a72ef4 100644 --- a/app/api/crud.py +++ b/app/api/crud.py @@ -16,6 +16,8 @@ "dataset_uuid", "dataset_name", "dataset_portal_uri", + "pipeline_version", + "pipeline_name", ] @@ -212,8 +214,28 @@ async def get( all_nan_columns ].replace({np.nan: None}) + subject_data["pipeline"] = subject_data.apply( + lambda row: { + name: versions + for name, versions in zip( + row["pipeline_name"], row["pipeline_version"] + ) + if pd.notna(name) and pd.notna(versions) + }, + axis=1, + ) + subject_data = list(subject_data.to_dict("records")) + pipeline_info = {} + for name, version in zip( + group["pipeline_name"], group["pipeline_version"] + ): + if pd.notna(name) and pd.notna(version): + if name not in pipeline_info: + pipeline_info[name] = set() + pipeline_info[name].add(version) + response_obj.append( CohortQueryResponse( dataset_uuid=dataset_uuid, @@ -234,16 +256,7 @@ async def get( group["image_modal"].notna() ].unique() ), - pipeline_version=list( - group["pipeline_version"][ - group["pipeline_version"].notna() - ].unique() - ), - pipeline_name=list( - group["pipeline_name"][ - group["pipeline_name"].notna() - ].unique() - ), + pipeline=pipeline_info, ) ) diff --git a/app/api/models.py b/app/api/models.py index 2b58113..6b5aca3 100644 --- a/app/api/models.py +++ b/app/api/models.py @@ -70,8 +70,7 @@ class SessionResponse(BaseModel): assessment: list image_modal: list session_file_path: Optional[str] - pipeline_version: list - pipeline_name: list + pipeline: dict class CohortQueryResponse(BaseModel): @@ -86,8 +85,7 @@ class CohortQueryResponse(BaseModel): num_matching_subjects: int subject_data: Union[list[SessionResponse], str] image_modals: list - 
pipeline_version: list - pipeline_name: list + pipeline: dict class DataElementURI(str, Enum): From 46595c596eac39a4dc04b581a1f18187da147e18 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Thu, 3 Oct 2024 14:37:07 -0400 Subject: [PATCH 12/17] Updated test fixture --- tests/conftest.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 18fb019..78b0a91 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -73,8 +73,7 @@ def test_data(): "http://purl.org/nidash/nidm#T1Weighted", "http://purl.org/nidash/nidm#T2Weighted", ], - "pipeline_version": ["7.3.2", "23.1.3"], - "pipeline_name": ["freesurfer", "fmriprep"], + "pipeline": {"freesurfer": ["7.3.2", "2.8.2"]}, }, { "dataset_uuid": "http://neurobagel.org/vocab/67890", @@ -88,8 +87,10 @@ def test_data(): "http://purl.org/nidash/nidm#FlowWeighted", "http://purl.org/nidash/nidm#T1Weighted", ], - "pipeline_version": ["7.3.2"], - "pipeline_name": ["freesurfer"], + "pipeline": { + "freesurfer": ["7.3.2", "2.1.2"], + "fmriprep": ["23.1.3", "22.1.4"], + }, }, ] From aace3cd3118008aba1774cd7a9d6127ab1f5ef54 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Thu, 3 Oct 2024 14:40:40 -0400 Subject: [PATCH 13/17] Fixed the test --- app/api/crud.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/api/crud.py b/app/api/crud.py index 5a72ef4..8f925cc 100644 --- a/app/api/crud.py +++ b/app/api/crud.py @@ -168,6 +168,7 @@ async def get( response_obj = [] dataset_cols = ["dataset_uuid", "dataset_name"] + pipeline_info = {} if not results_df.empty: for (dataset_uuid, dataset_name), group in results_df.groupby( by=dataset_cols @@ -227,7 +228,6 @@ async def get( subject_data = list(subject_data.to_dict("records")) - pipeline_info = {} for name, version in zip( group["pipeline_name"], group["pipeline_version"] ): From 6cc67f8cd056585694aafc500273e6aad44274f9 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Thu, 3 Oct 2024 15:07:48 -0400 Subject: [PATCH 14/17] Fixed the pipeline info at the subject level --- app/api/crud.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/app/api/crud.py b/app/api/crud.py index 8f925cc..04f9055 100644 --- a/app/api/crud.py +++ b/app/api/crud.py @@ -217,11 +217,14 @@ async def get( subject_data["pipeline"] = subject_data.apply( lambda row: { - name: versions - for name, versions in zip( - row["pipeline_name"], row["pipeline_version"] + name: list( + group.loc[ + group["pipeline_name"] == name, + "pipeline_version", + ].unique() ) - if pd.notna(name) and pd.notna(versions) + for name in set(row["pipeline_name"]) + if pd.notna(name) }, axis=1, ) From 8098cd8d4e54a9f64cdf847351d913b80ec81254 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Wed, 9 Oct 2024 08:01:32 -0400 Subject: [PATCH 15/17] Addressed requested changes --- app/api/crud.py | 85 ++++++++++++++++++++++++++-------------- app/api/models.py | 9 +++-- app/api/routers/query.py | 2 +- app/api/utility.py | 42 ++++++++++++-------- tests/conftest.py | 8 ++-- tests/test_query.py | 32 ++++++++++++++- 6 files changed, 123 insertions(+), 55 deletions(-) diff --git a/app/api/crud.py b/app/api/crud.py index 04f9055..32bff06 100644 --- a/app/api/crud.py +++ b/app/api/crud.py @@ -102,8 +102,8 @@ async def get( min_num_phenotypic_sessions: int, assessment: str, image_modal: str, - pipeline_version: str, pipeline_name: str, + pipeline_version: str, ) -> list[CohortQueryResponse]: """ Sends SPARQL queries to the graph API via httpx POST requests for subject-session or dataset metadata @@ 
-129,10 +129,10 @@ async def get( Non-imaging assessment completed by subjects. image_modal : str Imaging modality of subject scans. - pipeline_version : str - Pipeline version of subject scans. pipeline_name : str - Pipeline name of subject scans. + Name of pipeline run on subject scans. + pipeline_version : str + Version of pipeline run on subject scans. Returns ------- @@ -168,7 +168,8 @@ async def get( response_obj = [] dataset_cols = ["dataset_uuid", "dataset_name"] - pipeline_info = {} + dataset_available_pipeline_info = {} + results_df.to_csv("output.csv", index=False) if not results_df.empty: for (dataset_uuid, dataset_name), group in results_df.groupby( by=dataset_cols @@ -195,13 +196,53 @@ async def get( "subject_group": "first", "assessment": lambda x: list(x.unique()), "image_modal": lambda x: list(x.unique()), - "pipeline_version": lambda x: list(x.unique()), - "pipeline_name": lambda x: list(x.unique()), "session_file_path": "first", } ) ) + pipeline_data = ( + group.groupby( + [ + "sub_id", + "session_id", + "session_type", + "pipeline_name", + ] + ) + .agg( + { + "pipeline_version": lambda x: list( + x.dropna().unique() + ) + } + ) + .reset_index() + ) + + pipeline_dict = ( + pipeline_data.groupby( + ["sub_id", "session_id", "session_type"] + ) + .apply( + lambda x: dict( + zip(x["pipeline_name"], x["pipeline_version"]) + ) + ) + .reset_index(name="completed_pipelines") + ) + + subject_data = pd.merge( + subject_data.reset_index(drop=True), + pipeline_dict, + on=["sub_id", "session_id", "session_type"], + how="left", + ) + + subject_data["completed_pipelines"] = subject_data[ + "completed_pipelines" + ].apply(lambda x: x if isinstance(x, dict) else {}) + # TODO: Revisit this as there may be a more elegant solution. # The following code replaces columns with all NaN values with values of None, to ensure they show up in the final JSON as `null`. 
# This is needed as the above .agg() seems to turn NaN into None for object-type columns (which have some non-missing values) @@ -215,29 +256,15 @@ async def get( all_nan_columns ].replace({np.nan: None}) - subject_data["pipeline"] = subject_data.apply( - lambda row: { - name: list( - group.loc[ - group["pipeline_name"] == name, - "pipeline_version", - ].unique() - ) - for name in set(row["pipeline_name"]) - if pd.notna(name) - }, - axis=1, - ) - subject_data = list(subject_data.to_dict("records")) - for name, version in zip( - group["pipeline_name"], group["pipeline_version"] - ): - if pd.notna(name) and pd.notna(version): - if name not in pipeline_info: - pipeline_info[name] = set() - pipeline_info[name].add(version) + dataset_available_pipeline_info = ( + group.groupby("pipeline_name", dropna=True)[ + "pipeline_version" + ] + .apply(lambda x: list(x.dropna().unique())) + .to_dict() + ) response_obj.append( CohortQueryResponse( @@ -259,7 +286,7 @@ async def get( group["image_modal"].notna() ].unique() ), - pipeline=pipeline_info, + available_pipelines=dataset_available_pipeline_info, ) ) diff --git a/app/api/models.py b/app/api/models.py index 6b5aca3..27c6fa4 100644 --- a/app/api/models.py +++ b/app/api/models.py @@ -8,7 +8,7 @@ from pydantic import BaseModel, constr, root_validator CONTROLLED_TERM_REGEX = r"^[a-zA-Z]+[:]\S+$" -VERSION_REGEX = r"^\d+\.\d+\.\d+$" +VERSION_REGEX = r"^([A-Za-z0-9-]+)\.(\d+)\.([A-Za-z0-9-]+)$" class QueryModel(BaseModel): @@ -23,8 +23,9 @@ class QueryModel(BaseModel): min_num_phenotypic_sessions: int = Query(default=None, ge=0) assessment: constr(regex=CONTROLLED_TERM_REGEX) = None image_modal: constr(regex=CONTROLLED_TERM_REGEX) = None - pipeline_version: constr(regex=VERSION_REGEX) = None pipeline_name: constr(regex=CONTROLLED_TERM_REGEX) = None + # TODO: Check back if validating using a regex is too restrictive + pipeline_version: constr(regex=VERSION_REGEX) = None @root_validator() def check_maxage_ge_minage(cls, values): @@ -70,7 +71,7 @@ class SessionResponse(BaseModel): assessment: list image_modal: list session_file_path: Optional[str] - pipeline: dict + completed_pipelines: dict class CohortQueryResponse(BaseModel): @@ -85,7 +86,7 @@ class CohortQueryResponse(BaseModel): num_matching_subjects: int subject_data: Union[list[SessionResponse], str] image_modals: list - pipeline: dict + available_pipelines: dict class DataElementURI(str, Enum): diff --git a/app/api/routers/query.py b/app/api/routers/query.py index e2c0e95..449518c 100644 --- a/app/api/routers/query.py +++ b/app/api/routers/query.py @@ -52,8 +52,8 @@ async def get_query( query.min_num_phenotypic_sessions, query.assessment, query.image_modal, - query.pipeline_version, query.pipeline_name, + query.pipeline_version, ) return response diff --git a/app/api/utility.py b/app/api/utility.py index f17cd48..deebc09 100644 --- a/app/api/utility.py +++ b/app/api/utility.py @@ -62,8 +62,8 @@ IS_CONTROL = Domain("subject_group", "nb:isSubjectGroup") ASSESSMENT = Domain("assessment", "nb:hasAssessment") IMAGE_MODAL = Domain("image_modal", "nb:hasContrastType") -PIPELINE_VERSION = Domain("pipeline_version", "nb:hasPipelineVersion") PIPELINE_NAME = Domain("pipeline_name", "nb:hasPipelineName") +PIPELINE_VERSION = Domain("pipeline_version", "nb:hasPipelineVersion") PROJECT = Domain("project", "nb:hasSamples") @@ -118,8 +118,8 @@ def create_query( min_num_phenotypic_sessions: Optional[int] = None, assessment: Optional[str] = None, image_modal: Optional[str] = None, - pipeline_version: Optional[str] = None, 
pipeline_name: Optional[str] = None, + pipeline_version: Optional[str] = None, ) -> str: """ Creates a SPARQL query using a query template and filters it using the input parameters. @@ -144,10 +144,10 @@ def create_query( Non-imaging assessment completed by subjects, by default None. image_modal : str, optional Imaging modality of subject scans, by default None. - pipeline_version : str, optional - Pipeline version of subject scans, by default None. pipeline_name : str, optional - Pipeline name of subject scans, by default None. + Name of pipeline run on subject scans, by default None. + pipeline_version : str, optional + Version of pipeline run on subject scans, by default None. Returns ------- @@ -212,18 +212,21 @@ def create_query( imaging_session_level_filters = "" if image_modal is not None: imaging_session_level_filters += ( - "\n" + f"FILTER (?{IMAGE_MODAL.var} = {image_modal})." + "\n" + + f"{create_bound_filter(IMAGE_MODAL.var)} && ?{IMAGE_MODAL.var} = {image_modal})." ) - if pipeline_version is not None: + if pipeline_name is not None: imaging_session_level_filters += ( "\n" - + f'FILTER (?{PIPELINE_VERSION.var} = "{pipeline_version}").' # Wrap with quotes + + f"{create_bound_filter(PIPELINE_NAME.var)} && (?{PIPELINE_NAME.var} = {pipeline_name})." ) - if pipeline_name is not None: + # In case a user specified the pipeline version but not the name + if pipeline_version is not None: imaging_session_level_filters += ( - "\n" + f"FILTER (?{PIPELINE_NAME.var} = {pipeline_name})." + "\n" + + f'{create_bound_filter(PIPELINE_VERSION.var)} && ?{PIPELINE_VERSION.var} = "{pipeline_version}").' # Wrap with quotes to avoid workaround implicit conversion ) query_string = textwrap.dedent( @@ -277,13 +280,18 @@ def create_query( ?subject a nb:Subject. OPTIONAL {{ ?subject nb:hasSession ?imaging_session. - ?imaging_session a nb:ImagingSession; - nb:hasAcquisition/nb:hasContrastType ?image_modal. - }} - OPTIONAL {{ - ?imaging_session nb:hasCompletedPipeline ?pipeline. - ?pipeline nb:hasPipelineVersion ?pipeline_version. - ?pipeline nb:hasPipelineName ?pipeline_name. + ?imaging_session a nb:ImagingSession. + + OPTIONAL {{ + ?imaging_session nb:hasAcquisition ?acquisition. + ?acquisition nb:hasContrastType ?image_modal. + }} + + OPTIONAL {{ + ?imaging_session nb:hasCompletedPipeline ?pipeline. + ?pipeline nb:hasPipelineVersion ?pipeline_version; + nb:hasPipelineName ?pipeline_name. 
+ }} }} {imaging_session_level_filters} }} GROUP BY ?subject diff --git a/tests/conftest.py b/tests/conftest.py index 78b0a91..6e1b243 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -73,7 +73,9 @@ def test_data(): "http://purl.org/nidash/nidm#T1Weighted", "http://purl.org/nidash/nidm#T2Weighted", ], - "pipeline": {"freesurfer": ["7.3.2", "2.8.2"]}, + "available_pipelines": { + "freesurfer": ["7.3.2", "2.8.2", "8.7.0-rc"] + }, }, { "dataset_uuid": "http://neurobagel.org/vocab/67890", @@ -87,9 +89,9 @@ def test_data(): "http://purl.org/nidash/nidm#FlowWeighted", "http://purl.org/nidash/nidm#T1Weighted", ], - "pipeline": { + "available_pipelines": { "freesurfer": ["7.3.2", "2.1.2"], - "fmriprep": ["23.1.3", "22.1.4"], + "fmriprep": ["23.1.3", "22.1.4", "v2.0.1"], }, }, ] diff --git a/tests/test_query.py b/tests/test_query.py index 3e7a91b..b9bf087 100644 --- a/tests/test_query.py +++ b/tests/test_query.py @@ -477,7 +477,9 @@ def test_get_undefined_prefix_image_modal( assert response.status_code == 500 -@pytest.mark.parametrize("valid_pipeline_version", ["7.3.2", "23.1.3"]) +@pytest.mark.parametrize( + "valid_pipeline_version", ["7.3.2", "23.1.3", "v2.0.1", "8.7.0-rc"] +) def test_get_valid_pipeline_version( test_app, mock_successful_get, @@ -561,6 +563,34 @@ def test_get_invalid_pipeline_name( assert response.status_code == 422 +@pytest.mark.parametrize( + "valid_pipeline_name", + ["np:fmriprep", "np:fmriprep", "np:freesurfer", "np:freesurfer"], +) +@pytest.mark.parametrize( + "valid_pipeline_version", + ["v2.0.1", "23.1.3", "7.3.2", "8.7.0-rc"], +) +def test_get_valid_pipeline_name_version( + test_app, + mock_successful_get, + monkeypatch, + mock_auth_header, + set_mock_verify_token, + valid_pipeline_name, + valid_pipeline_version, +): + """Given a valid pipeline name and version, returns a 200 status code and a non-empty list of results.""" + + monkeypatch.setattr(crud, "get", mock_successful_get) + response = test_app.get( + f"{ROUTE}?pipeline_name={valid_pipeline_name}&pipeline_version={valid_pipeline_version}", + headers=mock_auth_header, + ) + assert response.status_code == 200 + assert response.json() != [] + + def test_aggregate_query_response_structure( test_app, set_test_credentials, From 3de38414eb15fdac37ace420ac00d7baed35b757 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Wed, 9 Oct 2024 08:06:25 -0400 Subject: [PATCH 16/17] Left over clean up --- app/api/crud.py | 1 - 1 file changed, 1 deletion(-) diff --git a/app/api/crud.py b/app/api/crud.py index 32bff06..85c6845 100644 --- a/app/api/crud.py +++ b/app/api/crud.py @@ -169,7 +169,6 @@ async def get( response_obj = [] dataset_cols = ["dataset_uuid", "dataset_name"] dataset_available_pipeline_info = {} - results_df.to_csv("output.csv", index=False) if not results_df.empty: for (dataset_uuid, dataset_name), group in results_df.groupby( by=dataset_cols From c324a17f3fd5a45219e0f8eb0140fbec99bbed15 Mon Sep 17 00:00:00 2001 From: rmanaem Date: Wed, 9 Oct 2024 13:51:16 -0400 Subject: [PATCH 17/17] Addressed second wave requested changes --- app/api/crud.py | 12 +++++++----- app/api/utility.py | 2 +- tests/test_query.py | 13 +++++++------ 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/app/api/crud.py b/app/api/crud.py index 85c6845..62e8736 100644 --- a/app/api/crud.py +++ b/app/api/crud.py @@ -200,14 +200,15 @@ async def get( ) ) - pipeline_data = ( + pipeline_grouped_data = ( group.groupby( [ "sub_id", "session_id", "session_type", "pipeline_name", - ] + ], + dropna=True, ) .agg( { @@ -219,8 +220,8 @@ async 
def get( .reset_index() ) - pipeline_dict = ( - pipeline_data.groupby( + session_completed_pipeline_data = ( + pipeline_grouped_data.groupby( ["sub_id", "session_id", "session_type"] ) .apply( @@ -233,11 +234,12 @@ async def get( subject_data = pd.merge( subject_data.reset_index(drop=True), - pipeline_dict, + session_completed_pipeline_data, on=["sub_id", "session_id", "session_type"], how="left", ) + # ensure that for sessions missing completed pipeline info, completed_pipelines is still a dict rather than null/nan subject_data["completed_pipelines"] = subject_data[ "completed_pipelines" ].apply(lambda x: x if isinstance(x, dict) else {}) diff --git a/app/api/utility.py b/app/api/utility.py index deebc09..795526b 100644 --- a/app/api/utility.py +++ b/app/api/utility.py @@ -290,7 +290,7 @@ def create_query( OPTIONAL {{ ?imaging_session nb:hasCompletedPipeline ?pipeline. ?pipeline nb:hasPipelineVersion ?pipeline_version; - nb:hasPipelineName ?pipeline_name. + nb:hasPipelineName ?pipeline_name. }} }} {imaging_session_level_filters} diff --git a/tests/test_query.py b/tests/test_query.py index b9bf087..9376156 100644 --- a/tests/test_query.py +++ b/tests/test_query.py @@ -564,12 +564,13 @@ def test_get_invalid_pipeline_name( @pytest.mark.parametrize( - "valid_pipeline_name", - ["np:fmriprep", "np:fmriprep", "np:freesurfer", "np:freesurfer"], -) -@pytest.mark.parametrize( - "valid_pipeline_version", - ["v2.0.1", "23.1.3", "7.3.2", "8.7.0-rc"], + "valid_pipeline_name, valid_pipeline_version", + [ + ("np:fmriprep", "v2.0.1"), + ("np:fmriprep", "23.1.3"), + ("np:freesurfer", "7.3.2"), + ("np:freesurfer", "8.7.0-rc"), + ], ) def test_get_valid_pipeline_name_version( test_app,
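Taken together, these patches let API consumers filter datasets by processing pipeline. A minimal client-side usage sketch follows; the base URL, route path, and token are placeholders, the parameter names come from the updated QueryModel, and the response fields follow the CohortQueryResponse model changed above.

import httpx

# Placeholder deployment details -- substitute real values for an actual instance.
API_URL = "http://localhost:8000/query"
HEADERS = {"Authorization": "Bearer <token>"}

params = {
    "pipeline_name": "np:freesurfer",  # controlled term, must match CONTROLLED_TERM_REGEX
    "pipeline_version": "7.3.2",       # must match VERSION_REGEX
}

response = httpx.get(API_URL, params=params, headers=HEADERS)
response.raise_for_status()

for dataset in response.json():
    # available_pipelines maps each pipeline name found in the dataset to the
    # versions present, e.g. {"freesurfer": ["7.3.2", "2.8.2"]}
    print(dataset["dataset_name"], dataset["available_pipelines"])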