diff --git a/app/api/crud.py b/app/api/crud.py index fbc206b..d147d98 100644 --- a/app/api/crud.py +++ b/app/api/crud.py @@ -60,9 +60,13 @@ async def get( if image_modal: params["image_modal"] = image_modal - for node_url in util.parse_nodes_as_list(util.NEUROBAGEL_NODES): + nodes_dict = util.parse_nodes_as_dict(util.NEUROBAGEL_NODES) + for node_url, node_name in nodes_dict.items(): response = util.send_get_request(node_url + "query/", params) + for result in response: + result["node_name"] = node_name + cross_node_results += response return cross_node_results @@ -85,16 +89,17 @@ async def get_terms(data_element_URI: str): cross_node_results = [] params = {data_element_URI: data_element_URI} - for node_url in util.parse_nodes_as_list(util.NEUROBAGEL_NODES): + for node_url in util.parse_nodes_as_dict(util.NEUROBAGEL_NODES).keys(): response = util.send_get_request( node_url + "attributes/" + data_element_URI, params ) cross_node_results.append(response) - unique_terms = set( - term - for list_of_terms in cross_node_results - for term in list_of_terms[data_element_URI] - ) - return {data_element_URI: list(unique_terms)} + unique_terms_dict = {} + + for list_of_terms in cross_node_results: + for term in list_of_terms[data_element_URI]: + unique_terms_dict[term["TermURL"]] = term + + return {data_element_URI: list(unique_terms_dict.values())} diff --git a/app/api/models.py b/app/api/models.py index 99658cd..d701843 100644 --- a/app/api/models.py +++ b/app/api/models.py @@ -1,4 +1,7 @@ """Data models.""" + +from typing import Optional, Union + from pydantic import BaseModel CONTROLLED_TERM_REGEX = r"^[a-zA-Z]+[:]\S+$" @@ -15,3 +18,18 @@ class QueryModel(BaseModel): min_num_sessions: int = None assessment: str = None image_modal: str = None + + +class CohortQueryResponse(BaseModel): + """Data model for query results for one matching dataset (i.e., a cohort).""" + + node_name: str + dataset_uuid: str + # dataset_file_path: str # TODO: Revisit this field once we have 
datasets without imaging info/sessions. + dataset_name: str + dataset_portal_uri: Optional[str] + dataset_total_subjects: int + records_protected: bool + num_matching_subjects: int + subject_data: Union[list[dict], str] + image_modals: list diff --git a/app/api/routers/nodes.py b/app/api/routers/nodes.py new file mode 100644 index 0000000..910aa9e --- /dev/null +++ b/app/api/routers/nodes.py @@ -0,0 +1,14 @@ +from fastapi import APIRouter + +from .. import utility as util + +router = APIRouter(prefix="/nodes", tags=["nodes"]) + + +@router.get("/") +async def get_nodes(): + """Returns a list of dicts of available nodes, where each dict contains the node's name (NodeName) and API URL (ApiURL).""" + return [ + {"NodeName": v, "ApiURL": k} + for k, v in util.parse_nodes_as_dict(util.NEUROBAGEL_NODES).items() + ] diff --git a/app/api/routers/query.py b/app/api/routers/query.py index e4fad74..db3b6b3 100644 --- a/app/api/routers/query.py +++ b/app/api/routers/query.py @@ -1,14 +1,16 @@ """Router for query path operations.""" +from typing import List + from fastapi import APIRouter, Depends from .. 
import crud -from ..models import QueryModel +from ..models import CohortQueryResponse, QueryModel router = APIRouter(prefix="/query", tags=["query"]) -@router.get("/") +@router.get("/", response_model=List[CohortQueryResponse]) async def get_query(query: QueryModel = Depends(QueryModel)): """When a GET request is sent, return list of dicts corresponding to subject-level metadata aggregated by dataset.""" response = await crud.get( diff --git a/app/api/utility.py b/app/api/utility.py index 33ddac0..c6c5698 100644 --- a/app/api/utility.py +++ b/app/api/utility.py @@ -1,23 +1,29 @@ """Constants for federation.""" import os +import re import httpx from fastapi import HTTPException # Neurobagel nodes -NEUROBAGEL_NODES = os.environ.get("NB_NODES", "https://api.neurobagel.org/") +NEUROBAGEL_NODES = os.environ.get( + "LOCAL_NB_NODES", "(https://api.neurobagel.org/, OpenNeuro)" +) -def parse_nodes_as_list(nodes: str) -> list: - """Returns user-defined Neurobagel nodes as a list. - Empty strings are filtered out, because they are falsy. +def parse_nodes_as_dict(nodes: str) -> dict: + """Returns user-defined Neurobagel nodes as a dict. + It uses a regular expression to match the url, name pairs. Makes sure node URLs end with a slash.""" - nodes_list = nodes.split(" ") - for i in range(len(nodes_list)): - if nodes_list[i] and not nodes_list[i].endswith("/"): - nodes_list[i] += "/" - return list(filter(None, nodes_list)) + pattern = re.compile(r"\((?Phttps?://[^\s]+), (?P