From 04892b4f577ea82cf703b72c19a0544063563644 Mon Sep 17 00:00:00 2001 From: Jermiah Joseph Date: Sat, 17 Feb 2024 09:19:22 -0500 Subject: [PATCH 1/9] fix: shortened default pattern --- src/nbiatoolkit/nbia.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nbiatoolkit/nbia.py b/src/nbiatoolkit/nbia.py index e829a0e..40a24b7 100644 --- a/src/nbiatoolkit/nbia.py +++ b/src/nbiatoolkit/nbia.py @@ -420,7 +420,7 @@ def downloadSeries( self, SeriesInstanceUID: Union[str, list], downloadDir: str = "./NBIA-Download", - filePattern: str = "%PatientName/%StudyDescription-%StudyDate/%SeriesNumber-%SeriesDescription-%SeriesInstanceUID/%InstanceNumber.dcm", + filePattern: str = "%PatientName/%SeriesNumber-%SeriesInstanceUID/%InstanceNumber.dcm", overwrite: bool = False, nParallel: int = 1, ) -> bool: From 214151e36ff5b0bc31c6a103e14a4b1e3b9bfb4a Mon Sep 17 00:00:00 2001 From: Jermiah Joseph Date: Sat, 24 Feb 2024 12:34:56 -0500 Subject: [PATCH 2/9] docs: fix a few cells --- docs/Tutorial.ipynb | 796 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 726 insertions(+), 70 deletions(-) diff --git a/docs/Tutorial.ipynb b/docs/Tutorial.ipynb index 39c2cc7..3946acb 100644 --- a/docs/Tutorial.ipynb +++ b/docs/Tutorial.ipynb @@ -24,7 +24,16 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/bhklab/Documents/GitHub/NBIA-toolkit/src/nbiatoolkit/nbia_cli.py:20: SyntaxWarning: invalid escape sequence '\\/'\n", + " f = \"\"\"\n" + ] + } + ], "source": [ "import nbiatoolkit\n", "from nbiatoolkit import NBIAClient\n", @@ -43,7 +52,7 @@ { "data": { "text/plain": [ - "'0.22.1'" + "'0.32.1'" ] }, "execution_count": 2, @@ -56,6 +65,42 @@ "nbiatoolkit.__version__" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Return Types\n", + "\n", + "Most functions will by default return a list of dictionaries. If you would like to return a pandas DataFrame instead you can pass in the parameter `return_type`.\n", + "\n", + "Available options are made available through the `ReturnType` Enum which can be passed in as a parameter or alternatively, the string representation of the Enum value can be passed in as a string." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[, ]\n" + ] + } + ], + "source": [ + "from nbiatoolkit.utils import ReturnType\n", + "print(list(ReturnType))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "passing in `return_type=ReturnType.DATAFRAME` or `return_type=\"dataframe\"` will return a pandas DataFrame." + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -69,46 +114,122 @@ "source": [ "### get list of collections (names only)\n", "``` python\n", - "client.getCollections(prefix: str = \"\")\n", + "client.getCollections(\n", + " prefix: str = \"\",\n", + " return_type: ReturnType | str = ReturnType.LIST)\n", "```" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Total collections: 124\n", - "['4D-Lung', 'ACRIN-6698', 'ACRIN-Contralateral-Breast-MR', 'ACRIN-FLT-Breast', 'ACRIN-NSCLC-FDG-PET']\n" + "[{'Collection': '4D-Lung'}, {'Collection': 'ACRIN-6698'}, {'Collection': 'ACRIN-Contralateral-Breast-MR'}, {'Collection': 'ACRIN-FLT-Breast'}, {'Collection': 'ACRIN-NSCLC-FDG-PET'}]\n" ] } ], "source": [ + "# To get all the collections:\n", "collections = client.getCollections()\n", - "print(\"Total collections: \", len(collections))\n", "print(collections[0:5])" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "['TCGA-BLCA', 'TCGA-BRCA', 'TCGA-CESC', 'TCGA-COAD', 'TCGA-ESCA', 'TCGA-KICH', 'TCGA-KIRC', 'TCGA-KIRP', 'TCGA-LIHC', 'TCGA-LUAD', 'TCGA-LUSC', 'TCGA-OV', 'TCGA-PRAD', 'TCGA-READ', 'TCGA-SARC', 'TCGA-STAD', 'TCGA-THCA', 'TCGA-UCEC']\n" + "[{'Collection': 'TCGA-BLCA'}, {'Collection': 'TCGA-BRCA'}, {'Collection': 'TCGA-CESC'}, {'Collection': 'TCGA-COAD'}, {'Collection': 'TCGA-ESCA'}]\n" ] } ], "source": [ + "# To get all the collections with a prefix:\n", "collections = client.getCollections(prefix = \"TCGA\")\n", - "print(collections)" + "print(collections[0:5])" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Collection
0TCGA-BLCA
1TCGA-BRCA
2TCGA-CESC
3TCGA-COAD
4TCGA-ESCA
\n", + "
" + ], + "text/plain": [ + " Collection\n", + "0 TCGA-BLCA\n", + "1 TCGA-BRCA\n", + "2 TCGA-CESC\n", + "3 TCGA-COAD\n", + "4 TCGA-ESCA" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# To get the same but as a pandas dataframe:\n", + "collections_df = client.getCollections(prefix = \"TCGA\", return_type=ReturnType.DATAFRAME)\n", + "collections_df.head()" ] }, { @@ -119,14 +240,15 @@ "\n", "``` python\n", "getCollectionDescriptions(\n", - " collectionName: str # (required)\n", + " collectionName: str, # (required)\n", + " return_type: ReturnType | str = ReturnType.LIST\n", ")\n", "```" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -153,28 +275,94 @@ "pprint(client.getCollectionDescriptions(\"TCGA-BLCA\"))" ] }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
collectionNamedescriptiondescriptionURIlastUpdated
0TCGA-BLCAThe Cancer Genome Atlas-Bladder Endothelial Ca...https://doi.org/10.7937/K9/TCIA.2016.8LNG8XDR2023-03-16
\n", + "
" + ], + "text/plain": [ + " collectionName description \\\n", + "0 TCGA-BLCA The Cancer Genome Atlas-Bladder Endothelial Ca... \n", + "\n", + " descriptionURI lastUpdated \n", + "0 https://doi.org/10.7937/K9/TCIA.2016.8LNG8XDR 2023-03-16 " + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "client.getCollectionDescriptions(\"TCGA-BLCA\", ReturnType.DATAFRAME)" + ] + }, { "cell_type": "markdown", "metadata": {}, "source": [ "### get Counts of Patients for each collection\n", - "`getCollectionPatientCount(prefx: str = \"\")`" + "``` python\n", + "getCollectionPatientCount(\n", + " prefx: str = \"\",\n", + " return_type: ReturnType | str = ReturnType.LIST\n", + ")\n", + "```" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[{'Collection': '4D-Lung', 'PatientCount': 20},\n", - " {'Collection': 'ACRIN-6698', 'PatientCount': 385},\n", - " {'Collection': 'ACRIN-Contralateral-Breast-MR', 'PatientCount': 984},\n", - " {'Collection': 'ACRIN-FLT-Breast', 'PatientCount': 83},\n", - " {'Collection': 'ACRIN-NSCLC-FDG-PET', 'PatientCount': 242}]\n" + "[{'count': '20', 'criteria': '4D-Lung'},\n", + " {'count': '385', 'criteria': 'ACRIN-6698'},\n", + " {'count': '984', 'criteria': 'ACRIN-Contralateral-Breast-MR'},\n", + " {'count': '83', 'criteria': 'ACRIN-FLT-Breast'},\n", + " {'count': '242', 'criteria': 'ACRIN-NSCLC-FDG-PET'}]\n" ] } ], @@ -185,41 +373,209 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[{'Collection': 'TCGA-BLCA', 'PatientCount': 120},\n", - " {'Collection': 'TCGA-BRCA', 'PatientCount': 139},\n", - " {'Collection': 'TCGA-CESC', 'PatientCount': 54},\n", - " {'Collection': 'TCGA-COAD', 'PatientCount': 25},\n", - " {'Collection': 'TCGA-ESCA', 'PatientCount': 16},\n", - " {'Collection': 'TCGA-KICH', 'PatientCount': 15},\n", - " {'Collection': 'TCGA-KIRC', 'PatientCount': 267},\n", - " {'Collection': 'TCGA-KIRP', 'PatientCount': 33},\n", - " {'Collection': 'TCGA-LIHC', 'PatientCount': 97},\n", - " {'Collection': 'TCGA-LUAD', 'PatientCount': 69},\n", - " {'Collection': 'TCGA-LUSC', 'PatientCount': 37},\n", - " {'Collection': 'TCGA-OV', 'PatientCount': 143},\n", - " {'Collection': 'TCGA-PRAD', 'PatientCount': 14},\n", - " {'Collection': 'TCGA-READ', 'PatientCount': 3},\n", - " {'Collection': 'TCGA-SARC', 'PatientCount': 5},\n", - " {'Collection': 'TCGA-STAD', 'PatientCount': 46},\n", - " {'Collection': 'TCGA-THCA', 'PatientCount': 6},\n", - " {'Collection': 'TCGA-UCEC', 'PatientCount': 65}]\n", - "Collection with max PatientCount: {'Collection': 'TCGA-KIRC', 'PatientCount': 267}\n" + "[{'count': '120', 'criteria': 'TCGA-BLCA'},\n", + " {'count': '139', 'criteria': 'TCGA-BRCA'},\n", + " {'count': '54', 'criteria': 'TCGA-CESC'},\n", + " {'count': '25', 'criteria': 'TCGA-COAD'},\n", + " {'count': '16', 'criteria': 'TCGA-ESCA'},\n", + " {'count': '15', 'criteria': 'TCGA-KICH'},\n", + " {'count': '267', 'criteria': 'TCGA-KIRC'},\n", + " {'count': '33', 'criteria': 'TCGA-KIRP'},\n", + " {'count': '97', 'criteria': 'TCGA-LIHC'},\n", + " {'count': '69', 'criteria': 'TCGA-LUAD'},\n", + " {'count': '37', 'criteria': 'TCGA-LUSC'},\n", + " {'count': '143', 'criteria': 'TCGA-OV'},\n", + " {'count': '14', 'criteria': 'TCGA-PRAD'},\n", + " {'count': '3', 'criteria': 'TCGA-READ'},\n", + " {'count': '5', 'criteria': 'TCGA-SARC'},\n", + " {'count': '46', 'criteria': 'TCGA-STAD'},\n", + " {'count': '6', 'criteria': 'TCGA-THCA'},\n", + " {'count': '65', 'criteria': 'TCGA-UCEC'}]\n" + ] + }, + { + "ename": "KeyError", + "evalue": "'PatientCount'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[19], line 6\u001b[0m\n\u001b[1;32m 3\u001b[0m pprint(collectionsPatientCount)\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# get the collection with max PatientCount\u001b[39;00m\n\u001b[0;32m----> 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCollection with max PatientCount: \u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;43mmax\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mcollectionsPatientCount\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mx\u001b[49m\u001b[43m:\u001b[49m\u001b[43mx\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mPatientCount\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m)\n", + "Cell \u001b[0;32mIn[19], line 6\u001b[0m, in \u001b[0;36m\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 3\u001b[0m pprint(collectionsPatientCount)\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# get the collection with max PatientCount\u001b[39;00m\n\u001b[0;32m----> 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCollection with max PatientCount: \u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28mmax\u001b[39m(collectionsPatientCount, key\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mlambda\u001b[39;00m x:\u001b[43mx\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mPatientCount\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m))\n", + "\u001b[0;31mKeyError\u001b[0m: 'PatientCount'" ] } ], "source": [ + "# use prefix to get the patient count for a specific collection and then find the collection with max patient count\n", "collectionsPatientCount = client.getCollectionPatientCount(prefix=\"TCGA\")\n", "pprint(collectionsPatientCount)\n", "\n", "# get the collection with max PatientCount\n", - "print(\"Collection with max PatientCount: \", max(collectionsPatientCount, key=lambda x:x['PatientCount']))" + "print(\"Collection with max PatientCount: \", max(collectionsPatientCount, key=lambda x: x['PatientCount']))" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
criteriacount
0TCGA-BLCA120
1TCGA-BRCA139
2TCGA-CESC54
3TCGA-COAD25
4TCGA-ESCA16
5TCGA-KICH15
6TCGA-KIRC267
7TCGA-KIRP33
8TCGA-LIHC97
9TCGA-LUAD69
10TCGA-LUSC37
11TCGA-OV143
12TCGA-PRAD14
13TCGA-READ3
14TCGA-SARC5
15TCGA-STAD46
16TCGA-THCA6
17TCGA-UCEC65
\n", + "
" + ], + "text/plain": [ + " criteria count\n", + "0 TCGA-BLCA 120\n", + "1 TCGA-BRCA 139\n", + "2 TCGA-CESC 54\n", + "3 TCGA-COAD 25\n", + "4 TCGA-ESCA 16\n", + "5 TCGA-KICH 15\n", + "6 TCGA-KIRC 267\n", + "7 TCGA-KIRP 33\n", + "8 TCGA-LIHC 97\n", + "9 TCGA-LUAD 69\n", + "10 TCGA-LUSC 37\n", + "11 TCGA-OV 143\n", + "12 TCGA-PRAD 14\n", + "13 TCGA-READ 3\n", + "14 TCGA-SARC 5\n", + "15 TCGA-STAD 46\n", + "16 TCGA-THCA 6\n", + "17 TCGA-UCEC 65" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "client.getCollectionPatientCount(prefix=\"TCGA\", return_type=ReturnType.DATAFRAME)" ] }, { @@ -227,12 +583,17 @@ "metadata": {}, "source": [ "### get Counts of Patients grouped by Body Parts\n", - "`getBodyPartCounts(collection: str = \"\", modality: str = \"\")`" + "``` python\n", + "getBodyPartCounts(\n", + " collection: str = \"\", \n", + " modality: str = \"\",\n", + " return_type: ReturnType | str = ReturnType.LIST\n", + ")" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -241,11 +602,11 @@ "text": [ "Total Number of body parts:60\n", "First 5 body parts:\n", - "[{'BodyPartExamined': 'NOT SPECIFIED', 'Count': 7839},\n", - " {'BodyPartExamined': 'ABDOMEN', 'Count': 1731},\n", - " {'BodyPartExamined': 'ABDOMEN CAVIT', 'Count': 2},\n", - " {'BodyPartExamined': 'ABDOMENPELVIC', 'Count': 2},\n", - " {'BodyPartExamined': 'ABDOMENPELVIS', 'Count': 50}]\n" + "[{'count': '7839', 'criteria': 'NOT SPECIFIED'},\n", + " {'count': '1731', 'criteria': 'ABDOMEN'},\n", + " {'count': '2', 'criteria': 'ABDOMEN CAVIT'},\n", + " {'count': '2', 'criteria': 'ABDOMENPELVIC'},\n", + " {'count': '50', 'criteria': 'ABDOMENPELVIS'}]\n" ] } ], @@ -259,30 +620,130 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Total Number of body parts:1\n", - "Number of patients for each body part in 4D-Lung collection:\n", - "[{'BodyPartExamined': 'LUNG', 'Count': 20}]\n" + "Total Number of body parts:11\n" ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
criteriacount
0NOT SPECIFIED239
1ABDOMEN91
2ABDOMENPELVIS2
3BRAIN W/WO_AH321
4CHEST124
5CHEST (THORAX)1
6CHESTABDOMEN1
7CHESTABDPELVIS1
8HEAD1
9OUTSIDE FIL1
10THORAX2
\n", + "
" + ], + "text/plain": [ + " criteria count\n", + "0 NOT SPECIFIED 239\n", + "1 ABDOMEN 91\n", + "2 ABDOMENPELVIS 2\n", + "3 BRAIN W/WO_AH32 1\n", + "4 CHEST 124\n", + "5 CHEST (THORAX) 1\n", + "6 CHESTABDOMEN 1\n", + "7 CHESTABDPELVIS 1\n", + "8 HEAD 1\n", + "9 OUTSIDE FIL 1\n", + "10 THORAX 2" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "bodypart_count = client.getBodyPartCounts(Collection = '4D-Lung')\n", + "bodypart_count = client.getBodyPartCounts(Collection = 'ACRIN-NSCLC-FDG-PET', return_type=ReturnType.DATAFRAME)\n", "print(\"Total Number of body parts:\" + str(len(bodypart_count)))\n", - "\n", - "print(\"Number of patients for each body part in 4D-Lung collection:\")\n", - "pprint(bodypart_count)" + "bodypart_count" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -290,21 +751,84 @@ "output_type": "stream", "text": [ "Total Number of body parts:5\n", - "Number of patients for each body part in 4D-Lung collection:\n", - "[{'BodyPartExamined': 'NOT SPECIFIED', 'Count': 194},\n", - " {'BodyPartExamined': 'ABDOMEN', 'Count': 11},\n", - " {'BodyPartExamined': 'CHEST', 'Count': 54},\n", - " {'BodyPartExamined': 'HEART', 'Count': 2},\n", - " {'BodyPartExamined': 'THORAX', 'Count': 1}]\n" + "Number of patients for each body part in 4D-Lung collection:\n" ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
criteriacount
0NOT SPECIFIED194
1ABDOMEN11
2CHEST54
3HEART2
4THORAX1
\n", + "
" + ], + "text/plain": [ + " criteria count\n", + "0 NOT SPECIFIED 194\n", + "1 ABDOMEN 11\n", + "2 CHEST 54\n", + "3 HEART 2\n", + "4 THORAX 1" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "bodypart_count = client.getBodyPartCounts(Collection = 'NSCLC Radiogenomics', Modality='CT')\n", + "bodypart_count = client.getBodyPartCounts(Collection = 'NSCLC Radiogenomics', Modality='CT', return_type=ReturnType.DATAFRAME)\n", "print(\"Total Number of body parts:\" + str(len(bodypart_count)))\n", "\n", "print(\"Number of patients for each body part in 4D-Lung collection:\")\n", - "pprint(bodypart_count)" + "bodypart_count" ] }, { @@ -322,13 +846,14 @@ "``` python\n", "getPatients(\n", " Collection: str = \"\", # (optional)\n", + " return_type: ReturnType | str = ReturnType.LIST\n", ")\n", "````" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -361,6 +886,130 @@ "pprint(patients[0:2])" ] }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total patients in NSCLC-Radiomics: 422\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PatientIdPatientNamePatientSexCollectionPhantomSpeciesCodeSpeciesDescription
0LUNG1-001LUNG1-001MNSCLC-RadiomicsNO337915000Homo sapiens
1LUNG1-007LUNG1-007MNSCLC-RadiomicsNO337915000Homo sapiens
2LUNG1-029LUNG1-029FNSCLC-RadiomicsNO337915000Homo sapiens
3LUNG1-036LUNG1-036FNSCLC-RadiomicsNO337915000Homo sapiens
4LUNG1-056LUNG1-056FNSCLC-RadiomicsNO337915000Homo sapiens
\n", + "
" + ], + "text/plain": [ + " PatientId PatientName PatientSex Collection Phantom SpeciesCode \\\n", + "0 LUNG1-001 LUNG1-001 M NSCLC-Radiomics NO 337915000 \n", + "1 LUNG1-007 LUNG1-007 M NSCLC-Radiomics NO 337915000 \n", + "2 LUNG1-029 LUNG1-029 F NSCLC-Radiomics NO 337915000 \n", + "3 LUNG1-036 LUNG1-036 F NSCLC-Radiomics NO 337915000 \n", + "4 LUNG1-056 LUNG1-056 F NSCLC-Radiomics NO 337915000 \n", + "\n", + " SpeciesDescription \n", + "0 Homo sapiens \n", + "1 Homo sapiens \n", + "2 Homo sapiens \n", + "3 Homo sapiens \n", + "4 Homo sapiens " + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "patients_df = client.getPatients(Collection = \"NSCLC-Radiomics\", return_type=ReturnType.DATAFRAME)\n", + "print(f\"Total patients in NSCLC-Radiomics: {len(patients_df)}\")\n", + "patients_df.head()" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -370,13 +1019,14 @@ "getPatientsByCollectionAndModality(\n", " Collection: str, # (required)\n", " Modality: str, # (required)\n", + " return_type: ReturnType | str = ReturnType.LIST\n", ")\n", - "````" + "```" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -384,7 +1034,11 @@ "output_type": "stream", "text": [ "Total patients in TCGA-BLCA with modality CT: 107\n", - "['TCGA-CU-A3QU', 'TCGA-CU-A3KJ', 'TCGA-CU-A0YR', 'TCGA-CU-A0YO', 'TCGA-CU-A3YL']\n" + "[{'PatientId': 'TCGA-CU-A3QU'},\n", + " {'PatientId': 'TCGA-CU-A3KJ'},\n", + " {'PatientId': 'TCGA-CU-A0YR'},\n", + " {'PatientId': 'TCGA-CU-A0YO'},\n", + " {'PatientId': 'TCGA-CU-A3YL'}]\n" ] } ], @@ -406,7 +1060,8 @@ " Date: str, # (required) accepted formats:\n", " # \"%Y-%m-%d\", \"%Y/%m/%d\", \"%Y%m%d\", \n", " # \"%m/%d/%Y\", \"%d/%m/%Y\", \"%d-%m-%Y\"\n", - ")\n" + ")\n", + "```" ] }, { @@ -476,12 +1131,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "\n", "``` python\n", "getStudies(\n", " Collection: str, # (required)\n", " PatientID: str = \"\", # (optional)\n", " StudyInstanceUID: str = \"\" # (optional)\n", - " ) \n", + ") \n", "```" ] }, From b119d9321d9f20a46ab169fa8ac57348ed292c17 Mon Sep 17 00:00:00 2001 From: Jermiah Joseph Date: Sun, 25 Feb 2024 13:02:04 -0500 Subject: [PATCH 3/9] fix: return type in patientCount and Refactor response parsing in NBIAClient class --- src/nbiatoolkit/nbia.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/src/nbiatoolkit/nbia.py b/src/nbiatoolkit/nbia.py index 158a3bd..8e8df7a 100644 --- a/src/nbiatoolkit/nbia.py +++ b/src/nbiatoolkit/nbia.py @@ -248,10 +248,10 @@ def getCollectionDescriptions( ), "The response from the API is empty. Please check the collection name." response[0] = { - "collectionName": response[0]["collectionName"], - "description": clean_html(response[0]["description"]), - "descriptionURI": response[0]["descriptionURI"], - "lastUpdated": convertMillis( + "Collection": response[0]["collectionName"], + "Description": clean_html(response[0]["description"]), + "DescriptionURI": response[0]["descriptionURI"], + "LastUpdated": convertMillis( millis=int(response[0]["collectionDescTimestamp"]) ), } @@ -340,16 +340,22 @@ def getCollectionPatientCount( ) -> List[dict[Any, Any]] | pd.DataFrame: returnType: ReturnType = self._get_return(return_type) + response: List[dict[Any, Any]] response = self.query_api(NBIA_ENDPOINTS.GET_COLLECTION_PATIENT_COUNT) - if prefix: - response = [ - response_dict - for response_dict in response - if response_dict["criteria"].lower().startswith(prefix.lower()) - ] + parsed_response: List[dict[Any, Any]] = [] - return conv_response_list(response, returnType) + for collection in response: + Collection = collection["criteria"] + if Collection.lower().startswith(prefix.lower()): + parsed_response.append( + { + "Collection": Collection, + "PatientCount": collection["count"], + } + ) + + return conv_response_list(parsed_response, returnType) def getBodyPartCounts( self, @@ -361,6 +367,7 @@ def getBodyPartCounts( PARAMS = self.parsePARAMS(locals()) + response: List[dict[Any, Any]] response = self.query_api( endpoint=NBIA_ENDPOINTS.GET_BODY_PART_PATIENT_COUNT, params=PARAMS ) @@ -378,6 +385,7 @@ def getStudies( PARAMS: dict = self.parsePARAMS(locals()) + response: List[dict[Any, Any]] response = self.query_api(endpoint=NBIA_ENDPOINTS.GET_STUDIES, params=PARAMS) return conv_response_list(response, returnType) @@ -398,6 +406,7 @@ def getSeries( PARAMS: dict = self.parsePARAMS(locals()) + response: List[dict[Any, Any]] response = self.query_api(endpoint=NBIA_ENDPOINTS.GET_SERIES, params=PARAMS) return conv_response_list(response, returnType) @@ -459,6 +468,7 @@ def getDICOMTags( returnType: ReturnType = self._get_return(return_type) PARAMS = self.parsePARAMS({"SeriesUID": SeriesInstanceUID}) + response: List[dict[Any, Any]] response = self.query_api(endpoint=NBIA_ENDPOINTS.GET_DICOM_TAGS, params=PARAMS) return conv_response_list(response, returnType) From 1c75711b9a223146b6153f5dccb752942ad6e525 Mon Sep 17 00:00:00 2001 From: Jermiah Joseph Date: Sun, 25 Feb 2024 13:29:22 -0500 Subject: [PATCH 4/9] docs: add some more detail on initialize client --- docs/InitializeClient.rst | 62 +++++++++++++++++++++++++++++++--- docs/markdowns/Installation.md | 7 ++-- src/nbiatoolkit/nbia.py | 30 +++++++++++++++- 3 files changed, 89 insertions(+), 10 deletions(-) diff --git a/docs/InitializeClient.rst b/docs/InitializeClient.rst index 3553e70..3dcc207 100644 --- a/docs/InitializeClient.rst +++ b/docs/InitializeClient.rst @@ -1,6 +1,10 @@ -Initialize Client +Setup ----------------- -By default, nbiatoolkit uses the guest account to access all collections in the API that are publicly available. + +Initialize Client +^^^^^^^^^^^^^^^^^ + +By default, the `NBIAClient` uses the guest account to access all collections in the API that are publicly available. If you have a user account that has been granted specific access to a collection, you can use your credentials to initialize the client when performing a query. @@ -71,11 +75,58 @@ This is especially useful when using the client in a script with a predefined sc The context manager is not available in the command line interface. +Return Types of Methods +^^^^^^^^^^^^^^^^^^^^^^^ +By default, most functions that query the API for metadata will return a list of dictionaries. +Available return types are made available through the `ReturnType` Enum which can be passed in as a parameter, +or its string representation. The available options as of writing are "list", and "dataframe". + +If you would like to return the data as a pandas DataFrame, you can pass the +`return_type` argument to the respective class method: + +.. tabs:: + + .. tab:: Python + + .. code-block:: python + + from nbiatoolkit import NBIAClient + from nbiatoolkit.utils import ReturnType + + client = NBIAClient() + client.getCollections(prefix='TCGA', return_type='dataframe') + # equivalent to + client.getCollections(prefix='TCGA', return_type=ReturnType.DATAFRAME) + + .. tab:: Command Line + + Return types are not yet available in the command line interface. + Feel free to open an issue on the GitHub repository if you would like to see this feature added. + + +Alternatively, you can set the return type for all methods by passing the `return_type` argument to the NBIAClient class. + +.. tabs:: + + .. tab:: Python + + .. code-block:: python + + from nbiatoolkit import NBIAClient + + client = NBIAClient(return_type='dataframe') + client.getCollections(prefix='TCGA') + + .. tab:: Command Line + + Return types are not yet available in the command line interface. + Feel free to open an issue on the GitHub repository if you would like to see this feature added. + Logging ^^^^^^^ The client can be initialized with a log level to control the verbosity of the logs. This is primarily intended for debugging and development purposes. -The default log level is 'INFO' and the available log levels are 'DEBUG', 'INFO', 'WARNING', 'ERROR'. +The default log level is 'INFO' and the available log levels are `DEBUG`, `INFO`, `WARNING`, `ERROR`. .. tabs:: @@ -85,9 +136,10 @@ The default log level is 'INFO' and the available log levels are 'DEBUG', 'INFO' from nbiatoolkit import NBIAClient - client = NBIAClient(log_level='DEBUG) + client = NBIAClient(log_level='DEBUG') client.getCollections(prefix='TCGA') .. tab:: Command Line - Logging is not yet available in the command line interface. Feel free to open an issue on the GitHub repository if you would like to see this feature added. + Logging is not yet available in the command line interface. + Feel free to open an issue on the GitHub repository if you would like to see this feature added. diff --git a/docs/markdowns/Installation.md b/docs/markdowns/Installation.md index 3f359a9..159d3f4 100644 --- a/docs/markdowns/Installation.md +++ b/docs/markdowns/Installation.md @@ -1,9 +1,8 @@ # Installation -> [!WARNING] -> `nbiatoolkit` is currently under development and is not guaranteed to be stable. -> Please refer to the [1.0.0 Stable Release Milestone](https://github.com/jjjermiah/nbia-toolkit/milestone/1) for the roadmap -> to the first stable release. +`nbiatoolkit` is currently under development and is not guaranteed to be stable. +Please refer to the [1.0.0 Stable Release Milestone](https://github.com/jjjermiah/nbia-toolkit/milestone/1) for the roadmap +to the first stable release. ## PyPi diff --git a/src/nbiatoolkit/nbia.py b/src/nbiatoolkit/nbia.py index 158a3bd..978ece8 100644 --- a/src/nbiatoolkit/nbia.py +++ b/src/nbiatoolkit/nbia.py @@ -36,6 +36,20 @@ def conv_response_list( response_json: List[dict[Any, Any]], return_type: ReturnType, ) -> List[dict[Any, Any]] | pd.DataFrame: + """ + Convert a response JSON to a list or a pandas DataFrame based on the specified return type. + + Args: + response_json (List[dict[Any, Any]]): The response JSON to be converted. + return_type (ReturnType): The desired return type (LIST or DATAFRAME). + + Returns: + List[dict[Any, Any]] | pd.DataFrame: The converted response in the specified return type. + + Raises: + AssertionError: If the response JSON is not a list. + + """ assert isinstance(response_json, list), "The response JSON must be a list" if return_type == ReturnType.LIST: @@ -53,7 +67,21 @@ def downloadSingleSeries( base_url: NBIA_ENDPOINTS, log: Logger, ): - + """ + Downloads a single series from the NBIA server. + + Args: + SeriesInstanceUID (str): The unique identifier of the series. + downloadDir (str): The directory where the series will be downloaded. + filePattern (str): The desired pattern for the downloaded files. + overwrite (bool): Flag indicating whether to overwrite existing files. + api_headers (dict[str, str]): The headers to be included in the API request. + base_url (NBIA_ENDPOINTS): The base URL of the NBIA server. + log (Logger): The logger object for logging messages. + + Returns: + bool: True if the series is downloaded and sorted successfully, False otherwise. + """ # create query_url query_url: str = base_url.value + NBIA_ENDPOINTS.DOWNLOAD_SERIES.value From f0f882458ad44200f4531a881b57f50afdc46dc3 Mon Sep 17 00:00:00 2001 From: Jermiah Joseph Date: Sun, 25 Feb 2024 14:20:58 -0500 Subject: [PATCH 5/9] fix: Update NBIA toolkit dependencies and fix base URL references. --- src/nbiatoolkit/auth.py | 10 +++++----- src/nbiatoolkit/nbia.py | 23 ++++++++++++++--------- src/nbiatoolkit/utils/__init__.py | 3 ++- src/nbiatoolkit/utils/nbia_endpoints.py | 17 +++++++++++++++-- tests/test_new_query_functionality.py | 2 +- tests/test_tcga_collections_separate.py | 6 +++--- 6 files changed, 40 insertions(+), 21 deletions(-) diff --git a/src/nbiatoolkit/auth.py b/src/nbiatoolkit/auth.py index 6b6612a..1af3cf5 100644 --- a/src/nbiatoolkit/auth.py +++ b/src/nbiatoolkit/auth.py @@ -1,7 +1,7 @@ import requests import time from typing import Union, Tuple -from .utils import NBIA_ENDPOINTS +from .utils import NBIA_ENDPOINTS, NBIA_BASE_URLS from cryptography.fernet import Fernet @@ -113,7 +113,7 @@ def __init__( username: str = "nbia_guest", password: str = "", client_id: str = "NBIA", - base_url: Union[str, NBIA_ENDPOINTS] = NBIA_ENDPOINTS.NBIA, + base_url: str | NBIA_BASE_URLS = NBIA_BASE_URLS.NBIA, ) -> None: """ Initialize the OAuth2 class. @@ -127,7 +127,7 @@ def __init__( The password for authentication. Default is an empty string. client_id : str, optional The client ID for authentication. Default is "NBIA". - base_url : str or NBIA_ENDPOINTS, optional. Default is NBIA_ENDPOINTS.NBIA + base_url : str or NBIA_BASE_URLS, optional. Default is NBIA_BASE_URLS.NBIA """ @@ -143,7 +143,7 @@ def __init__( key=self._fernet_key, username=username, password=password ) - if isinstance(base_url, NBIA_ENDPOINTS): + if isinstance(base_url, NBIA_BASE_URLS): self.base_url = base_url.value else: self.base_url = base_url @@ -292,7 +292,7 @@ def logout(self) -> None: if not self.access_token: return None - query_url = NBIA_ENDPOINTS.LOGOUT_URL.value + query_url = NBIA_BASE_URLS.LOGOUT_URL.value response = requests.get(query_url, headers=self.api_headers) response.raise_for_status() diff --git a/src/nbiatoolkit/nbia.py b/src/nbiatoolkit/nbia.py index 978ece8..e9fc1ab 100644 --- a/src/nbiatoolkit/nbia.py +++ b/src/nbiatoolkit/nbia.py @@ -11,6 +11,7 @@ from logging import Logger from .utils import ( NBIA_ENDPOINTS, + NBIA_BASE_URLS, validateMD5, clean_html, convertMillis, @@ -64,7 +65,7 @@ def downloadSingleSeries( filePattern: str, overwrite: bool, api_headers: dict[str, str], - base_url: NBIA_ENDPOINTS, + base_url: NBIA_BASE_URLS, log: Logger, ): """ @@ -123,12 +124,16 @@ def downloadSingleSeries( class NBIAClient: - """ - The NBIAClient class is a wrapper around the NBIA REST API. It provides - methods to query the API and download series. + """A client for interacting with the NBIA API. + + The NBIAClient class provides a high-level interface for querying the NBIA API and downloading DICOM series. - The default authentication uses the guest account. If you have a username - and password, you can pass them to the constructor. + Attributes: + OAuth_client (OAuth2): The OAuth2 client used for authentication. + headers (dict[str, str]): The API headers. + base_url (NBIA_ENDPOINTS): The base URL for API requests. + logger (Logger): The logger for logging client events. + return_type (str): The current return type for API responses. """ def __init__( @@ -152,7 +157,7 @@ def __init__( "Content-Type": "application/json", } - self._base_url: NBIA_ENDPOINTS = NBIA_ENDPOINTS.NBIA + self._base_url: NBIA_BASE_URLS = NBIA_BASE_URLS.NBIA self._return_type: ReturnType = ( return_type if isinstance(return_type, ReturnType) @@ -175,11 +180,11 @@ def headers(self): # create a setter for the base_url in case user want to use NLST @property - def base_url(self) -> NBIA_ENDPOINTS: + def base_url(self) -> NBIA_BASE_URLS: return self._base_url @base_url.setter - def base_url(self, nbia_url: NBIA_ENDPOINTS) -> None: + def base_url(self, nbia_url: NBIA_BASE_URLS) -> None: self._base_url = nbia_url @property diff --git a/src/nbiatoolkit/utils/__init__.py b/src/nbiatoolkit/utils/__init__.py index 8157520..676ecba 100644 --- a/src/nbiatoolkit/utils/__init__.py +++ b/src/nbiatoolkit/utils/__init__.py @@ -1,4 +1,4 @@ -from .nbia_endpoints import NBIA_ENDPOINTS +from .nbia_endpoints import NBIA_ENDPOINTS, NBIA_BASE_URLS from .md5 import validateMD5 from .parsers import ( convertMillis, @@ -10,6 +10,7 @@ __all__ = [ "NBIA_ENDPOINTS", + "NBIA_BASE_URLS", "validateMD5", "convertMillis", "clean_html", diff --git a/src/nbiatoolkit/utils/nbia_endpoints.py b/src/nbiatoolkit/utils/nbia_endpoints.py index 3f7177d..b65f06e 100644 --- a/src/nbiatoolkit/utils/nbia_endpoints.py +++ b/src/nbiatoolkit/utils/nbia_endpoints.py @@ -1,15 +1,28 @@ from enum import Enum -class NBIA_ENDPOINTS(Enum): +class NBIA_BASE_URLS(Enum): """ - This enum class defines the NBIA endpoints used in the NBIA toolkit. + This enum class defines the NBIA base URLs used in the NBIA toolkit. """ NBIA = "https://services.cancerimagingarchive.net/nbia-api/services/" NLST = "https://nlst.cancerimagingarchive.net/nbia-api/services/" LOGOUT_URL = "https://services.cancerimagingarchive.net/nbia-api/logout" + # Helper functions + def __str__(self): + return self.value + + def _format(self): + return self.value.split("/")[-1] + + +class NBIA_ENDPOINTS(Enum): + """ + This enum class defines the NBIA endpoints used in the NBIA toolkit. + """ + GET_COLLECTIONS = "v2/getCollectionValues" GET_COLLECTION_PATIENT_COUNT = "getCollectionValuesAndCounts" GET_COLLECTION_DESCRIPTIONS = "getCollectionDescriptions" diff --git a/tests/test_new_query_functionality.py b/tests/test_new_query_functionality.py index c347541..23c1fca 100644 --- a/tests/test_new_query_functionality.py +++ b/tests/test_new_query_functionality.py @@ -35,7 +35,7 @@ def test_nbia_properties(nbia_client2): assert "Content-Type" in nbia_client.headers.keys() assert nbia_client.headers["Content-Type"] == "application/json" - assert nbia_client.base_url == NBIA_ENDPOINTS.NBIA + assert nbia_client.base_url == NBIA_BASE_URLS.NBIA assert nbia_client.logger is not None diff --git a/tests/test_tcga_collections_separate.py b/tests/test_tcga_collections_separate.py index 5acaf79..5ad58d6 100644 --- a/tests/test_tcga_collections_separate.py +++ b/tests/test_tcga_collections_separate.py @@ -42,9 +42,9 @@ def test_nbia_properties(nbia_context_manager): def test_break(nbia_client_tobreak): - assert nbia_client_tobreak.base_url == NBIA_ENDPOINTS.NBIA - nbia_client_tobreak.base_url = NBIA_ENDPOINTS.NLST - assert nbia_client_tobreak.base_url == NBIA_ENDPOINTS.NLST + assert nbia_client_tobreak.base_url == NBIA_BASE_URLS.NBIA + nbia_client_tobreak.base_url = NBIA_BASE_URLS.NLST + assert nbia_client_tobreak.base_url == NBIA_BASE_URLS.NLST def test_getModalityValues(nbia_client, tcga_collections): From 4a7e8919a46100fee7680b0c657508daab101851 Mon Sep 17 00:00:00 2001 From: Jermiah Joseph Date: Sun, 25 Feb 2024 14:49:09 -0500 Subject: [PATCH 6/9] docs: update --- docs/_html_notebooks/test_setup.ipynb | 63 ------------------- docs/_static/css/custom.css | 20 ++++++ docs/conf.py | 18 ++++++ docs/index.md | 6 +- docs/requirements.txt | 3 + .../1_InitializeClient.rst} | 35 ++++++++--- docs/tutorial_files/2_ExploreCollections.rst | 9 +++ 7 files changed, 79 insertions(+), 75 deletions(-) delete mode 100644 docs/_html_notebooks/test_setup.ipynb create mode 100644 docs/_static/css/custom.css rename docs/{InitializeClient.rst => tutorial_files/1_InitializeClient.rst} (84%) create mode 100644 docs/tutorial_files/2_ExploreCollections.rst diff --git a/docs/_html_notebooks/test_setup.ipynb b/docs/_html_notebooks/test_setup.ipynb deleted file mode 100644 index 22d5422..0000000 --- a/docs/_html_notebooks/test_setup.ipynb +++ /dev/null @@ -1,63 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['TCGA-BLCA',\n", - " 'TCGA-BRCA',\n", - " 'TCGA-CESC',\n", - " 'TCGA-COAD',\n", - " 'TCGA-ESCA',\n", - " 'TCGA-KICH',\n", - " 'TCGA-KIRC',\n", - " 'TCGA-KIRP',\n", - " 'TCGA-LIHC',\n", - " 'TCGA-LUAD',\n", - " 'TCGA-LUSC',\n", - " 'TCGA-OV',\n", - " 'TCGA-PRAD',\n", - " 'TCGA-READ',\n", - " 'TCGA-SARC',\n", - " 'TCGA-STAD',\n", - " 'TCGA-THCA',\n", - " 'TCGA-UCEC']" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from nbiatoolkit import NBIAClient\n", - "client.getCollections(prefix='TCGA')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.7" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs/_static/css/custom.css b/docs/_static/css/custom.css new file mode 100644 index 0000000..5464703 --- /dev/null +++ b/docs/_static/css/custom.css @@ -0,0 +1,20 @@ +.sphinx-tabs-panel { + background: none; + border: none; + color:aliceblue; +} + +.sphinx-tabs-tab{ + color: #1D5C87; + /* font color */ + background: none; + +} + + + +.sphinx-tabs-tab[aria-selected="false"]{ + color: white; + /* font color */ + background: none; +} diff --git a/docs/conf.py b/docs/conf.py index 6d9fea1..1c2314c 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -4,6 +4,7 @@ # list see the documentation: # https://www.sphinx-doc.org/en/master/usage/configuration.html +import html import os import sys @@ -22,9 +23,11 @@ extensions = [ "myst_nb", "autoapi.extension", + "sphinx.ext.autodoc", "sphinx.ext.napoleon", "sphinx.ext.viewcode", "sphinx_tabs.tabs", + "sphinx_exec_code", ] autoapi_dirs = ["../src/nbiatoolkit"] @@ -33,11 +36,26 @@ # This pattern also affects html_static_path and html_extra_path. exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "_html_notebooks"] + +exec_code_working_dir = ".." +exec_code_source_folders = ["../src"] + + # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # +# html_theme = "piccolo_theme" html_theme = "sphinx_rtd_theme" +# # user starts in dark mode +# default_dark_mode = True + html_static_path = ["_static"] + +# html_css_files = [ +# "css/custom.css", +# ] +# def setup(app): +# app.add_css_file("css/custom.css") diff --git a/docs/index.md b/docs/index.md index 32ecfce..cf2a2a7 100755 --- a/docs/index.md +++ b/docs/index.md @@ -7,10 +7,12 @@ markdowns/NBIA.md markdowns/Installation.md -InitializeClient.rst -Tutorial.ipynb +tutorial_files/1_InitializeClient.rst +tutorial_files/2_ExploreCollections.rst markdowns/CHANGELOG.md markdowns/CONTRIBUTING.md markdowns/CONDUCT.md autoapi/index ``` + + diff --git a/docs/requirements.txt b/docs/requirements.txt index 535eb3b..e458e87 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -2,3 +2,6 @@ myst-nb sphinx-autoapi sphinx-rtd-theme sphinx-tabs +sphinx_exec_code==0.12 +piccolo_theme +sphinx-rtd-dark-mode diff --git a/docs/InitializeClient.rst b/docs/tutorial_files/1_InitializeClient.rst similarity index 84% rename from docs/InitializeClient.rst rename to docs/tutorial_files/1_InitializeClient.rst index 3dcc207..0b9207c 100644 --- a/docs/InitializeClient.rst +++ b/docs/tutorial_files/1_InitializeClient.rst @@ -8,6 +8,8 @@ By default, the `NBIAClient` uses the guest account to access all collections in If you have a user account that has been granted specific access to a collection, you can use your credentials to initialize the client when performing a query. + + .. tabs:: .. tab:: Python @@ -19,12 +21,14 @@ initialize the client when performing a query. .. tab:: Guest Account - .. code-block:: python + .. exec_code:: from nbiatoolkit import NBIAClient client = NBIAClient() - client.getCollections(prefix='TCGA') + collections = client.getCollections(prefix='TCGA') + + print(collections[0:5]) .. tab:: Your Account @@ -33,7 +37,8 @@ initialize the client when performing a query. from nbiatoolkit import NBIAClient client = NBIAClient(username = "", password = "") - client.getCollections(prefix='TCGA') + collections = client.getCollections(prefix='TCGA') + .. tab:: Command Line @@ -64,12 +69,14 @@ This is especially useful when using the client in a script with a predefined sc .. tab:: Python - .. code-block:: python + .. exec_code:: from nbiatoolkit import NBIAClient with NBIAClient() as client: - client.getCollections(prefix='TCGA') + collections = client.getCollections(prefix='TCGA') + + print(collections[0:5]) .. tab:: Command Line @@ -88,15 +95,21 @@ If you would like to return the data as a pandas DataFrame, you can pass the .. tab:: Python - .. code-block:: python + .. exec_code:: from nbiatoolkit import NBIAClient from nbiatoolkit.utils import ReturnType client = NBIAClient() - client.getCollections(prefix='TCGA', return_type='dataframe') + collections_df = client.getCollections( + prefix='TCGA', return_type='dataframe' + ) # equivalent to - client.getCollections(prefix='TCGA', return_type=ReturnType.DATAFRAME) + collections_df = client.getCollections( + prefix='TCGA', return_type=ReturnType.DATAFRAME + ) + + print(collections_df.head()) .. tab:: Command Line @@ -110,12 +123,14 @@ Alternatively, you can set the return type for all methods by passing the `retur .. tab:: Python - .. code-block:: python + .. exec_code:: from nbiatoolkit import NBIAClient client = NBIAClient(return_type='dataframe') - client.getCollections(prefix='TCGA') + collections_df = client.getCollections(prefix='TCGA') + + print(collections_df.head()) .. tab:: Command Line diff --git a/docs/tutorial_files/2_ExploreCollections.rst b/docs/tutorial_files/2_ExploreCollections.rst new file mode 100644 index 0000000..a9a06eb --- /dev/null +++ b/docs/tutorial_files/2_ExploreCollections.rst @@ -0,0 +1,9 @@ +Metadata Query Methods +---------------------- + + +Collection Methods +^^^^^^^^^^^^^^^^^^ + +.. autoclass:: nbiatoolkit.NBIAClient + :members: From b4da2019ffe8d0e1d6060bada400b558838aeb99 Mon Sep 17 00:00:00 2001 From: Jermiah Joseph Date: Sun, 25 Feb 2024 15:27:11 -0500 Subject: [PATCH 7/9] docs: add for collection methods --- docs/tutorial_files/1_InitializeClient.rst | 1 + docs/tutorial_files/2_ExploreCollections.rst | 31 ++++++- src/nbiatoolkit/nbia.py | 89 +++++++++++++------- 3 files changed, 87 insertions(+), 34 deletions(-) diff --git a/docs/tutorial_files/1_InitializeClient.rst b/docs/tutorial_files/1_InitializeClient.rst index 0b9207c..1345e86 100644 --- a/docs/tutorial_files/1_InitializeClient.rst +++ b/docs/tutorial_files/1_InitializeClient.rst @@ -1,6 +1,7 @@ Setup ----------------- + Initialize Client ^^^^^^^^^^^^^^^^^ diff --git a/docs/tutorial_files/2_ExploreCollections.rst b/docs/tutorial_files/2_ExploreCollections.rst index a9a06eb..07c17db 100644 --- a/docs/tutorial_files/2_ExploreCollections.rst +++ b/docs/tutorial_files/2_ExploreCollections.rst @@ -1,9 +1,34 @@ -Metadata Query Methods +API Query Methods ---------------------- Collection Methods ^^^^^^^^^^^^^^^^^^ +The simplest way to get a list of collections is to use the +:meth:`nbiatoolkit.NBIAClient.getCollections` method. +This method returns a list of all collections available in the NBIA database. -.. autoclass:: nbiatoolkit.NBIAClient - :members: +The method has the following signature: + +.. automethod:: nbiatoolkit.NBIAClient.getCollections + +Passing no parameters to the method will return a list of all collections available in the NBIA database. +Passing a `prefix` parameter will return a list of collections that match the prefix. + +.. tabs:: + + .. tab:: Python + + .. exec_code:: + + from nbiatoolkit import NBIAClient + + client = NBIAClient(return_type = "dataframe") + collections_df = client.getCollections(prefix='TCGA') + + print(f"The number of available collections is {len(collections_df)}") + + print(collections_df) + + +.. automethod:: nbiatoolkit.NBIAClient.getCollectionDescriptions diff --git a/src/nbiatoolkit/nbia.py b/src/nbiatoolkit/nbia.py index e9fc1ab..f391683 100644 --- a/src/nbiatoolkit/nbia.py +++ b/src/nbiatoolkit/nbia.py @@ -37,20 +37,16 @@ def conv_response_list( response_json: List[dict[Any, Any]], return_type: ReturnType, ) -> List[dict[Any, Any]] | pd.DataFrame: + """_summary_ + + :param response_json: _description_ + :type response_json: List[dict[Any, Any]] + :param return_type: _description_ + :type return_type: ReturnType + :return: _description_ + :rtype: List[dict[Any, Any]] | pd.DataFrame """ - Convert a response JSON to a list or a pandas DataFrame based on the specified return type. - Args: - response_json (List[dict[Any, Any]]): The response JSON to be converted. - return_type (ReturnType): The desired return type (LIST or DATAFRAME). - - Returns: - List[dict[Any, Any]] | pd.DataFrame: The converted response in the specified return type. - - Raises: - AssertionError: If the response JSON is not a list. - - """ assert isinstance(response_json, list), "The response JSON must be a list" if return_type == ReturnType.LIST: @@ -128,6 +124,13 @@ class NBIAClient: The NBIAClient class provides a high-level interface for querying the NBIA API and downloading DICOM series. + Args: + username (str, optional): The username for authentication. Defaults to "nbia_guest". + password (str, optional): The password for authentication. Defaults to an empty string. + log_level (str, optional): The log level for the logger. Defaults to "INFO". + return_type (Union[ReturnType, str], optional): The return type for API responses. + Defaults to ReturnType.LIST + Attributes: OAuth_client (OAuth2): The OAuth2 client used for authentication. headers (dict[str, str]): The API headers. @@ -251,6 +254,17 @@ def query_api( def getCollections( self, prefix: str = "", return_type: Optional[Union[ReturnType, str]] = None ) -> List[dict[Any, Any]] | pd.DataFrame: + """ + Retrieves the collections from the NBIA server. + + Args: + prefix (str, optional): Prefix to filter the collections by. Defaults to "". + return_type (Optional[Union[ReturnType, str]], optional): + Return type of the response. Defaults to None which uses the default return type. + + Returns: + List[dict[Any, Any]] | pd.DataFrame: List of collections or DataFrame containing the collections. + """ returnType: ReturnType = self._get_return(return_type) response: List[dict[Any, Any]] @@ -268,6 +282,19 @@ def getCollections( def getCollectionDescriptions( self, collectionName: str, return_type: Optional[Union[ReturnType, str]] = None ) -> List[dict[Any, Any]] | pd.DataFrame: + """ + Retrieves the description of a collection from the NBIA server. + + Args: + collectionName (str): The name of the collection. + return_type (Optional[Union[ReturnType, str]], optional): + Return type of the response. Defaults to None. + + Returns: + List[dict[Any, Any]] | pd.DataFrame: + List of collection descriptions or DataFrame containing the collection descriptions. + """ + returnType: ReturnType = self._get_return(return_type) PARAMS: dict = self.parsePARAMS(params=locals()) @@ -291,6 +318,25 @@ def getCollectionDescriptions( return conv_response_list(response, returnType) + # returns a list of dictionaries with the collection name and patient count + def getCollectionPatientCount( + self, + prefix: str = "", + return_type: Optional[Union[ReturnType, str]] = None, + ) -> List[dict[Any, Any]] | pd.DataFrame: + returnType: ReturnType = self._get_return(return_type) + + response = self.query_api(NBIA_ENDPOINTS.GET_COLLECTION_PATIENT_COUNT) + + if prefix: + response = [ + response_dict + for response_dict in response + if response_dict["criteria"].lower().startswith(prefix.lower()) + ] + + return conv_response_list(response, returnType) + def getModalityValues( self, Collection: str = "", @@ -365,25 +411,6 @@ def getPatientsByCollectionAndModality( return conv_response_list(response, returnType) - # returns a list of dictionaries with the collection name and patient count - def getCollectionPatientCount( - self, - prefix: str = "", - return_type: Optional[Union[ReturnType, str]] = None, - ) -> List[dict[Any, Any]] | pd.DataFrame: - returnType: ReturnType = self._get_return(return_type) - - response = self.query_api(NBIA_ENDPOINTS.GET_COLLECTION_PATIENT_COUNT) - - if prefix: - response = [ - response_dict - for response_dict in response - if response_dict["criteria"].lower().startswith(prefix.lower()) - ] - - return conv_response_list(response, returnType) - def getBodyPartCounts( self, Collection: str = "", From 44d56b5c70f4633adf864079b559907aa959e93c Mon Sep 17 00:00:00 2001 From: Jermiah Joseph Date: Sun, 25 Feb 2024 16:06:07 -0500 Subject: [PATCH 8/9] fix: bug causing duplicate instantiations --- src/nbiatoolkit/logger/logger.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/nbiatoolkit/logger/logger.py b/src/nbiatoolkit/logger/logger.py index 16837f9..b405d21 100644 --- a/src/nbiatoolkit/logger/logger.py +++ b/src/nbiatoolkit/logger/logger.py @@ -34,6 +34,9 @@ def setup_logger( raise ValueError(f"Invalid log level: {log_level}") # Create logger + if logging.getLogger(name).hasHandlers(): + logging.getLogger(name).handlers.clear() + logger = logging.getLogger(name) logger.setLevel(level) From 80785ac2616ded5badeab2ce6e2910a899acecc8 Mon Sep 17 00:00:00 2001 From: Jermiah Joseph Date: Sun, 25 Feb 2024 16:45:42 -0500 Subject: [PATCH 9/9] feat: Add logger configuration options --- docs/Tutorial.ipynb | 1735 -------------------- docs/index.md | 3 +- docs/tutorial_files/1_InitializeClient.rst | 2 + docs/tutorial_files/logger.rst | 60 + src/nbiatoolkit/logger/logger.py | 7 +- src/nbiatoolkit/nbia.py | 13 +- 6 files changed, 75 insertions(+), 1745 deletions(-) delete mode 100644 docs/Tutorial.ipynb create mode 100644 docs/tutorial_files/logger.rst diff --git a/docs/Tutorial.ipynb b/docs/Tutorial.ipynb deleted file mode 100644 index 3946acb..0000000 --- a/docs/Tutorial.ipynb +++ /dev/null @@ -1,1735 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Setup \n", - "\n", - "By default the client will use the Public API with the Guest Login credentials.\n", - "from the nbia which has access to all public data. \n", - "\n", - "To use your own credentials you can pass them in as parameters to the client using:\n", - "\n", - "``` python\n", - "NBIAClient(\n", - " username=\"YOUR_USERNAME\", \n", - " password=\"YOUR_PASSWORD\", \n", - " log_level = [\"DEBUG\", \"INFO\", \"WARNING\", \"ERROR\", \"CRITICAL\"]\n", - ")\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/bhklab/Documents/GitHub/NBIA-toolkit/src/nbiatoolkit/nbia_cli.py:20: SyntaxWarning: invalid escape sequence '\\/'\n", - " f = \"\"\"\n" - ] - } - ], - "source": [ - "import nbiatoolkit\n", - "from nbiatoolkit import NBIAClient\n", - "from pprint import pprint\n", - "\n", - "# Instantiate the client. \n", - "# NOTE: if using guest access, you dont have to provide the following, it will default to guest access\n", - "client = NBIAClient(username = \"nbia_guest\", password = \"\", log_level='info')\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'0.32.1'" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# import nbiatoolkit \n", - "nbiatoolkit.__version__" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Return Types\n", - "\n", - "Most functions will by default return a list of dictionaries. If you would like to return a pandas DataFrame instead you can pass in the parameter `return_type`.\n", - "\n", - "Available options are made available through the `ReturnType` Enum which can be passed in as a parameter or alternatively, the string representation of the Enum value can be passed in as a string." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[, ]\n" - ] - } - ], - "source": [ - "from nbiatoolkit.utils import ReturnType\n", - "print(list(ReturnType))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "passing in `return_type=ReturnType.DATAFRAME` or `return_type=\"dataframe\"` will return a pandas DataFrame." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Get Collection Methods" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### get list of collections (names only)\n", - "``` python\n", - "client.getCollections(\n", - " prefix: str = \"\",\n", - " return_type: ReturnType | str = ReturnType.LIST)\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[{'Collection': '4D-Lung'}, {'Collection': 'ACRIN-6698'}, {'Collection': 'ACRIN-Contralateral-Breast-MR'}, {'Collection': 'ACRIN-FLT-Breast'}, {'Collection': 'ACRIN-NSCLC-FDG-PET'}]\n" - ] - } - ], - "source": [ - "# To get all the collections:\n", - "collections = client.getCollections()\n", - "print(collections[0:5])" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[{'Collection': 'TCGA-BLCA'}, {'Collection': 'TCGA-BRCA'}, {'Collection': 'TCGA-CESC'}, {'Collection': 'TCGA-COAD'}, {'Collection': 'TCGA-ESCA'}]\n" - ] - } - ], - "source": [ - "# To get all the collections with a prefix:\n", - "collections = client.getCollections(prefix = \"TCGA\")\n", - "print(collections[0:5])" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Collection
0TCGA-BLCA
1TCGA-BRCA
2TCGA-CESC
3TCGA-COAD
4TCGA-ESCA
\n", - "
" - ], - "text/plain": [ - " Collection\n", - "0 TCGA-BLCA\n", - "1 TCGA-BRCA\n", - "2 TCGA-CESC\n", - "3 TCGA-COAD\n", - "4 TCGA-ESCA" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# To get the same but as a pandas dataframe:\n", - "collections_df = client.getCollections(prefix = \"TCGA\", return_type=ReturnType.DATAFRAME)\n", - "collections_df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### get Collection Description\n", - "\n", - "``` python\n", - "getCollectionDescriptions(\n", - " collectionName: str, # (required)\n", - " return_type: ReturnType | str = ReturnType.LIST\n", - ")\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[{'collectionName': 'TCGA-BLCA',\n", - " 'description': 'The Cancer Genome Atlas-Bladder Endothelial Carcinoma '\n", - " '(TCGA-BLCA) data collection is part of a larger effort to '\n", - " 'enhance the TCGA http://cancergenome.nih.gov/ data set with '\n", - " 'characterized radiological images. The Cancer Imaging '\n", - " 'Program (CIP), with the cooperation of several of the TCGA '\n", - " 'tissue-contributing institutions, has archived a large '\n", - " 'portion of the radiological images of the '\n", - " 'genetically-analyzed BLCA cases. Please see the TCGA-BLCA '\n", - " 'page to learn more about the images and to obtain any '\n", - " 'supporting metadata for this collection.',\n", - " 'descriptionURI': 'https://doi.org/10.7937/K9/TCIA.2016.8LNG8XDR',\n", - " 'lastUpdated': '2023-03-16'}]\n" - ] - } - ], - "source": [ - "pprint(client.getCollectionDescriptions(\"TCGA-BLCA\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
collectionNamedescriptiondescriptionURIlastUpdated
0TCGA-BLCAThe Cancer Genome Atlas-Bladder Endothelial Ca...https://doi.org/10.7937/K9/TCIA.2016.8LNG8XDR2023-03-16
\n", - "
" - ], - "text/plain": [ - " collectionName description \\\n", - "0 TCGA-BLCA The Cancer Genome Atlas-Bladder Endothelial Ca... \n", - "\n", - " descriptionURI lastUpdated \n", - "0 https://doi.org/10.7937/K9/TCIA.2016.8LNG8XDR 2023-03-16 " - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "client.getCollectionDescriptions(\"TCGA-BLCA\", ReturnType.DATAFRAME)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### get Counts of Patients for each collection\n", - "``` python\n", - "getCollectionPatientCount(\n", - " prefx: str = \"\",\n", - " return_type: ReturnType | str = ReturnType.LIST\n", - ")\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[{'count': '20', 'criteria': '4D-Lung'},\n", - " {'count': '385', 'criteria': 'ACRIN-6698'},\n", - " {'count': '984', 'criteria': 'ACRIN-Contralateral-Breast-MR'},\n", - " {'count': '83', 'criteria': 'ACRIN-FLT-Breast'},\n", - " {'count': '242', 'criteria': 'ACRIN-NSCLC-FDG-PET'}]\n" - ] - } - ], - "source": [ - "collectionsPatientCount = client.getCollectionPatientCount()\n", - "pprint(collectionsPatientCount[0:5])" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[{'count': '120', 'criteria': 'TCGA-BLCA'},\n", - " {'count': '139', 'criteria': 'TCGA-BRCA'},\n", - " {'count': '54', 'criteria': 'TCGA-CESC'},\n", - " {'count': '25', 'criteria': 'TCGA-COAD'},\n", - " {'count': '16', 'criteria': 'TCGA-ESCA'},\n", - " {'count': '15', 'criteria': 'TCGA-KICH'},\n", - " {'count': '267', 'criteria': 'TCGA-KIRC'},\n", - " {'count': '33', 'criteria': 'TCGA-KIRP'},\n", - " {'count': '97', 'criteria': 'TCGA-LIHC'},\n", - " {'count': '69', 'criteria': 'TCGA-LUAD'},\n", - " {'count': '37', 'criteria': 'TCGA-LUSC'},\n", - " {'count': '143', 'criteria': 'TCGA-OV'},\n", - " {'count': '14', 'criteria': 'TCGA-PRAD'},\n", - " {'count': '3', 'criteria': 'TCGA-READ'},\n", - " {'count': '5', 'criteria': 'TCGA-SARC'},\n", - " {'count': '46', 'criteria': 'TCGA-STAD'},\n", - " {'count': '6', 'criteria': 'TCGA-THCA'},\n", - " {'count': '65', 'criteria': 'TCGA-UCEC'}]\n" - ] - }, - { - "ename": "KeyError", - "evalue": "'PatientCount'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[19], line 6\u001b[0m\n\u001b[1;32m 3\u001b[0m pprint(collectionsPatientCount)\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# get the collection with max PatientCount\u001b[39;00m\n\u001b[0;32m----> 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCollection with max PatientCount: \u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;43mmax\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mcollectionsPatientCount\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mx\u001b[49m\u001b[43m:\u001b[49m\u001b[43mx\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mPatientCount\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m)\n", - "Cell \u001b[0;32mIn[19], line 6\u001b[0m, in \u001b[0;36m\u001b[0;34m(x)\u001b[0m\n\u001b[1;32m 3\u001b[0m pprint(collectionsPatientCount)\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# get the collection with max PatientCount\u001b[39;00m\n\u001b[0;32m----> 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCollection with max PatientCount: \u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28mmax\u001b[39m(collectionsPatientCount, key\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mlambda\u001b[39;00m x:\u001b[43mx\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mPatientCount\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m))\n", - "\u001b[0;31mKeyError\u001b[0m: 'PatientCount'" - ] - } - ], - "source": [ - "# use prefix to get the patient count for a specific collection and then find the collection with max patient count\n", - "collectionsPatientCount = client.getCollectionPatientCount(prefix=\"TCGA\")\n", - "pprint(collectionsPatientCount)\n", - "\n", - "# get the collection with max PatientCount\n", - "print(\"Collection with max PatientCount: \", max(collectionsPatientCount, key=lambda x: x['PatientCount']))" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
criteriacount
0TCGA-BLCA120
1TCGA-BRCA139
2TCGA-CESC54
3TCGA-COAD25
4TCGA-ESCA16
5TCGA-KICH15
6TCGA-KIRC267
7TCGA-KIRP33
8TCGA-LIHC97
9TCGA-LUAD69
10TCGA-LUSC37
11TCGA-OV143
12TCGA-PRAD14
13TCGA-READ3
14TCGA-SARC5
15TCGA-STAD46
16TCGA-THCA6
17TCGA-UCEC65
\n", - "
" - ], - "text/plain": [ - " criteria count\n", - "0 TCGA-BLCA 120\n", - "1 TCGA-BRCA 139\n", - "2 TCGA-CESC 54\n", - "3 TCGA-COAD 25\n", - "4 TCGA-ESCA 16\n", - "5 TCGA-KICH 15\n", - "6 TCGA-KIRC 267\n", - "7 TCGA-KIRP 33\n", - "8 TCGA-LIHC 97\n", - "9 TCGA-LUAD 69\n", - "10 TCGA-LUSC 37\n", - "11 TCGA-OV 143\n", - "12 TCGA-PRAD 14\n", - "13 TCGA-READ 3\n", - "14 TCGA-SARC 5\n", - "15 TCGA-STAD 46\n", - "16 TCGA-THCA 6\n", - "17 TCGA-UCEC 65" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "client.getCollectionPatientCount(prefix=\"TCGA\", return_type=ReturnType.DATAFRAME)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### get Counts of Patients grouped by Body Parts\n", - "``` python\n", - "getBodyPartCounts(\n", - " collection: str = \"\", \n", - " modality: str = \"\",\n", - " return_type: ReturnType | str = ReturnType.LIST\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total Number of body parts:60\n", - "First 5 body parts:\n", - "[{'count': '7839', 'criteria': 'NOT SPECIFIED'},\n", - " {'count': '1731', 'criteria': 'ABDOMEN'},\n", - " {'count': '2', 'criteria': 'ABDOMEN CAVIT'},\n", - " {'count': '2', 'criteria': 'ABDOMENPELVIC'},\n", - " {'count': '50', 'criteria': 'ABDOMENPELVIS'}]\n" - ] - } - ], - "source": [ - "bodypart_count = client.getBodyPartCounts()\n", - "print(\"Total Number of body parts:\" + str(len(bodypart_count)))\n", - "\n", - "print(\"First 5 body parts:\")\n", - "pprint(bodypart_count[0:5])" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total Number of body parts:11\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
criteriacount
0NOT SPECIFIED239
1ABDOMEN91
2ABDOMENPELVIS2
3BRAIN W/WO_AH321
4CHEST124
5CHEST (THORAX)1
6CHESTABDOMEN1
7CHESTABDPELVIS1
8HEAD1
9OUTSIDE FIL1
10THORAX2
\n", - "
" - ], - "text/plain": [ - " criteria count\n", - "0 NOT SPECIFIED 239\n", - "1 ABDOMEN 91\n", - "2 ABDOMENPELVIS 2\n", - "3 BRAIN W/WO_AH32 1\n", - "4 CHEST 124\n", - "5 CHEST (THORAX) 1\n", - "6 CHESTABDOMEN 1\n", - "7 CHESTABDPELVIS 1\n", - "8 HEAD 1\n", - "9 OUTSIDE FIL 1\n", - "10 THORAX 2" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bodypart_count = client.getBodyPartCounts(Collection = 'ACRIN-NSCLC-FDG-PET', return_type=ReturnType.DATAFRAME)\n", - "print(\"Total Number of body parts:\" + str(len(bodypart_count)))\n", - "bodypart_count" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total Number of body parts:5\n", - "Number of patients for each body part in 4D-Lung collection:\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
criteriacount
0NOT SPECIFIED194
1ABDOMEN11
2CHEST54
3HEART2
4THORAX1
\n", - "
" - ], - "text/plain": [ - " criteria count\n", - "0 NOT SPECIFIED 194\n", - "1 ABDOMEN 11\n", - "2 CHEST 54\n", - "3 HEART 2\n", - "4 THORAX 1" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bodypart_count = client.getBodyPartCounts(Collection = 'NSCLC Radiogenomics', Modality='CT', return_type=ReturnType.DATAFRAME)\n", - "print(\"Total Number of body parts:\" + str(len(bodypart_count)))\n", - "\n", - "print(\"Number of patients for each body part in 4D-Lung collection:\")\n", - "bodypart_count" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Get Patient Methods" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### getPatients\n", - "``` python\n", - "getPatients(\n", - " Collection: str = \"\", # (optional)\n", - " return_type: ReturnType | str = ReturnType.LIST\n", - ")\n", - "````" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total patients in TCGA-BLCA: 120\n", - "[{'Collection': 'TCGA-BLCA',\n", - " 'EthnicGroup': '1',\n", - " 'PatientId': 'TCGA-CU-A3QU',\n", - " 'PatientName': 'TCGA-CU-A3QU',\n", - " 'PatientSex': 'M',\n", - " 'Phantom': 'NO',\n", - " 'SpeciesCode': '337915000',\n", - " 'SpeciesDescription': 'Homo sapiens'},\n", - " {'Collection': 'TCGA-BLCA',\n", - " 'EthnicGroup': '1',\n", - " 'PatientId': 'TCGA-CU-A3KJ',\n", - " 'PatientName': 'TCGA-CU-A3KJ',\n", - " 'PatientSex': 'M',\n", - " 'Phantom': 'NO',\n", - " 'SpeciesCode': '337915000',\n", - " 'SpeciesDescription': 'Homo sapiens'}]\n" - ] - } - ], - "source": [ - "patients = client.getPatients(Collection = \"TCGA-BLCA\")\n", - "print(f\"Total patients in TCGA-BLCA: {len(patients)}\")\n", - "pprint(patients[0:2])" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total patients in NSCLC-Radiomics: 422\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
PatientIdPatientNamePatientSexCollectionPhantomSpeciesCodeSpeciesDescription
0LUNG1-001LUNG1-001MNSCLC-RadiomicsNO337915000Homo sapiens
1LUNG1-007LUNG1-007MNSCLC-RadiomicsNO337915000Homo sapiens
2LUNG1-029LUNG1-029FNSCLC-RadiomicsNO337915000Homo sapiens
3LUNG1-036LUNG1-036FNSCLC-RadiomicsNO337915000Homo sapiens
4LUNG1-056LUNG1-056FNSCLC-RadiomicsNO337915000Homo sapiens
\n", - "
" - ], - "text/plain": [ - " PatientId PatientName PatientSex Collection Phantom SpeciesCode \\\n", - "0 LUNG1-001 LUNG1-001 M NSCLC-Radiomics NO 337915000 \n", - "1 LUNG1-007 LUNG1-007 M NSCLC-Radiomics NO 337915000 \n", - "2 LUNG1-029 LUNG1-029 F NSCLC-Radiomics NO 337915000 \n", - "3 LUNG1-036 LUNG1-036 F NSCLC-Radiomics NO 337915000 \n", - "4 LUNG1-056 LUNG1-056 F NSCLC-Radiomics NO 337915000 \n", - "\n", - " SpeciesDescription \n", - "0 Homo sapiens \n", - "1 Homo sapiens \n", - "2 Homo sapiens \n", - "3 Homo sapiens \n", - "4 Homo sapiens " - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "patients_df = client.getPatients(Collection = \"NSCLC-Radiomics\", return_type=ReturnType.DATAFRAME)\n", - "print(f\"Total patients in NSCLC-Radiomics: {len(patients_df)}\")\n", - "patients_df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### getPatientsByCollectionAndModality\n", - "``` python\n", - "getPatientsByCollectionAndModality(\n", - " Collection: str, # (required)\n", - " Modality: str, # (required)\n", - " return_type: ReturnType | str = ReturnType.LIST\n", - ")\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total patients in TCGA-BLCA with modality CT: 107\n", - "[{'PatientId': 'TCGA-CU-A3QU'},\n", - " {'PatientId': 'TCGA-CU-A3KJ'},\n", - " {'PatientId': 'TCGA-CU-A0YR'},\n", - " {'PatientId': 'TCGA-CU-A0YO'},\n", - " {'PatientId': 'TCGA-CU-A3YL'}]\n" - ] - } - ], - "source": [ - "patients = client.getPatientsByCollectionAndModality(Collection=\"TCGA-BLCA\", Modality=\"CT\")\n", - "print(f\"Total patients in TCGA-BLCA with modality CT: {len(patients)}\")\n", - "pprint(patients[0:5])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### getNewPatients In Collection\n", - "\n", - "``` python\n", - "getNewPatients(\n", - " Collection: str, # (required)\n", - " Date: str, # (required) accepted formats:\n", - " # \"%Y-%m-%d\", \"%Y/%m/%d\", \"%Y%m%d\", \n", - " # \"%m/%d/%Y\", \"%d/%m/%Y\", \"%d-%m-%Y\"\n", - ")\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total new patients in TCGA-BLCA after 2019-01-01: 15\n", - "[{'Collection': 'TCGA-BLCA',\n", - " 'PatientId': 'TCGA-4Z-AA86',\n", - " 'PatientName': 'TCGA-4Z-AA86',\n", - " 'PatientSex': 'M',\n", - " 'Phantom': 'NO',\n", - " 'SpeciesCode': '337915000',\n", - " 'SpeciesDescription': 'Homo sapiens'},\n", - " {'Collection': 'TCGA-BLCA',\n", - " 'PatientId': 'TCGA-G2-A2EC',\n", - " 'PatientName': 'TCGA-G2-A2EC',\n", - " 'PatientSex': 'F',\n", - " 'Phantom': 'NO',\n", - " 'SpeciesCode': '337915000',\n", - " 'SpeciesDescription': 'Homo sapiens'},\n", - " {'Collection': 'TCGA-BLCA',\n", - " 'EthnicGroup': 'W',\n", - " 'PatientId': 'TCGA-G2-A2EF',\n", - " 'PatientName': 'TCGA-G2-A2EF',\n", - " 'PatientSex': 'M',\n", - " 'Phantom': 'NO',\n", - " 'SpeciesCode': '337915000',\n", - " 'SpeciesDescription': 'Homo sapiens'},\n", - " {'Collection': 'TCGA-BLCA',\n", - " 'PatientId': 'TCGA-G2-A2EJ',\n", - " 'PatientName': 'TCGA-G2-A2EJ',\n", - " 'PatientSex': 'F',\n", - " 'Phantom': 'NO',\n", - " 'SpeciesCode': '337915000',\n", - " 'SpeciesDescription': 'Homo sapiens'},\n", - " {'Collection': 'TCGA-BLCA',\n", - " 'EthnicGroup': 'W',\n", - " 'PatientId': 'TCGA-G2-A2EK',\n", - " 'PatientName': 'TCGA-G2-A2EK',\n", - " 'PatientSex': 'M',\n", - " 'Phantom': 'NO',\n", - " 'SpeciesCode': '337915000',\n", - " 'SpeciesDescription': 'Homo sapiens'}]\n" - ] - } - ], - "source": [ - "newPatients = client.getNewPatients(Collection=\"TCGA-BLCA\", Date=\"2019-01-01\")\n", - "print(f\"Total new patients in TCGA-BLCA after 2019-01-01: {len(newPatients)}\")\n", - "pprint(newPatients[0:5])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Get Studies Methods" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "``` python\n", - "getStudies(\n", - " Collection: str, # (required)\n", - " PatientID: str = \"\", # (optional)\n", - " StudyInstanceUID: str = \"\" # (optional)\n", - ") \n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total studies in TCGA-BLCA: 192\n", - "[{'Collection': 'TCGA-BLCA',\n", - " 'EthnicGroup': '1',\n", - " 'PatientAge': '058Y',\n", - " 'PatientID': 'TCGA-CU-A3QU',\n", - " 'PatientName': 'TCGA-CU-A3QU',\n", - " 'PatientSex': 'M',\n", - " 'SeriesCount': 2,\n", - " 'StudyDate': '2004-01-20 00:00:00.0',\n", - " 'StudyDescription': 'CT ABDOMEN PELVIS W CONT',\n", - " 'StudyInstanceUID': '1.3.6.1.4.1.14519.5.2.1.8421.4016.447463489999137002327924629563'},\n", - " {'Collection': 'TCGA-BLCA',\n", - " 'EthnicGroup': '1',\n", - " 'PatientAge': '076Y',\n", - " 'PatientID': 'TCGA-CU-A3KJ',\n", - " 'PatientName': 'TCGA-CU-A3KJ',\n", - " 'PatientSex': 'M',\n", - " 'SeriesCount': 2,\n", - " 'StudyDate': '2003-12-30 00:00:00.0',\n", - " 'StudyDescription': 'CT ABDOMEN PELVIS W CONT',\n", - " 'StudyInstanceUID': '1.3.6.1.4.1.14519.5.2.1.8421.4016.457123040069610055154620740646'}]\n" - ] - } - ], - "source": [ - "studies = client.getStudies(Collection = \"TCGA-BLCA\")\n", - "print(f\"Total studies in TCGA-BLCA: {len(studies)}\")\n", - "pprint(studies[0:2])" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Total studies in TCGA-BLCA for patient TCGA-CU-A0YO: 2\n", - "[{'Collection': 'TCGA-BLCA',\n", - " 'PatientAge': '084Y',\n", - " 'PatientID': 'TCGA-CU-A0YO',\n", - " 'PatientName': 'TCGA-CU-A0YO',\n", - " 'PatientSex': 'M',\n", - " 'SeriesCount': 5,\n", - " 'StudyDate': '2001-11-06 00:00:00.0',\n", - " 'StudyDescription': 'Outside Read or Comparison BODY CT',\n", - " 'StudyInstanceUID': '1.3.6.1.4.1.14519.5.2.1.8421.4016.315224324288394499015843085109'},\n", - " {'Collection': 'TCGA-BLCA',\n", - " 'PatientAge': '084Y',\n", - " 'PatientID': 'TCGA-CU-A0YO',\n", - " 'PatientName': 'TCGA-CU-A0YO',\n", - " 'PatientSex': 'M',\n", - " 'SeriesCount': 5,\n", - " 'StudyDate': '2001-05-31 00:00:00.0',\n", - " 'StudyDescription': 'Outside Read or Comparison BODY CT',\n", - " 'StudyInstanceUID': '1.3.6.1.4.1.14519.5.2.1.8421.4016.119440479687638160287088326194'}]\n" - ] - } - ], - "source": [ - "collection = \"TCGA-BLCA\"\n", - "patients = client.getPatientsByCollectionAndModality(Collection=collection, Modality=\"CT\")\n", - "studies = client.getStudies(Collection = collection, PatientID = patients[3])\n", - "print(f\"Total studies in TCGA-BLCA for patient {patients[3]}: {len(studies)}\")\n", - "pprint(studies[0:5])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Get Series Methods" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### get Series Data using parameters\n", - "\n", - "``` python\n", - "getSeries(\n", - " Collection: str = \"\", \n", - " PatientID: str = \"\",\n", - " StudyInstanceUID: str = \"\",\n", - " Modality: str = \"\",\n", - " SeriesInstanceUID: str = \"\",\n", - " BodyPartExamined: str = \"\",\n", - " ManufacturerModelName: str = \"\",\n", - " Manufacturer: str = \"\") \n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "There are 1351 series in the NSCLC Radiogenomics collection.\n", - "First series:\n", - "{'Collection': 'NSCLC Radiogenomics',\n", - " 'CollectionURI': 'https://doi.org/10.7937/K9/TCIA.2017.7hs46erv',\n", - " 'FileSize': 135541046,\n", - " 'ImageCount': 257,\n", - " 'LicenseName': 'Creative Commons Attribution 3.0 Unported License',\n", - " 'LicenseURI': 'http://creativecommons.org/licenses/by/3.0/',\n", - " 'Manufacturer': 'GE MEDICAL SYSTEMS',\n", - " 'ManufacturerModelName': 'Discovery STE',\n", - " 'Modality': 'CT',\n", - " 'PatientID': 'R01-054',\n", - " 'ProtocolName': '6.2 VA_STD_TORSO_3D',\n", - " 'SeriesDate': '1993-10-09 00:00:00.0',\n", - " 'SeriesDescription': 'CT_SLICES',\n", - " 'SeriesInstanceUID': '1.3.6.1.4.1.14519.5.2.1.4334.1501.313535848942036628030417605312',\n", - " 'SeriesNumber': 3,\n", - " 'SoftwareVersions': 'dm09_dvctsp1.23',\n", - " 'StudyInstanceUID': '1.3.6.1.4.1.14519.5.2.1.4334.1501.251618660025569025068340782649',\n", - " 'TimeStamp': '2017-11-21 10:18:42.0'}\n" - ] - } - ], - "source": [ - "# Get all the series in the NSCLC Radiogenomics collection\n", - "seriesJSON = client.getSeries(Collection=\"NSCLC Radiogenomics\")\n", - "print(f\"There are {len(seriesJSON)} series in the NSCLC Radiogenomics collection.\")\n", - "print(\"First series:\")\n", - "pprint(seriesJSON[0])" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "There are 8 series in the NSCLC Radiogenomics collection for HEART.\n", - "First series:\n", - "{'BodyPartExamined': 'HEART',\n", - " 'Collection': 'NSCLC Radiogenomics',\n", - " 'CollectionURI': 'https://doi.org/10.7937/K9/TCIA.2017.7hs46erv',\n", - " 'FileSize': 196233190,\n", - " 'ImageCount': 372,\n", - " 'LicenseName': 'Creative Commons Attribution 3.0 Unported License',\n", - " 'LicenseURI': 'http://creativecommons.org/licenses/by/3.0/',\n", - " 'Manufacturer': 'SIEMENS',\n", - " 'Modality': 'CT',\n", - " 'PatientID': 'AMC-015',\n", - " 'ProtocolName': 'GATED_CHEST_CTA',\n", - " 'SeriesDate': '1992-02-04 00:00:00.0',\n", - " 'SeriesDescription': 'Gated Chest 1.0 B25f BestDiast 70 %',\n", - " 'SeriesInstanceUID': '1.3.6.1.4.1.14519.5.2.1.4334.1501.253298261882254993527951068007',\n", - " 'SeriesNumber': 5,\n", - " 'SoftwareVersions': 'syngo CT 2008G',\n", - " 'StudyInstanceUID': '1.3.6.1.4.1.14519.5.2.1.4334.1501.119531128953610472040332469413',\n", - " 'TimeStamp': '2017-12-12 13:58:34.0'}\n" - ] - } - ], - "source": [ - "# Get all the series in the NSCLC Radiogenomics collection for a given body part\n", - "seriesbyPatientJSON = client.getSeries(Collection=\"NSCLC Radiogenomics\", BodyPartExamined=\"HEART\")\n", - "print(f\"There are {len(seriesbyPatientJSON)} series in the NSCLC Radiogenomics collection for HEART.\")\n", - "print(\"First series:\")\n", - "pprint(seriesbyPatientJSON[0])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### get New Series after a given date\n", - "\n", - "``` python\n", - "getNewSeries(\n", - " Date: Union[str, datetime], # (required) accepted formats:\n", - " # \"%Y-%m-%d\", \"%Y/%m/%d\", \"%Y%m%d\", \n", - " # \"%m/%d/%Y\", \"%d/%m/%Y\", \"%d-%m-%Y\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "01/01/2024\n", - "Total new series after 2024-01-01: 4751\n", - "{'BodyPartExamined': 'BREAST',\n", - " 'Collection': 'Advanced-MRI-Breast-Lesions',\n", - " 'ImageCount': 580,\n", - " 'Manufacturer': 'GE MEDICAL SYSTEMS',\n", - " 'ManufacturerModelName': 'Signa HDxt',\n", - " 'Modality': 'MR',\n", - " 'PatientID': 'AMBL-376',\n", - " 'SeriesDate': '2005-04-14 00:00:00.0',\n", - " 'SeriesDescription': 'AX Sen Vibrant MultiPhase',\n", - " 'SeriesInstanceUID': '1.3.6.1.4.1.14519.5.2.1.196168555404542578475976858220037429361',\n", - " 'SeriesNumber': 5,\n", - " 'SoftwareVersions': '24',\n", - " 'StudyInstanceUID': '1.3.6.1.4.1.14519.5.2.1.201692485188138093977458202425301357349'}\n" - ] - } - ], - "source": [ - "newSeries = client.getNewSeries(Date=\"2024/01/01\")\n", - "print(f\"Total new series after 2024-01-01: {len(newSeries)}\")\n", - "pprint(newSeries[0])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# download Series Methods\n", - "\n", - "``` python\n", - "downloadSeries(\n", - " SeriesInstanceUID: Union[str, list],\n", - " downloadDir: str,\n", - " filePattern: str,\n", - " overwrite: bool,\n", - " nParallel: int)\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['1.3.6.1.4.1.14519.5.2.1.4334.1501.313535848942036628030417605312',\n", - " '1.3.6.1.4.1.14519.5.2.1.4334.1501.180142538487513002896749276822',\n", - " '1.3.6.1.4.1.14519.5.2.1.4334.1501.207646861287507872880088145139',\n", - " '1.3.6.1.4.1.14519.5.2.1.4334.1501.823771517339737505191046408406',\n", - " '1.3.6.1.4.1.14519.5.2.1.4334.1501.351960674707545278169101032360']\n" - ] - } - ], - "source": [ - "# Get all the series in the NSCLC Radiogenomics collection\n", - "seriesJSON = client.getSeries(Collection=\"NSCLC Radiogenomics\")\n", - "\n", - "# first get a list of the SeriesInstanceUIDs\n", - "seriesUIDS = [series['SeriesInstanceUID'] for series in seriesJSON]\n", - "pprint(seriesUIDS[0:5])" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Downloading to: /Users/bhklab/Documents/GitHub/NBIA-toolkit/docs/data\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Downloading 5 series: 0%| | 0/5 [00:00 diff --git a/docs/tutorial_files/1_InitializeClient.rst b/docs/tutorial_files/1_InitializeClient.rst index 1345e86..c03630a 100644 --- a/docs/tutorial_files/1_InitializeClient.rst +++ b/docs/tutorial_files/1_InitializeClient.rst @@ -159,3 +159,5 @@ The default log level is 'INFO' and the available log levels are `DEBUG`, `INFO` Logging is not yet available in the command line interface. Feel free to open an issue on the GitHub repository if you would like to see this feature added. + +For more configuration options for logging see :ref:`Configuring Logger`. diff --git a/docs/tutorial_files/logger.rst b/docs/tutorial_files/logger.rst new file mode 100644 index 0000000..de01c4e --- /dev/null +++ b/docs/tutorial_files/logger.rst @@ -0,0 +1,60 @@ +Configuring Logger +------------------ + +Any logger from the `logging` module can be used. +A utility function `setup_logger` is provided for convenience. + +.. autofunction:: nbiatoolkit.logger.setup_logger + +.. tabs:: + + .. tab:: Python + + .. exec_code:: python + + from nbiatoolkit import NBIAClient + from nbiatoolkit import setup_logger + + my_logger = setup_logger( + name="my_logger", + log_level="DEBUG", + console_logging=False, + log_file="logfile.log", + log_dir="logs", + log_format="%(asctime)s | %(name)s | %(levelname)s | %(message)s", + datefmt="%y-%m-%d %H:%M", + ) + + # log some messages + my_logger.info("This is an info message") + + + # --- hide: start --- + print("\n") + # --- hide: stop --- + + # read in the log file + print("Contents of logfile.log:") + with open("logs/logfile.log", "r") as f: + print(f.read()) + + + client_logger = setup_logger( + name="NBIAClient", + log_level="DEBUG", + console_logging=False, + log_file="logfile.log", + log_dir="logs", + log_format="%(asctime)s | %(name)s | %(levelname)s | %(message)s", + datefmt="%y-%m-%d %H:%M", + ) + + client = NBIAClient(logger=client_logger) + + # --- hide: start --- + print("\n") + # --- hide: stop --- + + print("Contents of logfile.log after creating NBIAClient:") + with open("logs/logfile.log", "r") as f: + print(f.read()) diff --git a/src/nbiatoolkit/logger/logger.py b/src/nbiatoolkit/logger/logger.py index b405d21..7718f1c 100644 --- a/src/nbiatoolkit/logger/logger.py +++ b/src/nbiatoolkit/logger/logger.py @@ -15,6 +15,8 @@ def setup_logger( ) -> logging.Logger: """ Set up a logger object that can be used to log messages to a file and/or console with daily log file rotation. + If passing a `log_file`, the log file will be created in the current working directory unless a `log_dir` is provided. + The `log_file` is created with a `TimedRotatingFileHandler` to rotate the log file daily. Args: name (str): The name of the logger. @@ -68,8 +70,3 @@ def setup_logger( logger.addHandler(console_handler) return logger - - -# Example usage -# from nbiatoolkit.utils.logger import setup_logger -# logger = setup_logger(name='my_logger', log_level='DEBUG', console_logging=True, log_file='my_log.log', log_dir='logs') diff --git a/src/nbiatoolkit/nbia.py b/src/nbiatoolkit/nbia.py index 087e07f..5e8984b 100644 --- a/src/nbiatoolkit/nbia.py +++ b/src/nbiatoolkit/nbia.py @@ -144,11 +144,18 @@ def __init__( username: str = "nbia_guest", password: str = "", log_level: str = "INFO", + logger: Optional[Logger] = None, return_type: Union[ReturnType, str] = ReturnType.LIST, ) -> None: - # Setup logger - self._log: Logger = setup_logger( - name="NBIAClient", log_level=log_level, console_logging=True, log_file=None + self._log: Logger = ( + setup_logger( + name="NBIAClient", + log_level=log_level, + console_logging=True, + log_file=None, + ) + if logger is None + else logger ) # Setup OAuth2 client