diff --git a/models/bamf_nnunet_ct_lungnodules/config/default.yml b/models/bamf_nnunet_ct_lungnodules/config/default.yml
new file mode 100644
index 00000000..306adb2c
--- /dev/null
+++ b/models/bamf_nnunet_ct_lungnodules/config/default.yml
@@ -0,0 +1,36 @@
+general:
+  data_base_dir: /app/data
+  version: 1.0
+  description: default configuration for 3D semantic image segmentation of the lung and lung nodules from ct scan (dicom to dicom)
+
+execute:
+- DicomImporter
+- NiftiConverter
+- NNUnetRunnerV2
+- BamfProcessorRunner
+- DsegConverter
+- DataOrganizer
+
+modules:
+  DicomImporter:
+    source_dir: input_data
+    import_dir: sorted_data
+    sort_data: true
+    meta:
+      mod: '%Modality'
+
+  NiftiConverter:
+    engine: dcm2niix
+
+  NNUnetRunnerV2:
+    in_data: nifti:mod=ct
+
+  DsegConverter:
+    model_name: BAMF Lung and Lung Nodule AI Segmentation
+    target_dicom: dicom:mod=ct
+    source_segs: nifti:mod=seg:processor=bamf
+    skip_empty_slices: true
+
+  DataOrganizer:
+    targets:
+    - dicomseg-->[i:sid]/bamf_nnunet_ct_lungnodules.seg.dcm
diff --git a/models/bamf_nnunet_ct_lungnodules/dockerfiles/Dockerfile b/models/bamf_nnunet_ct_lungnodules/dockerfiles/Dockerfile
new file mode 100644
index 00000000..f7ebf9eb
--- /dev/null
+++ b/models/bamf_nnunet_ct_lungnodules/dockerfiles/Dockerfile
@@ -0,0 +1,31 @@
+FROM mhubai/base:latest
+
+# FIXME: set this environment variable as a shortcut to avoid nnunet crashing the build
+# by pulling sklearn instead of scikit-learn
+# N.B. this is a known issue:
+# https://github.com/MIC-DKFZ/nnUNet/issues/1281
+# https://github.com/MIC-DKFZ/nnUNet/pull/1209
+ENV SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True
+
+# Install nnunet version 2
+RUN pip3 install --no-cache-dir nnunetv2==2.0
+
+# Clone the main branch of MHubAI/models
+ARG MHUB_MODELS_REPO
+RUN buildutils/import_mhub_model.sh bamf_nnunet_ct_lungnodules ${MHUB_MODELS_REPO}
+
+# Pull weights into the container (download, unzip and clean up in one layer so the zip is not kept in the image)
+ENV WEIGHTS_DIR=/root/.nnunet/nnUNet_models/nnUNet/
+ENV WEIGHTS_FN=Dataset007_Nodules.zip
+ENV WEIGHTS_URL=https://zenodo.org/record/11582738/files/$WEIGHTS_FN
+RUN mkdir -p ${WEIGHTS_DIR} && \
+    wget --directory-prefix ${WEIGHTS_DIR} ${WEIGHTS_URL} && \
+    unzip ${WEIGHTS_DIR}${WEIGHTS_FN} -d ${WEIGHTS_DIR} && \
+    rm ${WEIGHTS_DIR}${WEIGHTS_FN}
+
+# specify nnunet specific environment variables
+ENV WEIGHTS_FOLDER=$WEIGHTS_DIR
+
+# Default run script
+ENTRYPOINT ["mhub.run"]
+CMD ["--config", "/app/models/bamf_nnunet_ct_lungnodules/config/default.yml"]
diff --git a/models/bamf_nnunet_ct_lungnodules/meta.json b/models/bamf_nnunet_ct_lungnodules/meta.json
new file mode 100644
index 00000000..330185ef
--- /dev/null
+++ b/models/bamf_nnunet_ct_lungnodules/meta.json
@@ -0,0 +1,149 @@
+{
+  "id": "",
+  "name": "bamf_nnunet_ct_lungnodules",
+  "title": "AIMI CT Lung and Nodules",
+  "summary": {
+    "description": "An nnU-Net based model to segment Lung and Nodules (3mm-30mm) from CT scans",
+    "inputs": [
+      {
+        "label": "Input Image",
+        "description": "The CT scan of a patient.",
+        "format": "DICOM",
+        "modality": "CT",
+        "bodypartexamined": "LUNG",
+        "slicethickness": "3mm",
+        "non-contrast": true,
+        "contrast": false
+      }
+    ],
+    "outputs": [
+      {
+        "label": "Segmentation",
+        "type": "Segmentation",
+        "description": "Lung and Nodules (3mm-30mm) from CT scans",
+        "classes": [
+          "LUNG",
+          "LUNG+NODULE"
+        ]
+      }
+    ],
+    "model": {
+      "architecture": "U-net",
+      "training": "supervised",
+      "cmpapproach": "3D"
+    },
+    "data": {
+      "training": {
+        "vol_samples": 1299
+      },
+      "evaluation": {
+        "vol_samples": 114
+      },
+      "public": true,
+      "external": true
+    }
+  },
+  "details": {
+    "name": "AIMI CT Lung and Nodule",
+    "version": "2.0.0",
+    "devteam": "BAMF Health",
+    "authors": [
+      "Soni, Rahul",
+      "McCrumb, Diana",
+      "Murugesan, Gowtham Krishnan",
+      "Van Oss, Jeff"
+    ],
+    "type": "nnU-Net (U-Net structure, optimized by data-driven heuristics)",
+    "date": {
+      "code": "28.09.2024",
+      "weights": "11.06.2024",
+      "pub": "30.09.2024"
+    },
+    "cite": "Gowtham Krishnan Murugesan, Diana McCrumb, Rahul Soni, Jithendra Kumar, Leonard Nuernberg, Linmin Pei, Ulrike Wagner, Sutton Granger, Andrey Y. Fedorov, Stephen Moore, Jeff Van Oss. AI generated annotations for Breast, Brain, Liver, Lungs and Prostate cancer collections in National Cancer Institute Imaging Data Commons. arXiv:2409.20342 (2024).",
+    "license": {
+      "code": "MIT",
+      "weights": "CC BY-NC 4.0"
+    },
+    "publications": [
+      {
+        "title": "AI generated annotations for Breast, Brain, Liver, Lungs and Prostate cancer collections in National Cancer Institute Imaging Data Commons",
+        "uri": "https://arxiv.org/abs/2409.20342"
+      }
+    ],
+    "github": "https://github.com/bamf-health/aimi-lung2-ct"
+  },
+  "info": {
+    "use": {
+      "title": "Intended Use",
+      "text": "This model is designed for analyzing thoracic CT scans to segment lung structures and nodules. It requires input images from CT scans, which are processed using deep learning methods like U-Net. The model identifies and delineates lung regions and nodules, assisting in lung cancer screening and diagnostics."
+    },
+    "analyses": {
+      "title": "Quantitative Analyses",
+      "text": "The model's performance was assessed using the Dice Coefficient, Hausdorff distance and NSD. Source radiological images from publicly available NCI IDC collections were filtered to match the modality and region requirements. To ensure the quality of AI-generated annotations, 10% of these annotations were evaluated by radiologists."
+    },
+    "evaluation": {
+      "title": "Evaluation Data",
+      "text": "Quantitative metrics between AI and Radiologists annotations. The model was used to segment 1157 cases from the QIN LUNG CT [1], SPIE-AAPM Lung CT Challenge [2] and NLST [3] collections. 114 of those cases were randomly selected to be reviewed and corrected by a board-certified radiologist.",
+      "tables": [
+        {
+          "label": "Dice Score",
+          "entries": {
+            "Lung": "1.0±0.0",
+            "Nodules": "0.78±0.28"
+          }
+        },
+        {
+          "label": "95% Hausdorff Distance",
+          "entries": {
+            "Lung": "0.00±0.00",
+            "Nodules": "62.07±10.54"
+          }
+        },
+        {
+          "label": "Normalized surface distance",
+          "entries": {
+            "Lung": "0.02±0.11",
+            "Nodules": "10.54±14.43"
+          }
+        }
+      ],
+      "references": [
+        {
+          "label": "QIN LUNG CT",
+          "uri": "https://www.cancerimagingarchive.net/collection/qin-lung-ct/"
+        },
+        {
+          "label": "SPIE-AAPM Lung CT Challenge",
+          "uri": "https://www.cancerimagingarchive.net/collection/spie-aapm-lung-ct-challenge/"
+        },
+        {
+          "label": "NLST",
+          "uri": "https://www.cancerimagingarchive.net/collection/nlst/"
+        }
+
+      ]
+    },
+    "training": {
+      "title": "Training Data",
+      "text": "416 CT cases from NSCLC-Radiomics [2] and 883 CT cases from DICOM-LIDC-IDRI-Nodules [1] were used to train the model. Annotations for the lung regions in the training dataset were generated utilizing Totalsegmentator [3].",
+      "references": [
+        {
+          "label": "DICOM-LIDC-IDRI-Nodules",
+          "uri": "https://wiki.cancerimagingarchive.net/pages/viewpage.action?pageId=44499647"
+        },
+        {
+          "label": "NSCLC-Radiomics",
+          "uri": "https://www.cancerimagingarchive.net/collection/nsclc-radiomics/"
+        },
+        {
+          "label": "Totalsegmentator",
+          "uri": "https://mhub.ai/models/totalsegmentator"
+        }
+      ]
+    },
+    "limitations": {
+      "title": "Limitations",
+      "text": "The model has been trained and tested on scans acquired during clinical care of patients, so it might not be suited for a healthy population. The generalization capabilities of the model on a range of ages, genders, and ethnicities are unknown."
+    }
+  }
+}
diff --git a/models/bamf_nnunet_ct_lungnodules/mhub.toml b/models/bamf_nnunet_ct_lungnodules/mhub.toml
new file mode 100644
index 00000000..7659547e
--- /dev/null
+++ b/models/bamf_nnunet_ct_lungnodules/mhub.toml
@@ -0,0 +1,3 @@
+
+[model.deployment]
+test = "https://zenodo.org/records/13880663/files/bamf_nnunet_ct_lungnodules.test.zip?download=1"
diff --git a/models/bamf_nnunet_ct_lungnodules/utils/BamfProcessorRunner.py b/models/bamf_nnunet_ct_lungnodules/utils/BamfProcessorRunner.py
new file mode 100644
index 00000000..b23051e1
--- /dev/null
+++ b/models/bamf_nnunet_ct_lungnodules/utils/BamfProcessorRunner.py
@@ -0,0 +1,148 @@
+"""
+-------------------------------------------------
+MHub - Run Module to perform postprocessing logic on segmentations.
+-------------------------------------------------
+-------------------------------------------------
+Author: Jithendra Kumar
+Email: jithendra.kumar@bamfhealth.com
+-------------------------------------------------
+"""
+
+from mhubio.core import Instance, InstanceData
+from mhubio.core import Module, IO
+from skimage import measure
+import SimpleITK as sitk
+import numpy as np
+
+
+class BamfProcessorRunner(Module):
+    """Post-process the nnU-Net lung / nodule label map: drop tiny nodules, keep the main blobs."""
+
+    def max_planar_dimension(self, label_img, label_cnt):
+        """
+        Calculate the maximum planar dimension of a specific label in a 3D label image.
+
+        Args:
+            label_img (sitk.Image): The 3D label image.
+            label_cnt (int): The label number to analyze.
+
+        Returns:
+            float: The largest in-plane bounding-box extent of the label in millimeters (mm).
+        """
+        tumor = label_img == label_cnt
+
+        assert tumor.GetDimension() == 3
+        spacing = tumor.GetSpacing()
+        # The axis whose spacing differs from the other two is excluded as the through-plane axis.
+        # NOTE(review): isotropic or fully anisotropic spacing falls back to axis 1 - confirm this is intended.
+        if spacing[0] == spacing[1] and spacing[1] != spacing[2]:
+            axis = 2
+            plane_space = spacing[0]
+        elif spacing[0] != spacing[1] and spacing[1] == spacing[2]:
+            axis = 0
+            plane_space = spacing[1]
+        else:
+            axis = 1
+            plane_space = spacing[2]
+
+        lsif = sitk.LabelShapeStatisticsImageFilter()
+        lsif.Execute(tumor)
+
+        # The bounding box is (start_x, start_y, start_z, size_x, size_y, size_z); keep the two in-plane sizes.
+        boundingBox = np.array(lsif.GetBoundingBox(1))
+        sizes = boundingBox[3:].tolist()
+        del sizes[axis]
+        max_planar_size = plane_space * max(sizes)  # mm
+        return max_planar_size
+
+    def filter_nodules(self, label_img, min_size=3):
+        """
+        Filter lung nodules based on their size and re-label them accordingly.
+
+        Nodule components smaller than `min_size` are merged into the lung label; all
+        other nodule components keep the nodule label.
+
+        Args:
+            label_img (sitk.Image): The 3D label image containing lung (1) and nodule (2) labels.
+            min_size (float): Minimum planar size (in mm) to retain a nodule.
+
+        Returns:
+            sitk.Image: The processed label image with nodules filtered by size.
+        """
+        label_val_lung = 1
+        label_val_nodule = 2
+
+        nodules_img = label_img == label_val_nodule
+        nodule_components = sitk.ConnectedComponent(nodules_img)
+        n_components = int(sitk.GetArrayFromImage(nodule_components).max())
+
+        # Component ids only index `nodule_components`. They must not be used as keys for
+        # ChangeLabel on `label_img`, where the values 1 and 2 mean lung and nodule.
+        nodules_to_remove = [
+            lbl for lbl in range(1, n_components + 1)
+            if self.max_planar_dimension(nodule_components, lbl) < min_size
+        ]
+
+        # Turn every nodule voxel into lung first, then paint the retained components back as nodule.
+        label_img = sitk.ChangeLabel(label_img, {label_val_nodule: label_val_lung})
+        big_nodules = sitk.ChangeLabel(nodule_components, {x: 0 for x in nodules_to_remove})
+        # Mask() writes the outside value (nodule) wherever the binary mask equals the masking value (1).
+        label_img = sitk.Mask(label_img, big_nodules > 0, label_val_nodule, label_val_lung)
+        label_img = self.n_connected(label_img)
+
+        return label_img
+
+    def n_connected(self, img):
+        """
+        Retain the largest connected foreground components of a label image.
+
+        Every non-zero voxel counts as foreground. The largest component is always kept and the
+        second largest is kept too when it is bigger than 20% of the largest one.
+
+        Args:
+            img (sitk.Image): The input label image.
+
+        Returns:
+            sitk.Image: The image with all voxels outside the retained components set to 0.
+        """
+        img_data = sitk.GetArrayFromImage(img)
+        blobs_labels = measure.label((img_data > 0).astype(np.uint8), background=0)
+        lbl, counts = np.unique(blobs_labels, return_counts=True)
+
+        # Exclude the background explicitly rather than assuming it is the most frequent value.
+        foreground = lbl != 0
+        lbl, counts = lbl[foreground], counts[foreground]
+        order = np.argsort(-counts, kind="stable")
+
+        keep = np.zeros(img_data.shape, dtype=bool)
+        if order.size >= 1:
+            keep[blobs_labels == lbl[order[0]]] = True
+        if order.size >= 2 and counts[order[1]] > counts[order[0]] * 0.2:
+            keep[blobs_labels == lbl[order[1]]] = True
+
+        img_data[~keep] = 0
+        img_masked = sitk.GetImageFromArray(img_data)
+        img_masked.CopyInformation(img)
+        return img_masked
+
+    @IO.Instance()
+    @IO.Input('in_data', 'nifti:mod=seg:model=nnunet', the='input segmentations')
+    @IO.Output('out_data', 'bamf_processed.nii.gz', 'nifti:mod=seg:processor=bamf:roi=LUNG,LUNG+NODULE', data='in_data', the="lung and filtered nodules segmentation")
+    def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None:
+        """
+        Main task function that processes the input lung and nodule segmentations,
+        filters nodules based on their size, and writes the output image.
+
+        Args:
+            instance (Instance): The MHub instance for processing.
+            in_data (InstanceData): Input data containing the segmentation.
+            out_data (InstanceData): Output data path to save the processed image.
+        """
+        # Log bamf runner info
+        self.log("Running BamfProcessor on....")
+        self.log(f" > input data: {in_data.abspath}")
+        self.log(f" > output data: {out_data.abspath}")
+
+        label_img = sitk.ReadImage(in_data.abspath)
+        filtered_label_img = self.filter_nodules(label_img, min_size=3)
+        sitk.WriteImage(filtered_label_img, out_data.abspath)
diff --git a/models/bamf_nnunet_ct_lungnodules/utils/NNUnetRunnerV2.py b/models/bamf_nnunet_ct_lungnodules/utils/NNUnetRunnerV2.py
new file mode 100644
index 00000000..aab7296c
--- /dev/null
+++ b/models/bamf_nnunet_ct_lungnodules/utils/NNUnetRunnerV2.py
@@ -0,0 +1,82 @@
+"""
+-------------------------------------------------
+MHub - NNU-Net Runner v2
+       Custom Runner for pre-trained nnunet v2 models.
+-------------------------------------------------
+
+-------------------------------------------------
+Author: Jithendra Kumar
+Email: jithendra.kumar@bamfhealth.com
+-------------------------------------------------
+"""
+
+import os
+import shutil
+
+from mhubio.core import Module, Instance, InstanceData, DataType, FileType, IO
+
+
+@IO.ConfigInput('in_data', 'nifti:mod=ct', the="input data to run nnunet on")
+class NNUnetRunnerV2(Module):
+    """Run nnU-Net v2 inference (`nnUNetv2_predict`) on a single input volume."""
+
+    nnunet_dataset: str = 'Dataset007_Nodules'
+    nnunet_config: str = '3d_fullres'
+    input_data_type: DataType
+
+    @IO.Instance()
+    @IO.Input("in_data", the="input data to run nnunet on")
+    @IO.Output("out_data", 'VOLUME_001.nii.gz', 'nifti:mod=seg:model=nnunet:nnunet_dataset=Dataset007_Nodules:'
+               'nnunet_config=3d_fullres:roi=LUNG,LUNG+NODULE', data='in_data', the="output data from nnunet")
+    def task(self, instance: Instance, in_data: InstanceData, out_data: InstanceData) -> None:
+        """
+        Run nnU-Net v2 on the input volume and copy the predicted segmentation to `out_data`.
+
+        Args:
+            instance (Instance): The MHub instance being processed.
+            in_data (InstanceData): Input volume to segment.
+            out_data (InstanceData): Destination of the predicted segmentation.
+        """
+
+        # get the nnunet model to run
+        self.v("Running nnUNetv2_predict.")
+        self.v(f" > input data: {in_data.abspath}")
+
+        # download weights if not found
+        # NOTE: only for testing / debugging. For production always provide the weights in the Docker container.
+        # NOTE(review): `nnUNet_download_pretrained_model` looks like the nnU-Net v1 CLI name; verify it exists for nnunetv2.
+        if not os.path.isdir(os.path.join(os.environ["WEIGHTS_FOLDER"], '')):
+            self.v("Downloading nnUNet model weights...")
+            bash_command = ["nnUNet_download_pretrained_model", self.nnunet_dataset]
+            self.subprocess(bash_command, text=True)
+
+        inp_dir = self.config.data.requestTempDir(label="nnunet-model-inp")
+        inp_file = 'VOLUME_001_0000.nii.gz'
+        shutil.copyfile(in_data.abspath, os.path.join(inp_dir, inp_file))
+
+        # define output folder (temp dir) and also override environment variable for nnunet
+        out_dir = self.config.data.requestTempDir(label="nnunet-model-out")
+        os.environ['nnUNet_results'] = out_dir
+
+        # create symlink in python
+        # NOTE: this is a workaround for the nnunet bash script that expects the model data to be in a output folder
+        #       structure. This is not the case for the mhub data structure.
+        os.symlink(os.path.join(os.environ['WEIGHTS_FOLDER'], self.nnunet_dataset), os.path.join(out_dir, self.nnunet_dataset))
+
+        # construct nnunet inference command
+        bash_command = ["nnUNetv2_predict"]
+        bash_command += ["-i", str(inp_dir)]
+        bash_command += ["-o", str(out_dir)]
+        bash_command += ["-d", self.nnunet_dataset]
+        bash_command += ["-c", self.nnunet_config]
+
+        self.v(f" > bash_command: {bash_command}")
+        # run command
+        self.subprocess(bash_command, text=True)
+
+        # get output data
+        out_file = 'VOLUME_001.nii.gz'
+        out_path = os.path.join(out_dir, out_file)
+
+        # copy output data to instance
+        shutil.copyfile(out_path, out_data.abspath)
diff --git a/models/bamf_nnunet_ct_lungnodules/utils/__init__.py b/models/bamf_nnunet_ct_lungnodules/utils/__init__.py
new file mode 100644
index 00000000..d6522730
--- /dev/null
+++ b/models/bamf_nnunet_ct_lungnodules/utils/__init__.py
@@ -0,0 +1 @@
+from .BamfProcessorRunner import *