Merge branch 'main' into negative_control_tests

bhklab · Dec 21, 2023 · c6ff12e · c6ff12e
2 parents e812bcc + 5a51e07
commit c6ff12e
Show file tree

Hide file tree

Showing 11 changed files with 276 additions and 79 deletions.
diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml
@@ -36,7 +36,7 @@ jobs:
 
   cd:
     # Only run this job if the "ci" job passes
-    needs: ci
+    # needs: ci
 
     # Only run this job if new work is pushed to "main"
     if: github.event_name == 'push' && github.ref == 'refs/heads/main'
@@ -62,14 +62,24 @@ jobs:
       - name: Install package
         run: poetry install
 
-      - name: Use Python Semantic Release to prepare release
-        env:
-          # This token is created automatically by GH Actions
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      # This action uses Python Semantic Release v8
+      - name: Python Semantic Release
+        id: release
+        uses: python-semantic-release/python-semantic-release@master
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+
+
+      - name: Install packaging-related tool
+        run:
+          python3 -m pip install build twine
+
+      - name: Build package
         run: |
-            git config user.name github-actions
-            git config user.email github-actions@github.com
-            poetry run semantic-release publish
+          poetry version $(git describe --tags --abbrev=0 | sed 's/^v//')
+          python -m build --sdist --wheel --outdir dist/ .
+          ls dist/
+
 
       - name: Publish to TestPyPI
         uses: pypa/gh-action-pypi-publish@release/v1
@@ -85,8 +95,8 @@ jobs:
             --extra-index-url https://pypi.org/simple \
             yarea
 
-      - name: Publish to PyPI
-        uses: pypa/gh-action-pypi-publish@release/v1
-        with:
-          user: __token__
-          password: ${{ secrets.PYPI_API_TOKEN }}
+      # - name: Publish to PyPI
+      #   uses: pypa/gh-action-pypi-publish@release/v1
+      #   with:
+      #     user: __token__
+      #     password: ${{ secrets.PYPI_API_TOKEN }}
diff --git a/.gitignore b/.gitignore
@@ -139,4 +139,7 @@ dmypy.json
 **.Rhistory
 
 # MacOS
-.DS_Store
+.DS_Store
+
+# VS Code
+.vscode
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "yarea"
-version = "0.1.0"
+version = "0.2.0"
 description = "A package to extract radiomic features!"
 authors = ["Katy Scott"]
 license = "MIT"
@@ -37,6 +37,9 @@ upload_to_pypi = false                      # don't auto-upload to PyPI
 remove_dist = false                         # don't remove dists
 patch_without_tag = true                    # patch release by default
 
+[tool.poetry.scripts]
+yarea = "yarea.pipeline:main"
+
 [build-system]
 requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
diff --git a/src/yarea/data/1-1.dcm b/src/yarea/data/1-1.dcm
diff --git a/src/yarea/datasets.py b/src/yarea/datasets.py
diff --git a/src/yarea/feature_extraction.py b/src/yarea/feature_extraction.py
@@ -32,22 +32,17 @@ def singleRadiomicFeatureExtraction(ctImage:sitk.Image,
     dict
         Dictionary containing image metadata, versions for key packages used for extraction, and radiomic features
     """
+    # If no pyradiomics parameter file passed, use default
+    if pyradiomicsParamFilePath == None:
+        pyradiomicsParamFilePath = "./src/yarea/data/default_pyradiomics.yaml"
+
     # In case segmentation contains extra axis, flatten to 3D by removing it
     roiImage = flattenImage(roiImage)
     # Segmentation has different origin, align it to the CT for proper feature extraction
     alignedROIImage = alignImages(ctImage, roiImage)
 
     # Get pixel value for the segmentation
     segmentationLabel = getROIVoxelLabel(alignedROIImage)
-
-    # Check that image and segmentation mask have the same dimensions
-    if ctImage.GetSize() != alignedROIImage.GetSize():
-        # Checking if number of segmentation slices is less than CT 
-        if ctImage.GetSize()[2] > alignedROIImage.GetSize()[2]:  
-            print("Slice number mismatch between CT and segmentation for", patID, ". Padding segmentation to match.")
-            alignedROIImage = padSEGtoMatchCT(ctFolderPath, segFilePath, ctImage, alignedROIImage)
-        else:
-            raise RuntimeError()
 
     # Check that CT and segmentation correspond, segmentationLabel is present, and dimensions match
     segBoundingBox, correctedROIImage = imageoperations.checkMask(ctImage, alignedROIImage, label=segmentationLabel)
@@ -65,7 +60,11 @@ def singleRadiomicFeatureExtraction(ctImage:sitk.Image,
 
     # Load PyRadiomics feature extraction parameters to use
     # Initialize feature extractor with parameters
-    featureExtractor = featureextractor.RadiomicsFeatureExtractor(pyradiomicsParamFilePath)
+    try:
+        featureExtractor = featureextractor.RadiomicsFeatureExtractor(pyradiomicsParamFilePath)
+    except OSError as e:
+        print("ERROR: Supplied pyradiomics parameter file does not exist or is not at that location.")
+        raise
 
     # Extract radiomic features from CT with segmentation as mask
     idFeatureVector = featureExtractor.execute(croppedCT, croppedROI, label=segmentationLabel)
@@ -110,6 +109,10 @@ def radiomicFeatureExtraction(imageMetadataPath:str,
     logger = logging.getLogger("radiomics")
     logger.setLevel(logging.ERROR)
 
+    # If no pyradiomics parameter file passed, use default
+    if pyradiomicsParamFilePath == None:
+        pyradiomicsParamFilePath = "./src/yarea/data/default_pyradiomics.yaml"
+
     # Load in summary file generated by radiogenomic_pipeline
     pdImageInfo = pd.read_csv(imageMetadataPath, header=0)
 
@@ -125,9 +128,9 @@ def featureExtraction(ctSeriesID):
         print("Processing ", patID)
 
         # Get absolute path to CT image files 
-        ctFolderPath = os.path.join(imageDirPath, ctSeriesInfo.iloc[0]['folder_CT'])
+        ctDirPath = os.path.join(imageDirPath, ctSeriesInfo.iloc[0]['folder_CT'])
         # Load CT by passing in specific series to find in a directory
-        ctImage = read_dicom_series(path = ctFolderPath, series_id = ctSeriesID)
+        ctImage = read_dicom_series(path = ctDirPath, series_id = ctSeriesID)
 
         # Get list of segmentations to iterate over
         segSeriesIDList = ctSeriesInfo['series_seg'].unique()
@@ -149,7 +152,7 @@ def featureExtraction(ctSeriesID):
             segFilePath = os.path.join(imageDirPath, segSeriesInfo.iloc[0]['file_path_seg'])
             # Get dictionary of ROI sitk Images for this segmentation file
             segImages = loadSegmentation(segFilePath, modality = segSeriesInfo.iloc[0]['modality_seg'], 
-                                         baseImageDirPath = ctFolderPath, roiNames = roiNames)
+                                         baseImageDirPath = ctDirPath, roiNames = roiNames)
 
             # Check that this series has ROIs to extract from (dictionary isn't empty)
             if not segImages:
@@ -161,38 +164,56 @@ def featureExtraction(ctSeriesID):
                     # ROI counter for image metadata output
                     roiNum = roiCount + 1
 
+                    # Extract features listed in the parameter file
+                    print("Calculating radiomic features for segmentation:", roiImageName)
+
+                    # Get sitk Image object for this ROI
+                    roiImage = segImages[roiImageName]
+
                     # Exception catch for if the segmentation dimensions do not match that original image
                     try:
-                        # Extract features listed in the parameter file
-                        print("Calculating radiomic features for segmentation:", roiImageName)
-
-                        # Extract radiomic features from this CT/segmentation pair
-                        idFeatureVector = singleRadiomicFeatureExtraction(ctImage, roiImage = segImages[roiImageName],
-                                                                          pyradiomicsParamFilePath = pyradiomicsParamFilePath,
-                                                                          negativeControl = negativeControl)
-
-                        # Create dictionary of image metadata to append to front of output table
-                        sampleROIData = {"patient_ID": patID,
-                                        "study_description": segSeriesInfo.iloc[0]['study_description_CT'],
-                                        "series_UID": segSeriesInfo.iloc[0]['series_CT'],
-                                        "series_description": segSeriesInfo.iloc[0]['series_description_CT'],
-                                        "image_modality": segSeriesInfo.iloc[0]['modality_CT'],
-                                        "instances": segSeriesInfo.iloc[0]['instances_CT'],
-                                        "seg_series_UID": segSeriesInfo.iloc[0]['series_seg'],
-                                        "seg_modality": segSeriesInfo.iloc[0]['modality_seg'],
-                                        "seg_ref_image": segSeriesInfo.iloc[0]['reference_ct_seg'],
-                                        "roi": roiImageName,
-                                        "roi_number": roiNum,
-                                        "negative_control": negativeControl}
-
-                        # Concatenate image metadata with PyRadiomics features
-                        sampleROIData.update(idFeatureVector)
-                        # Store this ROI's info in the segmentation level list
-                        ctAllData.append(sampleROIData)
-
-                    except Exception as e:
+                        # Check if segmentation just has an extra axis with a size of 1 and remove it
+                        if roiImage.GetDimension() > 3 and roiImage.GetSize()[3] == 1:
+                            roiImage = flattenImage(roiImage)
+
+                        # Check that image and segmentation mask have the same dimensions
+                        if ctImage.GetSize() != roiImage.GetSize():
+                            # Checking if number of segmentation slices is less than CT 
+                            if ctImage.GetSize()[2] > roiImage.GetSize()[2]:  
+                                print("Slice number mismatch between CT and segmentation for", patID, ". Padding segmentation to match.")
+                                roiImage = padSEGtoMatchCT(ctDirPath, segFilePath, ctImage, roiImage)
+                            else:
+                                raise RuntimeError("CT and ROI dimensions do not match.")
+
+                    # Catching CT and segmentation size mismatch error
+                    except RuntimeError as e:
                         print(str(e))
 
+
+                    # Extract radiomic features from this CT/segmentation pair
+                    idFeatureVector = singleRadiomicFeatureExtraction(ctImage, roiImage = roiImage,
+                                                                      pyradiomicsParamFilePath = pyradiomicsParamFilePath,
+                                                                      negativeControl = negativeControl)
+
+                    # Create dictionary of image metadata to append to front of output table
+                    sampleROIData = {"patient_ID": patID,
+                                    "study_description": segSeriesInfo.iloc[0]['study_description_CT'],
+                                    "series_UID": segSeriesInfo.iloc[0]['series_CT'],
+                                    "series_description": segSeriesInfo.iloc[0]['series_description_CT'],
+                                    "image_modality": segSeriesInfo.iloc[0]['modality_CT'],
+                                    "instances": segSeriesInfo.iloc[0]['instances_CT'],
+                                    "seg_series_UID": segSeriesInfo.iloc[0]['series_seg'],
+                                    "seg_modality": segSeriesInfo.iloc[0]['modality_seg'],
+                                    "seg_ref_image": segSeriesInfo.iloc[0]['reference_ct_seg'],
+                                    "roi": roiImageName,
+                                    "roi_number": roiNum,
+                                    "negative_control": negativeControl}
+
+                    # Concatenate image metadata with PyRadiomics features
+                    sampleROIData.update(idFeatureVector)
+                    # Store this ROI's info in the segmentation level list
+                    ctAllData.append(sampleROIData)
+
         return ctAllData
         ###### END featureExtraction #######
 
@@ -214,10 +235,10 @@ def featureExtraction(ctSeriesID):
         datasetName = imageMetadataPath.partition("match_list_")[2]
         # Setup output file name with the dataset name as a suffix
         if negativeControl == None:
-            outFileName = "radfeatures_" + datasetName
+            outFileName = "radiomicfeatures_" + datasetName
         else:
             # Add negative control identifier to output file name
-            outFileName = "radfeatures_" + negativeControl + "_" + datasetName
+            outFileName = "radiomicfeatures_" + negativeControl + "_" + datasetName
 
         # Join outputDirPath, a features directory, and the output file name
         outputFilePath = os.path.join(outputDirPath, "features/", outFileName)

diff --git a/src/yarea/metadata.py b/src/yarea/metadata.py
@@ -57,6 +57,10 @@ def matchCTtoSegmentation(imgFileListPath: str,
     if segType != "RTSTRUCT" and segType != "SEG":
         raise ValueError("Incorrect segmentation file type. Must be RTSTRUCT or SEG.")
 
+    # Check that imgFileListPath is a csv file to properly be loaded in
+    if not imgFileListPath.endswith('.csv'):
+        raise ValueError("This function expects to load in a .csv file, so imgFileListPath must end in .csv")
+
     # Load in complete list of patient image directories of all modalities (output from med-imagetools crawl)
     fullDicomList = pd.read_csv(imgFileListPath, index_col=0)
 
@@ -86,4 +90,38 @@ def matchCTtoSegmentation(imgFileListPath: str,
         outputFilePath = os.path.join(outputDirPath, fileName)
         saveDataframeCSV(samplesWSeg, outputFilePath)
 
-    return samplesWSeg
+    return samplesWSeg
+
+
+def getSegmentationType(imgFileListPath: str):
+    """Find the segmentation type from the full list of image files.
+
+    Parameters
+    ----------
+    imgFileListPath : str
+        Path to csv containing list of image directories/paths in the dataset. 
+        Expecting output from med-imagetools autopipeline .imgtools_[dataset]
+
+    Returns
+    -------
+    str
+        Segmentation type (RTSTRUCT or SEG)
+    """
+    # Check that imgFileListPath is a csv file to properly be loaded in
+    if not imgFileListPath.endswith('.csv'):
+        raise ValueError("This function expects to load in a .csv file, so imgFileListPath must end in .csv")
+
+    # Load in complete list of patient image directories of all modalities (output from med-imagetools crawl)
+    fullDicomList = pd.read_csv(imgFileListPath, index_col=0)
+
+    # Get list of unique modalities 
+    modalities = list(fullDicomList['modality'].unique())
+
+    if "RTSTRUCT" in modalities:
+        segType = "RTSTRUCT"
+    elif "SEG" in modalities:
+        segType = "SEG"
+    else:
+        raise RuntimeError("No suitable segmentation type found. YAREA can only use RTSTRUCTs and DICOM-SEG segmentations.")
+
+    return segType