Skip to content

Commit

Permalink
Merge branch 'main' into negative_control_tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Mogtaba-Alim authored Dec 21, 2023
2 parents e812bcc + 5a51e07 commit c6ff12e
Show file tree
Hide file tree
Showing 11 changed files with 276 additions and 79 deletions.
36 changes: 23 additions & 13 deletions .github/workflows/ci-cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:

cd:
# Only run this job if the "ci" job passes
needs: ci
# needs: ci

# Only run this job if new work is pushed to "main"
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
Expand All @@ -62,14 +62,24 @@ jobs:
- name: Install package
run: poetry install

- name: Use Python Semantic Release to prepare release
env:
# This token is created automatically by GH Actions
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# This action uses Python Semantic Release v8
- name: Python Semantic Release
id: release
uses: python-semantic-release/python-semantic-release@master
with:
github_token: ${{ secrets.GITHUB_TOKEN }}


- name: Install packaging-related tool
run:
python3 -m pip install build twine

- name: Build package
run: |
git config user.name github-actions
git config user.email [email protected]
poetry run semantic-release publish
poetry version $(git describe --tags --abbrev=0 | sed 's/^v//')
python -m build --sdist --wheel --outdir dist/ .
ls dist/
- name: Publish to TestPyPI
uses: pypa/gh-action-pypi-publish@release/v1
Expand All @@ -85,8 +95,8 @@ jobs:
--extra-index-url https://pypi.org/simple \
yarea
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
password: ${{ secrets.PYPI_API_TOKEN }}
# - name: Publish to PyPI
# uses: pypa/gh-action-pypi-publish@release/v1
# with:
# user: __token__
# password: ${{ secrets.PYPI_API_TOKEN }}
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,7 @@ dmypy.json
**.Rhistory

# MacOS
.DS_Store
.DS_Store

# VS Code
.vscode
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "yarea"
version = "0.1.0"
version = "0.2.0"
description = "A package to extract radiomic features!"
authors = ["Katy Scott"]
license = "MIT"
Expand Down Expand Up @@ -37,6 +37,9 @@ upload_to_pypi = false # don't auto-upload to PyPI
remove_dist = false # don't remove dists
patch_without_tag = true # patch release by default

[tool.poetry.scripts]
yarea = "yarea.pipeline:main"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
Binary file removed src/yarea/data/1-1.dcm
Binary file not shown.
18 changes: 0 additions & 18 deletions src/yarea/datasets.py

This file was deleted.

107 changes: 64 additions & 43 deletions src/yarea/feature_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,22 +32,17 @@ def singleRadiomicFeatureExtraction(ctImage:sitk.Image,
dict
Dictionary containing image metadata, versions for key packages used for extraction, and radiomic features
"""
# If no pyradiomics parameter file passed, use default
if pyradiomicsParamFilePath == None:
pyradiomicsParamFilePath = "./src/yarea/data/default_pyradiomics.yaml"

# In case segmentation contains extra axis, flatten to 3D by removing it
roiImage = flattenImage(roiImage)
# Segmentation has different origin, align it to the CT for proper feature extraction
alignedROIImage = alignImages(ctImage, roiImage)

# Get pixel value for the segmentation
segmentationLabel = getROIVoxelLabel(alignedROIImage)

# Check that image and segmentation mask have the same dimensions
if ctImage.GetSize() != alignedROIImage.GetSize():
# Checking if number of segmentation slices is less than CT
if ctImage.GetSize()[2] > alignedROIImage.GetSize()[2]:
print("Slice number mismatch between CT and segmentation for", patID, ". Padding segmentation to match.")
alignedROIImage = padSEGtoMatchCT(ctFolderPath, segFilePath, ctImage, alignedROIImage)
else:
raise RuntimeError()

# Check that CT and segmentation correspond, segmentationLabel is present, and dimensions match
segBoundingBox, correctedROIImage = imageoperations.checkMask(ctImage, alignedROIImage, label=segmentationLabel)
Expand All @@ -65,7 +60,11 @@ def singleRadiomicFeatureExtraction(ctImage:sitk.Image,

# Load PyRadiomics feature extraction parameters to use
# Initialize feature extractor with parameters
featureExtractor = featureextractor.RadiomicsFeatureExtractor(pyradiomicsParamFilePath)
try:
featureExtractor = featureextractor.RadiomicsFeatureExtractor(pyradiomicsParamFilePath)
except OSError as e:
print("ERROR: Supplied pyradiomics parameter file does not exist or is not at that location.")
raise

# Extract radiomic features from CT with segmentation as mask
idFeatureVector = featureExtractor.execute(croppedCT, croppedROI, label=segmentationLabel)
Expand Down Expand Up @@ -110,6 +109,10 @@ def radiomicFeatureExtraction(imageMetadataPath:str,
logger = logging.getLogger("radiomics")
logger.setLevel(logging.ERROR)

# If no pyradiomics parameter file passed, use default
if pyradiomicsParamFilePath == None:
pyradiomicsParamFilePath = "./src/yarea/data/default_pyradiomics.yaml"

# Load in summary file generated by radiogenomic_pipeline
pdImageInfo = pd.read_csv(imageMetadataPath, header=0)

Expand All @@ -125,9 +128,9 @@ def featureExtraction(ctSeriesID):
print("Processing ", patID)

# Get absolute path to CT image files
ctFolderPath = os.path.join(imageDirPath, ctSeriesInfo.iloc[0]['folder_CT'])
ctDirPath = os.path.join(imageDirPath, ctSeriesInfo.iloc[0]['folder_CT'])
# Load CT by passing in specific series to find in a directory
ctImage = read_dicom_series(path = ctFolderPath, series_id = ctSeriesID)
ctImage = read_dicom_series(path = ctDirPath, series_id = ctSeriesID)

# Get list of segmentations to iterate over
segSeriesIDList = ctSeriesInfo['series_seg'].unique()
Expand All @@ -149,7 +152,7 @@ def featureExtraction(ctSeriesID):
segFilePath = os.path.join(imageDirPath, segSeriesInfo.iloc[0]['file_path_seg'])
# Get dictionary of ROI sitk Images for this segmentation file
segImages = loadSegmentation(segFilePath, modality = segSeriesInfo.iloc[0]['modality_seg'],
baseImageDirPath = ctFolderPath, roiNames = roiNames)
baseImageDirPath = ctDirPath, roiNames = roiNames)

# Check that this series has ROIs to extract from (dictionary isn't empty)
if not segImages:
Expand All @@ -161,38 +164,56 @@ def featureExtraction(ctSeriesID):
# ROI counter for image metadata output
roiNum = roiCount + 1

# Extract features listed in the parameter file
print("Calculating radiomic features for segmentation:", roiImageName)

# Get sitk Image object for this ROI
roiImage = segImages[roiImageName]

# Exception catch for if the segmentation dimensions do not match that original image
try:
# Extract features listed in the parameter file
print("Calculating radiomic features for segmentation:", roiImageName)

# Extract radiomic features from this CT/segmentation pair
idFeatureVector = singleRadiomicFeatureExtraction(ctImage, roiImage = segImages[roiImageName],
pyradiomicsParamFilePath = pyradiomicsParamFilePath,
negativeControl = negativeControl)

# Create dictionary of image metadata to append to front of output table
sampleROIData = {"patient_ID": patID,
"study_description": segSeriesInfo.iloc[0]['study_description_CT'],
"series_UID": segSeriesInfo.iloc[0]['series_CT'],
"series_description": segSeriesInfo.iloc[0]['series_description_CT'],
"image_modality": segSeriesInfo.iloc[0]['modality_CT'],
"instances": segSeriesInfo.iloc[0]['instances_CT'],
"seg_series_UID": segSeriesInfo.iloc[0]['series_seg'],
"seg_modality": segSeriesInfo.iloc[0]['modality_seg'],
"seg_ref_image": segSeriesInfo.iloc[0]['reference_ct_seg'],
"roi": roiImageName,
"roi_number": roiNum,
"negative_control": negativeControl}

# Concatenate image metadata with PyRadiomics features
sampleROIData.update(idFeatureVector)
# Store this ROI's info in the segmentation level list
ctAllData.append(sampleROIData)

except Exception as e:
# Check if segmentation just has an extra axis with a size of 1 and remove it
if roiImage.GetDimension() > 3 and roiImage.GetSize()[3] == 1:
roiImage = flattenImage(roiImage)

# Check that image and segmentation mask have the same dimensions
if ctImage.GetSize() != roiImage.GetSize():
# Checking if number of segmentation slices is less than CT
if ctImage.GetSize()[2] > roiImage.GetSize()[2]:
print("Slice number mismatch between CT and segmentation for", patID, ". Padding segmentation to match.")
roiImage = padSEGtoMatchCT(ctDirPath, segFilePath, ctImage, roiImage)
else:
raise RuntimeError("CT and ROI dimensions do not match.")

# Catching CT and segmentation size mismatch error
except RuntimeError as e:
print(str(e))


# Extract radiomic features from this CT/segmentation pair
idFeatureVector = singleRadiomicFeatureExtraction(ctImage, roiImage = roiImage,
pyradiomicsParamFilePath = pyradiomicsParamFilePath,
negativeControl = negativeControl)

# Create dictionary of image metadata to append to front of output table
sampleROIData = {"patient_ID": patID,
"study_description": segSeriesInfo.iloc[0]['study_description_CT'],
"series_UID": segSeriesInfo.iloc[0]['series_CT'],
"series_description": segSeriesInfo.iloc[0]['series_description_CT'],
"image_modality": segSeriesInfo.iloc[0]['modality_CT'],
"instances": segSeriesInfo.iloc[0]['instances_CT'],
"seg_series_UID": segSeriesInfo.iloc[0]['series_seg'],
"seg_modality": segSeriesInfo.iloc[0]['modality_seg'],
"seg_ref_image": segSeriesInfo.iloc[0]['reference_ct_seg'],
"roi": roiImageName,
"roi_number": roiNum,
"negative_control": negativeControl}

# Concatenate image metadata with PyRadiomics features
sampleROIData.update(idFeatureVector)
# Store this ROI's info in the segmentation level list
ctAllData.append(sampleROIData)

return ctAllData
###### END featureExtraction #######

Expand All @@ -214,10 +235,10 @@ def featureExtraction(ctSeriesID):
datasetName = imageMetadataPath.partition("match_list_")[2]
# Setup output file name with the dataset name as a suffix
if negativeControl == None:
outFileName = "radfeatures_" + datasetName
outFileName = "radiomicfeatures_" + datasetName
else:
# Add negative control identifier to output file name
outFileName = "radfeatures_" + negativeControl + "_" + datasetName
outFileName = "radiomicfeatures_" + negativeControl + "_" + datasetName

# Join outputDirPath, a features directory, and the output file name
outputFilePath = os.path.join(outputDirPath, "features/", outFileName)
Expand Down
40 changes: 39 additions & 1 deletion src/yarea/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ def matchCTtoSegmentation(imgFileListPath: str,
if segType != "RTSTRUCT" and segType != "SEG":
raise ValueError("Incorrect segmentation file type. Must be RTSTRUCT or SEG.")

# Check that imgFileListPath is a csv file to properly be loaded in
if not imgFileListPath.endswith('.csv'):
raise ValueError("This function expects to load in a .csv file, so imgFileListPath must end in .csv")

# Load in complete list of patient image directories of all modalities (output from med-imagetools crawl)
fullDicomList = pd.read_csv(imgFileListPath, index_col=0)

Expand Down Expand Up @@ -86,4 +90,38 @@ def matchCTtoSegmentation(imgFileListPath: str,
outputFilePath = os.path.join(outputDirPath, fileName)
saveDataframeCSV(samplesWSeg, outputFilePath)

return samplesWSeg
return samplesWSeg


def getSegmentationType(imgFileListPath: str):
    """Find the segmentation type from the full list of image files.

    Parameters
    ----------
    imgFileListPath : str
        Path to csv containing list of image directories/paths in the dataset.
        Expecting output from med-imagetools autopipeline .imgtools_[dataset]

    Returns
    -------
    str
        Segmentation type (RTSTRUCT or SEG). RTSTRUCT is returned when both
        modalities are present in the file list.

    Raises
    ------
    ValueError
        If imgFileListPath does not end in .csv (checked case-insensitively).
    RuntimeError
        If neither RTSTRUCT nor SEG appears in the 'modality' column.
    """
    # Check that imgFileListPath is a csv file to properly be loaded in.
    # str() lets path-like objects through; lower() accepts ".CSV" as well.
    if not str(imgFileListPath).lower().endswith('.csv'):
        raise ValueError("This function expects to load in a .csv file, so imgFileListPath must end in .csv")

    # Load in complete list of patient image directories of all modalities (output from med-imagetools crawl)
    fullDicomList = pd.read_csv(imgFileListPath, index_col=0)

    # Get the unique modalities present in the dataset
    modalities = set(fullDicomList['modality'].unique())

    # Order matters: RTSTRUCT takes priority over SEG when both are present
    for segType in ("RTSTRUCT", "SEG"):
        if segType in modalities:
            return segType

    raise RuntimeError("No suitable segmentation type found. YAREA can only use RTSTRUCTs and DICOM-SEG segmentations.")
Loading

0 comments on commit c6ff12e

Please sign in to comment.