From aa1dcb479720f8eec295071ad0b45aedaaf275d2 Mon Sep 17 00:00:00 2001 From: Jermiah Date: Fri, 15 Mar 2024 20:22:15 +0000 Subject: [PATCH 01/21] fix: test deployment --- .github/workflows/ci-cd.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml index 0db745e..c8d8df0 100644 --- a/.github/workflows/ci-cd.yml +++ b/.github/workflows/ci-cd.yml @@ -36,7 +36,7 @@ jobs: cd: # Only run this job if the "ci" job passes - needs: ci + # needs: ci # # Only run this job if new work is pushed to "main" # if: github.event_name == 'push' && github.ref == 'refs/heads/main' From af61beba9a677a86695268a55bd2887138fb2c59 Mon Sep 17 00:00:00 2001 From: Jermiah Date: Fri, 15 Mar 2024 20:27:10 +0000 Subject: [PATCH 02/21] fix: test builds and update toml to include this branch --- .github/workflows/ci-cd.yml | 104 ++++++++++++++++++------------------ pyproject.toml | 4 +- 2 files changed, 55 insertions(+), 53 deletions(-) diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml index c8d8df0..bebd7d3 100644 --- a/.github/workflows/ci-cd.yml +++ b/.github/workflows/ci-cd.yml @@ -3,36 +3,36 @@ name: ci-cd on: [push, pull_request] jobs: - ci: - # Set up operating system - runs-on: ubuntu-latest + # ci: + # # Set up operating system + # runs-on: ubuntu-latest - # Define job steps - steps: - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: "3.9" + # # Define job steps + # steps: + # - name: Set up Python + # uses: actions/setup-python@v4 + # with: + # python-version: "3.9" - - name: Check-out repository - uses: actions/checkout@v3 + # - name: Check-out repository + # uses: actions/checkout@v3 - - name: Install poetry - uses: snok/install-poetry@v1 + # - name: Install poetry + # uses: snok/install-poetry@v1 - - name: Install package - run: poetry install + # - name: Install package + # run: poetry install - - name: Test with pytest - run: poetry run pytest tests/ --cov=readii --cov-report=xml + # - name: Test with pytest + # run: poetry run pytest tests/ --cov=readii --cov-report=xml - - name: Use Codecov to track coverage - uses: codecov/codecov-action@v3 - with: - files: ./coverage.xml # coverage report + # - name: Use Codecov to track coverage + # uses: codecov/codecov-action@v3 + # with: + # files: ./coverage.xml # coverage report - - name: Build documentation - run: poetry run make html --directory docs/ + # - name: Build documentation + # run: poetry run make html --directory docs/ cd: # Only run this job if the "ci" job passes @@ -76,35 +76,35 @@ jobs: echo "${{ steps.release.outputs.tag }}" - - name: Install packaging-related tool - run: - python3 -m pip install build twine - - - name: Build package - run: | - poetry version $(git describe --tags --abbrev=0 | sed 's/^v//') - python -m build --sdist --wheel --outdir dist/ . - ls dist/ - - - name: Publish to TestPyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - user: __token__ - password: ${{ secrets.TEST_PYPI_API_TOKEN }} - repository_url: https://test.pypi.org/legacy/ - - - name: Test install from TestPyPI - run: | - pip install \ - --index-url https://test.pypi.org/simple/ \ - --extra-index-url https://pypi.org/simple \ - readii - - - name: Publish to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} + # - name: Install packaging-related tool + # run: + # python3 -m pip install build twine + + # - name: Build package + # run: | + # poetry version $(git describe --tags --abbrev=0 | sed 's/^v//') + # python -m build --sdist --wheel --outdir dist/ . + # ls dist/ + + # - name: Publish to TestPyPI + # uses: pypa/gh-action-pypi-publish@release/v1 + # with: + # user: __token__ + # password: ${{ secrets.TEST_PYPI_API_TOKEN }} + # repository-url: https://test.pypi.org/legacy/ + + # - name: Test install from TestPyPI + # run: | + # pip install \ + # --index-url https://test.pypi.org/simple/ \ + # --extra-index-url https://pypi.org/simple \ + # readii + + # - name: Publish to PyPI + # uses: pypa/gh-action-pypi-publish@release/v1 + # with: + # user: __token__ + # password: ${{ secrets.PYPI_API_TOKEN }} - name: Set up QEMU if: steps.release.outputs.released == 'true' diff --git a/pyproject.toml b/pyproject.toml index 4be2a0b..08e2376 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,13 +29,15 @@ python-semantic-release = "^8.5.2" [tool.semantic_release] version_toml = ["pyproject.toml:tool.poetry.version"] # version location -branch = "main" # branch to make releases of changelog_file = "CHANGELOG.md" # changelog file dist_path = "dist/" # where to put dists upload_to_release = true # auto-create GitHub release remove_dist = false # don't remove dists patch_without_tag = true # patch release by default +[tool.semantic_release.branches] +include = ["main", "dockerfile"] # only release from these branches + [tool.poetry.scripts] readii = "readii.pipeline:main" From b55baabb4800e074f0f08684874947052ecb9195 Mon Sep 17 00:00:00 2001 From: Jermiah Date: Fri, 15 Mar 2024 20:27:53 +0000 Subject: [PATCH 03/21] fix: wrong group in toml --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 08e2376..4275e00 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,8 +35,8 @@ upload_to_release = true # auto-create GitHub release remove_dist = false # don't remove dists patch_without_tag = true # patch release by default -[tool.semantic_release.branches] -include = ["main", "dockerfile"] # only release from these branches +[tool.semantic_release.branches.main] +match = "(main|dockerfile)" [tool.poetry.scripts] readii = "readii.pipeline:main" From f13c5e670fba091c7bb3cb652a9cbdd8e02d10d2 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 15 Mar 2024 20:29:33 +0000 Subject: [PATCH 04/21] 1.2.0 Automatically generated by python-semantic-release --- CHANGELOG.md | 15 +++++++++++++++ pyproject.toml | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 58069bc..e88da89 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,21 @@ +## v1.2.0 (2024-03-15) + +### Feature + +* feat: adding dockerfile and gha to build and deploy ([`b7d6b73`](https://github.com/bhklab/readii/commit/b7d6b734ef6b955e5b61ff2d980a08893dc2dddb)) + +### Fix + +* fix: wrong group in toml ([`b55baab`](https://github.com/bhklab/readii/commit/b55baabb4800e074f0f08684874947052ecb9195)) + +* fix: test builds and update toml to include this branch ([`af61beb`](https://github.com/bhklab/readii/commit/af61beba9a677a86695268a55bd2887138fb2c59)) + +* fix: test deployment ([`aa1dcb4`](https://github.com/bhklab/readii/commit/aa1dcb479720f8eec295071ad0b45aedaaf275d2)) + + ## v1.1.3 (2024-03-06) ### Build diff --git a/pyproject.toml b/pyproject.toml index 4275e00..9eedd4e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "readii" -version = "1.1.3" +version = "1.2.0" description = "A package to extract radiomic features!" authors = ["Katy Scott"] license = "MIT" From fa35141d0d9b1772ac4d6242d942f4ad9c0061d3 Mon Sep 17 00:00:00 2001 From: Jermiah Date: Fri, 15 Mar 2024 20:44:02 +0000 Subject: [PATCH 05/21] Update ci-cd.yml and pyproject.toml --- .github/workflows/ci-cd.yml | 116 +++++++++++++++++++----------------- README.md | 3 + pyproject.toml | 4 +- 3 files changed, 67 insertions(+), 56 deletions(-) diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml index bebd7d3..5592209 100644 --- a/.github/workflows/ci-cd.yml +++ b/.github/workflows/ci-cd.yml @@ -1,45 +1,48 @@ name: ci-cd -on: [push, pull_request] +# only run on pushes to main or pull requests +on: + push: + branches: [ "*"] + pull_request: + branches: [ main , development ] jobs: - # ci: - # # Set up operating system - # runs-on: ubuntu-latest - - # # Define job steps - # steps: - # - name: Set up Python - # uses: actions/setup-python@v4 - # with: - # python-version: "3.9" + ci: + # Set up operating system + runs-on: ubuntu-latest - # - name: Check-out repository - # uses: actions/checkout@v3 + # Define job steps + steps: + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.9" - # - name: Install poetry - # uses: snok/install-poetry@v1 + - name: Check-out repository + uses: actions/checkout@v3 - # - name: Install package - # run: poetry install + - name: Install poetry + uses: snok/install-poetry@v1 - # - name: Test with pytest - # run: poetry run pytest tests/ --cov=readii --cov-report=xml + - name: Install package + run: poetry install - # - name: Use Codecov to track coverage - # uses: codecov/codecov-action@v3 - # with: - # files: ./coverage.xml # coverage report + # Commented out until we have a repo for coverage + # - name: Test with pytest + # run: poetry run pytest tests/ --cov=readii --cov-report=xml - # - name: Build documentation - # run: poetry run make html --directory docs/ + # - name: Use Codecov to track coverage + # uses: codecov/codecov-action@v3 + # with: + # files: ./coverage.xml # coverage report cd: # Only run this job if the "ci" job passes - # needs: ci + needs: ci # # Only run this job if new work is pushed to "main" - # if: github.event_name == 'push' && github.ref == 'refs/heads/main' + if: github.ref == 'refs/heads/main' # Set up operating system runs-on: ubuntu-latest @@ -75,36 +78,40 @@ jobs: echo "${{ steps.release.outputs.version }}" echo "${{ steps.release.outputs.tag }}" + - name: Install packaging-related tool + if: steps.release.outputs.released == 'true' + run: + python3 -m pip install build twine - # - name: Install packaging-related tool - # run: - # python3 -m pip install build twine + - name: Build package + if: steps.release.outputs.released == 'true' + run: | + poetry version ${{ steps.release.outputs.version }} + python -m build --sdist --wheel --outdir dist/ . + ls dist/ - # - name: Build package - # run: | - # poetry version $(git describe --tags --abbrev=0 | sed 's/^v//') - # python -m build --sdist --wheel --outdir dist/ . - # ls dist/ + - name: Publish to TestPyPI + if: steps.release.outputs.released == 'true' + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.TEST_PYPI_API_TOKEN }} + repository-url: https://test.pypi.org/legacy/ - # - name: Publish to TestPyPI - # uses: pypa/gh-action-pypi-publish@release/v1 - # with: - # user: __token__ - # password: ${{ secrets.TEST_PYPI_API_TOKEN }} - # repository-url: https://test.pypi.org/legacy/ - - # - name: Test install from TestPyPI - # run: | - # pip install \ - # --index-url https://test.pypi.org/simple/ \ - # --extra-index-url https://pypi.org/simple \ - # readii - - # - name: Publish to PyPI - # uses: pypa/gh-action-pypi-publish@release/v1 - # with: - # user: __token__ - # password: ${{ secrets.PYPI_API_TOKEN }} + - name: Test install from TestPyPI + if: steps.release.outputs.released == 'true' + run: | + pip install \ + --index-url https://test.pypi.org/simple/ \ + --extra-index-url https://pypi.org/simple \ + readii + + - name: Publish to PyPI + if: steps.release.outputs.released == 'true' + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} - name: Set up QEMU if: steps.release.outputs.released == 'true' @@ -130,6 +137,7 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Extract metadata (tags, labels) for Docker + if: steps.release.outputs.released == 'true' id: meta uses: docker/metadata-action@v3 with: diff --git a/README.md b/README.md index c430e53..3dd8d3b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,7 @@ # READII +![Docker Pulls](https://img.shields.io/docker/pulls/bhklab/readii) +![GitHub Release](https://img.shields.io/github/v/release/bhklab/readii) + **R**adiomic **E**xtraction and **A**nalysis for **DI**COM **I**mages diff --git a/pyproject.toml b/pyproject.toml index 4275e00..1e0bb88 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "readii" -version = "1.1.3" +version = "1.2.0" description = "A package to extract radiomic features!" authors = ["Katy Scott"] license = "MIT" @@ -36,7 +36,7 @@ remove_dist = false # don't remove dists patch_without_tag = true # patch release by default [tool.semantic_release.branches.main] -match = "(main|dockerfile)" +match = "(main|master)" [tool.poetry.scripts] readii = "readii.pipeline:main" From e3349dbc68dd3f8c4f72afe351148f8af2ef846f Mon Sep 17 00:00:00 2001 From: Jermiah Date: Fri, 15 Mar 2024 20:47:09 +0000 Subject: [PATCH 06/21] fix: no development branch --- .github/workflows/ci-cd.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml index 5592209..bcd1613 100644 --- a/.github/workflows/ci-cd.yml +++ b/.github/workflows/ci-cd.yml @@ -5,7 +5,7 @@ on: push: branches: [ "*"] pull_request: - branches: [ main , development ] + branches: [ main ] jobs: ci: From 8ca1a9349c7e0ac3bf13da572aa13d691d1cb56d Mon Sep 17 00:00:00 2001 From: Jermiah Date: Fri, 15 Mar 2024 20:50:40 +0000 Subject: [PATCH 07/21] fixed wrong docker repo --- .github/workflows/ci-cd.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml index bcd1613..30e5273 100644 --- a/.github/workflows/ci-cd.yml +++ b/.github/workflows/ci-cd.yml @@ -142,7 +142,7 @@ jobs: uses: docker/metadata-action@v3 with: images: | - ${{ secrets.BHKLAB_DOCKERHUB_USERNAME }}/readii:${{ steps.release.outputs.tag }} + bhklab/readii:${{ steps.release.outputs.tag }} ghcr.io/${{ github.repository }}/readii:${{ steps.release.outputs.tag }} - name: Build @@ -154,8 +154,8 @@ jobs: file: ./Dockerfile push: true tags: | - ${{ secrets.BHKLAB_DOCKERHUB_USERNAME }}/readii:${{ steps.release.outputs.tag }} - ${{ secrets.BHKLAB_DOCKERHUB_USERNAME }}/readii:latest + bhklab/readii:${{ steps.release.outputs.tag }} + bhklab/readii:latest ghcr.io/${{ github.repository }}/readii:${{ steps.release.outputs.tag }} ghcr.io/${{ github.repository }}/readii:latest labels: ${{ steps.meta.outputs.labels }} From d52a5a3189cef9fc7599687f6dc1558378d87506 Mon Sep 17 00:00:00 2001 From: Jermiah Date: Tue, 19 Mar 2024 16:11:21 +0000 Subject: [PATCH 08/21] build: add pytest-xdist for development parallel tests --- poetry.lock | 39 +++++++++++++++++++++++++++++++++++++-- pyproject.toml | 1 + 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index dd1c0ae..5a1f46c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "alabaster" @@ -759,6 +759,20 @@ files = [ [package.extras] test = ["pytest (>=6)"] +[[package]] +name = "execnet" +version = "2.0.2" +description = "execnet: rapid multi-Python deployment" +optional = false +python-versions = ">=3.7" +files = [ + {file = "execnet-2.0.2-py3-none-any.whl", hash = "sha256:88256416ae766bc9e8895c76a87928c0012183da3cc4fc18016e6f050e025f41"}, + {file = "execnet-2.0.2.tar.gz", hash = "sha256:cc59bc4423742fd71ad227122eb0dd44db51efb3dc4095b45ac9a08c770096af"}, +] + +[package.extras] +testing = ["hatch", "pre-commit", "pytest", "tox"] + [[package]] name = "executing" version = "2.0.1" @@ -2934,6 +2948,26 @@ pytest = ">=4.6" [package.extras] testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"] +[[package]] +name = "pytest-xdist" +version = "3.5.0" +description = "pytest xdist plugin for distributed testing, most importantly across multiple CPUs" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-xdist-3.5.0.tar.gz", hash = "sha256:cbb36f3d67e0c478baa57fa4edc8843887e0f6cfc42d677530a36d7472b32d8a"}, + {file = "pytest_xdist-3.5.0-py3-none-any.whl", hash = "sha256:d075629c7e00b611df89f490a5063944bee7a4362a5ff11c7cc7824a03dfce24"}, +] + +[package.dependencies] +execnet = ">=1.1" +pytest = ">=6.2.0" + +[package.extras] +psutil = ["psutil (>=3.0)"] +setproctitle = ["setproctitle"] +testing = ["filelock"] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -3119,6 +3153,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -4422,4 +4457,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.9,<4" -content-hash = "996f1da0c6d66ec1d6db2cb820257ef608fbbc29b460ada86d951b0697031411" +content-hash = "f1c61344fc24091724e64aea20f6940d7a7b2a685766cce6d0bd72a5f4f800d1" diff --git a/pyproject.toml b/pyproject.toml index 1e0bb88..75c9258 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ pyarrow = "^15.0.0" [tool.poetry.group.dev.dependencies] pytest = "^7.4.3" pytest-cov = "^4.1.0" +pytest-xdist = "^3.5.0" jupyter = "^1.0.0" myst-nb = {version = "^1.0.0", python = "^3.9"} sphinx-autoapi = "^3.0.0" From b4d49d53fcccaa63e0e0d0b720da39f9f1dda3fb Mon Sep 17 00:00:00 2001 From: Jermiah Date: Tue, 19 Mar 2024 16:13:11 +0000 Subject: [PATCH 09/21] build: Fix formatting in ci-cd.yml and add back unit tests. Rename jobs for better clarity --- .github/workflows/ci-cd.yml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml index 30e5273..927ab4d 100644 --- a/.github/workflows/ci-cd.yml +++ b/.github/workflows/ci-cd.yml @@ -3,12 +3,12 @@ name: ci-cd # only run on pushes to main or pull requests on: push: - branches: [ "*"] + branches: [ "*" ] pull_request: branches: [ main ] jobs: - ci: + Unit-Testing: # Set up operating system runs-on: ubuntu-latest @@ -28,6 +28,9 @@ jobs: - name: Install package run: poetry install + - name: Test with pytest + run: poetry run pytest tests/ + # Commented out until we have a repo for coverage # - name: Test with pytest # run: poetry run pytest tests/ --cov=readii --cov-report=xml @@ -37,9 +40,8 @@ jobs: # with: # files: ./coverage.xml # coverage report - cd: - # Only run this job if the "ci" job passes - needs: ci + Continuous-Deployment: + needs: Unit-Testing # # Only run this job if new work is pushed to "main" if: github.ref == 'refs/heads/main' From 11f0d92a3f86e1c1db328172d338b92b09654f46 Mon Sep 17 00:00:00 2001 From: Jermiah Date: Tue, 19 Mar 2024 16:20:02 +0000 Subject: [PATCH 10/21] refactor: update type annotations to handle optional parameters, format with black for readability, refactor applyNegativeControl function to raise AssertionErrror for optional baseROI, handle edge case and raise error if none of the nc_types. --- src/readii/negative_controls.py | 170 +++++++++++++++++++++++--------- 1 file changed, 125 insertions(+), 45 deletions(-) diff --git a/src/readii/negative_controls.py b/src/readii/negative_controls.py index 079c00f..22d51d5 100644 --- a/src/readii/negative_controls.py +++ b/src/readii/negative_controls.py @@ -4,8 +4,12 @@ from readii.image_processing import alignImages, getROIVoxelLabel +from typing import Optional -def shuffleImage(imageToShuffle: sitk.Image): +def shuffleImage( + imageToShuffle: sitk.Image +) -> sitk.Image: + """Function to shuffle all pixel values in a sitk Image (developed for 3D, should work on 2D as well) Parameters @@ -28,7 +32,9 @@ def shuffleImage(imageToShuffle: sitk.Image): flatArrImage = arrImage.flatten() # Shuffle the flat array - np.random.shuffle(flatArrImage, ) + np.random.shuffle( + flatArrImage, + ) # Reshape the array back into the original image dimensions shuffled3DArrImage = np.reshape(flatArrImage, imgDimensions) @@ -42,7 +48,9 @@ def shuffleImage(imageToShuffle: sitk.Image): return alignedShuffledImage -def makeRandomImage(baseImage: sitk.Image): +def makeRandomImage( + baseImage: sitk.Image +) -> sitk.Image: """Function to generate random pixel values based on the range of values in a sitk Image (developed for 3D, should work on 2D as well) Parameters @@ -66,10 +74,12 @@ def makeRandomImage(baseImage: sitk.Image): maxVoxelVal = np.max(arrImage) # Delete arrImage to save memory - del (arrImage) + del arrImage # Generate random array with same dimensions as baseImage - random3DArrImage = np.random.randint(low=minVoxelVal, high=maxVoxelVal, size=imgDimensions) + random3DArrImage = np.random.randint( + low=minVoxelVal, high=maxVoxelVal, size=imgDimensions + ) # Convert random array to a sitk Image randomImage = sitk.GetImageFromArray(random3DArrImage) @@ -80,14 +90,18 @@ def makeRandomImage(baseImage: sitk.Image): return alignedRandomImage -def makeRandomRoi(baseImage: sitk.Image, baseROI: sitk.Image, roiLabel: str = None): +def makeRandomRoi( + baseImage: sitk.Image, + baseROI: sitk.Image, + roiLabel: Optional[int] = None, +) -> sitk.Image: """Function to generate random pixel values within the Region of Interest based on the range of values in a sitk Image Parameters ---------- baseImage : sitk.Image Image to randomly generate pixel values in Region of Interest - baseROI : sitk.Image + baseROI : sitk.Image Image detailing Region of Interest roiLabel : int The label representing the ROI in baseROI @@ -102,8 +116,8 @@ def makeRandomRoi(baseImage: sitk.Image, baseROI: sitk.Image, roiLabel: str = No roiLabel = getROIVoxelLabel(baseROI) # Initialize variables to track the highest and lowest pixel values in the ROI - maxVoxelVal = float('-inf') - minVoxelVal = float('inf') + maxVoxelVal = float("-inf") + minVoxelVal = float("inf") # Iterate through baseROI to find the highest and lowest values in baseImage's ROI baseROISize = baseROI.GetSize() @@ -121,7 +135,7 @@ def makeRandomRoi(baseImage: sitk.Image, baseROI: sitk.Image, roiLabel: str = No # Create a new base image so we are not directly editing the input image new_base = baseImage.__copy__() # Delete the input image to save space - del (baseImage) + del baseImage # Now iterate over the pixels of the ROI in the image and randomly generate a new value for them for x in range(baseROISize[0]): @@ -137,7 +151,11 @@ def makeRandomRoi(baseImage: sitk.Image, baseROI: sitk.Image, roiLabel: str = No return new_base -def shuffleROI(baseImage: sitk.Image, baseROI: sitk.Image, roiLabel: str = None): +def shuffleROI( + baseImage: sitk.Image, + baseROI: sitk.Image, + roiLabel: Optional[int] = None, +) -> sitk.Image: """Function to shuffle all pixel values within the Region of Interest in a sitk Image Parameters @@ -172,7 +190,7 @@ def shuffleROI(baseImage: sitk.Image, baseROI: sitk.Image, roiLabel: str = None) # Create a new base image so we are not directly editing the input image new_base = baseImage.__copy__() # Delete the input image to save space - del (baseImage) + del baseImage # # Randomly shuffling the pixel values random.shuffle(count) @@ -187,7 +205,11 @@ def shuffleROI(baseImage: sitk.Image, baseROI: sitk.Image, roiLabel: str = None) return new_base -def makeRandomNonRoi(baseImage: sitk.Image, baseROI: sitk.Image, roiLabel: str = None): +def makeRandomNonRoi( + baseImage: sitk.Image, + baseROI: sitk.Image, + roiLabel: Optional[int] = None, +) -> sitk.Image: """Function to generate random pixel values outside the Region of Interest based on the range of values in a sitk Image Parameters @@ -196,7 +218,7 @@ def makeRandomNonRoi(baseImage: sitk.Image, baseROI: sitk.Image, roiLabel: str = Image to randomly generate pixel values in outside the Region of Interest baseROI : sitk.Image Image detailing the Region of Interest - roiLabel : int + roiLabel : int The label representing the ROI in baseROI Returns @@ -209,8 +231,8 @@ def makeRandomNonRoi(baseImage: sitk.Image, baseROI: sitk.Image, roiLabel: str = roiLabel = getROIVoxelLabel(baseROI) # Initialize variables to track the highest and lowest pixel values not in the ROI - maxVoxelVal = float('-inf') - minVoxelVal = float('inf') + maxVoxelVal: float = float("-inf") + minVoxelVal: float = float("inf") # Iterate through baseImage to find the highest and lowest values not in baseImage's ROI baseImageSize = baseImage.GetSize() @@ -219,8 +241,12 @@ def makeRandomNonRoi(baseImage: sitk.Image, baseROI: sitk.Image, roiLabel: str = for y in range(baseImageSize[1]): for z in range(baseImageSize[2]): # Ensure only pixels that do not match the roiLabel of the image or are not in the ROI are looked at - if x > baseROISize[0] or y > baseROISize[1] or z > baseROISize[2] or baseROI.GetPixel(x, y, - z) != roiLabel: + if ( + x > baseROISize[0] + or y > baseROISize[1] + or z > baseROISize[2] + or baseROI.GetPixel(x, y, z) != roiLabel + ): # Once a pixel is confirmed to be outside the ROI, we get it's corresponding value from the baseImage current_value = baseImage.GetPixel(x, y, z) # Update the max and min values @@ -230,14 +256,18 @@ def makeRandomNonRoi(baseImage: sitk.Image, baseROI: sitk.Image, roiLabel: str = # Create a new base image so we are not directly editing the input image new_base = baseImage.__copy__() # Delete the input image to save space - del (baseImage) + del baseImage # Now iterate over the pixels outside the ROI in the image and randomly generate a new value for them for x in range(baseImageSize[0]): for y in range(baseImageSize[1]): for z in range(baseImageSize[2]): - if x > baseROISize[0] or y > baseROISize[1] or z > baseROISize[2] or baseROI.GetPixel(x, y, - z) != roiLabel: + if ( + x > baseROISize[0] + or y > baseROISize[1] + or z > baseROISize[2] + or baseROI.GetPixel(x, y, z) != roiLabel + ): # Randomly assigning the current value to the range [maxVoxelVal, maxVoxelVal] mapped_value = random.randint(minVoxelVal, maxVoxelVal) @@ -247,7 +277,11 @@ def makeRandomNonRoi(baseImage: sitk.Image, baseROI: sitk.Image, roiLabel: str = return new_base -def shuffleNonROI(baseImage: sitk.Image, baseROI: sitk.Image, roiLabel: str = None): +def shuffleNonROI( + baseImage: sitk.Image, + baseROI: sitk.Image, + roiLabel: Optional[int] = None, +) -> sitk.Image: """Function to shuffle all pixel values that are not within the Region of Interest in a sitk Image Parameters @@ -276,15 +310,19 @@ def shuffleNonROI(baseImage: sitk.Image, baseROI: sitk.Image, roiLabel: str = No for x in range(baseImageSize[0]): for y in range(baseImageSize[1]): for z in range(baseImageSize[2]): - if x > baseROISize[0] or y > baseROISize[1] or z > baseROISize[2] or baseROI.GetPixel(x, y, - z) != roiLabel: + if ( + x > baseROISize[0] + or y > baseROISize[1] + or z > baseROISize[2] + or baseROI.GetPixel(x, y, z) != roiLabel + ): # Here the key is the pixel coordinate and the value is the value of the pixel in the base image count.append(baseImage.GetPixel(x, y, z)) # Create a new base image so we are not directly editing the input image new_base = baseImage.__copy__() # Delete the input image to save space - del (baseImage) + del baseImage # Randomly shuffling the pixel values random.shuffle(count) @@ -292,14 +330,21 @@ def shuffleNonROI(baseImage: sitk.Image, baseROI: sitk.Image, roiLabel: str = No for x in range(baseImageSize[0]): for y in range(baseImageSize[1]): for z in range(baseImageSize[2]): - if x > baseROISize[0] or y > baseROISize[1] or z > baseROISize[2] or baseROI.GetPixel(x, y, - z) != roiLabel: + if ( + x > baseROISize[0] + or y > baseROISize[1] + or z > baseROISize[2] + or baseROI.GetPixel(x, y, z) != roiLabel + ): # Set the value of a pixel outside the ROI to be a shuffled value new_base.SetPixel(x, y, z, count.pop()) return new_base -def randomizeImageFromDistribtutionSampling(imageToRandomize: sitk.Image): + +def randomizeImageFromDistribtutionSampling( + imageToRandomize: sitk.Image +) -> sitk.Image: """Function to randomly sample all the pixel values in a sitk Image, from the distribution of existing values Parameters @@ -335,7 +380,12 @@ def randomizeImageFromDistribtutionSampling(imageToRandomize: sitk.Image): return alignedRandomlySampledImage -def makeRandomFromRoiDistribution(baseImage: sitk.Image, baseROI: sitk.Image, roiLabel: str = None): + +def makeRandomFromRoiDistribution( + baseImage: sitk.Image, + baseROI: sitk.Image, + roiLabel: Optional[int] = None, +) -> sitk.Image: """Function to randomly sample pixel values within the Region of Interest uniformly from the distribution of pixel values in the ROI region sitk Image Parameters @@ -387,7 +437,12 @@ def makeRandomFromRoiDistribution(baseImage: sitk.Image, baseROI: sitk.Image, ro return new_base -def makeRandomNonRoiFromDistribution(baseImage: sitk.Image, baseROI: sitk.Image, roiLabel: str = None): + +def makeRandomNonRoiFromDistribution( + baseImage: sitk.Image, + baseROI: sitk.Image, + roiLabel: Optional[int] = None, +) -> sitk.Image: """Function to random sample pixel values outside the Region of Interest uniformly from the distribution of pixel values outside the ROI in a sitk Image Parameters @@ -418,22 +473,30 @@ def makeRandomNonRoiFromDistribution(baseImage: sitk.Image, baseROI: sitk.Image, for y in range(baseImageSize[1]): for z in range(baseImageSize[2]): # Ensure only pixels that do not match the roiLabel of the image or are not in the ROI are looked at - if x > baseROISize[0] or y > baseROISize[1] or z > baseROISize[2] or baseROI.GetPixel(x, y, - z) != roiLabel: + if ( + x > baseROISize[0] + or y > baseROISize[1] + or z > baseROISize[2] + or baseROI.GetPixel(x, y, z) != roiLabel + ): # Once a pixel is confirmed to be outside the ROI, we get it's corresponding value from the baseImage distributionROI.append(baseImage.GetPixel(x, y, z)) # Create a new base image so we are not directly editing the input image new_base = baseImage.__copy__() # Delete the input image to save space - del (baseImage) + del baseImage # Now iterate over the pixels outside the ROI in the image and randomly generate a new value for them for x in range(baseImageSize[0]): for y in range(baseImageSize[1]): for z in range(baseImageSize[2]): - if x > baseROISize[0] or y > baseROISize[1] or z > baseROISize[2] or baseROI.GetPixel(x, y, - z) != roiLabel: + if ( + x > baseROISize[0] + or y > baseROISize[1] + or z > baseROISize[2] + or baseROI.GetPixel(x, y, z) != roiLabel + ): # Assigning the current value to the randomly sampled value from within the ROI mapped_value = random.choice(distributionROI) @@ -443,7 +506,12 @@ def makeRandomNonRoiFromDistribution(baseImage: sitk.Image, baseROI: sitk.Image, return new_base -def applyNegativeControl(nc_type: str, baseImage: sitk.Image, baseROI: sitk.Image = None, roiLabel: str = None): +def applyNegativeControl( + nc_type: str, + baseImage: sitk.Image, + baseROI: Optional[sitk.Image] = None, + roiLabel: Optional[int] = None, +) -> sitk.Image: """Function to generate random pixel values within the Region of Interest based on the range of values in a sitk Image Parameters @@ -461,12 +529,28 @@ def applyNegativeControl(nc_type: str, baseImage: sitk.Image, baseROI: sitk.Imag ------- sitk.Image The output image with the negative control applied - """ - + + Raises + ------ + ValueError + If the nc_type is not a valid negative control type + """ + if nc_type == "randomized_full": # Make negative control version of ctImage (randomized pixel size) return makeRandomImage(baseImage) - elif nc_type == "randomized_roi": + elif nc_type == "shuffled_full": + # Make negative control version of ctImage (random shuffled pixels, same size) + return shuffleImage(baseImage) + elif nc_type == "randomized_sampled_full": + # Make negative control version of ctImage (random sampled pixels from original distribution, same size) + return randomizeImageFromDistribtutionSampling(baseImage) + + # typesafety check here to ensure baseROI is not None for the following negative control types + assert baseROI is not None, \ + f"baseROI must be provided for {nc_type} negative control type" + + if nc_type == "randomized_roi": # Make negative control version of ctImage (randomized pixel size inside the ROI) return makeRandomRoi(baseImage, baseROI, roiLabel) elif nc_type == "shuffled_roi": @@ -478,15 +562,11 @@ def applyNegativeControl(nc_type: str, baseImage: sitk.Image, baseROI: sitk.Imag elif nc_type == "shuffled_non_roi": # Make negative control version of ctImage (shuffled pixels outside the ROI, same size) return shuffleNonROI(baseImage, baseROI, roiLabel) - elif nc_type == "randomized_sampled_full": - # Make negative control version of ctImage (random sampled pixels from original distribution, same size) - return randomizeImageFromDistribtutionSampling(baseImage) elif nc_type == "randomized_sampled_roi": # Make negative control version of ctImage (random sampled pixels from original distribution inside ROI, same size) return makeRandomFromRoiDistribution(baseImage, baseROI, roiLabel) elif nc_type == "randomized_sampled_non_roi": # Make negative control version of ctImage (random sampled pixels from original distribution outside ROI, same size) return makeRandomNonRoiFromDistribution(baseImage, baseROI, roiLabel) - elif nc_type == "shuffled_full": - # Make negative control version of ctImage (random shuffled pixels, same size) - return shuffleImage(baseImage) + else: + raise ValueError("Invalid nc_type. Please choose a valid negative control type.") From c6ee885422df735b3230c5541936d3f71ab17f6c Mon Sep 17 00:00:00 2001 From: Jermiah Date: Tue, 19 Mar 2024 16:22:16 +0000 Subject: [PATCH 11/21] refactor: update type annotations. --- src/readii/loaders.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/readii/loaders.py b/src/readii/loaders.py index a54ad6c..5c6ef02 100644 --- a/src/readii/loaders.py +++ b/src/readii/loaders.py @@ -5,6 +5,7 @@ from imgtools.ops import StructureSetToSegmentation from imgtools.io import read_dicom_auto +from typing import Optional def loadDicomSITK(imgDirPath: str) -> sitk.Image: """Read DICOM series as SimpleITK Image. @@ -28,7 +29,7 @@ def loadDicomSITK(imgDirPath: str) -> sitk.Image: def loadRTSTRUCTSITK(rtstructPath: str, baseImageDirPath: str, - roiNames:str = None) -> dict: + roiNames: Optional[str] = None) -> dict: """ Load RTSTRUCT into SimpleITK Image. Parameters @@ -77,8 +78,9 @@ def loadRTSTRUCTSITK(rtstructPath: str, def loadSegmentation(segImagePath: str, modality: str, - baseImageDirPath: str = None, - roiNames: str = None) -> dict: + baseImageDirPath: Optional[str] = None, + roiNames: Optional[str] = None +) -> dict: ''' Function to load a segmentation with the correct function. Parameters From 80547997d7d2c4c852a0f4f43b61d3c49ebe1f51 Mon Sep 17 00:00:00 2001 From: Jermiah Date: Tue, 19 Mar 2024 16:25:47 +0000 Subject: [PATCH 12/21] refactor: explicitly import all, nested import * can lead to performance issues --- src/readii/feature_extraction.py | 233 +++++++++++++++++++++---------- tests/test_feature_extraction.py | 14 +- 2 files changed, 174 insertions(+), 73 deletions(-) diff --git a/src/readii/feature_extraction.py b/src/readii/feature_extraction.py index a64d76e..9a2113c 100644 --- a/src/readii/feature_extraction.py +++ b/src/readii/feature_extraction.py @@ -2,19 +2,57 @@ from itertools import chain from joblib import Parallel, delayed from radiomics import featureextractor, imageoperations, logging -import radiomics -from readii.image_processing import * -from readii.loaders import * -from readii.metadata import * -from readii.negative_controls import * - -def singleRadiomicFeatureExtraction(ctImage:sitk.Image, - roiImage:sitk.Image, - pyradiomicsParamFilePath:str = "./src/readii/data/default_pyradiomics.yaml", - negativeControl:str = None): +import os +import pandas as pd +import SimpleITK as sitk + +from readii.image_processing import ( + flattenImage, + alignImages, + padSegToMatchCT, + getROIVoxelLabel, + displayImageSlice, + displayCTSegOverlay, + getROICenterCoords, + getCroppedImages, +) + +from readii.loaders import ( + loadDicomSITK, + loadRTSTRUCTSITK, + loadSegmentation, +) + +from readii.metadata import ( + saveDataframeCSV, + matchCTtoSegmentation, + getSegmentationType, +) +from readii.negative_controls import ( + applyNegativeControl, + shuffleImage, + makeRandomImage, + makeRandomRoi, + shuffleROI, + makeRandomNonRoi, + shuffleNonROI, + randomizeImageFromDistribtutionSampling, + makeRandomFromRoiDistribution, + makeRandomNonRoiFromDistribution, +) + +from typing import Optional, Any +from collections import OrderedDict + +def singleRadiomicFeatureExtraction( + ctImage: sitk.Image, + roiImage: sitk.Image, + pyradiomicsParamFilePath: str = "./src/readii/data/default_pyradiomics.yaml", + negativeControl: Optional[str] = None, +) -> OrderedDict[Any, Any]: """Function to perform radiomic feature extraction for a single CT image and its corresponding segmentation. - CT and segmentation will be aligned and cropped prior to extraction. + CT and segmentation will be aligned and cropped prior to extraction. Parameters ---------- @@ -29,7 +67,7 @@ def singleRadiomicFeatureExtraction(ctImage:sitk.Image, Returns ------- - dict + OrderedDict[Any, Any] Dictionary containing image metadata, versions for key packages used for extraction, and radiomic features """ # If no pyradiomics paramater file passed, use default @@ -42,43 +80,60 @@ def singleRadiomicFeatureExtraction(ctImage:sitk.Image, alignedROIImage = alignImages(ctImage, roiImage) # Get pixel value for the segmentation - segmentationLabel = getROIVoxelLabel(alignedROIImage) - + segmentationLabel: int = getROIVoxelLabel(alignedROIImage) + # Check that CT and segmentation correspond, segmentationLabel is present, and dimensions match - segBoundingBox, correctedROIImage = imageoperations.checkMask(ctImage, alignedROIImage, label=segmentationLabel) + segBoundingBox, correctedROIImage = imageoperations.checkMask( + ctImage, alignedROIImage, label=segmentationLabel + ) # Update the ROI image if a correction was generated by checkMask if correctedROIImage is not None: alignedROIImage = correctedROIImage - + # Crop the image and mask to a bounding box around the mask to reduce volume size to process - croppedCT, croppedROI = imageoperations.cropToTumorMask(ctImage, alignedROIImage, segBoundingBox) + croppedCT, croppedROI = imageoperations.cropToTumorMask( + ctImage, alignedROIImage, segBoundingBox + ) if negativeControl != None: print("Generating ", negativeControl, "negative control for CT.") # Make negative control version of ctImage - croppedCT = applyNegativeControl(nc_type=negativeControl, baseImage=croppedCT, baseROI=croppedROI, roiLabel=segmentationLabel) + croppedCT = applyNegativeControl( + nc_type=negativeControl, + baseImage=croppedCT, + baseROI=croppedROI, + roiLabel=segmentationLabel, + ) # Load PyRadiomics feature extraction parameters to use # Initialize feature extractor with parameters try: - featureExtractor = featureextractor.RadiomicsFeatureExtractor(pyradiomicsParamFilePath) + featureExtractor = featureextractor.RadiomicsFeatureExtractor( + pyradiomicsParamFilePath + ) except OSError as e: - print("ERROR: Supplied pyradiomics parameter file does not exist or is not at that location.") + print( + "ERROR: Supplied pyradiomics parameter file does not exist or is not at that location." + ) raise # Extract radiomic features from CT with segmentation as mask - idFeatureVector = featureExtractor.execute(croppedCT, croppedROI, label=segmentationLabel) + idFeatureVector = featureExtractor.execute( + croppedCT, croppedROI, label=segmentationLabel + ) return idFeatureVector -def radiomicFeatureExtraction(imageMetadataPath:str, - imageDirPath:str, - roiNames:str = None, - pyradiomicsParamFilePath:str = "src/readii/data/default_pyradiomics.yaml", - outputDirPath:str = None, - negativeControl:str = None, - parallel:bool = False,): +def radiomicFeatureExtraction( + imageMetadataPath: str, + imageDirPath: str, + roiNames: Optional[str] = None, + pyradiomicsParamFilePath: str = "src/readii/data/default_pyradiomics.yaml", + outputDirPath: Optional[str] = None, + negativeControl: Optional[str] = None, + parallel: bool = False, +) -> pd.DataFrame: """Perform radiomic feature extraction using PyRadiomics on CT images with a corresponding segmentation. Utilizes outputs from med-imagetools (https://github.com/bhklab/med-imagetools) run on the image dataset. @@ -98,8 +153,8 @@ def radiomicFeatureExtraction(imageMetadataPath:str, negativeControl : str Name of negative control to generate from the CT to perform feature extraction on. If set to None, will extract features from original CT image. parallel : bool - Flag to decide whether to run extraction in parallel. - + Flag to decide whether to run extraction in parallel. + Returns ------- pd.DataFrame @@ -120,43 +175,59 @@ def radiomicFeatureExtraction(imageMetadataPath:str, ctSeriesIDList = pdImageInfo["series_CT"].unique() def featureExtraction(ctSeriesID): - ''' Function to extract PyRadiomics features for all ROIs present in a CT. Inner function so it can be run in parallel with joblib.''' + """Function to extract PyRadiomics features for all ROIs present in a CT. Inner function so it can be run in parallel with joblib.""" # Get all info rows for this ctSeries ctSeriesInfo = pdImageInfo.loc[pdImageInfo["series_CT"] == ctSeriesID] patID = ctSeriesInfo.iloc[0]["patient_ID"] print("Processing ", patID) - # Get absolute path to CT image files - ctDirPath = os.path.join(imageDirPath, ctSeriesInfo.iloc[0]['folder_CT']) + # Get absolute path to CT image files + ctDirPath = os.path.join(imageDirPath, ctSeriesInfo.iloc[0]["folder_CT"]) # Load CT by passing in specific series to find in a directory - ctImage = read_dicom_series(path = ctDirPath, series_id = ctSeriesID) + ctImage = read_dicom_series(path=ctDirPath, series_id=ctSeriesID) # Get list of segmentations to iterate over - segSeriesIDList = ctSeriesInfo['series_seg'].unique() + segSeriesIDList = ctSeriesInfo["series_seg"].unique() # Initialize dictionary to store radiomics data for each segmentation (image metadata + features) ctAllData = [] # Loop over every segmentation associated with this CT - only loading CT once for segCount, segSeriesID in enumerate(segSeriesIDList): - segSeriesInfo = ctSeriesInfo.loc[ctSeriesInfo['series_seg'] == segSeriesID] + segSeriesInfo = ctSeriesInfo.loc[ctSeriesInfo["series_seg"] == segSeriesID] # Check that a single segmentation file is being processed if len(segSeriesInfo) > 1: # Check that if there are multiple rows that it's not due to a CT with subseries (this is fine, the whole series is loaded) - if not segSeriesInfo.duplicated(subset=['series_CT'], keep=False).all(): - raise RuntimeError("Some kind of duplication of segmentation and CT matches not being caught. Check seg_and_ct_dicom_list in radiogenomic_output.") - + if not segSeriesInfo.duplicated(subset=["series_CT"], keep=False).all(): + raise RuntimeError( + "Some kind of duplication of segmentation and CT matches not being caught. Check seg_and_ct_dicom_list in radiogenomic_output." + ) + # Get absolute path to segmentation image file - segFilePath = os.path.join(imageDirPath, segSeriesInfo.iloc[0]['file_path_seg']) + segFilePath = os.path.join( + imageDirPath, segSeriesInfo.iloc[0]["file_path_seg"] + ) # Get dictionary of ROI sitk Images for this segmentation file - segImages = loadSegmentation(segFilePath, modality = segSeriesInfo.iloc[0]['modality_seg'], - baseImageDirPath = ctDirPath, roiNames = roiNames) - + segImages = loadSegmentation( + segFilePath, + modality=segSeriesInfo.iloc[0]["modality_seg"], + baseImageDirPath=ctDirPath, + roiNames=roiNames, + ) + # Check that this series has ROIs to extract from (dictionary isn't empty) if not segImages: - print('CT ', ctSeriesID, 'and segmentation ',segSeriesID, ' has no ROIs or no ROIs with the label ', roiNames, '. Moving to next segmentation.') + print( + "CT ", + ctSeriesID, + "and segmentation ", + segSeriesID, + " has no ROIs or no ROIs with the label ", + roiNames, + ". Moving to next segmentation.", + ) else: # Loop over each ROI contained in the segmentation to perform radiomic feature extraction @@ -165,7 +236,9 @@ def featureExtraction(ctSeriesID): roiNum = roiCount + 1 # Extract features listed in the parameter file - print("Calculating radiomic features for segmentation:", roiImageName) + print( + "Calculating radiomic features for segmentation:", roiImageName + ) # Get sitk Image object for this ROI roiImage = segImages[roiImageName] @@ -175,45 +248,61 @@ def featureExtraction(ctSeriesID): # Check if segmentation just has an extra axis with a size of 1 and remove it if roiImage.GetDimension() > 3 and roiImage.GetSize()[3] == 1: roiImage = flattenImage(roiImage) - + # Check that image and segmentation mask have the same dimensions if ctImage.GetSize() != roiImage.GetSize(): - # Checking if number of segmentation slices is less than CT - if ctImage.GetSize()[2] > roiImage.GetSize()[2]: - print("Slice number mismatch between CT and segmentation for", patID, ". Padding segmentation to match.") - roiImage = padSegToMatchCT(ctDirPath, segFilePath, ctImage, roiImage) + # Checking if number of segmentation slices is less than CT + if ctImage.GetSize()[2] > roiImage.GetSize()[2]: + print( + "Slice number mismatch between CT and segmentation for", + patID, + ". Padding segmentation to match.", + ) + roiImage = padSegToMatchCT( + ctDirPath, segFilePath, ctImage, roiImage + ) else: - raise RuntimeError("CT and ROI dimensions do not match.") + raise RuntimeError( + "CT and ROI dimensions do not match." + ) # Catching CT and segmentation size mismatch error except RuntimeError as e: print(str(e)) - # Extract radiomic features from this CT/segmentation pair - idFeatureVector = singleRadiomicFeatureExtraction(ctImage, roiImage = roiImage, - pyradiomicsParamFilePath = pyradiomicsParamFilePath, - negativeControl = negativeControl) + idFeatureVector = singleRadiomicFeatureExtraction( + ctImage, + roiImage=roiImage, + pyradiomicsParamFilePath=pyradiomicsParamFilePath, + negativeControl=negativeControl, + ) # Create dictionary of image metadata to append to front of output table - sampleROIData = {"patient_ID": patID, - "study_description": segSeriesInfo.iloc[0]['study_description_CT'], - "series_UID": segSeriesInfo.iloc[0]['series_CT'], - "series_description": segSeriesInfo.iloc[0]['series_description_CT'], - "image_modality": segSeriesInfo.iloc[0]['modality_CT'], - "instances": segSeriesInfo.iloc[0]['instances_CT'], - "seg_series_UID": segSeriesInfo.iloc[0]['series_seg'], - "seg_modality": segSeriesInfo.iloc[0]['modality_seg'], - "seg_ref_image": segSeriesInfo.iloc[0]['reference_ct_seg'], - "roi": roiImageName, - "roi_number": roiNum, - "negative_control": negativeControl} + sampleROIData = { + "patient_ID": patID, + "study_description": segSeriesInfo.iloc[0][ + "study_description_CT" + ], + "series_UID": segSeriesInfo.iloc[0]["series_CT"], + "series_description": segSeriesInfo.iloc[0][ + "series_description_CT" + ], + "image_modality": segSeriesInfo.iloc[0]["modality_CT"], + "instances": segSeriesInfo.iloc[0]["instances_CT"], + "seg_series_UID": segSeriesInfo.iloc[0]["series_seg"], + "seg_modality": segSeriesInfo.iloc[0]["modality_seg"], + "seg_ref_image": segSeriesInfo.iloc[0]["reference_ct_seg"], + "roi": roiImageName, + "roi_number": roiNum, + "negative_control": negativeControl, + } # Concatenate image metadata with PyRadiomics features sampleROIData.update(idFeatureVector) # Store this ROI's info in the segmentation level list ctAllData.append(sampleROIData) - + return ctAllData ###### END featureExtraction ####### @@ -224,8 +313,10 @@ def featureExtraction(ctSeriesID): features = [featureExtraction(ctSeriesID) for ctSeriesID in ctSeriesIDList] else: # Run feature extraction in parallel - features = Parallel(n_jobs=-1, require='sharedmem')(delayed(featureExtraction)(ctSeriesID) for ctSeriesID in ctSeriesIDList) - + features = Parallel(n_jobs=-1, require="sharedmem")( + delayed(featureExtraction)(ctSeriesID) for ctSeriesID in ctSeriesIDList + ) + # Flatten the list of dictionaries (happens when there are multiple ROIs or SEGs associated with a single CT) flatFeatures = list(chain.from_iterable(features)) # Convert list of feature sets into a pandas dataframe to save out @@ -245,4 +336,4 @@ def featureExtraction(ctSeriesID): # Save out the features saveDataframeCSV(featuresTable, outputFilePath) - return featuresTable \ No newline at end of file + return featuresTable diff --git a/tests/test_feature_extraction.py b/tests/test_feature_extraction.py index 6ec69ea..16ec1ae 100644 --- a/tests/test_feature_extraction.py +++ b/tests/test_feature_extraction.py @@ -1,8 +1,18 @@ -from readii.loaders import * -from readii.feature_extraction import * +from readii.loaders import ( + loadDicomSITK, + loadRTSTRUCTSITK, + loadSegmentation, +) + +from readii.feature_extraction import ( + singleRadiomicFeatureExtraction, + radiomicFeatureExtraction, +) import pytest import collections +import pandas as pd +import os @pytest.fixture def nsclcCTImage(): From 5776e5f0f618c3634d01fb86c685e012bc073a08 Mon Sep 17 00:00:00 2001 From: Jermiah Date: Tue, 19 Mar 2024 16:58:30 +0000 Subject: [PATCH 13/21] refactor: Update version variables in pyproject.toml --- pyproject.toml | 7 ++++++- src/readii/__init__.py | 3 ++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 75c9258..4340533 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,12 @@ sphinx-rtd-theme = "^1.3.0" python-semantic-release = "^8.5.2" [tool.semantic_release] -version_toml = ["pyproject.toml:tool.poetry.version"] # version location +version_variables = [ + "src/readii/__init__.py:__version__", +] +version_toml = [ + "pyproject.toml:tool.poetry.version" +] # version location changelog_file = "CHANGELOG.md" # changelog file dist_path = "dist/" # where to put dists upload_to_release = true # auto-create GitHub release diff --git a/src/readii/__init__.py b/src/readii/__init__.py index bd95616..d05d8c5 100644 --- a/src/readii/__init__.py +++ b/src/readii/__init__.py @@ -1,3 +1,4 @@ # read version from installed package from importlib.metadata import version -__version__ = version("readii") \ No newline at end of file +__version__ = "1.2.0" + From 16a83b9541d717f132f6a32afbeb89db02653c63 Mon Sep 17 00:00:00 2001 From: Jermiah Date: Tue, 19 Mar 2024 17:01:40 +0000 Subject: [PATCH 14/21] refactor: Refactor saveDataframeCSV and matchCTtoSegmentation functions, add type hints, and improve error handling, format with black --- src/readii/metadata.py | 106 +++++++++++++++++++++++++++-------------- 1 file changed, 70 insertions(+), 36 deletions(-) diff --git a/src/readii/metadata.py b/src/readii/metadata.py index ab4044a..76d3f3e 100644 --- a/src/readii/metadata.py +++ b/src/readii/metadata.py @@ -1,9 +1,13 @@ import os -import pandas as pd +import pandas as pd +from typing import Optional, Literal -def saveDataframeCSV(dataframe: pd.DataFrame, - outputFilePath: str): - """ Function to save a pandas Dataframe as a csv file with the index removed. + +def saveDataframeCSV( + dataframe: pd.DataFrame, + outputFilePath: str +) -> None: + """Function to save a pandas Dataframe as a csv file with the index removed. Checks if the path in the outputFilePath exists and will create any missing directories. Parameters @@ -12,10 +16,19 @@ def saveDataframeCSV(dataframe: pd.DataFrame, Pandas dataframe to save out as a csv outputFilePath : str Full file path to save the dataframe out to. + + Raises + ------ + ValueError + If the outputFilePath does not end in .csv, if the dataframe is not a pandas DataFrame, + or if an error occurs while saving the dataframe. """ - if not outputFilePath.endswith('.csv'): - raise ValueError("This function saves .csv files, so outputFilePath must end in .csv") + if not outputFilePath.endswith(".csv"): + raise ValueError( + "This function saves .csv files, so outputFilePath must end in .csv" + ) + if not isinstance(dataframe, pd.DataFrame): raise ValueError("Function expects a pandas DataFrame to save out.") @@ -23,15 +36,20 @@ def saveDataframeCSV(dataframe: pd.DataFrame, if not os.path.exists(os.path.dirname(outputFilePath)): os.makedirs(os.path.dirname(outputFilePath)) - # Save out feature set - dataframe.to_csv(outputFilePath, index=False) - - return - + try: + # Save out DataFrame + dataframe.to_csv(outputFilePath, index=False) + except Exception as e: + error_msg = f"An error occurred while saving the DataFrame: {str(e)}" + raise ValueError(error_msg) from e + else: + return -def matchCTtoSegmentation(imgFileListPath: str, - segType: str, - outputDirPath: str = None): +def matchCTtoSegmentation( + imgFileListPath: str, + segType: str, + outputDirPath: Optional[str] = None, +) -> pd.DataFrame: """From full list of image files, extract CT and corresponding segmentation files and create new table. One row of the table contains both the CT and segmentation data for one patient. This function currently assumes there is one segmentation for each patient. @@ -39,17 +57,22 @@ def matchCTtoSegmentation(imgFileListPath: str, Parameters ---------- imgFileListPath : str - Path to csv containing list of image directories/paths in the dataset. + Path to csv containing list of image directories/paths in the dataset. Expecting output from med-imagetools autopipeline .imgtools_[dataset] segType : str Type of file segmentation is in. Can be SEG or RTSTRUCT. outputDirPath : str Optional path to directory to save the dataframe to as a csv. - + Returns ------- pd.Dataframe Dataframe containing the CT and corresponding segmentation data for each patient + + Raises + ------ + ValueError + If the segmentation file type is not RTSTRUCT or SEG, or if the imgFileListPath does not end in .csv Note: All subseries of CT will be kept in the dataframe in this function """ @@ -58,28 +81,33 @@ def matchCTtoSegmentation(imgFileListPath: str, raise ValueError("Incorrect segmentation file type. Must be RTSTRUCT or SEG.") # Check that imgFileListPath is a csv file to properly be loaded in - if not imgFileListPath.endswith('.csv'): - raise ValueError("This function expects to load in a .csv file, so imgFileListPath must end in .csv") + if not imgFileListPath.endswith(".csv"): + raise ValueError( + "This function expects to load in a .csv file, so imgFileListPath must end in .csv" + ) # Load in complete list of patient image directories of all modalities (output from med-imagetools crawl) - fullDicomList = pd.read_csv(imgFileListPath, index_col=0) + fullDicomList: pd.DataFrame = pd.read_csv(imgFileListPath, index_col=0) # Extract all CT rows - allCTRows = fullDicomList.loc[fullDicomList['modality'] == "CT"] + allCTRows: pd.DataFrame = fullDicomList.loc[fullDicomList["modality"] == "CT"] # Extract all SEG rows - allSegRows = fullDicomList.loc[fullDicomList['modality'] == segType] + allSegRows: pd.DataFrame = fullDicomList.loc[fullDicomList["modality"] == segType] # Merge the CT and segmentation dataframes based on the CT ID (referenced in the segmentation rows) # Uses only segmentation keys, so no extra CTs are kept # If multiple CTs have the same ID, they are both included in this table - samplesWSeg = allCTRows.merge(allSegRows, how='inner', - left_on=['series', 'patient_ID'], - right_on=['reference_ct', 'patient_ID'], - suffixes=('_CT','_seg')) + samplesWSeg: pd.DataFrame = allCTRows.merge( + allSegRows, + how="inner", + left_on=["series", "patient_ID"], + right_on=["reference_ct", "patient_ID"], + suffixes=("_CT", "_seg"), + ) # Sort dataframe by ascending patient ID value - samplesWSeg.sort_values(by='patient_ID', inplace=True) + samplesWSeg.sort_values(by="patient_ID", inplace=True) # Save out the combined list if outputDirPath != None: @@ -89,17 +117,19 @@ def matchCTtoSegmentation(imgFileListPath: str, # Join this name with the output directory and add file prefix and csv suffix outputFilePath = os.path.join(outputDirPath, fileName) saveDataframeCSV(samplesWSeg, outputFilePath) - + return samplesWSeg -def getSegmentationType(imgFileListPath: str): +def getSegmentationType( + imgFileListPath: str +) -> Literal['RTSTRUCT', 'SEG']: """Find the segmentation type from the full list of image files. Parameters ---------- imgFileListPath : str - Path to csv containing list of image directories/paths in the dataset. + Path to csv containing list of image directories/paths in the dataset. Expecting output from med-imagetools autopipeline .imgtools_[dataset] Returns @@ -108,20 +138,24 @@ def getSegmentationType(imgFileListPath: str): Segmentation type (RTSTRUCT or SEG) """ # Check that imgFileListPath is a csv file to properly be loaded in - if not imgFileListPath.endswith('.csv'): - raise ValueError("This function expects to load in a .csv file, so imgFileListPath must end in .csv") + if not imgFileListPath.endswith(".csv"): + raise ValueError( + "This function expects to load in a .csv file, so imgFileListPath must end in .csv" + ) # Load in complete list of patient image directories of all modalities (output from med-imagetools crawl) - fullDicomList = pd.read_csv(imgFileListPath, index_col=0) + fullDicomList: pd.DataFrame = pd.read_csv(imgFileListPath, index_col=0) - # Get list of unique modalities - modalities = list(fullDicomList['modality'].unique()) + # Get list of unique modalities + modalities = list(fullDicomList["modality"].unique()) if "RTSTRUCT" in modalities: segType = "RTSTRUCT" elif "SEG" in modalities: segType = "SEG" else: - raise RuntimeError("No suitable segmentation type found. YAREA can only use RTSTRUCTs and DICOM-SEG segmentations.") + raise RuntimeError( + "No suitable segmentation type found. YAREA can only use RTSTRUCTs and DICOM-SEG segmentations." + ) - return segType \ No newline at end of file + return segType From bc0d18da89907ca82e30eb81fbed1ab1ae475cfe Mon Sep 17 00:00:00 2001 From: Jermiah Date: Tue, 19 Mar 2024 17:03:21 +0000 Subject: [PATCH 15/21] refactor: Refactor type check in test_radiomicFeatureExtraction --- tests/test_feature_extraction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_feature_extraction.py b/tests/test_feature_extraction.py index 16ec1ae..fbc635d 100644 --- a/tests/test_feature_extraction.py +++ b/tests/test_feature_extraction.py @@ -94,7 +94,7 @@ def test_radiomicFeatureExtraction(nsclcMetadataPath): actual = radiomicFeatureExtraction(nsclcMetadataPath, imageDirPath="tests/", roiNames = None) - assert type(actual) == pd.core.frame.DataFrame, \ + assert isinstance(actual, pd.DataFrame), \ "Wrong return type, expect a pandas DataFrame" assert actual.shape[1] == 1365, \ "Wrong return size, should include image metadata, diagnostics, and pyradiomics features" From f91afa43e63daeb06be8d9276c9607453d7fe71c Mon Sep 17 00:00:00 2001 From: Jermiah Date: Tue, 19 Mar 2024 17:03:51 +0000 Subject: [PATCH 16/21] refactor: explicit imports from readii.metadata module for performance --- tests/test_metadata.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/test_metadata.py b/tests/test_metadata.py index bd8456a..8e5af31 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -1,7 +1,12 @@ -from readii.metadata import * import pytest import os +from readii.metadata import ( + matchCTtoSegmentation, + getSegmentationType, + saveDataframeCSV +) + @pytest.fixture def nsclcSummaryFilePath(): return "tests/.imgtools/imgtools_NSCLC_Radiogenomics.csv" From 07d04d8f1b8bacea29a896557971468446e22bf8 Mon Sep 17 00:00:00 2001 From: Jermiah Date: Tue, 19 Mar 2024 17:04:37 +0000 Subject: [PATCH 17/21] refactor: format with black --- src/readii/loaders.py | 68 ++++++++++++++++++++++++------------------ src/readii/metadata.py | 7 +++++ 2 files changed, 46 insertions(+), 29 deletions(-) diff --git a/src/readii/loaders.py b/src/readii/loaders.py index 5c6ef02..cf6f0bd 100644 --- a/src/readii/loaders.py +++ b/src/readii/loaders.py @@ -7,6 +7,7 @@ from typing import Optional + def loadDicomSITK(imgDirPath: str) -> sitk.Image: """Read DICOM series as SimpleITK Image. @@ -18,7 +19,7 @@ def loadDicomSITK(imgDirPath: str) -> sitk.Image: Returns ------- sitk.Image - The loaded image. + The loaded image. """ # Set up the reader for the DICOM series reader = sitk.ImageSeriesReader() @@ -27,21 +28,21 @@ def loadDicomSITK(imgDirPath: str) -> sitk.Image: return reader.Execute() -def loadRTSTRUCTSITK(rtstructPath: str, - baseImageDirPath: str, - roiNames: Optional[str] = None) -> dict: - """ Load RTSTRUCT into SimpleITK Image. +def loadRTSTRUCTSITK( + rtstructPath: str, baseImageDirPath: str, roiNames: Optional[str] = None +) -> dict: + """Load RTSTRUCT into SimpleITK Image. Parameters ---------- rtstructPath : str Path to the DICOM file containing the RTSTRUCT baseImageDirPath : str - Path to the directory containing the DICOMS for the original image the segmentation + Path to the directory containing the DICOMS for the original image the segmentation was created from (e.g. CT). This is required to load the RTSTRUCT. roiNames : str Identifier for which region(s) of interest to load from the total segmentation file - + Returns ------- dict @@ -51,17 +52,22 @@ def loadRTSTRUCTSITK(rtstructPath: str, # Set up segmentation loader makeMask = StructureSetToSegmentation(roi_names=roiNames) - + # Read in the base image (CT) and segmentation DICOMs into SITK Images baseImage = read_dicom_auto(baseImageDirPath) segImage = read_dicom_auto(rtstructPath) try: # Get the individual ROI masks - segMasks = makeMask(segImage, baseImage.image, existing_roi_indices={"background":0}, ignore_missing_regex=False) + segMasks = makeMask( + segImage, + baseImage.image, + existing_roi_indices={"background": 0}, + ignore_missing_regex=False, + ) except ValueError: return {} - + # Get list of ROIs present in this rtstruct loadedROINames = segMasks.raw_roi_names # Initialize dictionary to store ROI names and images @@ -72,17 +78,18 @@ def loadRTSTRUCTSITK(rtstructPath: str, roiMask = segMasks.get_label(name=roi) # Store the ROI name and image roiStructs[roi] = roiMask - + return roiStructs -def loadSegmentation(segImagePath: str, - modality: str, - baseImageDirPath: Optional[str] = None, - roiNames: Optional[str] = None +def loadSegmentation( + segImagePath: str, + modality: str, + baseImageDirPath: Optional[str] = None, + roiNames: Optional[str] = None, ) -> dict: - ''' Function to load a segmentation with the correct function. - + """Function to load a segmentation with the correct function. + Parameters ---------- segImagePath : str @@ -90,34 +97,37 @@ def loadSegmentation(segImagePath: str, modality : str Type of image that imgPath points to to load. If RTSTRUCT, must set baseImageDirPath baseImageDirPath : str - Path to the directory containing the DICOMS for the original image the segmentation - was created from. + Path to the directory containing the DICOMS for the original image the segmentation + was created from. roiNames : str Identifier for which region(s) of interest to load from the total segmentation file - + Returns ------- dict A dictionary of each of the ROIs and their name in the segmentation image as sitk.Image objects. - + Examples -------- >>> segImages = loadSegmentation("/path/to/segmentation/1.dcm", 'RTSTRUCT', '/path/to/CT', 'GTVp.*') - ''' + """ - if modality in ['SEG', 'seg']: + if modality in ["SEG", "seg"]: # Loading SEG requires directory containing file, not the actual file path imgFolder, _ = os.path.split(segImagePath) segHeader = pydicom.dcmread(segImagePath, stop_before_pixels=True) roiName = segHeader.SegmentSequence[0].SegmentLabel return {roiName: loadDicomSITK(imgFolder)} - - elif modality in ['RTSTRUCT', 'rtstruct']: + + elif modality in ["RTSTRUCT", "rtstruct"]: if baseImageDirPath == None: - raise ValueError("Missing path to original image segmentation was taken from. RTSTRUCT loader requires original image.") + raise ValueError( + "Missing path to original image segmentation was taken from. RTSTRUCT loader requires original image." + ) else: return loadRTSTRUCTSITK(segImagePath, baseImageDirPath, roiNames) - - else: - raise ValueError('This segmentation modality is not supported. Must be one of RTSTRUCT or SEG') + else: + raise ValueError( + "This segmentation modality is not supported. Must be one of RTSTRUCT or SEG" + ) diff --git a/src/readii/metadata.py b/src/readii/metadata.py index 76d3f3e..6ff1144 100644 --- a/src/readii/metadata.py +++ b/src/readii/metadata.py @@ -136,6 +136,13 @@ def getSegmentationType( ------- str Segmentation type (RTSTRUCT or SEG) + + Raises + ------ + ValueError + If the imgFileListPath does not end in .csv + RuntimeError + If no suitable segmentation type is found in the dataset """ # Check that imgFileListPath is a csv file to properly be loaded in if not imgFileListPath.endswith(".csv"): From 6999f208f33132e53f6ba42037387ae41070bf07 Mon Sep 17 00:00:00 2001 From: Jermiah Date: Tue, 19 Mar 2024 17:11:27 +0000 Subject: [PATCH 18/21] build: update poetry lock --- poetry.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 5a1f46c..129e439 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4457,4 +4457,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.9,<4" -content-hash = "f1c61344fc24091724e64aea20f6940d7a7b2a685766cce6d0bd72a5f4f800d1" +content-hash = "7e3c4a276ce999035dcc07f560b76f74a91d2f8f7194e082ace96e36313f13b4" From e8f67b9108f254431c0542a2812c674578db0794 Mon Sep 17 00:00:00 2001 From: Jermiah Date: Tue, 19 Mar 2024 17:14:59 +0000 Subject: [PATCH 19/21] build: Update pytest command to run tests in parallel --- .github/workflows/ci-cd.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-cd.yml b/.github/workflows/ci-cd.yml index 927ab4d..1e54163 100644 --- a/.github/workflows/ci-cd.yml +++ b/.github/workflows/ci-cd.yml @@ -29,7 +29,8 @@ jobs: run: poetry install - name: Test with pytest - run: poetry run pytest tests/ + # Github action runners now have 4 cores + run: poetry run pytest -s -v -n 4 tests/ # Commented out until we have a repo for coverage # - name: Test with pytest From 04bada5a9d1933f840d647c4ef383329a6e9d2e7 Mon Sep 17 00:00:00 2001 From: Jermiah Date: Tue, 19 Mar 2024 17:29:51 +0000 Subject: [PATCH 20/21] refactor: format with black, explicit imports, and update type annotations --- src/readii/image_processing.py | 154 +++++++++++++++++++++++---------- 1 file changed, 106 insertions(+), 48 deletions(-) diff --git a/src/readii/image_processing.py b/src/readii/image_processing.py index 3cd8958..fba9063 100644 --- a/src/readii/image_processing.py +++ b/src/readii/image_processing.py @@ -5,7 +5,14 @@ from radiomics import imageoperations import SimpleITK as sitk -from readii.loaders import * +from typing import Optional + +from readii.loaders import ( + loadDicomSITK, + loadRTSTRUCTSITK, + loadSegmentation, +) + def flattenImage(image: sitk.Image) -> sitk.Image: """Remove axes of image with size one. (ex. shape is [1, 100, 256, 256]) @@ -14,7 +21,7 @@ def flattenImage(image: sitk.Image) -> sitk.Image: ---------- image : sitk.Image Image to remove axes with size one. - + Returns ------- sitk.Image @@ -37,7 +44,7 @@ def alignImages(originImage: sitk.Image, movingImage: sitk.Image) -> sitk.Image: movingImage : sitk.Image Image to align to originImage - + Returns ------- sitk.Image @@ -50,10 +57,12 @@ def alignImages(originImage: sitk.Image, movingImage: sitk.Image) -> sitk.Image: return movingImage -def padSegToMatchCT(ctDirPath:str, - segImagePath:str = None, - ctImage:sitk.Image = None, - alignedSegImage:sitk.Image = None) -> sitk.Image: +def padSegToMatchCT( + ctDirPath: str, + segImagePath: Optional[str] = None, + ctImage: Optional[sitk.Image] = None, + alignedSegImage: Optional[sitk.Image] = None, +) -> sitk.Image: """Function to take a segmentation that doesn't have the same slice count as the CT image, maps it to the corresponding CT slices, and pads it with slices containing 0s so it maps properly onto the original image. @@ -61,18 +70,18 @@ def padSegToMatchCT(ctDirPath:str, ---------- ctDirPath : str Path to DICOM series folder containing all CT image files. Must be a directory. - + segImagePath : str Path to the DICOM SEG file that corresponds with CT in ctDirPath that has incorrect slice count. ctImage : sitk.Image Optional argument, base image to align the padded segmentation image to. If None is passed, will be loaded in from ctFolderPath. - + alignedSegImage : sitk.Image Optional argument, if image has already been loaded it can be passed in to be adjusted. Assumes that flattenImage and alignImages has already been run. If not passed, will use segFilePath to load the image. - + Returns ------- sitk.Image @@ -88,35 +97,56 @@ def padSegToMatchCT(ctDirPath:str, """ # Load the CT image to align the segmentation to if not passed as argument - if ctImage == None: + if ctImage is None: ctImage = loadDicomSITK(ctDirPath) # Load in the segmentation image if not passed as argument - if alignedSegImage == None: - if segImagePath == None: - raise ValueError("Must pass either a loaded and aligned segmentation or the path to load the segmentation from.") + if alignedSegImage is None: + if segImagePath is None: + raise ValueError( + "Must pass either a loaded and aligned segmentation or the path to load the segmentation from." + ) else: segImage = loadSegmentation(segImagePath, modality="SEG") # Segmentation contains extra axis, flatten to 3D by removing it segImage = flattenImage(segImage) # Segmentation has different origin, align it to the CT for proper feature extraction alignedSegImage = alignImages(ctImage, segImage) - + # Load in header information for the CT and SEG files ctSeries = Series(ctDirPath) segWithHeader = pydicom.dcmread(segImagePath, stop_before_pixels=True) # Get the first and last reference ID for the slices of the CT that are in the SEG file - lastSliceRef = segWithHeader.ReferencedSeriesSequence[0].ReferencedInstanceSequence[0].ReferencedSOPInstanceUID - firstSliceRef = segWithHeader.ReferencedSeriesSequence[0].ReferencedInstanceSequence[-1].ReferencedSOPInstanceUID + lastSliceRef = ( + segWithHeader.ReferencedSeriesSequence[0] + .ReferencedInstanceSequence[0] + .ReferencedSOPInstanceUID + ) + firstSliceRef = ( + segWithHeader.ReferencedSeriesSequence[0] + .ReferencedInstanceSequence[-1] + .ReferencedSOPInstanceUID + ) # Get the index of the reference IDs in the CT image - firstSliceIdx = ctSeries['SOPInstanceUID'].index(firstSliceRef) - lastSliceIdx = ctSeries['SOPInstanceUID'].index(lastSliceRef) + firstSliceIdx = ctSeries["SOPInstanceUID"].index(firstSliceRef) + lastSliceIdx = ctSeries["SOPInstanceUID"].index(lastSliceRef) # Convert the segmentation image to an array and pad with 0s so segmentation mask is in the correct indices arrSeg = sitk.GetArrayFromImage(alignedSegImage) - padArrSeg = np.pad(arrSeg, (((firstSliceIdx, (ctSeries.data.shape[-1]-lastSliceIdx-1)), (0,0), (0,0))), 'constant', constant_values=(0)) + padArrSeg = np.pad( + arrSeg, + ( + ( + (firstSliceIdx, (ctSeries.data.shape[-1] - lastSliceIdx - 1)), + (0, 0), + (0, 0), + ) + ), + "constant", + constant_values=(0), + ) # Convert back to Image object paddedSegImage = sitk.GetImageFromArray(padArrSeg) @@ -125,7 +155,13 @@ def padSegToMatchCT(ctDirPath:str, return paddedSegImage -def displayImageSlice(image, sliceIdx, cmap=plt.cm.Greys_r, dispMin = None, dispMax = None): +def displayImageSlice( + image, + sliceIdx, + cmap=plt.cm.Greys_r, + dispMin=None, + dispMax=None +) -> None: """Function to display a 2D slice from a 3D image By default, displays slice in greyscale with min and max range set to min and max value in the slice. @@ -143,9 +179,9 @@ def displayImageSlice(image, sliceIdx, cmap=plt.cm.Greys_r, dispMin = None, disp Value to use as max for cmap in display """ # If image is a simple ITK image, convert to array for display - if type(image) == sitk.SimpleITK.Image: + if type(image) == sitk.Image: image = sitk.GetArrayFromImage(image) - + # Get min and max value from image to define range in display if dispMin == None: dispMin = image.min() @@ -153,11 +189,19 @@ def displayImageSlice(image, sliceIdx, cmap=plt.cm.Greys_r, dispMin = None, disp dispMax = image.max() # Display the slice of the image - plt.imshow(image[sliceIdx,:,:], cmap=cmap, vmin=dispMin, vmax=dispMax) - plt.axis('off') - - -def displayCTSegOverlay(ctImage, segImage, sliceIdx=-1, cmapCT=plt.cm.Greys_r, cmapSeg=plt.cm.brg, alpha=0.3, crop=False): + plt.imshow(image[sliceIdx, :, :], cmap=cmap, vmin=dispMin, vmax=dispMax) + plt.axis("off") + + +def displayCTSegOverlay( + ctImage, + segImage, + sliceIdx=-1, + cmapCT=plt.cm.Greys_r, + cmapSeg=plt.cm.brg, + alpha=0.3, + crop=False, +) -> None: """Function to display a 2D slice from a CT with the ROI from a segmentation image overlaid in green Parameters ---------- @@ -185,31 +229,39 @@ def displayCTSegOverlay(ctImage, segImage, sliceIdx=-1, cmapCT=plt.cm.Greys_r, c if sliceIdx == -1: sliceIdx, _, _ = getROICenterCoords(segImage) - # If image is a simple ITK image, convert to array for display - if type(ctImage) == sitk.SimpleITK.Image: + # If image is a simple ITK image, convert to array for display + if type(ctImage) == sitk.Image: ctImage = sitk.GetArrayFromImage(ctImage) - # If segmentation is a simple ITK image, convert to array for display - if type(segImage) == sitk.SimpleITK.Image: + # If segmentation is a simple ITK image, convert to array for display + if type(segImage) == sitk.Image: segImage = sitk.GetArrayFromImage(segImage) # Make mask of ROI to ignore background in overlaid plot maskSeg = np.ma.masked_where(segImage == 0, segImage) # Plot slice of CT - plt.imshow(ctImage[sliceIdx,:,:], cmap=cmapCT, vmin=ctImage.min(), vmax=ctImage.max()) + plt.imshow( + ctImage[sliceIdx, :, :], cmap=cmapCT, vmin=ctImage.min(), vmax=ctImage.max() + ) # Plot mask of ROI overtop - plt.imshow(maskSeg[sliceIdx,:,:], cmap=cmapSeg, vmin=segImage.min(), vmax=segImage.max(), alpha=alpha) - plt.axis('off') + plt.imshow( + maskSeg[sliceIdx, :, :], + cmap=cmapSeg, + vmin=segImage.min(), + vmax=segImage.max(), + alpha=alpha, + ) + plt.axis("off") -def getROICenterCoords(segImage:sitk.Image): +def getROICenterCoords(segImage: sitk.Image): """A function to find the slice number and coordinates for the center of an ROI in a loaded RTSTRUCT or SEG file. Parameters ---------- segImage sitk.Image, a loaded segmentation image, should be binary with segmentation voxels as a non-zero value - + Returns ------- centerSliceIdx : int @@ -221,7 +273,7 @@ def getROICenterCoords(segImage:sitk.Image): """ # Convert segmentation image to a numpy array arrSeg = sitk.GetArrayFromImage(segImage) - + nonZeroIndices = np.nonzero(arrSeg) nzSliceIndices = nonZeroIndices[0] nzColumnIndices = nonZeroIndices[1] @@ -234,14 +286,14 @@ def getROICenterCoords(segImage:sitk.Image): return centerSliceIdx, centerColumnPixelIdx, centerRowPixelIdx -def getROIVoxelLabel(segImage:sitk.Image): +def getROIVoxelLabel(segImage: sitk.Image): """A function to find the non-zero value that identifies segmentation voxels in a loaded RTSTRUCT or SEG file. - + Parameters ---------- segImage sitk.Image, a loaded segmentation image, should be binary with segmentation voxels as a non-zero value - + Returns ------- labelValue @@ -257,10 +309,12 @@ def getROIVoxelLabel(segImage:sitk.Image): labelValue = roiVoxels[0] return int(labelValue) else: - raise ValueError("Multiple label values present in this segmentation. Must all be the same.") + raise ValueError( + "Multiple label values present in this segmentation. Must all be the same." + ) -def getCroppedImages(ctImage, segImage, segmentationLabel = None): +def getCroppedImages(ctImage, segImage, segmentationLabel=None): """A function to crop a CT and segmentation to close to the ROI within the segmentation. Parameters @@ -268,7 +322,7 @@ def getCroppedImages(ctImage, segImage, segmentationLabel = None): ctImage : sitk.Image CT image to crop. segImage : sitk.Image - Segmentation image containing a ROI to overlay with CT. Must be aligned to CT. + Segmentation image containing a ROI to overlay with CT. Must be aligned to CT. segmentationLabel : int Value of pixels within the ROI in the segImage. If not passed, will use getROIVoxelLabel to find it. @@ -283,12 +337,16 @@ def getCroppedImages(ctImage, segImage, segmentationLabel = None): segmentationLabel = getROIVoxelLabel(segImage) # Check that CT and segmentation correspond, segmentationLabel is present, and dimensions match - segBoundingBox, correctedROIImage = imageoperations.checkMask(ctImage, segImage, label=segmentationLabel) + segBoundingBox, correctedROIImage = imageoperations.checkMask( + ctImage, segImage, label=segmentationLabel + ) # Update the ROI image if a correction was generated by checkMask if correctedROIImage is not None: alignedROIImage = correctedROIImage - + # Crop the image and mask to a bounding box around the mask to reduce volume size to process - croppedCT, croppedROI = imageoperations.cropToTumorMask(ctImage, segImage, segBoundingBox) + croppedCT, croppedROI = imageoperations.cropToTumorMask( + ctImage, segImage, segBoundingBox + ) - return croppedCT, croppedROI \ No newline at end of file + return croppedCT, croppedROI From 09331a3d7f9b6459ab151b9b3611877c04f4d265 Mon Sep 17 00:00:00 2001 From: Katy Scott Date: Wed, 27 Mar 2024 09:57:12 -0400 Subject: [PATCH 21/21] style(feature_extraction.py): changed some function call spacing --- src/readii/feature_extraction.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/src/readii/feature_extraction.py b/src/readii/feature_extraction.py index 9a2113c..c80ffc6 100644 --- a/src/readii/feature_extraction.py +++ b/src/readii/feature_extraction.py @@ -83,17 +83,14 @@ def singleRadiomicFeatureExtraction( segmentationLabel: int = getROIVoxelLabel(alignedROIImage) # Check that CT and segmentation correspond, segmentationLabel is present, and dimensions match - segBoundingBox, correctedROIImage = imageoperations.checkMask( - ctImage, alignedROIImage, label=segmentationLabel - ) + segBoundingBox, correctedROIImage = imageoperations.checkMask(ctImage, alignedROIImage, label=segmentationLabel) + # Update the ROI image if a correction was generated by checkMask if correctedROIImage is not None: alignedROIImage = correctedROIImage # Crop the image and mask to a bounding box around the mask to reduce volume size to process - croppedCT, croppedROI = imageoperations.cropToTumorMask( - ctImage, alignedROIImage, segBoundingBox - ) + croppedCT, croppedROI = imageoperations.cropToTumorMask(ctImage, alignedROIImage, segBoundingBox) if negativeControl != None: print("Generating ", negativeControl, "negative control for CT.") @@ -108,9 +105,7 @@ def singleRadiomicFeatureExtraction( # Load PyRadiomics feature extraction parameters to use # Initialize feature extractor with parameters try: - featureExtractor = featureextractor.RadiomicsFeatureExtractor( - pyradiomicsParamFilePath - ) + featureExtractor = featureextractor.RadiomicsFeatureExtractor(pyradiomicsParamFilePath) except OSError as e: print( "ERROR: Supplied pyradiomics parameter file does not exist or is not at that location." @@ -118,9 +113,7 @@ def singleRadiomicFeatureExtraction( raise # Extract radiomic features from CT with segmentation as mask - idFeatureVector = featureExtractor.execute( - croppedCT, croppedROI, label=segmentationLabel - ) + idFeatureVector = featureExtractor.execute(croppedCT, croppedROI, label=segmentationLabel) return idFeatureVector