diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index af0ddd8f7..102f8d3b8 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -10,25 +10,21 @@ on: jobs: pre-commit-checks: - strategy: - matrix: - os: [ "macos-latest", "windows-latest", "ubuntu-18.04", "ubuntu-20.04", "macos-10.15"] - python-version: [3.6, 3.7, 3.8] - runs-on: ${{ matrix.os }} + runs-on: ubuntu-latest steps: - name: Checkout repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Setup Python - uses: actions/setup-python@v1 + uses: actions/setup-python@v3 with: - python-version: ${{ matrix.python-version }} + python-version: 3.8 - name: Cache pip dependencies id: cache-pip-dependencies - uses: actions/cache@v2 + uses: actions/cache@v3 with: # Ubuntu-specific, see # https://github.com/actions/cache/blob/main/examples.md#python---pip @@ -40,9 +36,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - if [[ -f requirements_dev.txt ]]; then pip install -r requirements_dev.txt; fi - pip install -e . - pre-commit install + pip install 'pre-commit>=2.10.1' shell: bash - name: Run pre-commit large file check diff --git a/.github/workflows/publish_package.yml b/.github/workflows/publish_package.yml index 557f98fa9..02e70a13a 100644 --- a/.github/workflows/publish_package.yml +++ b/.github/workflows/publish_package.yml @@ -7,12 +7,6 @@ on: # publish from the Releases page: release: types: [published] - # publish from the Actions page: - workflow_dispatch: - inputs: - version: - description: 'Version (e.g. 2.0.3)' - required: true jobs: deploy: @@ -20,12 +14,10 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v1 - with: - python-version: '3.6' + uses: actions/setup-python@v3 - name: Install dependencies run: | @@ -41,10 +33,8 @@ jobs: with: files: 'dist/*' fail_on_unmatched_files: true - tag_name: ${{ github.event.inputs.version }} # in the workflow_dispatch case, make a new tag from the given input; in the published release case, this will be empty and will fall back to updating that release. - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - + prerelease: ${{ contains(github.ref, 'rc') || contains(github.ref, 'dev') }} + - name: Publish to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index f1a5ec56e..31451bfcc 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -8,9 +8,33 @@ on: push: branches: - master + paths-ignore: + - '**.md' + - '**.rst' + - '**.bib' + - '.github/**' + - '!.github/workflows/run_tests.yml' + - 'dev/**' + - 'docs/**' + - 'images/**' + - '.gitignore' + - '.pre-commit-config.yaml' + - '.readthedocs.yml' # Trigger the workflow on pull requests, but reveal no secrets, do not use pull_request_target, see: # https://docs.github.com/en/actions/reference/events-that-trigger-workflows#pull_request_target pull_request: + paths-ignore: + - '**.md' + - '**.rst' + - '**.bib' + - '.github/**' + - '!.github/workflows/run_tests.yml' + - 'dev/**' + - 'docs/**' + - 'images/**' + - '.gitignore' + - '.pre-commit-config.yaml' + - '.readthedocs.yml' env: # Even when given -y, apt will still sometimes hang at a prompt if a package @@ -19,14 +43,16 @@ env: # (TravisCI quietly defined this on all their platforms, but we have to give it manually on GithubCI.) 
DEBIAN_FRONTEND: 'noninteractive' HDF5_USE_FILE_LOCKING: 'FALSE' + # Skip to the headless matplotlib renderer, which is less + # bug-prone in the constrained environment of CI + # Tip from a matplotlib dev: https://github.com/spinalcordtoolbox/spinalcordtoolbox/issues/3388#issuecomment-846091012 + # Ref: https://matplotlib.org/stable/users/explain/backends.html + MPLBACKEND: 'Agg' jobs: ultra_matrix_test: name: Matrix Test of Python ${{ matrix.python-version }} on ${{ matrix.os }} - # Matrix driven OS - runs-on: ${{ matrix.os }} - # Default shell for ALL subsequent steps. defaults: run: @@ -39,32 +65,34 @@ jobs: fail-fast: false matrix: - os: [ "macos-latest", "windows-latest", "ubuntu-18.04", "ubuntu-20.04", "macos-10.15" ] - python-version: [ 3.6, 3.7, 3.8 ] + os: [ "macos-latest", "windows-latest", "ubuntu-20.04" ] + python-version: [ '3.8', '3.9', '3.10' ] test-name: - integration-test + # Matrix driven OS + runs-on: ${{ matrix.os }} + # Main steps for the test to be reproduced across OS x Python steps: # Step 0: Checkout code. - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 # Step 1: Setup python version - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} - # Step 2: Install requirement dependencies. - - name: Install dependencies - uses: py-actions/py-dependency-install@v2 - with: - path: "requirements_dev.txt" + # Step 2: Install dependencies + - name: Install ivadomed + run: | + pip install -e .[dev] - # Step 3: install additional pytest/dev related dependencies - - name: Install IvadoMed Main Modules + # Step 3: List installed packages + - name: List installed packages run: | - pip install -e . + pip list # Step 4: Lint. 
- name: Lint with flake8 diff --git a/.github/workflows/run_tests_dummy.yml b/.github/workflows/run_tests_dummy.yml new file mode 100644 index 000000000..4b7e44c79 --- /dev/null +++ b/.github/workflows/run_tests_dummy.yml @@ -0,0 +1,39 @@ +# This is a dummy version of run_tests.yml, which allows for conditional checks to still be "Required Statuses" for pull requests to be allowed to be merged + +name: Run tests on all platforms + +# see https://docs.github.com/en/enterprise-cloud@latest/repositories/configuring-branches-and-merges-in-your-repository/defining-the-mergeability-of-pull-requests/troubleshooting-required-status-checks#handling-skipped-but-required-checks +# and https://github.com/orgs/community/discussions/13690 +on: + pull_request: + paths: + # this list needs to be kept in sync manually with the opposite list in run_tests.yml + - '**.md' + - '**.rst' + - '**.bib' + - '.github/**' + - '!.github/workflows/run_tests.yml' + - 'dev/**' + - 'docs/**' + - 'images/**' + - '.gitignore' + - '.pre-commit-config.yaml' + - '.readthedocs.yml' + +jobs: + ultra_matrix_test: + name: Matrix Test of Python ${{ matrix.python-version }} on ${{ matrix.os }} + + # this section also needs to be kept in sync with run_tests.yml + strategy: + matrix: + # This list must be kept **in sync** with the Required Statuses in https://github.com/ivadomed/ivadomed/settings/branch_protection_rules/5051948 + os: [ "macos-latest", "windows-latest", "ubuntu-20.04" ] + python-version: [ 3.8 ] + test-name: + - integration-test + + runs-on: ubuntu-latest + + steps: + - run: 'echo "No code changes, so no build required" ' diff --git a/.gitignore b/.gitignore index cca103914..2f383a6d0 100644 --- a/.gitignore +++ b/.gitignore @@ -21,15 +21,19 @@ log.txt # Pycharm stuff .idea +# VSCode stuff +.vscode + # Pytest Cache, anywhere: .pytest_cache/ # IvadoMed pytest downloaded data file, anywhere: data_testing/ data_functional_testing/ +data_multi_testing/ # OS Specific hidden file .DS_Store # venv -venv/ \ No newline at end of file +venv/ diff --git a/.readthedocs.yml b/.readthedocs.yml index 4013c2c38..ae86ac22b 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -5,17 +5,20 @@ # Required version: 2 +# Set the version of Python and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.10" + # Build documentation in the docs/ directory with Sphinx sphinx: configuration: docs/source/conf.py python: - version: 3.7 install: - # Ensure the Find External Link PyTorch version is installed - - requirements: requirements_dev.txt # Install IvadoMed package via setup.py content - method: pip path: . extra_requirements: - - docs + - dev diff --git a/CHANGES.md b/CHANGES.md index c81ada7d1..41f26109e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,445 @@ + ## v2.9.10 (2024-03-12) +[View detailed changelog](https://github.com/ivadomed/ivadomed/compare/v2.9.9...release) + +**BUG** + + - Fix edge case bug where grayscale image has an alpha channel, minor rtd fix. [View pull request](https://github.com/ivadomed/ivadomed/pull/1313) + +**DEPENDENCIES** + + - Test upgrades for pinned dependencies to improve downstream compatibility. [View pull request](https://github.com/ivadomed/ivadomed/pull/1308) + + ## v2.9.9 (2023-12-11) +[View detailed changelog](https://github.com/ivadomed/ivadomed/compare/v2.9.8...release) + +**FEATURE** + - Introduce `segment_image` CLI. [View pull request](https://github.com/ivadomed/ivadomed/pull/1254) + +**CI** + - chore: remove ubuntu-18.04 and python 3.7 from `run_tests` workflow. 
[View pull request](https://github.com/ivadomed/ivadomed/pull/1298) + +**BUG** + - chore: Dependency Maintenance (imageio 2->3, pyBIDS<0.15.6, readthedocs.yml v2, Python 3.7->3.8). [View pull request](https://github.com/ivadomed/ivadomed/pull/1297) + - Fix NormalizeInstance for uniform samples. [View pull request](https://github.com/ivadomed/ivadomed/pull/1267) + +**INSTALLATION** + - Test out newer versions of PyTorch (`torch>=2.0.0`) for compatibility with downstream projects (SCT, ADS). [View pull request](https://github.com/ivadomed/ivadomed/pull/1304) + +**DOCUMENTATION** + - Update doc for segment_image.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/1290) + - Clarified usage for `fname_split`. [View pull request](https://github.com/ivadomed/ivadomed/pull/1283) + - Point to the URL specific to the learning rate. [View pull request](https://github.com/ivadomed/ivadomed/pull/1281) + - Match location of `object_detection` in the template config file and in the documentation. [View pull request](https://github.com/ivadomed/ivadomed/pull/1278) + - Resolve the discrepancies. [View pull request](https://github.com/ivadomed/ivadomed/pull/1271) + - Clarify usage of --no-patch for 2D only. [View pull request](https://github.com/ivadomed/ivadomed/pull/1265) + +**ENHANCEMENT** + - Do not use wandb if not specified in the config file. [View pull request](https://github.com/ivadomed/ivadomed/pull/1253) + +**REFACTORING** + - Harmonize get_item in MRI2D dataset and MRI3D dataset. [View pull request](https://github.com/ivadomed/ivadomed/pull/1266) + + ## v2.9.8 (2023-01-04) +[View detailed changelog](https://github.com/ivadomed/ivadomed/compare/v2.9.7...release) + +**CI** + + - chore: remove numpy related deprecations in support of v1.24.0 . [View pull request](https://github.com/ivadomed/ivadomed/pull/1246) + +**BUG** + + - Fix 3D training with data augmentation. [View pull request](https://github.com/ivadomed/ivadomed/pull/1222) + - Fix GPU behavior in segment_volume. [View pull request](https://github.com/ivadomed/ivadomed/pull/1209) + +**DOCUMENTATION** + + - Clarify testing output files. [View pull request](https://github.com/ivadomed/ivadomed/pull/1244) + - doc: clarify validation fraction. [View pull request](https://github.com/ivadomed/ivadomed/pull/1207) + +**DEPENDENCIES** + + - chore: remove numpy related deprecations in support of v1.24.0 . [View pull request](https://github.com/ivadomed/ivadomed/pull/1246) + +**ENHANCEMENT** + + - fix/feat: update the path for wandb logs. [View pull request](https://github.com/ivadomed/ivadomed/pull/1237) + +**REFACTORING** + + - Clarify testing output files. [View pull request](https://github.com/ivadomed/ivadomed/pull/1244) + - Remove force indexing of microscopy and update ct. [View pull request](https://github.com/ivadomed/ivadomed/pull/1203) + - Refactoring in ConfigurationManager class (config_manager.py). [View pull request](https://github.com/ivadomed/ivadomed/pull/1195) + - Type Hint for utils.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/1162) + - Type Hint for slice_filter.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/1161) + - Type Hint for segmentation_pair.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/1160) + - Type Hint for sample_meta_data.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/1159) + - Type Hint for patch_filter.py. 
[View pull request](https://github.com/ivadomed/ivadomed/pull/1158) + - Type Hint for mri3d_subvolume_segmentation_dataset.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/1157) + - Type Hint for mri2d_segmentation_dataset.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/1156) + - type hinting for bids_dataset.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/1155) + - Type Hint for bids_dataframe.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/1154) + - Type Hint for bids3d_dataset. [View pull request](https://github.com/ivadomed/ivadomed/pull/1153) + - Type Hinting for balanced_sampler.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/1152) + - Typehint for loader/film.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/1151) + - Type Hinting for loader.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/1150) + + ## v2.9.7 (2022-10-31) +[View detailed changelog](https://github.com/ivadomed/ivadomed/compare/v2.9.6...release) + +**FEATURE** + + - feat: update default args for `wandb.login()`. [View pull request](https://github.com/ivadomed/ivadomed/pull/1193) + - Add Config Parameter to Disable Validation When Loading BIDS Info. [View pull request](https://github.com/ivadomed/ivadomed/pull/1168) + - Add auto disk cache capability to mri2d and mri3d dataset classes. [View pull request](https://github.com/ivadomed/ivadomed/pull/1121) + - Segment 2D images without patches. [View pull request](https://github.com/ivadomed/ivadomed/pull/1101) + +**CI** + + - chore: add an upper bound version specifier for pandas to prevent breaking the tests. [View pull request](https://github.com/ivadomed/ivadomed/pull/1194) + - Drop testing on macOS 10. [View pull request](https://github.com/ivadomed/ivadomed/pull/1190) + - Only run tests on code changes. [View pull request](https://github.com/ivadomed/ivadomed/pull/1186) + - chore: upgrade run_tests workflow. [View pull request](https://github.com/ivadomed/ivadomed/pull/1146) + - Don't install ivadomed just to run pre-commit checks.. [View pull request](https://github.com/ivadomed/ivadomed/pull/1145) + +**BUG** + + - chore: add an upper bound version specifier for pandas to prevent breaking the tests. [View pull request](https://github.com/ivadomed/ivadomed/pull/1194) + - Drop testing on macOS 10. [View pull request](https://github.com/ivadomed/ivadomed/pull/1190) + - Resolve imageio v2->v3 imread deprecation warnings. [View pull request](https://github.com/ivadomed/ivadomed/pull/1181) + - fix: adapt the filenames of the predictions in pred_masks as per target_suffix. [View pull request](https://github.com/ivadomed/ivadomed/pull/1173) + - Fix long evaluation time on microscopy images. [View pull request](https://github.com/ivadomed/ivadomed/pull/1081) + +**INSTALLATION** + + - chore: update installation for ivadomed tutorials. [View pull request](https://github.com/ivadomed/ivadomed/pull/1200) + - Remove deprecated torch and dev installation instructions. [View pull request](https://github.com/ivadomed/ivadomed/pull/1179) + - Get Rid of Python 3.6. [View pull request](https://github.com/ivadomed/ivadomed/pull/1149) + - Support python3.10. [View pull request](https://github.com/ivadomed/ivadomed/pull/1137) + +**DOCUMENTATION** + + - fix: update link to contribution guidelines. [View pull request](https://github.com/ivadomed/ivadomed/pull/1196) + - Tests README: Add instructions to install testing-related packages. 
[View pull request](https://github.com/ivadomed/ivadomed/pull/1180) + - Remove deprecated torch and dev installation instructions. [View pull request](https://github.com/ivadomed/ivadomed/pull/1179) + - Add description and default values to some parameters.. [View pull request](https://github.com/ivadomed/ivadomed/pull/1174) + - Add Note in WandB. [View pull request](https://github.com/ivadomed/ivadomed/pull/1171) + +**DEPENDENCIES** + + - chore: add an upper bound version specifier for pandas to prevent breaking the tests. [View pull request](https://github.com/ivadomed/ivadomed/pull/1194) + - Support python3.10. [View pull request](https://github.com/ivadomed/ivadomed/pull/1137) + +**ENHANCEMENT** + + - feat: update default args for `wandb.login()`. [View pull request](https://github.com/ivadomed/ivadomed/pull/1193) + - Only run tests on code changes. [View pull request](https://github.com/ivadomed/ivadomed/pull/1186) + - Transformation on Subvolume for mri3d_subvolume_segmentation_dataset. [View pull request](https://github.com/ivadomed/ivadomed/pull/1169) + - Support python3.10. [View pull request](https://github.com/ivadomed/ivadomed/pull/1137) + - Add syntax highlighting and improve flow of the Colab tutorials. [View pull request](https://github.com/ivadomed/ivadomed/pull/1127) + - Add auto disk cache capability to mri2d and mri3d dataset classes. [View pull request](https://github.com/ivadomed/ivadomed/pull/1121) + +**TESTING** + + - Drop testing on macOS 10. [View pull request](https://github.com/ivadomed/ivadomed/pull/1190) + - Only run tests on code changes. [View pull request](https://github.com/ivadomed/ivadomed/pull/1186) + - Add syntax highlighting and improve flow of the Colab tutorials. [View pull request](https://github.com/ivadomed/ivadomed/pull/1127) + +**REFACTORING** + + - feat: update default args for `wandb.login()`. [View pull request](https://github.com/ivadomed/ivadomed/pull/1193) + - Convert pred data to uint8 prior to imwrite png. [View pull request](https://github.com/ivadomed/ivadomed/pull/1185) + - Minor correction to unsupported file extension error message. [View pull request](https://github.com/ivadomed/ivadomed/pull/1177) + +## v2.9.6 (2022-06-02) +[View detailed changelog](https://github.com/ivadomed/ivadomed/compare/v2.9.5...release) + +**Installation** + + - unify installation (attempt #2). [View pull request](https://github.com/ivadomed/ivadomed/pull/1129) + +**DOCUMENTATION** + + - Update documentation and config files to add WandB details. [View pull request](https://github.com/ivadomed/ivadomed/pull/1120) + +**ENHANCEMENT** + + - Support for WandB Experimental Tracking . [View pull request](https://github.com/ivadomed/ivadomed/pull/1069) + +**CONTINUOUS INTEGRATION** + + - Adds various CI and testing improvement not impacting end users. + +## v2.9.5 (2022-04-06) +[View detailed changelog](https://github.com/ivadomed/ivadomed/compare/v2.9.4...release) + +**BUG** + +- Fix TSV metadata indexation and remove unused lines from bids_dataframe based on split_method. [View pull request](https://github.com/ivadomed/ivadomed/pull/1112) +- Fix loading of TIF 16bits grayscale files. [View pull request](https://github.com/ivadomed/ivadomed/pull/1107) +- Fix loading when names of multiple target_suffix overlap. [View pull request](https://github.com/ivadomed/ivadomed/pull/1100) + +**ENHANCEMENT** + +- Add type hintings to fields inside all keywords(KW) dataclasses. 
[View pull request](https://github.com/ivadomed/ivadomed/pull/1109)
+
+**DOCUMENTATION**
+
+- Clarify data and loading documentation. [View pull request](https://github.com/ivadomed/ivadomed/pull/1103)
+
+
+## v2.9.4 (2022-03-09)
+[View detailed changelog](https://github.com/ivadomed/ivadomed/compare/v2.9.3...release)
+
+**FEATURE**
+
+- Segment with ONNX or PT model based on CPU/GPU availability. [View pull request](https://github.com/ivadomed/ivadomed/pull/1086)
+
+**ENHANCEMENT**
+
+- Update microscopy following BEP release. [View pull request](https://github.com/ivadomed/ivadomed/pull/1025)
+
+**BUG**
+
+- Fixing mix-up for GPU training. [View pull request](https://github.com/ivadomed/ivadomed/pull/1063)
+
+**REFACTORING**
+
+- Refactor missing print statements to be using logger. [View pull request](https://github.com/ivadomed/ivadomed/pull/1085)
+- Convert print to logger format for much more granular unified control. [View pull request](https://github.com/ivadomed/ivadomed/pull/1040)
+- Update pybids to 0.14.0. [View pull request](https://github.com/ivadomed/ivadomed/pull/994)
+
+**DOCUMENTATION**
+
+- Add ADS use case in documentation. [View pull request](https://github.com/ivadomed/ivadomed/pull/1080)
+- Updated documentation for SoftSeg training. [View pull request](https://github.com/ivadomed/ivadomed/pull/1064)
+- Rewrite tutorial 2 with sphinx tab. [View pull request](https://github.com/ivadomed/ivadomed/pull/1045)
+- Format revamped Tutorial 1 to highlight the CLI vs JSON approaches. [View pull request](https://github.com/ivadomed/ivadomed/pull/1039)
+- Improve Installation Doc Readability for Step 3 relating to GPU setup. [View pull request](https://github.com/ivadomed/ivadomed/pull/1037)
+
+## v2.9.3 (2022-02-01)
+[View detailed changelog](https://github.com/ivadomed/ivadomed/compare/v2.9.2...release)
+
+**FEATURE**
+
+- Apply filter parameters on 2D patches to remove empty patches. [View pull request](https://github.com/ivadomed/ivadomed/pull/980)
+
+**REFACTORING**
+
+- Update pred_to_png prediction filenames for ADS integration. [View pull request](https://github.com/ivadomed/ivadomed/pull/1050)
+
+**DOCUMENTATION**
+
+- Instruction to update `"bids_config"` key in microscopy tutorial. [View pull request](https://github.com/ivadomed/ivadomed/pull/1053)
+
+
+## v2.9.2 (2022-01-18)
+[View detailed changelog](https://github.com/ivadomed/ivadomed/compare/v2.9.1...release)
+
+**FEATURE**
+
+- Implementation of Random Blur Augmentation. [View pull request](https://github.com/ivadomed/ivadomed/pull/1034)
+- Implementation of Random Bias Field Augmentation. [View pull request](https://github.com/ivadomed/ivadomed/pull/1033)
+- Implementation of Random Gamma Contrast Augmentation. [View pull request](https://github.com/ivadomed/ivadomed/pull/1015)
+
+**DEPENDENCIES**
+
+- Unpin `tensorboard` to avoid conflict with downstream SCT requirements. [View pull request](https://github.com/ivadomed/ivadomed/pull/1048)
+
+**BUG**
+
+- Rename prediction filenames: add class index and compat. for multi-rater. [View pull request](https://github.com/ivadomed/ivadomed/pull/1043)
+- Fix pixel size keyword in run_segment_command. [View pull request](https://github.com/ivadomed/ivadomed/pull/1024)
+- Replaced flip_axes with the correct bool element at index. [View pull request](https://github.com/ivadomed/ivadomed/pull/1013)
+
+**DOCUMENTATION**
+
+- Add microscopy tutorial. [View pull request](https://github.com/ivadomed/ivadomed/pull/1036)
+- Removed one child-heading for clarity. 
[View pull request](https://github.com/ivadomed/ivadomed/pull/1028) +- Typo fix for URL that is bricking the Colab link. [View pull request](https://github.com/ivadomed/ivadomed/pull/1021) +- Experimental incorporation of tutorial jupyter notebooks open in Colab path. [View pull request](https://github.com/ivadomed/ivadomed/pull/998) + + +## v2.9.1 (2021-12-13) +[View detailed changelog](https://github.com/ivadomed/ivadomed/compare/v2.9.0...release) + +**ENHANCEMENT** + +- Add forced indexation of "micr" datatype. [View pull request](https://github.com/ivadomed/ivadomed/pull/995) +- Apply transforms on 2D patches. [View pull request](https://github.com/ivadomed/ivadomed/pull/982) + +**DOCUMENTATION** + +- Update Tutorial 1/2/3 and readme.md to fix minor display issues. [View pull request](https://github.com/ivadomed/ivadomed/pull/992) +- Update installation instruction to fit recent CUDA11 and torch 1.8+ push. [View pull request](https://github.com/ivadomed/ivadomed/pull/969) + +**REFACTORING** + +- Fully Remove HeMIS model, Adaptive and h5py/HDF5. [View pull request](https://github.com/ivadomed/ivadomed/pull/984) +- Use keywords for the rest of the files. [View pull request](https://github.com/ivadomed/ivadomed/pull/946) + + +## v2.9.0 (2021-11-14) +[View detailed changelog](https://github.com/ivadomed/ivadomed/compare/v2.8.0...release) + +**ENHANCEMENT** + +- Make ivadomed be compatible with python 3.9 and PyTorch 1.8. [View pull request](https://github.com/ivadomed/ivadomed/pull/819) + +**DEPENDENCIES** + +- Pin to CUDA-11. [View pull request](https://github.com/ivadomed/ivadomed/pull/951) + +**BUG FIXES** + +- Pin PyParsing version to be compatible with pip 20. [View pull request](https://github.com/ivadomed/ivadomed/pull/987) +- Fix pytest test_download_data_no_dataset_specified fail bug. [View pull request](https://github.com/ivadomed/ivadomed/pull/968) +- Fix GeneralizedDiceLoss with `include_background=true` and `batch_size>1` . [View pull request](https://github.com/ivadomed/ivadomed/pull/962) +- Fix undo_transforms in volume reconstruction. [View pull request](https://github.com/ivadomed/ivadomed/pull/957) +- Fix undo_transforms in image reconstruction. [View pull request](https://github.com/ivadomed/ivadomed/pull/956) +- add metadata to create_metadata_dict. [View pull request](https://github.com/ivadomed/ivadomed/pull/954) +- Update scripts in `dev/prepare_data` to use new SCT config syntax (`.yml`). [View pull request](https://github.com/ivadomed/ivadomed/pull/949) +- Fix config loading errors. [View pull request](https://github.com/ivadomed/ivadomed/pull/944) +- Fix dropout_rate key in models.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/937) +- Add additional check for incorrect final_activation value. [View pull request](https://github.com/ivadomed/ivadomed/pull/933) +- Make ivadomed be compatible with python3.9 and PyTorch 1.8. [View pull request](https://github.com/ivadomed/ivadomed/pull/819) + +**DOCUMENTATION** + +- Minor modifications to the documentation for tutorial 3. [View pull request](https://github.com/ivadomed/ivadomed/pull/988) +- Fix resample axis order in documentation. [View pull request](https://github.com/ivadomed/ivadomed/pull/978) +- Update help.rst. [View pull request](https://github.com/ivadomed/ivadomed/pull/967) +- Fixing issues in estimate uncertainty tutorial. [View pull request](https://github.com/ivadomed/ivadomed/pull/936) +- Fix link to data file in ivadomed instructions. 
[View pull request](https://github.com/ivadomed/ivadomed/pull/929) +- Fixes object detection path in cascaded architecture tutorial. [View pull request](https://github.com/ivadomed/ivadomed/pull/922) +- Make ivadomed be compatible with python3.9 and PyTorch 1.8. [View pull request](https://github.com/ivadomed/ivadomed/pull/819) + +**REFACTORING** + +- Fully Remove HeMIS model, Adaptive and h5py/HDF5. [View pull request](https://github.com/ivadomed/ivadomed/pull/984) +- Fix path_output in automated training. [View pull request](https://github.com/ivadomed/ivadomed/pull/914) +- Using keywords for ivadomed/scripts folder. [View pull request](https://github.com/ivadomed/ivadomed/pull/934) +- Keywords refactoring Phase II: loader focus. [View pull request](https://github.com/ivadomed/ivadomed/pull/909) +- Adopting pathllib for loader/bids_dataframe. [View pull request](https://github.com/ivadomed/ivadomed/pull/947) +- Adopting pathlib for tests. [View pull request](https://github.com/ivadomed/ivadomed/pull/901) +- Adopting pathlib training.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/897) +- Adopting pathlib for main.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/892) +- Adopting pathlib for loader/utils.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/879) + +**TESTING** + +- Fix pytest test_download_data_no_dataset_specified fail bug. [View pull request](https://github.com/ivadomed/ivadomed/pull/968) + +**CI** + +- Update Sphinx dependency version and check RTD.org performance. [View pull request](https://github.com/ivadomed/ivadomed/pull/974) +- Fix pytest problem. [View pull request](https://github.com/ivadomed/ivadomed/pull/968) +- Update to GitHub Action to use `setup-python@v2`. [View pull request](https://github.com/ivadomed/ivadomed/pull/959) +- Make ivadomed be compatible with python3.9 and PyTorch 1.8. [View pull request](https://github.com/ivadomed/ivadomed/pull/819) + +## v2.8.0 (2021-08-31) +[View detailed changelog](https://github.com/ivadomed/ivadomed/compare/v2.7.4...v.2.8.0) + +**FEATURE** + +- Add image reconstruction from 2D patches. [View pull request](https://github.com/ivadomed/ivadomed/pull/782) +- Add sha256 for training data. [View pull request](https://github.com/ivadomed/ivadomed/pull/760) + +**CI** + +- Exclude testing directory in coveralls. [View pull request](https://github.com/ivadomed/ivadomed/pull/776) +- Improve current GitHub Action CI with multi OS support. [View pull request](https://github.com/ivadomed/ivadomed/pull/757) + +**BUG** + +- Fix training_curve.py output. [View pull request](https://github.com/ivadomed/ivadomed/pull/923) +- Fix inverted dimensions in microscopy pixelsize. [View pull request](https://github.com/ivadomed/ivadomed/pull/916) +- Fix segment functions for models without pre-processing transforms. [View pull request](https://github.com/ivadomed/ivadomed/pull/874) +- Fix microscopy ground-truth range of values. [View pull request](https://github.com/ivadomed/ivadomed/pull/870) +- `utils.py`: Only raise ArgParseException for non-zero SystemExits. [View pull request](https://github.com/ivadomed/ivadomed/pull/854) +- Remove `anaconda` from explicit dependencies. [View pull request](https://github.com/ivadomed/ivadomed/pull/845) +- Fix multiclass evaluation bug. [View pull request](https://github.com/ivadomed/ivadomed/pull/837) +- Fix last slice missing in testing bug. 
[View pull request](https://github.com/ivadomed/ivadomed/pull/835) +- Skip all NumpyToTensor transformation for retrocompatibility. [View pull request](https://github.com/ivadomed/ivadomed/pull/830) +- Remove all NumpyToTensor configs keys. [View pull request](https://github.com/ivadomed/ivadomed/pull/826) +- Add missing "-r" flags to installation.rst. [View pull request](https://github.com/ivadomed/ivadomed/pull/820) +- Call NumpyToTensor last. [View pull request](https://github.com/ivadomed/ivadomed/pull/818) +- Fix bug in loader for multiple raters. [View pull request](https://github.com/ivadomed/ivadomed/pull/806) +- Hot patch to address Inference issue #803. [View pull request](https://github.com/ivadomed/ivadomed/pull/804) +- Add tmp and log file to gitignore. [View pull request](https://github.com/ivadomed/ivadomed/pull/794) + +**INSTALLATION** + +- Remove `anaconda` from explicit dependencies. [View pull request](https://github.com/ivadomed/ivadomed/pull/845) + +**DOCUMENTATION** + +- Fix neuropoly guidelines link in ivadomed contribution guidelines document. [View pull request](https://github.com/ivadomed/ivadomed/pull/924) +- Change readme to point to the latest build version. [View pull request](https://github.com/ivadomed/ivadomed/pull/875) +- Installation instruction steps explicity recommended for MacOS but not Linux. [View pull request](https://github.com/ivadomed/ivadomed/pull/847) +- Clarified step 2 for pytorch/torchvision. [View pull request](https://github.com/ivadomed/ivadomed/pull/842) +- Add missing "-r" flags to installation.rst. [View pull request](https://github.com/ivadomed/ivadomed/pull/820) +- Update one class segmentation tutorial's output and segmentation image. [View pull request](https://github.com/ivadomed/ivadomed/pull/779) +- Update documentation with the solution to failing test_adaptive.py on MacOS. [View pull request](https://github.com/ivadomed/ivadomed/pull/771) +- Added link to JOSS paper. [View pull request](https://github.com/ivadomed/ivadomed/pull/748) + +**DEPENDENCIES** + +- Remove `anaconda` from explicit dependencies. [View pull request](https://github.com/ivadomed/ivadomed/pull/845) + +**ENHANCEMENT** + +- Fix training_curve.py output. [View pull request](https://github.com/ivadomed/ivadomed/pull/923) +- Fix microscopy ground-truth range of values. [View pull request](https://github.com/ivadomed/ivadomed/pull/870) +- Fix generate_sha_256 for joblib files. [View pull request](https://github.com/ivadomed/ivadomed/pull/866) +- Add microscopy config file. [View pull request](https://github.com/ivadomed/ivadomed/pull/850) +- Add the inference steps for PNG/TIF microscopy data. [View pull request](https://github.com/ivadomed/ivadomed/pull/834) +- New loader: Load PNG/TIF/JPG microscopy files as Nibabel objects. [View pull request](https://github.com/ivadomed/ivadomed/pull/813) +- Speed up IvadoMed Import Speed. [View pull request](https://github.com/ivadomed/ivadomed/pull/793) +- Remove data dependencies from `if` statements in the `Decoder()` forward pass. [View pull request](https://github.com/ivadomed/ivadomed/pull/752) + +**TESTING** + +- Unsilence test_rbg. [View pull request](https://github.com/ivadomed/ivadomed/pull/832) +- Fix test_sampler. [View pull request](https://github.com/ivadomed/ivadomed/pull/831) +- Fix bug in loader for multiple raters. [View pull request](https://github.com/ivadomed/ivadomed/pull/806) +- Exclude testing directory in coveralls. 
[View pull request](https://github.com/ivadomed/ivadomed/pull/776) +- Update documentation with the solution to failing test_adaptive.py on MacOS. [View pull request](https://github.com/ivadomed/ivadomed/pull/771) +- Migrate test_segment_volume.py from unit_tests to functional_tests. [View pull request](https://github.com/ivadomed/ivadomed/pull/767) +- Improve current GitHub Action CI with multi OS support. [View pull request](https://github.com/ivadomed/ivadomed/pull/757) + +**REFACTORING** + +- Extract class SliceFilter, BalancedSample and SampleMetaData from loader.util. [View pull request](https://github.com/ivadomed/ivadomed/pull/928) +- Extracted BidsDataFrame class outside of loader/utils.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/917) +- Initialize the adoption of centralized management of keywords via keywords.py (Phase I: compilation of all keywords). [View pull request](https://github.com/ivadomed/ivadomed/pull/904) +- Fix empty list default parameter antipattern.. [View pull request](https://github.com/ivadomed/ivadomed/pull/903) +- Pathlib adoption for visualize.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/900) +- Pathlib adoption for utils.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/899) +- Pathlib adoption for uncertainty.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/898) +- Pathlib adoption for testing.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/896) +- Pathlib adoption for postprocessing.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/895) +- Pathlib adoption for models.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/894) +- Pathlib adoption for mixup.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/893) +- Pathlib adoption for inference.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/891) +- Pathlib adoption for evaluation.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/890) +- pathlib config_manager.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/889) +- Pathlib adoption for visualize_transform. [View pull request](https://github.com/ivadomed/ivadomed/pull/888) +- Pathlib adoption for visualize_and_compare_testing_models.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/887) +- Pathlib adoption for script/training_curve.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/886) +- Pathlib adoption for script/prepare_dataset_vertibral_labeling.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/885) +- Pathlib adoption for extract_small_dataset. [View pull request](https://github.com/ivadomed/ivadomed/pull/884) +- pathlib for download_data. [View pull request](https://github.com/ivadomed/ivadomed/pull/883) +- pathlib for script/automate_training.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/881) +- pathlib change for object_detection/utils.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/880) +- Pathlib adoption for loader/segmentation_pair.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/878) +- Pathlib adoption for loader/film.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/877) +- Pathlib adoption for adaptative.py. [View pull request](https://github.com/ivadomed/ivadomed/pull/876) +- Update config_bids.json following changes in microscopy BEP. 
[View pull request](https://github.com/ivadomed/ivadomed/pull/838)
+- Extracted Loader Classes into separate files. [View pull request](https://github.com/ivadomed/ivadomed/pull/828)
+- Refactor segment_volume to reduce complexity. [View pull request](https://github.com/ivadomed/ivadomed/pull/791)
+- Refactoring: BidsDataset __init__ reduce complexity. [View pull request](https://github.com/ivadomed/ivadomed/pull/765)
+- Refactoring: reduce complexity of BIDStoHDF5 _load_filenames. [View pull request](https://github.com/ivadomed/ivadomed/pull/737)
+
 ## v2.7.4 (2021-03-15)
 
 See `2.7.3`. We had to re-release because the GitHub Action didn't get triggered to push the release
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index 123ef485d..cb5491719 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -1,8 +1,22 @@
 Contributing to ivadomed
 ========================
 
+General Guidelines
+++++++++++++++++++
+
 Thank you for your interest in contributing to ivadomed! This project uses the following pages to guide new contributions:
 
- * The `ivadomed GitHub repository `_ is where the source code for the project is maintained, and where new contributions are submitted to.
- * The `NeuroPoly Contributing Guidelines `_ provide instructions for development workflows, such as reporting issues or submitting pull requests.
- * The `ivadomed Developer Wiki `_ acts as a knowledge base for documenting internal design decisions specific to the ivadomed codebase. It also contains step-by-step walkthroughs for common ivadomed maintainer tasks.
\ No newline at end of file
+ * The `ivadomed GitHub repository `_
+   is where the source code for the project is maintained, and where new
+   contributions are submitted to. We welcome any type of contribution
+   and recommend setting up ``ivadomed`` by following the Contributor
+   or Developer installation as instructed below before making a
+   contribution.
+
+ * The `NeuroPoly Contributing Guidelines `_
+   provide instructions for development workflows, such as reporting issues or submitting pull requests.
+
+ * The `ivadomed Developer Wiki `_
+   acts as a knowledge base for documenting internal design decisions specific
+   to the ivadomed codebase. It also contains step-by-step walkthroughs for
+   common ivadomed maintainer tasks.
\ No newline at end of file
diff --git a/README.md b/README.md
index cd4265f16..d314a2721 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,7 @@
-
+
+> [!WARNING]
+> `ivadomed` is no longer maintained. New models integrated into our third-party software (SCT, AxonDeepSeg, etc.) are now trained using MONAI and/or nnU-Net.
+
 ![ivadomed Overview](https://raw.githubusercontent.com/ivadomed/doc-figures/main/index/overview_title.png)
 
 [![DOI](https://joss.theoj.org/papers/10.21105/joss.02868/status.svg)](https://doi.org/10.21105/joss.02868)
@@ -11,15 +14,15 @@
 `ivadomed` is an integrated framework for medical image analysis with deep learning.
 
-The technical documentation is available [here](https://ivadomed.org).
+The technical documentation is available [here](https://ivadomed.org). More detailed installation instructions are available in the [installation guide](https://ivadomed.org/installation.html).
 
 ## Installation
 
-``ivadomed`` requires Python >= 3.6 and < 3.9 as well as PyTorch == 1.5.0. We recommend working under a virtual environment, which could be set as follows:
+``ivadomed`` requires Python >= 3.7 and < 3.10 as well as PyTorch == 1.8. 
We recommend working under a virtual environment, which can be set up as follows:
 
 ```bash
-virtualenv venv-ivadomed
-source venv-ivadomed/bin/activate
+python -m venv ivadomed_env
+source ivadomed_env/bin/activate
 ```
 
 ### Install from release (recommended)
@@ -33,8 +36,7 @@ pip install ivadomed
 
 ### Install from source
 
-Bleeding-edge developments are available on the project's master branch
-on Github. Installation procedure is the following:
+Bleeding-edge development builds are available on the project's master branch on GitHub. The installation procedure is as follows:
 
 ```bash
 git clone https://github.com/neuropoly/ivadomed.git
@@ -42,6 +44,7 @@ cd ivadomed
 pip install -e .
 ```
 
+
 ## Contributors

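The dev-script diffs that follow replace bare `print` calls with `loguru` logging and import `SliceFilter` from its new dedicated module, `ivadomed.loader.slice_filter`. As a minimal, self-contained sketch of the logging pattern being adopted (only the public `loguru` API is assumed; the function and its messages are illustrative, not taken from the codebase):

```python
from loguru import logger

def load_split(split_name: str, n_slices: int) -> None:
    """Illustrative stand-in for the loader messages converted in these diffs."""
    # f-string interpolation replaces the old str.format()-based print calls
    logger.info(f"Loading {split_name} set.")
    if n_slices == 0:
        # Severity levels (debug/info/warning/error) give the granular,
        # unified control over output that bare print statements lack
        logger.warning(f"No slices found for the {split_name} set.")
    else:
        logger.info(f"Loaded {n_slices} axial slices for the {split_name} set.")

if __name__ == "__main__":
    load_split("train", 42)
```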
diff --git a/dev/class_balance.py b/dev/class_balance.py index 7be7513b2..ae7ba8918 100755 --- a/dev/class_balance.py +++ b/dev/class_balance.py @@ -11,19 +11,21 @@ # ############################################################## -import json import argparse import numpy as np from ivadomed.loader.bids_dataset import BidsDataset from ivadomed import config_manager as imed_config_manager from ivadomed.loader import utils as imed_loader_utils -from ivadomed import utils as imed_utils +from ivadomed.loader.slice_filter import SliceFilter from ivadomed import transforms as imed_transforms +from ivadomed import utils as imed_utils from torchvision import transforms as torch_transforms from torch.utils.data import DataLoader +from loguru import logger + def get_parser(): parser = argparse.ArgumentParser() @@ -32,12 +34,6 @@ def get_parser(): return parser -def print_stats(arr): - print('\tMean: {} %'.format(np.mean(arr))) - print('\tMedian: {} %'.format(np.median(arr))) - print('\tInter-quartile range: [{}, {}] %'.format(np.percentile(arr, 25), np.percentile(arr, 75))) - - def run_main(args): context = imed_config_manager.ConfigurationManager(args.c).get_config() @@ -55,7 +51,7 @@ def run_main(args): balance_dct = {} for ds_lst, ds_name in zip([train_lst, valid_lst, test_lst], ['train', 'valid', 'test']): - print("\nLoading {} set.\n".format(ds_name)) + logger.info(f"\nLoading {ds_name} set.\n") ds = BidsDataset(context["path_data"], subject_lst=ds_lst, target_suffix=context["target_suffix"], @@ -64,9 +60,9 @@ def run_main(args): metadata_choice=context["metadata"], contrast_balance=context["contrast_balance"], transform=transform_lst, - slice_filter_fn=imed_loader_utils.SliceFilter()) + slice_filter_fn=SliceFilter()) - print("Loaded {} axial slices for the {} set.".format(len(ds), ds_name)) + logger.info(f"Loaded {len(ds)} axial slices for the {ds_name} set.") ds_loader = DataLoader(ds, batch_size=1, shuffle=False, pin_memory=False, collate_fn=imed_loader_utils.imed_collate, @@ -82,11 +78,11 @@ def run_main(args): balance_dct[ds_name] = balance_lst for ds_name in balance_dct: - print('\nClass balance in {} set:'.format(ds_name)) - print_stats(balance_dct[ds_name]) + logger.info(f"\nClass balance in {ds_name} set:") + imed_utils.print_stats(balance_dct[ds_name]) - print('\nClass balance in full set:') - print_stats([e for d in balance_dct for e in balance_dct[d]]) + logger.info("\nClass balance in full set:") + imed_utils.print_stats([e for d in balance_dct for e in balance_dct[d]]) if __name__ == '__main__': diff --git a/dev/df_new_loader.py b/dev/df_new_loader.py index 4039d53c6..252ca57a1 100644 --- a/dev/df_new_loader.py +++ b/dev/df_new_loader.py @@ -12,9 +12,10 @@ # IMPORTS import os +from ivadomed.loader.bids_dataframe import BidsDataframe from ivadomed import config_manager as imed_config_manager -from ivadomed.loader import utils as imed_loader_utils +from loguru import logger # GET LOADER PARAMETERS FROM IVADOMED CONFIG FILE # The loader parameters have 2 new fields: "bids_config" and "extensions". 
@@ -36,13 +37,13 @@ # CREATE OUTPUT PATH path_output = context["path_output"] if not os.path.isdir(path_output): - print('Creating output path: {}'.format(path_output)) + logger.info(f"Creating output path: {path_output}") os.makedirs(path_output) else: - print('Output path already exists: {}'.format(path_output)) + logger.warning(f"Output path already exists: {path_output}") # CREATE BIDSDataframe OBJECT -bids_df = imed_loader_utils.BidsDataframe(loader_params, path_output, derivatives) +bids_df = BidsDataframe(loader_params, path_output, derivatives) df = bids_df.df # DROP "path" COLUMN AND SORT BY FILENAME FOR TESTING PURPOSES WITH data-testing @@ -52,4 +53,4 @@ # SAVE DATAFRAME TO CSV FILE FOR data-testing path_csv = "test_df_new_loader.csv" df.to_csv(path_csv, index=False) -print(df) +logger.debug(df) diff --git a/dev/filtering_lesion.py b/dev/filtering_lesion.py index 61fcdf4ed..f609db64f 100755 --- a/dev/filtering_lesion.py +++ b/dev/filtering_lesion.py @@ -16,6 +16,7 @@ # from sklearn.metrics import auc import matplotlib.pyplot as plt from scipy.ndimage import label, generate_binary_structure +from loguru import logger from ivadomed import config_manager as imed_config_manager from ivadomed import main as imed @@ -81,17 +82,17 @@ def print_unc_stats(unc_name, pred_folder, im_lst): p75s.append(np.percentile(vals, 75)) for n, l in zip(['min', 'max', 'p25', 'p50', 'p75'], [mins, maxs, p25s, p50s, p75s]): - print('\t{}: {}'.format(n, np.mean(l))) + logger.debug(f"\t{n}: {np.mean(l)}") def count_retained(data_before, data_after, level): if level == 'vox': cmpt_before, cmpt_after = np.count_nonzero(data_before), np.count_nonzero(data_after) else: # level == 'obj' - print(np.sum(data_before), np.sum(data_after)) + logger.debug(f"{np.sum(data_before)} {np.sum(data_after)}") _, cmpt_before = label(data_before, structure=BIN_STRUCT) _, cmpt_after = label(data_after, structure=BIN_STRUCT) - print(cmpt_before, cmpt_after) + logger.debug(f"{cmpt_before} {cmpt_after}") percent_rm = (cmpt_before - cmpt_after) * 100. / cmpt_before return 100. - percent_rm @@ -122,7 +123,7 @@ def run_experiment(level, unc_name, thr_unc_lst, thr_pred_lst, gt_folder, pred_f if os.path.isfile(fname_gt): nib_gt = nib.load(fname_gt) data_gt = nib_gt.get_data() - print(np.sum(data_gt)) + logger.debug(np.sum(data_gt)) # soft prediction data_soft = np.mean(data_pred_lst, axis=0) @@ -133,7 +134,7 @@ def run_experiment(level, unc_name, thr_unc_lst, thr_pred_lst, gt_folder, pred_f data_soft_thrUnc[data_unc > thr_unc] = 0 cmpt = count_retained((data_soft > 0).astype(np.int), (data_soft_thrUnc > 0).astype(np.int), level) res_dct['retained_elt'][i_unc].append(cmpt) - print(thr_unc, cmpt) + logger.debug(f"{thr_unc} {cmpt}") for i_pred, thr_pred in enumerate(thr_pred_lst): data_hard = imed_postpro.threshold_predictions(deepcopy(data_soft_thrUnc), thr=thr_pred)\ .astype(np.uint8) @@ -149,8 +150,8 @@ def run_experiment(level, unc_name, thr_unc_lst, thr_pred_lst, gt_folder, pred_f else: tpr, _ = eval.get_ltpr() fdr = eval.get_lfdr() - print(thr_pred, np.count_nonzero(deepcopy(data_soft_thrUnc)), np.count_nonzero(data_hard), tpr, - fdr) + logger.debug(f"{thr_pred} {np.count_nonzero(deepcopy(data_soft_thrUnc))} " + f"{np.count_nonzero(data_hard)} {tpr} {fdr}") res_dct['tpr'][i_unc][i_pred].append(tpr / 100.) res_dct['fdr'][i_unc][i_pred].append(fdr / 100.) 
@@ -158,15 +159,15 @@ def run_experiment(level, unc_name, thr_unc_lst, thr_pred_lst, gt_folder, pred_f def print_retained_elt(thr_unc_lst, retained_elt_lst): - print('Mean percentage of retained elt:') + logger.info('Mean percentage of retained elt:') for i, t in enumerate(thr_unc_lst): - print('\tUnc threshold: {} --> {}'.format(t, np.mean(retained_elt_lst[i]))) + logger.info(f"\tUnc threshold: {t} --> {np.mean(retained_elt_lst[i])}") def plot_roc(thr_unc_lst, thr_pred_lst, res_dct, metric, fname_out): plt.figure(figsize=(10, 10)) for i_unc, thr_unc in enumerate(thr_unc_lst): - print('Unc Thr: {}'.format(thr_unc)) + logger.info(f"Unc Thr: {thr_unc}") tpr_vals = np.array([np.nanmean(res_dct['tpr'][i_unc][i_pred]) for i_pred in range(len(thr_pred_lst))]) fdr_vals = np.array([np.nanmean(res_dct['fdr'][i_unc][i_pred]) for i_pred in range(len(thr_pred_lst))]) @@ -175,7 +176,7 @@ def plot_roc(thr_unc_lst, thr_pred_lst, res_dct, metric, fname_out): optimal_idx = np.argmax(tpr_vals - fdr_vals) optimal_threshold = thr_pred_lst[optimal_idx] - print('AUC: {}, Optimal Pred Thr: {}'.format(auc_, optimal_threshold)) + logger.info(f"AUC: {auc_}, Optimal Pred Thr: {optimal_threshold}") plt.scatter(fdr_vals, tpr_vals, label='Unc thr={0:0.2f} (area = {1:0.2f})'.format(thr_unc, auc_), s=22) @@ -191,13 +192,13 @@ def plot_roc(thr_unc_lst, thr_pred_lst, res_dct, metric, fname_out): def run_inference(pred_folder, im_lst, thr_pred, gt_folder, target_suf, param_eval, unc_name=None, thr_unc=None): - # init df - df_results = pd.DataFrame() + # init df row list + df_lst = [] # loop across images for fname_pref in im_lst: if not any(elem is None for elem in [unc_name, thr_unc]): - print(thr_unc) + logger.debug(thr_unc) # uncertainty map fname_unc = os.path.join(pred_folder, fname_pref + unc_name + '.nii.gz') im = nib.load(fname_unc) @@ -220,7 +221,7 @@ def run_inference(pred_folder, im_lst, thr_pred, gt_folder, target_suf, param_ev data_soft = np.mean(data_pred_lst, axis=0) if not any(elem is None for elem in [unc_name, thr_unc]): - print('thr') + logger.debug("thr") # discard uncertain lesions from data_soft data_soft[data_unc > thr_unc] = 0 @@ -235,8 +236,9 @@ def run_inference(pred_folder, im_lst, thr_pred, gt_folder, target_suf, param_ev # save results of this fname_pred results_pred['image_id'] = fname_pref.split('_')[0] - df_results = df_results.append(results_pred, ignore_index=True) + df_lst.append(results_pred) + df_results = pd.DataFrame(df_lst) return df_results @@ -263,7 +265,7 @@ def run_main(args): if thrPred is None: for exp in exp_dct.keys(): config_dct = exp_dct[exp] - print(config_dct['uncertainty_measure']) + logger.debug(config_dct['uncertainty_measure']) # print_unc_stats is used to determine 'uncertainty_thr' print_unc_stats(config_dct['uncertainty_measure'], pred_folder, subj_acq_lst) @@ -299,9 +301,9 @@ def run_main(args): param_eval=context["eval_params"], unc_name=sufUnc, thr_unc=thrUnc) - print(df.head()) + logger.debug(df.head()) vals = [v for v in df.dice_class0 if str(v) != 'nan'] - print('Median (IQR): {} ({} - {}).'.format(np.median(vals), np.percentile(vals, 25), np.percentile(vals, 75))) + logger.info(f"Median (IQR): {np.median(vals)} ({np.percentile(vals, 25)} - {np.percentile(vals, 75)}).") df.to_csv(os.path.join(ofolder, '_'.join([str(sufUnc), str(thrUnc), str(thrPred)]) + '.csv')) diff --git a/dev/metadata_config.py b/dev/metadata_config.py index a8ecc3482..2359d06e1 100755 --- a/dev/metadata_config.py +++ b/dev/metadata_config.py @@ -6,9 +6,8 @@ from 
ivadomed.loader.bids_dataset import BidsDataset from ivadomed import config_manager as imed_config_manager -from ivadomed.loader import utils as imed_loader_utils +from ivadomed.loader.slice_filter import SliceFilter from ivadomed import transforms as imed_transforms -from ivadomed import utils as imed_utils metadata_type = ['FlipAngle', 'EchoTime', 'RepetitionTime'] @@ -29,7 +28,7 @@ def run_main(context): contrast_lst=context["contrast_train_validation"] if subset != "test" else context["contrast_test"], transform=no_transform, - slice_filter_fn=imed_loader_utils.SliceFilter()) + slice_filter_fn=SliceFilter()) for m in metadata_type: if m in metadata_dct: diff --git a/dev/plot_cluster_metadata.py b/dev/plot_cluster_metadata.py index b11fa3408..e5d501f4b 100755 --- a/dev/plot_cluster_metadata.py +++ b/dev/plot_cluster_metadata.py @@ -6,17 +6,16 @@ import sys import os -import json import numpy as np import matplotlib.pyplot as plt from sklearn.externals import joblib from torchvision import transforms as torch_transforms +from loguru import logger from ivadomed.loader.bids_dataset import BidsDataset from ivadomed import config_manager as imed_config_manager -from ivadomed.loader import utils as imed_loader_utils -from ivadomed import utils as imed_utils +from ivadomed.loader.slice_filter import SliceFilter from ivadomed import transforms as imed_transforms metadata_type = ['FlipAngle', 'EchoTime', 'RepetitionTime'] @@ -48,7 +47,7 @@ def plot_decision_boundaries(data, model, x_range, metadata_name, fname_out): plt.xscale('log') fig.savefig(fname_out) - print('\tSave as: ' + fname_out) + logger.info(f"\tSave as: {fname_out}") def run_main(context): @@ -68,7 +67,7 @@ def run_main(context): contrast_lst=context["contrast_train_validation"] if subset != "test" else context["contrast_test"], transform=no_transform, - slice_filter_fn=imed_loader_utils.SliceFilter()) + slice_filter_fn=SliceFilter()) for m in metadata_type: if m in metadata_dct: @@ -84,7 +83,7 @@ def run_main(context): for m in metadata_type: values = [v for s in ['train', 'valid', 'test'] for v in metadata_dct[s][m]] - print('\n{}: Min={}, Max={}, Median={}'.format(m, min(values), max(values), np.median(values))) + logger.info(f"\n{m}: Min={min(values)}, Max={max(values)}, Median={np.median(values)}") plot_decision_boundaries(metadata_dct, cluster_dct[m], metadata_range[m], m, os.path.join(out_dir, m + '.png')) diff --git a/dev/plot_film_parameters.py b/dev/plot_film_parameters.py index 6363a573c..abac79933 100755 --- a/dev/plot_film_parameters.py +++ b/dev/plot_film_parameters.py @@ -13,6 +13,7 @@ from sklearn.decomposition import PCA from sklearn.manifold import TSNE +from loguru import logger from ivadomed import config_manager as imed_config_manager @@ -101,7 +102,7 @@ def visualize_tsne(data, metadata_values, layer_no, fname_out): tsne_df = pd.concat([tsne_df, tsne_df2]) - print('t-SNE done!') + logger.info("t-SNE done!") # Visualize fig = plt.figure(figsize=(16,10)) @@ -151,11 +152,11 @@ def run_main(context): try: visualize_pca(gammas[layer_no], metadata_values, layer_no, out_dir + f"/pca_gamma_{layer_no}.png") except ValueError: - print(f"No PCA for gamma from the film layer {layer_no} because of a too small dimension.") + logger.error(f"No PCA for gamma from the film layer {layer_no} because of a too small dimension.") try: visualize_pca(betas[layer_no], metadata_values, layer_no, out_dir + f"/pca_beta_{layer_no}.png") except ValueError: - print(f"No PCA for beta from the film layer {layer_no} because of a too small 
dimension.") + logger.error(f"No PCA for beta from the film layer {layer_no} because of a too small dimension.") # save tsne for betas and gammas for layer_no in gammas.keys(): diff --git a/dev/prepare_data/README.md b/dev/prepare_data/README.md index fb0a870b2..a2fda6485 100644 --- a/dev/prepare_data/README.md +++ b/dev/prepare_data/README.md @@ -1,6 +1,6 @@ # Data preparation -These scripts prepare the data for training. It takes as input the [Spinal Cord MRI Public Database](https://osf.io/76jkx/) and outputs BIDS-compatible datasets with segmentation labels for each subject. More specifically, for each subject, the segmentation is run in one volume (T1w), then all volumes are registered to the T1w volume so that all volumes are in the same voxel space and the unique segmentation can be used across volumes. +These scripts prepare the data for training. It takes as input the [Spine Generic Public Database (Multi-Subject)](https://github.com/spine-generic/data-multi-subject) and outputs BIDS-compatible datasets with segmentation labels for each subject. More specifically, for each subject, the segmentation is run in one volume (T1w), then all volumes are registered to the T1w volume so that all volumes are in the same voxel space and the unique segmentation can be used across volumes. ## Dependencies @@ -17,11 +17,11 @@ source PATH_TO_YOUR_VENV/venv-ivadomed/bin/activate #### Initial steps, check for folder integrity -- Copy the file `parameters_template.sh` and rename it as `parameters.sh`. -- Edit the file `parameters.sh` and modify the variables according to your needs. +- Copy the file `config_template.yml` and rename it as `config.yml`. +- Edit the file `config.yml` and modify the values according to your needs. - Make sure input files are present: ~~~ -sct_run_batch parameters.sh check_input_files.sh +sct_run_batch -script check_input_files.sh -config config.yml ~~~ #### Run first processing @@ -29,7 +29,7 @@ sct_run_batch parameters.sh check_input_files.sh Loop across subjects and run full processing: ~~~ -sct_run_batch parameters.sh prepare_data.sh +sct_run_batch -script prepare_data.sh -config config.yml ~~~ #### Perform QC @@ -66,18 +66,18 @@ Check the following files under e.g. `result/sub-balgrist01/anat/tmp`: - If the segmentation exists in one slice but only consists of a few pixels, because the image quality is bad or because it is no more covering the cord (e.g. brainstem), remove all pixels in the current slice (better to have no segmentation than partial segmentation). - If the spinal cord is only partially visible (this can happen in T2star scans due to the registration), zero all pixels in the slice. - Save with suffix `-manual`. -- Move to a folder named seg_manual/$FILENAME. E.g.: `spineGeneric_201903031331/seg_manual/sub-amu01_acq-T1w_MTS_crop_r_seg-manual.nii.gz` +- Move to a folder named seg_manual/$FILENAME. E.g.: `~/data-multi-subject/derivatives/seg_manual/sub-amu01_acq-T1w_MTS_crop_r_seg-manual.nii.gz` #### Exclude images -If some images are of unacceptable quality, they could be excluded from the final output dataset. List images to exclude in **parameters.sh** using the field `TO_EXCLUDE`. Note: Only write the file prefix (see **parameters_template.sh** for examples). +If some images are of unacceptable quality, they could be excluded from the final output dataset. List images to exclude in **config.yml** using the field `exclude-list`. 
#### Re-run processing (using manually-corrected segmentations)
 
-Make sure to update the field `PATH_SEGMANUAL` in the file `parameters.sh`, then re-run:
+Make sure to place your manually-corrected segmentations in the directory specified by `config.yml`, then re-run:
~~~
-sct_run_batch parameters.sh prepare_data.sh
+sct_run_batch -script prepare_data.sh -config config.yml
~~~
 
 #### Copy files, final QC
@@ -85,7 +85,7 @@ sct_run_batch parameters.sh prepare_data.sh
 Copy final files to anat/, copy json sidecars, move segmentations to derivatives/ and generate another QC:
~~~
-sct_run_batch parameters.sh final_qc.sh
+sct_run_batch -script final_qc.sh -config config.yml
~~~
 
 - Open the new QC: qc2/index.html
@@ -100,5 +100,5 @@ sct_run_batch parameters.sh final_qc.sh
 Once QC and manual correction is done, remove tmp/ folder:
~~~
-sct_run_batch parameters.sh delete_tmp_files.sh
+sct_run_batch -script delete_tmp_files.sh -config config.yml
~~~
diff --git a/dev/prepare_data/check_input_files.sh b/dev/prepare_data/check_input_files.sh
index b0b8e9c3d..b4eecbd7e 100755
--- a/dev/prepare_data/check_input_files.sh
+++ b/dev/prepare_data/check_input_files.sh
@@ -10,12 +10,9 @@ trap "echo Caught Keyboard Interrupt within script. Exiting now.; exit" INT
 
 # Retrieve input params
 SUBJECT=$1
-PATH_RESULTS=$2
-PATH_QC=$3
-PATH_LOG=$4
 
 # Set filenames
-PATH_IN="`pwd`/${SUBJECT}/anat"
+PATH_IN="${PATH_DATA}/${SUBJECT}/anat"
 file_t1w_mts="${SUBJECT}_acq-T1w_MTS"
 file_mton="${SUBJECT}_acq-MTon_MTS"
 file_mtoff="${SUBJECT}_acq-MToff_MTS"
diff --git a/dev/prepare_data/clean.sh b/dev/prepare_data/clean.sh
index 9ef5c5fcc..59e926d64 100755
--- a/dev/prepare_data/clean.sh
+++ b/dev/prepare_data/clean.sh
@@ -13,12 +13,8 @@ trap "echo Caught Keyboard Interrupt within script. Exiting now.; exit" INT
 
 # Retrieve input params
 SUBJECT=$1
-SITE=$2
-PATH_RESULTS=$3
-PATH_QC=$4
-PATH_LOG=$5
 
-cd ${SUBJECT}/anat
+cd ${SUBJECT}/anat || exit
 rm straightening.cache
 rm ${SUBJECT}_acq-T1w_MTS_seg.nii.gz
 rm ${SUBJECT}_T1w_seg.nii.gz
diff --git a/dev/prepare_data/config_template.yml b/dev/prepare_data/config_template.yml
new file mode 100644
index 000000000..2366a1761
--- /dev/null
+++ b/dev/prepare_data/config_template.yml
@@ -0,0 +1,40 @@
+########################################################################################################
+# config.yml: Configuration parameters for sct_run_batch
+#
+# Usage: "sct_run_batch -script prepare_data.sh -config config.yml"
+#
+# Each "key: value" entry in this file corresponds to an input argument for 'sct_run_batch'. For example,
+# you could call "sct_run_batch -path-data ./data -path-output ./output [...]" with all of the arguments
+# on the command line, and you would get the same results. However, we strongly recommend using a config
+# file like this one, as it will help with reproducibility.
+########################################################################################################
+
+# Path to the folder containing the BIDS dataset
+path_data: "/home/julien/data-multi-subject/"
+
+# Path to save the output to. This path determines the following subdirectory environment variables:
+# - PATH_DATA_PROCESSED: <path_output>/data_processed
+# - PATH_RESULTS: <path_output>/results
+# - PATH_QC: <path_output>/qc
+# - PATH_LOG: <path_output>/log
+# You can then use these subdirectory environment variables to direct the output of your batch processing script.
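+#
+# For instance, a processing script launched through sct_run_batch can use these variables directly;
+# a sketch mirroring prepare_data.sh in this folder:
+#   ofolder_seg="${PATH_DATA_PROCESSED}/derivatives/labels/${SUBJECT}/anat"
+#   ofolder_reg="${PATH_DATA_PROCESSED}/${SUBJECT}/anat"
+#   mkdir -p ${ofolder_reg} ${ofolder_seg}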
+path_output: "/home/julien/data-multi-subject/derivatives" + +# Location of manually-corrected segmentations +path_segmanual: "/home/julien/data-multi-subject/derivatives/seg_manual" + +# If each subject folder starts with a prefix, indicate it here. Otherwise, set to "" +subject_prefix: "sub-" + +# If you want to process only specific subjects, uncomment the line below: +#include-list: sub-01 sub-05 +# If you want to exclude specific subjects, uncomment the line below +#exclude-list: sub-03 sub-07 + +# Number of jobs for parallel processing +# To know the number of available cores, run: getconf _NPROCESSORS_ONLN +# We recommend not using more than half the number of available cores. +jobs: 4 + +# Number of jobs for ANTs routine. Set to 1 if ANTs functions crash when CPU saturates. +#itk_threads: 1 \ No newline at end of file diff --git a/dev/prepare_data/delete_tmp_files.sh b/dev/prepare_data/delete_tmp_files.sh index 599706da7..01e97d631 100755 --- a/dev/prepare_data/delete_tmp_files.sh +++ b/dev/prepare_data/delete_tmp_files.sh @@ -12,12 +12,9 @@ set -e # Retrieve input params SUBJECT=$1 -FILEPARAM=$2 - -source $FILEPARAM # Create BIDS architecture -ofolder_reg="${PATH_RESULTS}/${SUBJECT}/anat" +ofolder_reg="${PATH_DATA_PROCESSED}/${SUBJECT}/anat" # Go to output anat folder, where most of the outputs will be located cd ${ofolder_reg} diff --git a/dev/prepare_data/final_qc.sh b/dev/prepare_data/final_qc.sh index c7f5f9f78..d923a12a5 100755 --- a/dev/prepare_data/final_qc.sh +++ b/dev/prepare_data/final_qc.sh @@ -16,14 +16,11 @@ set -e # Retrieve input params SUBJECT=$1 -FILEPARAM=$2 - -source $FILEPARAM # Create BIDS architecture -PATH_IN="`pwd`/${SUBJECT}/anat" -ofolder_seg="${PATH_RESULTS}/derivatives/labels/${SUBJECT}/anat" -ofolder_reg="${PATH_RESULTS}/${SUBJECT}/anat" +PATH_IN="${PATH_DATA}/${SUBJECT}/anat" +ofolder_seg="${PATH_DATA_PROCESSED}/derivatives/labels/${SUBJECT}/anat" +ofolder_reg="${PATH_DATA_PROCESSED}/${SUBJECT}/anat" # Set filenames file_t1w_mts="${SUBJECT}_acq-T1w_MTS" diff --git a/dev/prepare_data/parameters_template.sh b/dev/prepare_data/parameters_template.sh deleted file mode 100755 index 155f1657b..000000000 --- a/dev/prepare_data/parameters_template.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash -# Environment variables for the spineGeneric study. - -# Set every other path relative to this path for convenience -# Do not add "/" at the end. Path should be absolute (i.e. do not use "~") -export PATH_PARENT="/Users/julien/spineGeneric_multiSubjects" - -# Path to the folder containing the BIDS dataset. -# Do not add "/" at the end. Path should be absolute (i.e. do not use "~") -export PATH_DATA="${PATH_PARENT}/data" - -# Paths to where to save the new dataset. -# Do not add "/" at the end. Path should be absolute (i.e. 
do not use "~") -export PATH_RESULTS="${PATH_PARENT}/results" -export PATH_QC="${PATH_PARENT}/qc" -export PATH_LOG="${PATH_PARENT}/log" - -# Location of manually-corrected segmentations -export PATH_SEGMANUAL="${PATH_PARENT}/seg_manual" - -# If you only want to process specific subjects, uncomment and list them here: -# export ONLY_PROCESS_THESE_SUBJECTS=( -# "sub-amu01" -# "sub-amu02" -# ) - -# List of images to exclude because of poor quality -# export TO_EXCLUDE=( - # "sub-brno02_T1w" - # "sub-brno03_T2w" - # "sub-unf04_T2star" - # "sub-unf03_acq-MToff_MTS" - # "sub-unf05_acq-MTon_MTS" - # "sub-unf05_acq-T1w_MTS" -# ) - -file_t1w_mts="${SUBJECT}_acq-T1w_MTS" -file_mton="${SUBJECT}_acq-MTon_MTS" -file_mtoff="${SUBJECT}_acq-MToff_MTS" -file_t2w="${SUBJECT}_T2w" -file_t2s="${SUBJECT}_T2star" -file_t1w="${SUBJECT}_T1w" - -# Number of jobs for parallel processing -# To know the number of available cores, run: getconf _NPROCESSORS_ONLN -# We recommend not using more than half the number of available cores. -export JOBS=4 - -# Number of jobs for ANTs routine. Set to 1 if ANTs functions crash when CPU saturates. -export ITK_GLOBAL_DEFAULT_NUMBER_OF_THREADS=1 diff --git a/dev/prepare_data/prepare_data.sh b/dev/prepare_data/prepare_data.sh index 606847ecd..8f590b6d5 100755 --- a/dev/prepare_data/prepare_data.sh +++ b/dev/prepare_data/prepare_data.sh @@ -20,14 +20,11 @@ trap "echo Caught Keyboard Interrupt within script. Exiting now.; exit" INT # Retrieve input params SUBJECT=$1 -FILEPARAM=$2 - -source $FILEPARAM # Create BIDS architecture -PATH_IN="`pwd`/${SUBJECT}/anat" -ofolder_seg="${PATH_RESULTS}/derivatives/labels/${SUBJECT}/anat" -ofolder_reg="${PATH_RESULTS}/${SUBJECT}/anat" +PATH_IN="${PATH_DATA}/${SUBJECT}/anat" +ofolder_seg="${PATH_DATA_PROCESSED}/derivatives/labels/${SUBJECT}/anat" +ofolder_reg="${PATH_DATA_PROCESSED}/${SUBJECT}/anat" mkdir -p ${ofolder_reg} mkdir -p ${ofolder_seg} diff --git a/dev/seek_contrast_sctTesting.py b/dev/seek_contrast_sctTesting.py index d8e8497fd..25a29627b 100755 --- a/dev/seek_contrast_sctTesting.py +++ b/dev/seek_contrast_sctTesting.py @@ -6,18 +6,19 @@ import os from tqdm import tqdm +from loguru import logger PATH_SCTTESTING = os.path.join(os.path.expanduser('~'), 'duke', 'sct_testing', 'large') def run_main(): if not os.path.isdir(PATH_SCTTESTING): - print('\nThis folder does not exist: {}'.format(PATH_SCTTESTING)) - print('Please change the path at the top of this file') + logger.warning(f"\nThis folder does not exist: {PATH_SCTTESTING}") + logger.warning("Please change the path at the top of this file") subj_lst = [os.path.join(PATH_SCTTESTING, s, 'anat') for s in os.listdir(PATH_SCTTESTING) if os.path.isdir(os.path.join(PATH_SCTTESTING, s, 'anat'))] - print('\n{} subjects found.\n'.format(str(len(subj_lst)))) + logger.info(f"\n{len(subj_lst)} subjects found.\n") contrast_lst_lst = [] for subj_fold in tqdm(subj_lst, desc="Scanning dataset"): @@ -27,9 +28,9 @@ def run_main(): contrast_lst = [sublst for lst in contrast_lst_lst for sublst in lst] contrast_lst_noDuplicate = list(set(contrast_lst)) - print('\n{} contrasts found.\n'.format(str(len(contrast_lst_noDuplicate)))) + logger.info(f"\n{len(contrast_lst_noDuplicate)} contrasts found.\n") - print('["{}"]'.format('", "'.join(contrast_lst_noDuplicate))) + logger.info(f"['{', '.join(contrast_lst_noDuplicate)}']") if __name__ == "__main__": diff --git a/dev/target_size.py b/dev/target_size.py index 5041155ca..3439b9fb4 100755 --- a/dev/target_size.py +++ b/dev/target_size.py @@ -24,8 +24,10 @@ import 
numpy as np
 import seaborn as sns
 from scipy.ndimage import label, generate_binary_structure
+from loguru import logger
 
 from ivadomed import config_manager as imed_config_manager
+from ivadomed import utils as imed_utils
 
 def get_parser():
     parser = argparse.ArgumentParser()
@@ -34,12 +36,6 @@ def get_parser():
     return parser
 
-def print_stats(arr):
-    print('\tMean: {}'.format(np.mean(arr)))
-    print('\tMedian: {}'.format(np.median(arr)))
-    print('\tInter-quartile range: [{}, {}]'.format(np.percentile(arr, 25), np.percentile(arr, 75)))
-
-
 def plot_distrib(arr, label, xlim, fname_out):
     fig = plt.figure()
@@ -52,7 +48,7 @@ def plot_distrib(arr, label, xlim, fname_out):
     plt.xlim(xlim)
     plt.ylabel('Density')
     fig.savefig(fname_out)
-    print('\tSave as: ' + fname_out)
+    logger.info(f"\tSave as: {fname_out}")
 
 def run_main(args):
@@ -85,14 +81,14 @@ def run_main(args):
             vox_lst.append(n_vox)
             mm3_lst.append(n_vox * px * py * pz)
 
-    print('\nTarget distribution in vox:')
-    print_stats(vox_lst)
+    logger.info("\nTarget distribution in vox:")
+    imed_utils.print_stats(vox_lst)
     plot_distrib(vox_lst, context["target_suffix"] + ' size in vox', [0, np.percentile(vox_lst, 90)],
                  context["target_suffix"] + '_vox.png')
 
-    print('\nTarget distribution in mm3:')
-    print_stats(mm3_lst)
+    logger.info("\nTarget distribution in mm3:")
+    imed_utils.print_stats(mm3_lst)
     plot_distrib(mm3_lst, context["target_suffix"] + ' size in mm3', [0, np.percentile(mm3_lst, 90)],
                  context["target_suffix"] + '_mm3.png')
diff --git a/docs/source/api_ref.rst b/docs/source/api_ref.rst
index fb9df2623..265b90255 100644
--- a/docs/source/api_ref.rst
+++ b/docs/source/api_ref.rst
@@ -6,11 +6,6 @@ This document is for developers of ``ivadomed``, it contains the API functions.
 Loader API
 ++++++++++
 
-loader.adaptative
-^^^^^^^^^^^^^^^^^
-
-.. automodule:: ivadomed.loader.adaptative
-
 loader.film
 ^^^^^^^^^^^
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 43a38acbe..0c3e85c08 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -88,6 +88,8 @@ def patched_parse(self):
     'sphinx.ext.intersphinx',
     'sphinx.ext.autosectionlabel',
     'sphinx-jsonschema',
+    'sphinx_tabs.tabs',
+    'sphinx_toolbox.collapse',
 ]
 
 autoclass_content = "both"
diff --git a/docs/source/configuration_file.rst b/docs/source/configuration_file.rst
index e3d31bce3..4c933317a 100644
--- a/docs/source/configuration_file.rst
+++ b/docs/source/configuration_file.rst
@@ -48,8 +48,8 @@ General Parameters
   {
     "$schema": "http://json-schema.org/draft-04/schema#",
     "title": "gpu_ids",
-    "description": "List of IDs of one or more GPUs to use.",
-    "type": "list * integer"
+    "description": "List of IDs of one or more GPUs to use. Default: ``[0]``.",
+    "type": "list[int]"
   }
 
..
code-block:: JSON

   { "gpu_ids": [0] }

@@ -66,7 +66,7 @@ General Parameters

   {
     "$schema": "http://json-schema.org/draft-04/schema#",
-    "title": "log_directory",
+    "title": "path_output",
     "description": "Folder name that will contain the output files (e.g., trained model, predictions, results).",
     "type": "string"
   }
@@ -88,7 +88,7 @@ General Parameters
     "$schema": "http://json-schema.org/draft-04/schema#",
     "title": "model_name",
     "description": "Folder name containing the trained model (ONNX format) and its configuration
-    file, located within ``log_directory/``",
+    file, located within ``path_output/``.",
     "type": "string"
   }
@@ -122,7 +122,7 @@ When possible, the folder name will follow the following convention:
   {
     "$schema": "http://json-schema.org/draft-04/schema#",
     "title": "debugging",
-    "description": "Extended verbosity and intermediate outputs.",
+    "description": "Extended verbosity and intermediate outputs. Default: ``False``.",
     "type": "boolean"
   }
@@ -140,7 +140,7 @@ When possible, the folder name will follow the following convention:
   {
     "$schema": "http://json-schema.org/draft-04/schema#",
     "title": "log_file",
-    "description": "Name of the file to be logged to, located within ``log_directory/``",
+    "description": "Name of the file to be logged to, located within ``path_output/``. Default: ``log``.",
     "type": "string"
   }
@@ -153,6 +153,141 @@ When possible, the folder name will follow the following convention:
   }
 
+Weights & Biases (WandB)
+------------------------
+
+WandB is an additional option to track your DL experiments. It provides a
+feature-rich dashboard (accessible through any web browser) to track and visualize the learning
+curves, gradients, and media. It is recommended to set up a personal
+WandB account to track experiments; however, you can still train ivadomed models
+without an account, since the metrics are logged to TensorBoard by default.
+
+
+
+.. jsonschema::
+
+   {
+     "$schema": "http://json-schema.org/draft-04/schema#",
+     "title": "wandb_api_key",
+     "$$description": [
+       "A private key used to sync the local wandb folder with the wandb dashboard accessible through the browser.\n",
+       "The API key can be found from the browser in your WandB Account's Settings, under the section ``API Keys``.\n",
+       "Note that once it is successfully authenticated, a message will be printed in the terminal notifying you\n",
+       "that the API key is stored in the ``.netrc`` file in your home folder."
+     ],
+     "type": "string"
+   }
+
+.. code-block:: JSON
+
+   {
+     "wandb": {
+       "wandb_api_key": ""
+     }
+   }
+
+
+.. jsonschema::
+
+   {
+     "$schema": "http://json-schema.org/draft-04/schema#",
+     "title": "project_name",
+     "$$description": [
+       "Defines the name of the current project to which the groups and runs will be synced. Default: ``my_project``."
+     ],
+     "type": "string"
+   }
+
+.. code-block:: JSON
+
+   {
+     "wandb": {
+       "project_name": "my-temp-project"
+     }
+   }
+
+
+.. jsonschema::
+
+   {
+     "$schema": "http://json-schema.org/draft-04/schema#",
+     "title": "group_name",
+     "$$description": [
+       "Defines the name of the group to which the runs will be synced. On the WandB Dashboard,\n",
+       "the groups can be found by clicking the ``Projects`` tab on the left. Default: ``my_group``."
+     ],
+     "type": "string"
+   }
+
+.. code-block:: JSON
+
+   {
+     "wandb": {
+       "group_name": "my-temp-group"
+     }
+   }
+
+
+.. jsonschema::
+
+   {
+     "$schema": "http://json-schema.org/draft-04/schema#",
+     "title": "run_name",
+     "$$description": [
+       "Defines the name of the current run (or experiment). 
All the previous and active runs\n", + "can be found under the corresponding group on the WandB Dashboard. Default: ``run-1``." + ], + "type": "string" + } + +.. code-block:: JSON + + { + "wandb": { + "run_name": "run-1" + } + } + +.. note:: + Using the same ``run_name`` does not replace the previous run but does create multiple entries of the same name. If left empty then the default is a random string assigned by WandB. + +.. note:: + We recommend defining the project/group/run names such that hierarchy is easily understandable. For instance, ``project_name`` could be the name of the dataset or the problem you are working (i.e. brain tumor segmentation/spinal cord lesion segmentation etc.), the ``group_name`` could be the various models you are willing to train, and the ``run_name`` could be the various experiments within a particular model (i.e. typically with different hyperparameters). + +.. jsonschema:: + + { + "$schema": "http://json-schema.org/draft-04/schema#", + "title": "log_grads_every", + "$$description": [ + "Defines the frequency (in number of steps) of the logging of gradients on to the Dashboard to track and visualize\n", + "their histograms as the model trains. Default: ``100``.\n" + ], + "type": "int" + } + +.. code-block:: JSON + + { + "wandb": { + "log_grads_every": 100 + } + } + +.. note:: + There are two important points to be noted: + (1) Gradients can be large so they can consume more storage space if ``log_grads_every`` is set to a small number, + (2) ``log_grads_every`` also depends on the total duration of training, i.e. if the model is run for only + a few epochs, gradients might not be logged if ``log_grads_every`` is too large. Hence, the right frequency of + gradient logging depends on the training duration and model size. + +.. note:: + If ``debugging = True`` is specified in the config file, the training and validation input images, ground truth labels, and + the model predictions are also periodically logged on WandB, which can be seen under ``Media`` on the WandB Dashboard. + + + + Loader Parameters ----------------- @@ -162,7 +297,7 @@ Loader Parameters "$schema": "http://json-schema.org/draft-04/schema#", "title": "path_data", "description": "Path(s) of the BIDS folder(s).", - "type": "list or str" + "type": "str or list[str]" } @@ -191,8 +326,10 @@ Alternatively: { "$schema": "http://json-schema.org/draft-04/schema#", "title": "bids_config", - "description": "(Optional). Path of the custom BIDS configuration file for - BIDS non-compliant modalities", + "$$description": [ + "(Optional). Path of the custom BIDS configuration file for", + "BIDS non-compliant modalities. Default: ``ivadomed/config/config_bids.json``." + ], "type": "string" } @@ -216,19 +353,19 @@ Alternatively: "type": "dict", "options": { "n": { - "description": "List containing the number subjects of each metadata.", - "type": "list" + "description": "List containing the number subjects of each metadata. Default: ``[]``.", + "type": "list[int]" }, "metadata": { "$$description": [ "List of metadata used to select the subjects. Each metadata should be the name\n", - "of a column from the participants.tsv file." + "of a column from the participants.tsv file. Default: ``[]``." ], - "type": "list" + "type": "list[str]" }, "value": { - "description": "List of metadata values of the subject to be selected.", - "type": "list" + "description": "List of metadata values of the subject to be selected. Default: ``[]``.", + "type": "list[str]" } } } @@ -257,8 +394,8 @@ for training/testing. 
{ "$schema": "http://json-schema.org/draft-04/schema#", "title": "target_suffix", - "description": "Suffix list of the derivative file containing the ground-truth of interest.", - "type": "list * string" + "description": "Suffix list of the derivative file containing the ground-truth of interest. Default: ``[]``.", + "type": "list[str]" } @@ -286,9 +423,11 @@ will be randomly chosen. { "$schema": "http://json-schema.org/draft-04/schema#", "title": "extensions", - "description": "Used to specify a list of file extensions to be selected for - training/testing. If not specified, then `.nii` and `.nii.gz` will be used by default.", - "type": "list, string" + "$$description": [ + "Used to specify a list of file extensions to be selected for training/testing.\n", + "Must include the file extensions of both the raw data and derivatives. Default: ``[]``." + ], + "type": "list[str]" } @@ -309,21 +448,21 @@ will be randomly chosen. "title": "contrast_params", "type": "dict", "options": { - "train_validation": { - "type": "list, string", + "training_validation": { + "type": "list[str]", "$$description": [ "List of image contrasts (e.g. ``T1w``, ``T2w``) loaded for the training and\n", "validation. If ``multichannel`` is ``true``, this list represents the different\n", "channels of the input tensors (i.e. its length equals model's ``in_channel``).\n", "Otherwise, the contrasts are mixed and the model has only one input channel\n", - "(i.e. model's ``in_channel=1``)" + "(i.e. model's ``in_channel=1``)." ] }, - "test": { - "type": "list, string", + "testing": { + "type": "list[str]", "$$description": [ "List of image contrasts (e.g. ``T1w``, ``T2w``) loaded in the testing dataset.\n", - "Same comment as for ``train_validation`` regarding ``multichannel``." + "Same comment as for ``training_validation`` regarding ``multichannel``." ] }, "balance": { @@ -332,7 +471,7 @@ will be randomly chosen. "Enables to weight the importance of specific channels (or contrasts) in the\n", "dataset: e.g. ``{'T1w': 0.1}`` means that only 10% of the available ``T1w``\n", "images will be included into the training/validation/test set. Please set\n", - "``multichannel`` to ``false`` if you are using this parameter." + "``multichannel`` to ``false`` if you are using this parameter. Default: ``{}``." ] } } @@ -359,11 +498,11 @@ will be randomly chosen. "$schema": "http://json-schema.org/draft-04/schema#", "title": "multichannel", "description": "Indicated if more than a contrast (e.g. ``T1w`` and ``T2w``) is - used by the model.", + used by the model. Default: ``False``.", "type": "boolean" } -See details in both ``train_validation`` and ``test`` for the contrasts that are input. +See details in both ``training_validation`` and ``testing`` for the contrasts that are input. @@ -376,12 +515,32 @@ See details in both ``train_validation`` and ``test`` for the contrasts that are } +.. jsonschema:: + + { + "$schema": "http://json-schema.org/draft-04/schema#", + "title": "bids_validate", + "description": "Indicates if the loader should validate the dataset for compliance with BIDS. Default: ``True``.", + "type": "boolean" + } + + + +.. code-block:: JSON + + { + "loader_parameters": { + "bids_validate": true + } + } + + .. jsonschema:: { "$schema": "http://json-schema.org/draft-04/schema#", "title": "slice_axis", - "description": "Sets the slice orientation for 3D NIfTI files on which the model will be used.", + "description": "Sets the slice orientation for 3D NIfTI files on which the model will be used. 
Default: ``axial``.", "type": "string", "options": {"sagittal": "plane dividing body into left/right", "coronal": "plane dividing body into front/back", @@ -405,37 +564,38 @@ See details in both ``train_validation`` and ``test`` for the contrasts that are { "$schema": "http://json-schema.org/draft-04/schema#", "title": "slice_filter_params", - "description": "Discard a slice from the dataset if it meets a condition, see - below.", + "$$description": [ + "Discard a slice from the dataset if it meets a condition, defined below.\n", + "A slice is an entire 2D image taken from a 3D volume (e.g. an image of size 128x128 taken from a volume of size 128x128x16).\n", + "Therefore, the parameter ``slice_filter_params`` is applicable for 2D models only.", + ], "type": "dict", "options": { "filter_empty_input": { "type": "boolean", - "description": "Discard slices where all voxel - intensities are zeros." + "description": "Discard slices where all voxel intensities are zeros. Default: ``True``." }, "filter_empty_mask": { "type": "boolean", - "description": "Discard slices where all voxel labels are zeros." + "description": "Discard slices where all voxel labels are zeros. Default: ``False``." }, "filter_absent_class": { "type": "boolean", "$$description": [ "Discard slices where all voxel labels are zero for one or more classes\n", - "(this is most relevant for multi-class models that need GT for all classes at train time)." + "(this is most relevant for multi-class models that need GT for all classes at training time). Default: ``False``." ] }, "filter_classification": { "type": "boolean", "$$description": [ "Discard slices where all images fail a custom classifier filter. If used,\n", - "``classifier_path`` must also be specified, pointing to a saved PyTorch classifier." + "``classifier_path`` must also be specified, pointing to a saved PyTorch classifier. Default: ``False``." ] } } } - .. code-block:: JSON { @@ -447,20 +607,68 @@ See details in both ``train_validation`` and ``test`` for the contrasts that are } } + +.. jsonschema:: + + { + "$schema": "http://json-schema.org/draft-04/schema#", + "title": "patch_filter_params", + "$$description": [ + "Discard a 2D or 3D patch from the dataset if it meets a condition at training time, defined below.\n", + "A 2D patch is a portion of a 2D image (e.g. a patch of size 32x32 taken inside an image of size 128x128).\n", + "A 3D patch is a portion of a 3D volume (e.g. a patch of size 32x32x16 from a volume of size 128x128x16).\n", + "Therefore, the parameter ``patch_filter_params`` is applicable for 2D or 3D models.\n", + "In addition, contrary to ``slice_filter_params`` which applies at training and testing time, ``patch_filter_params``\n", + "is applied only at training time. This is because the reconstruction algorithm for predictions from patches\n", + "needs to have the predictions for all patches at testing time." + ], + "type": "dict", + "options": { + "filter_empty_input": { + "type": "boolean", + "description": "Discard 2D or 3D patches where all voxel intensities are zeros. Default: ``False``." + }, + "filter_empty_mask": { + "type": "boolean", + "description": "Discard 2D or 3D patches where all voxel labels are zeros. Default: ``False``." + }, + "filter_absent_class": { + "type": "boolean", + "$$description": [ + "Discard 2D or 3D patches where all voxel labels are zero for one or more classes\n", + "(this is most relevant for multi-class models that need GT for all classes).\n", + "Default: ``False``." + ] + } + } + } + + +.. 
code-block:: JSON + + { + "loader_parameters": { + "patch_filter_params": { + "filter_empty_mask": false, + "filter_empty_input": false + } + } + } + + .. jsonschema:: { "$schema": "http://json-schema.org/draft-04/schema#", "title": "roi_params", - "description": "Parameters for the region of interest", + "description": "Parameters for the region of interest.", "type": "dict", "options": { "suffix": { "type": "string", "$$description": [ "Suffix of the derivative file containing the ROI used to crop\n", - "(e.g. ``_seg-manual``) with ``ROICrop`` as transform. Please use ``null`` if", - "you do not want to use an ROI to crop." + "(e.g. ``_seg-manual``) with ``ROICrop`` as transform. Default: ``null``." ] }, "slice_filter_roi": { @@ -470,7 +678,7 @@ See details in both ``train_validation`` and ``test`` for the contrasts that are "the slice will be discarded from the dataset. This feature helps with\n", "noisy labels, e.g., if a slice contains only 2-3 labeled voxels, we do\n", "not want to use these labels to crop the image. This parameter is only\n", - "considered when using ``ROICrop``." + "considered when using ``ROICrop``. Default: ``null``." ] } } @@ -497,7 +705,8 @@ See details in both ``train_validation`` and ``test`` for the contrasts that are "$$description": [ "Indicates if a soft mask will be used as ground-truth to train\n", "and / or evaluate a model. In particular, the masks are not binarized\n", - "after interpolations implied by preprocessing or data-augmentation operations." + "after interpolations implied by preprocessing or data-augmentation operations.\n", + "Approach inspired by the `SoftSeg `__ paper. Default: ``False``." ], "type": "boolean" } @@ -510,6 +719,14 @@ See details in both ``train_validation`` and ``test`` for the contrasts that are } } +.. note:: + To get the full advantage of the soft segmentations, in addition to setting + ``soft_gt: true`` the following keys in the config file must also be changed: + (i) ``final_activation: relu`` - to use the normalized ReLU activation function + (ii) ``loss: AdapWingLoss`` - a regression loss described in the + paper. Note: It is also recommended to use the ``DiceLoss`` since convergence + with ``AdapWingLoss`` is sometimes difficult to achieve. + .. jsonschema:: { @@ -520,7 +737,7 @@ See details in both ``train_validation`` and ``test`` for the contrasts that are "This option trains a model to be robust to missing modalities by setting \n", "to zero input channels (from 0 to all channels - 1). Always at least one \n", "channel will remain. If one or more modalities are already missing, they will \n", - "be considered as dropped." + "be considered as dropped. Default: ``False``." ], "type": "boolean" } @@ -544,11 +761,9 @@ Split Dataset "$schema": "http://json-schema.org/draft-04/schema#", "title": "fname_split", "$$description": [ - "File name of the log (`joblib `__)\n", - "that contains the list of training/validation/testing filenames. This file can later\n", - "be used to re-train a model using the same data splitting scheme. If ``null``,\n", - "a new splitting scheme is performed. If specified, the .joblib file data splitting scheme\n", - "bypasses all the other split dataset parameters." + "Name of the `joblib `__ file that was generated during a previous training, and that contains the list of training/validation/testing filenames.\n", + "Specifying the .joblib file ensures reproducible data splitting across multiple trainings. When specified, the other split parameters are\n", + "ignored. 
If ``null`` is specified, a new splitting scheme is performed." ], "type": "string" } @@ -571,7 +786,7 @@ Split Dataset "$$description": [ "Seed used by the random number generator to split the dataset between\n", "training/validation/testing sets. The use of the same seed ensures the same split between\n", - "the sub-datasets, which is useful for reproducibility." + "the sub-datasets, which is useful for reproducibility. Default: ``6``." ], "type": "int" } @@ -591,10 +806,10 @@ Split Dataset "$schema": "http://json-schema.org/draft-04/schema#", "title": "split_method", "$$description": [ - "Metadata contained in a BIDS tabular file on which the files are shuffled, then split\n", - "between train/validation/test, according to ``train_fraction`` and ``test_fraction``.\n", - "For example, ``participant_id`` from the ``participants.tsv`` file will shuffle all participants,\n", - "then split between train/validation/test sets." + "Metadata contained in a BIDS tabular (TSV) file or a BIDS sidecar JSON file on which the files are shuffled\n", + "then split between train/validation/test, according to ``train_fraction`` and ``test_fraction``.\n", + "For examples, ``participant_id`` will shuffle all participants from the ``participants.tsv`` file\n", + "then split between train/validation/test sets. Default: ``participant_id``." ], "type": "string" } @@ -614,22 +829,23 @@ Split Dataset "title": "data_testing", "$$description": ["(Optional) Used to specify a custom metadata to only include in the testing dataset (not validation).\n", "For example, to not mix participants from different institutions between the train/validation set and the test set,\n", - "use the column ``institution_id`` from ``participants.tsv`` in ``data_type``.\n" - ], + "use the column ``institution_id`` from ``participants.tsv`` in ``data_type``.\n" + ], "type": "dict", "options": { "data_type": { "$$description": [ - "Metadata to include in the testing dataset.\n", - "If specified, the ``test_fraction`` is applied to this metadata." + "Metadata to include in the testing dataset.\n", + "If specified, the ``test_fraction`` is applied to this metadata.\n", + "If not specified, ``data_type`` is the same as ``split_method``. Default: ``null``." ], "type": "string" }, "data_value": { "$$description": [ - "(Optional) List of metadata values from the ``data_type`` column to include in\n", - "the testing dataset. If specified, the testing set contains only files from the\n", - "``data_value`` list and the ``test_fraction`` is not used." + "(Optional) List of metadata values from the ``data_type`` column to include in the testing dataset.\n", + "If specified, the testing set contains only files from the ``data_value`` list and the ``test_fraction`` is not used.\n", + "If not specified, create a random ``data_value`` according to ``data_type`` and ``test_fraction``. Default: ``[]``." ], "type": "list" } @@ -651,7 +867,7 @@ Split Dataset "title": "balance", "$$description": [ "Metadata contained in ``participants.tsv`` file with categorical values. Each category\n", - "will be evenly distributed in the training, validation and testing datasets." + "will be evenly distributed in the training, validation and testing datasets. Default: ``null``." ], "type": "string", "required": "false" @@ -670,7 +886,7 @@ Split Dataset { "$schema": "http://json-schema.org/draft-04/schema#", "title": "train_fraction", - "description": "Fraction of the dataset used as training set.", + "description": "Fraction of the dataset used as training set. 
Default: ``0.6``.", "type": "float", "range": "[0, 1]" } @@ -688,9 +904,7 @@ Split Dataset { "$schema": "http://json-schema.org/draft-04/schema#", "title": "test_fraction", - "$$description": [ - "Fraction of the dataset used as testing set.\n" - ], + "description": "Fraction of the dataset used as testing set. Default: ``0.2``.", "type": "float", "range": "[0, 1]" } @@ -703,6 +917,55 @@ Split Dataset } } +.. note:: + .. line-block:: + The fraction of the dataset used as validation set will correspond to ``1 - train_fraction - test_fraction``. + For example: ``1 - 0.6 - 0.2 = 0.2``. + + +Cascaded Models +--------------- + +.. jsonschema:: + + { + "$schema": "http://json-schema.org/draft-04/schema#", + "title": "object_detection_params", + "type": "dict", + "required": "false", + "options": { + "object_detection_path": { + "type": "string", + "$$description": [ + "Path to the object detection model. The folder,\n", + "configuration file, and model need to have the same name\n", + "(e.g. ``findcord_tumor/``, ``findcord_tumor/findcord_tumor.json``, and\n", + "``findcord_tumor/findcord_tumor.onnx``, respectively). The model's prediction\n", + "will be used to generate bounding boxes. Default: ``null``." + ] + }, + "safety_factor": { + "type": "[int, int, int]", + "$$description": [ + "List of length 3 containing the factors to multiply each dimension of the\n", + "bounding box. Ex: If the original bounding box has a size of 10x20x30 with\n", + "a safety factor of [1.5, 1.5, 1.5], the final dimensions of the bounding box\n", + "will be 15x30x45 with an unchanged center. Default: ``[1.0, 1.0, 1.0]``." + ] + } + } + } + +.. code-block:: JSON + + { + "object_detection_params": { + "object_detection_path": null, + "safety_factor": [1.0, 1.0, 1.0] + } + } + + Training Parameters ------------------- @@ -713,6 +976,7 @@ Training Parameters "$schema": "http://json-schema.org/draft-04/schema#", "title": "batch_size", "type": "int", + "description": "Defines the number of samples that will be propagated through the network. Default: ``18``.", "range": "(0, inf)" } @@ -732,14 +996,13 @@ Training Parameters "title": "loss", "$$description": [ "Metadata for the loss function. Other parameters that could be needed in the\n", - "Loss function definition: see attributes of the Loss function of interest\n", - "(e.g. ``'gamma': 0.5`` for ``FocalLoss``)." + "Loss function definition: see attributes of the Loss function of interest (e.g. ``'gamma': 0.5`` for ``FocalLoss``)." ], "type": "dict", "options": { "name": { "type": "string", - "description": "Name of the loss function class. See :mod:`ivadomed.losses`" + "description": "Name of the loss function class. See :mod:`ivadomed.losses`. Default: ``DiceLoss``." } } } @@ -760,11 +1023,6 @@ Training Parameters { "$schema": "http://json-schema.org/draft-04/schema#", "title": "training_time", - "$$description": [ - "Metadata for the loss function. Other parameters that could be needed in the\n", - "Loss function definition: see attributes of the Loss function of interest\n", - "(e.g. ``'gamma': 0.5`` for ``FocalLoss``)." - ], "type": "dict", "options": { "num_epochs": { @@ -777,7 +1035,7 @@ Training Parameters "If the validation loss difference during one epoch\n", "(i.e. ``abs(validation_loss[n] - validation_loss[n-1]`` where n is the current epoch)\n", "is inferior to this epsilon for ``early_stopping_patience`` consecutive epochs,\n", - "then training stops." + "then training stops. Default: ``0.001``." 
] }, "early_stopping_patience": { @@ -785,7 +1043,7 @@ Training Parameters "range": "(0, inf)", "$$description": [ "Number of epochs after which the training is stopped if the validation loss\n", - "improvement is smaller than ``early_stopping_epsilon``." + "improvement is smaller than ``early_stopping_epsilon``. Default: ``50``." ] } } @@ -810,24 +1068,36 @@ Training Parameters "$schema": "http://json-schema.org/draft-04/schema#", "title": "scheduler", "type": "dict", + "description": "A predefined framework that adjusts the learning rate between epochs or iterations as the training progresses.", "options": { "initial_lr": { "type": "float", - "description": "Initial learning rate." + "description": "Initial learning rate. Default: ``0.001``." }, - "scheduler_lr": { + "lr_scheduler": { "type": "dict", "options": { "name": { "type": "string", "$$description": [ - "One of ``CosineAnnealingLR``, ``CosineAnnealingWarmRestarts``\n", - "and ``CyclicLR``. Please find documentation `here `__.\n", - + "One of ``CosineAnnealingLR``, ``CosineAnnealingWarmRestarts`` and ``CyclicLR``.\n", + "Please find documentation `here `__.", + "Default: ``CosineAnnealingLR``." + ] + }, + "max_lr": { + "type": "float", + "description": "Upper learning rate boundaries in the cycle for each parameter group. Default: ``1e-2``." + }, + "base_lr": { + "type": "float", + "$$description": [ + "Initial learning rate which is the lower boundary in the cycle for each parameter group.\n", + "Default: ``1e-5``." ] } }, - "description": "Other parameters depend on the scheduler of interest" + "description": "Other parameters depend on the scheduler of interest." } } } @@ -838,7 +1108,7 @@ Training Parameters "training_parameters": { "scheduler": { "initial_lr": 0.001, - "scheduler_lr": { + "lr_scheduler": { "name": "CosineAnnealingLR", "max_lr": 1e-2, "base_lr": 1e-5 @@ -858,14 +1128,14 @@ Training Parameters "options": { "applied": { "type": "boolean", - "description": "Indicates whether to use a balanced sampler or not." + "description": "Indicates whether to use a balanced sampler or not. Default: ``False``." }, "type": { "type": "string", "$$description": [ "Indicates which metadata to use to balance the sampler.\n", "Choices: ``gt`` or the name of a column from the ``participants.tsv`` file\n", - "(i.e. subject-based metadata)" + "(i.e. subject-based metadata). Default: ``gt``." ] } } @@ -889,7 +1159,7 @@ Training Parameters "$schema": "http://json-schema.org/draft-04/schema#", "title": "mixup_alpha", "description": "Alpha parameter of the Beta distribution, see `original paper on - the Mixup technique `__.", + the Mixup technique `__. Default: ``null``.", "type": "float" } @@ -904,34 +1174,38 @@ Training Parameters .. jsonschema:: - { - "$schema": "http://json-schema.org/draft-04/schema#", - "title": "transfer_learning", - "type": "dict", - "options": { - "retrain_model": { - "type": "string", - "$$description": [ - "Filename of the pretrained model (``path/to/pretrained-model``). If ``null``,\n", - "no transfer learning is performed and the network is trained from scratch." - ] - }, - "retrain_fraction": { - "type": "float", - "range": "[0, 1]", - "$$description": [ - "Controls the fraction of the pre-trained model that will be fine-tuned. For\n", - "instance, if set to 0.5, the second half of the model will be fine-tuned while\n", - "the first layers will be frozen." - ] - }, - "reset": { - "type": "boolean", - "description": "If true, the weights of the layers that are not frozen - are reset. 
If false, they are kept as loaded." - } - } - } + { + "$schema": "http://json-schema.org/draft-04/schema#", + "title": "transfer_learning", + "type": "dict", + "$$description": ["A learning method where a model pretrained for a task is reused as the starting point", + "for a model on a second task." + ], + "options": { + "retrain_model": { + "type": "string", + "$$description": [ + "Filename of the pretrained model (``path/to/pretrained-model``). If ``null``,\n", + "no transfer learning is performed and the network is trained from scratch. Default: ``null``." + ] + }, + "retrain_fraction": { + "type": "float", + "range": "[0, 1]", + "$$description": [ + "Controls the fraction of the pre-trained model that will be fine-tuned. For\n", + "instance, if set to 0.5, the second half of the model will be fine-tuned while\n", + "the first layers will be frozen. Default: ``1.0``." + ] + }, + "reset": { + "type": "boolean", + "$$description": ["If true, the weights of the layers that are not frozen are reset.", + "If false, they are kept as loaded. Default: ``True``." + ] + } + } + } .. code-block:: JSON @@ -966,9 +1240,9 @@ being used for the segmentation task). "required": "true", "type": "dict", "$$description": [ - "Define the default model (``Unet``) and mandatory parameters that are common to all\n", - "available :ref:`architectures`. For custom architectures (see below), the default\n", - "parameters are merged with the parameters that are specific to the tailored architecture." + "Define the default model (``Unet``) and mandatory parameters that are common to all available :ref:`architectures`.\n", + "For custom architectures (see below), the default parameters are merged with the parameters that are specific\n", + "to the tailored architecture." ], "options": { "name": { @@ -976,27 +1250,28 @@ being used for the segmentation task). "description": "Default: ``Unet``" }, "dropout_rate": { - "type": "float" + "type": "float", + "description": "Default: ``0.3``" }, "bn_momentum": { "type": "float", "$$description": [ "Defines the importance of the running average: (1 - `bn_momentum`). A large running\n", "average factor will lead to a slow and smooth learning.\n", - "See `PyTorch's BatchNorm classes for more details. `__ for more details.\n" + "See `PyTorch's BatchNorm classes for more details. `__ for more details. Default: ``0.1``\n" ] }, "depth": { "type": "int", "range": "(0, inf)", - "description": "Number of down-sampling operations." + "description": "Number of down-sampling operations. Default: ``3``" }, "final_activation": { "type": "string", "required": "false", "$$description": [ - "Final activation layer. Options: ``sigmoid`` (default), ``relu``(normalized ReLU), or ``softmax``." + "Final activation layer. Options: ``sigmoid`` (default), ``relu`` (normalized ReLU), or ``softmax``." ] }, "length_2D": { @@ -1017,7 +1292,8 @@ being used for the segmentation task). "type": "boolean", "$$description": [ "Indicates if the model is 2D, if not the model is 3D. If ``is_2d`` is ``False``, then parameters\n", - "``length_3D`` and ``stride_3D`` for 3D loader need to be specified (see :ref:`Modified3DUNet `)." + "``length_3D`` and ``stride_3D`` for 3D loader need to be specified (see :ref:`Modified3DUNet `).\n", + "Default: ``True``." ] } } @@ -1029,8 +1305,13 @@ being used for the segmentation task). 
{ "default_model": { "name": "Unet", - "dropout_rate": 0.4, - "batch_norm_momentum": 0.1 + "dropout_rate": 0.3, + "bn_momentum": 0.1, + "depth": 3, + "final_activation": "sigmoid" + "is_2d": true, + "length_2D": [256, 256], + "stride_2D": [244, 244] } } @@ -1042,10 +1323,11 @@ being used for the segmentation task). "title": "FiLMedUnet", "type": "dict", "required": "false", + "description": "U-Net network containing FiLM layers to condition the model with another data type (i.e. not an image).", "options": { "applied": { "type": "boolean", - "description": "Set to ``true`` to use this model." + "description": "Set to ``true`` to use this model. Default: ``False``." }, "metadata": { "type": "string", @@ -1056,13 +1338,16 @@ being used for the segmentation task). "(defined in the json of each image) are input to the FiLM generator." ] }, - "contrast": "Image contrasts (according to ``config/contrast_dct.json``) are input to the FiLM generator." + "contrasts": "Image contrasts (according to ``config/contrast_dct.json``) are input to the FiLM generator." }, "$$description": [ "Choice between ``mri_params``, ``contrasts`` (i.e. image-based metadata) or the\n", "name of a column from the participants.tsv file (i.e. subject-based metadata)." ] - } + }, + "film_layers": { + "description": "List of 0 or 1 indicating on which layer FiLM is applied." + } } } @@ -1080,18 +1365,19 @@ being used for the segmentation task). .. jsonschema:: - { - "$schema": "http://json-schema.org/draft-04/schema#", - "title": "HeMISUnet", - "type": "dict", - "required": "false", - "options": { - "applied": { - "type": "boolean", - "description": "Set to ``true`` to use this model." - }, - "missing_probability": { - "type": "float", + { + "$schema": "http://json-schema.org/draft-04/schema#", + "title": "HeMISUnet", + "type": "dict", + "required": "false", + "description": "A U-Net model inspired by HeMIS to deal with missing contrasts.", + "options": { + "applied": { + "type": "boolean", + "description": "Set to ``true`` to use this model." + }, + "missing_probability": { + "type": "float", "range": "[0, 1]", "$$description": [ "Initial probability of missing image contrasts as model's input\n", @@ -1103,7 +1389,7 @@ being used for the segmentation task). "type": "float", "$$description": [ "Controls missing probability growth at each epoch: at each epoch, the\n", - "``missing_probability`` is modified with the exponent ``missing_probability_growth``." + "``missing_probability`` is modified with the exponent ``missing_probability_growth``.", ] } } @@ -1134,10 +1420,18 @@ being used for the segmentation task). "title": "Modified3DUNet", "type": "dict", "required": "false", + "$$description": [ + "The main differences with the original UNet resides in the use of LeakyReLU instead of ReLU, InstanceNormalisation\n", + "instead of BatchNorm due to small batch size in 3D and the addition of segmentation layers in the decoder." + ], "options": { + "applied": { + "type": "boolean", + "description": "Set to ``true`` to use this model." + }, "length_3D": { "type": "[int, int, int]", - "description": "Size of the 3D patches used as model's input tensors." + "description": "Size of the 3D patches used as model's input tensors. Default: ``[128, 128, 16]``." }, "stride_3D": { "type": "[int, int, int]", @@ -1145,19 +1439,19 @@ being used for the segmentation task). "Voxels' shift over the input matrix to create patches. 
Ex: Stride of [1, 2, 3]\n", "will cause a patch translation of 1 voxel in the 1st dimension, 2 voxels in\n", "the 2nd dimension and 3 voxels in the 3rd dimension at every iteration until\n", - "the whole input matrix is covered." + "the whole input matrix is covered. Default: ``[128, 128, 16]``." ] }, - "attention_unet": { + "attention": { "type": "boolean", - "description": "Use attention gates in the Unet's decoder.", + "description": "Use attention gates in the Unet's decoder. Default: ``False``.", "required": "false" }, "n_filters": { "type": "int", "$$description": [ "Number of filters in the first convolution of the UNet.\n", - "This number of filters will be doubled at each convolution." + "This number of filters will be doubled at each convolution. Default: ``16``." ], "required": "false" } @@ -1177,59 +1471,14 @@ being used for the segmentation task). } -Cascaded Architecture Features ------------------------------- - -.. jsonschema:: - - { - "$schema": "http://json-schema.org/draft-04/schema#", - "title": "object_detection_params", - "type": "dict", - "required": "false", - "options": { - "object_detection_path": { - "type": "string", - "$$description": [ - "Path to object detection model and the configuration file. The folder,\n", - "configuration file, and model need to have the same name\n", - "(e.g. ``findcord_tumor/``, ``findcord_tumor/findcord_tumor.json``, and\n", - "``findcord_tumor/findcord_tumor.onnx``, respectively). The model's prediction\n", - "will be used to generate bounding boxes." - ] - }, - "safety_factor": { - "type": "[int, int, int]", - "$$description": [ - "List of length 3 containing the factors to multiply each dimension of the\n", - "bounding box. Ex: If the original bounding box has a size of 10x20x30 with\n", - "a safety factor of [1.5, 1.5, 1.5], the final dimensions of the bounding box\n", - "will be 15x30x45 with an unchanged center." - ] - } - } - } - -.. code-block:: JSON - - { - "object_detection_params": { - "object_detection_path": null, - "safety_factor": [1.0, 1.0, 1.0] - } - } - - Transformations --------------- -Transformations applied during data augmentation. Transformations are sorted in the order they are applied to the image samples. For each transformation, the following parameters are customizable: +Transformations applied during data augmentation. Transformations are sorted in the order they are applied to the image samples. For each transformation, the following parameters are customizable: - ``applied_to``: list between ``"im", "gt", "roi"``. If not specified, then the transformation is applied to all loaded samples. Otherwise, only applied to the specified types: Example: ``["gt"]`` implies that this transformation is only applied to the ground-truth data. - ``dataset_type``: list between ``"training", "validation", "testing"``. If not specified, then the transformation is applied to the three sub-datasets. Otherwise, only applied to the specified subdatasets. Example: ``["testing"]`` implies that this transformation is only applied to the testing sub-dataset. -Available Transformations: -^^^^^^^^^^^^^^^^^^^^^^^^^^ .. jsonschema:: @@ -1257,12 +1506,15 @@ Available Transformations: "$schema": "http://json-schema.org/draft-04/schema#", "title": "CenterCrop", "type": "dict", + "$$description": [ + "Make a centered crop of a specified size." 
+ ], "options": { "size": { - "type": "list, int" + "type": "list[int]" }, "applied_to": { - "type": "list, string" + "type": "list[str]" } } } @@ -1285,12 +1537,15 @@ Available Transformations: "$schema": "http://json-schema.org/draft-04/schema#", "title": "ROICrop", "type": "dict", + "$$description": [ + "Make a crop of a specified size around a Region of Interest (ROI).", + ], "options": { "size": { - "type": "list, int" + "type": "list[int]" }, "applied_to": { - "type": "list, string" + "type": "list[str]" } } } @@ -1300,7 +1555,8 @@ Available Transformations: { "transformation": { "ROICrop": { - "size": [48, 48] + "size": [48, 48], + "applied_to": ["im", "roi"] } } } @@ -1315,14 +1571,21 @@ Available Transformations: "$$description": [ "Normalize a tensor or an array image with mean and standard deviation estimated from\n", "the sample itself." - ] + ], + "options": { + "applied_to": { + "type": "list[str]" + } + } } .. code-block:: JSON { "transformation": { - "NormalizeInstance": {} + "NormalizeInstance": { + "applied_to": ["im"] + } } } @@ -1333,9 +1596,10 @@ Available Transformations: "$schema": "http://json-schema.org/draft-04/schema#", "title": "RandomAffine", "type": "dict", + "description": "Apply Random Affine transformation.", "options": { "degrees": { - "type": "float or tuple of float", + "type": "float or tuple(float)", "range": "(0, inf)", "$$description": [ "Positive float or list (or tuple) of length two. Angles in degrees. If only\n", @@ -1344,7 +1608,7 @@ Available Transformations: ] }, "translate": { - "type": "list, float", + "type": "list[float]", "range": "[0, 1]", "$$description": [ "Length 2 or 3 depending on the sample shape (2D or 3D). Defines\n", @@ -1352,11 +1616,11 @@ Available Transformations: ] }, "scale": { - "type": "list, float", + "type": "list[float]", "range": "[0, 1]", "$$description": [ "Length 2 or 3 depending on the sample shape (2D or 3D). Defines\n", - "the maximum range of scaling along each axis." + "the maximum range of scaling along each axis. Default: ``[0., 0., 0.]``." ] } } @@ -1383,9 +1647,10 @@ Available Transformations: "$schema": "http://json-schema.org/draft-04/schema#", "title": "RandomShiftIntensity", "type": "dict", + "description": "Add a random intensity offset.", "options": { "shift_range": { - "type": "[float, float]", + "type": "(float, float)", "description": "Range from which the offset applied is randomly selected." } } @@ -1423,7 +1688,8 @@ Available Transformations: "description": "Standard deviation." }, "p": { - "type": "float" + "type": "float", + "description": "Probability. Default: ``0.1``" } } } @@ -1449,13 +1715,14 @@ Available Transformations: "$schema": "http://json-schema.org/draft-04/schema#", "title": "Resample", "type": "dict", + "description": "Resample image to a given resolution.", "options": { - "wspace": { + "hspace": { "type": "float", "range": "[0, 1]", "description": "Resolution along the first axis, in mm." }, - "hspace": { + "wspace": { "type": "float", "range": "[0, 1]", "description": "Resolution along the second axis, in mm." @@ -1473,8 +1740,8 @@ Available Transformations: { "transformation": { "Resample": { - "wspace": 0.75, "hspace": 0.75, + "wspace": 0.75, "dspace": 1 } } @@ -1487,14 +1754,15 @@ Available Transformations: "$schema": "http://json-schema.org/draft-04/schema#", "title": "AdditiveGaussianNoise", "type": "dict", + "description": "Adds Gaussian Noise to images.", "options": { "mean": { "type": "float", - "description": "Mean of Gaussian noise." 
+ "description": "Mean of Gaussian noise. Default: ``0.0``." }, "std": { "type": "float", - "description": "Standard deviation of Gaussian noise." + "description": "Standard deviation of Gaussian noise. Default: ``0.01``." } } } @@ -1517,6 +1785,7 @@ Available Transformations: "$schema": "http://json-schema.org/draft-04/schema#", "title": "DilateGT", "type": "dict", + "description": "Randomly dilate a ground-truth tensor.", "options": { "dilation_factor": { "type": "float", @@ -1550,11 +1819,13 @@ Available Transformations: "options": { "min_percentile": { "type": "float", - "range": "[0, 100]" + "range": "[0, 100]", + "description": "Lower clipping limit. Default: ``5.0``." }, "max_percentile": { "type": "float", - "range": "[0, 100]" + "range": "[0, 100]", + "description": "Higher clipping limit. Default: ``95.0``." } } } @@ -1577,15 +1848,17 @@ Available Transformations: "$schema": "http://json-schema.org/draft-04/schema#", "title": "Clahe", "type": "dict", + "description": "Applies Contrast Limited Adaptive Histogram Equalization for enhancing the local image contrast.", "options": { "clip_limit": { - "type": "float" + "type": "float", + "description": "Clipping limit, normalized between 0 and 1. Default: ``3.0``." }, "kernel_size": { - "type": "list, int", + "type": "list[int]", "$$description": [ "Defines the shape of contextual regions used in the algorithm.\n", - "List length = dimension, i.e. 2D or 3D" + "List length = dimension, i.e. 2D or 3D. Default: ``[8, 8]``." ] } } @@ -1622,6 +1895,112 @@ Available Transformations: } } +.. jsonschema:: + + { + "$schema": "http://json-schema.org/draft-04/schema#", + "title": "RandomGamma", + "type": "dict", + "$$description": [ + "Randomly changes the contrast of an image by gamma exponential." + ], + "options": { + "log_gamma_range": { + "type": "[float, float]", + "description": "Log gamma range for changing contrast." + }, + "p": { + "type": "float", + "description": "Probability of performing the gamma contrast. Default: ``0.5``." + } + } + } + +.. code-block:: JSON + + { + "transformation": { + "RandomGamma": { + "log_gamma_range": [-3.0, 3.0], + "p": 0.5, + "applied_to": ["im"], + "dataset_type": ["training"] + } + } + } + +.. jsonschema:: + + { + "$schema": "http://json-schema.org/draft-04/schema#", + "title": "RandomBiasField", + "type": "dict", + "$$description": [ + "Applies a random MRI bias field artifact to the image via ``torchio.RandomBiasField()``." + ], + "options": { + "coefficients": { + "type": "float", + "description": "Maximum magnitude of polynomial coefficients." + }, + "order": { + "type": "int", + "description": "Order of the basis polynomial functions." + }, + "p": { + "type": "float", + "description": "Probability of applying the bias field. Default: ``0.5``." + } + } + } + +.. code-block:: JSON + + { + "transformation": { + "RandomBiasField": { + "coefficients": 0.5, + "order": 3, + "p": 0.5, + "applied_to": ["im"], + "dataset_type": ["training"] + } + } + } + +.. jsonschema:: + + { + "$schema": "http://json-schema.org/draft-04/schema#", + "title": "RandomBlur", + "type": "dict", + "$$description": [ + "Applies a random blur to the image." + ], + "options": { + "sigma_range": { + "type": "(float, float)", + "description": "Standard deviation range for the gaussian filter." + }, + "p": { + "type": "float", + "description": "Probability of performing blur. Default: ``0.5``." + } + } + } + +.. 
code-block:: JSON + + { + "transformation": { + "RandomBlur": { + "sigma_range": [0.0, 2.0], + "p": 0.5, + "applied_to": ["im"], + "dataset_type": ["training"] + } + } + } .. _Uncertainty: @@ -1639,7 +2018,7 @@ Uncertainty computation is performed if ``n_it>0`` and at least "$schema": "http://json-schema.org/draft-04/schema#", "title": "epistemic", "type": "boolean", - "description": "Model-based uncertainty with `Monte Carlo Dropout `__." + "description": "Model-based uncertainty with `Monte Carlo Dropout `__. Default: ``false``." } .. code-block:: JSON @@ -1656,7 +2035,7 @@ Uncertainty computation is performed if ``n_it>0`` and at least "$schema": "http://json-schema.org/draft-04/schema#", "title": "aleatoric", "type": "boolean", - "description": "Image-based uncertainty with `test-time augmentation `__." + "description": "Image-based uncertainty with `test-time augmentation `__. Default: ``false``." } .. code-block:: JSON @@ -1673,7 +2052,7 @@ Uncertainty computation is performed if ``n_it>0`` and at least "$schema": "http://json-schema.org/draft-04/schema#", "title": "n_it", "type": "int", - "description": "Number of Monte Carlo iterations. Set to 0 for no uncertainty computation." + "description": "Number of Monte Carlo iterations. Set to 0 for no uncertainty computation. Default: ``0``." } .. code-block:: JSON @@ -1700,7 +2079,7 @@ Postprocessing "range": "[0, 1]", "$$description": [ "Threshold. To use soft predictions (i.e. no binarisation, float between 0 and 1)\n", - "for metric computation, indicate -1." + "for metric computation, indicate -1. Default: ``0.5``." ] } }, @@ -1731,7 +2110,7 @@ Postprocessing "type": "dict", "$$description": [ "Binarize by setting to 1 the voxel having the maximum prediction across all classes.\n", - "Useful for multiclass models. No parameters required (i.e., {})." + "Useful for multiclass models. No parameters required (i.e., {}). Default: ``{}``." ] } @@ -1752,7 +2131,7 @@ Postprocessing "$schema": "http://json-schema.org/draft-04/schema#", "title": "fill_holes", "type": "dict", - "description": "Fill holes in the predictions. No parameters required (i.e., {})." + "description": "Fill holes in the predictions. No parameters required (i.e., {}). Default: ``{}``." } @@ -1775,7 +2154,7 @@ Postprocessing "$$description": [ "Keeps only the largest connected object in prediction. Only nearest neighbors are\n", "connected to the center, diagonally-connected elements are not considered neighbors.\n", - "No parameters required (i.e., {})" + "No parameters required (i.e., {}). Default: ``{}``." ] } @@ -1800,7 +2179,7 @@ Postprocessing "thr": { "type": "float", "range": "[0, 1]", - "description": "Threshold. Threshold set to ``-1`` will not apply this postprocessing step." + "description": "Threshold. Threshold set to ``-1`` will not apply this postprocessing step. Default: ``-1``." } }, "description": "Sets to zero prediction values strictly below the given threshold ``thr``." @@ -1832,17 +2211,17 @@ Postprocessing ], "options": { "thr": { - "type": "int or list", + "type": "int or list[int]", "$$description": [ "Minimal object size. If a list of thresholds is chosen, the length should\n", - "match the number of predicted classes." + "match the number of predicted classes. Default: ``3``." ] }, "unit": { "type": "string", "$$description": [ - "Either `vox` for voxels or `mm3`. Indicates the unit used to define the\n", - "minimal object size." + "Either ``vox`` for voxels or ``mm3``. Indicates the unit used to define the\n", + "minimal object size. 
Default: ``vox``."
        ]
      }
    }
  }
@@ -1911,8 +2290,28 @@ Postprocessing
 Evaluation Parameters
 ---------------------

-Dict. Parameters to get object detection metrics (true positive and false detection rates), and this, for defined
-object sizes.
+Dict. Parameters to get object detection metrics (lesions true positive rate, lesions false detection rate
+and Hausdorff score), computed for defined object sizes.
+
+.. jsonschema::
+
+    {
+        "$schema": "http://json-schema.org/draft-04/schema#",
+        "title": "object_detection_metrics",
+        "$$description": [
+            "Indicate if object detection metrics (lesions true positive rate, lesions false detection rate\n",
+            "and Hausdorff score) are computed or not at evaluation time. Default: ``true``",
+        ],
+        "type": "boolean"
+    }
+
+.. code-block:: JSON
+
+    {
+        "evaluation_parameters": {
+            "object_detection_metrics": true
+        }
+    }

 .. jsonschema::

@@ -1923,19 +2322,20 @@ object sizes.
     "type": "dict",
     "options": {
       "thr": {
-        "type": "list, int",
+        "type": "list[int]",
         "$$description": [
           "These values will create several consecutive target size bins. For instance\n",
           "with a list of two values, we will have three target size bins: minimal size\n",
           "to first list element, first list element to second list element, and second\n",
-          "list element to infinity."
+          "list element to infinity. Default: ``[20, 100]``.\n",
+          "``object_detection_metrics`` must be ``true`` for ``target_size`` to apply."
         ]
       },
       "unit": {
         "type": "string",
         "$$description": [
-          "Either `vox` for voxels or `mm3`. Indicates the unit used to define the\n",
-          "target object sizes."
+          "Either ``vox`` for voxels or ``mm3``. Indicates the unit used to define the\n",
+          "target object sizes. Default: ``vox``."
         ]
       }
     }
@@ -1963,14 +2363,15 @@ object sizes.
       "thr": {
         "type": "int",
         "$$description": [
-          "Minimal object size overlapping to be considered a TP, FP, or FN."
+          "Minimal object size overlapping to be considered a TP, FP, or FN. Default: ``3``.\n",
+          "``object_detection_metrics`` must be ``true`` for the overlap to apply."
         ]
       },
       "unit": {
         "type": "string",
-        "$$description": [
-          "Either `vox` for voxels or `mm3`. Indicates the unit used to define the\n",
-          "overlap."
+        "$$description": [
+          "Either ``vox`` for voxels or ``mm3``. Indicates the unit used to define the overlap.\n",
+          "Default: ``vox``."
         ]
       }
     }
@@ -1981,7 +2382,7 @@ object sizes.

   {
     "evaluation_parameters": {
       "overlap": {
-        "thr": 30,
+        "thr": 3,
         "unit": "vox"
       }
     }
diff --git a/docs/source/data.rst b/docs/source/data.rst
index c00f5614f..f7d9983fc 100644
--- a/docs/source/data.rst
+++ b/docs/source/data.rst
@@ -1,24 +1,40 @@
 Data
 ====

-To facilitate the organization of data, ``ivadomed`` requires the data to be
-organized according to the `Brain Imaging Data Structure (BIDS) `__ convention.
-An example of this organization is shown below:
+Organization
+------------
+
+To facilitate the organization of data, ``ivadomed`` requires the data to be organized according to the
+`Brain Imaging Data Structure `_ (BIDS) standard.
+The details of the standard can be found in the `BIDS specification `_.
+
+Validation
+----------
+
+The compliance of the dataset with BIDS can be validated with the `BIDS-validator
+web version `_.
+Other options for validation are available `here `_.
+
+Examples
+--------
+
+An example of this organization is shown below for MRI data:

 .. 
image:: https://raw.githubusercontent.com/ivadomed/doc-figures/main/data/1920px-BIDS_Logo.png :alt: BIDS_Logo + :width: 200 :: dataset/ └── dataset_description.json - └── participants.tsv - └── sub-01 <--------------------- Folder enclosing data for subject 1 + └── participants.tsv <-------------------------------- Metadata describing subjects attributes e.g. sex, age, etc. + └── sub-01 <------------------------------------------ Folder enclosing data for subject 1 └── sub-02 └── sub-03 └── anat - └── sub-03_T1w.nii.gz <-- MRI image in NIfTI format - └── sub-03_T1w.json <---- Metadata including image parameters, MRI vendor, etc. + └── sub-03_T1w.nii.gz <----------------------- MRI image in NIfTI format + └── sub-03_T1w.json <------------------------- Metadata including image parameters, MRI vendor, etc. └── sub-03_T2w.nii.gz └── sub-03_T2w.json └── derivatives @@ -28,6 +44,8 @@ An example of this organization is shown below: └── sub-03_seg-tumor-manual.nii.gz <-- Manually-corrected segmentation └── sub-03_seg-tumor-manual.json <---- Metadata including author who performed the labeling and date -.. note:: ``participants.tsv`` should, at least, include a column ``participant_id``, which is used when loading the dataset. +.. note:: For an exhaustive list of ``derivatives`` used in ``ivadomed``, please see our `wiki `_. -.. note:: For an exhaustive list of derivatives used in ``ivadomed``, please see our `wiki `_ +For usage in ``ivadomed``, additional examples are available in our tutorials, for `MRI data `_ and `Microscopy data `_. +Further examples of the BIDS organization can be found in the +`BIDS-examples `_ repository. diff --git a/docs/source/help.rst b/docs/source/help.rst index c83f710c5..86358badb 100644 --- a/docs/source/help.rst +++ b/docs/source/help.rst @@ -2,5 +2,5 @@ Help ==== If you need help using ``ivadomed``, please don't hesitate to -`post an issue on GitHub `__ +`post a question on our discussion forum `__ and we will be happy to assist you. diff --git a/docs/source/index.rst b/docs/source/index.rst index 91be20552..df9fe7df4 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -57,6 +57,7 @@ Home tutorials/cascaded_architecture.rst tutorials/uncertainty.rst tutorials/automate_training.rst + tutorials/two_class_microscopy_seg_2d_unet.rst .. toctree:: :maxdepth: 1 diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 1c8c39a01..71a1d22ee 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -2,140 +2,131 @@ Installation ============ Supported OS ------------- +++++++++++++ -Currently, we only support ``MacOS`` and ``Linux`` operating systems. ``Windows`` -users have the possibility to install and use ``ivadomed`` via -`Windows Subsystem for Linux (WSL) `_. The steps below (about updating bashrc) are strongly recommended for MacOS users in the installation process but are optional for Linux users. + Currently, ``ivadomed`` supports GPU/CPU on ``Linux`` and ``Windows``, and CPU only on ``macOS`` and `Windows Subsystem for Linux `_. -Open your bash/zsh script file with editor on your computer. +Step 1: Setup dedicated python environment ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - If you are using bash shell - :: + You can setup ``ivadomed`` using either Conda or Venv: - vim ~/.bashrc + .. tabs:: - If you are using zsh shell - :: - - vim ~/.zshrc + .. tab:: Install via ``venv`` -Write in your .bashrc/.zshrc file with following line. + 1. 
Set up a Python virtual environment with ``venv``.

-::
+                   ``ivadomed`` requires Python >= 3.7 and <3.10.

-    export HDF5_USE_FILE_LOCKING='FALSE'
+                   First, make sure that a compatible version of Python 3 is installed on your system by running:

-Save this change and restart your terminal to apply the change.
+                   .. tabs::

-There are fundamentally two different approaches to install ``IvadoMed``:
+                       .. group-tab:: Mac/Linux

-1) Install via Conda - This is the easiest way for personal computers.
+                           .. code::

-2) Install via Venv/VirtualEnv - This is compatible with ComputeCanada cluster environment.
+                               python3 --version

-Approach 1: Conda
-------------------
+                       .. group-tab:: Windows

-Step 1: Create new Conda Env called IvadoMedEnv (may taken 5 to 15 minutes)
-+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+                           .. code::

-::
+                               python --version

-    conda env create --file environment.yml
+                   If your system's Python is not 3.7, 3.8, or 3.9 (or if you don't have Python 3 installed at all), please `install Python `_ before continuing.

-Step 2 : Activate environment and use
-++++++++++++++++++++++++++++++++++++++
+                   Once you have a supported version of Python installed, run the following command:

-::
-    conda activate IvadoMedEnv
+                   .. tabs::

-Note that this is NOT compatible with ComputeCanada because of their no anaconda policy: https://docs.computecanada.ca/wiki/Anaconda/en
+                       .. group-tab:: Mac/Linux

+                           .. code::

-Approach 2: Venv
------------------
+                               # Replace ``3.X`` with the Python version that you installed:
+                               python3.X -m venv ivadomed_env

-Step 1: Setup Python Virtual Environment.
-+++++++++++++++++++++++++++++++++++++++++
+                           .. note::

-``ivadomed`` requires Python >= 3.6 and <3.9. We recommend
-working under a virtual environment, which could be set as follows:
+                               If you use ``Debian`` or ``Ubuntu``, you may be prompted to install the ``python3-venv`` module when creating the virtual environment. This is expected, so please follow the instructions provided by Python. For other operating systems, ``venv`` will be installed by default.

-::
+                       .. group-tab:: Windows

-    virtualenv venv-ivadomed
-    source venv-ivadomed/bin/activate
+                           .. code::

+                               python -m venv ivadomed_env

-.. warning::
-    If the default Python version installed in your system does not fit the version requirements, you might need to specify a version of Python associated with your virtual environment:
+               2. Activate the new virtual environment (default named ``ivadomed_env``)

-    ::
+                   .. tabs::

-    virtualenv venv-ivadomed --python=python3.6
+                       .. group-tab:: Mac/Linux

-Step 2: Clone the `ivadomed `_ repository.
-+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+                           .. code::
+
+                               source ivadomed_env/bin/activate
+
+                       .. group-tab:: Windows
+
+                           .. code::
+
+                               cd ivadomed_env/Scripts/
+                               activate
+
+           .. tab:: Install via ``conda``
+
+               1. Create new conda environment using the ``environment.yml`` file
+
+                   ::
+
+                       conda env create --name ivadomed_env

-::
+               2. Activate the new conda environment

-    git clone git@github.com:ivadomed/ivadomed.git
-    cd ivadomed
-
+                   ::

-Step 3: Install PyTorch 1.5 and TorchVision
-+++++++++++++++++++++++++++++++++++++++++++
+                       conda activate ivadomed_env

-If you have a compatible NVIDIA GPU that supports CUDA, run the following command:
-::
-
-    pip install -r requirements_gpu.txt
+           .. tab:: Compute Canada HPC

+               There are numerous constraints and limited package availability in the Compute Canada cluster environment.
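+               For example, a typical session might load a Python module before creating the virtual
+               environment. This is only a sketch: the module name below is hypothetical, so first list
+               the versions actually available on your cluster with ``module avail python``:
+
+               .. code-block:: bash
+
+                   # Hypothetical module name; check `module avail python` for the versions on your cluster
+                   module load python/3.8
+                   python -m venv ivadomed_env
+                   source ivadomed_env/bin/activate
+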
-If you do not have a compatible GPU, run the following installer to use ``ivadomed`` with CPU.
+               It is best to attempt a ``venv``-based installation and follow up with Compute Canada technical support, as many specially compiled packages (e.g. ``numpy``) are exclusively available in the Compute Canada HPC environment.
+               If you are using `Compute Canada `_, you can load modules as `mentioned here `_ and `also here `_.

-::
-    pip install -r requirements.txt

+Step 2: Install ``ivadomed``
+++++++++++++++++++++++++++++

-Step 4: Install from release (recommended)
-++++++++++++++++++++++++++++++++++++++++++
+    .. tabs::

-Install ``ivadomed`` and its requirements from
-`Pypi `__:
+        .. group-tab:: PyPI Installation

-::
+            Install ``ivadomed`` and its requirements from
+            `PyPI `__:

-    pip install --upgrade pip
-    pip install ivadomed
+            ::

-(Optional) Alternative Step 4 for Developers: Install from source
-+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+                pip install --upgrade pip

-Bleeding-edge developments are available on the project's master branch
-on Github. Installation procedure is the following at repository root:
+                pip install ivadomed

-::
+        .. group-tab:: Repo Installation (Advanced or Developer)

-    cd ivadomed
-    pip install -e .
+            Bleeding-edge developments are available on the project's master branch
+            on Github. Install ``ivadomed`` from source:

+            ::

-(Optional) Step 5 for Developers Install pre-commit hooks
-+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+                git clone https://github.com/ivadomed/ivadomed.git

-We use ``pre-commit`` to enforce a limit on file size.
-After you've installed ``ivadomed``, install the hooks:
+                cd ivadomed

-::
+                pip install -e .

-    pip install -r requirements_dev.txt
-    pre-commit install
diff --git a/docs/source/scripts.rst b/docs/source/scripts.rst
index b03a03154..6cb052d67 100644
--- a/docs/source/scripts.rst
+++ b/docs/source/scripts.rst
@@ -8,6 +8,11 @@ Scripts
 This section contains a collection of useful scripts for quality control during the
 training of models.

+ivadomed_segment_image
+""""""""""""""""""""""
+
+.. autofunction:: ivadomed.scripts.segment_image.segment_image
+
 ivadomed_visualize_transforms
 """""""""""""""""""""""""""""

diff --git a/docs/source/tutorials/automate_training.rst b/docs/source/tutorials/automate_training.rst
index 31d899131..796e89829 100644
--- a/docs/source/tutorials/automate_training.rst
+++ b/docs/source/tutorials/automate_training.rst
@@ -81,7 +81,7 @@ Step 5: Run the Code
 Default
 ^^^^^^^

-If neither ``all_combin`` nor ``multi_params`` is selected, then the hyperparameters will be
+If neither ``all-combin`` nor ``multi-params`` is selected, then the hyperparameters will be
 combined as follows into a ``config_list``.

 .. note::
@@ -108,7 +108,7 @@ To run this:
 All Combinations
 ^^^^^^^^^^^^^^^^

-If the flag ``all_combin`` is selected, the hyperparameter options will be combined
+If the flag ``all-combin`` is selected, the hyperparameter options will be combined
 combinatorically.

 .. code-block::
@@ -135,12 +135,12 @@ To run:

 .. code-block:: bash

     ivadomed_automate_training -c config.json -ch config_hyper.json \
-        -n 1 --all_combin
+        -n 1 --all-combin

 Multiple Parameters
 ^^^^^^^^^^^^^^^^^^^

-If the flag ``multi_params`` is selected, the elements from each hyperparameter list will be
+If the flag ``multi-params`` is selected, the elements from each hyperparameter list will be
 selected sequentially, so all the first elements, then all the second elements, etc. 
If the lists are different lengths, say ``len(list_a) = n`` and ``len(list_b) = n+m``, where ``n`` and ``m``
are strictly positive integers, then we will only use the first ``n`` elements.
@@ -155,7 +155,7 @@ To run:

 .. code-block:: bash

     ivadomed_automate_training -c config.json -ch config_hyper.json \
-        -n 1 --multi_params
+        -n 1 --multi-params

 Step 6: Results
 ---------------
diff --git a/docs/source/tutorials/cascaded_architecture.rst b/docs/source/tutorials/cascaded_architecture.rst
index 9ecd149e8..9afab11f9 100644
--- a/docs/source/tutorials/cascaded_architecture.rst
+++ b/docs/source/tutorials/cascaded_architecture.rst
@@ -1,182 +1,389 @@
 Cascaded architecture
 =====================

-In this tutorial we will learn the following features:
+    In this tutorial we will learn the following features:

-- Design a training scheme composed of two cascaded networks.
-- Visualize the training with tensorboard.
-- Generate a GIF to visualize the learning of the model.
-- Find the optimal threshold to binarize images based on the validation sub-dataset.
+    - Design a training scheme composed of two cascaded networks.
+    - Visualize the training with tensorboard.
+    - Generate a GIF to visualize the learning of the model.
+    - Find the optimal threshold to binarize images based on the validation sub-dataset.

-In our example, the model will first locate the spinal cord (step 1). This localisation will then be used to crop the images around this region of interest, before segmenting the cerebrospinal fluid (CSF, step 2).
+    In our example, the model will first locate the spinal cord (step 1). This localisation will then be used to crop the images around this region of interest, before segmenting the cerebrospinal fluid (CSF, step 2).

 Download dataset
 ----------------

-A dataset example is available for this tutorial. If not already done, download the dataset with the following line.
-For more details on this dataset see :ref:`One-class segmentation with 2D U-Net`.
+    A dataset example is available for this tutorial. If not already done, download the dataset with the following line.
+    For more details on this dataset see :ref:`One-class segmentation with 2D U-Net`.

-.. code-block:: bash
+    .. tabs::

-    # Download data
-    ivadomed_download_data -d data_example_spinegeneric
+        .. tab:: Command Line Interface
+
+            .. code-block:: bash
+
+                # Download data
+                ivadomed_download_data -d data_example_spinegeneric

 Configuration file
 ------------------

-In this tutorial we will use the configuration file: ``ivadomed/config/config.json``.
-First off, copy this configuration file in your local directory to avoid modifying the source file:
+    In ``ivadomed``, training is orchestrated by a configuration file. Examples of configuration files are available in
+    the ``ivadomed/config/`` folder and the documentation is available in :doc:`../configuration_file`.
+
+    In this tutorial we will use the configuration file: ``ivadomed/config/config.json``.
+    First off, copy this configuration file in your local directory to avoid modifying the source file:
+
+    .. tabs::
+
+        .. tab:: Command Line Interface
+
+            .. code-block:: bash
+
+                cp /ivadomed/config/config.json .
+
+
+    Then, open it with a text editor. You can also `view it directly here: `_ or see it in the collapsed JSON code block below.
+
+    .. collapse:: Reveal the embedded config.json
+
+        .. 
code-block:: json + :linenos: + + { + "command": "train", + "gpu_ids": [0], + "path_output": "spineGeneric", + "model_name": "my_model", + "debugging": false, + "object_detection_params": { + "object_detection_path": null, + "safety_factor": [1.0, 1.0, 1.0] + }, + "loader_parameters": { + "path_data": ["data_example_spinegeneric"], + "subject_selection": {"n": [], "metadata": [], "value": []}, + "target_suffix": ["_seg-manual"], + "extensions": [".nii.gz"], + "roi_params": { + "suffix": null, + "slice_filter_roi": null + }, + "contrast_params": { + "training_validation": ["T1w", "T2w", "T2star"], + "testing": ["T1w", "T2w", "T2star"], + "balance": {} + }, + "slice_filter_params": { + "filter_empty_mask": false, + "filter_empty_input": true + }, + "slice_axis": "axial", + "multichannel": false, + "soft_gt": false + }, + "split_dataset": { + "fname_split": null, + "random_seed": 6, + "split_method" : "participant_id", + "data_testing": {"data_type": null, "data_value":[]}, + "balance": null, + "train_fraction": 0.6, + "test_fraction": 0.2 + }, + "training_parameters": { + "batch_size": 18, + "loss": { + "name": "DiceLoss" + }, + "training_time": { + "num_epochs": 100, + "early_stopping_patience": 50, + "early_stopping_epsilon": 0.001 + }, + "scheduler": { + "initial_lr": 0.001, + "lr_scheduler": { + "name": "CosineAnnealingLR", + "base_lr": 1e-5, + "max_lr": 1e-2 + } + }, + "balance_samples": { + "applied": false, + "type": "gt" + }, + "mixup_alpha": null, + "transfer_learning": { + "retrain_model": null, + "retrain_fraction": 1.0, + "reset": true + } + }, + "default_model": { + "name": "Unet", + "dropout_rate": 0.3, + "bn_momentum": 0.1, + "final_activation": "sigmoid", + "depth": 3 + }, + "FiLMedUnet": { + "applied": false, + "metadata": "contrasts", + "film_layers": [0, 1, 0, 0, 0, 0, 0, 0, 0, 0] + }, + "Modified3DUNet": { + "applied": false, + "length_3D": [128, 128, 16], + "stride_3D": [128, 128, 16], + "attention": false, + "n_filters": 8 + }, + "uncertainty": { + "epistemic": false, + "aleatoric": false, + "n_it": 0 + }, + "postprocessing": { + "remove_noise": {"thr": -1}, + "keep_largest": {}, + "binarize_prediction": {"thr": 0.5}, + "uncertainty": {"thr": -1, "suffix": "_unc-vox.nii.gz"}, + "fill_holes": {}, + "remove_small": {"unit": "vox", "thr": 3} + }, + "evaluation_parameters": { + "target_size": {"unit": "vox", "thr": [20, 100]}, + "overlap": {"unit": "vox", "thr": 3} + }, + "transformation": { + "Resample": + { + "hspace": 0.75, + "wspace": 0.75, + "dspace": 1 + }, + "CenterCrop": { + "size": [128, 128]}, + "RandomAffine": { + "degrees": 5, + "scale": [0.1, 0.1], + "translate": [0.03, 0.03], + "applied_to": ["im", "gt"], + "dataset_type": ["training"] + }, + "ElasticTransform": { + "alpha_range": [28.0, 30.0], + "sigma_range": [3.5, 4.5], + "p": 0.1, + "applied_to": ["im", "gt"], + "dataset_type": ["training"] + }, + "NormalizeInstance": {"applied_to": ["im"]} + } + } + + + From this point onward, we will discuss some of the key parameters to use cascaded models. Most parameters are configurable only via modification of the configuration ``JSON file``. + For those that supports commandline run time configuration, we included the respective command versions under the ``Command Line Interface`` tab + + At `this line `__ in the ``config.json`` is where you can update the ``debugging``. + + - ``debugging``: Boolean, create extended verbosity and intermediate outputs. 
Here we will look at the intermediate predictions
+      with Tensorboard, so we need to activate those intermediate outputs.
+
+    .. tabs::
+
+        .. tab:: JSON File
+
+            .. code-block:: json
+
+                "debugging": true
+
+    At `this line `__ in the ``config.json`` is where you can update the ``object_detection_path`` within the ``object_detection_params`` sub-dictionary.
+
+    - ``object_detection_params:object_detection_path``: Location of the object detection model. This parameter corresponds
+      to the path of the first model from the cascaded architecture that segments the spinal cord. The packaged model in the
+      downloaded dataset located in the folder `trained_model/seg_sc_t1-t2-t2s-mt` will be used to detect the spinal cord.
+      This spinal cord segmentation model will be applied to the images and a bounding box will be created around this mask
+      to crop the image.
+
+    .. tabs::
+
+        .. tab:: JSON File
+
+            .. code-block:: json
+
+                "object_detection_path": "/trained_model/seg_sc_t1-t2-t2s-mt"
+
+    At `this line `__ in the ``config.json`` is where you can update the ``safety_factor`` within the ``object_detection_params`` sub-dictionary.
+
+    - ``object_detection_params:safety_factor``: Multiplicative factor to apply to each dimension of the bounding box. To
+      ensure all the CSF is included, a safety factor should be applied to the bounding box generated from the spinal cord.
+      A safety factor of 200% on each dimension is applied on the height and width of the image. The original depth of the
+      bounding box is kept since the CSF should not be present past this border.
+
+    .. tabs::
+
+        .. tab:: JSON File

-.. code-block:: bash

+            .. code-block:: json

-    cp /ivadomed/config/config.json .

+                "safety_factor": [2, 2, 1]

-Then, open it with a text editor. As described in the tutorial :doc:`../tutorials/one_class_segmentation_2d_unet`, make
-sure the ``command`` is set to "train" and ``path_data`` point to the location of the dataset. Below, we will discuss
-some of the key parameters to use cascaded models.

+    At `this line `__ in the ``config.json`` is where you can update the ``target_suffix`` within the ``loader_parameters`` sub-dictionary.

-- ``debugging``: Boolean, create extended verbosity and intermediate outputs. Here we will look at the intermediate predictions
-    with tensorboard, we therefore need to activate those intermediate outputs.
-
-    .. code-block:: xml

+    - ``loader_parameters:target_suffix``: Suffix of the ground truth segmentation. The ground truth is located under the
+      ``DATASET/derivatives/labels`` folder. The suffix for CSF labels in this dataset is ``_csfseg-manual``:

-    "debugging": true

+    .. tabs::

-- ``object_detection_params:object_detection_path``: Location of the object detection model. This parameter corresponds
-    to the path of the first model from the cascaded architecture that segments the spinal cord. The packaged model in the
-    downloaded dataset located in the folder `trained_model/seg_sc_t1-t2-t2s-mt` will be used to detect the spinal cord.
-    This spinal cord segmentation model will be applied to the images and a bounding box will be created around this mask
-    to crop the image.

+        .. tab:: JSON File

-    .. code-block:: xml

+            .. code-block:: json

-    "object_detection_path": "/data_example_spinegeneric/trained_model/seg_sc_t1-t2-t2s-mt"

+                "target_suffix": ["_csfseg-manual"]

-- ``object_detection_params:safety_factor``: Multiplicative factor to apply to each dimension of the bounding box. 
To
-  ensure all the CSF is included, a safety factor should be applied to the bounding box generated from the spinal cord.
-  A safety factor of 200% on each dimension is applied on the height and width of the image. The original depth of the
-  bounding box is kept since the CSF should not be present past this border.

+    .. tabs::

-    .. code-block:: xml

+        .. tab:: JSON File

-    "safety_factor": [2, 2, 1]

+            .. code-block:: json

-- ``loader_parameters:target_suffix``: Suffix of the ground truth segmentation. The ground truth are located under the
-    ``DATASET/derivatives/labels`` folder. The suffix for CSF labels in this dataset is ``_csfseg-manual``:

+                "contrast_params": {
+                    "training_validation": ["T2w"],
+                    "testing": ["T2w"],
+                    "balance": {}
+                }

-    .. code-block:: xml

+    At `this line `__ in the ``config.json`` is where you can update the ``size`` within the ``transformation:CenterCrop`` sub-dictionary.

-    "target_suffix": ["_csfseg-manual"]

+    - ``transformation:CenterCrop:size``: Crop size in voxel. Images will be cropped or padded to fit these dimensions. This
+      allows all the images to have the same size during training. Since the images will be cropped around the spinal cord,
+      the image size can be reduced to avoid large zero padding.

-- ``loader_parameters:contrast_params``: Contrast(s) of interest. The CSF labels are only available in T2w contrast in
-    the example dataset.

+    .. tabs::

-    "contrast_params": {
-        "training_validation": ["T2w"],
-        "testing": ["T2w"],
-        "balance": {}
-    }

+        .. tab:: JSON File

-- ``transformation:CenterCrop:size``: Crop size in voxel. Images will be cropped or padded to fit these dimensions. This
-    allows all the images to have the same size during training. Since the images will be cropped around the spinal cord,
-    the image size can be reduced to avoid large zero padding.

+            .. code-block:: json

-    .. code-block:: xml

+                "CenterCrop": {
+                    "size": [64, 64]
+                }

-    "CenterCrop": {
-        "size": [64, 64]
-    }

 Train model
 -----------

-Once the configuration file is ready, run the training. `ivadomed` has an option to find a threshold value which optimized the dice score on the validation dataset. This threshold will be further used to binarize the predictions on testing data. Add the flag `-t` with an increment
-between 0 and 1 to perform this threshold optimization (i.e. ``-t 0.1`` will return the best threshold between 0.1,
-0.2, ..., 0.9)
+    Once the configuration file is ready, run the training. ``ivadomed`` has an option to find a threshold value that optimizes the Dice score on the validation dataset. This threshold will be further used to binarize the predictions on testing data. Add the flag ``-t`` with an increment
+    between 0 and 1 to perform this threshold optimization (i.e. ``-t 0.1`` will return the best threshold between 0.1,
+    0.2, ..., 0.9).
+
+    To help visualize the training, the flag ``--gif`` or ``-g`` can be used. The flag should be followed by the number of
+    slices by epoch to visualize. For example, ``-g 2`` will generate 2 GIFs of 2 randomly selected slices from the
+    validation set.
+
+    Make sure to run the CLI command with the ``--train`` flag, and to point to the location of the dataset via the flag ``--path-data path/to/bids/data``.
+
+    .. 
tabs::

-To help visualize the training, the flag ``--gif`` or ``-g`` can be used. The flag should be followed by the number of
-slices by epoch to visualize. For example, ``-g 2`` will generate 2 GIFs of 2 randomly selected slices from the
-validation set.

+        .. tab:: Command Line Interface

-Make sure to run the CLI command with the "--train" flag, and to point to the location of the dataset via the flag "--path-data path/to/bids/data ".

+            .. code-block:: bash

-.. code-block:: bash

+                ivadomed --train -c config.json --path-data path/to/bids/data --path-output path/to/output/directory

-    ivadomed --train -c config.json --path-data path/to/bids/data --path-output path/to/output/directory

-If you prefer to use config files over CLI flags, set "command" to the following in you config file:

+        .. tab:: JSON File

-.. code-block:: bash

+            If you prefer to use config files over CLI flags, set ``command`` to the following in your config file:

-    "command": "train"

+            .. code-block:: json

-You can also set "path_output", and "path_data" arguments in your config file.

+                "command": "train"

-Then run:

+            You can also set the ``path_output`` and ``path_data`` arguments in your config file.

-.. code-block:: bash

+            Then run:

-    ivadomed -c config.json

+            .. tabs::

-At the end of the training, the optimal threshold will be indicated:

+                .. tab:: Command Line Interface

-.. code-block:: console

+                    .. code-block:: bash

-    Running threshold analysis to find optimal threshold
-    Optimal threshold: 0.01
-    Saving plot: spineGeneric/roc.png

+                        ivadomed -c config.json
+
+    At the end of the training, the optimal threshold will be indicated:
+
+    .. code-block:: console
+
+        Running threshold analysis to find optimal threshold
+        Optimal threshold: 0.01
+        Saving plot: spineGeneric/roc.png

 Visualize training data
 -----------------------

-If the flag ``--gif`` or ``-g`` was used, the training can be visualized through gifs located in the folder specified by the --path-output flag
-/gifs.
+    If the flag ``--gif`` or ``-g`` was used, the training can be visualized through GIFs located in the ``gifs/`` subfolder
+    of the directory specified by the ``--path-output`` flag.
+
+    .. figure:: https://raw.githubusercontent.com/ivadomed/doc-figures/main/tutorials/cascaded_architecture/training.gif
+        :width: 300
+        :align: center

-.. figure:: https://raw.githubusercontent.com/ivadomed/doc-figures/main/tutorials/cascaded_architecture/training.gif
-    :width: 300
-    :align: center

+        Training visualization with GIF

-    Training visualization with GIF

+    Another way to visualize the training is to use Tensorboard. Tensorboard helps to visualize the augmented input images,
+    the model's prediction, the ground truth, the learning curves, and more. To access this data during or after training,
+    use the following command line:

-Another way to visualize the training is to use Tensorboard. Tensorboard helps to visualize the augmented input images,
-the model's prediction, the ground truth, the learning curves, and more. To access this data during or after training,
-use the following command-line:

+    .. tabs::

-.. code-block:: bash

+        .. tab:: Command Line Interface

-    tensorboard --logdir

+            .. code-block:: bash

-The following should be displayed in the terminal:

+                tensorboard --logdir

-.. code-block:: console

+            The following should be displayed in the terminal:
+
+            .. 
code-block:: console -Open your browser and type the URL provided, in this case ``http://localhost:6006/``. -In the scalars folder, the evolution of metrics, learning rate and loss through the epochs can be visualized. + Serving TensorBoard on localhost; to expose to the network, use a proxy or pass --bind_all + TensorBoard 2.2.1 at http://localhost:6006/ (Press CTRL+C to quit) -.. image:: https://raw.githubusercontent.com/ivadomed/doc-figures/main/tutorials/cascaded_architecture/tensorboard_scalar.png - :align: center + Open your browser and type the URL provided, in this case ``http://localhost:6006/``. + In the scalars folder, the evolution of metrics, learning rate and loss through the epochs can be visualized. -In the image folder, the training and validation ground truth, input images and predictions are displayed. With this -feature, it is possible to visualize the cropping from the first model and confirm that the spinal cord -was correctly located. + .. image:: https://raw.githubusercontent.com/ivadomed/doc-figures/main/tutorials/cascaded_architecture/tensorboard_scalar.png + :align: center -.. image:: https://raw.githubusercontent.com/ivadomed/doc-figures/main/tutorials/cascaded_architecture/tensorboard_images.png - :align: center + In the image folder, the training and validation ground truth, input images and predictions are displayed. With this + feature, it is possible to visualize the cropping from the first model and confirm that the spinal cord + was correctly located. + + .. image:: https://raw.githubusercontent.com/ivadomed/doc-figures/main/tutorials/cascaded_architecture/tensorboard_images.png + :align: center Evaluate model -------------- -- ``postprocessing:binarize_prediction``: Threshold at which predictions are binarized. Before testing the model, - modify the binarization threshold to have a threshold adapted to the data: + - ``postprocessing:binarize_prediction``: Threshold at which predictions are binarized. Before testing the model, + modify the binarization threshold to have a threshold adapted to the data: + + .. tabs:: + + .. tab:: JSON File -.. code-block:: xml + .. code-block:: json - "binarize_prediction": 0.01 + "binarize_prediction": 0.01 -To test and apply this model on the testing dataset, go to the `Evaluate model` section of the tutorial -:ref:`One-class segmentation with 2D U-Net`. + To test and apply this model on the testing dataset, go to the `Evaluate model` section of the tutorial + :ref:`One-class segmentation with 2D U-Net`. 
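+    For reference, the test invocation mirrors the training call from this tutorial, swapping ``--train``
+    for ``--test`` (a sketch only; the paths below are placeholders):
+
+    .. code-block:: bash
+
+        # Reuse the --path-output folder from training so the trained model can be found
+        ivadomed --test -c config.json --path-data path/to/bids/data --path-output path/to/output/directory
+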
diff --git a/docs/source/tutorials/detailed_results.csv b/docs/source/tutorials/detailed_results.csv index 289023bdd..0cb6ef478 100644 --- a/docs/source/tutorials/detailed_results.csv +++ b/docs/source/tutorials/detailed_results.csv @@ -1,4 +1,4 @@ -,log_directory,training_parameters,default_model,best_training_dice,best_training_loss,best_validation_dice,best_validation_loss +,path_output,training_parameters,default_model,best_training_dice,best_training_loss,best_validation_dice,best_validation_loss 0,spineGeneric-batch_size=2,"{'batch_size': 2, 'loss': {'name': 'DiceLoss'}, 'training_time': {'num_epochs': 1, 'early_stopping_patience': 50, 'early_stopping_epsilon': 0.001}, 'scheduler': {'initial_lr': 0.001, 'lr_scheduler': {'name': 'CosineAnnealingLR', 'base_lr': 1e-05, 'max_lr': 0.01}}, 'balance_samples': {'applied': False, 'type': 'gt'}, 'mixup_alpha': None, 'transfer_learning': {'retrain_model': None, 'retrain_fraction': 1.0, 'reset': True}}","{'name': 'Unet', 'dropout_rate': 0.3, 'bn_momentum': 0.9, 'depth': 3, 'is_2d': True}",-0.13313321973048692,-0.13313321973048692,-0.14559978920411557,-0.14559978920411557 2,spineGeneric-loss={'name': 'DiceLoss'},"{'batch_size': 18, 'loss': {'name': 'DiceLoss'}, 'training_time': {'num_epochs': 1, 'early_stopping_patience': 50, 'early_stopping_epsilon': 0.001}, 'scheduler': {'initial_lr': 0.001, 'lr_scheduler': {'name': 'CosineAnnealingLR', 'base_lr': 1e-05, 'max_lr': 0.01}}, 'balance_samples': {'applied': False, 'type': 'gt'}, 'mixup_alpha': None, 'transfer_learning': {'retrain_model': None, 'retrain_fraction': 1.0, 'reset': True}}","{'name': 'Unet', 'dropout_rate': 0.3, 'bn_momentum': 0.9, 'depth': 3, 'is_2d': True}",-0.03612175240414217,-0.03612175240414217,-0.07506937285264333,-0.07506937285264333 5,spineGeneric-depth=3,"{'batch_size': 18, 'loss': {'name': 'DiceLoss'}, 'training_time': {'num_epochs': 1, 'early_stopping_patience': 50, 'early_stopping_epsilon': 0.001}, 'scheduler': {'initial_lr': 0.001, 'lr_scheduler': {'name': 'CosineAnnealingLR', 'base_lr': 1e-05, 'max_lr': 0.01}}, 'balance_samples': {'applied': False, 'type': 'gt'}, 'mixup_alpha': None, 'transfer_learning': {'retrain_model': None, 'retrain_fraction': 1.0, 'reset': True}}","{'name': 'Unet', 'dropout_rate': 0.3, 'bn_momentum': 0.9, 'depth': 3, 'is_2d': True}",-0.0344025717349723,-0.0344025717349723,-0.06566549402972062,-0.06566549402972062 diff --git a/docs/source/tutorials/one_class_segmentation_2d_unet.rst b/docs/source/tutorials/one_class_segmentation_2d_unet.rst index 478d1961b..2cf205896 100644 --- a/docs/source/tutorials/one_class_segmentation_2d_unet.rst +++ b/docs/source/tutorials/one_class_segmentation_2d_unet.rst @@ -1,306 +1,564 @@ One-class segmentation with 2D U-Net ==================================== -In this tutorial we will learn the following features: + In this tutorial we will learn the following features: -- Training of a segmentation model (U-Net 2D) with a single label on multiple contrasts, + - Training of a segmentation model (U-Net 2D) with a single label on multiple contrasts, + - Testing of a trained model and computation of 3D evaluation metrics. + - Visualization of the outputs of a trained model. -- Testing of a trained model and computation of 3D evaluation metrics. + An interactive Colab version of this tutorial is directly accessible here: |image_badge| -- Visualization of the outputs of a trained model. +.. 
|image_badge| image:: https://colab.research.google.com/assets/colab-badge.png
+    :target: https://colab.research.google.com/github/ivadomed/ivadomed/blob/master/testing/tutorials/tutorial_1_2d_segmentation_unet.ipynb

 .. _Download dataset:

 Download dataset
 ----------------

-We will use a publicly-available dataset consisting of MRI data of the spinal cord. This dataset is a subset of the
-`spine-generic multi-center dataset `_ and has been pre-processed
-to facilitate training/testing of a new model. Namely, for each subject, all six contrasts were co-registered together.
-Semi-manual cord segmentation for all modalities and manual cerebrospinal fluid labels for T2w modality were created.
-More details `here `_.
+    We will use a publicly-available dataset consisting of MRI data of the spinal cord. This dataset is a subset of the
+    `spine-generic multi-center dataset `_ and has been pre-processed
+    to facilitate training/testing of a new model. Namely, for each subject, all six contrasts were co-registered together.
+    Semi-manual cord segmentation for all modalities and manual cerebrospinal fluid labels for T2w modality were created.
+    More details `here `_.

-In addition to the MRI data, this sample dataset also includes a trained model for spinal cord segmentation.
+    In addition to the MRI data, this sample dataset also includes a trained model for spinal cord segmentation.

-To download the dataset (~490MB), run the following commands in your terminal:
+    To download the dataset (~490MB), run the following commands in your terminal:

-.. code-block:: bash
+    .. tabs::

-    # Download data
-    ivadomed_download_data -d data_example_spinegeneric
+        .. tab:: Command Line Interface
+
+            .. code-block:: bash
+
+                # Download data
+                ivadomed_download_data -d data_example_spinegeneric

 Configuration file
 ------------------

-In ``ivadomed``, training is orchestrated by a configuration file. Examples of configuration files are available in
-the ``ivadomed/config/`` and the documentation is available in :doc:`../configuration_file`.
+    In ``ivadomed``, **training** is orchestrated by a configuration file. Examples of configuration files are available in
+    the ``ivadomed/config/`` folder and the documentation is available in :doc:`../configuration_file`.
+
+    In this tutorial we will use the configuration file: ``ivadomed/config/config.json``. First off, copy this configuration
+    file in your local directory (to avoid modifying the source file):
+
+    .. tabs::
+
+        .. tab:: Command Line Interface
+
+            .. code-block:: bash
+
+                cp /ivadomed/config/config.json .
+
+    Then, open it with a text editor. You can also `view it directly here: `_ or see it in the collapsed JSON code block below.
+
+    .. collapse:: Reveal the embedded `config.json`.
+
+        .. 
code-block:: json + :linenos: + + { + "command": "train", + "gpu_ids": [0], + "path_output": "spineGeneric", + "model_name": "my_model", + "debugging": false, + "object_detection_params": { + "object_detection_path": null, + "safety_factor": [1.0, 1.0, 1.0] + }, + "loader_parameters": { + "path_data": ["data_example_spinegeneric"], + "subject_selection": {"n": [], "metadata": [], "value": []}, + "target_suffix": ["_seg-manual"], + "extensions": [".nii.gz"], + "roi_params": { + "suffix": null, + "slice_filter_roi": null + }, + "contrast_params": { + "training_validation": ["T1w", "T2w", "T2star"], + "testing": ["T1w", "T2w", "T2star"], + "balance": {} + }, + "slice_filter_params": { + "filter_empty_mask": false, + "filter_empty_input": true + }, + "slice_axis": "axial", + "multichannel": false, + "soft_gt": false + }, + "split_dataset": { + "fname_split": null, + "random_seed": 6, + "split_method" : "participant_id", + "data_testing": {"data_type": null, "data_value":[]}, + "balance": null, + "train_fraction": 0.6, + "test_fraction": 0.2 + }, + "training_parameters": { + "batch_size": 18, + "loss": { + "name": "DiceLoss" + }, + "training_time": { + "num_epochs": 100, + "early_stopping_patience": 50, + "early_stopping_epsilon": 0.001 + }, + "scheduler": { + "initial_lr": 0.001, + "lr_scheduler": { + "name": "CosineAnnealingLR", + "base_lr": 1e-5, + "max_lr": 1e-2 + } + }, + "balance_samples": { + "applied": false, + "type": "gt" + }, + "mixup_alpha": null, + "transfer_learning": { + "retrain_model": null, + "retrain_fraction": 1.0, + "reset": true + } + }, + "default_model": { + "name": "Unet", + "dropout_rate": 0.3, + "bn_momentum": 0.1, + "final_activation": "sigmoid", + "depth": 3 + }, + "FiLMedUnet": { + "applied": false, + "metadata": "contrasts", + "film_layers": [0, 1, 0, 0, 0, 0, 0, 0, 0, 0] + }, + "Modified3DUNet": { + "applied": false, + "length_3D": [128, 128, 16], + "stride_3D": [128, 128, 16], + "attention": false, + "n_filters": 8 + }, + "uncertainty": { + "epistemic": false, + "aleatoric": false, + "n_it": 0 + }, + "postprocessing": { + "remove_noise": {"thr": -1}, + "keep_largest": {}, + "binarize_prediction": {"thr": 0.5}, + "uncertainty": {"thr": -1, "suffix": "_unc-vox.nii.gz"}, + "fill_holes": {}, + "remove_small": {"unit": "vox", "thr": 3} + }, + "evaluation_parameters": { + "target_size": {"unit": "vox", "thr": [20, 100]}, + "overlap": {"unit": "vox", "thr": 3} + }, + "transformation": { + "Resample": + { + "hspace": 0.75, + "wspace": 0.75, + "dspace": 1 + }, + "CenterCrop": { + "size": [128, 128]}, + "RandomAffine": { + "degrees": 5, + "scale": [0.1, 0.1], + "translate": [0.03, 0.03], + "applied_to": ["im", "gt"], + "dataset_type": ["training"] + }, + "ElasticTransform": { + "alpha_range": [28.0, 30.0], + "sigma_range": [3.5, 4.5], + "p": 0.1, + "applied_to": ["im", "gt"], + "dataset_type": ["training"] + }, + "NormalizeInstance": {"applied_to": ["im"]} + } + } + + + From this point onward, we will discuss some of the key parameters to perform a one-class 2D + segmentation training. Most parameters are configurable only via modification of the configuration ``JSON file``. + For those that supports command line run time configuration, we included the respective command versions under the ``Command Line Interface`` tab + + + - ``command``: Action to perform. Here, we want to **train** a model: + + .. tabs:: + + .. group-tab:: JSON File + + We can set the field within the newly copied ``config.json`` file as follow, at `this line `__: + + .. 
code-block:: json + + "command": "train" + + + .. group-tab:: Command Line Interface + + Note that you can also pass this argument via CLI (see `Usage <../usage.html>`__) + + .. code-block:: bash + + ivadomed --train -c path/to/config + + + - ``path_output``: Folder name that will contain the output files (e.g., trained model, predictions, results). + + .. tabs:: + + .. group-tab:: JSON File + + At `this line `__ in the ``config.json`` is where you can update the ``path_output``. + + .. code-block:: json + + "path_output": "spineGeneric" + + .. group-tab:: Command Line Interface + + Note that you can also pass this argument via CLI (see `Usage <../usage.html>`__) + + .. code-block:: bash + + ivadomed -c path/to/config --path-output path/to/output/directory + + - ``loader_parameters:path_data``: Location of the dataset. As discussed in `Data <../data.html>`__, the dataset + should conform to the BIDS standard. Modify the path so it points to the location of the downloaded dataset. + + .. tabs:: + + .. group-tab:: JSON File + + At `this line `__ in the ``config.json`` is where you can update the ``path_data`` within the ``loader_parameters`` sub-dictionary. -In this tutorial we will use the configuration file: ``ivadomed/config/config.json``. -First off, copy this configuration file in your local directory (to avoid modifying the source file): + .. code-block:: json -.. code-block:: bash + "path_data": "data_example_spinegeneric" - cp /ivadomed/config/config.json . + .. group-tab:: Command Line Interface -Then, open it with a text editor. Below we will discuss some of the key parameters to perform a one-class 2D -segmentation training. + Note that you can also pass this argument via CLI (see `Usage <../usage.html>`__) -- ``command``: Action to perform. Here, we want to train a model, so we set the fields as follows: + .. code-block:: bash - .. code-block:: xml + ivadomed -c path/to/config --path-data path/to/bids/data - "command": "train" + - ``loader_parameters:target_suffix``: Suffix of the ground truth segmentation. The ground truth is located + under the ``DATASET/derivatives/labels`` folder. In our case, the suffix is ``_seg-manual``: -Note that you can also pass this argument via CLI (see `Usage <../usage.html>`__) - - .. code-block:: bash + .. tabs:: - ivadomed --train -c path/to/config + .. group-tab:: JSON File -- ``path_output``: Folder name that will contain the output files (e.g., trained model, predictions, results). + At `this line `__ in the ``config.json`` is where you can update the ``target_suffix`` within the ``loader_parameters`` sub-dictionary. - .. code-block:: xml + .. code-block:: json - "path_output": "spineGeneric" + "target_suffix": ["_seg-manual"] -Note that you can also pass this argument via CLI (see `Usage <../usage.html>`__) - - .. code-block:: bash + - ``loader_parameters:contrast_params``: Contrast(s) of interest - ivadomed -c path/to/config --path-output path/to/output/directory + .. tabs:: -- ``loader_parameters:path_data``: Location of the dataset. As discussed in `Data <../data.html>`__, the dataset - should conform to the BIDS standard. Modify the path so it points to the location of the downloaded dataset. + .. group-tab:: JSON File - .. code-block:: xml + At `this line `__ in the ``config.json`` is where you can update the ``contrast_params`` sub-dictionary within the ``loader_parameters`` sub-dictionary. - "path_data": "data_example_spinegeneric" + .. 
code-block:: json -Note that you can also pass this argument via CLI (see `Usage <../usage.html>`__) - - .. code-block:: bash + "contrast_params": { + "training_validation": ["T1w", "T2w", "T2star"], + "testing": ["T1w", "T2w", "T2star"], + "balance": {} + } - ivadomed -c path/to/config --path-data path/to/bids/data + - ``loader_parameters:slice_axis``: Orientation of the 2D slice to use with the model. -- ``loader_parameters:target_suffix``: Suffix of the ground truth segmentation. The ground truth is located - under the ``DATASET/derivatives/labels`` folder. In our case, the suffix is ``_seg-manual``: + .. tabs:: - .. code-block:: xml + .. group-tab:: JSON File - "target_suffix": ["_seg-manual"] + At `this line `__ in the ``config.json`` is where you can update the ``slice_axis`` subkey within the ``loader_parameters`` sub-dictionary. -- ``loader_parameters:contrast_params``: Contrast(s) of interest + .. code-block:: json - .. code-block:: xml + "slice_axis": "axial" - "contrast_params": { - "training_validation": ["T1w", "T2w", "T2star"], - "testing": ["T1w", "T2w", "T2star"], - "balance": {} - } + - ``loader_parameters:multichannel``: Turn on/off multi-channel training. If ``true``, each sample has several + channels, where each channel is an image contrast. If ``false``, only one image contrast is used per sample. -- ``loader_parameters:slice_axis``: Orientation of the 2D slice to use with the model. + .. tabs:: - .. code-block:: xml + .. group-tab:: JSON File - "slice_axis": "axial" + At `this line `__ in the ``config.json`` is where you can update the ``multichannel`` subkey within the ``loader_parameters`` sub-dictionary. -- ``loader_parameters:multichannel``: Turn on/off multi-channel training. If ``true``, each sample has several - channels, where each channel is an image contrast. If ``false``, only one image contrast is used per sample. + .. code-block:: json - .. code-block:: xml + "multichannel": false - "multichannel": false + .. note:: - .. note:: + The multichannel approach requires that for each subject, the image contrasts are co-registered. This implies that + a ground truth segmentation is aligned with all contrasts, for a given subject. In this tutorial, only one channel + will be used. - The multichannel approach requires that for each subject, the image contrasts are co-registered. This implies that - a ground truth segmentation is aligned with all contrasts, for a given subject. In this tutorial, only one channel - will be used. + - ``training_parameters:training_time:num_epochs``: the maximum number of epochs that will be run during training. Each epoch is composed + of a training part and an evaluation part. It should be a strictly positive integer. -- ``training_time:num_epochs``: the maximum number of epochs that will be run during training. Each epoch is composed - of a training part and an evaluation part. It should be a strictly positive integer. - - .. code-block:: xml + .. tabs:: - "num_epochs": 100 + .. group-tab:: JSON File + + At `this line `__ in the ``config.json`` is where you can update the ``num_epochs`` subkey within the ``training_parameters:training_time`` sub-dictionary. + + .. code-block:: json + + "num_epochs": 100 Train model ----------- -Once the configuration file is ready, run the training: + Once the configuration file is ready, run the training: + + .. tabs:: + + .. group-tab:: Command Line Interface + + .. 
code-block:: bash
+
+                ivadomed --train -c config.json --path-data path/to/bids/data --path-output path/to/output/directory
+
+            - In the above command, we execute the ``--train`` command and manually specify ``--path-data`` and ``--path-output``, overriding the corresponding values in ``config.json``.
+
+            - ``--train``: We can pass other flags to execute different commands (training, testing, segmentation), see `Usage <../usage.html>`__.
+
+            - ``--path-output``: Folder name that will contain the output files (e.g., trained model, predictions, results).
+
+            - ``--path-data``: Location of the dataset. As discussed in `Data <../data.html>`__, the dataset
+              should conform to the BIDS standard. Modify the path so it points to the location of the downloaded dataset.
+
+        .. group-tab:: JSON File
+
+            If you set the ``command``, ``path_output``, and ``path_data`` arguments in your config file, you do not need to pass the specific CLI flags above.
+
+            Instead, make the following changes to the JSON file at the specific lines:
+
+            * Command parameter located `here `__

-.. code-block:: bash

+                .. code-block:: json

-    ivadomed --train -c config.json --path-data path/to/bids/data --path-output path/to/output/directory

+                    "command": "train"

-- We can pass other flags to execute different commands (training, testing, segmentation), see `Usage <../usage.html>`__.
-
+            * Path output parameter located `here `__

-- ``--path-output``: Folder name that will contain the output files (e.g., trained model, predictions, results).

+                .. code-block:: json

-    .. code-block:: bash

+                    "path_output": "spineGeneric"

-        --path-output path/to/output/directory

+                ``path-output``: Folder name that will contain the output files (e.g., trained model, predictions, results).

-- ``--path-data``: Location of the dataset. As discussed in `Data <../data.html>`__, the dataset
-    should conform to the BIDS standard. Modify the path so it points to the location of the downloaded dataset.

+            * Path data parameter located `here `__

-    .. code-block:: bash

+                .. code-block:: json

-        --path-data path/to/bids/data

+                    "path_data": "data_example_spinegeneric"

-- If you set the "command", "path_output", and "path_data" arguments in your config file, you do not need to pass the CLI flags:

+                ``path-data``: Location of the dataset. As discussed in `Data <../data.html>`__, the dataset should conform to the BIDS standard. Modify the path so it points to the location of the downloaded dataset.

-.. code-block:: bash

-    ivadomed -c config.json

+            Then execute the following simplified command:

-.. note::

+            .. tabs::

-    If a `compatible GPU `_ is available, it will be used by default.
-    Otherwise, training will use the CPU, which will take a prohibitively long computational time (several hours).

+                .. tab:: Command Line Interface

-The main parameters of the training scheme and model will be displayed on the terminal, followed by the loss value
-on training and validation sets at every epoch. To know more about the meaning of each parameter, go to
-:doc:`../configuration_file`. The value of the loss should decrease during the training.

+                    .. code-block:: bash

-.. code-block:: console

+                        ivadomed -c config.json

-    Creating output path: spineGeneric
-    Cuda is not available.
-    Working on cpu.

+            .. note::

-    Selected architecture: Unet, with the following parameters:
-        dropout_rate: 0.3
-        bn_momentum: 0.1
-        depth: 3
-        is_2d: True
-        final_activation: sigmoid
-        folder_name: my_model
-        in_channel: 1
-        out_channel: 1
-    Dataframe has been saved in spineGeneric\bids_dataframe.csv. 
- After splitting: train, validation and test fractions are respectively 0.6, 0.2 and 0.2 of participant_id. + If a `compatible GPU `_ is available, it will be used by default. + Otherwise, training will use the CPU, which will take a prohibitively long computational time (several hours). - Selected transformations for the ['training'] dataset: - Resample: {'wspace': 0.75, 'hspace': 0.75, 'dspace': 1} - CenterCrop: {'size': [128, 128]} - RandomAffine: {'degrees': 5, 'scale': [0.1, 0.1], 'translate': [0.03, 0.03], 'applied_to': ['im', 'gt']} - ElasticTransform: {'alpha_range': [28.0, 30.0], 'sigma_range': [3.5, 4.5], 'p': 0.1, 'applied_to': ['im', 'gt']} - NumpyToTensor: {} - NormalizeInstance: {'applied_to': ['im']} + The main parameters of the training scheme and model will be displayed on the terminal, followed by the loss value + on training and validation sets at every epoch. To know more about the meaning of each parameter, go to + :doc:`../configuration_file`. The value of the loss should decrease during the training. - Selected transformations for the ['validation'] dataset: - Resample: {'wspace': 0.75, 'hspace': 0.75, 'dspace': 1} - CenterCrop: {'size': [128, 128]} - NumpyToTensor: {} - NormalizeInstance: {'applied_to': ['im']} - Loading dataset: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 383.65it/s] - Loaded 92 axial slices for the validation set. - Loading dataset: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 282.10it/s] - Loaded 276 axial slices for the training set. - Creating model directory: spineGeneric\my_model + .. code-block:: console - Initialising model's weights from scratch. + Creating output path: spineGeneric + Cuda is not available. + Working on cpu. - Scheduler parameters: {'name': 'CosineAnnealingLR', 'base_lr': 1e-05, 'max_lr': 0.01} + Selected architecture: Unet, with the following parameters: + dropout_rate: 0.3 + bn_momentum: 0.1 + depth: 3 + is_2d: True + final_activation: sigmoid + folder_name: my_model + in_channel: 1 + out_channel: 1 + Dataframe has been saved in spineGeneric\bids_dataframe.csv. + After splitting: train, validation and test fractions are respectively 0.6, 0.2 and 0.2 of participant_id. - Selected Loss: DiceLoss - with the parameters: [] - Epoch 1 training loss: -0.0336. - Epoch 1 validation loss: -0.0382. 
+                Selected transformations for the ['training'] dataset:
+                Resample: {'hspace': 0.75, 'wspace': 0.75, 'dspace': 1}
+                CenterCrop: {'size': [128, 128]}
+                RandomAffine: {'degrees': 5, 'scale': [0.1, 0.1], 'translate': [0.03, 0.03], 'applied_to': ['im', 'gt']}
+                ElasticTransform: {'alpha_range': [28.0, 30.0], 'sigma_range': [3.5, 4.5], 'p': 0.1, 'applied_to': ['im', 'gt']}
+                NumpyToTensor: {}
+                NormalizeInstance: {'applied_to': ['im']}
+                Selected transformations for the ['validation'] dataset:
+                Resample: {'hspace': 0.75, 'wspace': 0.75, 'dspace': 1}
+                CenterCrop: {'size': [128, 128]}
+                NumpyToTensor: {}
+                NormalizeInstance: {'applied_to': ['im']}
+                Loading dataset: 100%|████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 383.65it/s]
+                Loaded 92 axial slices for the validation set.
+                Loading dataset: 100%|████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 282.10it/s]
+                Loaded 276 axial slices for the training set.
+                Creating model directory: spineGeneric\my_model

-After 100 epochs (see ``"num_epochs"`` in the configuration file), the Dice score on the validation set should
-be ~90%.
+                Initialising model's weights from scratch.
+
+                Scheduler parameters: {'name': 'CosineAnnealingLR', 'base_lr': 1e-05, 'max_lr': 0.01}
+
+                Selected Loss: DiceLoss
+                with the parameters: []
+                Epoch 1 training loss: -0.0336.
+                Epoch 1 validation loss: -0.0382.
+
+
+    After 100 epochs (see ``num_epochs`` in the configuration file), the Dice score on the validation set should
+    be ~90%.

.. _Evaluate model:

Evaluate model
--------------

-To test the trained model on the testing sub-dataset and compute evaluation metrics, run:
+    To test the trained model on the testing sub-dataset and compute evaluation metrics, run:
+
+    .. tabs::
+
+        .. tab:: Command Line Interface
+
+            .. code-block:: bash
+
+                ivadomed --test -c config.json --path-data path/to/bids/data --path-output path/to/output/directory
+
+        .. tab:: JSON File
+
+            If you prefer to use config files over CLI flags, set ``command`` to ``test`` in the `following line `__ in your config file:
+
+            .. code-block:: json
+
+                "command": "test"
+
+            You can also set the ``path_output`` and ``path_data`` arguments in the ``config.json``.
+
+    Then run:

-.. code-block:: bash

+    .. tabs::

-   ivadomed --test -c config.json --path-data path/to/bids/data --path-output path/to/output/directory

+        .. tab:: Command Line Interface

-If you prefer to use config files over CLI flags, set "command" to the following in you config file:
-. code-block:: bash

+            .. code-block:: bash

-    "command": "test"

+                ivadomed -c config.json

-You can also set "path_output", and "path_data" arguments in your config file.

+    The model's parameters will be displayed in the terminal, followed by a preview of the results for each image.
+    The resulting segmentation is saved for each image in the ``/pred_masks`` while a CSV file,
+    saved in ``/results_eval/evaluation_3Dmetrics.csv``, contains all the evaluation metrics. For more details
+    on the evaluation metrics, see :mod:`ivadomed.metrics`.

-Then run:
+    .. code-block:: console

-.. code-block:: bash
+        Output path already exists: spineGeneric
+        Cuda is not available.
+        Working on cpu.
- ivadomed -c config.json + Selected architecture: Unet, with the following parameters: + dropout_rate: 0.3 + bn_momentum: 0.1 + depth: 3 + is_2d: True + final_activation: sigmoid + folder_name: my_model + in_channel: 1 + out_channel: 1 + Dataframe has been saved in spineGeneric\bids_dataframe.csv. + After splitting: train, validation and test fractions are respectively 0.6, 0.2 and 0.2 of participant_id. -The model's parameters will be displayed in the terminal, followed by a preview of the results for each image. -The resulting segmentation is saved for each image in the ``/pred_masks`` while a csv file, -saved in ``/results_eval/evaluation_3Dmetrics.csv``, contains all the evaluation metrics. For more details -on the evaluation metrics, see :mod:`ivadomed.metrics`. + Selected transformations for the ['testing'] dataset: + Resample: {'hspace': 0.75, 'wspace': 0.75, 'dspace': 1} + CenterCrop: {'size': [128, 128]} + NumpyToTensor: {} + NormalizeInstance: {'applied_to': ['im']} + Loading dataset: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 373.59it/s] + Loaded 94 axial slices for the testing set. -.. code-block:: console + Loading model: spineGeneric\best_model.pt + Inference - Iteration 0: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:29<00:00, 4.86s/it] + {'dice_score': 0.9334570551249012, 'multi_class_dice_score': 0.9334570551249012, 'precision_score': 0.925126264682505, 'recall_score': 0.9428409070673442, 'specificity_score': 0.9999025807354961, 'intersection_over_union': 0.8756498644456311, 'accu + racy_score': 0.9998261755671077, 'hausdorff_score': 0.05965616760384793} - Output path already exists: spineGeneric - Cuda is not available. - Working on cpu. + Run Evaluation on spineGeneric\pred_masks - Selected architecture: Unet, with the following parameters: - dropout_rate: 0.3 - bn_momentum: 0.1 - depth: 3 - is_2d: True - final_activation: sigmoid - folder_name: my_model - in_channel: 1 - out_channel: 1 - Dataframe has been saved in spineGeneric\bids_dataframe.csv. - After splitting: train, validation and test fractions are respectively 0.6, 0.2 and 0.2 of participant_id. + Evaluation: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:05<00:00, 1.04it/s] + avd_class0 dice_class0 lfdr_101-INFvox_class0 lfdr_class0 ltpr_101-INFvox_class0 ltpr_class0 mse_class0 ... n_pred_class0 precision_class0 recall_class0 rvd_class0 specificity_class0 vol_gt_class0 vol_pred_class0 + image_id ... + sub-mpicbs06_T1w 0.086296 0.940116 0.0 0.0 1.0 1.0 0.002292 ... 1.0 0.902774 0.980680 -0.086296 0.999879 4852.499537 5271.249497 + sub-mpicbs06_T2star 0.038346 0.909164 0.0 0.0 1.0 1.0 0.003195 ... 1.0 0.892377 0.926595 -0.038346 0.999871 4563.749565 4738.749548 + sub-mpicbs06_T2w 0.032715 0.947155 0.0 0.0 1.0 1.0 0.001971 ... 1.0 0.932153 0.962648 -0.032715 0.999920 4852.499537 5011.249522 + sub-unf01_T1w 0.020288 0.954007 0.0 0.0 1.0 1.0 0.002164 ... 1.0 0.944522 0.963684 -0.020288 0.999917 6161.249412 6286.249400 + sub-unf01_T2star 0.001517 0.935124 0.0 0.0 1.0 1.0 0.002831 ... 
1.0 0.934416 0.935834 -0.001517 0.999904 5766.249450 5774.999449

-    Selected transformations for the ['testing'] dataset:
-    Resample: {'wspace': 0.75, 'hspace': 0.75, 'dspace': 1}
-    CenterCrop: {'size': [128, 128]}
-    NumpyToTensor: {}
-    NormalizeInstance: {'applied_to': ['im']}
-    Loading dataset: 100%|████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 373.59it/s]
-    Loaded 94 axial slices for the testing set.
+        [5 rows x 16 columns]

-    Loading model: spineGeneric\best_model.pt
-    Inference - Iteration 0: 100%|████████████████████████████████████████████████████████████████| 6/6 [00:29<00:00,  4.86s/it]
-    {'dice_score': 0.9334570551249012, 'multi_class_dice_score': 0.9334570551249012, 'precision_score': 0.925126264682505, 'recall_score': 0.9428409070673442, 'specificity_score': 0.9999025807354961, 'intersection_over_union': 0.8756498644456311, 'accu
-    racy_score': 0.9998261755671077, 'hausdorff_score': 0.05965616760384793}

-    Run Evaluation on spineGeneric\pred_masks

+    The test image segmentations are stored in ``/pred_masks/`` and have the same name as the input image
+    with the suffix ``_pred``. To visualize the segmentation of a given subject, you can use any NIfTI image viewer.
+    For `FSLeyes `_ users, this command will open the
+    input image with the overlaid prediction (segmentation) for one of the test subjects:

-    Evaluation: 100%|████████████████████████████████████████████████████████████████| 6/6 [00:05<00:00,  1.04it/s]
-                         avd_class0  dice_class0  lfdr_101-INFvox_class0  lfdr_class0  ltpr_101-INFvox_class0  ltpr_class0  mse_class0  ...  n_pred_class0  precision_class0  recall_class0  rvd_class0  specificity_class0  vol_gt_class0  vol_pred_class0
-    image_id                                                                                                                            ...
-    sub-mpicbs06_T1w       0.086296     0.940116                     0.0          0.0                     1.0          1.0    0.002292  ...            1.0          0.902774       0.980680   -0.086296            0.999879    4852.499537      5271.249497
-    sub-mpicbs06_T2star    0.038346     0.909164                     0.0          0.0                     1.0          1.0    0.003195  ...            1.0          0.892377       0.926595   -0.038346            0.999871    4563.749565      4738.749548
-    sub-mpicbs06_T2w       0.032715     0.947155                     0.0          0.0                     1.0          1.0    0.001971  ...            1.0          0.932153       0.962648   -0.032715            0.999920    4852.499537      5011.249522
-    sub-unf01_T1w          0.020288     0.954007                     0.0          0.0                     1.0          1.0    0.002164  ...            1.0          0.944522       0.963684   -0.020288            0.999917    6161.249412      6286.249400
-    sub-unf01_T2star       0.001517     0.935124                     0.0          0.0                     1.0          1.0    0.002831  ...            1.0          0.934416       0.935834   -0.001517            0.999904    5766.249450      5774.999449

+    .. tabs::

-    [5 rows x 16 columns]

+        .. tab:: Command Line Interface

+            .. code-block:: bash

-The test image segmentations are stored in ``/pred_masks/`` and have the same name as the input image
-with the suffix ``_pred``. To visualize the segmentation of a given subject, you can use any Nifti image viewer.
-For `FSLeyes `_ users, this command will open the
-input image with the overlaid prediction (segmentation) for one of the test subject:
+                fsleyes /sub-mpicbs06/anat/sub-mpicbs06_T2w.nii.gz /pred_masks/sub-mpicbs06_T2w_pred.nii.gz -cm red

-.. code-block:: bash
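+    If FSLeyes is not installed, a quick alternative is to overlay the prediction with ``nibabel`` and
+    ``matplotlib``. The snippet below is only a sketch: it assumes both packages are available in your
+    environment and that the paths match the FSLeyes example above; the displayed slice index is arbitrary.
+
+    .. code-block:: python
+
+        import nibabel as nib
+        import numpy as np
+        import matplotlib.pyplot as plt
+
+        # Same files as in the FSLeyes example above; adjust the paths to your output directory
+        img = nib.load("sub-mpicbs06/anat/sub-mpicbs06_T2w.nii.gz").get_fdata()
+        pred = nib.load("pred_masks/sub-mpicbs06_T2w_pred.nii.gz").get_fdata()
+
+        z = img.shape[2] // 2  # arbitrary axial slice
+        plt.imshow(img[:, :, z].T, cmap="gray", origin="lower")
+        # Hide the background so that only the segmented voxels are drawn in red
+        overlay = np.ma.masked_where(pred[:, :, z].T == 0, pred[:, :, z].T)
+        plt.imshow(overlay, cmap="autumn", alpha=0.7, origin="lower")
+        plt.axis("off")
+        plt.show()
+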
+    After training for 100 epochs, the segmentations should be similar to the ones presented in the following image.
+    The output and ground truth segmentations of the spinal cord are presented in red (subject ``sub-mpicbs06`` with
+    contrast T2w):

-    fsleyes /sub-mpicbs06/anat/sub-mpicbs06_T2w.nii.gz /pred_masks/sub-mpicbs06_T2w_pred.nii.gz -cm red

+    .. image:: https://raw.githubusercontent.com/ivadomed/doc-figures/main/tutorials/one_class_segmentation_2d_unet/sc_prediction.png
+        :align: center

-After the training for 100 epochs, the segmentations should be similar to the one presented in the following image.
-The output and ground truth segmentations of the spinal cord are presented in red (subject ``sub-mpicbs06`` with
-contrast T2w):

-.. image:: https://raw.githubusercontent.com/ivadomed/doc-figures/main/tutorials/one_class_segmentation_2d_unet/sc_prediction.png
-   :align: center

+Another set of test image segmentations is also present in ``/pred_masks/`` with the suffix ``_pred-TP-FP-FN`` when ``evaluation_parameters:object_detection_metrics`` is set to ``true`` (Default: ``true``). These files include 3 possible values depending on whether each object detected in the prediction is a True Positive (TP), False Positive (FP) or False Negative (FN) compared to the ground truth. In NIfTI files (``.nii.gz``), the respective values for TP, FP and FN are 1, 2 and 3.
diff --git a/docs/source/tutorials/two_class_microscopy_seg_2d_unet.rst b/docs/source/tutorials/two_class_microscopy_seg_2d_unet.rst
new file mode 100644
index 000000000..8753965d1
--- /dev/null
+++ b/docs/source/tutorials/two_class_microscopy_seg_2d_unet.rst
@@ -0,0 +1,319 @@
+Two-class microscopy segmentation with 2D U-Net
+===============================================
+
+In this tutorial we will learn the following features:
+
+- Training of a segmentation model (U-Net 2D) with two-class labels on a single contrast of microscopy PNG images,
+
+- Testing of a trained model and computation of evaluation metrics,
+
+- Visualization of the outputs of a trained model.
+
+Download dataset
+----------------
+
+We will use a publicly available dataset consisting of 10 microscopy samples of rat spinal cord.
+
+To download the dataset (~11MB), run the following command in your terminal:
+
+.. code-block:: bash
+
+    # Download data
+    ivadomed_download_data -d data_axondeepseg_sem
+
+Configuration file
+------------------
+
+In ``ivadomed``, training is orchestrated by a configuration file. Examples of configuration files are available in
+the ``ivadomed/config/`` folder, and the documentation is available in :doc:`../configuration_file`.
+
+In this tutorial, we will use the configuration file: ``ivadomed/config/config_microscopy.json``.
+First off, copy this configuration file in your local directory (to avoid modifying the source file):
+
+.. code-block:: bash
+
+    cp /ivadomed/config/config_microscopy.json .
+
+Then, open it with a text editor.
+Below we will discuss some of the key parameters to perform a two-class 2D
+microscopy segmentation training.
+
+- ``command``: Action to perform. Here, we want to train a model, so we set the field as follows:
+
+  .. code-block:: json
+
+      "command": "train"
+
+- ``path_output``: Folder name that will contain the output files (e.g., trained model, predictions, results).
+
+  .. code-block:: json
+
+      "path_output": "log_microscopy_sem"
+
+- ``loader_parameters:path_data``: Location of the dataset. As discussed in `Data <../data.html>`__, the dataset
+  should conform to the BIDS standard. Modify the path so it points to the location of the downloaded dataset.
+
+  ..
code-block:: json

+      "path_data": ["data_axondeepseg_sem"]
+
+- ``loader_parameters:target_suffix``: Suffix of the ground truth segmentations. The ground truths are located
+  under the ``data_axondeepseg_sem/derivatives/labels`` folder. In our case, the suffixes are ``_seg-axon-manual``
+  and ``_seg-myelin-manual``:
+
+  .. code-block:: json
+
+      "target_suffix": ["_seg-axon-manual", "_seg-myelin-manual"]
+
+- ``loader_parameters:extensions``: List of file extensions of the microscopy data. In our case, both the raw data and
+  derivatives are ``.png`` files.
+
+  .. code-block:: json
+
+      "extensions": [".png"]
+
+- ``loader_parameters:contrast_params``: Contrast(s) of interest. In our case, we are training a single contrast model
+  with contrast ``SEM``.
+
+  .. code-block:: json
+
+      "contrast_params": {
+          "training_validation": ["SEM"],
+          "testing": ["SEM"],
+          "balance": {}
+      }
+
+- ``loader_parameters:slice_axis``: Orientation of the 2D slice to use with the model.
+  2D PNG files must use the default ``axial``.
+
+  .. code-block:: json
+
+      "slice_axis": "axial"
+
+- ``split_dataset:split_method``: Describes the metadata used to split the train/validation/test sets.
+  Here, ``sample_id`` from the ``samples.tsv`` file will shuffle all samples, then split them between
+  train/validation/test sets.
+- ``split_dataset:train_fraction``: Fraction of the dataset's ``sample_id`` in the train set. In our case ``0.6``.
+- ``split_dataset:test_fraction``: Fraction of the dataset's ``sample_id`` in the test set. In our case ``0.1``.
+
+  .. code-block:: json
+
+      "split_method" : "sample_id"
+      "train_fraction": 0.6
+      "test_fraction": 0.1
+
+- ``training_parameters:training_time:num_epochs``: The maximum number of epochs that will be run during training.
+  Each epoch is composed of a training part and a validation part. It should be a strictly positive integer.
+  In our case, we will use 50 epochs.
+
+  .. code-block:: json
+
+      "num_epochs": 50
+
+- ``default_model:length_2D``: Size of the 2D patches used as the model's input tensors. We recommend using patches
+  between 256x256 and 512x512. In our case, we use patches of 256x256.
+- ``default_model:stride_2D``: Pixel shift over the input matrix used to create 2D patches. In our case, we use
+  a stride of 244 pixels in both dimensions, resulting in an overlap of 12 pixels between patches.
+
+  .. code-block:: json
+
+      "length_2D": [256, 256]
+      "stride_2D": [244, 244]
+
+- ``postprocessing:binarize_maxpooling``: Used to binarize predictions across all classes in multiclass models.
+  For each pixel, the class, including the background class, with the highest output probability will be segmented.
+
+  .. code-block:: json
+
+      "binarize_maxpooling": {}
+
+- ``evaluation_parameters:object_detection_metrics``: Used to indicate whether object detection metrics
+  (lesion true positive rate, lesion false detection rate and Hausdorff score) are computed
+  at evaluation time. For the axons and myelin segmentation task, we set this parameter to ``false``.
+
+  .. code-block:: json
+
+      "object_detection_metrics": false
+
+- ``transformation:Resample``: Used to resample images to a common resolution (in mm) before splitting into patches,
+  according to each image's real pixel size. In our case, we resample the images to a common resolution of 0.0001 mm
+  (0.1 μm) in both dimensions.
+
+  .. code-block:: json
+
+      "Resample":
+      {
+          "hspace": 0.0001,
+          "wspace": 0.0001
+      },
+
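+To make the relationship between ``length_2D`` and ``stride_2D`` concrete, the short sketch below computes the
+overlap and the number of patches along one dimension. This is plain Python for illustration only (not an
+``ivadomed`` API call), and it assumes patches are extracted every ``stride_2D`` pixels until the image is
+covered; the 2000-pixel image size is an arbitrary example.
+
+.. code-block:: python
+
+    import math
+
+    length_2d = 256  # patch size along one dimension, from "length_2D"
+    stride_2d = 244  # shift between consecutive patches, from "stride_2D"
+
+    # Overlap between two neighbouring patches
+    print(length_2d - stride_2d)  # 12 pixels, as stated above
+
+    # Patches needed to cover one dimension of an image, e.g. 2000 pixels wide
+    image_size = 2000  # arbitrary example
+    n_patches = math.ceil((image_size - length_2d) / stride_2d) + 1
+    print(n_patches)  # 9 patches along this dimension
+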
+
+Train model
+-----------
+
+Once the configuration file is ready, run the training:
+
+.. code-block:: bash
+
+    ivadomed -c config_microscopy.json
+
+Alternatively, the ``command``, ``path_output``, and ``path_data`` arguments can be passed as CLI flags,
+in which case they supersede the configuration file, see `Usage <../usage.html>`__.
+
+.. code-block:: bash
+
+    ivadomed --train -c config_microscopy.json --path-data path/to/bids/data --path-output path/to/output/directory
+
+.. note::
+
+    If a `compatible GPU `_ is available, it will be used by default.
+    Otherwise, training will use the CPU, which will take a prohibitively long computational time (several hours).
+
+The main parameters of the training scheme and model will be displayed on the terminal, followed by the loss value
+on training and validation sets at every epoch. To know more about the meaning of each parameter, go to
+:doc:`../configuration_file`. The value of the loss should decrease during the training.
+
+.. code-block:: console
+
+    No CLI argument given for command: ( --train | --test | --segment ). Will check config file for command...
+    CLI flag --path-output not used to specify output directory. Will check config file for directory...
+    CLI flag --path-data not used to specify BIDS data directory. Will check config file for directory...
+
+    Creating output path: log_microscopy_sem
+    Using GPU ID 0
+
+    Selected architecture: Unet, with the following parameters:
+        dropout_rate: 0.2
+        bn_momentum: 0.1
+        depth: 4
+        is_2d: True
+        final_activation: sigmoid
+        length_2D: [256, 256]
+        stride_2D: [244, 244]
+        folder_name: model_seg_rat_axon-myelin_sem
+        in_channel: 1
+        out_channel: 3
+
+    Dataframe has been saved in log_microscopy_sem/bids_dataframe.csv.
+    After splitting: train, validation and test fractions are respectively 0.6, 0.3 and 0.1 of sample_id.
+
+    Selected transformations for the ['training'] dataset:
+        Resample: {'hspace': 0.0001, 'wspace': 0.0001}
+        RandomAffine: {'degrees': 2.5, 'scale': [0.05, 0.05], 'translate': [0.015, 0.015], 'applied_to': ['im', 'gt']}
+        ElasticTransform: {'alpha_range': [100.0, 150.0], 'sigma_range': [4.0, 5.0], 'p': 0.5, 'applied_to': ['im', 'gt']}
+        NormalizeInstance: {'applied_to': ['im']}
+    Selected transformations for the ['validation'] dataset:
+        Resample: {'hspace': 0.0001, 'wspace': 0.0001}
+        NormalizeInstance: {'applied_to': ['im']}
+
+    Loading dataset: 100%|████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 738.48it/s]
+    Loaded 76 axial patches of shape [256, 256] for the validation set.
+    Loading dataset: 100%|████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 829.21it/s]
+    Loaded 252 axial patches of shape [256, 256] for the training set.
+    Creating model directory: log_microscopy_sem/model_seg_rat_axon-myelin_sem
+
+    Initialising model's weights from scratch.
+    Scheduler parameters: {'name': 'CosineAnnealingLR', 'base_lr': 1e-05, 'max_lr': 0.01}
+
+    Selected Loss: DiceLoss
+    with the parameters: []
+    Epoch 1 training loss: -0.6894.
+    Epoch 1 validation loss: -0.7908.
+
+After 50 epochs (see ``"num_epochs"`` in the configuration file), the Dice score on the validation set should be ~85%.
+
+.. note::
+
+    When loading the images for training or evaluation, a temporary NIfTI file will be created for each image in the
+    dataset directory (``path_data``) alongside the original PNG files.
+
+Evaluate model
+--------------
+
+To test the trained model on the testing sub-dataset and compute evaluation metrics, run:
+
+..
code-block:: bash

+    ivadomed -c config_microscopy.json --test
+
+If you prefer to use config files over CLI flags, set ``command`` to the following in your config file:
+
+.. code-block:: json
+
+    "command": "test"
+
+Then run:
+
+.. code-block:: bash
+
+    ivadomed -c config_microscopy.json
+
+The model's parameters will be displayed in the terminal, followed by a preview of the results for each image.
+The resulting segmentations are saved for each image in the ``/pred_masks`` while a CSV file,
+saved in ``/results_eval/evaluation_3Dmetrics.csv``, contains all the evaluation metrics.
+For more details on the evaluation metrics, see :mod:`ivadomed.metrics`.
+
+.. code-block:: console
+
+    CLI flag --path-output not used to specify output directory. Will check config file for directory...
+    CLI flag --path-data not used to specify BIDS data directory. Will check config file for directory...
+
+    Output path already exists: log_microscopy_sem
+    Using GPU ID 0
+
+    Selected architecture: Unet, with the following parameters:
+        dropout_rate: 0.2
+        bn_momentum: 0.1
+        depth: 4
+        is_2d: True
+        final_activation: sigmoid
+        length_2D: [256, 256]
+        stride_2D: [244, 244]
+        folder_name: model_seg_rat_axon-myelin_sem
+        in_channel: 1
+        out_channel: 3
+
+    Dataframe has been saved in log_microscopy_sem/bids_dataframe.csv.
+    After splitting: train, validation and test fractions are respectively 0.6, 0.3 and 0.1 of sample_id.
+
+    Selected transformations for the ['testing'] dataset:
+        Resample: {'hspace': 0.0001, 'wspace': 0.0001}
+        NormalizeInstance: {'applied_to': ['im']}
+
+    Loading dataset: 100%|████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 413.48it/s]
+    Loaded 16 axial patches of shape [256, 256] for the testing set.
+    Loading model: log_microscopy_sem/best_model.pt
+
+    Inference - Iteration 0: 100%|████████████████████████████████████████████████████████████████| 4/4 [00:01<00:00,  2.89it/s]
+    Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.
+    Lossy conversion from float64 to uint8. Range [0, 1]. Convert image to uint8 prior to saving to suppress this warning.
+    {'dice_score': 0.8381376827003003, 'multi_class_dice_score': 0.8422281034034607, 'precision_score': 0.8342335786851753,
+    'recall_score': 0.8420784999205466, 'specificity_score': 0.9456594910680598, 'intersection_over_union': 0.7213743575471384,
+    'accuracy_score': 0.9202670087814067, 'hausdorff_score': 0.0}
+
+    Run Evaluation on log_microscopy_sem/pred_masks
+
+    Evaluation: 100%|████████████████████████████████████████████████████████████████| 1/1 [00:13<00:00, 13.56s/it]
+    Lossy conversion from float64 to uint8. Range [0.0, 3.0]. Convert image to uint8 prior to saving to suppress this warning.
+    Lossy conversion from float64 to uint8. Range [0.0, 3.0]. Convert image to uint8 prior to saving to suppress this warning.
+                               avd_class0  avd_class1  dice_class0  dice_class1  ...  vol_gt_class0  vol_gt_class1  vol_pred_class0  vol_pred_class1
+    image_id
+    sub-rat3_sample-data9_SEM    0.082771    0.082971     0.868964     0.815492  ...   1.256960e-07   1.574890e-07     1.152920e-07     1.705560e-07
+
+    [1 rows x 26 columns]
+
+The test image segmentations are stored in ``/pred_masks/`` in PNG format and have the same name as
+the input image with the suffix ``_pred.png``. In our case: ``sub-rat3_sample-data9_SEM_class-0_pred.png`` and
+``sub-rat3_sample-data9_SEM_class-1_pred.png`` for axons and myelin respectively (in the same order as ``target_suffix``).
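+
+Since the predictions are plain PNG files, they can be previewed with any image library. For instance, here is a
+minimal sketch using ``matplotlib`` (assumed to be installed in your environment; adjust the paths to your
+``path_output`` directory):
+
+.. code-block:: python
+
+    import matplotlib.pyplot as plt
+
+    # File names from the paragraph above; adjust "pred_masks" to your output path
+    axons = plt.imread("pred_masks/sub-rat3_sample-data9_SEM_class-0_pred.png")
+    myelin = plt.imread("pred_masks/sub-rat3_sample-data9_SEM_class-1_pred.png")
+
+    fig, axes = plt.subplots(1, 2)
+    for ax, mask, title in zip(axes, [axons, myelin], ["axons", "myelin"]):
+        ax.imshow(mask, cmap="gray")
+        ax.set_title(title)
+        ax.axis("off")
+    plt.show()
+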
+A temporary NIfTI file containing the predictions for both classes, with the suffix ``_pred.nii.gz``, will also be
+present.
+
+After training for 50 epochs, the segmentations should be similar to the ones presented in the following image.
+The ground truth segmentations and predictions of the axons and myelin are presented in blue and red respectively
+(for ``sub-rat3_sample-data9_SEM``):
+
+.. image:: https://raw.githubusercontent.com/ivadomed/doc-figures/main/tutorials/two_classes_microscopy_seg_2d_unet/axon_myelin_predictions.png
+   :align: center
+
+
+Another set of test image segmentations is also present in ``/pred_masks/`` with the suffix ``_pred-TP-FP-FN`` when ``evaluation_parameters:object_detection_metrics`` is set to ``true``. These files include 3 possible values depending on whether each object detected in the prediction is a True Positive (TP), False Positive (FP) or False Negative (FN) compared to the ground truth. In PNG files (``.png``), the respective values for TP, FP and FN are 85, 170 and 255.
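+
+Because TP, FP and FN objects are encoded as fixed gray levels, a rough per-category object count can be extracted
+from these files. The snippet below is a hedged sketch only: it assumes ``scipy`` and ``matplotlib`` are available,
+and the file name is a hypothetical example following the ``_pred-TP-FP-FN`` suffix convention described above (the
+exact per-class file name may differ in your output folder).
+
+.. code-block:: python
+
+    import matplotlib.pyplot as plt
+    import numpy as np
+    from scipy import ndimage
+
+    # Hypothetical file name following the "_pred-TP-FP-FN" suffix convention above
+    mask = plt.imread("pred_masks/sub-rat3_sample-data9_SEM_class-0_pred-TP-FP-FN.png")
+    if mask.max() <= 1.0:
+        mask = np.round(mask * 255)  # plt.imread scales 8-bit PNGs to [0, 1]
+
+    # Count connected objects for each encoded value: TP=85, FP=170, FN=255
+    for value, name in [(85, "TP"), (170, "FP"), (255, "FN")]:
+        _, n_objects = ndimage.label(mask == value)
+        print(name, n_objects)
+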
diff --git a/docs/source/tutorials/uncertainty.rst b/docs/source/tutorials/uncertainty.rst
index 6f80a5b4b..be84bdb56 100644
--- a/docs/source/tutorials/uncertainty.rst
+++ b/docs/source/tutorials/uncertainty.rst
@@ -1,200 +1,198 @@
 Estimate uncertainty
 ====================

-This tutorial shows how to estimate uncertainty measures on the model predictions. The uncertainty measures implemented
-in ``ivadomed`` are detailed in implemented in ``ivadomed`` are detailed
-:ref:`Technical features `.
+    This tutorial shows how to estimate uncertainty measures on the model predictions. The uncertainty measures implemented
+    in ``ivadomed`` are detailed in :ref:`Technical features `.
+
+    An interactive Colab version of this tutorial is directly accessible `here `_.

 Download dataset
 ----------------

-A dataset example is available for this tutorial. If not already done, download the dataset with the following line.
-For more details on this dataset see :ref:`One-class segmentation with 2D U-Net`.
+    A dataset example is available for this tutorial. If not already done, download the dataset with the following line.
+    For more details on this dataset see :ref:`One-class segmentation with 2D U-Net`.

-.. code-block:: bash
+    .. code-block:: bash

-    # Download data
-    ivadomed_download_data -d data_example_spinegeneric
+        # Download data
+        ivadomed_download_data -d data_example_spinegeneric

 Configuration file
 ------------------

-In this tutorial we will use the configuration file: ``ivadomed/config/config.json``.
-First off, copy this configuration file in your local directory (to avoid modifying the source file):
-
-.. code-block:: bash
+    In this tutorial we will use the configuration file: ``ivadomed/config/config.json``.
+    First off, copy this configuration file in your local directory (to avoid modifying the source file):

-    cp /ivadomed/config/config.json .
+    .. code-block:: bash

-Please open it with a text editor.
-The configuration file will be modified to be the same as the one used for
-:ref:`Technical features `. As described in the tutorial
-:doc:`../tutorials/one_class_segmentation_2d_unet`, make sure ``path_data`` point to the location of the dataset.
-The parameters that are specific to this tutorial are:
+        cp /ivadomed/config/config.json .

-- ``path_output``: Location of the directory containing the trained model. To avoid having to train a model from
-  scratch, in the downloaded dataset, there is a trained model for spinal cord segmentation in the folder `trained_model`.
-  Modify the path so it points to the location of the trained model.
+    Please open it with a text editor.
+    The configuration file will be modified to be the same as the one used for
+    :ref:`Technical features `. As described in the tutorial
+    :doc:`../tutorials/one_class_segmentation_2d_unet`, make sure ``path_data`` points to the location of the dataset.
+    The parameters that are specific to this tutorial are:

-  .. code-block:: xml
+    - ``path_output``: Location of the directory containing the trained model. To avoid having to train a model from
+      scratch, there is an already trained model for spinal cord segmentation in the folder named `trained_model`, in the downloaded dataset.
+      Modify the path so it points to the location of the trained model.

-    "path_output": "/data_example_spinegeneric/trained_model"
+      .. code-block:: json

-  Note that you can also pass this argument via CLI (see :ref:`Usage `)
+          "path_output": "/data_example_spinegeneric/trained_model"

-  .. code-block:: bash
+      Note that you can also pass this argument via CLI (see :ref:`Usage `)

-    ivadomed -c path/to/config --path-output path/to/output/directory
+      .. code-block:: bash

-- ``command``: Action to perform. Here, we want to do some inference using the previously trained model, so we set the
-  field as follows:
+          ivadomed -c path/to/config --path-output path/to/output/directory

-  .. code-block:: xml
+    - ``command``: Action to perform. Here, we want to do some inference using the previously trained model, so we set the
+      field as follows:

-    "command": "test"
+      .. code-block:: json

-  Note that you can also pass this argument via CLI (see :ref:`Usage `)
+          "command": "test"

-  .. code-block:: bash
+      Note that you can also pass this argument via CLI (see :ref:`Usage `)

-    ivadomed --test -c path/to/config
+      .. code-block:: bash

-- ``uncertainty``: Type of uncertainty to estimate. Available choices are ``"epistemic"`` and
-  ``"aleatoric"``. Note that both can be ``true``. More details on the implementation are available in :ref:`Technical features `.
-  ``"n_it"`` controls the number of Monte Carlo iterations that are performed to estimate the uncertainty. Set it to a
-  non-zero positive integer for this tutorial (e.g. ``20``).
+          ivadomed --test -c path/to/config

-    "uncertainty": {
-        "epistemic": true,
-        "aleatoric": true,
-        "n_it": 20
-    }
+    - ``uncertainty``: Type of uncertainty to estimate. Available choices are ``epistemic`` and
+      ``aleatoric``. Note that both can be ``true``. More details on the implementation are available in :ref:`Technical features `.
+      ``n_it`` controls the number of Monte Carlo iterations that are performed to estimate the uncertainty. Set it to a
+      non-zero positive integer for this tutorial (e.g. ``20``).

+      .. code-block:: json

+          "uncertainty": {
+              "epistemic": true,
+              "aleatoric": true,
+              "n_it": 20
+          }

-- ``transformation``: Data augmentation transformation. If you have selected the aleatoric uncertainty, the data
-  augmentation that will be performed is the same as the one performed for the training. Note that only transformations
-  for which a ``undo_transform`` (i.e. inverse transformation) is available will be performed since these inverse
-  transformations are required to reconstruct the predicted volume.
+    - ``transformation``: Data augmentation transformation. If you have selected the aleatoric uncertainty, the data
+      augmentation that will be performed is the same as the one performed for the training. Note that only transformations
+      for which an ``undo_transform`` (i.e. 
inverse transformation) is available will be performed since these inverse + transformations are required to reconstruct the predicted volume. Run uncertainty estimation -------------------------- -Once the configuration file has been modified, run the inference with the following command: + Once the configuration file has been modified, run the inference with the following command: -.. code-block:: bash + .. code-block:: bash - ivadomed --test -c config.json --path-data /data_example_spinegeneric/trained_model --path-output path/to/output/directory + ivadomed --test -c config.json --path-data /data_example_spinegeneric --path-output /data_example_spinegeneric/trained_model -- Here, we want to do some inference using the previously trained model, so we set the - command flag as follows: + - Here, we want to do some inference using the previously trained model, so we set the + command flag as follows: - .. code-block:: bash + .. code-block:: bash - --test + --test -- ``--path-data``: Location of the directory containing the trained model. To avoid having to train a model from - scratch, in the downloaded dataset, there is a trained model for spinal cord segmentation in the folder `trained_model`. - Modify the path so it points to the location of the trained model. + - ``--path-data``: Location of the directory containing the dataset. - .. code-block:: bash + .. code-block:: bash - --path-data /data_example_spinegeneric/trained_model + --path-data /data_example_spinegeneric -- ``--path-output``: Folder name that will contain the output files (e.g., trained model, predictions, results). + - ``--path-output``: Folder name that will contain the output files (e.g., trained model, predictions, results). For the purpose of this particular tutorial, since we do not train the model from scratch, we set the output path to point to a folder containing the pre-trained model for spinal cord segmentation that comes with the dataset. Hence, after running this tutorial, the corresponding output files can be found inside the `trained_model` folder. - .. code-block:: bash + .. code-block:: bash - --path-output path/to/output/directory + --path-output /data_example_spinegeneric/trained_model -If you set the "command", "path_output", and "path_data" arguments in your config file, you do not need to pass the CLI flags: + If you set the ``command``, ``path_output``, and ``path_data`` arguments in your config file, you do not need to pass the CLI flags: -.. code-block:: bash + .. code-block:: bash - ivadomed -c config.json + ivadomed -c config.json -If aleatoric uncertainty was selected, then data augmentation operations will be performed at testing time, as indicated -in the terminal output (see below). Note that ``ElasticTransform`` has been desactivated because no ``undo_transform`` -function is available for it. + If aleatoric uncertainty was selected, then data augmentation operations will be performed at testing time, as indicated + in the terminal output (see below). Note that ``ElasticTransform`` has been deactivated because no ``undo_transform`` + function is available for it. -.. code-block:: bash + .. 
code-block:: bash - Selected transformations for the ['testing'] dataset: - Resample: {'wspace': 0.75, 'hspace': 0.75, 'dspace': 1} - CenterCrop: {'size': [128, 128]} - RandomAffine: {'degrees': 5, 'scale': [0.1, 0.1], 'translate': [0.03, 0.03], 'applied_to': ['im', 'gt']} - ElasticTransform: {'alpha_range': [28.0, 30.0], 'sigma_range': [3.5, 4.5], 'p': 0.1, 'applied_to': ['im', 'gt']} - NumpyToTensor: {} - NormalizeInstance: {'applied_to': ['im']} - ElasticTransform transform not included since no undo_transform available for it. + Selected transformations for the ['testing'] dataset: + Resample: {'hspace': 0.75, 'wspace': 0.75, 'dspace': 1} + CenterCrop: {'size': [128, 128]} + RandomAffine: {'degrees': 5, 'scale': [0.1, 0.1], 'translate': [0.03, 0.03], 'applied_to': ['im', 'gt']} + ElasticTransform: {'alpha_range': [28.0, 30.0], 'sigma_range': [3.5, 4.5], 'p': 0.1, 'applied_to': ['im', 'gt']} + NumpyToTensor: {} + NormalizeInstance: {'applied_to': ['im']} + ElasticTransform transform not included since no undo_transform available for it. -... otherwise, only preprocessing and data normalization are performed, see below: + ... otherwise, only preprocessing and data normalization are performed, see below: -.. code-block:: bash + .. code-block:: bash - Selected transformations for the ['testing'] dataset: - Resample: {'wspace': 0.75, 'hspace': 0.75, 'dspace': 1} - CenterCrop: {'size': [128, 128]} - NumpyToTensor: {} - NormalizeInstance: {'applied_to': ['im']} + Selected transformations for the ['testing'] dataset: + Resample: {'hspace': 0.75, 'wspace': 0.75, 'dspace': 1} + CenterCrop: {'size': [128, 128]} + NumpyToTensor: {} + NormalizeInstance: {'applied_to': ['im']} -For each Monte Carlo iteration, each testing image is segmented using the trained model and saved under ``pred_masks``, -with the iteration number as suffix (e.g. ``sub-001_pred_00.nii.gz`` ... ``sub-001_pred_19.nii.gz``). + For each Monte Carlo iteration, each testing image is segmented using the trained model and saved under ``pred_masks``, + with the iteration number as suffix (e.g. ``sub-001_pred_00.nii.gz`` ... ``sub-001_pred_19.nii.gz``). -.. code-block:: bash + .. code-block:: bash - Computing model uncertainty over 20 iterations. 
- Inference - Iteration 0: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:11<00:00, 2.27s/it] - Inference - Iteration 1: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.81s/it] - Inference - Iteration 2: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.96s/it] - Inference - Iteration 3: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:08<00:00, 1.66s/it] - Inference - Iteration 4: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:08<00:00, 1.69s/it] - Inference - Iteration 5: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.92s/it] - Inference - Iteration 6: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:08<00:00, 1.74s/it] - Inference - Iteration 7: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:08<00:00, 1.74s/it] - Inference - Iteration 8: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.83s/it] - Inference - Iteration 9: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:07<00:00, 1.59s/it] - Inference - Iteration 10: 100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.85s/it] - Inference - Iteration 11: 100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.85s/it] - Inference - Iteration 12: 100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.92s/it] - Inference - Iteration 13: 100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.83s/it] - Inference - Iteration 14: 100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.84s/it] - Inference - Iteration 15: 100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.87s/it] - Inference - Iteration 16: 100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.81s/it] - Inference - Iteration 17: 100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.95s/it] - Inference - Iteration 18: 100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.82s/it] - Inference - Iteration 19: 100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:08<00:00, 1.71s/it] + Computing model uncertainty over 20 iterations. 
+ Inference - Iteration 0: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:11<00:00, 2.27s/it] + Inference - Iteration 1: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.81s/it] + Inference - Iteration 2: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.96s/it] + Inference - Iteration 3: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:08<00:00, 1.66s/it] + Inference - Iteration 4: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:08<00:00, 1.69s/it] + Inference - Iteration 5: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.92s/it] + Inference - Iteration 6: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:08<00:00, 1.74s/it] + Inference - Iteration 7: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:08<00:00, 1.74s/it] + Inference - Iteration 8: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.83s/it] + Inference - Iteration 9: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:07<00:00, 1.59s/it] + Inference - Iteration 10: 100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.85s/it] + Inference - Iteration 11: 100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.85s/it] + Inference - Iteration 12: 100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.92s/it] + Inference - Iteration 13: 100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.83s/it] + Inference - Iteration 14: 100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.84s/it] + Inference - Iteration 15: 100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.87s/it] + Inference - Iteration 16: 100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.81s/it] + Inference - Iteration 17: 100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.95s/it] + Inference - Iteration 18: 100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.82s/it] + Inference - Iteration 19: 100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:08<00:00, 1.71s/it] -The Monte Carlo samples are then used to compute uncertainty measures for each image. The results are saved under -``pred_masks``. + The Monte Carlo samples are then used to compute uncertainty measures for each image. The results are saved under + ``pred_masks``. -.. code-block:: bash + .. 
code-block:: bash - Uncertainty Computation: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [01:31<00:00, 18.28s/it] + Uncertainty Computation: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [01:31<00:00, 18.28s/it] -Six files are generated during this process for each testing image: + Six files are generated during this process for each testing image: -- ``*_soft.nii.gz``: Soft segmentation (i.e. values between 0 and 1) which is generated by averaging the Monte Carlo - samples. -- ``*_pred.nii.gz``: Binary segmentation obtained by thresholding ``*_soft.nii.gz`` with ``1 / (Number of Monte Carlo - iterations)``. -- ``*_unc-vox.nii.gz``: Voxel-wise measure of uncertainty derived from the entropy of the Monte Carlo samples. The - higher a given voxel value is, the more uncertain is the prediction for this voxel. -- ``*_unc-avgUnc.nii.gz``: Structure-wise measure of uncertainty derived from the mean value of ``*_unc-vox.nii.gz`` - within a given connected object (e.g. a lesion, grey matter). -- ``*_unc-cv.nii.gz``: Structure-wise measure of uncertainty derived from the coefficient of variation of the volume - of a given connected object across the Monte Carlo samples. The higher a given voxel value is, the more uncertain is the - prediction for this voxel. -- ``*_unc-iou.nii.gz``: Structure-wise measure of uncertainty derived from the Intersection-over-Union of the - predictions of a given connected object across the Monte Carlo samples. The lower a given voxel value is, the more - uncertain is the prediction for this voxel. + - ``*_soft.nii.gz``: Soft segmentation (i.e. values between 0 and 1) which is generated by averaging the Monte Carlo + samples. + - ``*_pred.nii.gz``: Binary segmentation obtained by thresholding ``*_soft.nii.gz`` with ``1 / (Number of Monte Carlo + iterations)``. + - ``*_unc-vox.nii.gz``: Voxel-wise measure of uncertainty derived from the entropy of the Monte Carlo samples. The + higher a given voxel value is, the more uncertain is the prediction for this voxel. + - ``*_unc-avgUnc.nii.gz``: Structure-wise measure of uncertainty derived from the mean value of ``*_unc-vox.nii.gz`` + within a given connected object (e.g. a lesion, grey matter). + - ``*_unc-cv.nii.gz``: Structure-wise measure of uncertainty derived from the coefficient of variation of the volume + of a given connected object across the Monte Carlo samples. The higher a given voxel value is, the more uncertain is the + prediction for this voxel. + - ``*_unc-iou.nii.gz``: Structure-wise measure of uncertainty derived from the Intersection-over-Union of the + predictions of a given connected object across the Monte Carlo samples. The lower a given voxel value is, the more + uncertain is the prediction for this voxel. -These files can further be used for post-processing to refine the segmentation. For example, the voxels -depicted in pink are more uncertain than the ones in blue (left image): we might want to refine the model prediction by removing -from the foreground class the voxels with low uncertainty (blue, left image) AND low prediction value (dark red, middle image). + These files can further be used for post-processing to refine the segmentation. 
For example, the voxels
+    depicted in pink are more uncertain than the ones in blue (left image): we might want to refine the model prediction by removing
+    from the foreground class the voxels with low uncertainty (blue, left image) AND low prediction value (dark red, middle image).

-.. image:: https://raw.githubusercontent.com/ivadomed/doc-figures/main/tutorials/uncertainty/uncertainty_tutorial.png
-   :align: center
+    .. image:: https://raw.githubusercontent.com/ivadomed/doc-figures/main/tutorials/uncertainty/uncertainty_tutorial.png
+        :align: center
diff --git a/docs/source/usage.rst b/docs/source/usage.rst
index 5832c3112..6c2d8fcaf 100644
--- a/docs/source/usage.rst
+++ b/docs/source/usage.rst
@@ -27,7 +27,7 @@ If not set via CLI, then you MUST specify this field in the configuration file.
 ``config_file.json`` is a configuration file, which parameters are described in the
 :ref:`configuration_file:Configuration File`. This flag is *required*.

-``path/to/bids/data`` is the location of the dataset. As discussed in `Data <../data.html>`__, the dataset
+``path/to/bids/data`` is the location of the dataset. As discussed in :doc:`Data `, the dataset
 should conform to the BIDS standard. Modify the path so it points to the location of the downloaded dataset.

 ``path/to/output/directory`` is the folder name that will contain the output files (e.g., trained model, predictions, results)
@@ -36,3 +36,10 @@ Note that both path CLI flags are optional and can be specified instead via the
 If not set via CLI, then you MUST specify this field in the configuration file.

 Please see section ``TUTORIALS`` to run this command on an example dataset.
+
+Additional optional flags for the ``--segment`` command, for models trained with 2D patches (not available for 3D models):
+
+    ``--no-patch``: Do not use 2D patches when segmenting with models trained with patches. The ``--no-patch`` flag supersedes the
+    ``--overlap-2d`` flag. This option may not be suitable for large images, depending on computer RAM capacity.
+
+    ``--overlap-2d``: Custom overlap for 2D patches while segmenting. Example: ``--overlap-2d 48 48`` for an overlap of 48 pixels between patches in X and Y respectively. Default model overlap is used otherwise.
diff --git a/docs/source/use_cases.rst b/docs/source/use_cases.rst
index f3d04a869..66e354dc7 100644
--- a/docs/source/use_cases.rst
+++ b/docs/source/use_cases.rst
@@ -1,18 +1,18 @@
 Use cases
 =========

-Use case #1 - Spinal Cord Toolbox:
-----------------------------------
+Spinal Cord Toolbox
+-------------------

 `Spinal cord toolbox `__ (SCT) is an open-source analysis software package for processing MRI data of the spinal cord `[De Leener et al. 2017] `__. `ivadomed` is SCT's backbone for the automated segmentation of the spinal cord, gray matter, tumors, and multiple sclerosis lesions, as well as for the labeling of intervertebral discs.

-Use case 2 - Creation of anatomical template:
----------------------------------------------
+Creation of anatomical template
+-------------------------------

 `ivadomed` was used in the generation of several high-resolution anatomical MRI templates `[Calabrese et al. 2018 `__, `Gros et al. 2020] `__. To make anatomical templates, it is sometimes necessary to segment anatomical regions, such as the spinal cord white matter. When dealing with high resolution data, there may be several thousand 2D slices to segment, stressing the need for a fully-automated and robust solution. 
In these studies, only a handful of slices were manually-segmented and used to train a specific model. The model then predicted reliably and with high accuracy (Dice score > 90%) the delineation of anatomical structures for the thousands of remaining unlabeled slices.

-Use case 3 - Tumor segmentation:
---------------------------------
+Tumor segmentation
+------------------

 `ivadomed` also proves to be useful in the context of clinical radiology routine REF, where clinicians need to segment tumors, edema, and cavity to establish prognosis and monitor the outcome. The framework is composed of a cascaded architecture that detects the spinal cord, crops the image around the region of interest, and segments the tumor (Figure herebelow). The resulting model can be applied to new data using only CPUs, which is more convenient in the clinical setting. The advanced features and architectures available in `ivadomed`, such as FiLM, were pivotal in obtaining encouraging results despite the difficulty of the task and the relatively low number of images.

@@ -21,4 +21,12 @@ Use case 3 - Tumor segmentation:

     Fully automatic spinal cord tumor segmentation framework. Step 1: The spinal cord is localized using a 3D U-Net and the image is cropped around the generated mask. Step 2: The spinal cord tumors are segmented.

-    Figure tumor segmentation
+AxonDeepSeg
+-----------
+
+`AxonDeepSeg `__ (ADS) is an open-source analysis software package for segmenting and computing morphometry on microscopy data of nerve fibers `[Zaimi et al. 2018] `__. `ivadomed` is ADS's backbone for the automated segmentation of axons and myelin on scanning electron microscopy (SEM), transmission electron microscopy (TEM; Figure herebelow) and bright-field optical microscopy (BF) images.
+
+.. figure:: https://raw.githubusercontent.com/ivadomed/doc-figures/main/use_cases/ADS.png
+   :alt: Figure of automatic axons and myelin segmentation on TEM input image
+
+   Automatic axons and myelin segmentation on TEM input image. 
diff --git a/environment.yml b/environment.yml index dc1c5d918..7a97c8f2b 100644 --- a/environment.yml +++ b/environment.yml @@ -1,9 +1,9 @@ -name: IvadoMedEnv +name: ivadomed_env channels: - conda-forge - pytorch dependencies: - - python>=3.6, < 3.9 + - python>=3.7, < 3.9 - pip - pip: - --requirement requirements.txt diff --git a/ivadomed/config/config.json b/ivadomed/config/config.json index 01b376d0a..db25a6783 100644 --- a/ivadomed/config/config.json +++ b/ivadomed/config/config.json @@ -4,9 +4,12 @@ "path_output": "spineGeneric", "model_name": "my_model", "debugging": false, - "object_detection_params": { - "object_detection_path": null, - "safety_factor": [1.0, 1.0, 1.0] + "wandb": { + "wandb_api_key": "", + "project_name": "my_project", + "group_name": "my_group", + "run_name": "run-1", + "log_grads_every": 100 }, "loader_parameters": { "path_data": ["data_example_spinegeneric"], @@ -28,7 +31,8 @@ }, "slice_axis": "axial", "multichannel": false, - "soft_gt": false + "soft_gt": false, + "bids_validate": true }, "split_dataset": { "fname_split": null, @@ -39,13 +43,17 @@ "train_fraction": 0.6, "test_fraction": 0.2 }, + "object_detection_params": { + "object_detection_path": null, + "safety_factor": [1.0, 1.0, 1.0] + }, "training_parameters": { - "batch_size": 18, + "batch_size": 32, "loss": { "name": "DiceLoss" }, "training_time": { - "num_epochs": 100, + "num_epochs": 15, "early_stopping_patience": 50, "early_stopping_epsilon": 0.001 }, @@ -107,8 +115,8 @@ "transformation": { "Resample": { - "wspace": 0.75, "hspace": 0.75, + "wspace": 0.75, "dspace": 1 }, "CenterCrop": { diff --git a/ivadomed/config/config_bids.json b/ivadomed/config/config_bids.json index eefb8b015..e5f061cad 100644 --- a/ivadomed/config/config_bids.json +++ b/ivadomed/config/config_bids.json @@ -32,6 +32,10 @@ "name": "staining", "pattern": "[_/\\\\]+stain-([a-zA-Z0-9]+)" }, + { + "name": "tracer", + "pattern": "[_/\\\\]+trc-([a-zA-Z0-9]+)" + }, { "name": "reconstruction", "pattern": "[_/\\\\]+rec-([a-zA-Z0-9]+)" @@ -42,7 +46,7 @@ }, { "name": "run", - "pattern": "[_/\\\\]+run-0*(\\d+)", + "pattern": "[_/\\\\]+run-(\\d+)", "dtype": "int" }, { @@ -99,7 +103,7 @@ }, { "name": "datatype", - "pattern": "[/\\\\]+(anat|beh|dwi|eeg|fmap|func|ieeg|meg|perf|microscopy|ct)[/\\\\]+" + "pattern": "[/\\\\]+(anat|beh|dwi|eeg|fmap|func|ieeg|meg|micr|perf|pet|ct)[/\\\\]+" }, { "name": "extension", @@ -110,24 +114,69 @@ "default_path_patterns": [ "sub-{subject}[/ses-{session}]/{datatype|anat}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_ce-{ceagent}][_rec-{reconstruction}][_part-{part}]_{suffix}{extension<.nii|.nii.gz|.json>|.nii.gz}", "sub-{subject}[/ses-{session}]/{datatype|anat}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_ce-{ceagent}][_rec-{reconstruction}][_mod-{modality}]_{suffix}{extension<.nii|.nii.gz|.json>|.nii.gz}", - "sub-{subject}[/ses-{session}]/{datatype|func}/sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}][_ce-{ceagent}][_dir-{direction}][_rec-{reconstruction}][_run-{run}][_echo-{echo}][_part-{part}]_{suffix}{extension<.nii|.nii.gz|.json>|.nii.gz}", - "sub-{subject}[/ses-{session}]/{datatype|func}/sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}][_ce-{ceagent}][_dir-{direction}][_rec-{reconstruction}][_run-{run}][_echo-{echo}]_{suffix}{extension<.nii|.nii.gz|.json>|.nii.gz}", + 
"sub-{subject}[/ses-{session}]/{datatype|func}/sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}][_ce-{ceagent}][_rec-{reconstruction}][_dir-{direction}][_run-{run}][_echo-{echo}][_part-{part}]_{suffix}{extension<.nii|.nii.gz|.json>|.nii.gz}", + "sub-{subject}[/ses-{session}]/{datatype|func}/sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}][_ce-{ceagent}][_rec-{reconstruction}][_dir-{direction}][_run-{run}][_echo-{echo}]_{suffix}{extension<.nii|.nii.gz|.json>|.nii.gz}", "sub-{subject}[/ses-{session}]/{datatype|dwi}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_part-{part}]_{suffix}{extension<.bval|.bvec|.json|.nii.gz|.nii>|.nii.gz}", - "sub-{subject}[/ses-{session}]/{datatype|fmap}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_dir-{direction}][_run-{run}]_{fmap}{extension<.nii|.nii.gz|.json>|.nii.gz}", - "sub-{subject}[/ses-{session}]/{datatype|fmap}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_ce-{ceagent}]_dir-{direction}[_run-{run}]_{fmap}{extension<.nii|.nii.gz|.json>|.nii.gz}", - "sub-{subject}[/ses-{session}]/[{datatype|func}/]sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}][_rec-{reconstruction}][_run-{run}][_echo-{echo}][_recording-{recording}]_{suffix}{extension<.tsv|.json>|.tsv}", + "sub-{subject}[/ses-{session}]/{datatype|fmap}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_dir-{direction}][_run-{run}]_{suffix}{extension<.nii|.nii.gz|.json>|.nii.gz}", + "sub-{subject}[/ses-{session}]/{datatype|fmap}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_ce-{ceagent}]_dir-{direction}[_run-{run}]_{suffix}{extension<.nii|.nii.gz|.json>|.nii.gz}", + "sub-{subject}[/ses-{session}]/{datatype|func}/sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}][_rec-{reconstruction}][_run-{run}][_echo-{echo}][_recording-{recording}]_{suffix}{extension<.tsv|.json>|.tsv}", "sub-{subject}[/ses-{session}]/[{datatype|func}/]sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}][_rec-{reconstruction}][_run-{run}][_echo-{echo}][_recording-{recording}]_{suffix}{extension<.tsv.gz|.json>|.tsv.gz}", "sub-{subject}[/ses-{session}]/{datatype|meg}/sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}][_run-{run}][_proc-{proc}]_{suffix}{extension}", "sub-{subject}[/ses-{session}]/{datatype|meg}/sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}][_run-{run}][_proc-{proc}]_{suffix}{extension<.tsv|.json>|.tsv}", - "sub-{subject}[/ses-{session}]/{datatype|meg}/sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}]_{suffix}{extension<.json>|.json}", - "sub-{subject}[/ses-{session}]/{datatype|meg}/sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}]_{suffix}{extension<.jpg>|.jpg}", + "sub-{subject}[/ses-{session}]/{datatype|meg}/sub-{subject}[_ses-{session}][_acq-{acquisition}]_{suffix}{extension<.json>|.json}", + "sub-{subject}[/ses-{session}]/{datatype|meg}/sub-{subject}[_ses-{session}][_acq-{acquisition}]_{suffix}{extension<.pos>|.pos}", + "sub-{subject}[/ses-{session}]/{datatype|meg}/sub-{subject}[_ses-{session}]_acq-{acquisition}_{suffix|meg}{extension<.fif|.dat>}", + "sub-{subject}[/ses-{session}]/{datatype|eeg}/sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}][_run-{run}]_{suffix}{extension<.edf|.vhdr|.vmrk|.eeg|.set|.fdt|.bdf|.json>}", + "sub-{subject}[/ses-{session}]/{datatype|ieeg}/sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}][_run-{run}]_{suffix}{extension<.mefd/|.json|.edf|.vhdr|.eeg|.vmrk|.set|.fdt|.nwb|.json>}", + 
"sub-{subject}[/ses-{session}]/{datatype}/sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}][_run-{run}]_{suffix}{extension<.tsv|.json>|.tsv}", + "sub-{subject}[/ses-{session}]/{datatype}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_space-{space}]_{suffix}{extension<.json>|.json}", + "sub-{subject}[/ses-{session}]/{datatype}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_space-{space}]_{suffix}{extension<.tsv|.json>|.tsv}", + "sub-{subject}[/ses-{session}]/{datatype}/sub-{subject}[_ses-{session}][_acq-{acquisition}]_{suffix|photo}{extension<.jpg>}", + "sub-{subject}[/ses-{session}]/{datatype|eeg}/sub-{subject}[_ses-{session}]_task-{task}[_acq-{acquisition}][_run-{run}][_recording-{recording}]_{suffix}{extension<.tsv.gz|.json>}", + "sub-{subject}[/ses-{session}]/{datatype|micr}/sub-{subject}[_ses-{session}]_sample-{sample}[_acq-{acquisition}][_stain-{staining}][_run-{run}][_chunk-{chunk}]_{suffix}{extension<.png|.tif|.ome.tif|.ome.btf|.json>}", + "sub-{subject}[/ses-{session}]/{datatype|micr}/sub-{subject}[_ses-{session}]_sample-{sample}[_acq-{acquisition}]_{suffix|photo}{extension<.jpg|.png|.tif|.json>}", + "sub-{subject}[/ses-{session}]/{datatype|anat}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_ce-{ceagent}][_rec-{reconstruction}][_run-{run}]_echo-{echo}[_part-{part}]_{suffix}{extension<.nii|.nii.gz|.json>|.nii.gz}", + "sub-{subject}[/ses-{session}]/{datatype|anat}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_ce-{ceagent}][_rec-{reconstruction}][_run-{run}][_echo-{echo}]_flip-{flip}[_part-{part}]_{suffix}{extension<.nii|.nii.gz|.json>|.nii.gz}", + "sub-{subject}[/ses-{session}]/{datatype|anat}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_ce-{ceagent}][_rec-{reconstruction}][_run-{run}]_inv-{inv}[_part-{part}]_{suffix}{extension<.nii|.nii.gz|.json>|.nii.gz}", + "sub-{subject}[/ses-{session}]/{datatype|anat}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_ce-{ceagent}][_rec-{reconstruction}][_run-{run}][_echo-{echo}][_flip-{flip}]_inv-{inv}[_part-{part}]_{suffix}{extension<.nii|.nii.gz|.json>|.nii.gz}", + "sub-{subject}[/ses-{session}]/{datatype|anat}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_ce-{ceagent}][_rec-{reconstruction}][_run-{run}][_echo-{echo}]_flip-{flip}_mt-{mt}[_part-{part}]_{suffix}{extension<.nii|.nii.gz|.json>|.nii.gz}", + "sub-{subject}[/ses-{session}]/{datatype|anat}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_ce-{ceagent}][_rec-{reconstruction}][_run-{run}]_mt-{mt}[_part-{part}]_{suffix}{extension<.nii|.nii.gz|.json>|.nii.gz}", + "sub-{subject}[/ses-{session}]/{datatype|anat}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_ce-{ceagent}][_rec-{reconstruction}][_run-{run}]_{suffix}{extension<.nii|.nii.gz|.json>|.nii.gz}", + "sub-{subject}[/ses-{session}]/{datatype|fmap}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_ce-{ceagent}][_rec-{reconstruction}][_run-{run}]_flip-{flip}[_inv-{inv}][_part-{part}]_{suffix}{extension<.nii|.nii.gz|.json>|.nii.gz}", + "sub-{subject}[/ses-{session}]/{datatype|fmap}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_ce-{ceagent}][_rec-{reconstruction}][_run-{run}]_echo-{echo}_flip-{flip}[_inv-{inv}][_part-{part}]_{suffix}{extension<.nii|.nii.gz|.json>|.nii.gz}", + "sub-{subject}[/ses-{session}]/{datatype|fmap}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_ce-{ceagent}][_rec-{reconstruction}][_run-{run}][_echo-{echo}][_flip-{flip}][_inv-{inv}][_part-{part}]_{suffix}{extension<.nii|.nii.gz|.json>|.nii.gz}", + 
"sub-{subject}[/ses-{session}]/{datatype|fmap}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_ce-{ceagent}][_rec-{reconstruction}][_run-{run}][_echo-{echo}]_flip-{flip}_inv-{inv}[_part-{part}]_{suffix}{extension<.nii|.nii.gz|.json>|.nii.gz}", + "sub-{subject}[/ses-{session}]/{datatype|fmap}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_ce-{ceagent}][_rec-{reconstruction}][_run-{run}]_{suffix}{extension<.nii|.nii.gz|.json>|.nii.gz}", + "sub-{subject}[/ses-{session}]/{datatype|pet}/sub-{subject}[_ses-{session}][_task-{task}][trc-{tracer}][_rec-{reconstruction}][_run-{run}]_{suffix}{extension<.nii|.nii.gz|.json>|.nii.gz}", + "sub-{subject}[/ses-{session}]/{datatype|pet}/sub-{subject}[_ses-{session}][_task-{task}][trc-{tracer}][_rec-{reconstruction}][_run-{run}]_recording-{recording}_{suffix}{extension<.tsv|.json>}", + "sub-{subject}[/ses-{session}]/{datatype|pet}/sub-{subject}[_ses-{session}]_task-{task}[trc-{tracer}][_rec-{reconstruction}][_run-{run}]_{suffix}{extension<.tsv|.json>}", + "sub-{subject}[/ses-{session}]/{datatype|pet}/sub-{subject}[_ses-{session}][_task-{task}][trc-{tracer}][_rec-{reconstruction}][_run-{run}][_recording-{recording}]_{suffix}{extension<.tsv.gz|.json>}", + "sub-{subject}[/ses-{session}]/{datatype|perf}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_rec-{reconstruction}][_dir-{direction}][_run-{run}]_{suffix}{extension<.nii|.nii.gz|.json>|.nii.gz}", + "sub-{subject}[/ses-{session}]/{datatype|perf}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_rec-{reconstruction}][_dir-{direction}][_run-{run}]_{suffix}{extension<.tsv|.json>}", + "sub-{subject}[/ses-{session}]/{datatype|perf}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_rec-{reconstruction}][_run-{run}]_{suffix}{extension<.jpg>}", + "sub-{subject}[/ses-{session}]/{datatype|perf}/sub-{subject}[_ses-{session}][_acq-{acquisition}][_rec-{reconstruction}][_dir-{direction}][_run-{run}][_recording{recording}]_{suffix}{extension<.tsv|.json>}", "[acq-{acquisition}_][ce-{ceagent}_][rec-{reconstruction}_]{suffix}{extension<.json>|.json}", "[acq-{acquisition}_][ce-{ceagent}_][rec-{reconstruction}_][mod-{modality}_]{suffix}{extension<.json>|.json}", "task-{task}[_acq-{acquisition}][_ce-{ceagent}][_dir-{direction}][_rec-{reconstruction}][_run-{run}][_echo-{echo}]_{suffix}{extension<.json>|.json}", "[acq-{acquisition}_]{suffix}{extension<.json>|.json}", - "[acq-{acquisition}_][dir-{direction}_][run-{run}_]{fmap}{extension<.json>|.json}", - "[acq-{acquisition}_][ce-{ceagent}_]dir-{direction}[_run-{run}]_{fmap}{extension<.json>|.json}", + "[acq-{acquisition}_][dir-{direction}_][run-{run}_]{suffix}{extension<.json>|.json}", + "[acq-{acquisition}_][ce-{ceagent}_]dir-{direction}[_run-{run}]_{suffix}{extension<.json>|.json}", "task-{task}[_acq-{acquisition}][_rec-{reconstruction}][_run-{run}][_echo-{echo}][_recording-{recording}]_{suffix}{extension<.json>|.json}", - "task-{task}[_acq-{acquisition}][_rec-{reconstruction}][_run-{run}][_echo-{echo}][_recording-{recording}]_{suffix}{extension<.json>}" + "task-{task}[_acq-{acquisition}][_rec-{reconstruction}][_run-{run}][_echo-{echo}][_recording-{recording}]_{suffix}{extension<.json>}", + "sample-{sample}[_acq-{acquisition}][_stain-{stain}][_run-{run}][_chunk-{chunk}]_{suffix}{extension<.json>|.json}", + "[acq-{acquisition}_][ce-{ceagent}_][rec-{reconstruction}_][run-{run}_]echo-{echo}[_part-{part}]_{suffix}{extension<.json>|.json}", + 
"[acq-{acquisition}_][ce-{ceagent}_][rec-{reconstruction}_][run-{run}_][echo-{echo}_]flip-{flip}[_part-{part}]_{suffix}{extension<.json>|.json}", + "[acq-{acquisition}_][ce-{ceagent}_][rec-{reconstruction}_][run-{run}_]inv-{inv}[_part-{part}]_{suffix}{extension<.json>|.json}", + "[acq-{acquisition}_][ce-{ceagent}_][rec-{reconstruction}_][run-{run}_][echo-{echo}_][flip-{flip}_]inv-{inv}[_part-{part}]_{suffix}{extension<.json>|.json}", + "[acq-{acquisition}_][ce-{ceagent}_][rec-{reconstruction}_][run-{run}_][echo-{echo}_]flip-{flip}_mt-{mt}[_part-{part}]_{suffix}{extension<.json>|.json}", + "[acq-{acquisition}_][ce-{ceagent}_][rec-{reconstruction}_][run-{run}_]mt-{mt}[_part-{part}]_{suffix}{extension<.json>|.json}", + "[acq-{acquisition}_][ce-{ceagent}_][rec-{reconstruction}_][run-{run}_]{suffix}{extension<.json>|.json}", + "[task-{task}_][trc-{tracer}_][rec-{reconstruction}_][run-{run}_]{suffix}{extension<.json>|.json}", + "[task-{task}_][trc-{tracer}_][rec-{reconstruction}_][run-{run}_]recording-{recording}_{suffix}{extension<.json>|.json}", + "task-{task}[_trc-{tracer}][_rec-{reconstruction}][_run-{run}]_{suffix}{extension<.json>|.json}", + "[task-{task}][_trc-{tracer}][_rec-{reconstruction}][_run-{run}][_recording-{recording}]_{suffix}{extension<.json>|.json}", + "[acq-{acquisition}_][rec-{reconstruction}_][dir-{direction}_][run-{run}_]{suffix}{extension<.json>|.json}", + "[acq-{acquisition}_][rec-{reconstruction}_][dir-{direction}_][run-{run}_]{suffix}{extension<.json>|.json}", + "[acq-{acquisition}_][rec-{reconstruction}_][dir-{direction}_][run-{run}_][recording-{recording}_]{suffix}{extension<.json>|.json}" ] } diff --git a/ivadomed/config/config_classification.json b/ivadomed/config/config_classification.json index 546cf2af6..954d1f64f 100644 --- a/ivadomed/config/config_classification.json +++ b/ivadomed/config/config_classification.json @@ -4,6 +4,13 @@ "path_output": "classification_lesion_ax", "model_name": "label_lesion_t2_t2star", "debugging": false, + "wandb": { + "wandb_api_key": "", + "project_name": "my_project", + "group_name": "my_group", + "run_name": "run-1", + "log_grads_every": 100 + }, "loader_parameters": { "path_data": ["../duke/sct_testing/large/"], "target_suffix": ["_lesion-manual"], @@ -87,8 +94,8 @@ "transformation": { "Resample": { - "wspace": 0.75, "hspace": 0.75, + "wspace": 0.75, "dspace": 1, "applied_to": ["im", "roi"] }, diff --git a/ivadomed/config/config_default.json b/ivadomed/config/config_default.json index 2ff54f874..70c971f05 100644 --- a/ivadomed/config/config_default.json +++ b/ivadomed/config/config_default.json @@ -9,6 +9,13 @@ "object_detection_path": null, "safety_factor": [1.0, 1.0, 1.0] }, + "wandb": { + "wandb_api_key": "", + "project_name": "my_project", + "group_name": "my_group", + "run_name": "run-1", + "log_grads_every": 100 + }, "loader_parameters": { "path_data": ["path_data"], "subject_selection": {"n": [], "metadata": [], "value": []}, @@ -27,6 +34,10 @@ "filter_empty_mask": false, "filter_empty_input": true }, + "patch_filter_params": { + "filter_empty_mask": false, + "filter_empty_input": false + }, "slice_axis": "axial", "multichannel": false, "soft_gt": false, @@ -84,6 +95,7 @@ }, "postprocessing": {}, "evaluation_parameters": { + "object_detection_metrics": true }, "transformation": { } diff --git a/ivadomed/config/config_microscopy.json b/ivadomed/config/config_microscopy.json index 68ad9a5a3..d17dab35d 100644 --- a/ivadomed/config/config_microscopy.json +++ b/ivadomed/config/config_microscopy.json @@ -3,13 +3,20 @@ 
"gpu_ids": [0], "path_output": "log_microscopy_sem", "model_name": "model_seg_rat_axon-myelin_sem", - "debugging": true, + "debugging": false, "object_detection_params": { "object_detection_path": null, "safety_factor": [1.0, 1.0, 1.0] }, + "wandb": { + "wandb_api_key": "", + "project_name": "my_project", + "group_name": "my_group", + "run_name": "run-1", + "log_grads_every": 100 + }, "loader_parameters": { - "path_data": ["data_example_microscopy_sem"], + "path_data": ["data_axondeepseg_sem"], "bids_config": "ivadomed/config/config_bids.json", "subject_selection": {"n": [], "metadata": [], "value": []}, "target_suffix": ["_seg-axon-manual", "_seg-myelin-manual"], @@ -27,6 +34,10 @@ "filter_empty_mask": false, "filter_empty_input": true }, + "patch_filter_params": { + "filter_empty_mask": false, + "filter_empty_input": false + }, "slice_axis": "axial", "multichannel": false, "soft_gt": false @@ -37,7 +48,7 @@ "split_method" : "sample_id", "data_testing": {"data_type": null, "data_value":[]}, "balance": null, - "train_fraction": 0.7, + "train_fraction": 0.6, "test_fraction": 0.1 }, "training_parameters": { @@ -51,11 +62,9 @@ "early_stopping_epsilon": 0.001 }, "scheduler": { - "initial_lr": 0.001, + "initial_lr": 0.005, "lr_scheduler": { - "name": "CosineAnnealingLR", - "base_lr": 1e-5, - "max_lr": 1e-2 + "name": "CosineAnnealingLR" } }, "balance_samples": { @@ -71,33 +80,37 @@ }, "default_model": { "name": "Unet", - "dropout_rate": 0.3, + "dropout_rate": 0.2, "bn_momentum": 0.1, - "final_activation": "sigmoid", "depth": 4, - "length_2D": [512, 512], - "stride_2D": [500, 500] + "is_2d": true, + "final_activation": "sigmoid", + "length_2D": [256, 256], + "stride_2D": [244, 244] }, "postprocessing": { "binarize_maxpooling": {} }, + "evaluation_parameters": { + "object_detection_metrics": false + }, "transformation": { "Resample": { - "wspace": 0.0001, - "hspace": 0.0001 + "hspace": 0.0001, + "wspace": 0.0001 }, "RandomAffine": { - "degrees": 5, - "scale": [0.1, 0.1], - "translate": [0.03, 0.03], + "degrees": 2.5, + "scale": [0.05, 0.05], + "translate": [0.015, 0.015], "applied_to": ["im", "gt"], "dataset_type": ["training"] }, "ElasticTransform": { - "alpha_range": [28.0, 30.0], - "sigma_range": [3.5, 4.5], - "p": 0.1, + "alpha_range": [100.0, 150.0], + "sigma_range": [4.0, 5.0], + "p": 0.5, "applied_to": ["im", "gt"], "dataset_type": ["training"] }, diff --git a/ivadomed/config/config_sctTesting.json b/ivadomed/config/config_sctTesting.json index 72dd9bf9f..463f6be9e 100644 --- a/ivadomed/config/config_sctTesting.json +++ b/ivadomed/config/config_sctTesting.json @@ -4,6 +4,13 @@ "path_output": "sct_testing_lesion_ax", "model_name": "seg_lesion_t2_t2star", "debugging": false, + "wandb": { + "wandb_api_key": "", + "project_name": "my_project", + "group_name": "my_group", + "run_name": "run-1", + "log_grads_every": 100 + }, "loader_parameters": { "path_data": ["../duke/sct_testing/large/"], "target_suffix": ["_lesion-manual"], @@ -86,8 +93,8 @@ "transformation": { "Resample": { - "wspace": 0.75, "hspace": 0.75, + "wspace": 0.75, "dspace": 1 }, "ROICrop": { diff --git a/ivadomed/config/config_small.json b/ivadomed/config/config_small.json index a83401535..e6bf1fd42 100644 --- a/ivadomed/config/config_small.json +++ b/ivadomed/config/config_small.json @@ -89,8 +89,8 @@ "transformation": { "Resample": { - "wspace": 0.75, "hspace": 0.75, + "wspace": 0.75, "dspace": 1 }, "CenterCrop": { diff --git a/ivadomed/config/config_spineGeHemis.json b/ivadomed/config/config_spineGeHemis.json index 
eb07e0470..eb72ea471 100644 --- a/ivadomed/config/config_spineGeHemis.json +++ b/ivadomed/config/config_spineGeHemis.json @@ -4,6 +4,13 @@ "path_output": "HeMIS", "model_name": "seg_tumor_t2_t1", "debugging": false, + "wandb": { + "wandb_api_key": "", + "project_name": "my_project", + "group_name": "my_group", + "run_name": "run-1", + "log_grads_every": 100 + }, "loader_parameters": { "path_data": ["data_example_spinegeneric"], "target_suffix": ["_seg"], @@ -103,8 +110,8 @@ "transformation": { "Resample": { - "wspace": 0.75, "hspace": 0.75, + "wspace": 0.75, "dspace": 1 }, "CenterCrop": { diff --git a/ivadomed/config/config_tumorSeg.json b/ivadomed/config/config_tumorSeg.json index 088f26211..aa9b85693 100644 --- a/ivadomed/config/config_tumorSeg.json +++ b/ivadomed/config/config_tumorSeg.json @@ -8,6 +8,13 @@ "object_detection_path": null, "safety_factor": [1.1, 1.1, 1.0] }, + "wandb": { + "wandb_api_key": "", + "project_name": "my_project", + "group_name": "my_group", + "run_name": "run-1", + "log_grads_every": 100 + }, "loader_parameters": { "path_data": ["/home/andreanne/Documents/dataset/toy_dataset"], "target_suffix": ["_seg-tumor"], @@ -95,8 +102,8 @@ "transformation": { "Resample": { - "wspace": 1, "hspace": 1, + "wspace": 1, "dspace": 2 }, "CenterCrop": {"size": [512, 256, 16]}, diff --git a/ivadomed/config_manager.py b/ivadomed/config_manager.py index 9dca46e02..60cc2cdbc 100644 --- a/ivadomed/config_manager.py +++ b/ivadomed/config_manager.py @@ -1,57 +1,61 @@ import json -import os import collections.abc +from typing import Dict, List, Any, KeysView, Union + from loguru import logger +from pathlib import Path from ivadomed import utils as imed_utils +from ivadomed.keywords import ConfigKW, LoaderParamsKW, SplitDatasetKW, DataTestingKW import copy -def update(d, u): +def update(source_dict: dict, destination_dict: dict) -> dict: """Update dictionary and nested dictionaries. Args: - d (dict): Source dictionary that is updated by destination dictionary. - u (dict): Destination dictionary. + source_dict (dict): Source dictionary that is updated by destination dictionary. + destination_dict (dict): Destination dictionary. Returns: dict: updated dictionary """ - for k, v in u.items(): - if isinstance(v, collections.abc.Mapping): - d[k] = update(d.get(k, {}), v) + for key, value in destination_dict.items(): + if isinstance(value, collections.abc.Mapping): + source_dict[key] = update(source_dict.get(key, {}), value) else: # If source dictionary has keys that the destination dict doesn't have, keep these keys - if k in d and isinstance(d[k], collections.abc.Mapping) and not isinstance(v, collections.abc.Mapping): + if key in source_dict and isinstance(source_dict[key], collections.abc.Mapping) and not isinstance(value, collections.abc.Mapping): pass else: - d[k] = v - return d + source_dict[key] = value + return source_dict -def deep_dict_compare(source_dict, dest_dict, keyname=None): +def deep_dict_compare(source_dict: dict, destination_dict: dict, keyname: str = None): """Compare and display differences between dictionaries (and nested dictionaries). Args: source_dict (dict): Source dictionary. - dest_dict (dict): Destination dictionary. + destination_dict (dict): Destination dictionary. keyname (str): Key name to indicate the path to nested parameter.
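Editor's note: the recursive merge implemented by `update()` above is easiest to see on toy dictionaries (the values below are illustrative, not a real configuration):

```python
from ivadomed.config_manager import update

# Nested keys from the user config override the defaults; defaults that
# the user config does not mention are preserved.
defaults = {"training_parameters": {"batch_size": 18,
                                    "loss": {"name": "DiceLoss"}}}
user_config = {"training_parameters": {"batch_size": 32}}

merged = update(defaults, user_config)
assert merged == {"training_parameters": {"batch_size": 32,
                                          "loss": {"name": "DiceLoss"}}}
```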
""" - for key in dest_dict: + for key in destination_dict: if key not in source_dict: key_str = key if keyname is None else keyname + key - logger.info(f' {key_str}: {dest_dict[key]}') + logger.info(f' {key_str}: {destination_dict[key]}') else: - if isinstance(dest_dict[key], collections.abc.Mapping): + if isinstance(destination_dict[key], collections.abc.Mapping): if isinstance(source_dict[key], collections.abc.Mapping): - deep_dict_compare(source_dict[key], dest_dict[key], key + ": ") + deep_dict_compare(source_dict[key], destination_dict[key], key + ": ") # In case a new dictionary appears in updated file else: - deep_dict_compare(source_dict, dest_dict[key], key + ": ") + deep_dict_compare(source_dict, destination_dict[key], key + ": ") -def load_json(config_path): +def load_json(config_path: str) -> dict: """Load json file content Args: @@ -66,8 +70,9 @@ def load_json(config_path): return default_config -# To ensure retrocompatibility for parameter changes in configuration file -KEY_CHANGE_DICT = {'UNet3D': 'Modified3DUNet', 'bids_path': 'path_data', 'log_directory': 'path_output'} +# To ensure retro-compatibility for parameter changes in configuration file +KEY_CHANGE_DICT = {'UNet3D': ConfigKW.MODIFIED_3D_UNET, 'bids_path': LoaderParamsKW.PATH_DATA, + 'log_directory': ConfigKW.PATH_OUTPUT} KEY_SPLIT_DATASET_CHANGE_LST = ['method', 'center_test'] @@ -82,50 +87,83 @@ class ConfigurationManager(object): context_original (dict): Provided configuration file. config_updated (dict): Updated configuration file. """ - def __init__(self, path_context): - self.path_context = path_context - self.key_change_dict = KEY_CHANGE_DICT - self.key_split_dataset_change_lst = KEY_SPLIT_DATASET_CHANGE_LST + + def __init__(self, path_context: str): + """ + Initialize the ConfigurationManager by validating the given path and loading the file. + Also load the default configuration file. + + Args: + path_context (str): Path to configuration file. + """ + self.path_context: str = path_context + self.key_change_dict: Dict[str, str] = KEY_CHANGE_DICT + self.key_split_dataset_change_lst: List[str] = KEY_SPLIT_DATASET_CHANGE_LST self._validate_path() - default_config_path = os.path.join(imed_utils.__ivadomed_dir__, "ivadomed", "config", "config_default.json") - self.config_default = load_json(default_config_path) - self.context_original = load_json(path_context) - self.config_updated = {} + default_config_path: str = str(Path(imed_utils.__ivadomed_dir__, "ivadomed", "config", "config_default.json")) + self.config_default: dict = load_json(default_config_path) + self.context_original: dict = load_json(path_context) + self.config_updated: dict = {} @property - def config_updated(self): + def config_updated(self) -> dict: + """ + This function simply returns the attribute `_config_updated`. + + Returns: + dict: `_config_updated` attribute + """ return self._config_updated @config_updated.setter - def config_updated(self, config_updated): + def config_updated(self, config_updated: dict): + """ + If config_updated is empty we copy the loaded configuration into it and apply some changes (changing keys name, + changing values, deleting key-value pair) to ensure retro-compatibility. + Sets the new config_updated to the attribute `_config_updated`. + + Args: + config_updated (dict): The new configuration to set. 
+ """ if config_updated == {}: - context = copy.deepcopy(self.context_original) + context: dict = copy.deepcopy(self.context_original) self.change_keys(context, list(context.keys())) - config_updated = update(self.config_default, context) - self.change_keys_values(config_updated['split_dataset'], config_updated['split_dataset'].keys()) + config_updated: dict = update(self.config_default, context) + self.change_keys_values(config_updated[ConfigKW.SPLIT_DATASET], + config_updated[ConfigKW.SPLIT_DATASET].keys()) - self._config_updated = config_updated + self._config_updated: dict = config_updated if config_updated['debugging']: self._display_differing_keys() - def get_config(self): + def get_config(self) -> dict: """Get updated configuration file with all parameters from the default config file. + Returns: dict: Updated configuration dict. """ return self.config_updated - def change_keys(self, context, keys): - for k in keys: + def change_keys(self, context: Union[dict, collections.abc.Mapping], keys: List[str]): + """ + This function changes the name of the keys of the context dictionary, that are also in the `key_change_dict` + attribute, to the values that are associated with them in the `key_change_dict` attribute. + + Args: + context (Union[dict, collections.abc.Mapping]): The dictionary to change. + keys (List[str]): The keys in context to consider. + """ + for key_to_change in keys: # Verify if key is still in the dict - if k in context: - if k == "NumpyToTensor": - del context[k] + if key_to_change in context: + # If the key_to_change is "NumpyToTensor", remove it from the context. + if key_to_change == "NumpyToTensor": + del context[key_to_change] continue - v = context[k] + value_to_change: Any = context[key_to_change] # Verify if value is a dictionary - if isinstance(v, collections.abc.Mapping): - self.change_keys(v, list(v.keys())) + if isinstance(value_to_change, collections.abc.Mapping): + self.change_keys(value_to_change, list(value_to_change.keys())) else: # Change keys from the key_change_dict for key in self.key_change_dict: @@ -133,16 +171,30 @@ def change_keys(self, context, keys): context[self.key_change_dict[key]] = context[key] del context[key] - def change_keys_values(self, config_updated, keys): - for k in self.key_split_dataset_change_lst: - if k in keys: - value = config_updated[k] - if k == 'method' and value == "per_center": - config_updated['data_testing']['data_type'] = "institution_id" - if k == 'center_test' and config_updated['data_testing']['data_type'] == "institution_id" and \ - value is not None: - config_updated['data_testing']['data_value'] = value - del config_updated[k] + def change_keys_values(self, config_updated: dict, keys: List[str]): + """ + This function sets DATA_TESTING->DATA_TYPE to "institution_id" if method value is per_center, + DATA_TESTING->DATA_VALUE to the value of center_test. + It is basically verifying some conditions and set values to the `config_updated`. + + Args: + config_updated (dict): Configuration dictionary to update. + keys (List[str]): The keys to consider. + """ + for key_to_change in self.key_split_dataset_change_lst: + if key_to_change in keys: + value: Any = config_updated[key_to_change] + # If the method is per_center, the data_testing->data_type value becomes "institution_id". 
+ if key_to_change == 'method' and value == "per_center": + config_updated[SplitDatasetKW.DATA_TESTING][DataTestingKW.DATA_TYPE] = "institution_id" + # If [the key is center_test], [data_testing->data_type == "institution_id"] and [the value is not None], + # data_testing->data_value becomes that value + if key_to_change == 'center_test' and \ + config_updated[SplitDatasetKW.DATA_TESTING][DataTestingKW.DATA_TYPE] == "institution_id" and \ + value is not None: + config_updated[SplitDatasetKW.DATA_TESTING][DataTestingKW.DATA_VALUE] = value + # Remove the current key from config_updated once handled + del config_updated[key_to_change] def _display_differing_keys(self): """Display differences between dictionaries. @@ -154,6 +206,21 @@ def _display_differing_keys(self): def _validate_path(self): """Ensure validity of configuration file path. """ - if not os.path.isfile(self.path_context) or not self.path_context.endswith('.json'): + if not Path(self.path_context).exists(): + raise ValueError( + f"\nERROR: The provided configuration file path (.json) does not exist: " + f"{Path(self.path_context).absolute()}\n") + elif Path(self.path_context).is_dir(): + raise IsADirectoryError(f"ERROR: The provided configuration file path (.json) is a directory, not a file: " + f"{Path(self.path_context).absolute()}\n") + elif not Path(self.path_context).is_file(): + raise FileNotFoundError(f"ERROR: The provided configuration file path (.json) is not found: " + f"{Path(self.path_context).absolute()}\n") + elif self.path_context.endswith('.yaml') or self.path_context.endswith('.yml'): + raise ValueError( + f"\nERROR: The provided configuration file path (.json) is a yaml file, not a json file; " + f"yaml files are not yet supported: {Path(self.path_context).absolute()}\n") + elif not self.path_context.endswith('.json'): raise ValueError( - "\nERROR: The provided configuration file path (.json) is invalid: {}\n".format(self.path_context)) + f"\nERROR: The provided configuration file path (.json) is not a json file: " + f"{Path(self.path_context).absolute()}\n") diff --git a/ivadomed/evaluation.py b/ivadomed/evaluation.py index fad6ba7b5..2fbc74b04 100644 --- a/ivadomed/evaluation.py +++ b/ivadomed/evaluation.py @@ -1,11 +1,10 @@ -import os - import nibabel as nib import numpy as np import pandas as pd from loguru import logger from scipy.ndimage import label, generate_binary_structure from tqdm import tqdm +from pathlib import Path from ivadomed import inference as imed_inference from ivadomed import metrics as imed_metrics @@ -30,19 +29,19 @@ def evaluate(bids_df, path_output, target_suffix, eval_params): Returns: pd.DataFrame: results for each image.
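Editor's note: for orientation, a hypothetical caller of the rewritten manager (the path is illustrative):

```python
from ivadomed.config_manager import ConfigurationManager

# _validate_path() now separates the failure modes: a missing path raises
# ValueError, a directory IsADirectoryError, a missing file
# FileNotFoundError, and a YAML or other non-.json extension ValueError.
manager = ConfigurationManager("ivadomed/config/config.json")
config = manager.get_config()  # user file merged over config_default.json
print(config["training_parameters"]["batch_size"])
```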
""" - path_preds = os.path.join(path_output, 'pred_masks') + path_preds = Path(path_output, 'pred_masks') logger.info('\nRun Evaluation on {}\n'.format(path_preds)) # OUTPUT RESULT FOLDER - path_results = os.path.join(path_output, 'results_eval') - if not os.path.isdir(path_results): - os.makedirs(path_results) + path_results = Path(path_output, 'results_eval') + if not path_results.is_dir(): + path_results.mkdir(parents=True) - # INIT DATA FRAME - df_results = pd.DataFrame() + # INIT DATA FRAME ROW LIST + df_lst = [] # LIST PREDS - subj_acq_lst = [f.split('_pred')[0] for f in os.listdir(path_preds) if f.endswith('_pred.nii.gz')] + subj_acq_lst = [f.name.split('_pred')[0] for f in path_preds.iterdir() if f.name.endswith('_pred.nii.gz')] # Get all derivatives filenames all_deriv = bids_df.get_deriv_fnames() @@ -50,7 +49,7 @@ def evaluate(bids_df, path_output, target_suffix, eval_params): # LOOP ACROSS PREDS for subj_acq in tqdm(subj_acq_lst, desc="Evaluation"): # Fnames of pred and ground-truth - fname_pred = os.path.join(path_preds, subj_acq + '_pred.nii.gz') + fname_pred = path_preds.joinpath(subj_acq + '_pred.nii.gz') derivatives = bids_df.df[bids_df.df['filename'] .str.contains('|'.join(bids_df.get_derivatives(subj_acq, all_deriv)))]['path'].to_list() # Ordering ground-truth the same as target_suffix @@ -77,7 +76,7 @@ def evaluate(bids_df, path_output, target_suffix, eval_params): n_classes = len(fname_gt) data_gt = np.zeros((h, w, d, n_classes)) for idx, file in enumerate(fname_gt): - if os.path.exists(file): + if Path(file).exists(): data_gt[..., idx] = nib.load(file).get_fdata() else: data_gt[..., idx] = np.zeros((h, w, d), dtype='u1') @@ -87,25 +86,34 @@ def evaluate(bids_df, path_output, target_suffix, eval_params): params=eval_params) results_pred, data_painted = eval.run_eval() - # SAVE PAINTED DATA, TP FP FN - fname_paint = fname_pred.split('.nii.gz')[0] + '_painted.nii.gz' - nib_painted = nib.Nifti1Image(data_painted, nib_pred.affine) - nib.save(nib_painted, fname_paint) - - # For Microscopy PNG/TIF files (TODO: implement OMETIFF behavior) - if "nii" not in extension: - painted_list = imed_inference.split_classes(nib_painted) - imed_inference.pred_to_png(painted_list, - target_suffix, - os.path.join(path_preds, subj_acq), - suffix="_painted") + if eval_params['object_detection_metrics']: + # SAVE PAINTED DATA, TP FP FN + fname_paint = str(fname_pred).split('.nii.gz')[0] + '_TP-FP-FN.nii.gz' + nib_painted = nib.Nifti1Image( + dataobj=data_painted.astype(int), + affine=nib_pred.header.get_best_affine(), + header=nib_pred.header.copy() + ) + nib.save(nib_painted, fname_paint) + + # For Microscopy PNG/TIF files (TODO: implement OMETIFF behavior) + if "nii" not in extension: + painted_list = imed_inference.split_classes(nib_painted) + # Reformat target list to include class index and be compatible with multiple raters + target_list = ["_class-%d" % i for i in range(len(target_suffix))] + imed_inference.pred_to_png(painted_list, + target_list, + str(path_preds.joinpath(subj_acq)), + suffix="_pred_TP-FP-FN.png", + max_value=3) # painted data contain 3 float values [0.0, 1.0, 2.0, 3.0] corresponding to background, TP, FP and FN objects) # SAVE RESULTS FOR THIS PRED results_pred['image_id'] = subj_acq - df_results = df_results.append(results_pred, ignore_index=True) + df_lst.append(results_pred) + df_results = pd.DataFrame(df_lst) df_results = df_results.set_index('image_id') - df_results.to_csv(os.path.join(path_results, 'evaluation_3Dmetrics.csv')) + 
df_results.to_csv(str(path_results.joinpath('evaluation_3Dmetrics.csv'))) logger.info(df_results.head(5)) return df_results @@ -130,6 +138,8 @@ class Evaluation3DMetrics(object): bin_struct (ndarray): Binary structure. size_min (int): Minimum size of objects. Objects that are smaller than this limit can be removed if "removeSmall" is in params. + object_detection_metrics (bool): Indicate if object detection metrics (lesion true positive and false detection + rates) are computed or not. overlap_vox (int): A prediction and ground-truth are considered as overlapping if they overlap for at least this amount of voxels. overlap_ratio (float): A prediction and ground-truth are considered as overlapping if they overlap for at least @@ -162,6 +172,8 @@ def __init__(self, data_pred, data_gt, dim_lst, params=None): self.postprocessing_dict = {} self.size_min = 0 + self.object_detection_metrics = params["object_detection_metrics"] + if "target_size" in params: self.size_rng_lst, self.size_suffix_lst = \ self._get_size_ranges(thr_lst=params["target_size"]["thr"], @@ -259,14 +271,14 @@ def label_per_size(self, data): data_out = np.zeros(data.shape) for idx in range(1, n + 1): - data_idx = (data_label == idx).astype(np.int) + data_idx = (data_label == idx).astype(int) n_nonzero = np.count_nonzero(data_idx) for idx_size, rng in enumerate(self.size_rng_lst): if n_nonzero >= rng[0] and n_nonzero <= rng[1]: data_out[np.nonzero(data_idx)] = idx_size + 1 - return data_out.astype(np.int) + return data_out.astype(int) def get_vol(self, data): """Get volume.""" @@ -316,8 +328,8 @@ def _get_ltp_lfn(self, label_size, class_idx=0): ltp, lfn, n_obj = 0, 0, 0 for idx in range(1, self.n_gt[class_idx] + 1): - data_gt_idx = (self.data_gt_label[..., class_idx] == idx).astype(np.int) - overlap = (data_gt_idx * self.data_pred).astype(np.int) + data_gt_idx = (self.data_gt_label[..., class_idx] == idx).astype(int) + overlap = (data_gt_idx * self.data_pred).astype(int) # if label_size is None, then we look at all object sizes # we check if the current object belongs to the current size range @@ -351,11 +363,11 @@ def _get_lfp(self, label_size, class_idx=0): """ lfp = 0 for idx in range(1, self.n_pred[class_idx] + 1): - data_pred_idx = (self.data_pred_label[..., class_idx] == idx).astype(np.int) - overlap = (data_pred_idx * self.data_gt).astype(np.int) + data_pred_idx = (self.data_pred_label[..., class_idx] == idx).astype(int) + overlap = (data_pred_idx * self.data_gt).astype(int) label_gt = np.max(data_pred_idx * self.data_gt_label[..., class_idx]) - data_gt_idx = (self.data_gt_label[..., class_idx] == label_gt).astype(np.int) + data_gt_idx = (self.data_gt_label[..., class_idx] == label_gt).astype(int) # if label_size is None, then we look at all object sizes # we check if the current object belongs to the current size range @@ -384,8 +396,12 @@ def get_ltpr(self, label_size=None, class_idx=0): label_size (int): Size of label. class_idx (int): Label index. If monolabel 0, else ranges from 0 to number of output channels - 1. - Note: computed only if n_obj >= 1. + Note: computed only if n_obj >= 1 and "object_detection_metrics" evaluation parameter is True. """ + if not self.object_detection_metrics: + n_obj = 0 + return np.nan, n_obj + ltp, lfn, n_obj = self._get_ltp_lfn(label_size, class_idx) denom = ltp + lfn @@ -401,8 +417,12 @@ def get_lfdr(self, label_size=None, class_idx=0): label_size (int): Size of label. class_idx (int): Label index. If monolabel 0, else ranges from 0 to number of output channels - 1.
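Editor's note: for intuition, a self-contained sketch of the lesion-wise counting that `_get_ltp_lfn` performs. It is simplified: any voxel of overlap counts as a detection, whereas the real implementation also applies size-range and minimum-overlap criteria. The arrays are synthetic:

```python
import numpy as np
from scipy.ndimage import label

# Two synthetic ground-truth lesions; the prediction hits only the first.
data_gt = np.zeros((10, 10), dtype=int)
data_gt[1:3, 1:3] = 1
data_gt[6:9, 6:9] = 1
data_pred = np.zeros((10, 10), dtype=int)
data_pred[2, 2] = 1

gt_labels, n_gt = label(data_gt)
ltp = sum(np.count_nonzero((gt_labels == idx).astype(int) * data_pred) > 0
          for idx in range(1, n_gt + 1))
ltpr = ltp / n_gt  # lesion true positive rate: 0.5 here
```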
- Note: computed only if n_obj >= 1. + Note: computed only if n_obj >= 1 and "object_detection_metrics" evaluation parameter is True. """ + + if not self.object_detection_metrics: + return np.nan + ltp, _, n_obj = self._get_ltp_lfn(label_size, class_idx) lfp = self._get_lfp(label_size, class_idx) diff --git a/ivadomed/inference.py b/ivadomed/inference.py index ed3ae6af6..c3fb2c461 100644 --- a/ivadomed/inference.py +++ b/ivadomed/inference.py @@ -1,4 +1,3 @@ -import os import nibabel as nib import numpy as np import onnxruntime @@ -6,6 +5,7 @@ import imageio import joblib from typing import List +from pathlib import Path from loguru import logger from torch.utils.data import Dataset @@ -20,10 +20,12 @@ from ivadomed import postprocessing as imed_postpro from ivadomed import transforms as imed_transforms from ivadomed.loader import utils as imed_loader_utils, film as imed_film +from ivadomed.loader.slice_filter import SliceFilter from ivadomed.object_detection import utils as imed_obj_detect from ivadomed import utils as imed_utils from ivadomed import training as imed_training - +from ivadomed.keywords import ConfigKW, ModelParamsKW, ObjectDetectionParamsKW, TransformationKW, LoaderParamsKW, \ + ROIParamsKW, SliceFilterParamsKW, TrainingParamsKW, MetadataKW, OptionKW def onnx_inference(model_path: str, inputs: tensor) -> tensor: @@ -43,21 +45,20 @@ def onnx_inference(model_path: str, inputs: tensor) -> tensor: return torch.tensor(ort_outs[0]) -def get_preds(context: dict, fname_model: str, model_params: dict, gpu_id: int, batch: dict) -> tensor: +def get_preds(context: dict, fname_model: str, model_params: dict, cuda_available: bool, device: torch.device, batch: dict) -> tensor: """Returns the predictions from the given model. Args: context (dict): configuration dict. fname_model (str): name of file containing model. model_params (dict): dictionary containing model parameters. - gpu_id (int): Number representing gpu number if available. Currently does NOT support multiple GPU segmentation. + cuda_available (bool): True if cuda is available. + device (torch.device): Device used for prediction. batch (dict): dictionary containing input, gt and metadata Returns: tensor: predictions from the model. """ - # Define device - cuda_available, device = imed_utils.define_device(gpu_id) with torch.no_grad(): @@ -74,10 +75,10 @@ def get_preds(context: dict, fname_model: str, model_params: dict, gpu_id: int, model.eval() # Films/Hemis based prediction require meta data load - if ('FiLMedUnet' in context and context['FiLMedUnet']['applied']) or \ - ('HeMISUnet' in context and context['HeMISUnet']['applied']): + if (ConfigKW.FILMED_UNET in context and context[ConfigKW.FILMED_UNET].get(ModelParamsKW.APPLIED)) or \ + (ConfigKW.HEMIS_UNET in context and context[ConfigKW.HEMIS_UNET].get(ModelParamsKW.APPLIED)): # Load meta data before prediction - metadata = imed_training.get_metadata(batch["input_metadata"], model_params) + metadata = imed_training.get_metadata(batch[MetadataKW.INPUT_METADATA], model_params) preds = model(img, metadata) else: preds = model(img) @@ -106,15 +107,18 @@ def get_onehotencoder(context: dict, folder_model: str, options: dict, ds: Datas Returns: dict: onehotencoder used in the model params. 
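Editor's note: since `get_preds` no longer derives the device itself, the caller resolves it once and threads it through every batch. A minimal sketch of that pattern, assuming the signatures shown in this diff (the loop variables are placeholders):

```python
# Resolve the device once, then reuse it; define_device and get_preds are
# the functions shown in this diff.
from ivadomed import utils as imed_utils

cuda_available, device = imed_utils.define_device(0)
# for i_batch, batch in enumerate(data_loader):
#     preds = get_preds(context, fname_model, model_params,
#                       cuda_available, device, batch)
```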
""" - metadata_dict = joblib.load(os.path.join(folder_model, 'metadata_dict.joblib')) + metadata_dict = joblib.load(Path(folder_model, 'metadata_dict.joblib')) for idx in ds.indexes: for i in range(len(idx)): - idx[i]['input_metadata'][0][context['FiLMedUnet']['metadata']] = options['metadata'] - idx[i]['input_metadata'][0]['metadata_dict'] = metadata_dict + idx[i][MetadataKW.INPUT_METADATA][0][context[ConfigKW.FILMED_UNET][ModelParamsKW.METADATA]] = options.get(OptionKW.METADATA) + idx[i][MetadataKW.INPUT_METADATA][0][MetadataKW.METADATA_DICT] = metadata_dict - ds = imed_film.normalize_metadata(ds, None, context["debugging"], context['FiLMedUnet']['metadata']) + if ConfigKW.DEBUGGING in context and ConfigKW.FILMED_UNET in context and \ + context[ConfigKW.FILMED_UNET].get(ModelParamsKW.METADATA): + ds = imed_film.normalize_metadata( + ds, None, context[ConfigKW.DEBUGGING], context[ConfigKW.FILMED_UNET][ModelParamsKW.METADATA]) - return joblib.load(os.path.join(folder_model, 'one_hot_encoder.joblib')) + return joblib.load(Path(folder_model, 'one_hot_encoder.joblib')) def pred_to_nib(data_lst: List[np.ndarray], z_lst: List[int], fname_ref: str, fname_out: str, slice_axis: int, @@ -197,7 +201,7 @@ def pred_to_nib(data_lst: List[np.ndarray], z_lst: List[int], fname_ref: str, fn # See: https://github.com/ivadomed/ivadomed/issues/711 nib_pred = nib.Nifti1Image( dataobj=arr_pred_ref_space, - affine=None, + affine=nib_ref.header.get_best_affine(), header=nib_ref.header.copy() ) # save as NifTI file @@ -207,7 +211,7 @@ def pred_to_nib(data_lst: List[np.ndarray], z_lst: List[int], fname_ref: str, fn return nib_pred -def pred_to_png(pred_list: list, target_list: list, subj_path: str, suffix: str = ''): +def pred_to_png(pred_list: list, target_list: list, subj_path: str, suffix: str = '', max_value: int = 1): """Save the network predictions as PNG files with suffix "_target_pred". Args: @@ -215,18 +219,21 @@ def pred_to_png(pred_list: list, target_list: list, subj_path: str, suffix: str target_list (list of str): list of target suffixes. subj_path (str): Path of the subject filename in output folder without extension (e.g. "path_output/pred_masks/sub-01_sample-01_SEM"). - suffix (str): additional suffix to append to the filename. + suffix (str): additional suffix to append to the filename (e.g. "_pred.png") + max_value (int): Maximum mask value of the float mask to use during the conversion to uint8. """ for pred, target in zip(pred_list, target_list): - filename = subj_path + target + "_pred" + suffix + ".png" + filename = subj_path + target + suffix data = pred.get_fdata() - imageio.imwrite(filename, data, format='png') + _img = (data*255/max_value).astype(np.uint8).squeeze() + imageio.v3.imwrite(filename, _img, extension='.png') def process_transformations(context: dict, fname_roi: str, fname_prior: str, metadata: dict, slice_axis: int, fname_images: list) -> dict: """Sets the transformation based on context parameters. When ROI is not provided center-cropping is applied. - If there is an object_detection_path, then we modify the metadata to store transformation data. + + If there is an object_detection_path, then we modify the metadata to store transformation data. Args: context (dict): configuration dictionary. @@ -239,19 +246,22 @@ def process_transformations(context: dict, fname_roi: str, fname_prior: str, met Returns: dict: metadata. 
""" - if fname_roi is None and 'ROICrop' in context["transformation"].keys(): + if fname_roi is None and TransformationKW.ROICROP in context[ConfigKW.TRANSFORMATION].keys(): logger.warning( "fname_roi has not been specified, then a cropping around the center of the image is " "performed instead of a cropping around a Region of Interest.") - context["transformation"] = dict((key, value) if key != 'ROICrop' - else ('CenterCrop', value) - for (key, value) in context["transformation"].items()) + # Convert transformation configuration into dict. + for (key, value) in context[ConfigKW.TRANSFORMATION].items(): + if key != TransformationKW.ROICROP: + context[ConfigKW.TRANSFORMATION] = dict(key, value) + else: + context[ConfigKW.TRANSFORMATION] = dict(TransformationKW.CENTERCROP, value) - if 'object_detection_params' in context and \ - context['object_detection_params']['object_detection_path'] is not None: + if ConfigKW.OBJECT_DETECTION_PARAMS in context and \ + context[ConfigKW.OBJECT_DETECTION_PARAMS][ObjectDetectionParamsKW.OBJECT_DETECTION_PATH] is not None: imed_obj_detect.bounding_box_prior(fname_prior, metadata, slice_axis, - context['object_detection_params']['safety_factor']) + context[ConfigKW.OBJECT_DETECTION_PARAMS][ObjectDetectionParamsKW.SAFETY_FACTOR]) metadata = [metadata] * len(fname_images) return metadata @@ -259,8 +269,9 @@ def process_transformations(context: dict, fname_roi: str, fname_prior: str, met def set_option(options: dict, postpro: dict, context: dict, key: str): """Generalized function that sets postprocessing option based on given list of options. - When given key already exists in options, we initialize the key value for the postprocessing dictionary - Otherwise, when the key is already found in the postprocessing attritute of the context, we remove it + + When given key already exists in options, we initialize the key value for the postprocessing dictionary + Otherwise, when the key is already found in the postprocessing attritute of the context, we remove it Args: options (dict): Contains postprocessing steps information. 
@@ -274,8 +285,8 @@ def set_option(options: dict, postpro: dict, context: dict, key: str): if options[key]: postpro[key] = {} # Remove key in context if value set to 0 - elif key in context['postprocessing']: - del context['postprocessing'][key] + elif key in context[ConfigKW.POSTPROCESSING]: + del context[ConfigKW.POSTPROCESSING][key] def set_postprocessing_options(options: dict, context: dict): @@ -287,25 +298,25 @@ """ postpro = {} - if 'binarize_prediction' in options and options['binarize_prediction']: - postpro['binarize_prediction'] = {"thr": options['binarize_prediction']} + if OptionKW.BINARIZE_PREDICTION in options and options[OptionKW.BINARIZE_PREDICTION] is not None: + postpro[OptionKW.BINARIZE_PREDICTION] = {"thr": options[OptionKW.BINARIZE_PREDICTION]} - if 'binarize_maxpooling' in options and options['binarize_maxpooling'] is not None: - set_option(options, postpro, context, 'binarize_maxpooling') + if OptionKW.BINARIZE_MAXPOOLING in options and options.get(OptionKW.BINARIZE_MAXPOOLING) is not None: + set_option(options, postpro, context, OptionKW.BINARIZE_MAXPOOLING) - if 'keep_largest' in options and options['keep_largest'] is not None: - set_option(options, postpro, context, 'keep_largest') + if OptionKW.KEEP_LARGEST in options and options.get(OptionKW.KEEP_LARGEST) is not None: + set_option(options, postpro, context, OptionKW.KEEP_LARGEST) - if 'fill_holes' in options and options['fill_holes'] is not None: - set_option(options, postpro, context, 'fill_holes') + if OptionKW.FILL_HOLES in options and options.get(OptionKW.FILL_HOLES) is not None: + set_option(options, postpro, context, OptionKW.FILL_HOLES) - if 'remove_small' in options and options['remove_small'] and \ - ('mm' in options['remove_small'][-1] or 'vox' in options['remove_small'][-1]): - unit = 'mm3' if 'mm3' in options['remove_small'][-1] else 'vox' - thr = [int(t.replace(unit, "")) for t in options['remove_small']] - postpro['remove_small'] = {"unit": unit, "thr": thr} + if OptionKW.REMOVE_SMALL in options and options.get(OptionKW.REMOVE_SMALL) and \ + ('mm' in options[OptionKW.REMOVE_SMALL][-1] or 'vox' in options[OptionKW.REMOVE_SMALL][-1]): + unit = 'mm3' if 'mm3' in options[OptionKW.REMOVE_SMALL][-1] else 'vox' + thr = [int(t.replace(unit, "")) for t in options[OptionKW.REMOVE_SMALL]] + postpro[OptionKW.REMOVE_SMALL] = {"unit": unit, "thr": thr} - context['postprocessing'].update(postpro) + context[ConfigKW.POSTPROCESSING].update(postpro) def segment_volume(folder_model: str, fname_images: list, gpu_id: int = 0, options: dict = None): @@ -323,32 +334,42 @@ images to segment, i.e., len(fname_images) > 1. gpu_id (int): Number representing gpu number if available. Currently does NOT support multiple GPU segmentation. options (dict): This can optionally contain any of the following key-value pairs: - * 'binarize_prediction': (float) Binarize segmentation with specified threshold. - Predictions below the threshold become 0, and predictions above or equal to - threshold become 1. Set to -1 for no thresholding (i.e., soft segmentation). - * 'binarize_maxpooling': (bool) Binarize by setting to 1 the voxel having the maximum prediction across - all classes. Useful for multiclass models. + + * 'binarize_prediction': (float) Binarize segmentation with specified threshold. \ + Predictions below the threshold become 0, and predictions above or equal to \ + threshold become 1.
Set to -1 for no thresholding (i.e., soft segmentation). + * 'binarize_maxpooling': (bool) Binarize by setting to 1 the voxel having the maximum prediction across \ + all classes. Useful for multiclass models. * 'fill_holes': (bool) Fill small holes in the segmentation. * 'keep_largest': (bool) Keep the largest connected-object for each class from the output segmentation. - * 'remove_small': (list of str) Minimal object size to keep with unit (mm3 or vox). A single value can be provided - or one value per prediction class. Single value example: ["1mm3"], ["5vox"]. Multiple values - example: ["10", "20", "10vox"] (remove objects smaller than 10 voxels for class 1 and 3, + * 'remove_small': (list of str) Minimal object size to keep with unit (mm3 or vox). A single value can be provided \ + or one value per prediction class. Single value example: ["1mm3"], ["5vox"]. Multiple values \ + example: ["10", "20", "10vox"] (remove objects smaller than 10 voxels for class 1 and 3, \ and smaller than 20 voxels for class 2). - * 'pixel_size': (list of float) List of microscopy pixel size in micrometers. - Length equals 2 [X, Y] for 2D or 3 [X, Y, Z] for 3D. - * 'overlap_2D': (list of int) List of overlaps in pixels for 2D patching. Length equals 2 [X, Y]. + * 'pixel_size': (list of float) List of microscopy pixel size in micrometers. \ + Length equals 2 [PixelSizeX, PixelSizeY] for 2D or 3 [PixelSizeX, PixelSizeY, PixelSizeZ] for 3D, \ + where X is the width, Y the height and Z the depth of the image. + * 'pixel_size_units': (str) Units of pixel size (must be either "mm", "um" or "nm") + * 'no_patch': (bool) 2D patches are not used while segmenting with models trained with patches. \ + The "no_patch" option supersedes the "overlap_2D" option. \ + This option may not be suitable with large images depending on computer RAM capacity. + * 'overlap_2D': (list of int) List of overlaps in pixels for 2D patching. Length equals 2 [OverlapX, OverlapY], \ + where X is the width and Y the height of the image. * 'metadata': (str) Film metadata. - * 'fname_prior': (str) An image filename (e.g., .nii.gz) containing processing information - (e.g., spinal cord segmentation, spinal location or MS lesion classification, spinal cord centerline), - used to crop the image prior to segment it if provided. + * 'fname_prior': (str) An image filename (e.g., .nii.gz) containing processing information \ + (e.g., spinal cord segmentation, spinal location or MS lesion classification, spinal cord centerline), \ + used to crop the image prior to segmenting it, if provided. \ The segmentation is not performed on the slices that are empty in this image. Returns: - list: List of nibabel objects containing the soft segmentation(s), one per prediction class.
- list: List of target suffix associated with each prediction in `pred_list` + list, list: List of nibabel objects containing the soft segmentation(s), one per prediction class, \ + List of target suffix associated with each prediction in `pred_list` """ + # Define device + cuda_available, device = imed_utils.define_device(gpu_id) + # Check if model folder exists and get filenames to be stored as string fname_model: str fname_model_metadata: str @@ -363,76 +384,117 @@ def segment_volume(folder_model: str, fname_images: list, gpu_id: int = 0, optio set_postprocessing_options(options, context) # LOADER - loader_params = context["loader_parameters"] - slice_axis = imed_utils.AXIS_DCT[loader_params['slice_axis']] + loader_params = context[ConfigKW.LOADER_PARAMETERS] + slice_axis = imed_utils.AXIS_DCT[loader_params[LoaderParamsKW.SLICE_AXIS]] metadata = {} fname_roi = None - fname_prior = options['fname_prior'] if (options is not None) and ('fname_prior' in options) else None + + if (options is not None) and (OptionKW.FNAME_PRIOR in options): + fname_prior = options.get(OptionKW.FNAME_PRIOR) + else: + fname_prior = None + if fname_prior is not None: - if 'roi_params' in loader_params and loader_params['roi_params']['suffix'] is not None: + if LoaderParamsKW.ROI_PARAMS in loader_params and loader_params[LoaderParamsKW.ROI_PARAMS][ROIParamsKW.SUFFIX] is not None: fname_roi = fname_prior # TRANSFORMATIONS metadata = process_transformations(context, fname_roi, fname_prior, metadata, slice_axis, fname_images) # Compose transforms - _, _, transform_test_params = imed_transforms.get_subdatasets_transforms(context["transformation"]) + _, _, transform_test_params = imed_transforms.get_subdatasets_transforms(context[ConfigKW.TRANSFORMATION]) tranform_lst, undo_transforms = imed_transforms.prepare_transforms(transform_test_params) # Force filter_empty_mask to False if fname_roi = None - if fname_roi is None and 'filter_empty_mask' in loader_params["slice_filter_params"]: + if fname_roi is None and SliceFilterParamsKW.FILTER_EMPTY_MASK in loader_params[LoaderParamsKW.SLICE_FILTER_PARAMS]: logger.warning("fname_roi has not been specified, then the entire volume is processed.") - loader_params["slice_filter_params"]["filter_empty_mask"] = False + loader_params[LoaderParamsKW.SLICE_FILTER_PARAMS][SliceFilterParamsKW.FILTER_EMPTY_MASK] = False + + kernel_3D = bool(ConfigKW.MODIFIED_3D_UNET in context and context[ConfigKW.MODIFIED_3D_UNET][ModelParamsKW.APPLIED]) or \ + not context[ConfigKW.DEFAULT_MODEL][ModelParamsKW.IS_2D] - kernel_3D = bool('Modified3DUNet' in context and context['Modified3DUNet']['applied']) or \ - not context['default_model']['is_2d'] + if (options is not None) and (OptionKW.NO_PATCH in options) and kernel_3D: + logger.warning(f"The 'no-patch' option is provided but is not available for 3D models. " + f"'no-patch' is ignored.") + if (options is not None) and (OptionKW.OVERLAP_2D in options) and kernel_3D: + logger.warning(f"The 'overlap-2d' option is provided but is not available for 3D models. 
" + f"'overlap-2d' is ignored.") # Assign length_2D and stride_2D for 2D patching - length_2D = context["default_model"]["length_2D"] if "length_2D" in context["default_model"] else [] - stride_2D = context["default_model"]["stride_2D"] if "stride_2D" in context["default_model"] else [] - is_2d_patch = bool(length_2D) + length_2D = context[ConfigKW.DEFAULT_MODEL][ModelParamsKW.LENGTH_2D] if \ + ModelParamsKW.LENGTH_2D in context[ConfigKW.DEFAULT_MODEL] else [] + stride_2D = context[ConfigKW.DEFAULT_MODEL][ModelParamsKW.STRIDE_2D] if \ + ModelParamsKW.STRIDE_2D in context[ConfigKW.DEFAULT_MODEL] else [] - # Adjust stride_2D with overlap_2D option if present - if is_2d_patch and (options is not None) and ('overlap_2D' in options): - stride_2D = [x1 - x2 for (x1, x2) in zip(length_2D, options['overlap_2D'])] + is_2d_patch = bool(length_2D) + if (options is not None) and (OptionKW.NO_PATCH in options) and not kernel_3D: + if is_2d_patch: + is_2d_patch = not options.get(OptionKW.NO_PATCH) + length_2D = [] + stride_2D = [] + else: + logger.warning(f"The 'no-patch' option is provided but the model has no 'length_2D' and " + f"'stride_2D' parameters in its configuration file " + f"'{fname_model_metadata.split('/')[-1]}'. 2D patching is ignored, the segmentation " + f"is done on the entire image without patches.") + if OptionKW.OVERLAP_2D in options: + logger.warning(f"The 'no-patch' option is provided along with the 'overlap-2D' option. " + f"2D patching is ignored, the segmentation is done on the entire image without patches.") + else: + if (options is not None) and (OptionKW.OVERLAP_2D in options) and not kernel_3D: + if (length_2D and stride_2D): + overlap_2D = options.get(OptionKW.OVERLAP_2D) + # Swap OverlapX and OverlapY resulting in an array in order [OverlapY, OverlapX] + # to match length_2D and stride_2D in [Height, Width] orientation. + overlap_2D[1], overlap_2D[0] = overlap_2D[0], overlap_2D[1] + # Adjust stride_2D with overlap_2D + stride_2D = [x1 - x2 for (x1, x2) in zip(length_2D, overlap_2D)] + else: + logger.warning(f"The 'overlap-2d' option is provided but the model has no 'length_2D' and " + f"'stride_2D' parameters in its configuration file " + f"'{fname_model_metadata.split('/')[-1]}'. 
+                               f"is done on the entire image without patches.")

-    # Add microscopy pixel size from options to metadata for filenames_pairs
-    if (options is not None) and ('pixel_size' in options):
-        metadata['PixelSize'] = options['pixel_size']
+    # Add microscopy pixel size and pixel size units from options to metadata for filename_pairs
+    if (options is not None) and (OptionKW.PIXEL_SIZE in options):
+        metadata[MetadataKW.PIXEL_SIZE] = options.get(OptionKW.PIXEL_SIZE)
+    if (options is not None) and (OptionKW.PIXEL_SIZE_UNITS in options):
+        metadata[MetadataKW.PIXEL_SIZE_UNITS] = options.get(OptionKW.PIXEL_SIZE_UNITS)

     filename_pairs = [(fname_images, None, fname_roi, metadata if isinstance(metadata, list) else [metadata])]

     if kernel_3D:
         ds = MRI3DSubVolumeSegmentationDataset(filename_pairs,
                                                transform=tranform_lst,
-                                               length=context["Modified3DUNet"]["length_3D"],
-                                               stride=context["Modified3DUNet"]["stride_3D"])
-        logger.info(f"Loaded {len(ds)} {loader_params['slice_axis']} volumes of shape "
-                    f"{context['Modified3DUNet']['length_3D']}.")
+                                               length=context[ConfigKW.MODIFIED_3D_UNET][ModelParamsKW.LENGTH_3D],
+                                               stride=context[ConfigKW.MODIFIED_3D_UNET][ModelParamsKW.STRIDE_3D],
+                                               slice_axis=slice_axis)
+        logger.info(f"Loaded {len(ds)} {loader_params[LoaderParamsKW.SLICE_AXIS]} volumes of shape "
+                    f"{context[ConfigKW.MODIFIED_3D_UNET][ModelParamsKW.LENGTH_3D]}.")
     else:
         ds = MRI2DSegmentationDataset(filename_pairs,
                                       length=length_2D,
                                       stride=stride_2D,
                                       slice_axis=slice_axis,
-                                      cache=True,
+                                      nibabel_cache=True,
                                       transform=tranform_lst,
-                                      slice_filter_fn=imed_loader_utils.SliceFilter(
-                                          **loader_params["slice_filter_params"]))
+                                      slice_filter_fn=SliceFilter(
+                                          **loader_params[LoaderParamsKW.SLICE_FILTER_PARAMS]))
         ds.load_filenames()

     if is_2d_patch:
-        logger.info(f"Loaded {len(ds)} {loader_params['slice_axis']} patches of shape {length_2D}.")
+        logger.info(f"Loaded {len(ds)} {loader_params[LoaderParamsKW.SLICE_AXIS]} patches of shape {length_2D}.")
     else:
-        logger.info(f"Loaded {len(ds)} {loader_params['slice_axis']} slices.")
+        logger.info(f"Loaded {len(ds)} {loader_params[LoaderParamsKW.SLICE_AXIS]} slices.")

     model_params = {}
-    if 'FiLMedUnet' in context and context['FiLMedUnet']['applied']:
+    if ConfigKW.FILMED_UNET in context and context[ConfigKW.FILMED_UNET][ModelParamsKW.APPLIED]:
         onehotencoder = get_onehotencoder(context, folder_model, options, ds)
-        model_params.update({"name": 'FiLMedUnet',
-                             "film_onehotencoder": onehotencoder,
-                             "n_metadata": len([ll for l in onehotencoder.categories_ for ll in l])})
+        model_params.update({ModelParamsKW.NAME: ConfigKW.FILMED_UNET,
+                             ModelParamsKW.FILM_ONEHOTENCODER: onehotencoder,
+                             ModelParamsKW.N_METADATA: len([ll for l in onehotencoder.categories_ for ll in l])})

     # Data Loader
-    data_loader = DataLoader(ds, batch_size=context["training_parameters"]["batch_size"],
+    data_loader = DataLoader(ds, batch_size=context[ConfigKW.TRAINING_PARAMETERS][TrainingParamsKW.BATCH_SIZE],
                              shuffle=False, pin_memory=True,
                              collate_fn=imed_loader_utils.imed_collate,
                              num_workers=0)
@@ -441,10 +503,10 @@ def segment_volume(folder_model: str, fname_images: list, gpu_id: int = 0, optio
     preds_list, slice_idx_list = [], []
     last_sample_bool, weight_matrix, volume, image = False, None, None, None
     for i_batch, batch in enumerate(data_loader):
-        preds = get_preds(context, fname_model, model_params, gpu_id, batch)
+        preds = get_preds(context, fname_model, model_params, cuda_available, device, batch)

         # Set datatype to gt since prediction should be processed the same way as gt
-        for b in batch['input_metadata']:
+        for b in batch[MetadataKW.INPUT_METADATA]:
             for modality in b:
                 modality['data_type'] = 'gt'
@@ -469,7 +531,8 @@ def split_classes(nib_prediction):
     pred = nib_prediction.get_fdata()
     pred_list = []
     for c in range(pred.shape[-1]):
-        class_pred = nib.Nifti1Image(pred[..., c].astype('float32'), None, nib_prediction.header.copy())
+        class_pred = nib.Nifti1Image(pred[..., c].astype('float32'), nib_prediction.header.get_best_affine(),
+                                     nib_prediction.header.copy())
         pred_list.append(class_pred)
     return pred_list
@@ -478,24 +541,22 @@ def reconstruct_3d_object(context: dict, batch: dict, undo_transforms: UndoCompo
                           preds_list: list, kernel_3D: bool, is_2d_patch: bool,
                           slice_axis: int, slice_idx_list: list, data_loader: DataLoader,
                           fname_images: list, i_batch: int, last_sample_bool: bool,
                           weight_matrix: tensor, volume: tensor, image: tensor):
-    """Reconstructs the 3D object from the current batch, and returns the list of predictions and
-    targets.
+    """Reconstructs the 3D object from the current batch, and returns the list of predictions and targets.

     Args:
-        context (dict): configuration dict.
         batch (dict): Dictionary containing input, gt and metadata
         undo_transforms (UndoCompose): Undo transforms so prediction match original image resolution and shape
         preds (tensor): Subvolume predictions
         preds_list (list of tensor): list of subvolume predictions.
         kernel_3D (bool): true when using 3D kernel.
-        is_2d_patch (bool): True if length in default model params.
+        is_2d_patch (bool): Indicates if 2d patching is used.
         slice_axis (int): Indicates the axis used for the 2D slice extraction: Sagittal: 0, Coronal: 1, Axial: 2.
         slice_idx_list (list of int): list of indices for the axis slices.
         data_loader (DataLoader): DataLoader object containing batches using in object construction.
         fname_images (list): list of image filenames (e.g. .nii.gz) to segment.
         i_batch (int): index of current batch.
-        last_sample_bool: : flag to indicate whether this is the last sample in the 3D volume
+        last_sample_bool (bool): flag to indicate whether this is the last sample in the 3D volume
         weight_matrix (tensor): the weight matrix
         volume (tensor): the volume tensor that is being partially reconstructed through the loop
         image (tensor): the image tensor that is being partially reconstructed through the loop
@@ -505,22 +566,23 @@ def reconstruct_3d_object(context: dict, batch: dict, undo_transforms: UndoCompo
         target_list (list): list of targets
         last_sample_bool (bool): flag to indicate whether this is the last sample in the 3D volume
         weight_matrix (tensor): the weight matrix. Must be returned as passing tensor by reference is NOT reliable.
-        volume (tensor): the volume tensor that is being partially reconstructed through the loop. Must be returned
-            as passing tensor by reference is NOT reliable.
-        image (tensor): the vimage tensor that is being partially reconstructed through the loop. Must be returned
-            as passing tensor by reference is NOT reliable.
+        volume (tensor): the volume tensor that is being partially reconstructed through the loop. Must be returned \
+            as passing tensor by reference is NOT reliable.
+        image (tensor): the image tensor that is being partially reconstructed through the loop. Must be returned \
+            as passing tensor by reference is NOT reliable.
""" pred_list = [] target_list = [] for i_slice in range(len(preds)): - if "bounding_box" in batch['input_metadata'][i_slice][0]: + if "bounding_box" in batch[MetadataKW.INPUT_METADATA][i_slice][0]: imed_obj_detect.adjust_undo_transforms(undo_transforms.transforms, batch, i_slice) - batch['gt_metadata'] = [[metadata[0]] * preds.shape[1] for metadata in batch['input_metadata']] + batch[MetadataKW.GT_METADATA] = [[metadata[0]] * preds.shape[1] for metadata in batch[MetadataKW.INPUT_METADATA]] if kernel_3D: preds_undo, metadata, last_sample_bool, volume, weight_matrix = \ volume_reconstruction(batch, preds, undo_transforms, i_slice, volume, weight_matrix) - preds_list = [np.array(preds_undo)] + if last_sample_bool: + preds_list = [np.array(preds_undo)] else: if is_2d_patch: # undo transformations for patch and reconstruct slice @@ -531,16 +593,16 @@ def reconstruct_3d_object(context: dict, batch: dict, undo_transforms: UndoCompo # Add new segmented slice to preds_list preds_list.append(np.array(preds_i_undo)) # Store the slice index of preds_i_undo in the original 3D image - slice_idx_list.append(int(batch['input_metadata'][i_slice][0]['slice_index'])) + slice_idx_list.append(int(batch[MetadataKW.INPUT_METADATA][i_slice][0]['slice_index'])) else: # undo transformations for slice preds_i_undo, metadata_idx = undo_transforms(preds[i_slice], - batch["gt_metadata"][i_slice], + batch[MetadataKW.GT_METADATA][i_slice], data_type='gt') # Add new segmented slice to preds_list preds_list.append(np.array(preds_i_undo)) # Store the slice index of preds_i_undo in the original 3D image - slice_idx_list.append(int(batch['input_metadata'][i_slice][0]['slice_index'])) + slice_idx_list.append(int(batch[MetadataKW.INPUT_METADATA][i_slice][0]['slice_index'])) # If last batch and last sample of this batch, then reconstruct 3D object if (i_batch == len(data_loader) - 1 and i_slice == len(batch['gt']) - 1) or last_sample_bool: @@ -552,10 +614,10 @@ def reconstruct_3d_object(context: dict, batch: dict, undo_transforms: UndoCompo kernel_dim='3d' if kernel_3D else '2d', debug=False, bin_thr=-1, - postprocessing=context['postprocessing']) + postprocessing=context[ConfigKW.POSTPROCESSING]) pred_list = split_classes(pred_nib) - target_list = context['loader_parameters']['target_suffix'] + target_list = context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.TARGET_SUFFIX] return pred_list, target_list, last_sample_bool, weight_matrix, volume, image @@ -579,7 +641,8 @@ def volume_reconstruction(batch: dict, pred: tensor, undo_transforms: UndoCompos volume (tensor): representing the volume reconstructed weight_matrix (tensor): weight matrix """ - x_min, x_max, y_min, y_max, z_min, z_max = batch['input_metadata'][smp_idx][0]['coord'] + pred_undo, metadata = None, None + x_min, x_max, y_min, y_max, z_min, z_max = batch[MetadataKW.INPUT_METADATA][smp_idx][0]['coord'] num_pred = pred[smp_idx].shape[0] # A boolean flag indicate whether the current volume is the VERY first subvolume of the entire 3D volume/space. 
@@ -587,7 +650,7 @@ def volume_reconstruction(batch: dict, pred: tensor, undo_transforms: UndoCompos
     first_sample: bool = (x_min == 0 and y_min == 0 and z_min == 0)

     # Get the Dimension
-    x, y, z = batch['input_metadata'][smp_idx][0]['index_shape']
+    x, y, z = batch[MetadataKW.INPUT_METADATA][smp_idx][0]['index_shape']

     # If this is the first sample, instantiate a ZERO tensor based on the dimension
     if first_sample:
@@ -602,10 +665,9 @@ def volume_reconstruction(batch: dict, pred: tensor, undo_transforms: UndoCompos

     if last_sample_bool:
         volume /= weight_matrix
-
-        pred_undo, metadata = undo_transforms(volume,
-                                              batch['gt_metadata'][smp_idx],
-                                              data_type='gt')
+        pred_undo, metadata = undo_transforms(volume,
+                                              batch[MetadataKW.GT_METADATA][smp_idx],
+                                              data_type='gt')
     return pred_undo, metadata, last_sample_bool, volume, weight_matrix
@@ -622,13 +684,14 @@ def image_reconstruction(batch: dict, pred: tensor, undo_transforms: UndoCompose
         weight_matrix (tensor): Weights containing the number of predictions for each pixel

     Returns:
-        pred_undo (tensor): undone patch,
-        metadata (dict): metadata,
-        last_sample_bool (bool): boolean representing if its the last patch of the image
+        pred_undo (tensor): undone image
+        metadata (dict): metadata
+        last_patch_bool (bool): boolean representing if it's the last patch of the image
         image (tensor): representing the image reconstructed
         weight_matrix (tensor): weight matrix
     """
-    x_min, x_max, y_min, y_max = batch['input_metadata'][smp_idx][0]['coord']
+    pred_undo, metadata = None, None
+    x_min, x_max, y_min, y_max = batch[MetadataKW.INPUT_METADATA][smp_idx][0]['coord']
     num_pred = pred[smp_idx].shape[0]

     # A boolean flag indicate whether the current patch is the VERY first patch of the entire 2D image.
@@ -636,7 +699,7 @@ def image_reconstruction(batch: dict, pred: tensor, undo_transforms: UndoCompose
     first_patch: bool = (x_min == 0 and y_min == 0)

     # Get the Dimension
-    x, y = batch['input_metadata'][smp_idx][0]['index_shape']
+    x, y = batch[MetadataKW.INPUT_METADATA][smp_idx][0]['index_shape']

     # If this is the first sample, instantiate a ZERO tensor based on the dimension
     if first_patch:
@@ -650,6 +713,6 @@ def image_reconstruction(batch: dict, pred: tensor, undo_transforms: UndoCompose
         weight_matrix[:, x_min:x_max, y_min:y_max] += 1
     if last_patch_bool:
         image /= weight_matrix
+        pred_undo, metadata = undo_transforms(image, batch[MetadataKW.GT_METADATA][smp_idx], data_type='gt')
-        pred_undo, metadata = undo_transforms(image, batch['gt_metadata'][smp_idx], data_type='gt')

     return pred_undo, metadata, last_patch_bool, image, weight_matrix
diff --git a/ivadomed/keywords.py b/ivadomed/keywords.py
index 4b05f7749..ab42d0417 100644
--- a/ivadomed/keywords.py
+++ b/ivadomed/keywords.py
@@ -6,9 +6,10 @@ class ConfigKW:
     LOADER_PARAMETERS = "loader_parameters"
     TRAINING_PARAMETERS = "training_parameters"
     MODEL_NAME = "model_name"
-    MODIFIED3DUNET = "Modified3DUNet"
+    MODIFIED_3D_UNET = "Modified3DUNet"
     DEBUGGING = "debugging"
-    FILMEDUNET = "FiLMedUnet"
+    WANDB = "wandb"
+    FILMED_UNET = "FiLMedUnet"
     DEFAULT_MODEL = "default_model"
     OBJECT_DETECTION_PARAMS = "object_detection_params"
     GPU_IDS = "gpu_ids"
@@ -20,170 +21,260 @@ class ConfigKW:
     SPLIT_DATASET = "split_dataset"
     UNCERTAINTY = "uncertainty"
     UNDO_TRANSFORMS = "undo_transforms"
-    ELEVATION_PARAMETERS = "elevation_parameters"
-    HEMISUNET = "HeMISUnet"
+    EVALUATION_PARAMETERS = "evaluation_parameters"
+    HEMIS_UNET = "HeMISUnet"
     SPLIT_PATH = "split_path"
     TRAINING_SHA256 = "training_sha256"


+@dataclass
+class WandbKW:
+    WANDB_API_KEY = "wandb_api_key"
+    PROJECT_NAME = "project_name"
+    GROUP_NAME = "group_name"
+    RUN_NAME = "run_name"
+    LOG_GRADS_EVERY = "log_grads_every"
+
+
 @dataclass
 class LoaderParamsKW:
-    PATH_DATA = "path_data"
-    BIDS_CONFIG = "bids_config"
-    TARGET_SUFFIX = "target_suffix"
-    ROI_PARAMS = "roi_params"
-    CONTRAST_PARAMS = "contrast_params"
-    MULTICHANNEL = "multichannel"  # boolean key that is used to change the configuration file ever slightly.
-    EXTENSIONS = "extensions"
-    TARGET_GROUND_TRUTH = "target_ground_truth"
-    TARGET_SESSIONS = "target_sessions"  # the sessions to focus the analyses on
-    METADATA_TYPE = "metadata_type"
-    MODEL_PARAMS = "model_params"
-    SLICE_AXIS = "slice_axis"
-    IS_INPUT_DROPOUT = "is_input_dropout"
-    SLICE_FILTER_PARAMS = "slice_filter_params"
+    PATH_DATA: str = "path_data"
+    BIDS_CONFIG: str = "bids_config"
+    TARGET_SUFFIX: str = "target_suffix"
+    ROI_PARAMS: str = "roi_params"
+    CONTRAST_PARAMS: str = "contrast_params"
+    MULTICHANNEL: str = "multichannel"  # boolean key that is used to change the configuration file ever so slightly.
+    EXTENSIONS: str = "extensions"
+    TARGET_GROUND_TRUTH: str = "target_ground_truth"
+    TARGET_SESSIONS: str = "target_sessions"  # the sessions to focus the analyses on
+    METADATA_TYPE: str = "metadata_type"
+    MODEL_PARAMS: str = "model_params"
+    SLICE_AXIS: str = "slice_axis"
+    IS_INPUT_DROPOUT: str = "is_input_dropout"
+    SLICE_FILTER_PARAMS: str = "slice_filter_params"
+    SUBJECT_SELECTION: str = "subject_selection"


 @dataclass
 class SplitDatasetKW:
-    SPLIT_METHOD = "split_method"
-    FNAME_SPLIT = "fname_split"
-    DATA_TESTING = "data_testing"
-    RANDOM_SEED = "random_seed"
-    TRAIN_FRACTION = "train_fraction"
-    TEST_FRACTION = "test_fraction"
-    BALANCE = "balance"
+    SPLIT_METHOD: str = "split_method"
+    FNAME_SPLIT: str = "fname_split"
+    DATA_TESTING: str = "data_testing"
+    RANDOM_SEED: str = "random_seed"
+    TRAIN_FRACTION: str = "train_fraction"
+    TEST_FRACTION: str = "test_fraction"
+    BALANCE: str = "balance"


 @dataclass
 class DataTestingKW:
-    DATA_TYPE = "data_type"
-    DATA_VALUE = "data_value"
+    DATA_TYPE: str = "data_type"
+    DATA_VALUE: str = "data_value"


 @dataclass
 class TrainingParamsKW:
-    BALANCE_SAMPLES = "balance_samples"
-    BATCH_SIZE = "batch_size"
+    BALANCE_SAMPLES: str = "balance_samples"
+    BATCH_SIZE: str = "batch_size"


 @dataclass
 class TransformationKW:
-    ROICROP = "ROICrop"
-    CENTERCROP = "CenterCrop"
-    RESAMPLE = "Resample"
+    ROICROP: str = "ROICrop"
+    CENTERCROP: str = "CenterCrop"
+    RESAMPLE: str = "Resample"
+    RANDOM_AFFINE: str = "RandomAffine"


 @dataclass
 class BalanceSamplesKW:
-    APPLIED = "applied"
-    TYPE = "type"
+    APPLIED: str = "applied"
+    TYPE: str = "type"


 @dataclass
 class ContrastParamsKW:
-    CONTRAST_LIST = "contrast_lst"  # The list help determine the number of model parameter inputs.
-    BALANCE = "balance"
-    TRAINING_VALIDATION = "training_validation"
-    TESTING = "testing"
+    CONTRAST_LST: str = "contrast_lst"  # The list helps determine the number of model parameter inputs.
+    BALANCE: str = "balance"
+    TRAINING_VALIDATION: str = "training_validation"
+    TESTING: str = "testing"


 @dataclass
 class ModelParamsKW:
-    LENGTH_2D = "length_2D"
-    STRIDE_2D = "stride_2D"
-    LENGTH_3D = "length_3D"
-    STRIDE_3D = "stride_3D"
-    FILM_LAYERS = "film_layers"
-    FOLDER_NAME = "folder_name"
-    METADATA = "metadata"
-    FILM_ONEHOTENCODER = "film_onehotencoder"
-    N_METADATA = "n_metadata"
-    APPLIED = "applied"
-    NAME = "name"
-    IS_2D = "is_2d"
-    IN_CHANNEL = "in_channel"
-    OUT_CHANNEL = "out_channel"
-    TARGET_LIST = "target_list"
-    ROI_LIST = "roi_list"
-    PATH_HDF5 = "path_hdf5"
-    CSV_PATH = "csv_path"
-    RAM = "ram"
-    ATTENTION = "attention"
-    DEPTH = "depth"
-    MISSING_PROBABILITY = "missing_probability"
-    MISSING_PROBABILITY_GROWTH = "missing_probability_growth"
+    LENGTH_2D: str = "length_2D"
+    STRIDE_2D: str = "stride_2D"
+    LENGTH_3D: str = "length_3D"
+    STRIDE_3D: str = "stride_3D"
+    FILM_LAYERS: str = "film_layers"
+    FOLDER_NAME: str = "folder_name"
+    METADATA: str = "metadata"
+    FILM_ONEHOTENCODER: str = "film_onehotencoder"
+    N_METADATA: str = "n_metadata"
+    APPLIED: str = "applied"
+    NAME: str = "name"
+    IS_2D: str = "is_2d"
+    IN_CHANNEL: str = "in_channel"
+    OUT_CHANNEL: str = "out_channel"
+    TARGET_LST: str = "target_lst"
+    ROI_LST: str = "roi_lst"
+    PATH_HDF5: str = "path_hdf5"
+    CSV_PATH: str = "csv_path"
+    RAM: str = "ram"
+    ATTENTION: str = "attention"
+    DEPTH: str = "depth"
+    MISSING_PROBABILITY: str = "missing_probability"
+    MISSING_PROBABILITY_GROWTH: str = "missing_probability_growth"
+    N_FILTERS: str = "n_filters"


 @dataclass
 class SubjectDictKW:
-    ABSOLUTE_PATHS = "absolute_paths"
-    DERIV_PATH = "deriv_path"
-    ROI_FILENAME = "roi_filename"
-    METADATA = "metadata"
-    EXTENSIONS = "extensions"
+    ABSOLUTE_PATHS: str = "absolute_paths"
+    DERIV_PATH: str = "deriv_path"
+    ROI_FILENAME: str = "roi_filename"
+    METADATA: str = "metadata"
+    EXTENSIONS: str = "extensions"


 @dataclass
 class SubjectDataFrameKW:
-    FILENAME = "filename"
+    FILENAME: str = "filename"


 @dataclass
-class BidsDataFrameKW:
-    # bids layout converted to dataframe during bids dataset creation
-    PATH = "path"  # full path.
-    FILENAME = "filename"  # the actual file's name (base)
-    PARTICIPANT_ID = "participant_id"  # i.e. sub-unf01
-    SUBJECT = "subject"  # i.e. unf01
-    SUFFIX = "suffix"  # T1w
-    SESSION = "session"  # session field (single int) in Bids DataFrame
-    EXTENSION = "extension"  # .nii.gz
-    DERIVATIVES = "derivatives"
+class OptionKW:
+    METADATA: str = "metadata"
+    FNAME_PRIOR: str = 'fname_prior'
+    BINARIZE_PREDICTION: str = "binarize_prediction"
+    BINARIZE_MAXPOOLING: str = "binarize_maxpooling"
+    KEEP_LARGEST: str = "keep_largest"
+    FILL_HOLES: str = "fill_holes"
+    REMOVE_SMALL: str = "remove_small"
+    OVERLAP_2D: str = "overlap_2D"
+    PIXEL_SIZE: str = "pixel_size"
+    PIXEL_SIZE_UNITS: str = "pixel_size_units"
+    NO_PATCH: str = "no_patch"


 @dataclass
-class ROIParamsKW:
-    SUFFIX = "suffix"
-    SLICE_FILTER_ROI = "slice_filter_roi"
+class BidsDataFrameKW:
+    # bids layout converted to dataframe during bids dataset creation
+    PATH: str = "path"  # full path.
+    FILENAME: str = "filename"  # the actual file's name (base)
+    PARTICIPANT_ID: str = "participant_id"  # i.e. sub-unf01
+    SUBJECT: str = "subject"  # i.e.
unf01 + SUFFIX: str = "suffix" # T1w + SESSION: str = "session" # session field (single int) in Bids DataFrame + EXTENSION: str = "extension" # .nii.gz + DERIVATIVES: str = "derivatives" @dataclass -class MetadataParamsKW: - CONTRAST = "contrast" - BOUNDING_BOX = "bounding_box" +class ROIParamsKW: + SUFFIX: str = "suffix" + SLICE_FILTER_ROI: str = "slice_filter_roi" @dataclass -class MetadataChoiceKW: - MRI_PARAMS = "mri_params" - CONTRASTS = "contrasts" +class MetadataKW: + CONTRAST: str = "contrast" + CONTRASTS: str = "contrasts" + BOUNDING_BOX: str = "bounding_box" + DATA_TYPE: str = "data_type" + PRE_RESAMPLE_SHAPE: str = "preresample_shape" + CROP_PARAMS: str = "crop_params" + MRI_PARAMS: str = "mri_params" + ROTATION: str = "rotation" + TRANSLATION: str = "translation" + SCALE: str = "scale" + COORD: str = "coord" + ZOOMS: str = "zooms" + UNDO: str = "undo" + REVERSE: str = "reverse" + OFFSET: str = "offset" + ELASTIC: str = "elastic" + GAUSSIAN_NOISE: str = "gaussian_noise" + GAMMA: str = "gamma" + BIAS_FIELD: str = "bias_field" + BLUR: str = "blur" + DATA_SHAPE: str = "data_shape" + SLICE_INDEX: str = "slice_index" + MISSING_MOD: str = "missing_mod" + METADATA_DICT: str = "metadata_dict" + INDEX_SHAPE: str = "index_shape" + GT_METADATA: str = "gt_metadata" + GT_FILENAMES: str = "gt_filenames" + INPUT_METADATA: str = "input_metadata" + INPUT_FILENAMES: str = "input_filenames" + ROI_METADATA: str = "roi_metadata" + PIXEL_SIZE: str = "PixelSize" + PIXEL_SIZE_UNITS: str = "PixelSizeUnits" @dataclass class ObjectDetectionParamsKW: - GPU_IDS = "gpu_ids" - PATH_OUTPUT = "path_output" - OBJECT_DETECTION_PATH = "object_detection_path" - SAFETY_FACTOR = "safety_factor" + GPU_IDS: str = "gpu_ids" + PATH_OUTPUT: str = "path_output" + OBJECT_DETECTION_PATH: str = "object_detection_path" + SAFETY_FACTOR: str = "safety_factor" @dataclass class UncertaintyKW: - ALEATORIC = 'aleatoric' - N_IT = "n_it" + ALEATORIC: str = 'aleatoric' + N_IT: str = "n_it" @dataclass class PostprocessingKW: - BINARIZE_PREDICTION = "binarize_prediction" + BINARIZE_PREDICTION: str = "binarize_prediction" @dataclass class BinarizeProdictionKW: - THR = "thr" + THR: str = "thr" @dataclass class SliceFilterParamsKW: - FILTER_EMPTY_MASK = "filter_empty_mask" \ No newline at end of file + FILTER_EMPTY_MASK: str = "filter_empty_mask" + + +@dataclass +class IgnoredFolderKW: + MACOSX: str = "__MACOSX" + + +@dataclass +class MetricsKW: + RECALL_SPECIFICITY: str = "recall_specificity" + DICE: str = "dice" + +@dataclass +class MetadataParamsKW: + CONTRAST = "contrast" + BOUNDING_BOX = "bounding_box" + +@dataclass +class MetadataChoiceKW: + MRI_PARAMS = "mri_params" + CONTRASTS = "contrasts" + +@dataclass +class SegmentationDatasetKW: + X_MIN: str = 'x_min' + X_MAX: str = 'x_max' + Y_MIN: str = 'y_min' + Y_MAX: str = 'y_max' + Z_MIN: str = 'z_min' + Z_MAX: str = 'z_max' + HANDLER_INDEX: str = 'handler_index' + +@dataclass +class SegmentationPairKW: + GT_METADATA = "gt_metadata" + INPUT_METADATA = "input_metadata" + ROI_METADATA = "roi_metadata" + GT = "gt" + INPUT = "input" + ROI = "roi" diff --git a/ivadomed/loader/adaptative.py b/ivadomed/loader/adaptative.py deleted file mode 100644 index 7ec35f353..000000000 --- a/ivadomed/loader/adaptative.py +++ /dev/null @@ -1,764 +0,0 @@ -import copy - -import h5py -import nibabel as nib -import numpy as np -import pandas as pd -from loguru import logger -from tqdm import tqdm -from pathlib import Path - -from ivadomed.loader.segmentation_pair import SegmentationPair -from ivadomed import 
transforms as imed_transforms -from ivadomed.loader import utils as imed_loader_utils, film as imed_film -from ivadomed.object_detection import utils as imed_obj_detect -from ivadomed import utils as imed_utils - - -class Dataframe: - """ - This class aims to create a dataset using an HDF5 file, which can be used by an adapative loader - to perform curriculum learning, Active Learning or any other strategy that needs to load samples - in a specific way. - It works on RAM or on the fly and can be saved for later. - - Args: - hdf5_file (hdf5): hdf5 file containing dataset information - contrasts (list of str): List of the contrasts of interest. - path (str): Dataframe path. - target_suffix (list of str): List of suffix of targetted structures. - roi_suffix (str): List of suffix of ROI masks. - filter_slices (SliceFilter): Object that filters slices according to their content. - dim (int): Choice 2 or 3, for 2D or 3D data respectively. - - Attributes: - dim (int): Choice 2 or 3, for 2D or 3D data respectively. - contrasts (list of str): List of the contrasts of interest. - filter_slices (SliceFilter): Object that filters slices according to their content. - df (pd.Dataframe): Dataframe containing dataset information - """ - - def __init__(self, hdf5_file, contrasts, path, target_suffix=None, roi_suffix=None, - filter_slices=False, dim=2): - # Number of dimension - self.dim = dim - # List of all contrasts - self.contrasts = copy.deepcopy(contrasts) - - if target_suffix: - for gt in target_suffix: - self.contrasts.append('gt/' + gt) - else: - self.contrasts.append('gt') - - if roi_suffix: - for roi in roi_suffix: - self.contrasts.append('roi/' + roi) - else: - self.contrasts.append('ROI') - - self.df = None - self.filter = filter_slices - - # Data frame - if Path(path).exists(): - self.load_dataframe(path) - else: - self.create_df(hdf5_file) - - def shuffle(self): - """Shuffle the whole data frame.""" - self.df = self.df.sample(frac=1) - - def load_dataframe(self, path): - """Load the dataframe from a csv file. - - Args: - path (str): Path to hdf5 file. - """ - try: - self.df = pd.read_csv(path) - logger.info("Dataframe has been correctly loaded from {}.".format(path)) - except FileNotFoundError: - logger.error("No csv file found") - - def save(self, path): - """Save the dataframe into a csv file. - - Args: - path (str): Path to hdf5 file. 
- """ - try: - self.df.to_csv(path, index=False) - logger.info("Dataframe has been saved at {}.".format(path)) - except FileNotFoundError: - logger.error("Wrong path.") - - def process_key(self, key, grp, line, subject, col_names): - assert key in grp.keys() - inputs = grp[key] - for contrast in inputs.attrs['contrast']: - if key == 'inputs' and contrast in col_names: - line[contrast] = '{}/inputs/{}'.format(subject, contrast) - elif key == 'inputs' and contrast not in col_names: - continue - else: - key_contrast = key + '/' + contrast - for col in col_names: - if key_contrast in col: - line[col] = '{}/{}/{}'.format(subject, key, contrast) - else: - continue - return line - - def process_line(self, df, grp, line, subject, col_names): - # inputs - line = self.process_key('inputs', grp, line, subject, col_names) - - # GT - line = self.process_key('gt', grp, line, subject, col_names) - - # ROI - line = self.process_key('roi', grp, line, subject, col_names) - - # Adding slices & removing useless slices if loading in ram - line['Slices'] = np.array(grp.attrs['slices']) - - # If the number of dimension is 2, we separate the slices - if self.dim == 2 and self.filter: - for n in line['Slices']: - line_slice = copy.deepcopy(line) - line_slice['Slices'] = n - df = df.append(line_slice, ignore_index=True) - - else: - df = df.append(line, ignore_index=True) - - return df, line - - - def create_df(self, hdf5_file): - """Generate the Data frame using the hdf5 file. - - Args: - hdf5_file (hdf5): File containing dataset information - """ - # Template of a line - empty_line = {col: 'None' for col in self.contrasts} - empty_line['Slices'] = 'None' - - # Initialize the data frame - col_names = [col for col in empty_line.keys()] - col_names.append('Subjects') - df = pd.DataFrame(columns=col_names) - logger.info(hdf5_file.attrs['patients_id']) - # Filling the data frame - for subject in hdf5_file.attrs['patients_id']: - # Getting the Group the corresponding patient - grp = hdf5_file[subject] - line = copy.deepcopy(empty_line) - line['Subjects'] = subject - - df, line = self.process_line(df, grp, line, subject, col_names) - - self.df = df - - def clean(self, contrasts): - """Aims to remove lines where one of the contrasts in not available. - - Agrs: - contrasts (list of str): List of contrasts. - """ - # Replacing 'None' values by np.nan - self.df[contrasts] = self.df[contrasts].replace(to_replace='None', value=np.nan) - # Dropping np.nan - self.df = self.df.dropna() - - -class BIDStoHDF5: - """Converts a BIDS dataset to a HDF5 file. - - Args: - bids_df (BidsDataframe): Object containing dataframe with all BIDS image files and their metadata. - subject_file_lst (list): Subject filenames list. - target_suffix (list): List of suffixes for target masks. - roi_params (dict): Dictionary containing parameters related to ROI image processing. - contrast_lst (list): List of the contrasts. - path_hdf5 (str): Path and name of the hdf5 file. - contrast_balance (dict): Dictionary controlling image contrasts balance. - slice_axis (int): Indicates the axis used to extract slices: "axial": 2, "sagittal": 0, "coronal": 1. - metadata_choice (str): Choice between "mri_params", "contrasts", None or False, related to FiLM. - slice_filter_fn (SliceFilter): Class that filters slices according to their content. - transform (Compose): Transformations. - object_detection_params (dict): Object detection parameters. - - Attributes: - dt (dtype): hdf5 special dtype. - path_hdf5 (str): path to hdf5 file containing dataset information. 
- filename_pairs (list): A list of tuples in the format (input filename list containing all modalities,ground \ - truth filename, ROI filename, metadata). - metadata (dict): Dictionary containing metadata of input and gt. - prepro_transforms (Compose): Transforms to be applied before training. - transform (Compose): Transforms to be applied during training. - has_bounding_box (bool): True if all metadata contains bounding box coordinates, else False. - slice_axis (int): Indicates the axis used to extract slices: "axial": 2, "sagittal": 0, "coronal": 1. - slice_filter_fn (SliceFilter): Object that filters slices according to their content. - """ - - def __init__(self, bids_df, subject_file_lst, target_suffix, contrast_lst, path_hdf5, contrast_balance=None, - slice_axis=2, metadata_choice=False, slice_filter_fn=None, roi_params=None, transform=None, - object_detection_params=None, soft_gt=False): - logger.info("Starting conversion") - - # Sort subject_file_lst and create a sub-dataframe from bids_df containing only subjects from subject_file_lst - subject_file_lst = sorted(subject_file_lst) - df_subjects = bids_df.df[bids_df.df['filename'].isin(subject_file_lst)] - - self.soft_gt = soft_gt - self.dt = h5py.special_dtype(vlen=str) - # opening an hdf5 file with write access and writing metadata - # self.hdf5_file = h5py.File(hdf5_name, "w") - self.path_hdf5 = path_hdf5 - list_patients = [] - - self.filename_pairs = [] - self.metadata = {} - - if metadata_choice == 'mri_params': - self.metadata = {"FlipAngle": [], "RepetitionTime": [], - "EchoTime": [], "Manufacturer": []} - - self.prepro_transforms, self.transform = transform - - # Create a dictionary with the number of subjects for each contrast of contrast_balance - tot = {contrast: df_subjects['suffix'].str.fullmatch(contrast).value_counts()[True] - for contrast in contrast_balance.keys()} - - # Create a counter that helps to balance the contrasts - c = {contrast: 0 for contrast in contrast_balance.keys()} - - - # Get all subjects path from bids_df for bounding box - get_all_subj_path = bids_df.df[bids_df.df['filename'] - .str.contains('|'.join(bids_df.get_subject_fnames()))]['path'].to_list() - - # Load bounding box from list of path - self.has_bounding_box = True - bounding_box_dict = imed_obj_detect.load_bounding_boxes(object_detection_params, - get_all_subj_path, - slice_axis, - contrast_lst) - - # Get all derivatives filenames from bids_df - all_deriv = bids_df.get_deriv_fnames() - - for subject in tqdm(subject_file_lst, desc="Loading dataset"): - self.process_subject(bids_df, subject, df_subjects, c, tot, contrast_balance, target_suffix, all_deriv, - roi_params, bounding_box_dict, metadata_choice, list_patients) - - self.slice_axis = slice_axis - self.slice_filter_fn = slice_filter_fn - - # Update HDF5 metadata - with h5py.File(self.path_hdf5, "w") as hdf5_file: - hdf5_file.attrs.create('patients_id', list(set(list_patients)), dtype=self.dt) - hdf5_file.attrs['slice_axis'] = slice_axis - - hdf5_file.attrs['slice_filter_fn'] = [('filter_empty_input', True), ('filter_empty_mask', False)] - hdf5_file.attrs['metadata_choice'] = metadata_choice - - # Save images into HDF5 file - self._load_filenames() - logger.info("Files loaded.") - - def process_subject(self, bids_df, subject, df_subjects, c, tot, contrast_balance, target_suffix, all_deriv, - roi_params, bounding_box_dict, metadata_choice, list_patients): - df_sub = df_subjects.loc[df_subjects['filename'] == subject] - - # Training & Validation: do not consider the contrasts over the 
threshold contained in contrast_balance - contrast = df_sub['suffix'].values[0] - is_over_thresh = self.is_contrast_over_threshold(c, tot, contrast, contrast_balance) - - if(not is_over_thresh): - target_filename, roi_filename = self.get_filenames(bids_df, subject, all_deriv, target_suffix, roi_params) - - if (not any(target_filename)) or (not (roi_params["suffix"] is None) and (roi_filename is None)): - return - - metadata = df_sub.to_dict(orient='records')[0] - metadata['contrast'] = contrast - - if len(bounding_box_dict): - # Take only one bounding box for cropping - metadata['bounding_box'] = bounding_box_dict[str(df_sub['path'].values[0])][0] - - are_mri_params = all([imed_film.check_isMRIparam(m, metadata, subject, self.metadata) for m in self.metadata.keys()]) - if metadata_choice == 'mri_params' and not are_mri_params: - return - - # Get subj_id (prefix filename without modality suffix and extension) - subj_id = subject.split('.')[0].split('_')[0] - - self.filename_pairs.append((subj_id, [df_sub['path'].values[0]], - target_filename, roi_filename, [metadata])) - list_patients.append(subj_id) - - def is_contrast_over_threshold(self, c, tot, contrast, contrast_balance): - if contrast in (contrast_balance.keys()): - c[contrast] = c[contrast] + 1 - return c[contrast] / tot[contrast] > contrast_balance[contrast] - - def get_filenames(self, bids_df, subject, all_deriv, target_suffix, roi_params): - target_filename, roi_filename = [None] * len(target_suffix), None - derivatives = bids_df.df[bids_df.df['filename'] - .str.contains('|'.join(bids_df.get_derivatives(subject, all_deriv)))]['path'].to_list() - - for deriv in derivatives: - for idx, suffix in enumerate(target_suffix): - if suffix in deriv: - target_filename[idx] = deriv - if not (roi_params["suffix"] is None) and roi_params["suffix"] in deriv: - roi_filename = [deriv] - - return target_filename, roi_filename - - def _slice_seg_pair(self, idx_pair_slice, seg_pair, roi_pair, useful_slices, input_volumes, gt_volume, roi_volume): - """ Helper function to slice segmentation pair at load time """ - slice_seg_pair = seg_pair.get_pair_slice(idx_pair_slice) - - self.has_bounding_box = imed_obj_detect.verify_metadata(slice_seg_pair, self.has_bounding_box) - if self.has_bounding_box: - imed_obj_detect.adjust_transforms(self.prepro_transforms, slice_seg_pair) - - # keeping idx of slices with gt - if self.slice_filter_fn: - filter_fn_ret_seg = self.slice_filter_fn(slice_seg_pair) - if self.slice_filter_fn and filter_fn_ret_seg: - useful_slices.append(idx_pair_slice) - - roi_pair_slice = roi_pair.get_pair_slice(idx_pair_slice) - slice_seg_pair, roi_pair_slice = imed_transforms.apply_preprocessing_transforms(self.prepro_transforms, - slice_seg_pair, - roi_pair_slice) - - input_volumes.append(slice_seg_pair["input"][0]) - - # Handle unlabeled data - if not len(slice_seg_pair["gt"]): - gt_volume = [] - else: - gt_volume.append((slice_seg_pair["gt"][0] * 255).astype(np.uint8) / 255.) - - # Handle data with no ROI provided - if not len(roi_pair_slice["gt"]): - roi_volume = [] - else: - roi_volume.append((roi_pair_slice["gt"][0] * 255).astype(np.uint8) / 255.) 
- - return slice_seg_pair, roi_pair_slice - - def create_subgrp_metadata(self, grp_key, grp, contrast): - if grp[grp_key].attrs.__contains__('contrast'): - attr = grp[grp_key].attrs['contrast'] - new_attr = [c for c in attr] - new_attr.append(contrast) - grp[grp_key].attrs.create('contrast', new_attr, dtype=self.dt) - else: - grp[grp_key].attrs.create('contrast', [contrast], dtype=self.dt) - - def create_metadata(self, grp, key, metadata): - grp[key].attrs['data_type'] = metadata['data_type'] - - if 'zooms' in metadata.keys(): - grp[key].attrs['zooms'] = metadata['zooms'] - if 'data_shape' in metadata.keys(): - grp[key].attrs['data_shape'] = metadata['data_shape'] - if 'bounding_box' in metadata.keys() and metadata['bounding_box'] is not None: - grp[key].attrs['bounding_box'] = metadata['bounding_box'] - - def add_grp_contrast(self, grp, contrast): - if grp.attrs.__contains__('contrast'): - attr = grp.attrs['contrast'] - new_attr = [c for c in attr] - new_attr.append(contrast) - grp.attrs.create('contrast', new_attr, dtype=self.dt) - - else: - grp.attrs.create('contrast', [contrast], dtype=self.dt) - - def _load_filenames(self): - """Load preprocessed pair data (input and gt) in handler.""" - with h5py.File(self.path_hdf5, "a") as hdf5_file: - for subject_id, input_filename, gt_filename, roi_filename, metadata in self.filename_pairs: - # Creating/ getting the subject group - if str(subject_id) in hdf5_file.keys(): - grp = hdf5_file[str(subject_id)] - else: - grp = hdf5_file.create_group(str(subject_id)) - - roi_pair = SegmentationPair(input_filename, roi_filename, metadata=metadata, - slice_axis=self.slice_axis, cache=False, soft_gt=self.soft_gt) - - seg_pair = SegmentationPair(input_filename, gt_filename, metadata=metadata, - slice_axis=self.slice_axis, cache=False, soft_gt=self.soft_gt) - logger.info("gt filename", gt_filename) - input_data_shape, _ = seg_pair.get_pair_shapes() - - useful_slices = [] - input_volumes = [] - gt_volume = [] - roi_volume = [] - - for idx_pair_slice in range(input_data_shape[-1]): - slice_seg_pair, roi_pair_slice = self._slice_seg_pair(idx_pair_slice, seg_pair, roi_pair, - useful_slices, input_volumes, gt_volume, roi_volume) - - # Getting metadata using the one from the last slice - input_metadata = slice_seg_pair['input_metadata'][0] - gt_metadata = slice_seg_pair['gt_metadata'][0] - roi_metadata = roi_pair_slice['input_metadata'][0] - - if grp.attrs.__contains__('slices'): - grp.attrs['slices'] = list(set(np.concatenate((grp.attrs['slices'], useful_slices)))) - else: - grp.attrs['slices'] = useful_slices - - # Creating datasets and metadata - contrast = input_metadata['contrast'] - - # Inputs - logger.info(len(input_volumes)) - logger.info("grp= ", str(subject_id)) - key = "inputs/{}".format(contrast) - logger.info("key = ", key) - if len(input_volumes) < 1: - logger.warning("list empty") - continue - grp.create_dataset(key, data=input_volumes) - - # Sub-group metadata - self.create_subgrp_metadata('inputs', grp, contrast) - - # dataset metadata - grp[key].attrs['input_filenames'] = input_metadata['input_filenames'] - self.create_metadata(grp, key, input_metadata) - - # GT - key = "gt/{}".format(contrast) - grp.create_dataset(key, data=gt_volume) - # Sub-group metadata - self.create_subgrp_metadata('gt', grp, contrast) - - # dataset metadata - grp[key].attrs['gt_filenames'] = input_metadata['gt_filenames'] - self.create_metadata(grp, key, gt_metadata) - - # ROI - key = "roi/{}".format(contrast) - grp.create_dataset(key, data=roi_volume) - # Sub-group 
metadata - self.create_subgrp_metadata('roi', grp, contrast) - - # dataset metadata - grp[key].attrs['roi_filename'] = roi_metadata['gt_filenames'] - self.create_metadata(grp, key, roi_metadata) - - # Adding contrast to group metadata - self.add_grp_contrast(grp, contrast) - - -class HDF5Dataset: - """HDF5 dataset object. - - Args: - bids_df (BidsDataframe): Object containing dataframe with all BIDS image files and their metadata. - subject_file_lst (list of str): List of subject filenames. - model_params (dict): Dictionary containing model parameters. - target_suffix (list of str): List of suffixes of the target structures. - contrast_params (dict): Dictionary containing contrast parameters. - slice_axis (int): Indicates the axis used to extract slices: "axial": 2, "sagittal": 0, "coronal": 1. - transform (Compose): Transformations. - metadata_choice (str): Choice between "mri_params", "contrasts", None or False, related to FiLM. - dim (int): Choice 2 or 3, for 2D or 3D data respectively. - complet (bool): If True removes lines where contrasts is not available. - slice_filter_fn (SliceFilter): Object that filters slices according to their content. - roi_params (dict): Dictionary containing parameters related to ROI image processing. - object_detection_params (dict): Object detection parameters. - - Attributes: - cst_lst (list): Contrast list. - gt_lst (list): Contrast label used for ground truth. - roi_lst (list): Contrast label used for ROI cropping. - dim (int): Choice 2 or 3, for 2D or 3D data respectively. - filter_slices (SliceFilter): Object that filters slices according to their content. - prepro_transforms (Compose): Transforms to be applied before training. - transform (Compose): Transforms to be applied during training. - df_object (pd.Dataframe): Dataframe containing dataset information. 
- - """ - - def __init__(self, bids_df, subject_file_lst, model_params, target_suffix, contrast_params, - slice_axis=2, transform=None, metadata_choice=False, dim=2, complet=True, - slice_filter_fn=None, roi_params=None, object_detection_params=None, soft_gt=False): - self.cst_lst = copy.deepcopy(contrast_params["contrast_lst"]) - self.gt_lst = copy.deepcopy(model_params["target_lst"] if "target_lst" in model_params else None) - self.roi_lst = copy.deepcopy(model_params["roi_lst"] if "roi_lst" in model_params else None) - self.dim = dim - self.roi_params = roi_params if roi_params is not None else {"suffix": None, "slice_filter_roi": None} - self.filter_slices = slice_filter_fn - self.prepro_transforms, self.transform = transform - - metadata_choice = False if metadata_choice is None else metadata_choice - # Getting HDS5 dataset file - if not Path(model_params["path_hdf5"]).exists(): - logger.info("Computing hdf5 file of the data") - bids_to_hdf5 = BIDStoHDF5(bids_df=bids_df, - subject_file_lst=subject_file_lst, - path_hdf5=model_params["path_hdf5"], - target_suffix=target_suffix, - roi_params=self.roi_params, - contrast_lst=self.cst_lst, - metadata_choice=metadata_choice, - contrast_balance=contrast_params["balance"], - slice_axis=slice_axis, - slice_filter_fn=slice_filter_fn, - transform=transform, - object_detection_params=object_detection_params, - soft_gt=soft_gt) - - self.path_hdf5 = bids_to_hdf5.path_hdf5 - else: - self.path_hdf5 = model_params["path_hdf5"] - - # Loading dataframe object - with h5py.File(self.path_hdf5, "r") as hdf5_file: - self.df_object = Dataframe(hdf5_file, self.cst_lst, model_params["csv_path"], - target_suffix=self.gt_lst, roi_suffix=self.roi_lst, - dim=self.dim, filter_slices=slice_filter_fn) - if complet: - self.df_object.clean(self.cst_lst) - logger.info("after cleaning") - logger.info(self.df_object.df.head()) - - self.initial_dataframe = self.df_object.df - - self.dataframe = copy.deepcopy(self.df_object.df) - - self.cst_matrix = np.ones([len(self.dataframe), len(self.cst_lst)], dtype=int) - - # RAM status - self.status = {ct: False for ct in self.df_object.contrasts} - - ram = model_params["ram"] if "ram" in model_params else True - if ram: - self.load_into_ram(self.cst_lst) - - def load_into_ram(self, contrast_lst=None): - """Aims to load into RAM the contrasts from the list. - - Args: - contrast_lst (list of str): List of contrasts of interest. - """ - keys = self.status.keys() - with h5py.File(self.path_hdf5, "r") as hdf5_file: - for ct in contrast_lst: - if ct not in keys: - logger.warning("Key error: status has no key {}".format(ct)) - continue - if self.status[ct]: - logger.info("Contrast {} already in RAM".format(ct)) - else: - logger.info("Loading contrast {} in RAM...".format(ct), end='') - for sub in self.dataframe.index: - if self.filter_slices: - slices = self.dataframe.at[sub, 'Slices'] - self.dataframe.at[sub, ct] = hdf5_file[self.dataframe.at[sub, ct]][np.array(slices)] - logger.info("Done.") - self.status[ct] = True - - def set_transform(self, transform): - """Set the transforms.""" - self.transform = transform - - def __len__(self): - """Get the dataset size, ie he number of subvolumes.""" - return len(self.dataframe) - - def __getitem__(self, index): - """Get samples. - - Warning: For now, this method only supports one gt / roi. - - Args: - index (int): Sample index. - - Returns: - dict: Dictionary containing image and label tensors as well as metadata. - """ - line = self.dataframe.iloc[index] - # For HeMIS strategy. 
Otherwise the values of the matrix dont change anything. - missing_modalities = self.cst_matrix[index] - - input_metadata = [] - input_tensors = [] - - # Inputs - with h5py.File(self.path_hdf5, "r") as f: - for i, ct in enumerate(self.cst_lst): - if self.status[ct]: - input_tensor = line[ct] * missing_modalities[i] - else: - input_tensor = f[line[ct]][line['Slices']] * missing_modalities[i] - - input_tensors.append(input_tensor) - # input Metadata - metadata = imed_loader_utils.SampleMetadata({key: value for key, value in f['{}/inputs/{}' - .format(line['Subjects'], ct)].attrs.items()}) - metadata['slice_index'] = line["Slices"] - metadata['missing_mod'] = missing_modalities - metadata['crop_params'] = {} - input_metadata.append(metadata) - - # GT - gt_img = [] - gt_metadata = [] - for idx, gt in enumerate(self.gt_lst): - if self.status['gt/' + gt]: - gt_data = line['gt/' + gt] - else: - gt_data = f[line['gt/' + gt]][line['Slices']] - - gt_data = gt_data.astype(np.uint8) - gt_img.append(gt_data) - gt_metadata.append(imed_loader_utils.SampleMetadata({key: value for key, value in - f[line['gt/' + gt]].attrs.items()})) - gt_metadata[idx]['crop_params'] = {} - - # ROI - roi_img = [] - roi_metadata = [] - if self.roi_lst: - if self.status['roi/' + self.roi_lst[0]]: - roi_data = line['roi/' + self.roi_lst[0]] - else: - roi_data = f[line['roi/' + self.roi_lst[0]]][line['Slices']] - - roi_data = roi_data.astype(np.uint8) - roi_img.append(roi_data) - - roi_metadata.append(imed_loader_utils.SampleMetadata({key: value for key, value in - f[ - line['roi/' + self.roi_lst[0]]].attrs.items()})) - roi_metadata[0]['crop_params'] = {} - - # Run transforms on ROI - # ROI goes first because params of ROICrop are needed for the followings - stack_roi, metadata_roi = self.transform(sample=roi_img, - metadata=roi_metadata, - data_type="roi") - # Update metadata_input with metadata_roi - metadata_input = imed_loader_utils.update_metadata(metadata_roi, input_metadata) - - # Run transforms on images - stack_input, metadata_input = self.transform(sample=input_tensors, - metadata=metadata_input, - data_type="im") - # Update metadata_input with metadata_roi - metadata_gt = imed_loader_utils.update_metadata(metadata_input, gt_metadata) - - # Run transforms on images - stack_gt, metadata_gt = self.transform(sample=gt_img, - metadata=metadata_gt, - data_type="gt") - data_dict = { - 'input': stack_input, - 'gt': stack_gt, - 'roi': stack_roi, - 'input_metadata': metadata_input, - 'gt_metadata': metadata_gt, - 'roi_metadata': metadata_roi - } - - return data_dict - - def update(self, strategy="Missing", p=0.0001): - """Update the Dataframe itself. - - Args: - p (float): Float between 0 and 1, probability of the contrast to be missing. - strategy (str): Update the dataframe using the corresponding strategy. For now the only the strategy - implemented is the one used by HeMIS (i.e. by removing contrasts with a certain probability.) Other - strategies that could be implemented are Active Learning, Curriculum Learning, ... 
- """ - if strategy == 'Missing': - logger.info("Probalility of missing contrast = {}".format(p)) - for idx in range(len(self.dataframe)): - missing_mod = np.random.choice(2, len(self.cst_lst), p=[p, 1 - p]) - # if all contrasts are removed from a subject randomly choose 1 - if not np.any(missing_mod): - missing_mod = np.zeros((len(self.cst_lst))) - missing_mod[np.random.randint(2, size=1)] = 1 - self.cst_matrix[idx, ] = missing_mod - - logger.info("Missing contrasts = {}".format(self.cst_matrix.size - self.cst_matrix.sum())) - - -def HDF5ToBIDS(path_hdf5, subjects, path_dir): - """Convert HDF5 file to BIDS dataset. - - Args: - path_hdf5 (str): Path to the HDF5 file. - subjects (list): List of subject names. - path_dir (str): Output folder path, already existing. - """ - # Open FDH5 file - with h5py.File(path_hdf5, "r") as hdf5_file: - # check the dir exists: - if not Path(path_dir).exists(): - raise FileNotFoundError("Directory {} doesn't exist. Stopping process.".format(path_dir)) - - # loop over all subjects - for sub in subjects: - path_sub = path_dir + '/' + sub + '/anat/' - path_label = path_dir + '/derivatives/labels/' + sub + '/anat/' - - if not Path(path_sub).exists(): - Path(path_sub).mkdir(parents=True) - - if not Path(path_label).exists(): - Path(path_label).mkdir(parents=True) - - # Get Subject Group - try: - grp = hdf5_file[sub] - except Exception: - continue - # inputs - cts = grp['inputs'].attrs['contrast'] - - # Relation between voxel and world coordinates is not available - for ct in cts: - input_data = np.array(grp['inputs/{}'.format(ct)]) - nib_image = nib.Nifti1Image(input_data, np.eye(4)) - filename = Path(path_sub).joinpath(sub + "_" + ct + ".nii.gz") - nib.save(nib_image, filename) - - # GT - cts = grp['gt'].attrs['contrast'] - - for ct in cts: - for filename in grp['gt/{}'.format(ct)].attrs['gt_filenames']: - gt_data = grp['gt/{}'.format(ct)] - nib_image = nib.Nifti1Image(gt_data, np.eye(4)) - filename = Path(path_label).joinpath(filename.split("/")[-1]) - nib.save(nib_image, filename) - - cts = grp['roi'].attrs['contrast'] - - for ct in cts: - roi_data = grp['roi/{}'.format(ct)] - if np.any(roi_data.shape): - nib_image = nib.Nifti1Image(roi_data, np.eye(4)) - filename = Path(path_label).joinpath( - grp['roi/{}'.format(ct)].attrs['roi_filename'][0].split("/")[-1]) - nib.save(nib_image, filename) diff --git a/ivadomed/loader/balanced_sampler.py b/ivadomed/loader/balanced_sampler.py new file mode 100644 index 000000000..30f1a977d --- /dev/null +++ b/ivadomed/loader/balanced_sampler.py @@ -0,0 +1,80 @@ +from __future__ import annotations +import torch +import numpy as np +import typing + +from typing import Union + +if typing.TYPE_CHECKING: + from ivadomed.loader.bids_dataset import BidsDataset + from ivadomed.loader.bids3d_dataset import Bids3DDataset + + +class BalancedSampler(torch.utils.data.sampler.Sampler): + """Estimate sampling weights in order to rebalance the + class distributions from an imbalanced dataset. + + Args: + dataset (BidsDataset): Dataset containing input, gt and metadata. + metadata (str): Indicates which metadata to use to balance the sampler. + + Attributes: + indices (list): List from 0 to length of dataset (number of elements in the dataset). + nb_samples (int): Number of elements in the dataset. + weights (Tensor): Weight of each dataset element equal to 1 over the frequency of a + given label (inverse of the frequency). + metadata_dict (dict): Stores the mapping from metadata string to index (int). 
+        label_idx (int): Keeps track of the label indices already used for the metadata_dict.
+    """
+
+    def __init__(self, dataset: Union[BidsDataset, Bids3DDataset], metadata: str = 'gt') -> None:
+        self.indices = list(range(len(dataset)))
+
+        self.nb_samples = len(self.indices)
+        self.metadata_dict = {}
+        self.label_idx = 0
+
+        cmpt_label = {}
+        for idx in self.indices:
+            label = self._get_label(dataset, idx, metadata)
+            if label in cmpt_label:
+                cmpt_label[label] += 1
+            else:
+                cmpt_label[label] = 1
+
+        weights = [1.0 / cmpt_label[self._get_label(dataset, idx, metadata)]
+                   for idx in self.indices]
+
+        self.weights = torch.DoubleTensor(weights)
+
+    def _get_label(self, dataset: Union[BidsDataset, Bids3DDataset], idx: int, metadata: str) -> int:
+        """Returns the label used to balance the sampler.
+
+        Args:
+            dataset (BidsDataset): Dataset containing input, gt and metadata.
+            idx (int): Element index.
+            metadata (str): Indicates which metadata to use to balance the sampler.
+
+        Returns:
+            int: The index associated with the sample's metadata value or, when `metadata` is 'gt',
+                1 if the sample is not empty and 0 if it is empty (only zeros).
+        """
+        if metadata != 'gt':
+            label_str = dataset[idx]['input_metadata'][0][metadata]
+            if label_str not in self.metadata_dict:
+                self.metadata_dict[label_str] = self.label_idx
+                self.label_idx += 1
+            return self.metadata_dict[label_str]
+
+        else:
+            # For now, only supported with single label
+            sample_gt = np.array(dataset[idx]['gt'][0])
+            if np.any(sample_gt):
+                return 1
+            else:
+                return 0
+
+    def __iter__(self):
+        return (self.indices[i] for i in torch.multinomial(
+            self.weights, self.nb_samples, replacement=True))
+
+    def __len__(self):
+        return self.nb_samples
diff --git a/ivadomed/loader/bids3d_dataset.py b/ivadomed/loader/bids3d_dataset.py
index f1763043b..ecbf4e633 100644
--- a/ivadomed/loader/bids3d_dataset.py
+++ b/ivadomed/loader/bids3d_dataset.py
@@ -1,5 +1,16 @@
+from __future__ import annotations
+import typing
+
+from torchvision.transforms import Compose
+
 from ivadomed.loader.bids_dataset import BidsDataset
 from ivadomed.loader.mri3d_subvolume_segmentation_dataset import MRI3DSubVolumeSegmentationDataset
+from ivadomed.keywords import ModelParamsKW
+
+if typing.TYPE_CHECKING:
+    from typing import List, Optional
+    from ivadomed.loader.bids_dataframe import BidsDataframe
+    from ivadomed.loader.patch_filter import PatchFilter


 class Bids3DDataset(MRI3DSubVolumeSegmentationDataset):
@@ -17,22 +28,43 @@ class Bids3DDataset(MRI3DSubVolumeSegmentationDataset):
             to apply during training (Compose).
         metadata_choice: Choice between "mri_params", "contrasts", None or False, related to FiLM.
         roi_params (dict): Dictionary containing parameters related to ROI image processing.
+        subvolume_filter_fn (PatchFilter): Class that filters subvolumes according to their content.
         multichannel (bool): If True, the input contrasts are combined as input channels for the model. Otherwise,
             each contrast is processed individually (ie different sample / tensor).
         object_detection_params (dict): Object dection parameters.
+        task (str): Choice between segmentation or classification. If classification: GT is discrete values, \
+            If segmentation: GT is binary mask.
+        soft_gt (bool): If True, ground truths are not binarized before being fed to the network. Otherwise, ground
+            truths are thresholded (0.5) after the data augmentation operations.
         is_input_dropout (bool): Return input with missing modalities.
""" - def __init__(self, bids_df, subject_file_lst, target_suffix, model_params, contrast_params, slice_axis=2, - cache=True, transform=None, metadata_choice=False, roi_params=None, - multichannel=False, object_detection_params=None, task="segmentation", soft_gt=False, - is_input_dropout=False): + def __init__(self, + bids_df: BidsDataframe, + subject_file_lst: List[str], + target_suffix: List[str], + model_params: dict, + contrast_params: dict, + slice_axis: int = 2, + cache: bool = True, + transform: List[Optional[Compose]] = None, + metadata_choice: str | bool = False, + roi_params: dict = None, + subvolume_filter_fn: PatchFilter = None, + multichannel: bool = False, + object_detection_params: dict = None, + task: str = "segmentation", + soft_gt: bool = False, + is_input_dropout: bool = False): + dataset = BidsDataset(bids_df=bids_df, subject_file_lst=subject_file_lst, target_suffix=target_suffix, roi_params=roi_params, contrast_params=contrast_params, model_params=model_params, + patch_filter_fn=subvolume_filter_fn, metadata_choice=metadata_choice, slice_axis=slice_axis, transform=transform, @@ -40,6 +72,12 @@ def __init__(self, bids_df, subject_file_lst, target_suffix, model_params, contr object_detection_params=object_detection_params, is_input_dropout=is_input_dropout) - super().__init__(dataset.filename_pairs, length=model_params["length_3D"], stride=model_params["stride_3D"], - transform=transform, slice_axis=slice_axis, task=task, soft_gt=soft_gt, + super().__init__(dataset.filename_pairs, + length=model_params[ModelParamsKW.LENGTH_3D], + stride=model_params[ModelParamsKW.STRIDE_3D], + transform=transform, + slice_axis=slice_axis, + subvolume_filter_fn=subvolume_filter_fn, + task=task, + soft_gt=soft_gt, is_input_dropout=is_input_dropout) diff --git a/ivadomed/loader/bids_dataframe.py b/ivadomed/loader/bids_dataframe.py new file mode 100644 index 000000000..309edcf6c --- /dev/null +++ b/ivadomed/loader/bids_dataframe.py @@ -0,0 +1,344 @@ +import copy +import itertools +import os + +import bids as pybids +import pandas as pd +from loguru import logger +from pathlib import Path + + +class BidsDataframe: + """ + This class aims to create a dataframe containing all BIDS image files in a list of path_data and their metadata. + + Args: + loader_params (dict): Loader parameters, see :doc:`configuration_file` for more details. + path_output (str): Output folder. + derivatives (bool): If True, derivatives are indexed. + split_method (str): split_method from Split Dataset parameters, see :doc:`configuration_file` for more details. + Default: None. Used to remove unused subject files from the bids_dataframe. + + Attributes: + path_data (list): Paths to the BIDS datasets. + bids_config (str): Path to the custom BIDS configuration file. + target_suffix (list of str): List of suffix of targetted structures. + roi_suffix (str): List of suffix of ROI masks. + extensions (list of str): List of file extensions of interest. + contrast_lst (list of str): List of the contrasts of interest. + derivatives (bool): If True, derivatives are indexed. 
+ split_method (str): split_method from Split Dataset parameters + df (pd.DataFrame): Dataframe containing dataset information + """ + + def __init__(self, loader_params: dict, path_output: str, derivatives: bool, split_method: str = None) -> None: + + # paths_data from loader parameters + self.paths_data = loader_params['path_data'] + + # bids_config from loader parameters + self.bids_config = None if 'bids_config' not in loader_params else loader_params['bids_config'] + + # target_suffix and roi_suffix from loader parameters + self.target_suffix = copy.deepcopy(loader_params['target_suffix']) + + # If `target_suffix` is a list of lists convert to list + if any(isinstance(t, list) for t in self.target_suffix): + self.target_suffix = list(itertools.chain.from_iterable(self.target_suffix)) + + self.roi_suffix = loader_params['roi_params']['suffix'] + + # If `roi_suffix` is not None, add to target_suffix + if self.roi_suffix is not None: + self.target_suffix.append(self.roi_suffix) + + self.bids_validate = loader_params.get('bids_validate', True) + + # extensions from loader parameters + self.extensions = loader_params['extensions'] if loader_params['extensions'] else [".nii", ".nii.gz"] + + # contrast_lst from loader parameters + self.contrast_lst = [] if 'contrast_lst' not in loader_params['contrast_params'] \ + else loader_params['contrast_params']['contrast_lst'] + + # split_method + self.split_method = split_method + + # derivatives + self.derivatives = derivatives + + # Create dataframe + self.df = pd.DataFrame() + self.create_bids_dataframe() + + # Save dataframe as csv file + self.save(str(Path(path_output, "bids_dataframe.csv"))) + + def create_bids_dataframe(self) -> None: + """Generate the dataframe.""" + + for path_data in self.paths_data: + path_data = Path(path_data, '') + + # For CT-scan files: + # Force indexing of subject subfolders containing CT-scan files. + # As of 20221026: Implementation based on potential CT datatypes (anat or ct), extensions and + # modality suffixes discussed in BEP024 (https://bids.neuroimaging.io/bep024). + # bids_config parameter with default config_bids.json must be used + # (see: https://ivadomed.org/configuration_file.html#bids-config) + # TODO: remove force indexing of CT-scan files when BEP024 is merged in BIDS. 
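+            # Illustrative (hypothetical) example of the force indexing below: for a file
+            #     path_data/sub-01/ct/sub-01_ct.nii.gz
+            # the filename ends with a CT extension and suffix, the parent folder is 'ct' (or 'anat'),
+            # and the top-level folder starts with 'sub', so 'sub-01/ct' is appended to force_index.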
+            extension_ct = ('.nii.gz', '.nii')
+            suffix_ct = ('ct', 'CT')
+            force_index = []
+            for path_object in path_data.glob('**/*'):
+                if path_object.is_file():
+                    subject_path_index = len(path_data.parts)
+                    subject_path = path_object.parts[subject_path_index]
+                    if (path_object.name.endswith(extension_ct) and path_object.name.split('.')[0].endswith(suffix_ct) and
+                            (path_object.parent.name == "anat" or path_object.parent.name == "ct") and
+                            subject_path.startswith('sub')):
+                        force_index.append(str(Path(*path_object.parent.parts[subject_path_index:])))
+
+            # Initialize BIDSLayoutIndexer and BIDSLayout
+            # validate=True by default for both indexer and layout, BIDS-validator is not skipped
+            indexer = pybids.BIDSLayoutIndexer(force_index=force_index, validate=self.bids_validate)
+            if self.derivatives:
+                self.write_derivatives_dataset_description(path_data)
+            layout = pybids.BIDSLayout(str(path_data), config=self.bids_config, indexer=indexer,
+                                       derivatives=self.derivatives)
+
+            # Transform layout to dataframe with all entities and json metadata
+            # As per pybids, derivatives don't include parsed entities, only the "path" column
+            df_next = layout.to_df(metadata=True)
+
+            # Add filename column
+            df_next.insert(1, 'filename', df_next['path'].apply(os.path.basename))
+
+            # Drop rows with json, tsv and LICENSE files in case no extensions are provided in config file for filtering
+            df_next = df_next[~df_next['filename'].str.endswith(tuple(['.json', '.tsv', 'LICENSE']))]
+
+            # The following command updates the dataframe by doing 2 things:
+            # 1. Keep only subject files of chosen contrasts (for files that are not in the 'derivatives' folder)
+            #    (ex: '/sub-XX/anat/sub-XX_T1w.nii.gz' with contrast_lst:["T1w"])
+            # 2. Keep only derivatives files of chosen target_suffix (for files that are in the 'derivatives' folder)
+            #    (ex: '/derivatives/labels/sub-XX/anat/sub-XX_T1w_seg-manual.nii.gz' with target_suffix:["_seg-manual"])
+            df_next = df_next[(~df_next['path'].str.contains('derivatives')
+                               & df_next['suffix'].str.contains('|'.join(self.contrast_lst)))
+                              | (df_next['path'].str.contains('derivatives')
+                                 & (df_next['filename'].str.split('.').apply(lambda x: x[0])).str.endswith(tuple(self.target_suffix)))]
+
+            # Update dataframe with files of chosen extensions
+            df_next = df_next[df_next['filename'].str.endswith(tuple(self.extensions))]
+
+            # Warning if no subject files are found in path_data
+            if df_next[~df_next['path'].str.contains('derivatives')].empty:
+                logger.warning(f"No subject files were found in '{path_data}' dataset. Skipping dataset.")
+            else:
+                # Add tsv files metadata to dataframe
+                df_next = self.add_tsv_metadata(df_next, path_data, layout)
+
+                # TODO: check if other files are needed for EEG and DWI
+
+                # Merge dataframes
+                self.df = pd.concat([self.df, df_next], join='outer', ignore_index=True)
+
+        if self.df.empty:
+            # Raise error and exit if no subject files are found in any path data
+            raise RuntimeError("No subject files found. Check selection of parameters in config.json"
+                               " and datasets compliance with BIDS specification.")
+
+        # Drop duplicated rows based on all columns except 'path'
+        # Keep first occurrence
+        columns = self.df.columns.to_list()
+        columns.remove('path')
+        self.df = self.df[~(self.df.astype(str).duplicated(subset=columns, keep='first'))]
+
+        # Remove subject files without the "split_method" metadata if specified and keep all derivatives
+        if self.split_method:
+            files_remove = (self.df[(
+                # Path does not contain derivative string (i.e. we only target subject raw data files)
+                ~self.df['path'].str.contains('derivatives')
+                # and split method metadata is null (i.e. the subject must have the split_method metadata or will be excluded)
+                & self.df[self.split_method].isnull())]
+                # Get these filenames and convert to list.
+                ['filename']).tolist()
+            if files_remove:
+                logger.warning(f"The following files don't have the '{self.split_method}' metadata indicated as the "
+                               f"split_method in the configuration JSON file. Skipping these files: {files_remove}")
+                # Remove from the dataframe all filenames that match any entry in files_remove.
+                self.df = self.df[~self.df['filename'].str.contains('|'.join(files_remove))]
+
+        # If indexing of derivatives is true
+        if self.derivatives:
+
+            # Get list of subject files with available derivatives
+            has_deriv, deriv = self.get_subjects_with_derivatives()
+
+            # Filter dataframe to keep subject files with available derivatives only
+            if has_deriv:
+                self.df = self.df[self.df['filename'].str.contains('|'.join(has_deriv))
+                                  | self.df['filename'].str.contains('|'.join(deriv))]
+            else:
+                # Raise error and exit if no derivatives are found for any subject files
+                raise RuntimeError("Derivatives not found.")
+
+        # Reset index
+        self.df.reset_index(drop=True, inplace=True)
+
+        # Drop columns with all null values
+        self.df.dropna(axis=1, inplace=True, how='all')
+
+    def add_tsv_metadata(self, df: pd.DataFrame, path_data: str, layout: pybids.BIDSLayout) -> pd.DataFrame:
+        """Add tsv files metadata to dataframe.
+
+        Args:
+            df (pd.DataFrame): Dataframe containing dataset information.
+            path_data (str): Path to the BIDS dataset.
+            layout (pybids.BIDSLayout): pybids BIDSLayout of the indexed files of the path_data.
+
+        Returns:
+            pd.DataFrame: Dataframe containing datasets information.
+        """
+
+        # Drop columns with all null values before loading TSV metadata
+        # Avoid conflicts with unused columns descriptions from TSV sidecar JSON files
+        df.dropna(axis=1, inplace=True, how='all')
+
+        # Add metadata from 'participants.tsv' file if present
+        # Uses pybids function
+        if layout.get_collections(level='dataset'):
+            df_participants = layout.get_collections(level='dataset', merge=True).to_df()
+            df_participants.insert(1, 'participant_id', "sub-" + df_participants['subject'])
+            df_participants.drop(['suffix'], axis=1, inplace=True)
+            df = pd.merge(df, df_participants, on='subject', suffixes=("_x", None), how='left')
+
+        # Add metadata from 'samples.tsv' file if present
+        # The 'participant_id' column is added only if not already present from the 'participants.tsv' file.
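+        # Illustrative (hypothetical) example: a samples.tsv row with sample_id 'sample-01' and
+        # participant_id 'sub-01' yields sample='01' and subject='01' below, and its remaining
+        # columns are merged into df on the ['subject', 'sample'] keys.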
+        # TODO: update to pybids function when the indexing of samples.tsv is integrated in pybids
+        # (see: https://github.com/bids-standard/pybids/issues/843)
+        fname_samples = Path(path_data, "samples.tsv")
+        if fname_samples.exists():
+            df_samples = pd.read_csv(str(fname_samples), sep='\t')
+            df_samples['sample'] = df_samples['sample_id'].str.split("sample-").apply(lambda x: x[1])
+            df_samples['subject'] = df_samples['participant_id'].str.split("sub-").apply(lambda x: x[1])
+            columns = df_samples.columns.tolist()
+            if 'participant_id' in df.columns:
+                columns.remove('participant_id')
+            df = pd.merge(df, df_samples[columns], on=['subject', 'sample'], suffixes=("_x", None), how='left')
+
+        # Add metadata from all _sessions.tsv files, if present
+        # Uses pybids function
+        if layout.get_collections(level='subject'):
+            df_sessions = layout.get_collections(level='subject', merge=True).to_df()
+            df_sessions.drop(['suffix'], axis=1, inplace=True)
+            df = pd.merge(df, df_sessions, on=['subject', 'session'], suffixes=("_x", None), how='left')
+
+        # Add metadata from all _scans.tsv files, if present
+        # TODO: implement reading _scans.tsv files using pybids "layout.get_collections(level='session')"
+        # TODO: verify merge behavior with EEG and DWI scans files, tested with anat and micr only
+        df_scans = pd.DataFrame()
+        for path_object in Path(path_data).glob("**/*"):
+            if path_object.is_file():
+                if path_object.name.endswith("scans.tsv"):
+                    df_temp = pd.read_csv(str(path_object), sep='\t')
+                    df_scans = pd.concat([df_scans, df_temp], ignore_index=True)
+        if not df_scans.empty:
+            df_scans['filename'] = df_scans['filename'].apply(os.path.basename)
+            df = pd.merge(df, df_scans, on=['filename'], suffixes=("_x", None), how='left')
+
+        return df
+
+    def get_subjects_with_derivatives(self) -> (list, list):
+        """Get lists of subject filenames with available derivatives.
+
+        Returns:
+            list, list: subject filenames having derivatives, available derivatives filenames.
+        """
+        subject_fnames = self.get_subject_fnames()
+        deriv_fnames = self.get_deriv_fnames()
+        has_deriv = []
+        deriv = []
+
+        for subject_fname in subject_fnames:
+            available = self.get_derivatives(subject_fname, deriv_fnames)
+            if available:
+                if self.roi_suffix is not None:
+                    if self.roi_suffix in ('|'.join(available)):
+                        has_deriv.append(subject_fname)
+                        deriv.extend(available)
+                    else:
+                        logger.warning(f"Missing roi_suffix {self.roi_suffix} for {subject_fname}. Skipping file.")
+                else:
+                    has_deriv.append(subject_fname)
+                    deriv.extend(available)
+                for target in self.target_suffix:
+                    if target not in str(available) and target != self.roi_suffix:
+                        logger.warning(f"Missing target_suffix {target} for {subject_fname}")
+            else:
+                logger.warning(f"Missing derivatives for {subject_fname}. Skipping file.")
+
+        return has_deriv, deriv
+
+    def get_subject_fnames(self) -> list:
+        """Get the list of subject filenames in dataframe.
+
+        Returns:
+            list: subject filenames.
+        """
+        return self.df[~self.df['path'].str.contains('derivatives')]['filename'].to_list()
+
+    def get_deriv_fnames(self) -> list:
+        """Get the list of derivative filenames in dataframe.
+
+        Returns:
+            list: derivative filenames.
+        """
+        return self.df[self.df['path'].str.contains('derivatives')]['filename'].tolist()
+
+    def get_derivatives(self, subject_fname: str, deriv_fnames: list) -> list:
+        """Return list of available derivative filenames for a subject filename.
+
+        Args:
+            subject_fname (str): Subject filename.
+            deriv_fnames (list of str): List of derivative filenames.
+
+        Returns:
+            list: derivative filenames.
+        """
+        prefix_fname = subject_fname.split('.')[0]
+        return [d for d in deriv_fnames if prefix_fname in d]
+
+    def save(self, path: str) -> None:
+        """Save the dataframe into a csv file.
+
+        Args:
+            path (str): Path to csv file.
+        """
+        try:
+            self.df.to_csv(path, index=False)
+            logger.info(f"Dataframe has been saved in {path}.")
+        except FileNotFoundError:
+            logger.error(f"Wrong path, bids_dataframe.csv could not be saved in {path}.")
+
+    def write_derivatives_dataset_description(self, path_data: str) -> None:
+        """Write a default dataset_description.json file if not found in the path_data/derivatives folder.
+
+        Args:
+            path_data (str): Path to the BIDS dataset.
+        """
+        path_data = Path(path_data).absolute()
+
+        filename = 'dataset_description'
+        path_deriv_desc_file = Path(f'{path_data}/derivatives/{filename}.json')
+        path_label_desc_file = Path(f'{path_data}/derivatives/labels/{filename}.json')
+        # need to write default dataset_description.json file if not found
+        if not path_deriv_desc_file.is_file() and not path_label_desc_file.is_file():
+
+            logger.warning(f"{path_deriv_desc_file} not found. Will attempt to create a placeholder "
+                           f"description file for now at {path_deriv_desc_file}.")
+            with path_deriv_desc_file.open(mode='w') as f:
+                f.write(
+                    '{"Name": "Example dataset", '
+                    '"BIDSVersion": "1.0.2", '
+                    '"GeneratedBy": [{"Name": "Example pipeline"}]}'
+                )
diff --git a/ivadomed/loader/bids_dataset.py b/ivadomed/loader/bids_dataset.py
index 26803eadd..c4303c66a 100644
--- a/ivadomed/loader/bids_dataset.py
+++ b/ivadomed/loader/bids_dataset.py
@@ -1,8 +1,16 @@
+from __future__ import annotations
 from tqdm import tqdm

 from ivadomed.loader import film as imed_film
 from ivadomed.loader.mri2d_segmentation_dataset import MRI2DSegmentationDataset
 from ivadomed.object_detection import utils as imed_obj_detect
+from ivadomed.keywords import ROIParamsKW, ContrastParamsKW, ModelParamsKW, MetadataKW, SubjectDictKW
+import typing
+if typing.TYPE_CHECKING:
+    from ivadomed.loader.bids_dataframe import BidsDataframe
+    from ivadomed.loader.slice_filter import SliceFilter
+    from ivadomed.loader.patch_filter import PatchFilter
+    import pandas as pd


 class BidsDataset(MRI2DSegmentationDataset):
@@ -16,12 +24,13 @@ class BidsDataset(MRI2DSegmentationDataset):
         model_params (dict): Dictionary containing model parameters.
         slice_axis (int): Indicates the axis used to extract 2D slices from 3D NifTI files:
             "axial": 2, "sagittal": 0, "coronal": 1. 2D PNG/TIF/JPG files use default "axial": 2.
-        cache (bool): If the data should be cached in memory or not.
+        nibabel_cache (bool): If the data should be cached in memory or not by nibabel to reduce repetitive disk loading.
         transform (list): Transformation list (length 2) composed of preprocessing transforms (Compose) and transforms
            to apply during training (Compose).
         metadata_choice (str): Choice between "mri_params", "contrasts", the name of a column from the
            participants.tsv file, None or False, related to FiLM.
         slice_filter_fn (SliceFilter): Class that filters slices according to their content.
+        patch_filter_fn (PatchFilter): Class that filters patches according to their content.
         roi_params (dict): Dictionary containing parameters related to ROI image processing.
         multichannel (bool): If True, the input contrasts are combined as input channels for the model. Otherwise,
             each contrast is processed individually (ie different sample / tensor).
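For reference, a minimal usage sketch of the BidsDataframe class added above; the dataset path, suffixes and contrasts are hypothetical, and loader_params only mirrors the keys read in BidsDataframe.__init__:

    loader_params = {
        'path_data': ['/data/my-bids-dataset'],           # hypothetical dataset location
        'target_suffix': ['_seg-manual'],
        'extensions': ['.nii.gz'],
        'roi_params': {'suffix': None, 'slice_filter_roi': None},
        'contrast_params': {'contrast_lst': ['T1w', 'T2w']},
    }
    bids_df = BidsDataframe(loader_params, path_output='/tmp/ivadomed_output', derivatives=True)
    print(bids_df.df[['filename', 'suffix']].head())      # indexed files and their entities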
@@ -42,15 +51,17 @@ class BidsDataset(MRI2DSegmentationDataset):
     """

-    def __init__(self, bids_df, subject_file_lst, target_suffix, contrast_params, model_params, slice_axis=2,
-                 cache=True, transform=None, metadata_choice=False, slice_filter_fn=None, roi_params=None,
-                 multichannel=False, object_detection_params=None, task="segmentation", soft_gt=False,
-                 is_input_dropout=False):
+    def __init__(self, bids_df: BidsDataframe, subject_file_lst: list, target_suffix: list, contrast_params: dict,
+                 model_params: dict, slice_axis: int = 2, nibabel_cache: bool = True, transform: list = None,
+                 metadata_choice: str = False, slice_filter_fn: SliceFilter = None, patch_filter_fn: PatchFilter = None,
+                 roi_params: dict = None, multichannel: bool = False, object_detection_params: dict = None,
+                 task: str = "segmentation", soft_gt: bool = False, is_input_dropout: bool = False):

-        self.roi_params = roi_params if roi_params is not None else {"suffix": None, "slice_filter_roi": None}
+        self.roi_params = roi_params if roi_params is not None else \
+            {ROIParamsKW.SUFFIX: None, ROIParamsKW.SLICE_FILTER_ROI: None}
         self.soft_gt = soft_gt
         self.filename_pairs = []
-        if metadata_choice == 'mri_params':
+        if metadata_choice == MetadataKW.MRI_PARAMS:
             self.metadata = {"FlipAngle": [], "RepetitionTime": [],
                              "EchoTime": [], "Manufacturer": []}

@@ -60,10 +71,10 @@ def __init__(self, bids_df, subject_file_lst, target_suffix, contrast_params, mo
         # Create a dictionary with the number of subjects for each contrast of contrast_balance
         tot = {contrast: df_subjects['suffix'].str.fullmatch(contrast).value_counts()[True]
-               for contrast in contrast_params["balance"].keys()}
+               for contrast in contrast_params[ContrastParamsKW.BALANCE].keys()}

         # Create a counter that helps to balance the contrasts
-        c = {contrast: 0 for contrast in contrast_params["balance"].keys()}
+        c = {contrast: 0 for contrast in contrast_params[ContrastParamsKW.BALANCE].keys()}

         # Get a list of subject_ids for multichannel_subjects (prefix filename without modality suffix and extension)
         subject_ids = []
@@ -75,13 +86,13 @@
         multichannel_subjects = {}
         idx_dict = {}
         if multichannel:
-            num_contrast = len(contrast_params["contrast_lst"])
-            for idx, contrast in enumerate(contrast_params["contrast_lst"]):
+            num_contrast = len(contrast_params[ContrastParamsKW.CONTRAST_LST])
+            for idx, contrast in enumerate(contrast_params[ContrastParamsKW.CONTRAST_LST]):
                 idx_dict[contrast] = idx
             multichannel_subjects = {subject: {"absolute_paths": [None] * num_contrast,
                                                "deriv_path": None,
                                                "roi_filename": None,
-                                               "metadata": [None] * num_contrast} for subject in subject_ids}
+                                               SubjectDictKW.METADATA: [None] * num_contrast} for subject in subject_ids}

         # Get all subjects path from bids_df for bounding box
         get_all_subj_path = bids_df.df[bids_df.df['filename']
@@ -91,7 +102,7 @@
         bounding_box_dict = imed_obj_detect.load_bounding_boxes(object_detection_params,
                                                                 get_all_subj_path,
                                                                 slice_axis,
-                                                                contrast_params["contrast_lst"])
+                                                                contrast_params[ContrastParamsKW.CONTRAST_LST])

         # Get all derivatives filenames from bids_df
         all_deriv = bids_df.get_deriv_fnames()
@@ -116,19 +127,19 @@
         for subject in multichannel_subjects.values():
             if None not in subject["absolute_paths"]:
                 self.filename_pairs.append((subject["absolute_paths"], subject["deriv_path"],
-                                            subject["roi_filename"], subject["metadata"]))
+                                            subject["roi_filename"], subject[SubjectDictKW.METADATA]))

         if not self.filename_pairs:
             raise Exception('No subjects were selected - check selection of parameters on config.json (e.g. center '
                             'selected + target_suffix)')

-        length = model_params["length_2D"] if "length_2D" in model_params else []
-        stride = model_params["stride_2D"] if "stride_2D" in model_params else []
+        length = model_params[ModelParamsKW.LENGTH_2D] if ModelParamsKW.LENGTH_2D in model_params else []
+        stride = model_params[ModelParamsKW.STRIDE_2D] if ModelParamsKW.STRIDE_2D in model_params else []

-        super().__init__(self.filename_pairs, length, stride, slice_axis, cache, transform, slice_filter_fn, task, self.roi_params,
-                         self.soft_gt, is_input_dropout)
+        super().__init__(self.filename_pairs, length, stride, slice_axis, nibabel_cache, transform, slice_filter_fn, patch_filter_fn,
+                         task, self.roi_params, self.soft_gt, is_input_dropout)

-    def get_target_filename(self, target_suffix, target_filename, derivative):
+    def get_target_filename(self, target_suffix: any, target_filename: any, derivative: any) -> None:

         for idx, suffix_list in enumerate(target_suffix):
             # If suffix_list is a string, then only one rater annotation per class is available.
             # Otherwise, multiple raters segmented the same class.
@@ -140,7 +151,7 @@ def get_target_filename(self, target_suffix, target_filename, derivative):
                     target_filename[idx] = derivative

-    def create_metadata_dict(self, metadata_choice, df_sub, bids_df):
+    def create_metadata_dict(self, metadata: dict, metadata_choice: any, df_sub: pd.DataFrame, bids_df: BidsDataframe) -> None:
         # add custom data to metadata
         if metadata_choice not in df_sub.columns:
             raise ValueError("The following metadata cannot be found: {}. "
@@ -151,28 +162,30 @@ def create_metadata_dict(self, metadata_choice, df_sub, bids_df):
         metadata_dict = {}
         for idx, data in enumerate(data_lst):
             metadata_dict[data] = idx
-        metadata['metadata_dict'] = metadata_dict
+        metadata[MetadataKW.METADATA_DICT] = metadata_dict

-    def fill_multichannel_dict(self, multichannel_subjects, subject, idx_dict, df_sub, roi_filename, target_filename, metadata):
+    def fill_multichannel_dict(self, multichannel_subjects: dict, subject: str, idx_dict: dict, df_sub: pd.DataFrame,
+                               roi_filename: str, target_filename: str, metadata: any) -> dict:
         idx = idx_dict[df_sub['suffix'].values[0]]
         subj_id = subject.split('.')[0].split('_')[0]
         multichannel_subjects[subj_id]["absolute_paths"][idx] = df_sub['path'].values[0]
         multichannel_subjects[subj_id]["deriv_path"] = target_filename
-        multichannel_subjects[subj_id]["metadata"][idx] = metadata
+        multichannel_subjects[subj_id][SubjectDictKW.METADATA][idx] = metadata
         if roi_filename:
             multichannel_subjects[subj_id]["roi_filename"] = roi_filename
         return multichannel_subjects

-    def create_filename_pair(self, multichannel_subjects, subject, c, tot, multichannel, df_subjects, contrast_params,
-                             target_suffix, all_deriv, bids_df, bounding_box_dict, idx_dict, metadata_choice):
+    def create_filename_pair(self, multichannel_subjects: dict, subject: str, c: dict, tot: dict, multichannel: any,
+                             df_subjects: pd.DataFrame, contrast_params: dict, target_suffix: any, all_deriv: any,
+                             bids_df: pd.DataFrame, bounding_box_dict: dict, idx_dict: dict, metadata_choice: str):

         df_sub = df_subjects.loc[df_subjects['filename'] == subject]

         # Training & Validation: do not consider the contrasts over the threshold contained in contrast_balance
         contrast = df_sub['suffix'].values[0]
-        if contrast in (contrast_params["balance"].keys()):
+        if contrast in (contrast_params[ContrastParamsKW.BALANCE].keys()):
             c[contrast] = c[contrast] + 1
-            if c[contrast] / tot[contrast] > contrast_params["balance"][contrast]:
+            if c[contrast] / tot[contrast] > contrast_params[ContrastParamsKW.BALANCE][contrast]:
                 return
         if isinstance(target_suffix[0], str):
             target_filename, roi_filename = [None] * len(target_suffix), None
@@ -184,25 +197,25 @@
         for derivative in derivatives:
             self.get_target_filename(target_suffix, target_filename, derivative)
-            if not (self.roi_params["suffix"] is None) and self.roi_params["suffix"] in derivative:
+            if not (self.roi_params[ROIParamsKW.SUFFIX] is None) and self.roi_params[ROIParamsKW.SUFFIX] in derivative:
                 roi_filename = [derivative]

-        if (not any(target_filename)) or (not (self.roi_params["suffix"] is None) and (roi_filename is None)):
+        if (not any(target_filename)) or (not (self.roi_params[ROIParamsKW.SUFFIX] is None) and (roi_filename is None)):
             return

         metadata = df_sub.to_dict(orient='records')[0]
-        metadata['contrast'] = contrast
+        metadata[MetadataKW.CONTRAST] = contrast

         if len(bounding_box_dict):
             # Take only one bounding box for cropping
-            metadata['bounding_box'] = bounding_box_dict[str(df_sub['path'].values[0])][0]
+            metadata[MetadataKW.BOUNDING_BOX] = bounding_box_dict[str(df_sub['path'].values[0])][0]

-        if metadata_choice == 'mri_params':
+        if metadata_choice == MetadataKW.MRI_PARAMS:
             if not all([imed_film.check_isMRIparam(m, metadata, subject, self.metadata) for m in
                         self.metadata.keys()]):
                 return

-        elif metadata_choice and metadata_choice != 'contrasts' and metadata_choice is not None:
-            self.create_metadata_dict(metadata_choice, df_sub, bids_df)
+        elif metadata_choice and metadata_choice != MetadataKW.CONTRASTS and metadata_choice is not None:
+            self.create_metadata_dict(metadata, metadata_choice, df_sub, bids_df)

         return df_sub, roi_filename, target_filename, metadata
diff --git a/ivadomed/loader/film.py b/ivadomed/loader/film.py
index 05aed4084..13f04039b 100644
--- a/ivadomed/loader/film.py
+++ b/ivadomed/loader/film.py
@@ -1,6 +1,8 @@
+from __future__ import annotations
 import json
 from pathlib import Path
 from copy import deepcopy
+from typing import List, Union

 import numpy as np
 from loguru import logger
@@ -8,6 +10,15 @@
 from sklearn.model_selection import GridSearchCV
 from sklearn.neighbors import KernelDensity
 from sklearn.preprocessing import OneHotEncoder
+from ivadomed.keywords import MetadataKW
+import typing
+
+if typing.TYPE_CHECKING:
+    from ivadomed.loader.bids_dataset import BidsDataset
+    from ivadomed.loader.bids3d_dataset import Bids3DDataset
+    from ivadomed.loader.mri2d_segmentation_dataset import MRI2DSegmentationDataset
+
+    import torch.nn as nn

 from ivadomed import __path__
@@ -18,12 +29,16 @@
                      "acq-MToff_MTS": 3, "acq-MTon_MTS": 4, "acq-T1w_MTS": 5}


-def normalize_metadata(ds_in, clustering_models, debugging, metadata_type, train_set=False):
+def normalize_metadata(ds_in: Union[BidsDataset, Bids3DDataset, MRI2DSegmentationDataset],
+                       clustering_models: dict,
+                       debugging: bool,
+                       metadata_type: str,
+                       train_set: bool = False) -> (list, OneHotEncoder) | list:
     """Categorize each metadata value using a KDE clustering method, then apply a one-hot-encoding.

     Args:
-        ds_in (BidsDataset): Dataset with metadata.
-        clustering_models: Pre-trained clustering model that has been trained on metadata of the training set.
+        ds_in (BidsDataset, Bids3DDataset or MRI2DSegmentationDataset): Dataset with metadata.
+        clustering_models (dict): Pre-trained clustering model that has been trained on metadata of the training set.
         debugging (bool): If True, extended verbosity and intermediate outputs.
         metadata_type (str): Choice between 'mri_params', 'contrasts' or the name of a column from the
             participants.tsv file.
@@ -42,7 +57,7 @@ def normalize_metadata(ds_in, clustering_models, debugging, metadata_type, train
     ds_out = []
     for idx, subject in enumerate(ds_in):
         s_out = deepcopy(subject)
-        if metadata_type == 'mri_params':
+        if metadata_type == MetadataKW.MRI_PARAMS:
             # categorize flip angle, repetition time and echo time values using KDE
             for m in ['FlipAngle', 'RepetitionTime', 'EchoTime']:
                 v = subject["input_metadata"][m]
@@ -65,7 +80,7 @@ def normalize_metadata(ds_in, clustering_models, debugging, metadata_type, train
             s_out["input_metadata"]["film_input"] = [s_out["input_metadata"][k] for k in
                                                      ["FlipAngle", "RepetitionTime", "EchoTime", "Manufacturer"]]

-        elif metadata_type == "contrasts":
+        elif metadata_type == MetadataKW.CONTRASTS:
             for i, input_metadata in enumerate(subject["input_metadata"]):
                 generic_contrast = GENERIC_CONTRAST[input_metadata["contrast"]]
                 label_contrast = CONTRAST_CATEGORY[generic_contrast]
@@ -104,11 +119,11 @@ class Kde_model():
         kde (sklearn.neighbors.KernelDensity):
         minima (float): Local minima.
""" - def __init__(self): + def __init__(self) -> None: self.kde = KernelDensity() self.minima = None - def train(self, data, value_range, gridsearch_bandwidth_range): + def train(self, data: list, value_range: np.ndarray, gridsearch_bandwidth_range: np.ndarray) -> None: # reshape data to fit sklearn data = np.array(data).reshape(-1, 1) @@ -129,18 +144,18 @@ def train(self, data, value_range, gridsearch_bandwidth_range): # find local minima self.minima = s[argrelextrema(e, np.less)[0]] - def predict(self, data): + def predict(self, data: float) -> int: x = [i for i, m in enumerate(self.minima) if data < m] pred = min(x) if len(x) else len(self.minima) return pred -def clustering_fit(dataset, key_lst): +def clustering_fit(dataset: list, key_lst: List[str]) -> dict: """This function creates clustering models for each metadata type, using Kernel Density Estimation algorithm. Args: - datasets (list): data + dataset (list): data key_lst (list of str): names of metadata to cluster Returns: @@ -162,12 +177,12 @@ def clustering_fit(dataset, key_lst): return model_dct -def check_isMRIparam(mri_param_type, mri_param, subject, metadata): +def check_isMRIparam(mri_param_type: str, mri_param: dict, subject: str, metadata: dict) -> bool: """Check if a given metadata belongs to the MRI parameters. Args: mri_param_type (str): Metadata type name. - mri_param (list): List of MRI params names. + mri_param (dict): List of MRI params names. subject (str): Current subject name. metadata (dict): Metadata. @@ -191,7 +206,7 @@ def check_isMRIparam(mri_param_type, mri_param, subject, metadata): return True -def get_film_metadata_models(ds_train, metadata_type, debugging=False): +def get_film_metadata_models(ds_train: MRI2DSegmentationDataset, metadata_type: str, debugging: bool = False): """Get FiLM models. This function pulls the clustering and one-hot encoder models that are used by FiLMedUnet. @@ -205,7 +220,7 @@ def get_film_metadata_models(ds_train, metadata_type, debugging=False): Returns: MRI2DSegmentationDataset, OneHotEncoder, KernelDensity: dataset, one-hot encoder and KDE model """ - if metadata_type == "mri_params": + if metadata_type == MetadataKW.MRI_PARAMS: metadata_vector = ["RepetitionTime", "EchoTime", "FlipAngle"] metadata_clustering_models = clustering_fit(ds_train.metadata, metadata_vector) else: @@ -220,7 +235,8 @@ def get_film_metadata_models(ds_train, metadata_type, debugging=False): return ds_train, train_onehotencoder, metadata_clustering_models -def store_film_params(gammas, betas, metadata_values, metadata, model, film_layers, depth, film_metadata): +def store_film_params(gammas: dict, betas: dict, metadata_values: list, metadata: list, model: nn.Module, + film_layers: list, depth: int, film_metadata: str) -> (dict, dict, list): """Store FiLM params. Args: @@ -234,7 +250,7 @@ def store_film_params(gammas, betas, metadata_values, metadata, model, film_laye film_metadata (str): Metadata of interest used to modulate the network (e.g., contrast, tumor_type). 
Returns: - dict, dict: gammas, betas + dict, dict, list: gammas, betas, metadata_values """ new_input = [metadata[k][0][film_metadata] for k in range(len(metadata))] metadata_values.append(new_input) @@ -254,7 +270,7 @@ def store_film_params(gammas, betas, metadata_values, metadata, model, film_laye return gammas, betas, metadata_values -def save_film_params(gammas, betas, metadata_values, depth, ofolder): +def save_film_params(gammas: dict, betas: dict, metadata_values: list, depth: int, ofolder: str) -> None: """Save FiLM params as npy files. These parameters can be further used for visualisation purposes. They are saved in the `ofolder` with `.npy` format. diff --git a/ivadomed/loader/loader.py b/ivadomed/loader/loader.py index cead2c3ca..e2edb1dca 100644 --- a/ivadomed/loader/loader.py +++ b/ivadomed/loader/loader.py @@ -1,21 +1,42 @@ import copy from loguru import logger +from typing import List from ivadomed import transforms as imed_transforms from ivadomed import utils as imed_utils -from ivadomed.loader import utils as imed_loader_utils, adaptative as imed_adaptative from ivadomed.loader.bids3d_dataset import Bids3DDataset +from ivadomed.loader.bids_dataframe import BidsDataframe from ivadomed.loader.bids_dataset import BidsDataset +from ivadomed.keywords import ROIParamsKW, TransformationKW, ModelParamsKW, ConfigKW +from ivadomed.loader.slice_filter import SliceFilter +from ivadomed.loader.patch_filter import PatchFilter +import torch -def load_dataset(bids_df, data_list, transforms_params, model_params, target_suffix, roi_params, - contrast_params, slice_filter_params, slice_axis, multichannel, - dataset_type="training", requires_undo=False, metadata_type=None, - object_detection_params=None, soft_gt=False, device=None, - cuda_available=None, is_input_dropout=False, **kwargs): +def load_dataset(bids_df: BidsDataframe, + data_list: List[str], + transforms_params: dict, + model_params: dict, + target_suffix: List[str], + roi_params: dict, + contrast_params: dict, + slice_filter_params: dict, + patch_filter_params: dict, + slice_axis: str, + multichannel: bool, + dataset_type: str = "training", + requires_undo: bool = False, + metadata_type: str = None, + object_detection_params: dict = None, + soft_gt: bool = False, + device: torch.device = None, + cuda_available: bool = None, + is_input_dropout: bool = False, + **kwargs) -> Bids3DDataset: """Get loader appropriate loader according to model type. Available loaders are Bids3DDataset for 3D data, BidsDataset for 2D data and HDF5Dataset for HeMIS. Args: + bids_df (BidsDataframe): Object containing dataframe with all BIDS image files and their metadata. data_list (list): Subject names list. transforms_params (dict): Dictionary containing transformations for "training", "validation", "testing" (keys), @@ -24,7 +45,8 @@ def load_dataset(bids_df, data_list, transforms_params, model_params, target_suf target_suffix (list of str): List of suffixes for target masks. roi_params (dict): Contains ROI related parameters. contrast_params (dict): Contains image contrasts related parameters. - slice_filter_params (dict): Contains slice_filter parameters, see :doc:`configuration_file` for more details. + slice_filter_params (dict): Contains slice_filter_params, see :doc:`configuration_file` for more details. + patch_filter_params (dict): Contains patch_filter_params, see :doc:`configuration_file` for more details. slice_axis (string): Choice between "axial", "sagittal", "coronal" ; controls the axis used to extract the 2D data from 3D NifTI files. 
2D PNG/TIF/JPG files use default "axial. multichannel (bool): If True, the input contrasts are combined as input channels for the model. Otherwise, each @@ -35,23 +57,25 @@ def load_dataset(bids_df, data_list, transforms_params, model_params, target_suf object_detection_params (dict): Object dection parameters. soft_gt (bool): If True, ground truths are not binarized before being fed to the network. Otherwise, ground truths are thresholded (0.5) after the data augmentation operations. + device (torch.device): Device to use for the model training. + cuda_available (bool): If True, cuda is available. is_input_dropout (bool): Return input with missing modalities. Returns: BidsDataset Note: For more details on the parameters transform_params, target_suffix, roi_params, contrast_params, - slice_filter_params and object_detection_params see :doc:`configuration_file`. + slice_filter_params, patch_filter_params and object_detection_params see :doc:`configuration_file`. """ - # Compose transforms tranform_lst, _ = imed_transforms.prepare_transforms(copy.deepcopy(transforms_params), requires_undo) # If ROICrop is not part of the transforms, then enforce no slice filtering based on ROI data. - if 'ROICrop' not in transforms_params: - roi_params["slice_filter_roi"] = None + if TransformationKW.ROICROP not in transforms_params: + roi_params[ROIParamsKW.SLICE_FILTER_ROI] = None - if model_params["name"] == "Modified3DUNet" or ('is_2d' in model_params and not model_params['is_2d']): + if model_params[ModelParamsKW.NAME] == ConfigKW.MODIFIED_3D_UNET \ + or (ModelParamsKW.IS_2D in model_params and not model_params[ModelParamsKW.IS_2D]): dataset = Bids3DDataset(bids_df=bids_df, subject_file_lst=data_list, target_suffix=target_suffix, @@ -61,29 +85,14 @@ def load_dataset(bids_df, data_list, transforms_params, model_params, target_suf slice_axis=imed_utils.AXIS_DCT[slice_axis], transform=tranform_lst, multichannel=multichannel, + subvolume_filter_fn=PatchFilter(**patch_filter_params, is_train=False if dataset_type == "testing" else True), model_params=model_params, object_detection_params=object_detection_params, soft_gt=soft_gt, is_input_dropout=is_input_dropout) - - elif model_params["name"] == "HeMISUnet": - dataset = imed_adaptative.HDF5Dataset(bids_df=bids_df, - subject_file_lst=data_list, - model_params=model_params, - contrast_params=contrast_params, - target_suffix=target_suffix, - slice_axis=imed_utils.AXIS_DCT[slice_axis], - transform=tranform_lst, - metadata_choice=metadata_type, - slice_filter_fn=imed_loader_utils.SliceFilter(**slice_filter_params, - device=device, - cuda_available=cuda_available), - roi_params=roi_params, - object_detection_params=object_detection_params, - soft_gt=soft_gt) else: # Task selection - task = imed_utils.get_task(model_params["name"]) + task = imed_utils.get_task(model_params[ModelParamsKW.NAME]) dataset = BidsDataset(bids_df=bids_df, subject_file_lst=data_list, @@ -95,20 +104,21 @@ def load_dataset(bids_df, data_list, transforms_params, model_params, target_suf slice_axis=imed_utils.AXIS_DCT[slice_axis], transform=tranform_lst, multichannel=multichannel, - slice_filter_fn=imed_loader_utils.SliceFilter(**slice_filter_params, device=device, - cuda_available=cuda_available), + slice_filter_fn=SliceFilter(**slice_filter_params, device=device, + cuda_available=cuda_available), + patch_filter_fn=PatchFilter(**patch_filter_params, + is_train=False if dataset_type == "testing" else True), soft_gt=soft_gt, object_detection_params=object_detection_params, task=task, 
is_input_dropout=is_input_dropout) dataset.load_filenames() - if model_params["name"] == "Modified3DUNet": - logger.info("Loaded {} volumes of shape {} for the {} set.".format(len(dataset), dataset.length, dataset_type)) - elif model_params["name"] != "HeMISUnet" and dataset.length: - logger.info("Loaded {} {} patches of shape {} for the {} set.".format(len(dataset), slice_axis, dataset.length, - dataset_type)) + if model_params[ModelParamsKW.NAME] == ConfigKW.MODIFIED_3D_UNET: + logger.info(f"Loaded {len(dataset)} volumes of shape {dataset.length} for the {dataset_type} set.") + elif model_params[ModelParamsKW.NAME] != ConfigKW.HEMIS_UNET and dataset.length: + logger.info(f"Loaded {len(dataset)} {slice_axis} patches of shape {dataset.length} for the {dataset_type} set.") else: - logger.info("Loaded {} {} slices for the {} set.".format(len(dataset), slice_axis, dataset_type)) + logger.info(f"Loaded {len(dataset)} {slice_axis} slices for the { dataset_type} set.") return dataset diff --git a/ivadomed/loader/mri2d_segmentation_dataset.py b/ivadomed/loader/mri2d_segmentation_dataset.py index f289fa2d8..41515a245 100644 --- a/ivadomed/loader/mri2d_segmentation_dataset.py +++ b/ivadomed/loader/mri2d_segmentation_dataset.py @@ -1,15 +1,31 @@ +from __future__ import annotations import copy import random +from pathlib import Path +import pickle + +from typing import Tuple import numpy as np import torch +from torchvision.transforms import Compose from torch.utils.data import Dataset +from loguru import logger from ivadomed import transforms as imed_transforms, postprocessing as imed_postpro from ivadomed.loader import utils as imed_loader_utils -from ivadomed.loader.utils import dropout_input +from ivadomed.loader.utils import dropout_input, get_obj_size, create_temp_directory from ivadomed.loader.segmentation_pair import SegmentationPair from ivadomed.object_detection import utils as imed_obj_detect +from ivadomed.keywords import ROIParamsKW, MetadataKW, SegmentationDatasetKW, SegmentationPairKW +import typing + +if typing.TYPE_CHECKING: + from ivadomed.loader.slice_filter import SliceFilter + from ivadomed.loader.patch_filter import PatchFilter + from typing import List, Dict, Optional + +from ivadomed.utils import get_timestamp, get_system_memory class MRI2DSegmentationDataset(Dataset): @@ -22,9 +38,10 @@ class MRI2DSegmentationDataset(Dataset): stride (list): Size of the pixels' shift between patches, length equals 0 (no patching) or 2 (2d patching). slice_axis (int): Indicates the axis used to extract 2D slices from 3D NifTI files: "axial": 2, "sagittal": 0, "coronal": 1. 2D PNG/TIF/JPG files use default "axial": 2. - cache (bool): if the data should be cached in memory or not. + nibabel_cache (bool): if the data should be cached in memory or not. transform (torchvision.Compose): transformations to apply. - slice_filter_fn (dict): Slice filter parameters, see :doc:`configuration_file` for more details. + slice_filter_fn (SliceFilter): SliceFilter object containing Slice filter parameters. + patch_filter_fn (PatchFilter): PatchFilter object containing Patch filter parameters. task (str): choice between segmentation or classification. If classification: GT is discrete values, \ If segmentation: GT is binary mask. roi_params (dict): Dictionary containing parameters related to ROI image processing. @@ -42,10 +59,12 @@ class MRI2DSegmentationDataset(Dataset): is_2d_patch (bool): True if length in model params. prepro_transforms (Compose): Transformations to apply before training. 
         transform (Compose): Transformations to apply during training.
-        cache (bool): Tf the data should be cached in memory or not.
+        nibabel_cache (bool): determine if the nibabel data object should be cached in memory or not to avoid
+            repetitive disk loading.
         slice_axis (int): Indicates the axis used to extract 2D slices from 3D NifTI files:
             "axial": 2, "sagittal": 0, "coronal": 1. 2D PNG/TIF/JPG files use default "axial": 2.
-        slice_filter_fn (dict): Slice filter parameters, see :doc:`configuration_file` for more details.
+        slice_filter_fn (SliceFilter): SliceFilter object containing Slice filter parameters.
+        patch_filter_fn (PatchFilter): PatchFilter object containing Patch filter parameters.
         n_contrasts (int): Number of input contrasts.
         has_bounding_box (bool): True if bounding box in all metadata, else False.
         task (str): Choice between segmentation or classification. If classification: GT is discrete values, \
            If segmentation: GT is binary mask.
@@ -56,48 +75,74 @@
         roi_thr (int): If the ROI mask contains less than this number of non-zero voxels, the slice will be discarded
             from the dataset.
         is_input_dropout (bool): Return input with missing modalities.
+        disk_cache (bool): determines whether the items in the segmentation pairs for the entire dataset are cached on
+            disk (True) or in memory (False). Defaults to None, in which case the cache mode is determined
+            automatically from a guesstimated size of the entire dataset, naively assuming that the first image of
+            the first volume is representative.
     """
-    def __init__(self, filename_pairs, length=None, stride=None, slice_axis=2, cache=True, transform=None,
-                 slice_filter_fn=None, task="segmentation", roi_params=None, soft_gt=False, is_input_dropout=False):
+    def __init__(self,
+                 filename_pairs: list,
+                 length: list = None,
+                 stride: list = None,
+                 slice_axis: int = 2,
+                 nibabel_cache: bool = True,
+                 transform: List[Optional[Compose]] = None,
+                 slice_filter_fn: SliceFilter = None,
+                 patch_filter_fn: PatchFilter = None,
+                 task: str = "segmentation",
+                 roi_params: dict = None,
+                 soft_gt: bool = False,
+                 is_input_dropout: bool = False,
+                 disk_cache=None) -> None:

         if length is None:
             length = []
         if stride is None:
             stride = []
-        self.indexes = []
-        self.handlers = []
+        self.indexes: list = []
+        self.handlers: list = []
         self.filename_pairs = filename_pairs
         self.length = length
         self.stride = stride
         self.is_2d_patch = True if self.length else False
         self.prepro_transforms, self.transform = transform
-        self.cache = cache
+        self.cache = nibabel_cache
         self.slice_axis = slice_axis
         self.slice_filter_fn = slice_filter_fn
+        self.patch_filter_fn = patch_filter_fn
         self.n_contrasts = len(self.filename_pairs[0][0])
         if roi_params is None:
-            roi_params = {"suffix": None, "slice_filter_roi": None}
-        self.roi_thr = roi_params["slice_filter_roi"]
-        self.slice_filter_roi = roi_params["suffix"] is not None and isinstance(self.roi_thr, int)
+            roi_params = {ROIParamsKW.SUFFIX: None, ROIParamsKW.SLICE_FILTER_ROI: None}
+        self.roi_thr = roi_params[ROIParamsKW.SLICE_FILTER_ROI]
+        self.slice_filter_roi = roi_params[ROIParamsKW.SUFFIX] is not None and isinstance(self.roi_thr, int)
         self.soft_gt = soft_gt
         self.has_bounding_box = True
         self.task = task
         self.is_input_dropout = is_input_dropout
-
+        self.disk_cache: bool = disk_cache

     def load_filenames(self):
         """Load preprocessed pair data (input and gt) in handler."""
         for input_filenames, gt_filenames, roi_filename, metadata in self.filename_pairs:
-            roi_pair = SegmentationPair(input_filenames, roi_filename, metadata=metadata, slice_axis=self.slice_axis,
-                                        cache=self.cache, prepro_transforms=self.prepro_transforms)
-
-            seg_pair = SegmentationPair(input_filenames, gt_filenames, metadata=metadata, slice_axis=self.slice_axis,
-                                        cache=self.cache, prepro_transforms=self.prepro_transforms,
+            roi_pair = SegmentationPair(input_filenames,
+                                        roi_filename,
+                                        metadata=metadata,
+                                        slice_axis=self.slice_axis,
+                                        cache=self.cache,
+                                        prepro_transforms=self.prepro_transforms)
+
+            seg_pair = SegmentationPair(input_filenames,
+                                        gt_filenames,
+                                        metadata=metadata,
+                                        slice_axis=self.slice_axis,
+                                        cache=self.cache,
+                                        prepro_transforms=self.prepro_transforms,
                                         soft_gt=self.soft_gt)

             input_data_shape, _ = seg_pair.get_pair_shapes()

+            path_temp = Path(create_temp_directory())
+
             for idx_pair_slice in range(input_data_shape[-1]):
                 slice_seg_pair = seg_pair.get_pair_slice(idx_pair_slice, gt_type=self.task)
                 self.has_bounding_box = imed_obj_detect.verify_metadata(slice_seg_pair, self.has_bounding_box)
@@ -114,28 +159,55 @@ def load_filenames(self):
                 if self.slice_filter_roi and imed_loader_utils.filter_roi(slice_roi_pair['gt'], self.roi_thr):
                     continue

-                item = imed_transforms.apply_preprocessing_transforms(self.prepro_transforms,
+                item: Tuple[dict, dict] = imed_transforms.apply_preprocessing_transforms(self.prepro_transforms,
                                                                       slice_seg_pair,
                                                                       slice_roi_pair)

+                # Run once: decide whether the disk cache should be used
+                if self.disk_cache is None:
+                    self.determine_cache_need(item, input_data_shape[-1])

                 # If is_2d_patch, create handlers list for indexing patch
                 if self.is_2d_patch:
-                    for metadata in item[0]['input_metadata']:
-                        metadata['index_shape'] = item[0]['input'][0].shape
-                    self.handlers.append((item))
+                    for metadata in item[0][MetadataKW.INPUT_METADATA]:
+                        metadata[MetadataKW.INDEX_SHAPE] = item[0]['input'][0].shape
+                    if self.disk_cache:
+                        path_item = path_temp / f"item_{get_timestamp()}.pkl"
+                        with path_item.open(mode="wb") as f:
+                            pickle.dump(item, f)
+                        self.handlers.append((path_item))
+                    else:
+                        self.handlers.append((item))
                 # else, append the whole slice to self.indexes
                 else:
-                    self.indexes.append(item)
+
+                    if self.disk_cache:
+                        path_item = path_temp / f"item_{get_timestamp()}.pkl"
+                        with path_item.open(mode="wb") as f:
+                            pickle.dump(item, f)
+                        self.indexes.append(path_item)
+                    else:
+                        self.indexes.append(item)

         # If is_2d_patch, prepare indices of patches
         if self.is_2d_patch:
             self.prepare_indices()

-    def prepare_indices(self):
+    def prepare_indices(self) -> None:
         """Stores coordinates of 2d patches for training."""
         for i in range(0, len(self.handlers)):

-            input_img = self.handlers[i][0]['input']
+            if self.disk_cache:
+                with self.handlers[i].open(mode="rb") as f:
+                    item = pickle.load(f)
+                    primary_handle = item[0]
+            else:
+                primary_handle = self.handlers[i][0]
+
+            input_img = primary_handle.get('input')
+            gt_img = primary_handle.get('gt')
+            input_metadata = primary_handle.get('input_metadata')
+            gt_metadata = primary_handle.get('gt_metadata')
+
             shape = input_img[0].shape

             if len(self.length) != 2 or len(self.stride) != 2:
@@ -144,135 +216,197 @@
                 if stride > length or stride <= 0:
                     raise RuntimeError('"stride_2D" must be greater than 0 and smaller or equal to "length_2D".')
                 if length > size:
-                    raise RuntimeError('"length_2D" must be smaller or equal to image dimensions.')
-
-            for x in range(0, (shape[0] - self.length[0] + self.stride[0]), self.stride[0]):
-                if x + self.length[0] > shape[0]:
-                    x = (shape[0] - self.length[0])
-                for y in range(0, (shape[1] - self.length[1] + self.stride[1]), self.stride[1]):
-                    if y + self.length[1] > shape[1]:
-                        y = (shape[1] - self.length[1])
+                    raise RuntimeError('"length_2D" must be smaller or equal to image dimensions after resampling.')
+
+            for x_min in range(0, (shape[0] - self.length[0] + self.stride[0]), self.stride[0]):
+                if x_min + self.length[0] > shape[0]:
+                    x_min = (shape[0] - self.length[0])
+                x_max = x_min + self.length[0]
+                for y_min in range(0, (shape[1] - self.length[1] + self.stride[1]), self.stride[1]):
+                    if y_min + self.length[1] > shape[1]:
+                        y_min = (shape[1] - self.length[1])
+                    y_max = y_min + self.length[1]
+
+                    # Extract patch from handlers for patch filter
+                    patch = {'input': list(np.asarray(input_img)[:, x_min:x_max, y_min:y_max]),
+                             'gt': list(np.asarray(gt_img)[:, x_min:x_max, y_min:y_max]) \
+                                 if gt_img else [],
+                             'input_metadata': input_metadata,
+                             'gt_metadata': gt_metadata}
+                    if self.patch_filter_fn and not self.patch_filter_fn(patch):
+                        continue
+
                     self.indexes.append({
-                        'x_min': x,
-                        'x_max': x + self.length[0],
-                        'y_min': y,
-                        'y_max': y + self.length[1],
+                        'x_min': x_min,
+                        'x_max': x_max,
+                        'y_min': y_min,
+                        'y_max': y_max,
                         'handler_index': i})

-    def set_transform(self, transform):
+    def set_transform(self, transform: List[Optional[Compose]]) -> None:
         self.transform = transform

-    def __len__(self):
+    def __len__(self) -> int:
        return len(self.indexes)

-    def __getitem__(self, index):
+    def __getitem__(self, index: int) -> Dict[str, Optional[list]]:
         """Return the specific processed data corresponding to index (input, ground truth, roi and metadata).

         Args:
-            index (int): Slice index.
+            index (int): Slice or patch index.
         """
+        # CONTEXT
+        # 2D models are trained with or without 2D patches:
+        # With 2D patches:
+        #     * 'self.handlers' contains paired data for all preprocessed 2D slices
+        #     * 'self.indexes' is a list of coordinates for all 2D patches
+        #       e.g. [{'x_min': 0, 'x_max': 32, 'y_min': 0, 'y_max': 32, 'handler_index': 0},
+        #             {'x_min': 0, 'x_max': 32, 'y_min': 32, 'y_max': 64, 'handler_index': 0}]
+        #       where 'handler_index' is the index of the 2D slice from which the patch is extracted
+        # Without 2D patches:
+        #     * 'self.handlers' is unused
+        #     * 'self.indexes' contains paired data for all preprocessed 2D slices

+        # Extract coordinates and paired data for the patch or the slice (no patch) case
         # copy.deepcopy is used to have different coordinates for reconstruction for a given handler with patch,
         # to allow a different rater at each iteration of training, and to clean transforms params from previous
         # transforms i.e. remove params from previous iterations so that the coming transforms are different
         if self.is_2d_patch:
+            # Get patch coordinates from 'self.indexes'
             coord = self.indexes[index]
-            seg_pair_slice, roi_pair_slice = copy.deepcopy(self.handlers[coord['handler_index']])
+            # Extract patch pair from 'self.handlers'
+            if self.disk_cache:
+                with self.handlers[coord.get(SegmentationDatasetKW.HANDLER_INDEX)].open(mode="rb") as f:
+                    seg_pair_slice, roi_pair_slice = pickle.load(f)
+            else:
+                seg_pair_slice, roi_pair_slice = copy.deepcopy(self.handlers[coord.get(SegmentationDatasetKW.HANDLER_INDEX)])
         else:
-            seg_pair_slice, roi_pair_slice = copy.deepcopy(self.indexes[index])
-
-        # In case multiple raters
-        if seg_pair_slice['gt'] and isinstance(seg_pair_slice['gt'][0], list):
+            # Extract slice pair from 'self.indexes'
+            if self.disk_cache:
+                with self.indexes[index].open(mode="rb") as f:
+                    seg_pair_slice, roi_pair_slice = pickle.load(f)
+            else:
+                seg_pair_slice, roi_pair_slice = copy.deepcopy(self.indexes[index])
+            # Set coordinates to the slice full size
+            coord = {}
+            coord[SegmentationDatasetKW.X_MIN] = 0
+            coord[SegmentationDatasetKW.X_MAX] = seg_pair_slice[SegmentationPairKW.INPUT][0].shape[0]
+            coord[SegmentationDatasetKW.Y_MIN] = 0
+            coord[SegmentationDatasetKW.Y_MAX] = seg_pair_slice[SegmentationPairKW.INPUT][0].shape[1]
+
+        # In case of multiple raters
+        if seg_pair_slice[SegmentationPairKW.GT] and isinstance(seg_pair_slice[SegmentationPairKW.GT][0], list):
             # Randomly pick a rater
-            idx_rater = random.randint(0, len(seg_pair_slice['gt'][0]) - 1)
+            idx_rater = random.randint(0, len(seg_pair_slice[SegmentationPairKW.GT][0]) - 1)
             # Use it as ground truth for this iteration
             # Note: in case of multi-class: the same rater is used across classes
-            for idx_class in range(len(seg_pair_slice['gt'])):
-                seg_pair_slice['gt'][idx_class] = seg_pair_slice['gt'][idx_class][idx_rater]
-                seg_pair_slice['gt_metadata'][idx_class] = seg_pair_slice['gt_metadata'][idx_class][idx_rater]
+            for idx_class in range(len(seg_pair_slice[SegmentationPairKW.GT])):
+                seg_pair_slice[SegmentationPairKW.GT][idx_class] = seg_pair_slice[SegmentationPairKW.GT][idx_class][idx_rater]
+                seg_pair_slice[SegmentationPairKW.GT_METADATA][idx_class] = seg_pair_slice[SegmentationPairKW.GT_METADATA][idx_class][idx_rater]

-        metadata_input = seg_pair_slice['input_metadata'] if seg_pair_slice['input_metadata'] is not None else []
-        metadata_roi = roi_pair_slice['gt_metadata'] if roi_pair_slice['gt_metadata'] is not None else []
-        metadata_gt = seg_pair_slice['gt_metadata'] if seg_pair_slice['gt_metadata'] is not None else []
+        # Extract metadata from paired data
+        metadata_input = seg_pair_slice[SegmentationPairKW.INPUT_METADATA] if seg_pair_slice[SegmentationPairKW.INPUT_METADATA] is not None else []
+        metadata_roi = roi_pair_slice[SegmentationPairKW.GT_METADATA] if roi_pair_slice[SegmentationPairKW.GT_METADATA] is not None else []
+        metadata_gt = seg_pair_slice[SegmentationPairKW.GT_METADATA] if seg_pair_slice[SegmentationPairKW.GT_METADATA] is not None else []

         # Run transforms on ROI
-        # ROI goes first because params of ROICrop are needed for the followings
-        stack_roi, metadata_roi = self.transform(sample=roi_pair_slice["gt"],
-                                                 metadata=metadata_roi,
-                                                 data_type="roi")
-
-        # Update metadata_input with metadata_roi
-        metadata_input = imed_loader_utils.update_metadata(metadata_roi, metadata_input)
+        # Note that ROI is not available for the patch case
+        if self.is_2d_patch:
+            stack_roi, metadata_roi = None, None
+        else:
+            # ROI goes first because params of ROICrop are needed for the following transforms
params of ROICrop are needed for the followings + stack_roi, metadata_roi = self.transform(sample=roi_pair_slice[SegmentationPairKW.GT], + metadata=metadata_roi, + data_type="roi") + # Update metadata_input with metadata_roi + metadata_input = imed_loader_utils.update_metadata(metadata_roi, metadata_input) + + # Extract min/max coordinates + x_min = coord.get(SegmentationDatasetKW.X_MIN) + x_max = coord.get(SegmentationDatasetKW.X_MAX) + y_min = coord.get(SegmentationDatasetKW.Y_MIN) + y_max = coord.get(SegmentationDatasetKW.Y_MAX) + + # Extract image and gt slice or patch from coordinates + stack_input = np.asarray(seg_pair_slice[SegmentationPairKW.INPUT])[ + :, + x_min:x_max, + y_min:y_max + ] + if seg_pair_slice[SegmentationPairKW.GT]: + stack_gt = np.asarray(seg_pair_slice["gt"])[ + :, + x_min:x_max, + y_min:y_max + ] + else: + stack_gt = [] - # Run transforms on images - stack_input, metadata_input = self.transform(sample=seg_pair_slice["input"], + # Run transforms on image slice or patch + stack_input, metadata_input = self.transform(sample=list(stack_input), metadata=metadata_input, data_type="im") - # Update metadata_gt with metadata_input metadata_gt = imed_loader_utils.update_metadata(metadata_input, metadata_gt) - if self.task == "segmentation": - # Run transforms on images - stack_gt, metadata_gt = self.transform(sample=seg_pair_slice["gt"], + # Run transforms on gt slices or patches + stack_gt, metadata_gt = self.transform(sample=list(stack_gt), metadata=metadata_gt, data_type="gt") # Make sure stack_gt is binarized if stack_gt is not None and not self.soft_gt: stack_gt = imed_postpro.threshold_predictions(stack_gt, thr=0.5).astype(np.uint8) - else: # Force no transformation on labels for classification task # stack_gt is a tensor of size 1x1, values: 0 or 1 # "expand(1)" is necessary to be compatible with segmentation convention: n_labelxhxwxd stack_gt = torch.from_numpy(seg_pair_slice["gt"][0]).expand(1) - # If is_2d_patch, add coordinates to metadata to reconstruct image - if self.is_2d_patch: - shape_x = coord["x_max"] - coord["x_min"] - shape_y = coord["y_max"] - coord["y_min"] - - for metadata in metadata_input: - metadata['coord'] = [coord["x_min"], coord["x_max"], coord["y_min"], coord["y_max"]] - - data_dict = { - 'input': torch.zeros(stack_input.shape[0], shape_x, shape_y), - 'gt': torch.zeros(stack_gt.shape[0], shape_x, shape_y) if stack_gt is not None else None, - 'roi': torch.zeros(stack_roi.shape[0], shape_x, shape_y) if stack_roi is not None else None, - 'input_metadata': metadata_input, - 'gt_metadata': metadata_gt, - 'roi_metadata': metadata_roi - } - - for _ in range(len(stack_input)): - data_dict['input'] = stack_input[:, - coord['x_min']:coord['x_max'], - coord['y_min']:coord['y_max']] - - if stack_gt is not None: - for _ in range(len(stack_gt)): - data_dict['gt'] = stack_gt[:, - coord['x_min']:coord['x_max'], - coord['y_min']:coord['y_max']] - - if stack_roi is not None: - for _ in range(len(stack_roi)): - data_dict['roi'] = stack_roi[:, - coord['x_min']:coord['x_max'], - coord['y_min']:coord['y_max']] - - else: - data_dict = { - 'input': stack_input, - 'gt': stack_gt, - 'roi': stack_roi, - 'input_metadata': metadata_input, - 'gt_metadata': metadata_gt, - 'roi_metadata': metadata_roi - } + # Add coordinates of slice or patch to input metadata + for metadata in metadata_input: + metadata[MetadataKW.COORD] = [ + x_min, x_max, + y_min, y_max, + ] + + # Combine all processed data for a given patch or slice in dictionary + data_dict = { + 
+            SegmentationPairKW.INPUT: stack_input,
+            SegmentationPairKW.GT: stack_gt,
+            SegmentationPairKW.ROI: stack_roi,
+            MetadataKW.INPUT_METADATA: metadata_input,
+            MetadataKW.GT_METADATA: metadata_gt,
+            MetadataKW.ROI_METADATA: metadata_roi
+        }

         # Input-level dropout to train with missing modalities
         if self.is_input_dropout:
             data_dict = dropout_input(data_dict)

         return data_dict
+
+    def determine_cache_need(self, item: tuple, n_slice: int):
+        """
+        When the cache flag is not explicitly set, determine whether to cache the data or not.
+        Args:
+            item: an EXAMPLE item; the typical tuple structure containing the main data.
+            n_slice: number of slices in one filename pair.
+
+        Returns:
+            None. Sets 'self.disk_cache' accordingly.
+        """
+        size_item_in_bytes = get_obj_size(item)
+
+        optimal_ram_limit = get_system_memory() * 0.5
+
+        # Cache to disk when the estimated dataset size exceeds half the system RAM;
+        # keep in mind transforms etc. might take even MORE!
+        size_estimated_dataset_GB = size_item_in_bytes * len(self.filename_pairs) * n_slice / 1024 ** 3
+        if size_estimated_dataset_GB > optimal_ram_limit:
+            logger.info(f"Estimated 2D dataset size is {size_estimated_dataset_GB} GB, which is larger than {optimal_ram_limit} GB. Auto "
+                        f"enabling disk cache.")
+            self.disk_cache = True
+        else:
+            logger.info(f"Estimated 2D dataset size is {size_estimated_dataset_GB} GB, which is smaller than {optimal_ram_limit} GB. File "
+                        f"cache will not be used.")
+            self.disk_cache = False
diff --git a/ivadomed/loader/mri3d_subvolume_segmentation_dataset.py b/ivadomed/loader/mri3d_subvolume_segmentation_dataset.py
index 107ecf899..11dd2f5d2 100644
--- a/ivadomed/loader/mri3d_subvolume_segmentation_dataset.py
+++ b/ivadomed/loader/mri3d_subvolume_segmentation_dataset.py
@@ -1,15 +1,23 @@
 import copy
 import random
+from pathlib import Path
+import pickle
+from typing import List, Optional

 import numpy as np
-import torch
+
 from torch.utils.data import Dataset
+from loguru import logger

 from ivadomed import transforms as imed_transforms, postprocessing as imed_postpro
 from ivadomed.loader import utils as imed_loader_utils
-from ivadomed.loader.utils import dropout_input
+from ivadomed.loader.utils import dropout_input, create_temp_directory, get_obj_size
 from ivadomed.loader.segmentation_pair import SegmentationPair
 from ivadomed.object_detection import utils as imed_obj_detect
+from ivadomed.loader.patch_filter import PatchFilter
+from ivadomed.keywords import MetadataKW, SegmentationDatasetKW, SegmentationPairKW
+from ivadomed.utils import get_timestamp, get_system_memory
+from torchvision.transforms import Compose


 class MRI3DSubVolumeSegmentationDataset(Dataset):
@@ -24,34 +32,55 @@ class MRI3DSubVolumeSegmentationDataset(Dataset):

     Args:
         filename_pairs (list): A list of tuples in the format (input filename, ground truth filename).
-        transform (Compose): Transformations to apply.
         length (tuple): Size of each dimensions of the subvolumes, length equals 3.
         stride (tuple): Size of the overlapping per subvolume and dimensions, length equals 3.
         slice_axis (int): Indicates the axis used to extract slices: "axial": 2, "sagittal": 0, "coronal": 1.
+        transform (Compose): Transformations to apply.
+        subvolume_filter_fn (PatchFilter): PatchFilter object containing subvolume filter parameters.
         soft_gt (bool): If True, ground truths are not binarized before being fed to the network. Otherwise, ground
             truths are thresholded (0.5) after the data augmentation operations.
         is_input_dropout (bool): Return input with missing modalities.
+        disk_cache (bool): Set whether all input data should be cached in local folders to allow faster subsequent
+            reloading and bypass the memory cap; may be left as None to auto-detect based on the estimated dataset size.
     """

-    def __init__(self, filename_pairs, transform=None, length=(64, 64, 64), stride=(0, 0, 0), slice_axis=0,
-                 task="segmentation", soft_gt=False, is_input_dropout=False):
+    def __init__(self,
+                 filename_pairs: list,
+                 transform: List[Optional[Compose]] = None,
+                 length: tuple = (64, 64, 64),
+                 stride: tuple = (0, 0, 0),
+                 slice_axis: int = 0,
+                 subvolume_filter_fn: PatchFilter = None,
+                 task: str = "segmentation",
+                 soft_gt: bool = False,
+                 is_input_dropout: bool = False,
+                 disk_cache: Optional[bool] = True):
+
         self.filename_pairs = filename_pairs
-        self.handlers = []
-        self.indexes = []
+
+        # Could be a list of tuples of objects OR of Path objects pointing to the on-disk equivalent;
+        # behaves differently depending on whether self.disk_cache is set or not.
+        self.handlers: List[tuple] = []
+
+        self.indexes: list = []
         self.length = length
         self.stride = stride
         self.prepro_transforms, self.transform = transform
         self.slice_axis = slice_axis
-        self.has_bounding_box = True
+        self.subvolume_filter_fn = subvolume_filter_fn
+        self.has_bounding_box: bool = True
         self.task = task
         self.soft_gt = soft_gt
         self.is_input_dropout = is_input_dropout
+        self.disk_cache: Optional[bool] = disk_cache

         self._load_filenames()
         self._prepare_indices()

-    def _load_filenames(self):
+    def _load_filenames(self) -> None:
         """Load preprocessed pair data (input and gt) in handler."""
+        path_temp: Path = Path(create_temp_directory())
+
         for input_filename, gt_filename, roi_filename, metadata in self.filename_pairs:
             segpair = SegmentationPair(input_filename, gt_filename, metadata=metadata, slice_axis=self.slice_axis,
                                        soft_gt=self.soft_gt)
@@ -60,27 +89,54 @@ def _load_filenames(self):
             seg_pair = {
                 'input': input_data,
                 'gt': gt_data,
-                'input_metadata': metadata['input_metadata'],
-                'gt_metadata': metadata['gt_metadata']
+                MetadataKW.INPUT_METADATA: metadata[MetadataKW.INPUT_METADATA],
+                MetadataKW.GT_METADATA: metadata[MetadataKW.GT_METADATA]
             }

             self.has_bounding_box = imed_obj_detect.verify_metadata(seg_pair, self.has_bounding_box)
             if self.has_bounding_box:
-                self.prepro_transforms = imed_obj_detect.adjust_transforms(self.prepro_transforms, seg_pair,
+                self.prepro_transforms = imed_obj_detect.adjust_transforms(self.prepro_transforms,
+                                                                           seg_pair,
                                                                            length=self.length,
                                                                            stride=self.stride)
+
             seg_pair, roi_pair = imed_transforms.apply_preprocessing_transforms(self.prepro_transforms,
                                                                                 seg_pair=seg_pair)

-            for metadata in seg_pair['input_metadata']:
-                metadata['index_shape'] = seg_pair['input'][0].shape
-            self.handlers.append((seg_pair, roi_pair))
+            for metadata in seg_pair[MetadataKW.INPUT_METADATA]:
+                metadata[MetadataKW.INDEX_SHAPE] = seg_pair['input'][0].shape
+
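The disk cache below writes each preprocessed pair to a pickle file in a temporary directory and reloads it lazily at access time; a minimal standalone sketch of that round-trip, with a placeholder payload:

    from pathlib import Path
    from tempfile import mkdtemp
    import pickle

    path_cache_seg_pair = Path(mkdtemp()) / 'seg_pair_example.pkl'
    with path_cache_seg_pair.open(mode='wb') as f:
        pickle.dump({'input': [], 'gt': []}, f)  # placeholder pair
    with path_cache_seg_pair.open(mode='rb') as f:
        seg_pair = pickle.load(f)  # an equal, freshly-deserialized object on every load

+            # First time through, determine the cache mode automatically IF not specified. Otherwise, use the cache mode given.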
+            if self.disk_cache is None:
+                self.disk_cache = self.determine_cache_need(seg_pair, roi_pair)
+
+            if self.disk_cache:
+                # Write SegPair and ROIPair to disk cache, with timestamps to avoid collisions
+                # 'self.handlers' is now a list of FILE PATHS instead of the actual data, to prevent using too much memory
+                path_cache_seg_pair = path_temp / f'seg_pair_{get_timestamp()}.pkl'
+                with path_cache_seg_pair.open(mode='wb') as f:
+                    pickle.dump(seg_pair, f)
+
+                path_cache_roi_pair = path_temp / f'roi_pair_{get_timestamp()}.pkl'
+                with path_cache_roi_pair.open(mode='wb') as f:
+                    pickle.dump(roi_pair, f)
+                self.handlers.append((path_cache_seg_pair, path_cache_roi_pair))
+
+            else:
+                self.handlers.append((seg_pair, roi_pair))

     def _prepare_indices(self):
         """Stores coordinates of subvolumes for training."""
         for i in range(0, len(self.handlers)):
-            segpair, _ = self.handlers[i]
-            input_img = self.handlers[i][0]['input']
+
+            if self.disk_cache:
+                with self.handlers[i][0].open(mode='rb') as f:
+                    segpair = pickle.load(f)
+            else:
+                segpair = self.handlers[i][0]
+
+            input_img, gt = segpair.get('input'), segpair.get('gt')
+
             shape = input_img[0].shape

             if ((shape[0] - self.length[0]) % self.stride[0]) != 0 or self.length[0] % 16 != 0 or shape[0] < \
@@ -95,91 +151,163 @@ def _prepare_indices(self):
             for x in range(0, (shape[0] - self.length[0]) + 1, self.stride[0]):
                 for y in range(0, (shape[1] - self.length[1]) + 1, self.stride[1]):
                     for z in range(0, (shape[2] - self.length[2]) + 1, self.stride[2]):
+                        x_min, x_max = x, x + self.length[0]
+                        y_min, y_max = y, y + self.length[1]
+                        z_min, z_max = z, z + self.length[2]
+
+                        subvolume = {
+                            'input': list(np.asarray(input_img)[:, x_min:x_max, y_min:y_max, z_min:z_max]),
+                            'gt': list(np.asarray(gt)[:, x_min:x_max, y_min:y_max, z_min:z_max] if gt else []),
+                        }
+
+                        if self.subvolume_filter_fn and not self.subvolume_filter_fn(subvolume):
+                            continue
+
                         self.indexes.append({
-                            'x_min': x,
-                            'x_max': x + self.length[0],
-                            'y_min': y,
-                            'y_max': y + self.length[1],
-                            'z_min': z,
-                            'z_max': z + self.length[2],
-                            'handler_index': i})
-
-    def __len__(self):
+                            'x_min': x_min,
+                            'x_max': x_max,
+                            'y_min': y_min,
+                            'y_max': y_max,
+                            'z_min': z_min,
+                            'z_max': z_max,
+                            'handler_index': i,
+                        })
+
+    def __len__(self) -> int:
         """Return the dataset size. The number of subvolumes."""
         return len(self.indexes)

-    def __getitem__(self, index):
-        """Return the specific index pair subvolume (input, ground truth).
+    def __getitem__(self, subvolume_index: int) -> dict:
+        """Return the specific processed subvolume corresponding to index (input, ground truth and metadata).

         Args:
-            index (int): Subvolume index.
+            subvolume_index (int): Subvolume (patch) index.
         """
+        # CONTEXT
+        # All 3D models are trained with 3D subvolumes (patches):
+        # * 'self.handlers' contains paired data for all preprocessed 3D volumes
+        # * 'self.indexes' is a list of coordinates for all 3D subvolumes
+        #   e.g.
[{'x_min': 0, 'x_max': 32, 'y_min': 0, 'y_max': 32, 'z_min': 0, 'z_max': 16, 'handler_index': 0}, + # {'x_min': 0, 'x_max': 32, 'y_min': 0, 'y_max': 32, 'z_min': 16, 'z_max': 32, 'handler_index': 0}] + # where 'handler_index' is the index of the 3D volume from which the subvolume is extracted + # Note that ROI is not available for 3D models + + # Extract coordinates and paired data for the subvolume + # Get subvolume coordinates from 'self.indexes' + coord: dict = self.indexes[subvolume_index] + # Extract subvolume pair from 'self.handlers' # copy.deepcopy is used to have different coordinates for reconstruction for a given handler, # to allow a different rater at each iteration of training, and to clean transforms params from previous # transforms i.e. remove params from previous iterations so that the coming transforms are different - coord = self.indexes[index] - seg_pair, _ = copy.deepcopy(self.handlers[coord['handler_index']]) + tuple_seg_roi_pair: tuple = self.handlers[coord.get(SegmentationDatasetKW.HANDLER_INDEX)] + if self.disk_cache: + with tuple_seg_roi_pair[0].open(mode='rb') as f: + seg_pair = pickle.load(f) + else: + seg_pair, _ = copy.deepcopy(tuple_seg_roi_pair) - # In case multiple raters - if seg_pair['gt'] and isinstance(seg_pair['gt'][0], list): + # In case of multiple raters + if seg_pair[SegmentationPairKW.GT] and isinstance(seg_pair[SegmentationPairKW.GT][0], list): # Randomly pick a rater - idx_rater = random.randint(0, len(seg_pair['gt'][0]) - 1) + idx_rater = random.randint(0, len(seg_pair[SegmentationPairKW.GT][0]) - 1) # Use it as ground truth for this iteration # Note: in case of multi-class: the same rater is used across classes - for idx_class in range(len(seg_pair['gt'])): - seg_pair['gt'][idx_class] = seg_pair['gt'][idx_class][idx_rater] - seg_pair['gt_metadata'][idx_class] = seg_pair['gt_metadata'][idx_class][idx_rater] + for idx_class in range(len(seg_pair[SegmentationPairKW.GT])): + seg_pair[SegmentationPairKW.GT][idx_class] = seg_pair[SegmentationPairKW.GT][idx_class][idx_rater] + seg_pair[SegmentationPairKW.GT_METADATA][idx_class] = seg_pair[SegmentationPairKW.GT_METADATA][idx_class][idx_rater] + + # Extract metadata from paired data + metadata_input = seg_pair[SegmentationPairKW.INPUT_METADATA] if seg_pair[SegmentationPairKW.INPUT_METADATA] is not None else [] + metadata_gt = seg_pair[SegmentationPairKW.GT_METADATA] if seg_pair[SegmentationPairKW.GT_METADATA] is not None else [] + + # Extract min/max coordinates + x_min = coord.get(SegmentationDatasetKW.X_MIN) + x_max = coord.get(SegmentationDatasetKW.X_MAX) + y_min = coord.get(SegmentationDatasetKW.Y_MIN) + y_max = coord.get(SegmentationDatasetKW.Y_MAX) + z_min = coord.get(SegmentationDatasetKW.Z_MIN) + z_max = coord.get(SegmentationDatasetKW.Z_MAX) - metadata_input = seg_pair['input_metadata'] if seg_pair['input_metadata'] is not None else [] - metadata_gt = seg_pair['gt_metadata'] if seg_pair['gt_metadata'] is not None else [] + # Extract subvolume and gt from coordinates + stack_input = np.asarray(seg_pair[SegmentationPairKW.INPUT])[ + :, + x_min:x_max, + y_min:y_max, + z_min:z_max + ] - # Run transforms on images - stack_input, metadata_input = self.transform(sample=seg_pair['input'], + if seg_pair[SegmentationPairKW.GT]: + stack_gt = np.asarray(seg_pair[SegmentationPairKW.GT])[ + :, + x_min:x_max, + y_min:y_max, + z_min:z_max + ] + else: + stack_gt = [] + + # Run transforms on subvolume + stack_input, metadata_input = self.transform(sample=list(stack_input), metadata=metadata_input, 
                                                      data_type="im")

         # Update metadata_gt with metadata_input
         metadata_gt = imed_loader_utils.update_metadata(metadata_input, metadata_gt)

-        # Run transforms on images
-        stack_gt, metadata_gt = self.transform(sample=seg_pair['gt'],
+        # Run transforms on gt
+        stack_gt, metadata_gt = self.transform(sample=list(stack_gt),
                                                metadata=metadata_gt,
                                                data_type="gt")
         # Make sure stack_gt is binarized
         if stack_gt is not None and not self.soft_gt:
             stack_gt = imed_postpro.threshold_predictions(stack_gt, thr=0.5).astype(np.uint8)

-        shape_x = coord["x_max"] - coord["x_min"]
-        shape_y = coord["y_max"] - coord["y_min"]
-        shape_z = coord["z_max"] - coord["z_min"]
-
-        # add coordinates to metadata to reconstruct volume
+        # Add coordinates to metadata to reconstruct volume
         for metadata in metadata_input:
-            metadata['coord'] = [coord["x_min"], coord["x_max"], coord["y_min"], coord["y_max"], coord["z_min"],
-                                 coord["z_max"]]
+            metadata[MetadataKW.COORD] = [
+                x_min, x_max,
+                y_min, y_max,
+                z_min, z_max,
+            ]

+        # Combine all processed data for a given subvolume in dictionary
         subvolumes = {
-            'input': torch.zeros(stack_input.shape[0], shape_x, shape_y, shape_z),
-            'gt': torch.zeros(stack_gt.shape[0], shape_x, shape_y, shape_z) if stack_gt is not None else None,
-            'input_metadata': metadata_input,
-            'gt_metadata': metadata_gt
+            SegmentationPairKW.INPUT: stack_input,
+            SegmentationPairKW.GT: stack_gt,
+            MetadataKW.INPUT_METADATA: metadata_input,
+            MetadataKW.GT_METADATA: metadata_gt
         }

-        for _ in range(len(stack_input)):
-            subvolumes['input'] = stack_input[:,
-                                  coord['x_min']:coord['x_max'],
-                                  coord['y_min']:coord['y_max'],
-                                  coord['z_min']:coord['z_max']]
-
         # Input-level dropout to train with missing modalities
         if self.is_input_dropout:
             subvolumes = dropout_input(subvolumes)

-        if stack_gt is not None:
-            for _ in range(len(stack_gt)):
-                subvolumes['gt'] = stack_gt[:,
-                                   coord['x_min']:coord['x_max'],
-                                   coord['y_min']:coord['y_max'],
-                                   coord['z_min']:coord['z_max']]
-
         return subvolumes
+
+    def determine_cache_need(self, seg_pair: dict, roi_pair: dict):
+        """
+        When the cache flag is not explicitly set, determine whether to cache the data or not.
+        Args:
+            seg_pair: an EXAMPLE; a typical seg_pair dict.
+            roi_pair: an EXAMPLE; a typical roi_pair dict.
+
+        Returns:
+            bool: True if the disk cache should be used, False otherwise.
+        """
+        size_seg_pair_in_bytes = get_obj_size(seg_pair)
+        size_roi_pair_in_bytes = get_obj_size(roi_pair)
+
+        optimal_ram_limit = get_system_memory() * 0.5
+
+        # Cache to disk when the estimated dataset size exceeds half the system RAM;
+        # keep in mind transforms etc. might take even MORE!
+        size_estimated_dataset_GB = (size_seg_pair_in_bytes + size_roi_pair_in_bytes) * len(self.filename_pairs) / 1024 ** 3
+        if size_estimated_dataset_GB > optimal_ram_limit:
+            logger.info(f"Estimated 3D dataset size is {size_estimated_dataset_GB} GB, which is larger than {optimal_ram_limit} GB. Auto "
+                        f"enabling disk cache.")
+            self.disk_cache = True
+            return True
+        else:
+            logger.info(f"Estimated 3D dataset size is {size_estimated_dataset_GB} GB, which is smaller than {optimal_ram_limit} GB. File "
+                        f"cache will not be used.")
+            self.disk_cache = False
+            return False
diff --git a/ivadomed/loader/patch_filter.py b/ivadomed/loader/patch_filter.py
new file mode 100644
index 000000000..d203119ad
--- /dev/null
+++ b/ivadomed/loader/patch_filter.py
@@ -0,0 +1,59 @@
+import numpy as np
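A minimal sketch of the intended call pattern for the PatchFilter class defined below, with hypothetical numpy masks (a labeled patch is kept, an all-empty one is dropped at training time):

    import numpy as np

    patch_filter = PatchFilter(filter_empty_mask=True, is_train=True)
    kept = {'input': [np.random.rand(32, 32)], 'gt': [np.ones((32, 32))]}
    dropped = {'input': [np.random.rand(32, 32)], 'gt': [np.zeros((32, 32))]}
    assert patch_filter(kept) is True
    assert patch_filter(dropped) is False

+
+
+class PatchFilter(object):
+    """Filter 2D or 3D patches from dataset.
+
+    If a patch does not meet certain conditions, it is discarded from the dataset at training time.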
+
+    Args:
+        filter_empty_mask (bool): If True, 2D or 3D patches where all voxel labels are zeros are discarded at training time.
+        filter_absent_class (bool): If True, 2D or 3D patches where all voxel labels are zero for one or more classes are
+            discarded at training time.
+        filter_empty_input (bool): If True, 2D or 3D patches where all voxel intensities are zeros are discarded
+            at training time.
+        is_train (bool): Indicates if at training time.
+
+    Attributes:
+        filter_empty_mask (bool): If True, 2D or 3D patches where all voxel labels are zeros are discarded at training time.
+            Default: False.
+        filter_absent_class (bool): If True, 2D or 3D patches where all voxel labels are zero for one or more classes are
+            discarded at training time. Default: False.
+        filter_empty_input (bool): If True, 2D or 3D patches where all voxel intensities are zeros are discarded
+            at training time. Default: False.
+        is_train (bool): Indicates if at training time.
+
+    """
+
+    def __init__(self, filter_empty_mask: bool = False,
+                 filter_absent_class: bool = False,
+                 filter_empty_input: bool = False,
+                 is_train: bool = False) -> None:
+        self.filter_empty_mask = filter_empty_mask
+        self.filter_absent_class = filter_absent_class
+        self.filter_empty_input = filter_empty_input
+        self.is_train = is_train
+
+    def __call__(self, sample: dict) -> bool:
+        """Extract input_data and gt_data lists from sample dict and discard them if they don't match certain
+        conditions.
+
+        """
+        input_data, gt_data = sample['input'], sample['gt']
+
+        if self.is_train:
+            if self.filter_empty_mask:
+                # Discard 2D or 3D patches that do not have ANY ground truth (i.e. all masks are empty) at training time
+                if not np.any(gt_data):
+                    return False
+            if self.filter_absent_class:
+                # Discard 2D or 3D patches that have absent classes (i.e. one or more masks are empty) at training time
+                if not np.all([np.any(mask) for mask in gt_data]):
+                    return False
+            if self.filter_empty_input:
+                # Discard set of 2D or 3D patches if one of them is empty or filled with constant value
+                # (i.e. std == 0) at training time
+                if np.any([img.std() == 0 for img in input_data]):
+                    return False
+
+        return True
diff --git a/ivadomed/loader/sample_meta_data.py b/ivadomed/loader/sample_meta_data.py
new file mode 100644
index 000000000..465d9e661
--- /dev/null
+++ b/ivadomed/loader/sample_meta_data.py
@@ -0,0 +1,48 @@
+from __future__ import annotations
+import typing
+if typing.TYPE_CHECKING:
+    from typing import ItemsView
+    from typing import KeysView
+
+
+class SampleMetadata(object):
+    """Metadata class to help update, get and set metadata values.
+
+    Args:
+        d (dict): Initial metadata.
+
+    Attributes:
+        metadata (dict): Image metadata.
+    """
+
+    def __init__(self, d: dict = None) -> None:
+        self.metadata = d if d is not None else {}
+
+    def __setitem__(self, key: any, value: any) -> None:
+        self.metadata[key] = value
+
+    def __getitem__(self, key: any) -> any:
+        return self.metadata[key]
+
+    def __contains__(self, key: any) -> bool:
+        return key in self.metadata
+
+    def items(self) -> ItemsView:
+        return self.metadata.items()
+
+    def _update(self, ref: SampleMetadata, list_keys: list) -> None:
+        """Update metadata keys with a reference metadata.
+
+        A given list of metadata keys will be changed and given the values of the reference
+        metadata.
+
+        Args:
+            ref (SampleMetadata): Reference metadata object.
+            list_keys (list): List of keys that need to be updated.
+ """ + for k in list_keys: + if (k not in self.metadata.keys() or not bool(self.metadata[k])) and k in ref.metadata.keys(): + self.metadata[k] = ref.metadata[k] + + def keys(self) -> KeysView: + return self.metadata.keys() diff --git a/ivadomed/loader/segmentation_pair.py b/ivadomed/loader/segmentation_pair.py index 83cf18e9e..172fa324a 100644 --- a/ivadomed/loader/segmentation_pair.py +++ b/ivadomed/loader/segmentation_pair.py @@ -1,10 +1,17 @@ +from __future__ import annotations from pathlib import Path import imageio import nibabel as nib import numpy as np from ivadomed.loader import utils as imed_loader_utils +from ivadomed.loader.sample_meta_data import SampleMetadata from ivadomed import postprocessing as imed_postpro +from ivadomed.keywords import MetadataKW +import typing +if typing.TYPE_CHECKING: + from typing import List + import nibabel.nifti1 class SegmentationPair(object): @@ -34,8 +41,8 @@ class SegmentationPair(object): gt_handle (list): List of gt (ground truth) NifTI data as 'nibabel.nifti1.Nifti1Image' object """ - def __init__(self, input_filenames, gt_filenames, metadata=None, slice_axis=2, cache=True, prepro_transforms=None, - soft_gt=False): + def __init__(self, input_filenames: List[str], gt_filenames: List[str], metadata: list = None, slice_axis: int = 2, + cache: bool = True, prepro_transforms: dict = None, soft_gt: bool = False) -> None: self.input_filenames = input_filenames self.gt_filenames = gt_filenames @@ -94,11 +101,11 @@ def __init__(self, input_filenames, gt_filenames, metadata=None, slice_axis=2, c if self.metadata: self.metadata = [] for data, input_filename in zip(metadata, input_filenames): - data["input_filenames"] = input_filename - data["gt_filenames"] = gt_filenames + data[MetadataKW.INPUT_FILENAMES] = input_filename + data[MetadataKW.GT_FILENAMES] = gt_filenames self.metadata.append(data) - def get_pair_shapes(self): + def get_pair_shapes(self) -> (tuple, tuple): """Return the tuple (input, ground truth) representing both the input and ground truth shapes.""" input_shape = [] for handle in self.input_handle: @@ -123,7 +130,7 @@ def get_pair_shapes(self): return input_shape[0], gt_shape[0] if len(gt_shape) else None - def get_pair_data(self): + def get_pair_data(self) -> (list, list): """Return the tuple (input, ground truth) with the data content in numpy array.""" cache_mode = 'fill' if self.cache else 'unchanged' @@ -153,7 +160,7 @@ def get_pair_data(self): return input_data, gt_data - def get_pair_metadata(self, slice_index=0, coord=None): + def get_pair_metadata(self, slice_index: int = 0, coord: tuple | list = None) -> dict: """Return dictionary containing input and gt metadata. 
Args: @@ -167,24 +174,22 @@ def get_pair_metadata(self, slice_index=0, coord=None): for idx_class, gt in enumerate(self.gt_handle): if gt is not None: if not isinstance(gt, list): # this tissue has annotation from only one rater - gt_meta_dict.append(imed_loader_utils.SampleMetadata({ - "zooms": imed_loader_utils.orient_shapes_hwd(gt.header.get_zooms(), self.slice_axis), - "data_shape": imed_loader_utils.orient_shapes_hwd(gt.header.get_data_shape(), self.slice_axis), - "gt_filenames": self.metadata[0]["gt_filenames"], - "bounding_box": self.metadata[0]["bounding_box"] if 'bounding_box' in self.metadata[ - 0] else None, - "data_type": 'gt', - "crop_params": {} + gt_meta_dict.append(SampleMetadata({ + MetadataKW.ZOOMS: imed_loader_utils.orient_shapes_hwd(gt.header.get_zooms(), self.slice_axis), + MetadataKW.DATA_SHAPE: imed_loader_utils.orient_shapes_hwd(gt.header.get_data_shape(), self.slice_axis), + MetadataKW.GT_FILENAMES: self.metadata[0].get(MetadataKW.GT_FILENAMES), + MetadataKW.BOUNDING_BOX: self.metadata[0].get(MetadataKW.BOUNDING_BOX), + MetadataKW.DATA_TYPE: 'gt', + MetadataKW.CROP_PARAMS: {} })) else: - gt_meta_dict.append([imed_loader_utils.SampleMetadata({ - "zooms": imed_loader_utils.orient_shapes_hwd(gt_rater.header.get_zooms(), self.slice_axis), - "data_shape": imed_loader_utils.orient_shapes_hwd(gt_rater.header.get_data_shape(), self.slice_axis), - "gt_filenames": self.metadata[0]["gt_filenames"][idx_class][idx_rater], - "bounding_box": self.metadata[0]["bounding_box"] if 'bounding_box' in self.metadata[ - 0] else None, - "data_type": 'gt', - "crop_params": {} + gt_meta_dict.append([SampleMetadata({ + MetadataKW.ZOOMS: imed_loader_utils.orient_shapes_hwd(gt_rater.header.get_zooms(), self.slice_axis), + MetadataKW.DATA_SHAPE: imed_loader_utils.orient_shapes_hwd(gt_rater.header.get_data_shape(), self.slice_axis), + MetadataKW.GT_FILENAMES: self.metadata[0].get(MetadataKW.GT_FILENAMES)[idx_class][idx_rater], + MetadataKW.BOUNDING_BOX: self.metadata[0].get(MetadataKW.BOUNDING_BOX), + MetadataKW.DATA_TYPE: 'gt', + MetadataKW.CROP_PARAMS: {} }) for idx_rater, gt_rater in enumerate(gt)]) else: @@ -198,28 +203,28 @@ def get_pair_metadata(self, slice_index=0, coord=None): input_meta_dict = [] for handle in self.input_handle: - input_meta_dict.append(imed_loader_utils.SampleMetadata({ - "zooms": imed_loader_utils.orient_shapes_hwd(handle.header.get_zooms(), self.slice_axis), - "data_shape": imed_loader_utils.orient_shapes_hwd(handle.header.get_data_shape(), self.slice_axis), - "data_type": 'im', - "crop_params": {} + input_meta_dict.append(SampleMetadata({ + MetadataKW.ZOOMS: imed_loader_utils.orient_shapes_hwd(handle.header.get_zooms(), self.slice_axis), + MetadataKW.DATA_SHAPE: imed_loader_utils.orient_shapes_hwd(handle.header.get_data_shape(), self.slice_axis), + MetadataKW.DATA_TYPE: 'im', + MetadataKW.CROP_PARAMS: {} })) dreturn = { - "input_metadata": input_meta_dict, - "gt_metadata": gt_meta_dict, + MetadataKW.INPUT_METADATA: input_meta_dict, + MetadataKW.GT_METADATA: gt_meta_dict, } for idx, metadata in enumerate(self.metadata): # loop across channels - metadata["slice_index"] = slice_index - metadata["coord"] = coord + metadata[MetadataKW.SLICE_INDEX] = slice_index + metadata[MetadataKW.COORD] = coord self.metadata[idx] = metadata for metadata_key in metadata.keys(): # loop across input metadata - dreturn["input_metadata"][idx][metadata_key] = metadata[metadata_key] + dreturn[MetadataKW.INPUT_METADATA][idx][metadata_key] = metadata[metadata_key] return dreturn - def 
get_pair_slice(self, slice_index, gt_type="segmentation"): + def get_pair_slice(self, slice_index: int, gt_type: str = "segmentation") -> dict: """Return the specified slice from (input, ground truth). Args: @@ -262,13 +267,13 @@ def get_pair_slice(self, slice_index, gt_type="segmentation"): dreturn = { "input": input_slices, "gt": gt_slices, - "input_metadata": metadata["input_metadata"], - "gt_metadata": metadata["gt_metadata"], + MetadataKW.INPUT_METADATA: metadata.get(MetadataKW.INPUT_METADATA), + MetadataKW.GT_METADATA: metadata.get(MetadataKW.GT_METADATA), } return dreturn - def read_file(self, filename, is_gt=False): + def read_file(self, filename: str, is_gt: bool = False) -> nibabel.nifti1.Nifti1Image: """Read file according to file extension and returns 'nibabel.nifti1.Nifti1Image' object. Args: @@ -281,7 +286,7 @@ def read_file(self, filename, is_gt=False): extension = imed_loader_utils.get_file_extension(filename) # TODO: remove "ome" from condition when implementing OMETIFF support (#739) if (not extension) or ("ome" in extension): - raise RuntimeError(f"The input file extension '{extension}' of '{Path(filename).stem}' is not " + raise RuntimeError(f"The input file extension '{extension}' of '{Path(filename).name}' is not " f"supported. ivadomed supports the following " f"file extensions: '.nii', '.nii.gz', '.png', '.tif', '.tiff', '.jpg' and '.jpeg'.") @@ -292,22 +297,12 @@ def read_file(self, filename, is_gt=False): img = self.convert_file_to_nifti(filename, extension, is_gt) return img - def convert_file_to_nifti(self, filename, extension, is_gt=False): + def convert_file_to_nifti(self, filename: str, extension: str, is_gt: bool = False) -> nibabel.nifti1.Nifti1Image: """ Convert a non-NifTI image into a 'nibabel.nifti1.Nifti1Image' object and save to a file. This method is especially relevant for making microscopy data compatible with NifTI-only pipelines. - The implementation of this method is dependent on the development of the corresponding - microscopy BEP (github.com/ivadomed/ivadomed/issues/301, bids.neuroimaging.io/bep031): - * "pixdim" (zooms) for Nifti1Image object is extracted as follows: - * For train, test and segment commands, PixelSize is taken from the metadata in BIDS JSON sidecar file. - * For inference with the segment_volume function, PixelSize must be provided in the 'options' argument. - * PixelSize definition in example dataset is a scalar in micrometers (BIDS BEP031 v0.0.2) - * PixelSize definition changed for list of 2-numbers [X, Y] or 3-numbers [X, Y, Z] in micrometers - for 2D and 3D respectively (BIDS BEP031 v0.0.3) - * Both PixelSize definitions are supported in this function. 
-
     TODO: (#739) implement OMETIFF behavior (if "ome" in extension)

     Args:
@@ -320,10 +315,55 @@
         """
         # For '.png', '.tif', '.tiff', '.jpg' and 'jpeg' extentions
         # Read image as 8 bit grayscale in numpy array (behavior TBD in ivadomed for RGB, RBGA or higher bit depth)
-        if "tif" in extension:
-            img = np.expand_dims(imageio.imread(filename, format='tiff-pil', as_gray=True), axis=-1).astype(np.uint8)
+        try:
+            props = imageio.v3.improps(filename)  # new in v3 - standardized metadata
+            _img = imageio.v3.imread(filename)
+        except Exception:
+            # brute force fall back to support backward compatibility
+            if '.tif' in extension:
+                img = np.expand_dims(
+                    imageio.v3.imread(filename, plugin='TIFF-PIL'),
+                    axis=-1).astype(np.uint8)
+
+                if len(img.shape) > 3:
+                    img = np.expand_dims(
+                        imageio.v3.imread(filename, plugin='TIFF-PIL', as_gray=True),
+                        axis=-1).astype(np.uint8)
+            else:
+                # NOTE: The following yield the same result but the first one is preferred
+                # to support default backward compatibility:
+                # 1. _img = imageio.v3.imread(filename, plugin='PNG-PIL', as_gray=True)
+                # 2. _img = imageio.v3.imread(filename, plugin='PNG-PIL', pilmode='F')
+                # 3. _img = imageio.v3.imread(filename, plugin='pillow', pilmode='F')
+
+                img = np.expand_dims(
+                    imageio.v3.imread(filename, plugin='PNG-PIL', as_gray=True),
+                    axis=-1).astype(np.uint8)
         else:
-            img = np.expand_dims(imageio.imread(filename, as_gray=True), axis=-1).astype(np.uint8)
+            # cf. https://github.com/ivadomed/ivadomed/pull/1297#discussion_r1267563980 for more details
+
+            # TIFF is a "wild" format. A few assumptions greatly simplify the code below:
+            # 1. the image is interleaved/channel-last (not planar)
+            # 2. the colorspace is one of: binary, gray, RGB, RGBA (not aliasing ones like YUV or CMYK)
+            # 3. the image is flat (not a volume or time-series)
+            # Note: All of these are trivially true for JPEG and PNG due to limitations of these formats.
+
+            # make grayscale (treats binary as 1-bit grayscale)
+            colorspace_idx = 2
+            if _img.ndim <= colorspace_idx:  # binary or gray
+                pass  # nothing to do
+            elif _img.shape[colorspace_idx] == 2:  # gray with alpha channel
+                _img = _img[:, :, 0]
+            elif _img.shape[colorspace_idx] == 3:  # RGB
+                _img = np.sum(_img * (.299, .587, .114), axis=-1)
+            else:  # RGBA
+                # discards alpha
+                _img = np.sum(_img * (.299, .587, .114, 0), axis=-1)

+            if len(_img.shape) < 3:
+                _img = np.expand_dims(_img, axis=-1)
+
+            img = imageio.core.image_as_uint(_img, bitdepth=8)

         # Binarize ground-truth values (0-255) to 0 and 1 in uint8 with threshold 0.5
         if is_gt:
@@ -332,29 +372,12 @@
         # Convert numpy array to Nifti1Image object with 4x4 identity affine matrix
         img = nib.Nifti1Image(img, affine=np.eye(4))

-        # Get pixel size in um from json metadata and convert to mm
-        array_length = [2, 3]  # Accepted array length for 'PixelSize' metadata
-        conversion_factor = 0.001  # Conversion factor from um to mm
-        if 'PixelSize' in self.metadata[0]:
-            ps_in_um = self.metadata[0]['PixelSize']
-            if isinstance(ps_in_um, list) and (len(ps_in_um) in array_length):
-                ps_in_um = np.asarray(ps_in_um)
-            elif isinstance(ps_in_um, float):
-                ps_in_um = np.asarray([ps_in_um, ps_in_um])
-            else:
-                raise RuntimeError("'PixelSize' metadata type is not supported. Format must be 2D [X, Y] array,"
-                                   " 3D [X, Y, Z] array or float.")
-            # Note: pixdim[1,2,3] must be non-zero in Nifti objects even if there is only one slice.
-            # When ps_in_um[2] (pixdim[3]) is not present or 0, we assign the same PixelSize as ps_in_um[0] (pixdim[1])
-            ps_in_um = np.resize(ps_in_um, 3)
-            if ps_in_um[2] == 0:
-                ps_in_um[2] = ps_in_um[0]
-            ps_in_mm = tuple(ps_in_um * conversion_factor)
-        else:
-            raise RuntimeError("'PixelSize' is missing from metadata")
+        # Get PixelSize in millimeters in order (PixelSizeY, PixelSizeX, PixelSizeZ), where X is the width,
+        # Y the height and Z the depth of the image.
+        ps_in_mm = self.get_microscopy_pixelsize(filename)

         # Set "pixdim" (zooms) in Nifti1Image object header
-        img.header.set_zooms((ps_in_mm))
+        img.header.set_zooms(ps_in_mm)

         # If it doesn't already exist, save NifTI file in path_data alongside PNG/TIF/JPG file
         fname_out = imed_loader_utils.update_filename_to_nifti(filename)
@@ -362,3 +385,69 @@
             nib.save(img, fname_out)

         return img
+
+
+    def get_microscopy_pixelsize(self, filename: str) -> tuple:
+        """
+        Get the microscopy pixel size from the metadata and convert to millimeters.
+
+        The implementation of this method is compliant with BIDS version 1.7.0:
+        * "pixdim" (zooms) for Nifti1Image object is extracted as follows:
+            * For train, test and segment commands, PixelSize is taken from the metadata in BIDS JSON sidecar file.
+            * For inference with the segment_volume function, PixelSize and PixelSizeUnits must be provided in the
+              'options' argument.
+        * The function supports the PixelSize definition of BIDS 1.7.0 as a list of 2-numbers
+          [PixelSizeX, PixelSizeY] or 3-numbers [PixelSizeX, PixelSizeY, PixelSizeZ] for 2D and 3D
+          respectively, where X is the width, Y the height and Z the depth of the image.
+        * The function supports the PixelSizeUnits definition of BIDS 1.7.0 as "mm", "um" or "nm".
+
+        Returns:
+            tuple: Pixel size in millimeters (ps_in_mm) in order (PixelSizeY, PixelSizeX, PixelSizeZ),
+                where Y is the height, X the width and Z the depth of the image.
+        """
+
+        # Get pixel size units from json metadata and set conversion factor from pixel size units to mm
+        if MetadataKW.PIXEL_SIZE_UNITS in self.metadata[0]:
+            pixel_size_units = self.metadata[0][MetadataKW.PIXEL_SIZE_UNITS]
+            if pixel_size_units == "mm":
+                conversion_factor = 1  # Conversion factor from mm to mm
+            elif pixel_size_units == "um":
+                conversion_factor = 0.001  # Conversion factor from um to mm
+            elif pixel_size_units == "nm":
+                conversion_factor = 0.000001  # Conversion factor from nm to mm
+            else:
+                raise RuntimeError(f"The PixelSizeUnits '{pixel_size_units}' of '{Path(filename).stem}' is not "
+                                   f"supported. ivadomed supports the following PixelSizeUnits: 'mm', 'um' and 'nm'.")
+        else:
+            raise RuntimeError("'PixelSizeUnits' is missing from metadata")
+
+        # Set accepted array length for 'PixelSize' metadata
+        array_length = [2, 3]
+
+        # Get pixel size from json metadata and convert to mm
+        if MetadataKW.PIXEL_SIZE in self.metadata[0]:
+            pixel_size = self.metadata[0][MetadataKW.PIXEL_SIZE]
+
+            if len(pixel_size) in array_length:
+                # PixelSize array in order [PixelSizeX, PixelSizeY] or [PixelSizeX, PixelSizeY, PixelSizeZ]
+                pixel_size = np.asarray(pixel_size)
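To make the resize, swap, and conversion steps below concrete, a worked example assuming a hypothetical 2D PixelSize of [0.1, 0.2] with 'um' units:

    import numpy as np

    pixel_size = np.asarray([0.1, 0.2])      # [PixelSizeX, PixelSizeY] in um
    pixel_size = np.resize(pixel_size, 3)    # -> [0.1, 0.2, 0.1]; np.resize repeats values to fill
    if pixel_size[2] == 0:
        pixel_size[2] = pixel_size[0]        # not triggered here
    pixel_size[[1, 0]] = pixel_size[[0, 1]]  # -> [0.2, 0.1, 0.1], i.e. (Y, X, Z) order
    ps_in_mm = tuple(pixel_size * 0.001)     # um -> mm: (0.0002, 0.0001, 0.0001)

+
+                # Note: pixdim[3] (PixelSizeZ) must be non-zero in Nifti objects even if there is only one slice.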
+ # When PixelSizeZ is not present or 0, we assign the same PixelSize as PixelSizeX + pixel_size = np.resize(pixel_size, 3) + if pixel_size[2] == 0: + pixel_size[2] = pixel_size[0] + + # Swap PixelSizeX and PixelSizeY resulting in an array in order [PixelSizeY, PixelSizeX, PixelSizeZ] + # to match NIfTI pixdim[1,2,3] in [Height, Width, Depth] orientation with axial slice axis. + pixel_size[[1, 0]] = pixel_size[[0, 1]] + + else: + raise RuntimeError("'PixelSize' metadata type is not supported. Format must be a 2D" + " [PixelSizeX, PixelSizeY] array or 3D [PixelSizeX, PixelSizeY, PixelSizeZ] array" + " where X is the width, Y the height and Z the depth of the image.") + ps_in_mm = tuple(pixel_size * conversion_factor) + else: + raise RuntimeError("'PixelSize' is missing from metadata") + + return ps_in_mm diff --git a/ivadomed/loader/slice_filter.py b/ivadomed/loader/slice_filter.py new file mode 100644 index 000000000..27c9eefc5 --- /dev/null +++ b/ivadomed/loader/slice_filter.py @@ -0,0 +1,78 @@ +import torch +import numpy as np +from ivadomed import utils as imed_utils + + +class SliceFilter(object): + """Filter 2D slices from dataset. + + If a slice does not meet certain conditions, it is discarded from the dataset. + + Args: + filter_empty_mask (bool): If True, slices where all voxel labels are zeros are discarded. + filter_absent_class (bool): If True, slices where all voxel labels are zero for one or more classes are + discarded. + filter_empty_input (bool): If True, slices where all voxel intensities are zeros are discarded. + filter_classification (bool): If True, slices where all images fail a custom classifier filter are discarded. + device (torch.device): Indicates the CPU or GPU ID. + cuda_available (bool): If True, CUDA is available. + + Attributes: + filter_empty_mask (bool): If True, slices where all voxel labels are zeros are discarded. Default: False. + filter_absent_class (bool): If True, slices where all voxel labels are zero for one or more classes are + discarded. Default: False. + filter_empty_input (bool): If True, slices where all voxel intensities are zeros are discarded. Default: True. + filter_classification (bool): If True, slices where all images fail a custom classifier filter are discarded. + Default: False. + device (torch.device): Indicates the CPU or GPU ID. + cuda_available (bool): If True, CUDA is available. + + """ + + def __init__(self, filter_empty_mask: bool = False, + filter_absent_class: bool = False, + filter_empty_input: bool = True, + filter_classification: bool = False, + classifier_path: any = None, + device: torch.device = None, + cuda_available: bool = None): + self.filter_empty_mask = filter_empty_mask + self.filter_absent_class = filter_absent_class + self.filter_empty_input = filter_empty_input + self.filter_classification = filter_classification + self.device = device + self.cuda_available = cuda_available + + if self.filter_classification: + if cuda_available: + self.classifier = torch.load(classifier_path, map_location=device) + else: + self.classifier = torch.load(classifier_path, map_location='cpu') + + def __call__(self, sample: dict) -> bool: + """Extract input_data and gt_data lists from sample dict and discard them if they don't match certain + conditions. + + """ + input_data, gt_data = sample['input'], sample['gt'] + + if self.filter_empty_mask: + # Discard slices that do not have ANY ground truth (i.e. 
all masks are empty) + if not np.any(gt_data): + return False + if self.filter_absent_class: + # Discard slices that have absent classes (i.e. one or more masks are empty) + if not np.all([np.any(mask) for mask in gt_data]): + return False + if self.filter_empty_input: + # Discard set of images if one of them is empty or filled with constant value (i.e. std == 0) + if np.any([img.std() == 0 for img in input_data]): + return False + if self.filter_classification: + if not np.all([int( + self.classifier( + imed_utils.cuda(torch.from_numpy(img.copy()).unsqueeze(0).unsqueeze(0), + self.cuda_available))) for img in input_data]): + return False + + return True diff --git a/ivadomed/loader/utils.py b/ivadomed/loader/utils.py index 5b6efc34d..956dceb65 100644 --- a/ivadomed/loader/utils.py +++ b/ivadomed/loader/utils.py @@ -1,19 +1,26 @@ +from __future__ import annotations import collections.abc import re +import sys import os +import joblib +import gc +from pathlib import Path +from tempfile import mkdtemp + import numpy as np import pandas as pd import torch -import joblib from loguru import logger from sklearn.model_selection import train_test_split -from torch._six import string_classes, int_classes from ivadomed import utils as imed_utils +from ivadomed.keywords import SplitDatasetKW, LoaderParamsKW, ROIParamsKW, ContrastParamsKW import nibabel as nib -import bids as pybids # "bids" is already taken by bids_neuropoly -import itertools import random -import copy +import typing +if typing.TYPE_CHECKING: + from typing import Union + from typing import Optional __numpy_type_map = { 'float64': torch.DoubleTensor, @@ -37,7 +44,8 @@ ".tiff", ".png", ".jpg", ".jpeg"] -def split_dataset(df, split_method, data_testing, random_seed, train_frac=0.8, test_frac=0.1): +def split_dataset(df: pd.DataFrame, split_method: str, data_testing: dict, random_seed: int, train_frac: float = 0.8, + test_frac: float = 0.1) -> (list, list, Union[list, object]): """Splits dataset into training, validation and testing sets by applying train, test and validation fractions according to the split_method. The "data_testing" parameter can be used to specify the data_type and data_value to include in the testing set, @@ -114,8 +122,9 @@ def split_dataset(df, split_method, data_testing, random_seed, train_frac=0.8, t return X_train, X_val, X_test -def get_new_subject_file_split(df, split_method, data_testing, random_seed, - train_frac, test_frac, path_output, balance, subject_selection=None): +def get_new_subject_file_split(df: pd.DataFrame, split_method: str, data_testing: dict, random_seed: int, + train_frac: float, test_frac: float, path_output: str, balance: str, + subject_selection: dict = None) -> (list, list, list): """Randomly split dataset between training / validation / testing. 
Randomly split dataset between training / validation / testing\
@@ -178,13 +187,14 @@ def get_new_subject_file_split(df, split_method, data_testing, random_seed,

     # save the subject distribution
     split_dct = {'train': train_lst, 'valid': valid_lst, 'test': test_lst}
-    split_path = os.path.join(path_output, "split_datasets.joblib")
+    split_path = Path(path_output, "split_datasets.joblib")
     joblib.dump(split_dct, split_path)

     return train_lst, valid_lst, test_lst


-def get_subdatasets_subject_files_list(split_params, df, path_output, subject_selection=None):
+def get_subdatasets_subject_files_list(split_params: dict, df: pd.DataFrame, path_output: str,
+                                       subject_selection: dict = None) -> (list, list, list):
     """Get lists of subject filenames for each sub-dataset between training / validation / testing.

     Args:
@@ -196,9 +206,9 @@
     Returns:
         list, list list: Training, validation and testing filenames lists.
     """
-    if split_params["fname_split"]:
+    if split_params[SplitDatasetKW.FNAME_SPLIT]:
         # Load subjects lists
-        old_split = joblib.load(split_params["fname_split"])
+        old_split = joblib.load(split_params[SplitDatasetKW.FNAME_SPLIT])
         train_lst, valid_lst, test_lst = old_split['train'], old_split['valid'], old_split['test']

         # Backward compatibility for subject_file_lst containing participant_ids instead of filenames
@@ -218,19 +228,19 @@
         test_lst = sorted(df_test['filename'].to_list())
     else:
         train_lst, valid_lst, test_lst = get_new_subject_file_split(df=df,
-                                                                    split_method=split_params['split_method'],
-                                                                    data_testing=split_params['data_testing'],
-                                                                    random_seed=split_params['random_seed'],
-                                                                    train_frac=split_params['train_fraction'],
-                                                                    test_frac=split_params['test_fraction'],
+                                                                    split_method=split_params[SplitDatasetKW.SPLIT_METHOD],
+                                                                    data_testing=split_params[SplitDatasetKW.DATA_TESTING],
+                                                                    random_seed=split_params[SplitDatasetKW.RANDOM_SEED],
+                                                                    train_frac=split_params[SplitDatasetKW.TRAIN_FRACTION],
+                                                                    test_frac=split_params[SplitDatasetKW.TEST_FRACTION],
                                                                     path_output=path_output,
-                                                                    balance=split_params['balance']
-                                                                    if 'balance' in split_params else None,
+                                                                    balance=split_params[SplitDatasetKW.BALANCE]
+                                                                    if SplitDatasetKW.BALANCE in split_params else None,
                                                                     subject_selection=subject_selection)

     return train_lst, valid_lst, test_lst


-def imed_collate(batch):
+def imed_collate(batch: dict) -> dict | list | str | torch.Tensor:
     """Collates data to create batches

     Args:
@@ -255,11 +265,11 @@
         if elem.shape == ():  # scalars
             py_type = float if elem.dtype.name.startswith('float') else int
             return __numpy_type_map[elem.dtype.name](list(map(py_type, batch)))
-    elif isinstance(batch[0], int_classes):
+    elif isinstance(batch[0], int):
         return torch.LongTensor(batch)
     elif isinstance(batch[0], float):
         return torch.DoubleTensor(batch)
-    elif isinstance(batch[0], string_classes):
+    elif isinstance(batch[0], str):
         return batch
     elif isinstance(batch[0], collections.abc.Mapping):
         return {key: imed_collate([d[key] for d in batch]) for key in batch[0]}
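The torch._six shim removal above is behavior-preserving; a few illustrative calls against the branches visible in this hunk (a sketch, with expected results paraphrased in comments):

    imed_collate([0, 1, 2])                 # ints    -> torch.LongTensor([0, 1, 2])
    imed_collate([0.5, 1.5])                # floats  -> torch.DoubleTensor([0.5, 1.5])
    imed_collate(['axial', 'sagittal'])     # strings -> returned unchanged
    imed_collate([{'idx': 0}, {'idx': 1}])  # dicts   -> {'idx': tensor([0, 1])}, collated per key

@@ -269,7 +279,7 @@
     return batch


-def filter_roi(roi_data, nb_nonzero_thr):
+def filter_roi(roi_data: np.ndarray, nb_nonzero_thr: int) -> bool:
     """Filter slices from dataset using ROI data.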
This function filters slices (roi_data) where the number of non-zero voxels within the @@ -287,7 +297,7 @@ def filter_roi(roi_data, nb_nonzero_thr): return not np.any(roi_data) or np.count_nonzero(roi_data) <= nb_nonzero_thr -def orient_img_hwd(data, slice_axis): +def orient_img_hwd(data: np.ndarray, slice_axis: int) -> np.ndarray: """Orient a given RAS image to height, width, depth according to slice axis. Args: @@ -306,7 +316,7 @@ def orient_img_hwd(data, slice_axis): return data -def orient_img_ras(data, slice_axis): +def orient_img_ras(data: np.ndarray, slice_axis: int) -> np.ndarray: """Orient a given array with dimensions (height, width, depth) to RAS orientation. Args: @@ -326,7 +336,7 @@ def orient_img_ras(data, slice_axis): return data -def orient_shapes_hwd(data, slice_axis): +def orient_shapes_hwd(data: list | tuple, slice_axis: int) -> np.ndarray: """Swap dimensions according to match the height, width, depth orientation. Args: @@ -345,120 +355,8 @@ def orient_shapes_hwd(data, slice_axis): elif slice_axis == 2: return np.array(data) -class SampleMetadata(object): - """Metadata class to help update, get and set metadata values. - - Args: - d (dict): Initial metadata. - - Attributes: - metadata (dict): Image metadata. - """ - - def __init__(self, d=None): - self.metadata = {} or d - - def __setitem__(self, key, value): - self.metadata[key] = value - - def __getitem__(self, key): - return self.metadata[key] - - def __contains__(self, key): - return key in self.metadata - - def items(self): - return self.metadata.items() - - def _update(self, ref, list_keys): - """Update metadata keys with a reference metadata. - - A given list of metadata keys will be changed and given the values of the reference - metadata. - - Args: - ref (SampleMetadata): Reference metadata object. - list_keys (list): List of keys that need to be updated. - """ - for k in list_keys: - if (k not in self.metadata.keys() or not bool(self.metadata[k])) and k in ref.metadata.keys(): - self.metadata[k] = ref.metadata[k] - - def keys(self): - return self.metadata.keys() - - -class BalancedSampler(torch.utils.data.sampler.Sampler): - """Estimate sampling weights in order to rebalance the - class distributions from an imbalanced dataset. - - Args: - dataset (BidsDataset): Dataset containing input, gt and metadata. - metadata (str): Indicates which metadata to use to balance the sampler. - - Attributes: - indices (list): List from 0 to length of dataset (number of elements in the dataset). - nb_samples (int): Number of elements in the dataset. - weights (Tensor): Weight of each dataset element equal to 1 over the frequency of a - given label (inverse of the frequency). - metadata_dict (dict): Stores the mapping from metadata string to index (int). - label_idx (int): Keeps track of the label indices already used for the metadata_dict. - """ - - def __init__(self, dataset, metadata='gt'): - self.indices = list(range(len(dataset))) - - self.nb_samples = len(self.indices) - self.metadata_dict = {} - self.label_idx = 0 - - cmpt_label = {} - for idx in self.indices: - label = self._get_label(dataset, idx, metadata) - if label in cmpt_label: - cmpt_label[label] += 1 - else: - cmpt_label[label] = 1 - - weights = [1.0 / cmpt_label[self._get_label(dataset, idx, metadata)] - for idx in self.indices] - - self.weights = torch.DoubleTensor(weights) - - def _get_label(self, dataset, idx, metadata): - """Returns 1 if sample is not empty, 0 if it is empty (only zeros). 
- - Args: - dataset (BidsDataset): Dataset containing input, gt and metadata. - idx (int): Element index. - - Returns: - int: 0 or 1. - """ - if metadata != 'gt': - label_str = dataset[idx]['input_metadata'][0][metadata] - if label_str not in self.metadata_dict: - self.metadata_dict[label_str] = self.label_idx - self.label_idx += 1 - return self.metadata_dict[label_str] - - else: - # For now, only supported with single label - sample_gt = np.array(dataset[idx]['gt'][0]) - if np.any(sample_gt): - return 1 - else: - return 0 - - def __iter__(self): - return (self.indices[i] for i in torch.multinomial( - self.weights, self.nb_samples, replacement=True)) - - def __len__(self): - return self.num_samples - -def update_metadata(metadata_src_lst, metadata_dest_lst): +def update_metadata(metadata_src_lst: list, metadata_dest_lst: list) -> list: """Update metadata keys with a reference metadata. A given list of metadata keys will be changed and given the values of the reference metadata. @@ -480,72 +378,7 @@ def update_metadata(metadata_src_lst, metadata_dest_lst): return metadata_dest_lst -class SliceFilter(object): - """Filter 2D slices from dataset. - - If a sample does not meet certain conditions, it is discarded from the dataset. - - Args: - filter_empty_mask (bool): If True, samples where all voxel labels are zeros are discarded. - filter_empty_input (bool): If True, samples where all voxel intensities are zeros are discarded. - filter_absent_class (bool): If True, samples where all voxel labels are zero for one or more classes are discarded. - filter_classification (bool): If True, samples where all images fail a custom classifier filter are discarded. - - Attributes: - filter_empty_mask (bool): If True, samples where all voxel labels are zeros are discarded. - filter_empty_input (bool): If True, samples where all voxel intensities are zeros are discarded. - filter_absent_class (bool): If True, samples where all voxel labels are zero for one or more classes are discarded. - filter_classification (bool): If True, samples where all images fail a custom classifier filter are discarded. - - """ - - def __init__(self, filter_empty_mask=True, - filter_empty_input=True, - filter_classification=False, - filter_absent_class=False, - classifier_path=None, device=None, cuda_available=None): - self.filter_empty_mask = filter_empty_mask - self.filter_empty_input = filter_empty_input - self.filter_absent_class = filter_absent_class - self.filter_classification = filter_classification - self.device = device - self.cuda_available = cuda_available - - if self.filter_classification: - if cuda_available: - self.classifier = torch.load(classifier_path, map_location=device) - else: - self.classifier = torch.load(classifier_path, map_location='cpu') - - def __call__(self, sample): - input_data, gt_data = sample['input'], sample['gt'] - - if self.filter_empty_mask: - # Filter slices that do not have ANY ground truth (i.e. all masks are empty) - if not np.any(gt_data): - return False - - if self.filter_absent_class: - # Filter slices that have absent classes (i.e. one or more masks are empty) - if not np.all([np.any(mask) for mask in gt_data]): - return False - - if self.filter_empty_input: - # Filter set of images if one of them is empty or filled with constant value (i.e. 
std == 0)
-            if np.any([img.std() == 0 for img in input_data]):
-                return False
-
-        if self.filter_classification:
-            if not np.all([int(
-                    self.classifier(
-                        imed_utils.cuda(torch.from_numpy(img.copy()).unsqueeze(0).unsqueeze(0),
-                                        self.cuda_available))) for img in input_data]):
-                return False
-
-        return True
-
-
-def reorient_image(arr, slice_axis, nib_ref, nib_ref_canonical):
+def reorient_image(arr: np.ndarray, slice_axis: int, nib_ref: nib, nib_ref_canonical: nib) -> np.ndarray:
     """Reorient an image to match a reference image orientation.

     It reorients a array to a given orientation and convert it to a nibabel object using the
@@ -570,298 +403,7 @@
     return nib.orientations.apply_orientation(arr_ras, trans_orient)


-class BidsDataframe:
-    """
-    This class aims to create a dataframe containing all BIDS image files in a list of path_data and their metadata.
-
-    Args:
-        loader_params (dict): Loader parameters, see :doc:`configuration_file` for more details.
-        path_output (str): Output folder.
-        derivatives (bool): If True, derivatives are indexed.
-
-    Attributes:
-        path_data (list): Paths to the BIDS datasets.
-        bids_config (str): Path to the custom BIDS configuration file.
-        target_suffix (list of str): List of suffix of targetted structures.
-        roi_suffix (str): List of suffix of ROI masks.
-        extensions (list of str): List of file extensions of interest.
-        contrast_lst (list of str): List of the contrasts of interest.
-        derivatives (bool): If True, derivatives are indexed.
-        df (pd.DataFrame): Dataframe containing dataset information
-    """
-
-    def __init__(self, loader_params, path_output, derivatives):
-
-        # paths_data from loader parameters
-        self.paths_data = loader_params['path_data']
-
-        # bids_config from loader parameters
-        self.bids_config = None if 'bids_config' not in loader_params else loader_params['bids_config']
-
-        # target_suffix and roi_suffix from loader parameters
-        self.target_suffix = copy.deepcopy(loader_params['target_suffix'])
-        # If `target_suffix` is a list of lists convert to list
-        if any(isinstance(t, list) for t in self.target_suffix):
-            self.target_suffix = list(itertools.chain.from_iterable(self.target_suffix))
-        self.roi_suffix = loader_params['roi_params']['suffix']
-        # If `roi_suffix` is not None, add to target_suffix
-        if self.roi_suffix is not None:
-            self.target_suffix.append(self.roi_suffix)
-
-        # extensions from loader parameters
-        self.extensions = loader_params['extensions'] if loader_params['extensions'] else [".nii", ".nii.gz"]
-
-        # contrast_lst from loader parameters
-        self.contrast_lst = [] if 'contrast_lst' not in loader_params['contrast_params'] \
-            else loader_params['contrast_params']['contrast_lst']
-
-        # derivatives
-        self.derivatives = derivatives
-
-        # Create dataframe
-        self.df = pd.DataFrame()
-        self.create_bids_dataframe()
-
-        # Save dataframe as csv file
-        self.save(os.path.join(path_output, "bids_dataframe.csv"))
-
-    def create_bids_dataframe(self):
-        """Generate the dataframe."""
-
-        # Suppress a Future Warning from pybids about leading dot included in 'extension' from version 0.14.0
-        # The config_bids.json file used matches the future behavior
-        # TODO: when reaching version 0.14.0, remove the following line
-        pybids.config.set_option('extension_initial_dot', True)
-
-        for path_data in self.paths_data:
-            path_data = os.path.join(path_data, '')
-
-            # Initialize BIDSLayoutIndexer and BIDSLayout
-            # validate=True by default for both indexer and layout, BIDS-validator is
not skipped - # Force index for samples tsv and json files, and for subject subfolders containing microscopy files based on extensions. - # Force index of subject subfolders containing CT-scan files under "anat" or "ct" folder based on extensions and modality suffix. - # TODO: remove force indexing of microscopy files after BEP microscopy is merged in BIDS - # TODO: remove force indexing of CT-scan files after BEP CT-scan is merged in BIDS - ext_microscopy = ('.png', '.tif', '.tiff', '.ome.tif', '.ome.tiff', '.ome.tf2', '.ome.tf8', '.ome.btf') - ext_ct = ('.nii.gz', '.nii') - suffix_ct = ('ct', 'CT') - force_index = [] - for root, dirs, files in os.walk(path_data): - for file in files: - # Microscopy - if file == "samples.tsv" or file == "samples.json": - force_index.append(file) - if (file.endswith(ext_microscopy) and os.path.basename(root) == "microscopy" and - (root.replace(path_data, '').startswith("sub"))): - force_index.append(os.path.join(root.replace(path_data, ''))) - # CT-scan - if (file.endswith(ext_ct) and file.split('.')[0].endswith(suffix_ct) and - (os.path.basename(root) == "anat" or os.path.basename(root) == "ct") and - (root.replace(path_data, '').startswith("sub"))): - force_index.append(os.path.join(root.replace(path_data, ''))) - indexer = pybids.BIDSLayoutIndexer(force_index=force_index) - - if self.derivatives: - self.write_derivatives_dataset_description(path_data) - - layout = pybids.BIDSLayout(path_data, config=self.bids_config, indexer=indexer, - derivatives=self.derivatives) - - # Transform layout to dataframe with all entities and json metadata - # As per pybids, derivatives don't include parsed entities, only the "path" column - df_next = layout.to_df(metadata=True) - - # Add filename column - df_next.insert(1, 'filename', df_next['path'].apply(os.path.basename)) - - # Drop rows with json, tsv and LICENSE files in case no extensions are provided in config file for filtering - df_next = df_next[~df_next['filename'].str.endswith(tuple(['.json', '.tsv', 'LICENSE']))] - - # Update dataframe with subject files of chosen contrasts - # and with derivative files of chosen target_suffix - df_next = df_next[(~df_next['path'].str.contains('derivatives') - & df_next['suffix'].str.contains('|'.join(self.contrast_lst))) - | (df_next['path'].str.contains('derivatives') - & df_next['filename'].str.contains('|'.join(self.target_suffix)))] - - # Update dataframe with files of chosen extensions - df_next = df_next[df_next['filename'].str.endswith(tuple(self.extensions))] - - # Warning if no subject files are found in path_data - if df_next[~df_next['path'].str.contains('derivatives')].empty: - logger.warning("No subject files were found in '{}' dataset. Skipping dataset.".format(path_data)) - else: - # Add tsv files metadata to dataframe - df_next = self.add_tsv_metadata(df_next, path_data, layout) - - # TODO: check if other files are needed for EEG and DWI - - # Merge dataframes - self.df = pd.concat([self.df, df_next], join='outer', ignore_index=True) - - if self.df.empty: - # Raise error and exit if no subject files are found in any path data - raise RuntimeError("No subject files found. 
Check selection of parameters in config.json" - " and datasets compliance with BIDS specification.") - - # Drop duplicated rows based on all columns except 'path' - # Keep first occurence - columns = self.df.columns.to_list() - columns.remove('path') - self.df = self.df[~(self.df.astype(str).duplicated(subset=columns, keep='first'))] - - # If indexing of derivatives is true - if self.derivatives: - - # Get list of subject files with available derivatives - has_deriv, deriv = self.get_subjects_with_derivatives() - - # Filter dataframe to keep subjects files with available derivatives only - if has_deriv: - self.df = self.df[self.df['filename'].str.contains('|'.join(has_deriv)) - | self.df['filename'].str.contains('|'.join(deriv))] - else: - # Raise error and exit if no derivatives are found for any subject files - raise RuntimeError("Derivatives not found.") - - # Reset index - self.df.reset_index(drop=True, inplace=True) - - # Drop columns with all null values - self.df.dropna(axis=1, inplace=True, how='all') - - def add_tsv_metadata(self, df, path_data, layout): - - """Add tsv files metadata to dataframe. - Args: - layout (BIDSLayout): pybids BIDSLayout of the indexed files of the path_data - """ - - # Add participant_id column, and metadata from participants.tsv file if present - # Uses pybids function - df['participant_id'] = "sub-" + df['subject'] - if layout.get_collections(level='dataset'): - df_participants = layout.get_collections(level='dataset', merge=True).to_df() - df_participants.drop(['suffix'], axis=1, inplace=True) - df = pd.merge(df, df_participants, on='subject', suffixes=("_x", None), how='left') - - # Add sample_id column if sample column exists, and add metadata from samples.tsv file if present - # TODO: use pybids function after BEP microscopy is merged in BIDS - if 'sample' in df: - df['sample_id'] = "sample-" + df['sample'] - fname_samples = os.path.join(path_data, "samples.tsv") - if os.path.exists(fname_samples): - df_samples = pd.read_csv(fname_samples, sep='\t') - df = pd.merge(df, df_samples, on=['participant_id', 'sample_id'], suffixes=("_x", None), - how='left') - - # Add metadata from all _sessions.tsv files, if present - # Uses pybids function - if layout.get_collections(level='subject'): - df_sessions = layout.get_collections(level='subject', merge=True).to_df() - df_sessions.drop(['suffix'], axis=1, inplace=True) - df = pd.merge(df, df_sessions, on=['subject', 'session'], suffixes=("_x", None), how='left') - - # Add metadata from all _scans.tsv files, if present - # TODO: use pybids function after BEP microscopy is merged in BIDS - # TODO: verify merge behavior with EEG and DWI scans files, tested with anat and microscopy only - df_scans = pd.DataFrame() - for root, dirs, files in os.walk(path_data): - for file in files: - if file.endswith("scans.tsv"): - df_temp = pd.read_csv(os.path.join(root, file), sep='\t') - df_scans = pd.concat([df_scans, df_temp], ignore_index=True) - if not df_scans.empty: - df_scans['filename'] = df_scans['filename'].apply(os.path.basename) - df = pd.merge(df, df_scans, on=['filename'], suffixes=("_x", None), how='left') - - return df - - def get_subjects_with_derivatives(self): - """Get lists of subject filenames with available derivatives. - - Returns: - list, list: subject filenames having derivatives, available derivatives filenames. 
- """ - subject_fnames = self.get_subject_fnames() - deriv_fnames = self.get_deriv_fnames() - has_deriv = [] - deriv = [] - - for subject_fname in subject_fnames: - available = self.get_derivatives(subject_fname, deriv_fnames) - if available: - if self.roi_suffix is not None: - if self.roi_suffix in ('|'.join(available)): - has_deriv.append(subject_fname) - deriv.extend(available) - else: - logger.warning("Missing roi_suffix {} for {}. Skipping." - .format(self.roi_suffix, subject_fname)) - else: - has_deriv.append(subject_fname) - deriv.extend(available) - for target in self.target_suffix: - if target not in str(available) and target != self.roi_suffix: - logger.warning("Missing target_suffix {} for {}".format(target, subject_fname)) - else: - logger.warning("Missing derivatives for {}. Skipping.".format(subject_fname)) - - return has_deriv, deriv - - def get_subject_fnames(self): - """Get the list of subject filenames in dataframe. - - Returns: - list: subject filenames. - """ - return self.df[~self.df['path'].str.contains('derivatives')]['filename'].to_list() - - def get_deriv_fnames(self): - """Get the list of derivative filenames in dataframe. - - Returns: - list: derivative filenames. - """ - return self.df[self.df['path'].str.contains('derivatives')]['filename'].tolist() - - def get_derivatives(self, subject_fname, deriv_fnames): - """Return list of available derivative filenames for a subject filename. - Args: - subject_fname (str): Subject filename. - deriv_fnames (list of str): List of derivative filenames. - - Returns: - list: derivative filenames - """ - prefix_fname = subject_fname.split('.')[0] - return [d for d in deriv_fnames if prefix_fname in d] - - def save(self, path): - """Save the dataframe into a csv file. - Args: - path (str): Path to csv file. - """ - try: - self.df.to_csv(path, index=False) - logger.info("Dataframe has been saved in {}.".format(path)) - except FileNotFoundError: - logger.error("Wrong path, bids_dataframe.csv could not be saved in {}.".format(path)) - - def write_derivatives_dataset_description(self, path_data): - """Writes default dataset_description.json file if not found in path_data/derivatives folder - """ - filename = 'dataset_description' - deriv_desc_file = '{}/derivatives/{}.json'.format(path_data, filename) - label_desc_file = '{}/derivatives/labels/{}.json'.format(path_data, filename) - # need to write default dataset_description.json file if not found - if not os.path.isfile(deriv_desc_file) and not os.path.isfile(label_desc_file): - f = open(deriv_desc_file, 'w') - f.write('{"Name": "Example dataset", "BIDSVersion": "1.0.2", "PipelineDescription": {"Name": "Example pipeline"}}') - f.close() - - -def get_file_extension(filename): +def get_file_extension(filename: str) -> Optional[str]: """ Get file extension if it is supported Args: filename (str): Path of the file. @@ -874,7 +416,7 @@ def get_file_extension(filename): return extension -def update_filename_to_nifti(filename): +def update_filename_to_nifti(filename: str) -> str: """ Update filename extension to 'nii.gz' if not a NifTI file. @@ -895,7 +437,7 @@ def update_filename_to_nifti(filename): return filename -def dropout_input(seg_pair): +def dropout_input(seg_pair: dict) -> dict: """Applies input-level dropout: zero to all channels minus one will be randomly set to zeros. This function verifies if some channels are already empty. Always at least one input channel will be kept. 
@@ -934,3 +476,51 @@ def dropout_input(seg_pair):
         logger.warning("\n Impossible to apply input-level dropout since input is not multi-channel.")
 
     return seg_pair
+
+
+def create_temp_directory() -> str:
+    """Creates a temporary directory and returns its path.
+    This temporary directory is only deleted when explicitly requested.
+
+    Returns:
+        str: Path of the temporary directory.
+    """
+    import datetime
+    time_stamp = datetime.datetime.now().isoformat().replace(":", "")
+    temp_folder_location = mkdtemp(prefix="ivadomed_", suffix=f"_{time_stamp}")
+    return temp_folder_location
+
+def get_obj_size(obj) -> int:
+    """
+    Returns the size of an object in bytes. Used to gauge whether to keep an object in memory or write it to disk.
+
+    Source: https://stackoverflow.com/a/53705610
+
+    Args:
+        obj: Object whose total in-memory size is measured.
+
+    Returns:
+        int: Size in bytes of the object and everything it references.
+    """
+    marked = {id(obj)}
+    obj_q = [obj]
+    object_size = 0
+
+    while obj_q:
+        object_size += sum(map(sys.getsizeof, obj_q))
+
+        # Look up all the objects referred to by the objects in obj_q.
+        # See: https://docs.python.org/3.7/library/gc.html#gc.get_referents
+        all_refr = ((id(o), o) for o in gc.get_referents(*obj_q))
+
+        # Filter out objects that are already marked.
+        # Using dict notation will prevent repeated objects.
+        new_refr = {o_id: o for o_id, o in all_refr if o_id not in marked and not isinstance(o, type)}
+
+        # The new obj_q will be the ones that were not marked,
+        # and we will update marked with their ids so we will
+        # not traverse them again.
+        obj_q = new_refr.values()
+        marked.update(new_refr.keys())
+
+    return object_size
diff --git a/ivadomed/losses.py b/ivadomed/losses.py
index 241e7ffaa..477ba5f75 100644
--- a/ivadomed/losses.py
+++ b/ivadomed/losses.py
@@ -180,11 +180,11 @@ class GeneralizedDiceLoss(nn.Module):
 
     Args:
         epsilon (float): Epsilon to avoid division by zero.
-        include_background (float): If True, then an extra channel is added, which represents the background class.
+        include_background (bool): If True, then an extra channel is added, which represents the background class.
 
     Attributes:
         epsilon (float): Epsilon to avoid division by zero.
-        include_background (float): If True, then an extra channel is added, which represents the background class.
+        include_background (bool): If True, then an extra channel is added, which represents the background class.
""" def __init__(self, epsilon=1e-5, include_background=True): @@ -202,8 +202,8 @@ def forward(self, input, target): input_background = torch.zeros(size_background, dtype=input.dtype) target_background = torch.zeros(size_background, dtype=target.dtype) # fill with opposite - input_background[input.sum(1).expand_as(input_background) == 0] = 1 - target_background[target.sum(1).expand_as(input_background) == 0] = 1 + input_background[input.sum(1)[:, None, :, :] == 0] = 1 + target_background[target.sum(1)[:, None, :, :] == 0] = 1 # Concat input = torch.cat([input, input_background.to(input.device)], dim=1) target = torch.cat([target, target_background.to(target.device)], dim=1) @@ -381,19 +381,19 @@ def forward(self, input, target): hm_num = target.size()[1] mask = torch.zeros_like(target) - kernel = scipy.ndimage.morphology.generate_binary_structure(2, 2) + kernel = scipy.ndimage.generate_binary_structure(2, 2) # For 3D segmentation tasks if len(input.shape) == 5: - kernel = scipy.ndimage.morphology.generate_binary_structure(3, 2) + kernel = scipy.ndimage.generate_binary_structure(3, 2) for i in range(batch_size): img_list = list() img_list.append(np.round(target[i].cpu().numpy() * 255)) img_merge = np.concatenate(img_list) - img_dilate = scipy.ndimage.morphology.binary_opening(img_merge, np.expand_dims(kernel, axis=0)) + img_dilate = scipy.ndimage.binary_opening(img_merge, np.expand_dims(kernel, axis=0)) img_dilate[img_dilate < 51] = 1 # 0*omega+1 img_dilate[img_dilate >= 51] = 1 + self.omega # 1*omega+1 - img_dilate = np.array(img_dilate, dtype=np.int) + img_dilate = np.array(img_dilate, dtype=int) mask[i] = torch.tensor(img_dilate) diff --git a/ivadomed/main.py b/ivadomed/main.py index 8f89ebdef..375f65aa1 100644 --- a/ivadomed/main.py +++ b/ivadomed/main.py @@ -1,5 +1,4 @@ import json -import os import argparse import copy import joblib @@ -10,6 +9,7 @@ import multiprocessing import re +from ivadomed.loader.bids_dataframe import BidsDataframe from ivadomed import evaluation as imed_evaluation from ivadomed import config_manager as imed_config_manager from ivadomed import testing as imed_testing @@ -19,7 +19,11 @@ from ivadomed import metrics as imed_metrics from ivadomed import inference as imed_inference from ivadomed.loader import utils as imed_loader_utils, loader as imed_loader, film as imed_film +from ivadomed.keywords import ConfigKW, ModelParamsKW, LoaderParamsKW, ContrastParamsKW, BalanceSamplesKW, \ + TrainingParamsKW, ObjectDetectionParamsKW, UncertaintyKW, PostprocessingKW, BinarizeProdictionKW, MetricsKW, \ + MetadataKW, OptionKW, SplitDatasetKW from loguru import logger +from pathlib import Path cudnn.benchmark = True @@ -65,6 +69,14 @@ def get_parser(): optional_args.add_argument('--resume-training', dest="resume_training", required=False, action='store_true', help='Load a saved model ("checkpoint.pth.tar" in the output directory specified either with flag "--path-output" or via the config file "output_path" argument) ' 'for resume training. This training state is saved everytime a new best model is saved in the output directory specified with flag "--path-output"') + optional_args.add_argument('--no-patch', dest="no_patch", action='store_true', required=False, + help='2D patches are not used while segmenting with models trained with patches ' + '(command "--segment" only). The "--no-patch" argument supersedes the "--overlap-2D" argument. 
' + ' This option may not be suitable with large images depending on computer RAM capacity.') + optional_args.add_argument('--overlap-2d', dest="overlap_2d", required=False, type=int, nargs="+", + help='Custom overlap for 2D patches while segmenting (command "--segment" only). ' + 'Example: "--overlap-2d 48 48" for an overlap of 48 pixels between patches in X and Y respectively. ' + 'Default model overlap is used otherwise.') optional_args.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS, help='Shows function documentation.') @@ -72,43 +84,43 @@ def get_parser(): def create_path_model(context, model_params, ds_train, path_output, train_onehotencoder): - path_model = os.path.join(path_output, context["model_name"]) - if not os.path.isdir(path_model): - logger.info('Creating model directory: {}'.format(path_model)) - os.makedirs(path_model) - if 'film_layers' in model_params and any(model_params['film_layers']): - joblib.dump(train_onehotencoder, os.path.join(path_model, "one_hot_encoder.joblib")) - if 'metadata_dict' in ds_train[0]['input_metadata'][0]: - metadata_dict = ds_train[0]['input_metadata'][0]['metadata_dict'] - joblib.dump(metadata_dict, os.path.join(path_model, "metadata_dict.joblib")) + path_model = Path(path_output, context[ConfigKW.MODEL_NAME]) + if not path_model.is_dir(): + logger.info(f'Creating model directory: {path_model}') + path_model.mkdir(parents=True) + if ModelParamsKW.FILM_LAYERS in model_params and any(model_params[ModelParamsKW.FILM_LAYERS]): + joblib.dump(train_onehotencoder, path_model.joinpath("one_hot_encoder.joblib")) + if MetadataKW.METADATA_DICT in ds_train[0][MetadataKW.INPUT_METADATA][0]: + metadata_dict = ds_train[0][MetadataKW.INPUT_METADATA][0][MetadataKW.METADATA_DICT] + joblib.dump(metadata_dict, path_model.joinpath("metadata_dict.joblib")) else: - logger.info('Model directory already exists: {}'.format(path_model)) + logger.info(f'Model directory already exists: {path_model}') def check_multiple_raters(is_train, loader_params): - if any([isinstance(class_suffix, list) for class_suffix in loader_params["target_suffix"]]): + if any([isinstance(class_suffix, list) for class_suffix in loader_params[LoaderParamsKW.TARGET_SUFFIX]]): logger.info( "Annotations from multiple raters will be used during model training, one annotation from one rater " "randomly selected at each iteration.\n") if not is_train: logger.error( "Please provide only one annotation per class in 'target_suffix' when not training a model.\n") - exit() + sys.exit() def film_normalize_data(context, model_params, ds_train, ds_valid, path_output): # Normalize metadata before sending to the FiLM network results = imed_film.get_film_metadata_models(ds_train=ds_train, - metadata_type=model_params['metadata'], - debugging=context["debugging"]) + metadata_type=model_params[ModelParamsKW.METADATA], + debugging=context[ConfigKW.DEBUGGING]) ds_train, train_onehotencoder, metadata_clustering_models = results - ds_valid = imed_film.normalize_metadata(ds_valid, metadata_clustering_models, context["debugging"], - model_params['metadata']) - model_params.update({"film_onehotencoder": train_onehotencoder, - "n_metadata": len([ll for l in train_onehotencoder.categories_ for ll in l])}) - joblib.dump(metadata_clustering_models, os.path.join(path_output, "clustering_models.joblib")) - joblib.dump(train_onehotencoder, os.path.join(path_output + "one_hot_encoder.joblib")) + ds_valid = imed_film.normalize_metadata(ds_valid, metadata_clustering_models, context[ConfigKW.DEBUGGING], + 
model_params[ModelParamsKW.METADATA])
+    model_params.update({ModelParamsKW.FILM_ONEHOTENCODER: train_onehotencoder,
+                         ModelParamsKW.N_METADATA: len([ll for l in train_onehotencoder.categories_ for ll in l])})
+    joblib.dump(metadata_clustering_models, Path(path_output, "clustering_models.joblib"))
+    joblib.dump(train_onehotencoder, Path(path_output, "one_hot_encoder.joblib"))
 
     return model_params, ds_train, ds_valid, train_onehotencoder
 
@@ -124,109 +136,121 @@ def get_dataset(bids_df, loader_params, data_lst, transform_params, cuda_availab
 def save_config_file(context, path_output):
     # Save config file within path_output and path_output/model_name
     # Done after the threshold_analysis to propate this info in the config files
-    with open(os.path.join(path_output, "config_file.json"), 'w') as fp:
+    with Path(path_output, "config_file.json").open(mode='w') as fp:
         json.dump(context, fp, indent=4)
 
-    with open(os.path.join(path_output, context["model_name"], context["model_name"] + ".json"), 'w') as fp:
+    with Path(path_output, context[ConfigKW.MODEL_NAME], context[ConfigKW.MODEL_NAME] + ".json").open(mode='w') as fp:
         json.dump(context, fp, indent=4)
 
 
 def set_loader_params(context, is_train):
-    loader_params = copy.deepcopy(context["loader_parameters"])
+    loader_params = copy.deepcopy(context[ConfigKW.LOADER_PARAMETERS])
     if is_train:
-        loader_params["contrast_params"]["contrast_lst"] = loader_params["contrast_params"]["training_validation"]
+        loader_params[LoaderParamsKW.CONTRAST_PARAMS][ContrastParamsKW.CONTRAST_LST] = \
+            loader_params[LoaderParamsKW.CONTRAST_PARAMS][ContrastParamsKW.TRAINING_VALIDATION]
     else:
-        loader_params["contrast_params"]["contrast_lst"] = loader_params["contrast_params"]["testing"]
-    if "FiLMedUnet" in context and context["FiLMedUnet"]["applied"]:
-        loader_params.update({"metadata_type": context["FiLMedUnet"]["metadata"]})
+        loader_params[LoaderParamsKW.CONTRAST_PARAMS][ContrastParamsKW.CONTRAST_LST] =\
+            loader_params[LoaderParamsKW.CONTRAST_PARAMS][ContrastParamsKW.TESTING]
+    if ConfigKW.FILMED_UNET in context and context[ConfigKW.FILMED_UNET][ModelParamsKW.APPLIED]:
+        loader_params.update({LoaderParamsKW.METADATA_TYPE: context[ConfigKW.FILMED_UNET][ModelParamsKW.METADATA]})
 
     # Load metadata necessary to balance the loader
-    if context['training_parameters']['balance_samples']['applied'] and \
-            context['training_parameters']['balance_samples']['type'] != 'gt':
-        loader_params.update({"metadata_type": context['training_parameters']['balance_samples']['type']})
+    if context[ConfigKW.TRAINING_PARAMETERS][TrainingParamsKW.BALANCE_SAMPLES][BalanceSamplesKW.APPLIED] and \
+            context[ConfigKW.TRAINING_PARAMETERS][TrainingParamsKW.BALANCE_SAMPLES][BalanceSamplesKW.TYPE] != 'gt':
+        loader_params.update({LoaderParamsKW.METADATA_TYPE:
+                              context[ConfigKW.TRAINING_PARAMETERS][TrainingParamsKW.BALANCE_SAMPLES][BalanceSamplesKW.TYPE]})
 
     return loader_params
 
 
 def set_model_params(context, loader_params):
-    model_params = copy.deepcopy(context["default_model"])
-    model_params["folder_name"] = copy.deepcopy(context["model_name"])
+    model_params = copy.deepcopy(context[ConfigKW.DEFAULT_MODEL])
+    model_params[ModelParamsKW.FOLDER_NAME] = copy.deepcopy(context[ConfigKW.MODEL_NAME])
     model_context_list = [model_name for model_name in MODEL_LIST
-                          if model_name in context and context[model_name]["applied"]]
+                          if model_name in context and context[model_name][ModelParamsKW.APPLIED]]
     if len(model_context_list) == 1:
-        model_params["name"] = model_context_list[0]
+        model_params[ModelParamsKW.NAME] =
model_context_list[0] model_params.update(context[model_context_list[0]]) - elif 'Modified3DUNet' in model_context_list and 'FiLMedUnet' in model_context_list and len(model_context_list) == 2: - model_params["name"] = 'Modified3DUNet' + elif ConfigKW.MODIFIED_3D_UNET in model_context_list and ConfigKW.FILMED_UNET in model_context_list \ + and len(model_context_list) == 2: + model_params[ModelParamsKW.NAME] = ConfigKW.MODIFIED_3D_UNET for i in range(len(model_context_list)): model_params.update(context[model_context_list[i]]) elif len(model_context_list) > 1: - logger.error('ERROR: Several models are selected in the configuration file: {}.' - 'Please select only one (i.e. only one where: "applied": true).'.format(model_context_list)) + logger.error(f'ERROR: Several models are selected in the configuration file: {model_context_list}.' + 'Please select only one (i.e. only one where: "applied": true).') exit() - model_params['is_2d'] = False if "Modified3DUNet" in model_params['name'] else model_params['is_2d'] + model_params[ModelParamsKW.IS_2D] = False if ConfigKW.MODIFIED_3D_UNET in model_params[ModelParamsKW.NAME] \ + else model_params[ModelParamsKW.IS_2D] # Get in_channel from contrast_lst - if loader_params["multichannel"]: - model_params["in_channel"] = len(loader_params["contrast_params"]["contrast_lst"]) + if loader_params[LoaderParamsKW.MULTICHANNEL]: + model_params[ModelParamsKW.IN_CHANNEL] = \ + len(loader_params[LoaderParamsKW.CONTRAST_PARAMS][ContrastParamsKW.CONTRAST_LST]) else: - model_params["in_channel"] = 1 + model_params[ModelParamsKW.IN_CHANNEL] = 1 # Get out_channel from target_suffix - model_params["out_channel"] = len(loader_params["target_suffix"]) + model_params[ModelParamsKW.OUT_CHANNEL] = len(loader_params[LoaderParamsKW.TARGET_SUFFIX]) # If multi-class output, then add background class - if model_params["out_channel"] > 1: - model_params.update({"out_channel": model_params["out_channel"] + 1}) + if model_params[ModelParamsKW.OUT_CHANNEL] > 1: + model_params.update({ModelParamsKW.OUT_CHANNEL: model_params[ModelParamsKW.OUT_CHANNEL] + 1}) # Display for spec' check imed_utils.display_selected_model_spec(params=model_params) # Update loader params - if 'object_detection_params' in context: - object_detection_params = context['object_detection_params'] - object_detection_params.update({"gpu_ids": context['gpu_ids'][0], - "path_output": context['path_output']}) - loader_params.update({"object_detection_params": object_detection_params}) + if ConfigKW.OBJECT_DETECTION_PARAMS in context: + object_detection_params = context[ConfigKW.OBJECT_DETECTION_PARAMS] + object_detection_params.update({ObjectDetectionParamsKW.GPU_IDS: context[ConfigKW.GPU_IDS][0], + ObjectDetectionParamsKW.PATH_OUTPUT: context[ConfigKW.PATH_OUTPUT]}) + loader_params.update({ConfigKW.OBJECT_DETECTION_PARAMS: object_detection_params}) - loader_params.update({"model_params": model_params}) + loader_params.update({LoaderParamsKW.MODEL_PARAMS: model_params}) return model_params, loader_params def set_output_path(context): - path_output = copy.deepcopy(context["path_output"]) - if not os.path.isdir(path_output): - logger.info('Creating output path: {}'.format(path_output)) - os.makedirs(path_output) + path_output = copy.deepcopy(context[ConfigKW.PATH_OUTPUT]) + if not Path(path_output).is_dir(): + logger.info(f'Creating output path: {path_output}') + Path(path_output).mkdir(parents=True) else: - logger.info('Output path already exists: {}'.format(path_output)) + logger.info(f'Output path already exists: 
{path_output}') return path_output def update_film_model_params(context, ds_test, model_params, path_output): - clustering_path = os.path.join(path_output, "clustering_models.joblib") + clustering_path = Path(path_output, "clustering_models.joblib") metadata_clustering_models = joblib.load(clustering_path) # Model directory - ohe_path = os.path.join(path_output, context["model_name"], "one_hot_encoder.joblib") + ohe_path = Path(path_output, context[ConfigKW.MODEL_NAME], "one_hot_encoder.joblib") one_hot_encoder = joblib.load(ohe_path) - ds_test = imed_film.normalize_metadata(ds_test, metadata_clustering_models, context["debugging"], - model_params['metadata']) - model_params.update({"film_onehotencoder": one_hot_encoder, - "n_metadata": len([ll for l in one_hot_encoder.categories_ for ll in l])}) + ds_test = imed_film.normalize_metadata(ds_test, metadata_clustering_models, context[ConfigKW.DEBUGGING], + model_params[ModelParamsKW.METADATA]) + model_params.update({ModelParamsKW.FILM_ONEHOTENCODER: one_hot_encoder, + ModelParamsKW.N_METADATA: len([ll for l in one_hot_encoder.categories_ for ll in l])}) return ds_test, model_params -def run_segment_command(context, model_params): +def run_segment_command(context, model_params, no_patch, overlap_2d): # BIDSDataframe of all image files # Indexing of derivatives is False for command segment - bids_df = imed_loader_utils.BidsDataframe(context['loader_parameters'], context['path_output'], derivatives=False) + # split_method is unused for command segment + bids_df = BidsDataframe( + context.get(ConfigKW.LOADER_PARAMETERS), + context.get(ConfigKW.PATH_OUTPUT), + derivatives=False, + split_method=None + ) # Append subjects filenames into a list - bids_subjects = sorted(bids_df.df['filename'].to_list()) + bids_subjects = sorted(bids_df.df.get('filename').to_list()) # Add postprocessing to packaged model - path_model = os.path.join(context['path_output'], context['model_name']) - path_model_config = os.path.join(path_model, context['model_name'] + ".json") - model_config = imed_config_manager.load_json(path_model_config) - model_config['postprocessing'] = context['postprocessing'] - with open(path_model_config, 'w') as fp: + path_model = Path(context[ConfigKW.PATH_OUTPUT], context[ConfigKW.MODEL_NAME]) + path_model_config = Path(path_model, context[ConfigKW.MODEL_NAME] + ".json") + model_config = imed_config_manager.load_json(str(path_model_config)) + model_config[ConfigKW.POSTPROCESSING] = context.get(ConfigKW.POSTPROCESSING) + with path_model_config.open(mode='w') as fp: json.dump(model_config, fp, indent=4) options = {} @@ -234,7 +258,7 @@ def run_segment_command(context, model_params): seen_subj_ids = [] for subject in bids_subjects: - if context['loader_parameters']['multichannel']: + if context.get(ConfigKW.LOADER_PARAMETERS).get(LoaderParamsKW.MULTICHANNEL): # Get subject_id for multichannel df_sub = bids_df.df.loc[bids_df.df['filename'] == subject] subj_id = re.sub(r'_' + df_sub['suffix'].values[0] + '.*', '', subject) @@ -242,7 +266,7 @@ def run_segment_command(context, model_params): # if subj_id has not been seen yet fname_img = [] provided_contrasts = [] - contrasts = context['loader_parameters']['contrast_params']['testing'] + contrasts = context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.CONTRAST_PARAMS][ContrastParamsKW.TESTING] # Keep contrast order for c in contrasts: df_tmp = bids_df.df[ @@ -252,8 +276,8 @@ def run_segment_command(context, model_params): fname_img.append(df_tmp['path'].values[0]) seen_subj_ids.append(subj_id) if 
len(fname_img) != len(contrasts): - logger.warning("Missing contrast for subject {}. {} were provided but {} are required. Skipping " - "subject.".format(subj_id, provided_contrasts, contrasts)) + logger.warning(f"Missing contrast for subject {subj_id}. {provided_contrasts} were provided but " + f"{contrasts} are required. Skipping subject.") continue else: # Returns an empty list for subj_id already seen @@ -262,36 +286,51 @@ def run_segment_command(context, model_params): fname_img = bids_df.df[bids_df.df['filename'] == subject]['path'].to_list() # Add film metadata to options for segment_volume - if 'film_layers' in model_params and any(model_params['film_layers']) and model_params['metadata']: - metadata = bids_df.df[bids_df.df['filename'] == subject][model_params['metadata']].values[0] - options['metadata'] = metadata - - # Add microscopy pixel size metadata to options for segment_volume - if 'PixelSize' in bids_df.df.columns: - options['pixel_size'] = bids_df.df.loc[bids_df.df['filename'] == subject]['PixelSize'].values[0] + if ModelParamsKW.FILM_LAYERS in model_params and any(model_params[ModelParamsKW.FILM_LAYERS]) \ + and model_params[ModelParamsKW.METADATA]: + metadata = bids_df.df[bids_df.df['filename'] == subject][model_params[ModelParamsKW.METADATA]].values[0] + options[OptionKW.METADATA] = metadata + + # Add microscopy pixel size and pixel size units metadata to options for segment_volume + if MetadataKW.PIXEL_SIZE in bids_df.df.columns: + options[OptionKW.PIXEL_SIZE] = \ + bids_df.df.loc[bids_df.df['filename'] == subject][MetadataKW.PIXEL_SIZE].values[0] + if MetadataKW.PIXEL_SIZE_UNITS in bids_df.df.columns: + options[OptionKW.PIXEL_SIZE_UNITS] = \ + bids_df.df.loc[bids_df.df['filename'] == subject][MetadataKW.PIXEL_SIZE_UNITS].values[0] + + # Add 'no_patch' and 'overlap-2d' argument to options + if no_patch: + options[OptionKW.NO_PATCH] = no_patch + if overlap_2d: + options[OptionKW.OVERLAP_2D] = overlap_2d if fname_img: - pred_list, target_list = imed_inference.segment_volume(path_model, + pred_list, target_list = imed_inference.segment_volume(str(path_model), fname_images=fname_img, - gpu_id=context['gpu_ids'][0], + gpu_id=context[ConfigKW.GPU_IDS][0], options=options) - pred_path = os.path.join(context['path_output'], "pred_masks") - if not os.path.exists(pred_path): - os.makedirs(pred_path) + pred_path = Path(context[ConfigKW.PATH_OUTPUT], "pred_masks") + if not pred_path.exists(): + pred_path.mkdir(parents=True) + + # Reformat target list to include class index and be compatible with multiple raters + target_list = ["_class-%d" % i for i in range(len(target_list))] for pred, target in zip(pred_list, target_list): filename = subject.split('.')[0] + target + "_pred" + ".nii.gz" - nib.save(pred, os.path.join(pred_path, filename)) + nib.save(pred, Path(pred_path, filename)) # For Microscopy PNG/TIF files (TODO: implement OMETIFF behavior) extension = imed_loader_utils.get_file_extension(subject) if "nii" not in extension: imed_inference.pred_to_png(pred_list, target_list, - os.path.join(pred_path, subject).replace(extension, '')) + str(Path(pred_path, subject)).replace(extension, ''), + suffix="_pred.png") -def run_command(context, n_gif=0, thr_increment=None, resume_training=False): +def run_command(context, n_gif=0, thr_increment=None, resume_training=False, no_patch=False, overlap_2d=None): """Run main command. 
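# Illustration (editor's sketch, not part of the patch): how the new "--no-patch" and
# "--overlap-2d" flags reach imed_inference.segment_volume through the options dict
# built in run_segment_command() above. The model folder and image filename below are
# hypothetical placeholders.
from ivadomed import inference as imed_inference
from ivadomed.keywords import OptionKW

options = {}
no_patch = True        # as if "--no-patch" was passed on the command line
overlap_2d = [48, 48]  # as if "--overlap-2d 48 48" was passed; superseded when no_patch is set

if no_patch:
    options[OptionKW.NO_PATCH] = no_patch
if overlap_2d:
    options[OptionKW.OVERLAP_2D] = overlap_2d

pred_list, target_list = imed_inference.segment_volume(
    "path/to/model_folder",              # hypothetical packaged model directory
    fname_images=["sub-01_T2w.nii.gz"],  # hypothetical input image
    gpu_id=0,
    options=options)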
This function is central in the ivadomed project as training / testing / evaluation commands @@ -306,8 +345,14 @@ def run_command(context, n_gif=0, thr_increment=None, resume_training=False): thr_increment (float): A threshold analysis is performed at the end of the training using the trained model and the training + validation sub-dataset to find the optimal binarization threshold. The specified value indicates the increment between 0 and 1 used during the ROC analysis (e.g. 0.1). - resume_training (bool): Load a saved model ("checkpoint.pth.tar" in the output directory specified with flag "--path-output" or via the config file "output_path" ' This training state is saved everytime a new best model is saved in the log - argument) for resume training directory. + resume_training (bool): Load a saved model ("checkpoint.pth.tar" in the output directory specified with flag + "--path-output" or via the config file "output_path". This training state is saved everytime a new best + model is saved in the log argument) for resume training directory. + no_patch (bool): If True, 2D patches are not used while segmenting with models trained with patches + (command "--segment" only). Default: False (i.e. segment with patches). The "no_patch" option supersedes + the "overlap_2D" option. + overlap_2d (list of int): Custom overlap for 2D patches while segmenting (command "--segment" only). + Default model overlap is used otherwise. Returns: float or pandas.DataFrame or None: @@ -317,60 +362,65 @@ def run_command(context, n_gif=0, thr_increment=None, resume_training=False): * If "segment" command: No return value. """ - command = copy.deepcopy(context["command"]) + command = copy.deepcopy(context[ConfigKW.COMMAND]) path_output = set_output_path(context) - log_file = os.path.join(context['path_output'], context['log_file']) + path_log = Path(context.get('path_output'), context.get('log_file')) logger.remove() - logger.add(log_file) + logger.add(str(path_log)) logger.add(sys.stdout) # Create a log with the version of the Ivadomed software and the version of the Annexed dataset (if present) create_dataset_and_ivadomed_version_log(context) - cuda_available, device = imed_utils.define_device(context['gpu_ids'][0]) + cuda_available, device = imed_utils.define_device(context[ConfigKW.GPU_IDS][0]) # BACKWARDS COMPATIBILITY: If bids_path is string, assign to list - Do this here so it propagates to all functions - context['loader_parameters']['path_data'] = imed_utils.format_path_data(context['loader_parameters']['path_data']) + context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.PATH_DATA] =\ + imed_utils.format_path_data(context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.PATH_DATA]) # Loader params loader_params = set_loader_params(context, command == "train") # Get transforms for each subdataset transform_train_params, transform_valid_params, transform_test_params = \ - imed_transforms.get_subdatasets_transforms(context["transformation"]) + imed_transforms.get_subdatasets_transforms(context[ConfigKW.TRANSFORMATION]) # MODEL PARAMETERS model_params, loader_params = set_model_params(context, loader_params) if command == 'segment': - run_segment_command(context, model_params) + run_segment_command(context, model_params, no_patch, overlap_2d) return # BIDSDataframe of all image files - # Indexing of derivatives is True for command train and test - bids_df = imed_loader_utils.BidsDataframe(loader_params, path_output, derivatives=True) + # Indexing of derivatives is True for commands train and test + # split_method is 
used for removing unused subject files in bids_df for commands train and test + bids_df = BidsDataframe(loader_params, path_output, derivatives=True, + split_method=context.get(ConfigKW.SPLIT_DATASET).get(SplitDatasetKW.SPLIT_METHOD)) # Get subject filenames lists. "segment" command uses all participants of data path, hence no need to split - train_lst, valid_lst, test_lst = imed_loader_utils.get_subdatasets_subject_files_list(context["split_dataset"], + train_lst, valid_lst, test_lst = imed_loader_utils.get_subdatasets_subject_files_list(context[ConfigKW.SPLIT_DATASET], bids_df.df, path_output, - context["loader_parameters"] - ['subject_selection']) + context.get(ConfigKW.LOADER_PARAMETERS).get( + LoaderParamsKW.SUBJECT_SELECTION)) # Generating sha256 for the training files imed_utils.generate_sha_256(context, bids_df.df, train_lst) # TESTING PARAMS # Aleatoric uncertainty - if context['uncertainty']['aleatoric'] and context['uncertainty']['n_it'] > 0: + if context[ConfigKW.UNCERTAINTY][UncertaintyKW.ALEATORIC] \ + and context[ConfigKW.UNCERTAINTY][UncertaintyKW.N_IT] > 0: transformation_dict = transform_train_params else: transformation_dict = transform_test_params undo_transforms = imed_transforms.UndoCompose(imed_transforms.Compose(transformation_dict, requires_undo=True)) - testing_params = copy.deepcopy(context["training_parameters"]) - testing_params.update({'uncertainty': context["uncertainty"]}) - testing_params.update({'target_suffix': loader_params["target_suffix"], 'undo_transforms': undo_transforms, - 'slice_axis': loader_params['slice_axis']}) + testing_params = copy.deepcopy(context[ConfigKW.TRAINING_PARAMETERS]) + testing_params.update({ConfigKW.UNCERTAINTY: context[ConfigKW.UNCERTAINTY]}) + testing_params.update({LoaderParamsKW.TARGET_SUFFIX: loader_params[LoaderParamsKW.TARGET_SUFFIX], + ConfigKW.UNDO_TRANSFORMS: undo_transforms, + LoaderParamsKW.SLICE_AXIS: loader_params[LoaderParamsKW.SLICE_AXIS]}) if command == "train": imed_utils.display_selected_transfoms(transform_train_params, dataset_type=["training"]) @@ -392,7 +442,7 @@ def run_command(context, n_gif=0, thr_increment=None, resume_training=False): metric_fns = imed_metrics.get_metric_fns(ds_train.task) # If FiLM, normalize data - if 'film_layers' in model_params and any(model_params['film_layers']): + if ModelParamsKW.FILM_LAYERS in model_params and any(model_params[ModelParamsKW.FILM_LAYERS]): model_params, ds_train, ds_valid, train_onehotencoder = \ film_normalize_data(context, model_params, ds_train, ds_valid, path_output) else: @@ -408,14 +458,15 @@ def run_command(context, n_gif=0, thr_increment=None, resume_training=False): model_params=model_params, dataset_train=ds_train, dataset_val=ds_valid, - training_params=context["training_parameters"], + training_params=context[ConfigKW.TRAINING_PARAMETERS], + wandb_params=context.get(ConfigKW.WANDB), path_output=path_output, device=device, cuda_available=cuda_available, metric_fns=metric_fns, n_gif=n_gif, resume_training=resume_training, - debugging=context["debugging"]) + debugging=context[ConfigKW.DEBUGGING]) if thr_increment: # LOAD DATASET @@ -429,21 +480,26 @@ def run_command(context, n_gif=0, thr_increment=None, resume_training=False): 'training') # Choice of optimisation metric - metric = "recall_specificity" if model_params["name"] in imed_utils.CLASSIFIER_LIST else "dice" + if model_params[ModelParamsKW.NAME] in imed_utils.CLASSIFIER_LIST: + metric = MetricsKW.RECALL_SPECIFICITY + else: + metric = MetricsKW.DICE + # Model path - model_path = 
os.path.join(path_output, "best_model.pt") + model_path = Path(path_output, "best_model.pt") + # Run analysis - thr = imed_testing.threshold_analysis(model_path=model_path, + thr = imed_testing.threshold_analysis(model_path=str(model_path), ds_lst=[ds_train, ds_valid], model_params=model_params, testing_params=testing_params, metric=metric, increment=thr_increment, - fname_out=os.path.join(path_output, "roc.png"), + fname_out=str(Path(path_output, "roc.png")), cuda_available=cuda_available) # Update threshold in config file - context["postprocessing"]["binarize_prediction"] = {"thr": thr} + context[ConfigKW.POSTPROCESSING][PostprocessingKW.BINARIZE_PREDICTION] = {BinarizeProdictionKW.THR: thr} save_config_file(context, path_output) if command == 'train': @@ -452,7 +508,7 @@ def run_command(context, n_gif=0, thr_increment=None, resume_training=False): if command == 'test': # LOAD DATASET # Warn user that the input-level dropout is set during inference - if loader_params['is_input_dropout']: + if loader_params[LoaderParamsKW.IS_INPUT_DROPOUT]: logger.warning("Input-level dropout is set during testing. To turn this option off, set 'is_input_dropout'" "to 'false' in the configuration file.") ds_test = imed_loader.load_dataset(bids_df, **{**loader_params, **{'data_list': test_lst, @@ -461,9 +517,10 @@ def run_command(context, n_gif=0, thr_increment=None, resume_training=False): 'requires_undo': True}}, device=device, cuda_available=cuda_available) - metric_fns = imed_metrics.get_metric_fns(ds_test.task) + eval_params = context[ConfigKW.EVALUATION_PARAMETERS] + metric_fns = imed_metrics.get_metric_fns(ds_test.task, eval_params) - if 'film_layers' in model_params and any(model_params['film_layers']): + if ModelParamsKW.FILM_LAYERS in model_params and any(model_params[ModelParamsKW.FILM_LAYERS]): ds_test, model_params = update_film_model_params(context, ds_test, model_params, path_output) # RUN INFERENCE @@ -474,17 +531,17 @@ def run_command(context, n_gif=0, thr_increment=None, resume_training=False): device=device, cuda_available=cuda_available, metric_fns=metric_fns, - postprocessing=context['postprocessing']) + postprocessing=context[ConfigKW.POSTPROCESSING]) # RUN EVALUATION df_results = imed_evaluation.evaluate(bids_df, path_output=path_output, - target_suffix=loader_params["target_suffix"], - eval_params=context["evaluation_parameters"]) + target_suffix=loader_params[LoaderParamsKW.TARGET_SUFFIX], + eval_params=eval_params) return df_results, pred_metrics def create_dataset_and_ivadomed_version_log(context): - path_data = context['loader_parameters']['path_data'] + path_data = context.get(ConfigKW.LOADER_PARAMETERS).get(LoaderParamsKW.PATH_DATA) ivadomed_version = imed_utils._version_string() datasets_version = [] @@ -495,12 +552,12 @@ def create_dataset_and_ivadomed_version_log(context): for Dataset in path_data: datasets_version.append(imed_utils.__get_commit(path_to_git_folder=Dataset)) - log_file = os.path.join(context['path_output'], 'version_info.log') + path_log = Path(context.get(ConfigKW.PATH_OUTPUT), 'version_info.log') try: - f = open(log_file, "w") + f = path_log.open(mode="w") except OSError as err: - logger.error("OS error: {0}".format(err)) + logger.error(f"OS error: {err}") raise Exception("Have you selected a log folder, and do you have write permissions for that folder?") # IVADOMED @@ -558,15 +615,17 @@ def run_main(): path_config_file = args.config context = imed_config_manager.ConfigurationManager(path_config_file).get_config() - context["command"] = 
imed_utils.get_command(args, context) - context["path_output"] = imed_utils.get_path_output(args, context) - context["loader_parameters"]["path_data"] = imed_utils.get_path_data(args, context) + context[ConfigKW.COMMAND] = imed_utils.get_command(args, context) + context[ConfigKW.PATH_OUTPUT] = imed_utils.get_path_output(args, context) + context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.PATH_DATA] = imed_utils.get_path_data(args, context) # Run command run_command(context=context, n_gif=args.gif if args.gif is not None else 0, thr_increment=args.thr_increment if args.thr_increment else None, - resume_training=bool(args.resume_training)) + resume_training=bool(args.resume_training), + no_patch=bool(args.no_patch), + overlap_2d=args.overlap_2d if args.overlap_2d else None) if __name__ == "__main__": diff --git a/ivadomed/metrics.py b/ivadomed/metrics.py index ee3a70d45..f8a49ab73 100644 --- a/ivadomed/metrics.py +++ b/ivadomed/metrics.py @@ -6,7 +6,7 @@ # METRICS -def get_metric_fns(task): +def get_metric_fns(task, eval_params=None): metric_fns = [dice_score, multi_class_dice_score, precision_score, @@ -14,7 +14,11 @@ def get_metric_fns(task): specificity_score, intersection_over_union, accuracy_score] - if task == "segmentation": + if eval_params: + object_detection_metrics = eval_params['object_detection_metrics'] + else: + object_detection_metrics = True + if task == "segmentation" and object_detection_metrics: metric_fns = metric_fns + [hausdorff_score] return metric_fns @@ -81,10 +85,10 @@ def numeric_score(prediction, groundtruth): Returns: float, float, float, float: FP, FN, TP, TN """ - FP = np.float(np.sum(prediction * (1.0 - groundtruth))) - FN = np.float(np.sum((1.0 - prediction) * groundtruth)) - TP = np.float(np.sum(prediction * groundtruth)) - TN = np.float(np.sum((1.0 - prediction) * (1.0 - groundtruth))) + FP = float(np.sum(prediction * (1.0 - groundtruth))) + FN = float(np.sum((1.0 - prediction) * groundtruth)) + TP = float(np.sum(prediction * groundtruth)) + TN = float(np.sum((1.0 - prediction) * (1.0 - groundtruth))) return FP, FN, TP, TN diff --git a/ivadomed/mixup.py b/ivadomed/mixup.py index af804bca1..2ccb88e78 100644 --- a/ivadomed/mixup.py +++ b/ivadomed/mixup.py @@ -1,7 +1,7 @@ -import os import matplotlib.pyplot as plt import numpy as np import torch +from pathlib import Path def mixup(data, targets, alpha, debugging=False, ofolder=None): @@ -27,7 +27,7 @@ def mixup(data, targets, alpha, debugging=False, ofolder=None): lambda_ = np.random.beta(alpha, alpha) lambda_ = max(lambda_, 1 - lambda_) # ensure lambda_ >= 0.5 - lambda_tensor = torch.FloatTensor([lambda_]) + lambda_tensor = torch.FloatTensor([lambda_]).to(data.device) data = data * lambda_tensor + data2 * (1 - lambda_tensor) targets = targets * lambda_tensor + targets2 * (1 - lambda_tensor) @@ -48,17 +48,17 @@ def save_mixup_sample(ofolder, input_data, labeled_data, lambda_tensor): lambda_tensor (Tensor): """ # Mixup folder - mixup_folder = os.path.join(ofolder, 'mixup') - if not os.path.isdir(mixup_folder): - os.makedirs(mixup_folder) + mixup_folder = Path(ofolder, 'mixup') + if not mixup_folder.is_dir(): + mixup_folder.mkdir(parents=True) # Random sample random_idx = np.random.randint(0, input_data.size()[0]) # Output fname - ofname = str(lambda_tensor.data.numpy()[0]) + '_' + str(random_idx).zfill(3) + '.png' - ofname = os.path.join(mixup_folder, ofname) + ofname = str(lambda_tensor.cpu().data.numpy()[0]) + '_' + str(random_idx).zfill(3) + '.png' + ofname = Path(mixup_folder, ofname) # Tensor to Numpy - x 
= input_data.data.numpy()[random_idx, 0, :, :] - y = labeled_data.data.numpy()[random_idx, 0, :, :] + x = input_data.cpu().data.numpy()[random_idx, 0, :, :] + y = labeled_data.cpu().data.numpy()[random_idx, 0, :, :] # Plot plt.figure(figsize=(20, 10)) plt.subplot(1, 2, 1) diff --git a/ivadomed/models.py b/ivadomed/models.py index 9db93fe2e..e2da4605f 100644 --- a/ivadomed/models.py +++ b/ivadomed/models.py @@ -1,15 +1,13 @@ -import os - from collections import OrderedDict import torch import torch.nn as nn import torch.nn.functional as F from torch.nn import Module from torch.nn import init +from pathlib import Path import torchvision.models - #Modified from torchvision.models.resnet.Resnet class ResNet(nn.Module): """ResNet model based on @@ -142,14 +140,14 @@ class DenseNet(nn.Module): num_init_features (int) - the number of filters to learn in the first convolution layer bn_size (int) - multiplicative factor for number of bottle neck layers (i.e. bn_size * k features in the bottleneck layer) - drop_rate (float) - dropout rate after each dense layer + dropout_rate (float) - dropout rate after each dense layer num_classes (int) - number of classification classes memory_efficient (bool) - If True, uses checkpointing. Much more memory efficient, but slower. Default: *False*. See `"article" `_ """ def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), - num_init_features=64, bn_size=4, drop_rate=0, num_classes=2, memory_efficient=False): + num_init_features=64, bn_size=4, dropout_rate=0.3, num_classes=2, memory_efficient=False): super(DenseNet, self).__init__() @@ -170,7 +168,7 @@ def __init__(self, growth_rate=32, block_config=(6, 12, 24, 16), num_input_features=num_features, bn_size=bn_size, growth_rate=growth_rate, - drop_rate=drop_rate, + drop_rate=dropout_rate, memory_efficient=memory_efficient ) self.features.add_module('denseblock%d' % (i + 1), block) @@ -215,14 +213,15 @@ def __init__(self, **kwargs): class DownConv(Module): - """Two successive series of down convolution, batch normalization and drop out in 2D. + """Two successive series of down convolution, batch normalization and dropout in 2D. Used in U-Net's encoder. Args: in_feat (int): Number of channels in the input image. out_feat (int): Number of channels in the output image. - drop_rate (float): Probability of dropout. + dropout_rate (float): Probability of dropout. bn_momentum (float): Batch normalization momentum. + is_2d (bool): Indicates dimensionality of model: True for 2D convolutions, False for 3D convolutions. Attributes: conv1 (Conv2d): First 2D down convolution with kernel size 3 and padding of 1. @@ -233,7 +232,7 @@ class DownConv(Module): conv2_drop (Dropout2d): Second 2D dropout. 
""" - def __init__(self, in_feat, out_feat, drop_rate=0.4, bn_momentum=0.1, is_2d=True): + def __init__(self, in_feat, out_feat, dropout_rate=0.3, bn_momentum=0.1, is_2d=True): super(DownConv, self).__init__() if is_2d: conv = nn.Conv2d @@ -246,11 +245,11 @@ def __init__(self, in_feat, out_feat, drop_rate=0.4, bn_momentum=0.1, is_2d=True self.conv1 = conv(in_feat, out_feat, kernel_size=3, padding=1) self.conv1_bn = bn(out_feat, momentum=bn_momentum) - self.conv1_drop = dropout(drop_rate) + self.conv1_drop = dropout(dropout_rate) self.conv2 = conv(out_feat, out_feat, kernel_size=3, padding=1) self.conv2_bn = bn(out_feat, momentum=bn_momentum) - self.conv2_drop = dropout(drop_rate) + self.conv2_drop = dropout(dropout_rate) def forward(self, x): x = F.relu(self.conv1(x)) @@ -270,17 +269,18 @@ class UpConv(Module): Args: in_feat (int): Number of channels in the input image. out_feat (int): Number of channels in the output image. - drop_rate (float): Probability of dropout. + dropout_rate (float): Probability of dropout. bn_momentum (float): Batch normalization momentum. + is_2d (bool): Indicates dimensionality of model: True for 2D convolutions, False for 3D convolutions. Attributes: downconv (DownConv): Down convolution. """ - def __init__(self, in_feat, out_feat, drop_rate=0.4, bn_momentum=0.1, is_2d=True): + def __init__(self, in_feat, out_feat, dropout_rate=0.3, bn_momentum=0.1, is_2d=True): super(UpConv, self).__init__() self.is_2d = is_2d - self.downconv = DownConv(in_feat, out_feat, drop_rate, bn_momentum, is_2d) + self.downconv = DownConv(in_feat, out_feat, dropout_rate, bn_momentum, is_2d) def forward(self, x, y): # For retrocompatibility purposes @@ -301,10 +301,12 @@ class Encoder(Module): Args: in_channel (int): Number of channels in the input image. depth (int): Number of down convolutions minus bottom down convolution. - drop_rate (float): Probability of dropout. + dropout_rate (float): Probability of dropout. bn_momentum (float): Batch normalization momentum. n_metadata (dict): FiLM metadata see ivadomed.loader.film for more details. film_layers (list): List of 0 or 1 indicating on which layer FiLM is applied. + is_2d (bool): Indicates dimensionality of model: True for 2D convolutions, False for 3D convolutions. + n_filters (int): Number of base filters in the U-Net. Attributes: depth (int): Number of down convolutions minus bottom down convolution. @@ -313,25 +315,25 @@ class Encoder(Module): film_bottom (FiLMlayer): FiLM layer applied to bottom convolution. 
""" - def __init__(self, in_channel=1, depth=3, drop_rate=0.4, bn_momentum=0.1, n_metadata=None, film_layers=None, + def __init__(self, in_channel=1, depth=3, dropout_rate=0.3, bn_momentum=0.1, n_metadata=None, film_layers=None, is_2d=True, n_filters=64): super(Encoder, self).__init__() self.depth = depth self.down_path = nn.ModuleList() # first block - self.down_path.append(DownConv(in_channel, n_filters, drop_rate, bn_momentum, is_2d)) + self.down_path.append(DownConv(in_channel, n_filters, dropout_rate, bn_momentum, is_2d)) self.down_path.append(FiLMlayer(n_metadata, n_filters) if film_layers and film_layers[0] else None) max_pool = nn.MaxPool2d if is_2d else nn.MaxPool3d self.down_path.append(max_pool(2)) for i in range(depth - 1): - self.down_path.append(DownConv(n_filters, n_filters * 2, drop_rate, bn_momentum, is_2d)) + self.down_path.append(DownConv(n_filters, n_filters * 2, dropout_rate, bn_momentum, is_2d)) self.down_path.append(FiLMlayer(n_metadata, n_filters * 2) if film_layers and film_layers[i + 1] else None) self.down_path.append(max_pool(2)) n_filters = n_filters * 2 # Bottom - self.conv_bottom = DownConv(n_filters, n_filters, drop_rate, bn_momentum, is_2d) + self.conv_bottom = DownConv(n_filters, n_filters, dropout_rate, bn_momentum, is_2d) self.film_bottom = FiLMlayer(n_metadata, n_filters) if film_layers and film_layers[self.depth] else None def forward(self, x, context=None): @@ -367,12 +369,14 @@ class Decoder(Module): Args: out_channel (int): Number of channels in the output image. depth (int): Number of down convolutions minus bottom down convolution. - drop_rate (float): Probability of dropout. + dropout_rate (float): Probability of dropout. bn_momentum (float): Batch normalization momentum. n_metadata (dict): FiLM metadata see ivadomed.loader.film for more details. film_layers (list): List of 0 or 1 indicating on which layer FiLM is applied. hemis (bool): Boolean indicating if HeMIS is on or not. - final_activation (str): Choice of final activation between "sigmoid", "relu" and "softmax" + final_activation (str): Choice of final activation between "sigmoid", "relu" and "softmax". + is_2d (bool): Indicates dimensionality of model: True for 2D convolutions, False for 3D convolutions. + n_filters (int): Number of base filters in the U-Net. Attributes: depth (int): Number of down convolutions minus bottom down convolution. @@ -383,7 +387,7 @@ class Decoder(Module): softmax (Softmax): Softmax layer that can be applied as last layer. 
""" - def __init__(self, out_channel=1, depth=3, drop_rate=0.4, bn_momentum=0.1, + def __init__(self, out_channel=1, depth=3, dropout_rate=0.3, bn_momentum=0.1, n_metadata=None, film_layers=None, hemis=False, final_activation="sigmoid", is_2d=True, n_filters=64): super(Decoder, self).__init__() self.depth = depth @@ -393,7 +397,7 @@ def __init__(self, out_channel=1, depth=3, drop_rate=0.4, bn_momentum=0.1, self.up_path = nn.ModuleList() if hemis: in_channel = n_filters * 2 ** self.depth - self.up_path.append(UpConv(in_channel * 2, n_filters * 2 ** (self.depth - 1), drop_rate, bn_momentum, + self.up_path.append(UpConv(in_channel * 2, n_filters * 2 ** (self.depth - 1), dropout_rate, bn_momentum, is_2d)) if film_layers and film_layers[self.depth + 1]: self.up_path.append(FiLMlayer(n_metadata, n_filters * 2 ** (self.depth - 1))) @@ -403,7 +407,7 @@ def __init__(self, out_channel=1, depth=3, drop_rate=0.4, bn_momentum=0.1, else: in_channel = n_filters * 2 ** self.depth - self.up_path.append(UpConv(in_channel, n_filters * 2 ** (self.depth - 1), drop_rate, bn_momentum, is_2d)) + self.up_path.append(UpConv(in_channel, n_filters * 2 ** (self.depth - 1), dropout_rate, bn_momentum, is_2d)) if film_layers and film_layers[self.depth + 1]: self.up_path.append(FiLMlayer(n_metadata, n_filters * 2 ** (self.depth - 1))) else: @@ -414,7 +418,7 @@ def __init__(self, out_channel=1, depth=3, drop_rate=0.4, bn_momentum=0.1, self.up_path.append( UpConv(in_channel + n_filters * 2 ** (self.depth - i - 1 + int(hemis)), n_filters * 2 ** (self.depth - i - 1), - drop_rate, bn_momentum, is_2d)) + dropout_rate, bn_momentum, is_2d)) if film_layers and film_layers[self.depth + i + 1]: self.up_path.append(FiLMlayer(n_metadata, n_filters * 2 ** (self.depth - i - 1))) else: @@ -439,7 +443,9 @@ def forward(self, features, context=None, w_film=None): if self.last_film: x, w_film = self.last_film(x, context, w_film) - if hasattr(self, "final_activation") and self.final_activation == "softmax": + if hasattr(self, "final_activation") and self.final_activation not in ["softmax", "relu", "sigmoid"]: + raise ValueError("final_activation value has to be either softmax, relu, or sigmoid") + elif hasattr(self, "final_activation") and self.final_activation == "softmax": preds = self.softmax(x) elif hasattr(self, "final_activation") and self.final_activation == "relu": preds = nn.ReLU()(x) / nn.ReLU()(x).max() @@ -475,7 +481,7 @@ class Unet(Module): in_channel (int): Number of channels in the input image. out_channel (int): Number of channels in the output image. depth (int): Number of down convolutions minus bottom down convolution. - drop_rate (float): Probability of dropout. + dropout_rate (float): Probability of dropout. bn_momentum (float): Batch normalization momentum. final_activation (str): Choice of final activation between "sigmoid", "relu" and "softmax". is_2d (bool): Indicates dimensionality of model: True for 2D convolutions, False for 3D convolutions. @@ -487,16 +493,16 @@ class Unet(Module): decoder (Decoder): U-net decoder. 
""" - def __init__(self, in_channel=1, out_channel=1, depth=3, drop_rate=0.4, bn_momentum=0.1, final_activation='sigmoid', + def __init__(self, in_channel=1, out_channel=1, depth=3, dropout_rate=0.3, bn_momentum=0.1, final_activation='sigmoid', is_2d=True, n_filters=64, **kwargs): super(Unet, self).__init__() # Encoder path - self.encoder = Encoder(in_channel=in_channel, depth=depth, drop_rate=drop_rate, bn_momentum=bn_momentum, + self.encoder = Encoder(in_channel=in_channel, depth=depth, dropout_rate=dropout_rate, bn_momentum=bn_momentum, is_2d=is_2d, n_filters=n_filters) # Decoder path - self.decoder = Decoder(out_channel=out_channel, depth=depth, drop_rate=drop_rate, bn_momentum=bn_momentum, + self.decoder = Decoder(out_channel=out_channel, depth=depth, dropout_rate=dropout_rate, bn_momentum=bn_momentum, final_activation=final_activation, is_2d=is_2d, n_filters=n_filters) def forward(self, x): @@ -513,7 +519,7 @@ class FiLMedUnet(Unet): n_channel (int): Number of channels in the input image. out_channel (int): Number of channels in the output image. depth (int): Number of down convolutions minus bottom down convolution. - drop_rate (float): Probability of dropout. + dropout_rate (float): Probability of dropout. bn_momentum (float): Batch normalization momentum. n_metadata (dict): FiLM metadata see ivadomed.loader.film for more details. film_layers (list): List of 0 or 1 indicating on which layer FiLM is applied. @@ -526,9 +532,10 @@ class FiLMedUnet(Unet): decoder (Decoder): U-net decoder. """ - def __init__(self, in_channel=1, out_channel=1, depth=3, drop_rate=0.4, + def __init__(self, in_channel=1, out_channel=1, depth=3, dropout_rate=0.3, bn_momentum=0.1, n_metadata=None, film_layers=None, is_2d=True, n_filters=64, **kwargs): - super().__init__(in_channel=1, out_channel=1, depth=3, drop_rate=0.4, bn_momentum=0.1) + super().__init__(in_channel=in_channel, out_channel=out_channel, depth=depth, + dropout_rate=dropout_rate, bn_momentum=bn_momentum) # Verify if the length of boolean FiLM layers corresponds to the depth if film_layers: @@ -538,10 +545,10 @@ def __init__(self, in_channel=1, out_channel=1, depth=3, drop_rate=0.4, else: film_layers = [0] * (2 * depth + 2) # Encoder path - self.encoder = Encoder(in_channel=in_channel, depth=depth, drop_rate=drop_rate, bn_momentum=bn_momentum, + self.encoder = Encoder(in_channel=in_channel, depth=depth, dropout_rate=dropout_rate, bn_momentum=bn_momentum, n_metadata=n_metadata, film_layers=film_layers, is_2d=is_2d, n_filters=n_filters) # Decoder path - self.decoder = Decoder(out_channel=out_channel, depth=depth, drop_rate=drop_rate, bn_momentum=bn_momentum, + self.decoder = Decoder(out_channel=out_channel, depth=depth, dropout_rate=dropout_rate, bn_momentum=bn_momentum, n_metadata=n_metadata, film_layers=film_layers, is_2d=is_2d, n_filters=n_filters) def forward(self, x, context=None): @@ -680,7 +687,7 @@ class HeMISUnet(Module): contrasts (list): List of contrasts. out_channel (int): Number of output channels. depth (int): Number of down convolutions minus bottom down convolution. - drop_rate (float): Probability of dropout. + dropout_rate (float): Probability of dropout. bn_momentum (float): Batch normalization momentum. **kwargs: @@ -691,18 +698,18 @@ class HeMISUnet(Module): decoder (Decoder): U-Net decoder. 
""" - def __init__(self, contrasts, out_channel=1, depth=3, drop_rate=0.4, bn_momentum=0.1, **kwargs): + def __init__(self, contrasts, out_channel=1, depth=3, dropout_rate=0.3, bn_momentum=0.1, **kwargs): super(HeMISUnet, self).__init__() self.depth = depth self.contrasts = contrasts # Encoder path self.Encoder_mod = nn.ModuleDict( - [['Encoder_{}'.format(Mod), Encoder(in_channel=1, depth=depth, drop_rate=drop_rate, + [['Encoder_{}'.format(Mod), Encoder(in_channel=1, depth=depth, dropout_rate=dropout_rate, bn_momentum=bn_momentum)] for Mod in self.contrasts]) # Decoder path - self.decoder = Decoder(out_channel=out_channel, depth=depth, drop_rate=drop_rate, + self.decoder = Decoder(out_channel=out_channel, depth=depth, dropout_rate=dropout_rate, bn_momentum=bn_momentum, hemis=True) def forward(self, x_mods, indexes_mod): @@ -750,7 +757,7 @@ class Modified3DUNet(nn.Module): out_channel (int): Number of channels in the output image. n_filters (int): Number of base filters in the U-Net. attention (bool): Boolean indicating whether the attention module is on or not. - drop_rate (float): Probability of dropout. + dropout_rate (float): Probability of dropout. bn_momentum (float): Batch normalization momentum. final_activation (str): Choice of final activation between "sigmoid", "relu" and "softmax". **kwargs: @@ -766,7 +773,7 @@ class Modified3DUNet(nn.Module): Note: All layers are defined as attributes and used in the forward method. """ - def __init__(self, in_channel, out_channel, n_filters=16, attention=False, drop_rate=0.6, bn_momentum=0.1, + def __init__(self, in_channel, out_channel, n_filters=16, attention=False, dropout_rate=0.3, bn_momentum=0.1, final_activation="sigmoid", n_metadata=None, film_layers=None, **kwargs): super(Modified3DUNet, self).__init__() self.in_channels = in_channel @@ -777,7 +784,7 @@ def __init__(self, in_channel, out_channel, n_filters=16, attention=False, drop_ self.final_activation = final_activation self.lrelu = nn.LeakyReLU() - self.dropout3d = nn.Dropout3d(p=drop_rate) + self.dropout3d = nn.Dropout3d(p=dropout_rate) self.upsacle = nn.Upsample(scale_factor=2, mode='nearest') self.softmax = nn.Softmax(dim=1) @@ -1107,7 +1114,9 @@ def forward(self, x, context=None, w_film=None): out, w_film = self.film_layer10(out, context, w_film) seg_layer = out - if hasattr(self, "final_activation") and self.final_activation == "softmax": + if hasattr(self, "final_activation") and self.final_activation not in ["softmax", "relu", "sigmoid"]: + raise ValueError("final_activation value has to be either softmax, relu, or sigmoid") + elif hasattr(self, "final_activation") and self.final_activation == "softmax": out = self.softmax(out) elif hasattr(self, "final_activation") and self.final_activation == "relu": out = nn.ReLU()(seg_layer) / nn.ReLU()(seg_layer).max() if bool(nn.ReLU()(seg_layer).max()) \ @@ -1126,11 +1135,11 @@ class UNet3D(Modified3DUNet): """To ensure retrocompatibility, when calling UNet3D (old model name), Modified3DUNet will be called. see Modified3DUNet to learn more about parameters. 
""" - def __init__(self, in_channel, out_channel, n_filters=16, attention=False, drop_rate=0.6, bn_momentum=0.1, + def __init__(self, in_channel, out_channel, n_filters=16, attention=False, dropout_rate=0.3, bn_momentum=0.1, final_activation="sigmoid", n_metadata=None, film_layers=None, **kwargs): super(UNet3D, self).__init__() Modified3DUNet(in_channel=in_channel, out_channel=out_channel, n_filters=n_filters, attention=attention, - drop_rate=drop_rate, bn_momentum=bn_momentum, final_activation=final_activation, + dropout_rate=dropout_rate, bn_momentum=bn_momentum, final_activation=final_activation, n_metadata=n_metadata, film_layers=film_layers, **kwargs) @@ -1476,27 +1485,38 @@ def set_model_for_retrain(model_path, retrain_fraction, map_location, reset=True def get_model_filenames(folder_model): """Get trained model filenames from its folder path. - This function checks if the folder_model exists and get trained model (.pt or .onnx) and its configuration file - (.json) from it. - Note: if the model exists as .onnx, then this function returns its onnx path instead of the .pt version. + This function checks if the folder_model exists and get trained model path (.pt or .onnx based on + model and GPU availability) and its configuration file (.json) from it. Args: folder_name (str): Path of the model folder. Returns: - str, str: Paths of the model (.onnx) and its configuration file (.json). + str, str: Paths of the model (.pt or .onnx) and its configuration file (.json). """ - if os.path.isdir(folder_model): - prefix_model = os.path.basename(folder_model) - # Check if model and model metadata exist. Verify if ONNX model exists, if not try to find .pt model - fname_model = os.path.join(folder_model, prefix_model + '.onnx') - if not os.path.isfile(fname_model): - fname_model = os.path.join(folder_model, prefix_model + '.pt') - if not os.path.exists(fname_model): - raise FileNotFoundError(fname_model) - fname_model_metadata = os.path.join(folder_model, prefix_model + '.json') - if not os.path.isfile(fname_model_metadata): - raise FileNotFoundError(fname_model) + if Path(folder_model).is_dir(): + prefix_model = Path(folder_model).name + fname_model_onnx = Path(folder_model, prefix_model + '.onnx') + fname_model_pt = Path(folder_model, prefix_model + '.pt') + cuda_available = torch.cuda.is_available() + + # Assign '.pt' or '.onnx' model based on file existence and GPU/CPU device availability + if not fname_model_pt.is_file() and not fname_model_onnx.is_file(): + raise FileNotFoundError(f"Model files not found in model folder: " + f"'{str(fname_model_onnx)}' or '{str(fname_model_pt)}'") + # '.pt' is preferred on GPU, or on CPU if '.onnx' doesn't exist + elif (( cuda_available and fname_model_pt.is_file()) or + (not cuda_available and not fname_model_onnx.is_file())): + fname_model = fname_model_pt + # '.onnx' is preferred on CPU, or on GPU if '.pt' doesn't exist + elif ((not cuda_available and fname_model_onnx.is_file()) or + ( cuda_available and not fname_model_pt.is_file())): + fname_model = fname_model_onnx + + fname_model_metadata = Path(folder_model, prefix_model + '.json') + if not fname_model_metadata.is_file(): + raise FileNotFoundError(f"Model config file not found in model folder: '{str(fname_model_metadata)}'") else: - raise FileNotFoundError(fname_model) - return fname_model, fname_model_metadata + raise FileNotFoundError(folder_model) + + return str(fname_model), str(fname_model_metadata) diff --git a/ivadomed/object_detection/utils.py b/ivadomed/object_detection/utils.py index 
a73257d84..b7cf8d884 100644 --- a/ivadomed/object_detection/utils.py +++ b/ivadomed/object_detection/utils.py @@ -10,6 +10,7 @@ from ivadomed import postprocessing as imed_postpro from ivadomed import transforms as imed_transforms from ivadomed.loader import utils as imed_loader_utils +from ivadomed.keywords import ObjectDetectionParamsKW, MetadataKW def get_bounding_boxes(mask): @@ -23,7 +24,7 @@ def get_bounding_boxes(mask): """ # Label the different objects in the mask - labeled_mask, _ = ndimage.measurements.label(mask) + labeled_mask, _ = ndimage.label(mask) object_labels = np.unique(labeled_mask) bounding_boxes = [] for label in object_labels[1:]: @@ -140,17 +141,17 @@ def resample_bounding_box(metadata, transform): for idx, transfo in enumerate(transform.transform["im"].transforms): if "Resample" == transfo.__class__.__name__: hspace, wspace, dspace = (transfo.hspace, transfo.wspace, transfo.dspace) - hfactor = metadata['input_metadata'][0]['zooms'][0] / hspace - wfactor = metadata['input_metadata'][0]['zooms'][1] / wspace - dfactor = metadata['input_metadata'][0]['zooms'][2] / dspace + hfactor = metadata[MetadataKW.INPUT_METADATA][0][MetadataKW.ZOOMS][0] / hspace + wfactor = metadata[MetadataKW.INPUT_METADATA][0][MetadataKW.ZOOMS][1] / wspace + dfactor = metadata[MetadataKW.INPUT_METADATA][0][MetadataKW.ZOOMS][2] / dspace factor = (hfactor, wfactor, dfactor) - coord = adjust_bb_size(metadata['input_metadata'][0]['bounding_box'], factor, resample=True) + coord = adjust_bb_size(metadata[MetadataKW.INPUT_METADATA][0][MetadataKW.BOUNDING_BOX], factor, resample=True) - for i in range(len(metadata['input_metadata'])): - metadata['input_metadata'][i]['bounding_box'] = coord + for i in range(len(metadata[MetadataKW.INPUT_METADATA])): + metadata[MetadataKW.INPUT_METADATA][i][MetadataKW.BOUNDING_BOX] = coord - for i in range(len(metadata['gt_metadata'])): - metadata['gt_metadata'][i]['bounding_box'] = coord + for i in range(len(metadata[MetadataKW.GT_METADATA])): + metadata[MetadataKW.GT_METADATA][i][MetadataKW.BOUNDING_BOX] = coord break @@ -196,7 +197,7 @@ def adjust_transforms(transforms, seg_pair, length=None, stride=None): for metadata in seg_pair['input_metadata']: assert len(set(index_shape)) == 1 - metadata['index_shape'] = index_shape[0] + metadata[MetadataKW.INDEX_SHAPE] = index_shape[0] return transforms @@ -237,22 +238,23 @@ def load_bounding_boxes(object_detection_params, subject_path_list, slice_axis, """ # Load or generate bounding boxes and save them in json file bounding_box_dict = {} - if object_detection_params is None or object_detection_params['object_detection_path'] is None: + if object_detection_params is None or object_detection_params[ObjectDetectionParamsKW.OBJECT_DETECTION_PATH] is None: return bounding_box_dict - bounding_box_path = Path(object_detection_params.get('path_output'), 'bounding_boxes.json') + + bounding_box_path = Path(object_detection_params.get(ObjectDetectionParamsKW.PATH_OUTPUT), 'bounding_boxes.json') if bounding_box_path.exists(): with bounding_box_path.open(mode='r') as fp: bounding_box_dict = json.load(fp) - elif object_detection_params['object_detection_path'] is not None and \ - Path(object_detection_params.get('object_detection_path')).exists(): + elif object_detection_params[ObjectDetectionParamsKW.OBJECT_DETECTION_PATH] is not None and \ + Path(object_detection_params.get(ObjectDetectionParamsKW.OBJECT_DETECTION_PATH)).exists(): bounding_box_dict = generate_bounding_box_file(subject_path_list, - 
object_detection_params['object_detection_path'], - object_detection_params['path_output'], - object_detection_params['gpu_ids'], + object_detection_params[ObjectDetectionParamsKW.OBJECT_DETECTION_PATH], + object_detection_params[ObjectDetectionParamsKW.PATH_OUTPUT], + object_detection_params[ObjectDetectionParamsKW.GPU_IDS], slice_axis, constrast_lst, - safety_factor=object_detection_params['safety_factor']) - elif object_detection_params['object_detection_path'] is not None: + safety_factor=object_detection_params[ObjectDetectionParamsKW.SAFETY_FACTOR]) + elif object_detection_params[ObjectDetectionParamsKW.OBJECT_DETECTION_PATH] is not None: raise RuntimeError("Path to object detection model doesn't exist") return bounding_box_dict @@ -268,11 +270,11 @@ def verify_metadata(metadata, has_bounding_box): Returns: bool: Boolean indicating if 'bounding_box' is present across all metadata. """ - index_has_bounding_box = all(['bounding_box' in metadata['input_metadata'][i] - for i in range(len(metadata['input_metadata']))]) - for gt_metadata in metadata['gt_metadata']: + index_has_bounding_box = all([MetadataKW.BOUNDING_BOX in metadata[MetadataKW.INPUT_METADATA][i] + for i in range(len(metadata[MetadataKW.INPUT_METADATA]))]) + for gt_metadata in metadata[MetadataKW.GT_METADATA]: if gt_metadata is not None: - index_has_bounding_box &= 'bounding_box' in gt_metadata + index_has_bounding_box &= MetadataKW.BOUNDING_BOX in gt_metadata has_bounding_box &= index_has_bounding_box return has_bounding_box @@ -300,7 +302,7 @@ def bounding_box_prior(fname_mask, metadata, slice_axis, safety_factor=None): bounding_box = get_bounding_boxes(np_mask)[0] if safety_factor: bounding_box = adjust_bb_size(bounding_box, safety_factor) - metadata['bounding_box'] = bounding_box + metadata[MetadataKW.BOUNDING_BOX] = bounding_box def compute_bb_statistics(bounding_box_path): diff --git a/ivadomed/postprocessing.py b/ivadomed/postprocessing.py index 44b7a216d..3820daa8c 100644 --- a/ivadomed/postprocessing.py +++ b/ivadomed/postprocessing.py @@ -1,14 +1,13 @@ # Deals with postprocessing on generated segmentation. 
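The object_detection changes above swap raw dictionary keys such as 'bounding_box' for constants from ivadomed.keywords. A sketch of why that pays off; the class below is abbreviated, with member values mirroring the literals the diff removes:

class MetadataKW:
    # Abbreviated stand-in for ivadomed.keywords.MetadataKW.
    INPUT_METADATA = "input_metadata"
    GT_METADATA = "gt_metadata"
    BOUNDING_BOX = "bounding_box"
    ZOOMS = "zooms"

metadata = {MetadataKW.INPUT_METADATA: [{MetadataKW.ZOOMS: (1.0, 1.0, 1.0)}]}

# A typo in a raw key ("boudning_box") fails silently or only at runtime;
# a typo in a constant (MetadataKW.BOUDNING_BOX) raises AttributeError
# immediately, and IDEs can find and rename every usage.
metadata[MetadataKW.INPUT_METADATA][0][MetadataKW.BOUNDING_BOX] = (0, 10, 0, 10, 0, 10)
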
import functools -import os import nibabel as nib import numpy as np from loguru import logger -from scipy.ndimage import label, generate_binary_structure -from scipy.ndimage.morphology import binary_fill_holes +from scipy.ndimage import label, generate_binary_structure, binary_fill_holes from skimage.feature import peak_local_max +from pathlib import Path def nifti_capable(wrapped): @@ -24,7 +23,11 @@ def nifti_capable(wrapped): @functools.wraps(wrapped) def wrapper(data, *args, **kwargs): if isinstance(data, nib.Nifti1Image): - return nib.Nifti1Image(wrapper(np.copy(np.asanyarray(data.dataobj)), *args, **kwargs), data.affine) + return nib.Nifti1Image( + dataobj=wrapper(np.copy(np.asanyarray(data.dataobj)), *args, **kwargs), + affine=data.header.get_best_affine(), + header=data.header.copy() + ) return wrapped(data, *args, **kwargs) return wrapper @@ -90,7 +93,7 @@ def threshold_predictions(predictions, thr=0.5): thresholded_preds[low_values_indices] = 0 low_values_indices = thresholded_preds >= thr thresholded_preds[low_values_indices] = 1 - return thresholded_preds.astype(np.int) + return thresholded_preds.astype(int) @nifti_capable @@ -151,7 +154,7 @@ def fill_holes(predictions, structure=(3, 3, 3)): """ assert np.array_equal(predictions, predictions.astype(bool)) assert len(structure) == len(predictions.shape) - return binary_fill_holes(predictions, structure=np.ones(structure)).astype(np.int) + return binary_fill_holes(predictions, structure=np.ones(structure)).astype(int) @nifti_capable @@ -210,7 +213,10 @@ def label_file_from_coordinates(nifti_image, coord_list): for j in range(len(coord_list)): label_array[coord_list[j][0], coord_list[j][1], coord_list[j][2]] = 1 - nib_pred = nib.Nifti1Image(label_array, nifti_image.affine) + nib_pred = nib.Nifti1Image( + dataobj=label_array, + affine=nifti_image.header.get_best_affine(), + ) return nib_pred @@ -230,7 +236,7 @@ def remove_small_objects(data, bin_structure, size_min): data_label, n = label(data, structure=bin_structure) for idx in range(1, n + 1): - data_idx = (data_label == idx).astype(np.int) + data_idx = (data_label == idx).astype(int) n_nonzero = np.count_nonzero(data_idx) if n_nonzero < size_min: @@ -309,7 +315,7 @@ def uncertainty(self, thr, suffix): """ if thr >= 0: uncertainty_path = self.filename_prefix + suffix - if os.path.exists(uncertainty_path): + if Path(uncertainty_path).exists(): data_uncertainty = nib.load(uncertainty_path).get_fdata() if suffix == "_unc-iou.nii.gz" or suffix == "_soft.nii.gz": self.data_pred = mask_predictions(self.data_pred, data_uncertainty > thr) diff --git a/ivadomed/preprocessing.py b/ivadomed/preprocessing.py index 2a8b33528..dd63ae41f 100644 --- a/ivadomed/preprocessing.py +++ b/ivadomed/preprocessing.py @@ -33,6 +33,10 @@ def get_midslice_average(path_im, ind, slice_axis=0): arr_pred_ref_space = imed_loader_utils.reorient_image(np.expand_dims(mid[:, :], axis=slice_axis), 2, image, image_can).astype('float32') - nib_pred = nib.Nifti1Image(arr_pred_ref_space, image.affine) + nib_pred = nib.Nifti1Image( + dataobj=arr_pred_ref_space, + affine=image.header.get_best_affine(), + header=image.header.copy() + ) return nib_pred diff --git a/ivadomed/scripts/automate_training.py b/ivadomed/scripts/automate_training.py index 048c86e35..7186b87cc 100755 --- a/ivadomed/scripts/automate_training.py +++ b/ivadomed/scripts/automate_training.py @@ -16,7 +16,6 @@ import itertools from functools import partial import json -import logging import random import collections.abc import shutil @@ -25,16 +24,19 @@ 
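The nifti_capable and label_file_from_coordinates changes above rebuild output images from header.get_best_affine() and a copied header rather than the affine alone. A small nibabel sketch of what this preserves, using the descrip field as an arbitrary example:

import numpy as np
import nibabel as nib

data = np.zeros((4, 4, 4), dtype=np.float32)
src = nib.Nifti1Image(data, affine=np.eye(4))
src.header['descrip'] = b'processed with ivadomed'

# Rebuilding from the affine alone resets header fields...
stripped = nib.Nifti1Image(data, affine=src.affine)
# ...while passing header= (as the patched decorator does) keeps them.
kept = nib.Nifti1Image(data, affine=src.header.get_best_affine(),
                       header=src.header.copy())

assert stripped.header['descrip'] != src.header['descrip']
assert kept.header['descrip'] == src.header['descrip']
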
import pandas as pd
 import numpy as np
 import torch.multiprocessing as mp
+from ivadomed.loader.bids_dataframe import BidsDataframe
 import ivadomed.scripts.visualize_and_compare_testing_models as violin_plots
 from pathlib import Path
+from loguru import logger
 from ivadomed import main as ivado
 from ivadomed import config_manager as imed_config_manager
 from ivadomed.loader import utils as imed_loader_utils
 from ivadomed.scripts.compare_models import compute_statistics
 from ivadomed import utils as imed_utils
+from ivadomed.keywords import ConfigKW, SplitDatasetKW, LoaderParamsKW
 
 LOG_FILENAME = 'log.txt'
-logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG)
+logger.add(LOG_FILENAME)
 
 
 def get_parser():
@@ -85,25 +87,25 @@ def train_worker(config, thr_incr):
         ID = int(current.name[-1]) - 1
 
         # Use GPU i from the array specified in the config file
-        config["gpu_ids"] = [config["gpu_ids"][ID]]
+        config[ConfigKW.GPU_IDS] = [config[ConfigKW.GPU_IDS][ID]]
 
         # Call ivado cmd_train
         try:
             # Save best validation score
-            config["command"] = "train"
+            config[ConfigKW.COMMAND] = "train"
             best_training_dice, best_training_loss, best_validation_dice, best_validation_loss = \
                 ivado.run_command(config, thr_increment=thr_incr)
 
         except Exception:
-            logging.exception('Got exception on main handler')
-            logging.info("Unexpected error:", sys.exc_info()[0])
+            logger.exception('Got exception on main handler')
+            logger.info(f"Unexpected error: {sys.exc_info()[0]}")
             raise
 
         # Save config file in output path
-        config_copy = open(config["path_output"] + "/config_file.json", "w")
+        config_copy = open(config[ConfigKW.PATH_OUTPUT] + "/config_file.json", "w")
         json.dump(config, config_copy, indent=4)
 
-        return config["path_output"], best_training_dice, best_training_loss, best_validation_dice, \
+        return config[ConfigKW.PATH_OUTPUT], best_training_dice, best_training_loss, best_validation_dice, \
             best_validation_loss
 
 
@@ -115,19 +117,19 @@ def test_worker(config):
         ID = int(current.name[-1]) - 1
 
         # Use GPU i from the array specified in the config file
-        config["gpu_ids"] = [config["gpu_ids"][ID]]
+        config[ConfigKW.GPU_IDS] = [config[ConfigKW.GPU_IDS][ID]]
 
     try:
         # Save best test score
-        config["command"] = "test"
+        config[ConfigKW.COMMAND] = "test"
         df_results, test_dice = ivado.run_command(config)
 
     except Exception:
-        logging.exception('Got exception on main handler')
-        logging.info("Unexpected error:", sys.exc_info()[0])
+        logger.exception('Got exception on main handler')
+        logger.info(f"Unexpected error: {sys.exc_info()[0]}")
         raise
 
-    return config["path_output"], test_dice, df_results
+    return config[ConfigKW.PATH_OUTPUT], test_dice, df_results
 
 
 def split_dataset(initial_config):
@@ -152,33 +154,33 @@ def split_dataset(initial_config):
             "path_output": "./tmp/"
         }
     """
-    loader_parameters = initial_config["loader_parameters"]
-    path_output = Path(initial_config["path_output"])
+    loader_parameters = initial_config[ConfigKW.LOADER_PARAMETERS]
+    path_output = Path(initial_config[ConfigKW.PATH_OUTPUT])
     if not path_output.is_dir():
-        print('Creating output path: {}'.format(path_output))
+        logger.info(f'Creating output path: {path_output}')
         path_output.mkdir(parents=True)
     else:
-        print('Output path already exists: {}'.format(path_output))
+        logger.info(f'Output path already exists: {path_output}')
 
-    bids_df = imed_loader_utils.BidsDataframe(loader_parameters, str(path_output), derivatives=True)
+    bids_df = BidsDataframe(loader_parameters, str(path_output), derivatives=True)
 
     train_lst, valid_lst, test_lst =
imed_loader_utils.get_new_subject_file_split( df=bids_df.df, - data_testing=initial_config["split_dataset"]["data_testing"], - split_method=initial_config["split_dataset"]["split_method"], - random_seed=initial_config["split_dataset"]["random_seed"], - train_frac=initial_config["split_dataset"]["train_fraction"], - test_frac=initial_config["split_dataset"]["test_fraction"], + data_testing=initial_config[ConfigKW.SPLIT_DATASET][SplitDatasetKW.DATA_TESTING], + split_method=initial_config[ConfigKW.SPLIT_DATASET][SplitDatasetKW.SPLIT_METHOD], + random_seed=initial_config[ConfigKW.SPLIT_DATASET][SplitDatasetKW.RANDOM_SEED], + train_frac=initial_config[ConfigKW.SPLIT_DATASET][SplitDatasetKW.TRAIN_FRACTION], + test_frac=initial_config[ConfigKW.SPLIT_DATASET][SplitDatasetKW.TEST_FRACTION], path_output="./", - balance=initial_config["split_dataset"]['balance'] \ - if 'balance' in initial_config["split_dataset"] else None + balance=initial_config[ConfigKW.SPLIT_DATASET][SplitDatasetKW.BALANCE] \ + if SplitDatasetKW.BALANCE in initial_config[ConfigKW.SPLIT_DATASET] else None ) # save the subject distribution split_dct = {'train': train_lst, 'valid': valid_lst, 'test': test_lst} split_path = "./" + "common_split_datasets.joblib" joblib.dump(split_dct, split_path) - initial_config["split_dataset"]["fname_split"] = split_path + initial_config[ConfigKW.SPLIT_DATASET][SplitDatasetKW.FNAME_SPLIT] = split_path return initial_config @@ -251,11 +253,14 @@ def make_config_list(param_list, initial_config, all_combin, multi_params): for combination in list(itertools.combinations(param_list, len(keys))): if keys_are_unique(combination): new_config = copy.deepcopy(initial_config) - path_output = new_config["path_output"] + path_output = new_config[ConfigKW.PATH_OUTPUT] for hyper_option in combination: new_config = update_dict(new_config, hyper_option.option, hyper_option.base_key) - path_output = path_output + hyper_option.name - new_config["path_output"] = path_output + folder_name_suffix = hyper_option.name + folder_name_suffix = folder_name_suffix.translate({ord(i): None for i in '[]}{ \''}) + folder_name_suffix = folder_name_suffix.translate({ord(i): '-' for i in ':=,'}) + path_output = path_output + folder_name_suffix + new_config[ConfigKW.PATH_OUTPUT] = path_output config_list.append(new_config) elif multi_params: base_keys = get_base_keys(param_list) @@ -265,18 +270,24 @@ def make_config_list(param_list, initial_config, all_combin, multi_params): max_length = np.min([len(base_key_dict[base_key]) for base_key in base_key_dict.keys()]) for i in range(0, max_length): new_config = copy.deepcopy(initial_config) - path_output = new_config["path_output"] + path_output = new_config[ConfigKW.PATH_OUTPUT] for key in base_key_dict.keys(): hyper_option = base_key_dict[key][i] new_config = update_dict(new_config, hyper_option.option, hyper_option.base_key) - path_output = path_output + hyper_option.name - new_config["path_output"] = path_output + folder_name_suffix = hyper_option.name + folder_name_suffix = folder_name_suffix.translate({ord(i): None for i in '[]}{ \''}) + folder_name_suffix = folder_name_suffix.translate({ord(i): '-' for i in ':=,'}) + path_output = path_output + folder_name_suffix + new_config[ConfigKW.PATH_OUTPUT] = path_output config_list.append(new_config) else: for hyper_option in param_list: new_config = copy.deepcopy(initial_config) update_dict(new_config, hyper_option.option, hyper_option.base_key) - new_config["path_output"] = initial_config["path_output"] + hyper_option.name + 
folder_name_suffix = hyper_option.name + folder_name_suffix = folder_name_suffix.translate({ord(i): None for i in '[]}{ \''}) + folder_name_suffix = folder_name_suffix.translate({ord(i): '-' for i in ':=,'}) + new_config[ConfigKW.PATH_OUTPUT] = initial_config[ConfigKW.PATH_OUTPUT] + folder_name_suffix config_list.append(new_config) return config_list @@ -461,10 +472,10 @@ def format_results(results_df, config_list, param_list): config_df = pd.DataFrame.from_dict(config_list) keep = list(set([list(hyper_option.option.keys())[0] for hyper_option in param_list])) - keep.append("path_output") + keep.append(ConfigKW.PATH_OUTPUT) config_df = config_df[keep] - results_df = config_df.set_index('path_output').join(results_df.set_index('path_output')) + results_df = config_df.set_index(ConfigKW.PATH_OUTPUT).join(results_df.set_index(ConfigKW.PATH_OUTPUT)) results_df = results_df.reset_index() results_df = results_df.sort_values(by=['best_validation_loss']) return results_df @@ -663,10 +674,10 @@ def automate_training(file_config, file_config_hyper, fixed_split, all_combin, p initial_config = imed_config_manager.ConfigurationManager(file_config).get_config() if path_data is not None: - initial_config["loader_parameters"]["path_data"] = path_data + initial_config[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.PATH_DATA] = path_data # Split dataset if not already done - if fixed_split and (initial_config.get("split_path") is None): + if fixed_split and (initial_config.get(ConfigKW.SPLIT_PATH) is None): initial_config = split_dataset(initial_config) # Hyperparameters values to experiment @@ -681,25 +692,25 @@ def automate_training(file_config, file_config_hyper, fixed_split, all_combin, p ctx = mp.get_context("spawn") # Run all configs on a separate process, with a maximum of n_gpus processes at a given time - logging.info(initial_config['gpu_ids']) + logger.info(initial_config[ConfigKW.GPU_IDS]) results_df = pd.DataFrame() eval_df = pd.DataFrame() all_mean = pd.DataFrame() - with ctx.Pool(processes=len(initial_config["gpu_ids"])) as pool: + with ctx.Pool(processes=len(initial_config[ConfigKW.GPU_IDS])) as pool: for i in range(n_iterations): if not fixed_split: # Set seed for iteration seed = random.randint(1, 10001) for config in config_list: - config["split_dataset"]["random_seed"] = seed + config[ConfigKW.SPLIT_DATASET][SplitDatasetKW.RANDOM_SEED] = seed if all_logs: if i: - config["path_output"] = config["path_output"].replace("_n=" + str(i - 1).zfill(2), + config[ConfigKW.PATH_OUTPUT] = config[ConfigKW.PATH_OUTPUT].replace("_n=" + str(i - 1).zfill(2), "_n=" + str(i).zfill(2)) else: - config["path_output"] += "_n=" + str(i).zfill(2) + config[ConfigKW.PATH_OUTPUT] += "_n=" + str(i).zfill(2) validation_scores = pool.map(partial(train_worker, thr_incr=thr_increment), config_list) @@ -716,12 +727,12 @@ def automate_training(file_config, file_config_hyper, fixed_split, all_combin, p try: shutil.rmtree(str(path_pred)) except OSError as e: - logging.info("Error: %s - %s." 
% (e.filename, e.strerror))
+                        logger.info(f"Error: {e.filename} - {e.strerror}.")
 
                 # Take the config file within the path_output because binarize_prediction may have been updated
-                json_path = Path(config['path_output'], 'config_file.json')
+                json_path = Path(config[ConfigKW.PATH_OUTPUT], 'config_file.json')
                 new_config = imed_config_manager.ConfigurationManager(str(json_path)).get_config()
-                new_config["gpu_ids"] = config["gpu_ids"]
+                new_config[ConfigKW.GPU_IDS] = config[ConfigKW.GPU_IDS]
                 new_config_list.append(new_config)
 
             test_results = pool.map(test_worker, new_config_list)
@@ -767,8 +778,8 @@ def automate_training(file_config, file_config_hyper, fixed_split, all_combin, p
     results_df = format_results(results_df, config_list, param_list)
     results_df.to_csv(str(Path(output_dir, "detailed_results.csv")))
 
-    logging.info("Detailed results")
-    logging.info(results_df)
+    logger.info("Detailed results")
+    logger.info(results_df)
 
     # Compute avg, std, p-values
     if n_iterations > 1:
@@ -776,7 +787,7 @@
 
     # If the test is selected, also show the violin plots
     if plot_comparison:
-        output_folders = [config_list[i]["path_output"] for i in range(len(config_list))]
+        output_folders = [config_list[i][ConfigKW.PATH_OUTPUT] for i in range(len(config_list))]
         violin_plots.visualize_and_compare_models(ofolders=output_folders)
diff --git a/ivadomed/scripts/compare_models.py b/ivadomed/scripts/compare_models.py
index a8da01349..ac058e568 100755
--- a/ivadomed/scripts/compare_models.py
+++ b/ivadomed/scripts/compare_models.py
@@ -15,6 +15,7 @@
 import pandas as pd
 from ivadomed import utils as imed_utils
 from scipy.stats import ttest_ind_from_stats
+from loguru import logger
 
 
 def get_parser():
@@ -59,10 +60,8 @@ def compute_statistics(dataframe, n_iterations, run_test=True, csv_out='comparis
     avg = dataframe.groupby(['path_output']).mean()
     std = dataframe.groupby(['path_output']).std()
 
-    print("Average dataframe")
-    print(avg)
-    print("Standard deviation dataframe")
-    print(std)
+    logger.info(f"Average dataframe: {avg}")
+    logger.info(f"Standard deviation dataframe: {std}")
 
     config_logs = list(avg.index.values)
     p_values = np.zeros((len(config_logs), len(config_logs)))
@@ -85,8 +84,8 @@
             i += 1
 
     p_df = pd.DataFrame(p_values, index=config_logs, columns=config_logs)
-    print("P-values dataframe")
-    print(p_df)
+    logger.info("P-values dataframe")
+    logger.info(p_df)
 
     if csv_out is not None:
         # Unnamed 0 column corresponds to run number so we remove that and add prefix for better readability
         df_concat = pd.concat([avg.add_prefix('avg_').drop(['avg_Unnamed: 0'], axis=1),
diff --git a/ivadomed/scripts/download_data.py b/ivadomed/scripts/download_data.py
index 926a00d5e..a1595ff62 100644
--- a/ivadomed/scripts/download_data.py
+++ b/ivadomed/scripts/download_data.py
@@ -1,4 +1,3 @@
-import os
 import shutil
 import logging
 import cgi
@@ -9,10 +8,12 @@
 import requests
 from requests.adapters import HTTPAdapter
 from requests.packages.urllib3.util import Retry
+from pathlib import Path
 
 import argparse
 import textwrap
 from ivadomed import utils as imed_utils
+from ivadomed.keywords import IgnoredFolderKW
 
 
 DICT_URL = {
@@ -22,8 +23,11 @@
         `Spine Generic `_.
Used for Tutorial and example in Ivadomed."""}, "data_testing": { - "url": ["https://github.com/ivadomed/data-testing/archive/r20210628.zip"], + "url": ["https://github.com/ivadomed/data-testing/archive/r20240130.zip"], "description": "Data Used for integration/unit test in Ivadomed."}, + "data_multi_testing": { + "url": ["https://github.com/MotionCorrect/data_multi-sessions-contrasts/archive/refs/tags/v2022-01-06.zip"], + "description": "Large Data Used for multi-session contrasts integration/unit test in Ivadomed."}, "t2_tumor": { "url": ["https://github.com/ivadomed/t2_tumor/archive/r20200621.zip"], "description": "Cord tumor segmentation model, trained on T2-weighted contrast."}, @@ -47,10 +51,12 @@ "url": ["https://github.com/ivadomed/model_find_disc_t2/archive/r20200928.zip"], "description": "Intervertebral disc detection model trained on T2-weighted images."}, "data_functional_testing": { - "url": ["https://github.com/ivadomed/data_functional_testing/archive/r20210617.zip"], - "description": "Data used for functional testing in Ivadomed." - } - + "url": ["https://github.com/ivadomed/data_functional_testing/archive/r20211002.zip"], + "description": "Data used for functional testing in Ivadomed."}, + "data_axondeepseg_sem": { + "url": ["https://github.com/axondeepseg/data_axondeepseg_sem/archive/r20211130.zip"], + "description": """SEM dataset for AxonDeepSeg. 10 rat spinal cord samples with axon and myelin + manual segmentation labels. Used for microscopy tutorial in ivadomed."""}, } @@ -93,30 +99,30 @@ def download_data(urls): response = session.get(url, stream=True) response.raise_for_status() - filename = os.path.basename(urllib.parse.urlparse(url).path) + filename = Path(urllib.parse.urlparse(url).path).name if "Content-Disposition" in response.headers: _, content = cgi.parse_header(response.headers['Content-Disposition']) filename = content["filename"] # protect against directory traversal - filename = os.path.basename(filename) + filename = Path(filename).name if not filename: # this handles cases where you're loading something like an index page # instead of a specific file. e.g. https://osf.io/ugscu/?action=view. raise ValueError("Unable to determine target filename for URL: %s" % (url,)) - tmp_path = os.path.join(tempfile.mkdtemp(), filename) + tmp_path = Path(tempfile.mkdtemp(), filename) logger.info('Downloading: %s' % filename) - with open(tmp_path, 'wb') as tmp_file: + with tmp_path.open(mode='wb') as tmp_file: total = int(response.headers.get('content-length', 1)) for chunk in response.iter_content(chunk_size=8192): if chunk: tmp_file.write(chunk) - return tmp_path + return str(tmp_path) except Exception as e: logger.warning("Link download error, trying next mirror (error was: %s)" % e) @@ -143,7 +149,7 @@ def unzip(compressed, dest_folder): try: open(compressed).extractall(dest_folder) except Exception: - print('ERROR: ZIP package corrupted. Please try downloading again.') + logger.error("ERROR: ZIP package corrupted. Please try downloading again.") raise @@ -196,10 +202,10 @@ def install_data(url, dest_folder, keep=False): keep (bool): whether to keep existing data in the destination folder (if it exists). 
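download_data above streams each mirror through a requests session mounted with a urllib3 Retry adapter (see the imports in this file's diff). A generic sketch of that pattern; the retry counts, backoff and status codes here are illustrative, not ivadomed's exact settings:

import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util import Retry

session = requests.Session()
retries = Retry(total=5, backoff_factor=0.5,
                status_forcelist=(500, 502, 503, 504))
session.mount("https://", HTTPAdapter(max_retries=retries))

# Transient server errors are retried with exponential backoff before
# the exception propagates and the next mirror is attempted.
response = session.get("https://example.com/archive.zip", stream=True)
response.raise_for_status()
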
Flag ``-k``, ``--keep`` """ - if not keep and os.path.exists(dest_folder): + if not keep and Path(dest_folder).exists(): logger.warning("Removing existing destination folder “%s”", dest_folder) shutil.rmtree(dest_folder) - os.makedirs(dest_folder, exist_ok=True) + Path(dest_folder).mkdir(parents=True, exist_ok=True) tmp_file = download_data(url) @@ -208,58 +214,50 @@ def install_data(url, dest_folder, keep=False): unzip(tmp_file, extraction_folder) # Identify whether we have a proper archive or a tarbomb - with os.scandir(extraction_folder) as it: - has_dir = False - nb_entries = 0 - for entry in it: - if entry.name in ("__MACOSX",): - continue - nb_entries += 1 - if entry.is_dir(): - has_dir = True + has_dir = False + nb_entries = 0 + for path_object in Path(extraction_folder).iterdir(): + if path_object.name in (IgnoredFolderKW.MACOSX,): + continue + nb_entries += 1 + if path_object.is_dir(): + has_dir = True if nb_entries == 1 and has_dir: # tarball with single-directory -> go under - with os.scandir(extraction_folder) as it: - for entry in it: - if entry.name in ("__MACOSX",): - continue - bundle_folder = entry.path + for path_object in Path(extraction_folder).iterdir(): + if path_object.name in (IgnoredFolderKW.MACOSX,): + continue + bundle_folder = path_object else: # bomb scenario -> stay here - bundle_folder = extraction_folder + bundle_folder: Path = Path(extraction_folder) # Copy over - for cwd, ds, fs in os.walk(bundle_folder): - ds.sort() - fs.sort() - ds[:] = [d for d in ds if d not in ("__MACOSX",)] - for d in ds: - srcpath = os.path.join(cwd, d) - relpath = os.path.relpath(srcpath, bundle_folder) - dstpath = os.path.join(dest_folder, relpath) - if os.path.exists(dstpath): - # lazy -- we assume existing is a directory, otherwise it will crash safely - logger.debug("- d- %s", relpath) - else: - logger.debug("- d+ %s", relpath) - os.makedirs(dstpath) - - for f in fs: - srcpath = os.path.join(cwd, f) - relpath = os.path.relpath(srcpath, bundle_folder) - dstpath = os.path.join(dest_folder, relpath) - if os.path.exists(dstpath): + for path_object in bundle_folder.glob("**/*"): + if path_object.is_dir(): + if path_object.name not in (IgnoredFolderKW.MACOSX,): + relpath = path_object.relative_to(bundle_folder) + dstpath = Path(dest_folder, relpath) + if dstpath.exists(): + logger.debug("- d- %s", str(relpath)) + else: + logger.debug("- d+ %s", relpath) + dstpath.mkdir(parents=True) + if path_object.is_file(): + relpath = path_object.relative_to(bundle_folder) + dstpath = Path(dest_folder, relpath) + if dstpath.exists(): logger.debug("- f! 
%s", relpath) logger.warning("Updating existing “%s”", dstpath) - os.unlink(dstpath) + dstpath.unlink() else: logger.debug("- f+ %s", relpath) - shutil.copy(srcpath, dstpath) + shutil.copy(str(path_object), str(dstpath)) logger.info("Removing temporary folders...") logger.info("Folder Created: {}".format(dest_folder)) - shutil.rmtree(os.path.dirname(tmp_file)) + shutil.rmtree(str(Path(tmp_file).parent)) shutil.rmtree(extraction_folder) @@ -283,7 +281,7 @@ def main(args=None): data_name = arguments.d if arguments.output is None: - dest_folder = os.path.join(os.path.abspath(os.curdir), data_name) + dest_folder = str(Path(Path.cwd().absolute(), data_name)) else: dest_folder = arguments.output diff --git a/ivadomed/scripts/extract_small_dataset.py b/ivadomed/scripts/extract_small_dataset.py index 561776c32..53fb3be0a 100644 --- a/ivadomed/scripts/extract_small_dataset.py +++ b/ivadomed/scripts/extract_small_dataset.py @@ -1,11 +1,13 @@ #!/usr/bin/env python -import os import shutil import argparse import numpy as np import pandas as pd from ivadomed import utils as imed_utils +from pathlib import Path +from typing import List +from loguru import logger EXCLUDED_SUBJECT = ["sub-mniPilot1"] @@ -42,11 +44,16 @@ def is_good_contrast(fname, good_contrast_list): def remove_some_contrasts(folder, subject_list, good_contrast_list): - file_list = [os.path.join(folder, s, "anat", f) for s in subject_list - for f in os.listdir(os.path.join(folder, s, "anat"))] - rm_file_list = [f for f in file_list if not is_good_contrast(f, good_contrast_list)] - for ff in rm_file_list: - os.remove(ff) + file_list: List[Path] = [] + for s in subject_list: + for f in Path(folder, s, "anat").iterdir(): + file_list.append(f) + rm_file_list: List[Path] = [] + for file in file_list: + if not is_good_contrast(str(file), good_contrast_list): + rm_file_list.append(file) + for file in rm_file_list: + file.unlink() def extract_small_dataset(input, output, n=10, contrast_list=None, include_derivatives=True, @@ -71,21 +78,21 @@ def extract_small_dataset(input, output, n=10, contrast_list=None, include_deriv each function run is independent. Flag: ``--seed``, ``-s``. 
""" # Create output folders - if not os.path.isdir(output): - os.makedirs(output) + if not Path(output).is_dir(): + Path(output).mkdir(parents=True) if include_derivatives: - oderivatives = os.path.join(output, "derivatives") - if not os.path.isdir(oderivatives): - os.makedirs(oderivatives) - oderivatives = os.path.join(oderivatives, "labels") - if not os.path.isdir(oderivatives): - os.makedirs(oderivatives) - iderivatives = os.path.join(input, "derivatives", "labels") + out_derivatives = Path(output, "derivatives") + if not out_derivatives.is_dir(): + out_derivatives.mkdir(parents=True) + out_derivatives = Path(out_derivatives, "labels") + if not out_derivatives.is_dir(): + out_derivatives.mkdir(parents=True) + in_derivatives = Path(input, "derivatives", "labels") # Get subject list - subject_list = [s for s in os.listdir(input) - if s.startswith("sub-") and os.path.isdir(os.path.join(input, s)) - and s not in EXCLUDED_SUBJECT] + subject_list = [s.name for s in Path(input).iterdir() + if s.name.startswith("sub-") and s.is_dir() + and s.name not in EXCLUDED_SUBJECT] # Randomly select subjects if seed != -1: @@ -97,49 +104,49 @@ def extract_small_dataset(input, output, n=10, contrast_list=None, include_deriv # Loop across subjects for subject in subject_random_list: - print("\nSubject: {}".format(subject)) + logger.debug(f"\nSubject: {subject}") # Copy images - isubjfolder = os.path.join(input, subject) - osubjfolder = os.path.join(output, subject) - assert os.path.isdir(isubjfolder) - print("\tCopying {} to {}.".format(isubjfolder, osubjfolder)) - shutil.copytree(isubjfolder, osubjfolder) + in_subj_folder = Path(input, subject) + out_subj_folder = Path(output, subject) + assert in_subj_folder.is_dir() + logger.debug(f"\tCopying {in_subj_folder} to {out_subj_folder}.") + shutil.copytree(str(in_subj_folder), str(out_subj_folder)) # Remove dwi data - if os.path.isdir(os.path.join(output, subject, "dwi")): - shutil.rmtree(os.path.join(output, subject, "dwi")) + if Path(output, subject, "dwi").is_dir(): + shutil.rmtree(str(Path(output, subject, "dwi"))) # Copy labels if include_derivatives: - isubjderivatives = os.path.join(iderivatives, subject) - osubjderivatives = os.path.join(oderivatives, subject) - assert os.path.isdir(isubjderivatives) - print("\tCopying {} to {}.".format(isubjderivatives, osubjderivatives)) - shutil.copytree(isubjderivatives, osubjderivatives) + in_subj_derivatives = Path(in_derivatives, subject) + out_subj_derivatives = Path(out_derivatives, subject) + assert in_subj_derivatives.is_dir() + logger.debug(f"\tCopying {in_subj_derivatives} to {out_subj_derivatives}.") + shutil.copytree(str(in_subj_derivatives), str(out_subj_derivatives)) # Remove dwi data - if os.path.isdir(os.path.join(osubjderivatives, subject, "dwi")): - shutil.rmtree(os.path.join(osubjderivatives, subject, "dwi")) + if Path(out_subj_derivatives, subject, "dwi").is_dir(): + shutil.rmtree(str(Path(out_subj_derivatives, subject, "dwi"))) if contrast_list: remove_some_contrasts(output, subject_random_list, contrast_list) if include_derivatives: - remove_some_contrasts(os.path.join(output, "derivatives", "labels"), + remove_some_contrasts(str(Path(output, "derivatives", "labels")), subject_random_list, contrast_list) # Copy dataset_description.json - idatasetjson = os.path.join(input, "dataset_description.json") - odatasetjson = os.path.join(output, "dataset_description.json") - shutil.copyfile(idatasetjson, odatasetjson) + in_dataset_json = Path(input, "dataset_description.json") + out_dataset_json = 
Path(output, "dataset_description.json") + shutil.copyfile(str(in_dataset_json), str(out_dataset_json)) # Copy participants.json if it exist - if os.path.isfile(os.path.join(input, "participants.json")): - iparticipantsjson = os.path.join(input, "participants.json") - oparticipantsjson = os.path.join(output, "participants.json") - shutil.copyfile(iparticipantsjson, oparticipantsjson) + if Path(input).joinpath("participants.json").is_file(): + in_participants_json = Path(input, "participants.json") + out_participants_json = Path(output, "participants.json") + shutil.copyfile(str(in_participants_json), str(out_participants_json)) # Copy participants.tsv - iparticipantstsv = os.path.join(input, "participants.tsv") - oparticipantstsv = os.path.join(output, "participants.tsv") - df = pd.read_csv(iparticipantstsv, sep='\t') + in_participants_tsv = Path(input, "participants.tsv") + out_participants_tsv = Path(output, "participants.tsv") + df = pd.read_csv(str(in_participants_tsv), sep='\t') # Drop subjects df = df[df.participant_id.isin(subject_random_list)] - df.to_csv(oparticipantstsv, sep='\t', index=False) + df.to_csv(str(out_participants_tsv), sep='\t', index=False) def main(args=None): diff --git a/ivadomed/scripts/prepare_dataset_vertebral_labeling.py b/ivadomed/scripts/prepare_dataset_vertebral_labeling.py index c5b3a2b2a..6d71c4718 100644 --- a/ivadomed/scripts/prepare_dataset_vertebral_labeling.py +++ b/ivadomed/scripts/prepare_dataset_vertebral_labeling.py @@ -3,9 +3,9 @@ import ivadomed.preprocessing as imed_preprocessing import nibabel as nib import numpy as np -import os import ivadomed.maths as imed_maths import ivadomed.loader.utils as imed_loader_utils +from pathlib import Path def mask2label(path_label, aim=0): @@ -62,21 +62,20 @@ def extract_mid_slice_and_convert_coordinates_to_heatmaps(path, suffix, aim=-1): Returns: None. 
Images are saved in BIDS folder
     """
-    t = os.listdir(path)
-    t.remove('derivatives')
+    t = [path_object.name for path_object in Path(path).iterdir() if path_object.name != 'derivatives']
 
     for i in range(len(t)):
-        sub = t[i]
-        path_image = os.path.join(path, t[i], 'anat', t[i] + suffix + '.nii.gz')
-        if os.path.isfile(path_image):
-            path_label = os.path.join(path, 'derivatives', 'labels', t[i], 'anat', t[i] + suffix +
+        subject = t[i]
+        path_image = Path(path, subject, 'anat', subject + suffix + '.nii.gz')
+        if path_image.is_file():
+            path_label = Path(path, 'derivatives', 'labels', subject, 'anat', subject + suffix +
                               '_labels-disc-manual.nii.gz')
-            list_points = mask2label(path_label, aim=aim)
+            list_points = mask2label(str(path_label), aim=aim)
             image_ref = nib.load(path_image)
             nib_ref_can = nib.as_closest_canonical(image_ref)
             imsh = np.array(nib_ref_can.dataobj).shape
-            mid_nifti = imed_preprocessing.get_midslice_average(path_image, list_points[0][0], slice_axis=0)
-            nib.save(mid_nifti, os.path.join(path, t[i], 'anat', t[i] + suffix + '_mid.nii.gz'))
+            mid_nifti = imed_preprocessing.get_midslice_average(str(path_image), list_points[0][0], slice_axis=0)
+            nib.save(mid_nifti, Path(path, subject, 'anat', subject + suffix + '_mid.nii.gz'))
             lab = nib.load(path_label)
             nib_ref_can = nib.as_closest_canonical(lab)
             label_array = np.zeros(imsh[1:])
@@ -87,7 +86,7 @@ def extract_mid_slice_and_convert_coordinates_to_heatmaps(path, suffix, aim=-1):
             heatmap = imed_maths.heatmap_generation(label_array[:, :], 10)
             arr_pred_ref_space = imed_loader_utils.reorient_image(np.expand_dims(heatmap[:, :], axis=0), 2, lab, nib_ref_can)
             nib_pred = nib.Nifti1Image(arr_pred_ref_space, lab.affine)
-            nib.save(nib_pred, os.path.join(path, 'derivatives', 'labels', t[i], 'anat', t[i] + suffix +
+            nib.save(nib_pred, Path(path, 'derivatives', 'labels', subject, 'anat', subject + suffix +
                                 '_mid_heatmap' + str(aim) + '.nii.gz'))
         else:
             pass
diff --git a/ivadomed/scripts/segment_image.py b/ivadomed/scripts/segment_image.py
new file mode 100644
index 000000000..019c276d0
--- /dev/null
+++ b/ivadomed/scripts/segment_image.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python
+"""
+This script applies a trained model on a single image. Outputs are generated in the current directory.
+"""
+
+# TODO: create entry_points in setup.py and update docstrings usage
+# TODO: 'add_suffix' and 'splitext' should be moved to utils library (if it makes sense).
+
+import argparse
+import os
+import nibabel as nib
+
+from ivadomed import inference as imed_inference
+from ivadomed import utils as imed_utils
+
+
+def get_parser():
+    parser = argparse.ArgumentParser(
+        prog='segment_image',
+        description='Applies a trained model on a single image. Outputs are generated in the current directory.')
+    parser.add_argument("-i", "--image", nargs='+', required=True,
+                        help="Image(s) to segment. You can specify more than one image (separate with space).",
+                        metavar=imed_utils.Metavar.file)
+    parser.add_argument("-m", "--model", required=True,
+                        help="Path to folder that contains ONNX and/or PT model and ivadomed JSON config file.",
+                        metavar=imed_utils.Metavar.folder)
+    parser.add_argument("-s", "--suffix", default="_pred",
+                        help="Suffix to add to the input image. Default: '_pred'",
+                        metavar=imed_utils.Metavar.str)
+    return parser
+
+
+def add_suffix(fname, suffix):
+    """
+    Add suffix between end of file name and extension.
+
+    :param fname: absolute or relative file name. Example: t2.nii
+    :param suffix: suffix. Example: _mean
+    :return: file name with suffix.
Example: t2_mean.nii
+
+    Examples:
+    .. code:: python
+
+        add_suffix(t2.nii, _mean) -> t2_mean.nii
+        add_suffix(t2.nii.gz, a) -> t2a.nii.gz
+    """
+    stem, ext = splitext(fname)
+    return stem + suffix + ext
+
+
+def splitext(fname):
+    """
+    Split a fname (folder/file + ext) into a folder/file and extension.
+
+    Note: for .nii.gz the extension is understandably .nii.gz, not .gz
+    (``os.path.splitext()`` would want to do the latter, hence the special case).
+    """
+    dir_, filename = os.path.split(fname)
+    for special_ext in ['.nii.gz', '.tar.gz']:
+        if filename.endswith(special_ext):
+            stem, ext = filename[:-len(special_ext)], special_ext
+            break
+    else:
+        stem, ext = os.path.splitext(filename)
+
+    return os.path.join(dir_, stem), ext
+
+
+def segment_image(fname_images: list, path_model: str, suffix_out: str, options: dict):
+    """
+    Applies a trained model on image(s). Output predictions are generated in the current directory.
+
+    For example::
+
+        ivadomed_segment_image -i t2s.nii.gz -m /usr/bob/my_model_directory
+
+    Args:
+        fname_images (list): Image(s) to segment. You can specify more than one image (separate with space). Flag: ``--image``, ``-i``
+        path_model (str): Path to folder that contains ONNX and/or PT model and ivadomed JSON config file. Flag: ``--model``, ``-m``
+        suffix_out (str): Suffix to add to the input image. Default: '_pred'. Flag: ``--suffix``, ``-s``
+        options (dict): Options to pass to `imed_inference.segment_volume`.
+
+    Returns:
+        None
+    """
+    nii_lst, target_lst = imed_inference.segment_volume(path_model, fname_images, options=options)
+
+    for i in range(len(nii_lst)):
+        # TODO (minor): make path_out output images in the same dir as the input image.
+        path_out = './'
+        file_out = add_suffix(os.path.basename(fname_images[i]), suffix_out)
+        nib.save(nii_lst[i], os.path.join(path_out, file_out))
+
+    # TODO: add support for PNG
+    # imed_inference.pred_to_png(nii_lst, target_lst, "/image")
+
+    # TODO: display a nice message at the end with syntax for FSLeyes if input is a NIfTI file.
+
+
+def main(args=None):
+    imed_utils.init_ivadomed()
+    parser = get_parser()
+    args = imed_utils.get_arguments(parser, args)
+    # options = {"pixel_size": [0.13, 0.13], "overlap_2D": [48, 48], "binarize_maxpooling": True}
+    # TODO: the 'no_patch' option does not seem to work as expected, because only a fraction of the image is segmented.
+    # options = {"no_patch": True}
+    options = {}
+    segment_image(args.image, args.model, args.suffix, options)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/ivadomed/scripts/training_curve.py b/ivadomed/scripts/training_curve.py
index e1058532d..8df483e5b 100644
--- a/ivadomed/scripts/training_curve.py
+++ b/ivadomed/scripts/training_curve.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
-import os
 import argparse
+import warnings
 import numpy as np
 from collections import defaultdict
 import pandas as pd
@@ -9,48 +9,77 @@
 from textwrap import wrap
 from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
 from ivadomed import utils as imed_utils
+from pathlib import Path
+from loguru import logger
 
 
 def get_parser():
     parser = argparse.ArgumentParser()
     parser.add_argument("-i", "--input", required=True, type=str,
                         help="""Input path. If using --multiple, this parameter indicates
-                                the suffix path of all log directories of interest. To compare
-                                trainings or set of trainings (using ``--multiple``) with subplots,
-                                please list the paths by separating them with commas, e.g.
- path_output1,path_output2.""", + the prefix path of all log directories of interest. To compare + trainings (not using ``--multiple``) or set of trainings + (using ``--multiple``) with subplots, please list the paths by separating + them with commas, e.g. path_output1,path_output2.""", metavar=imed_utils.Metavar.str) parser.add_argument("--multiple", required=False, dest="multiple", action='store_true', help="""Multiple log directories are considered: all available folders with -i as prefix. The plot represents the mean value (hard line) surrounded by the standard deviation envelope.""") + parser.add_argument("--lr", required=False, dest="learning_rate", action='store_true', + help="""Summary event file for learning rate is considered, the limits on + the y-axis plot are automatically defined.""") parser.add_argument("-y", "--ylim_loss", required=False, type=str, help="""Indicates the limits on the y-axis for the loss plots, otherwise these limits are automatically defined. Please separate the lower and the upper limit by a comma, e.g. -1,0. Note: for the validation - metrics: the y-limits are always 0.0 and 1.0.""", + metrics: the y-limits are always 0.0 and 1.0 except for the hausdorff + score where the limits are automatically defined.""", metavar=imed_utils.Metavar.float) parser.add_argument("-o", "--output", required=True, type=str, help="Output folder.", metavar=imed_utils.Metavar.file) return parser -def check_events_numbers(input_folder): - """Check to make sure there is at most one summary in any folder or any subfolder. +def get_events_path_list(input_folder, learning_rate): + """Check to make sure there is at most one summary event in any folder or any subfolder, + and returns a list of summary event paths. A summary is defined as any file of the format ``events.out.tfevents.{...}``` Args: input_folder (str): Input folder path. + learning_rate (bool): Indicate if learning_rate is considered. 
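The --multiple mode described in the help text above draws the mean across trainings as a hard line inside a standard-deviation envelope. A toy matplotlib rendering of that idea, with random walks standing in for real loss curves:

import numpy as np
import matplotlib
matplotlib.use("Agg")            # headless backend keeps this runnable without a display
import matplotlib.pyplot as plt

epochs = np.arange(1, 51)
runs = np.abs(np.cumsum(np.random.randn(5, 50), axis=1))   # 5 fake trainings

mean, std = runs.mean(axis=0), runs.std(axis=0)
fig, ax = plt.subplots()
ax.plot(epochs, mean)                                        # hard line
ax.fill_between(epochs, mean - std, mean + std, alpha=0.3)   # std envelope
ax.set_xlabel("epoch")
ax.set_ylabel("loss")
fig.savefig("losses.png")
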
+ Returns: + list : a list of events paths """ - for fold in os.listdir(input_folder): - fold_path = os.path.join(input_folder, fold) - if os.path.isdir(fold_path): - event_list = [f for f in os.listdir(fold_path) if f.startswith("events.out.tfevents.")] + events_path_list = [] + + # Check for events file in sub-folders + for fold_path in Path(input_folder).iterdir(): + if fold_path.is_dir(): + event_list = [f.name for f in fold_path.iterdir() if f.name.startswith("events.out.tfevents.")] if len(event_list): if len(event_list) > 1: raise ValueError(f"Multiple summary found in this folder: {fold_path}.\n" f"Please keep only one before running this script again.") + else: + events_path_list.append(fold_path) + # Sort events_path_list alphabetically + events_path_list = sorted(events_path_list) + + if learning_rate: + # Check for events file at the root of input_folder (contains learning_rate) + event_list = [f.name for f in Path(input_folder).iterdir() if f.name.startswith("events.out.tfevents.")] + if len(event_list): + if len(event_list) > 1: + raise ValueError(f"Multiple summary found in this folder: {Path(input_folder)}.\n" + f"Please keep only one before running this script again.") + else: + # Append learning_rate events file at the end of events_path_list + events_path_list.append(Path(input_folder)) + + return events_path_list def plot_curve(data_list, y_label, fig_ax, subplot_title, y_lim=None): @@ -83,17 +112,21 @@ def plot_curve(data_list, y_label, fig_ax, subplot_title, y_lim=None): fig_ax.set_ylabel(y_label) if y_lim is not None: fig_ax.set_ylim(y_lim) + + warnings.filterwarnings("ignore", category=UserWarning) fig_ax.set_xlim([1, max_nb_epoch]) fig_ax.title.set_text('\n'.join(wrap(subplot_title, 80))) -def run_plot_training_curves(input_folder, output_folder, multiple_training=False, y_lim_loss=None): - """Utility function to plot the training curves. +def run_plot_training_curves(input_folder, output_folder, multiple_training=False, learning_rate=False, + y_lim_loss=None): + """Utility function to plot the training curves and save data as .csv files. This function uses the TensorFlow summary that is generated during a training to plot for each epoch: - - the training against the validation loss - - the metrics computed on the validation sub-dataset. + - the training against the validation loss, + - the metrics computed on the validation sub-dataset, + - the learning rate if learning_rate is True. It could consider one output path at a time, for example: @@ -117,25 +150,30 @@ def run_plot_training_curves(input_folder, output_folder, multiple_training=Fals Args: input_folder (str): Input path name. Flag: ``--input``, ``-i``. If using ``--multiple``, - this parameter indicates the suffix path of all log directories of interest. To compare - trainings or set of trainings (using ``--multiple``) with subplots, please list the - paths by separating them with commas, e.g. path_output1, path_output2 + this parameter indicates the prefix path of all log directories of interest. To compare + trainings (not using ``--multiple``) or set of trainings (using ``--multiple``) with subplots, + please list the paths by separating them with commas, e.g. path_output1, path_output2 output_folder (str): Output folder. Flag: ``--output``, ``-o``. multiple_training (bool): Indicates if multiple log directories are considered (``True``) or not (``False``). Flag: ``--multiple``. All available folders with ``-i`` as prefix are considered. 
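get_events_path_list above enforces at most one TensorBoard summary per subfolder and collects the matches in sorted order. A rough standalone equivalent of that discovery rule (the function name is ours):

from pathlib import Path

def find_event_dirs(input_folder):
    # At most one events.out.tfevents.* file per subfolder; offenders raise,
    # matches are collected and returned alphabetically sorted.
    found = []
    for fold_path in Path(input_folder).iterdir():
        if not fold_path.is_dir():
            continue
        events = list(fold_path.glob("events.out.tfevents.*"))
        if len(events) > 1:
            raise ValueError(f"Multiple summary found in this folder: {fold_path}.")
        if events:
            found.append(fold_path)
    return sorted(found)
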
The plot represents the mean value (hard line) surrounded by the standard deviation (envelope). - y_lim_loss (list): List of the lower and upper limits of the y-axis of the loss plot. + learning_rate (bool): Indicates if the summary event file for learning rate is considered (``True``) + or not (``False``). Flag: ``--lr``. The limits on the y-axis plot are automatically defined. + y_lim_loss (list): List of the lower and upper limits of the y-axis of the loss plot, otherwise + these limits are automatically defined. Please separate the lower and the upper limit by a + comma, e.g. -1,0. Note: for the validation metrics: the y-limits are always 0.0 and 1.0 except + for the hausdorff score where the limits are automatically defined. """ group_list = input_folder.split(",") plt_dict = {} # Create output folder - if os.path.isdir(output_folder): - print(f"Output folder already exists: {output_folder}.") + if Path(output_folder).is_dir(): + logger.warning(f"Output folder already exists: {output_folder}.") else: - print(f"Creating output folder: {output_folder}.") - os.makedirs(output_folder) + logger.info(f"Creating output folder: {output_folder}.") + Path(output_folder).mkdir(parents=True) # Config subplots if len(group_list) > 1: @@ -145,24 +183,25 @@ def run_plot_training_curves(input_folder, output_folder, multiple_training=Fals n_cols, n_rows = 1, 1 for i_subplot, input_folder in enumerate(group_list): - input_folder = os.path.expanduser(input_folder) + input_folder = Path(input_folder).expanduser() # Find training folders: if multiple_training: - prefix = str(input_folder.split('/')[-1]) - input_folder = '/'.join(input_folder.split('/')[:-1]) - input_folder_list = [os.path.join(input_folder, f) for f in os.listdir(input_folder) if - f.startswith(prefix)] + prefix = input_folder.name + input_folder = input_folder.parent + input_folder_list = [f for f in input_folder.iterdir() if + f.name.startswith(prefix)] else: - prefix = str(input_folder.split('/')[-1]) + prefix = input_folder.name input_folder_list = [input_folder] events_df_list = [] for path_output in input_folder_list: # Find tf folders - check_events_numbers(path_output) + events_path_list = get_events_path_list(str(path_output), learning_rate) - # Get data as dataframe - events_vals_df = tensorboard_retrieve_event(path_output) + # Get data as dataframe and save as .csv file + events_vals_df = tensorboard_retrieve_event(events_path_list) + events_vals_df.to_csv(Path(output_folder, str(path_output.name) + "_training_values.csv")) # Store data events_df_list.append(events_vals_df) @@ -170,54 +209,60 @@ def run_plot_training_curves(input_folder, output_folder, multiple_training=Fals # Plot train and valid losses together loss_keys = [k for k in events_df_list[0].keys() if k.endswith("loss")] if i_subplot == 0: # Init plot - plt_dict[os.path.join(output_folder, "losses.png")] = plt.figure(figsize=(10 * n_cols, 5 * n_rows)) - ax = plt_dict[os.path.join(output_folder, "losses.png")].add_subplot(n_rows, n_cols, i_subplot + 1) + plt_dict[str(Path(output_folder, "losses.png"))] = plt.figure(figsize=(10 * n_cols, 5 * n_rows)) + ax = plt_dict[str(Path(output_folder, "losses.png"))].add_subplot(n_rows, n_cols, i_subplot + 1) plot_curve([df[loss_keys] for df in events_df_list], y_label="loss", fig_ax=ax, subplot_title=prefix, y_lim=y_lim_loss) - # Plot each validation metric separetly + # Plot each validation metric and learning rate separately for tag in events_df_list[0].keys(): if not tag.endswith("loss"): if i_subplot == 0: # Init plot - 
plt_dict[os.path.join(output_folder, tag + ".png")] = plt.figure(figsize=(10 * n_cols, 5 * n_rows)) - ax = plt_dict[os.path.join(output_folder, tag + ".png")].add_subplot(n_rows, n_cols, i_subplot + 1) + plt_dict[str(Path(output_folder, tag + ".png"))] = plt.figure(figsize=(10 * n_cols, 5 * n_rows)) + ax = plt_dict[str(Path(output_folder, tag + ".png"))].add_subplot(n_rows, n_cols, i_subplot + 1) + y_lim = None if (tag.startswith("hausdorff") or tag.startswith("learning_rate")) else [0, 1] plot_curve(data_list=[df[[tag]] for df in events_df_list], y_label=tag, fig_ax=ax, subplot_title=prefix, - y_lim=[0, 1]) + y_lim=y_lim) for fname_out in plt_dict: plt_dict[fname_out].savefig(fname_out) -def tensorboard_retrieve_event(path_output): +def tensorboard_retrieve_event(events_path_list): """Retrieve data from tensorboard summary event. Args: - path_output (str): output path where the event files are located + events_path_list (list): list of events paths Returns: df: a panda dataframe where the columns are the metric or loss and the row are the epochs. """ - # TODO : Find a way to not hardcode this list of metrics/loss - # These list of metrics and losses are in the same order as in the training file (where they are written) - list_metrics = ['dice_score', 'multiclass dice_score', 'hausdorff_score', 'precision_score', - 'recall_score', 'specificity_score', 'intersection_over_union', 'accuracy_score'] - - list_loss = ['train_loss', 'validation_loss'] + # Lists of metrics and losses in the same order as in events_path_list + list_metrics = [] + list_loss = [] + for events in events_path_list: + if str(events.name).startswith("Validation_Metrics_"): + metric_name = str(events.name.split("Validation_Metrics_")[1]) + list_metrics.append(metric_name) + elif str(events.name).startswith("losses_"): + loss_name = str(events.name.split("losses_")[1]) + list_loss.append(loss_name) # Each element in the summary iterator represent an element (e.g., scalars, images..) - # stored in the summary for all epochs in the form of event. - summary_iterators = [EventAccumulator(os.path.join(path_output, dname)).Reload() for dname in os.listdir(path_output)] + # stored in the summary for all epochs in the form of event, in the same order as in events_path_list. 
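As a reference for the retrieval loop that follows, here is a minimal standalone sketch of reading one event file back with `EventAccumulator`; the directory name and the printed tags are hypothetical, only the API calls mirror the patch:

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Hypothetical event directory written by SummaryWriter during a training run
acc = EventAccumulator("path_output/losses_train_loss").Reload()

# Tags()['scalars'] lists the scalar tags; Scalars(tag) yields
# (wall_time, step, value) events, one per logged epoch
for tag in acc.Tags()["scalars"]:
    for event in acc.Scalars(tag):
        print(tag, event.step, event.value)
```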
+ summary_iterators = [EventAccumulator(str(events)).Reload() for events in events_path_list] metrics = defaultdict(list) num_metrics = 0 num_loss = 0 + num_lr = 0 for i in range(len(summary_iterators)): if summary_iterators[i].Tags()['scalars'] == ['Validation/Metrics']: @@ -238,9 +283,15 @@ def tensorboard_retrieve_event(path_output): out[events.step - 1] = events.value metrics[list_loss[num_loss]] = out num_loss += 1 + elif summary_iterators[i].Tags()['scalars'] == ['learning_rate']: + out = [0 for i in range(len(summary_iterators[i].Scalars("learning_rate")))] + for events in summary_iterators[i].Scalars("learning_rate"): + out[events.step - 1] = events.value + metrics['learning_rate'] = out + num_lr += 1 - if num_loss == 0 and num_metrics == 0: - raise Exception('No metrics or losses found in the event') + if num_loss == 0 and num_metrics == 0 and num_lr == 0: + raise Exception('No metrics, losses or learning rate found in the event') metrics_df = pd.DataFrame.from_dict(metrics) return metrics_df @@ -252,7 +303,8 @@ def main(args=None): y_lim_loss = [int(y) for y in args.ylim_loss.split(',')] if args.ylim_loss else None run_plot_training_curves(input_folder=args.input, output_folder=args.output, - multiple_training=args.multiple, y_lim_loss=y_lim_loss) + multiple_training=args.multiple, learning_rate=args.learning_rate, + y_lim_loss=y_lim_loss) if __name__ == '__main__': diff --git a/ivadomed/scripts/visualize_and_compare_testing_models.py b/ivadomed/scripts/visualize_and_compare_testing_models.py index 8f3e1ad81..462158638 100644 --- a/ivadomed/scripts/visualize_and_compare_testing_models.py +++ b/ivadomed/scripts/visualize_and_compare_testing_models.py @@ -9,32 +9,24 @@ ########################################################################################################### import matplotlib +from matplotlib import pyplot as plt import pandas as pd -import os import numpy as np import itertools import seaborn as sns from scipy.stats import ks_2samp from ivadomed.utils import init_ivadomed +from pathlib import Path +from loguru import logger import argparse + matplotlib.rcParams['toolbar'] = 'None' # Remove buttons -gui_env = ['TKAgg', 'GTKAgg', 'Qt4Agg', 'WXAgg'] -selected_gui_env = [] -for gui in gui_env: - try: - matplotlib.use(gui) - from matplotlib import pyplot as plt - selected_gui_env = gui - break - except: - continue -# If none works -if selected_gui_env == []: - from matplotlib import pyplot as plt - print("No backend can be used - Visualization will fail") +if matplotlib.get_backend() == "agg": + logger.warning("No backend can be used - Visualization will fail") else: - print("Using:", matplotlib.get_backend() + " gui") + logger.info(f"Using: {matplotlib.get_backend()} gui") + # ---------------------------------------------------------------------------------------------------------------------# @@ -45,14 +37,13 @@ def get_parser(): help="List of log folders from different models.") parser.add_argument("--metric", default='dice_class0', nargs=1, type=str, dest="metric", help="Metric from evaluation_3Dmetrics.csv to base the plots on.") - parser.add_argument("--metadata", required=False, nargs=2, type=str, dest="metadata", + parser.add_argument("--metadata", required=False, nargs=2, type=str, dest="metadata", help="Selection based on metadata from participants.tsv:" "(1) Label from column (2) string to match") return parser def onclick(event, df): - # Get the index of the selected violinplot datapoint # WARNING: More than one can be selected if they are very close to 
each other # If that's the case, all will be displayed @@ -66,7 +57,8 @@ def onclick(event, df): # Remove the previously displayed subject(s) # This also takes care of the case where more than one subjects are displayed - while len(fig.texts) > nfolders+np.math.factorial(nfolders)/(np.math.factorial(2)*np.math.factorial(nfolders-2)): + while len(fig.texts) > nfolders + np.math.factorial(nfolders) / ( + np.math.factorial(2) * np.math.factorial(nfolders - 2)): fig.texts.pop() # This is a hack to find the index of the Violinplot - There should be another way to get this from the @@ -77,7 +69,7 @@ def onclick(event, df): selected_output_folder = df[df["EvaluationModel"] == output_folders[i_output_folder]] for iSubject in range(0, len(clicked_index.tolist())): - frame = plt.text(event.mouseevent.xdata, -0.08 - 0.08*iSubject + event.mouseevent.ydata, + frame = plt.text(event.mouseevent.xdata, -0.08 - 0.08 * iSubject + event.mouseevent.ydata, selected_output_folder["subject"][clicked_index[iSubject]], size=10, ha="center", va="center", bbox=dict(facecolor='red', alpha=0.5) @@ -124,52 +116,55 @@ def visualize_and_compare_models(ofolders, metric="dice_class0", metadata=None): """ # access CLI options - print("ofolders: %r" % ofolders) - print("metric: %r" % metric) + logger.debug(f"ofolders: {ofolders}") + logger.debug(f"metric: {metric}") if metadata is None: metadata = [] if metadata: - print("metadata: %r" % metadata) + logger.debug(f"metadata: {metadata}") # Do a quick check that all the required files are present for folder in ofolders: - if not os.path.exists(os.path.join(folder, 'results_eval', 'evaluation_3Dmetrics.csv')): - print('evaluation_3Dmetrics.csv file is not present within ' + os.path.join(folder, 'results_eval')) + if not Path(folder, 'results_eval', 'evaluation_3Dmetrics.csv').exists(): + logger.error(f"evaluation_3Dmetrics.csv file is not present within {Path(folder, 'results_eval')}") raise Exception('evaluation_3Dmetrics.csv missing') - if not os.path.exists(os.path.join(folder, 'bids_dataframe.csv')): - print('bids_dataframe.csv file is not present within ' + folder) + if not Path(folder, 'bids_dataframe.csv').exists(): + logger.error(f"bids_dataframe.csv file is not present within {folder}") raise Exception('bids_dataframe.csv missing') if len(ofolders) < 1: raise Exception('No folders were selected - Nothing to show') - columnNames = ["EvaluationModel", metric, 'subject'] - df = pd.DataFrame([], columns=columnNames) - + np_lst = [] for folder in ofolders: - result = pd.read_csv(os.path.join(folder, 'results_eval', 'evaluation_3Dmetrics.csv')) + result = pd.read_csv(str(Path(folder, 'results_eval', 'evaluation_3Dmetrics.csv'))) if metadata: - participant_metadata = pd.read_table(os.path.join(folder, 'bids_dataframe.csv'), sep=',') + participant_metadata = pd.read_table(str(Path(folder, 'bids_dataframe.csv')), sep=',') # Select only the subjects that satisfy the --metadata input - selected_subjects = participant_metadata[participant_metadata[metadata[0]] == metadata[1]]["filename"].tolist() + selected_subjects = participant_metadata[participant_metadata[metadata[0]] == metadata[1]][ + "filename"].tolist() selected_subjects = [i.replace(".nii.gz", "") for i in selected_subjects] # Now select only the scores from these subjects result_subject_ids = result["image_id"] - result = result.iloc[[i for i in range(len(result_subject_ids)) if result_subject_ids[i] in selected_subjects]] + result = result.iloc[ + [i for i in range(len(result_subject_ids)) if result_subject_ids[i] in 
selected_subjects]]
             if result.empty:
-                print('No subject meet the selected criteria - skipping plot for: ' + folder)
+                logger.warning(f"No subjects meet the selected criteria - skipping plot for: {folder}")

             if not result.empty:
                 scores = result[metric]
-                folders = [os.path.basename(os.path.normpath(folder))] * len(scores)
+                folders = [Path(folder).resolve().name] * len(scores)
                 subject_id = result["image_id"]
-                combined = np.column_stack((folders, scores.astype(np.object, folders), subject_id)).T
-                singleFolderDF = pd.DataFrame(combined, columnNames).T
-                df = df.append(singleFolderDF, ignore_index=True)
+                combined = np.column_stack((folders, scores.astype(object), subject_id))
+                np_lst.append(combined)
+
+    columnNames = ["EvaluationModel", metric, 'subject']
+    rows = np.vstack(np_lst)
+    df = pd.DataFrame(rows, columns=columnNames)

     nFolders = len(ofolders)
     combinedNumbers = list(itertools.combinations(range(nFolders), 2))
@@ -189,17 +184,15 @@ def visualize_and_compare_models(ofolders, metric="dice_class0", metadata=None):
     # Display the mean performance on top of every violinplot
     for i in range(len(ofolders)):
         # This will be used to plot the mean value on top of each individual violinplot
-        temp = df[metric][df['EvaluationModel'] == os.path.basename(os.path.normpath(ofolders[i]))]
+        temp = df[metric][df['EvaluationModel'] == Path(ofolders[i]).resolve().name]
         plt.text(i, df[metric].max() + 0.07, str((100 * temp.mean()).round() / 100),
                  ha='center', va='top', color='r', picker=True)

     if len(ofolders) > 1 and len(ofolders) < 5:
         # Perform a Kolmogorov–Smirnov test for all combinations of results & connect the corresponding Violinplots
         for i in range(len(combinedNumbers)):
-            dataX = df[metric][df['EvaluationModel'] ==
-                               os.path.basename(os.path.normpath(combinedFolders[i][0]))]
-            dataY = df[metric][df['EvaluationModel'] ==
-                               os.path.basename(os.path.normpath(combinedFolders[i][1]))]
+            dataX = df[metric][df['EvaluationModel'] == Path(combinedFolders[i][0]).resolve().name]
+            dataY = df[metric][df['EvaluationModel'] == Path(combinedFolders[i][1]).resolve().name]

             ks_test = ks_2samp(dataX, dataY)

@@ -230,8 +223,8 @@ def visualize_and_compare_models(ofolders, metric="dice_class0", metadata=None):
             plt.show(block=True)
         else:
-            print('No subjects meet the criteria selected for any model. '
-                  'Probably you need to change the --metadata / --metric selection')
+            logger.warning("No subjects meet the criteria selected for any model. 
" + "Probably you need to change the --metadata / --metric selection") def main(): diff --git a/ivadomed/scripts/visualize_transforms.py b/ivadomed/scripts/visualize_transforms.py index 960e9f957..0e1cfc58c 100644 --- a/ivadomed/scripts/visualize_transforms.py +++ b/ivadomed/scripts/visualize_transforms.py @@ -1,17 +1,20 @@ #!/usr/bin/env python -import os import argparse import nibabel as nib import numpy as np import random import torch +from pathlib import Path +from loguru import logger from ivadomed import config_manager as imed_config_manager from ivadomed.loader import utils as imed_loader_utils +from ivadomed.loader.sample_meta_data import SampleMetadata from ivadomed import transforms as imed_transforms from ivadomed import utils as imed_utils from ivadomed import maths as imed_maths +from ivadomed.keywords import ConfigKW, TransformationKW, LoaderParamsKW, MetadataKW def get_parser(): @@ -97,11 +100,11 @@ def run_visualization(input, config, number, output, roi): context = imed_config_manager.ConfigurationManager(config).get_config() # Create output folder - if not os.path.isdir(output): - os.makedirs(output) + if not Path(output).is_dir(): + Path(output).mkdir(parents=True) # Slice extracted according to below axis - axis = imed_utils.AXIS_DCT[context["loader_parameters"]["slice_axis"]] + axis = imed_utils.AXIS_DCT[context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.SLICE_AXIS]] # Get data input_img, input_data = get_data(input, axis) # Image or Mask @@ -112,10 +115,10 @@ def run_visualization(input, config, number, output, roi): indexes = random.sample(range(0, input_data.shape[2]), number) # Get training transforms - training_transforms, _, _ = imed_transforms.get_subdatasets_transforms(context["transformation"]) + training_transforms, _, _ = imed_transforms.get_subdatasets_transforms(context[ConfigKW.TRANSFORMATION]) - if "ROICrop" in training_transforms: - if roi and os.path.isfile(roi): + if TransformationKW.ROICROP in training_transforms: + if roi and Path(roi).is_file(): roi_img, roi_data = get_data(roi, axis) else: raise ValueError("\nPlease provide ROI image (-r) in order to apply ROICrop transformation.") @@ -139,11 +142,12 @@ def run_visualization(input, config, number, output, roi): for i in indexes: data = [input_data[:, :, i]] # Init metadata - metadata = imed_loader_utils.SampleMetadata({"zooms": zooms, "data_type": "gt" if is_mask else "im"}) + metadata = SampleMetadata({MetadataKW.ZOOMS: zooms, MetadataKW.DATA_TYPE: "gt" if is_mask else "im"}) # Apply transformations to ROI - if "CenterCrop" in training_transforms or ("ROICrop" in training_transforms and os.path.isfile(roi)): - metadata.__setitem__('crop_params', {}) + if TransformationKW.CENTERCROP in training_transforms or \ + (TransformationKW.ROICROP in training_transforms and Path(roi).is_file()): + metadata.__setitem__(MetadataKW.CROP_PARAMS, {}) # Apply transformations to image stack_im, _ = composed_transforms(sample=data, @@ -151,9 +155,9 @@ def run_visualization(input, config, number, output, roi): data_type="im") # Plot before / after transformation - fname_out = os.path.join(output, stg_transforms+"slice"+str(i)+".png") - print("Fname out: {}.".format(fname_out)) - print("\t{}".format(dict(metadata))) + fname_out = str(Path(output, stg_transforms + "slice" + str(i) + ".png")) + logger.debug(f"Fname out: {fname_out}.") + logger.debug(f"\t{dict(metadata)}") # rescale intensities if len(stg_transforms[:-1].split("_")) == 1: before = np.rot90(imed_maths.rescale_values_array(data[0], 0.0, 1.0)) diff --git 
a/ivadomed/testing.py b/ivadomed/testing.py index eb0f6e0b9..e191f1a76 100644 --- a/ivadomed/testing.py +++ b/ivadomed/testing.py @@ -1,4 +1,3 @@ -import os import copy from pathlib import Path import nibabel as nib @@ -8,6 +7,7 @@ from loguru import logger from torch.utils.data import DataLoader, ConcatDataset from tqdm import tqdm +from pathlib import Path from ivadomed import metrics as imed_metrics from ivadomed import utils as imed_utils @@ -19,6 +19,7 @@ from ivadomed.loader.film import store_film_params, save_film_params from ivadomed.training import get_metadata from ivadomed.postprocessing import threshold_predictions +from ivadomed.keywords import ConfigKW, ModelParamsKW, MetadataKW cudnn.benchmark = True @@ -47,7 +48,7 @@ def test(model_params, dataset_test, testing_params, path_output, device, cuda_a num_workers=0) # LOAD TRAIN MODEL - fname_model = os.path.join(path_output, "best_model.pt") + fname_model = Path(path_output, "best_model.pt") logger.info('Loading model: {}'.format(fname_model)) model = torch.load(fname_model, map_location=device) if cuda_available: @@ -55,9 +56,9 @@ def test(model_params, dataset_test, testing_params, path_output, device, cuda_a model.eval() # CREATE OUTPUT FOLDER - path_3Dpred = os.path.join(path_output, 'pred_masks') - if not os.path.isdir(path_3Dpred): - os.makedirs(path_3Dpred) + path_3Dpred = Path(path_output, 'pred_masks') + if not path_3Dpred.is_dir(): + path_3Dpred.mkdir(parents=True) # METRIC MANAGER metric_mgr = imed_metrics.MetricManager(metric_fns) @@ -73,14 +74,14 @@ def test(model_params, dataset_test, testing_params, path_output, device, cuda_a n_monteCarlo = 1 for i_monteCarlo in range(n_monteCarlo): - preds_npy, gt_npy = run_inference(test_loader, model, model_params, testing_params, path_3Dpred, + preds_npy, gt_npy = run_inference(test_loader, model, model_params, testing_params, str(path_3Dpred), cuda_available, i_monteCarlo, postprocessing) metric_mgr(preds_npy, gt_npy) # If uncertainty computation, don't apply it on last iteration for prediction if testing_params['uncertainty']['applied'] and (n_monteCarlo - 2 == i_monteCarlo): testing_params['uncertainty']['applied'] = False # COMPUTE UNCERTAINTY MAPS - imed_uncertainty.run_uncertainty(ifolder=path_3Dpred) + imed_uncertainty.run_uncertainty(image_folder=str(path_3Dpred)) metrics_dict = metric_mgr.get_results() metric_mgr.reset() @@ -113,7 +114,7 @@ def run_inference(test_loader, model, model_params, testing_params, ofolder, cud weight_matrix = None # Create dict containing gammas and betas after each FiLM layer. - if 'film_layers' in model_params and any(model_params['film_layers']): + if ModelParamsKW.FILM_LAYERS in model_params and any(model_params[ModelParamsKW.FILM_LAYERS]): # 2 * model_params["depth"] + 2 is the number of FiLM layers. 1 is added since the range starts at one. 
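For context on the dictionaries initialized just below, a small sketch of the indexing convention; the depth value is hypothetical:

```python
# With depth = 3 there are 2 * depth + 2 = 8 FiLM layers; dictionary keys
# run 1..8 because the range starts at one.
depth = 3
gammas_dict = {i: [] for i in range(1, 2 * depth + 3)}
betas_dict = {i: [] for i in range(1, 2 * depth + 3)}
assert list(gammas_dict) == [1, 2, 3, 4, 5, 6, 7, 8]

# store_film_params then appends one per-channel array per layer after
# each batch, e.g. (hypothetical values):
gammas_dict[1].append([0.98, 1.02])
```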
gammas_dict = {i: [] for i in range(1, 2 * model_params["depth"] + 3)} betas_dict = {i: [] for i in range(1, 2 * model_params["depth"] + 3)} @@ -125,7 +126,7 @@ def run_inference(test_loader, model, model_params, testing_params, ofolder, cud # input_samples: list of batch_size tensors, whose size is n_channels X height X width X depth # gt_samples: idem with n_labels # batch['*_metadata']: list of batch_size lists, whose size is n_channels or n_labels - if model_params["name"] == "HeMISUnet": + if model_params[ModelParamsKW.NAME] == ConfigKW.HEMIS_UNET: input_samples = imed_utils.cuda(imed_utils.unstack_tensors(batch["input"]), cuda_available) else: input_samples = imed_utils.cuda(batch["input"], cuda_available) @@ -138,34 +139,34 @@ def run_inference(test_loader, model, model_params, testing_params, ofolder, cud m.train() # RUN MODEL - if model_params["name"] == "HeMISUnet" or \ - ('film_layers' in model_params and any(model_params['film_layers'])): + if model_params[ModelParamsKW.NAME] == ConfigKW.HEMIS_UNET or \ + (ModelParamsKW.FILM_LAYERS in model_params and any(model_params[ModelParamsKW.FILM_LAYERS])): metadata = get_metadata(batch["input_metadata"], model_params) preds = model(input_samples, metadata) else: preds = model(input_samples) - if model_params["name"] == "HeMISUnet": + if model_params[ModelParamsKW.NAME] == ConfigKW.HEMIS_UNET: # Reconstruct image with only one modality input_samples = batch['input'][0] - if model_params["name"] == "Modified3DUNet" and model_params["attention"] and ofolder: - imed_visualize.save_feature_map(batch, "attentionblock2", os.path.dirname(ofolder), model, input_samples, + if model_params[ModelParamsKW.NAME] == ConfigKW.MODIFIED_3D_UNET and model_params[ModelParamsKW.ATTENTION] and ofolder: + imed_visualize.save_feature_map(batch, "attentionblock2", str(Path(ofolder).parent), model, input_samples, slice_axis=test_loader.dataset.slice_axis) - if 'film_layers' in model_params and any(model_params['film_layers']): + if ModelParamsKW.FILM_LAYERS in model_params and any(model_params[ModelParamsKW.FILM_LAYERS]): # Store the values of gammas and betas after the last epoch for each batch gammas_dict, betas_dict, metadata_values_lst = store_film_params(gammas_dict, betas_dict, metadata_values_lst, - batch['input_metadata'], model, - model_params["film_layers"], - model_params["depth"], - model_params['metadata']) + batch[MetadataKW.INPUT_METADATA], model, + model_params[ModelParamsKW.FILM_LAYERS], + model_params[ModelParamsKW.DEPTH], + model_params[ModelParamsKW.METADATA]) # PREDS TO CPU preds_cpu = preds.cpu() - task = imed_utils.get_task(model_params["name"]) + task = imed_utils.get_task(model_params[ModelParamsKW.NAME]) if task == "classification": gt_npy_list.append(gt_samples.cpu().numpy()) preds_npy_list.append(preds_cpu.data.numpy()) @@ -177,18 +178,18 @@ def run_inference(test_loader, model, model_params, testing_params, ofolder, cud # LOOP ACROSS SAMPLES for smp_idx in range(len(preds_cpu)): - if "bounding_box" in batch['input_metadata'][smp_idx][0]: + if "bounding_box" in batch[MetadataKW.INPUT_METADATA][smp_idx][0]: imed_obj_detect.adjust_undo_transforms(testing_params["undo_transforms"].transforms, batch, smp_idx) - if model_params["is_2d"]: + if model_params[ModelParamsKW.IS_2D]: preds_idx_arr = None - idx_slice = batch['input_metadata'][smp_idx][0]['slice_index'] - n_slices = batch['input_metadata'][smp_idx][0]['data_shape'][-1] + idx_slice = batch[MetadataKW.INPUT_METADATA][smp_idx][0]['slice_index'] + n_slices = 
batch[MetadataKW.INPUT_METADATA][smp_idx][0]['data_shape'][-1] last_slice_bool = (idx_slice + 1 == n_slices) last_sample_bool = (last_batch_bool and smp_idx == len(preds_cpu) - 1) - length_2D = model_params["length_2D"] if "length_2D" in model_params else [] - stride_2D = model_params["stride_2D"] if "stride_2D" in model_params else [] + length_2D = model_params[ModelParamsKW.LENGTH_2D] if ModelParamsKW.LENGTH_2D in model_params else [] + stride_2D = model_params[ModelParamsKW.STRIDE_2D] if ModelParamsKW.STRIDE_2D in model_params else [] if length_2D: # undo transformations for patch and reconstruct slice preds_idx_undo, metadata_idx, last_patch_bool, image, weight_matrix = \ @@ -206,7 +207,7 @@ def run_inference(test_loader, model, model_params, testing_params, ofolder, cud preds_idx_arr = np.array(preds_idx_undo) # TODO: gt_filenames should not be a list - fname_ref = list(filter(None, metadata_idx[0]['gt_filenames']))[0] + fname_ref = list(filter(None, metadata_idx[0][MetadataKW.GT_FILENAMES]))[0] if preds_idx_arr is not None: # add new sample to pred_tmp_lst, of size n_label X h X w ... @@ -214,15 +215,15 @@ def run_inference(test_loader, model, model_params, testing_params, ofolder, cud # TODO: slice_index should be stored in gt_metadata as well z_tmp_lst.append(int(idx_slice)) - filenames = metadata_idx[0]['gt_filenames'] + filenames = metadata_idx[0][MetadataKW.GT_FILENAMES] # NEW COMPLETE VOLUME if (pred_tmp_lst and ((last_patch_bool and last_slice_bool) or last_sample_bool) and task != "classification"): # save the completely processed file as a NifTI file if ofolder: - fname_pred = os.path.join(ofolder, Path(fname_ref).name) - fname_pred = fname_pred.rsplit("_", 1)[0] + '_pred.nii.gz' + fname_pred = str(Path(ofolder, Path(fname_ref).name)) + fname_pred = fname_pred.split(testing_params['target_suffix'][0])[0] + '_pred.nii.gz' # If Uncertainty running, then we save each simulation result if testing_params['uncertainty']['applied']: fname_pred = fname_pred.split('.nii.gz')[0] + '_' + str(i_monte_carlo).zfill(2) + '.nii.gz' @@ -258,9 +259,12 @@ def run_inference(test_loader, model, model_params, testing_params, ofolder, cud extension = imed_loader_utils.get_file_extension(fname_ref) if "nii" not in extension and fname_pred: output_list = imed_inference.split_classes(output_nii) + # Reformat target list to include class index and be compatible with multiple raters + target_list = ["_class-%d" % i for i in range(len(testing_params['target_suffix']))] imed_inference.pred_to_png(output_list, - testing_params['target_suffix'], - fname_pred.split("_pred.nii.gz")[0]) + target_list, + fname_pred.split("_pred.nii.gz")[0], + suffix="_pred.png") # re-init pred_stack_lst and last_slice_bool pred_tmp_lst, z_tmp_lst = [], [] @@ -272,12 +276,12 @@ def run_inference(test_loader, model, model_params, testing_params, ofolder, cud preds_cpu, testing_params['undo_transforms'], smp_idx, volume, weight_matrix) - fname_ref = metadata[0]['gt_filenames'][0] # Indicator of last batch if last_sample_bool: pred_undo = np.array(pred_undo) + fname_ref = metadata[0][MetadataKW.GT_FILENAMES][0] if ofolder: - fname_pred = os.path.join(ofolder, fname_ref.split('/')[-1]) + fname_pred = str(Path(ofolder, Path(fname_ref).name)) fname_pred = fname_pred.split(testing_params['target_suffix'][0])[0] + '_pred.nii.gz' # If uncertainty running, then we save each simulation result if testing_params['uncertainty']['applied']: @@ -297,7 +301,7 @@ def run_inference(test_loader, model, model_params, testing_params, ofolder, cud 
output_data = output_nii.get_fdata().transpose(3, 0, 1, 2) preds_npy_list.append(output_data) - gt = get_gt(metadata[0]['gt_filenames']) + gt = get_gt(metadata[0][MetadataKW.GT_FILENAMES]) gt_npy_list.append(gt) # Save merged labels with color @@ -307,12 +311,12 @@ def run_inference(test_loader, model, model_params, testing_params, ofolder, cud # TODO: put back the code below. See #720 # imed_visualize.save_color_labels(pred_undo, # False, - # batch['input_metadata'][smp_idx][0]['input_filenames'], + # batch[MetadataKW.INPUT_METADATA][smp_idx][0]['input_filenames'], # fname_pred.split(".nii.gz")[0] + '_color.nii.gz', # slice_axis) - if 'film_layers' in model_params and any(model_params['film_layers']): - save_film_params(gammas_dict, betas_dict, metadata_values_lst, model_params["depth"], + if ModelParamsKW.FILM_LAYERS in model_params and any(model_params[ModelParamsKW.FILM_LAYERS]): + save_film_params(gammas_dict, betas_dict, metadata_values_lst, model_params[ModelParamsKW.DEPTH], ofolder.replace("pred_masks", "")) return preds_npy_list, gt_npy_list diff --git a/ivadomed/training.py b/ivadomed/training.py index 0ba93d5af..eefee5ee0 100644 --- a/ivadomed/training.py +++ b/ivadomed/training.py @@ -1,17 +1,18 @@ import copy import datetime -import os import random import time - +import os import numpy as np import torch import torch.backends.cudnn as cudnn +import wandb from loguru import logger from torch import optim from torch.utils.data import DataLoader from torch.utils.tensorboard import SummaryWriter from tqdm import tqdm +from pathlib import Path from ivadomed import losses as imed_losses from ivadomed import mixup as imed_mixup @@ -20,11 +21,13 @@ from ivadomed import utils as imed_utils from ivadomed import visualize as imed_visualize from ivadomed.loader import utils as imed_loader_utils +from ivadomed.loader.balanced_sampler import BalancedSampler +from ivadomed.keywords import ModelParamsKW, ConfigKW, BalanceSamplesKW, TrainingParamsKW, MetadataKW, WandbKW cudnn.benchmark = True -def train(model_params, dataset_train, dataset_val, training_params, path_output, device, +def train(model_params, dataset_train, dataset_val, training_params, path_output, device, wandb_params=None, cuda_available=True, metric_fns=None, n_gif=0, resume_training=False, debugging=False): """Main command to train the network. 
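The `wandb_params` argument added to `train()` above is consumed in the next hunk; a plausible shape for that config block is sketched here. Only `log_grads_every` and the project/group/run lookups are referenced in this patch, so the key spellings and all values are assumptions, and `imed_utils.initialize_wandb` decides whether tracking is enabled at all:

```python
# Hypothetical "wandb" section of an ivadomed config file
wandb_params = {
    "project_name": "my_segmentation_project",  # falls back to "temp_project"
    "group_name": "unet_baselines",             # falls back to "temp_group"
    "run_name": "film_depth3",                  # falls back to "temp_run"
    "log_grads_every": 100,                     # gradient logging frequency
}
```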
@@ -52,20 +55,41 @@ def train(model_params, dataset_train, dataset_val, training_params, path_output # Write the metrics, images, etc to TensorBoard format writer = SummaryWriter(log_dir=path_output) + # Enable wandb tracking if the required params are found in the config file and the api key is correct + wandb_tracking = imed_utils.initialize_wandb(wandb_params) + + if wandb_tracking: + # Collect all hyperparameters into a dictionary + cfg = { **training_params, **model_params} + + # Get the actual project, group, and run names if they exist, else choose the temporary names as default + project_name = wandb_params.get(WandbKW.PROJECT_NAME, "temp_project") + group_name = wandb_params.get(WandbKW.GROUP_NAME, "temp_group") + run_name = wandb_params.get(WandbKW.RUN_NAME, "temp_run") + + if project_name == "temp_project" or group_name == "temp_group" or run_name == "temp_run": + logger.info("{PROJECT/GROUP/RUN} name not found, initializing as {'temp_project'/'temp_group'/'temp_run'}") + + # Initialize WandB with metrics and hyperparameters + wandb.init(project=project_name, group=group_name, name=run_name, config=cfg, dir=path_output) + # BALANCE SAMPLES AND PYTORCH LOADER - conditions = all([training_params["balance_samples"]["applied"], model_params["name"] != "HeMIS"]) - sampler_train, shuffle_train = get_sampler(dataset_train, conditions, training_params['balance_samples']['type']) + conditions = all([training_params[TrainingParamsKW.BALANCE_SAMPLES][BalanceSamplesKW.APPLIED], + model_params[ModelParamsKW.NAME] != "HeMIS"]) + sampler_train, shuffle_train = get_sampler(dataset_train, conditions, + training_params[TrainingParamsKW.BALANCE_SAMPLES][BalanceSamplesKW.TYPE]) - train_loader = DataLoader(dataset_train, batch_size=training_params["batch_size"], + train_loader = DataLoader(dataset_train, batch_size=training_params[TrainingParamsKW.BATCH_SIZE], shuffle=shuffle_train, pin_memory=True, sampler=sampler_train, collate_fn=imed_loader_utils.imed_collate, num_workers=0) gif_dict = {"image_path": [], "slice_id": [], "gif": []} if dataset_val: - sampler_val, shuffle_val = get_sampler(dataset_val, conditions, training_params['balance_samples']['type']) + sampler_val, shuffle_val = get_sampler(dataset_val, conditions, + training_params[TrainingParamsKW.BALANCE_SAMPLES][BalanceSamplesKW.TYPE]) - val_loader = DataLoader(dataset_val, batch_size=training_params["batch_size"], + val_loader = DataLoader(dataset_val, batch_size=training_params[TrainingParamsKW.BATCH_SIZE], shuffle=shuffle_val, pin_memory=True, sampler=sampler_val, collate_fn=imed_loader_utils.imed_collate, num_workers=0) @@ -74,9 +98,9 @@ def train(model_params, dataset_train, dataset_val, training_params, path_output if n_gif > 0: indexes_gif = random.sample(range(len(dataset_val)), n_gif) for i_gif in range(n_gif): - random_metadata = dict(dataset_val[indexes_gif[i_gif]]["input_metadata"][0]) - gif_dict["image_path"].append(random_metadata['input_filenames']) - gif_dict["slice_id"].append(random_metadata['slice_index']) + random_metadata = dict(dataset_val[indexes_gif[i_gif]][MetadataKW.INPUT_METADATA][0]) + gif_dict["image_path"].append(random_metadata[MetadataKW.INPUT_FILENAMES]) + gif_dict["slice_id"].append(random_metadata[MetadataKW.SLICE_INDEX]) gif_obj = imed_visualize.AnimatedGif(size=dataset_val[indexes_gif[i_gif]]["input"].numpy()[0].shape) gif_dict["gif"].append(copy.copy(gif_obj)) @@ -96,7 +120,7 @@ def train(model_params, dataset_train, dataset_val, training_params, path_output reset=reset) else: 
logger.info("Initialising model's weights from scratch.") - model_class = getattr(imed_models, model_params["name"]) + model_class = getattr(imed_models, model_params[ModelParamsKW.NAME]) model = model_class(**model_params) if cuda_available: model.cuda() @@ -113,16 +137,21 @@ def train(model_params, dataset_train, dataset_val, training_params, path_output num_epochs) logger.info("Scheduler parameters: {}".format(training_params["scheduler"]["lr_scheduler"])) + # Only call wandb methods if required params are found in the config file + if wandb_tracking: + # Logs gradients (at every log_freq steps) to the dashboard. + wandb.watch(model, log="gradients", log_freq=wandb_params["log_grads_every"]) + # Resume start_epoch = 1 - resume_path = os.path.join(path_output, "checkpoint.pth.tar") + resume_path = Path(path_output, "checkpoint.pth.tar") if resume_training: model, optimizer, gif_dict, start_epoch, val_loss_total_avg, scheduler, patience_count = load_checkpoint( model=model, optimizer=optimizer, gif_dict=gif_dict, scheduler=scheduler, - fname=resume_path) + fname=str(resume_path)) # Individually transfer the optimizer parts # TODO: check if following lines are needed for state in optimizer.state.values(): @@ -150,6 +179,8 @@ def train(model_params, dataset_train, dataset_val, training_params, path_output lr = scheduler.get_last_lr()[0] writer.add_scalar('learning_rate', lr, epoch) + if wandb_tracking: + wandb.log({"learning_rate": lr}) # Training loop ----------------------------------------------------------- model.train() @@ -157,7 +188,7 @@ def train(model_params, dataset_train, dataset_val, training_params, path_output num_steps = 0 for i, batch in enumerate(train_loader): # GET SAMPLES - if model_params["name"] == "HeMISUnet": + if model_params[ModelParamsKW.NAME] == ConfigKW.HEMIS_UNET: input_samples = imed_utils.cuda(imed_utils.unstack_tensors(batch["input"]), cuda_available) else: input_samples = imed_utils.cuda(batch["input"], cuda_available) @@ -169,9 +200,9 @@ def train(model_params, dataset_train, dataset_val, training_params, path_output debugging and epoch == 1, path_output) # RUN MODEL - if model_params["name"] == "HeMISUnet" or \ - ('film_layers' in model_params and any(model_params['film_layers'])): - metadata = get_metadata(batch["input_metadata"], model_params) + if model_params[ModelParamsKW.NAME] == ConfigKW.HEMIS_UNET or \ + (ModelParamsKW.FILM_LAYERS in model_params and any(model_params[ModelParamsKW.FILM_LAYERS])): + metadata = get_metadata(batch[MetadataKW.INPUT_METADATA], model_params) preds = model(input_samples, metadata) else: preds = model(input_samples) @@ -189,9 +220,11 @@ def train(model_params, dataset_train, dataset_val, training_params, path_output scheduler.step() num_steps += 1 - if i == 0 and debugging: - imed_visualize.save_tensorboard_img(writer, epoch, "Train", input_samples, gt_samples, preds, - is_three_dim=not model_params["is_2d"]) + # Save image at every 50th step if debugging is true + if i%50 == 0 and debugging: + imed_visualize.save_img(writer, epoch, "Train", input_samples, gt_samples, preds, + wandb_tracking=wandb_tracking, + is_three_dim=not model_params[ModelParamsKW.IS_2D]) if not step_scheduler_batch: scheduler.step() @@ -206,10 +239,10 @@ def train(model_params, dataset_train, dataset_val, training_params, path_output tqdm.write(msg) # CURRICULUM LEARNING - if model_params["name"] == "HeMISUnet": + if model_params[ModelParamsKW.NAME] == ConfigKW.HEMIS_UNET: # Increase the probability of a missing modality - 
model_params["missing_probability"] **= model_params["missing_probability_growth"]
-            dataset_train.update(p=model_params["missing_probability"])
+            model_params[ModelParamsKW.MISSING_PROBABILITY] **= model_params[ModelParamsKW.MISSING_PROBABILITY_GROWTH]
+            dataset_train.update(p=model_params[ModelParamsKW.MISSING_PROBABILITY])

         # Validation loop -----------------------------------------------------
         model.eval()
@@ -220,16 +253,16 @@ def train(model_params, dataset_train, dataset_val, training_params, path_output
         for i, batch in enumerate(val_loader):
             with torch.no_grad():
                 # GET SAMPLES
-                if model_params["name"] == "HeMISUnet":
+                if model_params[ModelParamsKW.NAME] == ConfigKW.HEMIS_UNET:
                     input_samples = imed_utils.cuda(imed_utils.unstack_tensors(batch["input"]), cuda_available)
                 else:
                     input_samples = imed_utils.cuda(batch["input"], cuda_available)
                 gt_samples = imed_utils.cuda(batch["gt"], cuda_available, non_blocking=True)

                 # RUN MODEL
-                if model_params["name"] == "HeMISUnet" or \
-                        ('film_layers' in model_params and any(model_params['film_layers'])):
-                    metadata = get_metadata(batch["input_metadata"], model_params)
+                if model_params[ModelParamsKW.NAME] == ConfigKW.HEMIS_UNET or \
+                        (ModelParamsKW.FILM_LAYERS in model_params and any(model_params[ModelParamsKW.FILM_LAYERS])):
+                    metadata = get_metadata(batch[MetadataKW.INPUT_METADATA], model_params)
                     preds = model(input_samples, metadata)
                 else:
                     preds = model(input_samples)
@@ -242,7 +275,7 @@ def train(model_params, dataset_train, dataset_val, training_params, path_output
                 # Add frame to GIF
                 for i_ in range(len(input_samples)):
                     im, pr, met = input_samples[i_].cpu().numpy()[0], preds[i_].cpu().numpy()[0], \
-                                  batch["input_metadata"][i_][0]
+                                  batch[MetadataKW.INPUT_METADATA][i_][0]
                     for i_gif in range(n_gif):
                         if gif_dict["image_path"][i_gif] == met.__getitem__('input_filenames') and \
                                 gif_dict["slice_id"][i_gif] == met.__getitem__('slice_index'):
@@ -256,20 +289,30 @@ def train(model_params, dataset_train, dataset_val, training_params, path_output
             preds_npy = preds.data.cpu().numpy()
             metric_mgr(preds_npy, gt_npy)

-            if i == 0 and debugging:
-                imed_visualize.save_tensorboard_img(writer, epoch, "Validation", input_samples, gt_samples, preds,
-                                                    is_three_dim=not model_params['is_2d'])
+            # Save image at every 50th step if debugging is true
+            if i % 50 == 0 and debugging:
+                imed_visualize.save_img(writer, epoch, "Validation", input_samples, gt_samples, preds,
+                                        wandb_tracking=wandb_tracking,
+                                        is_three_dim=not model_params[ModelParamsKW.IS_2D])

         # METRICS COMPUTATION FOR CURRENT EPOCH
         val_loss_total_avg_old = val_loss_total_avg if epoch > 1 else None
         metrics_dict = metric_mgr.get_results()
         metric_mgr.reset()
-        writer.add_scalars('Validation/Metrics', metrics_dict, epoch)
         val_loss_total_avg = val_loss_total / num_steps
+        # log losses on Tensorboard by default
+        writer.add_scalars('Validation/Metrics', metrics_dict, epoch)
         writer.add_scalars('losses', {
             'train_loss': train_loss_total_avg,
             'val_loss': val_loss_total_avg,
         }, epoch)
+        # log on wandb if the corresponding dictionary is provided
+        if wandb_tracking:
+            wandb.log({"validation-metrics": metrics_dict})
+            wandb.log({"losses": {
+                'train_loss': train_loss_total_avg,
+                'val_loss': val_loss_total_avg,
+            }})
         msg = "Epoch {} validation loss: {:.4f}.".format(epoch, val_loss_total_avg)
         val_dice_loss_total_avg = val_dice_loss_total / num_steps
         if training_params["loss"]["name"] != "DiceLoss":
@@ -293,7 +336,7 @@ def train(model_params, dataset_train, dataset_val, training_params, path_output
             torch.save(state,
resume_path) # Save best model file - model_path = os.path.join(path_output, "best_model.pt") + model_path = Path(path_output, "best_model.pt") torch.save(model, model_path) # Update best scores @@ -310,41 +353,45 @@ def train(model_params, dataset_train, dataset_val, training_params, path_output break # Save final model - final_model_path = os.path.join(path_output, "final_model.pt") + final_model_path = Path(path_output, "final_model.pt") torch.save(model, final_model_path) # Save best model in output path - if os.path.isfile(resume_path): + if resume_path.is_file(): state = torch.load(resume_path) - model_path = os.path.join(path_output, "best_model.pt") + model_path = Path(path_output, "best_model.pt") model.load_state_dict(state['state_dict']) torch.save(model, model_path) # Save best model as ONNX in the model directory try: # Convert best model to ONNX and save it in model directory - best_model_path = os.path.join(path_output, model_params["folder_name"], - model_params["folder_name"] + ".onnx") - imed_utils.save_onnx_model(model, input_samples, best_model_path) - except: - # Save best model in model directory - best_model_path = os.path.join(path_output, model_params["folder_name"], - model_params["folder_name"] + ".pt") - torch.save(model, best_model_path) - logger.warning("Failed to save the model as '.onnx', saved it as '.pt': {}".format(best_model_path)) + best_model_path = Path(path_output, model_params[ModelParamsKW.FOLDER_NAME], + model_params[ModelParamsKW.FOLDER_NAME] + ".onnx") + imed_utils.save_onnx_model(model, input_samples, str(best_model_path)) + logger.info(f"Model saved as '.onnx': {best_model_path}") + except Exception as e: + logger.warning(f"Failed to save the model as '.onnx': {e}") + + # Save best model as PT in the model directory + best_model_path = Path(path_output, model_params[ModelParamsKW.FOLDER_NAME], + model_params[ModelParamsKW.FOLDER_NAME] + ".pt") + torch.save(model, best_model_path) + logger.info(f"Model saved as '.pt': {best_model_path}") # Save GIFs - gif_folder = os.path.join(path_output, "gifs") - if n_gif > 0 and not os.path.isdir(gif_folder): - os.makedirs(gif_folder) + gif_folder = Path(path_output, "gifs") + if n_gif > 0 and not gif_folder.is_dir(): + gif_folder.mkdir(parents=True) for i_gif in range(n_gif): - fname_out = gif_dict["image_path"][i_gif].split('/')[-3] + "__" - fname_out += gif_dict["image_path"][i_gif].split('/')[-1].split(".nii.gz")[0].split( - gif_dict["image_path"][i_gif].split('/')[-3] + "_")[1] + "__" + fname_out = gif_dict["image_path"][i_gif].split(os.sep)[-3] + "__" + fname_out += gif_dict["image_path"][i_gif].split(os.sep)[-1].split(".nii.gz")[0].split( + gif_dict["image_path"][i_gif].split(os.sep)[-3] + "_")[1] + "__" fname_out += str(gif_dict["slice_id"][i_gif]) + ".gif" - path_gif_out = os.path.join(gif_folder, fname_out) - gif_dict["gif"][i_gif].save(path_gif_out) + path_gif_out = Path(gif_folder, fname_out) + gif_dict["gif"][i_gif].save(str(path_gif_out)) writer.close() + wandb.finish() final_time = time.time() duration_time = final_time - begin_time logger.info('begin ' + time.strftime('%H:%M:%S', time.localtime(begin_time)) + "| End " + @@ -366,7 +413,7 @@ def get_sampler(ds, balance_bool, metadata): Otherwise: Returns None and True. 
""" if balance_bool: - return imed_loader_utils.BalancedSampler(ds, metadata), False + return BalancedSampler(ds, metadata), False else: return None, True @@ -437,10 +484,10 @@ def get_metadata(metadata, model_params): If FiLMedUnet, Returns a list of metadata, that have been transformed by the One Hot Encoder. If HeMISUnet, Returns a numpy array where each row represents a sample and each column represents a contrast. """ - if model_params["name"] == "HeMISUnet": + if model_params[ModelParamsKW.NAME] == ConfigKW.HEMIS_UNET: return np.array([m[0]["missing_mod"] for m in metadata]) else: - return [model_params["film_onehotencoder"].transform([metadata[k][0]['film_input']]).tolist()[0] + return [model_params[ModelParamsKW.FILM_ONEHOTENCODER].transform([metadata[k][0]['film_input']]).tolist()[0] for k in range(len(metadata))] diff --git a/ivadomed/transforms.py b/ivadomed/transforms.py index a58aaf1ed..19fcce8fd 100644 --- a/ivadomed/transforms.py +++ b/ivadomed/transforms.py @@ -4,18 +4,21 @@ import numbers import random +from typing import Tuple + import numpy as np import torch from loguru import logger from scipy.ndimage import zoom -from scipy.ndimage.filters import gaussian_filter -from scipy.ndimage.interpolation import map_coordinates, affine_transform -from scipy.ndimage.measurements import label, center_of_mass -from scipy.ndimage.morphology import binary_dilation, binary_fill_holes, binary_closing +from scipy.ndimage import gaussian_filter, map_coordinates, affine_transform, label, center_of_mass, binary_dilation, \ + binary_fill_holes, binary_closing from skimage.exposure import equalize_adapthist from torchvision import transforms as torchvision_transforms +import torchio as tio + from ivadomed.loader import utils as imed_loader_utils +from ivadomed.keywords import TransformationKW, MetadataKW def multichannel_capable(wrapped): @@ -237,11 +240,11 @@ def __init__(self, hspace, wspace, dspace=1.): @two_dim_compatible def undo_transform(self, sample, metadata=None): """Resample to original resolution.""" - assert "data_shape" in metadata + assert MetadataKW.DATA_SHAPE in metadata is_2d = sample.shape[-1] == 1 # Get params - original_shape = metadata["preresample_shape"] + original_shape = metadata[MetadataKW.PRE_RESAMPLE_SHAPE] current_shape = sample.shape params_undo = [x / y for x, y in zip(original_shape, current_shape)] if is_2d: @@ -250,7 +253,7 @@ def undo_transform(self, sample, metadata=None): # Undo resampling data_out = zoom(sample, zoom=params_undo, - order=1 if metadata['data_type'] == 'gt' else 2) + order=1 if metadata[MetadataKW.DATA_TYPE] == 'gt' else 2) # Data type data_out = data_out.astype(sample.dtype) @@ -265,8 +268,9 @@ def __call__(self, sample, metadata=None): # Get params # Voxel dimension in mm is_2d = sample.shape[-1] == 1 - metadata['preresample_shape'] = sample.shape - zooms = list(metadata["zooms"]) + metadata[MetadataKW.PRE_RESAMPLE_SHAPE] = sample.shape + # metadata is not a dictionary! 
+ zooms = list(metadata[MetadataKW.ZOOMS]) if len(zooms) == 2: zooms += [1.0] @@ -279,7 +283,7 @@ def __call__(self, sample, metadata=None): # Run resampling data_out = zoom(sample, zoom=params_resample, - order=1 if metadata['data_type'] == 'gt' else 2) + order=1 if metadata[MetadataKW.DATA_TYPE] == 'gt' else 2) # Data type data_out = data_out.astype(sample.dtype) @@ -297,7 +301,12 @@ def undo_transform(self, sample, metadata=None): @multichannel_capable def __call__(self, sample, metadata=None): - data_out = (sample - sample.mean()) / sample.std() + # if sample uniform: do mean-subtraction + if sample.std() < 1e-5: + data_out = (sample - sample.mean()) + # else: normalize sample + else: + data_out = (sample - sample.mean()) / sample.std() return data_out, metadata @@ -411,7 +420,7 @@ def __call__(self, sample, metadata): # Get params is_2d = sample.shape[-1] == 1 th, tw, td = self.size - fh, fw, fd, h, w, d = metadata['crop_params'][self.__class__.__name__] + fh, fw, fd, h, w, d = metadata[MetadataKW.CROP_PARAMS].get(self.__class__.__name__) # Crop data # Note we use here CroppableArray in order to deal with "out of boundaries" crop @@ -429,7 +438,7 @@ def undo_transform(self, sample, metadata=None): # Get crop params is_2d = sample.shape[-1] == 1 th, tw, td = self.size - fh, fw, fd, h, w, d = metadata["crop_params"][self.__class__.__name__] + fh, fw, fd, h, w, d = metadata[MetadataKW.CROP_PARAMS].get(self.__class__.__name__) # Compute params to undo transform pad_left = fw @@ -466,7 +475,7 @@ def __call__(self, sample, metadata=None): fw = int(round((w - tw) / 2.)) fd = int(round((d - td) / 2.)) params = (fh, fw, fd, h, w, d) - metadata['crop_params'][self.__class__.__name__] = params + metadata[MetadataKW.CROP_PARAMS][self.__class__.__name__] = params # Call base method return super().__call__(sample, metadata) @@ -481,9 +490,9 @@ class ROICrop(Crop): def __call__(self, sample, metadata=None): # If crop_params are not in metadata, # then we are here dealing with ROI data to determine crop params - if self.__class__.__name__ not in metadata['crop_params']: + if self.__class__.__name__ not in metadata[MetadataKW.CROP_PARAMS]: # Compute center of mass of the ROI - h_roi, w_roi, d_roi = center_of_mass(sample.astype(np.int)) + h_roi, w_roi, d_roi = center_of_mass(sample.astype(int)) h_roi, w_roi, d_roi = int(round(h_roi)), int(round(w_roi)), int(round(d_roi)) th, tw, td = self.size th_half, tw_half, td_half = int(round(th / 2.)), int(round(tw / 2.)), int(round(td / 2.)) @@ -496,7 +505,7 @@ def __call__(self, sample, metadata=None): # Crop params h, w, d = sample.shape params = (fh, fw, fd, h, w, d) - metadata['crop_params'][self.__class__.__name__] = params + metadata[MetadataKW.CROP_PARAMS][self.__class__.__name__] = params # Call base method return super().__call__(sample, metadata) @@ -526,26 +535,26 @@ def dilate_lesion(arr_bin, arr_soft, label_values): arr_dilated = binary_dilation(arr_bin, iterations=1) # isolate new voxels, i.e. 
the ones from the dilation - new_voxels = np.logical_xor(arr_dilated, arr_bin).astype(np.int) + new_voxels = np.logical_xor(arr_dilated, arr_bin).astype(int) # assign a soft value (]0, 1[) to the new voxels soft_new_voxels = lb * new_voxels # add the new voxels to the input mask arr_soft += soft_new_voxels - arr_bin = (arr_soft > 0).astype(np.int) + arr_bin = (arr_soft > 0).astype(int) return arr_bin, arr_soft def dilate_arr(self, arr, dil_factor): # identify each object - arr_labeled, lb_nb = label(arr.astype(np.int)) + arr_labeled, lb_nb = label(arr.astype(int)) # loop across each object arr_bin_lst, arr_soft_lst = [], [] for obj_idx in range(1, lb_nb + 1): - arr_bin_obj = (arr_labeled == obj_idx).astype(np.int) - arr_soft_obj = np.copy(arr_bin_obj).astype(np.float) + arr_bin_obj = (arr_labeled == obj_idx).astype(int) + arr_soft_obj = np.copy(arr_bin_obj).astype(float) # compute the number of dilation iterations depending on the size of the lesion nb_it = int(round(dil_factor * math.sqrt(arr_bin_obj.sum()))) # values of the voxels added to the input mask @@ -561,7 +570,7 @@ def dilate_arr(self, arr, dil_factor): # clip values in case dilated voxels overlap arr_bin_clip, arr_soft_clip = np.clip(arr_bin_idx, 0, 1), np.clip(arr_soft_idx, 0.0, 1.0) - return arr_soft_clip.astype(np.float), arr_bin_clip.astype(np.int) + return arr_soft_clip.astype(float), arr_bin_clip.astype(int) @staticmethod def random_holes(arr_in, arr_soft, arr_bin): @@ -581,7 +590,7 @@ def random_holes(arr_in, arr_soft, arr_bin): arr_soft_out[new_voxels_xx[idx_to_remove], new_voxels_yy[idx_to_remove], new_voxels_zz[idx_to_remove]] = 0.0 - arr_bin_out = (arr_soft_out > 0).astype(np.int) + arr_bin_out = (arr_soft_out > 0).astype(int) return arr_soft_out, arr_bin_out @@ -597,7 +606,7 @@ def post_processing(arr_in, arr_soft, arr_bin, arr_dil): struct = np.ones((3, 3, 1) if arr_soft.shape[2] == 1 else (3, 3, 3)) # binary closing - arr_bin_closed = binary_closing((arr_soft > 0).astype(np.int), structure=struct) + arr_bin_closed = binary_closing((arr_soft > 0).astype(int), structure=struct) # fill binary holes arr_bin_filled = binary_fill_holes(arr_bin_closed) @@ -635,10 +644,10 @@ class BoundingBoxCrop(Crop): @multichannel_capable @two_dim_compatible def __call__(self, sample, metadata): - assert 'bounding_box' in metadata - x_min, x_max, y_min, y_max, z_min, z_max = metadata['bounding_box'] + assert MetadataKW.BOUNDING_BOX in metadata + x_min, x_max, y_min, y_max, z_min, z_max = metadata[MetadataKW.BOUNDING_BOX] x, y, z = sample.shape - metadata['crop_params'][self.__class__.__name__] = (x_min, y_min, z_min, x, y, z) + metadata[MetadataKW.CROP_PARAMS][self.__class__.__name__] = (x_min, y_min, z_min, x, y, z) # Call base method return super().__call__(sample, metadata) @@ -702,8 +711,8 @@ def __init__(self, degrees=0, translate=None, scale=None): def __call__(self, sample, metadata=None): # Rotation # If angle and metadata have been already defined for this sample, then use them - if 'rotation' in metadata: - angle, axes = metadata['rotation'] + if MetadataKW.ROTATION in metadata: + angle, axes = metadata[MetadataKW.ROTATION] # Otherwise, get random ones else: # Get the random angle @@ -712,20 +721,20 @@ def __call__(self, sample, metadata=None): axes = list(random.sample(range(3 if sample.shape[2] > 1 else 2), 2)) axes.sort() # Save params - metadata['rotation'] = [angle, axes] + metadata[MetadataKW.ROTATION] = [angle, axes] # Scale - if "scale" in metadata: - scale_x, scale_y, scale_z = metadata['scale'] + if MetadataKW.SCALE 
in metadata: + scale_x, scale_y, scale_z = metadata[MetadataKW.SCALE] else: scale_x = random.uniform(1 - self.scale[0], 1 + self.scale[0]) scale_y = random.uniform(1 - self.scale[1], 1 + self.scale[1]) scale_z = random.uniform(1 - self.scale[2], 1 + self.scale[2]) - metadata['scale'] = [scale_x, scale_y, scale_z] + metadata[MetadataKW.SCALE] = [scale_x, scale_y, scale_z] # Get params - if 'translation' in metadata: - translations = metadata['translation'] + if MetadataKW.TRANSLATION in metadata: + translations = metadata[MetadataKW.TRANSLATION] else: self.data_shape = sample.shape @@ -739,7 +748,7 @@ def __call__(self, sample, metadata=None): else: translations = (0, 0, 0) - metadata['translation'] = translations + metadata[MetadataKW.TRANSLATION] = translations # Do rotation shape = 0.5 * np.array(sample.shape) @@ -759,7 +768,7 @@ def __call__(self, sample, metadata=None): raise ValueError("Unknown axes value") scale = np.array([[1 / scale_x, 0, 0], [0, 1 / scale_y, 0], [0, 0, 1 / scale_z]]) - if "undo" in metadata and metadata["undo"]: + if MetadataKW.UNDO in metadata and metadata[MetadataKW.UNDO]: transforms = scale.dot(rotate) else: transforms = rotate.dot(scale) @@ -774,16 +783,17 @@ def __call__(self, sample, metadata=None): @multichannel_capable @two_dim_compatible def undo_transform(self, sample, metadata=None): - assert "rotation" in metadata - assert "scale" in metadata - assert "translation" in metadata + assert MetadataKW.ROTATION in metadata + assert MetadataKW.SCALE in metadata + assert MetadataKW.TRANSLATION in metadata # Opposite rotation, same axes - angle, axes = - metadata['rotation'][0], metadata['rotation'][1] - scale = 1 / np.array(metadata['scale']) - translation = - np.array(metadata['translation']) + angle, axes = - metadata[MetadataKW.ROTATION][0], metadata[MetadataKW.ROTATION][1] + scale = 1 / np.array(metadata[MetadataKW.SCALE]) + translation = - np.array(metadata[MetadataKW.TRANSLATION]) # Undo rotation - dict_params = {"rotation": [angle, axes], "scale": scale, "translation": [0, 0, 0], "undo": True} + dict_params = {MetadataKW.ROTATION: [angle, axes], MetadataKW.SCALE: scale, + MetadataKW.TRANSLATION: [0, 0, 0], MetadataKW.UNDO: True} data_out, _ = self.__call__(sample, dict_params) @@ -799,17 +809,17 @@ class RandomReverse(ImedTransform): @multichannel_capable @two_dim_compatible def __call__(self, sample, metadata=None): - if 'reverse' in metadata: - flip_axes = metadata['reverse'] + if MetadataKW.REVERSE in metadata: + flip_axes = metadata[MetadataKW.REVERSE] else: # Flip axis booleans flip_axes = [np.random.randint(2) == 1 for _ in [0, 1, 2]] # Save in metadata - metadata['reverse'] = flip_axes + metadata[MetadataKW.REVERSE] = flip_axes # Run flip for idx_axis, flip_bool in enumerate(flip_axes): - if flip_axes: + if flip_bool: sample = np.flip(sample, axis=idx_axis).copy() return sample, metadata @@ -817,7 +827,7 @@ def __call__(self, sample, metadata=None): @multichannel_capable @two_dim_compatible def undo_transform(self, sample, metadata=None): - assert "reverse" in metadata + assert MetadataKW.REVERSE in metadata return self.__call__(sample, metadata) @@ -843,16 +853,16 @@ def __call__(self, sample, metadata=None): offset = 0.0 # Update metadata - metadata['offset'] = offset + metadata[MetadataKW.OFFSET] = offset # Shift intensity data = (sample + offset).astype(sample.dtype) return data, metadata @multichannel_capable def undo_transform(self, sample, metadata=None): - assert 'offset' in metadata + assert MetadataKW.OFFSET in metadata # Get offset - 
offset = metadata['offset'] + offset = metadata[MetadataKW.OFFSET] # Substract offset data = (sample - offset).astype(sample.dtype) return data, metadata @@ -879,8 +889,8 @@ def __init__(self, alpha_range, sigma_range, p=0.1): @two_dim_compatible def __call__(self, sample, metadata=None): # if params already defined, i.e. sample is GT - if "elastic" in metadata: - alpha, sigma = metadata["elastic"] + if MetadataKW.ELASTIC in metadata: + alpha, sigma = metadata[MetadataKW.ELASTIC] elif np.random.random() < self.p: # Get params @@ -888,12 +898,12 @@ def __call__(self, sample, metadata=None): sigma = np.random.uniform(self.sigma_range[0], self.sigma_range[1]) # Save params - metadata["elastic"] = [alpha, sigma] + metadata[MetadataKW.ELASTIC] = [alpha, sigma] else: - metadata["elastic"] = [None, None] + metadata[MetadataKW.ELASTIC] = [None, None] - if any(metadata["elastic"]): + if any(metadata[MetadataKW.ELASTIC]): # Get shape shape = sample.shape @@ -940,8 +950,8 @@ def __init__(self, mean=0.0, std=0.01): @multichannel_capable def __call__(self, sample, metadata=None): - if "gaussian_noise" in metadata: - noise = metadata["gaussian_noise"] + if MetadataKW.GAUSSIAN_NOISE in metadata: + noise = metadata[MetadataKW.GAUSSIAN_NOISE] else: # Get random noise noise = np.random.normal(self.mean, self.std, sample.shape) @@ -1009,6 +1019,132 @@ def __call__(self, sample, metadata=None): return data, metadata +class RandomGamma(ImedTransform): + """Randomly changes the contrast of an image by gamma exponential + + Args: + log_gamma_range (tuple of floats): Log gamma range for changing contrast. Length equals 2. + p (float): Probability of performing the gamma contrast + """ + + def __init__(self, log_gamma_range, p=0.5): + self.log_gamma_range = log_gamma_range + self.p = p + + @multichannel_capable + @two_dim_compatible + def __call__(self, sample, metadata=None): + if np.random.random() < self.p: + # Get params + gamma = np.exp(np.random.uniform(self.log_gamma_range[0], self.log_gamma_range[1])) + + # Save params + metadata[MetadataKW.GAMMA] = [gamma] + + else: + metadata[MetadataKW.GAMMA] = [None] + + if any(metadata[MetadataKW.GAMMA]): + # Suppress the overflow case (due to exponentiation) + with np.errstate(over='ignore'): + # Apply gamma contrast + data_out = np.sign(sample) * (np.abs(sample) ** gamma) + + # Keep data type + data_out = data_out.astype(sample.dtype) + + # Clip +/- inf values to the max/min quantization of the native dtype + data_out = np.nan_to_num(data_out) + + return data_out, metadata + + else: + return sample, metadata + + +class RandomBiasField(ImedTransform): + """Applies a random MRI bias field artifact to the image via torchio.RandomBiasField() + + Args: + coefficients (float): Maximum magnitude of polynomial coefficients + order: Order of the basis polynomial functions + p (float): Probability of applying the bias field + """ + + def __init__(self, coefficients, order, p=0.5): + self.coefficients = coefficients + self.order = order + self.p = p + + @multichannel_capable + @two_dim_compatible + def __call__(self, sample, metadata=None): + if np.random.random() < self.p: + # Get params + random_bias_field = tio.Compose([tio.RandomBiasField(coefficients=self.coefficients, + order=self.order, + p=self.p)]) + + # Save params + metadata[MetadataKW.BIAS_FIELD] = [random_bias_field] + + else: + metadata[MetadataKW.BIAS_FIELD] = [None] + + if any(metadata[MetadataKW.BIAS_FIELD]): + # Apply random bias field + data_out, history = tio_transform(x=sample, transform=random_bias_field) 
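The `tio_transform` helper used above is defined at the end of this patch; a standalone sketch of the same round trip on an assumed random volume follows. Note that since the outer `np.random.random() < self.p` gate has already fired, handing `p=self.p` to `tio.RandomBiasField` as well appears to apply the field with overall probability p² rather than p.

```python
import numpy as np
import torchio as tio

# Hypothetical (H, W, D) volume, as handled by the transform wrappers here
sample = np.random.rand(32, 32, 16).astype(np.float32)

# p=1.0 so the bias field is always applied in this sketch
transform = tio.RandomBiasField(coefficients=0.5, order=3, p=1.0)
subject = tio.Subject(input=tio.ScalarImage(tensor=sample[np.newaxis, ...]))
transformed = transform(subject)

data_out = transformed.input.numpy()[0]       # back to (H, W, D)
history = transformed.get_composed_history()  # records the sampled parameters
```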
+ + # Keep data type + data_out = data_out.astype(sample.dtype) + + # Update metadata to history + metadata[MetadataKW.BIAS_FIELD] = [history] + + return data_out, metadata + + else: + return sample, metadata + + +class RandomBlur(ImedTransform): + """Applies a random blur to the image + + Args: + sigma_range (tuple of floats): Standard deviation range for the gaussian filter + p (float): Probability of performing blur + """ + + def __init__(self, sigma_range, p=0.5): + self.sigma_range = sigma_range + self.p = p + + @multichannel_capable + @two_dim_compatible + def __call__(self, sample, metadata=None): + if np.random.random() < self.p: + # Get params + sigma = np.random.uniform(self.sigma_range[0], self.sigma_range[1]) + + # Save params + metadata[MetadataKW.BLUR] = [sigma] + + else: + metadata[MetadataKW.BLUR] = [None] + + if any(metadata[MetadataKW.BLUR]): + # Apply random blur + data_out = gaussian_filter(sample, sigma) + + # Keep data type + data_out = data_out.astype(sample.dtype) + + return data_out, metadata + + else: + return sample, metadata + + def get_subdatasets_transforms(transform_params): """Get transformation parameters for each subdataset: training, validation and testing. @@ -1048,7 +1184,7 @@ def get_preprocessing_transforms(transforms): original_transforms = copy.deepcopy(transforms) preprocessing_transforms = copy.deepcopy(transforms) for idx, tr in enumerate(original_transforms): - if tr == "Resample" or tr == "CenterCrop" or tr == "ROICrop": + if tr == TransformationKW.RESAMPLE or tr == TransformationKW.CENTERCROP or tr == TransformationKW.ROICROP: del transforms[tr] else: del preprocessing_transforms[tr] @@ -1056,7 +1192,7 @@ def get_preprocessing_transforms(transforms): return preprocessing_transforms -def apply_preprocessing_transforms(transforms, seg_pair, roi_pair=None): +def apply_preprocessing_transforms(transforms, seg_pair, roi_pair=None) -> Tuple[dict, dict]: """ Applies preprocessing transforms to segmentation pair (input, gt and metadata). @@ -1093,16 +1229,16 @@ def apply_preprocessing_transforms(transforms, seg_pair, roi_pair=None): seg_pair = { 'input': stack_input, 'gt': stack_gt, - 'input_metadata': metadata_input, - 'gt_metadata': metadata_gt + MetadataKW.INPUT_METADATA: metadata_input, + MetadataKW.GT_METADATA: metadata_gt } if roi_pair is not None and len(roi_pair['gt']): roi_pair = { 'input': stack_input, 'gt': stack_roi, - 'input_metadata': metadata_input, - 'gt_metadata': metadata_roi + MetadataKW.INPUT_METADATA: metadata_input, + MetadataKW.GT_METADATA: metadata_roi } return (seg_pair, roi_pair) @@ -1127,3 +1263,19 @@ def prepare_transforms(transform_dict, requires_undo=True): transforms = Compose(transform_dict, requires_undo=requires_undo) tranform_lst = [prepro_transforms if len(preprocessing_transforms) else None, transforms] return tranform_lst, training_undo_transform + + +def tio_transform(x, transform): + """ + Applies TorchIO transformations to a given image and returns the transformed image and history. 
+ + Args: + x (np.ndarray): input image + transform (tio.transforms.Transform): TorchIO transform + + Returns: + np.ndarray, list: transformed image, history of parameters used for the applied transformation + """ + tio_subject = tio.Subject(input=tio.ScalarImage(tensor=x[np.newaxis, ...])) + transformed = transform(tio_subject) + return transformed.input.numpy()[0], transformed.get_composed_history() diff --git a/ivadomed/uncertainty.py b/ivadomed/uncertainty.py index ec6425b2e..325e45ed5 100644 --- a/ivadomed/uncertainty.py +++ b/ivadomed/uncertainty.py @@ -1,57 +1,58 @@ import nibabel as nib -import os from tqdm import tqdm from scipy.ndimage import label, generate_binary_structure +from pathlib import Path import json import numpy as np from ivadomed import postprocessing as imed_postpro +from typing import List -def run_uncertainty(ifolder): +def run_uncertainty(image_folder): """Compute uncertainty from model prediction. This function loops across the model predictions (nifti masks) and estimates the uncertainty from the Monte Carlo samples. Both voxel-wise and structure-wise uncertainty are estimated. Args: - ifolder (str): Folder containing the Monte Carlo samples. + image_folder (str): Folder containing the Monte Carlo samples. """ # list subj_acq prefixes - subj_acq_lst = [f.split('_pred')[0] for f in os.listdir(ifolder) - if f.endswith('.nii.gz') and '_pred' in f] + subj_acq_lst = [file.name.split('_pred')[0] for file in Path(image_folder).iterdir() + if file.name.endswith('.nii.gz') and '_pred' in file.name] # remove duplicates subj_acq_lst = list(set(subj_acq_lst)) # keep only the images where unc has not been computed yet - subj_acq_lst = [f for f in subj_acq_lst if not os.path.isfile( - os.path.join(ifolder, f + '_unc-cv.nii.gz'))] + subj_acq_lst = [file for file in subj_acq_lst if not Path(image_folder, file + '_unc-cv.nii.gz').is_file()] # loop across subj_acq for subj_acq in tqdm(subj_acq_lst, desc="Uncertainty Computation"): # hard segmentation from MC samples - fname_pred = os.path.join(ifolder, subj_acq + '_pred.nii.gz') + fname_pred: Path = Path(image_folder, subj_acq + '_pred.nii.gz') # fname for soft segmentation from MC simulations - fname_soft = os.path.join(ifolder, subj_acq + '_soft.nii.gz') + fname_soft: Path = Path(image_folder, subj_acq + '_soft.nii.gz') # find Monte Carlo simulations - fname_pred_lst = [os.path.join(ifolder, f) - for f in os.listdir(ifolder) if subj_acq + '_pred_' in f and - ('_painted' not in f) and ('_color' not in f)] + fname_pred_lst: List[str] = [] + for file in Path(image_folder).iterdir(): + if subj_acq + '_pred_' in file.name and ('_painted' not in file.name) and ('_color' not in file.name): + fname_pred_lst.append(str(file)) # if final segmentation from Monte Carlo simulations has not been generated yet - if not os.path.isfile(fname_pred) or not os.path.isfile(fname_soft): + if not fname_pred.is_file() or not fname_soft.is_file(): # threshold used for the hard segmentation thr = 1.
/ len(fname_pred_lst) # 1 for all voxels where at least one MC sample predicted 1 # average then argmax - combine_predictions(fname_pred_lst, fname_pred, fname_soft, thr=thr) + combine_predictions(fname_pred_lst, str(fname_pred), str(fname_soft), thr=thr) - fname_unc_vox = os.path.join(ifolder, subj_acq + '_unc-vox.nii.gz') - if not os.path.isfile(fname_unc_vox): + fname_unc_vox = Path(image_folder, subj_acq + '_unc-vox.nii.gz') + if not fname_unc_vox.is_file(): # compute voxel-wise uncertainty map - voxelwise_uncertainty(fname_pred_lst, fname_unc_vox) + voxelwise_uncertainty(fname_pred_lst, str(fname_unc_vox)) - fname_unc_struct = os.path.join(ifolder, subj_acq + '_unc.nii.gz') - if not os.path.isfile(os.path.join(ifolder, subj_acq + '_unc-cv.nii.gz')): + fname_unc_struct = Path(image_folder, subj_acq + '_unc.nii.gz') + if not Path(image_folder, subj_acq + '_unc-cv.nii.gz').is_file(): # compute structure-wise uncertainty - structurewise_uncertainty(fname_pred_lst, fname_pred, fname_unc_vox, fname_unc_struct) + structurewise_uncertainty(fname_pred_lst, str(fname_pred), str(fname_unc_vox), str(fname_unc_struct)) def combine_predictions(fname_lst, fname_hard, fname_prob, thr=0.5): @@ -69,18 +70,26 @@ """ # collect all MC simulations mc_data = np.array([nib.load(fname).get_fdata() for fname in fname_lst]) - affine = nib.load(fname_lst[0]).affine + first_file_header = nib.load(fname_lst[0]).header # average over all the MC simulations data_prob = np.mean(mc_data, axis=0) # save prob segmentation - nib_prob = nib.Nifti1Image(data_prob, affine) + nib_prob = nib.Nifti1Image( + dataobj=data_prob, + affine=first_file_header.get_best_affine(), + header=first_file_header.copy() + ) nib.save(nib_prob, fname_prob) # argmax operator data_hard = imed_postpro.threshold_predictions(data_prob, thr=thr).astype(np.uint8) # save hard segmentation - nib_hard = nib.Nifti1Image(data_hard, affine) + nib_hard = nib.Nifti1Image( + dataobj=data_hard, + affine=first_file_header.get_best_affine(), + header=first_file_header.copy() + ) nib.save(nib_hard, fname_hard) @@ -96,7 +105,7 @@ """ # collect all MC simulations mc_data = np.array([nib.load(fname).get_fdata() for fname in fname_lst]) - affine = nib.load(fname_lst[0]).affine + affine = nib.load(fname_lst[0]).header.get_best_affine() # entropy unc = np.repeat(np.expand_dims(mc_data, -1), 2, -1) # n_it, x, y, z, 2 @@ -165,7 +174,7 @@ # Loop across objects for i_obj in labels: # select the current structure, remaining voxels are set to zero - data_hard_labeled_class_obj = (np.array(data_hard_labeled_class) == i_obj).astype(np.int) + data_hard_labeled_class_obj = (np.array(data_hard_labeled_class) == i_obj).astype(int) # Get object coordinates xx_obj, yy_obj, zz_obj = np.where(data_hard_labeled_class_obj) @@ -183,7 +192,7 @@ if i_mc_label > 0: data_tmp[mc_dict["mc_labeled"][i_mc][i_class] == i_mc_label] = 1.
- data_class_obj_mc.append(data_tmp.astype(np.bool)) + data_class_obj_mc.append(data_tmp.astype(bool)) # COMPUTE IoU # Init intersection and union @@ -219,9 +228,23 @@ fname_iou = fname_out.split('.nii.gz')[0] + '-iou.nii.gz' fname_cv = fname_out.split('.nii.gz')[0] + '-cv.nii.gz' fname_avgUnc = fname_out.split('.nii.gz')[0] + '-avgUnc.nii.gz' - nib_iou = nib.Nifti1Image(data_iou, nib_hard.affine) - nib_cv = nib.Nifti1Image(data_cv, nib_hard.affine) - nib_avgUnc = nib.Nifti1Image(data_avgUnc, nib_hard.affine) + + nib_iou = nib.Nifti1Image( + dataobj=data_iou, + affine=nib_hard.header.get_best_affine(), + header=nib_hard.header.copy() + ) + nib_cv = nib.Nifti1Image( + dataobj=data_cv, + affine=nib_hard.header.get_best_affine(), + header=nib_hard.header.copy() + ) + nib_avgUnc = nib.Nifti1Image( + data_avgUnc, + affine=nib_hard.header.get_best_affine(), + header=nib_hard.header.copy() + ) + nib.save(nib_iou, fname_iou) nib.save(nib_cv, fname_cv) nib.save(nib_avgUnc, fname_avgUnc) diff --git a/ivadomed/utils.py b/ivadomed/utils.py index 432e4baa9..0b43f4724 100644 --- a/ivadomed/utils.py +++ b/ivadomed/utils.py @@ -2,10 +2,17 @@ import sys import subprocess import hashlib +import datetime +import platform + +import numpy as np +import wandb from enum import Enum from loguru import logger - +from pathlib import Path +from ivadomed.keywords import ConfigKW, LoaderParamsKW, WandbKW from typing import List +from difflib import SequenceMatcher AXIS_DCT = {'sagittal': 0, 'coronal': 1, 'axial': 2} @@ -27,6 +34,43 @@ def __str__(self): return self.value +def initialize_wandb(wandb_params): + """Initializes WandB and, based upon the given parameters, sets it up or disables it for experiment tracking + + Args: + wandb_params (dict): wandb parameters + + Returns: + bool: True if WandB tracking is enabled + """ + try: + # raise an error if the key is empty + if not bool(wandb_params[WandbKW.WANDB_API_KEY].strip()): + raise ValueError() + + # Log on to WandB (assuming that the API Key is correct) + # if not, login would raise an exception for an invalid or not-found API key + wandb.login(key=wandb_params[WandbKW.WANDB_API_KEY], anonymous='allow', timeout=60) + + except Exception as e: + # log error message for unsuccessful wandb authentication + if wandb_params is not None: + logger.info("Incorrect WandB API Key! Please re-check the entered API key.") + logger.info("Disabling WandB Tracking, continuing with Tensorboard Logging") + else: + logger.info("No WandB parameters found! Continuing with Tensorboard Logging") + + # set flag + wandb_tracking = False + + else: + # setting flag after successful authentication + logger.info("WandB API Authentication Successful!") + wandb_tracking = True + + return wandb_tracking + + def get_task(model_name): return "classification" if model_name in CLASSIFIER_LIST else "segmentation" @@ -78,7 +122,7 @@ def generate_sha_256(context: dict, df, file_lst: List[str]) -> None: assert isinstance(df, DataFrame) # generating sha256 for list of data - context['training_sha256'] = {} + context[ConfigKW.TRAINING_SHA256] = {} # file_list is a list of filename strings for file in file_lst: # bids_df is a dataframe with column values path...filename...
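A minimal sketch of how the new `initialize_wandb()` helper added above might be called from a training entry point. The shape of the wandb config section is an assumption; only `WandbKW.WANDB_API_KEY` appears in this diff:

```
from ivadomed.keywords import WandbKW
from ivadomed.utils import initialize_wandb

# Hypothetical wandb section of a config file; only WANDB_API_KEY is
# grounded in this diff, the placeholder value is an assumption.
wandb_params = {WandbKW.WANDB_API_KEY: "0123456789abcdef"}

# True -> log to both WandB and Tensorboard; False -> Tensorboard only.
wandb_tracking = initialize_wandb(wandb_params)
```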
@@ -89,7 +133,7 @@ def generate_sha_256(context: dict, df, file_lst: List[str]) -> None: with open(file_path, "rb") as f: for byte_block in iter(lambda: f.read(4096), b""): sha256_hash.update(byte_block) - context['training_sha256'][file] = sha256_hash.hexdigest() + context[ConfigKW.TRAINING_SHA256][file] = sha256_hash.hexdigest() def save_onnx_model(model, inputs, model_path): @@ -177,6 +221,7 @@ def plot_transformed_sample(before, after, list_title=None, fname_out="", cmap=" list_title = ['Sample before transform', 'Sample after transform'] plt.interactive(False) + plt.rcParams.update({'figure.max_open_warning': 0}) plt.figure(figsize=(20, 10)) plt.subplot(1, 2, 1) @@ -192,7 +237,6 @@ def plot_transformed_sample(before, after, list_title=None, fname_out="", cmap=" if fname_out: plt.savefig(fname_out) else: - matplotlib.use('TkAgg') plt.show() @@ -209,7 +253,7 @@ def _git_info(commit_env='IVADOMED_COMMIT', branch_env='IVADOMED_BRANCH'): """ ivadomed_commit = os.getenv(commit_env, "unknown") ivadomed_branch = os.getenv(branch_env, "unknown") - if check_exe("git") and os.path.isdir(os.path.join(__ivadomed_dir__, ".git")): + if check_exe("git") and Path(__ivadomed_dir__, ".git").is_dir(): ivadomed_commit = __get_commit() or ivadomed_commit ivadomed_branch = __get_branch() or ivadomed_branch @@ -218,8 +262,8 @@ def _git_info(commit_env='IVADOMED_COMMIT', branch_env='IVADOMED_BRANCH'): else: install_type = 'package' - path_version = os.path.join(__ivadomed_dir__, 'ivadomed', 'version.txt') - with open(path_version) as f: + path_version = Path(__ivadomed_dir__, 'ivadomed', 'version.txt') + with path_version.open() as f: version_ivadomed = f.read().strip() return install_type, ivadomed_commit, ivadomed_branch, version_ivadomed @@ -235,15 +279,15 @@ def check_exe(name): """ def is_exe(fpath): - return os.path.isfile(fpath) and os.access(fpath, os.X_OK) + return Path(fpath).is_file() and os.access(fpath, os.X_OK) - fpath, fname = os.path.split(name) + fpath = Path(name).parent if fpath and is_exe(name): return fpath else: for path in os.environ["PATH"].split(os.pathsep): path = path.strip('"') - exe_file = os.path.join(path, name) + exe_file = str(Path(path, name)) if is_exe(exe_file): return exe_file @@ -264,10 +308,7 @@ def get_arguments(parser, args): ["-d", "SOME_ARG", "--model", "SOME_ARG"] """ try: - if args: - args = parser.parse_args(args) - else: - args = parser.parse_args(args=None if sys.argv[1:] else ['--help']) + args = parser.parse_args(args) except SystemExit as e: if e.code != 0: # Calling `--help` raises SystemExit with 0 exit code (i.e. not an ArgParseException) raise ArgParseException('Error parsing args') @@ -288,7 +329,7 @@ def __get_commit(path_to_git_folder=None): if path_to_git_folder is None: path_to_git_folder = __ivadomed_dir__ else: - path_to_git_folder = os.path.abspath(os.path.expanduser(path_to_git_folder)) + path_to_git_folder = Path(path_to_git_folder).expanduser().absolute() p = subprocess.Popen(["git", "rev-parse", "HEAD"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=path_to_git_folder) @@ -343,7 +384,7 @@ def _version_string(): return "{install_type}-{ivadomed_branch}-{ivadomed_commit}".format(**locals()) -__ivadomed_dir__ = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) +__ivadomed_dir__ = Path(__file__).resolve().parent.parent __version__ = _version_string() @@ -355,11 +396,13 @@ def get_command(args, context): elif args.segment: return "segment" else: - logger.info("No CLI argument given for command: ( --train | --test | --segment ). 
Will check config file for command...") + logger.info( + "No CLI argument given for command: ( --train | --test | --segment ). Will check config file for command...") try: - if context["command"] == "train" or context["command"] == "test" or context["command"] == "segment": - return context["command"] + if context[ConfigKW.COMMAND] == "train" or context[ConfigKW.COMMAND] == "test" or context[ + ConfigKW.COMMAND] == "segment": + return context[ConfigKW.COMMAND] else: logger.error("Specified invalid command argument in config file.") except AttributeError: @@ -370,10 +413,11 @@ def get_path_output(args, context): if args.path_output: return args.path_output else: - logger.info("CLI flag --path-output not used to specify output directory. Will check config file for directory...") + logger.info( + "CLI flag --path-output not used to specify output directory. Will check config file for directory...") try: - if context["path_output"]: - return context["path_output"] + if context[ConfigKW.PATH_OUTPUT]: + return context[ConfigKW.PATH_OUTPUT] except AttributeError: logger.error("Have not specified a path-output argument via CLI nor config file.") @@ -382,10 +426,11 @@ def get_path_data(args, context): if args.path_data: return args.path_data else: - logger.info("CLI flag --path-data not used to specify BIDS data directory. Will check config file for directory...") + logger.info( + "CLI flag --path-data not used to specify BIDS data directory. Will check config file for directory...") try: - if context["loader_parameters"]["path_data"]: - return context["loader_parameters"]["path_data"] + if context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.PATH_DATA]: + return context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.PATH_DATA] except AttributeError: logger.error("Have not specified a path-data argument via CLI nor config file.") @@ -404,7 +449,103 @@ def format_path_data(path_data): return path_data +def similarity_score(a: str, b: str) -> float: + """ + Use difflib's SequenceMatcher to compute the similarity between two strings. Helps make a better choice in terms of derivatives. + Args: + a: a string + b: another string + Returns: a score indicative of the similarity between the sequences. + """ + return SequenceMatcher(None, a, b).ratio() + + def init_ivadomed(): """Initialize ivadomed for typical terminal usage.""" # Display ivadomed version logger.info('\nivadomed ({})\n'.format(__version__)) + + +def print_stats(arr): + logger.info(f"\tMean: {np.mean(arr)} %") + logger.info(f"\tMedian: {np.median(arr)} %") + logger.info(f"\tInter-quartile range: [{np.percentile(arr, 25)}, {np.percentile(arr, 75)}] %") + + +def get_timestamp() -> str: + """ + Return a datetime string in the format YYYY-MM-DDTHHMMSS.(sub-precision) + Returns: + str: the formatted timestamp + """ + timestamp = datetime.datetime.now().isoformat().replace(":", "") + return timestamp + + +def get_system_memory() -> float: + """ + Return the system memory in GB. + Returns: + float: system memory in GB (None if the platform is not recognized) + """ + current_platform = platform.system() + if current_platform == "Linux": + return get_linux_system_memory() + elif current_platform == "Windows": + return get_win_system_memory() + elif current_platform == "Darwin": + return get_mac_system_memory() + + +def get_win_system_memory() -> float: + """ + Obtain the amount of memory available on a Windows system.
+ Returns: memory in GB + Source: https://stackoverflow.com/a/21589439 + """ + process = os.popen('wmic memorychip get capacity') + result = process.read() + process.close() + totalMem = 0 + for m in result.split(" \n\n")[1:-1]: + totalMem += int(m) + return totalMem / (1024 ** 3) + + +def get_linux_system_memory() -> float: + """ + Obtain the amount of memory available on a Linux system. + Returns: memory in GB + Source: https://stackoverflow.com/a/28161352 + """ + import os + mem_bytes = os.sysconf('SC_PAGE_SIZE') * os.sysconf('SC_PHYS_PAGES') # e.g. 4015976448 + mem_gib = mem_bytes / (1024. ** 3) # e.g. 3.74 + return mem_gib + + +def get_mac_system_memory() -> float: + """ + Obtain the amount of memory available on a macOS system. + Returns: memory in GB + Source: https://apple.stackexchange.com/a/4296 + """ + + import subprocess + import re + + # Get process info + ps = subprocess.Popen(['ps', '-caxm', '-orss,comm'], stdout=subprocess.PIPE).communicate()[0].decode() + + # Iterate processes + processLines = ps.split('\n') + sep = re.compile(r'[\s]+') + rssTotal = 0 # kB + for row in range(1, len(processLines)): + rowText = processLines[row].strip() + rowElements = sep.split(rowText) + try: + rss = float(rowElements[0]) * 1024 + except Exception: + rss = 0 # ignore... + rssTotal += rss + + return rssTotal / 1024 ** 3 diff --git a/ivadomed/version.txt b/ivadomed/version.txt index cecd169ae..089040982 100644 --- a/ivadomed/version.txt +++ b/ivadomed/version.txt @@ -1,3 +1 @@ -2.7.4 - - +2.9.10 diff --git a/ivadomed/visualize.py b/ivadomed/visualize.py index 6407e02ca..d946f909f 100644 --- a/ivadomed/visualize.py +++ b/ivadomed/visualize.py @@ -1,4 +1,3 @@ -import os import matplotlib.animation as anim import matplotlib.pyplot as plt import numpy as np @@ -6,9 +5,11 @@ import torchvision.utils as vutils from ivadomed import postprocessing as imed_postpro from ivadomed import inference as imed_inference +from pathlib import Path import torch import torch.nn.functional as F import torch.nn as nn +import wandb from torch.autograd import Variable from loguru import logger from ivadomed.loader import utils as imed_loader_utils @@ -128,8 +129,8 @@ def convert_labels_to_RGB(grid_img): return rgb_img -def save_tensorboard_img(writer, epoch, dataset_type, input_samples, gt_samples, preds, is_three_dim=False): - """Saves input images, gt and predictions in tensorboard. +def save_img(writer, epoch, dataset_type, input_samples, gt_samples, preds, wandb_tracking=False, is_three_dim=False): + """Saves input images, gt and predictions in tensorboard (and wandb, depending on the config file settings). Args: writer (SummaryWriter): Tensorboard's summary writer.
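A hedged sketch of calling the renamed `save_img()` from a training loop; the signature comes from this diff, while the tensor shapes and the SummaryWriter log directory are illustrative assumptions:

```
import torch
from torch.utils.tensorboard import SummaryWriter
from ivadomed import visualize as imed_visualize

# Dummy 2D batch (batch, channel, height, width); shapes are assumptions.
input_samples = torch.rand(4, 1, 64, 64)
gt_samples = (torch.rand(4, 1, 64, 64) > 0.5).float()
preds = torch.rand(4, 1, 64, 64)

writer = SummaryWriter(log_dir="path_output/tensorboard")  # assumed location
imed_visualize.save_img(writer, epoch=0, dataset_type="Validation",
                        input_samples=input_samples, gt_samples=gt_samples,
                        preds=preds, wandb_tracking=False)
```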
@@ -174,18 +175,22 @@ def save_tensorboard_img(writer, epoch, dataset_type, input_samples, gt_samples, normalize=True, scale_each=True) writer.add_image(dataset_type + '/Input', grid_img, epoch) + if wandb_tracking: + wandb.log({dataset_type+"/Input": wandb.Image(grid_img)}) grid_img = vutils.make_grid(convert_labels_to_RGB(preds), normalize=True, scale_each=True) - writer.add_image(dataset_type + '/Predictions', grid_img, epoch) + if wandb_tracking: + wandb.log({dataset_type+"/Predictions": wandb.Image(grid_img)}) grid_img = vutils.make_grid(convert_labels_to_RGB(gt_samples), normalize=True, scale_each=True) - writer.add_image(dataset_type + '/Ground Truth', grid_img, epoch) + if wandb_tracking: + wandb.log({dataset_type+"/Ground-Truth": wandb.Image(grid_img)}) def save_feature_map(batch, layer_name, path_output, model, test_input, slice_axis): @@ -199,8 +204,8 @@ def save_feature_map(batch, layer_name, path_output, model, test_input, slice_ax test_input (Tensor): slice_axis (int): Indicates the axis used for the 2D slice extraction: Sagittal: 0, Coronal: 1, Axial: 2. """ - if not os.path.exists(os.path.join(path_output, layer_name)): - os.mkdir(os.path.join(path_output, layer_name)) + if not Path(path_output, layer_name).exists(): + Path(path_output, layer_name).mkdir() # Save for subject in batch for i in range(batch['input'].size(0)): @@ -217,20 +222,28 @@ def save_feature_map(batch, layer_name, path_output, model, test_input, slice_ax path = batch["input_metadata"][0][i]["input_filenames"] basename = path.split('/')[-1] - save_directory = os.path.join(path_output, layer_name, basename) + save_directory = Path(path_output, layer_name, basename) # Write the attentions to a nifti image nib_ref = nib.load(path) nib_ref_can = nib.as_closest_canonical(nib_ref) oriented_image = imed_loader_utils.reorient_image(orig_input_img[0, 0, :, :, :], slice_axis, nib_ref, nib_ref_can) - nib_pred = nib.Nifti1Image(oriented_image, nib_ref.affine) + nib_pred = nib.Nifti1Image( + dataobj=oriented_image, + affine=nib_ref.header.get_best_affine(), + header=nib_ref.header.copy() + ) nib.save(nib_pred, save_directory) basename = basename.split(".")[0] + "_att.nii.gz" - save_directory = os.path.join(path_output, layer_name, basename) + save_directory = Path(path_output, layer_name, basename) attention_map = imed_loader_utils.reorient_image(upsampled_attention[0, 0, :, :, :], slice_axis, nib_ref, nib_ref_can) - nib_pred = nib.Nifti1Image(attention_map, nib_ref.affine) + nib_pred = nib.Nifti1Image( + dataobj=attention_map, + affine=nib_ref.header.get_best_affine(), + header=nib_ref.header.copy() + ) nib.save(nib_pred, save_directory) diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 000000000..8e3069919 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,10 @@ +[pytest] +filterwarnings = + ignore:::torch.*: + ignore:::torchvision.*: + ignore:::matplotlib.*: + ignore:::seaborn.*: + ignore:::numpy.*: + ignore:::setuptools.*: + ignore:::imageio.*: + ignore:::bids.*: diff --git a/requirements.txt b/requirements.txt index 4075857b3..26d14f197 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,25 @@ -# Common Requirements --r requirements_common.txt - -# Find older pytorch pip wheel from PyTorch Org website ---find-links https://download.pytorch.org/whl/torch_stable.html - -# Torch CPU Version Windows/Linux -torch==1.5.0+cpu; sys_platform != "darwin" -torchvision==0.6.0+cpu; sys_platform != "darwin" - -# Torch CPU Version macOS (macOS has no CPU/GPU version separation) -torch==1.5.0; sys_platform == 
"darwin" -torchvision==0.6.0; sys_platform == "darwin" \ No newline at end of file +csv-diff>=1.0 +loguru~=0.5 +imageio>=2.31.4 +joblib~=1.0 +matplotlib>=3.3.0 +nibabel~=5.2 +# v1.16.2/v1.17.0 aren't built correctly for Windows: +# https://github.com/onnx/onnx/issues/6267 +onnx<1.16.2 +# 1.7.0>onnxruntime>=1.5.1 required `brew install libomp` on macOS. +# So, pin to >=1.7.0 to avoid having to ask users to install libomp. +# Avoid version 1.16.0 due to: https://github.com/spinalcordtoolbox/spinalcordtoolbox/issues/4225 +onnxruntime>=1.7.0,!=1.16.0 +pandas>=1.1 +pybids>=0.14.0,<0.15.6 +scikit-learn>=0.20.3 +scikit-image~=0.17 +seaborn~=0.11 +tensorboard>=1.15.0 +tqdm>=4.30 +scipy +torchio>=0.18.68 +torch>=1.8.1 +torchvision>=0.9.1 +wandb>=0.12.11 diff --git a/requirements_common.txt b/requirements_common.txt deleted file mode 100644 index bded6f88d..000000000 --- a/requirements_common.txt +++ /dev/null @@ -1,16 +0,0 @@ -csv-diff>=1.0 -h5py==2.10.0 -loguru~=0.5 -joblib~=1.0 -matplotlib>=3.3.0 -nibabel~=3.2 -onnxruntime==1.4.0 -pandas~=1.1 -pillow>=7.0.0 -pybids>=0.12.4 -scikit-learn>=0.20.3 -scikit-image~=0.17 -seaborn~=0.11 -tensorboard~=2.4 -tqdm>=4.30 -scipy diff --git a/requirements_dev.txt b/requirements_dev.txt deleted file mode 100644 index 7a6da391d..000000000 --- a/requirements_dev.txt +++ /dev/null @@ -1,18 +0,0 @@ -# CPU Compatible Requirements --r requirements.txt - -# Dev: -# Development, documentation, testing related dependencies. -pytest~=6.2 -pytest-cov -pytest-console-scripts -pytest-ordering~=0.6 -sphinx -flake8 -coverage -coveralls -pypandoc -sphinx_rtd_theme -sphinx-jsonschema~=1.16 -pytest-console-scripts~=1.1 -pre-commit==2.10.1 diff --git a/requirements_gpu.txt b/requirements_gpu.txt deleted file mode 100644 index d939159f4..000000000 --- a/requirements_gpu.txt +++ /dev/null @@ -1,7 +0,0 @@ -# Common Requirements --r requirements_common.txt - -# Torch GPU Version ---find-links https://download.pytorch.org/whl/torch_stable.html -torch==1.5.0 -torchvision==0.6.0 \ No newline at end of file diff --git a/setup.py b/setup.py index 4ab554cb0..f7c781aaf 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ # Manually specified, more generic version of the software. 
# See: https://stackoverflow.com/a/49684835 -with open('requirements_common.txt') as f: +with open('requirements.txt') as f: requirements = f.readlines() # Get README @@ -17,6 +17,38 @@ with open(path_version) as f: version = f.read().strip() +extra_requirements = { + 'docs': [ + # pin sphinx to match what RTD uses: + # https://github.com/readthedocs/readthedocs.org/blob/ecac31de54bbb2c100f933e86eb22b0f4389ba84/requirements/pip.txt#L16 + 'sphinx', + 'sphinx_rtd_theme', + 'sphinx-tabs', + 'sphinx-toolbox', + 'sphinx-jsonschema', + 'pypandoc', + ], + 'tests': [ + 'pytest~=6.2', + 'pytest-cases~=3.6.8', + 'pytest-cov', + 'pytest-ordering~=0.6', + 'pytest-console-scripts~=1.1', + 'coverage', + 'coveralls', + ], + 'contrib': [ + 'pre-commit>=2.10.1', + 'flake8', + ] +} + +extra_requirements['dev'] = [ + requirements, + extra_requirements['docs'], + extra_requirements['tests'], + extra_requirements['contrib'], + ] setup( name='ivadomed', @@ -32,18 +64,11 @@ 'Intended Audience :: Developers', 'Programming Language :: Python :: 3', ], - python_requires='>=3.6, <3.9', + python_requires='>=3.7,<3.11', packages=find_packages(exclude=['docs', 'tests']), include_package_data=True, install_requires=requirements, - extras_require={ - 'docs': [ # pin sphinx to match what RTD uses: - # https://github.com/readthedocs/readthedocs.org/blob/ecac31de54bbb2c100f933e86eb22b0f4389ba84/requirements/pip.txt#L16 - 'sphinx<2', - 'sphinx-rtd-theme<0.5', - ], - 'dev': ["pre-commit>=2.10.0"] - }, + extras_require=extra_requirements, entry_points={ 'console_scripts': [ 'ivadomed=ivadomed.main:run_main', @@ -54,6 +79,7 @@ 'ivadomed_convert_to_onnx=ivadomed.scripts.convert_to_onnx:main', 'ivadomed_extract_small_dataset=ivadomed.scripts.extract_small_dataset:main', 'ivadomed_download_data=ivadomed.scripts.download_data:main', + 'ivadomed_segment_image=ivadomed.scripts.segment_image:main', 'ivadomed_training_curve=ivadomed.scripts.training_curve:main', 'ivadomed_visualize_and_compare_testing_models=ivadomed.scripts.visualize_and_compare_testing_models:main' ], diff --git a/testing/README.md b/testing/README.md index 1dd623b23..8682b253a 100644 --- a/testing/README.md +++ b/testing/README.md @@ -10,21 +10,22 @@ Checkout `ivadomed/.github/workflows/run_tests.yml` to see how tests are run on ## Running Locally -1. Download the required dataset(s) using the `ivadomed` command line tools: +1. Install dependencies ``` cd ivadomed # root of the repo -ivadomed_download_data -d data_testing -o data_testing # for unit tests -ivadomed_download_data -d data_functional_testing -o data_functional_testing # for functional tests +pip install -e .[dev] ``` + 2. 
To run all tests: ``` -pytest +pytest -v ``` + or, to run specific tests: ``` -pytest testing/functional_tests/ -pytest testing/unit_tests/ -pytest testing/functional_tests/test_example.py +pytest -v testing/functional_tests/ +pytest -v testing/unit_tests/ +pytest -v testing/functional_tests/test_example.py ``` ## Wiki diff --git a/testing/common_testing_util.py b/testing/common_testing_util.py index d64259297..49c88f76e 100644 --- a/testing/common_testing_util.py +++ b/testing/common_testing_util.py @@ -1,7 +1,16 @@ +import os from pathlib import Path + +import json +import csv_diff +import pandas as pd import pytest +from loguru import logger + +from ivadomed.keywords import BidsDataFrameKW, LoaderParamsKW, ContrastParamsKW, ConfigKW +from ivadomed.loader.bids_dataframe import BidsDataframe from ivadomed.scripts import download_data as ivadomed_download_data -import os +from ivadomed.main import set_loader_params import shutil import sys @@ -23,23 +32,26 @@ path_data_functional_source: str = str(path_repo_root / "data_functional_testing") path_data_functional_tmp: str = str(path_repo_root / "tmp" / Path(path_data_functional_source).name) +path_data_multi_sessions_contrasts_source: Path = path_repo_root / "data_multi_testing" +path_data_multi_sessions_contrasts_tmp: Path = path_repo_root / "tmp" / Path( + path_data_multi_sessions_contrasts_source).name + -def download_dataset(dataset: str = 'data_testing', verbose=True): +def download_dataset(dataset: str = 'data_testing'): """Download testing data from internet. Args: dataset (str): the name of the dataset to download - verbose (bool): whether or not to print """ path_dataset: Path = path_repo_root / dataset # Early abort if testing data already exist. if path_dataset.exists(): - printv(f'\nTesting data files appear to already exist at {path_dataset}, aborting ddownload', verbose) + logger.warning(f"\nTesting data files appear to already exist at {path_dataset}, aborting download") return - printv(f'\nDownloading testing data... to {dataset}', verbose) + logger.info(f"\nDownloading testing data... to {dataset}") # Call the ivadomed download CLI ivadomed_download_data.main([ @@ -48,17 +60,16 @@ def download_dataset(dataset: str = 'data_testing', verbose=True): ]) -def remove_dataset(dataset: str = 'data_testing', verbose=True): +def remove_dataset(dataset: str = 'data_testing'): """Recursively remove the data_testing folder. Args: dataset (str): the name of the dataset to remove - verbose (bool): whether or not to print """ - path_dataset = os.path.join(path_temp, dataset) + path_dataset = Path(path_temp, dataset) - printv("rm -rf %s" % (path_dataset), verbose=verbose, type="code") + logger.debug(f"rm -rf {path_dataset}") shutil.rmtree(path_dataset, ignore_errors=True) @@ -66,45 +77,3 @@ def remove_dataset(dataset: str = 'data_testing', verbose=True): def remove_tmp_dir(): """Recursively remove the ``tmp`` directory if it exists.""" shutil.rmtree(path_temp, ignore_errors=True) - - -class bcolors(object): - """Class for different colours.""" - - normal = '\033[0m' - red = '\033[91m' - green = '\033[92m' - yellow = '\033[93m' - blue = '\033[94m' - magenta = '\033[95m' - cyan = '\033[96m' - bold = '\033[1m' - underline = '\033[4m' - - -def printv(string, verbose=1, type='normal'): - """Print color-coded messages, depending on verbose status. - - Only use in command-line programs (e.g. sct_propseg). 
- """ - colors = { - 'normal': bcolors.normal, - 'info': bcolors.green, - 'warning': bcolors.yellow, - 'error': bcolors.red, - 'code': bcolors.blue, - 'bold': bcolors.bold, - 'process': bcolors.magenta - } - - if verbose: - # The try/except is there in case stdout does not have isatty field (it did happen to me) - try: - # Print color only if the output is the terminal - if sys.stdout.isatty(): - color = colors.get(type, bcolors.normal) - print(color + string + bcolors.normal) - else: - print(string) - except Exception: - print(string) diff --git a/testing/functional_tests/t_template.py b/testing/functional_tests/t_template.py index ccd2cf0d4..f58e0311b 100644 --- a/testing/functional_tests/t_template.py +++ b/testing/functional_tests/t_template.py @@ -55,9 +55,9 @@ """ import logging -import os from testing.functional_tests.t_utils import create_tmp_dir, __data_testing_dir__, __tmp_dir__ from testing.common_testing_util import remove_tmp_dir +from pathlib import Path logger = logging.getLogger(__name__) @@ -76,10 +76,10 @@ def setup_function(): def test_template(): # Test Input Files: all test input files should be in tmp/data_functional_testing # aka __data_testing_dir__ - logger.info(os.listdir(__data_testing_dir__)) + logger.info([f.name for f in Path(__data_testing_dir__).iterdir()]) # Test Output Files: put your output files in tmp folder - os.mkdir(os.path.join(__tmp_dir__, 'my_output_dir')) + Path(__tmp_dir__, 'my_output_dir').mkdir(parents=True, exist_ok=True) assert 1 == 1 diff --git a/testing/functional_tests/t_utils.py b/testing/functional_tests/t_utils.py index 2b79050c7..cb364afe0 100644 --- a/testing/functional_tests/t_utils.py +++ b/testing/functional_tests/t_utils.py @@ -1,12 +1,12 @@ -import os import pytest import shutil +from pathlib import Path from ivadomed.utils import init_ivadomed from ivadomed import config_manager as imed_config_manager from testing.common_testing_util import remove_tmp_dir, path_repo_root, path_temp, path_data_functional_source, \ path_data_functional_tmp, download_dataset -__test_dir__ = os.path.join(path_repo_root, 'testing/functional_tests') +__test_dir__ = Path(path_repo_root, 'testing/functional_tests') __data_testing_dir__ = path_data_functional_source __tmp_dir__ = path_temp @@ -28,9 +28,9 @@ def check_sha256(file_config): initial_config = imed_config_manager.ConfigurationManager(file_config).get_config() result = [] name = "config_file.json" - for root, dirs, files in os.walk(os.path.dirname(initial_config["path_output"])): - if name in files: - result.append(os.path.join(root, name)) + for path_object in Path(initial_config["path_output"]).parent.glob("**/*"): + if path_object.is_file() and name in path_object.name: + result.append(str(path_object)) assert result != [] for generated_config in result: config = imed_config_manager.ConfigurationManager(generated_config).get_config() @@ -51,7 +51,7 @@ def create_tmp_dir(copy_data_testing_dir=True): into the ``tmp`` folder. 
""" remove_tmp_dir() - os.mkdir(path_temp) - if os.path.exists(path_data_functional_source) and copy_data_testing_dir: + Path(path_temp).mkdir(parents=True, exist_ok=True) + if Path(path_data_functional_source).exists() and copy_data_testing_dir: shutil.copytree(path_data_functional_source, path_data_functional_tmp) diff --git a/testing/functional_tests/test_automate_training.py b/testing/functional_tests/test_automate_training.py index ae54ff19b..5f3177466 100644 --- a/testing/functional_tests/test_automate_training.py +++ b/testing/functional_tests/test_automate_training.py @@ -1,7 +1,8 @@ import logging -import os import pytest +import os from pytest_console_scripts import script_runner +from pathlib import Path from testing.functional_tests.t_utils import __tmp_dir__, create_tmp_dir, __data_testing_dir__, \ download_functional_test_files, check_sha256 from testing.common_testing_util import remove_tmp_dir @@ -15,48 +16,78 @@ def setup_function(): @pytest.mark.script_launch_mode('subprocess') def test_automate_training(download_functional_test_files, script_runner): - file_config = os.path.join(__data_testing_dir__, 'automate_training_config.json') - file_config_hyper = os.path.join(__data_testing_dir__, - 'automate_training_hyperparameter_opt.json') - __output_dir__ = os.path.join(__tmp_dir__, 'results') + file_config = Path(__data_testing_dir__, 'automate_training_config.json') + file_config_hyper = Path(__data_testing_dir__, 'automate_training_hyperparameter_opt.json') + __output_dir__ = Path(__tmp_dir__, 'results') ret = script_runner.run('ivadomed_automate_training', '--config', f'{file_config}', '--config-hyper', f'{file_config_hyper}', '--path-data', f'{__data_testing_dir__}', '--output_dir', f'{__output_dir__}') - print(f"{ret.stdout}") - print(f"{ret.stderr}") + logger.debug(f"{ret.stdout}") + logger.debug(f"{ret.stderr}") assert ret.success - assert os.path.exists(os.path.join(__output_dir__, 'detailed_results.csv')) - assert os.path.exists(os.path.join(__output_dir__, 'temporary_results.csv')) - assert os.path.exists(os.path.join(__output_dir__, 'average_eval.csv')) + assert Path(__output_dir__, 'detailed_results.csv').exists() + assert Path(__output_dir__, 'temporary_results.csv').exists() + assert Path(__output_dir__, 'average_eval.csv').exists() # check sha256 is recorded in config_file.json - check_sha256(file_config) + check_sha256(str(file_config)) -@pytest.mark.script_launch_mode('subprocess') -def test_automate_training_run_test(download_functional_test_files, script_runner): +def test_automate_training_run_test_debug(download_functional_test_files): + """A unit test similar to test_automate_training_run_test but allow step through (instead of using script caller/ + subprocess mode which cannot be stepped. Other than that, nothing else really changed and is exactly the same. 
+ Very useful for debugging this high-level function to spot problems. + + Fixture Required: + download_functional_test_files: + """ file_config = os.path.join(__data_testing_dir__, 'automate_training_config.json') file_config_hyper = os.path.join(__data_testing_dir__, 'automate_training_hyperparameter_opt.json') __output_dir__ = os.path.join(__tmp_dir__, 'results') + from ivadomed.scripts.automate_training import automate_training + + automate_training(file_config=file_config, + file_config_hyper=file_config_hyper, + path_data=__data_testing_dir__, + run_test=True, + output_dir=__output_dir__, + fixed_split=False, + all_combin=True, + n_iterations=1, + all_logs=True, + multi_params=True, + ) + + assert Path(__output_dir__, 'detailed_results.csv').exists() + assert Path(__output_dir__, 'temporary_results.csv').exists() + assert Path(__output_dir__, 'average_eval.csv').exists() + + +@pytest.mark.script_launch_mode('subprocess') +def test_automate_training_run_test(download_functional_test_files, script_runner): + file_config = Path(__data_testing_dir__, 'automate_training_config.json') + file_config_hyper = Path(__data_testing_dir__, 'automate_training_hyperparameter_opt.json') + __output_dir__ = Path(__tmp_dir__, 'results') + ret = script_runner.run('ivadomed_automate_training', '--config', f'{file_config}', '--config-hyper', f'{file_config_hyper}', '--path-data', f'{__data_testing_dir__}', '--output_dir', f'{__output_dir__}', '--run-test') - print(f"{ret.stdout}") - print(f"{ret.stderr}") + logger.debug(f"{ret.stdout}") + logger.debug(f"{ret.stderr}") assert ret.success - assert os.path.exists(os.path.join(__output_dir__, 'detailed_results.csv')) - assert os.path.exists(os.path.join(__output_dir__, 'temporary_results.csv')) - assert os.path.exists(os.path.join(__output_dir__, 'average_eval.csv')) + assert Path(__output_dir__, 'detailed_results.csv').exists() + assert Path(__output_dir__, 'temporary_results.csv').exists() + assert Path(__output_dir__, 'average_eval.csv').exists() # check sha256 is recorded in config_file.json - check_sha256(file_config) + check_sha256(str(file_config)) -def teardown_function(): - remove_tmp_dir() +# def teardown_function(): +#     remove_tmp_dir() diff --git a/testing/functional_tests/test_compare_models.py b/testing/functional_tests/test_compare_models.py index aba6977e2..327bfb83d 100644 --- a/testing/functional_tests/test_compare_models.py +++ b/testing/functional_tests/test_compare_models.py @@ -1,9 +1,9 @@ import logging -import os from testing.functional_tests.t_utils import __tmp_dir__, create_tmp_dir, __data_testing_dir__, \ download_functional_test_files from testing.common_testing_util import remove_tmp_dir from ivadomed.scripts import compare_models +from pathlib import Path logger = logging.getLogger(__name__) @@ -12,12 +12,12 @@ def setup_function(): def test_compare_models(download_functional_test_files): - __output_file__ = os.path.join(__tmp_dir__, 'comparison_results.csv') - path_df = os.path.join(__data_testing_dir__, 'temporary_results.csv') - compare_models.main(args=['-df', path_df, + __output_file__ = Path(__tmp_dir__, 'comparison_results.csv') + path_df = Path(__data_testing_dir__, 'temporary_results.csv') + compare_models.main(args=['-df', str(path_df), '-n', '2', - '-o', __output_file__]) - assert os.path.exists(__output_file__) + '-o', str(__output_file__)]) + assert __output_file__.exists() def teardown_function(): diff --git a/testing/functional_tests/test_convert_to_onnx.py b/testing/functional_tests/test_convert_to_onnx.py index
a6421b177..24a4999c4 100644 --- a/testing/functional_tests/test_convert_to_onnx.py +++ b/testing/functional_tests/test_convert_to_onnx.py @@ -1,14 +1,14 @@ import logging import pytest -import os from testing.functional_tests.t_utils import create_tmp_dir, __data_testing_dir__, download_functional_test_files from testing.common_testing_util import remove_tmp_dir from ivadomed.scripts import convert_to_onnx from ivadomed.utils import ArgParseException +from pathlib import Path logger = logging.getLogger(__name__) -__model_path__ = os.path.join(__data_testing_dir__, 'spinegeneric_model.pt') +__model_path__ = Path(__data_testing_dir__, 'spinegeneric_model.pt') def setup_function(): @@ -17,7 +17,7 @@ def setup_function(): def test_convert_to_onnx(download_functional_test_files): convert_to_onnx.main(args=['-m', f'{__model_path__}', '-d', '2']) - assert os.path.exists(os.path.join(__data_testing_dir__, 'spinegeneric_model.onnx')) + assert Path(__data_testing_dir__, 'spinegeneric_model.onnx').exists() def test_convert_to_onnx_no_model(): diff --git a/testing/functional_tests/test_download_data.py b/testing/functional_tests/test_download_data.py index 1b099999a..67d23a188 100644 --- a/testing/functional_tests/test_download_data.py +++ b/testing/functional_tests/test_download_data.py @@ -1,10 +1,10 @@ import logging import pytest -import os from testing.functional_tests.t_utils import __tmp_dir__, create_tmp_dir from testing.common_testing_util import remove_dataset, remove_tmp_dir from ivadomed.scripts import download_data from ivadomed.utils import ArgParseException +from pathlib import Path logger = logging.getLogger(__name__) @@ -14,10 +14,10 @@ def setup_function(): def test_download_data(): for dataset in download_data.DICT_URL: - output_folder = os.path.join(__tmp_dir__, dataset) + output_folder = Path(__tmp_dir__, dataset) download_data.main(args=['-d', dataset, - '-o', output_folder]) - assert os.path.exists(output_folder) + '-o', str(output_folder)]) + assert output_folder.exists() remove_dataset(dataset=dataset) diff --git a/testing/functional_tests/test_extract_small_dataset.py b/testing/functional_tests/test_extract_small_dataset.py index 279489c67..c4e6ea109 100644 --- a/testing/functional_tests/test_extract_small_dataset.py +++ b/testing/functional_tests/test_extract_small_dataset.py @@ -1,9 +1,9 @@ import logging -import os from testing.functional_tests.t_utils import __tmp_dir__, create_tmp_dir, __data_testing_dir__, \ download_functional_test_files from testing.common_testing_util import remove_tmp_dir from ivadomed.scripts import extract_small_dataset +from pathlib import Path logger = logging.getLogger(__name__) @@ -12,11 +12,11 @@ def setup_function(): def test_extract_small_dataset_default_n(download_functional_test_files): - __output_dir__ = os.path.join(__tmp_dir__, 'output_extract_small_dataset') - extract_small_dataset.main(args=['--input', __data_testing_dir__, - '--output', __output_dir__]) - assert os.path.exists(__output_dir__) - output_dir_list = os.listdir(__output_dir__) + __output_dir__ = Path(__tmp_dir__, 'output_extract_small_dataset') + extract_small_dataset.main(args=['--input', str(__data_testing_dir__), + '--output', str(__output_dir__)]) + assert __output_dir__.exists() + output_dir_list = [f.name for f in __output_dir__.iterdir()] assert 'derivatives' in output_dir_list assert 'participants.tsv' in output_dir_list assert 'dataset_description.json' in output_dir_list @@ -26,12 +26,12 @@ def 
test_extract_small_dataset_default_n(download_functional_test_files): def test_extract_small_dataset_n_2(download_functional_test_files): - __output_dir__ = os.path.join(__tmp_dir__, 'output_extract_small_dataset_2') - extract_small_dataset.main(args=['--input', __data_testing_dir__, - '--output', __output_dir__, + __output_dir__ = Path(__tmp_dir__, 'output_extract_small_dataset_2') + extract_small_dataset.main(args=['--input', str(__data_testing_dir__), + '--output', str(__output_dir__), '-n', '2']) - assert os.path.exists(__output_dir__) - output_dir_list = os.listdir(__output_dir__) + assert __output_dir__.exists() + output_dir_list = [f.name for f in __output_dir__.iterdir()] assert 'derivatives' in output_dir_list assert 'participants.tsv' in output_dir_list assert 'dataset_description.json' in output_dir_list @@ -44,12 +44,12 @@ def test_extract_small_dataset_n_2(download_functional_test_files): def test_extract_small_dataset_no_derivatives(download_functional_test_files): - __output_dir__ = os.path.join(__tmp_dir__, 'output_extract_small_dataset_3') - extract_small_dataset.main(args=['--input', __data_testing_dir__, - '--output', __output_dir__, + __output_dir__ = Path(__tmp_dir__, 'output_extract_small_dataset_3') + extract_small_dataset.main(args=['--input', str(__data_testing_dir__), + '--output', str(__output_dir__), '-d', '0']) - assert os.path.exists(__output_dir__) - output_dir_list = os.listdir(__output_dir__) + assert __output_dir__.exists() + output_dir_list = [f.name for f in __output_dir__.iterdir()] assert 'derivatives' not in output_dir_list assert 'participants.tsv' in output_dir_list assert 'dataset_description.json' in output_dir_list @@ -59,12 +59,12 @@ def test_extract_small_dataset_no_derivatives(download_functional_test_files): def test_extract_small_dataset_contrast_list(download_functional_test_files): - __output_dir__ = os.path.join(__tmp_dir__, 'output_extract_small_dataset_4') - extract_small_dataset.main(args=['--input', __data_testing_dir__, - '--output', __output_dir__, + __output_dir__ = Path(__tmp_dir__, 'output_extract_small_dataset_4') + extract_small_dataset.main(args=['--input', str(__data_testing_dir__), + '--output', str(__output_dir__), '-c', 'T1w, T2w']) - assert os.path.exists(__output_dir__) - output_dir_list = os.listdir(__output_dir__) + assert __output_dir__.exists() + output_dir_list = [f.name for f in __output_dir__.iterdir()] assert 'derivatives' in output_dir_list assert 'participants.tsv' in output_dir_list assert 'dataset_description.json' in output_dir_list diff --git a/testing/functional_tests/test_prepare_dataset_vertebral_labelling.py b/testing/functional_tests/test_prepare_dataset_vertebral_labelling.py index ac1510cc0..0a403b90d 100644 --- a/testing/functional_tests/test_prepare_dataset_vertebral_labelling.py +++ b/testing/functional_tests/test_prepare_dataset_vertebral_labelling.py @@ -1,8 +1,8 @@ import logging -import os from testing.functional_tests.t_utils import create_tmp_dir, __data_testing_dir__, download_functional_test_files from testing.common_testing_util import remove_tmp_dir from ivadomed.scripts import prepare_dataset_vertebral_labeling +from pathlib import Path logger = logging.getLogger(__name__) @@ -14,12 +14,12 @@ def test_prepare_dataset_vertebral_labeling(download_functional_test_files): prepare_dataset_vertebral_labeling.main(args=['--path', __data_testing_dir__, '--suffix', '_T2w', '--aim', '3']) - assert os.path.exists(os.path.join( - __data_testing_dir__, 
"derivatives/labels/sub-unf01/anat/sub-unf01_T2w_mid_heatmap3.nii.gz")) - assert os.path.exists(os.path.join( - __data_testing_dir__, "derivatives/labels/sub-unf02/anat/sub-unf02_T2w_mid_heatmap3.nii.gz")) - assert os.path.exists(os.path.join( - __data_testing_dir__, "derivatives/labels/sub-unf03/anat/sub-unf03_T2w_mid_heatmap3.nii.gz")) + assert Path( + __data_testing_dir__, "derivatives/labels/sub-unf01/anat/sub-unf01_T2w_mid_heatmap3.nii.gz").exists() + assert Path( + __data_testing_dir__, "derivatives/labels/sub-unf02/anat/sub-unf02_T2w_mid_heatmap3.nii.gz").exists() + assert Path( + __data_testing_dir__, "derivatives/labels/sub-unf03/anat/sub-unf03_T2w_mid_heatmap3.nii.gz").exists() def teardown_function(): diff --git a/testing/functional_tests/test_segment_volume.py b/testing/functional_tests/test_segment_volume.py index bb4c88103..961d3281d 100644 --- a/testing/functional_tests/test_segment_volume.py +++ b/testing/functional_tests/test_segment_volume.py @@ -1,5 +1,4 @@ import json -import os import shutil import nibabel as nib import numpy as np @@ -9,9 +8,10 @@ from ivadomed import inference as imed_inference from testing.functional_tests.t_utils import create_tmp_dir, __data_testing_dir__, __tmp_dir__, download_functional_test_files from testing.common_testing_util import remove_tmp_dir +from pathlib import Path BATCH_SIZE = 1 -DROPOUT = 0.4 +DROPOUT = 0.3 BN = 0.1 SLICE_AXIS = 2 LENGTH_3D = [96, 96, 16] @@ -22,9 +22,9 @@ def setup_function(): create_tmp_dir() -PATH_MODEL = os.path.join(__tmp_dir__, "model_test") -IMAGE_PATH = os.path.join(__data_testing_dir__, "sub-unf01", "anat", "sub-unf01_T1w.nii.gz") -ROI_PATH = os.path.join(__data_testing_dir__, "derivatives", "labels", "sub-unf01", "anat", +PATH_MODEL = Path(__tmp_dir__, "model_test") +PATH_IMAGE = Path(__data_testing_dir__, "sub-unf01", "anat", "sub-unf01_T1w.nii.gz") +PATH_ROI = Path(__data_testing_dir__, "derivatives", "labels", "sub-unf01", "anat", "sub-unf01_T1w_seg-manual.nii.gz") @@ -32,13 +32,13 @@ def test_segment_volume_2d_NumpyToTensor_retrocompatibility(download_functional_ model = imed_models.Unet(in_channel=1, out_channel=1, depth=2, - drop_rate=DROPOUT, + dropout_rate=DROPOUT, bn_momentum=BN) - if not os.path.exists(PATH_MODEL): - os.mkdir(PATH_MODEL) + if not PATH_MODEL.exists(): + PATH_MODEL.mkdir(parents=True, exist_ok=True) - torch.save(model, os.path.join(PATH_MODEL, "model_test.pt")) + torch.save(model, Path(PATH_MODEL, "model_test.pt")) config = { "loader_parameters": { "slice_filter_params": { @@ -68,13 +68,13 @@ def test_segment_volume_2d_NumpyToTensor_retrocompatibility(download_functional_ } } - PATH_CONFIG = os.path.join(PATH_MODEL, 'model_test.json') + PATH_CONFIG = Path(PATH_MODEL, 'model_test.json') with open(PATH_CONFIG, 'w') as fp: json.dump(config, fp) - nib_lst, _ = imed_inference.segment_volume(PATH_MODEL, [IMAGE_PATH], options={'fname_prior': ROI_PATH}) + nib_lst, _ = imed_inference.segment_volume(str(PATH_MODEL), [str(PATH_IMAGE)], options={'fname_prior': str(PATH_ROI)}) nib_img = nib_lst[0] - assert np.squeeze(nib_img.get_fdata()).shape == nib.load(IMAGE_PATH).shape + assert np.squeeze(nib_img.get_fdata()).shape == nib.load(PATH_IMAGE).shape assert (nib_img.dataobj.max() <= 1.0) and (nib_img.dataobj.min() >= 0.0) assert nib_img.dataobj.dtype == 'float32' @@ -85,13 +85,13 @@ def test_segment_volume_2d(download_functional_test_files): model = imed_models.Unet(in_channel=1, out_channel=1, depth=2, - drop_rate=DROPOUT, + dropout_rate=DROPOUT, bn_momentum=BN) - if not 
os.path.exists(PATH_MODEL): - os.mkdir(PATH_MODEL) + if not PATH_MODEL.exists(): + PATH_MODEL.mkdir(parents=True, exist_ok=True) - torch.save(model, os.path.join(PATH_MODEL, "model_test.pt")) + torch.save(model, Path(PATH_MODEL, "model_test.pt")) config = { "loader_parameters": { "slice_filter_params": { @@ -120,13 +120,13 @@ def test_segment_volume_2d(download_functional_test_files): } } - PATH_CONFIG = os.path.join(PATH_MODEL, 'model_test.json') - with open(PATH_CONFIG, 'w') as fp: + PATH_CONFIG = Path(PATH_MODEL, 'model_test.json') + with PATH_CONFIG.open(mode='w') as fp: json.dump(config, fp) - nib_lst, _ = imed_inference.segment_volume(PATH_MODEL, [IMAGE_PATH], options={'fname_prior': ROI_PATH}) + nib_lst, _ = imed_inference.segment_volume(str(PATH_MODEL), [str(PATH_IMAGE)], options={'fname_prior': str(PATH_ROI)}) nib_img = nib_lst[0] - assert np.squeeze(nib_img.get_fdata()).shape == nib.load(IMAGE_PATH).shape + assert np.squeeze(nib_img.get_fdata()).shape == nib.load(PATH_IMAGE).shape assert (nib_img.dataobj.max() <= 1.0) and (nib_img.dataobj.min() >= 0.0) assert nib_img.dataobj.dtype == 'float32' @@ -137,13 +137,13 @@ def test_segment_volume_2d_no_prepro_transform(download_functional_test_files): model = imed_models.Unet(in_channel=1, out_channel=1, depth=2, - drop_rate=DROPOUT, + dropout_rate=DROPOUT, bn_momentum=BN) - if not os.path.exists(PATH_MODEL): - os.mkdir(PATH_MODEL) + if not PATH_MODEL.exists(): + PATH_MODEL.mkdir() - torch.save(model, os.path.join(PATH_MODEL, "model_test.pt")) + torch.save(model, Path(PATH_MODEL, "model_test.pt")) config = { "loader_parameters": { "slice_filter_params": { @@ -165,13 +165,13 @@ def test_segment_volume_2d_no_prepro_transform(download_functional_test_files): } } - PATH_CONFIG = os.path.join(PATH_MODEL, 'model_test.json') - with open(PATH_CONFIG, 'w') as fp: + PATH_CONFIG = Path(PATH_MODEL, 'model_test.json') + with PATH_CONFIG.open(mode='w') as fp: json.dump(config, fp) - nib_lst, _ = imed_inference.segment_volume(PATH_MODEL, [IMAGE_PATH]) + nib_lst, _ = imed_inference.segment_volume(str(PATH_MODEL), [str(PATH_IMAGE)]) nib_img = nib_lst[0] - assert np.squeeze(nib_img.get_fdata()).shape == nib.load(IMAGE_PATH).shape + assert np.squeeze(nib_img.get_fdata()).shape == nib.load(PATH_IMAGE).shape assert (nib_img.dataobj.max() <= 1.0) and (nib_img.dataobj.min() >= 0.0) assert nib_img.dataobj.dtype == 'float32' @@ -183,19 +183,23 @@ def test_segment_volume_2d_with_patches(download_functional_test_files, center_c model = imed_models.Unet(in_channel=1, out_channel=1, depth=2, - drop_rate=DROPOUT, + dropout_rate=DROPOUT, bn_momentum=BN) - if not os.path.exists(PATH_MODEL): - os.mkdir(PATH_MODEL) + if not PATH_MODEL.exists(): + PATH_MODEL.mkdir(parents=True, exist_ok=True) - torch.save(model, os.path.join(PATH_MODEL, "model_test.pt")) + torch.save(model, Path(PATH_MODEL, "model_test.pt")) config = { "loader_parameters": { "slice_filter_params": { "filter_empty_mask": False, "filter_empty_input": False }, + "patch_filter_params": { + "filter_empty_mask": False, + "filter_empty_input": False + }, "roi_params": { "suffix": None, "slice_filter_roi": None @@ -222,13 +226,77 @@ def test_segment_volume_2d_with_patches(download_functional_test_files, center_c } } - PATH_CONFIG = os.path.join(PATH_MODEL, 'model_test.json') - with open(PATH_CONFIG, 'w') as fp: + PATH_CONFIG = Path(PATH_MODEL, 'model_test.json') + with PATH_CONFIG.open(mode='w') as fp: json.dump(config, fp) - nib_lst, _ = imed_inference.segment_volume(PATH_MODEL, [IMAGE_PATH]) + nib_lst, _ = 
imed_inference.segment_volume(str(PATH_MODEL), [str(PATH_IMAGE)]) nib_img = nib_lst[0] - assert np.squeeze(nib_img.get_fdata()).shape == nib.load(IMAGE_PATH).shape + assert np.squeeze(nib_img.get_fdata()).shape == nib.load(PATH_IMAGE).shape + assert (nib_img.dataobj.max() <= 1.0) and (nib_img.dataobj.min() >= 0.0) + assert nib_img.dataobj.dtype == 'float32' + + shutil.rmtree(PATH_MODEL) + + +@pytest.mark.parametrize("center_crop", [[200, 200]]) +def test_segment_volume_2d_without_patches(download_functional_test_files, center_crop): + model = imed_models.Unet(in_channel=1, + out_channel=1, + depth=2, + dropout_rate=DROPOUT, + bn_momentum=BN) + + if not PATH_MODEL.exists(): + PATH_MODEL.mkdir(parents=True, exist_ok=True) + + torch.save(model, Path(PATH_MODEL, "model_test.pt")) + config = { + "loader_parameters": { + "slice_filter_params": { + "filter_empty_mask": False, + "filter_empty_input": False + }, + "patch_filter_params": { + "filter_empty_mask": False, + "filter_empty_input": False + }, + "roi_params": { + "suffix": None, + "slice_filter_roi": None + }, + "slice_axis": "axial" + }, + "default_model": { + "length_2D": LENGTH_2D, + "stride_2D": LENGTH_2D + }, + "transformation": { + "Resample": {"wspace": 0.75, "hspace": 0.75}, + "CenterCrop": {"size": center_crop}, + "RandomTranslation": { + "translate": [0.03, 0.03], + "applied_to": ["im", "gt"], + "dataset_type": ["training"] + }, + "NormalizeInstance": {"applied_to": ["im"]} + }, + "postprocessing": {}, + "training_parameters": { + "batch_size": BATCH_SIZE + } + } + + PATH_CONFIG = Path(PATH_MODEL, 'model_test.json') + with PATH_CONFIG.open(mode='w') as fp: + json.dump(config, fp) + + options = {} + options['no_patch'] = True + + nib_lst, _ = imed_inference.segment_volume(str(PATH_MODEL), [str(PATH_IMAGE)], options=options) + nib_img = nib_lst[0] + assert np.squeeze(nib_img.get_fdata()).shape == nib.load(PATH_IMAGE).shape assert (nib_img.dataobj.max() <= 1.0) and (nib_img.dataobj.min() >= 0.0) assert nib_img.dataobj.dtype == 'float32' @@ -241,10 +309,10 @@ def test_segment_volume_3d(download_functional_test_files, center_crop): out_channel=1, base_n_filter=1) - if not os.path.exists(PATH_MODEL): - os.mkdir(PATH_MODEL) + if not PATH_MODEL.exists(): + PATH_MODEL.mkdir(parents=True, exist_ok=True) - torch.save(model, os.path.join(PATH_MODEL, "model_test.pt")) + torch.save(model, Path(PATH_MODEL, "model_test.pt")) config = { "Modified3DUNet": { "applied": True, @@ -286,13 +354,13 @@ def test_segment_volume_3d(download_functional_test_files, center_crop): } } - PATH_CONFIG = os.path.join(PATH_MODEL, 'model_test.json') - with open(PATH_CONFIG, 'w') as fp: + PATH_CONFIG = Path(PATH_MODEL, 'model_test.json') + with PATH_CONFIG.open(mode='w') as fp: json.dump(config, fp) - nib_lst, _ = imed_inference.segment_volume(PATH_MODEL, [IMAGE_PATH]) + nib_lst, _ = imed_inference.segment_volume(str(PATH_MODEL), [str(PATH_IMAGE)]) nib_img = nib_lst[0] - assert np.squeeze(nib_img.get_fdata()).shape == nib.load(IMAGE_PATH).shape + assert np.squeeze(nib_img.get_fdata()).shape == nib.load(PATH_IMAGE).shape assert (nib_img.dataobj.max() <= 1.0) and (nib_img.dataobj.min() >= 0.0) assert nib_img.dataobj.dtype == 'float32' diff --git a/testing/functional_tests/test_training_3d.py b/testing/functional_tests/test_training_3d.py new file mode 100644 index 000000000..e6a5ff44f --- /dev/null +++ b/testing/functional_tests/test_training_3d.py @@ -0,0 +1,246 @@ +import json +import logging +import os +import pytest +from pytest_console_scripts import script_runner 
+from pathlib import Path
+from testing.functional_tests.t_utils import __tmp_dir__, create_tmp_dir, __data_testing_dir__, \
+ download_functional_test_files
+from testing.common_testing_util import remove_tmp_dir
+from ivadomed import config_manager as imed_config_manager
+from ivadomed.keywords import ConfigKW, ModelParamsKW, LoaderParamsKW, ContrastParamsKW, TransformationKW
+
+
+logger = logging.getLogger(__name__)
+
+
+def setup_function():
+ create_tmp_dir()
+
+
+@pytest.mark.script_launch_mode('subprocess')
+def test_training_3d_1class_single_channel_with_data_augmentation(download_functional_test_files, script_runner):
+
+ # Load automate training config as context
+ file_config = os.path.join(__data_testing_dir__, 'automate_training_config.json')
+ context = imed_config_manager.ConfigurationManager(file_config).get_config()
+
+ # Modify key-value pairs in context for given test
+ # Set up 3D model params
+ context[ConfigKW.DEFAULT_MODEL][ModelParamsKW.IS_2D] = False
+ context[ConfigKW.MODIFIED_3D_UNET] = {
+ ModelParamsKW.APPLIED: True,
+ ModelParamsKW.LENGTH_3D: [32, 32, 16],
+ ModelParamsKW.STRIDE_3D: [32, 32, 16],
+ ModelParamsKW.N_FILTERS: 4
+ }
+ # Set target_suffix (1-class)
+ context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.TARGET_SUFFIX] = ["_lesion-manual"]
+ # Set contrasts of interest (2 single channels)
+ context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.CONTRAST_PARAMS][ContrastParamsKW.TRAINING_VALIDATION] = ["T1w", "T2w"]
+ context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.CONTRAST_PARAMS][ContrastParamsKW.TESTING] = ["T1w", "T2w"]
+ context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.MULTICHANNEL] = False
+ # Set 3D preprocessing and data augmentation
+ context[ConfigKW.TRANSFORMATION][TransformationKW.RESAMPLE] = {
+ "wspace": 0.75,
+ "hspace": 0.75,
+ "dspace": 0.75
+ }
+ context[ConfigKW.TRANSFORMATION][TransformationKW.CENTERCROP] = {
+ "size": [32, 32, 16],
+ }
+ context[ConfigKW.TRANSFORMATION][TransformationKW.RANDOM_AFFINE] = {
+ "degrees": 10,
+ "scale": [0.03, 0.03, 0.03],
+ "translate": [0.8, 0.8, 0.8],
+ "applied_to": ["im", "gt"],
+ "dataset_type": ["training"]
+ }
+
+ # Write temporary config file for given test
+ file_config_updated = os.path.join(__tmp_dir__, "data_functional_testing", "config_3d_training.json")
+ with Path(file_config_updated).open(mode='w') as fp:
+ json.dump(context, fp, indent=4)
+
+ # Set output directory
+ __output_dir__ = Path(__tmp_dir__, 'results')
+
+ # Run ivadomed
+ ret = script_runner.run('ivadomed', '-c', f'{file_config_updated}',
+ '--path-data', f'{__data_testing_dir__}',
+ '--path-output', f'{__output_dir__}')
+ logger.debug(f"{ret.stdout}")
+ logger.debug(f"{ret.stderr}")
+ assert ret.success
+
+
+@pytest.mark.script_launch_mode('subprocess')
+def test_training_3d_2class_single_channel_with_data_augmentation(download_functional_test_files, script_runner):
+
+ # Load automate training config as context
+ file_config = os.path.join(__data_testing_dir__, 'automate_training_config.json')
+ context = imed_config_manager.ConfigurationManager(file_config).get_config()
+
+ # Modify key-value pairs in context for given test
+ # Set up 3D model params
+ context[ConfigKW.DEFAULT_MODEL][ModelParamsKW.IS_2D] = False
+ context[ConfigKW.MODIFIED_3D_UNET] = {
+ ModelParamsKW.APPLIED: True,
+ ModelParamsKW.LENGTH_3D: [32, 32, 16],
+ ModelParamsKW.STRIDE_3D: [32, 32, 16],
+ ModelParamsKW.N_FILTERS: 4
+ }
+ # Set target_suffix (2-class)
+ context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.TARGET_SUFFIX] = ["_lesion-manual", "_seg-manual"]
+ # Set contrasts of interest (2 single channels)
+ context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.CONTRAST_PARAMS][ContrastParamsKW.TRAINING_VALIDATION] = ["T1w", "T2w"]
+ context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.CONTRAST_PARAMS][ContrastParamsKW.TESTING] = ["T1w", "T2w"]
+ context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.MULTICHANNEL] = False
+ # Set 3D preprocessing and data augmentation
+ context[ConfigKW.TRANSFORMATION][TransformationKW.RESAMPLE] = {
+ "wspace": 0.75,
+ "hspace": 0.75,
+ "dspace": 0.75
+ }
+ context[ConfigKW.TRANSFORMATION][TransformationKW.CENTERCROP] = {
+ "size": [32, 32, 16],
+ }
+ context[ConfigKW.TRANSFORMATION][TransformationKW.RANDOM_AFFINE] = {
+ "degrees": 10,
+ "scale": [0.03, 0.03, 0.03],
+ "translate": [0.8, 0.8, 0.8],
+ "applied_to": ["im", "gt"],
+ "dataset_type": ["training"]
+ }
+
+ # Write temporary config file for given test
+ file_config_updated = os.path.join(__tmp_dir__, "data_functional_testing", "config_3d_training.json")
+ with Path(file_config_updated).open(mode='w') as fp:
+ json.dump(context, fp, indent=4)
+
+ # Set output directory
+ __output_dir__ = Path(__tmp_dir__, 'results')
+
+ # Run ivadomed
+ ret = script_runner.run('ivadomed', '-c', f'{file_config_updated}',
+ '--path-data', f'{__data_testing_dir__}',
+ '--path-output', f'{__output_dir__}')
+ logger.debug(f"{ret.stdout}")
+ logger.debug(f"{ret.stderr}")
+ assert ret.success
+
+
+@pytest.mark.script_launch_mode('subprocess')
+def test_training_3d_1class_multi_channel_with_data_augmentation(download_functional_test_files, script_runner):
+
+ # Load automate training config as context
+ file_config = os.path.join(__data_testing_dir__, 'automate_training_config.json')
+ context = imed_config_manager.ConfigurationManager(file_config).get_config()
+
+ # Modify key-value pairs in context for given test
+ # Set up 3D model params
+ context[ConfigKW.DEFAULT_MODEL][ModelParamsKW.IS_2D] = False
+ context[ConfigKW.MODIFIED_3D_UNET] = {
+ ModelParamsKW.APPLIED: True,
+ ModelParamsKW.LENGTH_3D: [32, 32, 16],
+ ModelParamsKW.STRIDE_3D: [32, 32, 16],
+ ModelParamsKW.N_FILTERS: 4
+ }
+ # Set target_suffix (1-class)
+ context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.TARGET_SUFFIX] = ["_lesion-manual"]
+ # Set contrasts of interest (1 multi-channel)
+ context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.CONTRAST_PARAMS][ContrastParamsKW.TRAINING_VALIDATION] = ["T1w", "T2w"]
+ context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.CONTRAST_PARAMS][ContrastParamsKW.TESTING] = ["T1w", "T2w"]
+ context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.MULTICHANNEL] = True
+ # Set 3D preprocessing and data augmentation
+ context[ConfigKW.TRANSFORMATION][TransformationKW.RESAMPLE] = {
+ "wspace": 0.75,
+ "hspace": 0.75,
+ "dspace": 0.75
+ }
+ context[ConfigKW.TRANSFORMATION][TransformationKW.CENTERCROP] = {
+ "size": [32, 32, 16],
+ }
+ context[ConfigKW.TRANSFORMATION][TransformationKW.RANDOM_AFFINE] = {
+ "degrees": 10,
+ "scale": [0.03, 0.03, 0.03],
+ "translate": [0.8, 0.8, 0.8],
+ "applied_to": ["im", "gt"],
+ "dataset_type": ["training"]
+ }
+
+ # Write temporary config file for given test
+ file_config_updated = os.path.join(__tmp_dir__, "data_functional_testing", "config_3d_training.json")
+ with Path(file_config_updated).open(mode='w') as fp:
+ json.dump(context, fp, indent=4)
+
+ # Set output directory
+ __output_dir__ = Path(__tmp_dir__, 'results')
+
+ # Run ivadomed
+ ret = script_runner.run('ivadomed', '-c', f'{file_config_updated}',
+ '--path-data', f'{__data_testing_dir__}',
+ '--path-output', f'{__output_dir__}')
+ logger.debug(f"{ret.stdout}")
+ logger.debug(f"{ret.stderr}")
+ assert ret.success
+
+
+@pytest.mark.script_launch_mode('subprocess')
+def test_training_3d_1class_multirater_with_data_augmentation(download_functional_test_files, script_runner):
+
+ # Load automate training config as context
+ file_config = os.path.join(__data_testing_dir__, 'automate_training_config.json')
+ context = imed_config_manager.ConfigurationManager(file_config).get_config()
+
+ # Modify key-value pairs in context for given test
+ # Set up 3D model params
+ context[ConfigKW.DEFAULT_MODEL][ModelParamsKW.IS_2D] = False
+ context[ConfigKW.MODIFIED_3D_UNET] = {
+ ModelParamsKW.APPLIED: True,
+ ModelParamsKW.LENGTH_3D: [32, 32, 16],
+ ModelParamsKW.STRIDE_3D: [32, 32, 16],
+ ModelParamsKW.N_FILTERS: 4
+ }
+ # Set target_suffix (1-class, multirater)
+ context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.TARGET_SUFFIX] = [["_lesion-manual", "_seg-manual"]]
+ # Set contrasts of interest (2 single channels)
+ context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.CONTRAST_PARAMS][ContrastParamsKW.TRAINING_VALIDATION] = ["T1w", "T2w"]
+ context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.CONTRAST_PARAMS][ContrastParamsKW.TESTING] = ["T1w", "T2w"]
+ context[ConfigKW.LOADER_PARAMETERS][LoaderParamsKW.MULTICHANNEL] = False
+ # Set 3D preprocessing and data augmentation
+ context[ConfigKW.TRANSFORMATION][TransformationKW.RESAMPLE] = {
+ "wspace": 0.75,
+ "hspace": 0.75,
+ "dspace": 0.75
+ }
+ context[ConfigKW.TRANSFORMATION][TransformationKW.CENTERCROP] = {
+ "size": [32, 32, 16],
+ }
+ context[ConfigKW.TRANSFORMATION][TransformationKW.RANDOM_AFFINE] = {
+ "degrees": 10,
+ "scale": [0.03, 0.03, 0.03],
+ "translate": [0.8, 0.8, 0.8],
+ "applied_to": ["im", "gt"],
+ "dataset_type": ["training"]
+ }
+
+ # Write temporary config file for given test
+ file_config_updated = os.path.join(__tmp_dir__, "data_functional_testing", "config_3d_training.json")
+ with Path(file_config_updated).open(mode='w') as fp:
+ json.dump(context, fp, indent=4)
+
+ # Set output directory
+ __output_dir__ = Path(__tmp_dir__, 'results')
+
+ # Run ivadomed
+ ret = script_runner.run('ivadomed', '-c', f'{file_config_updated}',
+ '--path-data', f'{__data_testing_dir__}',
+ '--path-output', f'{__output_dir__}')
+ logger.debug(f"{ret.stdout}")
+ logger.debug(f"{ret.stderr}")
+ assert ret.success
+
+
+def teardown_function():
+ remove_tmp_dir()
diff --git a/testing/functional_tests/test_training_curve.py b/testing/functional_tests/test_training_curve.py
index 3d88dd450..5a2ae8039 100644
--- a/testing/functional_tests/test_training_curve.py
+++ b/testing/functional_tests/test_training_curve.py
@@ -1,8 +1,8 @@
 import logging
-import os
 from testing.functional_tests.t_utils import __tmp_dir__, create_tmp_dir, download_functional_test_files
 from testing.common_testing_util import remove_tmp_dir
 from ivadomed.scripts import training_curve
+from pathlib import Path
 logger = logging.getLogger(__name__)
@@ -11,21 +11,22 @@ def setup_function():
 def test_training_curve(download_functional_test_files):
- __data_testing_dir__ = os.path.join(__tmp_dir__, "data_functional_testing")
- __input_dir__ = os.path.join(__data_testing_dir__, 'tensorboard_events')
- __output_dir__ = os.path.join(__tmp_dir__, 'output_training_curve')
- training_curve.main(args=['--input', __input_dir__,
- '--output', __output_dir__])
- assert os.path.exists(__output_dir__)
- assert os.path.exists(os.path.join(__output_dir__, "accuracy_score.png"))
- assert os.path.exists(os.path.join(__output_dir__, "dice_score.png"))
- assert os.path.exists(os.path.join(__output_dir__, "hausdorff_score.png"))
- assert os.path.exists(os.path.join(__output_dir__, "intersection_over_union.png"))
- assert os.path.exists(os.path.join(__output_dir__, "losses.png"))
- assert os.path.exists(os.path.join(__output_dir__, "multiclass dice_score.png"))
- assert os.path.exists(os.path.join(__output_dir__, "precision_score.png"))
- assert os.path.exists(os.path.join(__output_dir__, "recall_score.png"))
- assert os.path.exists(os.path.join(__output_dir__, "specificity_score.png"))
+ __data_testing_dir__ = Path(__tmp_dir__, "data_functional_testing")
+ __input_dir__ = Path(__data_testing_dir__, 'tensorboard_events')
+ __output_dir__ = Path(__tmp_dir__, 'output_training_curve')
+ training_curve.main(args=['--input', str(__input_dir__),
+ '--output', str(__output_dir__)])
+ assert Path(__output_dir__).exists()
+ assert Path(__output_dir__, "accuracy_score.png").exists()
+ assert Path(__output_dir__, "dice_score.png").exists()
+ assert Path(__output_dir__, "hausdorff_score.png").exists()
+ assert Path(__output_dir__, "intersection_over_union.png").exists()
+ assert Path(__output_dir__, "losses.png").exists()
+ assert Path(__output_dir__, "multi_class_dice_score.png").exists()
+ assert Path(__output_dir__, "precision_score.png").exists()
+ assert Path(__output_dir__, "recall_score.png").exists()
+ assert Path(__output_dir__, "specificity_score.png").exists()
+ assert Path(__output_dir__, "tensorboard_events_training_values.csv").exists()
 def teardown_function():
diff --git a/testing/functional_tests/test_visualize_transforms.py b/testing/functional_tests/test_visualize_transforms.py
index dd5089dad..724bb6d0b 100644
--- a/testing/functional_tests/test_visualize_transforms.py
+++ b/testing/functional_tests/test_visualize_transforms.py
@@ -1,8 +1,8 @@
 import logging
-import os
 from testing.functional_tests.t_utils import __tmp_dir__, create_tmp_dir, download_functional_test_files
 from testing.common_testing_util import remove_tmp_dir
 from ivadomed.scripts import visualize_transforms
+from pathlib import Path
 logger = logging.getLogger(__name__)
@@ -12,18 +12,18 @@ def setup_function():
 def test_visualize_transforms_n_1(download_functional_test_files):
- __data_testing_dir__ = os.path.join(__tmp_dir__, "data_functional_testing")
- __input_file__ = os.path.join(__data_testing_dir__, 'sub-unf01/anat/sub-unf01_T1w.nii.gz')
- __output_dir__ = os.path.join(__tmp_dir__, "output_visualize_transforms_n_1")
- __config_file__ = os.path.join(__data_testing_dir__, "model_config.json")
- __label_file__ = os.path.join(__data_testing_dir__,
+ __data_testing_dir__ = Path(__tmp_dir__, "data_functional_testing")
+ __input_file__ = Path(__data_testing_dir__, 'sub-unf01/anat/sub-unf01_T1w.nii.gz')
+ __output_dir__ = Path(__tmp_dir__, "output_visualize_transforms_n_1")
+ __config_file__ = Path(__data_testing_dir__, "model_config.json")
+ __label_file__ = Path(__data_testing_dir__,
 'derivatives/labels/sub-test001/anat/sub-unf01_T1w_seg-manual.nii.gz')
- visualize_transforms.main(args=['--input', __input_file__,
- '--output', __output_dir__,
- '--config', __config_file__,
- '-r', __label_file__])
- assert os.path.exists(__output_dir__)
- output_files = os.listdir(__output_dir__)
+ visualize_transforms.main(args=['--input', str(__input_file__),
+ '--output', str(__output_dir__),
+ '--config', str(__config_file__),
+ '-r', str(__label_file__)])
+ assert __output_dir__.exists()
+ output_files = [f.name for f in __output_dir__.iterdir()]
 assert len(output_files) == 5
 for output_file in output_files:
 assert "Resample" in output_file
@@ -32,19 +32,19 @@
 def test_visualize_transforms_n_2(download_functional_test_files):
- __data_testing_dir__ = os.path.join(__tmp_dir__, "data_functional_testing")
- __input_file__ = os.path.join(__data_testing_dir__, 'sub-unf01/anat/sub-unf01_T1w.nii.gz')
- __output_dir__ = os.path.join(__tmp_dir__, "output_visualize_transforms_n_2")
- __config_file__ = os.path.join(__data_testing_dir__, "model_config.json")
- __label_file__ = os.path.join(__data_testing_dir__,
+ __data_testing_dir__ = Path(__tmp_dir__, "data_functional_testing")
+ __input_file__ = Path(__data_testing_dir__, 'sub-unf01/anat/sub-unf01_T1w.nii.gz')
+ __output_dir__ = Path(__tmp_dir__, "output_visualize_transforms_n_2")
+ __config_file__ = Path(__data_testing_dir__, "model_config.json")
+ __label_file__ = Path(__data_testing_dir__,
 'derivatives/labels/sub-test001/anat/sub-unf01_T1w_seg-manual.nii.gz')
- visualize_transforms.main(args=['--input', __input_file__,
- '--output', __output_dir__,
- '--config', __config_file__,
- '-r', __label_file__,
+ visualize_transforms.main(args=['--input', str(__input_file__),
+ '--output', str(__output_dir__),
+ '--config', str(__config_file__),
+ '-r', str(__label_file__),
 '-n', '2'])
- assert os.path.exists(__output_dir__)
- output_files = os.listdir(__output_dir__)
+ assert __output_dir__.exists()
+ output_files = [f.name for f in __output_dir__.iterdir()]
 assert len(output_files) == 10
 for output_file in output_files:
 assert "Resample" in output_file
diff --git a/testing/tutorials/readme.md b/testing/tutorials/readme.md
new file mode 100644
index 000000000..671aea544
--- /dev/null
+++ b/testing/tutorials/readme.md
@@ -0,0 +1,5 @@
+[Tutorial 1 2D Segmentation UNet ![Open Tutorial 1 on Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ivadomed/ivadomed/blob/master/testing/tutorials/tutorial_1_2d_segmentation_unet.ipynb)
+
+[Tutorial 3 Uncertainty Estimation ![Open Tutorial 3 on Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ivadomed/ivadomed/blob/master/testing/tutorials/tutorial_3_uncertainty_estimation.ipynb)
+
+
diff --git a/testing/tutorials/tutorial_1_2d_segmentation_unet.ipynb b/testing/tutorials/tutorial_1_2d_segmentation_unet.ipynb
new file mode 100644
index 000000000..01fc905af
--- /dev/null
+++ b/testing/tutorials/tutorial_1_2d_segmentation_unet.ipynb
@@ -0,0 +1,638 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "f9egIMJiOj75"
+ },
+ "source": [
+ "## Welcome to ivadomed's first tutorial: **One-class segmentation with 2D U-Net**\n",
+ "\n",
+ "In this tutorial, we will look at how to train a two-dimensional (2D) segmentation model for segmenting the spinal cord with a single label on multiple Magnetic Resonance (MR) contrasts. The model will then be evaluated using various metrics such as the Dice coefficient, Hausdorff distance, etc. This tutorial also provides visualizations of training curves and the segmented images on Tensorboard. \n",
+ "\n",
+ "⚠️ Before getting started, please ensure that you: \n",
+ "\n",
+ "1. Are connected to the GPU. 
You can do this by doing the following from the task bar on the top: `Runtime` $\\to$ `Change Runtime type` $\\to$ `Hardware accelerator: GPU`\n", + "2. **Are running this tutorial from _your_ Google Drive. You can do this by going to: `File` $\\to$ `Save a Copy in Drive`.**\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# @title Fetch configuration file\n", + "# fetch the configuration (config) file to be used for this tutorial\n", + "!wget https://raw.githubusercontent.com/ivadomed/ivadomed/master/ivadomed/config/config.json ./content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NTmS_K7wHUSN", + "outputId": "34f8d16f-0816-404b-88f5-b6729c6f4cb3" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[?25l\r\u001b[K |▋ | 10 kB 16.5 MB/s eta 0:00:01\r\u001b[K |█▏ | 20 kB 19.7 MB/s eta 0:00:01\r\u001b[K |█▊ | 30 kB 20.1 MB/s eta 0:00:01\r\u001b[K |██▎ | 40 kB 17.0 MB/s eta 0:00:01\r\u001b[K |███ | 51 kB 12.3 MB/s eta 0:00:01\r\u001b[K |███▌ | 61 kB 13.8 MB/s eta 0:00:01\r\u001b[K |████ | 71 kB 12.2 MB/s eta 0:00:01\r\u001b[K |████▋ | 81 kB 13.2 MB/s eta 0:00:01\r\u001b[K |█████▎ | 92 kB 11.3 MB/s eta 0:00:01\r\u001b[K |█████▉ | 102 kB 11.6 MB/s eta 0:00:01\r\u001b[K |██████▍ | 112 kB 11.6 MB/s eta 0:00:01\r\u001b[K |███████ | 122 kB 11.6 MB/s eta 0:00:01\r\u001b[K |███████▋ | 133 kB 11.6 MB/s eta 0:00:01\r\u001b[K |████████▏ | 143 kB 11.6 MB/s eta 0:00:01\r\u001b[K |████████▊ | 153 kB 11.6 MB/s eta 0:00:01\r\u001b[K |█████████▎ | 163 kB 11.6 MB/s eta 0:00:01\r\u001b[K |██████████ | 174 kB 11.6 MB/s eta 0:00:01\r\u001b[K |██████████▌ | 184 kB 11.6 MB/s eta 0:00:01\r\u001b[K |███████████ | 194 kB 11.6 MB/s eta 0:00:01\r\u001b[K |███████████▋ | 204 kB 11.6 MB/s eta 0:00:01\r\u001b[K |████████████▎ | 215 kB 11.6 MB/s eta 0:00:01\r\u001b[K |████████████▉ | 225 kB 11.6 MB/s eta 0:00:01\r\u001b[K |█████████████▍ | 235 kB 11.6 MB/s eta 0:00:01\r\u001b[K |██████████████ | 245 kB 11.6 MB/s eta 0:00:01\r\u001b[K |██████████████▋ | 256 kB 11.6 MB/s eta 0:00:01\r\u001b[K |███████████████▏ | 266 kB 11.6 MB/s eta 0:00:01\r\u001b[K |███████████████▊ | 276 kB 11.6 MB/s eta 0:00:01\r\u001b[K |████████████████▎ | 286 kB 11.6 MB/s eta 0:00:01\r\u001b[K |████████████████▉ | 296 kB 11.6 MB/s eta 0:00:01\r\u001b[K |█████████████████▌ | 307 kB 11.6 MB/s eta 0:00:01\r\u001b[K |██████████████████ | 317 kB 11.6 MB/s eta 0:00:01\r\u001b[K |██████████████████▋ | 327 kB 11.6 MB/s eta 0:00:01\r\u001b[K |███████████████████▏ | 337 kB 11.6 MB/s eta 0:00:01\r\u001b[K |███████████████████▉ | 348 kB 11.6 MB/s eta 0:00:01\r\u001b[K |████████████████████▍ | 358 kB 11.6 MB/s eta 0:00:01\r\u001b[K |█████████████████████ | 368 kB 11.6 MB/s eta 0:00:01\r\u001b[K |█████████████████████▌ | 378 kB 11.6 MB/s eta 0:00:01\r\u001b[K |██████████████████████▏ | 389 kB 11.6 MB/s eta 0:00:01\r\u001b[K |██████████████████████▊ | 399 kB 11.6 MB/s eta 0:00:01\r\u001b[K |███████████████████████▎ | 409 kB 11.6 MB/s eta 0:00:01\r\u001b[K |███████████████████████▉ | 419 kB 11.6 MB/s eta 0:00:01\r\u001b[K |████████████████████████▌ | 430 kB 11.6 MB/s eta 0:00:01\r\u001b[K |█████████████████████████ | 440 kB 11.6 MB/s eta 0:00:01\r\u001b[K |█████████████████████████▋ | 450 kB 11.6 MB/s eta 0:00:01\r\u001b[K |██████████████████████████▏ | 460 kB 11.6 MB/s eta 0:00:01\r\u001b[K |██████████████████████████▉ | 471 kB 11.6 MB/s eta 0:00:01\r\u001b[K 
|███████████████████████████▍ | 481 kB 11.6 MB/s eta 0:00:01\r\u001b[K |████████████████████████████ | 491 kB 11.6 MB/s eta 0:00:01\r\u001b[K |████████████████████████████▌ | 501 kB 11.6 MB/s eta 0:00:01\r\u001b[K |█████████████████████████████▏ | 512 kB 11.6 MB/s eta 0:00:01\r\u001b[K |█████████████████████████████▊ | 522 kB 11.6 MB/s eta 0:00:01\r\u001b[K |██████████████████████████████▎ | 532 kB 11.6 MB/s eta 0:00:01\r\u001b[K |██████████████████████████████▉ | 542 kB 11.6 MB/s eta 0:00:01\r\u001b[K |███████████████████████████████▍| 552 kB 11.6 MB/s eta 0:00:01\r\u001b[K |████████████████████████████████| 562 kB 11.6 MB/s \n", + "\u001b[?25h Building wheel for imgaug (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Looking in links: https://download.pytorch.org/whl/torch_stable.html\n", + "Collecting torch==1.8.0+cu111\n", + " Downloading https://download.pytorch.org/whl/cu111/torch-1.8.0%2Bcu111-cp37-cp37m-linux_x86_64.whl (1982.2 MB)\n", + "\u001b[K |█████████████▌ | 834.1 MB 2.2 MB/s eta 0:08:52tcmalloc: large alloc 1147494400 bytes == 0x561b539ee000 @ 0x7f497abaf615 0x561b1a7764cc 0x561b1a85647a 0x561b1a7792ed 0x561b1a86ae1d 0x561b1a7ece99 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7ecd00 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e9737 0x561b1a86bc66 0x561b1a7e8daf 0x561b1a86bc66 0x561b1a7e8daf 0x561b1a86bc66 0x561b1a7e8daf 0x561b1a77b039 0x561b1a7be409 0x561b1a779c52 0x561b1a7ecc25 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e9737 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e8915 0x561b1a77aafa 0x561b1a7e8c0d 0x561b1a7e79ee\n", + "\u001b[K |█████████████████ | 1055.7 MB 1.4 MB/s eta 0:10:44tcmalloc: large alloc 1434370048 bytes == 0x561b98044000 @ 0x7f497abaf615 0x561b1a7764cc 0x561b1a85647a 0x561b1a7792ed 0x561b1a86ae1d 0x561b1a7ece99 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7ecd00 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e9737 0x561b1a86bc66 0x561b1a7e8daf 0x561b1a86bc66 0x561b1a7e8daf 0x561b1a86bc66 0x561b1a7e8daf 0x561b1a77b039 0x561b1a7be409 0x561b1a779c52 0x561b1a7ecc25 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e9737 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e8915 0x561b1a77aafa 0x561b1a7e8c0d 0x561b1a7e79ee\n", + "\u001b[K |█████████████████████▋ | 1336.2 MB 1.7 MB/s eta 0:06:27tcmalloc: large alloc 1792966656 bytes == 0x561b1ce76000 @ 0x7f497abaf615 0x561b1a7764cc 0x561b1a85647a 0x561b1a7792ed 0x561b1a86ae1d 0x561b1a7ece99 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7ecd00 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e9737 0x561b1a86bc66 0x561b1a7e8daf 0x561b1a86bc66 0x561b1a7e8daf 0x561b1a86bc66 0x561b1a7e8daf 0x561b1a77b039 0x561b1a7be409 0x561b1a779c52 0x561b1a7ecc25 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e9737 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e8915 0x561b1a77aafa 0x561b1a7e8c0d 0x561b1a7e79ee\n", + "\u001b[K |███████████████████████████▎ | 1691.1 MB 1.4 MB/s eta 0:03:30tcmalloc: large alloc 2241208320 bytes == 0x561b87c5e000 @ 0x7f497abaf615 0x561b1a7764cc 0x561b1a85647a 0x561b1a7792ed 0x561b1a86ae1d 0x561b1a7ece99 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7ecd00 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e9737 0x561b1a86bc66 0x561b1a7e8daf 0x561b1a86bc66 0x561b1a7e8daf 0x561b1a86bc66 0x561b1a7e8daf 0x561b1a77b039 0x561b1a7be409 0x561b1a779c52 0x561b1a7ecc25 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e9737 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e8915 0x561b1a77aafa 0x561b1a7e8c0d 0x561b1a7e79ee\n", + "\u001b[K |████████████████████████████████| 1982.2 MB 1.4 MB/s eta 0:00:01tcmalloc: large alloc 1982251008 bytes == 0x561c0d5c0000 @ 0x7f497abae1e7 0x561b1a7ac067 0x561b1a7764cc 0x561b1a85647a 0x561b1a7792ed 
0x561b1a86ae1d 0x561b1a7ece99 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e8c0d 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e8c0d 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e8c0d 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e8c0d 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e8c0d 0x561b1a77aafa 0x561b1a7e8c0d 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e9737 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e9737 0x561b1a7e79ee\n", + "tcmalloc: large alloc 2477817856 bytes == 0x561c8382c000 @ 0x7f497abaf615 0x561b1a7764cc 0x561b1a85647a 0x561b1a7792ed 0x561b1a86ae1d 0x561b1a7ece99 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e8c0d 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e8c0d 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e8c0d 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e8c0d 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e8c0d 0x561b1a77aafa 0x561b1a7e8c0d 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e9737 0x561b1a7e79ee 0x561b1a77abda 0x561b1a7e9737 0x561b1a7e79ee 0x561b1a77b271\n", + "\u001b[K |████████████████████████████████| 1982.2 MB 1.1 kB/s \n", + "\u001b[?25hCollecting torchvision==0.9.0+cu111\n", + " Downloading https://download.pytorch.org/whl/cu111/torchvision-0.9.0%2Bcu111-cp37-cp37m-linux_x86_64.whl (17.6 MB)\n", + "\u001b[K |████████████████████████████████| 17.6 MB 49.1 MB/s \n", + "\u001b[?25hCollecting torchtext==0.9.0\n", + " Downloading torchtext-0.9.0-cp37-cp37m-manylinux1_x86_64.whl (7.1 MB)\n", + "\u001b[K |████████████████████████████████| 7.1 MB 12.1 MB/s \n", + "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from torch==1.8.0+cu111) (1.19.5)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from torch==1.8.0+cu111) (3.10.0.2)\n", + "Requirement already satisfied: pillow>=4.1.1 in /usr/local/lib/python3.7/dist-packages (from torchvision==0.9.0+cu111) (7.1.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from torchtext==0.9.0) (2.23.0)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from torchtext==0.9.0) (4.62.3)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->torchtext==0.9.0) (3.0.4)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->torchtext==0.9.0) (1.24.3)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->torchtext==0.9.0) (2.10)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->torchtext==0.9.0) (2021.10.8)\n", + "Installing collected packages: torch, torchvision, torchtext\n", + " Attempting uninstall: torch\n", + " Found existing installation: torch 1.10.0+cu111\n", + " Uninstalling torch-1.10.0+cu111:\n", + " Successfully uninstalled torch-1.10.0+cu111\n", + " Attempting uninstall: torchvision\n", + " Found existing installation: torchvision 0.11.1+cu111\n", + " Uninstalling torchvision-0.11.1+cu111:\n", + " Successfully uninstalled torchvision-0.11.1+cu111\n", + " Attempting uninstall: torchtext\n", + " Found existing installation: torchtext 0.11.0\n", + " Uninstalling torchtext-0.11.0:\n", + " Successfully uninstalled torchtext-0.11.0\n", + "Successfully installed torch-1.8.0+cu111 torchtext-0.9.0 torchvision-0.9.0+cu111\n", + "\u001b[K |████████████████████████████████| 215 kB 11.9 MB/s \n", + "\u001b[K |████████████████████████████████| 11.2 MB 48.0 MB/s \n", + "\u001b[K 
|████████████████████████████████| 3.2 MB 30.6 MB/s \n", + "\u001b[K |████████████████████████████████| 3.3 MB 39.8 MB/s \n", + "\u001b[K |████████████████████████████████| 4.1 MB 25.2 MB/s \n", + "\u001b[K |████████████████████████████████| 57 kB 4.4 MB/s \n", + "\u001b[K |████████████████████████████████| 873 kB 38.0 MB/s \n", + "\u001b[K |████████████████████████████████| 101 kB 12.2 MB/s \n", + "\u001b[K |████████████████████████████████| 55 kB 4.5 MB/s \n", + "\u001b[K |████████████████████████████████| 1.3 MB 38.2 MB/s \n", + "\u001b[?25h" + ] + } + ], + "source": [ + "# @title Install Dependencies\n", + "\n", + "!pip install imgaug==0.2.5 --quiet \n", + "!pip install ivadomed --quiet \n", + "\n", + "%load_ext tensorboard" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yv-G9XObSVm0" + }, + "source": [ + "### Download the Dataset\n", + "\n", + "We will be using a publicly available dataset consisting of the MRI data of the spinal cord. This dataset is a subset of the [spine-generic multi-center dataset](https://github.com/spine-generic/data-multi-subject) and has been pre-processed to facilitate training/testing of a new model. Namely, for each subject, all six contrasts were co-registered together. Semi-manual cord segmentation for all modalities and manual cerebrospinal fluid labels for T2w modality were created. More details can be found [here](https://github.com/ivadomed/ivadomed/blob/master/dev/prepare_data/README.md).\n", + "\n", + "In addition to the MRI data, this sample dataset also includes a trained model for spinal cord segmentation. The size of the dataset is about 490MB. Please run the following cell to download the dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "FYHpujawOElC", + "outputId": "2447440e-3e75-4025-cf8e-e63871558dbf" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2021-11-18 16:40:49.160\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36minit_ivadomed\u001b[0m:\u001b[36m408\u001b[0m - \u001b[1m\n", + "ivadomed (2.9.0)\n", + "\u001b[0m\n", + "Trying URL: https://github.com/ivadomed/data_example_spinegeneric/archive/r20200825.zip\n", + "Downloading: data_example_spinegeneric-r20200825.zip\n", + "Unzip data to: /tmp/tmpzfsxoabq\n", + "Removing temporary folders...\n", + "Folder Created: /content/data_example_spinegeneric\n", + "--2021-11-18 16:41:31-- https://raw.githubusercontent.com/ivadomed/ivadomed/master/ivadomed/config/config.json\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.110.133, 185.199.109.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 3699 (3.6K) [text/plain]\n", + "Saving to: ‘config.json’\n", + "\n", + "config.json 100%[===================>] 3.61K --.-KB/s in 0s \n", + "\n", + "2021-11-18 16:41:31 (33.5 MB/s) - ‘config.json’ saved [3699/3699]\n", + "\n", + "--2021-11-18 16:41:31-- http://./content\n", + "Resolving . (.)... 
failed: No address associated with hostname.\n",
+ "wget: unable to resolve host address ‘.’\n",
+ "FINISHED --2021-11-18 16:41:31--\n",
+ "Total wall clock time: 0.2s\n",
+ "Downloaded: 1 files, 3.6K in 0s (33.5 MB/s)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# @title Run Me to Download the Dataset!\n",
+ "\n",
+ "# download the dataset\n",
+ "!ivadomed_download_data -d data_example_spinegeneric"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "zMJcEJLXTpEo"
+ },
+ "source": [
+ "### Configuration File\n",
+ "In `ivadomed`, the training is orchestrated by a configuration file. In short, it is the JSON file that contains all the parameters used for loading the data, training and evaluating the model. In-depth documentation on how to use the configuration file is available [here](https://ivadomed.org/configuration_file.html). Some examples of configuration files are available in the `ivadomed/config/` folder [here](https://github.com/ivadomed/ivadomed/tree/master/ivadomed/config).\n",
+ "\n",
+ "In this tutorial, we will be using the configuration file: `ivadomed/config/config.json`. This is already downloaded for you and can be seen under Colab's `Files` (📁) tab on the left.\n",
+ "\n",
+ "Open this file and read on for more information about some of the key parameters for performing the one-class 2D segmentation:\n",
+ "\n",
+ "\n",
+ "1. `command` - The task to perform. This can either be \"train\" or \"test\". For training the model, we first set this key to \"train\".\n",
+ "```json\n",
+ " \"command\": \"train\"\n",
+ "```\n",
+ "\n",
+ "2. `path_output` - The name of the folder that will be populated by the output files (e.g. the trained model, predictions, results, etc.)\n",
+ "```json\n",
+ " \"path_output\": \"spineGeneric\"\n",
+ "```\n",
+ "\n",
+ "3. `loader_parameters:path_data` - The location of the dataset. As discussed in [Data](https://ivadomed.org/data.html), the dataset must conform to the BIDS standard. This value can be modified so as to point to the correct location of the downloaded dataset.\n",
+ "```json\n",
+ " \"path_data\": \"data_example_spinegeneric\"\n",
+ "```\n",
+ "\n",
+ "4. `loader_parameters:target_suffix` - The suffix for the name of the ground truth (GT) segmentation file. The GT is located under the `DATASET/derivatives/labels` folder. For this tutorial, the suffix is `_seg-manual`.\n",
+ "```json\n",
+ " \"target_suffix\": [\"_seg-manual\"]\n",
+ "```\n",
+ "\n",
+ "5. `loader_parameters:contrast_params` - A dictionary containing the contrasts of interest.\n",
+ "```json\n",
+ " \"contrast_params\": {\n",
+ " \"training_validation\": [\"T1w\", \"T2w\", \"T2star\"],\n",
+ " \"testing\": [\"T1w\", \"T2w\", \"T2star\"],\n",
+ " \"balance\": {}\n",
+ " }\n",
+ "```\n",
+ "\n",
+ "6. `loader_parameters:slice_axis` - The orientation of the 2D slice to use with the model.\n",
+ "```json\n",
+ " \"slice_axis\": \"axial\"\n",
+ "```\n",
+ "\n",
+ "7. `loader_parameters:multichannel` - Turn on/off multi-channel training. If true, each sample has several channels, where each channel is an image contrast. If false, only one image contrast is used per sample.\n",
+ "```json\n",
+ " \"multichannel\": false\n",
+ "```\n",
+ "\n",
+ "⚠️ **Note**: The multichannel approach requires that for each subject, the image contrasts are co-registered. This implies that a ground truth segmentation is aligned with all contrasts, for a given subject. In this tutorial, only a single channel will be used.\n",
+ "\n",
+ "8. `training_time:num_epochs` - The maximum number of epochs that will be run during training. Each epoch is composed of a training part and a validation part. It should be a positive integer.\n",
+ "```json\n",
+ " \"num_epochs\": 100\n",
+ "```"
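As a side note, these keys can also be edited programmatically rather than by hand — the functional tests earlier in this diff do exactly that before writing a temporary config. A minimal sketch using only the standard library (it assumes `config.json` sits in the current working directory; `ivadomed`'s own `ConfigurationManager` could be used instead):

```python
import json
from pathlib import Path

config_path = Path("config.json")
config = json.loads(config_path.read_text())

# Override a few of the keys described above
config["path_output"] = "spineGeneric_gpu"
config["loader_parameters"]["path_data"] = "data_example_spinegeneric"
config["loader_parameters"]["target_suffix"] = ["_seg-manual"]

config_path.write_text(json.dumps(config, indent=4))
```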
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "nbi9YV9j4E3Q"
+ },
+ "source": [
+ "### Modify the Config File\n",
+ "\n",
+ "Now that we know how the config file is structured, open the `config.json` file under the \"Files\" tab on the left. This should let you edit the contents of the json file as mentioned above. Change the following parameters:\n",
+ "\n",
+ "1. `\"path_output\": \"spineGeneric_gpu\"` (just to differentiate the results obtained from the GPU)\n",
+ "2. `\"debugging\": true` (to visualize training on Tensorboard)\n",
+ "3. `\"num_epochs\": 20` (training for only a few epochs for the purposes of this tutorial)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "rlPGhRMrh051"
+ },
+ "source": [
+ "### Train the Model\n",
+ "\n",
+ "Once the config file is saved and ready, the following command is used for training:\n",
+ "```shell\n",
+ " ivadomed --train -c config.json --path-data path/to/bids/data --path-output path/to/output/directory\n",
+ "```\n",
+ "\n",
+ "If the `path_data` and `path_output` keys are already set in the config file, then these flags do not need to be specified again. The **shorter command** shown below can be run instead: \n",
+ "```shell\n",
+ " ivadomed --train -c config.json\n",
+ "```\n",
+ "\n",
+ "⚠️ **Note**: If a compatible GPU is available, it will be used by default (see the `\"gpu_id\"` key in the config file); a quick sanity check is sketched after this section. Otherwise, training will fall back to the CPU, which takes a prohibitively long time (several hours).\n",
+ "\n",
+ "The main parameters of the training scheme and model will be displayed on the terminal, followed by the loss value on the training and validation sets at every epoch. To learn more about the meaning of each parameter, see the [Configuration File](https://ivadomed.org/configuration_file.html) documentation. The value of the loss should decrease during training.\n",
+ "\n",
+ "After 20 epochs (see \"num_epochs\" in the configuration file), the Dice score on the validation set should be ~0.9."
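Before launching training, it may be worth confirming that the runtime actually sees a GPU. This check is not part of the original tutorial; it is just a quick sanity test using the `torch` package that ships with `ivadomed`:

```python
import torch

# Should print True on a Colab GPU runtime; if it prints False,
# revisit Runtime -> Change Runtime type -> Hardware accelerator: GPU.
print(torch.cuda.is_available())
if torch.cuda.is_available():
    # The exact device name varies with the runtime allocation.
    print(torch.cuda.get_device_name(0))
```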
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "pSnnnu6SA_0-", + "outputId": "6f86c0dc-b8ce-410f-8e55-e09eb4dd7f63" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2021-11-18 16:48:46.573\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36minit_ivadomed\u001b[0m:\u001b[36m408\u001b[0m - \u001b[1m\n", + "ivadomed (2.9.0)\n", + "\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:46.576\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.config_manager\u001b[0m:\u001b[36m_display_differing_keys\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mAdding the following keys to the configuration file\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:46.576\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.config_manager\u001b[0m:\u001b[36mdeep_dict_compare\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1m log_file: log\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:46.576\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.config_manager\u001b[0m:\u001b[36mdeep_dict_compare\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1m loader_parameters: is_input_dropout: False\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:46.576\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.config_manager\u001b[0m:\u001b[36mdeep_dict_compare\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1m default_model: is_2d: True\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:46.576\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.config_manager\u001b[0m:\u001b[36m_display_differing_keys\u001b[0m:\u001b[36m152\u001b[0m - \u001b[1m\n", + "\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:46.576\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mget_path_output\u001b[0m:\u001b[36m371\u001b[0m - \u001b[1mCLI flag --path-output not used to specify output directory. Will check config file for directory...\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:46.577\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mget_path_data\u001b[0m:\u001b[36m383\u001b[0m - \u001b[1mCLI flag --path-data not used to specify BIDS data directory. 
Will check config file for directory...\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:46.577\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.main\u001b[0m:\u001b[36mset_output_path\u001b[0m:\u001b[36m195\u001b[0m - \u001b[1mCreating output path: spineGeneric_gpu\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:46.730\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdefine_device\u001b[0m:\u001b[36m135\u001b[0m - \u001b[1mUsing GPU ID 0\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:46.731\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m145\u001b[0m - \u001b[1mSelected architecture: Unet, with the following parameters:\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:46.732\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1m\tdropout_rate: 0.3\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:46.732\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1m\tbn_momentum: 0.1\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:46.732\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1m\tdepth: 3\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:46.732\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1m\tis_2d: True\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:46.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1m\tfinal_activation: sigmoid\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:46.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1m\tfolder_name: my_model\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:46.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1m\tin_channel: 1\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:46.733\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1m\tout_channel: 1\u001b[0m\n", + "/usr/local/lib/python3.7/dist-packages/bids/config.py:40: FutureWarning: Setting 'extension_initial_dot' will be removed in pybids 0.16.\n", + " FutureWarning)\n", + "\u001b[32m2021-11-18 16:48:46.744\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mivadomed.loader.bids_dataframe\u001b[0m:\u001b[36mwrite_derivatives_dataset_description\u001b[0m:\u001b[36m304\u001b[0m - \u001b[33m\u001b[1m/content/data_example_spinegeneric/derivatives/dataset_description.json not found. Please ensure a full path is specified in the configuration file. Will attempt to create a place holder description file for now at/content/data_example_spinegeneric/derivatives/dataset_description.json.\u001b[0m\n", + "/usr/local/lib/python3.7/dist-packages/bids/layout/validation.py:149: UserWarning: The PipelineDescription field was superseded by GeneratedBy in BIDS 1.4.0. 
You can use ``pybids upgrade`` to update your derivative dataset.\n", + " warnings.warn(\"The PipelineDescription field was superseded \"\n", + "\u001b[32m2021-11-18 16:48:48.117\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.loader.bids_dataframe\u001b[0m:\u001b[36msave\u001b[0m:\u001b[36m289\u001b[0m - \u001b[1mDataframe has been saved in spineGeneric_gpu/bids_dataframe.csv.\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:48.124\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mivadomed.loader.utils\u001b[0m:\u001b[36msplit_dataset\u001b[0m:\u001b[36m102\u001b[0m - \u001b[33m\u001b[1mAfter splitting: train, validation and test fractions are respectively 0.6, 0.2 and 0.2 of participant_id.\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:49.076\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_transfoms\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mSelected transformations for the ['training'] dataset:\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:49.076\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_transfoms\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1m\tResample: {'hspace': 0.75, 'wspace': 0.75, 'dspace': 1}\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:49.077\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_transfoms\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1m\tCenterCrop: {'size': [128, 128]}\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:49.077\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_transfoms\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1m\tRandomAffine: {'degrees': 5, 'scale': [0.1, 0.1], 'translate': [0.03, 0.03], 'applied_to': ['im', 'gt']}\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:49.077\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_transfoms\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1m\tElasticTransform: {'alpha_range': [28.0, 30.0], 'sigma_range': [3.5, 4.5], 'p': 0.1, 'applied_to': ['im', 'gt']}\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:49.077\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_transfoms\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1m\tNormalizeInstance: {'applied_to': ['im']}\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:49.078\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_transfoms\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mSelected transformations for the ['validation'] dataset:\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:49.078\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_transfoms\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1m\tResample: {'hspace': 0.75, 'wspace': 0.75, 'dspace': 1}\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:49.078\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_transfoms\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1m\tCenterCrop: {'size': [128, 128]}\u001b[0m\n", + "\u001b[32m2021-11-18 16:48:49.078\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_transfoms\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1m\tNormalizeInstance: {'applied_to': ['im']}\u001b[0m\n", + "Loading dataset: 100% 6/6 [00:00<00:00, 167.01it/s]\n", + "\u001b[32m2021-11-18 16:48:55.653\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mivadomed.loader.loader\u001b[0m:\u001b[36mload_dataset\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mLoaded 92 axial slices for the validation set.\u001b[0m\n", + "Loading dataset: 100% 17/17 [00:00<00:00, 90.33it/s]\n", + "\u001b[32m2021-11-18 16:49:13.834\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.loader.loader\u001b[0m:\u001b[36mload_dataset\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mLoaded 276 axial slices for the training set.\u001b[0m\n", + "\u001b[32m2021-11-18 16:49:13.834\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.main\u001b[0m:\u001b[36mcreate_path_model\u001b[0m:\u001b[36m78\u001b[0m - \u001b[1mCreating model directory: spineGeneric_gpu/my_model\u001b[0m\n", + "\u001b[32m2021-11-18 16:49:21.176\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.training\u001b[0m:\u001b[36mtrain\u001b[0m:\u001b[36m99\u001b[0m - \u001b[1mInitialising model's weights from scratch.\u001b[0m\n", + "\u001b[32m2021-11-18 16:49:31.783\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.training\u001b[0m:\u001b[36mtrain\u001b[0m:\u001b[36m115\u001b[0m - \u001b[1mScheduler parameters: {'name': 'CosineAnnealingLR', 'base_lr': 1e-05, 'max_lr': 0.01}\u001b[0m\n", + "\u001b[32m2021-11-18 16:49:31.784\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.training\u001b[0m:\u001b[36mtrain\u001b[0m:\u001b[36m135\u001b[0m - \u001b[1mSelected Loss: DiceLoss\u001b[0m\n", + "\u001b[32m2021-11-18 16:49:31.784\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.training\u001b[0m:\u001b[36mtrain\u001b[0m:\u001b[36m137\u001b[0m - \u001b[1m\twith the parameters: []\u001b[0m\n", + "Training: 7% 1/15 [00:00 {\n const url = new URL(await google.colab.kernel.proxyPort(6006, {'cache': true}));\n url.searchParams.set('tensorboardColab', 'true');\n const iframe = document.createElement('iframe');\n iframe.src = url;\n iframe.setAttribute('width', '100%');\n iframe.setAttribute('height', '800');\n iframe.setAttribute('frameborder', 0);\n document.body.appendChild(iframe);\n })();\n ", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# @title Visualize loss curves on Tensorboard\n", + "\n", + "# see the training progress on Tensorboard\n", + "# note that the output folder is the same \"path_output\" folder used in the config file\n", + "%tensorboard --logdir spineGeneric_gpu" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WUZOIO8BukmF" + }, + "source": [ + "### Evaluate the Model\n", + "\n", + "To test the trained model on the testing subset of the dataset and compute the evaluation metrics, run the following command: \n", + "```shell\n", + " ivadomed --test -c config.json --path-data path/to/bids/data --path-output path/to/output/directory\n", + "```\n", + "\n", + "Again, if `--path_data` and `--path_output` are already mentioned in the config file, use the command below instead:\n", + "```shell\n", + " ivadomed --test -c config.json\n", + "```\n", + "\n", + "The model’s parameters will be displayed in the cell's output, followed by a preview of the results for each image. The resulting segmentation is saved for each image in the `/pred_masks` while a csv file, saved in `/results_eval/evaluation_3Dmetrics.csv`, contains all the evaluation metrics. For more details on the evaluation metrics, see `ivadomed.metrics` [here](https://ivadomed.org/api_ref.html#module-ivadomed.metrics)." 
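Once testing has run, the per-image metrics can be inspected directly from that CSV. A minimal sketch, assuming the `spineGeneric_gpu` output folder used in this tutorial and that `pandas` is available (it is preinstalled on Colab):

```python
import pandas as pd

# One row per test image; columns include per-class metrics such as
# avd_class0 and vol_pred_class0, as shown in the evaluation log below.
df = pd.read_csv("spineGeneric_gpu/results_eval/evaluation_3Dmetrics.csv")
print(df.columns.tolist())
print(df.head())
```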
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "PkUga7e_Mdht", + "outputId": "18491925-2ca4-4850-d569-6ca1a7412a83" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2021-11-18 16:55:56.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36minit_ivadomed\u001b[0m:\u001b[36m408\u001b[0m - \u001b[1m\n", + "ivadomed (2.9.0)\n", + "\u001b[0m\n", + "\u001b[32m2021-11-18 16:55:56.373\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.config_manager\u001b[0m:\u001b[36m_display_differing_keys\u001b[0m:\u001b[36m150\u001b[0m - \u001b[1mAdding the following keys to the configuration file\u001b[0m\n", + "\u001b[32m2021-11-18 16:55:56.373\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.config_manager\u001b[0m:\u001b[36mdeep_dict_compare\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1m log_file: log\u001b[0m\n", + "\u001b[32m2021-11-18 16:55:56.373\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.config_manager\u001b[0m:\u001b[36mdeep_dict_compare\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1m loader_parameters: is_input_dropout: False\u001b[0m\n", + "\u001b[32m2021-11-18 16:55:56.374\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.config_manager\u001b[0m:\u001b[36mdeep_dict_compare\u001b[0m:\u001b[36m43\u001b[0m - \u001b[1m default_model: is_2d: True\u001b[0m\n", + "\u001b[32m2021-11-18 16:55:56.374\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.config_manager\u001b[0m:\u001b[36m_display_differing_keys\u001b[0m:\u001b[36m152\u001b[0m - \u001b[1m\n", + "\u001b[0m\n", + "\u001b[32m2021-11-18 16:55:56.374\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mget_path_output\u001b[0m:\u001b[36m371\u001b[0m - \u001b[1mCLI flag --path-output not used to specify output directory. Will check config file for directory...\u001b[0m\n", + "\u001b[32m2021-11-18 16:55:56.374\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mget_path_data\u001b[0m:\u001b[36m383\u001b[0m - \u001b[1mCLI flag --path-data not used to specify BIDS data directory. 
Will check config file for directory...\u001b[0m\n", + "\u001b[32m2021-11-18 16:55:56.374\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.main\u001b[0m:\u001b[36mset_output_path\u001b[0m:\u001b[36m198\u001b[0m - \u001b[1mOutput path already exists: spineGeneric_gpu\u001b[0m\n", + "\u001b[32m2021-11-18 16:55:56.419\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdefine_device\u001b[0m:\u001b[36m135\u001b[0m - \u001b[1mUsing GPU ID 0\u001b[0m\n", + "\u001b[32m2021-11-18 16:55:56.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m145\u001b[0m - \u001b[1mSelected architecture: Unet, with the following parameters:\u001b[0m\n", + "\u001b[32m2021-11-18 16:55:56.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1m\tdropout_rate: 0.3\u001b[0m\n", + "\u001b[32m2021-11-18 16:55:56.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1m\tbn_momentum: 0.1\u001b[0m\n", + "\u001b[32m2021-11-18 16:55:56.420\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1m\tdepth: 3\u001b[0m\n", + "\u001b[32m2021-11-18 16:55:56.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1m\tis_2d: True\u001b[0m\n", + "\u001b[32m2021-11-18 16:55:56.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1m\tfinal_activation: sigmoid\u001b[0m\n", + "\u001b[32m2021-11-18 16:55:56.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1m\tfolder_name: my_model\u001b[0m\n", + "\u001b[32m2021-11-18 16:55:56.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1m\tin_channel: 1\u001b[0m\n", + "\u001b[32m2021-11-18 16:55:56.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1m\tout_channel: 1\u001b[0m\n", + "/usr/local/lib/python3.7/dist-packages/bids/config.py:40: FutureWarning: Setting 'extension_initial_dot' will be removed in pybids 0.16.\n", + " FutureWarning)\n", + "/usr/local/lib/python3.7/dist-packages/bids/layout/validation.py:149: UserWarning: The PipelineDescription field was superseded by GeneratedBy in BIDS 1.4.0. 
You can use ``pybids upgrade`` to update your derivative dataset.\n", + " warnings.warn(\"The PipelineDescription field was superseded \"\n", + "\u001b[32m2021-11-18 16:55:57.748\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.loader.bids_dataframe\u001b[0m:\u001b[36msave\u001b[0m:\u001b[36m289\u001b[0m - \u001b[1mDataframe has been saved in spineGeneric_gpu/bids_dataframe.csv.\u001b[0m\n", + "\u001b[32m2021-11-18 16:55:57.752\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mivadomed.loader.utils\u001b[0m:\u001b[36msplit_dataset\u001b[0m:\u001b[36m102\u001b[0m - \u001b[33m\u001b[1mAfter splitting: train, validation and test fractions are respectively 0.6, 0.2 and 0.2 of participant_id.\u001b[0m\n", + "\u001b[32m2021-11-18 16:55:58.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_transfoms\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mSelected transformations for the ['testing'] dataset:\u001b[0m\n", + "\u001b[32m2021-11-18 16:55:58.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_transfoms\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1m\tResample: {'hspace': 0.75, 'wspace': 0.75, 'dspace': 1}\u001b[0m\n", + "\u001b[32m2021-11-18 16:55:58.702\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_transfoms\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1m\tCenterCrop: {'size': [128, 128]}\u001b[0m\n", + "\u001b[32m2021-11-18 16:55:58.702\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_transfoms\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1m\tNormalizeInstance: {'applied_to': ['im']}\u001b[0m\n", + "Loading dataset: 100% 6/6 [00:00<00:00, 161.52it/s]\n", + "\u001b[32m2021-11-18 16:56:05.540\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.loader.loader\u001b[0m:\u001b[36mload_dataset\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mLoaded 94 axial slices for the testing set.\u001b[0m\n", + "\u001b[32m2021-11-18 16:56:05.541\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.testing\u001b[0m:\u001b[36mtest\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1mLoading model: spineGeneric_gpu/best_model.pt\u001b[0m\n", + "Inference - Iteration 0: 100% 6/6 [00:14<00:00, 2.44s/it]\n", + "\u001b[32m2021-11-18 16:56:35.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.testing\u001b[0m:\u001b[36mtest\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1m{'dice_score': 0.9090897621830837, 'multi_class_dice_score': 0.9090897621830837, 'precision_score': 0.8584016858957487, 'recall_score': 0.9689729883662824, 'specificity_score': 0.9997912595822612, 'intersection_over_union': 0.8348760781023308, 'accuracy_score': 0.9997505841734929, 'hausdorff_score': 0.0674809834038413}\u001b[0m\n", + "\u001b[32m2021-11-18 16:56:35.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.evaluation\u001b[0m:\u001b[36mevaluate\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1m\n", + "Run Evaluation on spineGeneric_gpu/pred_masks\n", + "\u001b[0m\n", + "Evaluation: 100% 6/6 [00:07<00:00, 1.26s/it]\n", + "\u001b[32m2021-11-18 16:56:42.601\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.evaluation\u001b[0m:\u001b[36mevaluate\u001b[0m:\u001b[36m113\u001b[0m - \u001b[1m avd_class0 ... vol_pred_class0\n", + "image_id ... \n", + "sub-unf01_T1w 0.035910 ... 6382.499391\n", + "sub-unf01_T2w 0.075269 ... 6624.999368\n", + "sub-mpicbs06_T2star 0.316078 ... 6006.249427\n", + "sub-mpicbs06_T2w 0.148635 ... 
5573.749468\n", + "sub-unf01_T2star 0.137438 ... 6558.749375\n", + "\n", + "[5 rows x 16 columns]\u001b[0m\n" + ] + } + ], + "source": [ + "# @title Run me to test the model!\n", + "\n", + "# test the model\n", + "!ivadomed --test -c config.json" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fnwz7It8BACM", + "outputId": "4006eb91-0fd0-47a8-d022-6626eb28089e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Zip file created!\n" + ] + } + ], + "source": [ + "# @title Save the results in a zip file!\n", + "# @markdown Now that training and testing are done, we would like to download\n", + "# @markdown the results locally for further anaylsis. For that, we first\n", + "# @markdown create a `.zip` file of the results folder and then download\n", + "# @markdown the zipped file manually. \n", + "\n", + "# first, zip the results folder\n", + "!zip -r --quiet spineGeneric_gpu.zip spineGeneric_gpu/\n", + "print(\"Zip file created!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_ye2RNLip-ci" + }, + "source": [ + "Now, check out the Files tab on the right. You can find spineGeneric_gpu.zip when you refresh the content (see the top bar) and then download the zip file to your browser's standard Downloads folder." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gGRw-LtivjaP" + }, + "source": [ + "The test image segmentations are stored in `/pred_masks/` and have the same name as the input image with the suffix `_pred`. To visualize the segmentation of a given subject, you can use any Nifti image viewer (e.g. [ITK-SNAP](http://www.itksnap.org/pmwiki/pmwiki.php), [FSLeyes](https://open.win.ox.ac.uk/pages/fsl/fsleyes/fsleyes/userdoc/)). " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A69EXdJeGIwT" + }, + "source": [ + "After the training for 100 epochs, the segmentations should be similar to the one presented in the following image. The output and ground truth segmentations of the spinal cord are presented in red (subject `sub-mpicbs06` with contrast T2w):\n", + "\n", + "" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RygQLOgBHTs_" + }, + "source": [ + "⚠️ **Note**: In case you prefer running things on the terminal instead of notebooks, ivadomed also makes that possible. Head over to [this](https://ivadomed.org/tutorials/one_class_segmentation_2d_unet.html) page that explains this tutorial from the terminal. However, before doing that please ensure that you have installed `ivadomed` locally. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uDco238GHHti" + }, + "source": [ + "So, that was it for the first tutorial! We saw a simple example of how `ivadomed` can be used to segment the spinal cord. Please try the other tutorials to get a better feel of what `ivadomed` has to offer. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "A6M5V1XwBAFL" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "ivadomed_tutorial-1_2d-segmentation-unet.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/testing/tutorials/tutorial_3_uncertainty_estimation.ipynb b/testing/tutorials/tutorial_3_uncertainty_estimation.ipynb new file mode 100644 index 000000000..f4bff3db8 --- /dev/null +++ b/testing/tutorials/tutorial_3_uncertainty_estimation.ipynb @@ -0,0 +1,496 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "TOFLk8iPrzkf" + }, + "source": [ + "### Ivadomed Tutorial 3: **Uncertainty Estimation**\n", + "\n", + "This tutorial shows how to estimate uncertainty measures (aleatoric and epistemic) on the model's predictions. These uncertainty measures are already implemented in `ivadomed` and are detailed in [Technical features](https://ivadomed.org/technical_features.html#uncertainty-measures).\n", + "\n", + "⚠️ Before getting started, please ensure that you: \n", + "\n", + "1. Are connected to the GPU. You can do this by doing the following from the task bar on the top: `Runtime` $\\to$ `Change Runtime type` $\\to$ `Hardware accelerator: GPU`\n", + "2. **Are running this tutorial from _your_ Google Drive. You can do this by going to: `File` $\\to$ `Save a Copy in Drive`.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# @title Fetch configuration file\n", + "# fetch the configuration (config) file to be used for this tutorial\n", + "!wget https://raw.githubusercontent.com/ivadomed/ivadomed/master/ivadomed/config/config.json ./content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uzOt6ugooys4", + "outputId": "b78724a5-06f4-4b6a-f694-c2151506c5f8" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Looking in links: https://download.pytorch.org/whl/torch_stable.html\n", + "Collecting torch==1.8.0+cu111\n", + " Downloading https://download.pytorch.org/whl/cu111/torch-1.8.0%2Bcu111-cp37-cp37m-linux_x86_64.whl (1982.2 MB)\n", + "\u001b[K |█████████████▌ | 834.1 MB 1.6 MB/s eta 0:12:08tcmalloc: large alloc 1147494400 bytes == 0x5624825fa000 @ 0x7fc3b5af8615 0x56244888e4cc 0x56244896e47a 0x5624488912ed 0x562448982e1d 0x562448904e99 0x5624488ff9ee 0x562448892bda 0x562448904d00 0x5624488ff9ee 0x562448892bda 0x562448901737 0x562448983c66 0x562448900daf 0x562448983c66 0x562448900daf 0x562448983c66 0x562448900daf 0x562448893039 0x5624488d6409 0x562448891c52 0x562448904c25 0x5624488ff9ee 0x562448892bda 0x562448901737 0x5624488ff9ee 0x562448892bda 0x562448900915 0x562448892afa 0x562448900c0d 0x5624488ff9ee\n", + "\u001b[K |█████████████████ | 1055.7 MB 1.7 MB/s eta 0:09:00tcmalloc: large alloc 1434370048 bytes == 0x5624c6c50000 @ 0x7fc3b5af8615 0x56244888e4cc 0x56244896e47a 0x5624488912ed 0x562448982e1d 0x562448904e99 0x5624488ff9ee 0x562448892bda 0x562448904d00 0x5624488ff9ee 0x562448892bda 0x562448901737 0x562448983c66 0x562448900daf 0x562448983c66 0x562448900daf 0x562448983c66 0x562448900daf 0x562448893039 0x5624488d6409 0x562448891c52 0x562448904c25 0x5624488ff9ee 
0x562448892bda 0x562448901737 0x5624488ff9ee 0x562448892bda 0x562448900915 0x562448892afa 0x562448900c0d 0x5624488ff9ee\n", + "\u001b[K |█████████████████████▋ | 1336.2 MB 1.7 MB/s eta 0:06:27tcmalloc: large alloc 1792966656 bytes == 0x56244ba82000 @ 0x7fc3b5af8615 0x56244888e4cc 0x56244896e47a 0x5624488912ed 0x562448982e1d 0x562448904e99 0x5624488ff9ee 0x562448892bda 0x562448904d00 0x5624488ff9ee 0x562448892bda 0x562448901737 0x562448983c66 0x562448900daf 0x562448983c66 0x562448900daf 0x562448983c66 0x562448900daf 0x562448893039 0x5624488d6409 0x562448891c52 0x562448904c25 0x5624488ff9ee 0x562448892bda 0x562448901737 0x5624488ff9ee 0x562448892bda 0x562448900915 0x562448892afa 0x562448900c0d 0x5624488ff9ee\n", + "\u001b[K |███████████████████████████▎ | 1691.1 MB 1.3 MB/s eta 0:03:37tcmalloc: large alloc 2241208320 bytes == 0x5624b686a000 @ 0x7fc3b5af8615 0x56244888e4cc 0x56244896e47a 0x5624488912ed 0x562448982e1d 0x562448904e99 0x5624488ff9ee 0x562448892bda 0x562448904d00 0x5624488ff9ee 0x562448892bda 0x562448901737 0x562448983c66 0x562448900daf 0x562448983c66 0x562448900daf 0x562448983c66 0x562448900daf 0x562448893039 0x5624488d6409 0x562448891c52 0x562448904c25 0x5624488ff9ee 0x562448892bda 0x562448901737 0x5624488ff9ee 0x562448892bda 0x562448900915 0x562448892afa 0x562448900c0d 0x5624488ff9ee\n", + "\u001b[K |████████████████████████████████| 1982.2 MB 1.5 MB/s eta 0:00:01tcmalloc: large alloc 1982251008 bytes == 0x56253c1cc000 @ 0x7fc3b5af71e7 0x5624488c4067 0x56244888e4cc 0x56244896e47a 0x5624488912ed 0x562448982e1d 0x562448904e99 0x5624488ff9ee 0x562448892bda 0x562448900c0d 0x5624488ff9ee 0x562448892bda 0x562448900c0d 0x5624488ff9ee 0x562448892bda 0x562448900c0d 0x5624488ff9ee 0x562448892bda 0x562448900c0d 0x5624488ff9ee 0x562448892bda 0x562448900c0d 0x562448892afa 0x562448900c0d 0x5624488ff9ee 0x562448892bda 0x562448901737 0x5624488ff9ee 0x562448892bda 0x562448901737 0x5624488ff9ee\n", + "tcmalloc: large alloc 2477817856 bytes == 0x5625b2438000 @ 0x7fc3b5af8615 0x56244888e4cc 0x56244896e47a 0x5624488912ed 0x562448982e1d 0x562448904e99 0x5624488ff9ee 0x562448892bda 0x562448900c0d 0x5624488ff9ee 0x562448892bda 0x562448900c0d 0x5624488ff9ee 0x562448892bda 0x562448900c0d 0x5624488ff9ee 0x562448892bda 0x562448900c0d 0x5624488ff9ee 0x562448892bda 0x562448900c0d 0x562448892afa 0x562448900c0d 0x5624488ff9ee 0x562448892bda 0x562448901737 0x5624488ff9ee 0x562448892bda 0x562448901737 0x5624488ff9ee 0x562448893271\n", + "\u001b[K |████████████████████████████████| 1982.2 MB 1.1 kB/s \n", + "\u001b[?25hCollecting torchvision==0.9.0+cu111\n", + " Downloading https://download.pytorch.org/whl/cu111/torchvision-0.9.0%2Bcu111-cp37-cp37m-linux_x86_64.whl (17.6 MB)\n", + "\u001b[K |████████████████████████████████| 17.6 MB 902 kB/s \n", + "\u001b[?25hCollecting torchtext==0.9.0\n", + " Downloading torchtext-0.9.0-cp37-cp37m-manylinux1_x86_64.whl (7.1 MB)\n", + "\u001b[K |████████████████████████████████| 7.1 MB 5.6 MB/s \n", + "\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from torch==1.8.0+cu111) (1.19.5)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from torch==1.8.0+cu111) (3.10.0.2)\n", + "Requirement already satisfied: pillow>=4.1.1 in /usr/local/lib/python3.7/dist-packages (from torchvision==0.9.0+cu111) (7.1.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from torchtext==0.9.0) (2.23.0)\n", + "Requirement already satisfied: tqdm in 
/usr/local/lib/python3.7/dist-packages (from torchtext==0.9.0) (4.62.3)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->torchtext==0.9.0) (3.0.4)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->torchtext==0.9.0) (1.24.3)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->torchtext==0.9.0) (2.10)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->torchtext==0.9.0) (2021.10.8)\n", + "Installing collected packages: torch, torchvision, torchtext\n", + " Attempting uninstall: torch\n", + " Found existing installation: torch 1.10.0+cu111\n", + " Uninstalling torch-1.10.0+cu111:\n", + " Successfully uninstalled torch-1.10.0+cu111\n", + " Attempting uninstall: torchvision\n", + " Found existing installation: torchvision 0.11.1+cu111\n", + " Uninstalling torchvision-0.11.1+cu111:\n", + " Successfully uninstalled torchvision-0.11.1+cu111\n", + " Attempting uninstall: torchtext\n", + " Found existing installation: torchtext 0.11.0\n", + " Uninstalling torchtext-0.11.0:\n", + " Successfully uninstalled torchtext-0.11.0\n", + "Successfully installed torch-1.8.0+cu111 torchtext-0.9.0 torchvision-0.9.0+cu111\n", + "\u001b[K |████████████████████████████████| 562 kB 5.4 MB/s \n", + "\u001b[?25h Building wheel for imgaug (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "\u001b[K |████████████████████████████████| 215 kB 5.3 MB/s \n", + "\u001b[K |████████████████████████████████| 3.2 MB 38.5 MB/s \n", + "\u001b[K |████████████████████████████████| 11.2 MB 37.3 MB/s \n", + "\u001b[K |████████████████████████████████| 4.1 MB 54 kB/s \n", + "\u001b[K |████████████████████████████████| 3.3 MB 37.5 MB/s \n", + "\u001b[K |████████████████████████████████| 57 kB 4.6 MB/s \n", + "\u001b[K |████████████████████████████████| 873 kB 32.9 MB/s \n", + "\u001b[K |████████████████████████████████| 55 kB 3.9 MB/s \n", + "\u001b[K |████████████████████████████████| 101 kB 8.5 MB/s \n", + "\u001b[K |████████████████████████████████| 1.3 MB 27.2 MB/s \n", + "\u001b[?25h" + ] + } + ], + "source": [ + "# @title Install Dependencies\n", + "!pip install imgaug==0.2.5 --quiet \n", + "!pip install ivadomed --quiet \n", + "\n", + "%load_ext tensorboard" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "G8cIbs-IrwtW", + "outputId": "17dee5c9-864f-4189-c7e6-62854c9b5d25" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2021-11-18 17:08:58.880\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36minit_ivadomed\u001b[0m:\u001b[36m408\u001b[0m - \u001b[1m\n", + "ivadomed (2.9.0)\n", + "\u001b[0m\n", + "Trying URL: https://github.com/ivadomed/data_example_spinegeneric/archive/r20200825.zip\n", + "Downloading: data_example_spinegeneric-r20200825.zip\n", + "Unzip data to: /tmp/tmpl29u9rom\n", + "Removing temporary folders...\n", + "Folder Created: /content/data_example_spinegeneric\n", + "--2021-11-18 17:09:41-- https://raw.githubusercontent.com/ivadomed/ivadomed/master/ivadomed/config/config.json\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 
185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n",
+ "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: 3699 (3.6K) [text/plain]\n",
+ "Saving to: ‘config.json’\n",
+ "\n",
+ "config.json 100%[===================>] 3.61K --.-KB/s in 0s \n",
+ "\n",
+ "2021-11-18 17:09:41 (43.6 MB/s) - ‘config.json’ saved [3699/3699]\n",
+ "\n",
+ "--2021-11-18 17:09:41-- http://./content\n",
+ "Resolving . (.)... failed: No address associated with hostname.\n",
+ "wget: unable to resolve host address ‘.’\n",
+ "FINISHED --2021-11-18 17:09:41--\n",
+ "Total wall clock time: 0.2s\n",
+ "Downloaded: 1 files, 3.6K in 0s (43.6 MB/s)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# @title Run Me for Downloading the Dataset!\n",
+ "\n",
+ "# @markdown We will be using a publicly-available dataset consisting of the MRI data of the spinal cord. \n",
+ "# @markdown More details on this dataset can be found in \n",
+ "# @markdown Tutorial 1: [One-class segmentation with 2D U-Net](https://ivadomed.org/tutorials/one_class_segmentation_2d_unet.html).\n",
+ "\n",
+ "# download the dataset\n",
+ "!ivadomed_download_data -d data_example_spinegeneric"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "yrMSqatm67i5"
+ },
+ "source": [
+ "#### Configuration File\n",
+ "\n",
+ "In this tutorial, we will be using the configuration file `ivadomed/config/config.json`. This is already downloaded for you and can be seen under Colab's `Files` (📁) tab on the left.\n",
+ "\n",
+ "Open this file; it is the same configuration file used in the [first tutorial](https://ivadomed.org/tutorials/one_class_segmentation_2d_unet.html) and will be modified as described in [Technical features](https://ivadomed.org/technical_features.html#uncertainty-measures). Please ensure that the `path_data` key points to the correct location of the dataset. The parameters of interest for this tutorial are as follows: \n",
+ "\n",
+ "1. `path_data` - Location of the directory containing the dataset. \n",
+ "```json\n",
+ " \"path_data\": \"data_example_spinegeneric\"\n",
+ "```\n",
+ "\n",
+ "2. `path_output` - Location of the directory containing the trained model. To avoid having to train a model from scratch, the downloaded dataset includes a pre-trained model for spinal cord segmentation in the folder named `trained_model`. Modify the path so it points to the location of the trained model.\n",
+ "```json\n",
+ " \"path_output\": \"data_example_spinegeneric/trained_model\"\n",
+ "```\n",
+ "\n",
+ "3. `command` - The task to perform. Since we are interested in running inference with a trained model, we set the command to \"test\", as shown below. \n",
+ "```json\n",
+ " \"command\": \"test\"\n",
+ "```\n",
+ "\n",
+ "4. `uncertainty` - The type of uncertainty to estimate. Available choices are \"epistemic\" and \"aleatoric\"; note that both can be set to true at the same time. More details on the implementation can be found in [Technical features](https://ivadomed.org/technical_features.html#uncertainty-measures). `\"n_it\"` controls the number of Monte Carlo iterations performed to estimate the uncertainty; set it to a positive integer (this tutorial uses `15`).\n",
+ "```json\n",
+ " \"uncertainty\": {\n",
+ " \"epistemic\": true,\n",
+ " \"aleatoric\": true,\n",
+ " \"n_it\": 15\n",
+ " }\n",
+ "```\n",
+ "\n",
+ "5. `transformation` - The transformations performed as part of data augmentation. 
If aleatoric uncertainty is enabled, the data augmentation that will be performed is the same as the one performed for the training. Note that only transformations for which an `undo_transform` (i.e. inverse transformation) is available will be performed since these inverse transformations are required to reconstruct the predicted volume.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JIqUMWMSA3Fl" + }, + "source": [ + "### Modify the Config File\n", + "\n", + "Open the `config.json` file under the \"Files\" tab on the left. This should let you edit the contents of the json file as mentioned above. Change the following parameters:\n", + "\n", + "1. \n", + "```json\n", + " \"command\": \"test\"\n", + "```\n", + "2. \n", + "```json\n", + " \"path_output\": \"data_example_spinegeneric/trained_model\"\n", + "```\n", + "3. \n", + "```json\n", + " \"debugging\": true\n", + "```\n", + "4. \n", + "```json\n", + " \"uncertainty\": {\n", + " \"epistemic\": true,\n", + " \"aleatoric\": true,\n", + " \"n_it\": 15\n", + " }\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QrtC-Etn66Mm", + "outputId": "8c5982bc-1568-40eb-eee6-9b15ed430e34" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m2021-11-18 17:11:09.612\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36minit_ivadomed\u001b[0m:\u001b[36m408\u001b[0m - \u001b[1m\n", + "ivadomed (2.9.0)\n", + "\u001b[0m\n", + "\u001b[32m2021-11-18 17:11:09.615\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mget_path_output\u001b[0m:\u001b[36m371\u001b[0m - \u001b[1mCLI flag --path-output not used to specify output directory. Will check config file for directory...\u001b[0m\n", + "\u001b[32m2021-11-18 17:11:09.615\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mget_path_data\u001b[0m:\u001b[36m383\u001b[0m - \u001b[1mCLI flag --path-data not used to specify BIDS data directory. 
Will check config file for directory...\u001b[0m\n", + "\u001b[32m2021-11-18 17:11:09.615\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.main\u001b[0m:\u001b[36mset_output_path\u001b[0m:\u001b[36m198\u001b[0m - \u001b[1mOutput path already exists: data_example_spinegeneric/trained_model\u001b[0m\n", + "\u001b[32m2021-11-18 17:11:09.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdefine_device\u001b[0m:\u001b[36m135\u001b[0m - \u001b[1mUsing GPU ID 0\u001b[0m\n", + "\u001b[32m2021-11-18 17:11:09.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m145\u001b[0m - \u001b[1mSelected architecture: Unet, with the following parameters:\u001b[0m\n", + "\u001b[32m2021-11-18 17:11:09.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1m\tdropout_rate: 0.3\u001b[0m\n", + "\u001b[32m2021-11-18 17:11:09.722\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1m\tbn_momentum: 0.1\u001b[0m\n", + "\u001b[32m2021-11-18 17:11:09.722\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1m\tdepth: 3\u001b[0m\n", + "\u001b[32m2021-11-18 17:11:09.722\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1m\tis_2d: True\u001b[0m\n", + "\u001b[32m2021-11-18 17:11:09.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1m\tfinal_activation: sigmoid\u001b[0m\n", + "\u001b[32m2021-11-18 17:11:09.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1m\tfolder_name: my_model\u001b[0m\n", + "\u001b[32m2021-11-18 17:11:09.723\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1m\tin_channel: 1\u001b[0m\n", + "\u001b[32m2021-11-18 17:11:09.724\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_model_spec\u001b[0m:\u001b[36m148\u001b[0m - \u001b[1m\tout_channel: 1\u001b[0m\n", + "/usr/local/lib/python3.7/dist-packages/bids/config.py:40: FutureWarning: Setting 'extension_initial_dot' will be removed in pybids 0.16.\n", + " FutureWarning)\n", + "\u001b[32m2021-11-18 17:11:09.732\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mivadomed.loader.bids_dataframe\u001b[0m:\u001b[36mwrite_derivatives_dataset_description\u001b[0m:\u001b[36m304\u001b[0m - \u001b[33m\u001b[1m/content/data_example_spinegeneric/derivatives/dataset_description.json not found. Please ensure a full path is specified in the configuration file. Will attempt to create a place holder description file for now at/content/data_example_spinegeneric/derivatives/dataset_description.json.\u001b[0m\n", + "/usr/local/lib/python3.7/dist-packages/bids/layout/validation.py:149: UserWarning: The PipelineDescription field was superseded by GeneratedBy in BIDS 1.4.0. 
You can use ``pybids upgrade`` to update your derivative dataset.\n", + " warnings.warn(\"The PipelineDescription field was superseded \"\n", + "\u001b[32m2021-11-18 17:11:11.080\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.loader.bids_dataframe\u001b[0m:\u001b[36msave\u001b[0m:\u001b[36m289\u001b[0m - \u001b[1mDataframe has been saved in data_example_spinegeneric/trained_model/bids_dataframe.csv.\u001b[0m\n", + "\u001b[32m2021-11-18 17:11:11.084\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mivadomed.loader.utils\u001b[0m:\u001b[36msplit_dataset\u001b[0m:\u001b[36m102\u001b[0m - \u001b[33m\u001b[1mAfter splitting: train, validation and test fractions are respectively 0.6, 0.2 and 0.2 of participant_id.\u001b[0m\n", + "\u001b[32m2021-11-18 17:11:11.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.transforms\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m132\u001b[0m - \u001b[1mElasticTransform transform not included since no undo_transform available for it.\u001b[0m\n", + "\u001b[32m2021-11-18 17:11:11.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_transfoms\u001b[0m:\u001b[36m158\u001b[0m - \u001b[1mSelected transformations for the ['testing'] dataset:\u001b[0m\n", + "\u001b[32m2021-11-18 17:11:11.981\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_transfoms\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1m\tResample: {'hspace': 0.75, 'wspace': 0.75, 'dspace': 1}\u001b[0m\n", + "\u001b[32m2021-11-18 17:11:11.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_transfoms\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1m\tCenterCrop: {'size': [128, 128]}\u001b[0m\n", + "\u001b[32m2021-11-18 17:11:11.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_transfoms\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1m\tRandomAffine: {'degrees': 5, 'scale': [0.1, 0.1, 0.0], 'translate': [0.03, 0.03, 0.0], 'applied_to': ['im', 'gt']}\u001b[0m\n", + "\u001b[32m2021-11-18 17:11:11.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_transfoms\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1m\tElasticTransform: {'alpha_range': [28.0, 30.0], 'sigma_range': [3.5, 4.5], 'p': 0.1, 'applied_to': ['im', 'gt']}\u001b[0m\n", + "\u001b[32m2021-11-18 17:11:11.982\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.utils\u001b[0m:\u001b[36mdisplay_selected_transfoms\u001b[0m:\u001b[36m160\u001b[0m - \u001b[1m\tNormalizeInstance: {'applied_to': ['im']}\u001b[0m\n", + "\u001b[32m2021-11-18 17:11:11.983\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.transforms\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m132\u001b[0m - \u001b[1mElasticTransform transform not included since no undo_transform available for it.\u001b[0m\n", + "Loading dataset: 100% 6/6 [00:00<00:00, 102.15it/s]\n", + "\u001b[32m2021-11-18 17:11:18.897\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.loader.loader\u001b[0m:\u001b[36mload_dataset\u001b[0m:\u001b[36m114\u001b[0m - \u001b[1mLoaded 94 axial slices for the testing set.\u001b[0m\n", + "\u001b[32m2021-11-18 17:11:18.897\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.testing\u001b[0m:\u001b[36mtest\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1mLoading model: data_example_spinegeneric/trained_model/best_model.pt\u001b[0m\n", + "/usr/local/lib/python3.7/dist-packages/torch/serialization.py:656: 
SourceChangeWarning: source code of class 'ivadomed.models.Unet' has changed. you can retrieve the original source code by accessing the object's source attribute or set `torch.nn.Module.dump_patches = True` and use the patch tool to revert the changes.\n", + " warnings.warn(msg, SourceChangeWarning)\n", + "/usr/local/lib/python3.7/dist-packages/torch/serialization.py:656: SourceChangeWarning: source code of class 'ivadomed.models.Encoder' has changed. you can retrieve the original source code by accessing the object's source attribute or set `torch.nn.Module.dump_patches = True` and use the patch tool to revert the changes.\n", + " warnings.warn(msg, SourceChangeWarning)\n", + "/usr/local/lib/python3.7/dist-packages/torch/serialization.py:656: SourceChangeWarning: source code of class 'torch.nn.modules.container.ModuleList' has changed. you can retrieve the original source code by accessing the object's source attribute or set `torch.nn.Module.dump_patches = True` and use the patch tool to revert the changes.\n", + " warnings.warn(msg, SourceChangeWarning)\n", + "/usr/local/lib/python3.7/dist-packages/torch/serialization.py:656: SourceChangeWarning: source code of class 'ivadomed.models.DownConv' has changed. you can retrieve the original source code by accessing the object's source attribute or set `torch.nn.Module.dump_patches = True` and use the patch tool to revert the changes.\n", + " warnings.warn(msg, SourceChangeWarning)\n", + "/usr/local/lib/python3.7/dist-packages/torch/serialization.py:656: SourceChangeWarning: source code of class 'torch.nn.modules.conv.Conv2d' has changed. you can retrieve the original source code by accessing the object's source attribute or set `torch.nn.Module.dump_patches = True` and use the patch tool to revert the changes.\n", + " warnings.warn(msg, SourceChangeWarning)\n", + "/usr/local/lib/python3.7/dist-packages/torch/serialization.py:656: SourceChangeWarning: source code of class 'torch.nn.modules.batchnorm.BatchNorm2d' has changed. you can retrieve the original source code by accessing the object's source attribute or set `torch.nn.Module.dump_patches = True` and use the patch tool to revert the changes.\n", + " warnings.warn(msg, SourceChangeWarning)\n", + "/usr/local/lib/python3.7/dist-packages/torch/serialization.py:656: SourceChangeWarning: source code of class 'torch.nn.modules.dropout.Dropout2d' has changed. you can retrieve the original source code by accessing the object's source attribute or set `torch.nn.Module.dump_patches = True` and use the patch tool to revert the changes.\n", + " warnings.warn(msg, SourceChangeWarning)\n", + "/usr/local/lib/python3.7/dist-packages/torch/serialization.py:656: SourceChangeWarning: source code of class 'torch.nn.modules.pooling.MaxPool2d' has changed. you can retrieve the original source code by accessing the object's source attribute or set `torch.nn.Module.dump_patches = True` and use the patch tool to revert the changes.\n", + " warnings.warn(msg, SourceChangeWarning)\n", + "/usr/local/lib/python3.7/dist-packages/torch/serialization.py:656: SourceChangeWarning: source code of class 'ivadomed.models.Decoder' has changed. you can retrieve the original source code by accessing the object's source attribute or set `torch.nn.Module.dump_patches = True` and use the patch tool to revert the changes.\n", + " warnings.warn(msg, SourceChangeWarning)\n", + "/usr/local/lib/python3.7/dist-packages/torch/serialization.py:656: SourceChangeWarning: source code of class 'ivadomed.models.UpConv' has changed. 
you can retrieve the original source code by accessing the object's source attribute or set `torch.nn.Module.dump_patches = True` and use the patch tool to revert the changes.\n", + " warnings.warn(msg, SourceChangeWarning)\n", + "\u001b[32m2021-11-18 17:11:21.541\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.testing\u001b[0m:\u001b[36mtest\u001b[0m:\u001b[36m70\u001b[0m - \u001b[1mComputing model uncertainty over 10 iterations.\u001b[0m\n", + "Inference - Iteration 0: 100% 6/6 [00:10<00:00, 1.79s/it]\n", + "Inference - Iteration 1: 100% 6/6 [00:05<00:00, 1.19it/s]\n", + "Inference - Iteration 2: 100% 6/6 [00:04<00:00, 1.26it/s]\n", + "Inference - Iteration 3: 100% 6/6 [00:04<00:00, 1.27it/s]\n", + "Inference - Iteration 4: 100% 6/6 [00:04<00:00, 1.22it/s]\n", + "Inference - Iteration 5: 100% 6/6 [00:04<00:00, 1.27it/s]\n", + "Inference - Iteration 6: 100% 6/6 [00:04<00:00, 1.26it/s]\n", + "Inference - Iteration 7: 100% 6/6 [00:04<00:00, 1.21it/s]\n", + "Inference - Iteration 8: 100% 6/6 [00:04<00:00, 1.23it/s]\n", + "Inference - Iteration 9: 100% 6/6 [00:04<00:00, 1.26it/s]\n", + "Uncertainty Computation: 100% 6/6 [00:41<00:00, 6.99s/it]\n", + "Inference - Iteration 10: 100% 6/6 [00:09<00:00, 1.59s/it]\n", + "\u001b[32m2021-11-18 17:15:21.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.testing\u001b[0m:\u001b[36mtest\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1m{'dice_score': 0.8902438680723904, 'multi_class_dice_score': 0.8902438680723904, 'precision_score': 0.8902424710595589, 'recall_score': 0.8927448282084386, 'specificity_score': 0.9998551088037474, 'intersection_over_union': 0.8025785222811419, 'accuracy_score': 0.999715174700608, 'hausdorff_score': 0.06832593207064888}\u001b[0m\n", + "\u001b[32m2021-11-18 17:15:21.213\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.evaluation\u001b[0m:\u001b[36mevaluate\u001b[0m:\u001b[36m33\u001b[0m - \u001b[1m\n", + "Run Evaluation on data_example_spinegeneric/trained_model/pred_masks\n", + "\u001b[0m\n", + "Evaluation: 100% 6/6 [00:08<00:00, 1.41s/it]\n", + "\u001b[32m2021-11-18 17:15:29.652\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mivadomed.evaluation\u001b[0m:\u001b[36mevaluate\u001b[0m:\u001b[36m113\u001b[0m - \u001b[1m avd_class0 ... vol_pred_class0\n", + "image_id ... \n", + "sub-unf01_T1w 0.070805 ... 5724.999454\n", + "sub-unf01_T2w 0.053561 ... 5831.249444\n", + "sub-mpicbs06_T2star 0.075322 ... 4907.499532\n", + "sub-mpicbs06_T2w 0.005667 ... 4879.999535\n", + "sub-unf01_T2star 0.023846 ... 5903.749437\n", + "\n", + "[5 rows x 16 columns]\u001b[0m\n" + ] + } + ], + "source": [ + "# @title Run Uncertainty Estimation\n", + "\n", + "# @markdown Once the configuration file has been modified, run the inference with the following command:\n", + "# @markdown ```shell\n", + "# @markdown ivadomed --test -c config.json\n", + "# @markdown ```\n", + "\n", + "# run uncertainty estimation\n", + "!ivadomed --test -c config.json" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CEph9yzPQ5ZC" + }, + "source": [ + "If aleatoric uncertainty was enabled, then data augmentation operations will be performed at the test time, as indicated in the terminal output (see below). 
Note that `ElasticTransform` has been deactivated because `undo_transform` function is not available for it.\n", + "\n", + "```\n", + " Selected transformations for the ['testing'] dataset:\n", + " Resample: {'hspace': 0.75, 'wspace': 0.75, 'dspace': 1}\n", + " CenterCrop: {'size': [128, 128]}\n", + " RandomAffine: {'degrees': 5, 'scale': [0.1, 0.1], 'translate': [0.03, 0.03], 'applied_to': ['im', 'gt']}\n", + " ElasticTransform: {'alpha_range': [28.0, 30.0], 'sigma_range': [3.5, 4.5], 'p': 0.1, 'applied_to': ['im', 'gt']}\n", + " NumpyToTensor: {}\n", + " NormalizeInstance: {'applied_to': ['im']}\n", + " ElasticTransform transform not included since no undo_transform available for it.\n", + "```\n", + "\n", + ".... otherwise, only preprocessing and data normalization are performed, see below:\n", + "\n", + "```\n", + " Selected transformations for the ['testing'] dataset:\n", + " Resample: {'hspace': 0.75, 'wspace': 0.75, 'dspace': 1}\n", + " CenterCrop: {'size': [128, 128]}\n", + " NumpyToTensor: {}\n", + " NormalizeInstance: {'applied_to': ['im']}\n", + "```\n", + "\n", + "For each testing image, `\"n_it\"` Monte Carlo samples for that image are segmented using the trained model and saved under `pred_masks`, with the iteration number as suffix (e.g. `sub-001_pred_00.nii.gz … sub-001_pred_19.nii.gz`).\n", + "\n", + "```\n", + " Computing model uncertainty over 20 iterations.\n", + " Inference - Iteration 0: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:11<00:00, 2.27s/it] \n", + " Inference - Iteration 1: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.81s/it]\n", + " Inference - Iteration 2: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.96s/it]\n", + " Inference - Iteration 3: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:08<00:00, 1.66s/it]\n", + " Inference - Iteration 4: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:08<00:00, 1.69s/it]\n", + " Inference - Iteration 5: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.92s/it]\n", + " Inference - Iteration 6: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:08<00:00, 1.74s/it]\n", + " Inference - Iteration 7: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:08<00:00, 1.74s/it]\n", + " Inference - Iteration 8: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.83s/it]\n", + " Inference - Iteration 9: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [00:07<00:00, 1.59s/it]\n", + " Inference - Iteration 10: 100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.85s/it]\n", + " Inference - Iteration 11: 100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.85s/it]\n", + " Inference - Iteration 12: 100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.92s/it]\n", + " Inference - Iteration 13: 100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.83s/it]\n", + " Inference - Iteration 14: 
100%|██████████████████████████████████████████████████████████████████████████████████| 5/5 [00:09<00:00, 1.84s/it]\n", + "```\n", + "\n", + "The Monte Carlo samples are then used to compute uncertainty measures for each image. The results are saved under `pred_masks`.\n", + "```\n", + " Uncertainty Computation: 100%|███████████████████████████████████████████████████████████████████████████████████| 5/5 [01:31<00:00, 18.28s/it]\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-Us9-l2YVn-V", + "outputId": "480f285f-81f2-4081-e896-acb5dc66a111" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Zip file created!\n" + ] + } + ], + "source": [ + "# @title Save and Download the results!\n", + "# @markdown Now that we have the uncertainty estimates, we would like to download\n", + "# @markdown the results locally for further anaylsis. For that, we first\n", + "# @markdown create a `.zip`file of the results folder and then download\n", + "# @markdown the zipped file manually. \n", + "\n", + "# first, zip the results folder\n", + "!zip -r --quiet spineGeneric_unc.zip ./data_example_spinegeneric/trained_model/\n", + "print(\"Zip file created!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F2okWrkETGYL" + }, + "source": [ + "Note that six files are generated during this process for each testing image:\n", + "\n", + "* `*_soft.nii.gz`: Soft segmentation (i.e. values between 0 and 1) which is generated by averaging the Monte Carlo samples.\n", + "\n", + "* `*_pred.nii.gz`: Binary segmentation obtained by thresholding `*_soft.nii.gz` with `1 / (Number of Monte Carlo iterations)` i.e. `1/n_it`.\n", + "\n", + "* `*_unc-vox.nii.gz`: Voxel-wise measure of uncertainty derived from the entropy of the Monte Carlo samples. The higher a given voxel value is, the more uncertain is the prediction for this voxel.\n", + "\n", + "* `*_unc-avgUnc.nii.gz`: Structure-wise measure of uncertainty derived from the mean value of `*_unc-vox.nii.gz` within a given connected object (e.g. a lesion, grey matter).\n", + "\n", + "* `*_unc-cv.nii.gz`: Structure-wise measure of uncertainty derived from the coefficient of variation of the volume of a given connected object across the Monte Carlo samples. The higher the value for a given voxel, the more uncertain is the prediction for this voxel.\n", + "\n", + "* `*_unc-iou.nii.gz`: Structure-wise measure of uncertainty derived from the Intersection-over-Union (IoU) of the predictions of a given connected object across the Monte Carlo samples. The lower the value for a given voxel, the more uncertain is the prediction for this voxel.\n", + "\n", + "These files can further be used for post-processing to refine the segmentation. For example, the voxels depicted in pink under the \"Uncertainty\" panel are more uncertain than the ones in blue: therefore, we can further refine the model's prediction by removing the voxels with low uncertainty (in blue) **AND** low prediction values (in dark red under the \"Model Prediction\" panel) from the foreground class. \n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ihqD2cacWdT1" + }, + "source": [ + "And that concludes the tutorial on how to use the in-built uncertainty estimation measures in `ivadomed` for spinal cord segmentation." 
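+ "\n",
+ "\n",
+ "If you would like to experiment with the refinement idea described above, the following is a minimal sketch (not part of the original tutorial): it reproduces the `1/n_it` thresholding rule that produces `*_pred.nii.gz` from `*_soft.nii.gz`, then drops voxels that have both a low prediction value and a low voxel-wise uncertainty. It assumes `nibabel` and `numpy`; the file names and the two thresholds are illustrative.\n",
+ "\n",
+ "```python\n",
+ "import nibabel as nib\n",
+ "import numpy as np\n",
+ "\n",
+ "n_it = 15  # must match 'n_it' in config.json\n",
+ "\n",
+ "pred_dir = 'data_example_spinegeneric/trained_model/pred_masks'\n",
+ "soft_nii = nib.load(pred_dir + '/sub-unf01_T2w_soft.nii.gz')\n",
+ "unc_nii = nib.load(pred_dir + '/sub-unf01_T2w_unc-vox.nii.gz')\n",
+ "soft = np.asanyarray(soft_nii.dataobj)\n",
+ "unc = np.asanyarray(unc_nii.dataobj)\n",
+ "\n",
+ "# Reproduce the *_pred.nii.gz rule: threshold the soft prediction at 1/n_it.\n",
+ "pred = soft >= (1.0 / n_it)\n",
+ "\n",
+ "# Drop voxels with BOTH low prediction values and low voxel-wise uncertainty\n",
+ "# (the thresholds below are illustrative and should be tuned).\n",
+ "low_conf = (soft < 0.5) & (unc < 0.2)\n",
+ "refined = pred & ~low_conf\n",
+ "\n",
+ "nib.save(nib.Nifti1Image(refined.astype(np.uint8), soft_nii.affine),\n",
+ "         pred_dir + '/sub-unf01_T2w_pred_refined.nii.gz')\n",
+ "```"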
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "1dNLUwA5rw3k"
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "collapsed_sections": [],
+ "name": "ivadomed_tutorial-3_uncertainty-estimation.ipynb",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/testing/unit_tests/t_template.py b/testing/unit_tests/t_template.py
index 8b8a2aa63..7c32e7ddf 100644
--- a/testing/unit_tests/t_template.py
+++ b/testing/unit_tests/t_template.py
@@ -55,9 +55,9 @@
 """
 import logging
-import os
 from testing.unit_tests.t_utils import create_tmp_dir, __data_testing_dir__, __tmp_dir__
 from testing.common_testing_util import remove_tmp_dir
+from pathlib import Path
 
 logger = logging.getLogger(__name__)
 
@@ -75,10 +75,10 @@ def setup_function():
 
 def test_template():
     # Test Input Files: all test input files should be in tmp/data_testing aka __data_testing_dir__
-    logger.info(os.listdir(__data_testing_dir__))
+    logger.info([f.name for f in Path(__data_testing_dir__).iterdir()])
 
     # Test Output Files: put your output files in tmp folder
-    os.mkdir(os.path.join(__tmp_dir__, 'my_output_dir'))
+    Path(__tmp_dir__, 'my_output_dir').mkdir()
 
     assert 1 == 1
 
diff --git a/testing/unit_tests/t_utils.py b/testing/unit_tests/t_utils.py
index de62a7b70..3da730e42 100644
--- a/testing/unit_tests/t_utils.py
+++ b/testing/unit_tests/t_utils.py
@@ -1,11 +1,13 @@
 import os
 import shutil
 import pytest
+from pathlib import Path
 from ivadomed.utils import init_ivadomed
 from testing.common_testing_util import remove_tmp_dir, path_repo_root, path_temp, path_data_testing_tmp, \
-    path_data_testing_source, download_dataset
+    path_data_testing_source, download_dataset, path_data_multi_sessions_contrasts_source, \
+    path_data_multi_sessions_contrasts_tmp
 
-__test_dir__ = os.path.join(path_repo_root, 'testing/unit_tests')
+__test_dir__ = Path(path_repo_root, 'testing/unit_tests')
 __data_testing_dir__ = path_data_testing_tmp
 __tmp_dir__ = path_temp
 
@@ -20,6 +22,15 @@ def download_data_testing_test_files():
     download_dataset("data_testing")
 
 
+@pytest.fixture(scope='session')
+def download_multi_data():
+    """
+    This Pytest fixture DOWNLOADS all the test data REQUIRED for the multi-session, multi-contrast related unit
+    tests.
+    """
+    download_dataset("data_multi_testing")
+
+
 def create_tmp_dir(copy_data_testing_dir=True):
     """Create a temporary directory for unit_test data and copy test data files.
 
@@ -34,7 +45,23 @@
...
     into the ``tmp`` folder.
     """
     remove_tmp_dir()
-    os.mkdir(path_temp)
-    if os.path.exists(path_data_testing_source) and copy_data_testing_dir:
+    Path(path_temp).mkdir()
+    if Path(path_data_testing_source).exists() and copy_data_testing_dir:
         shutil.copytree(path_data_testing_source, path_data_testing_tmp)
+
+
+def create_tmp_dir_multi_session():
+    """Create a temporary directory for data related to multi-session unit tests and copy test data files.
+
+    1. Remove the ``tmp`` directory if it exists.
+    2. Copy the ``data_testing_multi`` directory to the ``tmp`` directory, ignoring the ``.git`` folder within it.
+
+    Any data files created during testing will go into the ``tmp`` directory.
+    This is created/removed for each test.
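+
+    A minimal usage sketch (a hypothetical test module, mirroring how ``create_tmp_dir``
+    is used elsewhere in this test suite; the test body below is illustrative)::
+
+        def setup_function():
+            create_tmp_dir_multi_session()
+
+        def test_something(download_multi_data):
+            ...  # exercise code against path_data_multi_sessions_contrasts_tmp
+
+        def teardown_function():
+            remove_tmp_dir()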
+ """ + ignore_git_pattern = shutil.ignore_patterns(str(path_data_multi_sessions_contrasts_source / '.git')) + remove_tmp_dir() + Path(path_temp).mkdir() + if Path(path_data_multi_sessions_contrasts_source).exists(): + shutil.copytree(path_data_multi_sessions_contrasts_source, + path_data_multi_sessions_contrasts_tmp, + ignore=ignore_git_pattern) \ No newline at end of file diff --git a/testing/unit_tests/test_HeMIS.py b/testing/unit_tests/test_HeMIS.py deleted file mode 100644 index 63748dfc3..000000000 --- a/testing/unit_tests/test_HeMIS.py +++ /dev/null @@ -1,243 +0,0 @@ -import os -import time -import pytest -import numpy as np -import torch -import torch.backends.cudnn as cudnn -from torch import optim -from torch.utils.data import DataLoader -from tqdm import tqdm -import ivadomed.transforms as imed_transforms -from ivadomed import losses -from ivadomed import models -from ivadomed import utils as imed_utils -from ivadomed.loader import utils as imed_loader_utils, adaptative as imed_adaptative -from ivadomed import training as imed_training -import logging -from testing.unit_tests.t_utils import create_tmp_dir, __data_testing_dir__, __tmp_dir__, \ - download_data_testing_test_files -from testing.common_testing_util import remove_tmp_dir - -logger = logging.getLogger(__name__) - -cudnn.benchmark = True - -GPU_ID = 0 -BATCH_SIZE = 4 -DROPOUT = 0.4 -BN = 0.1 -N_EPOCHS = 10 -INIT_LR = 0.01 -p = 0.0001 -__path_hdf5__ = os.path.join(__data_testing_dir__, "mytestfile.hdf5") -__path_csv__ = os.path.join(__data_testing_dir__, "hdf5.csv") - - -def setup_function(): - create_tmp_dir() - - -@pytest.mark.parametrize('loader_parameters', [{ - "path_data": [__data_testing_dir__], - "target_suffix": ["_lesion-manual"], - "extensions": [".nii.gz"], - "roi_params": {"suffix": "_seg-manual", "slice_filter_roi": None}, - "contrast_params": {"contrast_lst": ['T1w', 'T2w', 'T2star']}} - ]) -@pytest.mark.run(order=1) -def test_HeMIS(download_data_testing_test_files, loader_parameters, p=0.0001): - print('[INFO]: Starting test ... 
\n') - - bids_df = imed_loader_utils.BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True) - - contrast_params = loader_parameters["contrast_params"] - target_suffix = loader_parameters["target_suffix"] - roi_params = loader_parameters["roi_params"] - - training_transform_dict = { - "Resample": - { - "wspace": 0.75, - "hspace": 0.75 - }, - "CenterCrop": - { - "size": [48, 48] - }, - "NumpyToTensor": {} - } - - transform_lst, _ = imed_transforms.prepare_transforms(training_transform_dict) - - train_lst = ['sub-unf01'] - - print('[INFO]: Creating dataset ...\n') - model_params = { - "name": "HeMISUnet", - "dropout_rate": 0.3, - "bn_momentum": 0.9, - "depth": 2, - "in_channel": 1, - "out_channel": 1, - "missing_probability": 0.00001, - "missing_probability_growth": 0.9, - "contrasts": ["T1w", "T2w"], - "ram": False, - "path_hdf5": __path_hdf5__, - "csv_path": __path_csv__, - "target_lst": ["T2w"], - "roi_lst": ["T2w"] - } - dataset = imed_adaptative.HDF5Dataset(bids_df=bids_df, - subject_file_lst=train_lst, - model_params=model_params, - contrast_params=contrast_params, - target_suffix=target_suffix, - slice_axis=2, - transform=transform_lst, - metadata_choice=False, - dim=2, - slice_filter_fn=imed_loader_utils.SliceFilter( - filter_empty_input=True, - filter_empty_mask=True), - roi_params=roi_params) - - dataset.load_into_ram(['T1w', 'T2w', 'T2star']) - print("[INFO]: Dataset RAM status:") - print(dataset.status) - print("[INFO]: In memory Dataframe:") - print(dataset.dataframe) - - # TODO - # ds_train.filter_roi(nb_nonzero_thr=10) - - train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, - shuffle=True, pin_memory=True, - collate_fn=imed_loader_utils.imed_collate, - num_workers=1) - - model = models.HeMISUnet(contrasts=contrast_params["contrast_lst"], - depth=3, - drop_rate=DROPOUT, - bn_momentum=BN) - - print(model) - cuda_available = torch.cuda.is_available() - - if cuda_available: - torch.cuda.set_device(GPU_ID) - print("Using GPU ID {}".format(GPU_ID)) - model.cuda() - - # Initialing Optimizer and scheduler - step_scheduler_batch = False - optimizer = optim.Adam(model.parameters(), lr=INIT_LR) - scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, N_EPOCHS) - - load_lst, reload_lst, pred_lst, opt_lst, schedul_lst, init_lst, gen_lst = [], [], [], [], [], [], [] - - for epoch in tqdm(range(1, N_EPOCHS + 1), desc="Training"): - start_time = time.time() - - start_init = time.time() - lr = scheduler.get_last_lr()[0] - model.train() - - tot_init = time.time() - start_init - init_lst.append(tot_init) - - num_steps = 0 - start_gen = 0 - for i, batch in enumerate(train_loader): - if i > 0: - tot_gen = time.time() - start_gen - gen_lst.append(tot_gen) - - start_load = time.time() - input_samples, gt_samples = imed_utils.unstack_tensors(batch["input"]), batch["gt"] - - print(batch["input_metadata"][0][0]["missing_mod"]) - missing_mod = imed_training.get_metadata(batch["input_metadata"], model_params) - - print("Number of missing contrasts = {}." 
- .format(len(input_samples) * len(input_samples[0]) - missing_mod.sum())) - print("len input = {}".format(len(input_samples))) - print("Batch = {}, {}".format(input_samples[0].shape, gt_samples[0].shape)) - - if cuda_available: - var_input = imed_utils.cuda(input_samples) - var_gt = imed_utils.cuda(gt_samples, non_blocking=True) - else: - var_input = input_samples - var_gt = gt_samples - - tot_load = time.time() - start_load - load_lst.append(tot_load) - - start_pred = time.time() - preds = model(var_input, missing_mod) - tot_pred = time.time() - start_pred - pred_lst.append(tot_pred) - - start_opt = time.time() - loss = - losses.DiceLoss()(preds, var_gt) - - optimizer.zero_grad() - loss.backward() - - optimizer.step() - if step_scheduler_batch: - scheduler.step() - - num_steps += 1 - tot_opt = time.time() - start_opt - opt_lst.append(tot_opt) - - start_gen = time.time() - - start_schedul = time.time() - if not step_scheduler_batch: - scheduler.step() - tot_schedul = time.time() - start_schedul - schedul_lst.append(tot_schedul) - - start_reload = time.time() - print("[INFO]: Updating Dataset") - p = p ** (2 / 3) - dataset.update(p=p) - print("[INFO]: Reloading dataset") - train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, - shuffle=True, pin_memory=True, - collate_fn=imed_loader_utils.imed_collate, - num_workers=1) - tot_reload = time.time() - start_reload - reload_lst.append(tot_reload) - - end_time = time.time() - total_time = end_time - start_time - tqdm.write("Epoch {} took {:.2f} seconds.".format(epoch, total_time)) - - print('Mean SD init {} -- {}'.format(np.mean(init_lst), np.std(init_lst))) - print('Mean SD load {} -- {}'.format(np.mean(load_lst), np.std(load_lst))) - print('Mean SD reload {} -- {}'.format(np.mean(reload_lst), np.std(reload_lst))) - print('Mean SD pred {} -- {}'.format(np.mean(pred_lst), np.std(pred_lst))) - print('Mean SD opt {} -- {}'.format(np.mean(opt_lst), np.std(opt_lst))) - print('Mean SD gen {} -- {}'.format(np.mean(gen_lst), np.std(gen_lst))) - print('Mean SD scheduler {} -- {}'.format(np.mean(schedul_lst), np.std(schedul_lst))) - - -@pytest.mark.run(order=2) -def test_hdf5_bids(download_data_testing_test_files): - __output_dir__ = os.path.join(__tmp_dir__, "test_adap_bids") - os.makedirs(__output_dir__) - imed_adaptative.HDF5ToBIDS( - __path_hdf5__, - ['sub-unf01'], - __output_dir__) - assert os.path.isdir(os.path.join(__output_dir__, "sub-unf01/anat")) - assert os.path.isdir(os.path.join(__output_dir__, "derivatives/labels/sub-unf01/anat")) - print('\n [INFO]: Test of HeMIS passed successfully.') - - -def teardown_function(): - remove_tmp_dir() diff --git a/testing/unit_tests/test_adaptative.py b/testing/unit_tests/test_adaptative.py deleted file mode 100644 index 21f38c334..000000000 --- a/testing/unit_tests/test_adaptative.py +++ /dev/null @@ -1,171 +0,0 @@ -import os -import pytest -import h5py -import torch -from torch.utils.data import DataLoader - -import ivadomed.transforms as imed_transforms -from ivadomed import utils as imed_utils -from ivadomed.loader import utils as imed_loader_utils, adaptative as imed_adaptative -import logging -from testing.unit_tests.t_utils import create_tmp_dir, __data_testing_dir__, __tmp_dir__, download_data_testing_test_files -from testing.common_testing_util import remove_tmp_dir -logger = logging.getLogger(__name__) - -GPU_ID = 0 -BATCH_SIZE = 4 -DROPOUT = 0.4 -DEPTH = 3 -BN = 0.1 -N_EPOCHS = 10 -INIT_LR = 0.01 -FILM_LAYERS = [0, 0, 0, 0, 0, 1, 1, 1] - - -def setup_function(): - create_tmp_dir() - - 
-@pytest.mark.parametrize('loader_parameters', [{ - "path_data": [__data_testing_dir__], - "target_suffix": ["_lesion-manual"], - "extensions": [".nii.gz"], - "roi_params": {"suffix": "_seg-manual", "slice_filter_roi": None}, - "contrast_params": {"contrast_lst": ['T1w', 'T2w', 'T2star']} - }]) -def test_hdf5(download_data_testing_test_files, loader_parameters): - print('[INFO]: Starting test ... \n') - - bids_df = imed_loader_utils.BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True) - - contrast_params = loader_parameters["contrast_params"] - target_suffix = loader_parameters["target_suffix"] - roi_params = loader_parameters["roi_params"] - - train_lst = ['sub-unf01_T1w.nii.gz', 'sub-unf01_T2w.nii.gz', 'sub-unf01_T2star.nii.gz'] - - training_transform_dict = { - "Resample": - { - "wspace": 0.75, - "hspace": 0.75 - }, - "CenterCrop": - { - "size": [48, 48] - }, - "NumpyToTensor": {} - } - transform_lst, _ = imed_transforms.prepare_transforms(training_transform_dict) - - bids_to_hdf5 = imed_adaptative.BIDStoHDF5(bids_df=bids_df, - subject_file_lst=train_lst, - path_hdf5=os.path.join(__data_testing_dir__, 'mytestfile.hdf5'), - target_suffix=target_suffix, - roi_params=roi_params, - contrast_lst=contrast_params["contrast_lst"], - metadata_choice="contrast", - transform=transform_lst, - contrast_balance={}, - slice_axis=2, - slice_filter_fn=imed_loader_utils.SliceFilter( - filter_empty_input=True, - filter_empty_mask=True)) - - # Checking architecture - def print_attrs(name, obj): - print("\nName of the object: {}".format(name)) - print("Type: {}".format(type(obj))) - print("Including the following attributes:") - for key, val in obj.attrs.items(): - print(" %s: %s" % (key, val)) - - print('\n[INFO]: HDF5 architecture:') - with h5py.File(bids_to_hdf5.path_hdf5, "a") as hdf5_file: - hdf5_file.visititems(print_attrs) - print('\n[INFO]: HDF5 file successfully generated.') - print('[INFO]: Generating dataframe ...\n') - - df = imed_adaptative.Dataframe(hdf5_file=hdf5_file, - contrasts=['T1w', 'T2w', 'T2star'], - path=os.path.join(__data_testing_dir__, 'hdf5.csv'), - target_suffix=['T1w', 'T2w', 'T2star'], - roi_suffix=['T1w', 'T2w', 'T2star'], - dim=2, - filter_slices=True) - - print(df.df) - - print('\n[INFO]: Dataframe successfully generated. ') - print('[INFO]: Creating dataset ...\n') - - model_params = { - "name": "HeMISUnet", - "dropout_rate": 0.3, - "bn_momentum": 0.9, - "depth": 2, - "in_channel": 1, - "out_channel": 1, - "missing_probability": 0.00001, - "missing_probability_growth": 0.9, - "contrasts": ["T1w", "T2w"], - "ram": False, - "path_hdf5": os.path.join(__data_testing_dir__, 'mytestfile.hdf5'), - "csv_path": os.path.join(__data_testing_dir__, 'hdf5.csv'), - "target_lst": ["T2w"], - "roi_lst": ["T2w"] - } - - dataset = imed_adaptative.HDF5Dataset(bids_df=bids_df, - subject_file_lst=train_lst, - target_suffix=target_suffix, - slice_axis=2, - model_params=model_params, - contrast_params=contrast_params, - transform=transform_lst, - metadata_choice=False, - dim=2, - slice_filter_fn=imed_loader_utils.SliceFilter( - filter_empty_input=True, - filter_empty_mask=True), - roi_params=roi_params) - - dataset.load_into_ram(['T1w', 'T2w', 'T2star']) - print("Dataset RAM status:") - print(dataset.status) - print("In memory Dataframe:") - print(dataset.dataframe) - print('\n[INFO]: Test passed successfully. 
') - - print("\n[INFO]: Starting loader test ...") - - device = torch.device("cuda:" + str(GPU_ID) if torch.cuda.is_available() else "cpu") - cuda_available = torch.cuda.is_available() - if cuda_available: - torch.cuda.set_device(device) - print("Using GPU ID {}".format(device)) - - train_loader = DataLoader(dataset, batch_size=BATCH_SIZE, - shuffle=False, pin_memory=True, - collate_fn=imed_loader_utils.imed_collate, - num_workers=1) - - for i, batch in enumerate(train_loader): - input_samples, gt_samples = batch["input"], batch["gt"] - print("len input = {}".format(len(input_samples))) - print("Batch = {}, {}".format(input_samples[0].shape, gt_samples[0].shape)) - - if cuda_available: - var_input = imed_utils.cuda(input_samples) - var_gt = imed_utils.cuda(gt_samples, non_blocking=True) - else: - var_input = input_samples - var_gt = gt_samples - - break - print("Congrats your dataloader works! You can go home now and get a beer.") - return 0 - - -def teardown_function(): - remove_tmp_dir() diff --git a/testing/unit_tests/test_automate_training.py b/testing/unit_tests/test_automate_training.py index 417cd9e89..9fadcff26 100644 --- a/testing/unit_tests/test_automate_training.py +++ b/testing/unit_tests/test_automate_training.py @@ -40,7 +40,7 @@ "gpu_ids": [[2], [5]] } -The ``config_list`` depends on the flag ``all_combin``, ``multi_params``, or no flag. +The ``config_list`` depends on the flag ``all-combin``, ``multi-params``, or no flag. For no flag (``default``), the options are: @@ -87,14 +87,14 @@ """ import pytest + +from ivadomed.loader.bids_dataframe import BidsDataframe from ivadomed.scripts.automate_training import make_config_list, get_param_list, \ HyperparameterOption -from ivadomed.loader import utils as imed_loader_utils from ivadomed.utils import generate_sha_256 -import logging +from loguru import logger from testing.unit_tests.t_utils import create_tmp_dir, __data_testing_dir__, __tmp_dir__, download_data_testing_test_files from testing.common_testing_util import remove_tmp_dir -logger = logging.getLogger(__name__) initial_config = { "training_parameters": { @@ -126,7 +126,7 @@ "depth": 2 }, "gpu_ids": [2], - "path_output": "./tmp/-batch_size=2-loss={'name': 'DiceLoss'}-depth=2-gpu_ids=[2]" + "path_output": "./tmp/-batch_size-2-loss-name-DiceLoss-depth-2-gpu_ids-2" }, { "training_parameters": { @@ -140,7 +140,7 @@ "depth": 3 }, "gpu_ids": [2], - "path_output": "./tmp/-batch_size=2-loss={'name': 'DiceLoss'}-depth=3-gpu_ids=[2]" + "path_output": "./tmp/-batch_size-2-loss-name-DiceLoss-depth-3-gpu_ids-2" }, { "training_parameters": { @@ -154,7 +154,7 @@ "depth": 4 }, "gpu_ids": [2], - "path_output": "./tmp/-batch_size=2-loss={'name': 'DiceLoss'}-depth=4-gpu_ids=[2]" + "path_output": "./tmp/-batch_size-2-loss-name-DiceLoss-depth-4-gpu_ids-2" }, { "training_parameters": { @@ -168,7 +168,7 @@ "depth": 2 }, "gpu_ids": [2], - "path_output": "./tmp/-batch_size=2-loss={'name': 'FocalLoss', 'gamma': 0.2, 'alpha': 0.5}-depth=2-gpu_ids=[2]" + "path_output": "./tmp/-batch_size-2-loss-name-FocalLoss-gamma-0.2-alpha-0.5-depth-2-gpu_ids-2" }, { "training_parameters": { @@ -182,7 +182,7 @@ "depth": 3 }, "gpu_ids": [2], - "path_output": "./tmp/-batch_size=2-loss={'name': 'FocalLoss', 'gamma': 0.2, 'alpha': 0.5}-depth=3-gpu_ids=[2]" + "path_output": "./tmp/-batch_size-2-loss-name-FocalLoss-gamma-0.2-alpha-0.5-depth-3-gpu_ids-2" }, { "training_parameters": { @@ -196,7 +196,7 @@ "depth": 4 }, "gpu_ids": [2], - "path_output": "./tmp/-batch_size=2-loss={'name': 'FocalLoss', 'gamma': 0.2, 'alpha': 
0.5}-depth=4-gpu_ids=[2]" + "path_output": "./tmp/-batch_size-2-loss-name-FocalLoss-gamma-0.2-alpha-0.5-depth-4-gpu_ids-2" }, { "training_parameters": { @@ -210,7 +210,7 @@ "depth": 2 }, "gpu_ids": [2], - "path_output": "./tmp/-batch_size=64-loss={'name': 'DiceLoss'}-depth=2-gpu_ids=[2]" + "path_output": "./tmp/-batch_size-64-loss-name-DiceLoss-depth-2-gpu_ids-2" }, { "training_parameters": { @@ -224,7 +224,7 @@ "depth": 3 }, "gpu_ids": [2], - "path_output": "./tmp/-batch_size=64-loss={'name': 'DiceLoss'}-depth=3-gpu_ids=[2]" + "path_output": "./tmp/-batch_size-64-loss-name-DiceLoss-depth-3-gpu_ids-2" }, { "training_parameters": { @@ -238,7 +238,7 @@ "depth": 4 }, "gpu_ids": [2], - "path_output": "./tmp/-batch_size=64-loss={'name': 'DiceLoss'}-depth=4-gpu_ids=[2]" + "path_output": "./tmp/-batch_size-64-loss-name-DiceLoss-depth-4-gpu_ids-2" }, { "training_parameters": { @@ -252,7 +252,7 @@ "depth": 2 }, "gpu_ids": [2], - "path_output": "./tmp/-batch_size=64-loss={'name': 'FocalLoss', 'gamma': 0.2, 'alpha': 0.5}-depth=2-gpu_ids=[2]" + "path_output": "./tmp/-batch_size-64-loss-name-FocalLoss-gamma-0.2-alpha-0.5-depth-2-gpu_ids-2" }, { "training_parameters": { @@ -266,7 +266,7 @@ "depth": 3 }, "gpu_ids": [2], - "path_output": "./tmp/-batch_size=64-loss={'name': 'FocalLoss', 'gamma': 0.2, 'alpha': 0.5}-depth=3-gpu_ids=[2]" + "path_output": "./tmp/-batch_size-64-loss-name-FocalLoss-gamma-0.2-alpha-0.5-depth-3-gpu_ids-2" }, { "training_parameters": { @@ -280,7 +280,7 @@ "depth": 4 }, "gpu_ids": [2], - "path_output": "./tmp/-batch_size=64-loss={'name': 'FocalLoss', 'gamma': 0.2, 'alpha': 0.5}-depth=4-gpu_ids=[2]" + "path_output": "./tmp/-batch_size-64-loss-name-FocalLoss-gamma-0.2-alpha-0.5-depth-4-gpu_ids-2" }, { "training_parameters": { @@ -294,7 +294,7 @@ "depth": 2 }, "gpu_ids": [5], - "path_output": "./tmp/-batch_size=2-loss={'name': 'DiceLoss'}-depth=2-gpu_ids=[5]" + "path_output": "./tmp/-batch_size-2-loss-name-DiceLoss-depth-2-gpu_ids-5" }, { "training_parameters": { @@ -308,7 +308,7 @@ "depth": 3 }, "gpu_ids": [5], - "path_output": "./tmp/-batch_size=2-loss={'name': 'DiceLoss'}-depth=3-gpu_ids=[5]" + "path_output": "./tmp/-batch_size-2-loss-name-DiceLoss-depth-3-gpu_ids-5" }, { "training_parameters": { @@ -322,7 +322,7 @@ "depth": 4 }, "gpu_ids": [5], - "path_output": "./tmp/-batch_size=2-loss={'name': 'DiceLoss'}-depth=4-gpu_ids=[5]" + "path_output": "./tmp/-batch_size-2-loss-name-DiceLoss-depth-4-gpu_ids-5" }, { "training_parameters": { @@ -336,7 +336,7 @@ "depth": 2 }, "gpu_ids": [5], - "path_output": "./tmp/-batch_size=2-loss={'name': 'FocalLoss', 'gamma': 0.2, 'alpha': 0.5}-depth=2-gpu_ids=[5]" + "path_output": "./tmp/-batch_size-2-loss-name-FocalLoss-gamma-0.2-alpha-0.5-depth-2-gpu_ids-5" }, { "training_parameters": { @@ -350,7 +350,7 @@ "depth": 3 }, "gpu_ids": [5], - "path_output": "./tmp/-batch_size=2-loss={'name': 'FocalLoss', 'gamma': 0.2, 'alpha': 0.5}-depth=3-gpu_ids=[5]" + "path_output": "./tmp/-batch_size-2-loss-name-FocalLoss-gamma-0.2-alpha-0.5-depth-3-gpu_ids-5" }, { "training_parameters": { @@ -364,7 +364,7 @@ "depth": 4 }, "gpu_ids": [5], - "path_output": "./tmp/-batch_size=2-loss={'name': 'FocalLoss', 'gamma': 0.2, 'alpha': 0.5}-depth=4-gpu_ids=[5]" + "path_output": "./tmp/-batch_size-2-loss-name-FocalLoss-gamma-0.2-alpha-0.5-depth-4-gpu_ids-5" }, { "training_parameters": { @@ -378,7 +378,7 @@ "depth": 2 }, "gpu_ids": [5], - "path_output": "./tmp/-batch_size=64-loss={'name': 'DiceLoss'}-depth=2-gpu_ids=[5]" + "path_output": 
"./tmp/-batch_size-64-loss-name-DiceLoss-depth-2-gpu_ids-5" }, { "training_parameters": { @@ -392,7 +392,7 @@ "depth": 3 }, "gpu_ids": [5], - "path_output": "./tmp/-batch_size=64-loss={'name': 'DiceLoss'}-depth=3-gpu_ids=[5]" + "path_output": "./tmp/-batch_size-64-loss-name-DiceLoss-depth-3-gpu_ids-5" }, { "training_parameters": { @@ -406,7 +406,7 @@ "depth": 4 }, "gpu_ids": [5], - "path_output": "./tmp/-batch_size=64-loss={'name': 'DiceLoss'}-depth=4-gpu_ids=[5]" + "path_output": "./tmp/-batch_size-64-loss-name-DiceLoss-depth-4-gpu_ids-5" }, { "training_parameters": { @@ -420,7 +420,7 @@ "depth": 2 }, "gpu_ids": [5], - "path_output": "./tmp/-batch_size=64-loss={'name': 'FocalLoss', 'gamma': 0.2, 'alpha': 0.5}-depth=2-gpu_ids=[5]" + "path_output": "./tmp/-batch_size-64-loss-name-FocalLoss-gamma-0.2-alpha-0.5-depth-2-gpu_ids-5" }, { "training_parameters": { @@ -434,7 +434,7 @@ "depth": 3 }, "gpu_ids": [5], - "path_output": "./tmp/-batch_size=64-loss={'name': 'FocalLoss', 'gamma': 0.2, 'alpha': 0.5}-depth=3-gpu_ids=[5]" + "path_output": "./tmp/-batch_size-64-loss-name-FocalLoss-gamma-0.2-alpha-0.5-depth-3-gpu_ids-5" }, { "training_parameters": { @@ -448,7 +448,7 @@ "depth": 4 }, "gpu_ids": [5], - "path_output": "./tmp/-batch_size=64-loss={'name': 'FocalLoss', 'gamma': 0.2, 'alpha': 0.5}-depth=4-gpu_ids=[5]" + "path_output": "./tmp/-batch_size-64-loss-name-FocalLoss-gamma-0.2-alpha-0.5-depth-4-gpu_ids-5" } ] @@ -465,7 +465,7 @@ "dropout_rate": 0.3, "depth": 3 }, - "path_output": "./tmp/-batch_size=2", + "path_output": "./tmp/-batch_size-2", "gpu_ids": [1] }, { @@ -479,7 +479,7 @@ "dropout_rate": 0.3, "depth": 3 }, - "path_output": "./tmp/-batch_size=64", + "path_output": "./tmp/-batch_size-64", "gpu_ids": [1] }, @@ -494,7 +494,7 @@ "dropout_rate": 0.3, "depth": 3 }, - "path_output": "./tmp/-loss={'name': 'DiceLoss'}", + "path_output": "./tmp/-loss-name-DiceLoss", "gpu_ids": [1] }, { @@ -508,7 +508,7 @@ "dropout_rate": 0.3, "depth": 3 }, - "path_output": "./tmp/-loss={'name': 'FocalLoss', 'gamma': 0.2, 'alpha': 0.5}", + "path_output": "./tmp/-loss-name-FocalLoss-gamma-0.2-alpha-0.5", "gpu_ids": [1] }, { @@ -522,7 +522,7 @@ "dropout_rate": 0.3, "depth": 2 }, - "path_output": "./tmp/-depth=2", + "path_output": "./tmp/-depth-2", "gpu_ids": [1] }, { @@ -536,7 +536,7 @@ "dropout_rate": 0.3, "depth": 3 }, - "path_output": "./tmp/-depth=3", + "path_output": "./tmp/-depth-3", "gpu_ids": [1] }, { @@ -550,7 +550,7 @@ "dropout_rate": 0.3, "depth": 4 }, - "path_output": "./tmp/-depth=4", + "path_output": "./tmp/-depth-4", "gpu_ids": [1] }, { @@ -564,7 +564,7 @@ "dropout_rate": 0.3, "depth": 3 }, - "path_output": "./tmp/-gpu_ids=[2]", + "path_output": "./tmp/-gpu_ids-2", "gpu_ids": [2] }, { @@ -578,7 +578,7 @@ "dropout_rate": 0.3, "depth": 3 }, - "path_output": "./tmp/-gpu_ids=[5]", + "path_output": "./tmp/-gpu_ids-5", "gpu_ids": [5] } ] @@ -595,7 +595,7 @@ "dropout_rate": 0.3, "depth": 2 }, - "path_output": "./tmp/-batch_size=2-loss={'name': 'DiceLoss'}-depth=2-gpu_ids=[2]", + "path_output": "./tmp/-batch_size-2-loss-name-DiceLoss-depth-2-gpu_ids-2", "gpu_ids": [2] }, { @@ -609,7 +609,7 @@ "dropout_rate": 0.3, "depth": 3 }, - "path_output": "./tmp/-batch_size=64-loss={'name': 'FocalLoss', 'gamma': 0.2, 'alpha': 0.5}-depth=3-gpu_ids=[5]", + "path_output": "./tmp/-batch_size-64-loss-name-FocalLoss-gamma-0.2-alpha-0.5-depth-3-gpu_ids-5", "gpu_ids": [5] } ] @@ -691,6 +691,9 @@ def test_make_config_list(initial_config, all_combin, multi_params, param_list, config_list = make_config_list(param_list, 
initial_config, all_combin, multi_params) assert len(config_list) == len(expected_config_list) for config_option in config_list: + if config_option not in expected_config_list: + logger.debug(config_option['path_output']) + logger.debug(expected_config_list) assert config_option in expected_config_list for config_option in expected_config_list: assert config_option in config_list @@ -717,7 +720,7 @@ def test_config_sha256(download_data_testing_test_files, initial_config): "multichannel": False } - bids_df = imed_loader_utils.BidsDataframe(loader_params, __tmp_dir__, derivatives=True) + bids_df = BidsDataframe(loader_params, __tmp_dir__, derivatives=True) generate_sha_256(initial_config, bids_df.df, file_lst) assert(initial_config['training_sha256']['sub-unf01_T2w.nii.gz'] == 'f020b368fea15399fa112badd28b2df69e044dba5d23b3fe1646d12d7d3d39ac') diff --git a/testing/unit_tests/test_bounding_box.py b/testing/unit_tests/test_bounding_box.py index c08c23a41..793a19928 100644 --- a/testing/unit_tests/test_bounding_box.py +++ b/testing/unit_tests/test_bounding_box.py @@ -1,11 +1,13 @@ -import numpy as np -import pytest -import os import json import shutil +import pickle +from pathlib import Path + +import numpy as np +import pytest +from ivadomed.loader.bids_dataframe import BidsDataframe from ivadomed.loader import loader as imed_loader -from ivadomed.loader import utils as imed_loader_utils from ivadomed.object_detection import utils as imed_obj_detect import logging from testing.unit_tests.t_utils import create_tmp_dir, __data_testing_dir__, __tmp_dir__, download_data_testing_test_files @@ -13,7 +15,7 @@ logger = logging.getLogger(__name__) BATCH_SIZE = 8 -PATH_OUTPUT = os.path.join(__tmp_dir__, "log") +PATH_OUTPUT = Path(__tmp_dir__, "log") def setup_function(): @@ -27,7 +29,7 @@ def setup_function(): "object_detection_params": { "object_detection_path": "object_detection", "safety_factor": [1.0, 1.0, 1.0], - "path_output": PATH_OUTPUT + "path_output": str(PATH_OUTPUT) }, "transforms_params": { "NumpyToTensor": {}}, @@ -39,7 +41,7 @@ def setup_function(): "object_detection_params": { "object_detection_path": "object_detection", "safety_factor": [1.0, 1.0, 1.0], - "path_output": PATH_OUTPUT + "path_output": str(PATH_OUTPUT) }, "transforms_params": {"NumpyToTensor": {}}, "roi_params": {"suffix": "_seg-manual", "slice_filter_roi": 10}, @@ -71,6 +73,7 @@ def test_bounding_box(download_data_testing_test_files, train_lst, target_lst, c "target_suffix": target_lst, "extensions": [".nii.gz"], "slice_filter_params": {"filter_empty_mask": False, "filter_empty_input": True}, + "patch_filter_params": {"filter_empty_mask": False, "filter_empty_input": False}, "slice_axis": "axial" } @@ -79,13 +82,13 @@ def test_bounding_box(download_data_testing_test_files, train_lst, target_lst, c config['model_params'].update(config["Modified3DUNet"]) bounding_box_dict = {} - bounding_box_path = os.path.join(PATH_OUTPUT, 'bounding_boxes.json') - if not os.path.exists(PATH_OUTPUT): - os.mkdir(PATH_OUTPUT) - current_dir = os.getcwd() + bounding_box_path = Path(PATH_OUTPUT, 'bounding_boxes.json') + if not Path(PATH_OUTPUT).exists(): + PATH_OUTPUT.mkdir(parents=True, exist_ok=True) + current_dir = Path.cwd() sub = train_lst[0].split('_')[0] contrast = config['contrast_params']['contrast_lst'][0] - bb_path = os.path.join(current_dir, __data_testing_dir__, sub, "anat", sub + "_" + contrast + ".nii.gz") + bb_path = str(Path(current_dir, __data_testing_dir__, sub, "anat", sub + "_" + contrast + ".nii.gz")) 
bounding_box_dict[bb_path] = coord with open(bounding_box_path, 'w') as fp: json.dump(bounding_box_dict, fp, indent=4) @@ -93,16 +96,28 @@ def test_bounding_box(download_data_testing_test_files, train_lst, target_lst, c # Update loader_params with config loader_params.update(config) - bids_df = imed_loader_utils.BidsDataframe(loader_params, __tmp_dir__, derivatives=True) + bids_df = BidsDataframe(loader_params, __tmp_dir__, derivatives=True) ds = imed_loader.load_dataset(bids_df, **loader_params) handler = ds.handlers if "Modified3DUNet" in config else ds.indexes - for index in handler: - seg_pair, _ = index + for index in range(len(handler)): + if "Modified3DUNet" in config: + if ds.disk_cache: + path_seg_pair, _ = handler[index] + with path_seg_pair.open('rb') as f: + seg_pair = pickle.load(f) + else: + seg_pair, _ = handler[index] assert seg_pair['input'][0].shape[-3:] == (mx2 - mx1, my2 - my1, mz2 - mz1) else: + if ds.disk_cache: + path_seg_pair = handler[index] + with path_seg_pair.open('rb') as f: + seg_pair, _ = pickle.load(f) + else: + seg_pair, _ = handler[index] assert seg_pair['input'][0].shape[-2:] == (mx2 - mx1, my2 - my1) shutil.rmtree(PATH_OUTPUT) @@ -116,8 +131,8 @@ def test_adjust_bb_size(): def test_compute_bb_statistics(download_data_testing_test_files): """Check to make sure compute_bb_statistics runs.""" - imed_obj_detect.compute_bb_statistics(os.path.join(__data_testing_dir__, - "bounding_box_dict.json")) + imed_obj_detect.compute_bb_statistics(str(Path(__data_testing_dir__, + "bounding_box_dict.json"))) def teardown_function(): diff --git a/testing/unit_tests/test_loader.py b/testing/unit_tests/test_loader.py index 895382ef9..8f9d31c49 100644 --- a/testing/unit_tests/test_loader.py +++ b/testing/unit_tests/test_loader.py @@ -1,15 +1,22 @@ import os +from pathlib import Path +import shutil + import pytest import csv_diff -import logging import torch import numpy as np +from loguru import logger +from ivadomed.loader.bids_dataframe import BidsDataframe from testing.unit_tests.t_utils import create_tmp_dir, __data_testing_dir__, __tmp_dir__, download_data_testing_test_files, path_repo_root from testing.common_testing_util import remove_tmp_dir -from ivadomed.loader import utils as imed_loader_utils +from ivadomed import utils as imed_utils from ivadomed.loader import loader as imed_loader -logger = logging.getLogger(__name__) +import ivadomed.loader.utils as imed_loader_utils +from ivadomed.loader import mri2d_segmentation_dataset as imed_loader_mri2dseg +from ivadomed.keywords import LoaderParamsKW, MetadataKW, ModelParamsKW, TransformationKW + def setup_function(): @@ -17,13 +24,12 @@ def setup_function(): @pytest.mark.parametrize('loader_parameters', [{ - "path_data": [os.path.join(__data_testing_dir__, "microscopy_png")], - "bids_config": f"{path_repo_root}/ivadomed/config/config_bids.json", + "path_data": [str(Path(__data_testing_dir__, "microscopy_png"))], "target_suffix": [["_seg-myelin-manual", "_seg-axon-manual"]], "extensions": [".png"], "roi_params": {"suffix": None, "slice_filter_roi": None}, "contrast_params": {"contrast_lst": []} - }]) +}]) def test_bids_df_microscopy_png(download_data_testing_test_files, loader_parameters): """ Test for microscopy png file format @@ -32,11 +38,11 @@ def test_bids_df_microscopy_png(download_data_testing_test_files, loader_paramet Test for when no contrast_params are provided """ - bids_df = imed_loader_utils.BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True) + bids_df = BidsDataframe(loader_parameters, 
__tmp_dir__, derivatives=True) df_test = bids_df.df.drop(columns=['path']) df_test = df_test.sort_values(by=['filename']).reset_index(drop=True) - csv_ref = os.path.join(loader_parameters["path_data"][0], "df_ref.csv") - csv_test = os.path.join(loader_parameters["path_data"][0], "df_test.csv") + csv_ref = Path(loader_parameters[LoaderParamsKW.PATH_DATA][0], "df_ref.csv") + csv_test = Path(loader_parameters[LoaderParamsKW.PATH_DATA][0], "df_test.csv") df_test.to_csv(csv_test, index=False) diff = csv_diff.compare(csv_diff.load_csv(open(csv_ref)), csv_diff.load_csv(open(csv_test))) assert diff == {'added': [], 'removed': [], 'changed': [], 'columns_added': [], 'columns_removed': []} @@ -57,11 +63,11 @@ def test_bids_df_anat(download_data_testing_test_files, loader_parameters): Test behavior when "roi_suffix" is not None """ - bids_df = imed_loader_utils.BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True) + bids_df = BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True) df_test = bids_df.df.drop(columns=['path']) df_test = df_test.sort_values(by=['filename']).reset_index(drop=True) - csv_ref = os.path.join(loader_parameters["path_data"][0], "df_ref.csv") - csv_test = os.path.join(loader_parameters["path_data"][0], "df_test.csv") + csv_ref = Path(loader_parameters[LoaderParamsKW.PATH_DATA][0], "df_ref.csv") + csv_test = Path(loader_parameters[LoaderParamsKW.PATH_DATA][0], "df_test.csv") df_test.to_csv(csv_test, index=False) diff = csv_diff.compare(csv_diff.load_csv(open(csv_ref)), csv_diff.load_csv(open(csv_test))) assert diff == {'added': [], 'removed': [], 'changed': [], @@ -69,8 +75,42 @@ def test_bids_df_anat(download_data_testing_test_files, loader_parameters): @pytest.mark.parametrize('loader_parameters', [{ - "path_data": [__data_testing_dir__, os.path.join(__data_testing_dir__, "microscopy_png")], + "path_data": [str(Path(__data_testing_dir__, "ct_scan"))], "bids_config": f"{path_repo_root}/ivadomed/config/config_bids.json", + "target_suffix": ["_seg-manual"], + "extensions": [".nii.gz"], + "roi_params": {"suffix": None, "slice_filter_roi": None}, + "contrast_params": {"contrast_lst": ["ct"]}, + "bids_validate": False + }]) +def test_bids_df_no_validate(download_data_testing_test_files, loader_parameters): + """ + Test for ct-scan nii.gz file format + Test for when validate_BIDS is set to False for the loader + """ + + # Rename files so the loader won't pick them up if validate_BIDS is true + Path(loader_parameters[LoaderParamsKW.PATH_DATA][0], "sub-spleen2").rename( + Path(loader_parameters[LoaderParamsKW.PATH_DATA][0], "ssub-spleen2")) + + bids_df = BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True) + df_test = bids_df.df.drop(columns=['path']) + df_test = df_test.sort_values(by=['filename']).reset_index(drop=True) + csv_ref = Path(loader_parameters[LoaderParamsKW.PATH_DATA][0], "df_ref.csv") + csv_test = Path(loader_parameters[LoaderParamsKW.PATH_DATA][0], "df_test.csv") + df_test.to_csv(csv_test, index=False) + diff = csv_diff.compare( + csv_diff.load_csv(open(csv_ref)), + csv_diff.load_csv(open(csv_test)) + ) + + Path(loader_parameters[LoaderParamsKW.PATH_DATA][0], "ssub-spleen2").rename( + Path(loader_parameters[LoaderParamsKW.PATH_DATA][0], "sub-spleen2")) + assert diff == {'added': [], 'removed': [], 'changed': [], 'columns_added': [], 'columns_removed': []} + + +@pytest.mark.parametrize('loader_parameters', [{ + "path_data": [__data_testing_dir__, str(Path(__data_testing_dir__, "microscopy_png"))], "target_suffix": ["_seg-manual", 
"seg-axon-manual"], "extensions": [".nii.gz", ".png"], "roi_params": {"suffix": None, "slice_filter_roi": None}, @@ -81,11 +121,11 @@ def test_bids_df_multi(download_data_testing_test_files, loader_parameters): Test for multiple folders in path_data """ - bids_df = imed_loader_utils.BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True) + bids_df = BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True) df_test = bids_df.df.drop(columns=['path']) df_test = df_test.sort_values(by=['filename']).reset_index(drop=True) - csv_ref = os.path.join(loader_parameters["path_data"][0], "df_ref_multi.csv") - csv_test = os.path.join(loader_parameters["path_data"][0], "df_test_multi.csv") + csv_ref = Path(loader_parameters[LoaderParamsKW.PATH_DATA][0], "df_ref_multi.csv") + csv_test = Path(loader_parameters[LoaderParamsKW.PATH_DATA][0], "df_test_multi.csv") df_test.to_csv(csv_test, index=False) diff = csv_diff.compare(csv_diff.load_csv(open(csv_ref)), csv_diff.load_csv(open(csv_test))) assert diff == {'added': [], 'removed': [], 'changed': [], @@ -93,7 +133,7 @@ def test_bids_df_multi(download_data_testing_test_files, loader_parameters): @pytest.mark.parametrize('loader_parameters', [{ - "path_data": [os.path.join(__data_testing_dir__, "ct_scan")], + "path_data": [str(Path(__data_testing_dir__, "ct_scan"))], "bids_config": f"{path_repo_root}/ivadomed/config/config_bids.json", "target_suffix": ["_seg-manual"], "extensions": [".nii.gz"], @@ -106,11 +146,11 @@ def test_bids_df_ctscan(download_data_testing_test_files, loader_parameters): Test for when dataset_description.json is not present in derivatives folder """ - bids_df = imed_loader_utils.BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True) + bids_df = BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True) df_test = bids_df.df.drop(columns=['path']) df_test = df_test.sort_values(by=['filename']).reset_index(drop=True) - csv_ref = os.path.join(loader_parameters["path_data"][0], "df_ref.csv") - csv_test = os.path.join(loader_parameters["path_data"][0], "df_test.csv") + csv_ref = Path(loader_parameters[LoaderParamsKW.PATH_DATA][0], "df_ref.csv") + csv_test = Path(loader_parameters[LoaderParamsKW.PATH_DATA][0], "df_test.csv") df_test.to_csv(csv_test, index=False) diff = csv_diff.compare(csv_diff.load_csv(open(csv_ref)), csv_diff.load_csv(open(csv_test))) assert diff == {'added': [], 'removed': [], 'changed': [], 'columns_added': [], 'columns_removed': []} @@ -137,14 +177,14 @@ def test_dropout_input(seg_pair): @pytest.mark.parametrize('loader_parameters', [{ - "path_data": [os.path.join(__data_testing_dir__, "microscopy_png")], - "bids_config": f"{path_repo_root}/ivadomed/config/config_bids.json", + "path_data": [str(Path(__data_testing_dir__, "microscopy_png"))], "target_suffix": ["_seg-myelin-manual"], "extensions": [".png"], "roi_params": {"suffix": None, "slice_filter_roi": None}, "contrast_params": {"contrast_lst": [], "balance": {}}, "slice_axis": "axial", "slice_filter_params": {"filter_empty_mask": False, "filter_empty_input": True}, + "patch_filter_params": {"filter_empty_mask": False, "filter_empty_input": False}, "multichannel": False }]) @pytest.mark.parametrize('model_parameters', [{ @@ -163,8 +203,8 @@ def test_load_dataset_2d_png(download_data_testing_test_files, Test to make sure load_dataset runs with 2D PNG files, writes corresponding NIfTI files, and binarizes ground-truth values to 0 and 1. 
""" - loader_parameters.update({"model_params": model_parameters}) - bids_df = imed_loader_utils.BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True) + loader_parameters.update({LoaderParamsKW.MODEL_PARAMS: model_parameters}) + bids_df = BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True) data_lst = ['sub-rat3_ses-01_sample-data9_SEM.png'] ds = imed_loader.load_dataset(bids_df, **{**loader_parameters, **{'data_list': data_lst, @@ -172,21 +212,21 @@ def test_load_dataset_2d_png(download_data_testing_test_files, 'dataset_type': 'training'}}) fname_png = bids_df.df[bids_df.df['filename'] == data_lst[0]]['path'].values[0] fname_nii = imed_loader_utils.update_filename_to_nifti(fname_png) - assert os.path.exists(fname_nii) == 1 + assert Path(fname_nii).exists() == 1 assert ds[0]['input'].shape == (1, 756, 764) assert ds[0]['gt'].shape == (1, 756, 764) assert np.unique(ds[0]['gt']).tolist() == [0, 1] @pytest.mark.parametrize('loader_parameters', [{ - "path_data": [os.path.join(__data_testing_dir__, "microscopy_png")], - "bids_config": f"{path_repo_root}/ivadomed/config/config_bids.json", + "path_data": [str(Path(__data_testing_dir__, "microscopy_png"))], "target_suffix": ["_seg-myelin-manual"], "extensions": [".png"], "roi_params": {"suffix": None, "slice_filter_roi": None}, "contrast_params": {"contrast_lst": [], "balance": {}}, "slice_axis": "axial", "slice_filter_params": {"filter_empty_mask": False, "filter_empty_input": True}, + "patch_filter_params": {"filter_empty_mask": False, "filter_empty_input": False}, "multichannel": False }]) @pytest.mark.parametrize('model_parameters', [{ @@ -195,42 +235,44 @@ def test_load_dataset_2d_png(download_data_testing_test_files, "bn_momentum": 0.1, "final_activation": "sigmoid", "depth": 3, - "length_2D": [256, 256], - "stride_2D": [244, 244] + "length_2D": [256, 128], + "stride_2D": [244, 116] }]) @pytest.mark.parametrize('transform_parameters', [{ "Resample": { - "wspace": 0.0001, + "wspace": 0.0002, "hspace": 0.0001 }, "NumpyToTensor": {}, }]) -def test_2d_patches(download_data_testing_test_files, - loader_parameters, model_parameters, transform_parameters): +def test_2d_patches_and_resampling(download_data_testing_test_files, + loader_parameters, model_parameters, transform_parameters): """ - Test to make sure load_dataset runs with 2D PNG data. + Test that 2d patching is done properly. + Test that microscopy pixelsize and resampling are applied on the right dimensions. 
""" - loader_parameters.update({"model_params": model_parameters}) - bids_df = imed_loader_utils.BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True) + loader_parameters.update({LoaderParamsKW.MODEL_PARAMS: model_parameters}) + bids_df = BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True) data_lst = ['sub-rat3_ses-01_sample-data9_SEM.png'] ds = imed_loader.load_dataset(bids_df, **{**loader_parameters, **{'data_list': data_lst, 'transforms_params': transform_parameters, 'dataset_type': 'training'}}) assert ds.is_2d_patch == True - assert ds[0]['input'].shape == (1, 256, 256) - assert len(ds) == 16 + assert ds[0]['input'].shape == (1, 256, 128) + assert ds[0]['input_metadata'][0].metadata[MetadataKW.INDEX_SHAPE] == (1512, 382) + assert len(ds) == 28 @pytest.mark.parametrize('loader_parameters', [{ - "path_data": [os.path.join(__data_testing_dir__, "microscopy_png")], - "bids_config": f"{path_repo_root}/ivadomed/config/config_bids.json", + "path_data": [str(Path(__data_testing_dir__, "microscopy_png"))], "target_suffix": ["_seg-myelin-manual", "_seg-axon-manual"], "extensions": [".png"], "roi_params": {"suffix": None, "slice_filter_roi": None}, "contrast_params": {"contrast_lst": [], "balance": {}}, "slice_axis": "axial", "slice_filter_params": {"filter_empty_mask": False, "filter_empty_input": True}, + "patch_filter_params": {"filter_empty_mask": False, "filter_empty_input": False}, "multichannel": False }]) @pytest.mark.parametrize('model_parameters', [{ @@ -246,8 +288,8 @@ def test_get_target_filename_list(loader_parameters, model_parameters, transform """ Test that all target_suffix are considered for target filename when list """ - loader_parameters.update({"model_params": model_parameters}) - bids_df = imed_loader_utils.BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True) + loader_parameters.update({LoaderParamsKW.MODEL_PARAMS: model_parameters}) + bids_df = BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True) data_lst = ['sub-rat3_ses-01_sample-data9_SEM.png'] test_ds = imed_loader.load_dataset(bids_df, **{**loader_parameters, **{'data_list': data_lst, @@ -255,18 +297,18 @@ def test_get_target_filename_list(loader_parameters, model_parameters, transform 'dataset_type': 'training'}}) target_filename = test_ds.filename_pairs[0][1] - assert len(target_filename) == len(loader_parameters["target_suffix"]) + assert len(target_filename) == len(loader_parameters[LoaderParamsKW.TARGET_SUFFIX]) @pytest.mark.parametrize('loader_parameters', [{ - "path_data": [os.path.join(__data_testing_dir__, "microscopy_png")], - "bids_config": f"{path_repo_root}/ivadomed/config/config_bids.json", + "path_data": [str(Path(__data_testing_dir__, "microscopy_png"))], "target_suffix": [["_seg-myelin-manual", "_seg-axon-manual"], ["_seg-myelin-manual", "_seg-axon-manual"]], "extensions": [".png"], "roi_params": {"suffix": None, "slice_filter_roi": None}, "contrast_params": {"contrast_lst": [], "balance": {}}, "slice_axis": "axial", "slice_filter_params": {"filter_empty_mask": False, "filter_empty_input": True}, + "patch_filter_params": {"filter_empty_mask": False, "filter_empty_input": False}, "multichannel": False }]) @pytest.mark.parametrize('model_parameters', [{ @@ -282,8 +324,8 @@ def test_get_target_filename_list_multiple_raters(loader_parameters, model_param """ Test that all target_suffix are considered for target filename when list """ - loader_parameters.update({"model_params": model_parameters}) - bids_df = imed_loader_utils.BidsDataframe(loader_parameters, 
__tmp_dir__, derivatives=True) + loader_parameters.update({LoaderParamsKW.MODEL_PARAMS: model_parameters}) + bids_df = BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True) data_lst = ['sub-rat3_ses-01_sample-data9_SEM.png'] test_ds = imed_loader.load_dataset(bids_df, **{**loader_parameters, **{'data_list': data_lst, @@ -291,10 +333,112 @@ def test_get_target_filename_list_multiple_raters(loader_parameters, model_param 'dataset_type': 'training'}}) target_filename = test_ds.filename_pairs[0][1] - assert len(target_filename) == len(loader_parameters["target_suffix"]) - assert len(target_filename[0]) == len(loader_parameters["target_suffix"][0]) - assert len(target_filename[1]) == len(loader_parameters["target_suffix"][1]) + assert len(target_filename) == len(loader_parameters[LoaderParamsKW.TARGET_SUFFIX]) + assert len(target_filename[0]) == len(loader_parameters[LoaderParamsKW.TARGET_SUFFIX][0]) + assert len(target_filename[1]) == len(loader_parameters[LoaderParamsKW.TARGET_SUFFIX][1]) + +@pytest.mark.parametrize('loader_parameters', [{ + "path_data": [str(Path(__data_testing_dir__, "microscopy_png"))], + "target_suffix": ["_seg-myelin-manual"], + "extensions": [".png"], + "roi_params": {"suffix": None, "slice_filter_roi": None}, + "contrast_params": {"contrast_lst": [], "balance": {}}, + "slice_axis": "axial", + "slice_filter_params": {"filter_empty_mask": False, "filter_empty_input": True}, + "patch_filter_params": {"filter_empty_mask": False, "filter_empty_input": False}, + "multichannel": False + }]) +@pytest.mark.parametrize('model_parameters', [{ + "name": "Unet", + "dropout_rate": 0.3, + "bn_momentum": 0.1, + "final_activation": "sigmoid", + "depth": 3 + }]) +def test_microscopy_pixelsize(download_data_testing_test_files, loader_parameters, model_parameters): + """ + Test that PixelSize and PixelSizeUnits microscopy metadata + are handled properly for PixelSizeUnits: "mm", "um" and "nm" + """ + loader_parameters.update({LoaderParamsKW.MODEL_PARAMS: model_parameters}) + bids_df = BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True) + + # PixelSizeUnits: "mm" + data_lst = ['sub-rat2_sample-data5_SEM.png'] + transform_parameters = {TransformationKW.RESAMPLE: {"wspace": 0.000093, "hspace": 0.000093}} + ds = imed_loader.load_dataset(bids_df, + **{**loader_parameters, **{'data_list': data_lst, + 'transforms_params': transform_parameters, + 'dataset_type': 'training'}}) + assert ds[0]['input'].shape == (1, 725, 725) + + # PixelSizeUnits: "um" + data_lst = ['sub-rat3_ses-02_sample-data11_run-1_SEM.png'] + transform_parameters = {TransformationKW.RESAMPLE: {"wspace": 0.0001, "hspace": 0.0001}} + ds = imed_loader.load_dataset(bids_df, + **{**loader_parameters, **{'data_list': data_lst, + 'transforms_params': transform_parameters, + 'dataset_type': 'training'}}) + assert ds[0]['input'].shape == (1, 839, 769) + + # PixelSizeUnits: "nm" + data_lst = ['sub-rat3_ses-02_sample-data10_SEM.png'] + transform_parameters = {TransformationKW.RESAMPLE: {"wspace": 0.0001, "hspace": 0.0001}} + ds = imed_loader.load_dataset(bids_df, + **{**loader_parameters, **{'data_list': data_lst, + 'transforms_params': transform_parameters, + 'dataset_type': 'training'}}) + assert ds[0]['input'].shape == (1, 758, 737) + + +@pytest.mark.parametrize('loader_parameters', [{ + "path_data": [str(Path(__data_testing_dir__, "data_test_png_tif"))], + "target_suffix": ["_seg-myelin-manual"], + "extensions": [".png", ".tif"], + "roi_params": {"suffix": None, "slice_filter_roi": None}, + "contrast_params": 
{"contrast_lst": [], "balance": {}}, + "slice_axis": "axial", + "slice_filter_params": {"filter_empty_mask": False, "filter_empty_input": True}, + "patch_filter_params": {"filter_empty_mask": False, "filter_empty_input": False}, + "multichannel": False + }]) +@pytest.mark.parametrize('model_parameters', [{ + "name": "Unet", + "dropout_rate": 0.3, + "bn_momentum": 0.1, + "final_activation": "sigmoid", + "depth": 3 + }]) +def test_read_png_tif(download_data_testing_test_files, loader_parameters, model_parameters): + """ + Test to make sure all combinaitions of PNG/TIF, 8/16 bits, Grayscale/RGB/RGBA files + can be loaded without errors. + """ + metadata = {} + metadata[MetadataKW.PIXEL_SIZE] = [0.07, 0.07] + metadata[MetadataKW.PIXEL_SIZE_UNITS] = "um" + loader_parameters.update({LoaderParamsKW.MODEL_PARAMS: model_parameters}) + bids_df = BidsDataframe(loader_parameters, __tmp_dir__, derivatives=False) + file_lst = bids_df.df['path'].tolist() + filename_pairs = [(file_lst, None, None, metadata if isinstance(metadata, list) else [metadata])] + slice_axis = imed_utils.AXIS_DCT[loader_parameters[LoaderParamsKW.SLICE_AXIS]] + ds = imed_loader_mri2dseg.MRI2DSegmentationDataset(filename_pairs, + slice_axis=slice_axis, + nibabel_cache=True, + transform=[None, None], + slice_filter_fn=None) + ds.load_filenames() + +def test_create_cache_folder(): + """ + Test to make sure the cache folder is created when it doesn't exist, remove it afterwards. + NOTE: this means this test cannot be parallelized with other tests that utilize this folder! + """ + path_cache = imed_loader_utils.create_temp_directory() + print(path_cache) + assert(os.path.exists(path_cache)) + shutil.rmtree(path_cache) def teardown_function(): remove_tmp_dir() diff --git a/testing/unit_tests/test_losses.py b/testing/unit_tests/test_losses.py index fc73ce5f5..7a70aefb2 100644 --- a/testing/unit_tests/test_losses.py +++ b/testing/unit_tests/test_losses.py @@ -80,7 +80,15 @@ def test_multiclassdiceloss(params): (torch.tensor([[[[1.0, 0.0], [0.0, 0.0]], [[0.0, 1.0], [0.0, 1.0]]]]), torch.tensor([[[[1.0, 0.0], [0.0, 0.0]], [[0.0, 1.0], [0.0, 0.0]]]]), -18 / 23, - GeneralizedDiceLoss(epsilon=1e-5)) + GeneralizedDiceLoss(epsilon=1e-5)), + (torch.tensor([[[[1.0, 0.0], [0.0, 1.0]]], [[[1.0, 0.0], [0.0, 1.0]]]]), + torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]], [[[1.0, 0.0], [1.0, 1.0]]]]), + -0.8, + GeneralizedDiceLoss(epsilon=1e-5, include_background=False)), + (torch.tensor([[[[1.0, 0.0], [0.0, 1.0]]], [[[1.0, 0.0], [0.0, 1.0]]]]), + torch.tensor([[[[1.0, 0.0], [1.0, 1.0]]], [[[1.0, 0.0], [1.0, 1.0]]]]), + -11 / 16, + GeneralizedDiceLoss(epsilon=1e-5, include_background=True)) ]) def test_generalizeddiceloss(params): """Test GeneralizedDiceLoss class in ivadomed.losses. 
diff --git a/testing/unit_tests/test_main.py b/testing/unit_tests/test_main.py new file mode 100644 index 000000000..4853d4c49 --- /dev/null +++ b/testing/unit_tests/test_main.py @@ -0,0 +1,14 @@ +import pytest + +from ivadomed.main import check_multiple_raters + +@pytest.mark.parametrize( + 'is_train, loader_params', [ + (False, {"target_suffix": + [["_seg-axon-manual1", "_seg-axon-manual2"], + ["_seg-myelin-manual1", "_seg-myelin-manual2"]] + }) +]) +def test_check_multiple_raters(is_train, loader_params): + with pytest.raises(SystemExit): + check_multiple_raters(is_train, loader_params) diff --git a/testing/unit_tests/test_metrics.py b/testing/unit_tests/test_metrics.py index 997c8838b..d1040649d 100644 --- a/testing/unit_tests/test_metrics.py +++ b/testing/unit_tests/test_metrics.py @@ -1,10 +1,10 @@ import ivadomed.metrics as imed_metrics import numpy as np import pytest -import os import logging from testing.unit_tests.t_utils import create_tmp_dir, __tmp_dir__ from testing.common_testing_util import remove_tmp_dir +from pathlib import Path logger = logging.getLogger(__name__) @@ -64,17 +64,17 @@ def test_plot_roc_curve(): tpr = [0, 0.1, 0.5, 0.6, 0.9] fpr = [1, 0.8, 0.5, 0.3, 0.1] opt_thr_idx = 3 - __output_file__ = os.path.join(__tmp_dir__, "roc_test.png") - imed_metrics.plot_roc_curve(tpr, fpr, opt_thr_idx, __output_file__) - assert os.path.isfile(__output_file__) + __output_file__ = Path(__tmp_dir__, "roc_test.png") + imed_metrics.plot_roc_curve(tpr, fpr, opt_thr_idx, str(__output_file__)) + assert __output_file__.is_file() def test_dice_plot(): thr_list = [0.1, 0.3, 0.5, 0.7] dice_list = [0.6, 0.7, 0.8, 0.75] - __output_file__ = os.path.join(__tmp_dir__, "test_dice.png") - imed_metrics.plot_dice_thr(thr_list, dice_list, 2, __output_file__) - assert os.path.isfile(__output_file__) + __output_file__ = Path(__tmp_dir__, "test_dice.png") + imed_metrics.plot_dice_thr(thr_list, dice_list, 2, str(__output_file__)) + assert __output_file__.is_file() def teardown_function(): diff --git a/testing/unit_tests/test_mixup.py b/testing/unit_tests/test_mixup.py index 2591dd9ba..3e74dacfc 100644 --- a/testing/unit_tests/test_mixup.py +++ b/testing/unit_tests/test_mixup.py @@ -2,9 +2,9 @@ import torch import pytest import logging -import os from testing.unit_tests.t_utils import create_tmp_dir, __tmp_dir__ from testing.common_testing_util import remove_tmp_dir +from pathlib import Path logger = logging.getLogger(__name__) @@ -13,8 +13,8 @@ def setup_function(): @pytest.mark.parametrize("debugging", [False, True]) -@pytest.mark.parametrize("ofolder", [os.path.join(__tmp_dir__, "test"), - os.path.join(__tmp_dir__, "mixup_test")]) +@pytest.mark.parametrize("ofolder", [str(Path(__tmp_dir__, "test")), + str(Path(__tmp_dir__, "mixup_test"))]) def test_mixup(debugging, ofolder): inp = [[[[0 for i in range(40)] for i in range(40)]]] targ = [[[[0 for i in range(40)] for i in range(40)]]] diff --git a/testing/unit_tests/test_onnx.py b/testing/unit_tests/test_onnx.py index 09b64eff6..ab105f91c 100644 --- a/testing/unit_tests/test_onnx.py +++ b/testing/unit_tests/test_onnx.py @@ -1,4 +1,3 @@ -import os import nibabel as nib import torch import numpy as np @@ -7,6 +6,7 @@ from ivadomed import utils as imed_utils from ivadomed import inference as imed_inference from ivadomed import models as imed_models +from pathlib import Path from testing.unit_tests.t_utils import create_tmp_dir, __data_testing_dir__, download_data_testing_test_files from testing.common_testing_util import remove_tmp_dir logger = 
logging.getLogger(__name__) @@ -16,29 +16,29 @@ def setup_function(): create_tmp_dir() -IMAGE_PATH = os.path.join(__data_testing_dir__, "sub-unf01", "anat", "sub-unf01_T1w.nii.gz") -PATH_MODEL = os.path.join(__data_testing_dir__, 'model') -PATH_MODEL_ONNX = os.path.join(PATH_MODEL, 'model.onnx') -PATH_MODEL_PT = PATH_MODEL_ONNX.replace('onnx', 'pt') +IMAGE_PATH = Path(__data_testing_dir__, "sub-unf01", "anat", "sub-unf01_T1w.nii.gz") +PATH_MODEL = Path(__data_testing_dir__, 'model') +PATH_MODEL_ONNX = Path(PATH_MODEL, 'model.onnx') +PATH_MODEL_PT = PATH_MODEL_ONNX.with_suffix('.pt') LENGTH_3D = (112, 112, 112) def test_onnx(download_data_testing_test_files): model = imed_models.Modified3DUNet(1, 1) - if not os.path.exists(PATH_MODEL): - os.mkdir(PATH_MODEL) + if not PATH_MODEL.exists(): + PATH_MODEL.mkdir() torch.save(model, PATH_MODEL_PT) img = nib.load(IMAGE_PATH).get_fdata().astype('float32')[:16, :64, :32] # Add batch and channel dimensions img_tensor = torch.tensor(img).unsqueeze(0).unsqueeze(0) dummy_input = torch.randn(1, 1, 32, 32, 32) - imed_utils.save_onnx_model(model, dummy_input, PATH_MODEL_ONNX) + imed_utils.save_onnx_model(model, dummy_input, str(PATH_MODEL_ONNX)) model = torch.load(PATH_MODEL_PT) model.eval() out_pt = model(img_tensor).detach().numpy() - out_onnx = imed_inference.onnx_inference(PATH_MODEL_ONNX, img_tensor).numpy() + out_onnx = imed_inference.onnx_inference(str(PATH_MODEL_ONNX), img_tensor).numpy() shutil.rmtree(PATH_MODEL) assert np.allclose(out_pt, out_onnx, rtol=1e-3) diff --git a/testing/unit_tests/test_orientation.py b/testing/unit_tests/test_orientation.py index 7ed7a53f0..d56ed3a90 100644 --- a/testing/unit_tests/test_orientation.py +++ b/testing/unit_tests/test_orientation.py @@ -3,8 +3,9 @@ import numpy as np import torch from torch.utils.data import DataLoader -import logging +from loguru import logger +from ivadomed.loader.bids_dataframe import BidsDataframe from ivadomed.loader.bids3d_dataset import Bids3DDataset from ivadomed.loader.bids_dataset import BidsDataset from ivadomed.loader.segmentation_pair import SegmentationPair @@ -15,7 +16,6 @@ from testing.unit_tests.t_utils import create_tmp_dir, __data_testing_dir__, __tmp_dir__, download_data_testing_test_files from testing.common_testing_util import remove_tmp_dir -logger = logging.getLogger(__name__) GPU_ID = 0 @@ -36,9 +36,9 @@ def test_image_orientation(download_data_testing_test_files, loader_parameters): cuda_available = torch.cuda.is_available() if cuda_available: torch.cuda.set_device(device) - print("Using GPU ID {}".format(device)) + logger.info(f"Using GPU ID {device}") - bids_df = imed_loader_utils.BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True) + bids_df = BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True) contrast_params = loader_parameters["contrast_params"] target_suffix = loader_parameters["target_suffix"] diff --git a/testing/unit_tests/test_patch_filter.py b/testing/unit_tests/test_patch_filter.py new file mode 100644 index 000000000..8329094ec --- /dev/null +++ b/testing/unit_tests/test_patch_filter.py @@ -0,0 +1,143 @@ +import os +import pytest +import numpy as np +import torch.backends.cudnn as cudnn +from torch.utils.data import DataLoader +from loguru import logger + +from ivadomed.loader.bids_dataframe import BidsDataframe +from ivadomed import utils as imed_utils +from ivadomed.loader import utils as imed_loader_utils, loader as imed_loader +from testing.unit_tests.t_utils import create_tmp_dir, __data_testing_dir__, __tmp_dir__, 
download_data_testing_test_files, path_repo_root +from testing.common_testing_util import remove_tmp_dir + +cudnn.benchmark = True + +GPU_ID = 0 +BATCH_SIZE = 1 + + +def setup_function(): + create_tmp_dir() + + +def _cmpt_slice(ds_loader): + cmpt_label = {0: 0, 1: 0} + for i, batch in enumerate(ds_loader): + for gt in batch['gt']: + # TODO: multi label + if np.any(gt.numpy()): + cmpt_label[1] += 1 + else: + cmpt_label[0] += 1 + logger.debug(cmpt_label) + return cmpt_label[0], cmpt_label[1] + + +@pytest.mark.parametrize('transforms_dict', [{"CenterCrop": {"size": [128, 128], "applied_to": ["im", "gt"]}}]) +@pytest.mark.parametrize('train_lst', [['sub-rat3_ses-01_sample-data9_SEM.png']]) +@pytest.mark.parametrize('target_lst', [["_seg-axon-manual", "_seg-myelin-manual"]]) +@pytest.mark.parametrize('patch_filter_params', [ + {"filter_empty_mask": False, "filter_empty_input": True}, + {"filter_empty_mask": True, "filter_empty_input": True}]) +@pytest.mark.parametrize('dataset_type', ["training", "testing"]) +def test_patch_filter_2d(download_data_testing_test_files, transforms_dict, train_lst, target_lst, patch_filter_params, + dataset_type): + + cuda_available, device = imed_utils.define_device(GPU_ID) + + loader_params = { + "transforms_params": transforms_dict, + "data_list": train_lst, + "dataset_type": dataset_type, + "requires_undo": False, + "contrast_params": {"contrast_lst": ['SEM'], "balance": {}}, + "path_data": [os.path.join(__data_testing_dir__, "microscopy_png")], + "target_suffix": target_lst, + "extensions": [".png"], + "roi_params": {"suffix": None, "slice_filter_roi": None}, + "model_params": {"name": "Unet", "length_2D": [32, 32], "stride_2D": [32, 32]}, + "slice_filter_params": {"filter_empty_mask": False, "filter_empty_input": False}, + "patch_filter_params": patch_filter_params, + "slice_axis": "axial", + "multichannel": False + } + # Get Training dataset + bids_df = BidsDataframe(loader_params, __tmp_dir__, derivatives=True) + ds = imed_loader.load_dataset(bids_df, **loader_params) + + logger.info(f"\tNumber of loaded patches: {len(ds)}") + + loader = DataLoader(ds, batch_size=BATCH_SIZE, + shuffle=True, pin_memory=True, + collate_fn=imed_loader_utils.imed_collate, + num_workers=0) + logger.info("\tNumber of Neg/Pos patches in GT.") + cmpt_neg, cmpt_pos = _cmpt_slice(loader) + if patch_filter_params["filter_empty_mask"]: + if dataset_type == "testing": + # Filters on patches are not applied at testing time + assert cmpt_neg + cmpt_pos == len(ds) + else: + # Filters on patches are applied at training time + assert cmpt_neg == 0 + assert cmpt_pos != 0 + else: + # We verify if there are still some negative patches (they are removed with our filter) + assert cmpt_neg != 0 and cmpt_pos != 0 + +@pytest.mark.parametrize('transforms_dict', [{"CenterCrop": {"size": [128, 128, 128], "applied_to": ["im", "gt"]}}]) +@pytest.mark.parametrize('train_lst', [['sub-unf01_T2w.nii.gz']]) +@pytest.mark.parametrize('target_lst', [["_seg-manual"]]) +@pytest.mark.parametrize('patch_filter_params', [ + {"filter_empty_mask": False, "filter_empty_input": True}, + {"filter_empty_mask": True, "filter_empty_input": True}]) +@pytest.mark.parametrize('dataset_type', ["training", "testing"]) +def test_patch_filter_3d(download_data_testing_test_files, transforms_dict, train_lst, target_lst, patch_filter_params, + dataset_type): + + cuda_available, device = imed_utils.define_device(GPU_ID) + + loader_params = { + "transforms_params": transforms_dict, + "data_list": train_lst, + "dataset_type": 
dataset_type, + "requires_undo": False, + "contrast_params": {"contrast_lst": ['T2w'], "balance": {}}, + "path_data": [os.path.join(__data_testing_dir__)], + "target_suffix": target_lst, + "extensions": [".nii.gz"], + "roi_params": {"suffix": None, "slice_filter_roi": None}, + "model_params": {"name": "Unet", "is_2d": False, "length_3D": [32, 32, 32], "stride_3D": [32, 32, 32]}, + "slice_filter_params": {"filter_empty_mask": False, "filter_empty_input": False}, + "patch_filter_params": patch_filter_params, + "slice_axis": "axial", + "multichannel": False + } + # Get Training dataset + bids_df = BidsDataframe(loader_params, __tmp_dir__, derivatives=True) + ds = imed_loader.load_dataset(bids_df, **loader_params) + + logger.info(f"\tNumber of loaded subvolumes: {len(ds)}") + + loader = DataLoader(ds, batch_size=BATCH_SIZE, + shuffle=True, pin_memory=True, + collate_fn=imed_loader_utils.imed_collate, + num_workers=0) + logger.info("\tNumber of Neg/Pos subvolumes in GT.") + cmpt_neg, cmpt_pos = _cmpt_slice(loader) + if patch_filter_params["filter_empty_mask"]: + if dataset_type == "testing": + # Filters on patches are not applied at testing time + assert cmpt_neg + cmpt_pos == len(ds) + else: + # Filters on patches are applied at training time + assert cmpt_neg == 0 + assert cmpt_pos != 0 + else: + # We verify if there are still some negative patches (they are removed with our filter) + assert cmpt_neg != 0 and cmpt_pos != 0 + + +def teardown_function(): + remove_tmp_dir() diff --git a/testing/unit_tests/test_postprocessing.py b/testing/unit_tests/test_postprocessing.py index e609d243e..5764e9e14 100644 --- a/testing/unit_tests/test_postprocessing.py +++ b/testing/unit_tests/test_postprocessing.py @@ -7,10 +7,10 @@ import numpy as np import pytest import scipy -import os from ivadomed import postprocessing as imed_postpro from testing.unit_tests.t_utils import create_tmp_dir, __data_testing_dir__, download_data_testing_test_files from testing.common_testing_util import remove_tmp_dir +from pathlib import Path def setup_function(): @@ -173,7 +173,7 @@ def test_label_file_from_coordinates(download_data_testing_test_files): coord = [[0, 0, 0]] # load test image nifti = nib.load( - os.path.join(__data_testing_dir__, 'sub-unf01/anat/sub-unf01_T1w.nii.gz')) + Path(__data_testing_dir__, 'sub-unf01/anat/sub-unf01_T1w.nii.gz')) # create fake label label = imed_postpro.label_file_from_coordinates(nifti, coord) # check if it worked diff --git a/testing/unit_tests/test_rgb.py b/testing/unit_tests/test_rgb.py index ed1a77612..2250fe033 100644 --- a/testing/unit_tests/test_rgb.py +++ b/testing/unit_tests/test_rgb.py @@ -1,9 +1,9 @@ from ivadomed import visualize as imed_visualize import numpy as np import torch -import os from testing.unit_tests.t_utils import create_tmp_dir, __data_testing_dir__, __tmp_dir__, download_data_testing_test_files from testing.common_testing_util import remove_tmp_dir +from pathlib import Path def setup_function(): create_tmp_dir() @@ -20,10 +20,10 @@ def test_save_rgb(download_data_testing_test_files): imed_visualize.save_color_labels( gt_data=image_n, binarize=False, - gt_filename=os.path.join( + gt_filename=str(Path( __data_testing_dir__, - "rgb_test_file.nii.gz"), - output_filename=os.path.join(__tmp_dir__, "rgb_test.nii.gz"), + "rgb_test_file.nii.gz")), + output_filename=str(Path(__tmp_dir__, "rgb_test.nii.gz")), slice_axis=0 ) diff --git a/testing/unit_tests/test_sampler.py b/testing/unit_tests/test_sampler.py index 3265b4b4a..097f51cf1 100644 --- 
a/testing/unit_tests/test_sampler.py +++ b/testing/unit_tests/test_sampler.py @@ -2,8 +2,12 @@ import pytest import torch.backends.cudnn as cudnn from torch.utils.data import DataLoader +from loguru import logger + +from ivadomed.loader.bids_dataframe import BidsDataframe from ivadomed import utils as imed_utils from ivadomed.loader import utils as imed_loader_utils, loader as imed_loader +from ivadomed.loader.balanced_sampler import BalancedSampler from testing.unit_tests.t_utils import create_tmp_dir, __data_testing_dir__, __tmp_dir__, download_data_testing_test_files from testing.common_testing_util import remove_tmp_dir @@ -29,7 +33,7 @@ def _cmpt_label(ds_loader): neg_sample_ratio = cmpt_label[0] * 100. / cmpt_sample pos_sample_ratio = cmpt_label[1] * 100. / cmpt_sample - print({'neg_sample_ratio': neg_sample_ratio, + logger.info({'neg_sample_ratio': neg_sample_ratio, 'pos_sample_ratio': pos_sample_ratio}) return neg_sample_ratio, pos_sample_ratio @@ -63,18 +67,16 @@ def test_sampler(download_data_testing_test_files, transforms_dict, train_lst, t "extensions": [".nii.gz"], "roi_params": roi_params, "model_params": {"name": "Unet"}, - "slice_filter_params": { - "filter_empty_mask": False, - "filter_empty_input": True - }, + "slice_filter_params": {"filter_empty_mask": False, "filter_empty_input": True}, + "patch_filter_params": {"filter_empty_mask": False, "filter_empty_input": False}, "slice_axis": "axial", "multichannel": False } # Get Training dataset - bids_df = imed_loader_utils.BidsDataframe(loader_params, __tmp_dir__, derivatives=True) + bids_df = BidsDataframe(loader_params, __tmp_dir__, derivatives=True) ds_train = imed_loader.load_dataset(bids_df, **loader_params) - print('\nLoading without sampling') + logger.info("\nLoading without sampling") train_loader = DataLoader(ds_train, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True, collate_fn=imed_loader_utils.imed_collate, @@ -82,9 +84,9 @@ def test_sampler(download_data_testing_test_files, transforms_dict, train_lst, t neg_percent, pos_percent = _cmpt_label(train_loader) assert abs(neg_percent - pos_percent) > 20 - print('\nLoading with sampling') + logger.info("\nLoading with sampling") train_loader_balanced = DataLoader(ds_train, batch_size=BATCH_SIZE, - sampler=imed_loader_utils.BalancedSampler(ds_train), + sampler=BalancedSampler(ds_train), shuffle=False, pin_memory=True, collate_fn=imed_loader_utils.imed_collate, num_workers=0) diff --git a/testing/unit_tests/test_slice_filter.py b/testing/unit_tests/test_slice_filter.py index c71482573..8d2298432 100644 --- a/testing/unit_tests/test_slice_filter.py +++ b/testing/unit_tests/test_slice_filter.py @@ -2,7 +2,9 @@ import numpy as np import torch.backends.cudnn as cudnn from torch.utils.data import DataLoader +from loguru import logger +from ivadomed.loader.bids_dataframe import BidsDataframe from ivadomed import utils as imed_utils from ivadomed.loader import utils as imed_loader_utils, loader as imed_loader from testing.unit_tests.t_utils import create_tmp_dir, __data_testing_dir__, __tmp_dir__, download_data_testing_test_files @@ -27,7 +29,7 @@ def _cmpt_slice(ds_loader): cmpt_label[1] += 1 else: cmpt_label[0] += 1 - print(cmpt_label) + logger.debug(cmpt_label) return cmpt_label[0], cmpt_label[1] @@ -64,20 +66,20 @@ def test_slice_filter(download_data_testing_test_files, transforms_dict, train_l "roi_params": roi_params, "model_params": {"name": "Unet"}, "slice_filter_params": slice_filter_params, + "patch_filter_params": {"filter_empty_mask": False, 
"filter_empty_input": False}, "slice_axis": "axial", "multichannel": False } # Get Training dataset - bids_df = imed_loader_utils.BidsDataframe(loader_params, __tmp_dir__, derivatives=True) + bids_df = BidsDataframe(loader_params, __tmp_dir__, derivatives=True) ds_train = imed_loader.load_dataset(bids_df, **loader_params) - print('\tNumber of loaded slices: {}'.format(len(ds_train))) + logger.info(f"\tNumber of loaded slices: {len(ds_train)}") train_loader = DataLoader(ds_train, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True, collate_fn=imed_loader_utils.imed_collate, num_workers=0) - print('\tNumber of Neg/Pos slices in GT.') cmpt_neg, cmpt_pos = _cmpt_slice(train_loader) if slice_filter_params["filter_empty_mask"]: assert cmpt_neg == 0 @@ -85,6 +87,7 @@ def test_slice_filter(download_data_testing_test_files, transforms_dict, train_l else: # We verify if there are still some negative slices (they are removed with our filter) assert cmpt_neg != 0 and cmpt_pos != 0 + logger.info(f"\tNumber of Neg/Pos slices in GT: {cmpt_neg/cmpt_pos}") def teardown_function(): diff --git a/testing/unit_tests/test_split_dataset.py b/testing/unit_tests/test_split_dataset.py index 7ccd10440..64fd4ddd8 100644 --- a/testing/unit_tests/test_split_dataset.py +++ b/testing/unit_tests/test_split_dataset.py @@ -1,4 +1,3 @@ -import os import csv import json import pytest @@ -7,9 +6,10 @@ from ivadomed.loader import utils as imed_loader_utils from testing.unit_tests.t_utils import create_tmp_dir, __tmp_dir__ from testing.common_testing_util import remove_tmp_dir +from pathlib import Path -PATH_DATA = os.path.join(__tmp_dir__, 'bids') -LOG_PATH = os.path.join(__tmp_dir__, 'log') +PATH_DATA = Path(__tmp_dir__, 'bids') +PATH_LOG = Path(__tmp_dir__, 'log') N = 200 N_CENTERS = 5 @@ -23,12 +23,12 @@ def load_dataset(split_params): create_jsonfile() # Create log path - if not os.path.isdir(LOG_PATH): - os.mkdir(LOG_PATH) + if not PATH_LOG.is_dir(): + PATH_LOG.mkdir(parents=True, exist_ok=True) - df = pd.read_csv(os.path.join(PATH_DATA, "participants.tsv"), sep='\t') + df = pd.read_csv(Path(PATH_DATA, "participants.tsv"), sep='\t') df['filename'] = df["participant_id"] - train, val, test = imed_loader_utils.get_subdatasets_subject_files_list(split_params, df, LOG_PATH) + train, val, test = imed_loader_utils.get_subdatasets_subject_files_list(split_params, df, str(PATH_LOG)) return train, val, test, patient_mapping @@ -162,8 +162,8 @@ def test_per_center_balance(split_params): def create_tsvfile(): # Create data path - if not os.path.isdir(PATH_DATA): - os.mkdir(PATH_DATA) + if not PATH_DATA.is_dir(): + PATH_DATA.mkdir(parents=True, exist_ok=True) patient_mapping = {} @@ -184,7 +184,7 @@ def create_tsvfile(): patient_mapping[patient_id]['center'] = center_id participants.append(row_participants) - with open(os.path.join(PATH_DATA, "participants.tsv"), 'w') as tsv_file: + with Path(PATH_DATA, "participants.tsv").open(mode='w') as tsv_file: tsv_writer = csv.writer(tsv_file, delimiter='\t', lineterminator='\n') tsv_writer.writerow(["participant_id", "disability", "institution_id"]) for item in sorted(participants): @@ -200,7 +200,7 @@ def create_jsonfile(): dataset_description[u'Name'] = 'Test' dataset_description[u'BIDSVersion'] = '1.2.1' - with open(os.path.join(PATH_DATA, "dataset_description.json"), 'w') as outfile: + with Path(PATH_DATA, "dataset_description.json").open(mode='w') as outfile: outfile.write(json.dumps(dataset_description, indent=2, sort_keys=True)) outfile.close() diff --git 
diff --git a/testing/unit_tests/test_split_dataset.py b/testing/unit_tests/test_split_dataset.py
index 7ccd10440..64fd4ddd8 100644
--- a/testing/unit_tests/test_split_dataset.py
+++ b/testing/unit_tests/test_split_dataset.py
@@ -1,4 +1,3 @@
-import os
 import csv
 import json
 import pytest
@@ -7,9 +6,10 @@
 from ivadomed.loader import utils as imed_loader_utils
 from testing.unit_tests.t_utils import create_tmp_dir, __tmp_dir__
 from testing.common_testing_util import remove_tmp_dir
+from pathlib import Path

-PATH_DATA = os.path.join(__tmp_dir__, 'bids')
-LOG_PATH = os.path.join(__tmp_dir__, 'log')
+PATH_DATA = Path(__tmp_dir__, 'bids')
+PATH_LOG = Path(__tmp_dir__, 'log')
 N = 200
 N_CENTERS = 5
@@ -23,12 +23,12 @@ def load_dataset(split_params):
     create_jsonfile()

     # Create log path
-    if not os.path.isdir(LOG_PATH):
-        os.mkdir(LOG_PATH)
+    if not PATH_LOG.is_dir():
+        PATH_LOG.mkdir(parents=True, exist_ok=True)

-    df = pd.read_csv(os.path.join(PATH_DATA, "participants.tsv"), sep='\t')
+    df = pd.read_csv(Path(PATH_DATA, "participants.tsv"), sep='\t')
     df['filename'] = df["participant_id"]
-    train, val, test = imed_loader_utils.get_subdatasets_subject_files_list(split_params, df, LOG_PATH)
+    train, val, test = imed_loader_utils.get_subdatasets_subject_files_list(split_params, df, str(PATH_LOG))
     return train, val, test, patient_mapping
@@ -162,8 +162,8 @@ def test_per_center_balance(split_params):

 def create_tsvfile():
     # Create data path
-    if not os.path.isdir(PATH_DATA):
-        os.mkdir(PATH_DATA)
+    if not PATH_DATA.is_dir():
+        PATH_DATA.mkdir(parents=True, exist_ok=True)

     patient_mapping = {}
@@ -184,7 +184,7 @@
             patient_mapping[patient_id]['center'] = center_id
         participants.append(row_participants)

-    with open(os.path.join(PATH_DATA, "participants.tsv"), 'w') as tsv_file:
+    with Path(PATH_DATA, "participants.tsv").open(mode='w') as tsv_file:
         tsv_writer = csv.writer(tsv_file, delimiter='\t', lineterminator='\n')
         tsv_writer.writerow(["participant_id", "disability", "institution_id"])
         for item in sorted(participants):
@@ -200,7 +200,7 @@ def create_jsonfile():
     dataset_description[u'Name'] = 'Test'
     dataset_description[u'BIDSVersion'] = '1.2.1'

-    with open(os.path.join(PATH_DATA, "dataset_description.json"), 'w') as outfile:
+    with Path(PATH_DATA, "dataset_description.json").open(mode='w') as outfile:
         outfile.write(json.dumps(dataset_description, indent=2, sort_keys=True))
         outfile.close()
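Reviewer note: this file is a straight `os.path` to `pathlib` migration. The recurring translations, side by side; the base path below is illustrative.

```python
# The os.path -> pathlib translations applied throughout this file.
import os
from pathlib import Path

base = "/tmp/ivadomed_demo"

os.path.join(base, "bids")                     # old: returns a str
Path(base, "bids")                             # new: returns a Path

os.path.isdir(base)                            # old
Path(base).is_dir()                            # new

os.makedirs(base, exist_ok=True)               # old
Path(base).mkdir(parents=True, exist_ok=True)  # new

# APIs that still expect strings, like get_subdatasets_subject_files_list
# above, receive str(path) explicitly.
```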
"slice_filter_params": {"filter_empty_mask": False, "filter_empty_input": True}, + "patch_filter_params": {"filter_empty_mask": False, "filter_empty_input": False}, "slice_axis": SLICE_AXIS, "multichannel": False } loader_params.update({"model_params": model_params}) - bids_df = imed_loader_utils.BidsDataframe(loader_params, __tmp_dir__, derivatives=True) + bids_df = BidsDataframe(loader_params, __tmp_dir__, derivatives=True) # Get Testing dataset ds_test = imed_loader.load_dataset(bids_df, **loader_params) @@ -108,20 +107,20 @@ def test_inference(download_data_testing_test_files, transforms_dict, test_lst, metric_mgr = imed_metrics.MetricManager(metric_fns) - if not os.path.isdir(__output_dir__): - os.makedirs(__output_dir__) + if not __output_dir__.is_dir(): + __output_dir__.mkdir(parents=True, exist_ok=True) preds_npy, gt_npy = imed_testing.run_inference(test_loader=test_loader, model=model, model_params=model_params, testing_params=testing_params, - ofolder=__output_dir__, + ofolder=str(__output_dir__), cuda_available=cuda_available) metric_mgr(preds_npy, gt_npy) metrics_dict = metric_mgr.get_results() metric_mgr.reset() - print(metrics_dict) + logger.debug(metrics_dict) @pytest.mark.parametrize('transforms_dict', [{ @@ -160,21 +159,18 @@ def test_inference_2d_microscopy(download_data_testing_test_files, transforms_di "dataset_type": "testing", "requires_undo": True, "contrast_params": {"contrast_lst": ['SEM'], "balance": {}}, - "path_data": [os.path.join(__data_testing_dir__, "microscopy_png")], - "bids_config": f"{path_repo_root}/ivadomed/config/config_bids.json", + "path_data": [str(Path(__data_testing_dir__, "microscopy_png"))], "target_suffix": target_lst, "extensions": [".png"], "roi_params": roi_params, - "slice_filter_params": { - "filter_empty_mask": False, - "filter_empty_input": True - }, + "slice_filter_params": {"filter_empty_mask": False, "filter_empty_input": True}, + "patch_filter_params": {"filter_empty_mask": False, "filter_empty_input": False}, "slice_axis": SLICE_AXIS, "multichannel": False } loader_params.update({"model_params": model_params}) - bids_df = imed_loader_utils.BidsDataframe(loader_params, __tmp_dir__, derivatives=True) + bids_df = BidsDataframe(loader_params, __tmp_dir__, derivatives=True) # Get Testing dataset ds_test = imed_loader.load_dataset(bids_df, **loader_params) @@ -200,19 +196,113 @@ def test_inference_2d_microscopy(download_data_testing_test_files, transforms_di model.cuda() model.eval() - if not os.path.isdir(__output_dir__): - os.makedirs(__output_dir__) + if not __output_dir__.is_dir(): + __output_dir__.mkdir(parents=True, exist_ok=True) preds_npy, gt_npy = imed_testing.run_inference(test_loader=test_loader, model=model, model_params=model_params, testing_params=testing_params, - ofolder=__output_dir__, + ofolder=str(__output_dir__), cuda_available=cuda_available) - assert len([x for x in os.listdir(__output_dir__) if x.endswith(".nii.gz")]) == len(test_lst) - assert len([x for x in os.listdir(__output_dir__) if x.endswith(".png")]) == 2*len(test_lst) + assert len([x for x in __output_dir__.iterdir() if x.name.endswith(".nii.gz")]) == len(test_lst) + assert len([x for x in __output_dir__.iterdir() if x.name.endswith(".png")]) == 2*len(test_lst) +@pytest.mark.parametrize('transforms_dict', [{ + "CenterCrop": { + "size": [128, 128] + }, + "NormalizeInstance": {"applied_to": ["im"]} + }]) +@pytest.mark.parametrize('test_lst', + [['sub-rat3_ses-01_sample-data9_SEM.png', 'sub-rat3_ses-02_sample-data10_SEM.png']]) 
diff --git a/testing/unit_tests/test_testing.py b/testing/unit_tests/test_testing.py
index 499418e34..a43d40314 100644
--- a/testing/unit_tests/test_testing.py
+++ b/testing/unit_tests/test_testing.py
@@ -1,8 +1,10 @@
-import os
 import pytest
 import torch.backends.cudnn as cudnn
 from torch.utils.data import DataLoader
+from pathlib import Path
+from loguru import logger
+from ivadomed.loader.bids_dataframe import BidsDataframe
 from ivadomed import metrics as imed_metrics
 from ivadomed import transforms as imed_transforms
 from ivadomed import utils as imed_utils
@@ -21,7 +23,7 @@
 DROPOUT = 0.4
 BN = 0.1
 SLICE_AXIS = "axial"
-__output_dir__ = os.path.join(__tmp_dir__, "output_inference")
+__output_dir__ = Path(__tmp_dir__, "output_inference")

 def setup_function():
@@ -63,16 +65,14 @@ def test_inference(download_data_testing_test_files, transforms_dict, test_lst,
         "target_suffix": target_lst,
         "extensions": [".nii.gz"],
         "roi_params": roi_params,
-        "slice_filter_params": {
-            "filter_empty_mask": False,
-            "filter_empty_input": True
-        },
+        "slice_filter_params": {"filter_empty_mask": False, "filter_empty_input": True},
+        "patch_filter_params": {"filter_empty_mask": False, "filter_empty_input": False},
        "slice_axis": SLICE_AXIS,
         "multichannel": False
     }
     loader_params.update({"model_params": model_params})

-    bids_df = imed_loader_utils.BidsDataframe(loader_params, __tmp_dir__, derivatives=True)
+    bids_df = BidsDataframe(loader_params, __tmp_dir__, derivatives=True)

     # Get Testing dataset
     ds_test = imed_loader.load_dataset(bids_df, **loader_params)
@@ -108,20 +108,20 @@ def test_inference(download_data_testing_test_files, transforms_dict, test_lst,
     metric_mgr = imed_metrics.MetricManager(metric_fns)

-    if not os.path.isdir(__output_dir__):
-        os.makedirs(__output_dir__)
+    if not __output_dir__.is_dir():
+        __output_dir__.mkdir(parents=True, exist_ok=True)

     preds_npy, gt_npy = imed_testing.run_inference(test_loader=test_loader,
                                                    model=model,
                                                    model_params=model_params,
                                                    testing_params=testing_params,
-                                                   ofolder=__output_dir__,
+                                                   ofolder=str(__output_dir__),
                                                    cuda_available=cuda_available)
     metric_mgr(preds_npy, gt_npy)
     metrics_dict = metric_mgr.get_results()
     metric_mgr.reset()
-    print(metrics_dict)
+    logger.debug(metrics_dict)

 @pytest.mark.parametrize('transforms_dict', [{
@@ -160,21 +160,18 @@ def test_inference_2d_microscopy(download_data_testing_test_files, transforms_di
     "dataset_type": "testing",
     "requires_undo": True,
     "contrast_params": {"contrast_lst": ['SEM'], "balance": {}},
-    "path_data": [os.path.join(__data_testing_dir__, "microscopy_png")],
-    "bids_config": f"{path_repo_root}/ivadomed/config/config_bids.json",
+    "path_data": [str(Path(__data_testing_dir__, "microscopy_png"))],
     "target_suffix": target_lst,
     "extensions": [".png"],
     "roi_params": roi_params,
-    "slice_filter_params": {
-        "filter_empty_mask": False,
-        "filter_empty_input": True
-    },
+    "slice_filter_params": {"filter_empty_mask": False, "filter_empty_input": True},
+    "patch_filter_params": {"filter_empty_mask": False, "filter_empty_input": False},
     "slice_axis": SLICE_AXIS,
     "multichannel": False
     }
     loader_params.update({"model_params": model_params})

-    bids_df = imed_loader_utils.BidsDataframe(loader_params, __tmp_dir__, derivatives=True)
+    bids_df = BidsDataframe(loader_params, __tmp_dir__, derivatives=True)

     # Get Testing dataset
     ds_test = imed_loader.load_dataset(bids_df, **loader_params)
@@ -200,19 +197,113 @@ def test_inference_2d_microscopy(download_data_testing_test_files, transforms_di
         model.cuda()
     model.eval()

-    if not os.path.isdir(__output_dir__):
-        os.makedirs(__output_dir__)
+    if not __output_dir__.is_dir():
+        __output_dir__.mkdir(parents=True, exist_ok=True)

     preds_npy, gt_npy = imed_testing.run_inference(test_loader=test_loader,
                                                    model=model,
                                                    model_params=model_params,
                                                    testing_params=testing_params,
-                                                   ofolder=__output_dir__,
+                                                   ofolder=str(__output_dir__),
                                                    cuda_available=cuda_available)

-    assert len([x for x in os.listdir(__output_dir__) if x.endswith(".nii.gz")]) == len(test_lst)
-    assert len([x for x in os.listdir(__output_dir__) if x.endswith(".png")]) == 2*len(test_lst)
+    assert len([x for x in __output_dir__.iterdir() if x.name.endswith(".nii.gz")]) == len(test_lst)
+    assert len([x for x in __output_dir__.iterdir() if x.name.endswith(".png")]) == 2*len(test_lst)
+
+
+@pytest.mark.parametrize('transforms_dict', [{
+    "CenterCrop": {
+        "size": [128, 128]
+    },
+    "NormalizeInstance": {"applied_to": ["im"]}
+}])
+@pytest.mark.parametrize('test_lst',
+                         [['sub-rat3_ses-01_sample-data9_SEM.png', 'sub-rat3_ses-02_sample-data10_SEM.png']])
+@pytest.mark.parametrize('target_lst', [["_seg-axon_manual", "_seg-myelin_manual"]])
+@pytest.mark.parametrize('roi_params', [{"suffix": None, "slice_filter_roi": None}])
+@pytest.mark.parametrize('testing_params', [{
+    "binarize_maxpooling": {},
+    "uncertainty": {
+        "applied": False,
+        "epistemic": False,
+        "aleatoric": False,
+        "n_it": 0
+    }}])
+def test_inference_target_suffix(download_data_testing_test_files, transforms_dict, test_lst, target_lst, roi_params,
+                                 testing_params):
+    """
+    Check that the filenames of the predictions saved as NIfTI files in the pred_masks
+    dir conform to the target_suffix, regardless of underscores in the target_suffix:
+    _seg-axon-manual and _seg-axon_manual should yield the same filenames.
+    (c.f: https://github.com/ivadomed/ivadomed/issues/1135)
+    """
+    cuda_available, device = imed_utils.define_device(GPU_ID)
+
+    model_params = {"name": "Unet", "is_2d": True, "out_channel": 3}
+    loader_params = {
+        "transforms_params": transforms_dict,
+        "data_list": test_lst,
+        "dataset_type": "testing",
+        "requires_undo": True,
+        "contrast_params": {"contrast_lst": ['SEM'], "balance": {}},
+        "path_data": [str(Path(__data_testing_dir__, "microscopy_png"))],
+        "target_suffix": target_lst,
+        "extensions": [".png"],
+        "roi_params": roi_params,
+        "slice_filter_params": {"filter_empty_mask": False, "filter_empty_input": True},
+        "patch_filter_params": {"filter_empty_mask": False, "filter_empty_input": False},
+        "slice_axis": SLICE_AXIS,
+        "multichannel": False
+    }
+    loader_params.update({"model_params": model_params})
+
+    # Restructure the dataset: replace the last hyphen of each label filename with an underscore
+    gt_path = f'{loader_params["path_data"][0]}/derivatives/labels/'
+    for file_path in Path(gt_path).rglob('*.png'):
+        src_filename = file_path.resolve()
+        dst_filename = '_'.join(str(src_filename).rsplit('-', 1))
+        src_filename.rename(Path(dst_filename))
+
+    bids_df = BidsDataframe(loader_params, __tmp_dir__, derivatives=True)
+
+    ds_test = imed_loader.load_dataset(bids_df, **loader_params)
+    test_loader = DataLoader(ds_test, batch_size=BATCH_SIZE,
+                             shuffle=False, pin_memory=True,
+                             collate_fn=imed_loader_utils.imed_collate,
+                             num_workers=0)
+
+    # Undo transform
+    val_undo_transform = imed_transforms.UndoCompose(imed_transforms.Compose(transforms_dict))
+
+    # Update testing_params
+    testing_params.update({
+        "slice_axis": loader_params["slice_axis"],
+        "target_suffix": loader_params["target_suffix"],
+        "undo_transforms": val_undo_transform
+    })
+
+    # Model
+    model = imed_models.Unet(out_channel=model_params['out_channel'])
+
+    if cuda_available:
+        model.cuda()
+    model.eval()
+
+    if not __output_dir__.is_dir():
+        __output_dir__.mkdir(parents=True, exist_ok=True)
+
+    preds_npy, gt_npy = imed_testing.run_inference(test_loader=test_loader,
+                                                   model=model,
+                                                   model_params=model_params,
+                                                   testing_params=testing_params,
+                                                   ofolder=str(__output_dir__),
+                                                   cuda_available=cuda_available)
+
+    for x in __output_dir__.iterdir():
+        if x.name.endswith('_pred.nii.gz'):
+            assert x.name.rsplit('_', 1)[0].endswith(loader_params['contrast_params']['contrast_lst'][-1]), (
+                'Incompatible filename(s) of the prediction(s) saved as NIfTI file(s)!'
+            )
+

 def teardown_function():
     remove_tmp_dir()
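Reviewer note: the new regression test hinges on the `rsplit` trick in the rename loop. What it does to one filename, using an example name built from the parametrized list:

```python
# rsplit('-', 1) splits on the *last* hyphen only, so a trailing `-manual`
# suffix becomes `_manual` while earlier hyphens (sub-rat3, seg-axon, ...)
# are preserved.
src = "sub-rat3_ses-01_sample-data9_SEM_seg-axon-manual.png"
dst = "_".join(src.rsplit("-", 1))
assert dst == "sub-rat3_ses-01_sample-data9_SEM_seg-axon_manual.png"
```

Note also that `test_testing.py` now calls `logger.debug(metrics_dict)`, so the import hunk above adds `from loguru import logger` alongside the `pathlib` import; without it the file would raise a `NameError`.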
diff --git a/testing/unit_tests/test_training_time.py b/testing/unit_tests/test_training_time.py
index b92d5e037..1975d1793 100644
--- a/testing/unit_tests/test_training_time.py
+++ b/testing/unit_tests/test_training_time.py
@@ -5,7 +5,9 @@
 from torch import optim
 from torch.utils.data import DataLoader
 from tqdm import tqdm
+from loguru import logger
+from ivadomed.loader.bids_dataframe import BidsDataframe
 from ivadomed import losses as imed_losses
 from ivadomed import models as imed_models
 from ivadomed import utils as imed_utils
@@ -99,12 +101,13 @@ def test_unet_time(download_data_testing_test_files, train_lst, target_lst, conf
         "target_suffix": target_lst,
         "extensions": [".nii.gz"],
         "slice_filter_params": {"filter_empty_mask": False, "filter_empty_input": True},
+        "patch_filter_params": {"filter_empty_mask": False, "filter_empty_input": False},
         "slice_axis": "axial"
     }
     # Update loader_params with config
     loader_params.update(config)
     # Get Training dataset
-    bids_df = imed_loader_utils.BidsDataframe(loader_params, __tmp_dir__, derivatives=True)
+    bids_df = BidsDataframe(loader_params, __tmp_dir__, derivatives=True)
     ds_train = imed_loader.load_dataset(bids_df, **loader_params)

     # Loader
@@ -127,7 +130,7 @@ def test_unet_time(download_data_testing_test_files, train_lst, target_lst, conf
     model_class = getattr(imed_models, model_params["name"])
     model = model_class(**model_params)
-    print("Training {}".format(model_params["name"]))
+    logger.debug(f"Training {model_params['name']}")

     if cuda_available:
         model.cuda()
@@ -198,12 +201,12 @@ def test_unet_time(download_data_testing_test_files, train_lst, target_lst, conf
         total_time = end_time - start_time
         tqdm.write("Epoch {} took {:.2f} seconds.".format(epoch, total_time))

-    print('Mean SD init {} -- {}'.format(np.mean(init_lst), np.std(init_lst)))
-    print('Mean SD load {} -- {}'.format(np.mean(load_lst), np.std(load_lst)))
-    print('Mean SD pred {} -- {}'.format(np.mean(pred_lst), np.std(pred_lst)))
-    print('Mean SDopt {} -- {}'.format(np.mean(opt_lst), np.std(opt_lst)))
-    print('Mean SD gen {} -- {}'.format(np.mean(gen_lst), np.std(gen_lst)))
-    print('Mean SD scheduler {} -- {}'.format(np.mean(schedule_lst), np.std(schedule_lst)))
+    logger.info(f"Mean SD init {np.mean(init_lst)} -- {np.std(init_lst)}")
+    logger.info(f"Mean SD load {np.mean(load_lst)} -- {np.std(load_lst)}")
+    logger.info(f"Mean SD pred {np.mean(pred_lst)} -- {np.std(pred_lst)}")
+    logger.info(f"Mean SD opt {np.mean(opt_lst)} -- {np.std(opt_lst)}")
+    logger.info(f"Mean SD gen {np.mean(gen_lst)} -- {np.std(gen_lst)}")
+    logger.info(f"Mean SD scheduler {np.mean(schedule_lst)} -- {np.std(schedule_lst)}")

 def teardown_function():
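Reviewer note: while porting the summary lines to loguru, the `'Mean SDopt'` label is also fixed to `'Mean SD opt'` to match its siblings. For context, the per-phase timing pattern this test relies on, condensed to a self-contained sketch; the work inside the loop is a stand-in for real batches.

```python
# Condensed version of the per-phase timing pattern used by test_unet_time.
import time
import numpy as np
from loguru import logger

load_lst, pred_lst = [], []
for _ in range(3):
    start = time.time()
    batch = list(range(10_000))   # stand-in for data loading
    load_lst.append(time.time() - start)

    start = time.time()
    _ = sum(batch)                # stand-in for the forward pass
    pred_lst.append(time.time() - start)

logger.info(f"Mean SD load {np.mean(load_lst)} -- {np.std(load_lst)}")
logger.info(f"Mean SD pred {np.mean(pred_lst)} -- {np.std(pred_lst)}")
```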
diff --git a/testing/unit_tests/test_transfer_learning.py b/testing/unit_tests/test_transfer_learning.py
index 70c39ed29..967dec697 100644
--- a/testing/unit_tests/test_transfer_learning.py
+++ b/testing/unit_tests/test_transfer_learning.py
@@ -1,10 +1,11 @@
 import pytest
 import torch
 import torch.backends.cudnn as cudnn
-import os
 from ivadomed import models as imed_models
 from testing.unit_tests.t_utils import create_tmp_dir, __data_testing_dir__, download_data_testing_test_files
 from testing.common_testing_util import remove_tmp_dir
+from pathlib import Path
+from loguru import logger

 cudnn.benchmark = True
@@ -18,11 +19,11 @@ def setup_function():

 @pytest.mark.parametrize('fraction', [0.1, 0.2, 0.3])
-@pytest.mark.parametrize('path_model', [os.path.join(__data_testing_dir__, 'model_unet_test.pt')])
+@pytest.mark.parametrize('path_model', [str(Path(__data_testing_dir__, 'model_unet_test.pt'))])
 def test_transfer_learning(download_data_testing_test_files, path_model, fraction, tolerance=0.15):
     device = torch.device("cpu")
-    print("Working on {}.".format('cpu'))
-    print(__data_testing_dir__)
+    logger.info("Working on cpu.")
+    logger.info(__data_testing_dir__)

     # Load pretrained model
     model_pretrained = torch.load(path_model, map_location=device)
@@ -30,27 +31,27 @@ def test_transfer_learning(download_data_testing_test_files, path_model, fractio
     model_to_retrain = imed_models.set_model_for_retrain(path_model, retrain_fraction=fraction,
                                                          map_location=device)
-    print('\nSet fraction to retrain: ' + str(fraction))
+    logger.info(f"\nSet fraction to retrain: {fraction}")

     # Check Frozen part
     grad_list = [param.requires_grad for name, param in model_to_retrain.named_parameters()]
     fraction_retrain_measured = sum(grad_list) * 1.0 / len(grad_list)
-    print('\nMeasure: retrained fraction of the model: ' + str(round(fraction_retrain_measured, 1)))
+    logger.debug(f"\nMeasure: retrained fraction of the model: {round(fraction_retrain_measured, 1)}")
     # for name, param in model.named_parameters():
     #     print("\t", name, param.requires_grad)
     assert (abs(fraction_retrain_measured - fraction) <= tolerance)

     total_params = sum(p.numel() for p in model_to_retrain.parameters())
-    print('{:,} total parameters.'.format(total_params))
+    logger.info(f"{total_params:,} total parameters.")
     total_trainable_params = sum(
         p.numel() for p in model_to_retrain.parameters() if p.requires_grad)
-    print('{:,} parameters to retrain.'.format(total_trainable_params))
+    logger.info(f"{total_trainable_params:,} parameters to retrain.")
     assert (total_params > total_trainable_params)

     # Check reset weights
     reset_list = [(p1.data.ne(p2.data).sum() > 0).cpu().numpy() for p1, p2 in
                   zip(model_pretrained.parameters(), model_to_retrain.parameters())]
     reset_measured = sum(reset_list) * 1.0 / len(reset_list)
-    print('\nMeasure: reset fraction of the model: ' + str(round(reset_measured, 1)))
+    logger.info(f"\nMeasure: reset fraction of the model: {round(reset_measured, 1)}")
     assert (abs(reset_measured - fraction) <= tolerance)
     # weights_reset = False
     # for name_p1, p2 in zip(model_copy.named_parameters(), model.parameters()):
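Reviewer note: two of the ported log lines are tightened here: `f"Working on {'cpu'}."` formatted a literal for no reason, and the `{:,}` thousands separators from the old `print` calls are restored as `{total_params:,}`. The retrained-fraction measurement itself generalizes beyond ivadomed's `set_model_for_retrain`; a toy model makes the arithmetic concrete (the freezing rule below is hypothetical, ivadomed's own layer selection differs).

```python
# Retrained-fraction arithmetic on a toy two-layer model.
import torch.nn as nn

model = nn.Sequential(nn.Linear(4, 8), nn.Linear(8, 2))
# Hypothetical freezing rule: freeze everything except the second layer.
for name, param in model.named_parameters():
    param.requires_grad = name.startswith("1.")

grad_list = [p.requires_grad for _, p in model.named_parameters()]
fraction_measured = sum(grad_list) / len(grad_list)  # 2 of 4 tensors -> 0.5
total_params = sum(p.numel() for p in model.parameters())  # 40 + 18 = 58
total_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)  # 18
assert total_params > total_trainable
```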
diff --git a/testing/unit_tests/test_transforms.py b/testing/unit_tests/test_transforms.py
index 166d161f2..9df520bbd 100644
--- a/testing/unit_tests/test_transforms.py
+++ b/testing/unit_tests/test_transforms.py
@@ -7,15 +7,15 @@
 import numpy as np
 import pytest
 import torch
-from scipy.ndimage.measurements import center_of_mass
-from scipy.ndimage.measurements import label
+from scipy.ndimage import center_of_mass, label

 from ivadomed import maths as imed_maths
-from ivadomed.loader.utils import SampleMetadata
+from ivadomed.loader.sample_meta_data import SampleMetadata
 from ivadomed.metrics import dice_score
 from ivadomed.transforms import Clahe, AdditiveGaussianNoise, RandomAffine, RandomReverse, \
     DilateGT, ElasticTransform, ROICrop, CenterCrop, NormalizeInstance, HistogramClipping, \
     NumpyToTensor, Resample
+from ivadomed.keywords import MetadataKW

 DEBUGGING = False
 if DEBUGGING:
@@ -71,7 +71,7 @@ def create_test_image(width, height, depth=0, num_contrasts=1, noise_max=10.0, n
     seg = np.ceil(image).astype(np.int32)

     if depth == 0:
-        _, _, z_slice = center_of_mass(seg.astype(np.int))
+        _, _, z_slice = center_of_mass(seg.astype(int))
         z_slice = int(round(z_slice))
         seg = seg[:, :, z_slice]
@@ -160,9 +160,9 @@ def test_NumpyToTensor(im_seg):

 def _test_Resample(im_seg, resample_transform, native_resolution, is_2D=False):
     im, seg = im_seg
-    metadata_ = SampleMetadata({'zooms': native_resolution,
-                                'data_shape': im[0].shape if len(im[0].shape) == 3 else list(im[0].shape) + [1],
-                                'data_type': 'im'
+    metadata_ = SampleMetadata({MetadataKW.ZOOMS: native_resolution,
+                                MetadataKW.DATA_SHAPE: im[0].shape if len(im[0].shape) == 3 else list(im[0].shape) + [1],
+                                MetadataKW.DATA_TYPE: 'im'
                                 })
     metadata_in = [metadata_ for _ in im] if isinstance(im, list) else SampleMetadata({})
@@ -173,9 +173,9 @@ def _test_Resample(im_seg, resample_transform, native_resolution, is_2D=False):

     # Resampler for label data
     resample_transform.interpolation_order = 0
-    metadata_ = SampleMetadata({'zooms': native_resolution,
-                                'data_shape': seg[0].shape if len(seg[0].shape) == 3 else list(seg[0].shape) + [1],
-                                'data_type': 'gt'
+    metadata_ = SampleMetadata({MetadataKW.ZOOMS: native_resolution,
+                                MetadataKW.DATA_SHAPE: seg[0].shape if len(seg[0].shape) == 3 else list(seg[0].shape) + [1],
+                                MetadataKW.DATA_TYPE: 'gt'
                                 })
     metadata_in = [metadata_ for _ in seg] if isinstance(seg, list) else SampleMetadata({})
     # Resample label data
@@ -238,15 +238,29 @@ def test_NormalizeInstance(im_seg):
     assert abs(do_tensor.mean() - 0.0) <= 1e-2
     assert abs(do_tensor.std() - 1.0) <= 1e-2

+    # Transform on Numpy - Uniform sample
+    im = np.ones(im[0].shape)
+    do_im, _ = transform(im.copy(), metadata_in)
+    # Check mean-subtraction
+    assert abs(np.mean(do_im) - 0.0) <= 1e-2
+    assert abs(np.std(do_im)) < 1e-5
+
+    # Transform on Tensor - Uniform sample
+    tensor, metadata_tensor = NumpyToTensor()(im, metadata_in)
+    do_tensor, _ = transform(tensor, metadata_tensor)
+    # Check mean-subtraction
+    assert abs(do_tensor.mean() - 0.0) <= 1e-2
+    assert abs(do_tensor.std()) < 1e-5
+

 def _test_Crop(im_seg, crop_transform):
     im, seg = im_seg
-    metadata_ = SampleMetadata({'data_shape': im[0].shape, 'crop_params': {}})
+    metadata_ = SampleMetadata({MetadataKW.DATA_SHAPE: im[0].shape, MetadataKW.CROP_PARAMS: {}})
     metadata_in = [metadata_ for _ in im] if isinstance(im, list) else {}
     if crop_transform.__class__.__name__ == "ROICrop":
         _, metadata_in = crop_transform(seg, metadata_in)
         for metadata in metadata_in:
-            assert crop_transform.__class__.__name__ in metadata["crop_params"]
+            assert crop_transform.__class__.__name__ in metadata[MetadataKW.CROP_PARAMS]

     # Apply transform
     do_im, do_metadata = crop_transform(im, metadata_in)
@@ -259,8 +273,8 @@ def _test_Crop(im_seg, crop_transform):
         assert list(do_im[idx].shape) == crop_transfrom_size
         assert list(do_seg[idx].shape) == crop_transfrom_size
         # Check metadata
-        assert do_metadata[idx]['crop_params'][crop_transform.__class__.__name__] == \
-            do_seg_metadata[idx]['crop_params'][crop_transform.__class__.__name__]
+        assert do_metadata[idx][MetadataKW.CROP_PARAMS][crop_transform.__class__.__name__] == \
+            do_seg_metadata[idx][MetadataKW.CROP_PARAMS][crop_transform.__class__.__name__]

     # Apply undo transform
     undo_im, _ = crop_transform.undo_transform(do_im, do_metadata)
@@ -275,7 +289,7 @@ def _test_Crop(im_seg, crop_transform):
     # Loop and check
     for idx, i in enumerate(im):
         # Check data consistency
-        fh, fw, fd, _, _, _ = do_metadata[idx]['crop_params'][crop_transform.__class__.__name__]
+        fh, fw, fd, _, _, _ = do_metadata[idx][MetadataKW.CROP_PARAMS][crop_transform.__class__.__name__]
         th, tw, td = crop_transform.size
         if not td:
             assert np.array_equal(i[fh:fh + th, fw:fw + tw], undo_im[idx][fh:fh + th, fw:fw + tw])
@@ -392,9 +406,9 @@ def test_DilateGT(im_seg, dilate_transform):
     # Check data augmentation
     for idx, i in enumerate(seg):
         # data aug
-        assert np.sum((do_seg[idx] > 0).astype(np.int)) >= np.sum(i)
+        assert np.sum((do_seg[idx] > 0).astype(int)) >= np.sum(i)
         # same number of objects
-        assert label((do_seg[idx] > 0).astype(np.int))[1] == label(i)[1]
+        assert label((do_seg[idx] > 0).astype(int))[1] == label(i)[1]

 @pytest.mark.parametrize('im_seg', [create_test_image(100, 100, 0, 1, rad_max=10),
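Reviewer note: a typo in one added comment is also fixed here ("mean-substraction" to "mean-subtraction"). On the substance of this file's change, the point of replacing string keys with `MetadataKW` constants is that a mistyped constant fails immediately with an `AttributeError`, while a mistyped string key silently creates or misses a dict entry. A hypothetical minimal version of the pattern; ivadomed's real class lives in `ivadomed/keywords.py` and defines many more keys.

```python
# Minimal illustration of the MetadataKW pattern adopted above.
class MetadataKW:
    ZOOMS = "zooms"
    DATA_SHAPE = "data_shape"
    DATA_TYPE = "data_type"
    CROP_PARAMS = "crop_params"

metadata = {MetadataKW.ZOOMS: (1.0, 1.0, 1.0), MetadataKW.DATA_TYPE: "im"}
assert metadata[MetadataKW.DATA_TYPE] == "im"
# metadata[MetadataKW.DATA_TYP]  # would raise AttributeError at once,
# whereas metadata["data_typ"] would only raise a KeyError at lookup time.
```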
diff --git a/testing/unit_tests/test_utils.py b/testing/unit_tests/test_utils.py
new file mode 100644
index 000000000..84be29caa
--- /dev/null
+++ b/testing/unit_tests/test_utils.py
@@ -0,0 +1,44 @@
+import string
+from ivadomed.utils import get_timestamp, get_win_system_memory, get_linux_system_memory, get_mac_system_memory
+from loguru import logger
+import platform
+import pytest
+
+current_platform = platform.system()
+
+
+def test_timestamp():
+    """Test the timestamp function."""
+    output = get_timestamp()
+    logger.debug(output)
+    assert output.count("-") == 2
+    assert output.count(".") == 1
+    assert output.count("T") == 1
+    for I in string.ascii_uppercase:
+        if I == "T":
+            assert output.count(I) == 1
+        else:
+            assert output.count(I) == 0
+    for i in string.ascii_lowercase:
+        assert output.count(i) == 0
+
+
+@pytest.mark.skipif(current_platform != "Windows", reason="Function only works for Windows, skip on all other OS")
+def test_get_win_system_memory():
+    """Check that the reported Windows memory size is plausible."""
+    # Most computers/clusters have at least 100 MB and no more than 256 GB of RAM
+    assert 0.1 < get_win_system_memory() < 256
+
+
+@pytest.mark.skipif(current_platform != "Linux", reason="Function only works for Linux, skip on all other OS")
+def test_get_linux_system_memory():
+    """Check that the reported Linux memory size is plausible."""
+    # Most computers/clusters have at least 100 MB and no more than 256 GB of RAM
+    assert 0.1 < get_linux_system_memory() < 256
+
+
+@pytest.mark.skipif(current_platform != "Darwin", reason="Function only works for Mac, skip on all other OS")
+def test_get_mac_system_memory():
+    """Check that the reported Mac memory size is plausible."""
+    # Most computers/clusters have at least 100 MB and no more than 256 GB of RAM
+    assert 0.1 < get_mac_system_memory() < 256
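Reviewer note: the three memory-test docstrings all read "Get Windows memory size", a copy-paste slip fixed above, along with the missing blank lines between functions and the missing newline at end of file. Since all three tests apply the same plausibility bounds, the helpers are presumably meant to return gigabytes; if a future refactor wants a single cross-platform helper, here is a hypothetical `psutil`-based sketch, not ivadomed's implementation.

```python
# Hypothetical cross-platform alternative to the three helpers.
import psutil

def get_system_memory_gb() -> float:
    """Total physical memory in GB on any platform psutil supports."""
    return psutil.virtual_memory().total / 1024 ** 3

# Same plausibility bounds as the three platform-specific tests:
assert 0.1 < get_system_memory_gb() < 256
```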