diff --git a/CHANGELOG.md b/CHANGELOG.md
index 80cf34a62..89d780fd9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,6 +20,7 @@ module on test data with partial ground truth files. (Also [522](https://github.
 jobs that run in AzureML.
 
 ### Changed
+- ([#533](https://github.com/microsoft/InnerEye-DeepLearning/pull/533)) Better defaults for inference on ensemble children.
 - ([#502](https://github.com/microsoft/InnerEye-DeepLearning/pull/502)) Renamed command line option 'perform_training_set_inference' to 'inference_on_train_set'. Replaced command line option 'perform_validation_and_test_set_inference' with the pair of options 'inference_on_val_set' and 'inference_on_test_set'.
 - ([#496](https://github.com/microsoft/InnerEye-DeepLearning/pull/496)) All plots are now saved as PNG, rather than JPG.
 - ([#497](https://github.com/microsoft/InnerEye-DeepLearning/pull/497)) Reducing the size of the code snapshot that
diff --git a/InnerEye/ML/configs/segmentation/BasicModel2Epochs.py b/InnerEye/ML/configs/segmentation/BasicModel2Epochs.py
index efc4d1d10..76c0fc6c9 100644
--- a/InnerEye/ML/configs/segmentation/BasicModel2Epochs.py
+++ b/InnerEye/ML/configs/segmentation/BasicModel2Epochs.py
@@ -44,6 +44,8 @@ def __init__(self, **kwargs: Any) -> None:
             use_mixed_precision=True,
             azure_dataset_id=AZURE_DATASET_ID,
             comparison_blob_storage_paths=comparison_blob_storage_paths,
+            inference_on_test_set=True,
+            inference_on_val_set=True,
             dataset_mountpoint="/tmp/innereye",
             # Use an LR scheduler with a pronounced and clearly visible decay, to be able to easily see if that
             # is applied correctly in run recovery.
diff --git a/InnerEye/ML/deep_learning_config.py b/InnerEye/ML/deep_learning_config.py
index 1493bc77a..230650cf7 100644
--- a/InnerEye/ML/deep_learning_config.py
+++ b/InnerEye/ML/deep_learning_config.py
@@ -263,20 +263,42 @@ def validate(self) -> None:
                 f"found number_of_cross_validation_splits = {self.number_of_cross_validation_splits} "
                 f"and cross_validation_split_index={self.cross_validation_split_index}")
 
-    """ Defaults for when to run inference in the absence of any command line switches. """
-    INFERENCE_DEFAULTS: Dict[ModelProcessing, Dict[ModelExecutionMode, bool]] = {
+    """
+    Defaults for when to run inference in the absence of any command line switches.
+    This depends on ModelProcessing, perform_cross_validation, and ModelExecutionMode.
+    If the current combination of these three parameters is not in this data structure,
+    then default to False.
+    """
+    INFERENCE_DEFAULTS: Dict[ModelProcessing, Dict[bool, Dict[ModelExecutionMode, bool]]] = {
         ModelProcessing.DEFAULT: {
-            ModelExecutionMode.TRAIN: False,
-            ModelExecutionMode.TEST: True,
-            ModelExecutionMode.VAL: True,
+            False: {
+                ModelExecutionMode.TRAIN: False,
+                ModelExecutionMode.TEST: True,
+                ModelExecutionMode.VAL: True
+            }
         },
         ModelProcessing.ENSEMBLE_CREATION: {
-            ModelExecutionMode.TRAIN: False,
-            ModelExecutionMode.TEST: True,
-            ModelExecutionMode.VAL: False,
+            True: {
+                ModelExecutionMode.TRAIN: False,
+                ModelExecutionMode.TEST: True,
+                ModelExecutionMode.VAL: False
+            }
         }
     }
 
+    def inference_defaults(self, model_proc: ModelProcessing, data_split: ModelExecutionMode) -> bool:
+        """
+        Returns True if inference is required by default for this model_proc and data_split.
+
+        :param model_proc: Whether we are testing an ensemble or single model.
+        :param data_split: Indicates which of the 3 sets (training, test, or validation) is being processed.
+        :return: True if inference required by default.
+ """ + try: + return WorkflowParams.INFERENCE_DEFAULTS[model_proc][self.perform_cross_validation][data_split] + except KeyError: + return False + def inference_options(self) -> Dict[ModelProcessing, Dict[ModelExecutionMode, Optional[bool]]]: """ Return a mapping from ModelProcesing and ModelExecutionMode to command line switch. @@ -308,7 +330,7 @@ def inference_on_set(self, model_proc: ModelProcessing, data_split: ModelExecuti if inference_option is not None: return inference_option - return WorkflowParams.INFERENCE_DEFAULTS[model_proc][data_split] + return self.inference_defaults(model_proc, data_split) @property def is_offline_run(self) -> bool: diff --git a/Tests/ML/models/test_scalar_model.py b/Tests/ML/models/test_scalar_model.py index c0c2fe07b..fe5459abd 100644 --- a/Tests/ML/models/test_scalar_model.py +++ b/Tests/ML/models/test_scalar_model.py @@ -279,6 +279,7 @@ def test_run_ml_with_classification_model(test_output_dirs: OutputFolderForTests azure_config = get_default_azure_config() azure_config.train = True config: ScalarModelBase = ModelConfigLoader().create_model_config_from_name(model_name) + config.inference_on_test_set = True config.number_of_cross_validation_splits = number_of_offline_cross_validation_splits config.set_output_to(test_output_dirs.root_dir) # Trying to run DDP from the test suite hangs, hence restrict to single GPU. diff --git a/Tests/ML/runners/test_runner.py b/Tests/ML/runners/test_runner.py index a4e7241f6..e934decb0 100644 --- a/Tests/ML/runners/test_runner.py +++ b/Tests/ML/runners/test_runner.py @@ -5,7 +5,7 @@ import logging import time from pathlib import Path -from typing import Tuple +from typing import Optional, Tuple from unittest import mock from unittest.mock import Mock @@ -99,49 +99,102 @@ def create_train_and_test_data_small_dataset(image_size: TupleInt3, return target_dir +@pytest.mark.skipif(common_util.is_windows(), reason="Too slow on windows") +@pytest.mark.parametrize("perform_cross_validation", [True, False]) +def test_model_inference_train_and_test_default(test_output_dirs: OutputFolderForTests, + perform_cross_validation: bool) -> None: + """ + Test inference defaults with ModelProcessing.DEFAULT. + + :param test_output_dirs: Test output directories. + :param perform_cross_validation: Whether to test with cross validation. + :return: None. + """ + run_model_inference_train_and_test(test_output_dirs, + perform_cross_validation, + model_proc=ModelProcessing.DEFAULT) + + @pytest.mark.skipif(common_util.is_windows(), reason="Too slow on windows") @pytest.mark.parametrize("perform_cross_validation", [True, False]) @pytest.mark.parametrize("inference_on_set", [(True, False, False), (False, True, False), (False, False, True)]) def test_model_inference_train_and_test(test_output_dirs: OutputFolderForTests, perform_cross_validation: bool, inference_on_set: Tuple[bool, bool, bool]) -> None: + """ + Test inference overrides with ModelProcessing.DEFAULT. + + :param test_output_dirs: Test output directories. + :param perform_cross_validation: Whether to test with cross validation. + :param inference_on_set: Overrides for inference on data sets. + :return: None. 
+ """ (inference_on_train_set, inference_on_val_set, inference_on_test_set) = inference_on_set run_model_inference_train_and_test(test_output_dirs, perform_cross_validation, - inference_on_train_set, - inference_on_val_set, - inference_on_test_set, - False, - False, - False, - ModelProcessing.DEFAULT) + inference_on_train_set=inference_on_train_set, + inference_on_val_set=inference_on_val_set, + inference_on_test_set=inference_on_test_set, + model_proc=ModelProcessing.DEFAULT) + + +@pytest.mark.skipif(common_util.is_windows(), reason="Too slow on windows") +def test_ensemble_model_inference_train_and_test_default(test_output_dirs: OutputFolderForTests) -> None: + """ + Test inference defaults with ModelProcessing.ENSEMBLE_CREATION. + + :param test_output_dirs: Test output directories. + :return: None. + """ + run_model_inference_train_and_test(test_output_dirs, + True, + model_proc=ModelProcessing.ENSEMBLE_CREATION) @pytest.mark.skipif(common_util.is_windows(), reason="Too slow on windows") @pytest.mark.parametrize("ensemble_inference_on_set", [(True, False, False), (False, True, False), (False, False, True)]) def test_ensemble_model_inference_train_and_test(test_output_dirs: OutputFolderForTests, ensemble_inference_on_set: Tuple[bool, bool, bool]) -> None: + """ + Test inference overrides with ModelProcessing.ENSEMBLE_CREATION. + + :param test_output_dirs: Test output directories. + :param perform_cross_validation: Whether to test with cross validation. + :param ensemble_inference_on_set: Overrides for inference on data sets. + :return: None. + """ (ensemble_inference_on_train_set, ensemble_inference_on_val_set, ensemble_inference_on_test_set) = ensemble_inference_on_set run_model_inference_train_and_test(test_output_dirs, True, - False, - False, - False, - ensemble_inference_on_train_set, - ensemble_inference_on_val_set, - ensemble_inference_on_test_set, - ModelProcessing.ENSEMBLE_CREATION) + ensemble_inference_on_train_set=ensemble_inference_on_train_set, + ensemble_inference_on_val_set=ensemble_inference_on_val_set, + ensemble_inference_on_test_set=ensemble_inference_on_test_set, + model_proc=ModelProcessing.ENSEMBLE_CREATION) def run_model_inference_train_and_test(test_output_dirs: OutputFolderForTests, perform_cross_validation: bool, - inference_on_train_set: bool, - inference_on_val_set: bool, - inference_on_test_set: bool, - ensemble_inference_on_train_set: bool, - ensemble_inference_on_val_set: bool, - ensemble_inference_on_test_set: bool, - model_proc: ModelProcessing) -> None: + inference_on_train_set: Optional[bool] = None, + inference_on_val_set: Optional[bool] = None, + inference_on_test_set: Optional[bool] = None, + ensemble_inference_on_train_set: Optional[bool] = None, + ensemble_inference_on_val_set: Optional[bool] = None, + ensemble_inference_on_test_set: Optional[bool] = None, + model_proc: ModelProcessing = ModelProcessing.DEFAULT) -> None: + """ + Test running inference produces expected output metrics, files, folders and calls to upload_folder. + + :param test_output_dirs: Test output directories. + :param perform_cross_validation: Whether to test with cross validation. + :param inference_on_train_set: Override for inference on train data sets. + :param inference_on_val_set: Override for inference on validation data sets. + :param inference_on_test_set: Override for inference on test data sets. + :param ensemble_inference_on_train_set: Override for ensemble inference on train data sets. 
+    :param ensemble_inference_on_val_set: Override for ensemble inference on validation data sets.
+    :param ensemble_inference_on_test_set: Override for ensemble inference on test data sets.
+    :param model_proc: Model processing to test.
+    :return: None.
+    """
     dummy_model = DummyModel()
     config = PassThroughModel()
 
@@ -202,6 +254,20 @@ def run_model_inference_train_and_test(test_output_dirs: OutputFolderForTests,
             if mode in metrics:
                 metric = metrics[mode]
                 assert isinstance(metric, InferenceMetricsForSegmentation)
+
+            if flag is None:
+                # No override supplied, calculate the expected default:
+                if model_proc == ModelProcessing.DEFAULT:
+                    if not perform_cross_validation:
+                        # If a "normal" run then default to val or test.
+                        flag = mode in (ModelExecutionMode.VAL, ModelExecutionMode.TEST)
+                    else:
+                        # If an ensemble child then default to never.
+                        flag = False
+                else:
+                    # If an ensemble then default to test only.
+                    flag = mode == ModelExecutionMode.TEST
+
             if mode in metrics and not flag:
                 error = error + f"Error: {mode.value} cannot be not None."
             elif mode not in metrics and flag:
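
For reference, a minimal, self-contained sketch (not part of the patch) of the default-resolution logic this change introduces, so the intended behaviour can be checked in isolation. The two enums are trimmed stand-ins for InnerEye's ModelProcessing and ModelExecutionMode with assumed member values, and inference_default() mirrors the new WorkflowParams.inference_defaults above.

from enum import Enum
from typing import Dict


class ModelProcessing(Enum):
    # Stand-in for InnerEye's ModelProcessing enum; values assumed for illustration.
    DEFAULT = "default"
    ENSEMBLE_CREATION = "ensemble_creation"


class ModelExecutionMode(Enum):
    # Stand-in for InnerEye's ModelExecutionMode enum; values assumed for illustration.
    TRAIN = "Train"
    TEST = "Test"
    VAL = "Val"


# Same nested shape as the new WorkflowParams.INFERENCE_DEFAULTS:
# ModelProcessing -> perform_cross_validation -> ModelExecutionMode -> bool.
INFERENCE_DEFAULTS: Dict[ModelProcessing, Dict[bool, Dict[ModelExecutionMode, bool]]] = {
    ModelProcessing.DEFAULT: {
        False: {
            ModelExecutionMode.TRAIN: False,
            ModelExecutionMode.TEST: True,
            ModelExecutionMode.VAL: True
        }
    },
    ModelProcessing.ENSEMBLE_CREATION: {
        True: {
            ModelExecutionMode.TRAIN: False,
            ModelExecutionMode.TEST: True,
            ModelExecutionMode.VAL: False
        }
    }
}


def inference_default(model_proc: ModelProcessing,
                      perform_cross_validation: bool,
                      data_split: ModelExecutionMode) -> bool:
    # Any combination not listed in the table defaults to "no inference",
    # matching the try/except in the new WorkflowParams.inference_defaults.
    try:
        return INFERENCE_DEFAULTS[model_proc][perform_cross_validation][data_split]
    except KeyError:
        return False


# An ensemble child (DEFAULT processing inside a cross-validation run) hits the
# KeyError fallback, so it no longer runs inference on the validation and test sets:
assert not inference_default(ModelProcessing.DEFAULT, True, ModelExecutionMode.TEST)
assert not inference_default(ModelProcessing.DEFAULT, True, ModelExecutionMode.VAL)
# A plain single-model run keeps the old defaults (inference on val and test):
assert inference_default(ModelProcessing.DEFAULT, False, ModelExecutionMode.TEST)

Making "not listed" mean "no inference" keeps the table down to the two combinations that actually occur, and is what turns inference off for ensemble children without needing an explicit entry for them.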