Enable a disabled test (microsoft#536)
Enabling the test required changing several stored result files because of the PyTorch Lightning (PL) upgrade that happened in between.
ant0nsc authored Jul 15, 2021
1 parent 9fcc08f commit 5399987
Showing 25 changed files with 8,676 additions and 6,561 deletions.
5 changes: 3 additions & 2 deletions CHANGELOG.md
@@ -21,8 +21,10 @@ jobs that run in AzureML.
- ([#509](https://github.com/microsoft/InnerEye-DeepLearning/pull/509)) Run inference on registered models (single and
ensemble) using the parameter `model_id`.
### Changed
- ([#531])(https://github.com/microsoft/InnerEye-DeepLearning/pull/531)) Updated PL to 1.3.8, torchmetrics and pl-bolts and changed relevant metrics and SSL code API.
- ([#531](https://github.com/microsoft/InnerEye-DeepLearning/pull/531)) Updated PL to 1.3.8, torchmetrics and pl-bolts and changed relevant metrics and SSL code API.
- ([#533](https://github.com/microsoft/InnerEye-DeepLearning/pull/533)) Better defaults for inference on ensemble children.
- ([#536](https://github.com/microsoft/InnerEye-DeepLearning/pull/536)) Inference will not run on the validation set by default; this can be turned on
via the `--inference_on_val_set` flag (a brief usage sketch follows this file's diff).
- ([#502](https://github.com/microsoft/InnerEye-DeepLearning/pull/502)) Renamed command line option 'perform_training_set_inference' to 'inference_on_train_set'. Replaced command line option 'perform_validation_and_test_set_inference' with the pair of options 'inference_on_val_set' and 'inference_on_test_set'.
- ([#496](https://github.com/microsoft/InnerEye-DeepLearning/pull/496)) All plots are now saved as PNG, rather than JPG.
- ([#497](https://github.com/microsoft/InnerEye-DeepLearning/pull/497)) Reducing the size of the code snapshot that
@@ -49,7 +51,6 @@ in inference-only runs when using lightning containers.
### Removed

- ([#542](https://github.com/microsoft/InnerEye-DeepLearning/pull/542)) Removed Windows test leg from build pipeline.
- ([#520](https://github.com/microsoft/InnerEye-DeepLearning/pull/520)) Disable glaucoma job from Azure pipeline.
- ([#509](https://github.com/microsoft/InnerEye-DeepLearning/pull/509)) Parameters `local_weights_path` and
`weights_url` can no longer be used to initialize a training run, only inference runs.

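These `inference_on_*` switches are tri-state: `None` means "no command line value was given, fall back to the default", while `True` and `False` are explicit overrides. Below is a minimal, self-contained sketch of that pattern; the `resolve` helper and its argument names are illustrative, not the project's API:

```python
from typing import Optional

def resolve(switch: Optional[bool], default: bool) -> bool:
    """Return the explicit command line value if one was given, else the default."""
    return default if switch is None else switch

# Validation-set inference is now opt-in:
assert resolve(None, default=False) is False   # no switch given -> default stays off
assert resolve(True, default=False) is True    # --inference_on_val_set=True
```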
5 changes: 2 additions & 3 deletions InnerEye/Azure/azure_config.py
@@ -224,9 +224,8 @@ def get_service_principal_auth(self) -> Optional[Union[InteractiveLoginAuthentic
application_key = secrets_handler.get_secret_from_environment(fixed_paths.SERVICE_PRINCIPAL_KEY,
allow_missing=True)
if not application_key:
logging.warning("Unable to retrieve the key for the Service Principal authentication "
f"(expected in environment variable '{fixed_paths.SERVICE_PRINCIPAL_KEY}' or YAML). "
f"Switching to interactive login.")
logging.info("Using interactive login to Azure. To use Service Principal authentication, "
f"supply the password in in environment variable '{fixed_paths.SERVICE_PRINCIPAL_KEY}'.")
return InteractiveLoginAuthentication()

return ServicePrincipalAuthentication(
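A hedged sketch of the new fallback path above. Default-constructing `AzureConfig` is an assumption for illustration (its import path appears later in this commit's plot_cross_validation.py diff); the environment variable name is whatever `fixed_paths.SERVICE_PRINCIPAL_KEY` resolves to:

```python
import os
from InnerEye.Azure.azure_config import AzureConfig
from InnerEye.Common import fixed_paths

os.environ.pop(fixed_paths.SERVICE_PRINCIPAL_KEY, None)  # ensure no service principal key is set
azure_config = AzureConfig()                             # default construction is an assumption
# With the key missing, authentication now falls back to interactive login,
# logged at INFO level instead of the previous WARNING:
auth = azure_config.get_service_principal_auth()         # -> InteractiveLoginAuthentication
```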
5 changes: 4 additions & 1 deletion InnerEye/Azure/azure_util.py
@@ -352,7 +352,10 @@ def download_run_output_file(blob_path: Path,
blobs_prefix = str((fixed_paths.DEFAULT_AML_UPLOAD_DIR / blob_path).as_posix())
destination = destination / blob_path.name
logging.info(f"Downloading single file from run {run.id}: {blobs_prefix} -> {str(destination)}")
run.download_file(blobs_prefix, str(destination), _validate_checksum=True)
try:
run.download_file(blobs_prefix, str(destination), _validate_checksum=True)
except Exception as ex:
raise ValueError(f"Unable to download file '{blobs_prefix}' from run {run.id}") from ex
return destination


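From the caller's side, failures now surface as a `ValueError` that chains the original AzureML exception, rather than an unhandled SDK error. A hedged sketch; the file name is made up, and obtaining the run via `Run.get_context()` is just for illustration:

```python
from pathlib import Path
from azureml.core import Run
from InnerEye.Azure.azure_util import download_run_output_file

run = Run.get_context()  # any azureml.core.Run works here
try:
    local_copy = download_run_output_file(Path("metrics.csv"), destination=Path("outputs"), run=run)
except ValueError as ex:
    # The original AzureML error is preserved as the exception's cause:
    print(f"{ex} (caused by: {ex.__cause__})")
```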
88 changes: 35 additions & 53 deletions InnerEye/ML/deep_learning_config.py
@@ -5,13 +5,14 @@
from __future__ import annotations

import logging
import param
from enum import Enum, unique
from pandas import DataFrame
from param import Parameterized
from pathlib import Path
from typing import Any, Dict, List, Optional

import param
from pandas import DataFrame
from param import Parameterized

from InnerEye.Azure.azure_util import DEFAULT_CROSS_VALIDATION_SPLIT_INDEX, RUN_CONTEXT, is_offline_run_context
from InnerEye.Common import fixed_paths
from InnerEye.Common.common_util import ModelProcessing, is_windows
@@ -276,49 +277,16 @@ def validate(self) -> None:
f"found number_of_cross_validation_splits = {self.number_of_cross_validation_splits} "
f"and cross_validation_split_index={self.cross_validation_split_index}")

"""
Defaults for when to run inference in the absence of any command line switches.
This depends on ModelProcessing, perform_cross_validation, and ModelExecutionMode.
If the current combination of these three parameters is not in this data structure,
then default to False.
"""
INFERENCE_DEFAULTS: Dict[ModelProcessing, Dict[bool, Dict[ModelExecutionMode, bool]]] = {
ModelProcessing.DEFAULT: {
False: {
ModelExecutionMode.TRAIN: False,
ModelExecutionMode.TEST: True,
ModelExecutionMode.VAL: True
}
},
ModelProcessing.ENSEMBLE_CREATION: {
True: {
ModelExecutionMode.TRAIN: False,
ModelExecutionMode.TEST: True,
ModelExecutionMode.VAL: False
}
}
}

def inference_defaults(self, model_proc: ModelProcessing, data_split: ModelExecutionMode) -> bool:
def is_inference_required(self,
model_proc: ModelProcessing,
data_split: ModelExecutionMode) -> bool:
"""
Returns True if inference is required by default for this model_proc and data_split.
Returns True if inference is required for this model_proc (single or ensemble) and data_split (Train/Val/Test).
:param model_proc: Whether we are testing an ensemble or single model.
:param data_split: Indicates which of the 3 sets (training, test, or validation) is being processed.
:return: True if inference required by default.
"""
try:
return WorkflowParams.INFERENCE_DEFAULTS[model_proc][self.perform_cross_validation][data_split]
except KeyError:
return False

def inference_options(self) -> Dict[ModelProcessing, Dict[ModelExecutionMode, Optional[bool]]]:
"""
Return a mapping from ModelProcesing and ModelExecutionMode to command line switch.
:return: Command line switch for each combination of ModelProcessing and ModelExecutionMode.
:return: True if inference required.
"""
return {
settings = {
ModelProcessing.DEFAULT: {
ModelExecutionMode.TRAIN: self.inference_on_train_set,
ModelExecutionMode.TEST: self.inference_on_test_set,
Expand All @@ -330,20 +298,34 @@ def inference_options(self) -> Dict[ModelProcessing, Dict[ModelExecutionMode, Op
ModelExecutionMode.VAL: self.ensemble_inference_on_val_set,
}
}

def inference_on_set(self, model_proc: ModelProcessing, data_split: ModelExecutionMode) -> bool:
"""
Returns True if inference is required for this model_proc and data_split.
:param model_proc: Whether we are testing an ensemble or single model.
:param data_split: Indicates which of the 3 sets (training, test, or validation) is being processed.
:return: True if inference required.
"""
inference_option = self.inference_options()[model_proc][data_split]
inference_option = settings[model_proc][data_split]
if inference_option is not None:
return inference_option

return self.inference_defaults(model_proc, data_split)
# Defaults for when to run inference in the absence of any command line switches.
# This depends on ModelProcessing, perform_cross_validation, and ModelExecutionMode.
# If the current combination of these three parameters is not in this data structure,
# then default to False.
defaults: Dict[ModelProcessing, Dict[bool, Dict[ModelExecutionMode, bool]]] = {
ModelProcessing.DEFAULT: {
False: {
ModelExecutionMode.TRAIN: False,
ModelExecutionMode.VAL: False,
ModelExecutionMode.TEST: True,
}
},
ModelProcessing.ENSEMBLE_CREATION: {
True: {
ModelExecutionMode.TRAIN: False,
ModelExecutionMode.VAL: False,
ModelExecutionMode.TEST: True,
}
}
}
try:
return defaults[model_proc][self.perform_cross_validation][data_split]
except KeyError:
return False

@property
def is_offline_run(self) -> bool:
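A hedged usage sketch of the consolidated `is_inference_required` method above. `ModelProcessing` is imported exactly as this file does; the import path for `ModelExecutionMode` and default-constructing `WorkflowParams` are assumptions for illustration:

```python
from InnerEye.Common.common_util import ModelProcessing
from InnerEye.ML.common import ModelExecutionMode            # import path assumed
from InnerEye.ML.deep_learning_config import WorkflowParams

params = WorkflowParams()  # all inference_on_* switches start as None (no command line value)
# With no switches and no cross validation (assumed default), the defaults
# table enables inference on the test set only:
assert params.is_inference_required(ModelProcessing.DEFAULT, ModelExecutionMode.TEST)
assert not params.is_inference_required(ModelProcessing.DEFAULT, ModelExecutionMode.VAL)
# An explicit command line switch always wins over the defaults table:
params.inference_on_val_set = True
assert params.is_inference_required(ModelProcessing.DEFAULT, ModelExecutionMode.VAL)
```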
8 changes: 6 additions & 2 deletions InnerEye/ML/reports/classification_crossval_report.ipynb
@@ -199,8 +199,12 @@
"outputs": [],
"source": [
"if test_metrics_csv.is_file():\n",
" plot_pr_and_roc_curves_from_csv(metrics_csv=test_metrics_csv, data_split=ModelExecutionMode.TEST, config=config,\n",
" is_crossval_report=is_crossval_report)"
" # When analyzing cross validation runs, the report is started with a file that contains both\n",
" # validation and test set metrics in one. The test set metrics however may be missing if\n",
" # inference on the test set for cross validation child runs is turned off, leading to exceptions.\n",
" if not is_crossval_report or (is_crossval_report and config.inference_on_test_set):\n",
" plot_pr_and_roc_curves_from_csv(metrics_csv=test_metrics_csv, data_split=ModelExecutionMode.TEST, config=config,\n",
" is_crossval_report=is_crossval_report)"
]
},
{
20 changes: 14 additions & 6 deletions InnerEye/ML/reports/classification_report.py
@@ -76,6 +76,7 @@ def read_csv_and_filter_prediction_target(csv: Path, prediction_target: str,
:param epoch: If specified, filter rows for given epoch (default: last epoch only; requires LoggingColumns.Epoch).
:return: Filtered dataframe.
"""

def check_column_present(dataframe: pd.DataFrame, column: LoggingColumns) -> None:
if column.value not in dataframe:
raise ValueError(f"Missing {column.value} column.")
@@ -365,9 +366,10 @@ def get_all_metrics(predictions_to_set_optimal_threshold: LabelsAndPredictions,
for metric in ReportedScalarMetrics: # type: ReportedScalarMetrics
if is_thresholded and not metric.requires_threshold:
continue
metrics[metric.description] = get_metric(predictions_to_set_optimal_threshold=predictions_to_set_optimal_threshold,
predictions_to_compute_metrics=predictions_to_compute_metrics,
metric=metric, optimal_threshold=optimal_threshold)
metrics[metric.description] = get_metric(
predictions_to_set_optimal_threshold=predictions_to_set_optimal_threshold,
predictions_to_compute_metrics=predictions_to_compute_metrics,
metric=metric, optimal_threshold=optimal_threshold)

return metrics

@@ -415,8 +417,11 @@ def get_metrics_table_for_prediction_target(csv_to_set_optimal_threshold: Path,
:return: Tuple of rows and header, where each row and the header are lists of strings of same length (2 if
`is_crossval_report` is False, `config.number_of_cross_validation_splits`+2 otherwise).
"""
def get_metrics_for_crossval_split(prediction_target: str, crossval_split: Optional[int] = None) -> Dict[str, float]:
predictions_to_set_optimal_threshold = get_labels_and_predictions(csv_to_set_optimal_threshold, prediction_target,

def get_metrics_for_crossval_split(prediction_target: str,
crossval_split: Optional[int] = None) -> Dict[str, float]:
predictions_to_set_optimal_threshold = get_labels_and_predictions(csv_to_set_optimal_threshold,
prediction_target,
crossval_split_index=crossval_split,
data_split=data_split_to_set_optimal_threshold)
predictions_to_compute_metrics = get_labels_and_predictions(csv_to_compute_metrics, prediction_target,
Expand Down Expand Up @@ -551,7 +556,10 @@ def get_k_best_and_worst_performing(val_metrics_csv: Path, test_metrics_csv: Pat
return sorted


def print_k_best_and_worst_performing(val_metrics_csv: Path, test_metrics_csv: Path, k: int, prediction_target: str) -> None:
def print_k_best_and_worst_performing(val_metrics_csv: Path,
test_metrics_csv: Path,
k: int,
prediction_target: str) -> None:
"""
Print the top "k" best predictions (i.e. correct classifications where the model was the most certain) and the
top "k" worst predictions (i.e. misclassifications where the model was the most confident).
18 changes: 14 additions & 4 deletions InnerEye/ML/run_ml.py
@@ -11,13 +11,13 @@
from typing import Any, Callable, Dict, List, Optional, Tuple

import pandas as pd
from pytorch_lightning.core.datamodule import LightningDataModule
import stopit
import torch.multiprocessing
from azureml._restclient.constants import RunStatus
from azureml.core import Model, Run, model
from azureml.data import FileDataset
from pytorch_lightning import LightningModule, seed_everything
from pytorch_lightning.core.datamodule import LightningDataModule
from torch.utils.data import DataLoader

from InnerEye.Azure import azure_util
@@ -134,6 +134,15 @@ def log_metrics(metrics: Dict[ModelExecutionMode, InferenceMetrics],
split.log_metrics(run_context)


def is_classification_model(model: Any) -> bool:
"""
Returns True if the given object is an InnerEye classification model, but not a sequence model.
"""
return (isinstance(model, ScalarModelBase)
and model.is_classification_model
and not isinstance(model, SequenceModelBase))


class MLRunner:

def __init__(self,
@@ -466,7 +475,7 @@ def run_inference_for_lightning_models(self, checkpoint_paths: List[Path]) -> No
dataloaders: List[Tuple[DataLoader, ModelExecutionMode]] = []
data_dataloaders = MLRunner.lightning_data_module_dataloaders(data)
for data_split, dataloader in data_dataloaders.items():
if self.container.inference_on_set(ModelProcessing.DEFAULT, data_split):
if self.container.is_inference_required(ModelProcessing.DEFAULT, data_split):
dataloaders.append((dataloader(), data_split))
checkpoint = load_checkpoint(checkpoint_paths[0], use_gpu=self.container.use_gpu)
lightning_model.load_state_dict(checkpoint['state_dict'])
@@ -781,7 +790,7 @@ def model_inference_train_and_test(self,
config = self.innereye_config

for data_split in ModelExecutionMode:
if self.container.inference_on_set(model_proc, data_split):
if self.container.is_inference_required(model_proc, data_split):
opt_metrics = model_test(config, data_split=data_split, checkpoint_paths=checkpoint_paths,
model_proc=model_proc)
if opt_metrics is not None:
@@ -880,10 +889,11 @@ def plot_cross_validation_and_upload_results(self) -> Path:
plot_crossval_config.outputs_directory = self.innereye_config.outputs_folder
plot_crossval_config.azure_config = self.azure_config
cross_val_results_root = plot_cross_validation(plot_crossval_config)
if isinstance(self.model_config, ScalarModelBase) and not isinstance(self.model_config, SequenceModelBase):
if is_classification_model(self.model_config):
crossval_report_name = f"{ModelCategory.Classification.value}_crossval"
notebook_path = cross_val_results_root / get_ipynb_report_name(crossval_report_name)
full_metrics_csv = cross_val_results_root / FULL_METRICS_DATAFRAME_FILE
assert isinstance(self.model_config, ScalarModelBase)
generate_classification_crossval_notebook(notebook_path, self.model_config, full_metrics_csv)
if self.post_cross_validation_hook:
self.post_cross_validation_hook(self.innereye_config, cross_val_results_root)
3 changes: 3 additions & 0 deletions InnerEye/ML/scalar_config.py
@@ -231,6 +231,9 @@ def __init__(self, num_dataset_reader_workers: int = 0, **params: Any) -> None:
self.num_dataset_reader_workers = num_dataset_reader_workers
if self.target_names is None:
self.target_names = self.class_names
# Report generation assumes that results for the test set are available when we do cross validation on
# all ScalarModels.
self.inference_on_test_set = True

def validate(self) -> None:
if len(self.class_names) > 1 and not self.is_classification_model:
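A hedged illustration of the effect of the `__init__` change above; that `ScalarModelBase` is the class shown and that a bare subclass can be constructed from defaults are both assumptions (real configs also need dataset settings):

```python
from InnerEye.ML.scalar_config import ScalarModelBase  # module shown in this diff

class TinyClassificationConfig(ScalarModelBase):
    """Hypothetical minimal subclass, for illustration only."""

config = TinyClassificationConfig()
# Set unconditionally in __init__ (see above), so cross-validation report
# generation can always rely on test-set metrics being present:
assert config.inference_on_test_set is True
```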
18 changes: 7 additions & 11 deletions InnerEye/ML/visualizers/plot_cross_validation.py
Expand Up @@ -29,7 +29,7 @@

import InnerEye.Common.Statistics.mann_whitney_test as mann_whitney
from InnerEye.Azure.azure_config import AzureConfig
from InnerEye.Azure.azure_util import CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY, download_run_output_file, \
from InnerEye.Azure.azure_util import CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY, RUN_CONTEXT, download_run_output_file, \
fetch_child_runs, is_offline_run_context, is_parent_run
from InnerEye.Common import common_util, fixed_paths
from InnerEye.Common.Statistics.wilcoxon_signed_rank_test import WilcoxonTestConfig, wilcoxon_signed_rank_test
@@ -217,7 +217,7 @@ def download_or_get_local_file(self,
# Just copy the provided path in the outputs directory to the destination.
if not destination.exists():
destination.mkdir(parents=True)
if run is None or Run.get_context().id == run.id or is_parent_run(run) or is_offline_run_context(run):
if run is None or RUN_CONTEXT.id == run.id or is_parent_run(run) or is_offline_run_context(run):
if run is None:
assert self.local_run_results is not None, "Local run results must be set in unit testing"
local_src = Path(self.local_run_results)
@@ -231,15 +231,11 @@
return Path(shutil.copy(local_src, destination))
return None
else:
try:
return download_run_output_file(
blob_path=blob_path,
destination=destination,
run=run
)
except Exception as ex:
logging.warning(f"File {blob_to_download} not found in output of run {run.id}: {ex}")
return None
return download_run_output_file(
blob_path=blob_path,
destination=destination,
run=run
)


@dataclass(frozen=True)
