From 90a8ea2938cd8fdaa97f95470652808e3900869a Mon Sep 17 00:00:00 2001 From: Sarthak Pati Date: Mon, 11 Sep 2023 04:14:06 -0400 Subject: [PATCH 1/5] Combined writing and temp file creation in a single step This change will combine the creation of a temporary file and writing of the parameters in a single step, thereby reducing the total number of lines in the testing file. --- testing/test_full.py | 166 +++++++++++++++++++++++-------------------- 1 file changed, 90 insertions(+), 76 deletions(-) diff --git a/testing/test_full.py b/testing/test_full.py index 2e0ad0bae..fcb73fa6d 100644 --- a/testing/test_full.py +++ b/testing/test_full.py @@ -218,12 +218,15 @@ def sanitize_outputDir(): Path(outputDir).mkdir(parents=True, exist_ok=True) -def get_temp_config_path(): +def write_temp_config_path(parameters_to_write): print("02_2: Creating path for temporary config file") temp_config_path = os.path.join(outputDir, "config_temp.yaml") # if found in previous run, discard. if os.path.exists(temp_config_path): os.remove(temp_config_path) + if parameters_to_write is not None: + with open(temp_config_path, "w") as file: + yaml.dump(parameters_to_write, file) return temp_config_path @@ -482,9 +485,7 @@ def test_train_regression_brainage_rad_2d(device): reset=True, ) - file_config_temp = get_temp_config_path() - with open(file_config_temp, "w") as file: - yaml.dump(parameters_temp, file) + # file_config_temp = write_temp_config_path(parameters_temp) model_path = os.path.join(outputDir, "brain_age_best.pth.tar") config_path = os.path.join(outputDir, "parameters.pkl") optimization_result = post_training_model_optimization(model_path, config_path) @@ -755,10 +756,7 @@ def test_train_inference_optimize_classification_rad_3d(device): reset=True, ) - file_config_temp = get_temp_config_path() - parameters_temp["model"]["onnx_export"] = True - with open(file_config_temp, "w") as file: - yaml.dump(parameters_temp, file) + # file_config_temp = write_temp_config_path(parameters_temp) model_path = os.path.join(outputDir, all_models_regression[0] + "_best.pth.tar") config_path = os.path.join(outputDir, "parameters.pkl") optimization_result = post_training_model_optimization(model_path, config_path) @@ -964,10 +962,7 @@ def test_train_scheduler_classification_rad_2d(device): parameters["nested_training"]["validation"] = -5 sanitize_outputDir() ## ensure parameters are parsed every single time - file_config_temp = get_temp_config_path() - - with open(file_config_temp, "w") as file: - yaml.dump(parameters, file) + file_config_temp = write_temp_config_path(parameters) parameters = parseConfig(file_config_temp, version_check_flag=False) TrainingManager( @@ -1090,9 +1085,7 @@ def test_train_normtype_segmentation_rad_3d(device): # these should raise exceptions for norm_type in ["none", None]: parameters["model"]["norm_type"] = norm_type - file_config_temp = get_temp_config_path() - with open(file_config_temp, "w") as file: - yaml.dump(parameters, file) + file_config_temp = write_temp_config_path(parameters) with pytest.raises(Exception) as exc_info: parameters = parseConfig(file_config_temp, version_check_flag=False) @@ -1151,10 +1144,7 @@ def test_train_metrics_segmentation_rad_2d(device): parameters["model"]["architecture"] = "resunet" parameters["model"]["onnx_export"] = False parameters["model"]["print_summary"] = False - file_config_temp = get_temp_config_path() - - with open(file_config_temp, "w") as file: - yaml.dump(parameters, file) + file_config_temp = write_temp_config_path(parameters) parameters = 
parseConfig(file_config_temp, version_check_flag=False) training_data, parameters["headers"] = parseTrainingCSV( @@ -1220,9 +1210,7 @@ def get_parameters_after_alteration(loss_type: str) -> dict: testingDir + "/config_segmentation.yaml", version_check_flag=False ) parameters["loss_function"] = loss_type - file_config_temp = get_temp_config_path() - with open(file_config_temp, "w") as file: - yaml.dump(parameters, file) + file_config_temp = write_temp_config_path(parameters) # read and parse csv parameters = parseConfig(file_config_temp, version_check_flag=True) parameters["nested_training"]["testing"] = -5 @@ -1274,15 +1262,13 @@ def get_parameters_after_alteration(loss_type: str) -> dict: def test_generic_config_read(): print("24: Starting testing reading configuration") - file_config_temp = get_temp_config_path() parameters = parseConfig( os.path.join(baseConfigDir, "config_all_options.yaml"), version_check_flag=False, ) parameters["data_preprocessing"]["resize_image"] = [128, 128] - with open(file_config_temp, "w") as file: - yaml.dump(parameters, file) + file_config_temp = write_temp_config_path(parameters) # read and parse csv parameters = parseConfig(file_config_temp, version_check_flag=True) @@ -1361,7 +1347,6 @@ def test_generic_cli_function_preprocess(): print("25: Starting testing cli function preprocess") file_config = os.path.join(testingDir, "config_segmentation.yaml") sanitize_outputDir() - file_config_temp = get_temp_config_path() file_data = os.path.join(inputDir, "train_2d_rad_segmentation.csv") input_data_df, _ = parseTrainingCSV(file_data, train=False) @@ -1391,9 +1376,7 @@ def test_generic_cli_function_preprocess(): parameters["data_preprocessing"]["to_canonical"] = None parameters["data_preprocessing"]["rgba_to_rgb"] = None - # store this separately for preprocess testing - with open(file_config_temp, "w") as outfile: - yaml.dump(parameters, outfile, default_flow_style=False) + file_config_temp = write_temp_config_path(parameters) preprocess_and_save(temp_csv, file_config_temp, outputDir) training_data, parameters["headers"] = parseTrainingCSV( @@ -1459,7 +1442,6 @@ def test_generic_cli_function_mainrun(device): parameters = parseConfig( testingDir + "/config_segmentation.yaml", version_check_flag=False ) - file_config_temp = get_temp_config_path() parameters["modality"] = "rad" parameters["patch_size"] = patch_size["2D"] @@ -1475,8 +1457,7 @@ def test_generic_cli_function_mainrun(device): ] parameters["model"]["architecture"] = "unet" - with open(file_config_temp, "w") as file: - yaml.dump(parameters, file) + file_config_temp = write_temp_config_path(parameters) file_data = os.path.join(inputDir, "train_2d_rad_segmentation.csv") @@ -2142,7 +2123,6 @@ def test_train_inference_segmentation_histology_2d(device): Path(output_dir_patches).mkdir(parents=True, exist_ok=True) output_dir_patches_output = os.path.join(output_dir_patches, "histo_patches_output") Path(output_dir_patches_output).mkdir(parents=True, exist_ok=True) - file_config_temp = get_temp_config_path() parameters_patch = {} # extracting minimal number of patches to ensure that the test does not take too long @@ -2151,8 +2131,7 @@ def test_train_inference_segmentation_histology_2d(device): # define patches to be extracted in terms of microns parameters_patch["patch_size"] = ["1000m", "1000m"] - with open(file_config_temp, "w") as file: - yaml.dump(parameters_patch, file) + file_config_temp = write_temp_config_path(parameters) patch_extraction( inputDir + "/train_2d_histo_segmentation.csv", @@ -2218,7 +2197,6 @@ 
def test_train_inference_classification_histology_large_2d(device): Path(output_dir_patches).mkdir(parents=True, exist_ok=True) output_dir_patches_output = os.path.join(output_dir_patches, "histo_patches_output") Path(output_dir_patches_output).mkdir(parents=True, exist_ok=True) - file_config_temp = get_temp_config_path() for sub in ["1", "2"]: file_to_check = os.path.join( @@ -2233,8 +2211,7 @@ def test_train_inference_classification_histology_large_2d(device): parameters_patch["patch_size"] = [128, 128] parameters_patch["value_map"] = {0: 0, 255: 255} - with open(file_config_temp, "w") as file: - yaml.dump(parameters_patch, file) + file_config_temp = write_temp_config_path(parameters) patch_extraction( inputDir + "/train_2d_histo_classification.csv", @@ -2315,9 +2292,7 @@ def resize_for_ci(filename, scale): ) parameters["modality"] = "histo" parameters["patch_size"] = parameters_patch["patch_size"][0] - file_config_temp = get_temp_config_path() - with open(file_config_temp, "w") as file: - yaml.dump(parameters, file) + file_config_temp = write_temp_config_path(parameters) parameters = parseConfig(file_config_temp, version_check_flag=False) parameters["model"]["dimension"] = 2 # read and parse csv @@ -2394,7 +2369,6 @@ def test_train_inference_classification_histology_2d(device): shutil.rmtree(output_dir_patches) Path(output_dir_patches).mkdir(parents=True, exist_ok=True) output_dir_patches_output = os.path.join(output_dir_patches, "histo_patches_output") - file_config_temp = get_temp_config_path() parameters_patch = {} # extracting minimal number of patches to ensure that the test does not take too long @@ -2402,8 +2376,7 @@ def test_train_inference_classification_histology_2d(device): for num_patches in [-1, 3]: parameters_patch["num_patches"] = num_patches - with open(file_config_temp, "w") as file: - yaml.dump(parameters_patch, file) + file_config_temp = write_temp_config_path(parameters_patch) if os.path.exists(output_dir_patches_output): shutil.rmtree(output_dir_patches_output) @@ -2426,9 +2399,7 @@ def test_train_inference_classification_histology_2d(device): ) parameters["modality"] = "histo" parameters["patch_size"] = 128 - file_config_temp = get_temp_config_path() - with open(file_config_temp, "w") as file: - yaml.dump(parameters, file) + file_config_temp = write_temp_config_path(parameters) parameters = parseConfig(file_config_temp, version_check_flag=False) parameters["model"]["dimension"] = 2 # read and parse csv @@ -2846,9 +2817,7 @@ def test_generic_cli_function_configgenerator(): "key_1": {"key_2": "value"} } - file_config_temp = get_temp_config_path() - with open(file_config_temp, "w") as file: - yaml.dump(generator_config, file) + file_config_temp = write_temp_config_path(generator_config) # test for failure with pytest.raises(Exception) as exc_info: @@ -2892,7 +2861,7 @@ def test_generic_cli_function_recoverconfig(): resume=False, reset=True, ) - output_config_path = get_temp_config_path() + output_config_path = write_temp_config_path(parameters=None) assert recover_config( outputDir, output_config_path ), "recover_config returned false" @@ -2941,25 +2910,17 @@ def test_generic_deploy_docker(): reset=True, ) - custom_entrypoint = os.path.join( - gandlfRootDir, - "mlcube/model_mlcube/example_custom_entrypoint/getting_started_3d_rad_seg.py", - ) - for entrypoint_script in [None, custom_entrypoint]: - result = run_deployment( - os.path.join(gandlfRootDir, "mlcube/model_mlcube/"), - deploymentOutputDir, - "docker", - "model", - entrypoint_script=entrypoint_script, - 
configfile=testingDir + "/config_segmentation.yaml", - modeldir=outputDir, - requires_gpu=True, - ) - msg = "run_deployment returned false" - if entrypoint_script: - msg += " with custom entrypoint script" - assert result, msg + result = run_deployment( + os.path.join(gandlfRootDir, "mlcube/model_mlcube/"), + deploymentOutputDir, + "docker", + "model", + configfile=testingDir + "/config_segmentation.yaml", + modeldir=outputDir, + requires_gpu=True, + ) + + assert result, "run_deployment returned false" sanitize_outputDir() print("passed") @@ -2970,7 +2931,6 @@ def test_collision_subjectid_test_segmentation_rad_2d(device): parameters = parseConfig( testingDir + "/config_segmentation.yaml", version_check_flag=False ) - file_config_temp = get_temp_config_path() parameters["modality"] = "rad" parameters["patch_size"] = patch_size["2D"] @@ -2987,8 +2947,7 @@ def test_collision_subjectid_test_segmentation_rad_2d(device): parameters["model"]["architecture"] = "unet" outputDir = os.path.join(testingDir, "data_output") - with open(file_config_temp, "w") as file: - yaml.dump(parameters, file) + file_config_temp = write_temp_config_path(parameters) # test the case where outputDir is explicitly provided to InferenceManager train_data_path = inputDir + "/train_2d_rad_segmentation.csv" @@ -3092,9 +3051,7 @@ def test_generic_cli_function_metrics_cli_rad_nd(): output_file = os.path.join(outputDir, "output.yaml") - temp_config = get_temp_config_path() - with open(temp_config, "w") as file: - yaml.dump(parameters, file) + temp_config = write_temp_config_path(parameters) # run the metrics calculation generate_metrics_dict(temp_infer_csv, temp_config, output_file) @@ -3121,3 +3078,60 @@ def test_generic_deploy_metrics_docker(): sanitize_outputDir() print("passed") + + +def test_train_synthesis_rad_3d(device): + print("XX: Starting 3D Rad synthesis tests") + # read and parse csv + # read and initialize parameters for specific data dimension + parameters = parseConfig( + testingDir + "/config_segmentation.yaml", version_check_flag=False + ) + parameters["model"]["final_layer"] = "synthesis" + training_data, parameters["headers"] = parseTrainingCSV( + inputDir + "/train_3d_rad_segmentation.csv" + ) + parameters["modality"] = "rad" + parameters["patch_size"] = patch_size["3D"] + parameters["model"]["dimension"] = 3 + parameters["model"]["class_list"] = [0, 1] + parameters["model"]["final_layer"] = "softmax" + parameters["model"]["amp"] = True + parameters["in_memory"] = True + parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) + parameters["model"]["onnx_export"] = False + parameters["model"]["print_summary"] = False + parameters = populate_header_in_parameters(parameters, parameters["headers"]) + # loop through selected models and train for single epoch + for model in all_models_segmentation: + if model == "imagenet_unet": + # imagenet_unet encoder needs to be toned down for small patch size + parameters["model"]["encoder_name"] = "mit_b0" + with pytest.raises(Exception) as exc_info: + _ = global_models_dict[model](parameters) + print("Exception raised:", exc_info.value) + parameters["model"]["encoder_name"] = "resnet34" + parameters["model"]["encoder_depth"] = 3 + parameters["model"]["decoder_channels"] = (64, 32, 16) + parameters["model"]["final_layer"] = random.choice( + ["sigmoid", "softmax", "logsoftmax", "tanh", "identity"] + ) + parameters["model"]["converter_type"] = random.choice( + ["acs", "soft", "conv3d"] + ) + parameters["model"]["architecture"] = model + 
parameters["nested_training"]["testing"] = -5 + parameters["nested_training"]["validation"] = -5 + sanitize_outputDir() + TrainingManager( + dataframe=training_data, + outputDir=outputDir, + parameters=parameters, + device=device, + resume=False, + reset=True, + ) + + sanitize_outputDir() + + print("passed") From 414ae8b4fd59597aa193cf9d30ae16e517e76e9a Mon Sep 17 00:00:00 2001 From: Sarthak Pati Date: Mon, 11 Sep 2023 04:43:12 -0400 Subject: [PATCH 2/5] Update test_full.py --- testing/test_full.py | 55 +------------------------------------------- 1 file changed, 1 insertion(+), 54 deletions(-) diff --git a/testing/test_full.py b/testing/test_full.py index fcb73fa6d..7942de299 100644 --- a/testing/test_full.py +++ b/testing/test_full.py @@ -2861,7 +2861,7 @@ def test_generic_cli_function_recoverconfig(): resume=False, reset=True, ) - output_config_path = write_temp_config_path(parameters=None) + output_config_path = write_temp_config_path(None) assert recover_config( outputDir, output_config_path ), "recover_config returned false" @@ -3079,59 +3079,6 @@ def test_generic_deploy_metrics_docker(): print("passed") - -def test_train_synthesis_rad_3d(device): - print("XX: Starting 3D Rad synthesis tests") - # read and parse csv - # read and initialize parameters for specific data dimension - parameters = parseConfig( - testingDir + "/config_segmentation.yaml", version_check_flag=False - ) - parameters["model"]["final_layer"] = "synthesis" - training_data, parameters["headers"] = parseTrainingCSV( - inputDir + "/train_3d_rad_segmentation.csv" - ) - parameters["modality"] = "rad" - parameters["patch_size"] = patch_size["3D"] - parameters["model"]["dimension"] = 3 - parameters["model"]["class_list"] = [0, 1] - parameters["model"]["final_layer"] = "softmax" - parameters["model"]["amp"] = True - parameters["in_memory"] = True - parameters["model"]["num_channels"] = len(parameters["headers"]["channelHeaders"]) - parameters["model"]["onnx_export"] = False - parameters["model"]["print_summary"] = False - parameters = populate_header_in_parameters(parameters, parameters["headers"]) - # loop through selected models and train for single epoch - for model in all_models_segmentation: - if model == "imagenet_unet": - # imagenet_unet encoder needs to be toned down for small patch size - parameters["model"]["encoder_name"] = "mit_b0" - with pytest.raises(Exception) as exc_info: - _ = global_models_dict[model](parameters) - print("Exception raised:", exc_info.value) - parameters["model"]["encoder_name"] = "resnet34" - parameters["model"]["encoder_depth"] = 3 - parameters["model"]["decoder_channels"] = (64, 32, 16) - parameters["model"]["final_layer"] = random.choice( - ["sigmoid", "softmax", "logsoftmax", "tanh", "identity"] - ) - parameters["model"]["converter_type"] = random.choice( - ["acs", "soft", "conv3d"] - ) - parameters["model"]["architecture"] = model - parameters["nested_training"]["testing"] = -5 - parameters["nested_training"]["validation"] = -5 - sanitize_outputDir() - TrainingManager( - dataframe=training_data, - outputDir=outputDir, - parameters=parameters, - device=device, - resume=False, - reset=True, - ) - sanitize_outputDir() print("passed") From 5aa3c9ebac672f0f5d8815f3ae38c6e7f4bb95c1 Mon Sep 17 00:00:00 2001 From: Sarthak Pati Date: Mon, 11 Sep 2023 05:19:28 -0400 Subject: [PATCH 3/5] Update test_full.py --- testing/test_full.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/test_full.py b/testing/test_full.py index 7942de299..cddfda529 100644 --- 
a/testing/test_full.py +++ b/testing/test_full.py @@ -2211,7 +2211,7 @@ def test_train_inference_classification_histology_large_2d(device): parameters_patch["patch_size"] = [128, 128] parameters_patch["value_map"] = {0: 0, 255: 255} - file_config_temp = write_temp_config_path(parameters) + file_config_temp = write_temp_config_path(parameters_patch) patch_extraction( inputDir + "/train_2d_histo_classification.csv", From c796e392c51f1284cf4818a6fdac7e79c3a4cbd3 Mon Sep 17 00:00:00 2001 From: Sarthak Pati Date: Mon, 11 Sep 2023 06:10:35 -0400 Subject: [PATCH 4/5] typo fixed --- testing/test_full.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/test_full.py b/testing/test_full.py index cddfda529..1a9872c04 100644 --- a/testing/test_full.py +++ b/testing/test_full.py @@ -2131,7 +2131,7 @@ def test_train_inference_segmentation_histology_2d(device): # define patches to be extracted in terms of microns parameters_patch["patch_size"] = ["1000m", "1000m"] - file_config_temp = write_temp_config_path(parameters) + file_config_temp = write_temp_config_path(parameters_patch) patch_extraction( inputDir + "/train_2d_histo_segmentation.csv", From 3911c2620bbf988ff3b72c7febab5dea0a0ad987 Mon Sep 17 00:00:00 2001 From: Sarthak Pati Date: Mon, 11 Sep 2023 09:47:22 -0400 Subject: [PATCH 5/5] revert some changes --- testing/test_full.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/testing/test_full.py b/testing/test_full.py index 1a9872c04..4680a71ae 100644 --- a/testing/test_full.py +++ b/testing/test_full.py @@ -2910,17 +2910,26 @@ def test_generic_deploy_docker(): reset=True, ) - result = run_deployment( - os.path.join(gandlfRootDir, "mlcube/model_mlcube/"), - deploymentOutputDir, - "docker", - "model", - configfile=testingDir + "/config_segmentation.yaml", - modeldir=outputDir, - requires_gpu=True, - ) + custom_entrypoint = os.path.join( + gandlfRootDir, + "mlcube/model_mlcube/example_custom_entrypoint/getting_started_3d_rad_seg.py", + ) + for entrypoint_script in [None, custom_entrypoint]: + result = run_deployment( + os.path.join(gandlfRootDir, "mlcube/model_mlcube/"), + deploymentOutputDir, + "docker", + "model", + entrypoint_script=entrypoint_script, + configfile=testingDir + "/config_segmentation.yaml", + modeldir=outputDir, + requires_gpu=True, + ) + msg = "run_deployment returned false" + if entrypoint_script: + msg += " with custom entrypoint script" + assert result, msg - assert result, "run_deployment returned false" sanitize_outputDir() print("passed")
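
For reference, the helper this series converges on, reconstructed from the hunks above. This is a sketch rather than an additional patch: the function body is taken verbatim from 1/5, but the imports and the `outputDir` placeholder are illustration-only stand-ins for the module-level state that testing/test_full.py already defines (normally created by its sanitize_outputDir()).

    import os
    import yaml

    # placeholder for illustration only; test_full.py defines outputDir at
    # module scope and sanitize_outputDir() ensures the directory exists
    outputDir = "./testing_output"
    os.makedirs(outputDir, exist_ok=True)


    def write_temp_config_path(parameters_to_write):
        print("02_2: Creating path for temporary config file")
        temp_config_path = os.path.join(outputDir, "config_temp.yaml")
        # if found in previous run, discard.
        if os.path.exists(temp_config_path):
            os.remove(temp_config_path)
        # writing is optional: passing None returns a fresh path without
        # creating a file, which is how 2/5 has the recover_config test
        # call it
        if parameters_to_write is not None:
            with open(temp_config_path, "w") as file:
                yaml.dump(parameters_to_write, file)
        return temp_config_path

Typical call sites once 3/5 and 4/5 have fixed the parameters/parameters_patch mix-ups introduced in 1/5:

    # dump the dict and get the config path back in a single step
    file_config_temp = write_temp_config_path(parameters_patch)

    # path only, nothing written (used before recover_config)
    output_config_path = write_temp_config_path(None)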