From 94df5f18373e88100f431b5cc138efee6c523134 Mon Sep 17 00:00:00 2001 From: Nile Wilson Date: Wed, 5 Jul 2023 17:42:39 -0400 Subject: [PATCH 01/25] Adding in ability to select redact approach --- .../dicom_image_redactor_engine.py | 100 +++++++++++++----- 1 file changed, 75 insertions(+), 25 deletions(-) diff --git a/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py b/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py index dfb8f98bd..c2f735990 100644 --- a/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py +++ b/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py @@ -14,7 +14,8 @@ from presidio_image_redactor import ImageRedactorEngine from presidio_image_redactor import ImageAnalyzerEngine # noqa: F401 -from presidio_analyzer import PatternRecognizer +from presidio_analyzer import Pattern, PatternRecognizer +from presidio_image_redactor.entities import ImageRecognizerResult class DicomImageRedactorEngine(ImageRedactorEngine): @@ -29,6 +30,7 @@ def redact( fill: str = "contrast", padding_width: int = 25, crop_ratio: float = 0.75, + redact_approach: Union[str, PatternRecognizer] = "metadata", ocr_kwargs: Optional[dict] = None, **text_analyzer_kwargs, ): @@ -42,6 +44,7 @@ def redact( :param padding_width: Padding width to use when running OCR. :param crop_ratio: Portion of image to consider when selecting most common pixel value as the background color value. + :param redact_approach: What approach to use when redacting ("default", "metadata", or a PatternRecognizer object). :param ocr_kwargs: Additional params for OCR methods. :param text_analyzer_kwargs: Additional values for the analyze method in AnalyzerEngine. 
@@ -64,18 +67,8 @@ def redact( loaded_image = Image.open(png_filepath) image = self._add_padding(loaded_image, is_greyscale, padding_width) - # Create custom recognizer using DICOM metadata - original_metadata, is_name, is_patient = self._get_text_metadata(instance) - phi_list = self._make_phi_list(original_metadata, is_name, is_patient) - deny_list_recognizer = PatternRecognizer( - supported_entity="PERSON", deny_list=phi_list - ) - analyzer_results = self.image_analyzer_engine.analyze( - image, - ad_hoc_recognizers=[deny_list_recognizer], - ocr_kwargs=ocr_kwargs, - **text_analyzer_kwargs, - ) + # Detect PII + analyzer_results = self._get_analyzer_results(image, instance, redact_approach, ocr_kwargs, **text_analyzer_kwargs) # Redact all bounding boxes from DICOM file analyzer_bboxes = self.bbox_processor.get_bboxes_from_analyzer_results( @@ -95,6 +88,7 @@ def redact_from_file( padding_width: int = 25, crop_ratio: float = 0.75, fill: str = "contrast", + redact_approach: Union[str, PatternRecognizer] = "metadata", ocr_kwargs: Optional[dict] = None, **text_analyzer_kwargs, ) -> None: @@ -108,6 +102,7 @@ def redact_from_file( :param padding_width : Padding width to use when running OCR. :param fill: Color setting to use for redaction box ("contrast" or "background"). + :param redact_approach: What approach to use when redacting ("default", "metadata", or a PatternRecognizer object). :param ocr_kwargs: Additional params for OCR methods. :param text_analyzer_kwargs: Additional values for the analyze method in AnalyzerEngine. 
@@ -131,6 +126,7 @@ def redact_from_file( crop_ratio=crop_ratio, fill=fill, padding_width=padding_width, + redact_approach=redact_approach, overwrite=True, dst_parent_dir=".", ocr_kwargs=ocr_kwargs, @@ -148,6 +144,7 @@ def redact_from_directory( padding_width: int = 25, crop_ratio: float = 0.75, fill: str = "contrast", + redact_approach: Union[str, PatternRecognizer] = "metadata", ocr_kwargs: Optional[dict] = None, **text_analyzer_kwargs, ) -> None: @@ -163,6 +160,7 @@ def redact_from_directory( most common pixel value as the background color value. :param fill: Color setting to use for redaction box ("contrast" or "background"). + :param redact_approach: What approach to use when redacting ("default", "metadata", or a PatternRecognizer object). :param ocr_kwargs: Additional params for OCR methods. :param text_analyzer_kwargs: Additional values for the analyze method in AnalyzerEngine. @@ -186,6 +184,7 @@ def redact_from_directory( crop_ratio=crop_ratio, fill=fill, padding_width=padding_width, + redact_approach=redact_approach, overwrite=True, dst_parent_dir=".", ocr_kwargs=ocr_kwargs, @@ -720,12 +719,69 @@ def _add_redact_box( return redacted_instance + def _get_analyzer_results( + self, + image: PIL.PngImagePlugin.PngImageFile, + instance: pydicom.dataset.FileDataset, + redact_approach: Union[str, PatternRecognizer], + ocr_kwargs: Optional[dict], + **text_analyzer_kwargs + ) -> List[ImageRecognizerResult]: + """Analyze image with selected redaction approach. + + :param image: DICOM pixel data as PIL image. + :param instance: DICOM instance (with metadata). + :param redact_approach: What approach to use when redacting ("default", "metadata", "allow", or a PatternRecognizer object). + :param ocr_kwargs: Additional params for OCR methods. + :param text_analyzer_kwargs: Additional values for the analyze method + in AnalyzerEngine (e.g., allow_list). + + :return: Analyzer results. 
+ """ + # Detect PII + if type(redact_approach) == str: + if redact_approach.lower() == "default": + # Use default redactor + analyzer_results = self.image_analyzer_engine.analyze( + image, + ocr_kwargs=ocr_kwargs, + **text_analyzer_kwargs, + ) + elif redact_approach.lower() == "metadata": + # Create custom recognizer using DICOM metadata + original_metadata, is_name, is_patient = self._get_text_metadata(instance) + phi_list = self._make_phi_list(original_metadata, is_name, is_patient) + deny_list_recognizer = PatternRecognizer( + supported_entity="PERSON", deny_list=phi_list + ) + analyzer_results = self.image_analyzer_engine.analyze( + image, + ocr_kwargs=ocr_kwargs, + ad_hoc_recognizers=[deny_list_recognizer], + **text_analyzer_kwargs, + ) + else: + raise ValueError("Please enter valid string or PatternRecognizer object for redact_approach") + elif type(redact_approach)==PatternRecognizer: + # Use passed in recognizer + analyzer_results = self.image_analyzer_engine.analyze( + image, + ocr_kwargs=ocr_kwargs, + ad_hoc_recognizers=[redact_approach], + **text_analyzer_kwargs, + ) + else: + raise ValueError("Please enter valid string or PatternRecognizer object for redact_approach") + + return analyzer_results + def _redact_single_dicom_image( self, dcm_path: str, crop_ratio: float, fill: str, padding_width: int, + redact_approach: Union[str, PatternRecognizer], overwrite: bool, dst_parent_dir: str, ocr_kwargs: Optional[dict] = None, @@ -739,6 +795,7 @@ def _redact_single_dicom_image( :param fill: Color setting to use for bounding boxes ("contrast" or "background"). :param padding_width: Pixel width of padding (uniform). + :param redact_approach: What approach to use when redacting ("default", "metadata", or a PatternRecognizer object). :param overwrite: Only set to True if you are providing the duplicated DICOM path in dcm_path. :param dst_parent_dir: String path to parent directory of where to store copies. 
@@ -771,18 +828,8 @@ def _redact_single_dicom_image( loaded_image = Image.open(png_filepath) image = self._add_padding(loaded_image, is_greyscale, padding_width) - # Create custom recognizer using DICOM metadata - original_metadata, is_name, is_patient = self._get_text_metadata(instance) - phi_list = self._make_phi_list(original_metadata, is_name, is_patient) - deny_list_recognizer = PatternRecognizer( - supported_entity="PERSON", deny_list=phi_list - ) - analyzer_results = self.image_analyzer_engine.analyze( - image, - ad_hoc_recognizers=[deny_list_recognizer], - ocr_kwargs=ocr_kwargs, - **text_analyzer_kwargs, - ) + # Detect PII + analyzer_results = self._get_analyzer_results(image, instance, redact_approach, ocr_kwargs, **text_analyzer_kwargs) # Redact all bounding boxes from DICOM file analyzer_bboxes = self.bbox_processor.get_bboxes_from_analyzer_results( @@ -804,6 +851,7 @@ def _redact_multiple_dicom_images( crop_ratio: float, fill: str, padding_width: int, + redact_approach: Union[str, PatternRecognizer], overwrite: bool, dst_parent_dir: str, ocr_kwargs: Optional[dict] = None, @@ -817,6 +865,7 @@ def _redact_multiple_dicom_images( :param fill: Color setting to use for bounding boxes ("contrast" or "background"). :param padding_width: Pixel width of padding (uniform). + :param redact_approach: What approach to use when redacting ("default", "metadata", or a PatternRecognizer object). :param overwrite: Only set to True if you are providing the duplicated DICOM dir in dcm_dir. :param dst_parent_dir: String path to parent directory of where to store copies. 
@@ -847,6 +896,7 @@ def _redact_multiple_dicom_images( crop_ratio, fill, padding_width, + redact_approach, overwrite, dst_parent_dir, ocr_kwargs=ocr_kwargs, From 22d18a1a02077fec2e1b21cb0e6bb5e22116cd3d Mon Sep 17 00:00:00 2001 From: Nile Wilson Date: Wed, 5 Jul 2023 17:53:46 -0400 Subject: [PATCH 02/25] Adding redact_approach arg into tests so they pass correctly --- .../tests/test_dicom_image_redactor_engine.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py index 7bea8fcb6..de823a497 100644 --- a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py +++ b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py @@ -1137,7 +1137,7 @@ def test_DicomImageRedactorEngine_redact_exceptions( """ with pytest.raises(Exception) as exc_info: # Act - mock_engine.redact(image, "contrast", 25, False, "." + mock_engine.redact(image, fill="contrast", padding_width=25, redact_approach="metadata" ) # Assert @@ -1230,7 +1230,7 @@ def save_as(self, dst_path: str): # Act mock_engine._redact_single_dicom_image( - dcm_path, crop_ratio, "contrast", 25, overwrite, output_dir + dcm_path, crop_ratio, "contrast", 25, "metadata", overwrite, output_dir ) # Assert @@ -1272,7 +1272,7 @@ def test_DicomImageRedactorEngine_redact_single_dicom_image_exceptions( with pytest.raises(Exception) as exc_info: # Act mock_engine._redact_single_dicom_image( - dcm_path, 0.75, "contrast", 25, False, "." + dcm_path, 0.75, "contrast", 25, "metadata", False, "." 
) # Assert @@ -1329,7 +1329,7 @@ def test_DicomImageRedactorEngine_redact_multiple_dicom_images_happy_path( # Act mock_engine._redact_multiple_dicom_images( - dcm_path, crop_ratio, "contrast", 25, overwrite, output_dir + dcm_path, crop_ratio, "contrast", 25, "metadata", overwrite, output_dir ) # Assert @@ -1363,7 +1363,7 @@ def test_DicomImageRedactorEngine_redact_multiple_dicom_images_exceptions( with pytest.raises(Exception) as exc_info: # Act mock_engine._redact_multiple_dicom_images( - dcm_path, 0.75, "contrast", 25, False, "." + dcm_path, 0.75, "contrast", 25, "metadata", False, "." ) # Assert @@ -1418,7 +1418,7 @@ def test_DicomImageRedactorEngine_redact_from_file_happy_path( ) # Act - mock_engine.redact_from_file(dcm_path, "output", 25, "contrast") + mock_engine.redact_from_file(dcm_path, "output", padding_width=25, fill="contrast", redact_approach="metadata") # Assert assert mock_copy_files.call_count == 1 @@ -1460,7 +1460,7 @@ def test_DicomImageRedactorEngine_redact_from_file_exceptions( """ with pytest.raises(Exception) as exc_info: # Act - mock_engine.redact_from_file(input_path, output_path, 25, "contrast") + mock_engine.redact_from_file(input_path, output_path, padding_width=25, fill="contrast", redact_approach="metadata") # Assert assert expected_error_type == exc_info.typename @@ -1502,7 +1502,7 @@ def test_DicomImageRedactorEngine_redact_from_directory_happy_path( ) # Act - mock_engine.redact_from_directory(dcm_path, "output", 25, "contrast") + mock_engine.redact_from_directory(dcm_path, "output", padding_width=25, fill="contrast", redact_approach="metadata") # Assert assert mock_copy_files.call_count == 1 @@ -1533,7 +1533,7 @@ def test_DicomImageRedactorEngine_redact_from_directory_exceptions( """ with pytest.raises(Exception) as exc_info: # Act - mock_engine.redact_from_directory(input_path, output_path, 25, "contrast") + mock_engine.redact_from_directory(input_path, output_path, padding_width=25, fill="contrast", redact_approach="metadata") # 
Assert assert expected_error_type == exc_info.typename From 5ac0095834943a268ae57d644fcd2a425c89785d Mon Sep 17 00:00:00 2001 From: Nile Wilson Date: Thu, 6 Jul 2023 10:13:10 -0400 Subject: [PATCH 03/25] Adding test for _get_analyzer_results --- .../tests/test_dicom_image_redactor_engine.py | 179 ++++++++++++++---- 1 file changed, 138 insertions(+), 41 deletions(-) diff --git a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py index 15a8518e9..d83af2c9b 100644 --- a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py +++ b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py @@ -978,7 +978,7 @@ def test_add_redact_box_happy_path( ) # Assert - mock_check_if_greyscale.call_count == 1 + assert mock_check_if_greyscale.call_count == 1 if mock_is_greyscale is True: original_pixel_values = np.array(test_instance.pixel_array).flatten() redacted_pixel_values = np.array(test_redacted_instance.pixel_array).flatten() @@ -988,7 +988,7 @@ def test_add_redact_box_happy_path( box_color_pixels_redacted = len( np.where(redacted_pixel_values == mock_box_color)[0] ) - mock_get_common_pixel.call_count == 1 + assert mock_get_common_pixel.call_count == 1 else: list_of_RGB_pixels_original = np.vstack(test_instance.pixel_array).tolist() list_of_RGB_pixels_redacted = np.vstack( @@ -1004,10 +1004,140 @@ def test_add_redact_box_happy_path( np.where(np.array(list_of_RGB_pixels_redacted) == mock_box_color)[0] ) ) - mock_set_bbox_color.call_count == 1 + assert mock_set_bbox_color.call_count == 1 assert box_color_pixels_redacted > box_color_pixels_original +# ------------------------------------------------------ +# DicomImageRedactorEngine._get_analyzer_results() +# ------------------------------------------------------ +@pytest.mark.parametrize( + "image, dcm_path, redact_approach", + [ + ( + Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), + 
Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), + "default" + ), + ( + Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), + "DEFAULT" + ), + ( + Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), + "metadata" + ), + ( + Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), + "METADATA" + ), + ], +) +def test_get_analyzer_results_happy_path( + mocker, + image: Image, + dcm_path: str, + redact_approach: str +): + """Test happy path for DicomImageRedactorEngine._get_analyzer_results + + Args: + image (PIL.Image): A PIL image. + dcm_path (pathlib.Path): Path to DICOM file. + redact_approach (str): The redact approach to use. + """ + # Arrange + mock_analyze = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.ImageAnalyzerEngine.analyze", + return_value=None, + ) + mock_get_text_metadata = mocker.patch.object( + DicomImageRedactorEngine, + "_get_text_metadata", + return_value=[None, None, None], + ) + mock_make_phi_list = mocker.patch.object( + DicomImageRedactorEngine, + "_make_phi_list", + return_value=None, + ) + mock_pattern_recognizer = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.PatternRecognizer", + return_value=None, + ) + mock_engine = DicomImageRedactorEngine() + test_instance = pydicom.dcmread(dcm_path) + + # Act + _ = mock_engine._get_analyzer_results( + image, test_instance, redact_approach, None + ) + + # Assert + if redact_approach.lower() == "default": + assert mock_analyze.call_count == 1 + assert mock_get_text_metadata.call_count == 0 + assert mock_make_phi_list.call_count == 0 + assert mock_pattern_recognizer.call_count == 0 + elif redact_approach.lower() == "metadata": + assert mock_analyze.call_count == 1 + assert mock_get_text_metadata.call_count == 1 + assert mock_make_phi_list.call_count == 1 + 
assert mock_pattern_recognizer.call_count == 1 + +@pytest.mark.parametrize( + "image, dcm_path, redact_approach, expected_error_type", + [ + ( + Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), + "someotherapproach", + "ValueError" + ), + ( + Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), + "deefault", + "ValueError" + ), + ( + Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), + "meta", + "ValueError" + ), + ], +) +def test_get_analyzer_results_exceptions( + mock_engine: DicomImageRedactorEngine, + image: Image, + dcm_path: str, + redact_approach: str, + expected_error_type: str, +): + """Test error handling of DicomImageRedactorEngine _get_analyzer_results() + + Args: + mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. + image (PIL.Image): A PIL image. + dcm_path (pathlib.Path): Path to DICOM file. + redact_approach (str): The redact approach to use. + expected_error_type (str): Type of error we expect to be raised. 
+ """ + with pytest.raises(Exception) as exc_info: + # Arrange + test_instance = pydicom.dcmread(dcm_path) + + # Act + _ = mock_engine._get_analyzer_results( + image, test_instance, redact_approach, None + ) + + # Assert + assert expected_error_type == exc_info.typename # ------------------------------------------------------ # DicomImageRedactorEngine redact() @@ -1058,24 +1188,9 @@ def test_DicomImageRedactorEngine_redact_happy_path( return_value=None, ) - mock_get_text_metadata = mocker.patch.object( + mock_analyze = mocker.patch.object( DicomImageRedactorEngine, - "_get_text_metadata", - return_value=[None, None, None], - ) - mock_make_phi_list = mocker.patch.object( - DicomImageRedactorEngine, - "_make_phi_list", - return_value=None, - ) - - mock_pattern_recognizer = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.PatternRecognizer", - return_value=None, - ) - - mock_analyze = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.ImageAnalyzerEngine.analyze", + "_get_analyzer_results", return_value=None, ) @@ -1106,9 +1221,6 @@ def test_DicomImageRedactorEngine_redact_happy_path( assert mock_save_pixel_array.call_count == 1 assert mock_image_open.call_count == 1 assert mock_add_padding.call_count == 1 - assert mock_get_text_metadata.call_count == 1 - assert mock_make_phi_list.call_count == 1 - assert mock_pattern_recognizer.call_count == 1 assert mock_analyze.call_count == 1 assert mock_get_analyze_bbox.call_count == 1 assert mock_remove_bbox_padding.call_count == 1 @@ -1192,22 +1304,10 @@ def test_DicomImageRedactorEngine_redact_single_dicom_image_happy_path( "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._add_padding", return_value=None, ) - mock_get_text_metadata = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._get_text_metadata", - return_value=[None, None, None], - ) - mock_make_phi_list = mocker.patch( - 
"presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._make_phi_list", - return_value=None, - ) - - mock_pattern_recognizer = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.PatternRecognizer", - return_value=None, - ) - mock_analyze = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.ImageAnalyzerEngine.analyze", + mock_analyze = mocker.patch.object( + DicomImageRedactorEngine, + "_get_analyzer_results", return_value=None, ) @@ -1243,9 +1343,6 @@ def save_as(self, dst_path: str): assert mock_convert_dcm_to_png.call_count == 1 assert mock_image_open.call_count == 1 assert mock_add_padding.call_count == 1 - assert mock_get_text_metadata.call_count == 1 - assert mock_make_phi_list.call_count == 1 - assert mock_pattern_recognizer.call_count == 1 assert mock_analyze.call_count == 1 assert mock_get_analyze_bbox.call_count == 1 assert mock_remove_bbox_padding.call_count == 1 From d79b033fededf74e73e44be0b8b642cbe1808d11 Mon Sep 17 00:00:00 2001 From: Nile Wilson Date: Thu, 6 Jul 2023 10:27:00 -0400 Subject: [PATCH 04/25] Linting fixes --- .../dicom_image_redactor_engine.py | 42 ++++++++++++------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py b/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py index 536995155..e39d90d31 100644 --- a/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py +++ b/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py @@ -15,7 +15,7 @@ from presidio_image_redactor import ImageRedactorEngine from presidio_image_redactor import ImageAnalyzerEngine # noqa: F401 -from presidio_analyzer import Pattern, PatternRecognizer +from presidio_analyzer import PatternRecognizer from presidio_image_redactor.entities import ImageRecognizerResult @@ -45,7 +45,8 @@ def redact( :param padding_width: Padding width to use when 
running OCR. :param crop_ratio: Portion of image to consider when selecting most common pixel value as the background color value. - :param redact_approach: What approach to use when redacting ("default", "metadata", or a PatternRecognizer object). + :param redact_approach: What approach to use when redacting + ("default", "metadata", or a PatternRecognizer object). :param ocr_kwargs: Additional params for OCR methods. :param text_analyzer_kwargs: Additional values for the analyze method in AnalyzerEngine. @@ -75,7 +76,9 @@ def redact( image = self._add_padding(loaded_image, is_greyscale, padding_width) # Detect PII - analyzer_results = self._get_analyzer_results(image, instance, redact_approach, ocr_kwargs, **text_analyzer_kwargs) + analyzer_results = self._get_analyzer_results( + image, instance, redact_approach, ocr_kwargs, **text_analyzer_kwargs + ) # Redact all bounding boxes from DICOM file analyzer_bboxes = self.bbox_processor.get_bboxes_from_analyzer_results( @@ -109,7 +112,8 @@ def redact_from_file( :param padding_width : Padding width to use when running OCR. :param fill: Color setting to use for redaction box ("contrast" or "background"). - :param redact_approach: What approach to use when redacting ("default", "metadata", or a PatternRecognizer object). + :param redact_approach: What approach to use when redacting + ("default", "metadata", or a PatternRecognizer object). :param ocr_kwargs: Additional params for OCR methods. :param text_analyzer_kwargs: Additional values for the analyze method in AnalyzerEngine. @@ -167,7 +171,8 @@ def redact_from_directory( most common pixel value as the background color value. :param fill: Color setting to use for redaction box ("contrast" or "background"). - :param redact_approach: What approach to use when redacting ("default", "metadata", or a PatternRecognizer object). + :param redact_approach: What approach to use when redacting + ("default", "metadata", or a PatternRecognizer object). 
:param ocr_kwargs: Additional params for OCR methods. :param text_analyzer_kwargs: Additional values for the analyze method in AnalyzerEngine. @@ -744,7 +749,8 @@ def _get_analyzer_results( :param image: DICOM pixel data as PIL image. :param instance: DICOM instance (with metadata). - :param redact_approach: What approach to use when redacting ("default", "metadata", "allow", or a PatternRecognizer object). + :param redact_approach: What approach to use when redacting + ("default", "metadata", "allow", or a PatternRecognizer object). :param ocr_kwargs: Additional params for OCR methods. :param text_analyzer_kwargs: Additional values for the analyze method in AnalyzerEngine (e.g., allow_list). @@ -762,8 +768,12 @@ def _get_analyzer_results( ) elif redact_approach.lower() == "metadata": # Create custom recognizer using DICOM metadata - original_metadata, is_name, is_patient = self._get_text_metadata(instance) - phi_list = self._make_phi_list(original_metadata, is_name, is_patient) + original_metadata, is_name, is_patient = self._get_text_metadata( + instance + ) + phi_list = self._make_phi_list( + original_metadata, is_name, is_patient + ) deny_list_recognizer = PatternRecognizer( supported_entity="PERSON", deny_list=phi_list ) @@ -774,7 +784,7 @@ def _get_analyzer_results( **text_analyzer_kwargs, ) else: - raise ValueError("Please enter valid string or PatternRecognizer object for redact_approach") + raise ValueError("Please enter valid string or PatternRecognizer object for redact_approach") # noqa: E501 elif type(redact_approach)==PatternRecognizer: # Use passed in recognizer analyzer_results = self.image_analyzer_engine.analyze( @@ -784,10 +794,10 @@ def _get_analyzer_results( **text_analyzer_kwargs, ) else: - raise ValueError("Please enter valid string or PatternRecognizer object for redact_approach") + raise ValueError("Please enter valid string or PatternRecognizer object for redact_approach") # noqa: E501 return analyzer_results - + def 
_redact_single_dicom_image( self, dcm_path: str, @@ -808,7 +818,8 @@ def _redact_single_dicom_image( :param fill: Color setting to use for bounding boxes ("contrast" or "background"). :param padding_width: Pixel width of padding (uniform). - :param redact_approach: What approach to use when redacting ("default", "metadata", or a PatternRecognizer object). + :param redact_approach: What approach to use when redacting + ("default", "metadata", or a PatternRecognizer object). :param overwrite: Only set to True if you are providing the duplicated DICOM path in dcm_path. :param dst_parent_dir: String path to parent directory of where to store copies. @@ -847,7 +858,9 @@ def _redact_single_dicom_image( image = self._add_padding(loaded_image, is_greyscale, padding_width) # Detect PII - analyzer_results = self._get_analyzer_results(image, instance, redact_approach, ocr_kwargs, **text_analyzer_kwargs) + analyzer_results = self._get_analyzer_results( + image, instance, redact_approach, ocr_kwargs, **text_analyzer_kwargs + ) # Redact all bounding boxes from DICOM file analyzer_bboxes = self.bbox_processor.get_bboxes_from_analyzer_results( @@ -883,7 +896,8 @@ def _redact_multiple_dicom_images( :param fill: Color setting to use for bounding boxes ("contrast" or "background"). :param padding_width: Pixel width of padding (uniform). - :param redact_approach: What approach to use when redacting ("default", "metadata", or a PatternRecognizer object). + :param redact_approach: What approach to use when redacting + ("default", "metadata", or a PatternRecognizer object). :param overwrite: Only set to True if you are providing the duplicated DICOM dir in dcm_dir. :param dst_parent_dir: String path to parent directory of where to store copies. 
From 22561ba6f3d621b6f67176bc9ad7885c0bcd16f1 Mon Sep 17 00:00:00 2001 From: Nile Wilson Date: Thu, 6 Jul 2023 10:39:03 -0400 Subject: [PATCH 05/25] Additional linting fixes --- .../presidio_image_redactor/dicom_image_redactor_engine.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py b/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py index e39d90d31..dceed343a 100644 --- a/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py +++ b/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py @@ -784,8 +784,8 @@ def _get_analyzer_results( **text_analyzer_kwargs, ) else: - raise ValueError("Please enter valid string or PatternRecognizer object for redact_approach") # noqa: E501 - elif type(redact_approach)==PatternRecognizer: + raise ValueError("Please enter valid string or PatternRecognizer object for redact_approach") # noqa: E501 + elif type(redact_approach) == PatternRecognizer: # Use passed in recognizer analyzer_results = self.image_analyzer_engine.analyze( image, @@ -794,7 +794,7 @@ def _get_analyzer_results( **text_analyzer_kwargs, ) else: - raise ValueError("Please enter valid string or PatternRecognizer object for redact_approach") # noqa: E501 + raise ValueError("Please enter valid string or PatternRecognizer object for redact_approach") # noqa: E501 return analyzer_results From a2167c1d0db474f0ad3e3d91b4f0800cd473f276 Mon Sep 17 00:00:00 2001 From: Nile Wilson Date: Mon, 7 Aug 2023 10:28:37 -0400 Subject: [PATCH 06/25] Making default approach the default --- .../presidio_image_redactor/dicom_image_redactor_engine.py | 6 +++--- .../test_dicom_image_redactor_engine_integration.py | 4 +++- .../tests/test_dicom_image_redactor_engine.py | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py 
b/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py index dceed343a..f674645d0 100644 --- a/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py +++ b/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py @@ -31,7 +31,7 @@ def redact( fill: str = "contrast", padding_width: int = 25, crop_ratio: float = 0.75, - redact_approach: Union[str, PatternRecognizer] = "metadata", + redact_approach: Union[str, PatternRecognizer] = "default", ocr_kwargs: Optional[dict] = None, **text_analyzer_kwargs, ): @@ -98,7 +98,7 @@ def redact_from_file( padding_width: int = 25, crop_ratio: float = 0.75, fill: str = "contrast", - redact_approach: Union[str, PatternRecognizer] = "metadata", + redact_approach: Union[str, PatternRecognizer] = "default", ocr_kwargs: Optional[dict] = None, **text_analyzer_kwargs, ) -> None: @@ -155,7 +155,7 @@ def redact_from_directory( padding_width: int = 25, crop_ratio: float = 0.75, fill: str = "contrast", - redact_approach: Union[str, PatternRecognizer] = "metadata", + redact_approach: Union[str, PatternRecognizer] = "default", ocr_kwargs: Optional[dict] = None, **text_analyzer_kwargs, ) -> None: diff --git a/presidio-image-redactor/tests/integration/test_dicom_image_redactor_engine_integration.py b/presidio-image-redactor/tests/integration/test_dicom_image_redactor_engine_integration.py index eae298a7c..d82a815e4 100644 --- a/presidio-image-redactor/tests/integration/test_dicom_image_redactor_engine_integration.py +++ b/presidio-image-redactor/tests/integration/test_dicom_image_redactor_engine_integration.py @@ -43,7 +43,7 @@ def test_redact_image_correctly( dcm_filepath (Path): Path to DICOM file to load. 
""" test_image = pydicom.dcmread(dcm_filepath) - test_redacted_image = mock_engine.redact(test_image) + test_redacted_image = mock_engine.redact(test_image, redact_approach="metadata") assert ( np.array_equal(test_image.pixel_array, test_redacted_image.pixel_array) is False @@ -63,6 +63,7 @@ def test_redact_from_single_file_correctly(mock_engine: DicomImageRedactorEngine input_dicom_path=str(input_path), output_dir=tmpdirname, fill="contrast", + redact_approach="metadata" ) output_path = Path(tmpdirname, f"{input_path.stem}.dcm") @@ -105,6 +106,7 @@ def test_redact_from_directory_correctly(mock_engine: DicomImageRedactorEngine): input_dicom_path=str(input_path), output_dir=tmpdirname, fill="contrast", + redact_approach="metadata" ) # Get list of all DICOM files diff --git a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py index d83af2c9b..eca8fac74 100644 --- a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py +++ b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py @@ -1213,7 +1213,7 @@ def test_DicomImageRedactorEngine_redact_happy_path( mock_engine = DicomImageRedactorEngine() # Act - mock_engine.redact(test_image) + mock_engine.redact(test_image, redact_approach="metadata") # Assert assert mock_check_greyscale.call_count == 1 From 90663490e949aa4a9ca1e786df0b0ffe9a8c657f Mon Sep 17 00:00:00 2001 From: Nile Wilson Date: Mon, 7 Aug 2023 12:25:24 -0400 Subject: [PATCH 07/25] Linting fix --- .../presidio_image_redactor/dicom_image_redactor_engine.py | 1 + 1 file changed, 1 insertion(+) diff --git a/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py b/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py index fc157c92b..c9bc042c3 100644 --- a/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py +++ 
b/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py @@ -922,6 +922,7 @@ def _get_analyzer_results( raise ValueError("Please enter valid string or PatternRecognizer object for redact_approach") # noqa: E501 return analyzer_results + @staticmethod def _save_bbox_json(output_dcm_path: str, bboxes: List[Dict[str, int]]) -> None: """Save the redacted bounding box info as a json file. From 360a386468aaf8d0ae5bb7aaba01f1cff718b5b8 Mon Sep 17 00:00:00 2001 From: Nile Wilson Date: Mon, 7 Aug 2023 12:55:11 -0400 Subject: [PATCH 08/25] Replacing patch.object() with patch() in modified tests --- .../tests/test_dicom_image_redactor_engine.py | 22 ++++++++----------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py index 0856c4f1f..739e3a766 100644 --- a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py +++ b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py @@ -1203,14 +1203,12 @@ def test_get_analyzer_results_happy_path( "presidio_image_redactor.dicom_image_redactor_engine.ImageAnalyzerEngine.analyze", return_value=None, ) - mock_get_text_metadata = mocker.patch.object( - DicomImageRedactorEngine, - "_get_text_metadata", + mock_get_text_metadata = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._get_text_metadata", return_value=[None, None, None], ) - mock_make_phi_list = mocker.patch.object( - DicomImageRedactorEngine, - "_make_phi_list", + mock_make_phi_list = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._make_phi_list", return_value=None, ) mock_pattern_recognizer = mocker.patch( @@ -1436,9 +1434,8 @@ def test_DicomImageRedactorEngine_redact_happy_path( # Arrange test_image = pydicom.dcmread(Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm")) - mock_redact_return_bbox = 
mocker.patch.object( - DicomImageRedactorEngine, - "redact_and_return_bbox", + mock_redact_return_bbox = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine.redact_and_return_bbox", return_value=[test_image, [{}, {}, {}]] ) test_mock_redact_engine = DicomImageRedactorEngine() @@ -1448,7 +1445,7 @@ def test_DicomImageRedactorEngine_redact_happy_path( # Assert assert type(test_redacted_image) in [pydicom.dataset.FileDataset, pydicom.dataset.Dataset] - assert mock_redact_return_bbox.call_count == 1 + mock_redact_return_bbox.assert_called_once() # ------------------------------------------------------ # DicomImageRedactorEngine _save_bbox_json() @@ -1582,9 +1579,8 @@ def test_DicomImageRedactorEngine_redact_single_dicom_image_happy_path( return_value=None, ) - mock_analyze = mocker.patch.object( - DicomImageRedactorEngine, - "_get_analyzer_results", + mock_analyze = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._get_analyzer_results", return_value=None, ) From a3ce5f88143c083caa39e19a71ddd770560b6e95 Mon Sep 17 00:00:00 2001 From: Nile Wilson Date: Mon, 7 Aug 2023 12:58:52 -0400 Subject: [PATCH 09/25] Fixed mocker patch for new _get_analyzer_results in redact_and_return_bbox --- .../tests/test_dicom_image_redactor_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py index 739e3a766..b82443efa 100644 --- a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py +++ b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py @@ -1347,7 +1347,7 @@ def test_DicomImageRedactorEngine_redact_and_return_bbox( ) mock_analyze = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.ImageAnalyzerEngine.analyze", + 
"presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._get_analyzer_results", return_value=None, ) From e2ab725f673a2fb6e834e220684b950f7543e051 Mon Sep 17 00:00:00 2001 From: Nile Wilson Date: Mon, 7 Aug 2023 13:12:49 -0400 Subject: [PATCH 10/25] Removing old assertions brought over from incorrect merge conflict resolution --- .../tests/test_dicom_image_redactor_engine.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py index b82443efa..66806721c 100644 --- a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py +++ b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py @@ -1332,20 +1332,6 @@ def test_DicomImageRedactorEngine_redact_and_return_bbox( "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._add_padding", return_value=None, ) - mock_get_text_metadata = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._get_text_metadata", - return_value=[None, None, None], - ) - mock_make_phi_list = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._make_phi_list", - return_value=None, - ) - - mock_pattern_recognizer = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.PatternRecognizer", - return_value=None, - ) - mock_analyze = mocker.patch( "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._get_analyzer_results", return_value=None, @@ -1377,9 +1363,6 @@ def test_DicomImageRedactorEngine_redact_and_return_bbox( mock_save_pixel_array.assert_called_once() mock_image_open.assert_called_once() mock_add_padding.assert_called_once() - mock_get_text_metadata.assert_called_once() - mock_make_phi_list.assert_called_once() - mock_pattern_recognizer.assert_called_once() mock_analyze.assert_called_once() 
mock_get_analyze_bbox.assert_called_once() mock_remove_bbox_padding.assert_called_once() From ec2649a80c06b550419c8d4011392edb78ed3223 Mon Sep 17 00:00:00 2001 From: Nile Wilson Date: Mon, 7 Aug 2023 13:30:10 -0400 Subject: [PATCH 11/25] Replacing call_count == statements with assert_called_once etc --- .../tests/test_dicom_image_redactor_engine.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py index 66806721c..45b8f876e 100644 --- a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py +++ b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py @@ -1225,15 +1225,15 @@ def test_get_analyzer_results_happy_path( # Assert if redact_approach.lower() == "default": - assert mock_analyze.call_count == 1 - assert mock_get_text_metadata.call_count == 0 - assert mock_make_phi_list.call_count == 0 - assert mock_pattern_recognizer.call_count == 0 + mock_analyze.assert_called_once() + mock_get_text_metadata.assert_not_called() + mock_make_phi_list.assert_not_called() + mock_pattern_recognizer.assert_not_called() elif redact_approach.lower() == "metadata": - assert mock_analyze.call_count == 1 - assert mock_get_text_metadata.call_count == 1 - assert mock_make_phi_list.call_count == 1 - assert mock_pattern_recognizer.call_count == 1 + mock_analyze.assert_called_once() + mock_get_text_metadata.assert_called_once() + mock_make_phi_list.assert_called_once() + mock_pattern_recognizer.assert_called_once() @pytest.mark.parametrize( "image, dcm_path, redact_approach, expected_error_type", From 529cbd67d11dfe313110ccc16d018c47935e0aa2 Mon Sep 17 00:00:00 2001 From: Nile Wilson Date: Mon, 7 Aug 2023 14:17:18 -0400 Subject: [PATCH 12/25] Replacing in-test instantiation with passing in mock_engine --- .../tests/test_dicom_image_redactor_engine.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 
deletions(-) diff --git a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py index 45b8f876e..d5f1dacb5 100644 --- a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py +++ b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py @@ -1187,6 +1187,7 @@ def test_add_redact_box_happy_path( ) def test_get_analyzer_results_happy_path( mocker, + mock_engine: DicomImageRedactorEngine, image: Image, dcm_path: str, redact_approach: str @@ -1194,6 +1195,7 @@ def test_get_analyzer_results_happy_path( """Test happy path for DicomImageRedactorEngine._get_analyzer_results Args: + mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. image (PIL.Image): A PIL image. dcm_path (pathlib.Path): Path to DICOM file. redact_approach (str): The redact approach to use. @@ -1215,7 +1217,6 @@ def test_get_analyzer_results_happy_path( "presidio_image_redactor.dicom_image_redactor_engine.PatternRecognizer", return_value=None, ) - mock_engine = DicomImageRedactorEngine() test_instance = pydicom.dcmread(dcm_path) # Act @@ -1411,6 +1412,7 @@ def test_DicomImageRedactorEngine_redact_and_return_bbox_exceptions( # ------------------------------------------------------ def test_DicomImageRedactorEngine_redact_happy_path( mocker, + mock_engine: DicomImageRedactorEngine, ): """Test happy path for DicomImageRedactorEngine redact() """ @@ -1421,10 +1423,9 @@ def test_DicomImageRedactorEngine_redact_happy_path( "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine.redact_and_return_bbox", return_value=[test_image, [{}, {}, {}]] ) - test_mock_redact_engine = DicomImageRedactorEngine() # Act - test_redacted_image = test_mock_redact_engine.redact(test_image) + test_redacted_image = mock_engine.redact(test_image) # Assert assert type(test_redacted_image) in [pydicom.dataset.FileDataset, pydicom.dataset.Dataset] From 212dc8e5a8d166029f200cafbc829da238159fed Mon 
Sep 17 00:00:00 2001 From: Nile Wilson Date: Thu, 17 Aug 2023 11:51:32 -0400 Subject: [PATCH 13/25] Suggested change from PR comment. redact_approach replaced with use_metadata and ability to pass in ad_hoc_recognizers --- .../dicom_image_redactor_engine.py | 140 +++++++++++------- ...dicom_image_redactor_engine_integration.py | 6 +- .../tests/test_dicom_image_redactor_engine.py | 95 ++++++++---- 3 files changed, 154 insertions(+), 87 deletions(-) diff --git a/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py b/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py index c9bc042c3..5fd00c623 100644 --- a/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py +++ b/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py @@ -16,6 +16,8 @@ from presidio_image_redactor import ImageRedactorEngine from presidio_image_redactor import ImageAnalyzerEngine # noqa: F401 +import presidio_analyzer # required for isinstance check +# which throws an error when trying to specify PatternRecognizer from presidio_analyzer import PatternRecognizer from presidio_image_redactor.entities import ImageRecognizerResult @@ -32,8 +34,9 @@ def redact_and_return_bbox( fill: str = "contrast", padding_width: int = 25, crop_ratio: float = 0.75, - redact_approach: Union[str, PatternRecognizer] = "default", + use_metadata: bool = True, ocr_kwargs: Optional[dict] = None, + ad_hoc_recognizers: Optional[List[PatternRecognizer]] = None, **text_analyzer_kwargs, ) -> Tuple[pydicom.dataset.FileDataset, List[Dict[str, int]]]: """Redact method to redact the given DICOM image and return redacted bboxes. @@ -46,9 +49,11 @@ def redact_and_return_bbox( :param padding_width: Padding width to use when running OCR. :param crop_ratio: Portion of image to consider when selecting most common pixel value as the background color value. 
- :param redact_approach: What approach to use when redacting - ("default", "metadata", or a PatternRecognizer object). + :param use_metadata: Whether to redact text in the image that + are present in the metadata. :param ocr_kwargs: Additional params for OCR methods. + :param ad_hoc_recognizers: List of PatternRecognizer objects to use + for ad-hoc recognizer. :param text_analyzer_kwargs: Additional values for the analyze method in AnalyzerEngine. @@ -82,7 +87,7 @@ def redact_and_return_bbox( # Detect PII analyzer_results = self._get_analyzer_results( - image, instance, redact_approach, ocr_kwargs, **text_analyzer_kwargs + image, instance, use_metadata, ocr_kwargs, ad_hoc_recognizers, **text_analyzer_kwargs ) # Redact all bounding boxes from DICOM file @@ -103,6 +108,7 @@ def redact( padding_width: int = 25, crop_ratio: float = 0.75, ocr_kwargs: Optional[dict] = None, + ad_hoc_recognizers: Optional[List[PatternRecognizer]] = None, **text_analyzer_kwargs, ) -> pydicom.dataset.FileDataset: """Redact method to redact the given DICOM image. @@ -116,6 +122,8 @@ def redact( :param crop_ratio: Portion of image to consider when selecting most common pixel value as the background color value. :param ocr_kwargs: Additional params for OCR methods. + :param ad_hoc_recognizers: List of PatternRecognizer objects to use + for ad-hoc recognizer. :param text_analyzer_kwargs: Additional values for the analyze method in AnalyzerEngine. 
@@ -127,6 +135,7 @@ def redact( padding_width=padding_width, crop_ratio=crop_ratio, ocr_kwargs=ocr_kwargs, + ad_hoc_recognizers=ad_hoc_recognizers, **text_analyzer_kwargs ) @@ -139,9 +148,10 @@ def redact_from_file( padding_width: int = 25, crop_ratio: float = 0.75, fill: str = "contrast", - redact_approach: Union[str, PatternRecognizer] = "default", + use_metadata: bool = True, save_bboxes: bool = False, ocr_kwargs: Optional[dict] = None, + ad_hoc_recognizers: Optional[List[PatternRecognizer]] = None, **text_analyzer_kwargs, ) -> None: """Redact method to redact from a given file. @@ -154,10 +164,12 @@ def redact_from_file( :param padding_width : Padding width to use when running OCR. :param fill: Color setting to use for redaction box ("contrast" or "background"). - :param redact_approach: What approach to use when redacting - ("default", "metadata", or a PatternRecognizer object). + :param use_metadata: Whether to redact text in the image that + are present in the metadata. :param save_bboxes: True if we want to save boundings boxes. :param ocr_kwargs: Additional params for OCR methods. + :param ad_hoc_recognizers: List of PatternRecognizer objects to use + for ad-hoc recognizer. :param text_analyzer_kwargs: Additional values for the analyze method in AnalyzerEngine. 
""" @@ -180,11 +192,12 @@ def redact_from_file( crop_ratio=crop_ratio, fill=fill, padding_width=padding_width, - redact_approach=redact_approach, + use_metadata=use_metadata, overwrite=True, dst_parent_dir=".", save_bboxes=save_bboxes, ocr_kwargs=ocr_kwargs, + ad_hoc_recognizers=ad_hoc_recognizers, **text_analyzer_kwargs, ) @@ -199,9 +212,10 @@ def redact_from_directory( padding_width: int = 25, crop_ratio: float = 0.75, fill: str = "contrast", - redact_approach: Union[str, PatternRecognizer] = "default", + use_metadata: bool = True, save_bboxes: bool = False, ocr_kwargs: Optional[dict] = None, + ad_hoc_recognizers: Optional[List[PatternRecognizer]] = None, **text_analyzer_kwargs, ) -> None: """Redact method to redact from a directory of files. @@ -216,10 +230,12 @@ def redact_from_directory( most common pixel value as the background color value. :param fill: Color setting to use for redaction box ("contrast" or "background"). - :param redact_approach: What approach to use when redacting - ("default", "metadata", or a PatternRecognizer object). + :param use_metadata: Whether to redact text in the image that + are present in the metadata. :param save_bboxes: True if we want to save boundings boxes. :param ocr_kwargs: Additional params for OCR methods. + :param ad_hoc_recognizers: List of PatternRecognizer objects to use + for ad-hoc recognizer. :param text_analyzer_kwargs: Additional values for the analyze method in AnalyzerEngine. 
""" @@ -242,7 +258,8 @@ def redact_from_directory( crop_ratio=crop_ratio, fill=fill, padding_width=padding_width, - redact_approach=redact_approach, + use_metadata=use_metadata, + ad_hoc_recognizers=ad_hoc_recognizers, overwrite=True, dst_parent_dir=".", save_bboxes=save_bboxes, @@ -866,60 +883,68 @@ def _get_analyzer_results( self, image: PIL.PngImagePlugin.PngImageFile, instance: pydicom.dataset.FileDataset, - redact_approach: Union[str, PatternRecognizer], + use_metadata: bool, ocr_kwargs: Optional[dict], + ad_hoc_recognizers: Optional[List[PatternRecognizer]], **text_analyzer_kwargs ) -> List[ImageRecognizerResult]: """Analyze image with selected redaction approach. :param image: DICOM pixel data as PIL image. :param instance: DICOM instance (with metadata). - :param redact_approach: What approach to use when redacting - ("default", "metadata", "allow", or a PatternRecognizer object). + :param use_metadata: Whether to redact text in the image that + are present in the metadata. :param ocr_kwargs: Additional params for OCR methods. + :param ad_hoc_recognizers: List of PatternRecognizer objects to use + for ad-hoc recognizer. :param text_analyzer_kwargs: Additional values for the analyze method in AnalyzerEngine (e.g., allow_list). :return: Analyzer results. 
""" + # Check the ad-hoc recognizres list + if isinstance(ad_hoc_recognizers, (list, type(None))): + if isinstance(ad_hoc_recognizers, list): + if len(ad_hoc_recognizers) >= 1: + are_recognizers = all(isinstance(x, presidio_analyzer.pattern_recognizer.PatternRecognizer) for x in ad_hoc_recognizers) + if are_recognizers is False: + raise TypeError("All items in ad_hoc_recognizers list must be PatternRecognizer objects") + else: + raise ValueError("ad_hoc_recognizers must be None or list of PatternRecognizer") + else: + raise TypeError("ad_hoc_recognizers must be None or list of PatternRecognizer") + + # Create custom recognizer using DICOM metadata + if use_metadata: + original_metadata, is_name, is_patient = self._get_text_metadata( + instance + ) + phi_list = self._make_phi_list( + original_metadata, is_name, is_patient + ) + deny_list_recognizer = PatternRecognizer( + supported_entity="PERSON", deny_list=phi_list + ) + + if type(ad_hoc_recognizers) is None: + ad_hoc_recognizers = [deny_list_recognizer] + elif type(ad_hoc_recognizers) is list: + ad_hoc_recognizers.append(deny_list_recognizer) + # Detect PII - if type(redact_approach) == str: - if redact_approach.lower() == "default": - # Use default redactor - analyzer_results = self.image_analyzer_engine.analyze( - image, - ocr_kwargs=ocr_kwargs, - **text_analyzer_kwargs, - ) - elif redact_approach.lower() == "metadata": - # Create custom recognizer using DICOM metadata - original_metadata, is_name, is_patient = self._get_text_metadata( - instance - ) - phi_list = self._make_phi_list( - original_metadata, is_name, is_patient - ) - deny_list_recognizer = PatternRecognizer( - supported_entity="PERSON", deny_list=phi_list - ) - analyzer_results = self.image_analyzer_engine.analyze( - image, - ocr_kwargs=ocr_kwargs, - ad_hoc_recognizers=[deny_list_recognizer], - **text_analyzer_kwargs, - ) - else: - raise ValueError("Please enter valid string or PatternRecognizer object for redact_approach") # noqa: E501 - elif 
type(redact_approach) == PatternRecognizer: - # Use passed in recognizer + if ad_hoc_recognizers is None: analyzer_results = self.image_analyzer_engine.analyze( image, ocr_kwargs=ocr_kwargs, - ad_hoc_recognizers=[redact_approach], **text_analyzer_kwargs, ) else: - raise ValueError("Please enter valid string or PatternRecognizer object for redact_approach") # noqa: E501 + analyzer_results = self.image_analyzer_engine.analyze( + image, + ocr_kwargs=ocr_kwargs, + ad_hoc_recognizers=ad_hoc_recognizers, + **text_analyzer_kwargs, + ) return analyzer_results @@ -942,11 +967,12 @@ def _redact_single_dicom_image( crop_ratio: float, fill: str, padding_width: int, - redact_approach: Union[str, PatternRecognizer], + use_metadata: bool, overwrite: bool, dst_parent_dir: str, save_bboxes: bool, ocr_kwargs: Optional[dict] = None, + ad_hoc_recognizers: Optional[List[PatternRecognizer]] = None, **text_analyzer_kwargs, ) -> str: """Redact text PHI present on a DICOM image. @@ -957,13 +983,15 @@ def _redact_single_dicom_image( :param fill: Color setting to use for bounding boxes ("contrast" or "background"). :param padding_width: Pixel width of padding (uniform). - :param redact_approach: What approach to use when redacting - ("default", "metadata", or a PatternRecognizer object). + :param use_metadata: Whether to redact text in the image that + are present in the metadata. :param overwrite: Only set to True if you are providing the duplicated DICOM path in dcm_path. :param dst_parent_dir: String path to parent directory of where to store copies. :param save_bboxes: True if we want to save boundings boxes. :param ocr_kwargs: Additional params for OCR methods. + :param ad_hoc_recognizers: List of PatternRecognizer objects to use + for ad-hoc recognizer. :param text_analyzer_kwargs: Additional values for the analyze method in AnalyzerEngine. 
@@ -999,7 +1027,7 @@ def _redact_single_dicom_image( # Detect PII analyzer_results = self._get_analyzer_results( - image, instance, redact_approach, ocr_kwargs, **text_analyzer_kwargs + image, instance, use_metadata, ocr_kwargs, ad_hoc_recognizers, **text_analyzer_kwargs ) # Redact all bounding boxes from DICOM file @@ -1026,11 +1054,12 @@ def _redact_multiple_dicom_images( crop_ratio: float, fill: str, padding_width: int, - redact_approach: Union[str, PatternRecognizer], + use_metadata: bool, overwrite: bool, dst_parent_dir: str, save_bboxes: bool, ocr_kwargs: Optional[dict] = None, + ad_hoc_recognizers: Optional[List[PatternRecognizer]] = None, **text_analyzer_kwargs, ) -> str: """Redact text PHI present on all DICOM images in a directory. @@ -1041,13 +1070,15 @@ def _redact_multiple_dicom_images( :param fill: Color setting to use for bounding boxes ("contrast" or "background"). :param padding_width: Pixel width of padding (uniform). - :param redact_approach: What approach to use when redacting - ("default", "metadata", or a PatternRecognizer object). + :param use_metadata: Whether to redact text in the image that + are present in the metadata. :param overwrite: Only set to True if you are providing the duplicated DICOM dir in dcm_dir. :param dst_parent_dir: String path to parent directory of where to store copies. :param save_bboxes: True if we want to save boundings boxes. :param ocr_kwargs: Additional params for OCR methods. + :param ad_hoc_recognizers: List of PatternRecognizer objects to use + for ad-hoc recognizer. :param text_analyzer_kwargs: Additional values for the analyze method in AnalyzerEngine. 
@@ -1074,11 +1105,12 @@ def _redact_multiple_dicom_images( crop_ratio, fill, padding_width, - redact_approach, + use_metadata, overwrite, dst_parent_dir, save_bboxes, ocr_kwargs=ocr_kwargs, + ad_hoc_recognizers=ad_hoc_recognizers, **text_analyzer_kwargs, ) diff --git a/presidio-image-redactor/tests/integration/test_dicom_image_redactor_engine_integration.py b/presidio-image-redactor/tests/integration/test_dicom_image_redactor_engine_integration.py index d82a815e4..3d2aabb60 100644 --- a/presidio-image-redactor/tests/integration/test_dicom_image_redactor_engine_integration.py +++ b/presidio-image-redactor/tests/integration/test_dicom_image_redactor_engine_integration.py @@ -43,7 +43,7 @@ def test_redact_image_correctly( dcm_filepath (Path): Path to DICOM file to load. """ test_image = pydicom.dcmread(dcm_filepath) - test_redacted_image = mock_engine.redact(test_image, redact_approach="metadata") + test_redacted_image = mock_engine.redact(test_image, use_metadata=True) assert ( np.array_equal(test_image.pixel_array, test_redacted_image.pixel_array) is False @@ -63,7 +63,7 @@ def test_redact_from_single_file_correctly(mock_engine: DicomImageRedactorEngine input_dicom_path=str(input_path), output_dir=tmpdirname, fill="contrast", - redact_approach="metadata" + use_metadata=True ) output_path = Path(tmpdirname, f"{input_path.stem}.dcm") @@ -106,7 +106,7 @@ def test_redact_from_directory_correctly(mock_engine: DicomImageRedactorEngine): input_dicom_path=str(input_path), output_dir=tmpdirname, fill="contrast", - redact_approach="metadata" + use_metadata=True ) # Get list of all DICOM files diff --git a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py index d5f1dacb5..015211350 100644 --- a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py +++ b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py @@ -7,7 +7,8 @@ from PIL import Image import pydicom from 
presidio_image_redactor.dicom_image_redactor_engine import DicomImageRedactorEngine -from typing import Union, List, Tuple, Dict, TypeVar +from presidio_analyzer import PatternRecognizer +from typing import Union, List, Tuple, Dict, TypeVar, Optional import pytest T = TypeVar('T') @@ -1161,27 +1162,31 @@ def test_add_redact_box_happy_path( # DicomImageRedactorEngine._get_analyzer_results() # ------------------------------------------------------ @pytest.mark.parametrize( - "image, dcm_path, redact_approach", + "image, dcm_path, use_metadata, ad_hoc_recognizers", [ ( Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - "default" + False, + None ), ( Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - "DEFAULT" + False, + [PatternRecognizer(supported_entity="PERSON", deny_list=["1"])] ), ( Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - "metadata" + True, + None ), ( Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - "METADATA" + True, + [PatternRecognizer(supported_entity="PERSON", deny_list=["2"])] ), ], ) @@ -1190,7 +1195,8 @@ def test_get_analyzer_results_happy_path( mock_engine: DicomImageRedactorEngine, image: Image, dcm_path: str, - redact_approach: str + use_metadata: bool, + ad_hoc_recognizers: Optional[List[PatternRecognizer]] ): """Test happy path for DicomImageRedactorEngine._get_analyzer_results @@ -1198,7 +1204,8 @@ def test_get_analyzer_results_happy_path( mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. image (PIL.Image): A PIL image. dcm_path (pathlib.Path): Path to DICOM file. - redact_approach (str): The redact approach to use. + use_metadata (bool): Whether to consider metadata when running analysis. 
+ ad_hoc_recognizers(None or list): Ad-hoc recognizers to use. """ # Arrange mock_analyze = mocker.patch( @@ -1221,41 +1228,41 @@ def test_get_analyzer_results_happy_path( # Act _ = mock_engine._get_analyzer_results( - image, test_instance, redact_approach, None + image, test_instance, use_metadata, None, ad_hoc_recognizers ) # Assert - if redact_approach.lower() == "default": + if use_metadata is False: mock_analyze.assert_called_once() mock_get_text_metadata.assert_not_called() mock_make_phi_list.assert_not_called() mock_pattern_recognizer.assert_not_called() - elif redact_approach.lower() == "metadata": + elif use_metadata is True: mock_analyze.assert_called_once() mock_get_text_metadata.assert_called_once() mock_make_phi_list.assert_called_once() mock_pattern_recognizer.assert_called_once() @pytest.mark.parametrize( - "image, dcm_path, redact_approach, expected_error_type", + "image, dcm_path, ad_hoc_recognizers, expected_error_type", [ ( Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - "someotherapproach", - "ValueError" + "invalidType", + "TypeError" ), ( Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - "deefault", + [], "ValueError" ), ( Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - "meta", - "ValueError" + [PatternRecognizer(supported_entity="PERSON", deny_list=["a"]), 2], + "TypeError" ), ], ) @@ -1263,7 +1270,7 @@ def test_get_analyzer_results_exceptions( mock_engine: DicomImageRedactorEngine, image: Image, dcm_path: str, - redact_approach: str, + ad_hoc_recognizers: Optional[List[PatternRecognizer]], expected_error_type: str, ): """Test error handling of DicomImageRedactorEngine _get_analyzer_results() @@ -1272,7 +1279,7 @@ def test_get_analyzer_results_exceptions( mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. 
image (PIL.Image): A PIL image. dcm_path (pathlib.Path): Path to DICOM file. - redact_approach (str): The redact approach to use. + ad_hoc_recognizers(None or list): Ad-hoc recognizers to use. expected_error_type (str): Type of error we expect to be raised. """ with pytest.raises(Exception) as exc_info: @@ -1281,7 +1288,7 @@ def test_get_analyzer_results_exceptions( # Act _ = mock_engine._get_analyzer_results( - image, test_instance, redact_approach, None + image, test_instance, True, None, ad_hoc_recognizers ) # Assert @@ -1354,7 +1361,7 @@ def test_DicomImageRedactorEngine_redact_and_return_bbox( ) # Act - test_redacted_image, test_bboxes = mock_engine.redact_and_return_bbox(test_image, redact_approach="metadata") + test_redacted_image, test_bboxes = mock_engine.redact_and_return_bbox(test_image, use_metadata=True) # Assert assert type(test_redacted_image) in [pydicom.dataset.FileDataset, pydicom.dataset.Dataset] @@ -1401,7 +1408,7 @@ def test_DicomImageRedactorEngine_redact_and_return_bbox_exceptions( else: test_image = image # Act - mock_engine.redact(test_image, fill="contrast", padding_width=25, redact_approach="metadata" + mock_engine.redact(test_image, fill="contrast", padding_width=25, use_metadata=True ) # Assert @@ -1589,7 +1596,14 @@ def save_as(self, dst_path: str): # Act mock_engine._redact_single_dicom_image( - dcm_path, crop_ratio, "contrast", 25, "metadata", overwrite, output_dir, False + dcm_path=dcm_path, + crop_ratio=crop_ratio, + fill="contrast", + padding_width=25, + use_metadata=True, + overwrite=overwrite, + dst_parent_dir=output_dir, + save_bboxes=False ) # Assert @@ -1628,7 +1642,14 @@ def test_DicomImageRedactorEngine_redact_single_dicom_image_exceptions( with pytest.raises(Exception) as exc_info: # Act mock_engine._redact_single_dicom_image( - dcm_path, 0.75, "contrast", 25, "metadata", False, ".", False + dcm_path=dcm_path, + crop_ratio=0.75, + fill="contrast", + padding_width=25, + use_metadata=True, + overwrite=False, + 
dst_parent_dir=".", + save_bboxes=False ) # Assert @@ -1685,7 +1706,14 @@ def test_DicomImageRedactorEngine_redact_multiple_dicom_images_happy_path( # Act mock_engine._redact_multiple_dicom_images( - dcm_path, crop_ratio, "contrast", 25, "metadata", overwrite, output_dir, False + dcm_dir=dcm_path, + crop_ratio=crop_ratio, + fill="contrast", + padding_width=25, + use_metadata=True, + overwrite=overwrite, + dst_parent_dir=output_dir, + save_bboxes=False ) # Assert @@ -1719,7 +1747,14 @@ def test_DicomImageRedactorEngine_redact_multiple_dicom_images_exceptions( with pytest.raises(Exception) as exc_info: # Act mock_engine._redact_multiple_dicom_images( - dcm_path, 0.75, "contrast", 25, "metadata", False, ".", False + dcm_dir=dcm_path, + crop_ratio=0.75, + fill="contrast", + padding_width=25, + use_metadata=True, + overwrite=False, + dst_parent_dir=".", + save_bboxes=False ) # Assert @@ -1774,7 +1809,7 @@ def test_DicomImageRedactorEngine_redact_from_file_happy_path( ) # Act - mock_engine.redact_from_file(dcm_path, "output", padding_width=25, fill="contrast", redact_approach="metadata") + mock_engine.redact_from_file(dcm_path, "output", padding_width=25, fill="contrast", use_metadata=True) # Assert assert mock_copy_files.call_count == 1 @@ -1816,7 +1851,7 @@ def test_DicomImageRedactorEngine_redact_from_file_exceptions( """ with pytest.raises(Exception) as exc_info: # Act - mock_engine.redact_from_file(input_path, output_path, padding_width=25, fill="contrast", redact_approach="metadata") + mock_engine.redact_from_file(input_path, output_path, padding_width=25, fill="contrast", use_metadata=True) # Assert assert expected_error_type == exc_info.typename @@ -1858,7 +1893,7 @@ def test_DicomImageRedactorEngine_redact_from_directory_happy_path( ) # Act - mock_engine.redact_from_directory(dcm_path, "output", padding_width=25, fill="contrast", redact_approach="metadata") + mock_engine.redact_from_directory(dcm_path, "output", padding_width=25, fill="contrast", 
use_metadata=True) # Assert assert mock_copy_files.call_count == 1 @@ -1889,7 +1924,7 @@ def test_DicomImageRedactorEngine_redact_from_directory_exceptions( """ with pytest.raises(Exception) as exc_info: # Act - mock_engine.redact_from_directory(input_path, output_path, padding_width=25, fill="contrast", redact_approach="metadata") + mock_engine.redact_from_directory(input_path, output_path, padding_width=25, fill="contrast", use_metadata=True) # Assert assert expected_error_type == exc_info.typename From 490f3d4d3f94b9d08dcbcb1a5ceb8d042e072772 Mon Sep 17 00:00:00 2001 From: Nile Wilson Date: Thu, 17 Aug 2023 11:59:30 -0400 Subject: [PATCH 14/25] Linting fixes --- .../dicom_image_redactor_engine.py | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py b/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py index 5fd00c623..e371907df 100644 --- a/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py +++ b/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py @@ -17,7 +17,7 @@ from presidio_image_redactor import ImageRedactorEngine from presidio_image_redactor import ImageAnalyzerEngine # noqa: F401 import presidio_analyzer # required for isinstance check -# which throws an error when trying to specify PatternRecognizer + # which throws an error when trying to specify PatternRecognizer from presidio_analyzer import PatternRecognizer from presidio_image_redactor.entities import ImageRecognizerResult @@ -87,7 +87,8 @@ def redact_and_return_bbox( # Detect PII analyzer_results = self._get_analyzer_results( - image, instance, use_metadata, ocr_kwargs, ad_hoc_recognizers, **text_analyzer_kwargs + image, instance, use_metadata, ocr_kwargs, ad_hoc_recognizers, + **text_analyzer_kwargs ) # Redact all bounding boxes from DICOM file @@ -902,18 +903,18 @@ def _get_analyzer_results( :return: 
Analyzer results. """ - # Check the ad-hoc recognizres list + # Check the ad-hoc recognizers list if isinstance(ad_hoc_recognizers, (list, type(None))): if isinstance(ad_hoc_recognizers, list): if len(ad_hoc_recognizers) >= 1: - are_recognizers = all(isinstance(x, presidio_analyzer.pattern_recognizer.PatternRecognizer) for x in ad_hoc_recognizers) + are_recognizers = all(isinstance(x, presidio_analyzer.pattern_recognizer.PatternRecognizer) for x in ad_hoc_recognizers) # noqa: E501 if are_recognizers is False: - raise TypeError("All items in ad_hoc_recognizers list must be PatternRecognizer objects") + raise TypeError("All items in ad_hoc_recognizers list must be PatternRecognizer objects") # noqa: E501 else: - raise ValueError("ad_hoc_recognizers must be None or list of PatternRecognizer") + raise ValueError("ad_hoc_recognizers must be None or list of PatternRecognizer") # noqa: E501 else: - raise TypeError("ad_hoc_recognizers must be None or list of PatternRecognizer") - + raise TypeError("ad_hoc_recognizers must be None or list of PatternRecognizer") # noqa: E501 + # Create custom recognizer using DICOM metadata if use_metadata: original_metadata, is_name, is_patient = self._get_text_metadata( @@ -930,7 +931,7 @@ def _get_analyzer_results( ad_hoc_recognizers = [deny_list_recognizer] elif type(ad_hoc_recognizers) is list: ad_hoc_recognizers.append(deny_list_recognizer) - + # Detect PII if ad_hoc_recognizers is None: analyzer_results = self.image_analyzer_engine.analyze( @@ -1027,7 +1028,8 @@ def _redact_single_dicom_image( # Detect PII analyzer_results = self._get_analyzer_results( - image, instance, use_metadata, ocr_kwargs, ad_hoc_recognizers, **text_analyzer_kwargs + image, instance, use_metadata, ocr_kwargs, ad_hoc_recognizers, + **text_analyzer_kwargs ) # Redact all bounding boxes from DICOM file From fc1f0c2cff83932d280623198f4140524f91d135 Mon Sep 17 00:00:00 2001 From: Nile Wilson Date: Thu, 17 Aug 2023 12:08:15 -0400 Subject: [PATCH 15/25] Additional 
linting fixes --- .../dicom_image_redactor_engine.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py b/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py index e371907df..8f0820596 100644 --- a/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py +++ b/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py @@ -16,8 +16,7 @@ from presidio_image_redactor import ImageRedactorEngine from presidio_image_redactor import ImageAnalyzerEngine # noqa: F401 -import presidio_analyzer # required for isinstance check - # which throws an error when trying to specify PatternRecognizer +import presidio_analyzer # required for isinstance check which throws an error when trying to specify PatternRecognizer # noqa: E501 from presidio_analyzer import PatternRecognizer from presidio_image_redactor.entities import ImageRecognizerResult @@ -907,13 +906,13 @@ def _get_analyzer_results( if isinstance(ad_hoc_recognizers, (list, type(None))): if isinstance(ad_hoc_recognizers, list): if len(ad_hoc_recognizers) >= 1: - are_recognizers = all(isinstance(x, presidio_analyzer.pattern_recognizer.PatternRecognizer) for x in ad_hoc_recognizers) # noqa: E501 + are_recognizers = all(isinstance(x, presidio_analyzer.pattern_recognizer.PatternRecognizer) for x in ad_hoc_recognizers) # noqa: E501 if are_recognizers is False: - raise TypeError("All items in ad_hoc_recognizers list must be PatternRecognizer objects") # noqa: E501 + raise TypeError("All items in ad_hoc_recognizers list must be PatternRecognizer objects") # noqa: E501 else: - raise ValueError("ad_hoc_recognizers must be None or list of PatternRecognizer") # noqa: E501 + raise ValueError("ad_hoc_recognizers must be None or list of PatternRecognizer") # noqa: E501 else: - raise TypeError("ad_hoc_recognizers must be None or list of PatternRecognizer") # noqa: E501 
+ raise TypeError("ad_hoc_recognizers must be None or list of PatternRecognizer") # noqa: E501 # Create custom recognizer using DICOM metadata if use_metadata: From fde5088e8020d58d48fe58cc812745d084f7bbc3 Mon Sep 17 00:00:00 2001 From: Nile Wilson Date: Thu, 17 Aug 2023 12:40:47 -0400 Subject: [PATCH 16/25] Remove output that is not checked --- .../tests/test_dicom_image_redactor_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py index 015211350..5f2c6b708 100644 --- a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py +++ b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py @@ -1361,7 +1361,7 @@ def test_DicomImageRedactorEngine_redact_and_return_bbox( ) # Act - test_redacted_image, test_bboxes = mock_engine.redact_and_return_bbox(test_image, use_metadata=True) + test_redacted_image, _ = mock_engine.redact_and_return_bbox(test_image, use_metadata=True) # Assert assert type(test_redacted_image) in [pydicom.dataset.FileDataset, pydicom.dataset.Dataset] From 916fed667e3a85b6b3a8bf67a8989c4d6ed9f5b6 Mon Sep 17 00:00:00 2001 From: Nile Wilson Date: Thu, 17 Aug 2023 13:03:11 -0400 Subject: [PATCH 17/25] Commenting out whole unit test file to see impact on build pipeline hangup --- .../tests/test_dicom_image_redactor_engine.py | 3860 ++++++++--------- 1 file changed, 1930 insertions(+), 1930 deletions(-) diff --git a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py index 5f2c6b708..2cfc0acb7 100644 --- a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py +++ b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py @@ -1,1930 +1,1930 @@ -"""Test suite for dicom_image_redactor_engine.py""" -from pathlib import Path -import tempfile -import os -import json -import numpy as np 
-from PIL import Image -import pydicom -from presidio_image_redactor.dicom_image_redactor_engine import DicomImageRedactorEngine -from presidio_analyzer import PatternRecognizer -from typing import Union, List, Tuple, Dict, TypeVar, Optional -import pytest - -T = TypeVar('T') - -SCRIPT_DIR = os.path.dirname(__file__) -TEST_DICOM_PARENT_DIR = f"{SCRIPT_DIR}/test_data" -TEST_DICOM_DIR_1 = f"{SCRIPT_DIR}/test_data/dicom_dir_1" -TEST_DICOM_DIR_2 = f"{SCRIPT_DIR}/test_data/dicom_dir_1/dicom_dir_2" -TEST_DICOM_DIR_3 = f"{SCRIPT_DIR}/test_data/dicom_dir_1/dicom_dir_3" -TEST_NUMPY_DIR = f"{SCRIPT_DIR}/test_data/numpy_arrays" -TEST_PNG_DIR = f"{SCRIPT_DIR}/test_data/png_images" - - -@pytest.fixture(scope="module") -def mock_engine(): - """Instance of the DicomImageRedactorEngine""" - # Arrange - - # Act - dicom_image_redactor_engine = DicomImageRedactorEngine() - - return dicom_image_redactor_engine - - -# ------------------------------------------------------ -# DicomImageRedactorEngine._get_all_dcm_files() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "dcm_dir, expected_list", - [ - ( - Path(TEST_DICOM_PARENT_DIR), - [ - Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_no_pixels.dcm"), - Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_icon_image_sequence.dcm"), - Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_compressed.dcm"), - Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), - Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), - Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), - Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), - ], - ), - ( - Path(TEST_DICOM_DIR_1), - [ - Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), - Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), - Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), - ], - ), - ( - Path(TEST_DICOM_DIR_2), - [ - Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), - Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), - ], - ), - ( - Path(TEST_DICOM_DIR_3), - [ - Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), 
- ], - ), - ], -) -def test_get_all_dcm_files_happy_path( - mock_engine: DicomImageRedactorEngine, dcm_dir: Path, expected_list: list -): - """Test happy path for DicomImageRedactorEngine._get_all_dcm_files - - Args: - dcm_dir (pathlib.Path): Path to a directory containing at least one .dcm file. - expected_list (list): List of pathlib Path objects. - """ - # Arrange - - # Act - test_files = mock_engine._get_all_dcm_files(dcm_dir) - print("test_files") - print(test_files) - print("expected file") - print(expected_list) - - # Assert - assert set(test_files) == set(expected_list) - - -# ------------------------------------------------------ -# DicomImageRedactorEngine._check_if_greyscale() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "dcm_file, expected_result", - [ - (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), True), - (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), False), - ], -) -def test_check_if_greyscale_happy_path( - mock_engine: DicomImageRedactorEngine, dcm_file: Path, expected_result: bool -): - """Test happy path for DicomImageRedactorEngine._check_if_greyscale - - Args: - dcm_file (pathlib.Path): Path to a DICOM file. - expected_result (bool): Expected output of _check_if_greyscale. 
- """ - # Arrange - test_instance = pydicom.dcmread(dcm_file) - - # Act - test_is_greyscale = mock_engine._check_if_greyscale(test_instance) - - # Assert - assert test_is_greyscale == expected_result - - -# ------------------------------------------------------ -# DicomImageRedactorEngine._rescale_dcm_pixel_array() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "dcm_file, is_greyscale", - [ - (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), True), - (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), False), - (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), True), - (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), True), - (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), True), - ], -) -def test_check_if_greyscale_happy_path( - mock_engine: DicomImageRedactorEngine, dcm_file: Path, is_greyscale: bool -): - """Test happy path for DicomImageRedactorEngine._rescale_dcm_pixel_array - - Args: - dcm_file (pathlib.Path): Path to a DICOM file. - is_greyscale (bool): If loaded DICOM image is greyscale or not. 
- """ - # Arrange - test_instance = pydicom.dcmread(dcm_file) - test_original_image = test_instance.pixel_array - - # Act - test_scaled_image = mock_engine._rescale_dcm_pixel_array( - test_instance, is_greyscale - ) - - # Assert - assert np.shape(test_original_image) == np.shape(test_scaled_image) - assert np.min(test_scaled_image) >= 0 - assert np.max(test_scaled_image) <= 255 - if is_greyscale is True: - assert np.max(test_original_image) != np.max(test_scaled_image) - assert len(np.shape(test_scaled_image)) == 2 - else: - assert len(np.shape(test_scaled_image)) == 3 - - -# ------------------------------------------------------ -# DicomImageRedactorEngine._save_pixel_array_as_png() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "dcm_file, is_greyscale, rescaled_image_numpy_path", - [ - ( - Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - True, - Path(TEST_NUMPY_DIR, "0_ORIGINAL.npy"), - ), - ( - Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), - False, - Path(TEST_NUMPY_DIR, "RGB_ORIGINAL.npy"), - ), - ( - Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), - True, - Path(TEST_NUMPY_DIR, "1_ORIGINAL.npy"), - ), - ( - Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), - True, - Path(TEST_NUMPY_DIR, "2_ORIGINAL.npy"), - ), - ( - Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), - True, - Path(TEST_NUMPY_DIR, "3_ORIGINAL.npy"), - ), - ], -) -def test_save_pixel_array_as_png_happy_path( - mock_engine: DicomImageRedactorEngine, - dcm_file: Path, - is_greyscale: bool, - rescaled_image_numpy_path: Path, -): - """Test happy path for DicomImageRedactorEngine._save_pixel_array_as_png - - Args: - dcm_file (pathlib.Path): Path to a DICOM file. - is_greyscale (bool): If loaded DICOM image is greyscale or not. - rescaled_image_numpy_path (pathlib.Path): Path to file containing numpy array of rescaled image. 
- """ - # Arrange - test_instance = pydicom.dcmread(dcm_file) - test_image = mock_engine._rescale_dcm_pixel_array(test_instance, is_greyscale) - filename = "test" - with open(rescaled_image_numpy_path, "rb") as f: - loaded_numpy_array = np.load(f) - - with tempfile.TemporaryDirectory() as tmpdirname: - # Act - _ = mock_engine._save_pixel_array_as_png( - test_image, is_greyscale, filename, tmpdirname - ) - - # Assert - assert np.shape(test_image) == np.shape(loaded_numpy_array) - assert f"{filename}.png" in os.listdir(tmpdirname) - - -# ------------------------------------------------------ -# DicomImageRedactorEngine._convert_dcm_to_png() -# ------------------------------------------------------ -def test_convert_dcm_to_png_happy_path(mocker): - """Test happy path for DicomImageRedactorEngine._convert_dcm_to_png""" - # Arrange - mock_dcm_read = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.pydicom.dcmread", - return_value=None, - ) - mock_check_if_gresycale = mocker.patch.object( - DicomImageRedactorEngine, - "_check_if_greyscale", - return_value=True, - ) - mock_rescale_dcm_pixel_array = mocker.patch.object( - DicomImageRedactorEngine, - "_rescale_dcm_pixel_array", - return_value=np.array([1, 2, 3]), - ) - mock_save_array_as_png = mocker.patch.object( - DicomImageRedactorEngine, "_save_pixel_array_as_png", return_value=None - ) - mock_engine = DicomImageRedactorEngine() - - with tempfile.TemporaryDirectory() as tmpdirname: - # Act - _, _ = mock_engine._convert_dcm_to_png(Path("filename.dcm"), tmpdirname) - - # Assert - assert mock_dcm_read.call_count == 1 - assert mock_check_if_gresycale.call_count == 1 - assert mock_rescale_dcm_pixel_array.call_count == 1 - assert mock_save_array_as_png.call_count == 1 - - -# ------------------------------------------------------ -# DicomImageRedactorEngine._get_bg_color() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "png_file, is_greyscale, invert_flag, 
expected_bg_color", - [ - (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, False, 243), - (Path(TEST_PNG_DIR, "RGB_ORIGINAL.png"), False, False, (0, 0, 0)), - (Path(TEST_PNG_DIR, "1_ORIGINAL.png"), True, False, 0), - (Path(TEST_PNG_DIR, "2_ORIGINAL.png"), True, False, 0), - (Path(TEST_PNG_DIR, "3_ORIGINAL.png"), True, False, 0), - (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, True, 12), - (Path(TEST_PNG_DIR, "RGB_ORIGINAL.png"), False, True, (255, 255, 255)), - (Path(TEST_PNG_DIR, "1_ORIGINAL.png"), True, True, 255), - (Path(TEST_PNG_DIR, "2_ORIGINAL.png"), True, True, 255), - (Path(TEST_PNG_DIR, "3_ORIGINAL.png"), True, True, 255), - ], -) -def test_get_bg_color_happy_path( - mock_engine: DicomImageRedactorEngine, - png_file: Path, - is_greyscale: bool, - invert_flag: bool, - expected_bg_color: Union[int, Tuple[int, int, int]], -): - """Test happy path for DicomImageRedactorEngine._get_bg_color - - Args: - png_file (pathlib.Path): Path to a PNG file. - is_greyscale (bool): If loaded DICOM image is greyscale or not. - invert_flag (bool): True if we want to invert image colors to get foreground. - expected_bg_color (int or Tuple of int): The expected background color of the image. 
- """ - # Arrange - test_image = Image.open(png_file) - - # Act - test_bg_color = mock_engine._get_bg_color(test_image, is_greyscale, invert_flag) - - # Assert - assert test_bg_color == expected_bg_color - - -# ------------------------------------------------------ -# DicomImageRedactorEngine._get_array_corners() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "dcm_file, crop_ratio", - [ - (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), 0.5), - (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), 0.5), - (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), 0.5), - (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), 0.5), - (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), 0.75), - (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), 0.25), - (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), 0.31), - (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), 0.83), - ], -) -def test_get_array_corners_happy_path( - mock_engine: DicomImageRedactorEngine, - dcm_file: Path, - crop_ratio: float, -): - """Test happy path for DicomImageRedactorEngine._get_array_corners - - Args: - dcm_file (pathlib.Path): Path to a DICOM file. - crop_ratio (float): Ratio to crop to. 
- """ - # Arrange - test_instance = pydicom.dcmread(dcm_file) - test_pixel_array = test_instance.pixel_array - full_width = test_pixel_array.shape[0] - full_height = test_pixel_array.shape[1] - full_min_pixel_val = np.min(test_pixel_array) - full_max_pixel_val = np.max(test_pixel_array) - - # Act - test_cropped_array = mock_engine._get_array_corners(test_pixel_array, crop_ratio) - cropped_width = test_cropped_array.shape[0] - cropped_height = test_cropped_array.shape[1] - cropped_min_pixel_val = np.min(test_cropped_array) - cropped_max_pixel_val = np.max(test_cropped_array) - - # Assert - assert cropped_width * cropped_height < full_width * full_height - assert cropped_min_pixel_val >= full_min_pixel_val - assert cropped_max_pixel_val <= full_max_pixel_val - - -@pytest.mark.parametrize( - "crop_ratio, expected_error_type", - [ - (0, "ValueError"), - (-0.4, "ValueError"), - (1.3, "ValueError"), - ], -) -def test_get_array_corners_exceptions( - mock_engine: DicomImageRedactorEngine, crop_ratio: float, expected_error_type: str -): - """Test error handling of _get_array_corners - - Args: - crop_ratio (float): Ratio to crop to. - expected_error_type (str): Type of error we expect to be raised. 
- """ - with pytest.raises(Exception) as exc_info: - # Arrange - dcm_file = Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm") - test_instance = pydicom.dcmread(dcm_file) - test_pixel_array = test_instance.pixel_array - - # Act - _ = mock_engine._get_array_corners(test_pixel_array, crop_ratio) - - # Assert - assert expected_error_type == exc_info.typename - - -# ------------------------------------------------------ -# DicomImageRedactorEngine._get_most_common_pixel_value() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "dcm_file, fill, expected_color", - [ - (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), "contrast", 50), - (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), "contrast", 16383), - (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), "contrast", 32767), - (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), "contrast", 4095), - (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), "background", 973), - (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), "background", 0), - (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), "background", 0), - (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), "background", 0), - ], -) -def test_get_most_common_pixel_value_happy_path( - mock_engine: DicomImageRedactorEngine, - dcm_file: Path, - fill: str, - expected_color: Union[int, Tuple[int, int, int]], -): - """Test happy path for DicomImageRedactorEngine._get_most_common_pixel_value - - Args: - dcm_file (pathlib.Path): Path to a DICOM file. - fill (str): Color setting to use ("contrast" or "background"). - expected_color (int or Tuple of int): The expected color returned for the image. 
- """ - # Arrange - crop_ratio = 0.75 - test_instance = pydicom.dcmread(dcm_file) - - # Act - test_color = mock_engine._get_most_common_pixel_value( - test_instance, crop_ratio, fill - ) - - # Assert - assert test_color == expected_color - - -@pytest.mark.parametrize( - "dcm_file, expected_error_type", - [ - (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), "TypeError"), - ], -) -def test_get_most_common_pixel_value_exceptions( - mock_engine: DicomImageRedactorEngine, dcm_file: Path, expected_error_type: str -): - """Test error handling of _get_most_common_pixel_value - - Args: - dcm_file (pathlib.Path): Path to a DICOM file. - expected_error_type (str): Type of error we expect to be raised. - """ - with pytest.raises(Exception) as exc_info: - # Arrange - crop_ratio = 0.75 - test_instance = pydicom.dcmread(dcm_file) - - # Act - _ = mock_engine._get_most_common_pixel_value( - test_instance, crop_ratio, "contrast" - ) - - # Assert - assert expected_error_type == exc_info.typename - - -# ------------------------------------------------------ -# DicomImageRedactorEngine._add_padding() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "png_file, is_greyscale, padding_width", - [ - (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, 15), - (Path(TEST_PNG_DIR, "RGB_ORIGINAL.png"), False, 15), - (Path(TEST_PNG_DIR, "1_ORIGINAL.png"), True, 15), - (Path(TEST_PNG_DIR, "2_ORIGINAL.png"), True, 15), - (Path(TEST_PNG_DIR, "3_ORIGINAL.png"), True, 15), - (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, 30), - (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, 99), - ], -) -def test_add_padding_happy_path( - mock_engine: DicomImageRedactorEngine, - png_file: Path, - is_greyscale: bool, - padding_width: int, -): - """Test happy path for DicomImageRedactorEngine._add_padding - - Args: - png_file (pathlib.Path): Path to a PNG file. - is_greyscale (bool): If loaded DICOM image is greyscale or not. - padding_width (int): Pixel width of padding (uniform). 
- """ - # Arrange - test_image = Image.open(png_file) - - # Act - test_image_with_padding = mock_engine._add_padding( - test_image, is_greyscale, padding_width - ) - - # Assert - assert test_image_with_padding.height - test_image.height == 2 * padding_width - assert test_image_with_padding.width - test_image.width == 2 * padding_width - - -@pytest.mark.parametrize( - "png_file, is_greyscale, padding_width, expected_error_type", - [ - (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, -1, "ValueError"), - (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, 0, "ValueError"), - (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, 200, "ValueError"), - ], -) -def test_add_padding_exceptions( - mock_engine: DicomImageRedactorEngine, - png_file: Path, - is_greyscale: bool, - padding_width: int, - expected_error_type: str, -): - """Test error handling of _add_padding - - Args: - png_file (pathlib.Path): Path to a PNG file. - is_greyscale (bool): If loaded DICOM image is greyscale or not. - padding_width (int): Pixel width of padding (uniform). - expected_error_type (str): Type of error we expect to be raised. 
- """ - with pytest.raises(Exception) as exc_info: - # Arrange - test_image = Image.open(png_file) - - # Act - _, _ = mock_engine._add_padding(test_image, is_greyscale, padding_width) - - # Assert - assert expected_error_type == exc_info.typename - - -# ------------------------------------------------------ -# DicomImageRedactorEngine._copy_files_for_processing() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "src_path, expected_num_of_files", - [ - (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), 1), - (Path(TEST_DICOM_PARENT_DIR), 18), - (Path(TEST_DICOM_DIR_1), 3), - (Path(TEST_DICOM_DIR_2), 2), - (Path(TEST_DICOM_DIR_3), 1), - (Path(TEST_PNG_DIR), 5), - (Path(TEST_NUMPY_DIR), 5), - ], -) -def test_copy_files_for_processing_happy_path( - mock_engine: DicomImageRedactorEngine, src_path: Path, expected_num_of_files: int -): - """Test happy path for DicomImageRedactorEngine._copy_files_for_processing - - Args: - src_path (pathlib.Path): Path to a file or directory to copy. - expected_num_of_files (int): Expected number of files to be copied. 
- """ - # Arrange - - with tempfile.TemporaryDirectory() as tmpdirname: - # Act - test_dst_path = mock_engine._copy_files_for_processing(src_path, tmpdirname) - - # Arrange - p = Path(tmpdirname).glob(f"**/*") - files = [x for x in p if x.is_file()] - - # Assert - assert Path(tmpdirname) < test_dst_path - assert expected_num_of_files == len(files) - - -# ------------------------------------------------------ -# DicomImageRedactorEngine._get_text_metadata() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "dcm_path, metadata_text_len, is_name_true_len, is_patient_true_len", - [ - (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), 82, 4, 6), - (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), 93, 9, 10), - (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), 83, 9, 8), - (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), 118, 6, 10), - (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), 135, 8, 10), - ], -) -def test_get_text_metadata_happy_path( - mock_engine: DicomImageRedactorEngine, - dcm_path: Path, - metadata_text_len: int, - is_name_true_len: int, - is_patient_true_len: int, -): - """Test happy path for DicomImageRedactorEngine._get_text_metadata - - Args: - dcm_path (pathlib.Path): Path to DICOM file. - metadata_text_len (int): Length of the expected returned metadata_text list. - is_name_true_len (int): Number of true values in the returned is_name list. - is_patient_true_len (int): Number of true values in the returned is_name list. 
- """ - # Arrange - test_instance = pydicom.dcmread(dcm_path) - - # Act - test_metadata_text, test_is_name, test_is_patient = mock_engine._get_text_metadata( - test_instance - ) - - # Assert - idx_is_name = list(np.where(np.array(test_is_name) == True)[0]) - idx_is_patient = list(np.where(np.array(test_is_patient) == True)[0]) - - assert len(test_metadata_text) == len(test_is_name) == len(test_is_patient) - assert len(idx_is_name) == is_name_true_len - assert len(idx_is_patient) == is_patient_true_len - assert type(test_metadata_text[idx_is_name[0]]) == str - - -# ------------------------------------------------------ -# DicomImageRedactorEngine._process_names() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "text_metadata, is_name, expected_phi_list", - [ - ([], [], []), - ( - ["JOHN^DOE", "City Hospital", "12345"], - [True, False, False], - [ - "JOHN^DOE", - "City Hospital", - "12345", - "JOHN", - "DOE", - "John", - "Doe", - "john", - "doe", - "JOHN DOE", - "John Doe", - "john doe", - ], - ), - ], -) -def test_process_names_happy_path( - mock_engine: DicomImageRedactorEngine, - text_metadata: list, - is_name: list, - expected_phi_list: list, -): - """Test happy path for DicomImageRedactorEngine._process_names - - Args: - text_metadata (list): List of text metadata. - is_name (list): Whether each element is a name or not. - expected_phi_list (list): List of expected output. 
- """ - # Arrange - - # Act - test_phi_list = mock_engine._process_names(text_metadata, is_name) - - # Assert - assert set(test_phi_list) == set(expected_phi_list) - - -# ------------------------------------------------------ -# DicomImageRedactorEngine._add_known_generic_phi() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "phi_list, expected_return_list", - [ - ([], ["M", "[M]", "F", "[F]", "X", "[X]", "U", "[U]"]), - ( - ["JOHN^DOE", "City Hospital", "12345"], - [ - "JOHN^DOE", - "City Hospital", - "12345", - "M", - "[M]", - "F", - "[F]", - "X", - "[X]", - "U", - "[U]", - ], - ), - ], -) -def test_add_known_generic_phi_happy_path( - mock_engine: DicomImageRedactorEngine, phi_list: list, expected_return_list: list -): - """Test happy path for DicomImageRedactorEngine._add_known_generic_phi - - Args: - phi_list (list): List of PHI. - expected_return_list (list): List of expected output. - """ - # Arrange - - # Act - test_phi_list = mock_engine._add_known_generic_phi(phi_list) - - # Assert - assert set(test_phi_list) == set(expected_return_list) - - -# ------------------------------------------------------ -# DicomImageRedactorEngine._make_phi_list() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "original_metadata, mock_process_names_val, mock_add_known_val, expected_return_list", - [ - ( - [ - ["A", "B"], - "A", - 123, - "JOHN^DOE", - "City Hospital", - "12345", - ], - [ - ["A", "B"], - "A", - 123, - "JOHN^DOE", - "City Hospital", - "12345", - "JOHN", - "DOE", - "John", - "Doe", - "john", - "doe", - "JOHN DOE", - "John Doe", - "john doe", - ], - [ - ["A", "B"], - "A", - 123, - "JOHN^DOE", - "City Hospital", - "12345", - "JOHN", - "DOE", - "John", - "Doe", - "john", - "doe", - "JOHN DOE", - "John Doe", - "john doe", - "M", - "[M]", - "F", - "[F]", - "X", - "[X]", - "U", - "[U]", - ], - [ - "A", - "B", - "123", - "JOHN^DOE", - "City Hospital", - "12345", - "JOHN", - "DOE", - "John", - 
"Doe", - "john", - "doe", - "JOHN DOE", - "John Doe", - "john doe", - "M", - "[M]", - "F", - "[F]", - "X", - "[X]", - "U", - "[U]", - ], - ), - ], -) -def test_make_phi_list_happy_path( - mocker, - mock_engine: DicomImageRedactorEngine, - original_metadata: list, - mock_process_names_val: list, - mock_add_known_val: list, - expected_return_list: list, -): - """Test happy path for DicomImageRedactorEngine._make_phi_list - - Args: - original_metadata (list): List extracted metadata (excluding pixel array). - mock_process_names_val (list): Value to provide to mock process_names. - mock_add_known_val (list): Value to provide to mock _add_known_generic_phi. - expected_return_list (list): List of expected output. - """ - # Arrange - mock_process_names = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._process_names", - return_value=mock_process_names_val, - ) - mock_add_known_generic_phi = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._add_known_generic_phi", - return_value=mock_add_known_val, - ) - - # Act - test_phi_str_list = mock_engine._make_phi_list(original_metadata, [], []) - - # Assert - assert mock_process_names.call_count == 1 - assert mock_add_known_generic_phi.call_count == 1 - assert set(test_phi_str_list) == set(expected_return_list) - - -# ------------------------------------------------------ -# DicomImageRedactorEngine._set_bbox_color() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "fill, mock_box_color", - [ - ("contrast", 0), - ("contrast", (0, 0, 0)), - ("background", 255), - ("background", (255, 255, 255)), - ], -) -def test_set_bbox_color_happy_path( - mocker, - fill: str, - mock_box_color: Union[int, Tuple[int, int, int]], -): - """Test happy path for DicomImageRedactorEngine._set_bbox_color - - Args: - fill (str): Determines how box color is selected. 
- mock_box_color (int or Tuple of int): Color value to assign to mocker. - """ - # Arrange - test_instance = pydicom.dcmread(Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm")) - - mock_convert_dcm_to_png = mocker.patch.object( - DicomImageRedactorEngine, "_convert_dcm_to_png", return_value=[None, True] - ) - mock_Image_open = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.Image.open", - return_value=None, - ) - mock_get_bg_color = mocker.patch.object( - DicomImageRedactorEngine, - "_get_bg_color", - return_value=mock_box_color, - ) - mock_engine = DicomImageRedactorEngine() - - # Act - test_box_color = mock_engine._set_bbox_color(test_instance, fill) - - # Assert - assert mock_convert_dcm_to_png.call_count == 1 - assert mock_Image_open.call_count == 1 - assert mock_get_bg_color.call_count == 1 - assert test_box_color == mock_box_color - - -@pytest.mark.parametrize( - "fill, expected_error_type", - [ - ("typo", "ValueError"), - ("somecolor", "ValueError"), - ("0", "ValueError"), - ("255", "ValueError"), - ], -) -def test_set_bbox_color_exceptions( - mock_engine: DicomImageRedactorEngine, - fill: str, - expected_error_type: str, -): - """Test error handling of _set_bbox_color - - Args: - fill (str): Determines how box color is selected. - expected_error_type (str): Type of error we expect to be raised. 
- """ - with pytest.raises(Exception) as exc_info: - # Arrange - test_instance = pydicom.dcmread(Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm")) - - # Act - _ = mock_engine._set_bbox_color(test_instance, fill) - - # Assert - assert expected_error_type == exc_info.typename - - -# ------------------------------------------------------ -# DicomImageRedactorEngine._check_if_compressed() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "dcm_path, compression_status", - [ - ( - Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - False - ), - ( - Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_compressed.dcm"), - True - ), - ], -) -def test_check_if_compressed_happy_path( - mock_engine: DicomImageRedactorEngine, - dcm_path: Path, - compression_status: bool, -): - """Test happy path for DicomImageRedactorEngine._check_if_compressed - - Args: - mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. - dcm_path (pathlib.Path): Path to DICOM file. - compression_status (bool): If the pixel data is compressed. - """ - # Arrange - test_instance = pydicom.dcmread(dcm_path) - - # Act - test_is_compressed = mock_engine._check_if_compressed(test_instance) - - # Assert - assert test_is_compressed == compression_status - -# ------------------------------------------------------ -# DicomImageRedactorEngine._compress_pixel_data() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "dcm_path", - [ - (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm")), - (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm")) - ], -) -def test_compress_pixel_data_happy_path( - mock_engine: DicomImageRedactorEngine, - dcm_path: Path, -): - """Test happy path for DicomImageRedactorEngine._compress_pixel_data - - Args: - mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. - dcm_path (pathlib.Path): Path to DICOM file. 
- """ - # Arrange - test_instance = pydicom.dcmread(dcm_path) - - # Act - test_compressed = mock_engine._compress_pixel_data(test_instance) - - # Assert - assert mock_engine._check_if_compressed(test_compressed) == True - -# ------------------------------------------------------ -# DicomImageRedactorEngine._check_if_has_image_icon_sequence() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "dcm_path, has_sequence", - [ - ( - Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - False - ), - ( - Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_icon_image_sequence.dcm"), - True - ), - ], -) -def test_check_if_has_image_icon_sequence_happy_path( - mock_engine: DicomImageRedactorEngine, - dcm_path: Path, - has_sequence: bool, -): - """Test happy path for DicomImageRedactorEngine._check_if_has_image_icon_sequence - Args: - mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. - dcm_path (pathlib.Path): Path to DICOM file. - has_sequence (bool): If additional pixel data is available in the instance. 
- """ - # Arrange - test_instance = pydicom.dcmread(dcm_path) - - # Act - test_has_sequence = mock_engine._check_if_has_image_icon_sequence(test_instance) - - # Assert - assert test_has_sequence == has_sequence - -# ------------------------------------------------------ -# DicomImageRedactorEngine._add_redact_box() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "dcm_path, mock_is_compressed, mock_has_image_icon_sequence, mock_is_greyscale, mock_box_color, bounding_boxes_coordinates", - [ - ( - Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - False, - False, - True, - 0, - [ - {"top": 0, "left": 0, "width": 100, "height": 100}, - {"top": 24, "left": 0, "width": 75, "height": 51}, - {"top": 1, "left": 588, "width": 226, "height": 35}, - ], - ), - ( - Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_compressed.dcm"), - True, - False, - True, - 0, - [ - {"top": 0, "left": 0, "width": 100, "height": 100}, - {"top": 24, "left": 0, "width": 75, "height": 51}, - {"top": 1, "left": 588, "width": 226, "height": 35}, - ], - ), - ( - Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_icon_image_sequence.dcm"), - False, - True, - True, - 0, - [ - {"top": 0, "left": 0, "width": 100, "height": 100}, - {"top": 24, "left": 0, "width": 75, "height": 51}, - {"top": 1, "left": 588, "width": 226, "height": 35}, - ], - ), - ( - Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), - False, - False, - False, - (0, 0, 0), - [ - {"top": 0, "left": 0, "width": 500, "height": 500}, - {"top": 24, "left": 0, "width": 75, "height": 51}, - {"top": 1, "left": 588, "width": 100, "height": 100}, - ], - ), - ], -) -def test_add_redact_box_happy_path( - mocker, - dcm_path: Path, - mock_is_compressed: bool, - mock_has_image_icon_sequence: bool, - mock_is_greyscale: bool, - mock_box_color: Union[int, Tuple[int, int, int]], - bounding_boxes_coordinates: dict, -): - """Test happy path for DicomImageRedactorEngine._add_redact_box - - Args: - dcm_path (pathlib.Path): Path to DICOM file. 
- mock_is_compressed (bool): If the pixel data is compressed. - mock_has_image_icon_sequence (bool): If there is more than one set of pixel data. - mock_is_greyscale (bool): Value to use when mocking _check_if_greyscale. - mock_box_color (int or Tuple of int): Color value to assign to mocker. - bouding_boxes_coordinates (dict): Formatted bbox coordinates. - """ - # Arrange - test_instance = pydicom.dcmread(dcm_path) - crop_ratio = 0.75 - mock_check_if_compressed = mocker.patch.object( - DicomImageRedactorEngine, - "_check_if_compressed", - return_value=mock_is_compressed - ) - mock_check_if_has_image_icon_sequence = mocker.patch.object( - DicomImageRedactorEngine, - "_check_if_has_image_icon_sequence", - return_value=mock_has_image_icon_sequence - ) - mock_check_if_greyscale = mocker.patch.object( - DicomImageRedactorEngine, - "_check_if_greyscale", - return_value=mock_is_greyscale, - ) - mock_get_common_pixel = mocker.patch.object( - DicomImageRedactorEngine, - "_get_most_common_pixel_value", - return_value=mock_box_color, - ) - mock_set_bbox_color = mocker.patch.object( - DicomImageRedactorEngine, - "_set_bbox_color", - return_value=mock_box_color, - ) - mock_engine = DicomImageRedactorEngine() - - # Act - test_redacted_instance = mock_engine._add_redact_box( - test_instance, bounding_boxes_coordinates, crop_ratio - ) - - # Assert - assert mock_check_if_compressed.call_count == 1 - assert mock_check_if_has_image_icon_sequence.call_count == 1 - assert mock_check_if_greyscale.call_count == 1 - if mock_is_greyscale is True: - original_pixel_values = np.array(test_instance.pixel_array).flatten() - redacted_pixel_values = np.array(test_redacted_instance.pixel_array).flatten() - box_color_pixels_original = len( - np.where(original_pixel_values == mock_box_color)[0] - ) - box_color_pixels_redacted = len( - np.where(redacted_pixel_values == mock_box_color)[0] - ) - assert mock_get_common_pixel.call_count == 1 - else: - list_of_RGB_pixels_original = 
np.vstack(test_instance.pixel_array).tolist() - list_of_RGB_pixels_redacted = np.vstack( - test_redacted_instance.pixel_array - ).tolist() - box_color_pixels_original = len( - np.unique( - np.where(np.array(list_of_RGB_pixels_original) == mock_box_color)[0] - ) - ) - box_color_pixels_redacted = len( - np.unique( - np.where(np.array(list_of_RGB_pixels_redacted) == mock_box_color)[0] - ) - ) - assert mock_set_bbox_color.call_count == 1 - - assert box_color_pixels_redacted > box_color_pixels_original - -# ------------------------------------------------------ -# DicomImageRedactorEngine._get_analyzer_results() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "image, dcm_path, use_metadata, ad_hoc_recognizers", - [ - ( - Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), - Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - False, - None - ), - ( - Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), - Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - False, - [PatternRecognizer(supported_entity="PERSON", deny_list=["1"])] - ), - ( - Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), - Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - True, - None - ), - ( - Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), - Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - True, - [PatternRecognizer(supported_entity="PERSON", deny_list=["2"])] - ), - ], -) -def test_get_analyzer_results_happy_path( - mocker, - mock_engine: DicomImageRedactorEngine, - image: Image, - dcm_path: str, - use_metadata: bool, - ad_hoc_recognizers: Optional[List[PatternRecognizer]] -): - """Test happy path for DicomImageRedactorEngine._get_analyzer_results - - Args: - mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. - image (PIL.Image): A PIL image. - dcm_path (pathlib.Path): Path to DICOM file. 
- use_metadata (bool): Whether to consider metadata when running analysis. - ad_hoc_recognizers(None or list): Ad-hoc recognizers to use. - """ - # Arrange - mock_analyze = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.ImageAnalyzerEngine.analyze", - return_value=None, - ) - mock_get_text_metadata = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._get_text_metadata", - return_value=[None, None, None], - ) - mock_make_phi_list = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._make_phi_list", - return_value=None, - ) - mock_pattern_recognizer = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.PatternRecognizer", - return_value=None, - ) - test_instance = pydicom.dcmread(dcm_path) - - # Act - _ = mock_engine._get_analyzer_results( - image, test_instance, use_metadata, None, ad_hoc_recognizers - ) - - # Assert - if use_metadata is False: - mock_analyze.assert_called_once() - mock_get_text_metadata.assert_not_called() - mock_make_phi_list.assert_not_called() - mock_pattern_recognizer.assert_not_called() - elif use_metadata is True: - mock_analyze.assert_called_once() - mock_get_text_metadata.assert_called_once() - mock_make_phi_list.assert_called_once() - mock_pattern_recognizer.assert_called_once() - -@pytest.mark.parametrize( - "image, dcm_path, ad_hoc_recognizers, expected_error_type", - [ - ( - Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), - Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - "invalidType", - "TypeError" - ), - ( - Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), - Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - [], - "ValueError" - ), - ( - Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), - Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - [PatternRecognizer(supported_entity="PERSON", deny_list=["a"]), 2], - "TypeError" - ), - ], -) -def 
test_get_analyzer_results_exceptions( - mock_engine: DicomImageRedactorEngine, - image: Image, - dcm_path: str, - ad_hoc_recognizers: Optional[List[PatternRecognizer]], - expected_error_type: str, -): - """Test error handling of DicomImageRedactorEngine _get_analyzer_results() - - Args: - mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. - image (PIL.Image): A PIL image. - dcm_path (pathlib.Path): Path to DICOM file. - ad_hoc_recognizers(None or list): Ad-hoc recognizers to use. - expected_error_type (str): Type of error we expect to be raised. - """ - with pytest.raises(Exception) as exc_info: - # Arrange - test_instance = pydicom.dcmread(dcm_path) - - # Act - _ = mock_engine._get_analyzer_results( - image, test_instance, True, None, ad_hoc_recognizers - ) - - # Assert - assert expected_error_type == exc_info.typename - -# ------------------------------------------------------ -# DicomImageRedactorEngine redact_and_return_bbox() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "dcm_path", - [ - (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm")), - (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_compressed.dcm")), - (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_icon_image_sequence.dcm")), - (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm")), - (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM")), - (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom")), - (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM")), - ], -) -def test_DicomImageRedactorEngine_redact_and_return_bbox( - mocker, - mock_engine: DicomImageRedactorEngine, - dcm_path: str, -): - """Test happy path for DicomImageRedactorEngine redact_and_return_bbox() - - Args: - mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. - dcm_path (str): Path to input DICOM file or dir. 
- """ - # Arrange - test_image = pydicom.dcmread(dcm_path) - - mock_check_greyscale = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._check_if_greyscale", return_value=None - ) - mock_rescale_dcm = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._rescale_dcm_pixel_array", return_value=None - ) - mock_save_pixel_array = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._save_pixel_array_as_png", return_value=None - ) - mock_image_open = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.Image.open", - return_value=None, - ) - mock_add_padding = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._add_padding", - return_value=None, - ) - mock_analyze = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._get_analyzer_results", - return_value=None, - ) - - mock_get_analyze_bbox = mocker.patch( - "presidio_image_redactor.image_redactor_engine.BboxProcessor.get_bboxes_from_analyzer_results", - return_value=None, - ) - - mock_remove_bbox_padding = mocker.patch( - "presidio_image_redactor.image_redactor_engine.BboxProcessor.remove_bbox_padding", - return_value=[{"mock_data": 1}, {"mock_data": 2}, {"mock_data": 3}], - ) - - mock_add_redact_box = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._add_redact_box", - return_value=test_image, - ) - - # Act - test_redacted_image, _ = mock_engine.redact_and_return_bbox(test_image, use_metadata=True) - - # Assert - assert type(test_redacted_image) in [pydicom.dataset.FileDataset, pydicom.dataset.Dataset] - # assertions for test_bboxes type causes silent failures/hangups for Python 3.11 - mock_check_greyscale.assert_called_once() - mock_rescale_dcm.assert_called_once() - mock_save_pixel_array.assert_called_once() - mock_image_open.assert_called_once() - 
mock_add_padding.assert_called_once() - mock_analyze.assert_called_once() - mock_get_analyze_bbox.assert_called_once() - mock_remove_bbox_padding.assert_called_once() - mock_add_redact_box.assert_called_once() - -@pytest.mark.parametrize( - "image, load_file, expected_error_type", - [ - (Path(TEST_DICOM_PARENT_DIR), True, ["TypeError", "IsADirectoryError", "PermissionError"]), - (Path(TEST_DICOM_PARENT_DIR), False, ["TypeError"]), - ("path_here", False, ["TypeError"]), - (np.random.randint(255, size=(64, 64)), False, ["TypeError"]), - (Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), False, ["TypeError"]), - (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_no_pixels.dcm"), True, ["AttributeError"]), - ], -) -def test_DicomImageRedactorEngine_redact_and_return_bbox_exceptions( - mock_engine: DicomImageRedactorEngine, - image: T, - load_file: bool, - expected_error_type: List[str], -): - """Test error handling of DicomImageRedactorEngine redact_and_return_bbox() - - Args: - mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. - image (any): Input "image". - load_file (bool): Whether to run pydicom.dcmread() on the input image. - expected_error_type (List(str)): Type of error we expect to be raised. 
- """ - with pytest.raises(Exception) as exc_info: - # Arrange - if load_file: - test_image = pydicom.dcmread(image) - else: - test_image = image - # Act - mock_engine.redact(test_image, fill="contrast", padding_width=25, use_metadata=True - ) - - # Assert - assert exc_info.typename in expected_error_type - -# ------------------------------------------------------ -# DicomImageRedactorEngine redact() -# ------------------------------------------------------ -def test_DicomImageRedactorEngine_redact_happy_path( - mocker, - mock_engine: DicomImageRedactorEngine, -): - """Test happy path for DicomImageRedactorEngine redact() - """ - # Arrange - test_image = pydicom.dcmread(Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm")) - - mock_redact_return_bbox = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine.redact_and_return_bbox", - return_value=[test_image, [{}, {}, {}]] - ) - - # Act - test_redacted_image = mock_engine.redact(test_image) - - # Assert - assert type(test_redacted_image) in [pydicom.dataset.FileDataset, pydicom.dataset.Dataset] - mock_redact_return_bbox.assert_called_once() - -# ------------------------------------------------------ -# DicomImageRedactorEngine _save_bbox_json() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "output_path, expected_output_json_path, bboxes", - [ - ( - "dir1/dir2/output_dicom.dcm", - "dir1/dir2/output_dicom.json", - [ - {"top": 0, "left": 0, "width": 100, "height": 100}, - {"top": 35, "left": 72, "width": 50, "height": 14} - ] - ), - ( - "dir1/output_dicom.dcm", - "dir1/output_dicom.json", - [ - {"top": 0, "left": 0, "width": 100, "height": 100}, - {"top": 35, "left": 72, "width": 50, "height": 14} - ] - ), - ( - "output_dicom.dcm", - "output_dicom.json", - [ - {"top": 0, "left": 0, "width": 100, "height": 100}, - {"top": 35, "left": 72, "width": 50, "height": 14} - ] - ), - ( - "dir1/dir2/output_dicom.DCM", - "dir1/dir2/output_dicom.json", - [ - 
{"top": 0, "left": 0, "width": 100, "height": 100}, - {"top": 35, "left": 72, "width": 50, "height": 14} - ] - ), - ( - "dir1/dir2/output_dicom.dicom", - "dir1/dir2/output_dicom.json", - [ - {"top": 0, "left": 0, "width": 100, "height": 100}, - {"top": 35, "left": 72, "width": 50, "height": 14} - ] - ), - ( - "dir1/dir2/output_dicom.DICOM", - "dir1/dir2/output_dicom.json", - [ - {"top": 0, "left": 0, "width": 100, "height": 100}, - {"top": 35, "left": 72, "width": 50, "height": 14} - ] - ) - ], -) -def test_DicomImageRedactorEngine_save_bbox_json_happy_path( - mock_engine: DicomImageRedactorEngine, - output_path: str, - expected_output_json_path: str, - bboxes: List[Dict[str, int]], -): - """Test happy path for DicomImageRedactorEngine _save_bbox_json() - - Args: - mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. - output_path (str): Path to output DICOM file. - expected_output_json_path (str): Expected path to the output JSON file. - bboxes (list): Bounding boxes to write out to JSON file. 
- """ - with tempfile.TemporaryDirectory() as tmpdirname: - # Arrange - temp_output_path = Path(tmpdirname, output_path) - temp_output_path.mkdir(parents=True, exist_ok=True) - temp_expected_json_path = Path(tmpdirname, expected_output_json_path) - - # Act - mock_engine._save_bbox_json(temp_output_path, bboxes) - - # Assert - with open(temp_expected_json_path, "r") as read_file: - loaded_json = json.load(read_file) - assert loaded_json == bboxes - -# ------------------------------------------------------ -# DicomImageRedactorEngine _redact_single_dicom_image() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "dcm_path, output_dir, overwrite", - [ - (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), "output", False), - (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), "output", True), - (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), "output", False), - (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), "output", False), - (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), "output", False), - (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), "output", False), - ], -) -def test_DicomImageRedactorEngine_redact_single_dicom_image_happy_path( - mocker, - mock_engine: DicomImageRedactorEngine, - dcm_path: str, - output_dir: str, - overwrite: bool, -): - """Test happy path for DicomImageRedactorEngine _redact_single_dicom_image() - - Args: - mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. - dcm_path (str): Path to input DICOM file or dir. - output_dir (str): Path to parent directory to write output to. - overwrite (bool): True if overwriting original files. 
- """ - # Arrange - crop_ratio = 0.75 - mock_copy_files = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._copy_files_for_processing", - return_value=dcm_path, - ) - mock_convert_dcm_to_png = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._convert_dcm_to_png", - return_value=[None, None], - ) - mock_image_open = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.Image.open", - return_value=None, - ) - mock_add_padding = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._add_padding", - return_value=None, - ) - - mock_analyze = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._get_analyzer_results", - return_value=None, - ) - - mock_get_analyze_bbox = mocker.patch( - "presidio_image_redactor.image_redactor_engine.BboxProcessor.get_bboxes_from_analyzer_results", - return_value=None, - ) - - mock_remove_bbox_padding = mocker.patch( - "presidio_image_redactor.image_redactor_engine.BboxProcessor.remove_bbox_padding", - return_value=None, - ) - - class MockInstance: - def save_as(self, dst_path: str): - return None - - mock_add_redact_box = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._add_redact_box", - return_value=MockInstance(), - ) - - # Act - mock_engine._redact_single_dicom_image( - dcm_path=dcm_path, - crop_ratio=crop_ratio, - fill="contrast", - padding_width=25, - use_metadata=True, - overwrite=overwrite, - dst_parent_dir=output_dir, - save_bboxes=False - ) - - # Assert - if overwrite is True: - assert mock_copy_files.call_count == 0 - else: - assert mock_copy_files.call_count == 1 - assert mock_convert_dcm_to_png.call_count == 1 - assert mock_image_open.call_count == 1 - assert mock_add_padding.call_count == 1 - assert mock_analyze.call_count == 1 - assert mock_get_analyze_bbox.call_count == 1 - assert 
mock_remove_bbox_padding.call_count == 1 - assert mock_add_redact_box.call_count == 1 - - -@pytest.mark.parametrize( - "dcm_path, expected_error_type", - [ - (Path(TEST_DICOM_PARENT_DIR), "FileNotFoundError"), - (Path("nonexistentfile.extension"), "FileNotFoundError"), - ], -) -def test_DicomImageRedactorEngine_redact_single_dicom_image_exceptions( - mock_engine: DicomImageRedactorEngine, - dcm_path: str, - expected_error_type: str, -): - """Test error handling of DicomImageRedactorEngine _redact_single_dicom_image() - - Args: - mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. - dcm_path (str): Path to input DICOM file or dir. - expected_error_type (str): Type of error we expect to be raised. - """ - with pytest.raises(Exception) as exc_info: - # Act - mock_engine._redact_single_dicom_image( - dcm_path=dcm_path, - crop_ratio=0.75, - fill="contrast", - padding_width=25, - use_metadata=True, - overwrite=False, - dst_parent_dir=".", - save_bboxes=False - ) - - # Assert - assert expected_error_type == exc_info.typename - - -# ------------------------------------------------------ -# DicomImageRedactorEngine _redact_multiple_dicom_images() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "dcm_path, output_dir, overwrite", - [ - (Path(TEST_DICOM_PARENT_DIR), "output", False), - (Path(TEST_DICOM_PARENT_DIR), "output", True), - (Path(TEST_DICOM_DIR_1), "output", False), - (Path(TEST_DICOM_DIR_2), "output", False), - (Path(TEST_DICOM_DIR_3), "output", False), - ], -) -def test_DicomImageRedactorEngine_redact_multiple_dicom_images_happy_path( - mocker, - mock_engine: DicomImageRedactorEngine, - dcm_path: str, - output_dir: str, - overwrite: bool, -): - """Test happy path for DicomImageRedactorEngine _redact_multiple_dicom_images() - - Args: - mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. - dcm_path (str): Path to input DICOM file or dir. 
- output_dir (str): Path to parent directory to write output to. - overwrite (bool): True if overwriting original files. - """ - # Arrange - crop_ratio = 0.75 - mock_copy_files = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._copy_files_for_processing", - return_value=dcm_path, - ) - mock_dcm_files = [ - Path("dir1/dir2/file1.dcm"), - Path("dir1/dir2/file2.dcm"), - Path("dir1/dir2/dir3/file3.dcm"), - ] - mock_get_all_dcm_files = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._get_all_dcm_files", - return_value=mock_dcm_files, - ) - mock_redact_single = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._redact_single_dicom_image", - return_value=None, - ) - - # Act - mock_engine._redact_multiple_dicom_images( - dcm_dir=dcm_path, - crop_ratio=crop_ratio, - fill="contrast", - padding_width=25, - use_metadata=True, - overwrite=overwrite, - dst_parent_dir=output_dir, - save_bboxes=False - ) - - # Assert - if overwrite is True: - assert mock_copy_files.call_count == 0 - else: - assert mock_copy_files.call_count == 1 - assert mock_get_all_dcm_files.call_count == 1 - assert mock_redact_single.call_count == len(mock_dcm_files) - - -@pytest.mark.parametrize( - "dcm_path, expected_error_type", - [ - (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), "FileNotFoundError"), - (Path("nonexistentdir"), "FileNotFoundError"), - ], -) -def test_DicomImageRedactorEngine_redact_multiple_dicom_images_exceptions( - mock_engine: DicomImageRedactorEngine, - dcm_path: str, - expected_error_type: str, -): - """Test error handling of DicomImageRedactorEngine _redact_multiple_dicom_images() - - Args: - mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. - dcm_path (str): Path to input DICOM file or dir. - expected_error_type (str): Type of error we expect to be raised. 
- """ - with pytest.raises(Exception) as exc_info: - # Act - mock_engine._redact_multiple_dicom_images( - dcm_dir=dcm_path, - crop_ratio=0.75, - fill="contrast", - padding_width=25, - use_metadata=True, - overwrite=False, - dst_parent_dir=".", - save_bboxes=False - ) - - # Assert - assert expected_error_type == exc_info.typename - - -# ------------------------------------------------------ -# DicomImageRedactorEngine redact_from_file() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "dcm_path, mock_dst_path", - [ - ( - f"{TEST_DICOM_PARENT_DIR}/0_ORIGINAL.dcm", - Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - ), - ( - f"{TEST_DICOM_DIR_2}/1_ORIGINAL.DCM", - Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), - ), - ( - f"{TEST_DICOM_DIR_2}/2_ORIGINAL.dicom", - Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), - ), - ( - f"{TEST_DICOM_DIR_3}/3_ORIGINAL.DICOM", - Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), - ), - ], -) -def test_DicomImageRedactorEngine_redact_from_file_happy_path( - mocker, - mock_engine: DicomImageRedactorEngine, - dcm_path: str, - mock_dst_path: Path, -): - """Test happy path for DicomImageRedactorEngine redact_from_file() - - Args: - mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. - dcm_path (str): Path to input DICOM file or dir. - mock_dst_path (pathlib.Path): Path to DICOM dir or file. 
- """ - # Arrange - mock_copy_files = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._copy_files_for_processing", - return_value=mock_dst_path, - ) - mock_redact_single = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._redact_single_dicom_image", - return_value=None, - ) - - # Act - mock_engine.redact_from_file(dcm_path, "output", padding_width=25, fill="contrast", use_metadata=True) - - # Assert - assert mock_copy_files.call_count == 1 - assert mock_redact_single.call_count == 1 - - -@pytest.mark.parametrize( - "input_path, output_path, expected_error_type", - [ - (TEST_DICOM_PARENT_DIR, "output", "TypeError"), - (TEST_DICOM_DIR_1, "output", "TypeError"), - (TEST_DICOM_DIR_2, "output", "TypeError"), - (TEST_DICOM_DIR_3, "output", "TypeError"), - ( - f"{TEST_DICOM_PARENT_DIR}/0_ORIGINAL.dcm", - f"{TEST_DICOM_PARENT_DIR}/0_ORIGINAL.dcm", - "TypeError", - ), - ( - f"{TEST_DICOM_PARENT_DIR}/0_ORIGINAL.dcm", - f"{TEST_DICOM_DIR_3}/3_ORIGINAL.DICOM", - "TypeError", - ), - ], -) -def test_DicomImageRedactorEngine_redact_from_file_exceptions( - mock_engine: DicomImageRedactorEngine, - input_path: str, - output_path: Path, - expected_error_type: str, -): - """Test error handling of DicomImageRedactorEngine redact_from_file() - - Args: - mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. - input_path (str): Path to input DICOM file or dir. - output_path (pathlib.Path): Path to DICOM dir or file. - expected_error_type (str): Type of error we expect to be raised. 
- """ - with pytest.raises(Exception) as exc_info: - # Act - mock_engine.redact_from_file(input_path, output_path, padding_width=25, fill="contrast", use_metadata=True) - - # Assert - assert expected_error_type == exc_info.typename - - -# ------------------------------------------------------ -# DicomImageRedactorEngine redact_from_directory() -# ------------------------------------------------------ -@pytest.mark.parametrize( - "dcm_path, mock_dst_path", - [ - (TEST_DICOM_PARENT_DIR, Path(TEST_DICOM_PARENT_DIR)), - (TEST_DICOM_DIR_2, Path(TEST_DICOM_DIR_1)), - (TEST_DICOM_DIR_2, Path(TEST_DICOM_DIR_2)), - (TEST_DICOM_DIR_3, Path(TEST_DICOM_DIR_3)), - ], -) -def test_DicomImageRedactorEngine_redact_from_directory_happy_path( - mocker, - mock_engine: DicomImageRedactorEngine, - dcm_path: str, - mock_dst_path: Path, -): - """Test happy path for DicomImageRedactorEngine redact_from_directory() - - Args: - mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. - dcm_path (str): Path to input DICOM file or dir. - mock_dst_path (pathlib.Path): Path to DICOM dir or file. 
- """ - # Arrange - mock_copy_files = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._copy_files_for_processing", - return_value=mock_dst_path, - ) - mock_redact_multiple = mocker.patch( - "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._redact_multiple_dicom_images", - return_value=None, - ) - - # Act - mock_engine.redact_from_directory(dcm_path, "output", padding_width=25, fill="contrast", use_metadata=True) - - # Assert - assert mock_copy_files.call_count == 1 - assert mock_redact_multiple.call_count == 1 - - -@pytest.mark.parametrize( - "input_path, output_path, expected_error_type", - [ - (f"{TEST_DICOM_PARENT_DIR}/0_ORIGINAL.dcm", "output", "TypeError"), - (TEST_DICOM_DIR_1, f"{TEST_DICOM_PARENT_DIR}/0_ORIGINAL.dcm", "TypeError"), - ("nonexistentdir", "output", "TypeError"), - ], -) -def test_DicomImageRedactorEngine_redact_from_directory_exceptions( - mock_engine: DicomImageRedactorEngine, - input_path: str, - output_path: Path, - expected_error_type: str, -): - """Test error handling of DicomImageRedactorEngine redact_from_directory() - - Args: - mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. - input_path (str): Path to input DICOM file or dir. - output_path (pathlib.Path): Path to DICOM dir or file. - expected_error_type (str): Type of error we expect to be raised. 
- """ - with pytest.raises(Exception) as exc_info: - # Act - mock_engine.redact_from_directory(input_path, output_path, padding_width=25, fill="contrast", use_metadata=True) - - # Assert - assert expected_error_type == exc_info.typename +# """Test suite for dicom_image_redactor_engine.py""" +# from pathlib import Path +# import tempfile +# import os +# import json +# import numpy as np +# from PIL import Image +# import pydicom +# from presidio_image_redactor.dicom_image_redactor_engine import DicomImageRedactorEngine +# from presidio_analyzer import PatternRecognizer +# from typing import Union, List, Tuple, Dict, TypeVar, Optional +# import pytest + +# T = TypeVar('T') + +# SCRIPT_DIR = os.path.dirname(__file__) +# TEST_DICOM_PARENT_DIR = f"{SCRIPT_DIR}/test_data" +# TEST_DICOM_DIR_1 = f"{SCRIPT_DIR}/test_data/dicom_dir_1" +# TEST_DICOM_DIR_2 = f"{SCRIPT_DIR}/test_data/dicom_dir_1/dicom_dir_2" +# TEST_DICOM_DIR_3 = f"{SCRIPT_DIR}/test_data/dicom_dir_1/dicom_dir_3" +# TEST_NUMPY_DIR = f"{SCRIPT_DIR}/test_data/numpy_arrays" +# TEST_PNG_DIR = f"{SCRIPT_DIR}/test_data/png_images" + + +# @pytest.fixture(scope="module") +# def mock_engine(): +# """Instance of the DicomImageRedactorEngine""" +# # Arrange + +# # Act +# dicom_image_redactor_engine = DicomImageRedactorEngine() + +# return dicom_image_redactor_engine + + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine._get_all_dcm_files() +# # ------------------------------------------------------ +# @pytest.mark.parametrize( +# "dcm_dir, expected_list", +# [ +# ( +# Path(TEST_DICOM_PARENT_DIR), +# [ +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_no_pixels.dcm"), +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_icon_image_sequence.dcm"), +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_compressed.dcm"), +# Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), +# Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), +# Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), +# 
Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), +# ], +# ), +# ( +# Path(TEST_DICOM_DIR_1), +# [ +# Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), +# Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), +# Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), +# ], +# ), +# ( +# Path(TEST_DICOM_DIR_2), +# [ +# Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), +# Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), +# ], +# ), +# ( +# Path(TEST_DICOM_DIR_3), +# [ +# Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), +# ], +# ), +# ], +# ) +# def test_get_all_dcm_files_happy_path( +# mock_engine: DicomImageRedactorEngine, dcm_dir: Path, expected_list: list +# ): +# """Test happy path for DicomImageRedactorEngine._get_all_dcm_files + +# Args: +# dcm_dir (pathlib.Path): Path to a directory containing at least one .dcm file. +# expected_list (list): List of pathlib Path objects. +# """ +# # Arrange + +# # Act +# test_files = mock_engine._get_all_dcm_files(dcm_dir) +# print("test_files") +# print(test_files) +# print("expected file") +# print(expected_list) + +# # Assert +# assert set(test_files) == set(expected_list) + + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine._check_if_greyscale() +# # ------------------------------------------------------ +# @pytest.mark.parametrize( +# "dcm_file, expected_result", +# [ +# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), True), +# (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), False), +# ], +# ) +# def test_check_if_greyscale_happy_path( +# mock_engine: DicomImageRedactorEngine, dcm_file: Path, expected_result: bool +# ): +# """Test happy path for DicomImageRedactorEngine._check_if_greyscale + +# Args: +# dcm_file (pathlib.Path): Path to a DICOM file. +# expected_result (bool): Expected output of _check_if_greyscale. 
+# """ +# # Arrange +# test_instance = pydicom.dcmread(dcm_file) + +# # Act +# test_is_greyscale = mock_engine._check_if_greyscale(test_instance) + +# # Assert +# assert test_is_greyscale == expected_result + + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine._rescale_dcm_pixel_array() +# # ------------------------------------------------------ +# @pytest.mark.parametrize( +# "dcm_file, is_greyscale", +# [ +# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), True), +# (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), False), +# (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), True), +# (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), True), +# (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), True), +# ], +# ) +# def test_check_if_greyscale_happy_path( +# mock_engine: DicomImageRedactorEngine, dcm_file: Path, is_greyscale: bool +# ): +# """Test happy path for DicomImageRedactorEngine._rescale_dcm_pixel_array + +# Args: +# dcm_file (pathlib.Path): Path to a DICOM file. +# is_greyscale (bool): If loaded DICOM image is greyscale or not. 
+# """ +# # Arrange +# test_instance = pydicom.dcmread(dcm_file) +# test_original_image = test_instance.pixel_array + +# # Act +# test_scaled_image = mock_engine._rescale_dcm_pixel_array( +# test_instance, is_greyscale +# ) + +# # Assert +# assert np.shape(test_original_image) == np.shape(test_scaled_image) +# assert np.min(test_scaled_image) >= 0 +# assert np.max(test_scaled_image) <= 255 +# if is_greyscale is True: +# assert np.max(test_original_image) != np.max(test_scaled_image) +# assert len(np.shape(test_scaled_image)) == 2 +# else: +# assert len(np.shape(test_scaled_image)) == 3 + + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine._save_pixel_array_as_png() +# # ------------------------------------------------------ +# @pytest.mark.parametrize( +# "dcm_file, is_greyscale, rescaled_image_numpy_path", +# [ +# ( +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), +# True, +# Path(TEST_NUMPY_DIR, "0_ORIGINAL.npy"), +# ), +# ( +# Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), +# False, +# Path(TEST_NUMPY_DIR, "RGB_ORIGINAL.npy"), +# ), +# ( +# Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), +# True, +# Path(TEST_NUMPY_DIR, "1_ORIGINAL.npy"), +# ), +# ( +# Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), +# True, +# Path(TEST_NUMPY_DIR, "2_ORIGINAL.npy"), +# ), +# ( +# Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), +# True, +# Path(TEST_NUMPY_DIR, "3_ORIGINAL.npy"), +# ), +# ], +# ) +# def test_save_pixel_array_as_png_happy_path( +# mock_engine: DicomImageRedactorEngine, +# dcm_file: Path, +# is_greyscale: bool, +# rescaled_image_numpy_path: Path, +# ): +# """Test happy path for DicomImageRedactorEngine._save_pixel_array_as_png + +# Args: +# dcm_file (pathlib.Path): Path to a DICOM file. +# is_greyscale (bool): If loaded DICOM image is greyscale or not. +# rescaled_image_numpy_path (pathlib.Path): Path to file containing numpy array of rescaled image. 
+# """ +# # Arrange +# test_instance = pydicom.dcmread(dcm_file) +# test_image = mock_engine._rescale_dcm_pixel_array(test_instance, is_greyscale) +# filename = "test" +# with open(rescaled_image_numpy_path, "rb") as f: +# loaded_numpy_array = np.load(f) + +# with tempfile.TemporaryDirectory() as tmpdirname: +# # Act +# _ = mock_engine._save_pixel_array_as_png( +# test_image, is_greyscale, filename, tmpdirname +# ) + +# # Assert +# assert np.shape(test_image) == np.shape(loaded_numpy_array) +# assert f"{filename}.png" in os.listdir(tmpdirname) + + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine._convert_dcm_to_png() +# # ------------------------------------------------------ +# def test_convert_dcm_to_png_happy_path(mocker): +# """Test happy path for DicomImageRedactorEngine._convert_dcm_to_png""" +# # Arrange +# mock_dcm_read = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.pydicom.dcmread", +# return_value=None, +# ) +# mock_check_if_gresycale = mocker.patch.object( +# DicomImageRedactorEngine, +# "_check_if_greyscale", +# return_value=True, +# ) +# mock_rescale_dcm_pixel_array = mocker.patch.object( +# DicomImageRedactorEngine, +# "_rescale_dcm_pixel_array", +# return_value=np.array([1, 2, 3]), +# ) +# mock_save_array_as_png = mocker.patch.object( +# DicomImageRedactorEngine, "_save_pixel_array_as_png", return_value=None +# ) +# mock_engine = DicomImageRedactorEngine() + +# with tempfile.TemporaryDirectory() as tmpdirname: +# # Act +# _, _ = mock_engine._convert_dcm_to_png(Path("filename.dcm"), tmpdirname) + +# # Assert +# assert mock_dcm_read.call_count == 1 +# assert mock_check_if_gresycale.call_count == 1 +# assert mock_rescale_dcm_pixel_array.call_count == 1 +# assert mock_save_array_as_png.call_count == 1 + + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine._get_bg_color() +# # ------------------------------------------------------ +# 
@pytest.mark.parametrize( +# "png_file, is_greyscale, invert_flag, expected_bg_color", +# [ +# (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, False, 243), +# (Path(TEST_PNG_DIR, "RGB_ORIGINAL.png"), False, False, (0, 0, 0)), +# (Path(TEST_PNG_DIR, "1_ORIGINAL.png"), True, False, 0), +# (Path(TEST_PNG_DIR, "2_ORIGINAL.png"), True, False, 0), +# (Path(TEST_PNG_DIR, "3_ORIGINAL.png"), True, False, 0), +# (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, True, 12), +# (Path(TEST_PNG_DIR, "RGB_ORIGINAL.png"), False, True, (255, 255, 255)), +# (Path(TEST_PNG_DIR, "1_ORIGINAL.png"), True, True, 255), +# (Path(TEST_PNG_DIR, "2_ORIGINAL.png"), True, True, 255), +# (Path(TEST_PNG_DIR, "3_ORIGINAL.png"), True, True, 255), +# ], +# ) +# def test_get_bg_color_happy_path( +# mock_engine: DicomImageRedactorEngine, +# png_file: Path, +# is_greyscale: bool, +# invert_flag: bool, +# expected_bg_color: Union[int, Tuple[int, int, int]], +# ): +# """Test happy path for DicomImageRedactorEngine._get_bg_color + +# Args: +# png_file (pathlib.Path): Path to a PNG file. +# is_greyscale (bool): If loaded DICOM image is greyscale or not. +# invert_flag (bool): True if we want to invert image colors to get foreground. +# expected_bg_color (int or Tuple of int): The expected background color of the image. 
+# """ +# # Arrange +# test_image = Image.open(png_file) + +# # Act +# test_bg_color = mock_engine._get_bg_color(test_image, is_greyscale, invert_flag) + +# # Assert +# assert test_bg_color == expected_bg_color + + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine._get_array_corners() +# # ------------------------------------------------------ +# @pytest.mark.parametrize( +# "dcm_file, crop_ratio", +# [ +# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), 0.5), +# (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), 0.5), +# (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), 0.5), +# (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), 0.5), +# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), 0.75), +# (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), 0.25), +# (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), 0.31), +# (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), 0.83), +# ], +# ) +# def test_get_array_corners_happy_path( +# mock_engine: DicomImageRedactorEngine, +# dcm_file: Path, +# crop_ratio: float, +# ): +# """Test happy path for DicomImageRedactorEngine._get_array_corners + +# Args: +# dcm_file (pathlib.Path): Path to a DICOM file. +# crop_ratio (float): Ratio to crop to. 
+# """ +# # Arrange +# test_instance = pydicom.dcmread(dcm_file) +# test_pixel_array = test_instance.pixel_array +# full_width = test_pixel_array.shape[0] +# full_height = test_pixel_array.shape[1] +# full_min_pixel_val = np.min(test_pixel_array) +# full_max_pixel_val = np.max(test_pixel_array) + +# # Act +# test_cropped_array = mock_engine._get_array_corners(test_pixel_array, crop_ratio) +# cropped_width = test_cropped_array.shape[0] +# cropped_height = test_cropped_array.shape[1] +# cropped_min_pixel_val = np.min(test_cropped_array) +# cropped_max_pixel_val = np.max(test_cropped_array) + +# # Assert +# assert cropped_width * cropped_height < full_width * full_height +# assert cropped_min_pixel_val >= full_min_pixel_val +# assert cropped_max_pixel_val <= full_max_pixel_val + + +# @pytest.mark.parametrize( +# "crop_ratio, expected_error_type", +# [ +# (0, "ValueError"), +# (-0.4, "ValueError"), +# (1.3, "ValueError"), +# ], +# ) +# def test_get_array_corners_exceptions( +# mock_engine: DicomImageRedactorEngine, crop_ratio: float, expected_error_type: str +# ): +# """Test error handling of _get_array_corners + +# Args: +# crop_ratio (float): Ratio to crop to. +# expected_error_type (str): Type of error we expect to be raised. 
+# """ +# with pytest.raises(Exception) as exc_info: +# # Arrange +# dcm_file = Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm") +# test_instance = pydicom.dcmread(dcm_file) +# test_pixel_array = test_instance.pixel_array + +# # Act +# _ = mock_engine._get_array_corners(test_pixel_array, crop_ratio) + +# # Assert +# assert expected_error_type == exc_info.typename + + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine._get_most_common_pixel_value() +# # ------------------------------------------------------ +# @pytest.mark.parametrize( +# "dcm_file, fill, expected_color", +# [ +# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), "contrast", 50), +# (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), "contrast", 16383), +# (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), "contrast", 32767), +# (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), "contrast", 4095), +# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), "background", 973), +# (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), "background", 0), +# (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), "background", 0), +# (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), "background", 0), +# ], +# ) +# def test_get_most_common_pixel_value_happy_path( +# mock_engine: DicomImageRedactorEngine, +# dcm_file: Path, +# fill: str, +# expected_color: Union[int, Tuple[int, int, int]], +# ): +# """Test happy path for DicomImageRedactorEngine._get_most_common_pixel_value + +# Args: +# dcm_file (pathlib.Path): Path to a DICOM file. +# fill (str): Color setting to use ("contrast" or "background"). +# expected_color (int or Tuple of int): The expected color returned for the image. 
+# """ +# # Arrange +# crop_ratio = 0.75 +# test_instance = pydicom.dcmread(dcm_file) + +# # Act +# test_color = mock_engine._get_most_common_pixel_value( +# test_instance, crop_ratio, fill +# ) + +# # Assert +# assert test_color == expected_color + + +# @pytest.mark.parametrize( +# "dcm_file, expected_error_type", +# [ +# (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), "TypeError"), +# ], +# ) +# def test_get_most_common_pixel_value_exceptions( +# mock_engine: DicomImageRedactorEngine, dcm_file: Path, expected_error_type: str +# ): +# """Test error handling of _get_most_common_pixel_value + +# Args: +# dcm_file (pathlib.Path): Path to a DICOM file. +# expected_error_type (str): Type of error we expect to be raised. +# """ +# with pytest.raises(Exception) as exc_info: +# # Arrange +# crop_ratio = 0.75 +# test_instance = pydicom.dcmread(dcm_file) + +# # Act +# _ = mock_engine._get_most_common_pixel_value( +# test_instance, crop_ratio, "contrast" +# ) + +# # Assert +# assert expected_error_type == exc_info.typename + + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine._add_padding() +# # ------------------------------------------------------ +# @pytest.mark.parametrize( +# "png_file, is_greyscale, padding_width", +# [ +# (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, 15), +# (Path(TEST_PNG_DIR, "RGB_ORIGINAL.png"), False, 15), +# (Path(TEST_PNG_DIR, "1_ORIGINAL.png"), True, 15), +# (Path(TEST_PNG_DIR, "2_ORIGINAL.png"), True, 15), +# (Path(TEST_PNG_DIR, "3_ORIGINAL.png"), True, 15), +# (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, 30), +# (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, 99), +# ], +# ) +# def test_add_padding_happy_path( +# mock_engine: DicomImageRedactorEngine, +# png_file: Path, +# is_greyscale: bool, +# padding_width: int, +# ): +# """Test happy path for DicomImageRedactorEngine._add_padding + +# Args: +# png_file (pathlib.Path): Path to a PNG file. 
+# is_greyscale (bool): If loaded DICOM image is greyscale or not. +# padding_width (int): Pixel width of padding (uniform). +# """ +# # Arrange +# test_image = Image.open(png_file) + +# # Act +# test_image_with_padding = mock_engine._add_padding( +# test_image, is_greyscale, padding_width +# ) + +# # Assert +# assert test_image_with_padding.height - test_image.height == 2 * padding_width +# assert test_image_with_padding.width - test_image.width == 2 * padding_width + + +# @pytest.mark.parametrize( +# "png_file, is_greyscale, padding_width, expected_error_type", +# [ +# (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, -1, "ValueError"), +# (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, 0, "ValueError"), +# (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, 200, "ValueError"), +# ], +# ) +# def test_add_padding_exceptions( +# mock_engine: DicomImageRedactorEngine, +# png_file: Path, +# is_greyscale: bool, +# padding_width: int, +# expected_error_type: str, +# ): +# """Test error handling of _add_padding + +# Args: +# png_file (pathlib.Path): Path to a PNG file. +# is_greyscale (bool): If loaded DICOM image is greyscale or not. +# padding_width (int): Pixel width of padding (uniform). +# expected_error_type (str): Type of error we expect to be raised. 
+# """ +# with pytest.raises(Exception) as exc_info: +# # Arrange +# test_image = Image.open(png_file) + +# # Act +# _, _ = mock_engine._add_padding(test_image, is_greyscale, padding_width) + +# # Assert +# assert expected_error_type == exc_info.typename + + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine._copy_files_for_processing() +# # ------------------------------------------------------ +# @pytest.mark.parametrize( +# "src_path, expected_num_of_files", +# [ +# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), 1), +# (Path(TEST_DICOM_PARENT_DIR), 18), +# (Path(TEST_DICOM_DIR_1), 3), +# (Path(TEST_DICOM_DIR_2), 2), +# (Path(TEST_DICOM_DIR_3), 1), +# (Path(TEST_PNG_DIR), 5), +# (Path(TEST_NUMPY_DIR), 5), +# ], +# ) +# def test_copy_files_for_processing_happy_path( +# mock_engine: DicomImageRedactorEngine, src_path: Path, expected_num_of_files: int +# ): +# """Test happy path for DicomImageRedactorEngine._copy_files_for_processing + +# Args: +# src_path (pathlib.Path): Path to a file or directory to copy. +# expected_num_of_files (int): Expected number of files to be copied. 
+# """ +# # Arrange + +# with tempfile.TemporaryDirectory() as tmpdirname: +# # Act +# test_dst_path = mock_engine._copy_files_for_processing(src_path, tmpdirname) + +# # Arrange +# p = Path(tmpdirname).glob(f"**/*") +# files = [x for x in p if x.is_file()] + +# # Assert +# assert Path(tmpdirname) < test_dst_path +# assert expected_num_of_files == len(files) + + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine._get_text_metadata() +# # ------------------------------------------------------ +# @pytest.mark.parametrize( +# "dcm_path, metadata_text_len, is_name_true_len, is_patient_true_len", +# [ +# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), 82, 4, 6), +# (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), 93, 9, 10), +# (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), 83, 9, 8), +# (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), 118, 6, 10), +# (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), 135, 8, 10), +# ], +# ) +# def test_get_text_metadata_happy_path( +# mock_engine: DicomImageRedactorEngine, +# dcm_path: Path, +# metadata_text_len: int, +# is_name_true_len: int, +# is_patient_true_len: int, +# ): +# """Test happy path for DicomImageRedactorEngine._get_text_metadata + +# Args: +# dcm_path (pathlib.Path): Path to DICOM file. +# metadata_text_len (int): Length of the expected returned metadata_text list. +# is_name_true_len (int): Number of true values in the returned is_name list. +# is_patient_true_len (int): Number of true values in the returned is_name list. 
+# """ +# # Arrange +# test_instance = pydicom.dcmread(dcm_path) + +# # Act +# test_metadata_text, test_is_name, test_is_patient = mock_engine._get_text_metadata( +# test_instance +# ) + +# # Assert +# idx_is_name = list(np.where(np.array(test_is_name) == True)[0]) +# idx_is_patient = list(np.where(np.array(test_is_patient) == True)[0]) + +# assert len(test_metadata_text) == len(test_is_name) == len(test_is_patient) +# assert len(idx_is_name) == is_name_true_len +# assert len(idx_is_patient) == is_patient_true_len +# assert type(test_metadata_text[idx_is_name[0]]) == str + + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine._process_names() +# # ------------------------------------------------------ +# @pytest.mark.parametrize( +# "text_metadata, is_name, expected_phi_list", +# [ +# ([], [], []), +# ( +# ["JOHN^DOE", "City Hospital", "12345"], +# [True, False, False], +# [ +# "JOHN^DOE", +# "City Hospital", +# "12345", +# "JOHN", +# "DOE", +# "John", +# "Doe", +# "john", +# "doe", +# "JOHN DOE", +# "John Doe", +# "john doe", +# ], +# ), +# ], +# ) +# def test_process_names_happy_path( +# mock_engine: DicomImageRedactorEngine, +# text_metadata: list, +# is_name: list, +# expected_phi_list: list, +# ): +# """Test happy path for DicomImageRedactorEngine._process_names + +# Args: +# text_metadata (list): List of text metadata. +# is_name (list): Whether each element is a name or not. +# expected_phi_list (list): List of expected output. 
+# """ +# # Arrange + +# # Act +# test_phi_list = mock_engine._process_names(text_metadata, is_name) + +# # Assert +# assert set(test_phi_list) == set(expected_phi_list) + + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine._add_known_generic_phi() +# # ------------------------------------------------------ +# @pytest.mark.parametrize( +# "phi_list, expected_return_list", +# [ +# ([], ["M", "[M]", "F", "[F]", "X", "[X]", "U", "[U]"]), +# ( +# ["JOHN^DOE", "City Hospital", "12345"], +# [ +# "JOHN^DOE", +# "City Hospital", +# "12345", +# "M", +# "[M]", +# "F", +# "[F]", +# "X", +# "[X]", +# "U", +# "[U]", +# ], +# ), +# ], +# ) +# def test_add_known_generic_phi_happy_path( +# mock_engine: DicomImageRedactorEngine, phi_list: list, expected_return_list: list +# ): +# """Test happy path for DicomImageRedactorEngine._add_known_generic_phi + +# Args: +# phi_list (list): List of PHI. +# expected_return_list (list): List of expected output. +# """ +# # Arrange + +# # Act +# test_phi_list = mock_engine._add_known_generic_phi(phi_list) + +# # Assert +# assert set(test_phi_list) == set(expected_return_list) + + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine._make_phi_list() +# # ------------------------------------------------------ +# @pytest.mark.parametrize( +# "original_metadata, mock_process_names_val, mock_add_known_val, expected_return_list", +# [ +# ( +# [ +# ["A", "B"], +# "A", +# 123, +# "JOHN^DOE", +# "City Hospital", +# "12345", +# ], +# [ +# ["A", "B"], +# "A", +# 123, +# "JOHN^DOE", +# "City Hospital", +# "12345", +# "JOHN", +# "DOE", +# "John", +# "Doe", +# "john", +# "doe", +# "JOHN DOE", +# "John Doe", +# "john doe", +# ], +# [ +# ["A", "B"], +# "A", +# 123, +# "JOHN^DOE", +# "City Hospital", +# "12345", +# "JOHN", +# "DOE", +# "John", +# "Doe", +# "john", +# "doe", +# "JOHN DOE", +# "John Doe", +# "john doe", +# "M", +# "[M]", +# "F", +# "[F]", +# "X", +# "[X]", +# "U", +# 
"[U]", +# ], +# [ +# "A", +# "B", +# "123", +# "JOHN^DOE", +# "City Hospital", +# "12345", +# "JOHN", +# "DOE", +# "John", +# "Doe", +# "john", +# "doe", +# "JOHN DOE", +# "John Doe", +# "john doe", +# "M", +# "[M]", +# "F", +# "[F]", +# "X", +# "[X]", +# "U", +# "[U]", +# ], +# ), +# ], +# ) +# def test_make_phi_list_happy_path( +# mocker, +# mock_engine: DicomImageRedactorEngine, +# original_metadata: list, +# mock_process_names_val: list, +# mock_add_known_val: list, +# expected_return_list: list, +# ): +# """Test happy path for DicomImageRedactorEngine._make_phi_list + +# Args: +# original_metadata (list): List extracted metadata (excluding pixel array). +# mock_process_names_val (list): Value to provide to mock process_names. +# mock_add_known_val (list): Value to provide to mock _add_known_generic_phi. +# expected_return_list (list): List of expected output. +# """ +# # Arrange +# mock_process_names = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._process_names", +# return_value=mock_process_names_val, +# ) +# mock_add_known_generic_phi = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._add_known_generic_phi", +# return_value=mock_add_known_val, +# ) + +# # Act +# test_phi_str_list = mock_engine._make_phi_list(original_metadata, [], []) + +# # Assert +# assert mock_process_names.call_count == 1 +# assert mock_add_known_generic_phi.call_count == 1 +# assert set(test_phi_str_list) == set(expected_return_list) + + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine._set_bbox_color() +# # ------------------------------------------------------ +# @pytest.mark.parametrize( +# "fill, mock_box_color", +# [ +# ("contrast", 0), +# ("contrast", (0, 0, 0)), +# ("background", 255), +# ("background", (255, 255, 255)), +# ], +# ) +# def test_set_bbox_color_happy_path( +# mocker, +# fill: str, +# mock_box_color: Union[int, Tuple[int, int, 
int]], +# ): +# """Test happy path for DicomImageRedactorEngine._set_bbox_color + +# Args: +# fill (str): Determines how box color is selected. +# mock_box_color (int or Tuple of int): Color value to assign to mocker. +# """ +# # Arrange +# test_instance = pydicom.dcmread(Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm")) + +# mock_convert_dcm_to_png = mocker.patch.object( +# DicomImageRedactorEngine, "_convert_dcm_to_png", return_value=[None, True] +# ) +# mock_Image_open = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.Image.open", +# return_value=None, +# ) +# mock_get_bg_color = mocker.patch.object( +# DicomImageRedactorEngine, +# "_get_bg_color", +# return_value=mock_box_color, +# ) +# mock_engine = DicomImageRedactorEngine() + +# # Act +# test_box_color = mock_engine._set_bbox_color(test_instance, fill) + +# # Assert +# assert mock_convert_dcm_to_png.call_count == 1 +# assert mock_Image_open.call_count == 1 +# assert mock_get_bg_color.call_count == 1 +# assert test_box_color == mock_box_color + + +# @pytest.mark.parametrize( +# "fill, expected_error_type", +# [ +# ("typo", "ValueError"), +# ("somecolor", "ValueError"), +# ("0", "ValueError"), +# ("255", "ValueError"), +# ], +# ) +# def test_set_bbox_color_exceptions( +# mock_engine: DicomImageRedactorEngine, +# fill: str, +# expected_error_type: str, +# ): +# """Test error handling of _set_bbox_color + +# Args: +# fill (str): Determines how box color is selected. +# expected_error_type (str): Type of error we expect to be raised. 
+# """ +# with pytest.raises(Exception) as exc_info: +# # Arrange +# test_instance = pydicom.dcmread(Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm")) + +# # Act +# _ = mock_engine._set_bbox_color(test_instance, fill) + +# # Assert +# assert expected_error_type == exc_info.typename + + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine._check_if_compressed() +# # ------------------------------------------------------ +# @pytest.mark.parametrize( +# "dcm_path, compression_status", +# [ +# ( +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), +# False +# ), +# ( +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_compressed.dcm"), +# True +# ), +# ], +# ) +# def test_check_if_compressed_happy_path( +# mock_engine: DicomImageRedactorEngine, +# dcm_path: Path, +# compression_status: bool, +# ): +# """Test happy path for DicomImageRedactorEngine._check_if_compressed + +# Args: +# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. +# dcm_path (pathlib.Path): Path to DICOM file. +# compression_status (bool): If the pixel data is compressed. +# """ +# # Arrange +# test_instance = pydicom.dcmread(dcm_path) + +# # Act +# test_is_compressed = mock_engine._check_if_compressed(test_instance) + +# # Assert +# assert test_is_compressed == compression_status + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine._compress_pixel_data() +# # ------------------------------------------------------ +# @pytest.mark.parametrize( +# "dcm_path", +# [ +# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm")), +# (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm")) +# ], +# ) +# def test_compress_pixel_data_happy_path( +# mock_engine: DicomImageRedactorEngine, +# dcm_path: Path, +# ): +# """Test happy path for DicomImageRedactorEngine._compress_pixel_data + +# Args: +# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. +# dcm_path (pathlib.Path): Path to DICOM file. 
+# """ +# # Arrange +# test_instance = pydicom.dcmread(dcm_path) + +# # Act +# test_compressed = mock_engine._compress_pixel_data(test_instance) + +# # Assert +# assert mock_engine._check_if_compressed(test_compressed) == True + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine._check_if_has_image_icon_sequence() +# # ------------------------------------------------------ +# @pytest.mark.parametrize( +# "dcm_path, has_sequence", +# [ +# ( +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), +# False +# ), +# ( +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_icon_image_sequence.dcm"), +# True +# ), +# ], +# ) +# def test_check_if_has_image_icon_sequence_happy_path( +# mock_engine: DicomImageRedactorEngine, +# dcm_path: Path, +# has_sequence: bool, +# ): +# """Test happy path for DicomImageRedactorEngine._check_if_has_image_icon_sequence +# Args: +# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. +# dcm_path (pathlib.Path): Path to DICOM file. +# has_sequence (bool): If additional pixel data is available in the instance. 
+# """ +# # Arrange +# test_instance = pydicom.dcmread(dcm_path) + +# # Act +# test_has_sequence = mock_engine._check_if_has_image_icon_sequence(test_instance) + +# # Assert +# assert test_has_sequence == has_sequence + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine._add_redact_box() +# # ------------------------------------------------------ +# @pytest.mark.parametrize( +# "dcm_path, mock_is_compressed, mock_has_image_icon_sequence, mock_is_greyscale, mock_box_color, bounding_boxes_coordinates", +# [ +# ( +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), +# False, +# False, +# True, +# 0, +# [ +# {"top": 0, "left": 0, "width": 100, "height": 100}, +# {"top": 24, "left": 0, "width": 75, "height": 51}, +# {"top": 1, "left": 588, "width": 226, "height": 35}, +# ], +# ), +# ( +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_compressed.dcm"), +# True, +# False, +# True, +# 0, +# [ +# {"top": 0, "left": 0, "width": 100, "height": 100}, +# {"top": 24, "left": 0, "width": 75, "height": 51}, +# {"top": 1, "left": 588, "width": 226, "height": 35}, +# ], +# ), +# ( +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_icon_image_sequence.dcm"), +# False, +# True, +# True, +# 0, +# [ +# {"top": 0, "left": 0, "width": 100, "height": 100}, +# {"top": 24, "left": 0, "width": 75, "height": 51}, +# {"top": 1, "left": 588, "width": 226, "height": 35}, +# ], +# ), +# ( +# Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), +# False, +# False, +# False, +# (0, 0, 0), +# [ +# {"top": 0, "left": 0, "width": 500, "height": 500}, +# {"top": 24, "left": 0, "width": 75, "height": 51}, +# {"top": 1, "left": 588, "width": 100, "height": 100}, +# ], +# ), +# ], +# ) +# def test_add_redact_box_happy_path( +# mocker, +# dcm_path: Path, +# mock_is_compressed: bool, +# mock_has_image_icon_sequence: bool, +# mock_is_greyscale: bool, +# mock_box_color: Union[int, Tuple[int, int, int]], +# bounding_boxes_coordinates: dict, +# ): +# """Test happy path for 
DicomImageRedactorEngine._add_redact_box + +# Args: +# dcm_path (pathlib.Path): Path to DICOM file. +# mock_is_compressed (bool): If the pixel data is compressed. +# mock_has_image_icon_sequence (bool): If there is more than one set of pixel data. +# mock_is_greyscale (bool): Value to use when mocking _check_if_greyscale. +# mock_box_color (int or Tuple of int): Color value to assign to mocker. +# bouding_boxes_coordinates (dict): Formatted bbox coordinates. +# """ +# # Arrange +# test_instance = pydicom.dcmread(dcm_path) +# crop_ratio = 0.75 +# mock_check_if_compressed = mocker.patch.object( +# DicomImageRedactorEngine, +# "_check_if_compressed", +# return_value=mock_is_compressed +# ) +# mock_check_if_has_image_icon_sequence = mocker.patch.object( +# DicomImageRedactorEngine, +# "_check_if_has_image_icon_sequence", +# return_value=mock_has_image_icon_sequence +# ) +# mock_check_if_greyscale = mocker.patch.object( +# DicomImageRedactorEngine, +# "_check_if_greyscale", +# return_value=mock_is_greyscale, +# ) +# mock_get_common_pixel = mocker.patch.object( +# DicomImageRedactorEngine, +# "_get_most_common_pixel_value", +# return_value=mock_box_color, +# ) +# mock_set_bbox_color = mocker.patch.object( +# DicomImageRedactorEngine, +# "_set_bbox_color", +# return_value=mock_box_color, +# ) +# mock_engine = DicomImageRedactorEngine() + +# # Act +# test_redacted_instance = mock_engine._add_redact_box( +# test_instance, bounding_boxes_coordinates, crop_ratio +# ) + +# # Assert +# assert mock_check_if_compressed.call_count == 1 +# assert mock_check_if_has_image_icon_sequence.call_count == 1 +# assert mock_check_if_greyscale.call_count == 1 +# if mock_is_greyscale is True: +# original_pixel_values = np.array(test_instance.pixel_array).flatten() +# redacted_pixel_values = np.array(test_redacted_instance.pixel_array).flatten() +# box_color_pixels_original = len( +# np.where(original_pixel_values == mock_box_color)[0] +# ) +# box_color_pixels_redacted = len( +# 
np.where(redacted_pixel_values == mock_box_color)[0] +# ) +# assert mock_get_common_pixel.call_count == 1 +# else: +# list_of_RGB_pixels_original = np.vstack(test_instance.pixel_array).tolist() +# list_of_RGB_pixels_redacted = np.vstack( +# test_redacted_instance.pixel_array +# ).tolist() +# box_color_pixels_original = len( +# np.unique( +# np.where(np.array(list_of_RGB_pixels_original) == mock_box_color)[0] +# ) +# ) +# box_color_pixels_redacted = len( +# np.unique( +# np.where(np.array(list_of_RGB_pixels_redacted) == mock_box_color)[0] +# ) +# ) +# assert mock_set_bbox_color.call_count == 1 + +# assert box_color_pixels_redacted > box_color_pixels_original + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine._get_analyzer_results() +# # ------------------------------------------------------ +# @pytest.mark.parametrize( +# "image, dcm_path, use_metadata, ad_hoc_recognizers", +# [ +# ( +# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), +# False, +# None +# ), +# ( +# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), +# False, +# [PatternRecognizer(supported_entity="PERSON", deny_list=["1"])] +# ), +# ( +# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), +# True, +# None +# ), +# ( +# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), +# True, +# [PatternRecognizer(supported_entity="PERSON", deny_list=["2"])] +# ), +# ], +# ) +# def test_get_analyzer_results_happy_path( +# mocker, +# mock_engine: DicomImageRedactorEngine, +# image: Image, +# dcm_path: str, +# use_metadata: bool, +# ad_hoc_recognizers: Optional[List[PatternRecognizer]] +# ): +# """Test happy path for DicomImageRedactorEngine._get_analyzer_results + +# Args: +# mock_engine 
(DicomImageRedactorEngine): DicomImageRedactorEngine object. +# image (PIL.Image): A PIL image. +# dcm_path (pathlib.Path): Path to DICOM file. +# use_metadata (bool): Whether to consider metadata when running analysis. +# ad_hoc_recognizers(None or list): Ad-hoc recognizers to use. +# """ +# # Arrange +# mock_analyze = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.ImageAnalyzerEngine.analyze", +# return_value=None, +# ) +# mock_get_text_metadata = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._get_text_metadata", +# return_value=[None, None, None], +# ) +# mock_make_phi_list = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._make_phi_list", +# return_value=None, +# ) +# mock_pattern_recognizer = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.PatternRecognizer", +# return_value=None, +# ) +# test_instance = pydicom.dcmread(dcm_path) + +# # Act +# _ = mock_engine._get_analyzer_results( +# image, test_instance, use_metadata, None, ad_hoc_recognizers +# ) + +# # Assert +# if use_metadata is False: +# mock_analyze.assert_called_once() +# mock_get_text_metadata.assert_not_called() +# mock_make_phi_list.assert_not_called() +# mock_pattern_recognizer.assert_not_called() +# elif use_metadata is True: +# mock_analyze.assert_called_once() +# mock_get_text_metadata.assert_called_once() +# mock_make_phi_list.assert_called_once() +# mock_pattern_recognizer.assert_called_once() + +# @pytest.mark.parametrize( +# "image, dcm_path, ad_hoc_recognizers, expected_error_type", +# [ +# ( +# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), +# "invalidType", +# "TypeError" +# ), +# ( +# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), +# [], +# "ValueError" +# ), +# ( +# 
Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), +# [PatternRecognizer(supported_entity="PERSON", deny_list=["a"]), 2], +# "TypeError" +# ), +# ], +# ) +# def test_get_analyzer_results_exceptions( +# mock_engine: DicomImageRedactorEngine, +# image: Image, +# dcm_path: str, +# ad_hoc_recognizers: Optional[List[PatternRecognizer]], +# expected_error_type: str, +# ): +# """Test error handling of DicomImageRedactorEngine _get_analyzer_results() + +# Args: +# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. +# image (PIL.Image): A PIL image. +# dcm_path (pathlib.Path): Path to DICOM file. +# ad_hoc_recognizers(None or list): Ad-hoc recognizers to use. +# expected_error_type (str): Type of error we expect to be raised. +# """ +# with pytest.raises(Exception) as exc_info: +# # Arrange +# test_instance = pydicom.dcmread(dcm_path) + +# # Act +# _ = mock_engine._get_analyzer_results( +# image, test_instance, True, None, ad_hoc_recognizers +# ) + +# # Assert +# assert expected_error_type == exc_info.typename + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine redact_and_return_bbox() +# # ------------------------------------------------------ +# @pytest.mark.parametrize( +# "dcm_path", +# [ +# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm")), +# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_compressed.dcm")), +# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_icon_image_sequence.dcm")), +# (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm")), +# (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM")), +# (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom")), +# (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM")), +# ], +# ) +# def test_DicomImageRedactorEngine_redact_and_return_bbox( +# mocker, +# mock_engine: DicomImageRedactorEngine, +# dcm_path: str, +# ): +# """Test happy path for DicomImageRedactorEngine redact_and_return_bbox() + +# Args: +# mock_engine 
(DicomImageRedactorEngine): DicomImageRedactorEngine object. +# dcm_path (str): Path to input DICOM file or dir. +# """ +# # Arrange +# test_image = pydicom.dcmread(dcm_path) + +# mock_check_greyscale = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._check_if_greyscale", return_value=None +# ) +# mock_rescale_dcm = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._rescale_dcm_pixel_array", return_value=None +# ) +# mock_save_pixel_array = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._save_pixel_array_as_png", return_value=None +# ) +# mock_image_open = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.Image.open", +# return_value=None, +# ) +# mock_add_padding = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._add_padding", +# return_value=None, +# ) +# mock_analyze = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._get_analyzer_results", +# return_value=None, +# ) + +# mock_get_analyze_bbox = mocker.patch( +# "presidio_image_redactor.image_redactor_engine.BboxProcessor.get_bboxes_from_analyzer_results", +# return_value=None, +# ) + +# mock_remove_bbox_padding = mocker.patch( +# "presidio_image_redactor.image_redactor_engine.BboxProcessor.remove_bbox_padding", +# return_value=[{"mock_data": 1}, {"mock_data": 2}, {"mock_data": 3}], +# ) + +# mock_add_redact_box = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._add_redact_box", +# return_value=test_image, +# ) + +# # Act +# test_redacted_image, _ = mock_engine.redact_and_return_bbox(test_image, use_metadata=True) + +# # Assert +# assert type(test_redacted_image) in [pydicom.dataset.FileDataset, pydicom.dataset.Dataset] +# # assertions for test_bboxes type causes silent failures/hangups for Python 3.11 +# 
mock_check_greyscale.assert_called_once() +# mock_rescale_dcm.assert_called_once() +# mock_save_pixel_array.assert_called_once() +# mock_image_open.assert_called_once() +# mock_add_padding.assert_called_once() +# mock_analyze.assert_called_once() +# mock_get_analyze_bbox.assert_called_once() +# mock_remove_bbox_padding.assert_called_once() +# mock_add_redact_box.assert_called_once() + +# @pytest.mark.parametrize( +# "image, load_file, expected_error_type", +# [ +# (Path(TEST_DICOM_PARENT_DIR), True, ["TypeError", "IsADirectoryError", "PermissionError"]), +# (Path(TEST_DICOM_PARENT_DIR), False, ["TypeError"]), +# ("path_here", False, ["TypeError"]), +# (np.random.randint(255, size=(64, 64)), False, ["TypeError"]), +# (Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), False, ["TypeError"]), +# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_no_pixels.dcm"), True, ["AttributeError"]), +# ], +# ) +# def test_DicomImageRedactorEngine_redact_and_return_bbox_exceptions( +# mock_engine: DicomImageRedactorEngine, +# image: T, +# load_file: bool, +# expected_error_type: List[str], +# ): +# """Test error handling of DicomImageRedactorEngine redact_and_return_bbox() + +# Args: +# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. +# image (any): Input "image". +# load_file (bool): Whether to run pydicom.dcmread() on the input image. +# expected_error_type (List(str)): Type of error we expect to be raised. 
+# """ +# with pytest.raises(Exception) as exc_info: +# # Arrange +# if load_file: +# test_image = pydicom.dcmread(image) +# else: +# test_image = image +# # Act +# mock_engine.redact(test_image, fill="contrast", padding_width=25, use_metadata=True +# ) + +# # Assert +# assert exc_info.typename in expected_error_type + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine redact() +# # ------------------------------------------------------ +# def test_DicomImageRedactorEngine_redact_happy_path( +# mocker, +# mock_engine: DicomImageRedactorEngine, +# ): +# """Test happy path for DicomImageRedactorEngine redact() +# """ +# # Arrange +# test_image = pydicom.dcmread(Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm")) + +# mock_redact_return_bbox = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine.redact_and_return_bbox", +# return_value=[test_image, [{}, {}, {}]] +# ) + +# # Act +# test_redacted_image = mock_engine.redact(test_image) + +# # Assert +# assert type(test_redacted_image) in [pydicom.dataset.FileDataset, pydicom.dataset.Dataset] +# mock_redact_return_bbox.assert_called_once() + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine _save_bbox_json() +# # ------------------------------------------------------ +# @pytest.mark.parametrize( +# "output_path, expected_output_json_path, bboxes", +# [ +# ( +# "dir1/dir2/output_dicom.dcm", +# "dir1/dir2/output_dicom.json", +# [ +# {"top": 0, "left": 0, "width": 100, "height": 100}, +# {"top": 35, "left": 72, "width": 50, "height": 14} +# ] +# ), +# ( +# "dir1/output_dicom.dcm", +# "dir1/output_dicom.json", +# [ +# {"top": 0, "left": 0, "width": 100, "height": 100}, +# {"top": 35, "left": 72, "width": 50, "height": 14} +# ] +# ), +# ( +# "output_dicom.dcm", +# "output_dicom.json", +# [ +# {"top": 0, "left": 0, "width": 100, "height": 100}, +# {"top": 35, "left": 72, "width": 50, "height": 14} +# ] +# ), +# ( 
+# "dir1/dir2/output_dicom.DCM", +# "dir1/dir2/output_dicom.json", +# [ +# {"top": 0, "left": 0, "width": 100, "height": 100}, +# {"top": 35, "left": 72, "width": 50, "height": 14} +# ] +# ), +# ( +# "dir1/dir2/output_dicom.dicom", +# "dir1/dir2/output_dicom.json", +# [ +# {"top": 0, "left": 0, "width": 100, "height": 100}, +# {"top": 35, "left": 72, "width": 50, "height": 14} +# ] +# ), +# ( +# "dir1/dir2/output_dicom.DICOM", +# "dir1/dir2/output_dicom.json", +# [ +# {"top": 0, "left": 0, "width": 100, "height": 100}, +# {"top": 35, "left": 72, "width": 50, "height": 14} +# ] +# ) +# ], +# ) +# def test_DicomImageRedactorEngine_save_bbox_json_happy_path( +# mock_engine: DicomImageRedactorEngine, +# output_path: str, +# expected_output_json_path: str, +# bboxes: List[Dict[str, int]], +# ): +# """Test happy path for DicomImageRedactorEngine _save_bbox_json() + +# Args: +# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. +# output_path (str): Path to output DICOM file. +# expected_output_json_path (str): Expected path to the output JSON file. +# bboxes (list): Bounding boxes to write out to JSON file. 
+# """ +# with tempfile.TemporaryDirectory() as tmpdirname: +# # Arrange +# temp_output_path = Path(tmpdirname, output_path) +# temp_output_path.mkdir(parents=True, exist_ok=True) +# temp_expected_json_path = Path(tmpdirname, expected_output_json_path) + +# # Act +# mock_engine._save_bbox_json(temp_output_path, bboxes) + +# # Assert +# with open(temp_expected_json_path, "r") as read_file: +# loaded_json = json.load(read_file) +# assert loaded_json == bboxes + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine _redact_single_dicom_image() +# # ------------------------------------------------------ +# @pytest.mark.parametrize( +# "dcm_path, output_dir, overwrite", +# [ +# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), "output", False), +# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), "output", True), +# (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), "output", False), +# (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), "output", False), +# (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), "output", False), +# (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), "output", False), +# ], +# ) +# def test_DicomImageRedactorEngine_redact_single_dicom_image_happy_path( +# mocker, +# mock_engine: DicomImageRedactorEngine, +# dcm_path: str, +# output_dir: str, +# overwrite: bool, +# ): +# """Test happy path for DicomImageRedactorEngine _redact_single_dicom_image() + +# Args: +# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. +# dcm_path (str): Path to input DICOM file or dir. +# output_dir (str): Path to parent directory to write output to. +# overwrite (bool): True if overwriting original files. 
+# """ +# # Arrange +# crop_ratio = 0.75 +# mock_copy_files = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._copy_files_for_processing", +# return_value=dcm_path, +# ) +# mock_convert_dcm_to_png = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._convert_dcm_to_png", +# return_value=[None, None], +# ) +# mock_image_open = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.Image.open", +# return_value=None, +# ) +# mock_add_padding = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._add_padding", +# return_value=None, +# ) + +# mock_analyze = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._get_analyzer_results", +# return_value=None, +# ) + +# mock_get_analyze_bbox = mocker.patch( +# "presidio_image_redactor.image_redactor_engine.BboxProcessor.get_bboxes_from_analyzer_results", +# return_value=None, +# ) + +# mock_remove_bbox_padding = mocker.patch( +# "presidio_image_redactor.image_redactor_engine.BboxProcessor.remove_bbox_padding", +# return_value=None, +# ) + +# class MockInstance: +# def save_as(self, dst_path: str): +# return None + +# mock_add_redact_box = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._add_redact_box", +# return_value=MockInstance(), +# ) + +# # Act +# mock_engine._redact_single_dicom_image( +# dcm_path=dcm_path, +# crop_ratio=crop_ratio, +# fill="contrast", +# padding_width=25, +# use_metadata=True, +# overwrite=overwrite, +# dst_parent_dir=output_dir, +# save_bboxes=False +# ) + +# # Assert +# if overwrite is True: +# assert mock_copy_files.call_count == 0 +# else: +# assert mock_copy_files.call_count == 1 +# assert mock_convert_dcm_to_png.call_count == 1 +# assert mock_image_open.call_count == 1 +# assert mock_add_padding.call_count == 1 +# assert mock_analyze.call_count == 1 +# assert 
mock_get_analyze_bbox.call_count == 1 +# assert mock_remove_bbox_padding.call_count == 1 +# assert mock_add_redact_box.call_count == 1 + + +# @pytest.mark.parametrize( +# "dcm_path, expected_error_type", +# [ +# (Path(TEST_DICOM_PARENT_DIR), "FileNotFoundError"), +# (Path("nonexistentfile.extension"), "FileNotFoundError"), +# ], +# ) +# def test_DicomImageRedactorEngine_redact_single_dicom_image_exceptions( +# mock_engine: DicomImageRedactorEngine, +# dcm_path: str, +# expected_error_type: str, +# ): +# """Test error handling of DicomImageRedactorEngine _redact_single_dicom_image() + +# Args: +# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. +# dcm_path (str): Path to input DICOM file or dir. +# expected_error_type (str): Type of error we expect to be raised. +# """ +# with pytest.raises(Exception) as exc_info: +# # Act +# mock_engine._redact_single_dicom_image( +# dcm_path=dcm_path, +# crop_ratio=0.75, +# fill="contrast", +# padding_width=25, +# use_metadata=True, +# overwrite=False, +# dst_parent_dir=".", +# save_bboxes=False +# ) + +# # Assert +# assert expected_error_type == exc_info.typename + + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine _redact_multiple_dicom_images() +# # ------------------------------------------------------ +# @pytest.mark.parametrize( +# "dcm_path, output_dir, overwrite", +# [ +# (Path(TEST_DICOM_PARENT_DIR), "output", False), +# (Path(TEST_DICOM_PARENT_DIR), "output", True), +# (Path(TEST_DICOM_DIR_1), "output", False), +# (Path(TEST_DICOM_DIR_2), "output", False), +# (Path(TEST_DICOM_DIR_3), "output", False), +# ], +# ) +# def test_DicomImageRedactorEngine_redact_multiple_dicom_images_happy_path( +# mocker, +# mock_engine: DicomImageRedactorEngine, +# dcm_path: str, +# output_dir: str, +# overwrite: bool, +# ): +# """Test happy path for DicomImageRedactorEngine _redact_multiple_dicom_images() + +# Args: +# mock_engine (DicomImageRedactorEngine): 
DicomImageRedactorEngine object. +# dcm_path (str): Path to input DICOM file or dir. +# output_dir (str): Path to parent directory to write output to. +# overwrite (bool): True if overwriting original files. +# """ +# # Arrange +# crop_ratio = 0.75 +# mock_copy_files = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._copy_files_for_processing", +# return_value=dcm_path, +# ) +# mock_dcm_files = [ +# Path("dir1/dir2/file1.dcm"), +# Path("dir1/dir2/file2.dcm"), +# Path("dir1/dir2/dir3/file3.dcm"), +# ] +# mock_get_all_dcm_files = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._get_all_dcm_files", +# return_value=mock_dcm_files, +# ) +# mock_redact_single = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._redact_single_dicom_image", +# return_value=None, +# ) + +# # Act +# mock_engine._redact_multiple_dicom_images( +# dcm_dir=dcm_path, +# crop_ratio=crop_ratio, +# fill="contrast", +# padding_width=25, +# use_metadata=True, +# overwrite=overwrite, +# dst_parent_dir=output_dir, +# save_bboxes=False +# ) + +# # Assert +# if overwrite is True: +# assert mock_copy_files.call_count == 0 +# else: +# assert mock_copy_files.call_count == 1 +# assert mock_get_all_dcm_files.call_count == 1 +# assert mock_redact_single.call_count == len(mock_dcm_files) + + +# @pytest.mark.parametrize( +# "dcm_path, expected_error_type", +# [ +# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), "FileNotFoundError"), +# (Path("nonexistentdir"), "FileNotFoundError"), +# ], +# ) +# def test_DicomImageRedactorEngine_redact_multiple_dicom_images_exceptions( +# mock_engine: DicomImageRedactorEngine, +# dcm_path: str, +# expected_error_type: str, +# ): +# """Test error handling of DicomImageRedactorEngine _redact_multiple_dicom_images() + +# Args: +# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. 
+# dcm_path (str): Path to input DICOM file or dir. +# expected_error_type (str): Type of error we expect to be raised. +# """ +# with pytest.raises(Exception) as exc_info: +# # Act +# mock_engine._redact_multiple_dicom_images( +# dcm_dir=dcm_path, +# crop_ratio=0.75, +# fill="contrast", +# padding_width=25, +# use_metadata=True, +# overwrite=False, +# dst_parent_dir=".", +# save_bboxes=False +# ) + +# # Assert +# assert expected_error_type == exc_info.typename + + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine redact_from_file() +# # ------------------------------------------------------ +# @pytest.mark.parametrize( +# "dcm_path, mock_dst_path", +# [ +# ( +# f"{TEST_DICOM_PARENT_DIR}/0_ORIGINAL.dcm", +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), +# ), +# ( +# f"{TEST_DICOM_DIR_2}/1_ORIGINAL.DCM", +# Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), +# ), +# ( +# f"{TEST_DICOM_DIR_2}/2_ORIGINAL.dicom", +# Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), +# ), +# ( +# f"{TEST_DICOM_DIR_3}/3_ORIGINAL.DICOM", +# Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), +# ), +# ], +# ) +# def test_DicomImageRedactorEngine_redact_from_file_happy_path( +# mocker, +# mock_engine: DicomImageRedactorEngine, +# dcm_path: str, +# mock_dst_path: Path, +# ): +# """Test happy path for DicomImageRedactorEngine redact_from_file() + +# Args: +# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. +# dcm_path (str): Path to input DICOM file or dir. +# mock_dst_path (pathlib.Path): Path to DICOM dir or file. 
+# """ +# # Arrange +# mock_copy_files = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._copy_files_for_processing", +# return_value=mock_dst_path, +# ) +# mock_redact_single = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._redact_single_dicom_image", +# return_value=None, +# ) + +# # Act +# mock_engine.redact_from_file(dcm_path, "output", padding_width=25, fill="contrast", use_metadata=True) + +# # Assert +# assert mock_copy_files.call_count == 1 +# assert mock_redact_single.call_count == 1 + + +# @pytest.mark.parametrize( +# "input_path, output_path, expected_error_type", +# [ +# (TEST_DICOM_PARENT_DIR, "output", "TypeError"), +# (TEST_DICOM_DIR_1, "output", "TypeError"), +# (TEST_DICOM_DIR_2, "output", "TypeError"), +# (TEST_DICOM_DIR_3, "output", "TypeError"), +# ( +# f"{TEST_DICOM_PARENT_DIR}/0_ORIGINAL.dcm", +# f"{TEST_DICOM_PARENT_DIR}/0_ORIGINAL.dcm", +# "TypeError", +# ), +# ( +# f"{TEST_DICOM_PARENT_DIR}/0_ORIGINAL.dcm", +# f"{TEST_DICOM_DIR_3}/3_ORIGINAL.DICOM", +# "TypeError", +# ), +# ], +# ) +# def test_DicomImageRedactorEngine_redact_from_file_exceptions( +# mock_engine: DicomImageRedactorEngine, +# input_path: str, +# output_path: Path, +# expected_error_type: str, +# ): +# """Test error handling of DicomImageRedactorEngine redact_from_file() + +# Args: +# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. +# input_path (str): Path to input DICOM file or dir. +# output_path (pathlib.Path): Path to DICOM dir or file. +# expected_error_type (str): Type of error we expect to be raised. 
+# """ +# with pytest.raises(Exception) as exc_info: +# # Act +# mock_engine.redact_from_file(input_path, output_path, padding_width=25, fill="contrast", use_metadata=True) + +# # Assert +# assert expected_error_type == exc_info.typename + + +# # ------------------------------------------------------ +# # DicomImageRedactorEngine redact_from_directory() +# # ------------------------------------------------------ +# @pytest.mark.parametrize( +# "dcm_path, mock_dst_path", +# [ +# (TEST_DICOM_PARENT_DIR, Path(TEST_DICOM_PARENT_DIR)), +# (TEST_DICOM_DIR_2, Path(TEST_DICOM_DIR_1)), +# (TEST_DICOM_DIR_2, Path(TEST_DICOM_DIR_2)), +# (TEST_DICOM_DIR_3, Path(TEST_DICOM_DIR_3)), +# ], +# ) +# def test_DicomImageRedactorEngine_redact_from_directory_happy_path( +# mocker, +# mock_engine: DicomImageRedactorEngine, +# dcm_path: str, +# mock_dst_path: Path, +# ): +# """Test happy path for DicomImageRedactorEngine redact_from_directory() + +# Args: +# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. +# dcm_path (str): Path to input DICOM file or dir. +# mock_dst_path (pathlib.Path): Path to DICOM dir or file. 
+# """ +# # Arrange +# mock_copy_files = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._copy_files_for_processing", +# return_value=mock_dst_path, +# ) +# mock_redact_multiple = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._redact_multiple_dicom_images", +# return_value=None, +# ) + +# # Act +# mock_engine.redact_from_directory(dcm_path, "output", padding_width=25, fill="contrast", use_metadata=True) + +# # Assert +# assert mock_copy_files.call_count == 1 +# assert mock_redact_multiple.call_count == 1 + + +# @pytest.mark.parametrize( +# "input_path, output_path, expected_error_type", +# [ +# (f"{TEST_DICOM_PARENT_DIR}/0_ORIGINAL.dcm", "output", "TypeError"), +# (TEST_DICOM_DIR_1, f"{TEST_DICOM_PARENT_DIR}/0_ORIGINAL.dcm", "TypeError"), +# ("nonexistentdir", "output", "TypeError"), +# ], +# ) +# def test_DicomImageRedactorEngine_redact_from_directory_exceptions( +# mock_engine: DicomImageRedactorEngine, +# input_path: str, +# output_path: Path, +# expected_error_type: str, +# ): +# """Test error handling of DicomImageRedactorEngine redact_from_directory() + +# Args: +# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. +# input_path (str): Path to input DICOM file or dir. +# output_path (pathlib.Path): Path to DICOM dir or file. +# expected_error_type (str): Type of error we expect to be raised. 
+# """ +# with pytest.raises(Exception) as exc_info: +# # Act +# mock_engine.redact_from_directory(input_path, output_path, padding_width=25, fill="contrast", use_metadata=True) + +# # Assert +# assert expected_error_type == exc_info.typename From 510cbac040320d110f7bbc9404ecc3ca770150f7 Mon Sep 17 00:00:00 2001 From: Nile Wilson Date: Thu, 17 Aug 2023 13:11:42 -0400 Subject: [PATCH 18/25] Only commenting out get_analyzer_results tests --- .../tests/test_dicom_image_redactor_engine.py | 3732 ++++++++--------- 1 file changed, 1866 insertions(+), 1866 deletions(-) diff --git a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py index 2cfc0acb7..56d9ed126 100644 --- a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py +++ b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py @@ -1,1930 +1,1930 @@ -# """Test suite for dicom_image_redactor_engine.py""" -# from pathlib import Path -# import tempfile -# import os -# import json -# import numpy as np -# from PIL import Image -# import pydicom -# from presidio_image_redactor.dicom_image_redactor_engine import DicomImageRedactorEngine -# from presidio_analyzer import PatternRecognizer -# from typing import Union, List, Tuple, Dict, TypeVar, Optional -# import pytest - -# T = TypeVar('T') - -# SCRIPT_DIR = os.path.dirname(__file__) -# TEST_DICOM_PARENT_DIR = f"{SCRIPT_DIR}/test_data" -# TEST_DICOM_DIR_1 = f"{SCRIPT_DIR}/test_data/dicom_dir_1" -# TEST_DICOM_DIR_2 = f"{SCRIPT_DIR}/test_data/dicom_dir_1/dicom_dir_2" -# TEST_DICOM_DIR_3 = f"{SCRIPT_DIR}/test_data/dicom_dir_1/dicom_dir_3" -# TEST_NUMPY_DIR = f"{SCRIPT_DIR}/test_data/numpy_arrays" -# TEST_PNG_DIR = f"{SCRIPT_DIR}/test_data/png_images" - - -# @pytest.fixture(scope="module") -# def mock_engine(): -# """Instance of the DicomImageRedactorEngine""" -# # Arrange - -# # Act -# dicom_image_redactor_engine = DicomImageRedactorEngine() - -# return 
dicom_image_redactor_engine - - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine._get_all_dcm_files() -# # ------------------------------------------------------ -# @pytest.mark.parametrize( -# "dcm_dir, expected_list", -# [ -# ( -# Path(TEST_DICOM_PARENT_DIR), -# [ -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_no_pixels.dcm"), -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_icon_image_sequence.dcm"), -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_compressed.dcm"), -# Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), -# Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), -# Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), -# Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), -# ], -# ), -# ( -# Path(TEST_DICOM_DIR_1), -# [ -# Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), -# Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), -# Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), -# ], -# ), -# ( -# Path(TEST_DICOM_DIR_2), -# [ -# Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), -# Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), -# ], -# ), -# ( -# Path(TEST_DICOM_DIR_3), -# [ -# Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), -# ], -# ), -# ], -# ) -# def test_get_all_dcm_files_happy_path( -# mock_engine: DicomImageRedactorEngine, dcm_dir: Path, expected_list: list -# ): -# """Test happy path for DicomImageRedactorEngine._get_all_dcm_files - -# Args: -# dcm_dir (pathlib.Path): Path to a directory containing at least one .dcm file. -# expected_list (list): List of pathlib Path objects. 
-# """ -# # Arrange - -# # Act -# test_files = mock_engine._get_all_dcm_files(dcm_dir) -# print("test_files") -# print(test_files) -# print("expected file") -# print(expected_list) - -# # Assert -# assert set(test_files) == set(expected_list) - - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine._check_if_greyscale() -# # ------------------------------------------------------ -# @pytest.mark.parametrize( -# "dcm_file, expected_result", -# [ -# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), True), -# (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), False), -# ], -# ) -# def test_check_if_greyscale_happy_path( -# mock_engine: DicomImageRedactorEngine, dcm_file: Path, expected_result: bool -# ): -# """Test happy path for DicomImageRedactorEngine._check_if_greyscale - -# Args: -# dcm_file (pathlib.Path): Path to a DICOM file. -# expected_result (bool): Expected output of _check_if_greyscale. -# """ -# # Arrange -# test_instance = pydicom.dcmread(dcm_file) - -# # Act -# test_is_greyscale = mock_engine._check_if_greyscale(test_instance) - -# # Assert -# assert test_is_greyscale == expected_result - - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine._rescale_dcm_pixel_array() -# # ------------------------------------------------------ -# @pytest.mark.parametrize( -# "dcm_file, is_greyscale", -# [ -# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), True), -# (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), False), -# (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), True), -# (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), True), -# (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), True), -# ], -# ) -# def test_check_if_greyscale_happy_path( -# mock_engine: DicomImageRedactorEngine, dcm_file: Path, is_greyscale: bool -# ): -# """Test happy path for DicomImageRedactorEngine._rescale_dcm_pixel_array - -# Args: -# dcm_file (pathlib.Path): Path to a DICOM file. 
-# is_greyscale (bool): If loaded DICOM image is greyscale or not. -# """ -# # Arrange -# test_instance = pydicom.dcmread(dcm_file) -# test_original_image = test_instance.pixel_array - -# # Act -# test_scaled_image = mock_engine._rescale_dcm_pixel_array( -# test_instance, is_greyscale -# ) - -# # Assert -# assert np.shape(test_original_image) == np.shape(test_scaled_image) -# assert np.min(test_scaled_image) >= 0 -# assert np.max(test_scaled_image) <= 255 -# if is_greyscale is True: -# assert np.max(test_original_image) != np.max(test_scaled_image) -# assert len(np.shape(test_scaled_image)) == 2 -# else: -# assert len(np.shape(test_scaled_image)) == 3 - - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine._save_pixel_array_as_png() -# # ------------------------------------------------------ -# @pytest.mark.parametrize( -# "dcm_file, is_greyscale, rescaled_image_numpy_path", -# [ -# ( -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), -# True, -# Path(TEST_NUMPY_DIR, "0_ORIGINAL.npy"), -# ), -# ( -# Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), -# False, -# Path(TEST_NUMPY_DIR, "RGB_ORIGINAL.npy"), -# ), -# ( -# Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), -# True, -# Path(TEST_NUMPY_DIR, "1_ORIGINAL.npy"), -# ), -# ( -# Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), -# True, -# Path(TEST_NUMPY_DIR, "2_ORIGINAL.npy"), -# ), -# ( -# Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), -# True, -# Path(TEST_NUMPY_DIR, "3_ORIGINAL.npy"), -# ), -# ], -# ) -# def test_save_pixel_array_as_png_happy_path( -# mock_engine: DicomImageRedactorEngine, -# dcm_file: Path, -# is_greyscale: bool, -# rescaled_image_numpy_path: Path, -# ): -# """Test happy path for DicomImageRedactorEngine._save_pixel_array_as_png - -# Args: -# dcm_file (pathlib.Path): Path to a DICOM file. -# is_greyscale (bool): If loaded DICOM image is greyscale or not. -# rescaled_image_numpy_path (pathlib.Path): Path to file containing numpy array of rescaled image. 
-# """ -# # Arrange -# test_instance = pydicom.dcmread(dcm_file) -# test_image = mock_engine._rescale_dcm_pixel_array(test_instance, is_greyscale) -# filename = "test" -# with open(rescaled_image_numpy_path, "rb") as f: -# loaded_numpy_array = np.load(f) - -# with tempfile.TemporaryDirectory() as tmpdirname: -# # Act -# _ = mock_engine._save_pixel_array_as_png( -# test_image, is_greyscale, filename, tmpdirname -# ) - -# # Assert -# assert np.shape(test_image) == np.shape(loaded_numpy_array) -# assert f"{filename}.png" in os.listdir(tmpdirname) - - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine._convert_dcm_to_png() -# # ------------------------------------------------------ -# def test_convert_dcm_to_png_happy_path(mocker): -# """Test happy path for DicomImageRedactorEngine._convert_dcm_to_png""" -# # Arrange -# mock_dcm_read = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.pydicom.dcmread", -# return_value=None, -# ) -# mock_check_if_gresycale = mocker.patch.object( -# DicomImageRedactorEngine, -# "_check_if_greyscale", -# return_value=True, -# ) -# mock_rescale_dcm_pixel_array = mocker.patch.object( -# DicomImageRedactorEngine, -# "_rescale_dcm_pixel_array", -# return_value=np.array([1, 2, 3]), -# ) -# mock_save_array_as_png = mocker.patch.object( -# DicomImageRedactorEngine, "_save_pixel_array_as_png", return_value=None -# ) -# mock_engine = DicomImageRedactorEngine() - -# with tempfile.TemporaryDirectory() as tmpdirname: -# # Act -# _, _ = mock_engine._convert_dcm_to_png(Path("filename.dcm"), tmpdirname) - -# # Assert -# assert mock_dcm_read.call_count == 1 -# assert mock_check_if_gresycale.call_count == 1 -# assert mock_rescale_dcm_pixel_array.call_count == 1 -# assert mock_save_array_as_png.call_count == 1 - - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine._get_bg_color() -# # ------------------------------------------------------ -# 
@pytest.mark.parametrize( -# "png_file, is_greyscale, invert_flag, expected_bg_color", -# [ -# (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, False, 243), -# (Path(TEST_PNG_DIR, "RGB_ORIGINAL.png"), False, False, (0, 0, 0)), -# (Path(TEST_PNG_DIR, "1_ORIGINAL.png"), True, False, 0), -# (Path(TEST_PNG_DIR, "2_ORIGINAL.png"), True, False, 0), -# (Path(TEST_PNG_DIR, "3_ORIGINAL.png"), True, False, 0), -# (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, True, 12), -# (Path(TEST_PNG_DIR, "RGB_ORIGINAL.png"), False, True, (255, 255, 255)), -# (Path(TEST_PNG_DIR, "1_ORIGINAL.png"), True, True, 255), -# (Path(TEST_PNG_DIR, "2_ORIGINAL.png"), True, True, 255), -# (Path(TEST_PNG_DIR, "3_ORIGINAL.png"), True, True, 255), -# ], -# ) -# def test_get_bg_color_happy_path( -# mock_engine: DicomImageRedactorEngine, -# png_file: Path, -# is_greyscale: bool, -# invert_flag: bool, -# expected_bg_color: Union[int, Tuple[int, int, int]], -# ): -# """Test happy path for DicomImageRedactorEngine._get_bg_color - -# Args: -# png_file (pathlib.Path): Path to a PNG file. -# is_greyscale (bool): If loaded DICOM image is greyscale or not. -# invert_flag (bool): True if we want to invert image colors to get foreground. -# expected_bg_color (int or Tuple of int): The expected background color of the image. 
-# """ -# # Arrange -# test_image = Image.open(png_file) - -# # Act -# test_bg_color = mock_engine._get_bg_color(test_image, is_greyscale, invert_flag) - -# # Assert -# assert test_bg_color == expected_bg_color - - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine._get_array_corners() -# # ------------------------------------------------------ -# @pytest.mark.parametrize( -# "dcm_file, crop_ratio", -# [ -# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), 0.5), -# (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), 0.5), -# (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), 0.5), -# (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), 0.5), -# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), 0.75), -# (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), 0.25), -# (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), 0.31), -# (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), 0.83), -# ], -# ) -# def test_get_array_corners_happy_path( -# mock_engine: DicomImageRedactorEngine, -# dcm_file: Path, -# crop_ratio: float, -# ): -# """Test happy path for DicomImageRedactorEngine._get_array_corners - -# Args: -# dcm_file (pathlib.Path): Path to a DICOM file. -# crop_ratio (float): Ratio to crop to. 
-# """ -# # Arrange -# test_instance = pydicom.dcmread(dcm_file) -# test_pixel_array = test_instance.pixel_array -# full_width = test_pixel_array.shape[0] -# full_height = test_pixel_array.shape[1] -# full_min_pixel_val = np.min(test_pixel_array) -# full_max_pixel_val = np.max(test_pixel_array) - -# # Act -# test_cropped_array = mock_engine._get_array_corners(test_pixel_array, crop_ratio) -# cropped_width = test_cropped_array.shape[0] -# cropped_height = test_cropped_array.shape[1] -# cropped_min_pixel_val = np.min(test_cropped_array) -# cropped_max_pixel_val = np.max(test_cropped_array) - -# # Assert -# assert cropped_width * cropped_height < full_width * full_height -# assert cropped_min_pixel_val >= full_min_pixel_val -# assert cropped_max_pixel_val <= full_max_pixel_val - - -# @pytest.mark.parametrize( -# "crop_ratio, expected_error_type", -# [ -# (0, "ValueError"), -# (-0.4, "ValueError"), -# (1.3, "ValueError"), -# ], -# ) -# def test_get_array_corners_exceptions( -# mock_engine: DicomImageRedactorEngine, crop_ratio: float, expected_error_type: str -# ): -# """Test error handling of _get_array_corners - -# Args: -# crop_ratio (float): Ratio to crop to. -# expected_error_type (str): Type of error we expect to be raised. 
-# """ -# with pytest.raises(Exception) as exc_info: -# # Arrange -# dcm_file = Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm") -# test_instance = pydicom.dcmread(dcm_file) -# test_pixel_array = test_instance.pixel_array - -# # Act -# _ = mock_engine._get_array_corners(test_pixel_array, crop_ratio) - -# # Assert -# assert expected_error_type == exc_info.typename - - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine._get_most_common_pixel_value() -# # ------------------------------------------------------ -# @pytest.mark.parametrize( -# "dcm_file, fill, expected_color", -# [ -# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), "contrast", 50), -# (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), "contrast", 16383), -# (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), "contrast", 32767), -# (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), "contrast", 4095), -# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), "background", 973), -# (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), "background", 0), -# (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), "background", 0), -# (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), "background", 0), -# ], -# ) -# def test_get_most_common_pixel_value_happy_path( -# mock_engine: DicomImageRedactorEngine, -# dcm_file: Path, -# fill: str, -# expected_color: Union[int, Tuple[int, int, int]], -# ): -# """Test happy path for DicomImageRedactorEngine._get_most_common_pixel_value - -# Args: -# dcm_file (pathlib.Path): Path to a DICOM file. -# fill (str): Color setting to use ("contrast" or "background"). -# expected_color (int or Tuple of int): The expected color returned for the image. 
-# """ -# # Arrange -# crop_ratio = 0.75 -# test_instance = pydicom.dcmread(dcm_file) - -# # Act -# test_color = mock_engine._get_most_common_pixel_value( -# test_instance, crop_ratio, fill -# ) - -# # Assert -# assert test_color == expected_color - - -# @pytest.mark.parametrize( -# "dcm_file, expected_error_type", -# [ -# (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), "TypeError"), -# ], -# ) -# def test_get_most_common_pixel_value_exceptions( -# mock_engine: DicomImageRedactorEngine, dcm_file: Path, expected_error_type: str -# ): -# """Test error handling of _get_most_common_pixel_value - -# Args: -# dcm_file (pathlib.Path): Path to a DICOM file. -# expected_error_type (str): Type of error we expect to be raised. -# """ -# with pytest.raises(Exception) as exc_info: -# # Arrange -# crop_ratio = 0.75 -# test_instance = pydicom.dcmread(dcm_file) - -# # Act -# _ = mock_engine._get_most_common_pixel_value( -# test_instance, crop_ratio, "contrast" -# ) - -# # Assert -# assert expected_error_type == exc_info.typename - - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine._add_padding() -# # ------------------------------------------------------ -# @pytest.mark.parametrize( -# "png_file, is_greyscale, padding_width", -# [ -# (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, 15), -# (Path(TEST_PNG_DIR, "RGB_ORIGINAL.png"), False, 15), -# (Path(TEST_PNG_DIR, "1_ORIGINAL.png"), True, 15), -# (Path(TEST_PNG_DIR, "2_ORIGINAL.png"), True, 15), -# (Path(TEST_PNG_DIR, "3_ORIGINAL.png"), True, 15), -# (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, 30), -# (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, 99), -# ], -# ) -# def test_add_padding_happy_path( -# mock_engine: DicomImageRedactorEngine, -# png_file: Path, -# is_greyscale: bool, -# padding_width: int, -# ): -# """Test happy path for DicomImageRedactorEngine._add_padding - -# Args: -# png_file (pathlib.Path): Path to a PNG file. 
-# is_greyscale (bool): If loaded DICOM image is greyscale or not. -# padding_width (int): Pixel width of padding (uniform). -# """ -# # Arrange -# test_image = Image.open(png_file) - -# # Act -# test_image_with_padding = mock_engine._add_padding( -# test_image, is_greyscale, padding_width -# ) - -# # Assert -# assert test_image_with_padding.height - test_image.height == 2 * padding_width -# assert test_image_with_padding.width - test_image.width == 2 * padding_width - - -# @pytest.mark.parametrize( -# "png_file, is_greyscale, padding_width, expected_error_type", -# [ -# (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, -1, "ValueError"), -# (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, 0, "ValueError"), -# (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, 200, "ValueError"), -# ], -# ) -# def test_add_padding_exceptions( -# mock_engine: DicomImageRedactorEngine, -# png_file: Path, -# is_greyscale: bool, -# padding_width: int, -# expected_error_type: str, -# ): -# """Test error handling of _add_padding - -# Args: -# png_file (pathlib.Path): Path to a PNG file. -# is_greyscale (bool): If loaded DICOM image is greyscale or not. -# padding_width (int): Pixel width of padding (uniform). -# expected_error_type (str): Type of error we expect to be raised. 
-# """ -# with pytest.raises(Exception) as exc_info: -# # Arrange -# test_image = Image.open(png_file) - -# # Act -# _, _ = mock_engine._add_padding(test_image, is_greyscale, padding_width) - -# # Assert -# assert expected_error_type == exc_info.typename - - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine._copy_files_for_processing() -# # ------------------------------------------------------ -# @pytest.mark.parametrize( -# "src_path, expected_num_of_files", -# [ -# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), 1), -# (Path(TEST_DICOM_PARENT_DIR), 18), -# (Path(TEST_DICOM_DIR_1), 3), -# (Path(TEST_DICOM_DIR_2), 2), -# (Path(TEST_DICOM_DIR_3), 1), -# (Path(TEST_PNG_DIR), 5), -# (Path(TEST_NUMPY_DIR), 5), -# ], -# ) -# def test_copy_files_for_processing_happy_path( -# mock_engine: DicomImageRedactorEngine, src_path: Path, expected_num_of_files: int -# ): -# """Test happy path for DicomImageRedactorEngine._copy_files_for_processing - -# Args: -# src_path (pathlib.Path): Path to a file or directory to copy. -# expected_num_of_files (int): Expected number of files to be copied. 
-# """ -# # Arrange - -# with tempfile.TemporaryDirectory() as tmpdirname: -# # Act -# test_dst_path = mock_engine._copy_files_for_processing(src_path, tmpdirname) - -# # Arrange -# p = Path(tmpdirname).glob(f"**/*") -# files = [x for x in p if x.is_file()] - -# # Assert -# assert Path(tmpdirname) < test_dst_path -# assert expected_num_of_files == len(files) - - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine._get_text_metadata() -# # ------------------------------------------------------ -# @pytest.mark.parametrize( -# "dcm_path, metadata_text_len, is_name_true_len, is_patient_true_len", -# [ -# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), 82, 4, 6), -# (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), 93, 9, 10), -# (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), 83, 9, 8), -# (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), 118, 6, 10), -# (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), 135, 8, 10), -# ], -# ) -# def test_get_text_metadata_happy_path( -# mock_engine: DicomImageRedactorEngine, -# dcm_path: Path, -# metadata_text_len: int, -# is_name_true_len: int, -# is_patient_true_len: int, -# ): -# """Test happy path for DicomImageRedactorEngine._get_text_metadata - -# Args: -# dcm_path (pathlib.Path): Path to DICOM file. -# metadata_text_len (int): Length of the expected returned metadata_text list. -# is_name_true_len (int): Number of true values in the returned is_name list. -# is_patient_true_len (int): Number of true values in the returned is_name list. 
-# """ -# # Arrange -# test_instance = pydicom.dcmread(dcm_path) - -# # Act -# test_metadata_text, test_is_name, test_is_patient = mock_engine._get_text_metadata( -# test_instance -# ) - -# # Assert -# idx_is_name = list(np.where(np.array(test_is_name) == True)[0]) -# idx_is_patient = list(np.where(np.array(test_is_patient) == True)[0]) - -# assert len(test_metadata_text) == len(test_is_name) == len(test_is_patient) -# assert len(idx_is_name) == is_name_true_len -# assert len(idx_is_patient) == is_patient_true_len -# assert type(test_metadata_text[idx_is_name[0]]) == str - - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine._process_names() -# # ------------------------------------------------------ -# @pytest.mark.parametrize( -# "text_metadata, is_name, expected_phi_list", -# [ -# ([], [], []), -# ( -# ["JOHN^DOE", "City Hospital", "12345"], -# [True, False, False], -# [ -# "JOHN^DOE", -# "City Hospital", -# "12345", -# "JOHN", -# "DOE", -# "John", -# "Doe", -# "john", -# "doe", -# "JOHN DOE", -# "John Doe", -# "john doe", -# ], -# ), -# ], -# ) -# def test_process_names_happy_path( -# mock_engine: DicomImageRedactorEngine, -# text_metadata: list, -# is_name: list, -# expected_phi_list: list, -# ): -# """Test happy path for DicomImageRedactorEngine._process_names - -# Args: -# text_metadata (list): List of text metadata. -# is_name (list): Whether each element is a name or not. -# expected_phi_list (list): List of expected output. 
-# """ -# # Arrange - -# # Act -# test_phi_list = mock_engine._process_names(text_metadata, is_name) - -# # Assert -# assert set(test_phi_list) == set(expected_phi_list) - - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine._add_known_generic_phi() -# # ------------------------------------------------------ -# @pytest.mark.parametrize( -# "phi_list, expected_return_list", -# [ -# ([], ["M", "[M]", "F", "[F]", "X", "[X]", "U", "[U]"]), -# ( -# ["JOHN^DOE", "City Hospital", "12345"], -# [ -# "JOHN^DOE", -# "City Hospital", -# "12345", -# "M", -# "[M]", -# "F", -# "[F]", -# "X", -# "[X]", -# "U", -# "[U]", -# ], -# ), -# ], -# ) -# def test_add_known_generic_phi_happy_path( -# mock_engine: DicomImageRedactorEngine, phi_list: list, expected_return_list: list -# ): -# """Test happy path for DicomImageRedactorEngine._add_known_generic_phi - -# Args: -# phi_list (list): List of PHI. -# expected_return_list (list): List of expected output. -# """ -# # Arrange - -# # Act -# test_phi_list = mock_engine._add_known_generic_phi(phi_list) - -# # Assert -# assert set(test_phi_list) == set(expected_return_list) - - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine._make_phi_list() -# # ------------------------------------------------------ -# @pytest.mark.parametrize( -# "original_metadata, mock_process_names_val, mock_add_known_val, expected_return_list", -# [ -# ( -# [ -# ["A", "B"], -# "A", -# 123, -# "JOHN^DOE", -# "City Hospital", -# "12345", -# ], -# [ -# ["A", "B"], -# "A", -# 123, -# "JOHN^DOE", -# "City Hospital", -# "12345", -# "JOHN", -# "DOE", -# "John", -# "Doe", -# "john", -# "doe", -# "JOHN DOE", -# "John Doe", -# "john doe", -# ], -# [ -# ["A", "B"], -# "A", -# 123, -# "JOHN^DOE", -# "City Hospital", -# "12345", -# "JOHN", -# "DOE", -# "John", -# "Doe", -# "john", -# "doe", -# "JOHN DOE", -# "John Doe", -# "john doe", -# "M", -# "[M]", -# "F", -# "[F]", -# "X", -# "[X]", -# "U", -# 
"[U]", -# ], -# [ -# "A", -# "B", -# "123", -# "JOHN^DOE", -# "City Hospital", -# "12345", -# "JOHN", -# "DOE", -# "John", -# "Doe", -# "john", -# "doe", -# "JOHN DOE", -# "John Doe", -# "john doe", -# "M", -# "[M]", -# "F", -# "[F]", -# "X", -# "[X]", -# "U", -# "[U]", -# ], -# ), -# ], -# ) -# def test_make_phi_list_happy_path( -# mocker, -# mock_engine: DicomImageRedactorEngine, -# original_metadata: list, -# mock_process_names_val: list, -# mock_add_known_val: list, -# expected_return_list: list, -# ): -# """Test happy path for DicomImageRedactorEngine._make_phi_list - -# Args: -# original_metadata (list): List extracted metadata (excluding pixel array). -# mock_process_names_val (list): Value to provide to mock process_names. -# mock_add_known_val (list): Value to provide to mock _add_known_generic_phi. -# expected_return_list (list): List of expected output. -# """ -# # Arrange -# mock_process_names = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._process_names", -# return_value=mock_process_names_val, -# ) -# mock_add_known_generic_phi = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._add_known_generic_phi", -# return_value=mock_add_known_val, -# ) - -# # Act -# test_phi_str_list = mock_engine._make_phi_list(original_metadata, [], []) - -# # Assert -# assert mock_process_names.call_count == 1 -# assert mock_add_known_generic_phi.call_count == 1 -# assert set(test_phi_str_list) == set(expected_return_list) - - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine._set_bbox_color() -# # ------------------------------------------------------ -# @pytest.mark.parametrize( -# "fill, mock_box_color", -# [ -# ("contrast", 0), -# ("contrast", (0, 0, 0)), -# ("background", 255), -# ("background", (255, 255, 255)), -# ], -# ) -# def test_set_bbox_color_happy_path( -# mocker, -# fill: str, -# mock_box_color: Union[int, Tuple[int, int, 
int]], -# ): -# """Test happy path for DicomImageRedactorEngine._set_bbox_color - -# Args: -# fill (str): Determines how box color is selected. -# mock_box_color (int or Tuple of int): Color value to assign to mocker. -# """ -# # Arrange -# test_instance = pydicom.dcmread(Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm")) - -# mock_convert_dcm_to_png = mocker.patch.object( -# DicomImageRedactorEngine, "_convert_dcm_to_png", return_value=[None, True] -# ) -# mock_Image_open = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.Image.open", -# return_value=None, -# ) -# mock_get_bg_color = mocker.patch.object( -# DicomImageRedactorEngine, -# "_get_bg_color", -# return_value=mock_box_color, -# ) -# mock_engine = DicomImageRedactorEngine() - -# # Act -# test_box_color = mock_engine._set_bbox_color(test_instance, fill) - -# # Assert -# assert mock_convert_dcm_to_png.call_count == 1 -# assert mock_Image_open.call_count == 1 -# assert mock_get_bg_color.call_count == 1 -# assert test_box_color == mock_box_color - - -# @pytest.mark.parametrize( -# "fill, expected_error_type", -# [ -# ("typo", "ValueError"), -# ("somecolor", "ValueError"), -# ("0", "ValueError"), -# ("255", "ValueError"), -# ], -# ) -# def test_set_bbox_color_exceptions( -# mock_engine: DicomImageRedactorEngine, -# fill: str, -# expected_error_type: str, -# ): -# """Test error handling of _set_bbox_color - -# Args: -# fill (str): Determines how box color is selected. -# expected_error_type (str): Type of error we expect to be raised. 
-# """ -# with pytest.raises(Exception) as exc_info: -# # Arrange -# test_instance = pydicom.dcmread(Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm")) - -# # Act -# _ = mock_engine._set_bbox_color(test_instance, fill) - -# # Assert -# assert expected_error_type == exc_info.typename - - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine._check_if_compressed() -# # ------------------------------------------------------ -# @pytest.mark.parametrize( -# "dcm_path, compression_status", -# [ -# ( -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), -# False -# ), -# ( -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_compressed.dcm"), -# True -# ), -# ], -# ) -# def test_check_if_compressed_happy_path( -# mock_engine: DicomImageRedactorEngine, -# dcm_path: Path, -# compression_status: bool, -# ): -# """Test happy path for DicomImageRedactorEngine._check_if_compressed - -# Args: -# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. -# dcm_path (pathlib.Path): Path to DICOM file. -# compression_status (bool): If the pixel data is compressed. -# """ -# # Arrange -# test_instance = pydicom.dcmread(dcm_path) - -# # Act -# test_is_compressed = mock_engine._check_if_compressed(test_instance) - -# # Assert -# assert test_is_compressed == compression_status - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine._compress_pixel_data() -# # ------------------------------------------------------ -# @pytest.mark.parametrize( -# "dcm_path", -# [ -# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm")), -# (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm")) -# ], -# ) -# def test_compress_pixel_data_happy_path( -# mock_engine: DicomImageRedactorEngine, -# dcm_path: Path, -# ): -# """Test happy path for DicomImageRedactorEngine._compress_pixel_data - -# Args: -# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. -# dcm_path (pathlib.Path): Path to DICOM file. 
-# """ -# # Arrange -# test_instance = pydicom.dcmread(dcm_path) - -# # Act -# test_compressed = mock_engine._compress_pixel_data(test_instance) - -# # Assert -# assert mock_engine._check_if_compressed(test_compressed) == True - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine._check_if_has_image_icon_sequence() -# # ------------------------------------------------------ -# @pytest.mark.parametrize( -# "dcm_path, has_sequence", -# [ -# ( -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), -# False -# ), -# ( -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_icon_image_sequence.dcm"), -# True -# ), -# ], -# ) -# def test_check_if_has_image_icon_sequence_happy_path( -# mock_engine: DicomImageRedactorEngine, -# dcm_path: Path, -# has_sequence: bool, -# ): -# """Test happy path for DicomImageRedactorEngine._check_if_has_image_icon_sequence -# Args: -# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. -# dcm_path (pathlib.Path): Path to DICOM file. -# has_sequence (bool): If additional pixel data is available in the instance. 
-# """ -# # Arrange -# test_instance = pydicom.dcmread(dcm_path) - -# # Act -# test_has_sequence = mock_engine._check_if_has_image_icon_sequence(test_instance) - -# # Assert -# assert test_has_sequence == has_sequence - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine._add_redact_box() -# # ------------------------------------------------------ -# @pytest.mark.parametrize( -# "dcm_path, mock_is_compressed, mock_has_image_icon_sequence, mock_is_greyscale, mock_box_color, bounding_boxes_coordinates", -# [ -# ( -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), -# False, -# False, -# True, -# 0, -# [ -# {"top": 0, "left": 0, "width": 100, "height": 100}, -# {"top": 24, "left": 0, "width": 75, "height": 51}, -# {"top": 1, "left": 588, "width": 226, "height": 35}, -# ], -# ), -# ( -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_compressed.dcm"), -# True, -# False, -# True, -# 0, -# [ -# {"top": 0, "left": 0, "width": 100, "height": 100}, -# {"top": 24, "left": 0, "width": 75, "height": 51}, -# {"top": 1, "left": 588, "width": 226, "height": 35}, -# ], -# ), -# ( -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_icon_image_sequence.dcm"), -# False, -# True, -# True, -# 0, -# [ -# {"top": 0, "left": 0, "width": 100, "height": 100}, -# {"top": 24, "left": 0, "width": 75, "height": 51}, -# {"top": 1, "left": 588, "width": 226, "height": 35}, -# ], -# ), -# ( -# Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), -# False, -# False, -# False, -# (0, 0, 0), -# [ -# {"top": 0, "left": 0, "width": 500, "height": 500}, -# {"top": 24, "left": 0, "width": 75, "height": 51}, -# {"top": 1, "left": 588, "width": 100, "height": 100}, -# ], -# ), -# ], -# ) -# def test_add_redact_box_happy_path( -# mocker, -# dcm_path: Path, -# mock_is_compressed: bool, -# mock_has_image_icon_sequence: bool, -# mock_is_greyscale: bool, -# mock_box_color: Union[int, Tuple[int, int, int]], -# bounding_boxes_coordinates: dict, -# ): -# """Test happy path for 
DicomImageRedactorEngine._add_redact_box - -# Args: -# dcm_path (pathlib.Path): Path to DICOM file. -# mock_is_compressed (bool): If the pixel data is compressed. -# mock_has_image_icon_sequence (bool): If there is more than one set of pixel data. -# mock_is_greyscale (bool): Value to use when mocking _check_if_greyscale. -# mock_box_color (int or Tuple of int): Color value to assign to mocker. -# bouding_boxes_coordinates (dict): Formatted bbox coordinates. -# """ -# # Arrange -# test_instance = pydicom.dcmread(dcm_path) -# crop_ratio = 0.75 -# mock_check_if_compressed = mocker.patch.object( -# DicomImageRedactorEngine, -# "_check_if_compressed", -# return_value=mock_is_compressed -# ) -# mock_check_if_has_image_icon_sequence = mocker.patch.object( -# DicomImageRedactorEngine, -# "_check_if_has_image_icon_sequence", -# return_value=mock_has_image_icon_sequence -# ) -# mock_check_if_greyscale = mocker.patch.object( -# DicomImageRedactorEngine, -# "_check_if_greyscale", -# return_value=mock_is_greyscale, -# ) -# mock_get_common_pixel = mocker.patch.object( -# DicomImageRedactorEngine, -# "_get_most_common_pixel_value", -# return_value=mock_box_color, -# ) -# mock_set_bbox_color = mocker.patch.object( -# DicomImageRedactorEngine, -# "_set_bbox_color", -# return_value=mock_box_color, -# ) -# mock_engine = DicomImageRedactorEngine() - -# # Act -# test_redacted_instance = mock_engine._add_redact_box( -# test_instance, bounding_boxes_coordinates, crop_ratio -# ) - -# # Assert -# assert mock_check_if_compressed.call_count == 1 -# assert mock_check_if_has_image_icon_sequence.call_count == 1 -# assert mock_check_if_greyscale.call_count == 1 -# if mock_is_greyscale is True: -# original_pixel_values = np.array(test_instance.pixel_array).flatten() -# redacted_pixel_values = np.array(test_redacted_instance.pixel_array).flatten() -# box_color_pixels_original = len( -# np.where(original_pixel_values == mock_box_color)[0] -# ) -# box_color_pixels_redacted = len( -# 
np.where(redacted_pixel_values == mock_box_color)[0] -# ) -# assert mock_get_common_pixel.call_count == 1 -# else: -# list_of_RGB_pixels_original = np.vstack(test_instance.pixel_array).tolist() -# list_of_RGB_pixels_redacted = np.vstack( -# test_redacted_instance.pixel_array -# ).tolist() -# box_color_pixels_original = len( -# np.unique( -# np.where(np.array(list_of_RGB_pixels_original) == mock_box_color)[0] -# ) -# ) -# box_color_pixels_redacted = len( -# np.unique( -# np.where(np.array(list_of_RGB_pixels_redacted) == mock_box_color)[0] -# ) -# ) -# assert mock_set_bbox_color.call_count == 1 - -# assert box_color_pixels_redacted > box_color_pixels_original - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine._get_analyzer_results() -# # ------------------------------------------------------ -# @pytest.mark.parametrize( -# "image, dcm_path, use_metadata, ad_hoc_recognizers", -# [ -# ( -# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), -# False, -# None -# ), -# ( -# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), -# False, -# [PatternRecognizer(supported_entity="PERSON", deny_list=["1"])] -# ), -# ( -# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), -# True, -# None -# ), -# ( -# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), -# True, -# [PatternRecognizer(supported_entity="PERSON", deny_list=["2"])] -# ), -# ], -# ) -# def test_get_analyzer_results_happy_path( -# mocker, -# mock_engine: DicomImageRedactorEngine, -# image: Image, -# dcm_path: str, -# use_metadata: bool, -# ad_hoc_recognizers: Optional[List[PatternRecognizer]] -# ): -# """Test happy path for DicomImageRedactorEngine._get_analyzer_results - -# Args: -# mock_engine 
(DicomImageRedactorEngine): DicomImageRedactorEngine object. -# image (PIL.Image): A PIL image. -# dcm_path (pathlib.Path): Path to DICOM file. -# use_metadata (bool): Whether to consider metadata when running analysis. -# ad_hoc_recognizers(None or list): Ad-hoc recognizers to use. -# """ -# # Arrange -# mock_analyze = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.ImageAnalyzerEngine.analyze", -# return_value=None, -# ) -# mock_get_text_metadata = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._get_text_metadata", -# return_value=[None, None, None], -# ) -# mock_make_phi_list = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._make_phi_list", -# return_value=None, -# ) -# mock_pattern_recognizer = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.PatternRecognizer", -# return_value=None, -# ) -# test_instance = pydicom.dcmread(dcm_path) - -# # Act -# _ = mock_engine._get_analyzer_results( -# image, test_instance, use_metadata, None, ad_hoc_recognizers -# ) - -# # Assert -# if use_metadata is False: -# mock_analyze.assert_called_once() -# mock_get_text_metadata.assert_not_called() -# mock_make_phi_list.assert_not_called() -# mock_pattern_recognizer.assert_not_called() -# elif use_metadata is True: -# mock_analyze.assert_called_once() -# mock_get_text_metadata.assert_called_once() -# mock_make_phi_list.assert_called_once() -# mock_pattern_recognizer.assert_called_once() - -# @pytest.mark.parametrize( -# "image, dcm_path, ad_hoc_recognizers, expected_error_type", -# [ -# ( -# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), -# "invalidType", -# "TypeError" -# ), -# ( -# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), -# [], -# "ValueError" -# ), -# ( -# 
Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), -# [PatternRecognizer(supported_entity="PERSON", deny_list=["a"]), 2], -# "TypeError" -# ), -# ], -# ) -# def test_get_analyzer_results_exceptions( -# mock_engine: DicomImageRedactorEngine, -# image: Image, -# dcm_path: str, -# ad_hoc_recognizers: Optional[List[PatternRecognizer]], -# expected_error_type: str, -# ): -# """Test error handling of DicomImageRedactorEngine _get_analyzer_results() - -# Args: -# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. -# image (PIL.Image): A PIL image. -# dcm_path (pathlib.Path): Path to DICOM file. -# ad_hoc_recognizers(None or list): Ad-hoc recognizers to use. -# expected_error_type (str): Type of error we expect to be raised. -# """ -# with pytest.raises(Exception) as exc_info: -# # Arrange -# test_instance = pydicom.dcmread(dcm_path) - -# # Act -# _ = mock_engine._get_analyzer_results( -# image, test_instance, True, None, ad_hoc_recognizers -# ) - -# # Assert -# assert expected_error_type == exc_info.typename - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine redact_and_return_bbox() -# # ------------------------------------------------------ -# @pytest.mark.parametrize( -# "dcm_path", -# [ -# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm")), -# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_compressed.dcm")), -# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_icon_image_sequence.dcm")), -# (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm")), -# (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM")), -# (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom")), -# (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM")), -# ], -# ) -# def test_DicomImageRedactorEngine_redact_and_return_bbox( -# mocker, -# mock_engine: DicomImageRedactorEngine, -# dcm_path: str, -# ): -# """Test happy path for DicomImageRedactorEngine redact_and_return_bbox() - -# Args: -# mock_engine 
(DicomImageRedactorEngine): DicomImageRedactorEngine object. -# dcm_path (str): Path to input DICOM file or dir. -# """ -# # Arrange -# test_image = pydicom.dcmread(dcm_path) - -# mock_check_greyscale = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._check_if_greyscale", return_value=None -# ) -# mock_rescale_dcm = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._rescale_dcm_pixel_array", return_value=None -# ) -# mock_save_pixel_array = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._save_pixel_array_as_png", return_value=None -# ) -# mock_image_open = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.Image.open", -# return_value=None, -# ) -# mock_add_padding = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._add_padding", -# return_value=None, -# ) -# mock_analyze = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._get_analyzer_results", -# return_value=None, -# ) - -# mock_get_analyze_bbox = mocker.patch( -# "presidio_image_redactor.image_redactor_engine.BboxProcessor.get_bboxes_from_analyzer_results", -# return_value=None, -# ) - -# mock_remove_bbox_padding = mocker.patch( -# "presidio_image_redactor.image_redactor_engine.BboxProcessor.remove_bbox_padding", -# return_value=[{"mock_data": 1}, {"mock_data": 2}, {"mock_data": 3}], -# ) - -# mock_add_redact_box = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._add_redact_box", -# return_value=test_image, -# ) - -# # Act -# test_redacted_image, _ = mock_engine.redact_and_return_bbox(test_image, use_metadata=True) - -# # Assert -# assert type(test_redacted_image) in [pydicom.dataset.FileDataset, pydicom.dataset.Dataset] -# # assertions for test_bboxes type causes silent failures/hangups for Python 3.11 -# 
mock_check_greyscale.assert_called_once() -# mock_rescale_dcm.assert_called_once() -# mock_save_pixel_array.assert_called_once() -# mock_image_open.assert_called_once() -# mock_add_padding.assert_called_once() -# mock_analyze.assert_called_once() -# mock_get_analyze_bbox.assert_called_once() -# mock_remove_bbox_padding.assert_called_once() -# mock_add_redact_box.assert_called_once() - -# @pytest.mark.parametrize( -# "image, load_file, expected_error_type", -# [ -# (Path(TEST_DICOM_PARENT_DIR), True, ["TypeError", "IsADirectoryError", "PermissionError"]), -# (Path(TEST_DICOM_PARENT_DIR), False, ["TypeError"]), -# ("path_here", False, ["TypeError"]), -# (np.random.randint(255, size=(64, 64)), False, ["TypeError"]), -# (Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), False, ["TypeError"]), -# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_no_pixels.dcm"), True, ["AttributeError"]), -# ], -# ) -# def test_DicomImageRedactorEngine_redact_and_return_bbox_exceptions( -# mock_engine: DicomImageRedactorEngine, -# image: T, -# load_file: bool, -# expected_error_type: List[str], -# ): -# """Test error handling of DicomImageRedactorEngine redact_and_return_bbox() - -# Args: -# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. -# image (any): Input "image". -# load_file (bool): Whether to run pydicom.dcmread() on the input image. -# expected_error_type (List(str)): Type of error we expect to be raised. 
-# """ -# with pytest.raises(Exception) as exc_info: -# # Arrange -# if load_file: -# test_image = pydicom.dcmread(image) -# else: -# test_image = image -# # Act -# mock_engine.redact(test_image, fill="contrast", padding_width=25, use_metadata=True -# ) - -# # Assert -# assert exc_info.typename in expected_error_type +"""Test suite for dicom_image_redactor_engine.py""" +from pathlib import Path +import tempfile +import os +import json +import numpy as np +from PIL import Image +import pydicom +from presidio_image_redactor.dicom_image_redactor_engine import DicomImageRedactorEngine +from presidio_analyzer import PatternRecognizer +from typing import Union, List, Tuple, Dict, TypeVar, Optional +import pytest + +T = TypeVar('T') + +SCRIPT_DIR = os.path.dirname(__file__) +TEST_DICOM_PARENT_DIR = f"{SCRIPT_DIR}/test_data" +TEST_DICOM_DIR_1 = f"{SCRIPT_DIR}/test_data/dicom_dir_1" +TEST_DICOM_DIR_2 = f"{SCRIPT_DIR}/test_data/dicom_dir_1/dicom_dir_2" +TEST_DICOM_DIR_3 = f"{SCRIPT_DIR}/test_data/dicom_dir_1/dicom_dir_3" +TEST_NUMPY_DIR = f"{SCRIPT_DIR}/test_data/numpy_arrays" +TEST_PNG_DIR = f"{SCRIPT_DIR}/test_data/png_images" + + +@pytest.fixture(scope="module") +def mock_engine(): + """Instance of the DicomImageRedactorEngine""" + # Arrange + + # Act + dicom_image_redactor_engine = DicomImageRedactorEngine() + + return dicom_image_redactor_engine + + +# ------------------------------------------------------ +# DicomImageRedactorEngine._get_all_dcm_files() +# ------------------------------------------------------ +@pytest.mark.parametrize( + "dcm_dir, expected_list", + [ + ( + Path(TEST_DICOM_PARENT_DIR), + [ + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_no_pixels.dcm"), + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_icon_image_sequence.dcm"), + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_compressed.dcm"), + Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), + Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), + Path(TEST_DICOM_DIR_2, 
"2_ORIGINAL.dicom"), + Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), + ], + ), + ( + Path(TEST_DICOM_DIR_1), + [ + Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), + Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), + Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), + ], + ), + ( + Path(TEST_DICOM_DIR_2), + [ + Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), + Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), + ], + ), + ( + Path(TEST_DICOM_DIR_3), + [ + Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), + ], + ), + ], +) +def test_get_all_dcm_files_happy_path( + mock_engine: DicomImageRedactorEngine, dcm_dir: Path, expected_list: list +): + """Test happy path for DicomImageRedactorEngine._get_all_dcm_files + + Args: + dcm_dir (pathlib.Path): Path to a directory containing at least one .dcm file. + expected_list (list): List of pathlib Path objects. + """ + # Arrange + + # Act + test_files = mock_engine._get_all_dcm_files(dcm_dir) + print("test_files") + print(test_files) + print("expected file") + print(expected_list) + + # Assert + assert set(test_files) == set(expected_list) + + +# ------------------------------------------------------ +# DicomImageRedactorEngine._check_if_greyscale() +# ------------------------------------------------------ +@pytest.mark.parametrize( + "dcm_file, expected_result", + [ + (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), True), + (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), False), + ], +) +def test_check_if_greyscale_happy_path( + mock_engine: DicomImageRedactorEngine, dcm_file: Path, expected_result: bool +): + """Test happy path for DicomImageRedactorEngine._check_if_greyscale + + Args: + dcm_file (pathlib.Path): Path to a DICOM file. + expected_result (bool): Expected output of _check_if_greyscale. 
+    """
+    # Arrange
+    test_instance = pydicom.dcmread(dcm_file)
+
+    # Act
+    test_is_greyscale = mock_engine._check_if_greyscale(test_instance)
+
+    # Assert
+    assert test_is_greyscale == expected_result
+
+
+# ------------------------------------------------------
+# DicomImageRedactorEngine._rescale_dcm_pixel_array()
+# ------------------------------------------------------
+@pytest.mark.parametrize(
+    "dcm_file, is_greyscale",
+    [
+        (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), True),
+        (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), False),
+        (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), True),
+        (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), True),
+        (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), True),
+    ],
+)
+def test_rescale_dcm_pixel_array_happy_path(
+    mock_engine: DicomImageRedactorEngine, dcm_file: Path, is_greyscale: bool
+):
+    """Test happy path for DicomImageRedactorEngine._rescale_dcm_pixel_array
+
+    Args:
+        dcm_file (pathlib.Path): Path to a DICOM file.
+        is_greyscale (bool): If loaded DICOM image is greyscale or not.
+    """
+    # Arrange
+    test_instance = pydicom.dcmread(dcm_file)
+    test_original_image = test_instance.pixel_array
+
+    # Act
+    test_scaled_image = mock_engine._rescale_dcm_pixel_array(
+        test_instance, is_greyscale
+    )
+
+    # Assert
+    assert np.shape(test_original_image) == np.shape(test_scaled_image)
+    assert np.min(test_scaled_image) >= 0
+    assert np.max(test_scaled_image) <= 255
+    if is_greyscale is True:
+        assert np.max(test_original_image) != np.max(test_scaled_image)
+        assert len(np.shape(test_scaled_image)) == 2
+    else:
+        assert len(np.shape(test_scaled_image)) == 3
+
+
+# ------------------------------------------------------
+# DicomImageRedactorEngine._save_pixel_array_as_png()
+# ------------------------------------------------------
+@pytest.mark.parametrize(
+    "dcm_file, is_greyscale, rescaled_image_numpy_path",
+    [
+        (
+            Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"),
+            True,
+            Path(TEST_NUMPY_DIR, "0_ORIGINAL.npy"),
+        ),
+        (
+            Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"),
+            False,
+            Path(TEST_NUMPY_DIR, "RGB_ORIGINAL.npy"),
+        ),
+        (
+            Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"),
+            True,
+            Path(TEST_NUMPY_DIR, "1_ORIGINAL.npy"),
+        ),
+        (
+            Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"),
+            True,
+            Path(TEST_NUMPY_DIR, "2_ORIGINAL.npy"),
+        ),
+        (
+            Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"),
+            True,
+            Path(TEST_NUMPY_DIR, "3_ORIGINAL.npy"),
+        ),
+    ],
+)
+def test_save_pixel_array_as_png_happy_path(
+    mock_engine: DicomImageRedactorEngine,
+    dcm_file: Path,
+    is_greyscale: bool,
+    rescaled_image_numpy_path: Path,
+):
+    """Test happy path for DicomImageRedactorEngine._save_pixel_array_as_png
+
+    Args:
+        dcm_file (pathlib.Path): Path to a DICOM file.
+        is_greyscale (bool): If loaded DICOM image is greyscale or not.
+        rescaled_image_numpy_path (pathlib.Path): Path to file containing numpy array of rescaled image.
+ """ + # Arrange + test_instance = pydicom.dcmread(dcm_file) + test_image = mock_engine._rescale_dcm_pixel_array(test_instance, is_greyscale) + filename = "test" + with open(rescaled_image_numpy_path, "rb") as f: + loaded_numpy_array = np.load(f) + + with tempfile.TemporaryDirectory() as tmpdirname: + # Act + _ = mock_engine._save_pixel_array_as_png( + test_image, is_greyscale, filename, tmpdirname + ) + + # Assert + assert np.shape(test_image) == np.shape(loaded_numpy_array) + assert f"{filename}.png" in os.listdir(tmpdirname) + + +# ------------------------------------------------------ +# DicomImageRedactorEngine._convert_dcm_to_png() +# ------------------------------------------------------ +def test_convert_dcm_to_png_happy_path(mocker): + """Test happy path for DicomImageRedactorEngine._convert_dcm_to_png""" + # Arrange + mock_dcm_read = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.pydicom.dcmread", + return_value=None, + ) + mock_check_if_gresycale = mocker.patch.object( + DicomImageRedactorEngine, + "_check_if_greyscale", + return_value=True, + ) + mock_rescale_dcm_pixel_array = mocker.patch.object( + DicomImageRedactorEngine, + "_rescale_dcm_pixel_array", + return_value=np.array([1, 2, 3]), + ) + mock_save_array_as_png = mocker.patch.object( + DicomImageRedactorEngine, "_save_pixel_array_as_png", return_value=None + ) + mock_engine = DicomImageRedactorEngine() + + with tempfile.TemporaryDirectory() as tmpdirname: + # Act + _, _ = mock_engine._convert_dcm_to_png(Path("filename.dcm"), tmpdirname) + + # Assert + assert mock_dcm_read.call_count == 1 + assert mock_check_if_gresycale.call_count == 1 + assert mock_rescale_dcm_pixel_array.call_count == 1 + assert mock_save_array_as_png.call_count == 1 + + +# ------------------------------------------------------ +# DicomImageRedactorEngine._get_bg_color() +# ------------------------------------------------------ +@pytest.mark.parametrize( + "png_file, is_greyscale, invert_flag, 
expected_bg_color", + [ + (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, False, 243), + (Path(TEST_PNG_DIR, "RGB_ORIGINAL.png"), False, False, (0, 0, 0)), + (Path(TEST_PNG_DIR, "1_ORIGINAL.png"), True, False, 0), + (Path(TEST_PNG_DIR, "2_ORIGINAL.png"), True, False, 0), + (Path(TEST_PNG_DIR, "3_ORIGINAL.png"), True, False, 0), + (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, True, 12), + (Path(TEST_PNG_DIR, "RGB_ORIGINAL.png"), False, True, (255, 255, 255)), + (Path(TEST_PNG_DIR, "1_ORIGINAL.png"), True, True, 255), + (Path(TEST_PNG_DIR, "2_ORIGINAL.png"), True, True, 255), + (Path(TEST_PNG_DIR, "3_ORIGINAL.png"), True, True, 255), + ], +) +def test_get_bg_color_happy_path( + mock_engine: DicomImageRedactorEngine, + png_file: Path, + is_greyscale: bool, + invert_flag: bool, + expected_bg_color: Union[int, Tuple[int, int, int]], +): + """Test happy path for DicomImageRedactorEngine._get_bg_color + + Args: + png_file (pathlib.Path): Path to a PNG file. + is_greyscale (bool): If loaded DICOM image is greyscale or not. + invert_flag (bool): True if we want to invert image colors to get foreground. + expected_bg_color (int or Tuple of int): The expected background color of the image. 
+ """ + # Arrange + test_image = Image.open(png_file) + + # Act + test_bg_color = mock_engine._get_bg_color(test_image, is_greyscale, invert_flag) + + # Assert + assert test_bg_color == expected_bg_color + + +# ------------------------------------------------------ +# DicomImageRedactorEngine._get_array_corners() +# ------------------------------------------------------ +@pytest.mark.parametrize( + "dcm_file, crop_ratio", + [ + (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), 0.5), + (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), 0.5), + (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), 0.5), + (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), 0.5), + (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), 0.75), + (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), 0.25), + (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), 0.31), + (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), 0.83), + ], +) +def test_get_array_corners_happy_path( + mock_engine: DicomImageRedactorEngine, + dcm_file: Path, + crop_ratio: float, +): + """Test happy path for DicomImageRedactorEngine._get_array_corners + + Args: + dcm_file (pathlib.Path): Path to a DICOM file. + crop_ratio (float): Ratio to crop to. 
+ """ + # Arrange + test_instance = pydicom.dcmread(dcm_file) + test_pixel_array = test_instance.pixel_array + full_width = test_pixel_array.shape[0] + full_height = test_pixel_array.shape[1] + full_min_pixel_val = np.min(test_pixel_array) + full_max_pixel_val = np.max(test_pixel_array) + + # Act + test_cropped_array = mock_engine._get_array_corners(test_pixel_array, crop_ratio) + cropped_width = test_cropped_array.shape[0] + cropped_height = test_cropped_array.shape[1] + cropped_min_pixel_val = np.min(test_cropped_array) + cropped_max_pixel_val = np.max(test_cropped_array) + + # Assert + assert cropped_width * cropped_height < full_width * full_height + assert cropped_min_pixel_val >= full_min_pixel_val + assert cropped_max_pixel_val <= full_max_pixel_val + + +@pytest.mark.parametrize( + "crop_ratio, expected_error_type", + [ + (0, "ValueError"), + (-0.4, "ValueError"), + (1.3, "ValueError"), + ], +) +def test_get_array_corners_exceptions( + mock_engine: DicomImageRedactorEngine, crop_ratio: float, expected_error_type: str +): + """Test error handling of _get_array_corners + + Args: + crop_ratio (float): Ratio to crop to. + expected_error_type (str): Type of error we expect to be raised. 
+ """ + with pytest.raises(Exception) as exc_info: + # Arrange + dcm_file = Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm") + test_instance = pydicom.dcmread(dcm_file) + test_pixel_array = test_instance.pixel_array + + # Act + _ = mock_engine._get_array_corners(test_pixel_array, crop_ratio) + + # Assert + assert expected_error_type == exc_info.typename + + +# ------------------------------------------------------ +# DicomImageRedactorEngine._get_most_common_pixel_value() +# ------------------------------------------------------ +@pytest.mark.parametrize( + "dcm_file, fill, expected_color", + [ + (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), "contrast", 50), + (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), "contrast", 16383), + (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), "contrast", 32767), + (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), "contrast", 4095), + (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), "background", 973), + (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), "background", 0), + (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), "background", 0), + (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), "background", 0), + ], +) +def test_get_most_common_pixel_value_happy_path( + mock_engine: DicomImageRedactorEngine, + dcm_file: Path, + fill: str, + expected_color: Union[int, Tuple[int, int, int]], +): + """Test happy path for DicomImageRedactorEngine._get_most_common_pixel_value + + Args: + dcm_file (pathlib.Path): Path to a DICOM file. + fill (str): Color setting to use ("contrast" or "background"). + expected_color (int or Tuple of int): The expected color returned for the image. 
+ """ + # Arrange + crop_ratio = 0.75 + test_instance = pydicom.dcmread(dcm_file) + + # Act + test_color = mock_engine._get_most_common_pixel_value( + test_instance, crop_ratio, fill + ) + + # Assert + assert test_color == expected_color + + +@pytest.mark.parametrize( + "dcm_file, expected_error_type", + [ + (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), "TypeError"), + ], +) +def test_get_most_common_pixel_value_exceptions( + mock_engine: DicomImageRedactorEngine, dcm_file: Path, expected_error_type: str +): + """Test error handling of _get_most_common_pixel_value + + Args: + dcm_file (pathlib.Path): Path to a DICOM file. + expected_error_type (str): Type of error we expect to be raised. + """ + with pytest.raises(Exception) as exc_info: + # Arrange + crop_ratio = 0.75 + test_instance = pydicom.dcmread(dcm_file) + + # Act + _ = mock_engine._get_most_common_pixel_value( + test_instance, crop_ratio, "contrast" + ) + + # Assert + assert expected_error_type == exc_info.typename + + +# ------------------------------------------------------ +# DicomImageRedactorEngine._add_padding() +# ------------------------------------------------------ +@pytest.mark.parametrize( + "png_file, is_greyscale, padding_width", + [ + (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, 15), + (Path(TEST_PNG_DIR, "RGB_ORIGINAL.png"), False, 15), + (Path(TEST_PNG_DIR, "1_ORIGINAL.png"), True, 15), + (Path(TEST_PNG_DIR, "2_ORIGINAL.png"), True, 15), + (Path(TEST_PNG_DIR, "3_ORIGINAL.png"), True, 15), + (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, 30), + (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, 99), + ], +) +def test_add_padding_happy_path( + mock_engine: DicomImageRedactorEngine, + png_file: Path, + is_greyscale: bool, + padding_width: int, +): + """Test happy path for DicomImageRedactorEngine._add_padding + + Args: + png_file (pathlib.Path): Path to a PNG file. + is_greyscale (bool): If loaded DICOM image is greyscale or not. + padding_width (int): Pixel width of padding (uniform). 
+    """
+    # Arrange
+    test_image = Image.open(png_file)
+
+    # Act
+    test_image_with_padding = mock_engine._add_padding(
+        test_image, is_greyscale, padding_width
+    )
+
+    # Assert
+    assert test_image_with_padding.height - test_image.height == 2 * padding_width
+    assert test_image_with_padding.width - test_image.width == 2 * padding_width
+
+
+@pytest.mark.parametrize(
+    "png_file, is_greyscale, padding_width, expected_error_type",
+    [
+        (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, -1, "ValueError"),
+        (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, 0, "ValueError"),
+        (Path(TEST_PNG_DIR, "0_ORIGINAL.png"), True, 200, "ValueError"),
+    ],
+)
+def test_add_padding_exceptions(
+    mock_engine: DicomImageRedactorEngine,
+    png_file: Path,
+    is_greyscale: bool,
+    padding_width: int,
+    expected_error_type: str,
+):
+    """Test error handling of _add_padding
+
+    Args:
+        png_file (pathlib.Path): Path to a PNG file.
+        is_greyscale (bool): If loaded DICOM image is greyscale or not.
+        padding_width (int): Pixel width of padding (uniform).
+        expected_error_type (str): Type of error we expect to be raised.
+    """
+    with pytest.raises(Exception) as exc_info:
+        # Arrange
+        test_image = Image.open(png_file)
+
+        # Act
+        _ = mock_engine._add_padding(test_image, is_greyscale, padding_width)
+
+    # Assert
+    assert expected_error_type == exc_info.typename
+
+
+# ------------------------------------------------------
+# DicomImageRedactorEngine._copy_files_for_processing()
+# ------------------------------------------------------
+@pytest.mark.parametrize(
+    "src_path, expected_num_of_files",
+    [
+        (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), 1),
+        (Path(TEST_DICOM_PARENT_DIR), 18),
+        (Path(TEST_DICOM_DIR_1), 3),
+        (Path(TEST_DICOM_DIR_2), 2),
+        (Path(TEST_DICOM_DIR_3), 1),
+        (Path(TEST_PNG_DIR), 5),
+        (Path(TEST_NUMPY_DIR), 5),
+    ],
+)
+def test_copy_files_for_processing_happy_path(
+    mock_engine: DicomImageRedactorEngine, src_path: Path, expected_num_of_files: int
+):
+    """Test happy path for DicomImageRedactorEngine._copy_files_for_processing
+
+    Args:
+        src_path (pathlib.Path): Path to a file or directory to copy.
+        expected_num_of_files (int): Expected number of files to be copied.
+    """
+    # Arrange
+
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        # Act
+        test_dst_path = mock_engine._copy_files_for_processing(src_path, tmpdirname)
+
+        # Arrange
+        p = Path(tmpdirname).glob("**/*")
+        files = [x for x in p if x.is_file()]
+
+        # Assert
+        assert Path(tmpdirname) < test_dst_path
+        assert expected_num_of_files == len(files)
+
+
+# ------------------------------------------------------
+# DicomImageRedactorEngine._get_text_metadata()
+# ------------------------------------------------------
+@pytest.mark.parametrize(
+    "dcm_path, metadata_text_len, is_name_true_len, is_patient_true_len",
+    [
+        (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), 82, 4, 6),
+        (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), 93, 9, 10),
+        (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), 83, 9, 8),
+        (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), 118, 6, 10),
+        (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), 135, 8, 10),
+    ],
+)
+def test_get_text_metadata_happy_path(
+    mock_engine: DicomImageRedactorEngine,
+    dcm_path: Path,
+    metadata_text_len: int,
+    is_name_true_len: int,
+    is_patient_true_len: int,
+):
+    """Test happy path for DicomImageRedactorEngine._get_text_metadata
+
+    Args:
+        dcm_path (pathlib.Path): Path to DICOM file.
+        metadata_text_len (int): Length of the expected returned metadata_text list.
+        is_name_true_len (int): Number of true values in the returned is_name list.
+        is_patient_true_len (int): Number of true values in the returned is_patient list.
+    """
+    # Arrange
+    test_instance = pydicom.dcmread(dcm_path)
+
+    # Act
+    test_metadata_text, test_is_name, test_is_patient = mock_engine._get_text_metadata(
+        test_instance
+    )
+
+    # Assert
+    idx_is_name = list(np.where(np.array(test_is_name) == True)[0])
+    idx_is_patient = list(np.where(np.array(test_is_patient) == True)[0])
+
+    assert len(test_metadata_text) == len(test_is_name) == len(test_is_patient)
+    assert len(idx_is_name) == is_name_true_len
+    assert len(idx_is_patient) == is_patient_true_len
+    assert isinstance(test_metadata_text[idx_is_name[0]], str)
+
+
+# ------------------------------------------------------
+# DicomImageRedactorEngine._process_names()
+# ------------------------------------------------------
+@pytest.mark.parametrize(
+    "text_metadata, is_name, expected_phi_list",
+    [
+        ([], [], []),
+        (
+            ["JOHN^DOE", "City Hospital", "12345"],
+            [True, False, False],
+            [
+                "JOHN^DOE",
+                "City Hospital",
+                "12345",
+                "JOHN",
+                "DOE",
+                "John",
+                "Doe",
+                "john",
+                "doe",
+                "JOHN DOE",
+                "John Doe",
+                "john doe",
+            ],
+        ),
+    ],
+)
+def test_process_names_happy_path(
+    mock_engine: DicomImageRedactorEngine,
+    text_metadata: list,
+    is_name: list,
+    expected_phi_list: list,
+):
+    """Test happy path for DicomImageRedactorEngine._process_names
+
+    Args:
+        text_metadata (list): List of text metadata.
+        is_name (list): Whether each element is a name or not.
+        expected_phi_list (list): List of expected output.
+ """ + # Arrange + + # Act + test_phi_list = mock_engine._process_names(text_metadata, is_name) + + # Assert + assert set(test_phi_list) == set(expected_phi_list) + + +# ------------------------------------------------------ +# DicomImageRedactorEngine._add_known_generic_phi() +# ------------------------------------------------------ +@pytest.mark.parametrize( + "phi_list, expected_return_list", + [ + ([], ["M", "[M]", "F", "[F]", "X", "[X]", "U", "[U]"]), + ( + ["JOHN^DOE", "City Hospital", "12345"], + [ + "JOHN^DOE", + "City Hospital", + "12345", + "M", + "[M]", + "F", + "[F]", + "X", + "[X]", + "U", + "[U]", + ], + ), + ], +) +def test_add_known_generic_phi_happy_path( + mock_engine: DicomImageRedactorEngine, phi_list: list, expected_return_list: list +): + """Test happy path for DicomImageRedactorEngine._add_known_generic_phi + + Args: + phi_list (list): List of PHI. + expected_return_list (list): List of expected output. + """ + # Arrange + + # Act + test_phi_list = mock_engine._add_known_generic_phi(phi_list) + + # Assert + assert set(test_phi_list) == set(expected_return_list) + + +# ------------------------------------------------------ +# DicomImageRedactorEngine._make_phi_list() +# ------------------------------------------------------ +@pytest.mark.parametrize( + "original_metadata, mock_process_names_val, mock_add_known_val, expected_return_list", + [ + ( + [ + ["A", "B"], + "A", + 123, + "JOHN^DOE", + "City Hospital", + "12345", + ], + [ + ["A", "B"], + "A", + 123, + "JOHN^DOE", + "City Hospital", + "12345", + "JOHN", + "DOE", + "John", + "Doe", + "john", + "doe", + "JOHN DOE", + "John Doe", + "john doe", + ], + [ + ["A", "B"], + "A", + 123, + "JOHN^DOE", + "City Hospital", + "12345", + "JOHN", + "DOE", + "John", + "Doe", + "john", + "doe", + "JOHN DOE", + "John Doe", + "john doe", + "M", + "[M]", + "F", + "[F]", + "X", + "[X]", + "U", + "[U]", + ], + [ + "A", + "B", + "123", + "JOHN^DOE", + "City Hospital", + "12345", + "JOHN", + "DOE", + "John", + 
"Doe", + "john", + "doe", + "JOHN DOE", + "John Doe", + "john doe", + "M", + "[M]", + "F", + "[F]", + "X", + "[X]", + "U", + "[U]", + ], + ), + ], +) +def test_make_phi_list_happy_path( + mocker, + mock_engine: DicomImageRedactorEngine, + original_metadata: list, + mock_process_names_val: list, + mock_add_known_val: list, + expected_return_list: list, +): + """Test happy path for DicomImageRedactorEngine._make_phi_list + + Args: + original_metadata (list): List extracted metadata (excluding pixel array). + mock_process_names_val (list): Value to provide to mock process_names. + mock_add_known_val (list): Value to provide to mock _add_known_generic_phi. + expected_return_list (list): List of expected output. + """ + # Arrange + mock_process_names = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._process_names", + return_value=mock_process_names_val, + ) + mock_add_known_generic_phi = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._add_known_generic_phi", + return_value=mock_add_known_val, + ) + + # Act + test_phi_str_list = mock_engine._make_phi_list(original_metadata, [], []) + + # Assert + assert mock_process_names.call_count == 1 + assert mock_add_known_generic_phi.call_count == 1 + assert set(test_phi_str_list) == set(expected_return_list) + + +# ------------------------------------------------------ +# DicomImageRedactorEngine._set_bbox_color() +# ------------------------------------------------------ +@pytest.mark.parametrize( + "fill, mock_box_color", + [ + ("contrast", 0), + ("contrast", (0, 0, 0)), + ("background", 255), + ("background", (255, 255, 255)), + ], +) +def test_set_bbox_color_happy_path( + mocker, + fill: str, + mock_box_color: Union[int, Tuple[int, int, int]], +): + """Test happy path for DicomImageRedactorEngine._set_bbox_color + + Args: + fill (str): Determines how box color is selected. 
+ mock_box_color (int or Tuple of int): Color value to assign to mocker. + """ + # Arrange + test_instance = pydicom.dcmread(Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm")) + + mock_convert_dcm_to_png = mocker.patch.object( + DicomImageRedactorEngine, "_convert_dcm_to_png", return_value=[None, True] + ) + mock_Image_open = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.Image.open", + return_value=None, + ) + mock_get_bg_color = mocker.patch.object( + DicomImageRedactorEngine, + "_get_bg_color", + return_value=mock_box_color, + ) + mock_engine = DicomImageRedactorEngine() + + # Act + test_box_color = mock_engine._set_bbox_color(test_instance, fill) + + # Assert + assert mock_convert_dcm_to_png.call_count == 1 + assert mock_Image_open.call_count == 1 + assert mock_get_bg_color.call_count == 1 + assert test_box_color == mock_box_color + + +@pytest.mark.parametrize( + "fill, expected_error_type", + [ + ("typo", "ValueError"), + ("somecolor", "ValueError"), + ("0", "ValueError"), + ("255", "ValueError"), + ], +) +def test_set_bbox_color_exceptions( + mock_engine: DicomImageRedactorEngine, + fill: str, + expected_error_type: str, +): + """Test error handling of _set_bbox_color + + Args: + fill (str): Determines how box color is selected. + expected_error_type (str): Type of error we expect to be raised. 
+ """ + with pytest.raises(Exception) as exc_info: + # Arrange + test_instance = pydicom.dcmread(Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm")) + + # Act + _ = mock_engine._set_bbox_color(test_instance, fill) + + # Assert + assert expected_error_type == exc_info.typename + + +# ------------------------------------------------------ +# DicomImageRedactorEngine._check_if_compressed() +# ------------------------------------------------------ +@pytest.mark.parametrize( + "dcm_path, compression_status", + [ + ( + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), + False + ), + ( + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_compressed.dcm"), + True + ), + ], +) +def test_check_if_compressed_happy_path( + mock_engine: DicomImageRedactorEngine, + dcm_path: Path, + compression_status: bool, +): + """Test happy path for DicomImageRedactorEngine._check_if_compressed + + Args: + mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. + dcm_path (pathlib.Path): Path to DICOM file. + compression_status (bool): If the pixel data is compressed. + """ + # Arrange + test_instance = pydicom.dcmread(dcm_path) + + # Act + test_is_compressed = mock_engine._check_if_compressed(test_instance) + + # Assert + assert test_is_compressed == compression_status + +# ------------------------------------------------------ +# DicomImageRedactorEngine._compress_pixel_data() +# ------------------------------------------------------ +@pytest.mark.parametrize( + "dcm_path", + [ + (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm")), + (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm")) + ], +) +def test_compress_pixel_data_happy_path( + mock_engine: DicomImageRedactorEngine, + dcm_path: Path, +): + """Test happy path for DicomImageRedactorEngine._compress_pixel_data + + Args: + mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. + dcm_path (pathlib.Path): Path to DICOM file. 
+ """ + # Arrange + test_instance = pydicom.dcmread(dcm_path) + + # Act + test_compressed = mock_engine._compress_pixel_data(test_instance) + + # Assert + assert mock_engine._check_if_compressed(test_compressed) == True + +# ------------------------------------------------------ +# DicomImageRedactorEngine._check_if_has_image_icon_sequence() +# ------------------------------------------------------ +@pytest.mark.parametrize( + "dcm_path, has_sequence", + [ + ( + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), + False + ), + ( + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_icon_image_sequence.dcm"), + True + ), + ], +) +def test_check_if_has_image_icon_sequence_happy_path( + mock_engine: DicomImageRedactorEngine, + dcm_path: Path, + has_sequence: bool, +): + """Test happy path for DicomImageRedactorEngine._check_if_has_image_icon_sequence + Args: + mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. + dcm_path (pathlib.Path): Path to DICOM file. + has_sequence (bool): If additional pixel data is available in the instance. 
+ """ + # Arrange + test_instance = pydicom.dcmread(dcm_path) + + # Act + test_has_sequence = mock_engine._check_if_has_image_icon_sequence(test_instance) + + # Assert + assert test_has_sequence == has_sequence + +# ------------------------------------------------------ +# DicomImageRedactorEngine._add_redact_box() +# ------------------------------------------------------ +@pytest.mark.parametrize( + "dcm_path, mock_is_compressed, mock_has_image_icon_sequence, mock_is_greyscale, mock_box_color, bounding_boxes_coordinates", + [ + ( + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), + False, + False, + True, + 0, + [ + {"top": 0, "left": 0, "width": 100, "height": 100}, + {"top": 24, "left": 0, "width": 75, "height": 51}, + {"top": 1, "left": 588, "width": 226, "height": 35}, + ], + ), + ( + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_compressed.dcm"), + True, + False, + True, + 0, + [ + {"top": 0, "left": 0, "width": 100, "height": 100}, + {"top": 24, "left": 0, "width": 75, "height": 51}, + {"top": 1, "left": 588, "width": 226, "height": 35}, + ], + ), + ( + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_icon_image_sequence.dcm"), + False, + True, + True, + 0, + [ + {"top": 0, "left": 0, "width": 100, "height": 100}, + {"top": 24, "left": 0, "width": 75, "height": 51}, + {"top": 1, "left": 588, "width": 226, "height": 35}, + ], + ), + ( + Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), + False, + False, + False, + (0, 0, 0), + [ + {"top": 0, "left": 0, "width": 500, "height": 500}, + {"top": 24, "left": 0, "width": 75, "height": 51}, + {"top": 1, "left": 588, "width": 100, "height": 100}, + ], + ), + ], +) +def test_add_redact_box_happy_path( + mocker, + dcm_path: Path, + mock_is_compressed: bool, + mock_has_image_icon_sequence: bool, + mock_is_greyscale: bool, + mock_box_color: Union[int, Tuple[int, int, int]], + bounding_boxes_coordinates: dict, +): + """Test happy path for DicomImageRedactorEngine._add_redact_box + + Args: + dcm_path (pathlib.Path): Path to DICOM file. 
+ mock_is_compressed (bool): If the pixel data is compressed. + mock_has_image_icon_sequence (bool): If there is more than one set of pixel data. + mock_is_greyscale (bool): Value to use when mocking _check_if_greyscale. + mock_box_color (int or Tuple of int): Color value to assign to mocker. + bouding_boxes_coordinates (dict): Formatted bbox coordinates. + """ + # Arrange + test_instance = pydicom.dcmread(dcm_path) + crop_ratio = 0.75 + mock_check_if_compressed = mocker.patch.object( + DicomImageRedactorEngine, + "_check_if_compressed", + return_value=mock_is_compressed + ) + mock_check_if_has_image_icon_sequence = mocker.patch.object( + DicomImageRedactorEngine, + "_check_if_has_image_icon_sequence", + return_value=mock_has_image_icon_sequence + ) + mock_check_if_greyscale = mocker.patch.object( + DicomImageRedactorEngine, + "_check_if_greyscale", + return_value=mock_is_greyscale, + ) + mock_get_common_pixel = mocker.patch.object( + DicomImageRedactorEngine, + "_get_most_common_pixel_value", + return_value=mock_box_color, + ) + mock_set_bbox_color = mocker.patch.object( + DicomImageRedactorEngine, + "_set_bbox_color", + return_value=mock_box_color, + ) + mock_engine = DicomImageRedactorEngine() + + # Act + test_redacted_instance = mock_engine._add_redact_box( + test_instance, bounding_boxes_coordinates, crop_ratio + ) + + # Assert + assert mock_check_if_compressed.call_count == 1 + assert mock_check_if_has_image_icon_sequence.call_count == 1 + assert mock_check_if_greyscale.call_count == 1 + if mock_is_greyscale is True: + original_pixel_values = np.array(test_instance.pixel_array).flatten() + redacted_pixel_values = np.array(test_redacted_instance.pixel_array).flatten() + box_color_pixels_original = len( + np.where(original_pixel_values == mock_box_color)[0] + ) + box_color_pixels_redacted = len( + np.where(redacted_pixel_values == mock_box_color)[0] + ) + assert mock_get_common_pixel.call_count == 1 + else: + list_of_RGB_pixels_original = 
np.vstack(test_instance.pixel_array).tolist() + list_of_RGB_pixels_redacted = np.vstack( + test_redacted_instance.pixel_array + ).tolist() + box_color_pixels_original = len( + np.unique( + np.where(np.array(list_of_RGB_pixels_original) == mock_box_color)[0] + ) + ) + box_color_pixels_redacted = len( + np.unique( + np.where(np.array(list_of_RGB_pixels_redacted) == mock_box_color)[0] + ) + ) + assert mock_set_bbox_color.call_count == 1 + + assert box_color_pixels_redacted > box_color_pixels_original # # ------------------------------------------------------ -# # DicomImageRedactorEngine redact() -# # ------------------------------------------------------ -# def test_DicomImageRedactorEngine_redact_happy_path( -# mocker, -# mock_engine: DicomImageRedactorEngine, -# ): -# """Test happy path for DicomImageRedactorEngine redact() -# """ -# # Arrange -# test_image = pydicom.dcmread(Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm")) - -# mock_redact_return_bbox = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine.redact_and_return_bbox", -# return_value=[test_image, [{}, {}, {}]] -# ) - -# # Act -# test_redacted_image = mock_engine.redact(test_image) - -# # Assert -# assert type(test_redacted_image) in [pydicom.dataset.FileDataset, pydicom.dataset.Dataset] -# mock_redact_return_bbox.assert_called_once() - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine _save_bbox_json() +# # DicomImageRedactorEngine._get_analyzer_results() # # ------------------------------------------------------ # @pytest.mark.parametrize( -# "output_path, expected_output_json_path, bboxes", +# "image, dcm_path, use_metadata, ad_hoc_recognizers", # [ # ( -# "dir1/dir2/output_dicom.dcm", -# "dir1/dir2/output_dicom.json", -# [ -# {"top": 0, "left": 0, "width": 100, "height": 100}, -# {"top": 35, "left": 72, "width": 50, "height": 14} -# ] -# ), -# ( -# "dir1/output_dicom.dcm", -# "dir1/output_dicom.json", -# [ -# {"top": 0, 
"left": 0, "width": 100, "height": 100}, -# {"top": 35, "left": 72, "width": 50, "height": 14} -# ] +# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), +# False, +# None # ), # ( -# "output_dicom.dcm", -# "output_dicom.json", -# [ -# {"top": 0, "left": 0, "width": 100, "height": 100}, -# {"top": 35, "left": 72, "width": 50, "height": 14} -# ] +# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), +# False, +# [PatternRecognizer(supported_entity="PERSON", deny_list=["1"])] # ), # ( -# "dir1/dir2/output_dicom.DCM", -# "dir1/dir2/output_dicom.json", -# [ -# {"top": 0, "left": 0, "width": 100, "height": 100}, -# {"top": 35, "left": 72, "width": 50, "height": 14} -# ] +# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), +# True, +# None # ), # ( -# "dir1/dir2/output_dicom.dicom", -# "dir1/dir2/output_dicom.json", -# [ -# {"top": 0, "left": 0, "width": 100, "height": 100}, -# {"top": 35, "left": 72, "width": 50, "height": 14} -# ] +# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), +# True, +# [PatternRecognizer(supported_entity="PERSON", deny_list=["2"])] # ), -# ( -# "dir1/dir2/output_dicom.DICOM", -# "dir1/dir2/output_dicom.json", -# [ -# {"top": 0, "left": 0, "width": 100, "height": 100}, -# {"top": 35, "left": 72, "width": 50, "height": 14} -# ] -# ) -# ], -# ) -# def test_DicomImageRedactorEngine_save_bbox_json_happy_path( -# mock_engine: DicomImageRedactorEngine, -# output_path: str, -# expected_output_json_path: str, -# bboxes: List[Dict[str, int]], -# ): -# """Test happy path for DicomImageRedactorEngine _save_bbox_json() - -# Args: -# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. -# output_path (str): Path to output DICOM file. 
-# expected_output_json_path (str): Expected path to the output JSON file. -# bboxes (list): Bounding boxes to write out to JSON file. -# """ -# with tempfile.TemporaryDirectory() as tmpdirname: -# # Arrange -# temp_output_path = Path(tmpdirname, output_path) -# temp_output_path.mkdir(parents=True, exist_ok=True) -# temp_expected_json_path = Path(tmpdirname, expected_output_json_path) - -# # Act -# mock_engine._save_bbox_json(temp_output_path, bboxes) - -# # Assert -# with open(temp_expected_json_path, "r") as read_file: -# loaded_json = json.load(read_file) -# assert loaded_json == bboxes - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine _redact_single_dicom_image() -# # ------------------------------------------------------ -# @pytest.mark.parametrize( -# "dcm_path, output_dir, overwrite", -# [ -# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), "output", False), -# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), "output", True), -# (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), "output", False), -# (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), "output", False), -# (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), "output", False), -# (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), "output", False), # ], # ) -# def test_DicomImageRedactorEngine_redact_single_dicom_image_happy_path( +# def test_get_analyzer_results_happy_path( # mocker, # mock_engine: DicomImageRedactorEngine, +# image: Image, # dcm_path: str, -# output_dir: str, -# overwrite: bool, +# use_metadata: bool, +# ad_hoc_recognizers: Optional[List[PatternRecognizer]] # ): -# """Test happy path for DicomImageRedactorEngine _redact_single_dicom_image() +# """Test happy path for DicomImageRedactorEngine._get_analyzer_results # Args: # mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. -# dcm_path (str): Path to input DICOM file or dir. -# output_dir (str): Path to parent directory to write output to. 
-# overwrite (bool): True if overwriting original files. +# image (PIL.Image): A PIL image. +# dcm_path (pathlib.Path): Path to DICOM file. +# use_metadata (bool): Whether to consider metadata when running analysis. +# ad_hoc_recognizers(None or list): Ad-hoc recognizers to use. # """ # # Arrange -# crop_ratio = 0.75 -# mock_copy_files = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._copy_files_for_processing", -# return_value=dcm_path, -# ) -# mock_convert_dcm_to_png = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._convert_dcm_to_png", -# return_value=[None, None], -# ) -# mock_image_open = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.Image.open", -# return_value=None, -# ) -# mock_add_padding = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._add_padding", -# return_value=None, -# ) - # mock_analyze = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._get_analyzer_results", +# "presidio_image_redactor.dicom_image_redactor_engine.ImageAnalyzerEngine.analyze", # return_value=None, # ) - -# mock_get_analyze_bbox = mocker.patch( -# "presidio_image_redactor.image_redactor_engine.BboxProcessor.get_bboxes_from_analyzer_results", -# return_value=None, +# mock_get_text_metadata = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._get_text_metadata", +# return_value=[None, None, None], # ) - -# mock_remove_bbox_padding = mocker.patch( -# "presidio_image_redactor.image_redactor_engine.BboxProcessor.remove_bbox_padding", +# mock_make_phi_list = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._make_phi_list", # return_value=None, # ) - -# class MockInstance: -# def save_as(self, dst_path: str): -# return None - -# mock_add_redact_box = mocker.patch( -# 
"presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._add_redact_box", -# return_value=MockInstance(), -# ) - -# # Act -# mock_engine._redact_single_dicom_image( -# dcm_path=dcm_path, -# crop_ratio=crop_ratio, -# fill="contrast", -# padding_width=25, -# use_metadata=True, -# overwrite=overwrite, -# dst_parent_dir=output_dir, -# save_bboxes=False -# ) - -# # Assert -# if overwrite is True: -# assert mock_copy_files.call_count == 0 -# else: -# assert mock_copy_files.call_count == 1 -# assert mock_convert_dcm_to_png.call_count == 1 -# assert mock_image_open.call_count == 1 -# assert mock_add_padding.call_count == 1 -# assert mock_analyze.call_count == 1 -# assert mock_get_analyze_bbox.call_count == 1 -# assert mock_remove_bbox_padding.call_count == 1 -# assert mock_add_redact_box.call_count == 1 - - -# @pytest.mark.parametrize( -# "dcm_path, expected_error_type", -# [ -# (Path(TEST_DICOM_PARENT_DIR), "FileNotFoundError"), -# (Path("nonexistentfile.extension"), "FileNotFoundError"), -# ], -# ) -# def test_DicomImageRedactorEngine_redact_single_dicom_image_exceptions( -# mock_engine: DicomImageRedactorEngine, -# dcm_path: str, -# expected_error_type: str, -# ): -# """Test error handling of DicomImageRedactorEngine _redact_single_dicom_image() - -# Args: -# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. -# dcm_path (str): Path to input DICOM file or dir. -# expected_error_type (str): Type of error we expect to be raised. 
-# """ -# with pytest.raises(Exception) as exc_info: -# # Act -# mock_engine._redact_single_dicom_image( -# dcm_path=dcm_path, -# crop_ratio=0.75, -# fill="contrast", -# padding_width=25, -# use_metadata=True, -# overwrite=False, -# dst_parent_dir=".", -# save_bboxes=False -# ) - -# # Assert -# assert expected_error_type == exc_info.typename - - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine _redact_multiple_dicom_images() -# # ------------------------------------------------------ -# @pytest.mark.parametrize( -# "dcm_path, output_dir, overwrite", -# [ -# (Path(TEST_DICOM_PARENT_DIR), "output", False), -# (Path(TEST_DICOM_PARENT_DIR), "output", True), -# (Path(TEST_DICOM_DIR_1), "output", False), -# (Path(TEST_DICOM_DIR_2), "output", False), -# (Path(TEST_DICOM_DIR_3), "output", False), -# ], -# ) -# def test_DicomImageRedactorEngine_redact_multiple_dicom_images_happy_path( -# mocker, -# mock_engine: DicomImageRedactorEngine, -# dcm_path: str, -# output_dir: str, -# overwrite: bool, -# ): -# """Test happy path for DicomImageRedactorEngine _redact_multiple_dicom_images() - -# Args: -# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. -# dcm_path (str): Path to input DICOM file or dir. -# output_dir (str): Path to parent directory to write output to. -# overwrite (bool): True if overwriting original files. 
-# """ -# # Arrange -# crop_ratio = 0.75 -# mock_copy_files = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._copy_files_for_processing", -# return_value=dcm_path, -# ) -# mock_dcm_files = [ -# Path("dir1/dir2/file1.dcm"), -# Path("dir1/dir2/file2.dcm"), -# Path("dir1/dir2/dir3/file3.dcm"), -# ] -# mock_get_all_dcm_files = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._get_all_dcm_files", -# return_value=mock_dcm_files, -# ) -# mock_redact_single = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._redact_single_dicom_image", +# mock_pattern_recognizer = mocker.patch( +# "presidio_image_redactor.dicom_image_redactor_engine.PatternRecognizer", # return_value=None, # ) +# test_instance = pydicom.dcmread(dcm_path) # # Act -# mock_engine._redact_multiple_dicom_images( -# dcm_dir=dcm_path, -# crop_ratio=crop_ratio, -# fill="contrast", -# padding_width=25, -# use_metadata=True, -# overwrite=overwrite, -# dst_parent_dir=output_dir, -# save_bboxes=False +# _ = mock_engine._get_analyzer_results( +# image, test_instance, use_metadata, None, ad_hoc_recognizers # ) # # Assert -# if overwrite is True: -# assert mock_copy_files.call_count == 0 -# else: -# assert mock_copy_files.call_count == 1 -# assert mock_get_all_dcm_files.call_count == 1 -# assert mock_redact_single.call_count == len(mock_dcm_files) - - -# @pytest.mark.parametrize( -# "dcm_path, expected_error_type", -# [ -# (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), "FileNotFoundError"), -# (Path("nonexistentdir"), "FileNotFoundError"), -# ], -# ) -# def test_DicomImageRedactorEngine_redact_multiple_dicom_images_exceptions( -# mock_engine: DicomImageRedactorEngine, -# dcm_path: str, -# expected_error_type: str, -# ): -# """Test error handling of DicomImageRedactorEngine _redact_multiple_dicom_images() - -# Args: -# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine 
object. -# dcm_path (str): Path to input DICOM file or dir. -# expected_error_type (str): Type of error we expect to be raised. -# """ -# with pytest.raises(Exception) as exc_info: -# # Act -# mock_engine._redact_multiple_dicom_images( -# dcm_dir=dcm_path, -# crop_ratio=0.75, -# fill="contrast", -# padding_width=25, -# use_metadata=True, -# overwrite=False, -# dst_parent_dir=".", -# save_bboxes=False -# ) - -# # Assert -# assert expected_error_type == exc_info.typename - +# if use_metadata is False: +# mock_analyze.assert_called_once() +# mock_get_text_metadata.assert_not_called() +# mock_make_phi_list.assert_not_called() +# mock_pattern_recognizer.assert_not_called() +# elif use_metadata is True: +# mock_analyze.assert_called_once() +# mock_get_text_metadata.assert_called_once() +# mock_make_phi_list.assert_called_once() +# mock_pattern_recognizer.assert_called_once() -# # ------------------------------------------------------ -# # DicomImageRedactorEngine redact_from_file() -# # ------------------------------------------------------ # @pytest.mark.parametrize( -# "dcm_path, mock_dst_path", +# "image, dcm_path, ad_hoc_recognizers, expected_error_type", # [ # ( -# f"{TEST_DICOM_PARENT_DIR}/0_ORIGINAL.dcm", +# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), # Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), +# "invalidType", +# "TypeError" # ), # ( -# f"{TEST_DICOM_DIR_2}/1_ORIGINAL.DCM", -# Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), -# ), -# ( -# f"{TEST_DICOM_DIR_2}/2_ORIGINAL.dicom", -# Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), +# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), +# [], +# "ValueError" # ), # ( -# f"{TEST_DICOM_DIR_3}/3_ORIGINAL.DICOM", -# Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), +# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), +# 
[PatternRecognizer(supported_entity="PERSON", deny_list=["a"]), 2], +# "TypeError" # ), # ], # ) -# def test_DicomImageRedactorEngine_redact_from_file_happy_path( -# mocker, +# def test_get_analyzer_results_exceptions( # mock_engine: DicomImageRedactorEngine, +# image: Image, # dcm_path: str, -# mock_dst_path: Path, -# ): -# """Test happy path for DicomImageRedactorEngine redact_from_file() - -# Args: -# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. -# dcm_path (str): Path to input DICOM file or dir. -# mock_dst_path (pathlib.Path): Path to DICOM dir or file. -# """ -# # Arrange -# mock_copy_files = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._copy_files_for_processing", -# return_value=mock_dst_path, -# ) -# mock_redact_single = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._redact_single_dicom_image", -# return_value=None, -# ) - -# # Act -# mock_engine.redact_from_file(dcm_path, "output", padding_width=25, fill="contrast", use_metadata=True) - -# # Assert -# assert mock_copy_files.call_count == 1 -# assert mock_redact_single.call_count == 1 - - -# @pytest.mark.parametrize( -# "input_path, output_path, expected_error_type", -# [ -# (TEST_DICOM_PARENT_DIR, "output", "TypeError"), -# (TEST_DICOM_DIR_1, "output", "TypeError"), -# (TEST_DICOM_DIR_2, "output", "TypeError"), -# (TEST_DICOM_DIR_3, "output", "TypeError"), -# ( -# f"{TEST_DICOM_PARENT_DIR}/0_ORIGINAL.dcm", -# f"{TEST_DICOM_PARENT_DIR}/0_ORIGINAL.dcm", -# "TypeError", -# ), -# ( -# f"{TEST_DICOM_PARENT_DIR}/0_ORIGINAL.dcm", -# f"{TEST_DICOM_DIR_3}/3_ORIGINAL.DICOM", -# "TypeError", -# ), -# ], -# ) -# def test_DicomImageRedactorEngine_redact_from_file_exceptions( -# mock_engine: DicomImageRedactorEngine, -# input_path: str, -# output_path: Path, +# ad_hoc_recognizers: Optional[List[PatternRecognizer]], # expected_error_type: str, # ): -# """Test error handling of 
DicomImageRedactorEngine redact_from_file() +# """Test error handling of DicomImageRedactorEngine _get_analyzer_results() # Args: # mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. -# input_path (str): Path to input DICOM file or dir. -# output_path (pathlib.Path): Path to DICOM dir or file. +# image (PIL.Image): A PIL image. +# dcm_path (pathlib.Path): Path to DICOM file. +# ad_hoc_recognizers(None or list): Ad-hoc recognizers to use. # expected_error_type (str): Type of error we expect to be raised. # """ # with pytest.raises(Exception) as exc_info: -# # Act -# mock_engine.redact_from_file(input_path, output_path, padding_width=25, fill="contrast", use_metadata=True) - -# # Assert -# assert expected_error_type == exc_info.typename - - -# # ------------------------------------------------------ -# # DicomImageRedactorEngine redact_from_directory() -# # ------------------------------------------------------ -# @pytest.mark.parametrize( -# "dcm_path, mock_dst_path", -# [ -# (TEST_DICOM_PARENT_DIR, Path(TEST_DICOM_PARENT_DIR)), -# (TEST_DICOM_DIR_2, Path(TEST_DICOM_DIR_1)), -# (TEST_DICOM_DIR_2, Path(TEST_DICOM_DIR_2)), -# (TEST_DICOM_DIR_3, Path(TEST_DICOM_DIR_3)), -# ], -# ) -# def test_DicomImageRedactorEngine_redact_from_directory_happy_path( -# mocker, -# mock_engine: DicomImageRedactorEngine, -# dcm_path: str, -# mock_dst_path: Path, -# ): -# """Test happy path for DicomImageRedactorEngine redact_from_directory() - -# Args: -# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. -# dcm_path (str): Path to input DICOM file or dir. -# mock_dst_path (pathlib.Path): Path to DICOM dir or file. 
-# """ -# # Arrange -# mock_copy_files = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._copy_files_for_processing", -# return_value=mock_dst_path, -# ) -# mock_redact_multiple = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._redact_multiple_dicom_images", -# return_value=None, -# ) - -# # Act -# mock_engine.redact_from_directory(dcm_path, "output", padding_width=25, fill="contrast", use_metadata=True) - -# # Assert -# assert mock_copy_files.call_count == 1 -# assert mock_redact_multiple.call_count == 1 - - -# @pytest.mark.parametrize( -# "input_path, output_path, expected_error_type", -# [ -# (f"{TEST_DICOM_PARENT_DIR}/0_ORIGINAL.dcm", "output", "TypeError"), -# (TEST_DICOM_DIR_1, f"{TEST_DICOM_PARENT_DIR}/0_ORIGINAL.dcm", "TypeError"), -# ("nonexistentdir", "output", "TypeError"), -# ], -# ) -# def test_DicomImageRedactorEngine_redact_from_directory_exceptions( -# mock_engine: DicomImageRedactorEngine, -# input_path: str, -# output_path: Path, -# expected_error_type: str, -# ): -# """Test error handling of DicomImageRedactorEngine redact_from_directory() +# # Arrange +# test_instance = pydicom.dcmread(dcm_path) -# Args: -# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. -# input_path (str): Path to input DICOM file or dir. -# output_path (pathlib.Path): Path to DICOM dir or file. -# expected_error_type (str): Type of error we expect to be raised. 
-# """ -# with pytest.raises(Exception) as exc_info: # # Act -# mock_engine.redact_from_directory(input_path, output_path, padding_width=25, fill="contrast", use_metadata=True) +# _ = mock_engine._get_analyzer_results( +# image, test_instance, True, None, ad_hoc_recognizers +# ) # # Assert # assert expected_error_type == exc_info.typename + +# ------------------------------------------------------ +# DicomImageRedactorEngine redact_and_return_bbox() +# ------------------------------------------------------ +@pytest.mark.parametrize( + "dcm_path", + [ + (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm")), + (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_compressed.dcm")), + (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_icon_image_sequence.dcm")), + (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm")), + (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM")), + (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom")), + (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM")), + ], +) +def test_DicomImageRedactorEngine_redact_and_return_bbox( + mocker, + mock_engine: DicomImageRedactorEngine, + dcm_path: str, +): + """Test happy path for DicomImageRedactorEngine redact_and_return_bbox() + + Args: + mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. + dcm_path (str): Path to input DICOM file or dir. 
+ """ + # Arrange + test_image = pydicom.dcmread(dcm_path) + + mock_check_greyscale = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._check_if_greyscale", return_value=None + ) + mock_rescale_dcm = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._rescale_dcm_pixel_array", return_value=None + ) + mock_save_pixel_array = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._save_pixel_array_as_png", return_value=None + ) + mock_image_open = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.Image.open", + return_value=None, + ) + mock_add_padding = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._add_padding", + return_value=None, + ) + mock_analyze = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._get_analyzer_results", + return_value=None, + ) + + mock_get_analyze_bbox = mocker.patch( + "presidio_image_redactor.image_redactor_engine.BboxProcessor.get_bboxes_from_analyzer_results", + return_value=None, + ) + + mock_remove_bbox_padding = mocker.patch( + "presidio_image_redactor.image_redactor_engine.BboxProcessor.remove_bbox_padding", + return_value=[{"mock_data": 1}, {"mock_data": 2}, {"mock_data": 3}], + ) + + mock_add_redact_box = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._add_redact_box", + return_value=test_image, + ) + + # Act + test_redacted_image, _ = mock_engine.redact_and_return_bbox(test_image, use_metadata=True) + + # Assert + assert type(test_redacted_image) in [pydicom.dataset.FileDataset, pydicom.dataset.Dataset] + # assertions for test_bboxes type causes silent failures/hangups for Python 3.11 + mock_check_greyscale.assert_called_once() + mock_rescale_dcm.assert_called_once() + mock_save_pixel_array.assert_called_once() + mock_image_open.assert_called_once() + 
mock_add_padding.assert_called_once() + mock_analyze.assert_called_once() + mock_get_analyze_bbox.assert_called_once() + mock_remove_bbox_padding.assert_called_once() + mock_add_redact_box.assert_called_once() + +@pytest.mark.parametrize( + "image, load_file, expected_error_type", + [ + (Path(TEST_DICOM_PARENT_DIR), True, ["TypeError", "IsADirectoryError", "PermissionError"]), + (Path(TEST_DICOM_PARENT_DIR), False, ["TypeError"]), + ("path_here", False, ["TypeError"]), + (np.random.randint(255, size=(64, 64)), False, ["TypeError"]), + (Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), False, ["TypeError"]), + (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL_no_pixels.dcm"), True, ["AttributeError"]), + ], +) +def test_DicomImageRedactorEngine_redact_and_return_bbox_exceptions( + mock_engine: DicomImageRedactorEngine, + image: T, + load_file: bool, + expected_error_type: List[str], +): + """Test error handling of DicomImageRedactorEngine redact_and_return_bbox() + + Args: + mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. + image (any): Input "image". + load_file (bool): Whether to run pydicom.dcmread() on the input image. + expected_error_type (List(str)): Type of error we expect to be raised. 
+ """ + with pytest.raises(Exception) as exc_info: + # Arrange + if load_file: + test_image = pydicom.dcmread(image) + else: + test_image = image + # Act + mock_engine.redact(test_image, fill="contrast", padding_width=25, use_metadata=True + ) + + # Assert + assert exc_info.typename in expected_error_type + +# ------------------------------------------------------ +# DicomImageRedactorEngine redact() +# ------------------------------------------------------ +def test_DicomImageRedactorEngine_redact_happy_path( + mocker, + mock_engine: DicomImageRedactorEngine, +): + """Test happy path for DicomImageRedactorEngine redact() + """ + # Arrange + test_image = pydicom.dcmread(Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm")) + + mock_redact_return_bbox = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine.redact_and_return_bbox", + return_value=[test_image, [{}, {}, {}]] + ) + + # Act + test_redacted_image = mock_engine.redact(test_image) + + # Assert + assert type(test_redacted_image) in [pydicom.dataset.FileDataset, pydicom.dataset.Dataset] + mock_redact_return_bbox.assert_called_once() + +# ------------------------------------------------------ +# DicomImageRedactorEngine _save_bbox_json() +# ------------------------------------------------------ +@pytest.mark.parametrize( + "output_path, expected_output_json_path, bboxes", + [ + ( + "dir1/dir2/output_dicom.dcm", + "dir1/dir2/output_dicom.json", + [ + {"top": 0, "left": 0, "width": 100, "height": 100}, + {"top": 35, "left": 72, "width": 50, "height": 14} + ] + ), + ( + "dir1/output_dicom.dcm", + "dir1/output_dicom.json", + [ + {"top": 0, "left": 0, "width": 100, "height": 100}, + {"top": 35, "left": 72, "width": 50, "height": 14} + ] + ), + ( + "output_dicom.dcm", + "output_dicom.json", + [ + {"top": 0, "left": 0, "width": 100, "height": 100}, + {"top": 35, "left": 72, "width": 50, "height": 14} + ] + ), + ( + "dir1/dir2/output_dicom.DCM", + "dir1/dir2/output_dicom.json", + [ + 
{"top": 0, "left": 0, "width": 100, "height": 100}, + {"top": 35, "left": 72, "width": 50, "height": 14} + ] + ), + ( + "dir1/dir2/output_dicom.dicom", + "dir1/dir2/output_dicom.json", + [ + {"top": 0, "left": 0, "width": 100, "height": 100}, + {"top": 35, "left": 72, "width": 50, "height": 14} + ] + ), + ( + "dir1/dir2/output_dicom.DICOM", + "dir1/dir2/output_dicom.json", + [ + {"top": 0, "left": 0, "width": 100, "height": 100}, + {"top": 35, "left": 72, "width": 50, "height": 14} + ] + ) + ], +) +def test_DicomImageRedactorEngine_save_bbox_json_happy_path( + mock_engine: DicomImageRedactorEngine, + output_path: str, + expected_output_json_path: str, + bboxes: List[Dict[str, int]], +): + """Test happy path for DicomImageRedactorEngine _save_bbox_json() + + Args: + mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. + output_path (str): Path to output DICOM file. + expected_output_json_path (str): Expected path to the output JSON file. + bboxes (list): Bounding boxes to write out to JSON file. 
+ """ + with tempfile.TemporaryDirectory() as tmpdirname: + # Arrange + temp_output_path = Path(tmpdirname, output_path) + temp_output_path.mkdir(parents=True, exist_ok=True) + temp_expected_json_path = Path(tmpdirname, expected_output_json_path) + + # Act + mock_engine._save_bbox_json(temp_output_path, bboxes) + + # Assert + with open(temp_expected_json_path, "r") as read_file: + loaded_json = json.load(read_file) + assert loaded_json == bboxes + +# ------------------------------------------------------ +# DicomImageRedactorEngine _redact_single_dicom_image() +# ------------------------------------------------------ +@pytest.mark.parametrize( + "dcm_path, output_dir, overwrite", + [ + (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), "output", False), + (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), "output", True), + (Path(TEST_DICOM_PARENT_DIR, "RGB_ORIGINAL.dcm"), "output", False), + (Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), "output", False), + (Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), "output", False), + (Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), "output", False), + ], +) +def test_DicomImageRedactorEngine_redact_single_dicom_image_happy_path( + mocker, + mock_engine: DicomImageRedactorEngine, + dcm_path: str, + output_dir: str, + overwrite: bool, +): + """Test happy path for DicomImageRedactorEngine _redact_single_dicom_image() + + Args: + mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. + dcm_path (str): Path to input DICOM file or dir. + output_dir (str): Path to parent directory to write output to. + overwrite (bool): True if overwriting original files. 
+ """ + # Arrange + crop_ratio = 0.75 + mock_copy_files = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._copy_files_for_processing", + return_value=dcm_path, + ) + mock_convert_dcm_to_png = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._convert_dcm_to_png", + return_value=[None, None], + ) + mock_image_open = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.Image.open", + return_value=None, + ) + mock_add_padding = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._add_padding", + return_value=None, + ) + + mock_analyze = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._get_analyzer_results", + return_value=None, + ) + + mock_get_analyze_bbox = mocker.patch( + "presidio_image_redactor.image_redactor_engine.BboxProcessor.get_bboxes_from_analyzer_results", + return_value=None, + ) + + mock_remove_bbox_padding = mocker.patch( + "presidio_image_redactor.image_redactor_engine.BboxProcessor.remove_bbox_padding", + return_value=None, + ) + + class MockInstance: + def save_as(self, dst_path: str): + return None + + mock_add_redact_box = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._add_redact_box", + return_value=MockInstance(), + ) + + # Act + mock_engine._redact_single_dicom_image( + dcm_path=dcm_path, + crop_ratio=crop_ratio, + fill="contrast", + padding_width=25, + use_metadata=True, + overwrite=overwrite, + dst_parent_dir=output_dir, + save_bboxes=False + ) + + # Assert + if overwrite is True: + assert mock_copy_files.call_count == 0 + else: + assert mock_copy_files.call_count == 1 + assert mock_convert_dcm_to_png.call_count == 1 + assert mock_image_open.call_count == 1 + assert mock_add_padding.call_count == 1 + assert mock_analyze.call_count == 1 + assert mock_get_analyze_bbox.call_count == 1 + assert 
mock_remove_bbox_padding.call_count == 1 + assert mock_add_redact_box.call_count == 1 + + +@pytest.mark.parametrize( + "dcm_path, expected_error_type", + [ + (Path(TEST_DICOM_PARENT_DIR), "FileNotFoundError"), + (Path("nonexistentfile.extension"), "FileNotFoundError"), + ], +) +def test_DicomImageRedactorEngine_redact_single_dicom_image_exceptions( + mock_engine: DicomImageRedactorEngine, + dcm_path: str, + expected_error_type: str, +): + """Test error handling of DicomImageRedactorEngine _redact_single_dicom_image() + + Args: + mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. + dcm_path (str): Path to input DICOM file or dir. + expected_error_type (str): Type of error we expect to be raised. + """ + with pytest.raises(Exception) as exc_info: + # Act + mock_engine._redact_single_dicom_image( + dcm_path=dcm_path, + crop_ratio=0.75, + fill="contrast", + padding_width=25, + use_metadata=True, + overwrite=False, + dst_parent_dir=".", + save_bboxes=False + ) + + # Assert + assert expected_error_type == exc_info.typename + + +# ------------------------------------------------------ +# DicomImageRedactorEngine _redact_multiple_dicom_images() +# ------------------------------------------------------ +@pytest.mark.parametrize( + "dcm_path, output_dir, overwrite", + [ + (Path(TEST_DICOM_PARENT_DIR), "output", False), + (Path(TEST_DICOM_PARENT_DIR), "output", True), + (Path(TEST_DICOM_DIR_1), "output", False), + (Path(TEST_DICOM_DIR_2), "output", False), + (Path(TEST_DICOM_DIR_3), "output", False), + ], +) +def test_DicomImageRedactorEngine_redact_multiple_dicom_images_happy_path( + mocker, + mock_engine: DicomImageRedactorEngine, + dcm_path: str, + output_dir: str, + overwrite: bool, +): + """Test happy path for DicomImageRedactorEngine _redact_multiple_dicom_images() + + Args: + mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. + dcm_path (str): Path to input DICOM file or dir. 
+ output_dir (str): Path to parent directory to write output to. + overwrite (bool): True if overwriting original files. + """ + # Arrange + crop_ratio = 0.75 + mock_copy_files = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._copy_files_for_processing", + return_value=dcm_path, + ) + mock_dcm_files = [ + Path("dir1/dir2/file1.dcm"), + Path("dir1/dir2/file2.dcm"), + Path("dir1/dir2/dir3/file3.dcm"), + ] + mock_get_all_dcm_files = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._get_all_dcm_files", + return_value=mock_dcm_files, + ) + mock_redact_single = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._redact_single_dicom_image", + return_value=None, + ) + + # Act + mock_engine._redact_multiple_dicom_images( + dcm_dir=dcm_path, + crop_ratio=crop_ratio, + fill="contrast", + padding_width=25, + use_metadata=True, + overwrite=overwrite, + dst_parent_dir=output_dir, + save_bboxes=False + ) + + # Assert + if overwrite is True: + assert mock_copy_files.call_count == 0 + else: + assert mock_copy_files.call_count == 1 + assert mock_get_all_dcm_files.call_count == 1 + assert mock_redact_single.call_count == len(mock_dcm_files) + + +@pytest.mark.parametrize( + "dcm_path, expected_error_type", + [ + (Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), "FileNotFoundError"), + (Path("nonexistentdir"), "FileNotFoundError"), + ], +) +def test_DicomImageRedactorEngine_redact_multiple_dicom_images_exceptions( + mock_engine: DicomImageRedactorEngine, + dcm_path: str, + expected_error_type: str, +): + """Test error handling of DicomImageRedactorEngine _redact_multiple_dicom_images() + + Args: + mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. + dcm_path (str): Path to input DICOM file or dir. + expected_error_type (str): Type of error we expect to be raised. 
+ """ + with pytest.raises(Exception) as exc_info: + # Act + mock_engine._redact_multiple_dicom_images( + dcm_dir=dcm_path, + crop_ratio=0.75, + fill="contrast", + padding_width=25, + use_metadata=True, + overwrite=False, + dst_parent_dir=".", + save_bboxes=False + ) + + # Assert + assert expected_error_type == exc_info.typename + + +# ------------------------------------------------------ +# DicomImageRedactorEngine redact_from_file() +# ------------------------------------------------------ +@pytest.mark.parametrize( + "dcm_path, mock_dst_path", + [ + ( + f"{TEST_DICOM_PARENT_DIR}/0_ORIGINAL.dcm", + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), + ), + ( + f"{TEST_DICOM_DIR_2}/1_ORIGINAL.DCM", + Path(TEST_DICOM_DIR_2, "1_ORIGINAL.DCM"), + ), + ( + f"{TEST_DICOM_DIR_2}/2_ORIGINAL.dicom", + Path(TEST_DICOM_DIR_2, "2_ORIGINAL.dicom"), + ), + ( + f"{TEST_DICOM_DIR_3}/3_ORIGINAL.DICOM", + Path(TEST_DICOM_DIR_3, "3_ORIGINAL.DICOM"), + ), + ], +) +def test_DicomImageRedactorEngine_redact_from_file_happy_path( + mocker, + mock_engine: DicomImageRedactorEngine, + dcm_path: str, + mock_dst_path: Path, +): + """Test happy path for DicomImageRedactorEngine redact_from_file() + + Args: + mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. + dcm_path (str): Path to input DICOM file or dir. + mock_dst_path (pathlib.Path): Path to DICOM dir or file. 
+ """ + # Arrange + mock_copy_files = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._copy_files_for_processing", + return_value=mock_dst_path, + ) + mock_redact_single = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._redact_single_dicom_image", + return_value=None, + ) + + # Act + mock_engine.redact_from_file(dcm_path, "output", padding_width=25, fill="contrast", use_metadata=True) + + # Assert + assert mock_copy_files.call_count == 1 + assert mock_redact_single.call_count == 1 + + +@pytest.mark.parametrize( + "input_path, output_path, expected_error_type", + [ + (TEST_DICOM_PARENT_DIR, "output", "TypeError"), + (TEST_DICOM_DIR_1, "output", "TypeError"), + (TEST_DICOM_DIR_2, "output", "TypeError"), + (TEST_DICOM_DIR_3, "output", "TypeError"), + ( + f"{TEST_DICOM_PARENT_DIR}/0_ORIGINAL.dcm", + f"{TEST_DICOM_PARENT_DIR}/0_ORIGINAL.dcm", + "TypeError", + ), + ( + f"{TEST_DICOM_PARENT_DIR}/0_ORIGINAL.dcm", + f"{TEST_DICOM_DIR_3}/3_ORIGINAL.DICOM", + "TypeError", + ), + ], +) +def test_DicomImageRedactorEngine_redact_from_file_exceptions( + mock_engine: DicomImageRedactorEngine, + input_path: str, + output_path: Path, + expected_error_type: str, +): + """Test error handling of DicomImageRedactorEngine redact_from_file() + + Args: + mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. + input_path (str): Path to input DICOM file or dir. + output_path (pathlib.Path): Path to DICOM dir or file. + expected_error_type (str): Type of error we expect to be raised. 
+ """ + with pytest.raises(Exception) as exc_info: + # Act + mock_engine.redact_from_file(input_path, output_path, padding_width=25, fill="contrast", use_metadata=True) + + # Assert + assert expected_error_type == exc_info.typename + + +# ------------------------------------------------------ +# DicomImageRedactorEngine redact_from_directory() +# ------------------------------------------------------ +@pytest.mark.parametrize( + "dcm_path, mock_dst_path", + [ + (TEST_DICOM_PARENT_DIR, Path(TEST_DICOM_PARENT_DIR)), + (TEST_DICOM_DIR_2, Path(TEST_DICOM_DIR_1)), + (TEST_DICOM_DIR_2, Path(TEST_DICOM_DIR_2)), + (TEST_DICOM_DIR_3, Path(TEST_DICOM_DIR_3)), + ], +) +def test_DicomImageRedactorEngine_redact_from_directory_happy_path( + mocker, + mock_engine: DicomImageRedactorEngine, + dcm_path: str, + mock_dst_path: Path, +): + """Test happy path for DicomImageRedactorEngine redact_from_directory() + + Args: + mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. + dcm_path (str): Path to input DICOM file or dir. + mock_dst_path (pathlib.Path): Path to DICOM dir or file. 
+ """ + # Arrange + mock_copy_files = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._copy_files_for_processing", + return_value=mock_dst_path, + ) + mock_redact_multiple = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._redact_multiple_dicom_images", + return_value=None, + ) + + # Act + mock_engine.redact_from_directory(dcm_path, "output", padding_width=25, fill="contrast", use_metadata=True) + + # Assert + assert mock_copy_files.call_count == 1 + assert mock_redact_multiple.call_count == 1 + + +@pytest.mark.parametrize( + "input_path, output_path, expected_error_type", + [ + (f"{TEST_DICOM_PARENT_DIR}/0_ORIGINAL.dcm", "output", "TypeError"), + (TEST_DICOM_DIR_1, f"{TEST_DICOM_PARENT_DIR}/0_ORIGINAL.dcm", "TypeError"), + ("nonexistentdir", "output", "TypeError"), + ], +) +def test_DicomImageRedactorEngine_redact_from_directory_exceptions( + mock_engine: DicomImageRedactorEngine, + input_path: str, + output_path: Path, + expected_error_type: str, +): + """Test error handling of DicomImageRedactorEngine redact_from_directory() + + Args: + mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. + input_path (str): Path to input DICOM file or dir. + output_path (pathlib.Path): Path to DICOM dir or file. + expected_error_type (str): Type of error we expect to be raised. 
+ """ + with pytest.raises(Exception) as exc_info: + # Act + mock_engine.redact_from_directory(input_path, output_path, padding_width=25, fill="contrast", use_metadata=True) + + # Assert + assert expected_error_type == exc_info.typename From 4e7c8a6feb1ec0f7d8894722587546a00d3b992a Mon Sep 17 00:00:00 2001 From: Nile Wilson Date: Thu, 17 Aug 2023 13:19:48 -0400 Subject: [PATCH 19/25] Only commenting out happy path test for get_analyzer_results --- .../tests/test_dicom_image_redactor_engine.py | 100 +++++++++--------- 1 file changed, 50 insertions(+), 50 deletions(-) diff --git a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py index 56d9ed126..528e0603a 100644 --- a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py +++ b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py @@ -1158,9 +1158,9 @@ def test_add_redact_box_happy_path( assert box_color_pixels_redacted > box_color_pixels_original -# # ------------------------------------------------------ -# # DicomImageRedactorEngine._get_analyzer_results() -# # ------------------------------------------------------ +# ------------------------------------------------------ +# DicomImageRedactorEngine._get_analyzer_results() +# ------------------------------------------------------ # @pytest.mark.parametrize( # "image, dcm_path, use_metadata, ad_hoc_recognizers", # [ @@ -1243,56 +1243,56 @@ def test_add_redact_box_happy_path( # mock_make_phi_list.assert_called_once() # mock_pattern_recognizer.assert_called_once() -# @pytest.mark.parametrize( -# "image, dcm_path, ad_hoc_recognizers, expected_error_type", -# [ -# ( -# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), -# "invalidType", -# "TypeError" -# ), -# ( -# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), -# [], 
-# "ValueError" -# ), -# ( -# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), -# [PatternRecognizer(supported_entity="PERSON", deny_list=["a"]), 2], -# "TypeError" -# ), -# ], -# ) -# def test_get_analyzer_results_exceptions( -# mock_engine: DicomImageRedactorEngine, -# image: Image, -# dcm_path: str, -# ad_hoc_recognizers: Optional[List[PatternRecognizer]], -# expected_error_type: str, -# ): -# """Test error handling of DicomImageRedactorEngine _get_analyzer_results() +@pytest.mark.parametrize( + "image, dcm_path, ad_hoc_recognizers, expected_error_type", + [ + ( + Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), + "invalidType", + "TypeError" + ), + ( + Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), + [], + "ValueError" + ), + ( + Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), + [PatternRecognizer(supported_entity="PERSON", deny_list=["a"]), 2], + "TypeError" + ), + ], +) +def test_get_analyzer_results_exceptions( + mock_engine: DicomImageRedactorEngine, + image: Image, + dcm_path: str, + ad_hoc_recognizers: Optional[List[PatternRecognizer]], + expected_error_type: str, +): + """Test error handling of DicomImageRedactorEngine _get_analyzer_results() -# Args: -# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. -# image (PIL.Image): A PIL image. -# dcm_path (pathlib.Path): Path to DICOM file. -# ad_hoc_recognizers(None or list): Ad-hoc recognizers to use. -# expected_error_type (str): Type of error we expect to be raised. -# """ -# with pytest.raises(Exception) as exc_info: -# # Arrange -# test_instance = pydicom.dcmread(dcm_path) + Args: + mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. + image (PIL.Image): A PIL image. 
+ dcm_path (pathlib.Path): Path to DICOM file. + ad_hoc_recognizers(None or list): Ad-hoc recognizers to use. + expected_error_type (str): Type of error we expect to be raised. + """ + with pytest.raises(Exception) as exc_info: + # Arrange + test_instance = pydicom.dcmread(dcm_path) -# # Act -# _ = mock_engine._get_analyzer_results( -# image, test_instance, True, None, ad_hoc_recognizers -# ) + # Act + _ = mock_engine._get_analyzer_results( + image, test_instance, True, None, ad_hoc_recognizers + ) -# # Assert -# assert expected_error_type == exc_info.typename + # Assert + assert expected_error_type == exc_info.typename # ------------------------------------------------------ # DicomImageRedactorEngine redact_and_return_bbox() From 57f2a862cbd2414aba09b69315bcb33c4a80c442 Mon Sep 17 00:00:00 2001 From: Nile Wilson Date: Thu, 17 Aug 2023 13:34:51 -0400 Subject: [PATCH 20/25] Changing ad_hoc_recognizers type check to raise only TypeError --- .../presidio_image_redactor/dicom_image_redactor_engine.py | 2 +- .../tests/test_dicom_image_redactor_engine.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py b/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py index 8f0820596..552297cdc 100644 --- a/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py +++ b/presidio-image-redactor/presidio_image_redactor/dicom_image_redactor_engine.py @@ -910,7 +910,7 @@ def _get_analyzer_results( if are_recognizers is False: raise TypeError("All items in ad_hoc_recognizers list must be PatternRecognizer objects") # noqa: E501 else: - raise ValueError("ad_hoc_recognizers must be None or list of PatternRecognizer") # noqa: E501 + raise TypeError("ad_hoc_recognizers must be None or list of PatternRecognizer") # noqa: E501 else: raise TypeError("ad_hoc_recognizers must be None or list of PatternRecognizer") # noqa: E501 diff --git 
a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py index 528e0603a..7190b1304 100644 --- a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py +++ b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py @@ -1256,7 +1256,7 @@ def test_add_redact_box_happy_path( Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), [], - "ValueError" + "TypeError" ), ( Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), From ed2608d40f9c065536bf0b32a4e53d52db788cef Mon Sep 17 00:00:00 2001 From: Nile Wilson Date: Thu, 17 Aug 2023 13:49:56 -0400 Subject: [PATCH 21/25] Removing type assertion for exception test --- .../tests/test_dicom_image_redactor_engine.py | 26 ++++++++----------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py index 7190b1304..2f751be04 100644 --- a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py +++ b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py @@ -1244,26 +1244,23 @@ def test_add_redact_box_happy_path( # mock_pattern_recognizer.assert_called_once() @pytest.mark.parametrize( - "image, dcm_path, ad_hoc_recognizers, expected_error_type", + "image, dcm_path, ad_hoc_recognizers", [ ( Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - "invalidType", - "TypeError" + "invalidType" ), ( Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - [], - "TypeError" + [] ), ( Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - [PatternRecognizer(supported_entity="PERSON", deny_list=["a"]), 2], - 
"TypeError" - ), + [PatternRecognizer(supported_entity="PERSON", deny_list=["a"]), 2] + ) ], ) def test_get_analyzer_results_exceptions( @@ -1271,7 +1268,6 @@ def test_get_analyzer_results_exceptions( image: Image, dcm_path: str, ad_hoc_recognizers: Optional[List[PatternRecognizer]], - expected_error_type: str, ): """Test error handling of DicomImageRedactorEngine _get_analyzer_results() @@ -1280,20 +1276,20 @@ def test_get_analyzer_results_exceptions( image (PIL.Image): A PIL image. dcm_path (pathlib.Path): Path to DICOM file. ad_hoc_recognizers(None or list): Ad-hoc recognizers to use. - expected_error_type (str): Type of error we expect to be raised. """ - with pytest.raises(Exception) as exc_info: + with pytest.raises(Exception): # Arrange test_instance = pydicom.dcmread(dcm_path) # Act _ = mock_engine._get_analyzer_results( - image, test_instance, True, None, ad_hoc_recognizers + image=image, + instance=test_instance, + use_metadata=True, + ocr_kwargs=None, + ad_hoc_recognizers=ad_hoc_recognizers ) - # Assert - assert expected_error_type == exc_info.typename - # ------------------------------------------------------ # DicomImageRedactorEngine redact_and_return_bbox() # ------------------------------------------------------ From a5909a3d6f941155632f2816a0e4036683ca4123 Mon Sep 17 00:00:00 2001 From: Nile Wilson Date: Thu, 17 Aug 2023 13:59:24 -0400 Subject: [PATCH 22/25] Reintroduce the happy path test for get_analyzer_results --- .../tests/test_dicom_image_redactor_engine.py | 162 +++++++++--------- 1 file changed, 81 insertions(+), 81 deletions(-) diff --git a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py index 2f751be04..73ffb43e8 100644 --- a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py +++ b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py @@ -1161,87 +1161,87 @@ def test_add_redact_box_happy_path( # 
------------------------------------------------------ # DicomImageRedactorEngine._get_analyzer_results() # ------------------------------------------------------ -# @pytest.mark.parametrize( -# "image, dcm_path, use_metadata, ad_hoc_recognizers", -# [ -# ( -# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), -# False, -# None -# ), -# ( -# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), -# False, -# [PatternRecognizer(supported_entity="PERSON", deny_list=["1"])] -# ), -# ( -# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), -# True, -# None -# ), -# ( -# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), -# True, -# [PatternRecognizer(supported_entity="PERSON", deny_list=["2"])] -# ), -# ], -# ) -# def test_get_analyzer_results_happy_path( -# mocker, -# mock_engine: DicomImageRedactorEngine, -# image: Image, -# dcm_path: str, -# use_metadata: bool, -# ad_hoc_recognizers: Optional[List[PatternRecognizer]] -# ): -# """Test happy path for DicomImageRedactorEngine._get_analyzer_results - -# Args: -# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. -# image (PIL.Image): A PIL image. -# dcm_path (pathlib.Path): Path to DICOM file. -# use_metadata (bool): Whether to consider metadata when running analysis. -# ad_hoc_recognizers(None or list): Ad-hoc recognizers to use. 
-# """ -# # Arrange -# mock_analyze = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.ImageAnalyzerEngine.analyze", -# return_value=None, -# ) -# mock_get_text_metadata = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._get_text_metadata", -# return_value=[None, None, None], -# ) -# mock_make_phi_list = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._make_phi_list", -# return_value=None, -# ) -# mock_pattern_recognizer = mocker.patch( -# "presidio_image_redactor.dicom_image_redactor_engine.PatternRecognizer", -# return_value=None, -# ) -# test_instance = pydicom.dcmread(dcm_path) - -# # Act -# _ = mock_engine._get_analyzer_results( -# image, test_instance, use_metadata, None, ad_hoc_recognizers -# ) - -# # Assert -# if use_metadata is False: -# mock_analyze.assert_called_once() -# mock_get_text_metadata.assert_not_called() -# mock_make_phi_list.assert_not_called() -# mock_pattern_recognizer.assert_not_called() -# elif use_metadata is True: -# mock_analyze.assert_called_once() -# mock_get_text_metadata.assert_called_once() -# mock_make_phi_list.assert_called_once() -# mock_pattern_recognizer.assert_called_once() +@pytest.mark.parametrize( + "image, dcm_path, use_metadata, ad_hoc_recognizers", + [ + ( + Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), + False, + None + ), + ( + Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), + False, + [PatternRecognizer(supported_entity="PERSON", deny_list=["1"])] + ), + ( + Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), + True, + None + ), + ( + Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), + Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), + True, + 
[PatternRecognizer(supported_entity="PERSON", deny_list=["2"])] + ), + ], +) +def test_get_analyzer_results_happy_path( + mocker, + mock_engine: DicomImageRedactorEngine, + image: Image, + dcm_path: str, + use_metadata: bool, + ad_hoc_recognizers: Optional[List[PatternRecognizer]] +): + """Test happy path for DicomImageRedactorEngine._get_analyzer_results + + Args: + mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. + image (PIL.Image): A PIL image. + dcm_path (pathlib.Path): Path to DICOM file. + use_metadata (bool): Whether to consider metadata when running analysis. + ad_hoc_recognizers(None or list): Ad-hoc recognizers to use. + """ + # Arrange + mock_analyze = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.ImageAnalyzerEngine.analyze", + return_value=None, + ) + mock_get_text_metadata = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._get_text_metadata", + return_value=[None, None, None], + ) + mock_make_phi_list = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.DicomImageRedactorEngine._make_phi_list", + return_value=None, + ) + mock_pattern_recognizer = mocker.patch( + "presidio_image_redactor.dicom_image_redactor_engine.PatternRecognizer", + return_value=None, + ) + test_instance = pydicom.dcmread(dcm_path) + + # Act + _ = mock_engine._get_analyzer_results( + image, test_instance, use_metadata, None, ad_hoc_recognizers + ) + + # Assert + if use_metadata is False: + mock_analyze.assert_called_once() + mock_get_text_metadata.assert_not_called() + mock_make_phi_list.assert_not_called() + mock_pattern_recognizer.assert_not_called() + elif use_metadata is True: + mock_analyze.assert_called_once() + mock_get_text_metadata.assert_called_once() + mock_make_phi_list.assert_called_once() + mock_pattern_recognizer.assert_called_once() @pytest.mark.parametrize( "image, dcm_path, ad_hoc_recognizers", From ad01b435abe5bc96668be36e8647f08313f91279 Mon Sep 17 
00:00:00 2001 From: Nile Wilson Date: Thu, 17 Aug 2023 14:12:42 -0400 Subject: [PATCH 23/25] Commenting out exception test and keeping in happy path test --- .../tests/test_dicom_image_redactor_engine.py | 92 +++++++++---------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py index 73ffb43e8..c66628ae1 100644 --- a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py +++ b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py @@ -1243,52 +1243,52 @@ def test_get_analyzer_results_happy_path( mock_make_phi_list.assert_called_once() mock_pattern_recognizer.assert_called_once() -@pytest.mark.parametrize( - "image, dcm_path, ad_hoc_recognizers", - [ - ( - Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), - Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - "invalidType" - ), - ( - Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), - Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - [] - ), - ( - Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), - Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), - [PatternRecognizer(supported_entity="PERSON", deny_list=["a"]), 2] - ) - ], -) -def test_get_analyzer_results_exceptions( - mock_engine: DicomImageRedactorEngine, - image: Image, - dcm_path: str, - ad_hoc_recognizers: Optional[List[PatternRecognizer]], -): - """Test error handling of DicomImageRedactorEngine _get_analyzer_results() - - Args: - mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. - image (PIL.Image): A PIL image. - dcm_path (pathlib.Path): Path to DICOM file. - ad_hoc_recognizers(None or list): Ad-hoc recognizers to use. 
- """ - with pytest.raises(Exception): - # Arrange - test_instance = pydicom.dcmread(dcm_path) - - # Act - _ = mock_engine._get_analyzer_results( - image=image, - instance=test_instance, - use_metadata=True, - ocr_kwargs=None, - ad_hoc_recognizers=ad_hoc_recognizers - ) +# @pytest.mark.parametrize( +# "image, dcm_path, ad_hoc_recognizers", +# [ +# ( +# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), +# "invalidType" +# ), +# ( +# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), +# [] +# ), +# ( +# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), +# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), +# [PatternRecognizer(supported_entity="PERSON", deny_list=["a"]), 2] +# ) +# ], +# ) +# def test_get_analyzer_results_exceptions( +# mock_engine: DicomImageRedactorEngine, +# image: Image, +# dcm_path: str, +# ad_hoc_recognizers: Optional[List[PatternRecognizer]], +# ): +# """Test error handling of DicomImageRedactorEngine _get_analyzer_results() + +# Args: +# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. +# image (PIL.Image): A PIL image. +# dcm_path (pathlib.Path): Path to DICOM file. +# ad_hoc_recognizers(None or list): Ad-hoc recognizers to use. 
+# """ +# with pytest.raises(Exception): +# # Arrange +# test_instance = pydicom.dcmread(dcm_path) + +# # Act +# _ = mock_engine._get_analyzer_results( +# image=image, +# instance=test_instance, +# use_metadata=True, +# ocr_kwargs=None, +# ad_hoc_recognizers=ad_hoc_recognizers +# ) # ------------------------------------------------------ # DicomImageRedactorEngine redact_and_return_bbox() From 9a1f56bd6a376702b5798fe226f0ebd6f61d2227 Mon Sep 17 00:00:00 2001 From: Nile Wilson Date: Thu, 17 Aug 2023 14:25:32 -0400 Subject: [PATCH 24/25] Removing constants from parameterize for exception test --- .../tests/test_dicom_image_redactor_engine.py | 77 ++++++++----------- 1 file changed, 31 insertions(+), 46 deletions(-) diff --git a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py index c66628ae1..bfd9f8872 100644 --- a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py +++ b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py @@ -1243,52 +1243,37 @@ def test_get_analyzer_results_happy_path( mock_make_phi_list.assert_called_once() mock_pattern_recognizer.assert_called_once() -# @pytest.mark.parametrize( -# "image, dcm_path, ad_hoc_recognizers", -# [ -# ( -# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), -# "invalidType" -# ), -# ( -# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), -# [] -# ), -# ( -# Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)), -# Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm"), -# [PatternRecognizer(supported_entity="PERSON", deny_list=["a"]), 2] -# ) -# ], -# ) -# def test_get_analyzer_results_exceptions( -# mock_engine: DicomImageRedactorEngine, -# image: Image, -# dcm_path: str, -# ad_hoc_recognizers: Optional[List[PatternRecognizer]], -# ): -# """Test error 
handling of DicomImageRedactorEngine _get_analyzer_results() - -# Args: -# mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. -# image (PIL.Image): A PIL image. -# dcm_path (pathlib.Path): Path to DICOM file. -# ad_hoc_recognizers(None or list): Ad-hoc recognizers to use. -# """ -# with pytest.raises(Exception): -# # Arrange -# test_instance = pydicom.dcmread(dcm_path) - -# # Act -# _ = mock_engine._get_analyzer_results( -# image=image, -# instance=test_instance, -# use_metadata=True, -# ocr_kwargs=None, -# ad_hoc_recognizers=ad_hoc_recognizers -# ) +@pytest.mark.parametrize( + "ad_hoc_recognizers", + [ + ("invalidType"), + ([]), + ([PatternRecognizer(supported_entity="TITLE", deny_list=["Mr", "Ms"]), 2]) + ], +) +def test_get_analyzer_results_exceptions( + mock_engine: DicomImageRedactorEngine, + ad_hoc_recognizers: Optional[List[PatternRecognizer]], +): + """Test error handling of DicomImageRedactorEngine _get_analyzer_results() + + Args: + mock_engine (DicomImageRedactorEngine): DicomImageRedactorEngine object. + ad_hoc_recognizers(None or list): Ad-hoc recognizers to use. 
+ """ + with pytest.raises(Exception): + # Arrange + image = Image.fromarray(np.random.randint(255, size=(400, 400),dtype=np.uint8)) + test_instance = pydicom.dcmread(Path(TEST_DICOM_PARENT_DIR, "0_ORIGINAL.dcm")) + + # Act + _ = mock_engine._get_analyzer_results( + image=image, + instance=test_instance, + use_metadata=True, + ocr_kwargs=None, + ad_hoc_recognizers=ad_hoc_recognizers + ) # ------------------------------------------------------ # DicomImageRedactorEngine redact_and_return_bbox() From dabc579f67558e7cd0dba57e76787d08df8a2e53 Mon Sep 17 00:00:00 2001 From: Nile Wilson Date: Thu, 17 Aug 2023 14:53:16 -0400 Subject: [PATCH 25/25] Adding argument= in call to get_analyzer_results in happy path --- .../tests/test_dicom_image_redactor_engine.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py index bfd9f8872..5b432b366 100644 --- a/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py +++ b/presidio-image-redactor/tests/test_dicom_image_redactor_engine.py @@ -1228,7 +1228,11 @@ def test_get_analyzer_results_happy_path( # Act _ = mock_engine._get_analyzer_results( - image, test_instance, use_metadata, None, ad_hoc_recognizers + image=image, + instance=test_instance, + use_metadata=use_metadata, + ocr_kwargs=None, + ad_hoc_recognizers=ad_hoc_recognizers ) # Assert