From de9739118c70375128b3dba5f0498b91b8b0943b Mon Sep 17 00:00:00 2001
From: Behrooz <3968947+drbeh@users.noreply.github.com>
Date: Sat, 2 Apr 2022 03:45:15 -0400
Subject: [PATCH] Add support for `level` in `PatchWSIDataset` (#4036)

* Add support for level as input

Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com>

* Add unittests for levels

Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com>

* Update docstring

Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com>

* Add kwargs for WSIReader in all datasets

Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com>

* Update docstring

Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com>
---
 monai/apps/pathology/data/datasets.py |  17 ++--
 tests/test_patch_wsi_dataset.py       | 120 +++++++++++++++++++++++++-
 2 files changed, 130 insertions(+), 7 deletions(-)

diff --git a/monai/apps/pathology/data/datasets.py b/monai/apps/pathology/data/datasets.py
index 77e3bb34c4..71f3214ea4 100644
--- a/monai/apps/pathology/data/datasets.py
+++ b/monai/apps/pathology/data/datasets.py
@@ -35,6 +35,7 @@ class PatchWSIDataset(Dataset):
         transform: transforms to be executed on input data.
         image_reader_name: the name of library to be used for loading whole slide imaging, either CuCIM or OpenSlide.
             Defaults to CuCIM.
+        kwargs: additional parameters for ``WSIReader``

     Note:
         The input data has the following form as an example:
@@ -56,6 +57,7 @@ def __init__(
         patch_size: Union[int, Tuple[int, int]],
         transform: Optional[Callable] = None,
         image_reader_name: str = "cuCIM",
+        **kwargs,
     ):
         super().__init__(data, transform)

@@ -65,7 +67,7 @@ def __init__(
         self.image_path_list = list({x["image"] for x in self.data})

         self.image_reader_name = image_reader_name.lower()
-        self.image_reader = WSIReader(image_reader_name)
+        self.image_reader = WSIReader(backend=image_reader_name, **kwargs)
         self.wsi_object_dict = None
         if self.image_reader_name != "openslide":
             # OpenSlide causes memory issue if we prefetch image objects
@@ -119,10 +121,10 @@ class SmartCachePatchWSIDataset(SmartCacheDataset):
             will take the minimum of (cache_num, data_length x cache_rate, data_length).
         num_init_workers: the number of worker threads to initialize the cache for first epoch.
             If num_init_workers is None then the number returned by os.cpu_count() is used.
-            If a value less than 1 is speficied, 1 will be used instead.
+            If a value less than 1 is specified, 1 will be used instead.
         num_replace_workers: the number of worker threads to prepare the replacement cache for every epoch.
             If num_replace_workers is None then the number returned by os.cpu_count() is used.
-            If a value less than 1 is speficied, 1 will be used instead.
+            If a value less than 1 is specified, 1 will be used instead.
         progress: whether to display a progress bar when caching for the first epoch.
         copy_cache: whether to `deepcopy` the cache content before applying the random transforms,
             default to `True`. if the random transforms don't modify the cache content
             may set `copy=False` for better performance.
         as_contiguous: whether to convert the cached NumPy array or PyTorch tensor to be contiguous.
             it may help improve the performance of following logic.
+        kwargs: additional parameters for ``WSIReader``

     """
@@ -149,6 +152,7 @@ def __init__(
         progress: bool = True,
         copy_cache: bool = True,
         as_contiguous: bool = True,
+        **kwargs,
     ):
         patch_wsi_dataset = PatchWSIDataset(
             data=data,
@@ -156,6 +160,7 @@ def __init__(
             grid_shape=grid_shape,
             patch_size=patch_size,
             image_reader_name=image_reader_name,
+            **kwargs,
         )
         super().__init__(
             data=patch_wsi_dataset,  # type: ignore
@@ -183,7 +188,8 @@ class MaskedInferenceWSIDataset(Dataset):
         patch_size: the size of patches to be extracted from the whole slide image for inference.
         transform: transforms to be executed on extracted patches.
         image_reader_name: the name of library to be used for loading whole slide imaging, either CuCIM or OpenSlide.
-            Defaults to CuCIM.
+            Defaults to CuCIM.
+        kwargs: additional parameters for ``WSIReader``

     Note:
         The resulting output (probability maps) after performing inference using this dataset is
@@ -196,6 +202,7 @@ def __init__(
         patch_size: Union[int, Tuple[int, int]],
         transform: Optional[Callable] = None,
         image_reader_name: str = "cuCIM",
+        **kwargs,
     ) -> None:
         super().__init__(data, transform)

@@ -203,7 +210,7 @@ def __init__(

         # set up whole slide image reader
         self.image_reader_name = image_reader_name.lower()
-        self.image_reader = WSIReader(image_reader_name)
+        self.image_reader = WSIReader(backend=image_reader_name, **kwargs)

         # process data and create a list of dictionaries containing all required data and metadata
         self.data = self._prepare_data(data)
diff --git a/tests/test_patch_wsi_dataset.py b/tests/test_patch_wsi_dataset.py
index c351ce5f79..20d7f22988 100644
--- a/tests/test_patch_wsi_dataset.py
+++ b/tests/test_patch_wsi_dataset.py
@@ -41,6 +41,31 @@
     [{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}],
 ]

+TEST_CASE_0_L1 = [
+    {
+        "data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}],
+        "region_size": (1, 1),
+        "grid_shape": (1, 1),
+        "patch_size": 1,
+        "level": 1,
+        "image_reader_name": "cuCIM",
+    },
+    [{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}],
+]
+
+TEST_CASE_0_L2 = [
+    {
+        "data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}],
+        "region_size": (1, 1),
+        "grid_shape": (1, 1),
+        "patch_size": 1,
+        "level": 2,
+        "image_reader_name": "cuCIM",
+    },
+    [{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}],
+]
+
+
 TEST_CASE_1 = [
     {
         "data": [{"image": FILE_PATH, "location": [10004, 20004], "label": [0, 0, 0, 1]}],
@@ -57,6 +82,41 @@
     ],
 ]

+
+TEST_CASE_1_L0 = [
+    {
+        "data": [{"image": FILE_PATH, "location": [10004, 20004], "label": [0, 0, 0, 1]}],
+        "region_size": (8, 8),
+        "grid_shape": (2, 2),
+        "patch_size": 1,
+        "level": 0,
+        "image_reader_name": "cuCIM",
+    },
+    [
+        {"image": np.array([[[247]], [[245]], [[248]]], dtype=np.uint8), "label": np.array([[[0]]])},
+        {"image": np.array([[[245]], [[247]], [[244]]], dtype=np.uint8), "label": np.array([[[0]]])},
+        {"image": np.array([[[246]], [[246]], [[246]]], dtype=np.uint8), "label": np.array([[[0]]])},
+        {"image": np.array([[[246]], [[246]], [[246]]], dtype=np.uint8), "label": np.array([[[1]]])},
+    ],
+]
+
+
+TEST_CASE_1_L1 = [
+    {
+        "data": [{"image": FILE_PATH, "location": [10004, 20004], "label": [0, 0, 0, 1]}],
+        "region_size": (8, 8),
+        "grid_shape": (2, 2),
+        "patch_size": 1,
+        "level": 1,
+        "image_reader_name": "cuCIM",
+    },
+    [
+        {"image": np.array([[[248]], [[246]], [[249]]], dtype=np.uint8), "label": np.array([[[0]]])},
+        {"image": np.array([[[196]], [[187]], [[192]]], dtype=np.uint8), "label": np.array([[[0]]])},
+        {"image": np.array([[[245]], [[243]], [[244]]], dtype=np.uint8), "label": np.array([[[0]]])},
+        {"image": np.array([[[246]], [[242]], [[243]]], dtype=np.uint8), "label": np.array([[[1]]])},
+    ],
+]
 TEST_CASE_2 = [
     {
         "data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}],
@@ -90,6 +150,43 @@
     [{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}],
 ]

+TEST_CASE_OPENSLIDE_0_L0 = [
+    {
+        "data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}],
+        "region_size": (1, 1),
+        "grid_shape": (1, 1),
+        "patch_size": 1,
+        "level": 0,
+        "image_reader_name": "OpenSlide",
+    },
+    [{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}],
+]
+
+TEST_CASE_OPENSLIDE_0_L1 = [
+    {
+        "data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}],
+        "region_size": (1, 1),
+        "grid_shape": (1, 1),
+        "patch_size": 1,
+        "level": 1,
+        "image_reader_name": "OpenSlide",
+    },
+    [{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}],
+]
+
+
+TEST_CASE_OPENSLIDE_0_L2 = [
+    {
+        "data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}],
+        "region_size": (1, 1),
+        "grid_shape": (1, 1),
+        "patch_size": 1,
+        "level": 2,
+        "image_reader_name": "OpenSlide",
+    },
+    [{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}],
+]
+
 TEST_CASE_OPENSLIDE_1 = [
     {
         "data": [{"image": FILE_PATH, "location": [10004, 20004], "label": [0, 0, 0, 1]}],
@@ -113,7 +210,18 @@ def setUp(self):
         hash_val = testing_data_config("images", FILE_KEY, "hash_val")
         download_url_or_skip_test(FILE_URL, FILE_PATH, hash_type=hash_type, hash_val=hash_val)

-    @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_2, TEST_CASE_3])
+    @parameterized.expand(
+        [
+            TEST_CASE_0,
+            TEST_CASE_0_L1,
+            TEST_CASE_0_L2,
+            TEST_CASE_1,
+            TEST_CASE_1_L0,
+            TEST_CASE_1_L1,
+            TEST_CASE_2,
+            TEST_CASE_3,
+        ]
+    )
     @skipUnless(has_cim, "Requires CuCIM")
     def test_read_patches_cucim(self, input_parameters, expected):
         dataset = PatchWSIDataset(**input_parameters)
@@ -124,7 +232,15 @@ def test_read_patches_cucim(self, input_parameters, expected):
             self.assertIsNone(assert_array_equal(samples[i]["label"], expected[i]["label"]))
             self.assertIsNone(assert_array_equal(samples[i]["image"], expected[i]["image"]))

-    @parameterized.expand([TEST_CASE_OPENSLIDE_0, TEST_CASE_OPENSLIDE_1])
+    @parameterized.expand(
+        [
+            TEST_CASE_OPENSLIDE_0,
+            TEST_CASE_OPENSLIDE_0_L0,
+            TEST_CASE_OPENSLIDE_0_L1,
+            TEST_CASE_OPENSLIDE_0_L2,
+            TEST_CASE_OPENSLIDE_1,
+        ]
+    )
     @skipUnless(has_osl, "Requires OpenSlide")
     def test_read_patches_openslide(self, input_parameters, expected):
         dataset = PatchWSIDataset(**input_parameters)
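
A minimal usage sketch of the change, for illustration only: `level` is not an explicit
argument of `PatchWSIDataset`; it is forwarded through `**kwargs` to `WSIReader`, so patches
are read from the requested pyramid level instead of the default level 0. The slide path below
is a placeholder and requires cuCIM to be installed.

    from monai.apps.pathology.data.datasets import PatchWSIDataset

    # "CMU-1.tiff" stands in for a local whole slide image path.
    dataset = PatchWSIDataset(
        data=[{"image": "CMU-1.tiff", "location": [10004, 20004], "label": [0, 0, 0, 1]}],
        region_size=(8, 8),
        grid_shape=(2, 2),
        patch_size=1,
        image_reader_name="cuCIM",
        level=1,  # forwarded via **kwargs to WSIReader; without it, level 0 is read
    )
    samples = dataset[0]  # a list of patch dicts: [{"image": ..., "label": ...}, ...]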