From de9739118c70375128b3dba5f0498b91b8b0943b Mon Sep 17 00:00:00 2001
From: Behrooz <3968947+drbeh@users.noreply.github.com>
Date: Sat, 2 Apr 2022 03:45:15 -0400
Subject: [PATCH] Add support for `level` in `PatchWSIDataset` (#4036)

* Add support for level as input

Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com>

* Add unittests for levels

Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com>

* Update docstring

Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com>

* Add kwargs for WSIReader in all datasets

Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com>

* Update docstring

Signed-off-by: Behrooz <3968947+drbeh@users.noreply.github.com>
---
 monai/apps/pathology/data/datasets.py |  17 ++--
 tests/test_patch_wsi_dataset.py       | 120 +++++++++++++++++++++++++-
 2 files changed, 130 insertions(+), 7 deletions(-)

diff --git a/monai/apps/pathology/data/datasets.py b/monai/apps/pathology/data/datasets.py
index 77e3bb34c4..71f3214ea4 100644
--- a/monai/apps/pathology/data/datasets.py
+++ b/monai/apps/pathology/data/datasets.py
@@ -35,6 +35,7 @@ class PatchWSIDataset(Dataset):
         transform: transforms to be executed on input data.
         image_reader_name: the name of library to be used for loading whole slide imaging, either CuCIM or OpenSlide.
             Defaults to CuCIM.
+        kwargs: additional parameters for ``WSIReader``

     Note:
         The input data has the following form as an example:
@@ -56,6 +57,7 @@ def __init__(
         patch_size: Union[int, Tuple[int, int]],
         transform: Optional[Callable] = None,
         image_reader_name: str = "cuCIM",
+        **kwargs,
     ):
         super().__init__(data, transform)

@@ -65,7 +67,7 @@ def __init__(
         self.image_path_list = list({x["image"] for x in self.data})

         self.image_reader_name = image_reader_name.lower()
-        self.image_reader = WSIReader(image_reader_name)
+        self.image_reader = WSIReader(backend=image_reader_name, **kwargs)
         self.wsi_object_dict = None
         if self.image_reader_name != "openslide":
             # OpenSlide causes memory issue if we prefetch image objects
@@ -119,10 +121,10 @@ class SmartCachePatchWSIDataset(SmartCacheDataset):
             will take the minimum of (cache_num, data_length x cache_rate, data_length).
         num_init_workers: the number of worker threads to initialize the cache for first epoch.
             If num_init_workers is None then the number returned by os.cpu_count() is used.
-            If a value less than 1 is speficied, 1 will be used instead.
+            If a value less than 1 is specified, 1 will be used instead.
         num_replace_workers: the number of worker threads to prepare the replacement cache for every epoch.
             If num_replace_workers is None then the number returned by os.cpu_count() is used.
-            If a value less than 1 is speficied, 1 will be used instead.
+            If a value less than 1 is specified, 1 will be used instead.
         progress: whether to display a progress bar when caching for the first epoch.
         copy_cache: whether to `deepcopy` the cache content before applying the random transforms,
             default to `True`. if the random transforms don't modify the cache content
             may set `copy=False` for better performance.
         as_contiguous: whether to convert the cached NumPy array or PyTorch tensor to be contiguous.
             it may help improve the performance of following logic.
+        kwargs: additional parameters for ``WSIReader``

     """
@@ -149,6 +152,7 @@ def __init__(
         progress: bool = True,
         copy_cache: bool = True,
         as_contiguous: bool = True,
+        **kwargs,
     ):
         patch_wsi_dataset = PatchWSIDataset(
             data=data,
@@ -156,6 +160,7 @@ def __init__(
             grid_shape=grid_shape,
             patch_size=patch_size,
             image_reader_name=image_reader_name,
+            **kwargs,
         )
         super().__init__(
             data=patch_wsi_dataset,  # type: ignore
@@ -183,7 +188,8 @@ class MaskedInferenceWSIDataset(Dataset):
         patch_size: the size of patches to be extracted from the whole slide image for inference.
         transform: transforms to be executed on extracted patches.
         image_reader_name: the name of library to be used for loading whole slide imaging, either CuCIM or OpenSlide.
-            Defaults to CuCIM.
+            Defaults to CuCIM.
+        kwargs: additional parameters for ``WSIReader``

     Note:
         The resulting output (probability maps) after performing inference using this dataset is
@@ -196,6 +202,7 @@ def __init__(
         patch_size: Union[int, Tuple[int, int]],
         transform: Optional[Callable] = None,
         image_reader_name: str = "cuCIM",
+        **kwargs,
     ) -> None:
         super().__init__(data, transform)

@@ -203,7 +210,7 @@ def __init__(

         # set up whole slide image reader
         self.image_reader_name = image_reader_name.lower()
-        self.image_reader = WSIReader(image_reader_name)
+        self.image_reader = WSIReader(backend=image_reader_name, **kwargs)

         # process data and create a list of dictionaries containing all required data and metadata
         self.data = self._prepare_data(data)
diff --git a/tests/test_patch_wsi_dataset.py b/tests/test_patch_wsi_dataset.py
index c351ce5f79..20d7f22988 100644
--- a/tests/test_patch_wsi_dataset.py
+++ b/tests/test_patch_wsi_dataset.py
@@ -41,6 +41,31 @@
     [{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}],
 ]

+TEST_CASE_0_L1 = [
+    {
+        "data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}],
+        "region_size": (1, 1),
+        "grid_shape": (1, 1),
+        "patch_size": 1,
+        "level": 1,
+        "image_reader_name": "cuCIM",
+    },
+    [{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}],
+]
+
+TEST_CASE_0_L2 = [
+    {
+        "data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}],
+        "region_size": (1, 1),
+        "grid_shape": (1, 1),
+        "patch_size": 1,
+        "level": 2,
+        "image_reader_name": "cuCIM",
+    },
+    [{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}],
+]
+
+
 TEST_CASE_1 = [
     {
         "data": [{"image": FILE_PATH, "location": [10004, 20004], "label": [0, 0, 0, 1]}],
@@ -57,6 +82,41 @@
     ],
 ]

+
+TEST_CASE_1_L0 = [
+    {
+        "data": [{"image": FILE_PATH, "location": [10004, 20004], "label": [0, 0, 0, 1]}],
+        "region_size": (8, 8),
+        "grid_shape": (2, 2),
+        "patch_size": 1,
+        "level": 0,
+        "image_reader_name": "cuCIM",
+    },
+    [
+        {"image": np.array([[[247]], [[245]], [[248]]], dtype=np.uint8), "label": np.array([[[0]]])},
+        {"image": np.array([[[245]], [[247]], [[244]]], dtype=np.uint8), "label": np.array([[[0]]])},
+        {"image": np.array([[[246]], [[246]], [[246]]], dtype=np.uint8), "label": np.array([[[0]]])},
+        {"image": np.array([[[246]], [[246]], [[246]]], dtype=np.uint8), "label": np.array([[[1]]])},
+    ],
+]
+
+
+TEST_CASE_1_L1 = [
+    {
+        "data": [{"image": FILE_PATH, "location": [10004, 20004], "label": [0, 0, 0, 1]}],
+        "region_size": (8, 8),
+        "grid_shape": (2, 2),
+        "patch_size": 1,
+        "level": 1,
+        "image_reader_name": "cuCIM",
+    },
+    [
+        {"image": np.array([[[248]], [[246]], [[249]]], dtype=np.uint8), "label": np.array([[[0]]])},
+        {"image": np.array([[[196]], [[187]], [[192]]], dtype=np.uint8), "label": np.array([[[0]]])},
+        {"image": np.array([[[245]], [[243]], [[244]]], dtype=np.uint8), "label": np.array([[[0]]])},
+        {"image": np.array([[[246]], [[242]], [[243]]], dtype=np.uint8), "label": np.array([[[1]]])},
+    ],
+]
 TEST_CASE_2 = [
     {
         "data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}],
@@ -90,6 +150,43 @@
     [{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}],
 ]

+TEST_CASE_OPENSLIDE_0_L0 = [
+    {
+        "data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}],
+        "region_size": (1, 1),
+        "grid_shape": (1, 1),
+        "patch_size": 1,
+        "level": 0,
+        "image_reader_name": "OpenSlide",
+    },
+    [{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}],
+]
+
+TEST_CASE_OPENSLIDE_0_L1 = [
+    {
+        "data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}],
+        "region_size": (1, 1),
+        "grid_shape": (1, 1),
+        "patch_size": 1,
+        "level": 1,
+        "image_reader_name": "OpenSlide",
+    },
+    [{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}],
+]
+
+
+TEST_CASE_OPENSLIDE_0_L2 = [
+    {
+        "data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}],
+        "region_size": (1, 1),
+        "grid_shape": (1, 1),
+        "patch_size": 1,
+        "level": 2,
+        "image_reader_name": "OpenSlide",
+    },
+    [{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}],
+]
+
 TEST_CASE_OPENSLIDE_1 = [
     {
         "data": [{"image": FILE_PATH, "location": [10004, 20004], "label": [0, 0, 0, 1]}],
@@ -113,7 +210,18 @@ def setUp(self):
         hash_val = testing_data_config("images", FILE_KEY, "hash_val")
         download_url_or_skip_test(FILE_URL, FILE_PATH, hash_type=hash_type, hash_val=hash_val)

-    @parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_2, TEST_CASE_3])
+    @parameterized.expand(
+        [
+            TEST_CASE_0,
+            TEST_CASE_0_L1,
+            TEST_CASE_0_L2,
+            TEST_CASE_1,
+            TEST_CASE_1_L0,
+            TEST_CASE_1_L1,
+            TEST_CASE_2,
+            TEST_CASE_3,
+        ]
+    )
     @skipUnless(has_cim, "Requires CuCIM")
     def test_read_patches_cucim(self, input_parameters, expected):
         dataset = PatchWSIDataset(**input_parameters)
@@ -124,7 +232,15 @@ def test_read_patches_cucim(self, input_parameters, expected):
             self.assertIsNone(assert_array_equal(samples[i]["label"], expected[i]["label"]))
             self.assertIsNone(assert_array_equal(samples[i]["image"], expected[i]["image"]))

-    @parameterized.expand([TEST_CASE_OPENSLIDE_0, TEST_CASE_OPENSLIDE_1])
+    @parameterized.expand(
+        [
+            TEST_CASE_OPENSLIDE_0,
+            TEST_CASE_OPENSLIDE_0_L0,
+            TEST_CASE_OPENSLIDE_0_L1,
+            TEST_CASE_OPENSLIDE_0_L2,
+            TEST_CASE_OPENSLIDE_1,
+        ]
+    )
     @skipUnless(has_osl, "Requires OpenSlide")
     def test_read_patches_openslide(self, input_parameters, expected):
         dataset = PatchWSIDataset(**input_parameters)
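
A minimal usage sketch of the change, for illustration only: `level` is not an explicit
argument of `PatchWSIDataset`; it is forwarded through `**kwargs` to `WSIReader`, so patches
are read from the requested pyramid level instead of the default level 0. The slide path below
is a placeholder and requires cuCIM to be installed.

    from monai.apps.pathology.data.datasets import PatchWSIDataset

    # "CMU-1.tiff" stands in for a local whole slide image path.
    dataset = PatchWSIDataset(
        data=[{"image": "CMU-1.tiff", "location": [10004, 20004], "label": [0, 0, 0, 1]}],
        region_size=(8, 8),
        grid_shape=(2, 2),
        patch_size=1,
        image_reader_name="cuCIM",
        level=1,  # forwarded via **kwargs to WSIReader; without it, level 0 is read
    )
    samples = dataset[0]  # a list of patch dicts: [{"image": ..., "label": ...}, ...]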