Skip to content

Commit de97391

Browse files
authored
Add support for level in PatchWSIDataset (#4036)
* Add support for level as input
  Signed-off-by: Behrooz <[email protected]>
* Add unittests for levels
  Signed-off-by: Behrooz <[email protected]>
* Update docstring
  Signed-off-by: Behrooz <[email protected]>
* Add kwargs for WSIReader in all datasets
  Signed-off-by: Behrooz <[email protected]>
* Update docstring
  Signed-off-by: Behrooz <[email protected]>
1 parent 541d7ab commit de97391

File tree

2 files changed

+130
-7
lines changed

2 files changed

+130
-7
lines changed

monai/apps/pathology/data/datasets.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class PatchWSIDataset(Dataset):
3535
transform: transforms to be executed on input data.
3636
image_reader_name: the name of library to be used for loading whole slide imaging, either CuCIM or OpenSlide.
3737
Defaults to CuCIM.
38+
kwargs: additional parameters for ``WSIReader``
3839
3940
Note:
4041
The input data has the following form as an example:
@@ -56,6 +57,7 @@ def __init__(
5657
patch_size: Union[int, Tuple[int, int]],
5758
transform: Optional[Callable] = None,
5859
image_reader_name: str = "cuCIM",
60+
**kwargs,
5961
):
6062
super().__init__(data, transform)
6163

@@ -65,7 +67,7 @@ def __init__(
6567

6668
self.image_path_list = list({x["image"] for x in self.data})
6769
self.image_reader_name = image_reader_name.lower()
68-
self.image_reader = WSIReader(image_reader_name)
70+
self.image_reader = WSIReader(backend=image_reader_name, **kwargs)
6971
self.wsi_object_dict = None
7072
if self.image_reader_name != "openslide":
7173
# OpenSlide causes memory issue if we prefetch image objects
@@ -119,17 +121,18 @@ class SmartCachePatchWSIDataset(SmartCacheDataset):
119121
will take the minimum of (cache_num, data_length x cache_rate, data_length).
120122
num_init_workers: the number of worker threads to initialize the cache for first epoch.
121123
If num_init_workers is None then the number returned by os.cpu_count() is used.
122-
If a value less than 1 is speficied, 1 will be used instead.
124+
If a value less than 1 is specified, 1 will be used instead.
123125
num_replace_workers: the number of worker threads to prepare the replacement cache for every epoch.
124126
If num_replace_workers is None then the number returned by os.cpu_count() is used.
125-
If a value less than 1 is speficied, 1 will be used instead.
127+
If a value less than 1 is specified, 1 will be used instead.
126128
progress: whether to display a progress bar when caching for the first epoch.
127129
copy_cache: whether to `deepcopy` the cache content before applying the random transforms,
128130
default to `True`. if the random transforms don't modify the cache content
129131
or every cache item is only used once in a `multi-processing` environment,
130132
may set `copy=False` for better performance.
131133
as_contiguous: whether to convert the cached NumPy array or PyTorch tensor to be contiguous.
132134
it may help improve the performance of following logic.
135+
kwargs: additional parameters for ``WSIReader``
133136
134137
"""
135138

@@ -149,13 +152,15 @@ def __init__(
149152
progress: bool = True,
150153
copy_cache: bool = True,
151154
as_contiguous: bool = True,
155+
**kwargs,
152156
):
153157
patch_wsi_dataset = PatchWSIDataset(
154158
data=data,
155159
region_size=region_size,
156160
grid_shape=grid_shape,
157161
patch_size=patch_size,
158162
image_reader_name=image_reader_name,
163+
**kwargs,
159164
)
160165
super().__init__(
161166
data=patch_wsi_dataset, # type: ignore
@@ -183,7 +188,8 @@ class MaskedInferenceWSIDataset(Dataset):
183188
patch_size: the size of patches to be extracted from the whole slide image for inference.
184189
transform: transforms to be executed on extracted patches.
185190
image_reader_name: the name of library to be used for loading whole slide imaging, either CuCIM or OpenSlide.
186-
Defaults to CuCIM.
191+
Defaults to CuCIM.
192+
kwargs: additional parameters for ``WSIReader``
187193
188194
Note:
189195
The resulting output (probability maps) after performing inference using this dataset is
@@ -196,14 +202,15 @@ def __init__(
196202
patch_size: Union[int, Tuple[int, int]],
197203
transform: Optional[Callable] = None,
198204
image_reader_name: str = "cuCIM",
205+
**kwargs,
199206
) -> None:
200207
super().__init__(data, transform)
201208

202209
self.patch_size = ensure_tuple_rep(patch_size, 2)
203210

204211
# set up whole slide image reader
205212
self.image_reader_name = image_reader_name.lower()
206-
self.image_reader = WSIReader(image_reader_name)
213+
self.image_reader = WSIReader(backend=image_reader_name, **kwargs)
207214

208215
# process data and create a list of dictionaries containing all required data and metadata
209216
self.data = self._prepare_data(data)

tests/test_patch_wsi_dataset.py

Lines changed: 118 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,31 @@
4141
[{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}],
4242
]
4343

44+
TEST_CASE_0_L1 = [
45+
{
46+
"data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}],
47+
"region_size": (1, 1),
48+
"grid_shape": (1, 1),
49+
"patch_size": 1,
50+
"level": 1,
51+
"image_reader_name": "cuCIM",
52+
},
53+
[{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}],
54+
]
55+
56+
# cuCIM backend: a single 1x1 patch at the slide origin, read at pyramid level 2.
# The original entry used "level": 1, which made it an exact duplicate of
# TEST_CASE_0_L1 and left level 2 untested for cuCIM.
# NOTE(review): the expected pixel value (239) is assumed to match the level-2
# OpenSlide case (TEST_CASE_OPENSLIDE_0_L2) for the same image and location;
# confirm against the test image.
TEST_CASE_0_L2 = [
    {
        "data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}],
        "region_size": (1, 1),
        "grid_shape": (1, 1),
        "patch_size": 1,
        "level": 2,
        "image_reader_name": "cuCIM",
    },
    [{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}],
]
67+
68+
4469
TEST_CASE_1 = [
4570
{
4671
"data": [{"image": FILE_PATH, "location": [10004, 20004], "label": [0, 0, 0, 1]}],
@@ -57,6 +82,41 @@
5782
],
5883
]
5984

85+
86+
TEST_CASE_1_L0 = [
87+
{
88+
"data": [{"image": FILE_PATH, "location": [10004, 20004], "label": [0, 0, 0, 1]}],
89+
"region_size": (8, 8),
90+
"grid_shape": (2, 2),
91+
"patch_size": 1,
92+
"level": 0,
93+
"image_reader_name": "cuCIM",
94+
},
95+
[
96+
{"image": np.array([[[247]], [[245]], [[248]]], dtype=np.uint8), "label": np.array([[[0]]])},
97+
{"image": np.array([[[245]], [[247]], [[244]]], dtype=np.uint8), "label": np.array([[[0]]])},
98+
{"image": np.array([[[246]], [[246]], [[246]]], dtype=np.uint8), "label": np.array([[[0]]])},
99+
{"image": np.array([[[246]], [[246]], [[246]]], dtype=np.uint8), "label": np.array([[[1]]])},
100+
],
101+
]
102+
103+
104+
TEST_CASE_1_L1 = [
105+
{
106+
"data": [{"image": FILE_PATH, "location": [10004, 20004], "label": [0, 0, 0, 1]}],
107+
"region_size": (8, 8),
108+
"grid_shape": (2, 2),
109+
"patch_size": 1,
110+
"level": 1,
111+
"image_reader_name": "cuCIM",
112+
},
113+
[
114+
{"image": np.array([[[248]], [[246]], [[249]]], dtype=np.uint8), "label": np.array([[[0]]])},
115+
{"image": np.array([[[196]], [[187]], [[192]]], dtype=np.uint8), "label": np.array([[[0]]])},
116+
{"image": np.array([[[245]], [[243]], [[244]]], dtype=np.uint8), "label": np.array([[[0]]])},
117+
{"image": np.array([[[246]], [[242]], [[243]]], dtype=np.uint8), "label": np.array([[[1]]])},
118+
],
119+
]
60120
TEST_CASE_2 = [
61121
{
62122
"data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}],
@@ -90,6 +150,43 @@
90150
[{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}],
91151
]
92152

153+
TEST_CASE_OPENSLIDE_0_L0 = [
154+
{
155+
"data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}],
156+
"region_size": (1, 1),
157+
"grid_shape": (1, 1),
158+
"patch_size": 1,
159+
"level": 0,
160+
"image_reader_name": "OpenSlide",
161+
},
162+
[{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}],
163+
]
164+
165+
TEST_CASE_OPENSLIDE_0_L1 = [
166+
{
167+
"data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}],
168+
"region_size": (1, 1),
169+
"grid_shape": (1, 1),
170+
"patch_size": 1,
171+
"level": 1,
172+
"image_reader_name": "OpenSlide",
173+
},
174+
[{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}],
175+
]
176+
177+
178+
TEST_CASE_OPENSLIDE_0_L2 = [
179+
{
180+
"data": [{"image": FILE_PATH, "location": [0, 0], "label": [1]}],
181+
"region_size": (1, 1),
182+
"grid_shape": (1, 1),
183+
"patch_size": 1,
184+
"level": 2,
185+
"image_reader_name": "OpenSlide",
186+
},
187+
[{"image": np.array([[[239]], [[239]], [[239]]], dtype=np.uint8), "label": np.array([[[1]]])}],
188+
]
189+
93190
TEST_CASE_OPENSLIDE_1 = [
94191
{
95192
"data": [{"image": FILE_PATH, "location": [10004, 20004], "label": [0, 0, 0, 1]}],
@@ -113,7 +210,18 @@ def setUp(self):
113210
hash_val = testing_data_config("images", FILE_KEY, "hash_val")
114211
download_url_or_skip_test(FILE_URL, FILE_PATH, hash_type=hash_type, hash_val=hash_val)
115212

116-
@parameterized.expand([TEST_CASE_0, TEST_CASE_1, TEST_CASE_2, TEST_CASE_3])
213+
@parameterized.expand(
214+
[
215+
TEST_CASE_0,
216+
TEST_CASE_0_L1,
217+
TEST_CASE_0_L2,
218+
TEST_CASE_1,
219+
TEST_CASE_1_L0,
220+
TEST_CASE_1_L1,
221+
TEST_CASE_2,
222+
TEST_CASE_3,
223+
]
224+
)
117225
@skipUnless(has_cim, "Requires CuCIM")
118226
def test_read_patches_cucim(self, input_parameters, expected):
119227
dataset = PatchWSIDataset(**input_parameters)
@@ -124,7 +232,15 @@ def test_read_patches_cucim(self, input_parameters, expected):
124232
self.assertIsNone(assert_array_equal(samples[i]["label"], expected[i]["label"]))
125233
self.assertIsNone(assert_array_equal(samples[i]["image"], expected[i]["image"]))
126234

127-
@parameterized.expand([TEST_CASE_OPENSLIDE_0, TEST_CASE_OPENSLIDE_1])
235+
@parameterized.expand(
236+
[
237+
TEST_CASE_OPENSLIDE_0,
238+
TEST_CASE_OPENSLIDE_0_L0,
239+
TEST_CASE_OPENSLIDE_0_L1,
240+
TEST_CASE_OPENSLIDE_0_L2,
241+
TEST_CASE_OPENSLIDE_1,
242+
]
243+
)
128244
@skipUnless(has_osl, "Requires OpenSlide")
129245
def test_read_patches_openslide(self, input_parameters, expected):
130246
dataset = PatchWSIDataset(**input_parameters)

0 commit comments

Comments
 (0)