From 367aa375e168a04f3a6ab61ff375cebb45ca6e37 Mon Sep 17 00:00:00 2001 From: Nicolas Kaenzig Date: Tue, 7 May 2024 09:07:37 +0200 Subject: [PATCH 1/2] restructured samplers into separate module & class files --- src/eva/vision/data/wsi/patching/samplers.py | 236 ------------------ .../data/wsi/patching/samplers/__init__.py | 8 + .../data/wsi/patching/samplers/_utils.py | 38 +++ .../vision/data/wsi/patching/samplers/base.py | 48 ++++ .../wsi/patching/samplers/foreground_grid.py | 84 +++++++ .../vision/data/wsi/patching/samplers/grid.py | 44 ++++ .../data/wsi/patching/samplers/random.py | 40 +++ 7 files changed, 262 insertions(+), 236 deletions(-) delete mode 100644 src/eva/vision/data/wsi/patching/samplers.py create mode 100644 src/eva/vision/data/wsi/patching/samplers/__init__.py create mode 100644 src/eva/vision/data/wsi/patching/samplers/_utils.py create mode 100644 src/eva/vision/data/wsi/patching/samplers/base.py create mode 100644 src/eva/vision/data/wsi/patching/samplers/foreground_grid.py create mode 100644 src/eva/vision/data/wsi/patching/samplers/grid.py create mode 100644 src/eva/vision/data/wsi/patching/samplers/random.py diff --git a/src/eva/vision/data/wsi/patching/samplers.py b/src/eva/vision/data/wsi/patching/samplers.py deleted file mode 100644 index 58df27fd..00000000 --- a/src/eva/vision/data/wsi/patching/samplers.py +++ /dev/null @@ -1,236 +0,0 @@ -"""Samplers for WSI patch extraction.""" - -import abc -import random -from typing import Generator, Tuple - -import numpy as np - -from eva.vision.data.wsi.patching.mask import Mask - - -class Sampler(abc.ABC): - """Base class for samplers.""" - - @abc.abstractmethod - def sample( - self, - width: int, - height: int, - layer_shape: Tuple[int, int], - mask: Mask | None = None, - ) -> Generator[Tuple[int, int], None, None]: - """Sample patche coordinates. - - Args: - width: The width of the patches. - height: The height of the patches. - layer_shape: The shape of the layer. 
- mask: Tuple containing the mask array and the scaling factor with respect to the - provided layer_shape. Optional, only required for samplers with foreground - filtering. - - Returns: - A generator producing sampled patch coordinates. - """ - - -class ForegroundSampler(Sampler): - """Base class for samplers with foreground filtering capabilities.""" - - @abc.abstractmethod - def is_foreground( - self, - mask: Mask, - x: int, - y: int, - width: int, - height: int, - min_foreground_ratio: float, - ) -> bool: - """Check if a patch contains sufficient foreground.""" - - -class RandomSampler(Sampler): - """Sample patch coordinates randomly. - - Args: - n_samples: The number of samples to return. - seed: The random seed. - """ - - def __init__(self, n_samples: int = 1, seed: int = 42): - """Initializes the sampler.""" - self.seed = seed - self.n_samples = n_samples - - def sample( - self, - width: int, - height: int, - layer_shape: Tuple[int, int], - ) -> Generator[Tuple[int, int], None, None]: - """Sample random patches. - - Args: - width: The width of the patches. - height: The height of the patches. - layer_shape: The shape of the layer. - """ - _set_seed(self.seed) - - for _ in range(self.n_samples): - x_max, y_max = layer_shape[0], layer_shape[1] - x, y = random.randint(0, x_max - width), random.randint(0, y_max - height) # nosec - yield x, y - - -class GridSampler(Sampler): - """Sample patches based on a grid. - - Args: - max_samples: The maximum number of samples to return. - overlap: The overlap between patches in the grid. - seed: The random seed. - """ - - def __init__( - self, - max_samples: int | None = None, - overlap: Tuple[int, int] = (0, 0), - seed: int = 42, - ): - """Initializes the sampler.""" - self.max_samples = max_samples - self.overlap = overlap - self.seed = seed - - def sample( - self, - width: int, - height: int, - layer_shape: Tuple[int, int], - ) -> Generator[Tuple[int, int], None, None]: - """Sample patches from a grid. 
- - Args: - width: The width of the patches. - height: The height of the patches. - layer_shape: The shape of the layer. - """ - x_y, indices = _get_grid_coords_and_indices(layer_shape, width, height, self.overlap) - max_samples = len(indices) if self.max_samples is None else self.max_samples - for i in indices[:max_samples]: - yield x_y[i] - - -class ForegroundGridSampler(ForegroundSampler): - """Sample patches based on a grid, only returning patches containing foreground. - - Args: - max_samples: The maximum number of samples to return. - overlap: The overlap between patches in the grid. - min_foreground_ratio: The minimum amount of foreground within a sampled patch. - seed: The random seed. - """ - - def __init__( - self, - max_samples: int = 20, - overlap: Tuple[int, int] = (0, 0), - min_foreground_ratio: float = 0.35, - seed: int = 42, - ): - """Initializes the sampler.""" - self.max_samples = max_samples - self.overlap = overlap - self.min_foreground_ratio = min_foreground_ratio - self.seed = seed - - def sample( - self, - width: int, - height: int, - layer_shape: Tuple[int, int], - mask: Mask, - ): - """Sample patches from a grid containing foreground. - - Args: - width: The width of the patches. - height: The height of the patches. - layer_shape: The shape of the layer. - mask: The mask of the image. - """ - x_y, indices = _get_grid_coords_and_indices(layer_shape, width, height, self.overlap) - - count = 0 - for i in indices: - if count >= self.max_samples: - break - if self.is_foreground( - mask, x_y[i][0], x_y[i][1], width, height, self.min_foreground_ratio - ): - count += 1 - yield x_y[i] - - def is_foreground( - self, - mask: Mask, - x: int, - y: int, - width: int, - height: int, - min_foreground_ratio: float, - ) -> bool: - """Check if a patch contains sufficient foreground. - - Args: - mask: The mask of the image. - x: The x-coordinate of the patch. - y: The y-coordinate of the patch. - width: The width of the patch. - height: The height of the patch. 
- min_foreground_ratio: The minimum amount of foreground in the patch. - """ - x_, y_ = self._scale_coords(x, y, mask.scale_factors) - width_, height_ = self._scale_coords(width, height, mask.scale_factors) - patch_mask = mask.mask_array[y_ : y_ + height_, x_ : x_ + width_] - return patch_mask.sum() / patch_mask.size > min_foreground_ratio - - def _scale_coords(self, x: int, y: int, scale_factors: Tuple[float, float]) -> Tuple[int, int]: - return int(x / scale_factors[0]), int(y / scale_factors[1]) - - -def _get_grid_coords_and_indices( - layer_shape: Tuple[int, int], - width: int, - height: int, - overlap: Tuple[int, int], - shuffle: bool = True, - seed: int = 42, -): - """Get grid coordinates and indices. - - Args: - layer_shape: The shape of the layer. - width: The width of the patches. - height: The height of the patches. - overlap: The overlap between patches in the grid. - shuffle: Whether to shuffle the indices. - seed: The random seed. - """ - x_range = range(0, layer_shape[0] - width, width - overlap[0]) - y_range = range(0, layer_shape[1] - height, height - overlap[1]) - x_y = [(x, y) for x in x_range for y in y_range] - - indices = list(range(len(x_y))) - if shuffle: - _set_seed(seed) - np.random.shuffle(indices) - return x_y, indices - - -def _set_seed(seed: int) -> None: - random.seed(seed) - np.random.seed(seed) diff --git a/src/eva/vision/data/wsi/patching/samplers/__init__.py b/src/eva/vision/data/wsi/patching/samplers/__init__.py new file mode 100644 index 00000000..49860968 --- /dev/null +++ b/src/eva/vision/data/wsi/patching/samplers/__init__.py @@ -0,0 +1,8 @@ +"""Patch Sampler API.""" + +from eva.vision.data.wsi.patching.samplers.base import ForegroundSampler, Sampler +from eva.vision.data.wsi.patching.samplers.foreground_grid import ForegroundGridSampler +from eva.vision.data.wsi.patching.samplers.grid import GridSampler +from eva.vision.data.wsi.patching.samplers.random import RandomSampler + +__all__ = ["Sampler", "ForegroundSampler", 
"RandomSampler", "GridSampler", "ForegroundGridSampler"] diff --git a/src/eva/vision/data/wsi/patching/samplers/_utils.py b/src/eva/vision/data/wsi/patching/samplers/_utils.py new file mode 100644 index 00000000..9598bc8d --- /dev/null +++ b/src/eva/vision/data/wsi/patching/samplers/_utils.py @@ -0,0 +1,38 @@ +import random +from typing import Tuple + +import numpy as np + + +def set_seed(seed: int) -> None: + random.seed(seed) + np.random.seed(seed) + + +def get_grid_coords_and_indices( + layer_shape: Tuple[int, int], + width: int, + height: int, + overlap: Tuple[int, int], + shuffle: bool = True, + seed: int = 42, +): + """Get grid coordinates and indices. + + Args: + layer_shape: The shape of the layer. + width: The width of the patches. + height: The height of the patches. + overlap: The overlap between patches in the grid. + shuffle: Whether to shuffle the indices. + seed: The random seed. + """ + x_range = range(0, layer_shape[0] - width, width - overlap[0]) + y_range = range(0, layer_shape[1] - height, height - overlap[1]) + x_y = [(x, y) for x in x_range for y in y_range] + + indices = list(range(len(x_y))) + if shuffle: + set_seed(seed) + np.random.shuffle(indices) + return x_y, indices diff --git a/src/eva/vision/data/wsi/patching/samplers/base.py b/src/eva/vision/data/wsi/patching/samplers/base.py new file mode 100644 index 00000000..fa9a24ac --- /dev/null +++ b/src/eva/vision/data/wsi/patching/samplers/base.py @@ -0,0 +1,48 @@ +"""Base classes for samplers.""" + +import abc +from typing import Generator, Tuple + +from eva.vision.data.wsi.patching.mask import Mask + + +class Sampler(abc.ABC): + """Base class for samplers.""" + + @abc.abstractmethod + def sample( + self, + width: int, + height: int, + layer_shape: Tuple[int, int], + mask: Mask | None = None, + ) -> Generator[Tuple[int, int], None, None]: + """Sample patche coordinates. + + Args: + width: The width of the patches. + height: The height of the patches. + layer_shape: The shape of the layer. 
+ mask: The mask, holding the mask array and the scale factors with respect to the + provided layer_shape. Optional, only required for samplers with foreground + filtering. + + Returns: + A generator producing sampled patch coordinates. + """ + + +class ForegroundSampler(Sampler): + """Base class for samplers with foreground filtering capabilities.""" + + @abc.abstractmethod + def is_foreground( + self, + mask: Mask, + x: int, + y: int, + width: int, + height: int, + min_foreground_ratio: float, + ) -> bool: + """Check if a patch contains sufficient foreground.""" diff --git a/src/eva/vision/data/wsi/patching/samplers/foreground_grid.py b/src/eva/vision/data/wsi/patching/samplers/foreground_grid.py new file mode 100644 index 00000000..1b00e599 --- /dev/null +++ b/src/eva/vision/data/wsi/patching/samplers/foreground_grid.py @@ -0,0 +1,84 @@ +"""Foreground grid sampler.""" + +from typing import Tuple + +from eva.vision.data.wsi.patching.mask import Mask +from eva.vision.data.wsi.patching.samplers import _utils, base + + +class ForegroundGridSampler(base.ForegroundSampler): + """Sample patches based on a grid, only returning patches containing foreground. + + Args: + max_samples: The maximum number of samples to return. + overlap: The overlap between patches in the grid. + min_foreground_ratio: The minimum amount of foreground within a sampled patch. + seed: The random seed. + """ + + def __init__( + self, + max_samples: int = 20, + overlap: Tuple[int, int] = (0, 0), + min_foreground_ratio: float = 0.35, + seed: int = 42, + ): + """Initializes the sampler.""" + self.max_samples = max_samples + self.overlap = overlap + self.min_foreground_ratio = min_foreground_ratio + self.seed = seed + + def sample( + self, + width: int, + height: int, + layer_shape: Tuple[int, int], + mask: Mask, + ): + """Sample patches from a grid containing foreground. + + Args: + width: The width of the patches. + height: The height of the patches. + layer_shape: The shape of the layer. 
+ mask: The mask of the image. + """ + x_y, indices = _utils.get_grid_coords_and_indices(layer_shape, width, height, self.overlap) + + count = 0 + for i in indices: + if count >= self.max_samples: + break + if self.is_foreground( + mask, x_y[i][0], x_y[i][1], width, height, self.min_foreground_ratio + ): + count += 1 + yield x_y[i] + + def is_foreground( + self, + mask: Mask, + x: int, + y: int, + width: int, + height: int, + min_foreground_ratio: float, + ) -> bool: + """Check if a patch contains sufficient foreground. + + Args: + mask: The mask of the image. + x: The x-coordinate of the patch. + y: The y-coordinate of the patch. + width: The width of the patch. + height: The height of the patch. + min_foreground_ratio: The minimum amount of foreground in the patch. + """ + x_, y_ = self._scale_coords(x, y, mask.scale_factors) + width_, height_ = self._scale_coords(width, height, mask.scale_factors) + patch_mask = mask.mask_array[y_ : y_ + height_, x_ : x_ + width_] + return patch_mask.sum() / patch_mask.size > min_foreground_ratio + + def _scale_coords(self, x: int, y: int, scale_factors: Tuple[float, float]) -> Tuple[int, int]: + return int(x / scale_factors[0]), int(y / scale_factors[1]) diff --git a/src/eva/vision/data/wsi/patching/samplers/grid.py b/src/eva/vision/data/wsi/patching/samplers/grid.py new file mode 100644 index 00000000..aaf3b110 --- /dev/null +++ b/src/eva/vision/data/wsi/patching/samplers/grid.py @@ -0,0 +1,44 @@ +"""Grid sampler.""" + +from typing import Generator, Tuple + +from eva.vision.data.wsi.patching.samplers import _utils, base + + +class GridSampler(base.Sampler): + """Sample patches based on a grid. + + Args: + max_samples: The maximum number of samples to return. + overlap: The overlap between patches in the grid. + seed: The random seed. 
+ """ + + def __init__( + self, + max_samples: int | None = None, + overlap: Tuple[int, int] = (0, 0), + seed: int = 42, + ): + """Initializes the sampler.""" + self.max_samples = max_samples + self.overlap = overlap + self.seed = seed + + def sample( + self, + width: int, + height: int, + layer_shape: Tuple[int, int], + ) -> Generator[Tuple[int, int], None, None]: + """Sample patches from a grid. + + Args: + width: The width of the patches. + height: The height of the patches. + layer_shape: The shape of the layer. + """ + x_y, indices = _utils.get_grid_coords_and_indices(layer_shape, width, height, self.overlap) + max_samples = len(indices) if self.max_samples is None else self.max_samples + for i in indices[:max_samples]: + yield x_y[i] diff --git a/src/eva/vision/data/wsi/patching/samplers/random.py b/src/eva/vision/data/wsi/patching/samplers/random.py new file mode 100644 index 00000000..33404c1b --- /dev/null +++ b/src/eva/vision/data/wsi/patching/samplers/random.py @@ -0,0 +1,40 @@ +"""Random sampler.""" + +import random +from typing import Generator, Tuple + +from eva.vision.data.wsi.patching.samplers import _utils, base + + +class RandomSampler(base.Sampler): + """Sample patch coordinates randomly. + + Args: + n_samples: The number of samples to return. + seed: The random seed. + """ + + def __init__(self, n_samples: int = 1, seed: int = 42): + """Initializes the sampler.""" + self.seed = seed + self.n_samples = n_samples + + def sample( + self, + width: int, + height: int, + layer_shape: Tuple[int, int], + ) -> Generator[Tuple[int, int], None, None]: + """Sample random patches. + + Args: + width: The width of the patches. + height: The height of the patches. + layer_shape: The shape of the layer. 
+ """ + _utils.set_seed(self.seed) + + for _ in range(self.n_samples): + x_max, y_max = layer_shape[0], layer_shape[1] + x, y = random.randint(0, x_max - width), random.randint(0, y_max - height) # nosec + yield x, y From 96d13059e03d7289661a81321fec21b32eef1178 Mon Sep 17 00:00:00 2001 From: Nicolas Kaenzig Date: Tue, 7 May 2024 10:44:44 +0200 Subject: [PATCH 2/2] added unit test for samplers --- .../data/wsi/patching/samplers/_utils.py | 16 +++- .../wsi/patching/samplers/foreground_grid.py | 7 +- .../vision/data/wsi/patching/samplers/grid.py | 5 +- .../data/wsi/patching/samplers/random.py | 3 +- tests/eva/vision/data/wsi/__init__.py | 1 + .../eva/vision/data/wsi/patching/__init__.py | 1 + .../data/wsi/patching/samplers/__init__.py | 1 + .../patching/samplers/test_foreground_grid.py | 93 +++++++++++++++++++ .../data/wsi/patching/samplers/test_grid.py | 69 ++++++++++++++ .../data/wsi/patching/samplers/test_random.py | 48 ++++++++++ 10 files changed, 238 insertions(+), 6 deletions(-) create mode 100644 tests/eva/vision/data/wsi/__init__.py create mode 100644 tests/eva/vision/data/wsi/patching/__init__.py create mode 100644 tests/eva/vision/data/wsi/patching/samplers/__init__.py create mode 100644 tests/eva/vision/data/wsi/patching/samplers/test_foreground_grid.py create mode 100644 tests/eva/vision/data/wsi/patching/samplers/test_grid.py create mode 100644 tests/eva/vision/data/wsi/patching/samplers/test_random.py diff --git a/src/eva/vision/data/wsi/patching/samplers/_utils.py b/src/eva/vision/data/wsi/patching/samplers/_utils.py index 9598bc8d..af8418df 100644 --- a/src/eva/vision/data/wsi/patching/samplers/_utils.py +++ b/src/eva/vision/data/wsi/patching/samplers/_utils.py @@ -27,8 +27,8 @@ def get_grid_coords_and_indices( shuffle: Whether to shuffle the indices. seed: The random seed. 
""" - x_range = range(0, layer_shape[0] - width, width - overlap[0]) - y_range = range(0, layer_shape[1] - height, height - overlap[1]) + x_range = range(0, layer_shape[0] - width + 1, width - overlap[0]) + y_range = range(0, layer_shape[1] - height + 1, height - overlap[1]) x_y = [(x, y) for x in x_range for y in y_range] indices = list(range(len(x_y))) @@ -36,3 +36,15 @@ def get_grid_coords_and_indices( set_seed(seed) np.random.shuffle(indices) return x_y, indices + + +def validate_dimensions(width: int, height: int, layer_shape: Tuple[int, int]) -> None: + """Checks if the width / height is bigger than the layer shape. + + Args: + width: The width of the patches. + height: The height of the patches. + layer_shape: The shape of the layer. + """ + if width > layer_shape[0] or height > layer_shape[1]: + raise ValueError("The width / height cannot be bigger than the layer shape.") diff --git a/src/eva/vision/data/wsi/patching/samplers/foreground_grid.py b/src/eva/vision/data/wsi/patching/samplers/foreground_grid.py index 1b00e599..e062caf5 100644 --- a/src/eva/vision/data/wsi/patching/samplers/foreground_grid.py +++ b/src/eva/vision/data/wsi/patching/samplers/foreground_grid.py @@ -44,7 +44,10 @@ def sample( layer_shape: The shape of the layer. mask: The mask of the image. 
""" - x_y, indices = _utils.get_grid_coords_and_indices(layer_shape, width, height, self.overlap) + _utils.validate_dimensions(width, height, layer_shape) + x_y, indices = _utils.get_grid_coords_and_indices( + layer_shape, width, height, self.overlap, seed=self.seed + ) count = 0 for i in indices: @@ -78,7 +81,7 @@ def is_foreground( x_, y_ = self._scale_coords(x, y, mask.scale_factors) width_, height_ = self._scale_coords(width, height, mask.scale_factors) patch_mask = mask.mask_array[y_ : y_ + height_, x_ : x_ + width_] - return patch_mask.sum() / patch_mask.size > min_foreground_ratio + return patch_mask.sum() / patch_mask.size >= min_foreground_ratio def _scale_coords(self, x: int, y: int, scale_factors: Tuple[float, float]) -> Tuple[int, int]: return int(x / scale_factors[0]), int(y / scale_factors[1]) diff --git a/src/eva/vision/data/wsi/patching/samplers/grid.py b/src/eva/vision/data/wsi/patching/samplers/grid.py index aaf3b110..3f2b0081 100644 --- a/src/eva/vision/data/wsi/patching/samplers/grid.py +++ b/src/eva/vision/data/wsi/patching/samplers/grid.py @@ -38,7 +38,10 @@ def sample( height: The height of the patches. layer_shape: The shape of the layer. """ - x_y, indices = _utils.get_grid_coords_and_indices(layer_shape, width, height, self.overlap) + _utils.validate_dimensions(width, height, layer_shape) + x_y, indices = _utils.get_grid_coords_and_indices( + layer_shape, width, height, self.overlap, seed=self.seed + ) max_samples = len(indices) if self.max_samples is None else self.max_samples for i in indices[:max_samples]: yield x_y[i] diff --git a/src/eva/vision/data/wsi/patching/samplers/random.py b/src/eva/vision/data/wsi/patching/samplers/random.py index 33404c1b..09ae5729 100644 --- a/src/eva/vision/data/wsi/patching/samplers/random.py +++ b/src/eva/vision/data/wsi/patching/samplers/random.py @@ -32,9 +32,10 @@ def sample( height: The height of the patches. layer_shape: The shape of the layer. 
""" + _utils.validate_dimensions(width, height, layer_shape) _utils.set_seed(self.seed) + x_max, y_max = layer_shape[0], layer_shape[1] for _ in range(self.n_samples): - x_max, y_max = layer_shape[0], layer_shape[1] x, y = random.randint(0, x_max - width), random.randint(0, y_max - height) # nosec yield x, y diff --git a/tests/eva/vision/data/wsi/__init__.py b/tests/eva/vision/data/wsi/__init__.py new file mode 100644 index 00000000..c3adfdd3 --- /dev/null +++ b/tests/eva/vision/data/wsi/__init__.py @@ -0,0 +1 @@ +"""WSI module tests.""" diff --git a/tests/eva/vision/data/wsi/patching/__init__.py b/tests/eva/vision/data/wsi/patching/__init__.py new file mode 100644 index 00000000..686c6e8d --- /dev/null +++ b/tests/eva/vision/data/wsi/patching/__init__.py @@ -0,0 +1 @@ +"""WSI patch extraction tests.""" diff --git a/tests/eva/vision/data/wsi/patching/samplers/__init__.py b/tests/eva/vision/data/wsi/patching/samplers/__init__.py new file mode 100644 index 00000000..e7064022 --- /dev/null +++ b/tests/eva/vision/data/wsi/patching/samplers/__init__.py @@ -0,0 +1 @@ +"""WSI patch samplers tests.""" diff --git a/tests/eva/vision/data/wsi/patching/samplers/test_foreground_grid.py b/tests/eva/vision/data/wsi/patching/samplers/test_foreground_grid.py new file mode 100644 index 00000000..9a5510ac --- /dev/null +++ b/tests/eva/vision/data/wsi/patching/samplers/test_foreground_grid.py @@ -0,0 +1,93 @@ +"""ForegroundGridSampler tests.""" + +import numpy as np +import pytest + +from eva.vision.data.wsi.patching import mask, samplers + +TEST_MASK = mask.Mask( + mask_array=np.array( + [ + [0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 0], + [0, 0, 0, 0, 0, 0], + ] + ), + mask_level_idx=3, + scale_factors=(6.0, 6.0), +) + +TEST_ARGS = {"width": 12, "height": 12, "layer_shape": (36, 36), "mask": TEST_MASK} + + +@pytest.mark.parametrize( + "min_foreground_ratio, max_samples, expected_n_samples", + [(0.0, 3, 3), (0.0, 100, 9), 
(0.5, 100, 5), (0.9, 100, 1)], +) +def test_length(min_foreground_ratio: float, max_samples: int, expected_n_samples: int) -> None: + """Tests if the sampler returns the correct number of samples.""" + sampler = samplers.ForegroundGridSampler( + max_samples=max_samples, min_foreground_ratio=min_foreground_ratio + ) + + x_y = list(sampler.sample(**TEST_ARGS)) + + assert len(x_y) == expected_n_samples + + +@pytest.mark.parametrize("n_samples, seed", [(10, 8), (22, 42)]) +def test_same_seed(n_samples: int, seed: int) -> None: + """Tests if the sampler returns the same samples for the same seed.""" + sampler = samplers.ForegroundGridSampler( + max_samples=n_samples, seed=seed, min_foreground_ratio=0.5 + ) + + x_y_1 = list(sampler.sample(**TEST_ARGS)) + x_y_2 = list(sampler.sample(**TEST_ARGS)) + + assert x_y_1 == x_y_2 + + +@pytest.mark.parametrize("n_samples, seed_1, seed_2", [(3, 1, 2), (5, 3, 4)]) +def test_different_seed(n_samples: int, seed_1: int, seed_2: int) -> None: + """Tests if the sampler returns different samples for different seeds.""" + sampler_1 = samplers.ForegroundGridSampler(max_samples=n_samples, seed=seed_1) + sampler_2 = samplers.ForegroundGridSampler(max_samples=n_samples, seed=seed_2) + + x_y_1 = list(sampler_1.sample(**TEST_ARGS)) + x_y_2 = list(sampler_2.sample(**TEST_ARGS)) + + assert x_y_1 != x_y_2 + + +def test_invalid_width_height() -> None: + """Tests if the sampler raises an error when width / height is bigger than layer_shape.""" + sampler = samplers.ForegroundGridSampler(max_samples=10, seed=42) + + with pytest.raises(ValueError): + list(sampler.sample(width=200, height=200, layer_shape=(100, 100), mask=TEST_MASK)) + + +@pytest.mark.parametrize("min_foreground_ratio", [0.0, 0.5, 0.9]) +def test_min_foreground_ratio(min_foreground_ratio: float) -> None: + """Tests if sampled coordinates respect the min_foreground_ratio.""" + sampler = samplers.ForegroundGridSampler( + max_samples=100, min_foreground_ratio=min_foreground_ratio + ) + + 
x_y = list(sampler.sample(**TEST_ARGS)) + + mask = TEST_MASK + width, height = TEST_ARGS["width"], TEST_ARGS["height"] + + for x, y in x_y: + x_, y_ = sampler._scale_coords(x, y, mask.scale_factors) + width_, height_ = sampler._scale_coords(width, height, mask.scale_factors) + + patch_mask = mask.mask_array[x_ : x_ + width_, y_ : y_ + height_] + foreground_ratio = patch_mask.sum() / patch_mask.size + + assert foreground_ratio >= min_foreground_ratio diff --git a/tests/eva/vision/data/wsi/patching/samplers/test_grid.py b/tests/eva/vision/data/wsi/patching/samplers/test_grid.py new file mode 100644 index 00000000..efeecf54 --- /dev/null +++ b/tests/eva/vision/data/wsi/patching/samplers/test_grid.py @@ -0,0 +1,69 @@ +"""GridSampler tests.""" + +from typing import Tuple + +import pytest + +from eva.vision.data.wsi.patching import samplers + +TEST_ARGS = {"width": 10, "height": 10, "layer_shape": (100, 100)} + + +@pytest.mark.parametrize("max_samples, expected_n_samples", [(3, 3), (10, 10), (200, 100)]) +def test_length(max_samples: int, expected_n_samples: int) -> None: + """Tests if the sampler returns the correct number of samples.""" + sampler = samplers.GridSampler(max_samples=max_samples) + + x_y = list(sampler.sample(**TEST_ARGS)) + + assert len(x_y) == expected_n_samples + + +@pytest.mark.parametrize("max_samples, seed", [(10, 8), (22, 42)]) +def test_same_seed(max_samples: int, seed: int) -> None: + """Tests if the sampler returns the same samples for the same seed.""" + sampler = samplers.GridSampler(max_samples=max_samples, seed=seed) + + x_y_1 = list(sampler.sample(**TEST_ARGS)) + x_y_2 = list(sampler.sample(**TEST_ARGS)) + + assert x_y_1 == x_y_2 + + +@pytest.mark.parametrize("max_samples, seed_1, seed_2", [(3, 1, 2), (5, 3, 4)]) +def test_different_seed(max_samples: int, seed_1: int, seed_2: int) -> None: + """Tests if the sampler returns different samples for different seeds.""" + sampler_1 = samplers.GridSampler(max_samples=max_samples, seed=seed_1) + 
sampler_2 = samplers.GridSampler(max_samples=max_samples, seed=seed_2) + + x_y_1 = list(sampler_1.sample(**TEST_ARGS)) + x_y_2 = list(sampler_2.sample(**TEST_ARGS)) + + assert x_y_1 != x_y_2 + + +def test_invalid_width_height() -> None: + """Tests if the sampler raises an error when width / height is bigger than layer_shape.""" + sampler = samplers.GridSampler(max_samples=10, seed=42) + + with pytest.raises(ValueError): + list(sampler.sample(width=200, height=200, layer_shape=(100, 100))) + + +@pytest.mark.parametrize( + "width, height, layer_shape", + [ + (5, 5, (25, 25)), + (5, 5, (100, 100)), + (224, 224, (1000, 1000)), + ], +) +def test_expected_n_patches(width: int, height: int, layer_shape: Tuple[int, int]) -> None: + """Tests if the sampler yields every grid patch when no max_samples limit is set.""" + sampler = samplers.GridSampler(max_samples=None) + + expected_max_samples = (layer_shape[0] // width) * (layer_shape[1] // height) + + x_y = list(sampler.sample(width=width, height=height, layer_shape=layer_shape)) + + assert len(x_y) == expected_max_samples diff --git a/tests/eva/vision/data/wsi/patching/samplers/test_random.py b/tests/eva/vision/data/wsi/patching/samplers/test_random.py new file mode 100644 index 00000000..85110a6c --- /dev/null +++ b/tests/eva/vision/data/wsi/patching/samplers/test_random.py @@ -0,0 +1,48 @@ +"""RandomSampler tests.""" + +import pytest + +from eva.vision.data.wsi.patching import samplers + +TEST_ARGS = {"width": 10, "height": 10, "layer_shape": (100, 100)} + + +@pytest.mark.parametrize("n_samples", [3, 10, 22]) +def test_length(n_samples: int) -> None: + """Tests if the sampler returns the correct number of samples.""" + sampler = samplers.RandomSampler(n_samples=n_samples) + + x_y = list(sampler.sample(**TEST_ARGS)) + + assert len(x_y) == n_samples + + +@pytest.mark.parametrize("n_samples, seed", [(10, 8), (22, 42)]) +def test_same_seed(n_samples: int, seed: int) -> None: + """Tests if the sampler returns the same samples for the same seed.""" + 
sampler = samplers.RandomSampler(n_samples=n_samples, seed=seed) + + x_y_1 = list(sampler.sample(**TEST_ARGS)) + x_y_2 = list(sampler.sample(**TEST_ARGS)) + + assert x_y_1 == x_y_2 + + +@pytest.mark.parametrize("n_samples, seed_1, seed_2", [(10, 1, 2), (22, 3, 4)]) +def test_different_seed(n_samples: int, seed_1: int, seed_2: int) -> None: + """Tests if the sampler returns different samples for different seeds.""" + sampler_1 = samplers.RandomSampler(n_samples=n_samples, seed=seed_1) + sampler_2 = samplers.RandomSampler(n_samples=n_samples, seed=seed_2) + + x_y_1 = list(sampler_1.sample(**TEST_ARGS)) + x_y_2 = list(sampler_2.sample(**TEST_ARGS)) + + assert x_y_1 != x_y_2 + + +def test_invalid_width_height() -> None: + """Tests if the sampler raises an error when width / height is bigger than layer_shape.""" + sampler = samplers.RandomSampler(n_samples=10, seed=42) + + with pytest.raises(ValueError): + list(sampler.sample(width=200, height=200, layer_shape=(100, 100)))