From 367aa375e168a04f3a6ab61ff375cebb45ca6e37 Mon Sep 17 00:00:00 2001
From: Nicolas Kaenzig <nkaenzig@gmail.com>
Date: Tue, 7 May 2024 09:07:37 +0200
Subject: [PATCH 1/2] restructured samplers into separate module & class files

---
 src/eva/vision/data/wsi/patching/samplers.py  | 236 ------------------
 .../data/wsi/patching/samplers/__init__.py    |   8 +
 .../data/wsi/patching/samplers/_utils.py      |  38 +++
 .../vision/data/wsi/patching/samplers/base.py |  48 ++++
 .../wsi/patching/samplers/foreground_grid.py  |  84 +++++++
 .../vision/data/wsi/patching/samplers/grid.py |  44 ++++
 .../data/wsi/patching/samplers/random.py      |  40 +++
 7 files changed, 262 insertions(+), 236 deletions(-)
 delete mode 100644 src/eva/vision/data/wsi/patching/samplers.py
 create mode 100644 src/eva/vision/data/wsi/patching/samplers/__init__.py
 create mode 100644 src/eva/vision/data/wsi/patching/samplers/_utils.py
 create mode 100644 src/eva/vision/data/wsi/patching/samplers/base.py
 create mode 100644 src/eva/vision/data/wsi/patching/samplers/foreground_grid.py
 create mode 100644 src/eva/vision/data/wsi/patching/samplers/grid.py
 create mode 100644 src/eva/vision/data/wsi/patching/samplers/random.py

diff --git a/src/eva/vision/data/wsi/patching/samplers.py b/src/eva/vision/data/wsi/patching/samplers.py
deleted file mode 100644
index 58df27fd..00000000
--- a/src/eva/vision/data/wsi/patching/samplers.py
+++ /dev/null
@@ -1,236 +0,0 @@
-"""Samplers for WSI patch extraction."""
-
-import abc
-import random
-from typing import Generator, Tuple
-
-import numpy as np
-
-from eva.vision.data.wsi.patching.mask import Mask
-
-
-class Sampler(abc.ABC):
-    """Base class for samplers."""
-
-    @abc.abstractmethod
-    def sample(
-        self,
-        width: int,
-        height: int,
-        layer_shape: Tuple[int, int],
-        mask: Mask | None = None,
-    ) -> Generator[Tuple[int, int], None, None]:
-        """Sample patche coordinates.
-
-        Args:
-            width: The width of the patches.
-            height: The height of the patches.
-            layer_shape: The shape of the layer.
-            mask: Tuple containing the mask array and the scaling factor with respect to the
-                provided layer_shape. Optional, only required for samplers with foreground
-                filtering.
-
-        Returns:
-            A generator producing sampled patch coordinates.
-        """
-
-
-class ForegroundSampler(Sampler):
-    """Base class for samplers with foreground filtering capabilities."""
-
-    @abc.abstractmethod
-    def is_foreground(
-        self,
-        mask: Mask,
-        x: int,
-        y: int,
-        width: int,
-        height: int,
-        min_foreground_ratio: float,
-    ) -> bool:
-        """Check if a patch contains sufficient foreground."""
-
-
-class RandomSampler(Sampler):
-    """Sample patch coordinates randomly.
-
-    Args:
-        n_samples: The number of samples to return.
-        seed: The random seed.
-    """
-
-    def __init__(self, n_samples: int = 1, seed: int = 42):
-        """Initializes the sampler."""
-        self.seed = seed
-        self.n_samples = n_samples
-
-    def sample(
-        self,
-        width: int,
-        height: int,
-        layer_shape: Tuple[int, int],
-    ) -> Generator[Tuple[int, int], None, None]:
-        """Sample random patches.
-
-        Args:
-            width: The width of the patches.
-            height: The height of the patches.
-            layer_shape: The shape of the layer.
-        """
-        _set_seed(self.seed)
-
-        for _ in range(self.n_samples):
-            x_max, y_max = layer_shape[0], layer_shape[1]
-            x, y = random.randint(0, x_max - width), random.randint(0, y_max - height)  # nosec
-            yield x, y
-
-
-class GridSampler(Sampler):
-    """Sample patches based on a grid.
-
-    Args:
-        max_samples: The maximum number of samples to return.
-        overlap: The overlap between patches in the grid.
-        seed: The random seed.
-    """
-
-    def __init__(
-        self,
-        max_samples: int | None = None,
-        overlap: Tuple[int, int] = (0, 0),
-        seed: int = 42,
-    ):
-        """Initializes the sampler."""
-        self.max_samples = max_samples
-        self.overlap = overlap
-        self.seed = seed
-
-    def sample(
-        self,
-        width: int,
-        height: int,
-        layer_shape: Tuple[int, int],
-    ) -> Generator[Tuple[int, int], None, None]:
-        """Sample patches from a grid.
-
-        Args:
-            width: The width of the patches.
-            height: The height of the patches.
-            layer_shape: The shape of the layer.
-        """
-        x_y, indices = _get_grid_coords_and_indices(layer_shape, width, height, self.overlap)
-        max_samples = len(indices) if self.max_samples is None else self.max_samples
-        for i in indices[:max_samples]:
-            yield x_y[i]
-
-
-class ForegroundGridSampler(ForegroundSampler):
-    """Sample patches based on a grid, only returning patches containing foreground.
-
-    Args:
-        max_samples: The maximum number of samples to return.
-        overlap: The overlap between patches in the grid.
-        min_foreground_ratio: The minimum amount of foreground within a sampled patch.
-        seed: The random seed.
-    """
-
-    def __init__(
-        self,
-        max_samples: int = 20,
-        overlap: Tuple[int, int] = (0, 0),
-        min_foreground_ratio: float = 0.35,
-        seed: int = 42,
-    ):
-        """Initializes the sampler."""
-        self.max_samples = max_samples
-        self.overlap = overlap
-        self.min_foreground_ratio = min_foreground_ratio
-        self.seed = seed
-
-    def sample(
-        self,
-        width: int,
-        height: int,
-        layer_shape: Tuple[int, int],
-        mask: Mask,
-    ):
-        """Sample patches from a grid containing foreground.
-
-        Args:
-            width: The width of the patches.
-            height: The height of the patches.
-            layer_shape: The shape of the layer.
-            mask: The mask of the image.
-        """
-        x_y, indices = _get_grid_coords_and_indices(layer_shape, width, height, self.overlap)
-
-        count = 0
-        for i in indices:
-            if count >= self.max_samples:
-                break
-            if self.is_foreground(
-                mask, x_y[i][0], x_y[i][1], width, height, self.min_foreground_ratio
-            ):
-                count += 1
-                yield x_y[i]
-
-    def is_foreground(
-        self,
-        mask: Mask,
-        x: int,
-        y: int,
-        width: int,
-        height: int,
-        min_foreground_ratio: float,
-    ) -> bool:
-        """Check if a patch contains sufficient foreground.
-
-        Args:
-            mask: The mask of the image.
-            x: The x-coordinate of the patch.
-            y: The y-coordinate of the patch.
-            width: The width of the patch.
-            height: The height of the patch.
-            min_foreground_ratio: The minimum amount of foreground in the patch.
-        """
-        x_, y_ = self._scale_coords(x, y, mask.scale_factors)
-        width_, height_ = self._scale_coords(width, height, mask.scale_factors)
-        patch_mask = mask.mask_array[y_ : y_ + height_, x_ : x_ + width_]
-        return patch_mask.sum() / patch_mask.size > min_foreground_ratio
-
-    def _scale_coords(self, x: int, y: int, scale_factors: Tuple[float, float]) -> Tuple[int, int]:
-        return int(x / scale_factors[0]), int(y / scale_factors[1])
-
-
-def _get_grid_coords_and_indices(
-    layer_shape: Tuple[int, int],
-    width: int,
-    height: int,
-    overlap: Tuple[int, int],
-    shuffle: bool = True,
-    seed: int = 42,
-):
-    """Get grid coordinates and indices.
-
-    Args:
-        layer_shape: The shape of the layer.
-        width: The width of the patches.
-        height: The height of the patches.
-        overlap: The overlap between patches in the grid.
-        shuffle: Whether to shuffle the indices.
-        seed: The random seed.
-    """
-    x_range = range(0, layer_shape[0] - width, width - overlap[0])
-    y_range = range(0, layer_shape[1] - height, height - overlap[1])
-    x_y = [(x, y) for x in x_range for y in y_range]
-
-    indices = list(range(len(x_y)))
-    if shuffle:
-        _set_seed(seed)
-        np.random.shuffle(indices)
-    return x_y, indices
-
-
-def _set_seed(seed: int) -> None:
-    random.seed(seed)
-    np.random.seed(seed)
diff --git a/src/eva/vision/data/wsi/patching/samplers/__init__.py b/src/eva/vision/data/wsi/patching/samplers/__init__.py
new file mode 100644
index 00000000..49860968
--- /dev/null
+++ b/src/eva/vision/data/wsi/patching/samplers/__init__.py
@@ -0,0 +1,8 @@
+"""Patch Sampler API."""
+
+from eva.vision.data.wsi.patching.samplers.base import ForegroundSampler, Sampler
+from eva.vision.data.wsi.patching.samplers.foreground_grid import ForegroundGridSampler
+from eva.vision.data.wsi.patching.samplers.grid import GridSampler
+from eva.vision.data.wsi.patching.samplers.random import RandomSampler
+
+__all__ = ["Sampler", "ForegroundSampler", "RandomSampler", "GridSampler", "ForegroundGridSampler"]
diff --git a/src/eva/vision/data/wsi/patching/samplers/_utils.py b/src/eva/vision/data/wsi/patching/samplers/_utils.py
new file mode 100644
index 00000000..9598bc8d
--- /dev/null
+++ b/src/eva/vision/data/wsi/patching/samplers/_utils.py
@@ -0,0 +1,38 @@
+import random
+from typing import Tuple
+
+import numpy as np
+
+
+def set_seed(seed: int) -> None:
+    random.seed(seed)
+    np.random.seed(seed)
+
+
+def get_grid_coords_and_indices(
+    layer_shape: Tuple[int, int],
+    width: int,
+    height: int,
+    overlap: Tuple[int, int],
+    shuffle: bool = True,
+    seed: int = 42,
+):
+    """Get grid coordinates and indices.
+
+    Args:
+        layer_shape: The shape of the layer.
+        width: The width of the patches.
+        height: The height of the patches.
+        overlap: The overlap between patches in the grid.
+        shuffle: Whether to shuffle the indices.
+        seed: The random seed.
+    """
+    x_range = range(0, layer_shape[0] - width, width - overlap[0])
+    y_range = range(0, layer_shape[1] - height, height - overlap[1])
+    x_y = [(x, y) for x in x_range for y in y_range]
+
+    indices = list(range(len(x_y)))
+    if shuffle:
+        set_seed(seed)
+        np.random.shuffle(indices)
+    return x_y, indices
diff --git a/src/eva/vision/data/wsi/patching/samplers/base.py b/src/eva/vision/data/wsi/patching/samplers/base.py
new file mode 100644
index 00000000..fa9a24ac
--- /dev/null
+++ b/src/eva/vision/data/wsi/patching/samplers/base.py
@@ -0,0 +1,48 @@
+"""Base classes for samplers."""
+
+import abc
+from typing import Generator, Tuple
+
+from eva.vision.data.wsi.patching.mask import Mask
+
+
+class Sampler(abc.ABC):
+    """Base class for samplers."""
+
+    @abc.abstractmethod
+    def sample(
+        self,
+        width: int,
+        height: int,
+        layer_shape: Tuple[int, int],
+        mask: Mask | None = None,
+    ) -> Generator[Tuple[int, int], None, None]:
+        """Sample patch coordinates.
+
+        Args:
+            width: The width of the patches.
+            height: The height of the patches.
+            layer_shape: The shape of the layer.
+            mask: Tuple containing the mask array and the scaling factor with respect to the
+                provided layer_shape. Optional, only required for samplers with foreground
+                filtering.
+
+        Returns:
+            A generator producing sampled patch coordinates.
+        """
+
+
+class ForegroundSampler(Sampler):
+    """Base class for samplers with foreground filtering capabilities."""
+
+    @abc.abstractmethod
+    def is_foreground(
+        self,
+        mask: Mask,
+        x: int,
+        y: int,
+        width: int,
+        height: int,
+        min_foreground_ratio: float,
+    ) -> bool:
+        """Check if a patch contains sufficient foreground."""
diff --git a/src/eva/vision/data/wsi/patching/samplers/foreground_grid.py b/src/eva/vision/data/wsi/patching/samplers/foreground_grid.py
new file mode 100644
index 00000000..1b00e599
--- /dev/null
+++ b/src/eva/vision/data/wsi/patching/samplers/foreground_grid.py
@@ -0,0 +1,84 @@
+"""Foreground grid sampler."""
+
+from typing import Tuple
+
+from eva.vision.data.wsi.patching.mask import Mask
+from eva.vision.data.wsi.patching.samplers import _utils, base
+
+
+class ForegroundGridSampler(base.ForegroundSampler):
+    """Sample patches based on a grid, only returning patches containing foreground.
+
+    Args:
+        max_samples: The maximum number of samples to return.
+        overlap: The overlap between patches in the grid.
+        min_foreground_ratio: The minimum amount of foreground within a sampled patch.
+        seed: The random seed.
+    """
+
+    def __init__(
+        self,
+        max_samples: int = 20,
+        overlap: Tuple[int, int] = (0, 0),
+        min_foreground_ratio: float = 0.35,
+        seed: int = 42,
+    ):
+        """Initializes the sampler."""
+        self.max_samples = max_samples
+        self.overlap = overlap
+        self.min_foreground_ratio = min_foreground_ratio
+        self.seed = seed
+
+    def sample(
+        self,
+        width: int,
+        height: int,
+        layer_shape: Tuple[int, int],
+        mask: Mask,
+    ):
+        """Sample patches from a grid containing foreground.
+
+        Args:
+            width: The width of the patches.
+            height: The height of the patches.
+            layer_shape: The shape of the layer.
+            mask: The mask of the image.
+        """
+        x_y, indices = _utils.get_grid_coords_and_indices(layer_shape, width, height, self.overlap)
+
+        count = 0
+        for i in indices:
+            if count >= self.max_samples:
+                break
+            if self.is_foreground(
+                mask, x_y[i][0], x_y[i][1], width, height, self.min_foreground_ratio
+            ):
+                count += 1
+                yield x_y[i]
+
+    def is_foreground(
+        self,
+        mask: Mask,
+        x: int,
+        y: int,
+        width: int,
+        height: int,
+        min_foreground_ratio: float,
+    ) -> bool:
+        """Check if a patch contains sufficient foreground.
+
+        Args:
+            mask: The mask of the image.
+            x: The x-coordinate of the patch.
+            y: The y-coordinate of the patch.
+            width: The width of the patch.
+            height: The height of the patch.
+            min_foreground_ratio: The minimum amount of foreground in the patch.
+        """
+        x_, y_ = self._scale_coords(x, y, mask.scale_factors)
+        width_, height_ = self._scale_coords(width, height, mask.scale_factors)
+        patch_mask = mask.mask_array[y_ : y_ + height_, x_ : x_ + width_]
+        return patch_mask.sum() / patch_mask.size > min_foreground_ratio
+
+    def _scale_coords(self, x: int, y: int, scale_factors: Tuple[float, float]) -> Tuple[int, int]:
+        return int(x / scale_factors[0]), int(y / scale_factors[1])
diff --git a/src/eva/vision/data/wsi/patching/samplers/grid.py b/src/eva/vision/data/wsi/patching/samplers/grid.py
new file mode 100644
index 00000000..aaf3b110
--- /dev/null
+++ b/src/eva/vision/data/wsi/patching/samplers/grid.py
@@ -0,0 +1,44 @@
+"""Grid sampler."""
+
+from typing import Generator, Tuple
+
+from eva.vision.data.wsi.patching.samplers import _utils, base
+
+
+class GridSampler(base.Sampler):
+    """Sample patches based on a grid.
+
+    Args:
+        max_samples: The maximum number of samples to return.
+        overlap: The overlap between patches in the grid.
+        seed: The random seed.
+    """
+
+    def __init__(
+        self,
+        max_samples: int | None = None,
+        overlap: Tuple[int, int] = (0, 0),
+        seed: int = 42,
+    ):
+        """Initializes the sampler."""
+        self.max_samples = max_samples
+        self.overlap = overlap
+        self.seed = seed
+
+    def sample(
+        self,
+        width: int,
+        height: int,
+        layer_shape: Tuple[int, int],
+    ) -> Generator[Tuple[int, int], None, None]:
+        """Sample patches from a grid.
+
+        Args:
+            width: The width of the patches.
+            height: The height of the patches.
+            layer_shape: The shape of the layer.
+        """
+        x_y, indices = _utils.get_grid_coords_and_indices(layer_shape, width, height, self.overlap)
+        max_samples = len(indices) if self.max_samples is None else self.max_samples
+        for i in indices[:max_samples]:
+            yield x_y[i]
diff --git a/src/eva/vision/data/wsi/patching/samplers/random.py b/src/eva/vision/data/wsi/patching/samplers/random.py
new file mode 100644
index 00000000..33404c1b
--- /dev/null
+++ b/src/eva/vision/data/wsi/patching/samplers/random.py
@@ -0,0 +1,40 @@
+"""Random sampler."""
+
+import random
+from typing import Generator, Tuple
+
+from eva.vision.data.wsi.patching.samplers import _utils, base
+
+
+class RandomSampler(base.Sampler):
+    """Sample patch coordinates randomly.
+
+    Args:
+        n_samples: The number of samples to return.
+        seed: The random seed.
+    """
+
+    def __init__(self, n_samples: int = 1, seed: int = 42):
+        """Initializes the sampler."""
+        self.seed = seed
+        self.n_samples = n_samples
+
+    def sample(
+        self,
+        width: int,
+        height: int,
+        layer_shape: Tuple[int, int],
+    ) -> Generator[Tuple[int, int], None, None]:
+        """Sample random patches.
+
+        Args:
+            width: The width of the patches.
+            height: The height of the patches.
+            layer_shape: The shape of the layer.
+        """
+        _utils.set_seed(self.seed)
+
+        for _ in range(self.n_samples):
+            x_max, y_max = layer_shape[0], layer_shape[1]
+            x, y = random.randint(0, x_max - width), random.randint(0, y_max - height)  # nosec
+            yield x, y

From 96d13059e03d7289661a81321fec21b32eef1178 Mon Sep 17 00:00:00 2001
From: Nicolas Kaenzig <nkaenzig@gmail.com>
Date: Tue, 7 May 2024 10:44:44 +0200
Subject: [PATCH 2/2] added unit test for samplers

---
 .../data/wsi/patching/samplers/_utils.py      | 16 +++-
 .../wsi/patching/samplers/foreground_grid.py  |  7 +-
 .../vision/data/wsi/patching/samplers/grid.py |  5 +-
 .../data/wsi/patching/samplers/random.py      |  3 +-
 tests/eva/vision/data/wsi/__init__.py         |  1 +
 .../eva/vision/data/wsi/patching/__init__.py  |  1 +
 .../data/wsi/patching/samplers/__init__.py    |  1 +
 .../patching/samplers/test_foreground_grid.py | 93 +++++++++++++++++++
 .../data/wsi/patching/samplers/test_grid.py   | 69 ++++++++++++++
 .../data/wsi/patching/samplers/test_random.py | 48 ++++++++++
 10 files changed, 238 insertions(+), 6 deletions(-)
 create mode 100644 tests/eva/vision/data/wsi/__init__.py
 create mode 100644 tests/eva/vision/data/wsi/patching/__init__.py
 create mode 100644 tests/eva/vision/data/wsi/patching/samplers/__init__.py
 create mode 100644 tests/eva/vision/data/wsi/patching/samplers/test_foreground_grid.py
 create mode 100644 tests/eva/vision/data/wsi/patching/samplers/test_grid.py
 create mode 100644 tests/eva/vision/data/wsi/patching/samplers/test_random.py

diff --git a/src/eva/vision/data/wsi/patching/samplers/_utils.py b/src/eva/vision/data/wsi/patching/samplers/_utils.py
index 9598bc8d..af8418df 100644
--- a/src/eva/vision/data/wsi/patching/samplers/_utils.py
+++ b/src/eva/vision/data/wsi/patching/samplers/_utils.py
@@ -27,8 +27,8 @@ def get_grid_coords_and_indices(
         shuffle: Whether to shuffle the indices.
         seed: The random seed.
     """
-    x_range = range(0, layer_shape[0] - width, width - overlap[0])
-    y_range = range(0, layer_shape[1] - height, height - overlap[1])
+    x_range = range(0, layer_shape[0] - width + 1, width - overlap[0])
+    y_range = range(0, layer_shape[1] - height + 1, height - overlap[1])
     x_y = [(x, y) for x in x_range for y in y_range]
 
     indices = list(range(len(x_y)))
@@ -36,3 +36,15 @@ def get_grid_coords_and_indices(
         set_seed(seed)
         np.random.shuffle(indices)
     return x_y, indices
+
+
+def validate_dimensions(width: int, height: int, layer_shape: Tuple[int, int]) -> None:
+    """Checks if the width / height is bigger than the layer shape.
+
+    Args:
+        width: The width of the patches.
+        height: The height of the patches.
+        layer_shape: The shape of the layer.
+    """
+    if width > layer_shape[0] or height > layer_shape[1]:
+        raise ValueError("The width / height cannot be bigger than the layer shape.")
diff --git a/src/eva/vision/data/wsi/patching/samplers/foreground_grid.py b/src/eva/vision/data/wsi/patching/samplers/foreground_grid.py
index 1b00e599..e062caf5 100644
--- a/src/eva/vision/data/wsi/patching/samplers/foreground_grid.py
+++ b/src/eva/vision/data/wsi/patching/samplers/foreground_grid.py
@@ -44,7 +44,10 @@ def sample(
             layer_shape: The shape of the layer.
             mask: The mask of the image.
         """
-        x_y, indices = _utils.get_grid_coords_and_indices(layer_shape, width, height, self.overlap)
+        _utils.validate_dimensions(width, height, layer_shape)
+        x_y, indices = _utils.get_grid_coords_and_indices(
+            layer_shape, width, height, self.overlap, seed=self.seed
+        )
 
         count = 0
         for i in indices:
@@ -78,7 +81,7 @@ def is_foreground(
         x_, y_ = self._scale_coords(x, y, mask.scale_factors)
         width_, height_ = self._scale_coords(width, height, mask.scale_factors)
         patch_mask = mask.mask_array[y_ : y_ + height_, x_ : x_ + width_]
-        return patch_mask.sum() / patch_mask.size > min_foreground_ratio
+        return patch_mask.sum() / patch_mask.size >= min_foreground_ratio
 
     def _scale_coords(self, x: int, y: int, scale_factors: Tuple[float, float]) -> Tuple[int, int]:
         return int(x / scale_factors[0]), int(y / scale_factors[1])
diff --git a/src/eva/vision/data/wsi/patching/samplers/grid.py b/src/eva/vision/data/wsi/patching/samplers/grid.py
index aaf3b110..3f2b0081 100644
--- a/src/eva/vision/data/wsi/patching/samplers/grid.py
+++ b/src/eva/vision/data/wsi/patching/samplers/grid.py
@@ -38,7 +38,10 @@ def sample(
             height: The height of the patches.
             layer_shape: The shape of the layer.
         """
-        x_y, indices = _utils.get_grid_coords_and_indices(layer_shape, width, height, self.overlap)
+        _utils.validate_dimensions(width, height, layer_shape)
+        x_y, indices = _utils.get_grid_coords_and_indices(
+            layer_shape, width, height, self.overlap, seed=self.seed
+        )
         max_samples = len(indices) if self.max_samples is None else self.max_samples
         for i in indices[:max_samples]:
             yield x_y[i]
diff --git a/src/eva/vision/data/wsi/patching/samplers/random.py b/src/eva/vision/data/wsi/patching/samplers/random.py
index 33404c1b..09ae5729 100644
--- a/src/eva/vision/data/wsi/patching/samplers/random.py
+++ b/src/eva/vision/data/wsi/patching/samplers/random.py
@@ -32,9 +32,10 @@ def sample(
             height: The height of the patches.
             layer_shape: The shape of the layer.
         """
+        _utils.validate_dimensions(width, height, layer_shape)
         _utils.set_seed(self.seed)
 
+        x_max, y_max = layer_shape[0], layer_shape[1]
         for _ in range(self.n_samples):
-            x_max, y_max = layer_shape[0], layer_shape[1]
             x, y = random.randint(0, x_max - width), random.randint(0, y_max - height)  # nosec
             yield x, y
diff --git a/tests/eva/vision/data/wsi/__init__.py b/tests/eva/vision/data/wsi/__init__.py
new file mode 100644
index 00000000..c3adfdd3
--- /dev/null
+++ b/tests/eva/vision/data/wsi/__init__.py
@@ -0,0 +1 @@
+"""WSI module tests."""
diff --git a/tests/eva/vision/data/wsi/patching/__init__.py b/tests/eva/vision/data/wsi/patching/__init__.py
new file mode 100644
index 00000000..686c6e8d
--- /dev/null
+++ b/tests/eva/vision/data/wsi/patching/__init__.py
@@ -0,0 +1 @@
+"""WSI patch extraction tests."""
diff --git a/tests/eva/vision/data/wsi/patching/samplers/__init__.py b/tests/eva/vision/data/wsi/patching/samplers/__init__.py
new file mode 100644
index 00000000..e7064022
--- /dev/null
+++ b/tests/eva/vision/data/wsi/patching/samplers/__init__.py
@@ -0,0 +1 @@
+"""WSI patch samplers tests."""
diff --git a/tests/eva/vision/data/wsi/patching/samplers/test_foreground_grid.py b/tests/eva/vision/data/wsi/patching/samplers/test_foreground_grid.py
new file mode 100644
index 00000000..9a5510ac
--- /dev/null
+++ b/tests/eva/vision/data/wsi/patching/samplers/test_foreground_grid.py
@@ -0,0 +1,93 @@
+"""ForegroundGridSampler tests."""
+
+import numpy as np
+import pytest
+
+from eva.vision.data.wsi.patching import mask, samplers
+
+TEST_MASK = mask.Mask(
+    mask_array=np.array(
+        [
+            [0, 0, 0, 0, 0, 0],
+            [0, 1, 1, 1, 1, 0],
+            [0, 1, 1, 1, 1, 0],
+            [0, 1, 1, 1, 1, 0],
+            [0, 1, 1, 1, 1, 0],
+            [0, 0, 0, 0, 0, 0],
+        ]
+    ),
+    mask_level_idx=3,
+    scale_factors=(6.0, 6.0),
+)
+
+TEST_ARGS = {"width": 12, "height": 12, "layer_shape": (36, 36), "mask": TEST_MASK}
+
+
+@pytest.mark.parametrize(
+    "min_foreground_ratio, max_samples, expected_n_samples",
+    [(0.0, 3, 3), (0.0, 100, 9), (0.5, 100, 5), (0.9, 100, 1)],
+)
+def test_length(min_foreground_ratio: float, max_samples: int, expected_n_samples: int) -> None:
+    """Tests if the sampler returns the correct number of samples."""
+    sampler = samplers.ForegroundGridSampler(
+        max_samples=max_samples, min_foreground_ratio=min_foreground_ratio
+    )
+
+    x_y = list(sampler.sample(**TEST_ARGS))
+
+    assert len(x_y) == expected_n_samples
+
+
+@pytest.mark.parametrize("n_samples, seed", [(10, 8), (22, 42)])
+def test_same_seed(n_samples: int, seed: int) -> None:
+    """Tests if the sampler returns the same samples for the same seed."""
+    sampler = samplers.ForegroundGridSampler(
+        max_samples=n_samples, seed=seed, min_foreground_ratio=0.5
+    )
+
+    x_y_1 = list(sampler.sample(**TEST_ARGS))
+    x_y_2 = list(sampler.sample(**TEST_ARGS))
+
+    assert x_y_1 == x_y_2
+
+
+@pytest.mark.parametrize("n_samples, seed_1, seed_2", [(3, 1, 2), (5, 3, 4)])
+def test_different_seed(n_samples: int, seed_1: int, seed_2: int) -> None:
+    """Tests if the sampler returns different samples for different seeds."""
+    sampler_1 = samplers.ForegroundGridSampler(max_samples=n_samples, seed=seed_1)
+    sampler_2 = samplers.ForegroundGridSampler(max_samples=n_samples, seed=seed_2)
+
+    x_y_1 = list(sampler_1.sample(**TEST_ARGS))
+    x_y_2 = list(sampler_2.sample(**TEST_ARGS))
+
+    assert x_y_1 != x_y_2
+
+
+def test_invalid_width_height() -> None:
+    """Tests if the sampler raises an error when width / height is bigger than layer_shape."""
+    sampler = samplers.ForegroundGridSampler(max_samples=10, seed=42)
+
+    with pytest.raises(ValueError):
+        list(sampler.sample(width=200, height=200, layer_shape=(100, 100), mask=TEST_MASK))
+
+
+@pytest.mark.parametrize("min_foreground_ratio", [0.0, 0.5, 0.9])
+def test_min_foreground_ratio(min_foreground_ratio: float) -> None:
+    """Tests if sampled coordinates respect the min_foreground_ratio."""
+    sampler = samplers.ForegroundGridSampler(
+        max_samples=100, min_foreground_ratio=min_foreground_ratio
+    )
+
+    x_y = list(sampler.sample(**TEST_ARGS))
+
+    mask = TEST_MASK
+    width, height = TEST_ARGS["width"], TEST_ARGS["height"]
+
+    for x, y in x_y:
+        x_, y_ = sampler._scale_coords(x, y, mask.scale_factors)
+        width_, height_ = sampler._scale_coords(width, height, mask.scale_factors)
+        # Index rows by y and columns by x, as ForegroundGridSampler.is_foreground does.
+        patch_mask = mask.mask_array[y_ : y_ + height_, x_ : x_ + width_]
+        foreground_ratio = patch_mask.sum() / patch_mask.size
+
+        assert foreground_ratio >= min_foreground_ratio
diff --git a/tests/eva/vision/data/wsi/patching/samplers/test_grid.py b/tests/eva/vision/data/wsi/patching/samplers/test_grid.py
new file mode 100644
index 00000000..efeecf54
--- /dev/null
+++ b/tests/eva/vision/data/wsi/patching/samplers/test_grid.py
@@ -0,0 +1,69 @@
+"""GridSampler tests."""
+
+from typing import Tuple
+
+import pytest
+
+from eva.vision.data.wsi.patching import samplers
+
+TEST_ARGS = {"width": 10, "height": 10, "layer_shape": (100, 100)}
+
+
+@pytest.mark.parametrize("max_samples, expected_n_samples", [(3, 3), (10, 10), (200, 100)])
+def test_length(max_samples: int, expected_n_samples: int) -> None:
+    """Tests if the sampler returns the correct number of samples."""
+    sampler = samplers.GridSampler(max_samples=max_samples)
+
+    x_y = list(sampler.sample(**TEST_ARGS))
+
+    assert len(x_y) == expected_n_samples
+
+
+@pytest.mark.parametrize("max_samples, seed", [(10, 8), (22, 42)])
+def test_same_seed(max_samples: int, seed: int) -> None:
+    """Tests if the sampler returns the same samples for the same seed."""
+    sampler = samplers.GridSampler(max_samples=max_samples, seed=seed)
+
+    x_y_1 = list(sampler.sample(**TEST_ARGS))
+    x_y_2 = list(sampler.sample(**TEST_ARGS))
+
+    assert x_y_1 == x_y_2
+
+
+@pytest.mark.parametrize("max_samples, seed_1, seed_2", [(3, 1, 2), (5, 3, 4)])
+def test_different_seed(max_samples: int, seed_1: int, seed_2: int) -> None:
+    """Tests if the sampler returns different samples for different seeds."""
+    sampler_1 = samplers.GridSampler(max_samples=max_samples, seed=seed_1)
+    sampler_2 = samplers.GridSampler(max_samples=max_samples, seed=seed_2)
+
+    x_y_1 = list(sampler_1.sample(**TEST_ARGS))
+    x_y_2 = list(sampler_2.sample(**TEST_ARGS))
+
+    assert x_y_1 != x_y_2
+
+
+def test_invalid_width_height() -> None:
+    """Tests if the sampler raises an error when width / height is bigger than layer_shape."""
+    sampler = samplers.GridSampler(max_samples=10, seed=42)
+
+    with pytest.raises(ValueError):
+        list(sampler.sample(width=200, height=200, layer_shape=(100, 100)))
+
+
+@pytest.mark.parametrize(
+    "width, height, layer_shape",
+    [
+        (5, 5, (25, 25)),
+        (5, 5, (100, 100)),
+        (224, 224, (1000, 1000)),
+    ],
+)
+def test_expected_n_patches(width: int, height: int, layer_shape: Tuple[int, int]) -> None:
+    """Tests if the sampler yields every full grid patch when max_samples is None."""
+    sampler = samplers.GridSampler(max_samples=None)
+
+    expected_max_samples = (layer_shape[0] // width) * (layer_shape[1] // height)
+
+    x_y = list(sampler.sample(width=width, height=height, layer_shape=layer_shape))
+
+    assert len(x_y) == expected_max_samples
diff --git a/tests/eva/vision/data/wsi/patching/samplers/test_random.py b/tests/eva/vision/data/wsi/patching/samplers/test_random.py
new file mode 100644
index 00000000..85110a6c
--- /dev/null
+++ b/tests/eva/vision/data/wsi/patching/samplers/test_random.py
@@ -0,0 +1,48 @@
+"""RandomSampler tests."""
+
+import pytest
+
+from eva.vision.data.wsi.patching import samplers
+
+TEST_ARGS = {"width": 10, "height": 10, "layer_shape": (100, 100)}
+
+
+@pytest.mark.parametrize("n_samples", [3, 10, 22])
+def test_length(n_samples: int) -> None:
+    """Tests if the sampler returns the correct number of samples."""
+    sampler = samplers.RandomSampler(n_samples=n_samples)
+
+    x_y = list(sampler.sample(**TEST_ARGS))
+
+    assert len(x_y) == n_samples
+
+
+@pytest.mark.parametrize("n_samples, seed", [(10, 8), (22, 42)])
+def test_same_seed(n_samples: int, seed: int) -> None:
+    """Tests if the sampler returns the same samples for the same seed."""
+    sampler = samplers.RandomSampler(n_samples=n_samples, seed=seed)
+
+    x_y_1 = list(sampler.sample(**TEST_ARGS))
+    x_y_2 = list(sampler.sample(**TEST_ARGS))
+
+    assert x_y_1 == x_y_2
+
+
+@pytest.mark.parametrize("n_samples, seed_1, seed_2", [(10, 1, 2), (22, 3, 4)])
+def test_different_seed(n_samples: int, seed_1: int, seed_2: int) -> None:
+    """Tests if the sampler returns different samples for different seeds."""
+    sampler_1 = samplers.RandomSampler(n_samples=n_samples, seed=seed_1)
+    sampler_2 = samplers.RandomSampler(n_samples=n_samples, seed=seed_2)
+
+    x_y_1 = list(sampler_1.sample(**TEST_ARGS))
+    x_y_2 = list(sampler_2.sample(**TEST_ARGS))
+
+    assert x_y_1 != x_y_2
+
+
+def test_invalid_width_height() -> None:
+    """Tests if the sampler raises an error when width / height is bigger than layer_shape."""
+    sampler = samplers.RandomSampler(n_samples=10, seed=42)
+
+    with pytest.raises(ValueError):
+        list(sampler.sample(width=200, height=200, layer_shape=(100, 100)))