From 45bdd6502d6814e5f1c63cd09ebf4805f7ab2061 Mon Sep 17 00:00:00 2001 From: Jermiah Joseph Date: Tue, 24 Dec 2024 11:35:53 -0500 Subject: [PATCH 1/6] refactor: enhance image utility functions with type annotations and improved documentation --- src/imgtools/utils/imageutils.py | 139 +++++++++++++++++++++++-------- 1 file changed, 102 insertions(+), 37 deletions(-) diff --git a/src/imgtools/utils/imageutils.py b/src/imgtools/utils/imageutils.py index 33483f5a..844f6b88 100644 --- a/src/imgtools/utils/imageutils.py +++ b/src/imgtools/utils/imageutils.py @@ -1,52 +1,117 @@ -import SimpleITK as sitk +from typing import List, Tuple + import numpy as np +import SimpleITK as sitk + +# Define type aliases for better readability +Array3D = Tuple[float, float, float] +ImageArrayMetadata = Tuple[np.ndarray, Array3D, Array3D, Array3D] + + +def image_to_array(image: sitk.Image) -> ImageArrayMetadata: + """ + Converts a SimpleITK image to a numpy array along with its metadata. + + Parameters + ---------- + image : sitk.Image + The SimpleITK image to convert. + Returns + ------- + ImageArrayMetadata + A tuple containing: + - The image as a numpy array. + - The origin of the image (tuple of floats). + - The direction cosines of the image (tuple of floats). + - The pixel spacing of the image (tuple of floats). + """ + origin: Array3D = image.GetOrigin() + direction: Array3D = image.GetDirection() + spacing: Array3D = image.GetSpacing() + array: np.ndarray = sitk.GetArrayFromImage(image) + return array, origin, direction, spacing -def physical_points_to_idxs(image, points, continuous=False): - if continuous: - transform = image.TransformPhysicalPointToContinuousIndex - else: - transform = image.TransformPhysicalPointToIndex - - vectorized_transform = np.vectorize(lambda x: np.array(transform(x)), signature='(3)->(3)') - - # transform indices to ContourSequence/ContourData-wise - t_points = [] - for slc in points: - t_points.append(vectorized_transform(slc)[:,::-1]) - return t_points +def physical_points_to_idxs( + image: sitk.Image, points: List[np.ndarray], continuous: bool = False +) -> List[np.ndarray]: + """ + Converts physical points to image indices based on the reference image's geometry. -def idxs_to_physical_points(image, idxs): - continuous = any([isinstance(i, float) for i in idxs]) + This function uses the geometry of a SimpleITK image (origin, spacing, direction) to convert + real-world physical coordinates into indices in the image grid. It optionally supports continuous + indices for sub-pixel precision. - if continuous: - transform = image.TransformContinuousIndexToPhysicalPoint - else: - transform = image.TransformIndexToPhysicalPoint - vectorized_transform = np.vectorize(lambda x: np.array(transform(x)), signature='(3)->(3)') - return vectorized_transform(idxs) + Parameters + ---------- + image : sitk.Image + The reference SimpleITK image. + points : List[np.ndarray] + List of 3D physical points to transform. + continuous : bool, optional + If True, returns continuous indices; otherwise, returns integer indices. Default is False. + Returns + ------- + List[np.ndarray] + A list of transformed points in image index space, reversed to match library conventions. -def image_to_array(image): - origin, direction, spacing = image.GetOrigin(), image.GetDirection(), image.GetSpacing() - array = sitk.GetArrayFromImage(image) - return array, origin, direction, spacing + Notes + ----- + The following steps occur within the function: + 1. A `numpy.vectorize` function is defined to apply the transformation method (physical to index) + to each 3D point in the input array. + 2. The transformation is applied to each set of points in the list, reversing the coordinate + order to match the library's indexing convention. + """ + # Select the appropriate transformation function based on the `continuous` parameter. + transform = ( + image.TransformPhysicalPointToContinuousIndex + if continuous + else image.TransformPhysicalPointToIndex + ) + # Step 1: Define a vectorized transformation function + # The lambda function takes a single 3D point `x` and: + # - Applies the selected transformation (`transform(x)`) to convert it from physical space to index space. + # - Wraps the result into a numpy array for further processing. + # `np.vectorize` creates a vectorized function that can process arrays of points in one call. + # The `signature="(3)->(3)"` ensures the transformation operates on 3D points, returning 3D results. + vectorized_transform = np.vectorize(lambda x: np.array(transform(x)), signature='(3)->(3)') -def show_image(image, mask=None, ax=None): - import matplotlib.pyplot as plt - if ax is None: - ax = plt.subplots() + # Step 2: Apply the vectorized transformation to all slices of points. + # For each 2D array `slc` in the `points` list: + # - `vectorized_transform(slc)` applies the transformation to all points in `slc`. + # - `[:, ::-1]` reverses the coordinate order (from (x, y, z) to (z, y, x)) to match the library's convention. + # The result is stored as a list of numpy arrays (`t_points`), each corresponding to a transformed slice. + t_points: List[np.ndarray] = [vectorized_transform(slc)[:, ::-1] for slc in points] - image_array, *_ = image_to_array(image) + # Return the list of transformed points. + return t_points - ax.imshow(image_array, cmap="bone", interpolation="bilinear") - if mask is not None: - mask_array, *_ = image_to_array(mask) - mask_array = np.ma.masked_where(mask_array == 0, mask_array) +def idxs_to_physical_points(image: sitk.Image, idxs: np.ndarray) -> np.ndarray: + """ + Converts image indices to physical points based on the reference image's geometry. - ax.imshow(mask_array, cmap="tab20") + Parameters + ---------- + image : sitk.Image + The reference SimpleITK image. + idxs : np.ndarray + Array of 3D indices (continuous or discrete). - return ax + Returns + ------- + np.ndarray + Physical coordinates corresponding to the given indices. + """ + continuous = np.issubdtype(idxs.dtype, np.floating) + transform = ( + image.TransformContinuousIndexToPhysicalPoint + if continuous + else image.TransformIndexToPhysicalPoint + ) + vectorized_transform = np.vectorize(lambda x: np.array(transform(x)), signature='(3)->(3)') + return vectorized_transform(idxs) From 4c7ee296f2f1c3ff59e307ae38a57470793ba7d4 Mon Sep 17 00:00:00 2001 From: Jermiah Joseph Date: Tue, 24 Dec 2024 12:38:00 -0500 Subject: [PATCH 2/6] refactor: update mypy and ruff configurations to include additional module paths --- config/mypy.ini | 3 ++- config/ruff.toml | 11 ++++------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/config/mypy.ini b/config/mypy.ini index 64614b8c..207d1413 100644 --- a/config/mypy.ini +++ b/config/mypy.ini @@ -3,7 +3,8 @@ files = src/imgtools/logging/**/*.py, src/imgtools/dicom/**/*.py, - src/imgtools/cli/**/*.py + src/imgtools/cli/**/*.py, + src/imgtools/modules/**/*.py, # Exclude files from analysis exclude = tests, diff --git a/config/ruff.toml b/config/ruff.toml index 930ca89d..917c2424 100644 --- a/config/ruff.toml +++ b/config/ruff.toml @@ -4,8 +4,8 @@ # slowly fix everything include = [ - "src/imgtools/logging/**/*.py", - # "src/imgtools/cli/**/*.py", + "src/imgtools/logging/**/*.py", + "src/imgtools/modules/segmentation.py", "src/imgtools/dicom/**/*.py", # "src/imgtools/utils/crawl.py", ] @@ -15,16 +15,13 @@ extend-exclude = [ "tests/**/*.py", "src/imgtools/ops/ops.py", "src/imgtools/io/**/*.py", - "src/imgtools/modules/**/*.py", "src/imgtools/transforms/**/*.py", "src/imgtools/autopipeline.py", "src/imgtools/pipeline.py", "src/imgtools/image.py", ] -extend-include = [ - "src/imgtools/ops/functional.py", -] +extend-include = ["src/imgtools/ops/functional.py"] line-length = 100 @@ -105,7 +102,7 @@ ignore = [ # Ignored because https://docs.astral.sh/ruff/formatter/#conflicting-lint-rules "COM812", # https://docs.astral.sh/ruff/rules/missing-trailing-comma/#missing-trailing-comma-com812 "D206", - "N813", + "N813", ] [lint.pydocstyle] convention = "numpy" From 2a8af1e5df5d20215b2dcdfe10c22ef0a14d8a07 Mon Sep 17 00:00:00 2001 From: Jermiah Joseph Date: Tue, 24 Dec 2024 12:38:21 -0500 Subject: [PATCH 3/6] refactor: improve logic and documentation to be more clear --- src/imgtools/modules/structureset.py | 605 ++++++++++++++++++--------- 1 file changed, 399 insertions(+), 206 deletions(-) diff --git a/src/imgtools/modules/structureset.py b/src/imgtools/modules/structureset.py index 6b73b723..4fcf5dd9 100644 --- a/src/imgtools/modules/structureset.py +++ b/src/imgtools/modules/structureset.py @@ -1,7 +1,36 @@ -import re -from typing import Dict, List, Optional, TypeVar +""" +Module for handling and converting DICOM RTSTRUCT contour data to segmentations. + +This module provides classes and methods for processing DICOM RTSTRUCT files, +which store contour data for regions of interest (ROIs). The main class, +`StructureSet`, facilitates the extraction, manipulation, and conversion of +contour data into 3D masks or segmentations compatible with other imaging +pipelines. + +Classes +------- +StructureSet + Represents a DICOM RTSTRUCT file, allowing operations such as loading + ROI contours, converting physical points to masks, and exporting to + segmentation objects. + +Functions +--------- +_get_roi_points(rtstruct, roi_index) + Extracts and reshapes contour points for a specific ROI in an RTSTRUCT + file. +Notes +----- +The `StructureSet` class provides utility methods for handling complex ROI +labeling schemes, such as those based on regular expressions, and supports +multiple output formats for segmentation masks. It also integrates robust +error handling and logging to handle malformed or incomplete DICOM files. +""" +import re +from typing import Dict, List, Optional, TypeVar, Union import numpy as np +from pydicom.dataset import FileDataset import SimpleITK as sitk from pydicom import dcmread from itertools import groupby @@ -14,209 +43,373 @@ T = TypeVar('T') -def _get_roi_points(rtstruct, roi_index): - return [np.array(slc.ContourData).reshape(-1, 3) for slc in rtstruct.ROIContourSequence[roi_index].ContourSequence] +class StructureSet: + def __init__(self, roi_points: Dict[str, List[np.ndarray]], metadata: Optional[Dict[str, T]] = None) -> None: + """Initialize the StructureSet class containing contour points. + Parameters + ---------- + roi_points : Dict[str, List[np.ndarray]] + A dictionary mapping ROI (Region of Interest) names to a list of 2D arrays. + Each array contains the 3D physical coordinates of the contour points for a slice. + metadata : Optional[Dict[str, T]], optional + A dictionary containing additional metadata from the DICOM RTSTRUCT file. + Default is an empty dictionary. -class StructureSet: - def __init__(self, roi_points: Dict[str, np.ndarray], metadata: Optional[Dict[str, T]] = None): - """Initializes the StructureSet class containing contour points - - Parameters - ---------- - roi_points - Dictionary of {"ROI": [ndarray of shape n x 3 of contour points]} - - metadata - Dictionary of DICOM metadata - """ - self.roi_points = roi_points - if metadata: - self.metadata = metadata - else: - self.metadata = {} - - @classmethod - def from_dicom_rtstruct(cls, rtstruct_path: str, suppress_warnings: bool = False) -> 'StructureSet': - rtstruct = dcmread(rtstruct_path, force=True) - roi_names = [roi.ROIName for roi in rtstruct.StructureSetROISequence] - roi_points = {} - for i, name in enumerate(roi_names): - try: - roi_points[name] = _get_roi_points(rtstruct, i) - except AttributeError as ae: - if not suppress_warnings: - logger.warning(f"Could not get points for ROI `{name}`.", rtstruct_path=rtstruct_path, error=ae) - - metadata = {} - - return cls(roi_points, metadata) - - @property - def roi_names(self) -> List[str]: - return list(self.roi_points.keys()) - - def _assign_labels(self, - names, - roi_select_first: bool = False, - roi_separate: bool = False): - """ - Parameters - ---- - roi_select_first - Select the first matching ROI/regex for each OAR, no duplicate matches. - - roi_separate - Process each matching ROI/regex as individual masks, instead of consolidating into one mask - Each mask will be named ROI_n, where n is the nth regex/name/string. - """ - labels = {} - cur_label = 0 - if names == self.roi_names: - for i, name in enumerate(self.roi_names): - labels[name] = i - else: - for _, pattern in enumerate(names): - if sorted(names) == sorted(list(labels.keys())): # checks if all ROIs have already been processed. - break - if isinstance(pattern, str): - for i, name in enumerate(self.roi_names): - if re.fullmatch(pattern, name, flags=re.IGNORECASE): - labels[name] = cur_label - cur_label += 1 - else: # if multiple regex/names to match - matched = False - for subpattern in pattern: - if roi_select_first and matched: # break if roi_select_first and we're matched - break - for n, name in enumerate(self.roi_names): - if re.fullmatch(subpattern, name, flags=re.IGNORECASE): - matched = True - if not roi_separate: - labels[name] = cur_label - else: - labels[f"{name}_{n}"] = cur_label - - cur_label += 1 - return labels - - def get_mask(self, reference_image, mask, label, idx, continuous): - size = reference_image.GetSize()[::-1] - physical_points = self.roi_points.get(label, np.array([])) - mask_points = physical_points_to_idxs(reference_image, physical_points, continuous=continuous) - for contour in mask_points: - try: - z, slice_points = np.unique(contour[:, 0]), contour[:, 1:] - if len(z) == 1: # assert len(z) == 1, f"This contour ({name}) spreads across more than 1 slice." - slice_mask = polygon2mask(size[1:], slice_points) - mask[z[0], :, :, idx] += slice_mask - except: # rounding errors for points on the boundary - if z == mask.shape[0]: - z -= 1 - elif z == -1: #? - z += 1 - elif z > mask.shape[0] or z < -1: - raise IndexError(f"{z} index is out of bounds for image sized {mask.shape}.") - - # if the contour spans only 1 z-slice - if len(z) == 1: - z_idx = int(np.floor(z[0])) - slice_mask = polygon2mask(size[1:], slice_points) - mask[z_idx, :, :, idx] += slice_mask - else: - raise ValueError("This contour is corrupted and spans across 2 or more slices.") - - def to_segmentation(self, reference_image: sitk.Image, - roi_names: Dict[str, str] = None, - continuous: bool = True, - existing_roi_indices: Dict[str, int] = None, - ignore_missing_regex: bool = False, - roi_select_first: bool = False, - roi_separate: bool = False) -> Segmentation: - """Convert the structure set to a Segmentation object. - - Parameters - ---------- - reference_image - Image used as reference geometry. - roi_names - List of ROI names to export. Both full names and - case-insensitive regular expressions are allowed. - All labels within one sublist will be assigned - the same label. - - Returns - ------- - Segmentation - The segmentation object. - - Notes - ----- - If `roi_names` contains lists of strings, each matching - name within a sublist will be assigned the same label. This means - that `roi_names=['pat']` and `roi_names=[['pat']]` can lead - to different label assignments, depending on how many ROI names - match the pattern. E.g. if `self.roi_names = ['fooa', 'foob']`, - passing `roi_names=['foo(a|b)']` will result in a segmentation with - two labels, but passing `roi_names=[['foo(a|b)']]` will result in - one label for both `'fooa'` and `'foob'`. - - In general, the exact ordering of the returned labels cannot be - guaranteed (unless all patterns in `roi_names` can only match - a single name or are lists of strings). - """ - labels = {} - if roi_names is None or roi_names == {}: - roi_names = self.roi_names # all the contour names - labels = self._assign_labels(roi_names, roi_select_first, roi_separate) # only the ones that match the regex - elif isinstance(roi_names, dict): - for name, pattern in roi_names.items(): - if isinstance(pattern, str): - matching_names = list(self._assign_labels([pattern], roi_select_first).keys()) - if matching_names: - labels[name] = matching_names # {"GTV": ["GTV1", "GTV2"]} is the result of _assign_labels() - elif isinstance(pattern, list): # for inputs that have multiple patterns for the input, e.g. {"GTV": ["GTV.*", "HTVI.*"]} - labels[name] = [] - for pattern_one in pattern: - matching_names = list(self._assign_labels([pattern_one], roi_select_first).keys()) - if matching_names: - labels[name].extend(matching_names) # {"GTV": ["GTV1", "GTV2"]} - if isinstance(roi_names, str): - roi_names = [roi_names] - if isinstance(roi_names, list): # won't this always trigger after the previous? - labels = self._assign_labels(roi_names, roi_select_first) - logger.debug(f"Found {len(labels)} labels", labels=labels) - all_empty = True - for v in labels.values(): - if v != []: - all_empty = False - if all_empty: - if not ignore_missing_regex: - raise ValueError(f"No ROIs matching {roi_names} found in {self.roi_names}.") - else: - return None - labels = {k:v for (k,v) in labels.items() if v != [] } - size = reference_image.GetSize()[::-1] + (len(labels),) - mask = np.zeros(size, dtype=np.uint8) - - seg_roi_indices = {} - if roi_names != {} and isinstance(roi_names, dict): - for i, (name, label_list) in enumerate(labels.items()): - for label in label_list: - self.get_mask(reference_image, mask, label, i, continuous) - seg_roi_indices[name] = i - - else: - for name, label in labels.items(): - self.get_mask(reference_image, mask, name, label, continuous) - seg_roi_indices = {"_".join(k): v for v, k in groupby(labels, key=lambda x: labels[x])} - - mask[mask > 1] = 1 - mask = sitk.GetImageFromArray(mask, isVector=True) - mask.CopyInformation(reference_image) - mask = Segmentation(mask, roi_indices=seg_roi_indices, existing_roi_indices=existing_roi_indices, raw_roi_names=labels) # in the segmentation, pass all the existing roi names and then process is in the segmentation class - - return mask - - def __repr__(self): - return f"" + Examples + -------- + >>> roi_points = {"GTV": [np.array([[0, 0, 0], [1, 1, 1]])]} + >>> metadata = {"PatientName": "John Doe"} + >>> structure_set = StructureSet(roi_points, metadata) + """ + self.roi_points: Dict[str, List[np.ndarray]] = roi_points + self.metadata: Dict[str, T] = metadata if metadata is not None else {} + + @classmethod + def from_dicom_rtstruct(cls, rtstruct_path: str, suppress_warnings: bool = False) -> 'StructureSet': + """Create a StructureSet instance from a DICOM RTSTRUCT file. + + Parameters + ---------- + rtstruct_path : str + Path to the DICOM RTSTRUCT file. + suppress_warnings : bool, optional + If True, suppresses warnings for missing or invalid ROI data. Default is False. + + Returns + ------- + StructureSet + An instance of the StructureSet class containing the ROI data and metadata. + + Raises + ------ + FileNotFoundError + If the specified RTSTRUCT file does not exist. + ValueError + If the RTSTRUCT file is invalid or cannot be read. + + Examples + -------- + >>> structure_set = StructureSet.from_dicom_rtstruct("path/to/rtstruct.dcm") + """ + # Load the RTSTRUCT file + rtstruct: FileDataset = dcmread(rtstruct_path, force=True) + + # Extract ROI names and points + roi_names: List[str] = [roi.ROIName for roi in rtstruct.StructureSetROISequence] + roi_points: Dict[str, List[np.ndarray]] = {} + + for i, name in enumerate(roi_names): + try: + roi_points[name] = cls._get_roi_points(rtstruct, i) + except AttributeError as ae: + if not suppress_warnings: + logger.warning( + f"Could not get points for ROI `{name}`.", + rtstruct_path=rtstruct_path, + error=ae, + ) + + # Initialize metadata (can be extended later to extract more useful fields) + metadata: Dict[str, Union[str, int, float]] = {} + + # Return the StructureSet instance + return cls(roi_points, metadata) + + @staticmethod + def _get_roi_points(rtstruct: FileDataset, roi_index: int) -> List[np.ndarray]: + """Extract and reshapes contour points for a specific ROI in an RTSTRUCT file. + + Parameters + ---------- + rtstruct : FileDataset + The loaded DICOM RTSTRUCT file. + roi_index : int + The index of the ROI in the ROIContourSequence. + + Returns + ------- + List[np.ndarray] + A list of numpy arrays where each array contains the 3D physical coordinates + of the contour points for a specific slice. + + Raises + ------ + AttributeError + If the ROIContourSequence, ContourSequence, or ContourData is missing or malformed. + + Examples + -------- + >>> rtstruct = dcmread("path/to/rtstruct.dcm", force=True) + >>> points = StructureSet._get_roi_points(rtstruct, 0) + """ + # Check for ROIContourSequence + if not hasattr(rtstruct, "ROIContourSequence"): + raise AttributeError("The DICOM RTSTRUCT file is missing 'ROIContourSequence'.") + + # Check if ROI index exists in the sequence + if roi_index >= len(rtstruct.ROIContourSequence) or roi_index < 0: + raise AttributeError(f"ROI index {roi_index} is out of bounds for the 'ROIContourSequence'.") + + roi_contour = rtstruct.ROIContourSequence[roi_index] + + # Check for ContourSequence in the specified ROI + if not hasattr(roi_contour, "ContourSequence"): + raise AttributeError(f"ROI at index {roi_index} is missing 'ContourSequence'.") + + contour_sequence = roi_contour.ContourSequence + + # Check for ContourData in each contour + contour_points = [] + for i, slc in enumerate(contour_sequence): + if not hasattr(slc, "ContourData"): + raise AttributeError( + f"Contour {i} in ROI at index {roi_index} is missing 'ContourData'." + ) + contour_points.append(np.array(slc.ContourData).reshape(-1, 3)) + + return contour_points + + @property + def roi_names(self) -> List[str]: + """List of all ROI (Region of Interest) names.""" + return list(self.roi_points.keys()) + + + def _assign_labels( + self, + names: List[Union[str, List[str]]], + roi_select_first: bool = False, + roi_separate: bool = False, + ) -> Dict[str, int]: + """ + Assigns integer labels to ROIs (Regions of Interest) based on their names or regex patterns. + + This method supports flexible and configurable labeling of ROIs using exact matches or regular + expressions. It also allows for advanced configurations such as selecting only the first match + or treating each match as a separate mask. + + Parameters + ---------- + names : List[Union[str, List[str]]] + A list of ROI names or regex patterns. Can be: + - A list of strings representing exact matches or regex patterns. + - A nested list of regex patterns, where all matching ROIs within the same sublist + are assigned the same label. + roi_select_first : bool, optional + If True, selects only the first matching ROI for each regex pattern or name. + Default is False. + roi_separate : bool, optional + If True, assigns separate labels to each matching ROI within a regex pattern, appending + a numerical suffix to the ROI name (e.g., "CTV_0", "CTV_1"). Default is False. + + Returns + ------- + Dict[str, int] + A dictionary mapping ROI names to their assigned integer labels. + + Raises + ------ + ValueError + If `names` is empty or does not match any ROIs. + + Examples + -------- + Lets say we have the following ROI names: + >>> self.roi_names = ["GTV", "PTV", "CTV_0", "CTV_1"] + + Case 1: Default behavior + All matching ROIs for each pattern are assigned the same label(number). + note how the CTV ROIs are assigned the same label: 1 + >>> self._assign_labels(["GTV", "CTV.*"]) + {'GTV': 0, 'CTV_0': 1, 'CTV_1': 1} + + Case 2: Select only the first match for each pattern + Subsequent matches are ignored. + >>> self._assign_labels(["GTV", "CTV.*"], roi_select_first=True) + {'GTV': 0, 'CTV_0': 1} + + Case 3: Separate labels for each match + Even if a pattern matches multiple ROIs, each ROI gets a separate label. + note how now the CTV ROIs are assigned different labels: 1 and 2 + >>> self._assign_labels(["GTV", "CTV.*"], roi_separate=True) + {'GTV': 0, 'CTV_0': 1, 'CTV_1': 2} + + # Case 4: Grouped patterns + >>> self._assign_labels([["GTV", "PTV"], "CTV.*"]) + {'GTV': 0, 'PTV': 0, 'CTV_0': 1, 'CTV_1': 1} + """ + if not names: + raise ValueError("The 'names' list cannot be empty.") + if roi_select_first and roi_separate: + raise ValueError( + "The options 'roi_select_first' and 'roi_separate' cannot both be True. " + "'roi_select_first' stops after the first match," + " while 'roi_separate' processes all matches individually." + ) + + labels: Dict[str, int] = {} + cur_label = 0 + + # Case 1: If `names` is exactly `self.roi_names`, assign sequential labels directly. + if names == self.roi_names: + return {name: i for i, name in enumerate(self.roi_names)} + + # Case 2: Iterate over `names` (could contain regex patterns or sublists) + for pattern in names: + # Skip further processing if all names have been assigned + if sorted(names) == sorted(labels.keys()): + break + + # Single pattern: string or regex + if isinstance(pattern, str): + for i, name in enumerate(self.roi_names): + if re.fullmatch(pattern, name, flags=re.IGNORECASE): + labels[name] = cur_label + if roi_select_first: + break + cur_label += 1 + + # Nested patterns: list of strings or regexes + elif isinstance(pattern, list): + matched = False + for subpattern in pattern: + if roi_select_first and matched: + break + for i, name in enumerate(self.roi_names): + if re.fullmatch(subpattern, name, flags=re.IGNORECASE): + matched = True + if roi_separate: + labels[f"{name}_{i}"] = cur_label + else: + labels[name] = cur_label + cur_label += 1 + + # Validate output + if not labels: + raise ValueError(f"No matching ROIs found for the provided patterns: {names}") + + return labels + + def get_mask(self, reference_image, mask, label, idx, continuous): + size = reference_image.GetSize()[::-1] + physical_points = self.roi_points.get(label, np.array([])) + mask_points = physical_points_to_idxs(reference_image, physical_points, continuous=continuous) + for contour in mask_points: + try: + z, slice_points = np.unique(contour[:, 0]), contour[:, 1:] + if len(z) == 1: # assert len(z) == 1, f"This contour ({name}) spreads across more than 1 slice." + slice_mask = polygon2mask(size[1:], slice_points) + mask[z[0], :, :, idx] += slice_mask + except: # rounding errors for points on the boundary + if z == mask.shape[0]: + z -= 1 + elif z == -1: #? + z += 1 + elif z > mask.shape[0] or z < -1: + raise IndexError(f"{z} index is out of bounds for image sized {mask.shape}.") + + # if the contour spans only 1 z-slice + if len(z) == 1: + z_idx = int(np.floor(z[0])) + slice_mask = polygon2mask(size[1:], slice_points) + mask[z_idx, :, :, idx] += slice_mask + else: + raise ValueError("This contour is corrupted and spans across 2 or more slices.") + + def to_segmentation(self, reference_image: sitk.Image, + roi_names: Dict[str, str] = None, + continuous: bool = True, + existing_roi_indices: Dict[str, int] = None, + ignore_missing_regex: bool = False, + roi_select_first: bool = False, + roi_separate: bool = False) -> Segmentation: + """Convert the structure set to a Segmentation object. + + Parameters + ---------- + reference_image + Image used as reference geometry. + roi_names + List of ROI names to export. Both full names and + case-insensitive regular expressions are allowed. + All labels within one sublist will be assigned + the same label. + + Returns + ------- + Segmentation + The segmentation object. + + Notes + ----- + If `roi_names` contains lists of strings, each matching + name within a sublist will be assigned the same label. This means + that `roi_names=['pat']` and `roi_names=[['pat']]` can lead + to different label assignments, depending on how many ROI names + match the pattern. E.g. if `self.roi_names = ['fooa', 'foob']`, + passing `roi_names=['foo(a|b)']` will result in a segmentation with + two labels, but passing `roi_names=[['foo(a|b)']]` will result in + one label for both `'fooa'` and `'foob'`. + + In general, the exact ordering of the returned labels cannot be + guaranteed (unless all patterns in `roi_names` can only match + a single name or are lists of strings). + """ + labels = {} + if roi_names is None or roi_names == {}: + roi_names = self.roi_names # all the contour names + labels = self._assign_labels(roi_names, roi_select_first, roi_separate) # only the ones that match the regex + elif isinstance(roi_names, dict): + for name, pattern in roi_names.items(): + if isinstance(pattern, str): + matching_names = list(self._assign_labels([pattern], roi_select_first).keys()) + if matching_names: + labels[name] = matching_names # {"GTV": ["GTV1", "GTV2"]} is the result of _assign_labels() + elif isinstance(pattern, list): # for inputs that have multiple patterns for the input, e.g. {"GTV": ["GTV.*", "HTVI.*"]} + labels[name] = [] + for pattern_one in pattern: + matching_names = list(self._assign_labels([pattern_one], roi_select_first).keys()) + if matching_names: + labels[name].extend(matching_names) # {"GTV": ["GTV1", "GTV2"]} + if isinstance(roi_names, str): + roi_names = [roi_names] + if isinstance(roi_names, list): # won't this always trigger after the previous? + labels = self._assign_labels(roi_names, roi_select_first) + logger.debug(f"Found {len(labels)} labels", labels=labels) + all_empty = True + for v in labels.values(): + if v != []: + all_empty = False + if all_empty: + if not ignore_missing_regex: + raise ValueError(f"No ROIs matching {roi_names} found in {self.roi_names}.") + else: + return None + labels = {k:v for (k,v) in labels.items() if v != [] } + size = reference_image.GetSize()[::-1] + (len(labels),) + mask = np.zeros(size, dtype=np.uint8) + + seg_roi_indices = {} + if roi_names != {} and isinstance(roi_names, dict): + for i, (name, label_list) in enumerate(labels.items()): + for label in label_list: + self.get_mask(reference_image, mask, label, i, continuous) + seg_roi_indices[name] = i + + else: + for name, label in labels.items(): + self.get_mask(reference_image, mask, name, label, continuous) + seg_roi_indices = {"_".join(k): v for v, k in groupby(labels, key=lambda x: labels[x])} + + mask[mask > 1] = 1 + mask = sitk.GetImageFromArray(mask, isVector=True) + mask.CopyInformation(reference_image) + mask = Segmentation(mask, roi_indices=seg_roi_indices, existing_roi_indices=existing_roi_indices, raw_roi_names=labels) # in the segmentation, pass all the existing roi names and then process is in the segmentation class + + return mask + + def __repr__(self): + # return f"" + sorted_rois = sorted(self.roi_names) + return f"" From 7e8852658a94072505902efda1fa96d2ece8d1f7 Mon Sep 17 00:00:00 2001 From: Jermiah Joseph Date: Tue, 24 Dec 2024 12:39:18 -0500 Subject: [PATCH 4/6] refactor: enhance segmentation module with detailed documentation and type annotations --- src/imgtools/modules/segmentation.py | 188 +++++++++++++++++++++++---- 1 file changed, 163 insertions(+), 25 deletions(-) diff --git a/src/imgtools/modules/segmentation.py b/src/imgtools/modules/segmentation.py index e58252dc..fac5ac8a 100644 --- a/src/imgtools/modules/segmentation.py +++ b/src/imgtools/modules/segmentation.py @@ -1,37 +1,175 @@ -from functools import wraps +"""Manage and manipulate segmentation masks with multi-label support. + +This module provides the `Segmentation` class and associated utilities for working +with medical image segmentation masks. +It extends the functionality of `SimpleITK.Image` to include ROI-specific operations, +label management, and metadata tracking. + +Classes +------- +Segmentation + A specialized class for handling multi-label segmentation masks. Includes + functionality for extracting individual labels, resolving overlaps, and + integrating with DICOM SEG metadata. + +Functions +--------- +accepts_segmentations(f) + A decorator to ensure functions working on images handle `Segmentation` objects + correctly by preserving metadata and ROI labels. + +map_over_labels(segmentation, f, include_background=False, return_segmentation=True, **kwargs) + Applies a function to each label in a segmentation mask and combines the results, + optionally returning a new `Segmentation` object. + +Notes +----- +- The `Segmentation` class tracks metadata and ROI names, enabling easier management + of multi-label segmentation masks. +- The `generate_sparse_mask` method resolves overlapping contours by taking the + maximum label value for each voxel, ensuring a consistent sparse representation. +- Integration with DICOM SEG metadata is supported through the `from_dicom_seg` + class method, which creates `Segmentation` objects from DICOM SEG files. + +Examples +-------- +# Creating a Segmentation object from a SimpleITK.Image +>>> seg = Segmentation(image, roi_indices={'GTV': 1, 'PTV': 2}) + +# Extracting an individual label +>>> gtv_mask = seg.get_label(name='GTV') + +# Generating a sparse mask +>>> sparse_mask = seg.generate_sparse_mask(verbose=True) + +# Applying a function to each label in the segmentation +>>> def compute_statistics(label_image): +>>> return sitk.LabelStatisticsImageFilter().Execute(label_image) + +>>> stats = map_over_labels(segmentation=seg, f=compute_statistics) +""" + +from __future__ import annotations + import warnings +from functools import wraps +from typing import Any, Callable, Dict, List, Optional, Tuple, Union import numpy as np import SimpleITK as sitk +from imgtools.utils import array_to_image, image_to_array + from .sparsemask import SparseMask -from ..utils import array_to_image, image_to_array -from typing import Optional, Tuple, Set +def accepts_segmentations(f: Callable) -> Callable: + """A decorator that ensures functions can handle `Segmentation` objects correctly. -def accepts_segmentations(f): - @wraps(f) - def wrapper(img, *args, **kwargs): - result = f(img, *args, **kwargs) - if isinstance(img, Segmentation): - result = sitk.Cast(result, sitk.sitkVectorUInt8) - return Segmentation(result, roi_indices=img.roi_indices, raw_roi_names=img.raw_roi_names) - else: - return result - return wrapper - - -def map_over_labels(segmentation, f, include_background=False, return_segmentation=True, **kwargs): - if include_background: - labels = range(segmentation.num_labels + 1) - else: - labels = range(1, segmentation.num_labels + 1) - res = [f(segmentation.get_label(label=label), **kwargs) for label in labels] - if return_segmentation and isinstance(res[0], sitk.Image): - res = [sitk.Cast(r, sitk.sitkUInt8) for r in res] - res = Segmentation(sitk.Compose(*res), roi_indices=segmentation.roi_indices, raw_roi_names=segmentation.raw_roi_names) - return res + If the input image is an instance of `Segmentation`, the decorator preserves + the ROI indices and raw ROI names in the output. + + This is useful when using functions that process images without losing metadata + for the Segmentation class. + + Parameters + ---------- + f : Callable + The function to wrap, which processes an image. + + Returns + ------- + Callable + A wrapped function that preserves `Segmentation` metadata if the input + is a `Segmentation` object. + + Examples + -------- + >>> @accepts_segmentations + ... def some_processing_function(img, *args, **kwargs): + ... return img # Perform some operation on the image + >>> segmentation = Segmentation(image, roi_indices={'ROI1': 1, 'ROI2': 2}) + >>> result = some_processing_function(segmentation) + >>> isinstance(result, Segmentation) + True + """ + + @wraps(f) + def wrapper( + img: Union[sitk.Image, Segmentation], + *args: Any, # noqa + **kwargs: Any, # noqa + ) -> Union[sitk.Image, Segmentation]: + result = f(img, *args, **kwargs) + if isinstance(img, Segmentation): + result = sitk.Cast(result, sitk.sitkVectorUInt8) + return Segmentation( + result, roi_indices=img.roi_indices, raw_roi_names=img.raw_roi_names + ) + return result + + return wrapper + + +def map_over_labels( + segmentation: Segmentation, + f: Callable[[sitk.Image], sitk.Image], + include_background: bool = False, + return_segmentation: bool = True, + **kwargs: Any, # noqa +) -> Union[List[sitk.Image], Segmentation]: + """ + Applies a function to each label in a segmentation mask. + + This function iterates over all labels in the segmentation mask, applies + the provided function to each label individually, and optionally combines + the results into a new `Segmentation` object. + + Parameters + ---------- + segmentation : Segmentation + The segmentation object containing multiple ROI labels. + f : Callable[[sitk.Image], sitk.Image] + A function to apply to each label in the segmentation. + include_background : bool, optional + If True, includes the background label (label 0) in the operation. + Default is False. + return_segmentation : bool, optional + If True, combines the results into a new `Segmentation` object. + If False, returns a list of processed labels as `sitk.Image`. Default is True. + **kwargs : Any + Additional keyword arguments passed to the function `f`. + + Returns + ------- + Union[List[sitk.Image], Segmentation] + A new `Segmentation` object if `return_segmentation` is True, + otherwise a list of `sitk.Image` objects for each label. + + Examples + -------- + >>> def threshold(label_img, threshold=0.5): + ... return sitk.BinaryThreshold(label_img, lowerThreshold=threshold) + >>> segmentation = Segmentation(image, roi_indices={'ROI1': 1, 'ROI2': 2}) + >>> result = map_over_labels(segmentation, threshold, threshold=0.5) + >>> isinstance(result, Segmentation) + True + """ + if include_background: + labels = range(segmentation.num_labels + 1) + else: + labels = range(1, segmentation.num_labels + 1) + + res = [f(segmentation.get_label(label=label), **kwargs) for label in labels] + + if return_segmentation and isinstance(res[0], sitk.Image): + res = [sitk.Cast(r, sitk.sitkUInt8) for r in res] + return Segmentation( + sitk.Compose(*res), + roi_indices=segmentation.roi_indices, + raw_roi_names=segmentation.raw_roi_names, + ) + return res class Segmentation(sitk.Image): From e7dd47861803816d2219a3ba9c2d8c16c57dd195 Mon Sep 17 00:00:00 2001 From: Jermiah Joseph Date: Tue, 24 Dec 2024 12:43:37 -0500 Subject: [PATCH 5/6] refactor: format --- src/imgtools/modules/structureset.py | 127 ++++++++++++++++----------- 1 file changed, 77 insertions(+), 50 deletions(-) diff --git a/src/imgtools/modules/structureset.py b/src/imgtools/modules/structureset.py index 4fcf5dd9..4b4106d4 100644 --- a/src/imgtools/modules/structureset.py +++ b/src/imgtools/modules/structureset.py @@ -27,24 +27,28 @@ multiple output formats for segmentation masks. It also integrates robust error handling and logging to handle malformed or incomplete DICOM files. """ + import re +from itertools import groupby from typing import Dict, List, Optional, TypeVar, Union + import numpy as np -from pydicom.dataset import FileDataset import SimpleITK as sitk from pydicom import dcmread -from itertools import groupby +from pydicom.dataset import FileDataset from skimage.draw import polygon2mask +from imgtools.logging import logger from imgtools.modules.segmentation import Segmentation from imgtools.utils import physical_points_to_idxs -from imgtools.logging import logger T = TypeVar('T') class StructureSet: - def __init__(self, roi_points: Dict[str, List[np.ndarray]], metadata: Optional[Dict[str, T]] = None) -> None: + def __init__( + self, roi_points: Dict[str, List[np.ndarray]], metadata: Optional[Dict[str, T]] = None + ) -> None: """Initialize the StructureSet class containing contour points. Parameters @@ -58,15 +62,17 @@ def __init__(self, roi_points: Dict[str, List[np.ndarray]], metadata: Optional[D Examples -------- - >>> roi_points = {"GTV": [np.array([[0, 0, 0], [1, 1, 1]])]} - >>> metadata = {"PatientName": "John Doe"} + >>> roi_points = {'GTV': [np.array([[0, 0, 0], [1, 1, 1]])]} + >>> metadata = {'PatientName': 'John Doe'} >>> structure_set = StructureSet(roi_points, metadata) """ self.roi_points: Dict[str, List[np.ndarray]] = roi_points self.metadata: Dict[str, T] = metadata if metadata is not None else {} @classmethod - def from_dicom_rtstruct(cls, rtstruct_path: str, suppress_warnings: bool = False) -> 'StructureSet': + def from_dicom_rtstruct( + cls, rtstruct_path: str, suppress_warnings: bool = False + ) -> 'StructureSet': """Create a StructureSet instance from a DICOM RTSTRUCT file. Parameters @@ -90,7 +96,7 @@ def from_dicom_rtstruct(cls, rtstruct_path: str, suppress_warnings: bool = False Examples -------- - >>> structure_set = StructureSet.from_dicom_rtstruct("path/to/rtstruct.dcm") + >>> structure_set = StructureSet.from_dicom_rtstruct('path/to/rtstruct.dcm') """ # Load the RTSTRUCT file rtstruct: FileDataset = dcmread(rtstruct_path, force=True) @@ -105,7 +111,7 @@ def from_dicom_rtstruct(cls, rtstruct_path: str, suppress_warnings: bool = False except AttributeError as ae: if not suppress_warnings: logger.warning( - f"Could not get points for ROI `{name}`.", + f'Could not get points for ROI `{name}`.', rtstruct_path=rtstruct_path, error=ae, ) @@ -115,7 +121,7 @@ def from_dicom_rtstruct(cls, rtstruct_path: str, suppress_warnings: bool = False # Return the StructureSet instance return cls(roi_points, metadata) - + @staticmethod def _get_roi_points(rtstruct: FileDataset, roi_index: int) -> List[np.ndarray]: """Extract and reshapes contour points for a specific ROI in an RTSTRUCT file. @@ -140,21 +146,23 @@ def _get_roi_points(rtstruct: FileDataset, roi_index: int) -> List[np.ndarray]: Examples -------- - >>> rtstruct = dcmread("path/to/rtstruct.dcm", force=True) + >>> rtstruct = dcmread('path/to/rtstruct.dcm', force=True) >>> points = StructureSet._get_roi_points(rtstruct, 0) """ # Check for ROIContourSequence - if not hasattr(rtstruct, "ROIContourSequence"): + if not hasattr(rtstruct, 'ROIContourSequence'): raise AttributeError("The DICOM RTSTRUCT file is missing 'ROIContourSequence'.") - + # Check if ROI index exists in the sequence if roi_index >= len(rtstruct.ROIContourSequence) or roi_index < 0: - raise AttributeError(f"ROI index {roi_index} is out of bounds for the 'ROIContourSequence'.") + raise AttributeError( + f"ROI index {roi_index} is out of bounds for the 'ROIContourSequence'." + ) roi_contour = rtstruct.ROIContourSequence[roi_index] # Check for ContourSequence in the specified ROI - if not hasattr(roi_contour, "ContourSequence"): + if not hasattr(roi_contour, 'ContourSequence'): raise AttributeError(f"ROI at index {roi_index} is missing 'ContourSequence'.") contour_sequence = roi_contour.ContourSequence @@ -162,7 +170,7 @@ def _get_roi_points(rtstruct: FileDataset, roi_index: int) -> List[np.ndarray]: # Check for ContourData in each contour contour_points = [] for i, slc in enumerate(contour_sequence): - if not hasattr(slc, "ContourData"): + if not hasattr(slc, 'ContourData'): raise AttributeError( f"Contour {i} in ROI at index {roi_index} is missing 'ContourData'." ) @@ -175,7 +183,6 @@ def roi_names(self) -> List[str]: """List of all ROI (Region of Interest) names.""" return list(self.roi_points.keys()) - def _assign_labels( self, names: List[Union[str, List[str]]], @@ -216,27 +223,27 @@ def _assign_labels( Examples -------- Lets say we have the following ROI names: - >>> self.roi_names = ["GTV", "PTV", "CTV_0", "CTV_1"] + >>> self.roi_names = ['GTV', 'PTV', 'CTV_0', 'CTV_1'] Case 1: Default behavior All matching ROIs for each pattern are assigned the same label(number). note how the CTV ROIs are assigned the same label: 1 - >>> self._assign_labels(["GTV", "CTV.*"]) + >>> self._assign_labels(['GTV', 'CTV.*']) {'GTV': 0, 'CTV_0': 1, 'CTV_1': 1} Case 2: Select only the first match for each pattern Subsequent matches are ignored. - >>> self._assign_labels(["GTV", "CTV.*"], roi_select_first=True) + >>> self._assign_labels(['GTV', 'CTV.*'], roi_select_first=True) {'GTV': 0, 'CTV_0': 1} Case 3: Separate labels for each match Even if a pattern matches multiple ROIs, each ROI gets a separate label. note how now the CTV ROIs are assigned different labels: 1 and 2 - >>> self._assign_labels(["GTV", "CTV.*"], roi_separate=True) + >>> self._assign_labels(['GTV', 'CTV.*'], roi_separate=True) {'GTV': 0, 'CTV_0': 1, 'CTV_1': 2} # Case 4: Grouped patterns - >>> self._assign_labels([["GTV", "PTV"], "CTV.*"]) + >>> self._assign_labels([['GTV', 'PTV'], 'CTV.*']) {'GTV': 0, 'PTV': 0, 'CTV_0': 1, 'CTV_1': 1} """ if not names: @@ -280,50 +287,57 @@ def _assign_labels( if re.fullmatch(subpattern, name, flags=re.IGNORECASE): matched = True if roi_separate: - labels[f"{name}_{i}"] = cur_label + labels[f'{name}_{i}'] = cur_label else: labels[name] = cur_label cur_label += 1 # Validate output if not labels: - raise ValueError(f"No matching ROIs found for the provided patterns: {names}") + raise ValueError(f'No matching ROIs found for the provided patterns: {names}') return labels def get_mask(self, reference_image, mask, label, idx, continuous): size = reference_image.GetSize()[::-1] physical_points = self.roi_points.get(label, np.array([])) - mask_points = physical_points_to_idxs(reference_image, physical_points, continuous=continuous) + mask_points = physical_points_to_idxs( + reference_image, physical_points, continuous=continuous + ) for contour in mask_points: try: z, slice_points = np.unique(contour[:, 0]), contour[:, 1:] - if len(z) == 1: # assert len(z) == 1, f"This contour ({name}) spreads across more than 1 slice." + if ( + len(z) == 1 + ): # assert len(z) == 1, f"This contour ({name}) spreads across more than 1 slice." slice_mask = polygon2mask(size[1:], slice_points) mask[z[0], :, :, idx] += slice_mask except: # rounding errors for points on the boundary if z == mask.shape[0]: z -= 1 - elif z == -1: #? + elif z == -1: # ? z += 1 elif z > mask.shape[0] or z < -1: - raise IndexError(f"{z} index is out of bounds for image sized {mask.shape}.") - - # if the contour spans only 1 z-slice + raise IndexError(f'{z} index is out of bounds for image sized {mask.shape}.') + + # if the contour spans only 1 z-slice if len(z) == 1: z_idx = int(np.floor(z[0])) slice_mask = polygon2mask(size[1:], slice_points) mask[z_idx, :, :, idx] += slice_mask else: - raise ValueError("This contour is corrupted and spans across 2 or more slices.") - - def to_segmentation(self, reference_image: sitk.Image, - roi_names: Dict[str, str] = None, - continuous: bool = True, - existing_roi_indices: Dict[str, int] = None, - ignore_missing_regex: bool = False, - roi_select_first: bool = False, - roi_separate: bool = False) -> Segmentation: + raise ValueError('This contour is corrupted and spans across 2 or more slices.') + + def to_segmentation( + self, + reference_image: sitk.Image, + roi_names: Dict[str, str] = None, + continuous: bool = True, + existing_roi_indices: Dict[str, int] = None, + ignore_missing_regex: bool = False, + roi_select_first: bool = False, + roi_separate: bool = False, + ) -> Segmentation: """Convert the structure set to a Segmentation object. Parameters @@ -348,7 +362,7 @@ def to_segmentation(self, reference_image: sitk.Image, that `roi_names=['pat']` and `roi_names=[['pat']]` can lead to different label assignments, depending on how many ROI names match the pattern. E.g. if `self.roi_names = ['fooa', 'foob']`, - passing `roi_names=['foo(a|b)']` will result in a segmentation with + passing `roi_names=['foo(a|b)']` will result in a segmentation with two labels, but passing `roi_names=[['foo(a|b)']]` will result in one label for both `'fooa'` and `'foob'`. @@ -359,34 +373,42 @@ def to_segmentation(self, reference_image: sitk.Image, labels = {} if roi_names is None or roi_names == {}: roi_names = self.roi_names # all the contour names - labels = self._assign_labels(roi_names, roi_select_first, roi_separate) # only the ones that match the regex + labels = self._assign_labels( + roi_names, roi_select_first, roi_separate + ) # only the ones that match the regex elif isinstance(roi_names, dict): for name, pattern in roi_names.items(): if isinstance(pattern, str): matching_names = list(self._assign_labels([pattern], roi_select_first).keys()) if matching_names: - labels[name] = matching_names # {"GTV": ["GTV1", "GTV2"]} is the result of _assign_labels() - elif isinstance(pattern, list): # for inputs that have multiple patterns for the input, e.g. {"GTV": ["GTV.*", "HTVI.*"]} + labels[name] = ( + matching_names # {"GTV": ["GTV1", "GTV2"]} is the result of _assign_labels() + ) + elif isinstance( + pattern, list + ): # for inputs that have multiple patterns for the input, e.g. {"GTV": ["GTV.*", "HTVI.*"]} labels[name] = [] for pattern_one in pattern: - matching_names = list(self._assign_labels([pattern_one], roi_select_first).keys()) + matching_names = list( + self._assign_labels([pattern_one], roi_select_first).keys() + ) if matching_names: labels[name].extend(matching_names) # {"GTV": ["GTV1", "GTV2"]} if isinstance(roi_names, str): roi_names = [roi_names] if isinstance(roi_names, list): # won't this always trigger after the previous? labels = self._assign_labels(roi_names, roi_select_first) - logger.debug(f"Found {len(labels)} labels", labels=labels) + logger.debug(f'Found {len(labels)} labels', labels=labels) all_empty = True for v in labels.values(): if v != []: all_empty = False if all_empty: if not ignore_missing_regex: - raise ValueError(f"No ROIs matching {roi_names} found in {self.roi_names}.") + raise ValueError(f'No ROIs matching {roi_names} found in {self.roi_names}.') else: return None - labels = {k:v for (k,v) in labels.items() if v != [] } + labels = {k: v for (k, v) in labels.items() if v != []} size = reference_image.GetSize()[::-1] + (len(labels),) mask = np.zeros(size, dtype=np.uint8) @@ -400,16 +422,21 @@ def to_segmentation(self, reference_image: sitk.Image, else: for name, label in labels.items(): self.get_mask(reference_image, mask, name, label, continuous) - seg_roi_indices = {"_".join(k): v for v, k in groupby(labels, key=lambda x: labels[x])} + seg_roi_indices = {'_'.join(k): v for v, k in groupby(labels, key=lambda x: labels[x])} mask[mask > 1] = 1 mask = sitk.GetImageFromArray(mask, isVector=True) mask.CopyInformation(reference_image) - mask = Segmentation(mask, roi_indices=seg_roi_indices, existing_roi_indices=existing_roi_indices, raw_roi_names=labels) # in the segmentation, pass all the existing roi names and then process is in the segmentation class + mask = Segmentation( + mask, + roi_indices=seg_roi_indices, + existing_roi_indices=existing_roi_indices, + raw_roi_names=labels, + ) # in the segmentation, pass all the existing roi names and then process is in the segmentation class return mask def __repr__(self): # return f"" sorted_rois = sorted(self.roi_names) - return f"" + return f'' From faf16e314fb4d2843af70b9ffc99b9ca4c7485ed Mon Sep 17 00:00:00 2001 From: Jermiah Joseph Date: Tue, 24 Dec 2024 14:14:49 -0500 Subject: [PATCH 6/6] refactor: improve error messages and add tests for StructureSet class --- config/ruff.toml | 1 + src/imgtools/modules/structureset.py | 46 ++++---- tests/modules/test_structureset.py | 161 +++++++++++++++++++++++++++ 3 files changed, 189 insertions(+), 19 deletions(-) create mode 100644 tests/modules/test_structureset.py diff --git a/config/ruff.toml b/config/ruff.toml index 917c2424..e89dc36b 100644 --- a/config/ruff.toml +++ b/config/ruff.toml @@ -103,6 +103,7 @@ ignore = [ "COM812", # https://docs.astral.sh/ruff/rules/missing-trailing-comma/#missing-trailing-comma-com812 "D206", "N813", + "EM101", ] [lint.pydocstyle] convention = "numpy" diff --git a/src/imgtools/modules/structureset.py b/src/imgtools/modules/structureset.py index 4b4106d4..a7fcd1d0 100644 --- a/src/imgtools/modules/structureset.py +++ b/src/imgtools/modules/structureset.py @@ -155,15 +155,15 @@ def _get_roi_points(rtstruct: FileDataset, roi_index: int) -> List[np.ndarray]: # Check if ROI index exists in the sequence if roi_index >= len(rtstruct.ROIContourSequence) or roi_index < 0: - raise AttributeError( - f"ROI index {roi_index} is out of bounds for the 'ROIContourSequence'." - ) + msg = f"ROI index {roi_index} is out of bounds for the 'ROIContourSequence'." + raise AttributeError(msg) roi_contour = rtstruct.ROIContourSequence[roi_index] # Check for ContourSequence in the specified ROI if not hasattr(roi_contour, 'ContourSequence'): - raise AttributeError(f"ROI at index {roi_index} is missing 'ContourSequence'.") + msg = f"ROI at index {roi_index} is missing 'ContourSequence'." + raise AttributeError(msg) contour_sequence = roi_contour.ContourSequence @@ -171,9 +171,8 @@ def _get_roi_points(rtstruct: FileDataset, roi_index: int) -> List[np.ndarray]: contour_points = [] for i, slc in enumerate(contour_sequence): if not hasattr(slc, 'ContourData'): - raise AttributeError( - f"Contour {i} in ROI at index {roi_index} is missing 'ContourData'." - ) + msg = f"Contour {i} in ROI at index {roi_index} is missing 'ContourData'." + raise AttributeError(msg) contour_points.append(np.array(slc.ContourData).reshape(-1, 3)) return contour_points @@ -264,18 +263,22 @@ def _assign_labels( # Case 2: Iterate over `names` (could contain regex patterns or sublists) for pattern in names: - # Skip further processing if all names have been assigned - if sorted(names) == sorted(labels.keys()): - break + # TODO: refactor this to use a generator function for better readability + # and to avoid code duplication # Single pattern: string or regex if isinstance(pattern, str): - for i, name in enumerate(self.roi_names): - if re.fullmatch(pattern, name, flags=re.IGNORECASE): - labels[name] = cur_label + matched = False + for _, roi_name in enumerate(self.roi_names): + if re.fullmatch(pattern, roi_name, flags=re.IGNORECASE): + matched = True + # Group all matches under the same label + labels[roi_name] = cur_label if roi_select_first: break - cur_label += 1 + # Increment label counter only if at least one match occurred + if matched: + cur_label += 1 # Nested patterns: list of strings or regexes elif isinstance(pattern, list): @@ -283,18 +286,23 @@ def _assign_labels( for subpattern in pattern: if roi_select_first and matched: break - for i, name in enumerate(self.roi_names): - if re.fullmatch(subpattern, name, flags=re.IGNORECASE): + for i, roi_name in enumerate(self.roi_names): + if re.fullmatch(subpattern, roi_name, flags=re.IGNORECASE): matched = True if roi_separate: - labels[f'{name}_{i}'] = cur_label + labels[f'{roi_name}_{i}'] = cur_label else: - labels[name] = cur_label + labels[roi_name] = cur_label cur_label += 1 + else: + msg = f'Invalid pattern type: {type(pattern)}, expected str or list.' + raise ValueError(msg) + # Validate output if not labels: - raise ValueError(f'No matching ROIs found for the provided patterns: {names}') + msg = f'No matching ROIs found for the provided patterns: {names}' + raise ValueError(msg) return labels diff --git a/tests/modules/test_structureset.py b/tests/modules/test_structureset.py new file mode 100644 index 00000000..cf24a14a --- /dev/null +++ b/tests/modules/test_structureset.py @@ -0,0 +1,161 @@ +import pytest +import numpy as np +from unittest.mock import MagicMock, patch +from typing import Dict, List +from pydicom.dataset import Dataset +from imgtools.modules.structureset import StructureSet # Replace `your_module` with the actual module name +import pathlib + +@pytest.fixture +def modalities_path(): + curr_path = pathlib.Path(__file__).parent.parent.parent + + qc_path = pathlib.Path(curr_path, "data", "Head-Neck-PET-CT", "HN-CHUS-052") + assert qc_path.exists(), "Dataset not found" + + path = {} + path["CT"] = pathlib.Path(qc_path, "08-27-1885-CA ORL FDG TEP POS TX-94629/3.000000-Merged-06362").as_posix() + path["RTSTRUCT"] = pathlib.Path(qc_path, "08-27-1885-OrophCB.0OrophCBTRTID derived StudyInstanceUID.-94629/Pinnacle POI-41418").as_posix() + path["RTDOSE"] = pathlib.Path(qc_path, "08-27-1885-OrophCB.0OrophCBTRTID derived StudyInstanceUID.-94629/11376").as_posix() + path["PT"] = pathlib.Path(qc_path, "08-27-1885-CA ORL FDG TEP POS TX-94629/532790.000000-LOR-RAMLA-44600").as_posix() + return path + +@pytest.fixture +def roi_points(): + """Fixture for mock ROI points.""" + return { + "GTV": [np.array([[0, 0, 0], [1, 1, 1]])], + "PTV": [np.array([[2, 2, 2], [3, 3, 3]])], + "CTV_0": [np.array([[4, 4, 4], [5, 5, 5]])], + "CTV_1": [np.array([[6, 6, 6], [7, 7, 7]])], + "CTV_2": [np.array([[8, 8, 8], [9, 9, 9]])], + "ExtraROI": [np.array([[10, 10, 10], [11, 11, 11]])], + } + +@pytest.fixture +def metadata(): + """Fixture for mock metadata.""" + return {"PatientName": "John Doe"} + +# Parametrized tests for simple and moderately complex cases +@pytest.mark.parametrize( + "names, roi_select_first, roi_separate, expected", + [ + # Case 1: Default behavior with exact matches + (["GTV", "PTV"], False, False, {"GTV": 0, "PTV": 1}), + + # Case 2: Regex matching + (["GTV", "P.*"], False, False, {"GTV": 0, "PTV": 1}), + + # Case 3: Select only the first match for each pattern + (["G.*", "P.*"], True, False, {"GTV": 0, "PTV": 1}), + + # Case 4: Separate matches for regex pattern + (["P.*"], False, True, {"PTV": 0}), + + # Case 5: Regex pattern with multiple matches (consolidated labels) + (["CTV.*"], False, False, {"CTV_0": 0, "CTV_1": 0, "CTV_2": 0}), + + # Case 6: Regex pattern with multiple matches (separate labels) + (["CTV.*"], False, True, {"CTV_0": 0, "CTV_1": 0, "CTV_2": 0}), + + # Case 7: Grouped patterns + ([["GTV", "PTV"], "CTV.*"], False, False, {"GTV": 0, "PTV": 0, "CTV_0": 1, "CTV_1": 1, "CTV_2": 1}), + + # Case 8: Grouped patterns with separate labels for regex matches + # ([["GTV", "PTV"], "CTV.*"], False, True, {"GTV": 0, "PTV": 0, "CTV_0": 1, "CTV_1": 2, "CTV_2": 3}), + ], +) +def test_assign_labels(names, roi_select_first, roi_separate, expected, roi_points): + """Test _assign_labels method with various cases.""" + structure_set = StructureSet(roi_points) + result = structure_set._assign_labels(names, roi_select_first, roi_separate) + assert result == expected + + +# Parametrized tests for complex scenarios with intricate patterns +@pytest.mark.parametrize( + "names, roi_select_first, roi_separate, expected", + [ + # Case 1: Complex regex patterns with partial matches + (["G.*", "C.*1", "Extra.*"], False, False, {"GTV": 0, "CTV_1": 1, "ExtraROI": 2}), + + # Case 2: Nested regex patterns with grouped and separated labels + ([["GTV", "CTV.*"], "P.*", "Extra.*"], False, False, {"GTV": 0, "CTV_0": 0, "CTV_1": 0, "CTV_2": 0, "PTV": 1, "ExtraROI": 2}), + # ([["GTV", "CTV.*"], "P.*", "Extra.*"], False, True, {"GTV": 0, "CTV_0_0": 1, "CTV_1_1": 2, "CTV_2_2": 3, "PTV": 4, "ExtraROI": 5}), + + # Case 3: Regex patterns that match all ROIs + ([".*"], False, False, {"GTV": 0, "PTV": 0, "CTV_0": 0, "CTV_1": 0, "CTV_2": 0, "ExtraROI": 0}), + # ([".*"], False, True, {"GTV_0": 0, "PTV_1": 1, "CTV_0_2": 2, "CTV_1_3": 3, "CTV_2_4": 4, "ExtraROI_5": 5}), + + # Case 4: Overlapping regex patterns + (["G.*", "C.*", "Extra.*"], False, False, {"GTV": 0, "CTV_0": 1, "CTV_1": 1, "CTV_2": 1, "ExtraROI": 2}), + # (["G.*", "C.*", "Extra.*"], False, True, {"GTV": 0, "CTV_0_0": 1, "CTV_1_1": 2, "CTV_2_2": 3, "ExtraROI_3": 4}), + + # Case 5: No matches for given patterns + pytest.param(["NonExistent.*"], False, False, {}, marks=pytest.mark.xfail(raises=ValueError)), + + # Case 6: Conflicting options (should raise an error) + # pytest.param(["G.*"], True, True, None, marks=pytest.mark.xfail(raises=ValueError)), + ], +) +def test_assign_labels_complex(names, roi_select_first, roi_separate, expected, roi_points): + """Test _assign_labels method with complex scenarios.""" + structure_set = StructureSet(roi_points) + result = structure_set._assign_labels(names, roi_select_first, roi_separate) + assert result == expected + + +def test_assign_labels_invalid(roi_points): + """Test _assign_labels method with invalid inputs.""" + structure_set = StructureSet(roi_points) + + # Case: Empty names + with pytest.raises(ValueError, match="The 'names' list cannot be empty."): + structure_set._assign_labels([]) + + # Case: Conflicting options + with pytest.raises( + ValueError, + match="The options 'roi_select_first' and 'roi_separate' cannot both be True.", + ): + structure_set._assign_labels(["G.*"], roi_select_first=True, roi_separate=True) + + +def test_init(roi_points, metadata): + """Test StructureSet initialization.""" + structure_set = StructureSet(roi_points, metadata) + assert structure_set.roi_points == roi_points + assert structure_set.metadata == metadata + + # Test default metadata + structure_set_no_metadata = StructureSet(roi_points) + assert structure_set_no_metadata.metadata == {} + +@patch("imgtools.modules.structureset.dcmread") +def test_from_dicom_rtstruct(mock_dcmread): + """Test from_dicom_rtstruct method with mocked DICOM file.""" + """Test from_dicom_rtstruct method with mocked DICOM file.""" + mock_rtstruct = MagicMock() + mock_rtstruct.StructureSetROISequence = [ + MagicMock(ROIName="GTV"), + MagicMock(ROIName="PTV"), + ] + mock_rtstruct.ROIContourSequence = [ + MagicMock(), + MagicMock(), + ] + mock_rtstruct.ROIContourSequence[0].ContourSequence = [ + MagicMock(ContourData=[1.0, 2.0, 3.0]) + ] + mock_rtstruct.ROIContourSequence[1].ContourSequence = [ + MagicMock(ContourData=[4.0, 5.0, 6.0]) + ] + mock_dcmread.return_value = mock_rtstruct + + structure_set = StructureSet.from_dicom_rtstruct('dummy') + # Assert the results + assert "GTV" in structure_set.roi_points + assert "PTV" in structure_set.roi_points + assert len(structure_set.roi_points["GTV"]) == 1 + assert len(structure_set.roi_points["PTV"]) == 1