diff --git a/data/test_files/seg_image_sm_control.dcm b/data/test_files/seg_image_sm_control.dcm index 3ab6a22a..bf695236 100644 Binary files a/data/test_files/seg_image_sm_control.dcm and b/data/test_files/seg_image_sm_control.dcm differ diff --git a/data/test_files/seg_image_sm_dots_tiled_full.dcm b/data/test_files/seg_image_sm_dots_tiled_full.dcm new file mode 100644 index 00000000..8421dddc Binary files /dev/null and b/data/test_files/seg_image_sm_dots_tiled_full.dcm differ diff --git a/docs/ann.rst b/docs/ann.rst index d98f8199..47bd32f8 100644 --- a/docs/ann.rst +++ b/docs/ann.rst @@ -184,8 +184,20 @@ transmitted over network, etc. Reading Existing Bulk Annotation Objects ---------------------------------------- -You can read an existing bulk annotation object using `pydicom` and then convert -to the `highdicom` object like this: +You can read an existing bulk annotation object from file using the +:func:`highdicom.ann.annread()` function: + +.. code-block:: python + + from pydicom import dcmread + import highdicom as hd + + ann = hd.ann.annread('data/test_files/sm_annotations.dcm') + + assert isinstance(ann, hd.ann.MicroscopyBulkSimpleAnnotations) + +Alternatively you can convert an existing ``pydicom.Dataset`` representing a +bulk annotation object to the `highdicom` object like this: .. code-block:: python @@ -198,7 +210,7 @@ to the `highdicom` object like this: assert isinstance(ann, hd.ann.MicroscopyBulkSimpleAnnotations) -Note that this example (and the following examples) uses an example file that +Note that these examples (and the following examples) use an example file that you can access from the test data in the `highdicom` repository. It was created using exactly the code in the construction example above. @@ -298,7 +310,7 @@ passed the data in to create the annotation with `highdicom`. 
import numpy as np graphic_data = group.get_graphic_data( - coordinate_type=ann.AnnotationCoordinateType, + coordinate_type=ann.annotation_coordinate_type, ) assert len(graphic_data) == 2 and isinstance(graphic_data[0], np.ndarray) diff --git a/docs/seg.rst b/docs/seg.rst index ebe73871..0adc53c6 100644 --- a/docs/seg.rst +++ b/docs/seg.rst @@ -486,6 +486,240 @@ segments. device_serial_number='1234567890', ) +Constructing SEG Images from a Total Pixel Matrix +------------------------------------------------- + +Some digital pathology images are represented as "tiled" images, +in which the full image (known as the "total pixel matrix") is divided up +into smaller rectangular regions in the row and column dimensions and each +region ("tile") is stored as a frame in a multiframe DICOM image. + +Segmentations of such images are stored as a tiled image in the same manner. +There are two options in `highdicom` for doing this. You can either pass each +tile/frame individually stacked as a 1D list down the first dimension of the +``pixel_array`` as we have already seen (with the location of each frame either +matching that of the corresponding frame in the source image or explicitly +specified in the ``plane_positions`` argument), or you can pass the 2D total +pixel matrix of the segmentation and have `highdicom` automatically create the +tiles for you. + +To enable this latter option, pass the ``pixel_array`` as a single frame (i.e. +a 2D labelmap array, a 3D labelmap array with a single frame stacked down the +first axis, or a 4D array with a single frame stacked down the first dimension +and any number of segments stacked down the last dimension) and set the +``tile_pixel_array`` argument to ``True``. You can optionally choose the size +(in pixels) of each tile using the ``tile_size`` argument, or, by default, the +tile size of the source image will be used (regardless of whether the +segmentation is represented at the same resolution as the source image). 
+ +If you need to specify the plane positions of the image explicitly, you should +pass a single item to the ``plane_positions`` argument giving the location of +the top left corner of the full total pixel matrix. Otherwise, all the usual +options are available to you. + +.. code-block:: python + + # Use an example slide microscopy image from the highdicom test data + # directory + sm_image = dcmread('data/test_files/sm_image.dcm') + + # The source image has multiple frames/tiles, but here we create a mask + # corresponding to the entire total pixel matrix + mask = np.zeros( + ( + sm_image.TotalPixelMatrixRows, + sm_image.TotalPixelMatrixColumns + ), + dtype=np.uint8, + ) + mask[38:43, 5:41] = 1 + + property_category = hd.sr.CodedConcept("91723000", "SCT", "Anatomical Structure") + property_type = hd.sr.CodedConcept("84640000", "SCT", "Nucleus") + segment_descriptions = [ + hd.seg.SegmentDescription( + segment_number=1, + segment_label='Segment #1', + segmented_property_category=property_category, + segmented_property_type=property_type, + algorithm_type=hd.seg.SegmentAlgorithmTypeValues.MANUAL, + ), + ] + + seg = hd.seg.Segmentation( + source_images=[sm_image], + pixel_array=mask, + segmentation_type=hd.seg.SegmentationTypeValues.BINARY, + segment_descriptions=segment_descriptions, + series_instance_uid=hd.UID(), + series_number=1, + sop_instance_uid=hd.UID(), + instance_number=1, + manufacturer='Foo Corp.', + manufacturer_model_name='Slide Segmentation Algorithm', + software_versions='0.0.1', + device_serial_number='1234567890', + tile_pixel_array=True, + ) + + # The result stores the mask as a set of 10 tiles of the non-empty region of + # the total pixel matrix, each of size (10, 10), matching the tile size of + # the source image + assert seg.NumberOfFrames == 10 + assert seg.pixel_array.shape == (10, 10, 10) + +``"TILED_FULL"`` and ``"TILED_SPARSE"`` +--------------------------------------- + +When the segmentation is stored as a tiled image, there are two 
ways in which +the locations of each frame/tile may be specified in the resulting object. +These are defined by the value of the +`"DimensionOrganizationType" +`_ +attribute: + +- ``"TILED_SPARSE"``: The position of each tile is explicitly defined in the + `"PerFrameFunctionalGroupsSequence" + `_ + of the object. This requires a potentially very long sequence to store all + the per-frame metadata, but does allow for the omission of empty frames from + the segmentation and other irregular tiling strategies. +- ``"TILED_FULL"``: The position of each tile is implicitly defined using a + predetermined order of the frames. This saves the need to store the per-frame + metadata but does not allow for the omission of empty frames of the + segmentation and is generally less flexible. It may also be simpler for a + receiving application to process, since the tiles are guaranteed to be + regularly and consistently ordered. + +You can control this behavior by specifying the +``dimension_organization_type`` parameter and passing a value of the +:class:`highdicom.DimensionOrganizationTypeValues` enum. The default value is +``"TILED_SPARSE"``. Generally, the ``"TILED_FULL"`` option will be used in +combination with the ``tile_pixel_array`` argument. + + +.. 
code-block:: python + + # Using the same example as above, this time as TILED_FULL + seg = hd.seg.Segmentation( + source_images=[sm_image], + pixel_array=mask, + segmentation_type=hd.seg.SegmentationTypeValues.BINARY, + segment_descriptions=segment_descriptions, + series_instance_uid=hd.UID(), + series_number=1, + sop_instance_uid=hd.UID(), + instance_number=1, + manufacturer='Foo Corp.', + manufacturer_model_name='Slide Segmentation Algorithm', + software_versions='0.0.1', + device_serial_number='1234567890', + tile_pixel_array=True, + omit_empty_frames=False, + dimension_organization_type=hd.DimensionOrganizationTypeValues.TILED_FULL, + ) + + # The result stores the mask as a set of 25 tiles of the entire region of + # the total pixel matrix, each of size (10, 10), matching the tile size of + # the source image + assert seg.NumberOfFrames == 25 + assert seg.pixel_array.shape == (25, 10, 10) + +Multi-resolution Pyramids +------------------------- + +Whole slide digital pathology images can often be very large and as such it +is common to represent them as *multi-resolution pyramids* of images, i.e. +to store multiple versions of the same image at different resolutions. This +helps viewers render the image at different zoom levels. + +Within DICOM, this can also extend to segmentations derived from whole slide +images. Multiple different SEG images may be stored, each representing the +same segmentation at a different resolution, as different instances within a +DICOM series. + +*highdicom* provides the :func:`highdicom.seg.create_segmentation_pyramid` +function to assist with this process. This function handles multiple related +scenarios: + +* Constructing a segmentation of a source image pyramid given a + segmentation pixel array of the highest resolution source image. + Highdicom performs the downsampling automatically to match the + resolution of the other source images. For this case, pass multiple + ``source_images`` and a single item in ``pixel_arrays``. 
+* Constructing a segmentation of a source image pyramid given user-provided + segmentation pixel arrays for each level in the source pyramid. For this + case, pass multiple ``source_images`` and a matching number of + ``pixel_arrays``. +* Constructing a segmentation of a single source image given multiple + user-provided downsampled segmentation pixel arrays. For this case, pass + a single item in ``source_images``, and multiple items in + ``pixel_arrays``. +* Constructing a segmentation of a single source image and a single + segmentation pixel array by downsampling by a given list of + ``downsample_factors``. For this case, pass a single item in + ``source_images``, a single item in ``pixel_arrays``, and a list of one + or more desired ``downsample_factors``. + +Here is a simple example of specifying a single source image and segmentation +array, and having *highdicom* create a multi-resolution pyramid segmentation +series at user-specified downsample factors. + +.. code-block:: python + + import highdicom as hd + from pydicom import dcmread + import numpy as np + + + # Use an example slide microscopy image from the highdicom test data + # directory + sm_image = dcmread('data/test_files/sm_image.dcm') + + # The source image has multiple frames/tiles, but here we create a mask + # corresponding to the entire total pixel matrix + mask = np.zeros( + ( + sm_image.TotalPixelMatrixRows, + sm_image.TotalPixelMatrixColumns + ), + dtype=np.uint8, + ) + mask[38:43, 5:41] = 1 + + property_category = hd.sr.CodedConcept("91723000", "SCT", "Anatomical Structure") + property_type = hd.sr.CodedConcept("84640000", "SCT", "Nucleus") + segment_descriptions = [ + hd.seg.SegmentDescription( + segment_number=1, + segment_label='Segment #1', + segmented_property_category=property_category, + segmented_property_type=property_type, + algorithm_type=hd.seg.SegmentAlgorithmTypeValues.MANUAL, + ), + ] + + # This will create a segmentation series of three images: one at the + # original 
source image resolution (implicit), one at half the size, and + # another at a quarter of the original size. + seg_pyramid = hd.seg.create_segmentation_pyramid( + source_images=[sm_image], + pixel_arrays=[mask], + segmentation_type=hd.seg.SegmentationTypeValues.BINARY, + segment_descriptions=segment_descriptions, + series_instance_uid=hd.UID(), + series_number=1, + manufacturer='Foo Corp.', + manufacturer_model_name='Slide Segmentation Algorithm', + software_versions='0.0.1', + device_serial_number='1234567890', + downsample_factors=[2.0, 4.0] + ) + +Note that the :func:`highdicom.seg.create_segmentation_pyramid` function always +behaves as if the ``tile_pixel_array`` input is ``True`` within the segmentation +constructor, i.e. it assumes that the input segmentation masks represent total +pixel matrices. Representation of Fractional SEGs --------------------------------- @@ -606,6 +840,21 @@ and 1): - The clear frame boundaries make retrieving individual frames from ``"FRACTIONAL"`` image files possible. +Multiprocessing +--------------- + +When creating large, multiframe ``"FRACTIONAL"`` segmentations using a +compressed transfer syntax, the time taken to compress the frames can become +large and dominate the time taken to create the segmentation. By default, +frames are compressed in series using the main process, however the ``workers`` +parameter allows you to specify a number of additional worker processes that +will be used to compress frames in parallel. Setting ``workers`` to a negative +number uses all available processes on your machine. Note that while this is +likely to result in significantly lower creation times for segmentations with +a very large number of frames, for segmentations with only a few frames the +additional overhead of spawning processes may in fact slow the entire +segmentation creation process down. + Geometry of SEG Images ---------------------- @@ -1101,6 +1350,88 @@ as stored in the SEG will be returned. # [0. 
0.2509804 0.5019608] +Reconstructing Total Pixel Matrices from Tiled Segmentations +------------------------------------------------------------ + +For segmentations of digital pathology images that are stored as tiled images, +the :meth:`highdicom.seg.Segmentation.get_pixels_by_source_frame()` method will +return the segmentation mask as a set of frames stacked down the first +dimension of the array. However, for such images, you typically want to work +with the large 2D total pixel matrix that is formed by correctly arranging the +tiles into a 2D array. `highdicom` provides the +:meth:`highdicom.seg.Segmentation.get_total_pixel_matrix()` method for this +purpose. + +Called without any parameters, it returns a 3D array containing the full total +pixel matrix. The first two dimensions are the spatial dimensions, and the +third is the segments dimension. Behind the scenes highdicom has stitched +together the required frames stored in the original file for you. Like with the +other methods described above, setting ``combine_segments`` to ``True`` +combines all the segments into, in this case, a 2D array. + +.. code-block:: python + + import highdicom as hd + + # Read in the segmentation using highdicom + seg = hd.seg.segread('data/test_files/seg_image_sm_control.dcm') + + # Get the full total pixel matrix + mask = seg.get_total_pixel_matrix() + + expected_shape = ( + seg.TotalPixelMatrixRows, + seg.TotalPixelMatrixColumns, + seg.number_of_segments, + ) + assert mask.shape == expected_shape + + # Combine the segments into a single array + mask = seg.get_total_pixel_matrix(combine_segments=True) + + assert mask.shape == (seg.TotalPixelMatrixRows, seg.TotalPixelMatrixColumns) + +Furthermore, you can request a sub-region of the full total pixel matrix by +specifying the start and/or stop indices for the rows and/or columns within the +total pixel matrix. Note that this method follows DICOM 1-based convention for +indexing rows and columns, i.e. 
the first row and column of the total pixel +matrix are indexed by the number 1 (not 0 as is common within Python). Negative +indices are also supported to index relative to the last row or column, with -1 +being the index of the last row or column. Like for standard Python indexing, +the stop indices are specified as one beyond the final row/column in the +returned array. Note that the requested region does not have to start or stop +at the edges of the underlying frames: `highdicom` stitches together only the +relevant parts of the frames to create the requested image for you. + +.. code-block:: python + + import highdicom as hd + + # Read in the segmentation using highdicom + seg = hd.seg.segread('data/test_files/seg_image_sm_control.dcm') + + # Get a region of the total pixel matrix + mask = seg.get_total_pixel_matrix( + combine_segments=True, + row_start=20, + row_end=40, + column_start=10, + column_end=20, + ) + + assert mask.shape == (20, 10) + + # A further example using negative indices. Since row_end is not provided, + # the default behavior is to include the last row in the total pixel matrix. 
+ mask = seg.get_total_pixel_matrix( + combine_segments=True, + row_start=21, + column_start=-30, + column_end=-25, + ) + + assert mask.shape == (30, 5) + Viewing DICOM SEG Images ------------------------ diff --git a/src/highdicom/ann/__init__.py b/src/highdicom/ann/__init__.py index f027d0b4..18b5a6e1 100644 --- a/src/highdicom/ann/__init__.py +++ b/src/highdicom/ann/__init__.py @@ -6,7 +6,7 @@ GraphicTypeValues, PixelOriginInterpretationValues, ) -from highdicom.ann.sop import MicroscopyBulkSimpleAnnotations +from highdicom.ann.sop import MicroscopyBulkSimpleAnnotations, annread SOP_CLASS_UIDS = { '1.2.840.10008.5.1.4.1.1.91.1', # Microscopy Bulk Simple Annotations @@ -20,4 +20,5 @@ 'Measurements', 'MicroscopyBulkSimpleAnnotations', 'PixelOriginInterpretationValues', + 'annread', ] diff --git a/src/highdicom/ann/content.py b/src/highdicom/ann/content.py index 03c3980f..32939dbd 100644 --- a/src/highdicom/ann/content.py +++ b/src/highdicom/ann/content.py @@ -115,13 +115,21 @@ def get_values(self, number_of_annotations: int) -> np.ndarray: return values @classmethod - def from_dataset(cls, dataset: Dataset) -> 'Measurements': + def from_dataset( + cls, + dataset: Dataset, + copy: bool = True + ) -> 'Measurements': """Construct instance from an existing dataset. Parameters ---------- dataset: pydicom.dataset.Dataset Dataset representing an item of the Measurements Sequence. + copy: bool + If True, the underlying dataset is deep-copied such that the + original dataset remains intact. If False, this operation will + alter the original dataset in place. 
Returns ------- @@ -138,17 +146,22 @@ def from_dataset(cls, dataset: Dataset) -> 'Measurements': module='microscopy-bulk-simple-annotations', base_path=['AnnotationGroupSequence', 'MeasurementsSequence'], ) - measurements = deepcopy(dataset) + if copy: + measurements = deepcopy(dataset) + else: + measurements = dataset measurements.__class__ = cls measurements.ConceptNameCodeSequence = [ CodedConcept.from_dataset( - measurements.ConceptNameCodeSequence[0] + measurements.ConceptNameCodeSequence[0], + copy=copy, ) ] measurements.MeasurementUnitsCodeSequence = [ CodedConcept.from_dataset( - measurements.MeasurementUnitsCodeSequence[0] + measurements.MeasurementUnitsCodeSequence[0], + copy=copy, ) ] @@ -246,8 +259,10 @@ def __init__( 'Argument "algorithm_identification" must be provided if ' f'argument "algorithm_type" is "{algorithm_type.value}".' ) - if not isinstance(algorithm_identification, - AlgorithmIdentificationSequence): + if not isinstance( + algorithm_identification, + AlgorithmIdentificationSequence + ): raise TypeError( 'Argument "algorithm_identification" must have type ' 'AlgorithmIdentificationSequence.' @@ -486,7 +501,7 @@ def anatomic_regions(self) -> List[CodedConcept]: @property def primary_anatomic_structures(self) -> List[CodedConcept]: """List[highdicom.sr.CodedConcept]: - List of anatomic anatomic structures the annotations represent. + List of anatomic structures the annotations represent. May be empty. """ @@ -621,6 +636,11 @@ def get_coordinates( 2D or 3D spatial coordinates of a graphical annotation """ # noqa: E501 + if annotation_number < 1: + raise ValueError( + 'Parameter "annotation_number" must be an integer greater ' + ' than 1.' 
+ ) graphic_data = self.get_graphic_data(coordinate_type) annotation_index = annotation_number - 1 return graphic_data[annotation_index] @@ -746,13 +766,21 @@ def _get_coordinate_index( return coordinate_index @classmethod - def from_dataset(cls, dataset: Dataset) -> 'AnnotationGroup': + def from_dataset( + cls, + dataset: Dataset, + copy: bool = True, + ) -> 'AnnotationGroup': """Construct instance from an existing dataset. Parameters ---------- dataset: pydicom.dataset.Dataset Dataset representing an item of the Annotation Group Sequence. + copy: bool + If True, the underlying dataset is deep-copied such that the + original dataset remains intact. If False, this operation will + alter the original dataset in place. Returns ------- @@ -769,38 +797,44 @@ def from_dataset(cls, dataset: Dataset) -> 'AnnotationGroup': module='microscopy-bulk-simple-annotations', base_path=['AnnotationGroupSequence'], ) - group = deepcopy(dataset) + if copy: + group = deepcopy(dataset) + else: + group = dataset group.__class__ = cls group._graphic_data = {} # will be handled by get_graphic_data() group.AnnotationPropertyCategoryCodeSequence = [ CodedConcept.from_dataset( - group.AnnotationPropertyCategoryCodeSequence[0] + group.AnnotationPropertyCategoryCodeSequence[0], + copy=copy, ) ] group.AnnotationPropertyTypeCodeSequence = [ CodedConcept.from_dataset( - group.AnnotationPropertyTypeCodeSequence[0] + group.AnnotationPropertyTypeCodeSequence[0], + copy=copy, ) ] if hasattr(group, 'AnnotationGroupAlgorithmIdentificationSequence'): group.AnnotationGroupAlgorithmIdentificationSequence = \ AlgorithmIdentificationSequence.from_sequence( - group.AnnotationGroupAlgorithmIdentificationSequence + group.AnnotationGroupAlgorithmIdentificationSequence, + copy=copy, ) if hasattr(group, 'MeasurementsSequence'): group.MeasurementsSequence = [ - Measurements.from_dataset(ds) + Measurements.from_dataset(ds, copy=copy) for ds in group.MeasurementsSequence ] if hasattr(group, 
'AnatomicRegionSequence'): group.AnatomicRegionSequence = [ - CodedConcept.from_dataset(ds) + CodedConcept.from_dataset(ds, copy=copy) for ds in group.AnatomicRegionSequence ] if hasattr(group, 'PrimaryAnatomicStructureSequence'): group.PrimaryAnatomicStructureSequence = [ - CodedConcept.from_dataset(ds) + CodedConcept.from_dataset(ds, copy=copy) for ds in group.PrimaryAnatomicStructureSequence ] diff --git a/src/highdicom/ann/sop.py b/src/highdicom/ann/sop.py index 8f07561d..6b18b1e4 100644 --- a/src/highdicom/ann/sop.py +++ b/src/highdicom/ann/sop.py @@ -2,9 +2,21 @@ from collections import defaultdict from copy import deepcopy from operator import eq -from typing import Any, cast, Dict, List, Optional, Sequence, Tuple, Union +from os import PathLike +from typing import ( + Any, + BinaryIO, + cast, + Dict, + List, + Optional, + Sequence, + Tuple, + Union, +) import numpy as np +from pydicom import dcmread from pydicom.dataset import Dataset from pydicom.sr.coding import Code from pydicom.uid import ( @@ -394,10 +406,20 @@ def get_annotation_groups( return groups + @property + def annotation_coordinate_type( + self + ) -> AnnotationCoordinateTypeValues: + """highdicom.ann.AnnotationCoordinateTypeValues: Annotation coordinate type.""" # noqa: E501 + return AnnotationCoordinateTypeValues( + self.AnnotationCoordinateType + ) + @classmethod def from_dataset( cls, - dataset: Dataset + dataset: Dataset, + copy: bool = True, ) -> 'MicroscopyBulkSimpleAnnotations': """Construct instance from an existing dataset. @@ -405,6 +427,10 @@ def from_dataset( ---------- dataset: pydicom.dataset.Dataset Dataset representing a Microscopy Bulk Simple Annotations instance. + copy: bool + If True, the underlying dataset is deep-copied such that the + original dataset remains intact. If False, this operation will + alter the original dataset in place. Returns ------- @@ -422,12 +448,38 @@ def from_dataset( 'instance.' 
) _check_little_endian(dataset) - ann = deepcopy(dataset) + if copy: + ann = deepcopy(dataset) + else: + ann = dataset ann.__class__ = MicroscopyBulkSimpleAnnotations ann.AnnotationGroupSequence = [ - AnnotationGroup.from_dataset(item) + AnnotationGroup.from_dataset(item, copy=copy) for item in ann.AnnotationGroupSequence ] return cast(MicroscopyBulkSimpleAnnotations, ann) + + +def annread( + fp: Union[str, bytes, PathLike, BinaryIO], +) -> MicroscopyBulkSimpleAnnotations: + """Read a bulk annotations object stored in DICOM File Format. + + Parameters + ---------- + fp: Union[str, bytes, os.PathLike] + Any file-like object representing a DICOM file containing a + MicroscopyBulkSimpleAnnotations object. + + Returns + ------- + highdicom.ann.MicroscopyBulkSimpleAnnotations + Bulk annotations object read from the file. + + """ + return MicroscopyBulkSimpleAnnotations.from_dataset( + dcmread(fp), + copy=False + ) diff --git a/src/highdicom/content.py b/src/highdicom/content.py index ddbed2a4..4bbc53da 100644 --- a/src/highdicom/content.py +++ b/src/highdicom/content.py @@ -19,6 +19,7 @@ UniversalEntityIDTypeValues, VOILUTFunctionValues, ) +from highdicom.sr.enum import ValueTypeValues from highdicom.sr.coding import CodedConcept from highdicom.sr.value_types import ( CodeContentItem, @@ -389,6 +390,40 @@ def from_sequence( pixel_measures.__class__ = PixelMeasuresSequence return cast(PixelMeasuresSequence, pixel_measures) + def __eq__(self, other: DataElementSequence) -> bool: + """Determine whether two sets of pixel measures are the same. + + Parameters + ---------- + other: pydicom.Sequence + A second pixel measures sequence, to be compared to self. + + Returns + ------- + bool: + True if all items match exactly. False otherwise. 
+ + """ + if not isinstance(other, DataElementSequence): + raise TypeError('Second item must be of type pydicom.Sequence.') + if len(other) != 1: + raise ValueError('Second item must have length 1.') + + if other[0].SliceThickness != self[0].SliceThickness: + return False + if other[0].PixelSpacing != self[0].PixelSpacing: + return False + if ( + hasattr(other[0], 'SpacingBetweenSlices') != + hasattr(self[0], 'SpacingBetweenSlices') + ): + return False + if hasattr(self[0], 'SpacingBetweenSlices'): + if other[0].SpacingBetweenSlices != self[0].SpacingBetweenSlices: + return False + + return True + class PlanePositionSequence(DataElementSequence): @@ -439,6 +474,11 @@ def __init__( item.XOffsetInSlideCoordinateSystem = DS(x, auto_format=True) item.YOffsetInSlideCoordinateSystem = DS(y, auto_format=True) item.ZOffsetInSlideCoordinateSystem = DS(z, auto_format=True) + if row_position < 0 or col_position < 0: + raise ValueError( + 'Both items in "pixel_matrix_position" must be positive ' + 'integers.' 
+ ) item.RowPositionInTotalImagePixelMatrix = row_position item.ColumnPositionInTotalImagePixelMatrix = col_position elif coordinate_system == CoordinateSystemNames.PATIENT: @@ -711,14 +751,80 @@ def __init__( """ # noqa: E501 super().__init__() + self._issuer_of_identifier = issuer_of_identifier if issuer_of_identifier_type is None: self.LocalNamespaceEntityID = issuer_of_identifier + self._issuer_of_identifier_type = None else: self.UniversalEntityID = issuer_of_identifier issuer_of_identifier_type = UniversalEntityIDTypeValues( issuer_of_identifier_type ) self.UniversalEntityIDType = issuer_of_identifier_type.value + self._issuer_of_identifier_type = issuer_of_identifier_type + + @property + def issuer_of_identifier(self) -> str: + """str: Identifier of the issuer.""" + return self._issuer_of_identifier + + @property + def issuer_of_identifier_type(self) -> Union[ + UniversalEntityIDTypeValues, None + ]: + """highdicom.UniversalEntityIDTypeValues: Type of the issuer.""" + return self._issuer_of_identifier_type + + @classmethod + def from_dataset( + cls, + dataset: Dataset, + copy: bool = True, + ) -> 'IssuerOfIdentifier': + """Construct object from an existing dataset. + + Parameters + ---------- + dataset: pydicom.dataset.Dataset + Dataset + copy: bool + If True, the underlying dataset is deep-copied such that the + original dataset remains intact. If False, this operation will + alter the original dataset in place. 
+ + Returns + ------- + highdicom.IssuerOfIdentifier + Issuer of identifier + + """ + if copy: + issuer_of_identifier = deepcopy(dataset) + else: + issuer_of_identifier = dataset + issuer_of_identifier.__class__ = cls + if hasattr(issuer_of_identifier, "LocalNamespaceEntityID"): + issuer_id = issuer_of_identifier.LocalNamespaceEntityID + issuer_type = None + elif hasattr(issuer_of_identifier, 'UniversalEntityID'): + if not hasattr(issuer_of_identifier, 'UniversalEntityIDType'): + raise ValueError( + 'Dataset with UniversalEntityID must have ', + '"UniversalEntityIDType set".' + ) + issuer_id = issuer_of_identifier.UniversalEntityID + issuer_type = UniversalEntityIDTypeValues( + issuer_of_identifier.UniversalEntityIDType + ) + else: + raise ValueError( + 'Dataset must have "LocalNamespaceEntityID" or ', + '"UniversalEntityID" and "UniversalEntityIDType" set.' + ) + issuer_of_identifier._issuer_of_identifier = issuer_id + issuer_of_identifier._issuer_of_identifier_type = issuer_type + + return cast(IssuerOfIdentifier, issuer_of_identifier) class SpecimenCollection(ContentSequence): @@ -960,7 +1066,9 @@ def __init__( processing_datetime: Optional[datetime.datetime] = None, issuer_of_specimen_id: Optional[IssuerOfIdentifier] = None, fixative: Optional[Union[Code, CodedConcept]] = None, - embedding_medium: Optional[Union[Code, CodedConcept]] = None + embedding_medium: Optional[Union[Code, CodedConcept]] = None, + specimen_container: Optional[Union[Code, CodedConcept]] = None, + specimen_type: Optional[Union[Code, CodedConcept]] = None, ): """ Parameters @@ -974,13 +1082,30 @@ def __init__( processing_description: Union[str, pydicom.sr.coding.Code, highdicom.sr.CodedConcept], optional Description of processing issuer_of_specimen_id: highdicom.IssuerOfIdentifier, optional + The issuer of the identifier of the processed specimen. 
fixative: Union[pydicom.sr.coding.Code, highdicom.sr.CodedConcept], optional - Fixative used during processing + Fixative used during processing (see :dcm:`CID 8114 ` + "Specimen Fixative" for options). embedding_medium: Union[pydicom.sr.coding.Code, highdicom.sr.CodedConcept], optional - Embedding medium used during processing + Embedding medium used during processing (see :dcm:`CID 8115 ` + "Specimen Embedding Media" for options). specimen_container: Union[pydicom.sr.coding.Code, highdicom.sr.CodedConcept], optional + Container the specimen resides in (see :dcm:`CID 8101 ` + "Container Type" for options). + specimen_type: Union[pydicom.sr.coding.Code, highdicom.sr.CodedConcept], optional + The anatomic pathology specimen type of the specimen (see :dcm:`CID 8103 ` + "Anatomic Pathology Specimen Type" for options). """ # noqa: E501 super().__init__() + if ( + isinstance(processing_procedure, SpecimenProcessing) and + processing_description is not None + ): + raise ValueError( + 'Processing description must be None if procedure is of type ' + '"SpecimenProcessing".' 
+ ) sequence = ContentSequence(is_root=False, is_sr=False) specimen_identifier_item = TextContentItem( name=codes.DCM.SpecimenIdentifier, @@ -1022,7 +1147,7 @@ def __init__( if processing_datetime is not None: processing_datetime_item = DateTimeContentItem( - name=codes.DCM.DateTimeOfProcessing, + name=codes.DCM.DatetimeOfProcessing, value=processing_datetime ) sequence.append(processing_datetime_item) @@ -1057,6 +1182,18 @@ def __init__( value=embedding_medium ) sequence.append(embedding_medium_item) + if specimen_container is not None: + specimen_container_item = CodeContentItem( + name=codes.SCT.SpecimenContainer, + value=specimen_container + ) + sequence.append(specimen_container_item) + if specimen_type is not None: + specimen_type_item = CodeContentItem( + name=codes.SCT.SpecimenType, + value=specimen_type + ) + sequence.append(specimen_type_item) self.SpecimenPreparationStepContentItemSequence = sequence @property @@ -1118,6 +1255,62 @@ def embedding_medium(self) -> Union[CodedConcept, None]: return None return items[0].value + @property + def processing_description(self) -> Union[str, CodedConcept, None]: + """Union[str, highdicom.sr.CodedConcept]: Processing description""" + if isinstance(self._processing_procedure, SpecimenProcessing): + return None + items = self.SpecimenPreparationStepContentItemSequence.find( + codes.DCM.ProcessingStepDescription + ) + if len(items) == 0: + return None + return items[0].value + + @property + def processing_datetime(self) -> Union[datetime.datetime, None]: + """datetime.datetime: Processing datetime""" + + items = self.SpecimenPreparationStepContentItemSequence.find( + codes.DCM.DatetimeOfProcessing + ) + if len(items) == 0: + return None + return items[0].value + + @property + def issuer_of_specimen_id(self) -> Union[str, None]: + """str: Issuer of specimen id""" + + items = self.SpecimenPreparationStepContentItemSequence.find( + codes.DCM.IssuerOfSpecimenIdentifier + ) + if len(items) == 0: + return None + return 
items[0].value + + @property + def specimen_container(self) -> Union[CodedConcept, None]: + """highdicom.sr.CodedConcept: Specimen container""" + + items = self.SpecimenPreparationStepContentItemSequence.find( + codes.SCT.SpecimenContainer + ) + if len(items) == 0: + return None + return items[0].value + + @property + def specimen_type(self) -> Union[CodedConcept, None]: + """highdicom.sr.CodedConcept: Specimen type""" + + items = self.SpecimenPreparationStepContentItemSequence.find( + codes.SCT.SpecimenType + ) + if len(items) == 0: + return None + return items[0].value + @classmethod def from_dataset( cls, @@ -1184,7 +1377,7 @@ def from_dataset( raise ValueError( 'Specimen Preparation Step Content Item Sequence must ' 'contain exactly one content item "Processing Step ' - 'Description" when processing type is "Specimen .' + 'Description" when processing type is "Specimen ' 'Processing".' ) instance._processing_procedure = SpecimenProcessing( @@ -1271,7 +1464,10 @@ def __init__( issuer_of_specimen_id: Optional[IssuerOfIdentifier] = None, primary_anatomic_structures: Optional[ Sequence[Union[Code, CodedConcept]] - ] = None + ] = None, + specimen_type: Optional[Union[Code, CodedConcept]] = None, + specimen_short_description: Optional[str] = None, + specimen_detailed_description: Optional[str] = None, ): """ Parameters @@ -1289,6 +1485,13 @@ def __init__( specimen_preparation_steps: Sequence[highdicom.SpecimenPreparationStep], optional Steps that were applied during the preparation of the examined specimen in the laboratory prior to image acquisition + specimen_type: Union[pydicom.sr.coding.Code, highdicom.sr.CodedConcept], optional + The anatomic pathology specimen type of the specimen (see :dcm:`CID 8103 ` + "Anatomic Pathology Specimen Type" for options). + specimen_short_description: str, optional + Short description of the examined specimen. + specimen_detailed_description: str, optional + Detailed description of the examined specimen. 
issuer_of_specimen_id: highdicom.IssuerOfIdentifier, optional Description of the issuer of the specimen identifier primary_anatomic_structures: Sequence[Union[pydicom.sr.Code, highdicom.sr.CodedConcept]] @@ -1308,7 +1511,6 @@ def __init__( ) self.SpecimenPreparationSequence.append(step_item) if specimen_location is not None: - loc_item: Union[TextContentItem, NumContentItem] loc_seq: List[Union[TextContentItem, NumContentItem]] = [] if isinstance(specimen_location, str): loc_item = TextContentItem( @@ -1336,6 +1538,16 @@ def __init__( loc_seq.append(loc_item) self.SpecimenLocalizationContentItemSequence = loc_seq + if specimen_type is not None: + if isinstance(specimen_type, Code): + specimen_type = CodedConcept.from_code(specimen_type) + self.SpecimenTypeCodeSequence = [specimen_type] + + if specimen_short_description is not None: + _check_long_string(specimen_short_description) + self.SpecimenShortDescription = specimen_short_description + if specimen_detailed_description is not None: + self.SpecimenDetailedDescription = specimen_detailed_description self.IssuerOfTheSpecimenIdentifierSequence: List[Dataset] = [] if issuer_of_specimen_id is not None: self.IssuerOfTheSpecimenIdentifierSequence.append( @@ -1369,19 +1581,61 @@ def __init__( @property def specimen_id(self) -> str: - """str: Specimen identifier""" + """str: Specimen identifier.""" return str(self.SpecimenIdentifier) @property def specimen_uid(self) -> UID: - """highdicom.UID: Unique specimen identifier""" + """highdicom.UID: Unique specimen identifier.""" return UID(self.SpecimenUID) + @property + def specimen_location(self) -> Union[str, Tuple[float, float, float], None]: + """Tuple[float, float, float]: Specimen location in container.""" + sequence = self.get("SpecimenLocalizationContentItemSequence") + if sequence is None: + return None + if isinstance(sequence[0], TextContentItem): + return sequence[0].value + return tuple(item.value for item in sequence) + @property def 
specimen_preparation_steps(self) -> List[SpecimenPreparationStep]: - """highdicom.SpecimenPreparationStep: Specimen preparation steps""" + """highdicom.SpecimenPreparationStep: Specimen preparation steps.""" return list(self.SpecimenPreparationSequence) + @property + def specimen_type(self) -> Union[CodedConcept, None]: + """highdicom.sr.CodedConcept: Specimen type.""" + sequence = self.get("SpecimenTypeCodeSequence") + if sequence is None: + return None + return sequence[0] + + @property + def specimen_short_description(self) -> Union[str, None]: + """str: Short description of specimen.""" + return self.get("SpecimenShortDescription") + + @property + def specimen_detailed_description(self) -> Union[str, None]: + """str: Detailed description of specimen.""" + return self.get("SpecimenDetailedDescription") + + @property + def issuer_of_specimen_id(self) -> Union[IssuerOfIdentifier, None]: + """IssuerOfIdentifier: Issuer of identifier for the specimen.""" + sequence = self.get("IssuerOfTheSpecimenIdentifierSequence") + if len(sequence) == 0: + return None + return sequence[0] + + @property + def primary_anatomic_structures(self) -> Union[List[CodedConcept], None]: + """List[highdicom.sr.CodedConcept]: List of anatomic structures of the + specimen.""" + return self.get("PrimaryAnatomicStructureSequence") + @classmethod def from_dataset(cls, dataset: Dataset) -> 'SpecimenDescription': """Construct object from an existing dataset. 
@@ -1425,6 +1679,23 @@ def from_dataset(cls, dataset: Dataset) -> 'SpecimenDescription': CodedConcept.from_dataset(ds) for ds in desc.SpecimenTypeCodeSequence ] + if hasattr(desc, 'SpecimenLocalizationContentItemSequence'): + if ( + desc.SpecimenLocalizationContentItemSequence[0].ValueType == + ValueTypeValues.TEXT.value + ): + content_item_type = TextContentItem + else: + content_item_type = NumContentItem + desc.SpecimenLocalizationContentItemSequence = [ + content_item_type.from_dataset(ds) + for ds in desc.SpecimenLocalizationContentItemSequence + ] + if hasattr(desc, 'IssuerOfTheSpecimenIdentifierSequence'): + desc.IssuerOfTheSpecimenIdentifierSequence = [ + IssuerOfIdentifier.from_dataset(ds) + for ds in desc.IssuerOfTheSpecimenIdentifierSequence + ] return desc diff --git a/src/highdicom/seg/__init__.py b/src/highdicom/seg/__init__.py index 94e8f720..73d75d88 100644 --- a/src/highdicom/seg/__init__.py +++ b/src/highdicom/seg/__init__.py @@ -12,6 +12,7 @@ DimensionIndexSequence, ) from highdicom.seg import utils +from highdicom.seg.pyramid import create_segmentation_pyramid SOP_CLASS_UIDS = { '1.2.840.10008.5.1.4.1.1.66.4', # Segmentation @@ -27,5 +28,6 @@ 'SegmentDescription', 'SegmentsOverlapValues', 'SpatialLocationsPreservedValues', + 'create_segmentation_pyramid', 'utils', ] diff --git a/src/highdicom/seg/content.py b/src/highdicom/seg/content.py index 94b94435..495f89d6 100644 --- a/src/highdicom/seg/content.py +++ b/src/highdicom/seg/content.py @@ -384,32 +384,32 @@ def __init__( z_axis_index.DimensionDescriptionLabel = \ 'Z Offset in Slide Coordinate System' - row_dimension_index = Dataset() - row_dimension_index.DimensionIndexPointer = tag_for_keyword( + column_dimension_index = Dataset() + column_dimension_index.DimensionIndexPointer = tag_for_keyword( 'ColumnPositionInTotalImagePixelMatrix' ) - row_dimension_index.FunctionalGroupPointer = tag_for_keyword( + column_dimension_index.FunctionalGroupPointer = tag_for_keyword( 
'PlanePositionSlideSequence' ) - row_dimension_index.DimensionOrganizationUID = dim_uid - row_dimension_index.DimensionDescriptionLabel = \ + column_dimension_index.DimensionOrganizationUID = dim_uid + column_dimension_index.DimensionDescriptionLabel = \ 'Column Position In Total Image Pixel Matrix' - column_dimension_index = Dataset() - column_dimension_index.DimensionIndexPointer = tag_for_keyword( + row_dimension_index = Dataset() + row_dimension_index.DimensionIndexPointer = tag_for_keyword( 'RowPositionInTotalImagePixelMatrix' ) - column_dimension_index.FunctionalGroupPointer = tag_for_keyword( + row_dimension_index.FunctionalGroupPointer = tag_for_keyword( 'PlanePositionSlideSequence' ) - column_dimension_index.DimensionOrganizationUID = dim_uid - column_dimension_index.DimensionDescriptionLabel = \ + row_dimension_index.DimensionOrganizationUID = dim_uid + row_dimension_index.DimensionDescriptionLabel = \ 'Row Position In Total Image Pixel Matrix' - # Organize frames for each segment similar to TILED_FULL, first - # along the row dimension (column indices from left to right) and - # then along the column dimension (row indices from top to bottom) - # of the Total Pixel Matrix. + # Organize frames for each segment similar to TILED_FULL, with + # segment position changing least frequently, followed by position + # of the row (from top to bottom) and then position of the column + # (from left to right) changing most frequently self.extend([ segment_number_index, row_dimension_index, @@ -614,7 +614,11 @@ def get_index_values( Returns ------- dimension_index_values: numpy.ndarray - 2D array of dimension index values + Array of dimension index values. The first dimension corresponds + to the items in the input plane_positions sequence. The second + dimension corresponds to the dimensions of the dimension index. + The third dimension (if any) corresponds to the multiplicity + of the values, and is omitted if this is 1 for all dimensions. 
plane_indices: numpy.ndarray 1D array of planes indices for sorting frames according to their spatial position specified by the dimension index @@ -660,6 +664,14 @@ def get_index_values( return_index=True ) + if len(plane_sort_indices) != len(plane_positions): + raise ValueError( + "Input image/frame positions are not unique according to the " + "Dimension Index Pointers. The generated segmentation would be " + "ambiguous. Ensure that source images/frames have distinct " + "locations." + ) + return (plane_position_values, plane_sort_indices) def get_index_keywords(self) -> List[str]: @@ -689,8 +701,8 @@ def get_index_keywords(self) -> List[str]: >>> names = dimension_index.get_index_keywords() >>> for name in names: ... print(name) - ColumnPositionInTotalImagePixelMatrix RowPositionInTotalImagePixelMatrix + ColumnPositionInTotalImagePixelMatrix XOffsetInSlideCoordinateSystem YOffsetInSlideCoordinateSystem ZOffsetInSlideCoordinateSystem diff --git a/src/highdicom/seg/pyramid.py b/src/highdicom/seg/pyramid.py new file mode 100644 index 00000000..af7f7ba0 --- /dev/null +++ b/src/highdicom/seg/pyramid.py @@ -0,0 +1,415 @@ +"""Tools for constructing multi-resolution segmentation pyramids.""" +from typing import Any, List, Optional, Sequence, Tuple, Union + +import numpy as np +from PIL import Image +from pydicom import Dataset +from pydicom.uid import VLWholeSlideMicroscopyImageStorage + +from highdicom.content import PixelMeasuresSequence +from highdicom.seg.sop import Segmentation +from highdicom.seg.enum import ( + SegmentationTypeValues, +) +from highdicom.seg.content import ( + SegmentDescription, +) +from highdicom.uid import UID + + +def create_segmentation_pyramid( + source_images: Sequence[Dataset], + pixel_arrays: Sequence[np.ndarray], + segmentation_type: Union[str, SegmentationTypeValues], + segment_descriptions: Sequence[SegmentDescription], + series_instance_uid: str, + series_number: int, + manufacturer: str, + manufacturer_model_name: str, + 
software_versions: Union[str, Tuple[str]], + device_serial_number: str, + downsample_factors: Optional[Sequence[float]] = None, + sop_instance_uids: Optional[List[str]] = None, + pyramid_uid: Optional[str] = None, + pyramid_label: Optional[str] = None, + **kwargs: Any +) -> List[Segmentation]: + """Construct a multi-resolution segmentation pyramid series. + + A multi-resolution pyramid represents the same segmentation array at + multiple resolutions. + + This function handles multiple related scenarios: + + * Constructing a segmentation of a source image pyramid given a + segmentation pixel array of the highest resolution source image. + Highdicom performs the downsampling automatically to match the + resolution of the other source images. For this case, pass multiple + ``source_images`` and a single item in ``pixel_arrays``. + * Constructing a segmentation of a source image pyramid given user-provided + segmentation pixel arrays for each level in the source pyramid. For this + case, pass multiple ``source_images`` and a matching number of + ``pixel_arrays``. + * Constructing a segmentation of a single source image given multiple + user-provided downsampled segmentation pixel arrays. For this case, pass + a single item in ``source_images``, and multiple items in + ``pixel_arrays``. + * Constructing a segmentation of a single source image and a single + segmentation pixel array by downsampling by a given list of + ``downsample_factors``. For this case, pass a single item in + ``source_images``, a single item in ``pixel_arrays``, and a list of one + or more desired ``downsample_factors``. 
+ + In all cases, the items in both ``source_images`` and ``pixel_arrays`` + should be sorted in pyramid order from highest resolution (smallest + spacing) to lowest resolution (largest spacing), and the pixel array + in ``pixel_arrays[0]`` must be the segmentation of the source image in + ``source_images[0]`` with spatial locations preserved (a one-to-one + correspondence between pixels in the source image's total pixel matrix and + the provided segmentation pixel array). + + In all cases, the provided pixel arrays should be total pixel matrices. + Tiling is performed automatically. + + Parameters + ---------- + source_images: Sequence[pydicom.Dataset] + List of source images. If there are multiple source images, they should + be from the same series and pyramid. + pixel_arrays: Sequence[numpy.ndarray] + List of segmentation pixel arrays. Each should be a total pixel matrix. + segmentation_type: Union[str, highdicom.seg.SegmentationTypeValues] + Type of segmentation, either ``"BINARY"`` or ``"FRACTIONAL"`` + segment_descriptions: Sequence[highdicom.seg.SegmentDescription] + Description of each segment encoded in `pixel_array`. In the case of + pixel arrays with multiple integer values, the segment description + with the corresponding segment number is used to describe each segment. + series_number: int + Number of the output segmentation series. + manufacturer: str + Name of the manufacturer of the device (developer of the software) + that creates the instance + manufacturer_model_name: str + Name of the device model (name of the software library or + application) that creates the instance + software_versions: Union[str, Tuple[str]] + Version(s) of the software that creates the instance. + device_serial_number: str + Manufacturer's serial number of the device + downsample_factors: Optional[Sequence[float]], optional + Factors by which to downsample the pixel array to create each of the + output segmentation objects. 
This should be provided if and only if a + single source image and single pixel array are provided. Note that the + original array is always used to create the first segmentation output, + so the number of created segmentation instances is one greater than + the number of items in this list. Items must be numbers greater than + 1 and sorted in ascending order. A downsampling factor of *n* implies + that the output array is *1/n* times the size of input pixel array. For + example a list ``[2, 4, 8]`` would produce 4 output segmentation + instances. The first is the same size as the original pixel array, the + next is half the size, the next is a quarter of the size of the + original, and the last is one eighth the size of the original. + Output sizes are rounded to the nearest integer. + series_instance_uid: Optional[str], optional + UID of the output segmentation series. If not specified, UIDs are + generated automatically using highdicom's prefix. + sop_instance_uids: Optional[List[str]], optional + SOP instance UIDs of the output instances. If not specified, UIDs are + generated automatically using highdicom's prefix. + pyramid_uid: Optional[str], optional + UID for the output imaging pyramid. If not specified, a UID is generated + using highdicom's prefix. + pyramid_label: Optional[str], optional + A human readable label for the output pyramid. + **kwargs: Any + Any further parameters are passed directly to the constructor of the + :class:`highdicom.seg.Segmentation` object. However the following + parameters are disallowed: ``instance_number``, ``sop_instance_uid``, + ``plane_orientation``, ``plane_positions``, ``pixel_measures``, + ``pixel_array``, ``tile_pixel_array``. + + Note + ---- + Downsampling is performed via simple nearest neighbor interpolation. If + more control is needed over the downsampling process (for example + anti-aliasing), explicitly pass the downsampled arrays. 
+ + """ + # Disallow duplicate items in kwargs + kwarg_keys = set(kwargs.keys()) + disallowed_keys = { + 'instance_number', + 'sop_instance_uid', + 'plane_orientation', + 'plane_positions', + 'pixel_array', + 'tile_pixel_array', + } + error_keys = kwarg_keys & disallowed_keys + if len(error_keys) > 0: + raise TypeError( + f'kwargs supplied to the create_segmentation_pyramid function ' + f'should not contain a value for parameter {error_keys[0]}.' + ) + + if pyramid_uid is None: + pyramid_uid = UID() + if series_instance_uid is None: + series_instance_uid = UID() + + n_sources = len(source_images) + n_pix_arrays = len(pixel_arrays) + + if n_sources == 0: + raise ValueError( + 'Argument "source_images" must not be empty.' + ) + if n_pix_arrays == 0: + raise ValueError( + 'Argument "pixel_arrays" must not be empty.' + ) + + if n_sources == 1 and n_pix_arrays == 1: + if downsample_factors is None: + raise TypeError( + 'Argument "downsample_factors" must be provided when providing ' + 'only a single source image and pixel array.' + ) + if len(downsample_factors) < 1: + raise ValueError('Argument "downsample_factors" may not be empty.') + if any(f <= 1.0 for f in downsample_factors): + raise ValueError( + 'All items in "downsample_factors" must be greater than 1.' + ) + if len(downsample_factors) > 1: + if any( + z1 > z2 for z1, z2 in zip( + downsample_factors[:-1], + downsample_factors[1:] + ) + ): + raise ValueError( + 'Items in argument "downsample_factors" must be sorted in ' + 'ascending order.' + ) + n_outputs = len(downsample_factors) + 1 # including original + else: + if downsample_factors is not None: + raise TypeError( + 'Argument "downsample_factors" must not be provided when ' + 'multiple source images or pixel arrays are provided.' + ) + if n_sources > 1 and n_pix_arrays > 1: + if n_sources != n_pix_arrays: + raise ValueError( + 'If providing multiple source images and multiple pixel ' + 'arrays, the number of items in the two lists must match.' 
+ ) + n_outputs = n_sources + else: + # Either n_sources > 1 or n_pix_arrays > 1 but not both + n_outputs = max(n_sources, n_pix_arrays) + + if sop_instance_uids is not None: + if len(sop_instance_uids) != n_outputs: + raise ValueError( + 'Number of specified SOP Instance UIDs does not match number ' + 'of output images.' + ) + + # Check the source images are appropriately ordered + for index in range(1, len(source_images)): + r0 = source_images[index - 1].TotalPixelMatrixRows + c0 = source_images[index - 1].TotalPixelMatrixColumns + r1 = source_images[index].TotalPixelMatrixRows + c1 = source_images[index].TotalPixelMatrixColumns + + if r0 <= r1 or c0 <= c1: + raise ValueError( + 'Items in argument "source_images" must be strictly ordered in ' + 'decreasing resolution.' + ) + + # Check that source images are WSI + for im in source_images: + if im.SOPClassUID != VLWholeSlideMicroscopyImageStorage: + raise ValueError( + 'Source images must have IOD VLWholeSlideMicroscopyImageStorage' + ) + + # Check that the source images are from the same series and pyramid + if len(source_images) > 1: + series_uid = source_images[0].SeriesInstanceUID + if not all( + dcm.SeriesInstanceUID == series_uid + for dcm in source_images[1:] + ): + raise ValueError( + 'All source images should belong to the same series.' + ) + if not all(hasattr(dcm, 'PyramidUID') for dcm in source_images): + raise ValueError( + 'All source images should belong to the same pyramid ' + '(share a Pyramid UID).' + ) + pyramid_uid = source_images[0].PyramidUID + if not all( + dcm.PyramidUID == pyramid_uid + for dcm in source_images[1:] + ): + raise ValueError( + 'All source images should belong to the same pyramid ' + '(share a Pyramid UID).' + ) + + # Check that pixel arrays have an appropriate shape + for pixel_array in pixel_arrays: + if pixel_array.ndim not in (2, 3, 4): + raise ValueError( + 'Each item of argument "pixel_arrays" must be a NumPy array ' + 'with 2, 3, or 4 dimensions.' 
+ ) + if pixel_array.ndim > 2 and pixel_array.shape[0] != 1: + raise ValueError( + 'Each item of argument "pixel_arrays" must contain a single ' + 'frame, with a size of 1 along dimension 0.' + ) + + # Check the pixel arrays are appropriately ordered + for index in range(1, len(pixel_arrays)): + arr0 = pixel_arrays[index - 1] + arr1 = pixel_arrays[index] + + if arr0.ndim == 2: + r0 = arr0.shape[:2] + c0 = arr0.shape[:2] + else: + r0 = arr0.shape[1:3] + c0 = arr0.shape[1:3] + + if arr1.ndim == 2: + r1 = arr1.shape[:2] + c1 = arr1.shape[:2] + else: + r1 = arr1.shape[1:3] + c1 = arr1.shape[1:3] + + if r0 <= r1 or c0 <= c1: + raise ValueError( + 'Items in argument "pixel_arrays" must be strictly ordered in ' + 'decreasing resolution.' + ) + + # Check that input dimensions match + for index, (source_image, pixel_array) in enumerate( + zip(source_images, pixel_arrays) + ): + src_shape = ( + source_image.TotalPixelMatrixRows, + source_image.TotalPixelMatrixColumns + ) + pix_shape = ( + pixel_array.shape[1:3] if pixel_array.ndim > 2 + else pixel_array.shape + ) + if pix_shape != src_shape: + raise ValueError( + "The shape of each provided pixel array must match the shape " + "of the total pixel matrix of the corresponding source image. " + f"Got pixel array of shape {pix_shape} for a source image of " + f"shape {src_shape} at index {index}." 
+ ) + + if n_pix_arrays == 1: + # Create a pillow image for use later with resizing + mask_image = Image.fromarray(pixel_arrays[0]) + + all_segs = [] + + # Work "up" pyramid from high to low resolution + for output_level in range(n_outputs): + if n_sources > 1: + source_image = source_images[output_level] + else: + source_image = source_images[0] + + if n_pix_arrays > 1: + pixel_array = pixel_arrays[output_level] + else: + need_resize = True + if n_sources > 1: + output_size = ( + source_image.TotalPixelMatrixColumns, + source_image.TotalPixelMatrixRows + ) + else: + if output_level == 0: + pixel_array = pixel_arrays[0] + need_resize = False + else: + f = downsample_factors[output_level - 1] + output_size = ( + int(source_images[0].TotalPixelMatrixColumns / f), + int(source_images[0].TotalPixelMatrixRows / f) + ) + + if need_resize: + pixel_array = np.array( + mask_image.resize(output_size, Image.Resampling.NEAREST) + ) + + if n_sources == 1: + source_pixel_measures = ( + source_image + .SharedFunctionalGroupsSequence[0] + .PixelMeasuresSequence[0] + ) + src_pixel_spacing = source_pixel_measures.PixelSpacing + src_slice_thickness = source_pixel_measures.SliceThickness + row_spacing = ( + src_pixel_spacing[0] * + (pixel_arrays[0].shape[0] / pixel_array.shape[0]) + ) + column_spacing = ( + src_pixel_spacing[1] * + (pixel_arrays[0].shape[1] / pixel_array.shape[1]) + ) + pixel_measures = PixelMeasuresSequence( + pixel_spacing=(row_spacing, column_spacing), + slice_thickness=src_slice_thickness + ) + else: + # This will be copied from the source image + pixel_measures = None + + if sop_instance_uids is None: + sop_instance_uid = UID() + else: + sop_instance_uid = sop_instance_uids[output_level] + + # Create the output segmentation + seg = Segmentation( + source_images=[source_image], + pixel_array=pixel_array, + segmentation_type=segmentation_type, + segment_descriptions=segment_descriptions, + series_instance_uid=series_instance_uid, + series_number=series_number, + 
sop_instance_uid=sop_instance_uid, + instance_number=output_level + 1, + manufacturer=manufacturer, + manufacturer_model_name=manufacturer_model_name, + software_versions=software_versions, + device_serial_number=device_serial_number, + pyramid_uid=pyramid_uid, + pyramid_label=pyramid_label, + tile_pixel_array=True, + plane_orientation=None, + plane_positions=None, + pixel_measures=pixel_measures, + **kwargs, + ) + + all_segs.append(seg) + + return all_segs diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index f28218fc..be933a3d 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -1,6 +1,7 @@ """Module for SOP classes of the SEG modality.""" import logging from collections import Counter, defaultdict +from concurrent.futures import Executor, Future, ProcessPoolExecutor from contextlib import contextmanager from copy import deepcopy from os import PathLike @@ -20,10 +21,11 @@ Union, cast, ) +import warnings import numpy as np from pydicom.dataset import Dataset -from pydicom.datadict import keyword_for_tag, tag_for_keyword +from pydicom.datadict import get_entry, keyword_for_tag, tag_for_keyword from pydicom.encaps import encapsulate from pydicom.multival import MultiValue from pydicom.pixel_data_handlers.numpy_handler import pack_bits @@ -49,8 +51,19 @@ PlanePositionSequence, PixelMeasuresSequence ) -from highdicom.enum import CoordinateSystemNames +from highdicom.enum import ( + CoordinateSystemNames, + DimensionOrganizationTypeValues, +) from highdicom.frame import encode_frame +from highdicom.utils import ( + are_plane_positions_tiled_full, + compute_plane_position_tiled_full, + is_tiled_image, + get_tile_array, + iter_tiled_full_frame_data, + tile_pixel_matrix, +) from highdicom.seg.content import ( DimensionIndexSequence, SegmentDescription, @@ -65,7 +78,11 @@ from highdicom.seg.utils import iter_segments from highdicom.spatial import ImageToReferenceTransformer from highdicom.sr.coding import CodedConcept -from 
highdicom.valuerep import check_person_name, _check_code_string +from highdicom.valuerep import ( + check_person_name, + _check_code_string, + _check_long_string, +) from highdicom.uid import UID as hd_UID @@ -137,11 +154,34 @@ class _SegDBManager: """Database manager for data associated with a segmentation image.""" + # Dictionary mapping DCM VRs to appropriate SQLite types + _DCM_SQL_TYPE_MAP = { + 'CS': 'VARCHAR', + 'DS': 'REAL', + 'FD': 'REAL', + 'FL': 'REAL', + 'IS': 'INTEGER', + 'LO': 'TEXT', + 'LT': 'TEXT', + 'PN': 'TEXT', + 'SH': 'TEXT', + 'SL': 'INTEGER', + 'SS': 'INTEGER', + 'ST': 'TEXT', + 'UI': 'TEXT', + 'UL': 'INTEGER', + 'UR': 'TEXT', + 'US or SS': 'INTEGER', + 'US': 'INTEGER', + 'UT': 'TEXT', + } + def __init__( self, referenced_uids: List[Tuple[str, str, str]], segment_numbers: List[int], dim_indices: Dict[int, List[int]], + dim_values: Dict[int, List[Any]], referenced_instances: Optional[List[str]], referenced_frames: Optional[List[int]], ): @@ -158,7 +198,11 @@ def __init__( dim_indices: Dict[int, List[int]] Dictionary mapping the integer tag value of each dimension index pointer (excluding SegmentNumber) to a list of dimension indices - for each frames in the segmentation image. + for each frame in the segmentation image. + dim_values: Dict[int, List[Values]] + Dictionary mapping the integer tag value of each dimension index + pointer (excluding SegmentNumber) to a list of dimension values + for each frame in the segmentation image. referenced_instances: Optional[List[str]] SOP Instance UID of each referenced image instance for each frame in the segmentation image. 
Should be omitted if there is not a @@ -176,14 +220,6 @@ def __init__( self._number_of_frames = len(segment_numbers) - self._dim_ind_col_names = {} - for i, t in enumerate(dim_indices.keys()): - kw = keyword_for_tag(t) - if kw == '': - kw = f'UnknownDimensionIndex{i}' - col_name = kw + '_DimensionIndexValues' - self._dim_ind_col_names[t] = col_name - # Construct the columns and values to put into a frame look-up table # table within sqlite. There will be one row per frame in the # segmentation instance @@ -198,12 +234,40 @@ def __init__( col_defs.append('SegmentNumber INTEGER NOT NULL') col_data.append(segment_numbers) - # Columns for other dimension index values - col_defs += [ - f'{col_name} INTEGER NOT NULL' - for col_name in self._dim_ind_col_names.values() - ] - col_data.extend(list(dim_indices.values())) + self._dim_ind_col_names = {} + for i, t in enumerate(dim_indices.keys()): + vr, vm_str, _, _, kw = get_entry(t) + if kw == '': + kw = f'UnknownDimensionIndex{i}' + ind_col_name = kw + '_DimensionIndexValues' + self._dim_ind_col_names[t] = ind_col_name + + # Add column for dimension index + col_defs.append(f'{ind_col_name} INTEGER NOT NULL') + col_data.append(dim_indices[t]) + + # Add column for dimension value + # For this to be possible, must have a fixed VM + # and a VR that we can map to a sqlite type + # Otherwise, we just omit the data from the db + try: + vm = int(vm_str) + except ValueError: + continue + try: + sql_type = self._DCM_SQL_TYPE_MAP[vr] + except KeyError: + continue + + if vm > 1: + for d in range(vm): + data = [el[d] for el in dim_values[t]] + col_defs.append(f'{kw}_{d} {sql_type} NOT NULL') + col_data.append(data) + else: + # Single column + col_defs.append(f'{kw} {sql_type} NOT NULL') + col_data.append(dim_values[t]) # Columns related to source frames, if they are usable for indexing if (referenced_frames is None) != (referenced_instances is None): @@ -429,6 +493,23 @@ def get_unique_dim_index_values( ) } + def 
is_indexable_as_total_pixel_matrix(self) -> bool: + """Whether the segmentation can be indexed as a total pixel matrix. + + Returns + ------- + bool: + True if the segmentation may be indexed using row and column + positions in the total pixel matrix. False otherwise. + + """ + row_pos_kw = tag_for_keyword('RowPositionInTotalImagePixelMatrix') + col_pos_kw = tag_for_keyword('ColumnPositionInTotalImagePixelMatrix') + return ( + row_pos_kw in self._dim_ind_col_names and + col_pos_kw in self._dim_ind_col_names + ) + @contextmanager def _generate_temp_table( self, @@ -556,7 +637,17 @@ def iterate_indices_by_source_instance( segment_numbers: Sequence[int], combine_segments: bool = False, relabel: bool = False, - ) -> Generator[Iterator[Tuple[int, int, int]], None, None]: + ) -> Generator[ + Iterator[ + Tuple[ + Tuple[Union[slice, int], ...], + Tuple[Union[slice, int], ...], + int + ] + ], + None, + None, + ]: """Iterate over segmentation frame indices for given source image instances. @@ -595,15 +686,17 @@ def iterate_indices_by_source_instance( Yields ------ - Iterator[Tuple[int, int, int]]: + Iterator[Tuple[Tuple[Union[slice, int], ...], Tuple[Union[slice, int], ...], int]]: Indices required to construct the requested mask. Each - triplet denotes the (output frame index, segmentation frame index, + triplet denotes the (output indexer, segmentation indexer, output segment number) representing a list of "instructions" to create the requested output array by copying frames from the segmentation dataset and inserting them into the output array with - a given segment value. + a given segment value. Output indexer and segmentation indexer are + tuples that can be used to index the output and segmentations + numpy arrays directly. - """ + """ # noqa: E501 # Run query to create the iterable of indices needed to construct the # desired pixel array. 
The approach here is to create two temporary # tables in the SQLite database, one for the desired source UIDs, and @@ -645,7 +738,14 @@ def iterate_indices_by_source_instance( combine_segments=combine_segments, relabel=relabel ): - yield self._db_con.execute(query) + yield ( + ( + (fo, slice(None), slice(None)), + (fi, slice(None), slice(None)), + seg_no + ) + for (fo, fi, seg_no) in self._db_con.execute(query) + ) @contextmanager def iterate_indices_by_source_frame( @@ -655,7 +755,17 @@ def iterate_indices_by_source_frame( segment_numbers: Sequence[int], combine_segments: bool = False, relabel: bool = False, - ) -> Generator[Iterator[Tuple[int, int, int]], None, None]: + ) -> Generator[ + Iterator[ + Tuple[ + Tuple[Union[slice, int], ...], + Tuple[Union[slice, int], ...], + int + ] + ], + None, + None, + ]: """Iterate over frame indices for given source image frames. This is intended for the case of a segmentation image that references a @@ -696,15 +806,17 @@ def iterate_indices_by_source_frame( Yields ------ - Iterator[Tuple[int, int, int]]: + Iterator[Tuple[Tuple[Union[slice, int], ...], Tuple[Union[slice, int], ...], int]]: Indices required to construct the requested mask. Each - triplet denotes the (output frame index, segmentation frame index, + triplet denotes the (output indexer, segmentation indexer, output segment number) representing a list of "instructions" to create the requested output array by copying frames from the segmentation dataset and inserting them into the output array with - a given segment value. + a given segment value. Output indexer and segmentation indexer are + tuples that can be used to index the output and segmentations + numpy arrays directly. - """ + """ # noqa: E501 # Run query to create the iterable of indices needed to construct the # desired pixel array. 
The approach here is to create two temporary # tables in the SQLite database, one for the desired frame numbers, and @@ -746,7 +858,14 @@ def iterate_indices_by_source_frame( combine_segments=combine_segments, relabel=relabel ): - yield self._db_con.execute(query) + yield ( + ( + (fo, slice(None), slice(None)), + (fi, slice(None), slice(None)), + seg_no + ) + for (fo, fi, seg_no) in self._db_con.execute(query) + ) @contextmanager def iterate_indices_by_dimension_index_values( @@ -756,7 +875,17 @@ def iterate_indices_by_dimension_index_values( segment_numbers: Sequence[int], combine_segments: bool = False, relabel: bool = False, - ) -> Generator[Iterator[Tuple[int, int, int]], None, None]: + ) -> Generator[ + Iterator[ + Tuple[ + Tuple[Union[slice, int], ...], + Tuple[Union[slice, int], ...], + int + ] + ], + None, + None, + ]: """Iterate over frame indices for given dimension index values. This is intended to be the most flexible and lowest-level (and there @@ -799,15 +928,17 @@ def iterate_indices_by_dimension_index_values( Yields ------ - Iterator[Tuple[int, int, int]]: + Iterator[Tuple[Tuple[Union[slice, int], ...], Tuple[Union[slice, int], ...], int]]: Indices required to construct the requested mask. Each - triplet denotes the (output frame index, segmentation frame index, + triplet denotes the (output indexer, segmentation indexer, output segment number) representing a list of "instructions" to create the requested output array by copying frames from the segmentation dataset and inserting them into the output array with - a given segment value. + a given segment value. Output indexer and segmentation indexer are + tuples that can be used to index the output and segmentations + numpy arrays directly. 
- """ + """ # noqa: E501 # Create temporary table of desired dimension indices table_name = 'TemporaryDimensionIndexValues' @@ -850,7 +981,160 @@ def iterate_indices_by_dimension_index_values( combine_segments=combine_segments, relabel=relabel ): - yield self._db_con.execute(query) + yield ( + ( + (fo, slice(None), slice(None)), + (fi, slice(None), slice(None)), + seg_no + ) + for (fo, fi, seg_no) in self._db_con.execute(query) + ) + + @contextmanager + def iterate_indices_for_tiled_region( + self, + row_start: int, + row_end: int, + column_start: int, + column_end: int, + tile_shape: Tuple[int, int], + segment_numbers: Sequence[int], + combine_segments: bool = False, + relabel: bool = False, + ) -> Generator[ + Iterator[ + Tuple[ + Tuple[Union[slice, int], ...], + Tuple[Union[slice, int], ...], + int + ] + ], + None, + None, + ]: + """Iterate over segmentation frame indices for a given region of the + segmentation's total pixel matrix. + + This is intended for the case of a segmentation image that is stored as + a tiled representation of total pixel matrix. + + This yields an iterator to the underlying database result that iterates + over information on the steps required to construct the requested + segmentation mask from the stored frames of the segmentation image. + + This method is intended to be used as a context manager that yields the + requested iterator. The iterator is only valid while the context + manager is active. + + Parameters + ---------- + row_start: int + Row index (1-based) in the total pixel matrix of the first row of + the output array. May be negative (last row is -1). + row_end: int + Row index (1-based) in the total pixel matrix one beyond the last + row of the output array. May be negative (last row is -1). + column_start: int + Column index (1-based) in the total pixel matrix of the first + column of the output array. May be negative (last column is -1). 
+ column_end: int + Column index (1-based) in the total pixel matrix one beyond the last + column of the output array. May be negative (last column is -1). + tile_shape: Tuple[int, int] + Shape of each tile (rows, columns). + segment_numbers: Sequence[int] + Numbers of segments to include. + combine_segments: bool, optional + If True, produce indices to combine the different segments into a + single label map in which the value of a pixel represents its + segment. If False (the default), segments are binary and stacked + down the last dimension of the output array. + relabel: bool, optional + If True and ``combine_segments`` is ``True``, the output segment + numbers are relabelled into the range ``0`` to + ``len(segment_numbers)`` (inclusive) according to the position of + the original segment numbers in ``segment_numbers`` parameter. If + ``combine_segments`` is ``False``, this has no effect. + + Yields + ------ + Iterator[Tuple[Tuple[Union[slice, int], ...], Tuple[Union[slice, int], ...], int]]: + Indices required to construct the requested mask. Each + triplet denotes the (output indexer, segmentation indexer, + output segment number) representing a list of "instructions" to + create the requested output array by copying frames from the + segmentation dataset and inserting them into the output array with + a given segment value. Output indexer and segmentation indexer are + tuples that can be used to index the output and segmentations + numpy arrays directly. 
+ + """ # noqa: E501 + th, tw = tile_shape + + oh = row_end - row_start + ow = column_end - column_start + + row_offset_start = row_start - th + 1 + column_offset_start = column_start - tw + 1 + + # Construct the query. The ORDER BY is not logically necessary + # but seems to improve performance of the downstream numpy + # operations, presumably as it is more cache efficient + query = ( + 'SELECT ' + ' L.RowPositionInTotalImagePixelMatrix,' + ' L.ColumnPositionInTotalImagePixelMatrix,' + ' L.FrameNumber - 1,' + ' S.OutputSegmentNumber ' + 'FROM FrameLUT L ' + 'INNER JOIN TemporarySegmentNumbers S' + ' ON L.SegmentNumber = S.SegmentNumber ' + 'WHERE (' + ' L.RowPositionInTotalImagePixelMatrix >= ' + f' {row_offset_start}' + f' AND L.RowPositionInTotalImagePixelMatrix < {row_end}' + ' AND L.ColumnPositionInTotalImagePixelMatrix >= ' + f' {column_offset_start}' + f' AND L.ColumnPositionInTotalImagePixelMatrix < {column_end}' + ')' + 'ORDER BY ' + ' L.RowPositionInTotalImagePixelMatrix,' + ' L.ColumnPositionInTotalImagePixelMatrix,' + ' S.OutputSegmentNumber' + ) + + with self._generate_temp_segment_table( + segment_numbers=segment_numbers, + combine_segments=combine_segments, + relabel=relabel + ): + yield ( + ( + ( + slice( + max(rp - row_start, 0), + min(rp + th - row_start, oh) + ), + slice( + max(cp - column_start, 0), + min(cp + tw - column_start, ow) + ), + ), + ( + fi, + slice( + max(row_start - rp, 0), + min(row_end - rp, th) + ), + slice( + max(column_start - cp, 0), + min(column_end - cp, tw) + ), + ), + seg_no + ) + for (rp, cp, fi, seg_no) in self._db_con.execute(query) + ) class Segmentation(SOPClass): @@ -886,6 +1170,16 @@ def __init__( content_creator_identification: Optional[ ContentCreatorIdentificationCodeSequence ] = None, + workers: Union[int, Executor] = 0, + dimension_organization_type: Union[ + DimensionOrganizationTypeValues, + str, + None, + ] = None, + tile_pixel_array: bool = False, + tile_size: Union[Sequence[int], None] = None, +
pyramid_uid: Optional[str] = None, + pyramid_label: Optional[str] = None, **kwargs: Any ) -> None: """ @@ -913,7 +1207,7 @@ def __init__( ``plane_positions`` parameter is provided, the frame in ``pixel_array[i, ...]`` should correspond to either ``source_images[i]`` (if ``source_images`` is a list of single - frame instances) or source_images[0].pixel_array[i, ...] if + frame instances) or ``source_images[0].pixel_array[i, ...]`` if ``source_images`` is a single multiframe instance. Similarly, if ``pixel_array`` is a 3D array representing the @@ -979,7 +1273,7 @@ def __init__( series_instance_uid: str UID of the series series_number: int - Number of the series within the study + Number of the output segmentation series. sop_instance_uid: str UID that should be assigned to the instance instance_number: int @@ -1038,6 +1332,63 @@ def __init__( content_creator_identification: Union[highdicom.ContentCreatorIdentificationCodeSequence, None], optional Identifying information for the person who created the content of this segmentation. + workers: Union[int, concurrent.futures.Executor], optional + Number of worker processes to use for frame compression. If 0, no + workers are used and compression is performed in the main process + (this is the default behavior). If negative, as many processes are + created as the machine has processors. + + Alternatively, you may directly pass an instance of a class derived + from ``concurrent.futures.Executor`` (most likely an instance of + ``concurrent.futures.ProcessPoolExecutor``) for highdicom to use. + You may wish to do this either to have greater control over the + setup of the executor, or to avoid the setup cost of spawning new + processes each time this ``__init__`` method is called if your + application creates a large number of Segmentations. 
+ + Note that if you use worker processes, you must ensure that your + main process uses the ``if __name__ == "__main__"`` idiom to guard + against spawned child processes creating further workers. + dimension_organization_type: Union[highdicom.enum.DimensionOrganizationTypeValues, str, None], optional + Dimension organization type to use for the output image. + tile_pixel_array: bool, optional + If True, `highdicom` will automatically convert an input total + pixel matrix into a sequence of frames representing tiles of the + segmentation. This is valid only when the source image supports + tiling (e.g. VL Whole Slide Microscopy images). + + If True, the input pixel array must consist of a single "frame", + i.e. must be either a 2D numpy array, a 3D numpy array with a size + of 1 down the first dimension (axis zero), or a 4D numpy array also + with a size of 1 down the first dimension. The input pixel array is + treated as the total pixel matrix of the segmentation, and this is + tiled along the row and column dimension to create an output image + with multiple, smaller frames. + + If no ``pixel_measures``, ``plane_positions``, + ``plane_orientation`` are supplied, the total pixel matrix of the + segmentation is assumed to correspond to the total pixel matrix of + the (single) source image. If ``plane_positions`` is supplied, the + sequence should contain a single item representing the plane + position of the entire total pixel matrix. Plane positions of + the newly created tiles will be derived automatically from this. + + If False, the pixel array is already considered to consist of one + or more existing frames, as described above. + tile_size: Union[Sequence[int], None], optional + Tile size to use when tiling the input pixel array. If ``None`` + (the default), the tile size is copied from the source image. + Otherwise the tile size is specified explicitly as (number of rows, + number of columns). This value is ignored if ``tile_pixel_array`` + is False.
+ pyramid_uid: Optional[str], optional + Unique identifier for the pyramid containing this segmentation. + Should only be used if this segmentation is part of a + multi-resolution pyramid. + pyramid_label: Optional[str], optional + Human readable label for the pyramid containing this segmentation. + Should only be used if this segmentation is part of a + multi-resolution pyramid. **kwargs: Any, optional Additional keyword arguments that will be passed to the constructor of `highdicom.base.SOPClass` @@ -1090,7 +1441,6 @@ def __init__( 'Only one source image should be provided in case images ' 'are multi-frame images.' ) - is_tiled = hasattr(src_img, 'TotalPixelMatrixRows') supported_transfer_syntaxes = { ImplicitVRLittleEndian, ExplicitVRLittleEndian, @@ -1108,6 +1458,19 @@ def __init__( if pixel_array.ndim not in [3, 4]: raise ValueError('Pixel array must be a 2D, 3D, or 4D array.') + is_tiled = hasattr(src_img, 'TotalPixelMatrixRows') + if tile_pixel_array and not is_tiled: + raise ValueError( + 'When argument "tile_pixel_array" is True, the source image ' + 'must be a tiled image.' + ) + if tile_pixel_array and pixel_array.shape[0] != 1: + raise ValueError( + 'When argument "tile_pixel_array" is True, the input pixel ' + 'array must contain only one "frame" representing the entire ' + 'entire pixel matrix.' + ) + super().__init__( study_instance_uid=src_img.StudyInstanceUID, series_instance_uid=series_instance_uid, @@ -1189,26 +1552,38 @@ def __init__( self._coordinate_system = None # General Reference - self.SourceImageSequence: List[Dataset] = [] + + # Note that appending directly to the SourceImageSequence is typically + # slow so it's more efficient to build as a Python list then convert + # later. 
We save conversion for after the main loop + source_image_seq: List[Dataset] = [] referenced_series: Dict[str, List[Dataset]] = defaultdict(list) for s_img in source_images: ref = Dataset() ref.ReferencedSOPClassUID = s_img.SOPClassUID ref.ReferencedSOPInstanceUID = s_img.SOPInstanceUID - self.SourceImageSequence.append(ref) + source_image_seq.append(ref) referenced_series[s_img.SeriesInstanceUID].append(ref) + self.SourceImageSequence = source_image_seq # Common Instance Reference - self.ReferencedSeriesSequence: List[Dataset] = [] + ref_image_seq: List[Dataset] = [] for series_instance_uid, referenced_images in referenced_series.items(): ref = Dataset() ref.SeriesInstanceUID = series_instance_uid ref.ReferencedInstanceSequence = referenced_images - self.ReferencedSeriesSequence.append(ref) + ref_image_seq.append(ref) + self.ReferencedSeriesSequence = ref_image_seq # Image Pixel - self.Rows = pixel_array.shape[1] - self.Columns = pixel_array.shape[2] + if tile_pixel_array: + # By default use the same tile size as the source image (even if + # they are not spatially aligned) + tile_size = tile_size or (src_img.Rows, src_img.Columns) + self.Rows, self.Columns = (tile_size) + else: + self.Rows = pixel_array.shape[1] + self.Columns = pixel_array.shape[2] # Segmentation Image self.ImageType = ['DERIVED', 'PRIMARY'] @@ -1277,40 +1652,41 @@ def __init__( self.LossyImageCompressionMethod = \ src_img.LossyImageCompressionMethod - self.SegmentSequence: List[SegmentDescription] = [] + # Multi-Resolution Pyramid + if pyramid_uid is not None: + if not is_tiled: + raise TypeError( + 'Argument "pyramid_uid" should only be specified ' + 'for tiled images.' + ) + if ( + self._coordinate_system is None or + self._coordinate_system != CoordinateSystemNames.SLIDE + ): + raise TypeError( + 'Argument "pyramid_uid" should only be specified for ' + 'segmentations in the SLIDE coordinate system.' 
+ ) + self.PyramidUID = pyramid_uid + + if pyramid_label is not None: + _check_long_string(pyramid_label) + self.PyramidLabel = pyramid_label + + elif pyramid_label is not None: + raise TypeError( + 'Argument "pyramid_label" should not be specified if ' + '"pyramid_uid" is not specified.' + ) # Multi-Frame Functional Groups and Multi-Frame Dimensions sffg_item = Dataset() + source_pixel_measures = self._get_pixel_measures_sequence( + source_image=src_img, + is_multiframe=is_multiframe, + ) if pixel_measures is None: - if is_multiframe: - src_shared_fg = src_img.SharedFunctionalGroupsSequence[0] - pixel_measures = src_shared_fg.PixelMeasuresSequence - else: - if has_ref_frame_uid: - pixel_measures = PixelMeasuresSequence( - pixel_spacing=src_img.PixelSpacing, - slice_thickness=src_img.SliceThickness, - spacing_between_slices=src_img.get( - 'SpacingBetweenSlices', - None - ) - ) - else: - pixel_spacing = getattr(src_img, 'PixelSpacing', None) - if pixel_spacing is not None: - pixel_measures = PixelMeasuresSequence( - pixel_spacing=pixel_spacing, - slice_thickness=src_img.get( - 'SliceThickness', - None - ), - spacing_between_slices=src_img.get( - 'SpacingBetweenSlices', - None - ) - ) - else: - pixel_measures = None + pixel_measures = source_pixel_measures if has_ref_frame_uid: if self._coordinate_system == CoordinateSystemNames.SLIDE: @@ -1362,59 +1738,107 @@ def __init__( sffg_item.PlaneOrientationSequence = plane_orientation self.SharedFunctionalGroupsSequence = [sffg_item] - # Information about individual frames will be updated below - self.NumberOfFrames = 0 - self.PerFrameFunctionalGroupsSequence: List[Dataset] = [] - # Check segment numbers described_segment_numbers = np.array([ int(item.SegmentNumber) for item in segment_descriptions ]) self._check_segment_numbers(described_segment_numbers) + number_of_segments = len(described_segment_numbers) + self.SegmentSequence = segment_descriptions # Checks on pixels and overlap pixel_array, segments_overlap = 
self._check_and_cast_pixel_array( pixel_array, - described_segment_numbers, - segmentation_type + number_of_segments, + segmentation_type, ) self.SegmentsOverlap = segments_overlap.value - if omit_empty_frames and pixel_array.sum() == 0: - omit_empty_frames = False - logger.warning( - 'Encoding an empty segmentation with "omit_empty_frames" ' - 'set to True. Reverting to encoding all frames since omitting ' - 'all frames is not possible.' - ) if has_ref_frame_uid: - if plane_positions is None: - if pixel_array.shape[0] != len(source_plane_positions): - raise ValueError( - 'Number of plane positions in source image(s) does not ' - 'match size of first dimension of "pixel_array" ' - 'argument.' + if tile_pixel_array: + + if plane_positions is None: + # Use the origin of the source image + origin_seq = src_img.TotalPixelMatrixOriginSequence[0] + x_offset = origin_seq.XOffsetInSlideCoordinateSystem + y_offset = origin_seq.YOffsetInSlideCoordinateSystem + else: + # Use the provided image origin + pp = plane_positions[0][0] + rp = pp.RowPositionInTotalImagePixelMatrix + cp = pp.ColumnPositionInTotalImagePixelMatrix + if rp != 1 or cp != 1: + raise ValueError( + "When specifying a single plane position when " + 'the "tile_pixel_array" argument is True, the ' + "plane position must be at the top left corner " + "of the total pixel matrix. I.e. it must have " + "RowPositionInTotalImagePixelMatrix and " + "ColumnPositionInTotalImagePixelMatrix equal to 1." 
+ ) + x_offset = pp.XOffsetInSlideCoordinateSystem + y_offset = pp.YOffsetInSlideCoordinateSystem + orientation = plane_orientation[0].ImageOrientationSlide + + plane_positions = [ + compute_plane_position_tiled_full( + row_index=r, + column_index=c, + x_offset=x_offset, + y_offset=y_offset, + rows=self.Rows, + columns=self.Columns, + image_orientation=orientation, + pixel_spacing=pixel_measures[0].PixelSpacing, ) - plane_positions = source_plane_positions - else: - if pixel_array.shape[0] != len(plane_positions): - raise ValueError( - 'Number of PlanePositionSequence items provided via ' - '"plane_positions" argument does not match size of ' - 'first dimension of "pixel_array" argument.' + for c, r in tile_pixel_matrix( + total_pixel_matrix_rows=pixel_array.shape[1], + total_pixel_matrix_columns=pixel_array.shape[2], + rows=self.Rows, + columns=self.Columns, ) + ] + + else: + if plane_positions is None: + if pixel_array.shape[0] != len(source_plane_positions): + raise ValueError( + 'Number of plane positions in source image(s) does ' + 'not match size of first dimension of ' + '"pixel_array" argument.' + ) + plane_positions = source_plane_positions + else: + if pixel_array.shape[0] != len(plane_positions): + raise ValueError( + 'Number of PlanePositionSequence items provided ' + 'via "plane_positions" argument does not match ' + 'size of first dimension of "pixel_array" argument.' + ) + + # plane_position_values is an array giving, for each plane of + # the input array, the raw values of all attributes that + # describe its position. 
The first dimension is sorted the same + # way as the input pixel array and the second is sorted the + # same way as the dimension index sequence (without segment + # number) plane_sort_index is a list of indices into the input + # planes giving the order in which they should be arranged to + # correctly sort them for inclusion into the segmentation + plane_position_values, plane_sort_index = \ + self.DimensionIndexSequence.get_index_values( + plane_positions + ) are_spatial_locations_preserved = ( all( plane_positions[i] == source_plane_positions[i] for i in range(len(plane_positions)) ) and - plane_orientation == source_plane_orientation + plane_orientation == source_plane_orientation and + pixel_measures == source_pixel_measures ) - plane_position_values, plane_sort_index = \ - self.DimensionIndexSequence.get_index_values(plane_positions) else: # Only one spatial location supported plane_positions = [None] @@ -1422,316 +1846,294 @@ def __init__( plane_sort_index = np.array([0]) are_spatial_locations_preserved = True - plane_position_names = self.DimensionIndexSequence.get_index_keywords() - - if ( - has_ref_frame_uid and - self._coordinate_system == CoordinateSystemNames.SLIDE - ): - self.ImageOrientationSlide = deepcopy( - plane_orientation[0].ImageOrientationSlide - ) - if are_spatial_locations_preserved and is_tiled: - self.TotalPixelMatrixOriginSequence = deepcopy( - src_img.TotalPixelMatrixOriginSequence - ) - self.TotalPixelMatrixRows = src_img.TotalPixelMatrixRows - self.TotalPixelMatrixColumns = src_img.TotalPixelMatrixColumns - elif are_spatial_locations_preserved and not is_tiled: - self.ImageCenterPointCoordinatesSequence = deepcopy( - src_img.ImageCenterPointCoordinatesSequence - ) - else: - row_index = plane_position_names.index( - 'RowPositionInTotalImagePixelMatrix' - ) - row_offsets = plane_position_values[:, row_index] - col_index = plane_position_names.index( - 'ColumnPositionInTotalImagePixelMatrix' - ) - col_offsets = plane_position_values[:, 
col_index] - frame_indices = np.lexsort([row_offsets, col_offsets]) - first_frame_index = frame_indices[0] - last_frame_index = frame_indices[-1] - x_index = plane_position_names.index( - 'XOffsetInSlideCoordinateSystem' - ) - x_origin = plane_position_values[first_frame_index, x_index] - y_index = plane_position_names.index( - 'YOffsetInSlideCoordinateSystem' - ) - y_origin = plane_position_values[first_frame_index, y_index] - z_index = plane_position_names.index( - 'ZOffsetInSlideCoordinateSystem' - ) - z_origin = plane_position_values[first_frame_index, z_index] - - if is_tiled: - origin_item = Dataset() - origin_item.XOffsetInSlideCoordinateSystem = \ - format_number_as_ds(x_origin) - origin_item.YOffsetInSlideCoordinateSystem = \ - format_number_as_ds(y_origin) - self.TotalPixelMatrixOriginSequence = [origin_item] - self.TotalPixelMatrixRows = int( - plane_position_values[last_frame_index, row_index] + - self.Rows + if are_spatial_locations_preserved: + if tile_pixel_array: + if ( + pixel_array.shape[1:3] != + ( + src_img.TotalPixelMatrixRows, + src_img.TotalPixelMatrixColumns ) - self.TotalPixelMatrixColumns = int( - plane_position_values[last_frame_index, col_index] + - self.Columns - ) - else: - transform = ImageToReferenceTransformer( - image_position=(x_origin, y_origin, z_origin), - image_orientation=plane_orientation, - pixel_spacing=pixel_measures[0].PixelSpacing - ) - center_image_coordinates = np.array( - [[self.Columns / 2, self.Rows / 2]], - dtype=float + ): + raise ValueError( + "Shape of input pixel_array does not match shape of " + "the total pixel matrix of the source image." ) - center_reference_coordinates = transform( - center_image_coordinates + else: + if pixel_array.shape[1:3] != (src_img.Rows, src_img.Columns): + raise ValueError( + "Shape of input pixel_array does not match shape of " + "the source image." 
) - x_center = center_reference_coordinates[0, 0] - y_center = center_reference_coordinates[0, 1] - z_center = center_reference_coordinates[0, 2] - center_item = Dataset() - center_item.XOffsetInSlideCoordinateSystem = \ - format_number_as_ds(x_center) - center_item.YOffsetInSlideCoordinateSystem = \ - format_number_as_ds(y_center) - center_item.ZOffsetInSlideCoordinateSystem = \ - format_number_as_ds(z_center) - self.ImageCenterPointCoordinatesSequence = [center_item] - - # Remove empty slices + + # Dimension Organization Type + dimension_organization_type = self._check_dimension_organization_type( + dimension_organization_type=dimension_organization_type, + is_tiled=is_tiled, + are_spatial_locations_preserved=are_spatial_locations_preserved, + omit_empty_frames=omit_empty_frames, + source_image=src_img, + plane_positions=plane_positions, + rows=self.Rows, + columns=self.Columns, + ) + if dimension_organization_type is not None: + self.DimensionOrganizationType = dimension_organization_type.value + + # Find indices such that empty planes are removed if omit_empty_frames: - pixel_array, plane_positions, source_image_indices = \ - self._omit_empty_frames(pixel_array, plane_positions) + if tile_pixel_array: + included_plane_indices, is_empty = \ + self._get_nonempty_tile_indices( + pixel_array, + plane_positions=plane_positions, + rows=self.Rows, + columns=self.Columns, + ) + else: + included_plane_indices, is_empty = \ + self._get_nonempty_plane_indices(pixel_array) + if is_empty: + # Cannot omit empty frames when all frames are empty + omit_empty_frames = False + included_plane_indices = list(range(len(plane_positions))) + else: + # Remove all empty plane positions from the list of sorted + # plane position indices + included_plane_indices_set = set(included_plane_indices) + plane_sort_index = [ + ind for ind in plane_sort_index + if ind in included_plane_indices_set + ] else: - source_image_indices = list(range(pixel_array.shape[0])) + included_plane_indices = 
list(range(len(plane_positions))) if has_ref_frame_uid: - plane_position_values = plane_position_values[source_image_indices] - _, plane_sort_index = np.unique( - plane_position_values, - axis=0, - return_index=True - ) - # Get unique values of attributes in the Plane Position Sequence or # Plane Position Slide Sequence, which define the position of the # plane with respect to the three dimensional patient or slide # coordinate system, respectively. These can subsequently be used # to look up the relative position of a plane relative to the # indexed dimension. - dimension_position_values = [ - np.unique(plane_position_values[:, index], axis=0) + unique_dimension_values = [ + np.unique( + plane_position_values[included_plane_indices, index], + axis=0 + ) for index in range(plane_position_values.shape[1]) ] else: - dimension_position_values = [None] + unique_dimension_values = [None] + + if ( + has_ref_frame_uid and + self._coordinate_system == CoordinateSystemNames.SLIDE + ): + total_pixel_matrix_size = ( + pixel_array.shape[1:3] if tile_pixel_array else None + ) + self._add_slide_coordinate_metadata( + source_image=src_img, + plane_orientation=plane_orientation, + plane_position_values=plane_position_values, + pixel_measures=pixel_measures, + are_spatial_locations_preserved=are_spatial_locations_preserved, + is_tiled=is_tiled, + total_pixel_matrix_size=total_pixel_matrix_size, + ) is_encaps = self.file_meta.TransferSyntaxUID.is_encapsulated + process_pool: Optional[Executor] = None + + if not isinstance(workers, (int, Executor)): + raise TypeError( + 'Argument "workers" must be of type int or ' + 'concurrent.futures.Executor (or a derived class).' + ) + using_multiprocessing = ( + isinstance(workers, Executor) or workers != 0 + ) + + # List of frames. In the case of native transfer syntaxes, we will + # collect a list of frames as flattened NumPy arrays for bitpacking at + # the end. 
In the case of encapsulated transfer syntaxes with no + # workers, we will accumulate a list of encoded frames to encapsulate + # at the end + frames: Union[List[bytes], List[np.ndarray]] = [] + if is_encaps: - # In the case of encapsulated transfer syntaxes, we will accumulate - # a list of encoded frames to encapsulate at the end - full_frames_list = [] - else: - # In the case of non-encapsulated (uncompressed) transfer syntaxes - # we will accumulate a 1D array of pixels from all frames for - # bitpacking at the end - full_pixel_array = np.array([], np.bool_) - - for i, segment_number in enumerate(described_segment_numbers): - # Pixel array for just this segment - if pixel_array.dtype in (np.float_, np.float32, np.float64): - # Floating-point numbers must be mapped to 8-bit integers in - # the range [0, max_fractional_value]. - if pixel_array.ndim == 4: - segment_array = pixel_array[:, :, :, segment_number - 1] + if using_multiprocessing: + # In the case of encapsulated transfer syntaxes with multiple + # workers, we will accumulate a list of encoded frames to + # encapsulate at the end + frame_futures: List[Future] = [] + + # Use the existing executor or create one + if isinstance(workers, Executor): + process_pool = workers else: - segment_array = pixel_array - planes = np.around( - segment_array * float(self.MaximumFractionalValue) + # If workers is negative, pass None to use all processors + process_pool = ProcessPoolExecutor( + workers if workers > 0 else None + ) + + # Parameters to use when calling the encode_frame function in + # either of the above two cases + encode_frame_kwargs = dict( + transfer_syntax_uid=self.file_meta.TransferSyntaxUID, + bits_allocated=self.BitsAllocated, + bits_stored=self.BitsStored, + photometric_interpretation=self.PhotometricInterpretation, + pixel_representation=self.PixelRepresentation + ) + else: + if using_multiprocessing: + warnings.warn( + "Setting workers != 0 or passing an instance of " + "concurrent.futures.Executor 
when using a non-encapsulated " + "transfer syntax has no effect.", + UserWarning ) - planes = planes.astype(np.uint8) - elif pixel_array.dtype in (np.uint8, np.uint16): - # Note that integer arrays with segments stacked down the last - # dimension will already have been converted to bool, leaving - # only "label maps" here, which must be converted to binary - # masks. - planes = np.zeros(pixel_array.shape, dtype=np.uint8) - planes[pixel_array == segment_number] = 1 - elif pixel_array.dtype == np.bool_: - if pixel_array.ndim == 4: - planes = pixel_array[:, :, :, segment_number - 1] + using_multiprocessing = False + + # Information about individual frames is placed into the + # PerFrameFunctionalGroupsSequence. Note that a *very* significant + # efficiency gain is observed when building this as a Python list + # rather than a pydicom sequence, and then converting to a pydicom + # sequence at the end + pffg_sequence: List[Dataset] = [] + + for segment_number in described_segment_numbers: + for plane_index in plane_sort_index: + + if tile_pixel_array: + pos = plane_positions[plane_index][0] + plane_array = get_tile_array( + pixel_array[0], + row_offset=pos.RowPositionInTotalImagePixelMatrix, + column_offset=pos.ColumnPositionInTotalImagePixelMatrix, + tile_rows=self.Rows, + tile_columns=self.Columns, + ) else: - planes = pixel_array - planes = planes.astype(np.uint8) - # It may happen that a boolean array is passed that should be - # interpreted as fractional segmentation type. In this case, we - # also need to stretch pixel values to 8-bit unsigned integer - # range by multiplying with the maximum fractional value. 
- if segmentation_type == SegmentationTypeValues.FRACTIONAL: - planes *= int(self.MaximumFractionalValue) - else: - raise TypeError('Pixel array has an invalid data type.') - - contained_plane_index = [] - for j in plane_sort_index: - # Index of this frame in the original list of source indices - source_image_index = source_image_indices[j] + # Select the relevant existing frame + plane_array = pixel_array[plane_index] + + # Pixel array for just this segment and this position + segment_array = self._get_segment_pixel_array( + plane_array, + segment_number=segment_number, + number_of_segments=number_of_segments, + segmentation_type=segmentation_type, + max_fractional_value=max_fractional_value, + ) - # Even though completely empty slices were removed earlier, - # there may still be slices in which this specific segment is + # Even though completely empty planes were removed earlier, + # there may still be planes in which this specific segment is # absent. Such frames should be removed - if omit_empty_frames and np.sum(planes[j]) == 0: - logger.info( - 'skip empty plane {} of segment #{}'.format( - j, segment_number - ) + if ( + omit_empty_frames and not + np.any(segment_array) + ): + logger.debug( + f'skip empty plane {plane_index} of segment ' + f'#{segment_number}' ) continue - contained_plane_index.append(j) - logger.info( - 'add plane #{} for segment #{}'.format( - j, segment_number - ) + logger.debug( + f'add plane #{plane_index} for segment #{segment_number}' ) - pffp_item = Dataset() - frame_content_item = Dataset() - - if not has_ref_frame_uid: - index_values = [] - else: - # Look up the position of the plane relative to the indexed - # dimension. 
+ # Get the item of the PerFrameFunctionalGroupsSequence for this + # segmentation frame + if has_ref_frame_uid: + plane_pos_val = plane_position_values[plane_index] try: - if ( - self._coordinate_system == - CoordinateSystemNames.SLIDE - ): - index_values = [ - int( - np.where( - (dimension_position_values[idx] == pos) - )[0][0] + 1 - ) - for idx, pos in enumerate( - plane_position_values[j] - ) - ] - else: - # In case of the patient coordinate system, the - # value of the attribute the Dimension Index - # Sequence points to (Image Position Patient) has a - # value multiplicity greater than one. - index_values = [ - int( - np.where( - (dimension_position_values[idx] == pos) - .all(axis=1) - )[0][0] + 1 - ) - for idx, pos in enumerate( - plane_position_values[j] - ) - ] + dimension_index_values = ( + self._get_dimension_index_values( + unique_dimension_values=unique_dimension_values, + plane_position_value=plane_pos_val, + coordinate_system=self._coordinate_system, + ) + ) except IndexError as error: raise IndexError( - 'Could not determine position of plane #{} in ' - 'three dimensional coordinate system based on ' - 'dimension index values: {}'.format(j, error) + 'Could not determine position of plane ' + f'#{plane_index} in three dimensional coordinate ' + f'system based on dimension index values: {error}' ) - frame_content_item.DimensionIndexValues = ( - [int(segment_number)] + index_values - ) - pffp_item.FrameContentSequence = [frame_content_item] - if has_ref_frame_uid: - pos = plane_positions[j] - if self._coordinate_system == CoordinateSystemNames.SLIDE: - pffp_item.PlanePositionSlideSequence = pos - else: - pffp_item.PlanePositionSequence = pos - - # Determining the source images that map to the frame is not - # always trivial. Since DerivationImageSequence is a type 2 - # attribute, we leave its value empty. 
- pffp_item.DerivationImageSequence = [] - - if are_spatial_locations_preserved: - derivation_image_item = Dataset() - derivation_code = codes.cid7203.Segmentation - derivation_image_item.DerivationCodeSequence = [ - CodedConcept.from_code(derivation_code) - ] - - derivation_src_img_item = Dataset() - if hasattr(source_images[0], 'NumberOfFrames'): - # A single multi-frame source image - src_img_item = self.SourceImageSequence[0] - # Frame numbers are one-based - derivation_src_img_item.ReferencedFrameNumber = ( - source_image_index + 1 + else: + dimension_index_values = [] + + if ( + dimension_organization_type != + DimensionOrganizationTypeValues.TILED_FULL + ): + # No per-frame functional group for TILED FULL + pffg_item = self._get_pffg_item( + segment_number=segment_number, + dimension_index_values=dimension_index_values, + plane_position=plane_positions[plane_index], + source_images=source_images, + source_image_index=plane_index, + are_spatial_locations_preserved=are_spatial_locations_preserved, # noqa: E501 + has_ref_frame_uid=has_ref_frame_uid, + coordinate_system=self._coordinate_system, + ) + pffg_sequence.append(pffg_item) + + # Add the segmentation pixel array for this frame to the list + if is_encaps: + if process_pool is None: + # Encode this frame and add resulting bytes to the list + # for encapsulation at the end + frames.append( + encode_frame( + segment_array, + **encode_frame_kwargs, + ) ) else: - # Multiple single-frame source images - src_img_item = self.SourceImageSequence[ - source_image_index - ] - derivation_src_img_item.ReferencedSOPClassUID = \ - src_img_item.ReferencedSOPClassUID - derivation_src_img_item.ReferencedSOPInstanceUID = \ - src_img_item.ReferencedSOPInstanceUID - purpose_code = \ - codes.cid7202.SourceImageForImageProcessingOperation - derivation_src_img_item.PurposeOfReferenceCodeSequence = [ - CodedConcept.from_code(purpose_code) - ] - derivation_src_img_item.SpatialLocationsPreserved = 'YES' - 
derivation_image_item.SourceImageSequence = [ - derivation_src_img_item, - ] - pffp_item.DerivationImageSequence.append( - derivation_image_item - ) + # Submit this frame for encoding this frame and add the + # future to the list for encapsulation at the end + future = process_pool.submit( + encode_frame, + array=segment_array, + **encode_frame_kwargs, + ) + frame_futures.append(future) else: - logger.warning('spatial locations not preserved') + # Concatenate the 1D array for encoding at the end + frames.append(segment_array.flatten()) + + if ( + dimension_organization_type != + DimensionOrganizationTypeValues.TILED_FULL + ): + self.PerFrameFunctionalGroupsSequence = pffg_sequence - identification = Dataset() - identification.ReferencedSegmentNumber = int(segment_number) - pffp_item.SegmentIdentificationSequence = [ - identification, + if is_encaps: + if process_pool is not None: + frames = [ + fut.result() for fut in frame_futures ] - self.PerFrameFunctionalGroupsSequence.append(pffp_item) - self.NumberOfFrames += 1 - - if is_encaps: - # Encode this frame and add to the list for encapsulation - # at the end - for f in contained_plane_index: - full_frames_list.append(self._encode_pixels(planes[f])) - else: - # Concatenate the 1D array for re-encoding at the end - full_pixel_array = np.concatenate([ - full_pixel_array, - planes[contained_plane_index].flatten() - ]) - self.SegmentSequence.append(segment_descriptions[i]) + # Shutdown the pool if we created it, otherwise it is the + # caller's responsibility + if process_pool is not workers: + process_pool.shutdown() - if is_encaps: # Encapsulate all pre-compressed frames - self.PixelData = encapsulate(full_frames_list) + self.NumberOfFrames = len(frames) + self.PixelData = encapsulate(frames) else: # Encode the whole pixel array at once # This allows for correct bit-packing in cases where # number of pixels per frame is not a multiple of 8 - self.PixelData = self._encode_pixels(full_pixel_array) + self.NumberOfFrames 
= len(frames) + self.PixelData = self._encode_pixels_native( + np.concatenate(frames) + ) # Add a null trailing byte if required if len(self.PixelData) % 2 == 1: @@ -1796,10 +2198,280 @@ def _check_segment_numbers(described_segment_numbers: np.ndarray): f'from 1. Found {described_segment_numbers[0]}. ' ) + @staticmethod + def _get_pixel_measures_sequence( + source_image: Dataset, + is_multiframe: bool, + ) -> Optional[PixelMeasuresSequence]: + """Get a Pixel Measures Sequence from the source image. + + This is a helper method used in the constructor. + + Parameters + ---------- + source_image: pydicom.Dataset + The first source image. + is_multiframe: bool + Whether the source image is multiframe. + + Returns + ------- + Union[highdicom.PixelMeasuresSequence, None] + A PixelMeasuresSequence derived from the source image, if this is + possible. Otherwise None. + + """ + if is_multiframe: + src_shared_fg = source_image.SharedFunctionalGroupsSequence[0] + pixel_measures = src_shared_fg.PixelMeasuresSequence + else: + if hasattr(source_image, 'FrameOfReferenceUID'): + pixel_measures = PixelMeasuresSequence( + pixel_spacing=source_image.PixelSpacing, + slice_thickness=source_image.SliceThickness, + spacing_between_slices=source_image.get( + 'SpacingBetweenSlices', + None + ) + ) + else: + pixel_spacing = getattr(source_image, 'PixelSpacing', None) + if pixel_spacing is not None: + pixel_measures = PixelMeasuresSequence( + pixel_spacing=pixel_spacing, + slice_thickness=source_image.get( + 'SliceThickness', + None + ), + spacing_between_slices=source_image.get( + 'SpacingBetweenSlices', + None + ) + ) + else: + pixel_measures = None + + return pixel_measures + + def _add_slide_coordinate_metadata( + self, + source_image: Dataset, + plane_orientation: PlaneOrientationSequence, + plane_position_values: np.ndarray, + pixel_measures: PixelMeasuresSequence, + are_spatial_locations_preserved: bool, + is_tiled: bool, + total_pixel_matrix_size: Optional[Tuple[int, int]] = None, + 
) -> None: + """Add metadata related to the slide coordinate system. + + This is a helper method used in the constructor. + + Parameters + ---------- + source_image: pydicom.Dataset + The source image (assumed to be a single source image). + plane_orientation: highdicom.PlaneOrientationSequence + Plane orientation sequence for the segmentation. + plane_position_values: numpy.ndarray + Plane positions of each plane. + pixel_measures: highdicom.PixelMeasuresSequence + PixelMeasuresSequence for the segmentation. + are_spatial_locations_preserved: bool + Whether spatial locations are preserved between the source image + and the segmentation. + is_tiled: bool + Whether the source image is a tiled image. + total_pixel_matrix_size: Optional[Tuple[int, int]] + Size (rows, columns) of the total pixel matrix, if known. If None, + this will be deduced from the specified plane position values. + Explicitly providing the total pixel matrix size is required if the + total pixel matrix is smaller than the total area covered by the + provided tiles (i.e. the provided plane positions are padded). 
+ + """ + plane_position_names = self.DimensionIndexSequence.get_index_keywords() + + self.ImageOrientationSlide = deepcopy( + plane_orientation[0].ImageOrientationSlide + ) + if are_spatial_locations_preserved and is_tiled: + self.TotalPixelMatrixOriginSequence = deepcopy( + source_image.TotalPixelMatrixOriginSequence + ) + self.TotalPixelMatrixRows = source_image.TotalPixelMatrixRows + self.TotalPixelMatrixColumns = source_image.TotalPixelMatrixColumns + self.TotalPixelMatrixFocalPlanes = 1 + elif are_spatial_locations_preserved and not is_tiled: + self.ImageCenterPointCoordinatesSequence = deepcopy( + source_image.ImageCenterPointCoordinatesSequence + ) + else: + row_index = plane_position_names.index( + 'RowPositionInTotalImagePixelMatrix' + ) + row_offsets = plane_position_values[:, row_index] + col_index = plane_position_names.index( + 'ColumnPositionInTotalImagePixelMatrix' + ) + col_offsets = plane_position_values[:, col_index] + frame_indices = np.lexsort([row_offsets, col_offsets]) + first_frame_index = frame_indices[0] + last_frame_index = frame_indices[-1] + x_index = plane_position_names.index( + 'XOffsetInSlideCoordinateSystem' + ) + x_origin = plane_position_values[first_frame_index, x_index] + y_index = plane_position_names.index( + 'YOffsetInSlideCoordinateSystem' + ) + y_origin = plane_position_values[first_frame_index, y_index] + z_index = plane_position_names.index( + 'ZOffsetInSlideCoordinateSystem' + ) + z_origin = plane_position_values[first_frame_index, z_index] + + if is_tiled: + origin_item = Dataset() + origin_item.XOffsetInSlideCoordinateSystem = \ + format_number_as_ds(x_origin) + origin_item.YOffsetInSlideCoordinateSystem = \ + format_number_as_ds(y_origin) + self.TotalPixelMatrixOriginSequence = [origin_item] + self.TotalPixelMatrixFocalPlanes = 1 + if total_pixel_matrix_size is None: + self.TotalPixelMatrixRows = int( + plane_position_values[last_frame_index, row_index] + + self.Rows - 1 + ) + self.TotalPixelMatrixColumns = int( + 
plane_position_values[last_frame_index, col_index] + + self.Columns - 1 + ) + else: + self.TotalPixelMatrixRows = total_pixel_matrix_size[0] + self.TotalPixelMatrixColumns = total_pixel_matrix_size[1] + else: + transform = ImageToReferenceTransformer( + image_position=(x_origin, y_origin, z_origin), + image_orientation=plane_orientation, + pixel_spacing=pixel_measures[0].PixelSpacing + ) + center_image_coordinates = np.array( + [[self.Columns / 2, self.Rows / 2]], + dtype=float + ) + center_reference_coordinates = transform( + center_image_coordinates + ) + x_center = center_reference_coordinates[0, 0] + y_center = center_reference_coordinates[0, 1] + z_center = center_reference_coordinates[0, 2] + center_item = Dataset() + center_item.XOffsetInSlideCoordinateSystem = \ + format_number_as_ds(x_center) + center_item.YOffsetInSlideCoordinateSystem = \ + format_number_as_ds(y_center) + center_item.ZOffsetInSlideCoordinateSystem = \ + format_number_as_ds(z_center) + self.ImageCenterPointCoordinatesSequence = [center_item] + + @staticmethod + def _check_dimension_organization_type( + dimension_organization_type: Union[ + DimensionOrganizationTypeValues, + str, + None, + ], + is_tiled: bool, + are_spatial_locations_preserved: bool, + omit_empty_frames: bool, + source_image: Dataset, + plane_positions: Sequence[PlanePositionSequence], + rows: int, + columns: int, + ) -> Optional[DimensionOrganizationTypeValues]: + """Checks that the specified Dimension Organization Type is valid. + + Parameters + ---------- + dimension_organization_type: Union[highdicom.enum.DimensionOrganizationTypeValues, str, None] + The specified DimensionOrganizationType for the output Segmentation. + is_tiled: bool + Whether the source image is a tiled image. + are_spatial_locations_preserved: bool + Whether spatial locations are preserved between the source image + and the segmentation pixel array. + omit_empty_frames: bool + Whether it was specified to omit empty frames. 
+ source_image: pydicom.Dataset + Representative dataset of the source images. + plane_positions: Sequence[highdicom.PlanePositionSequence] + Plane positions of all frames. + rows: int + Number of rows in each frame of the segmentation image. + columns: int + Number of columns in each frame of the segmentation image. + + Returns + ------- + Optional[highdicom.enum.DimensionOrganizationTypeValues]: + DimensionOrganizationType to use for the output Segmentation. + + """ # noqa: E501 + if is_tiled and dimension_organization_type is None: + dimension_organization_type = \ + DimensionOrganizationTypeValues.TILED_SPARSE + + if dimension_organization_type is not None: + dimension_organization_type = DimensionOrganizationTypeValues( + dimension_organization_type + ) + tiled_dimension_organization_types = [ + DimensionOrganizationTypeValues.TILED_SPARSE, + DimensionOrganizationTypeValues.TILED_FULL + ] + + if ( + dimension_organization_type in + tiled_dimension_organization_types + ): + if not is_tiled: + raise ValueError( + f"A value of {dimension_organization_type.value} " + 'for parameter "dimension_organization_type" is ' + 'only valid if the source images are tiled.' + ) + + if ( + dimension_organization_type == + DimensionOrganizationTypeValues.TILED_FULL + ): + if not are_plane_positions_tiled_full( + plane_positions, + rows, + columns, + ): + raise ValueError( + 'A value of "TILED_FULL" for parameter ' + '"dimension_organization_type" is not permitted unless ' + 'the "plane_positions" of the segmentation do not ' + 'do not follow the relevant requirements. See ' + 'https://dicom.nema.org/medical/dicom/current/output/' + 'chtml/part03/sect_C.7.6.17.3.html#sect_C.7.6.17.3.' + ) + if omit_empty_frames: + raise ValueError( + 'Parameter "omit_empty_frames" should be False if ' + 'using "dimension_organization_type" of "TILED_FULL".' 
+ ) + + return dimension_organization_type + @staticmethod def _check_and_cast_pixel_array( pixel_array: np.ndarray, - described_segment_numbers: np.ndarray, + number_of_segments: int, segmentation_type: SegmentationTypeValues ) -> Tuple[np.ndarray, SegmentsOverlapValues]: """Checks on the shape and data type of the pixel array. @@ -1810,7 +2482,7 @@ def _check_and_cast_pixel_array( ---------- pixel_array: numpy.ndarray The segmentation pixel array. - described_segment_numbers: numpy.ndarray + number_of_segments: int The segment numbers from the segment descriptions, in the order they were passed. 1D array of integers. segmentation_type: highdicom.seg.SegmentationTypeValues @@ -1827,26 +2499,24 @@ def _check_and_cast_pixel_array( """ if pixel_array.ndim == 4: # Check that the number of segments in the array matches - if pixel_array.shape[-1] != len(described_segment_numbers): + if pixel_array.shape[-1] != number_of_segments: raise ValueError( 'The number of segments in last dimension of the pixel ' f'array ({pixel_array.shape[-1]}) does not match the ' 'number of described segments ' - f'({len(described_segment_numbers)}).' + f'({number_of_segments}).' ) if pixel_array.dtype in (np.bool_, np.uint8, np.uint16): + max_pixel = pixel_array.max() + if pixel_array.ndim == 3: # A label-map style array where pixel values represent # segment associations - segments_present = np.unique(pixel_array).astype(np.uint16) - segments_present = segments_present[segments_present > 0] # The pixel values in the pixel array must all belong to # a described segment - if not np.all( - np.in1d(segments_present, described_segment_numbers) - ): + if max_pixel > number_of_segments: raise ValueError( 'Pixel array contains segments that lack ' 'descriptions.' 
@@ -1859,23 +2529,28 @@ def _check_and_cast_pixel_array( # Pixel array is 4D where each segment is stacked down # the last dimension # In this case, each segment of the pixel array should be binary - if pixel_array.max() > 1: + if max_pixel > 1: raise ValueError( 'When passing a 4D stack of segments with an integer ' 'pixel type, the pixel array must be binary.' ) - pixel_array = pixel_array.astype(np.bool_) # Need to check whether or not segments overlap - if pixel_array.shape[-1] == 1: + if max_pixel == 0: + # Empty segments can't overlap (this skips an unnecessary + # further test) + segments_overlap = SegmentsOverlapValues.NO + elif pixel_array.shape[-1] == 1: # A single segment does not overlap segments_overlap = SegmentsOverlapValues.NO - elif pixel_array.sum(axis=-1).max() > 1: - segments_overlap = SegmentsOverlapValues.YES else: - segments_overlap = SegmentsOverlapValues.NO + sum_over_segments = pixel_array.sum(axis=-1) + if np.any(sum_over_segments > 1): + segments_overlap = SegmentsOverlapValues.YES + else: + segments_overlap = SegmentsOverlapValues.NO - elif (pixel_array.dtype in (np.float_, np.float32, np.float64)): + elif pixel_array.dtype in (np.float_, np.float32, np.float64): unique_values = np.unique(pixel_array) if np.min(unique_values) < 0.0 or np.max(unique_values) > 1.0: raise ValueError( @@ -1892,10 +2567,13 @@ def _check_and_cast_pixel_array( 'Floating point pixel array values must be either ' '0.0 or 1.0 in case of BINARY segmentation type.' 
) - pixel_array = pixel_array.astype(np.bool_) + pixel_array = pixel_array.astype(np.uint8) # Need to check whether or not segments overlap - if pixel_array.shape[-1] == 1: + if len(unique_values) == 1 and unique_values[0] == 0.0: + # All pixels are zero: there can be no overlap + segments_overlap = SegmentsOverlapValues.NO + elif pixel_array.ndim == 3 or pixel_array.shape[-1] == 1: # A single segment does not overlap segments_overlap = SegmentsOverlapValues.NO elif pixel_array.sum(axis=-1).max() > 1: @@ -1907,105 +2585,400 @@ def _check_and_cast_pixel_array( # A single segment does not overlap segments_overlap = SegmentsOverlapValues.NO else: - # A truly fractional segmentation with multiple segments. - # Unclear how overlap should be interpreted in this case - segments_overlap = SegmentsOverlapValues.UNDEFINED + # A truly fractional segmentation with multiple segments. + # Unclear how overlap should be interpreted in this case + segments_overlap = SegmentsOverlapValues.UNDEFINED + else: + raise TypeError('Pixel array has an invalid data type.') + + return pixel_array, segments_overlap + + @staticmethod + def _get_nonempty_plane_indices( + pixel_array: np.ndarray + ) -> Tuple[List[int], bool]: + """Get a list of all indices of original planes that are non-empty. + + Empty planes (without any positive pixels in any of the segments) do + not need to be included in the segmentation image. This method finds a + list of indices of the input frames that are non-empty, and therefore + should be included in the segmentation image. + + Parameters + ---------- + pixel_array: numpy.ndarray + Segmentation pixel array + + Returns + ------- + included_plane_indices : List[int] + List giving for each plane position in the resulting segmentation + image the index of the corresponding frame in the original pixel + array. + is_empty: bool + Whether the entire image is empty. If so, empty frames should not + be omitted. 
+ + """ + # This list tracks which source image each non-empty frame came from + source_image_indices = [ + i for i, frm in enumerate(pixel_array) + if np.any(frm) + ] + + if len(source_image_indices) == 0: + logger.warning( + 'Encoding an empty segmentation with "omit_empty_frames" ' + 'set to True. Reverting to encoding all frames since omitting ' + 'all frames is not possible.' + ) + return (list(range(pixel_array.shape[0])), True) + + return (source_image_indices, False) + + @staticmethod + def _get_nonempty_tile_indices( + pixel_array: np.ndarray, + plane_positions: Sequence[PlanePositionSequence], + rows: int, + columns: int, + ) -> Tuple[List[int], bool]: + """Get a list of all indices of tile locations that are non-empty. + + This is similar to _get_nonempty_plane_indices, but works on a total + pixel matrix rather than a set of frames. Empty planes (without any + positive pixels in any of the segments) do not need to be included in + the segmentation image. This method finds a list of indices of the + input frames that are non-empty, and therefore should be included in + the segmentation image. + + Parameters + ---------- + pixel_array: numpy.ndarray + Segmentation pixel array + plane_positions: Sequence[highdicom.PlanePositionSequence] + Plane positions of each tile. + rows: int + Number of rows in each tile. + columns: int + Number of columns in each tile. + + Returns + ------- + included_plane_indices : List[int] + List giving for each plane position in the resulting segmentation + image the index of the corresponding frame in the original pixel + array. + is_empty: bool + Whether the entire image is empty. If so, empty frames should not + be omitted. 
+ + """ + # This list tracks which source image each non-empty frame came from + source_image_indices = [ + i for i, pos in enumerate(plane_positions) + if np.any( + get_tile_array( + pixel_array[0], + row_offset=pos[0].RowPositionInTotalImagePixelMatrix, + column_offset=pos[0].ColumnPositionInTotalImagePixelMatrix, + tile_rows=rows, + tile_columns=columns, + ) + ) + ] + + if len(source_image_indices) == 0: + logger.warning( + 'Encoding an empty segmentation with "omit_empty_frames" ' + 'set to True. Reverting to encoding all frames since omitting ' + 'all frames is not possible.' + ) + return (list(range(len(plane_positions))), True) + + return (source_image_indices, False) + + @staticmethod + def _get_segment_pixel_array( + pixel_array: np.ndarray, + segment_number: int, + number_of_segments: int, + segmentation_type: SegmentationTypeValues, + max_fractional_value: int + ) -> np.ndarray: + """Get pixel data array for a specific segment and plane. + + This is a helper method used during the constructor. Note that the + pixel array is expected to have been processed using the + ``_check_and_cast_pixel_array`` method before being passed to this + method. + + Parameters + ---------- + pixel_array: numpy.ndarray + Segmentation pixel array containing all segments for a single plane. + Array is therefore either (Rows x Columns x Segments) or (Rows x + Columns) in case of a "label map" style array. + segment_number: int + The segment of interest. + number_of_segments: int + Number of segments in the the segmentation. + segmentation_type: highdicom.seg.SegmentationTypeValues + Desired output segmentation type. + max_fractional_value: int + Value for scaling FRACTIONAL segmentations. + + Returns + ------- + numpy.ndarray: + Pixel data array consisting of pixel data for a single segment for + a single plane. Output array has dtype np.uint8 and binary values + (0 or 1). 
+ + """ + if pixel_array.dtype in (np.float_, np.float32, np.float64): + # Based on the previous checks and casting, if we get here the + # output is a FRACTIONAL segmentation Floating-point numbers must + # be mapped to 8-bit integers in the range [0, + # max_fractional_value]. + if pixel_array.ndim == 3: + segment_array = pixel_array[:, :, segment_number - 1] + else: + segment_array = pixel_array + segment_array = np.around( + segment_array * float(max_fractional_value) + ) + segment_array = segment_array.astype(np.uint8) + else: + if pixel_array.ndim == 2: + # "Label maps" that must be converted to binary masks. + if number_of_segments == 1: + # We wish to avoid unnecessary comparison or casting + # operations here, for efficiency reasons. If there is only + # a single segment, the label map pixel array is already + # correct + if pixel_array.dtype != np.uint8: + segment_array = pixel_array.astype(np.uint8) + else: + segment_array = pixel_array + else: + segment_array = ( + pixel_array == segment_number + ).astype(np.uint8) + else: + segment_array = pixel_array[:, :, segment_number - 1] + if segment_array.dtype != np.uint8: + segment_array = segment_array.astype(np.uint8) + + # It may happen that a binary valued array is passed that should be + # stored as a fractional segmentation. In this case, we also need + # to stretch pixel values to 8-bit unsigned integer range by + # multiplying with the maximum fractional value. + if segmentation_type == SegmentationTypeValues.FRACTIONAL: + # Avoid an unnecessary multiplication operation if max + # fractional value is 1 + if int(max_fractional_value) != 1: + segment_array *= int(max_fractional_value) + + return segment_array + + @staticmethod + def _get_dimension_index_values( + unique_dimension_values: List[np.ndarray], + plane_position_value: np.ndarray, + coordinate_system: Optional[CoordinateSystemNames], + ) -> List[int]: + """Get Dimension Index Values for a frame. 
+ + The Dimension Index Values are a list of integer indices that describe + the position of a frame as indices along each of the dimensions of + the Dimension Index Sequence. See + :class:`highdicom.seg.DimensionIndexSequence`. + + Parameters + ---------- + unique_dimension_values: List[numpy.ndarray] + List of arrays containing, for each dimension in the dimension + index sequence (except ReferencedSegment), the sorted unique + values of all planes along that dimension. Each array in the list + corresponds to one dimension, and has shape (N x m) where N is the + number of unique values for that dimension and m is the + multiplicity of values for that dimension. + plane_position_value: numpy.ndarray + Plane position of the plane. This is a 1D or 2D array containing + each of the raw values for this plane of the attributes listed as + dimension index pointers (except ReferencedSegment). For dimension + indices where the value multiplicity of all attributes is 1, the + array will be 1D. If the value multiplicity of attributes is + greater than 1, these values are stacked along the second + dimension. + coordinate_system: Optional[highdicom.CoordinateSystemNames] + The type of coordinate system used (if any). + + Returns + ------- + dimension_index_values: List[int] + The dimension index values (except the segment number) for the + given plane. + + """ + # Look up the position of the plane relative to the indexed + # dimension. + if ( + coordinate_system == + CoordinateSystemNames.SLIDE + ): + index_values = [ + int( + np.where( + (unique_dimension_values[idx] == pos) + )[0][0] + 1 + ) + for idx, pos in enumerate(plane_position_value) + ] else: - raise TypeError('Pixel array has an invalid data type.') + # In case of the patient coordinate system, the + # value of the attribute the Dimension Index + # Sequence points to (Image Position Patient) has a + # value multiplicity greater than one. 
+ index_values = [ + int( + np.where( + (unique_dimension_values[idx] == pos).all( + axis=1 + ) + )[0][0] + 1 + ) + for idx, pos in enumerate(plane_position_value) + ] - return pixel_array, segments_overlap + return index_values @staticmethod - def _omit_empty_frames( - pixel_array: np.ndarray, - plane_positions: Sequence[Optional[PlanePositionSequence]] - ) -> Tuple[np.ndarray, List[Optional[PlanePositionSequence]], List[int]]: - """Remove empty frames from the pixel array. - - Empty frames (without any positive pixels) do not need to be included - in the segmentation image. This method removes the relevant frames - and updates the plane positions accordingly. + def _get_pffg_item( + segment_number: int, + dimension_index_values: List[int], + plane_position: PlanePositionSequence, + source_images: List[Dataset], + source_image_index: int, + are_spatial_locations_preserved: bool, + has_ref_frame_uid: bool, + coordinate_system: Optional[CoordinateSystemNames], + ) -> Dataset: + """Get a single item of the Per Frame Functional Groups Sequence. + + This is a helper method used in the constructor. Parameters ---------- - pixel_array: numpy.ndarray - Segmentation pixel array - plane_positions: Sequence[Optional[highdicom.PlanePositionSequence]] - Plane positions for each of the frames + segment_number: int + Segment number of this segmentation frame. + dimension_index_values: List[int] + Dimension index values (except segment number) for this frame. + plane_position: highdicom.seg.PlanePositionSequence + Plane position of this frame. + source_images: List[Dataset] + Full list of source images. + source_image_index: int + Index of this frame in the original list of source images. + are_spatial_locations_preserved: bool + Whether spatial locations are preserved between the segmentation + and the source images. + has_ref_frame_uid: bool + Whether the sources images have a frame of reference UID. 
+ coordinate_system: Optional[highdicom.CoordinateSystemNames] + Coordinate system used, if any. Returns ------- - pixel_array: numpy.ndarray - Pixel array with empty frames removed - plane_positions: List[Optional[highdicom.PlanePositionSequence]] - Plane positions with entries corresponding to empty frames removed. - source_image_indices: List[int] - List giving for each frame in the output pixel array the index of - the corresponding frame in the original pixel array + pydicom.Dataset + Dataset representing the item of the + Per Frame Functional Groups Sequence for this segmentation frame. """ - non_empty_frames = [] - non_empty_plane_positions = [] + pffg_item = Dataset() + frame_content_item = Dataset() - # This list tracks which source image each non-empty frame came from - source_image_indices = [] - for i, (frm, pos) in enumerate(zip(pixel_array, plane_positions)): - if frm.sum() > 0: - non_empty_frames.append(frm) - non_empty_plane_positions.append(pos) - source_image_indices.append(i) - pixel_array = np.stack(non_empty_frames) - plane_positions = non_empty_plane_positions + frame_content_item.DimensionIndexValues = ( + [int(segment_number)] + dimension_index_values + ) + pffg_item.FrameContentSequence = [frame_content_item] + if has_ref_frame_uid: + if coordinate_system == CoordinateSystemNames.SLIDE: + pffg_item.PlanePositionSlideSequence = plane_position + else: + pffg_item.PlanePositionSequence = plane_position + + # Determining the source images that map to the frame is not + # always trivial. Since DerivationImageSequence is a type 2 + # attribute, we leave its value empty. 
def _encode_pixels_native(self, planes: np.ndarray) -> bytes:
    """Serialize pixel planes for storage under a native transfer syntax.

    Parameters
    ----------
    planes: numpy.ndarray
        One or more segmentation image planes. When several planes are
        supplied, they are stacked along the first axis (index 0).

    Returns
    -------
    bytes
        Encoded pixels

    """
    is_binary = (
        self.SegmentationType == SegmentationTypeValues.BINARY.value
    )
    # BINARY segmentations are handed to ``pack_bits``; every other
    # segmentation type is written out as the array's raw bytes.
    return pack_bits(planes) if is_binary else planes.tobytes()
self.DimensionOrganizationType == 'TILED_FULL' + ) + segment_numbers = [] - referenced_instances: Optional[List[str]] = [] - referenced_frames: Optional[List[int]] = [] # Get list of all dimension index pointers, excluding the segment # number, since this is treated differently @@ -2205,6 +3182,14 @@ def _build_luts(self) -> None: for dim_ind in self.DimensionIndexSequence if dim_ind.DimensionIndexPointer != seg_num_tag ] + + func_grp_pointers = {} + for dim_ind in self.DimensionIndexSequence: + ptr = dim_ind.DimensionIndexPointer + if ptr in self._dim_ind_pointers: + grp_ptr = getattr(dim_ind, "FunctionalGroupPointer", None) + func_grp_pointers[ptr] = grp_ptr + dim_ind_positions = { dim_ind.DimensionIndexPointer: i for i, dim_ind in enumerate(self.DimensionIndexSequence) @@ -2213,112 +3198,172 @@ def _build_luts(self) -> None: dim_indices: Dict[int, List[int]] = { ptr: [] for ptr in self._dim_ind_pointers } + dim_values: Dict[int, List[Any]] = { + ptr: [] for ptr in self._dim_ind_pointers + } - # Create a list of source images and check for spatial locations - # preserved and that there is a single source frame per seg frame - locations_list_type = List[Optional[SpatialLocationsPreservedValues]] - locations_preserved: locations_list_type = [] self._single_source_frame_per_seg_frame = True - for frame_item in self.PerFrameFunctionalGroupsSequence: - # Get segment number for this frame - seg_id_seg = frame_item.SegmentIdentificationSequence[0] - seg_num = seg_id_seg.ReferencedSegmentNumber - segment_numbers.append(int(seg_num)) - - # Get dimension indices for this frame - indices = frame_item.FrameContentSequence[0].DimensionIndexValues - if not isinstance(indices, (MultiValue, list)): - # In case there is a single dimension index - indices = [indices] - if len(indices) != len(self._dim_ind_pointers) + 1: - # (+1 because referenced segment number is ignored) + + if is_tiled_full: + # With TILED_FULL, there is no PerFrameFunctionalGroupsSequence, + # so we have to 
deduce the per-frame information + row_tag = tag_for_keyword('RowPositionInTotalImagePixelMatrix') + col_tag = tag_for_keyword('ColumnPositionInTotalImagePixelMatrix') + x_tag = tag_for_keyword('XOffsetInSlideCoordinateSystem') + y_tag = tag_for_keyword('YOffsetInSlideCoordinateSystem') + z_tag = tag_for_keyword('ZOffsetInSlideCoordinateSystem') + tiled_full_dim_indices = {row_tag, col_tag, x_tag, y_tag, z_tag} + if len(set(dim_indices.keys()) - tiled_full_dim_indices) > 0: raise RuntimeError( - 'Unexpected mismatch between dimension index values in ' - 'per-frames functional groups sequence and items in the ' - 'dimension index sequence.' + 'Expected segmentation images with ' + '"DimensionOrganizationType" of "TILED_FULL" are expected ' + 'to have the following dimension index pointers: ' + 'SegmentNumber, RowPositionInTotalImagePixelMatrix, ' + 'ColumnPositionInTotalImagePixelMatrix.' ) - for ptr in self._dim_ind_pointers: - dim_indices[ptr].append(indices[dim_ind_positions[ptr]]) - - frame_source_instances = [] - frame_source_frames = [] - for der_im in frame_item.DerivationImageSequence: - for src_im in der_im.SourceImageSequence: - frame_source_instances.append( - src_im.ReferencedSOPInstanceUID + self._single_source_frame_per_seg_frame = False + ( + segment_numbers, + _, + dim_values[col_tag], + dim_values[row_tag], + dim_values[x_tag], + dim_values[y_tag], + dim_values[z_tag], + ) = zip(*iter_tiled_full_frame_data(self)) + + # Create indices for each of the dimensions + for ptr, vals in dim_values.items(): + _, indices = np.unique(vals, return_inverse=True) + dim_indices[ptr] = (indices + 1).tolist() + + # There is no way to deduce whether the spatial locations are + # preserved in the tiled full case + self._locations_preserved = None + + referenced_instances = None + referenced_frames = None + else: + referenced_instances: Optional[List[str]] = [] + referenced_frames: Optional[List[int]] = [] + + # Create a list of source images and check for spatial 
locations + # preserved + locations_list_type = List[ + Optional[SpatialLocationsPreservedValues] + ] + locations_preserved: locations_list_type = [] + + for frame_item in self.PerFrameFunctionalGroupsSequence: + # Get segment number for this frame + seg_id_seg = frame_item.SegmentIdentificationSequence[0] + seg_num = seg_id_seg.ReferencedSegmentNumber + segment_numbers.append(int(seg_num)) + + # Get dimension indices for this frame + content_seq = frame_item.FrameContentSequence[0] + indices = content_seq.DimensionIndexValues + if not isinstance(indices, (MultiValue, list)): + # In case there is a single dimension index + indices = [indices] + if len(indices) != len(self._dim_ind_pointers) + 1: + # (+1 because referenced segment number is ignored) + raise RuntimeError( + 'Unexpected mismatch between dimension index values in ' + 'per-frames functional groups sequence and items in ' + 'the dimension index sequence.' ) - if hasattr(src_im, 'SpatialLocationsPreserved'): - locations_preserved.append( - SpatialLocationsPreservedValues( - src_im.SpatialLocationsPreserved - ) - ) + for ptr in self._dim_ind_pointers: + dim_indices[ptr].append(indices[dim_ind_positions[ptr]]) + grp_ptr = func_grp_pointers[ptr] + if grp_ptr is not None: + dim_val = frame_item[grp_ptr][0][ptr].value else: - locations_preserved.append( - None + dim_val = frame_item[ptr].value + dim_values[ptr].append(dim_val) + + frame_source_instances = [] + frame_source_frames = [] + for der_im in frame_item.DerivationImageSequence: + for src_im in der_im.SourceImageSequence: + frame_source_instances.append( + src_im.ReferencedSOPInstanceUID ) - - if hasattr(src_im, 'ReferencedFrameNumber'): - if isinstance( - src_im.ReferencedFrameNumber, - MultiValue - ): - frame_source_frames.extend( - [ - int(f) - for f in src_im.ReferencedFrameNumber - ] + if hasattr(src_im, 'SpatialLocationsPreserved'): + locations_preserved.append( + SpatialLocationsPreservedValues( + src_im.SpatialLocationsPreserved + ) ) else: - 
frame_source_frames.append( - int(src_im.ReferencedFrameNumber) + locations_preserved.append( + None ) - else: - frame_source_frames.append(_NO_FRAME_REF_VALUE) - if ( - len(set(frame_source_instances)) != 1 or - len(set(frame_source_frames)) != 1 + if hasattr(src_im, 'ReferencedFrameNumber'): + if isinstance( + src_im.ReferencedFrameNumber, + MultiValue + ): + frame_source_frames.extend( + [ + int(f) + for f in src_im.ReferencedFrameNumber + ] + ) + else: + frame_source_frames.append( + int(src_im.ReferencedFrameNumber) + ) + else: + frame_source_frames.append(_NO_FRAME_REF_VALUE) + + if ( + len(set(frame_source_instances)) != 1 or + len(set(frame_source_frames)) != 1 + ): + self._single_source_frame_per_seg_frame = False + else: + ref_instance_uid = frame_source_instances[0] + if ref_instance_uid not in all_referenced_sops: + raise AttributeError( + f'SOP instance {ref_instance_uid} referenced in ' + 'the source image sequence is not included in the ' + 'Referenced Series Sequence or Studies Containing ' + 'Other Referenced Instances Sequence. This is an ' + 'error with the integrity of the Segmentation ' + 'object.' 
+ ) + referenced_instances.append(ref_instance_uid) + referenced_frames.append(frame_source_frames[0]) + + # Summarise + if any( + isinstance(v, SpatialLocationsPreservedValues) and + v == SpatialLocationsPreservedValues.NO + for v in locations_preserved + ): + Type = Optional[SpatialLocationsPreservedValues] + self._locations_preserved: Type = \ + SpatialLocationsPreservedValues.NO + elif all( + isinstance(v, SpatialLocationsPreservedValues) and + v == SpatialLocationsPreservedValues.YES + for v in locations_preserved ): - self._single_source_frame_per_seg_frame = False + self._locations_preserved = SpatialLocationsPreservedValues.YES else: - ref_instance_uid = frame_source_instances[0] - if ref_instance_uid not in all_referenced_sops: - raise AttributeError( - f'SOP instance {ref_instance_uid} referenced in the ' - 'source image sequence is not included in the ' - 'Referenced Series Sequence or Studies Containing ' - 'Other Referenced Instances Sequence. This is an ' - 'error with the integrity of the Segmentation object.' 
- ) - referenced_instances.append(ref_instance_uid) - referenced_frames.append(frame_source_frames[0]) - - # Summarise - if any( - isinstance(v, SpatialLocationsPreservedValues) and - v == SpatialLocationsPreservedValues.NO - for v in locations_preserved - ): - Type = Optional[SpatialLocationsPreservedValues] - self._locations_preserved: Type = SpatialLocationsPreservedValues.NO - elif all( - isinstance(v, SpatialLocationsPreservedValues) and - v == SpatialLocationsPreservedValues.YES - for v in locations_preserved - ): - self._locations_preserved = SpatialLocationsPreservedValues.YES - else: - self._locations_preserved = None + self._locations_preserved = None - if not self._single_source_frame_per_seg_frame: - referenced_instances = None - referenced_frames = None + if not self._single_source_frame_per_seg_frame: + referenced_instances = None + referenced_frames = None self._db_man = _SegDBManager( referenced_uids=referenced_uids, segment_numbers=segment_numbers, dim_indices=dim_indices, + dim_values=dim_values, referenced_instances=referenced_instances, referenced_frames=referenced_frames, ) @@ -2622,8 +3667,14 @@ def segmented_property_types(self) -> List[CodedConcept]: def _get_pixels_by_seg_frame( self, - num_output_frames: int, - indices_iterator: Iterable[Tuple[int, int, int]], + output_shape: Union[int, Tuple[int, int]], + indices_iterator: Iterator[ + Tuple[ + Tuple[Union[slice, int], ...], + Tuple[Union[slice, int], ...], + int + ] + ], segment_numbers: np.ndarray, combine_segments: bool = False, relabel: bool = False, @@ -2639,17 +3690,26 @@ def _get_pixels_by_seg_frame( Parameters ---------- - num_output_frames: int - Number of frames in the output array. - indices_iterator: Iterable[Tuple[int, int, int]], - An iterable object that yields tuples of (out_frame_index, - seg_frame_index, output_segment_number) that describes how to + output_shape: Union[int, Tuple[int, int]] + Shape of the output array. 
If an integer, this is the + number of frames in the output array and the number of rows and + columns are taken to match those of each segmentation frame. If a + tuple of integers, it contains the number of (rows, columns) in the + output array and there is no frame dimension (this is the tiled + case). Note in either case, the segments dimension (if relevant) is + omitted. + indices_iterator: Iterator[Tuple[Tuple[Union[slice, int], ...], Tuple[Union[slice, int], ...], int ]] + An iterable object that yields tuples of (output_indexer, + segmentation_indexer, output_segment_number) that describes how to construct the desired output pixel array from the segmentation - image's pixel array. out_frame_index is the (0-based) index of a - frame of the output array. 'seg_frame_index' is the (0-based) - frame index of a frame of the segmentation image that should be - placed into that output frame with as segment number - 'output_segment_number'. + image's pixel array. 'output_indexer' is a tuple that may be used + directly to index the output array to place a single frame's pixels + into the output array. Similarly 'segmentation_indexer' is a tuple + that may be used directly to index the segmentation pixel array + to retrieve the pixels to place into the output array + with segment number 'output_segment_number'. Note that in both + cases the indexers access the frame, row and column dimensions of + the relevant array, but not the segment dimension (if relevant). segment_numbers: np.ndarray One dimensional numpy array containing segment numbers corresponding to the columns of the seg frames matrix.
@@ -2690,7 +3750,7 @@ def _get_pixels_by_seg_frame( pixel_array: np.ndarray Segmentation pixel array - """ + """ # noqa: E501 if ( segment_numbers.min() < 1 or segment_numbers.max() > self.number_of_segments @@ -2775,47 +3835,62 @@ def _get_pixels_by_seg_frame( pixel_array = pixel_array[None, :, :] # Initialize empty pixel array + full_output_shape = ( + output_shape + if isinstance(output_shape, tuple) + else (output_shape, h, w) + ) out_array = np.zeros( - (num_output_frames, h, w), + full_output_shape, dtype=intermediate_dtype ) # Loop over the supplied iterable - for fo, fi, seg_n in indices_iterator: + for (output_indexer, seg_indexer, seg_n) in indices_iterator: pix_value = intermediate_dtype.type(seg_n) + if not skip_overlap_checks: if np.any( np.logical_and( - pixel_array[fi, :, :] > 0, - out_array[fo, :, :] > 0 + pixel_array[seg_indexer] > 0, + out_array[output_indexer] > 0 ) ): raise RuntimeError( "Cannot combine segments because segments " "overlap." ) - out_array[fo, :, :] = np.maximum( - pixel_array[fi, :, :] * pix_value, - out_array[fo, :, :] + out_array[output_indexer] = np.maximum( + pixel_array[seg_indexer] * pix_value, + out_array[output_indexer] ) else: # Initialize empty pixel array + full_output_shape = ( + (*output_shape, num_segments) + if isinstance(output_shape, tuple) + else (output_shape, h, w, num_segments) + ) out_array = np.zeros( - (num_output_frames, h, w, num_segments), - intermediate_dtype + full_output_shape, + dtype=intermediate_dtype ) - # Loop through output frames - for fo, fi, seg_n in indices_iterator: + # loop through output frames + for (output_indexer, seg_indexer, seg_n) in indices_iterator: + + # Output indexer needs segment index + output_indexer = (*output_indexer, seg_n) + # Copy data to to output array if self.pixel_array.ndim == 2: # Special case with a single segmentation frame - out_array[fo, :, :, seg_n] = \ + out_array[output_indexer] = \ self.pixel_array.copy() else: - out_array[fo, :, :, seg_n] = \ - 
self.pixel_array[fi, :, :].copy() + out_array[output_indexer] = \ + self.pixel_array[seg_indexer].copy() if rescale_fractional: if self.segmentation_type == SegmentationTypeValues.FRACTIONAL: @@ -2935,7 +4010,17 @@ def _check_indexing_with_source_frames( """ # Checks that it is possible to index using source frames in this # dataset - if self._locations_preserved is None: + is_tiled_full = ( + hasattr(self, 'DimensionOrganizationType') and + self.DimensionOrganizationType == 'TILED_FULL' + ) + if is_tiled_full: + raise RuntimeError( + 'Indexing via source frames is not possible when a ' + 'segmentation is stored using the DimensionOrganizationType ' + '"TILED_FULL".' + ) + elif self._locations_preserved is None: if not ignore_spatial_locations: raise RuntimeError( 'Indexing via source frames is not permissible since this ' @@ -3165,7 +4250,7 @@ def get_pixels_by_source_instance( ) as indices: return self._get_pixels_by_seg_frame( - num_output_frames=len(source_sop_instance_uids), + output_shape=len(source_sop_instance_uids), indices_iterator=indices, segment_numbers=np.array(segment_numbers), combine_segments=combine_segments, @@ -3256,19 +4341,19 @@ def get_pixels_by_source_frame( the original segment numbers in ``segment_numbers`` parameter. If ``combine_segments`` is ``False``, this has no effect. ignore_spatial_locations: bool, optional - Ignore whether or not spatial locations were preserved in the - derivation of the segmentation frames from the source frames. In - some segmentation images, the pixel locations in the segmentation - frames may not correspond to pixel locations in the frames of the - source image from which they were derived. The segmentation image - may or may not specify whether or not spatial locations are - preserved in this way through use of the optional (0028,135A) - SpatialLocationsPreserved attribute. 
If this attribute specifies - that spatial locations are not preserved, or is absent from the - segmentation image, highdicom's default behavior is to disallow - indexing by source frames. To override this behavior and retrieve - segmentation pixels regardless of the presence or value of the - spatial locations preserved attribute, set this parameter to True. + Ignore whether or not spatial locations were preserved in the + derivation of the segmentation frames from the source frames. In + some segmentation images, the pixel locations in the segmentation + frames may not correspond to pixel locations in the frames of the + source image from which they were derived. The segmentation image + may or may not specify whether or not spatial locations are + preserved in this way through use of the optional (0028,135A) + SpatialLocationsPreserved attribute. If this attribute specifies + that spatial locations are not preserved, or is absent from the + segmentation image, highdicom's default behavior is to disallow + indexing by source frames. To override this behavior and retrieve + segmentation pixels regardless of the presence or value of the + spatial locations preserved attribute, set this parameter to True. assert_missing_frames_are_empty: bool, optional Assert that requested source frame numbers that are not referenced by the segmentation image contain no segments. 
def get_total_pixel_matrix(
    self,
    row_start: int = 1,
    row_end: Optional[int] = None,
    column_start: int = 1,
    column_end: Optional[int] = None,
    segment_numbers: Optional[Sequence[int]] = None,
    combine_segments: bool = False,
    relabel: bool = False,
    rescale_fractional: bool = True,
    skip_overlap_checks: bool = False,
    dtype: Union[type, str, np.dtype, None] = None,
) -> np.ndarray:
    """Get the pixel array as a (region of) the total pixel matrix.

    This is intended for retrieving segmentation masks derived from
    multi-frame (enhanced) source images that are tiled. The method
    returns (a region of) the 2D total pixel matrix implied by the
    frames within the segmentation.

    The output array will have 3 dimensions under the default behavior, and
    2 dimensions if ``combine_segments`` is set to ``True``. The first two
    dimensions are the rows and columns of the total pixel matrix,
    respectively. By default, the full total pixel matrix is returned,
    however a smaller region may be requested using the ``row_start``,
    ``row_end``, ``column_start`` and ``column_end`` parameters as 1-based
    indices into the total pixel matrix.

    When ``combine_segments`` is ``False`` (the default behavior), the
    segments are stacked down the final (3rd) dimension of the pixel array.
    If ``segment_numbers`` was specified, then ``pixel_array[:, :, i]``
    represents the data for segment ``segment_numbers[i]``. If
    ``segment_numbers`` was unspecified, then ``pixel_array[:, :, i]``
    represents the data for segment ``self.segment_numbers[i]``. Note
    that in neither case does ``pixel_array[:, :, i]`` represent
    the segmentation data for the segment with segment number ``i``, since
    segment numbers begin at 1 in DICOM.

    When ``combine_segments`` is ``True``, then the segmentation data from
    all specified segments is combined into a multi-class array in which
    pixel value is used to denote the segment to which a pixel belongs.
    This is only possible if the segments do not overlap and either the
    type of the segmentation is ``BINARY`` or the type of the segmentation
    is ``FRACTIONAL`` but all values are exactly 0.0 or 1.0. If the
    segments do overlap, a ``RuntimeError`` will be raised. After
    combining, the value of a pixel depends upon the ``relabel``
    parameter. In both cases, pixels that appear in no segments will have
    a value of ``0``. If ``relabel`` is ``False``, a pixel that appears in
    the segment with segment number ``i`` (according to the original
    segment numbering of the segmentation object) will have a value of
    ``i``. If ``relabel`` is ``True``, the value of a pixel in segment
    ``i`` is related not to the original segment number, but to the index
    of that segment number in the ``segment_numbers`` parameter of this
    method. Specifically, pixels belonging to the segment with segment
    number ``segment_numbers[i]`` are given the value ``i + 1`` in the
    output pixel array (since 0 is reserved for pixels that belong to no
    segments). In this case, the values in the output pixel array will
    always lie in the range ``0`` to ``len(segment_numbers)`` inclusive.

    Parameters
    ----------
    row_start: int, optional
        1-based row index in the total pixel matrix of the first row to
        include in the output array. May be negative, in which case the
        last row is considered index -1.
    row_end: Union[int, None], optional
        1-based row index in the total pixel matrix of the first row beyond
        the last row to include in the output array. A ``row_end`` value of
        ``n`` will include rows ``n - 1`` and below, similar to standard
        Python indexing. If ``None``, rows up until the final row of the
        total pixel matrix are included. May be negative, in which case the
        last row is considered index -1.
    column_start: int, optional
        1-based column index in the total pixel matrix of the first column
        to include in the output array. May be negative, in which case the
        last column is considered index -1.
    column_end: Union[int, None], optional
        1-based column index in the total pixel matrix of the first column
        beyond the last column to include in the output array. A
        ``column_end`` value of ``n`` will include columns ``n - 1`` and
        below, similar to standard Python indexing. If ``None``, columns up
        until the final column of the total pixel matrix are included. May
        be negative, in which case the last column is considered index -1.
    segment_numbers: Optional[Sequence[int]], optional
        Sequence containing segment numbers to include. If unspecified,
        all segments are included.
    combine_segments: bool, optional
        If True, combine the different segments into a single label
        map in which the value of a pixel represents its segment.
        If False (the default), segments are binary and stacked down the
        last dimension of the output array.
    relabel: bool, optional
        If True and ``combine_segments`` is ``True``, the pixel values in
        the output array are relabelled into the range ``0`` to
        ``len(segment_numbers)`` (inclusive) according to the position of
        the original segment numbers in ``segment_numbers`` parameter. If
        ``combine_segments`` is ``False``, this has no effect.
    rescale_fractional: bool
        If this is a FRACTIONAL segmentation and ``rescale_fractional`` is
        True, the raw integer-valued array stored in the segmentation image
        output will be rescaled by the MaximumFractionalValue such that
        each pixel lies in the range 0.0 to 1.0. If False, the raw integer
        values are returned. If the segmentation has BINARY type, this
        parameter has no effect.
    skip_overlap_checks: bool
        If True, skip checks for overlap between different segments. By
        default, checks are performed to ensure that the segments do not
        overlap. However, this reduces performance. If checks are skipped
        and multiple segments do overlap, the segment with the highest
        segment number (after relabelling, if applicable) will be placed
        into the output array.
    dtype: Union[type, str, numpy.dtype, None]
        Data type of the returned array. If None, an appropriate type will
        be chosen automatically. If the returned values are rescaled
        fractional values, this will be numpy.float32. Otherwise, the
        smallest unsigned integer type that accommodates all of the output
        values will be chosen.

    Returns
    -------
    pixel_array: np.ndarray
        Pixel array representing the segmentation's total pixel matrix.

    Raises
    ------
    RuntimeError
        If the segmentation is not a tiled image or does not have the
        dimension indices needed to index it as a total pixel matrix.
    ValueError
        If ``segment_numbers`` is empty, if any of the region bounds lies
        outside the total pixel matrix (including a start of 0), or if
        an end bound lies before the corresponding start bound.

    Note
    ----
    This method uses 1-based indexing of rows and columns in order to match
    the conventions used in the DICOM standard. The first row of the total
    pixel matrix is row 1, and the last is ``self.TotalPixelMatrixRows``.
    This is unlike standard Python and NumPy indexing which is 0-based.
    For negative indices, the two are equivalent with the final row/column
    having index -1.

    """
    # Check whether this segmentation is appropriate for tile-based indexing
    if not is_tiled_image(self):
        raise RuntimeError("Segmentation is not a tiled image.")
    if not self._db_man.is_indexable_as_total_pixel_matrix():
        raise RuntimeError(
            "Segmentation does not have appropriate dimension indices "
            "to be indexed as a total pixel matrix."
        )

    # Checks on validity of the inputs
    if segment_numbers is None:
        segment_numbers = list(self.segment_numbers)
    if len(segment_numbers) == 0:
        raise ValueError(
            'Segment numbers may not be empty.'
        )

    total_rows = self.TotalPixelMatrixRows
    total_columns = self.TotalPixelMatrixColumns

    if row_start is None:
        row_start = 1
    if row_end is None:
        row_end = total_rows + 1
    if column_start is None:
        column_start = 1
    if column_end is None:
        column_end = total_columns + 1

    if column_start == 0 or row_start == 0:
        raise ValueError(
            'Arguments "row_start" and "column_start" may not be 0.'
        )

    def _resolve_bound(value: int, size: int, name: str) -> int:
        """Map a possibly negative 1-based bound to a positive one."""
        if value > size + 1:
            raise ValueError(f'Invalid value for "{name}".')
        if value < 0:
            value = size + value + 1
        # A bound of 0, or a negative bound reaching back beyond the first
        # row/column, does not address any position in the matrix.
        if value < 1:
            raise ValueError(f'Invalid value for "{name}".')
        return value

    row_start = _resolve_bound(row_start, total_rows, 'row_start')
    row_end = _resolve_bound(row_end, total_rows, 'row_end')
    column_start = _resolve_bound(
        column_start, total_columns, 'column_start'
    )
    column_end = _resolve_bound(column_end, total_columns, 'column_end')

    # Reject inverted regions here with a clear message rather than
    # letting a negative output shape fail further downstream.
    if row_end < row_start:
        raise ValueError(
            'Argument "row_end" may not be less than "row_start".'
        )
    if column_end < column_start:
        raise ValueError(
            'Argument "column_end" may not be less than "column_start".'
        )

    output_shape = (
        row_end - row_start,
        column_end - column_start,
    )

    with self._db_man.iterate_indices_for_tiled_region(
        row_start=row_start,
        row_end=row_end,
        column_start=column_start,
        column_end=column_end,
        tile_shape=(self.Rows, self.Columns),
        segment_numbers=segment_numbers,
        combine_segments=combine_segments,
        relabel=relabel,
    ) as indices:

        return self._get_pixels_by_seg_frame(
            output_shape=output_shape,
            indices_iterator=indices,
            segment_numbers=np.array(segment_numbers),
            combine_segments=combine_segments,
            relabel=relabel,
            rescale_fractional=rescale_fractional,
            skip_overlap_checks=skip_overlap_checks,
            dtype=dtype,
        )
value(self) -> datetime.datetime: """datetime.datetime: datetime""" - allowed_formats = [ - '%Y-%m-%dT%H:%M:%S.%f%z', - '%Y-%m-%dT%H:%M:%S.%f', - '%Y-%m-%dT%H:%M:%S', - '%Y-%m-%dT%H:%M:%S%z', - '%Y-%m-%dT%H:%M', - '%Y-%m-%dT%H:%M%z', - '%Y-%m-%dT%H', - '%Y-%m-%dT%H%z', - '%Y-%m-%d', - '%Y-%m', - '%Y', - ] - for fmt in allowed_formats: + if isinstance(self.DateTime, DT): + value = self.DateTime + else: try: - dt = datetime.datetime.strptime(self.DateTime.isoformat(), fmt) - return dt - except ValueError: - continue - raise ValueError(f'Could not decode datetime value "{self.DateTime}"') + value = DT(self.DateTime) + except ValueError as exception: + raise ValueError( + f'Could not decode datetime value "{self.DateTime}"' + ) from exception + return value.replace() @classmethod def from_dataset( diff --git a/src/highdicom/utils.py b/src/highdicom/utils.py index 4458536e..7ef56207 100644 --- a/src/highdicom/utils.py +++ b/src/highdicom/utils.py @@ -1,5 +1,5 @@ import itertools -from typing import Iterator, List, Optional, Sequence, Tuple +from typing import Iterator, Generator, List, Optional, Sequence, Tuple import numpy as np from pydicom.dataset import Dataset @@ -20,6 +20,9 @@ def tile_pixel_matrix( ) -> Iterator[Tuple[int, int]]: """Tiles an image into smaller frames (rectangular regions). + Follows the convention used in image with Dimension Organization Type + "TILED_FULL" images. 
+ + Parameters ---------- total_pixel_matrix_rows: int @@ -41,7 +44,85 @@ def tile_pixel_matrix( tiles_per_row = int(np.ceil(total_pixel_matrix_columns / columns)) tile_row_indices = iter(range(1, tiles_per_col + 1)) tile_col_indices = iter(range(1, tiles_per_row + 1)) - return itertools.product(tile_col_indices, tile_row_indices) + return ( + (c, r) for (r, c) in itertools.product( + tile_row_indices, + tile_col_indices + ) + ) + + +def get_tile_array( + pixel_array: np.ndarray, + row_offset: int, + column_offset: int, + tile_rows: int, + tile_columns: int, + pad: bool = True, +) -> np.ndarray: + """Extract a tile from a total pixel matrix array. + + Parameters + ---------- + pixel_array: np.ndarray + Array representing a total pixel matrix. The first two dimensions + are treated as the rows and columns, respectively, of the total pixel + matrix. Any subsequent dimensions are not used but are retained in the + output array. + row_offset: int + Offset of the first row of the requested tile from the top of the total + pixel matrix (1-based index). + column_offset: int + Offset of the first column of the requested tile from the left of the + total pixel matrix (1-based index). + tile_rows: int + Number of rows per tile. + tile_columns: int + Number of columns per tile. + pad: bool + Whether to pad the returned array with zeros at the right and/or bottom + to ensure that it matches the correct tile size. Otherwise, the returned + array is not padded and may be smaller than the full tile size. + + Returns + ------- + np.ndarray: + Returned pixel array for the requested tile. + + """ + if row_offset < 1 or row_offset > pixel_array.shape[0]: + raise ValueError( + "Row offset must be between 1 and the size of dimension 0 of the " + "pixel array." + ) + if column_offset < 1 or column_offset > pixel_array.shape[1]: + raise ValueError( + "Column offset must be between 1 and the size of dimension 1 of " + "the pixel array." 
+ ) + # Move to pythonic 0-based indexing + row_offset -= 1 + column_offset -= 1 + row_end = row_offset + tile_rows + if row_end > pixel_array.shape[0]: + pad_rows = row_end - pixel_array.shape[0] + row_end = pixel_array.shape[0] + else: + pad_rows = 0 + column_end = column_offset + tile_columns + if column_end > pixel_array.shape[1]: + pad_columns = column_end - pixel_array.shape[1] + column_end = pixel_array.shape[1] + else: + pad_columns = 0 + # Account for 1-based to 0-based index conversion + tile_array = pixel_array[row_offset:row_end, column_offset:column_end] + if pad_rows > 0 or pad_columns > 0: + extra_dims = pixel_array.ndim - 2 + padding = [(0, pad_rows), (0, pad_columns)] + [(0, 0)] * extra_dims + tile_array = np.pad(tile_array, padding) + + return tile_array def compute_plane_position_tiled_full( @@ -114,6 +195,8 @@ def compute_plane_position_tiled_full( When only one of `slice_index` and `spacing_between_slices` is provided """ + if row_index < 1 or column_index < 1: + raise ValueError("Row and column indices must be positive integers.") row_offset_frame = ((row_index - 1) * rows) column_offset_frame = ((column_index - 1) * columns) @@ -149,30 +232,64 @@ def compute_plane_position_tiled_full( ) -def compute_plane_position_slide_per_frame( - dataset: Dataset -) -> List[PlanePositionSequence]: - """Computes the plane position for each frame in given dataset with - respect to the slide coordinate system. +def iter_tiled_full_frame_data( + dataset: Dataset, +) -> Generator[Tuple[int, int, int, int, float, float, float], None, None]: + """Get data on the position of each tile in a TILED_FULL image. + + This works only with images with Dimension Organization Type of + "TILED_FULL". 
+ + Unlike :func:`highdicom.utils.compute_plane_position_slide_per_frame`, + this function returns the data in their basic Python types rather than + wrapping as :class:`highdicom.PlanePositionSequence`. Parameters ---------- dataset: pydicom.dataset.Dataset - VL Whole Slide Microscopy Image + VL Whole Slide Microscopy Image or Segmentation Image using the + "TILED_FULL" DimensionOrganizationType. Returns ------- - List[highdicom.PlanePositionSequence] - Plane Position Sequence per frame - - Raises - ------ - ValueError - When `dataset` does not represent a VL Whole Slide Microscopy Image + channel: int + 1-based integer index of the "channel". The meaning of "channel" + depends on the image type. For segmentation images, the channel is the + segment number. For other images, it is the optical path number. + focal_plane_index: int + 1-based integer index of the focal plane. + column_position: int + 1-based column position of the tile (measured right from the left side + of the total pixel matrix). + row_position: int + 1-based row position of the tile (measured down from the top of the + total pixel matrix). + x: float + X coordinate in the frame-of-reference coordinate system in millimeter + units. + y: float + Y coordinate in the frame-of-reference coordinate system in millimeter + units. + z: float + Z coordinate in the frame-of-reference coordinate system in millimeter + units. """ - if not dataset.SOPClassUID == '1.2.840.10008.5.1.4.1.1.77.1.6': - raise ValueError('Expected a VL Whole Slide Microscopy Image') + allowed_sop_class_uids = { + '1.2.840.10008.5.1.4.1.1.77.1.6', # VL Whole Slide Microscopy Image + '1.2.840.10008.5.1.4.1.1.66.4', # Segmentation Image + } + if dataset.SOPClassUID not in allowed_sop_class_uids: + raise ValueError( + 'Expected a VL Whole Slide Microscopy Image or Segmentation Image.' 
+ ) + if ( + not hasattr(dataset, "DimensionOrganizationType") or + dataset.DimensionOrganizationType != "TILED_FULL" + ): + raise ValueError( + 'Expected an image with "TILED_FULL" dimension organization type.' + ) image_origin = dataset.TotalPixelMatrixOriginSequence[0] image_orientation = ( @@ -194,11 +311,19 @@ def compute_plane_position_slide_per_frame( 'TotalPixelMatrixFocalPlanes', 1 ) - num_optical_paths = getattr( - dataset, - 'NumberOfOpticalPaths', - len(dataset.OpticalPathSequence) - ) + + is_segmentation = dataset.SOPClassUID == '1.2.840.10008.5.1.4.1.1.66.4' + + # The "channels" output is either segment for segmentations, or optical + # path for other images + if is_segmentation: + num_channels = len(dataset.SegmentSequence) + else: + num_channels = getattr( + dataset, + 'NumberOfOpticalPaths', + len(dataset.OpticalPathSequence) + ) shared_fg = dataset.SharedFunctionalGroupsSequence[0] pixel_measures = shared_fg.PixelMeasuresSequence[0] @@ -216,91 +341,82 @@ def compute_plane_position_slide_per_frame( x_offset = image_origin.XOffsetInSlideCoordinateSystem y_offset = image_origin.YOffsetInSlideCoordinateSystem - transformer_lut = {} - for slice_index in range(1, num_focal_planes + 1): - # These checks are needed for mypy to determine the correct type - z_offset = float(slice_index - 1) * spacing_between_slices - transformer_lut[slice_index] = PixelToReferenceTransformer( - image_position=(x_offset, y_offset, z_offset), - image_orientation=image_orientation, - pixel_spacing=pixel_spacing - ) + # Array of tile indices (col_index, row_index) + tile_indices = np.array( + [ + (c, r) for (r, c) in + itertools.product( + range(1, tiles_per_column + 1), + range(1, tiles_per_row + 1) + ) + ] + ) - def _compute_plane_position_tiled_full_efficiently( - row_index: int, - column_index: int, - rows: int, - columns: int, - transformer: PixelToReferenceTransformer - ) -> PlanePositionSequence: - """More efficient implementation of `compute_plane_position_tiled_full`. 
- - Function re-uses an existing `transformer` instance instead of creating - one for every function call. This can hurt performance if the number - of frames in an image is large. - - Parameters - ---------- - row_index: int - One-based Row index value for a given frame (tile) along the column - direction of the tiled Total Pixel Matrix, which is defined by - the second triplet in `image_orientation` (values should be in the - range [1, *n*], where *n* is the number of tiles per column) - column_index: int - One-based Column index value for a given frame (tile) along the row - direction of the tiled Total Pixel Matrix, which is defined by - the first triplet in `image_orientation` (values should be in the - range [1, *n*], where *n* is the number of tiles per row) - rows: int - Number of rows per Frame (tile) - columns: int - Number of columns per Frame (tile) - transformer: highdicom.spatial.PixelToReferenceTransformer - Callable transformer instance to map pixel indices into reference - slide coordinates - - Returns - ------- - highdicom.PlanePositionSequence - Position, of the plane in the slide coordinate system - - """ - row_offset_frame = ((row_index - 1) * rows) - column_offset_frame = ((column_index - 1) * columns) - - # We should only be dealing with planar rotations. 
- transformed_coordinates = transformer( - np.array([(column_offset_frame, row_offset_frame)], dtype=int) - ) - x = transformed_coordinates[0, 0] - y = transformed_coordinates[0, 1] - z = transformed_coordinates[0, 2] + # Pixel offsets of each tile in the total pixel matrix + frame_pixel_offsets = ( + (tile_indices - 1) * np.array([dataset.Columns, dataset.Rows]) + ) - return PlanePositionSequence( - coordinate_system=CoordinateSystemNames.SLIDE, - image_position=(x, y, z), - # Position of plane (tile) in Total Pixel Matrix: - # First tile has position (1, 1) - pixel_matrix_position=( - column_offset_frame + 1, - row_offset_frame + 1, + for channel in range(1, num_channels + 1): + for slice_index in range(1, num_focal_planes + 1): + # Each focal plane is offset along z by the slice spacing + z_offset = float(slice_index - 1) * spacing_between_slices + transformer = PixelToReferenceTransformer( + image_position=(x_offset, y_offset, z_offset), + image_orientation=image_orientation, + pixel_spacing=pixel_spacing ) - ) + reference_coordinates = transformer(frame_pixel_offsets) + + for offsets, coords in zip( + frame_pixel_offsets, + reference_coordinates + ): + yield ( + channel, + slice_index, + int(offsets[0] + 1), + int(offsets[1] + 1), + float(coords[0]), + float(coords[1]), + float(coords[2]), + ) + + +def compute_plane_position_slide_per_frame( + dataset: Dataset +) -> List[PlanePositionSequence]: + """Computes the plane position for each frame in given dataset with + respect to the slide coordinate system for an image using the TILED_FULL + DimensionOrganizationType. + + Parameters + ---------- + dataset: pydicom.dataset.Dataset + VL Whole Slide Microscopy Image or Segmentation Image using the + "TILED_FULL" DimensionOrganizationType. 
+ + Returns + ------- + List[highdicom.PlanePositionSequence] + Plane Position Sequence per frame + + Raises + ------ + ValueError + When `dataset` does not represent a VL Whole Slide Microscopy Image or + Segmentation Image or the image does not use the "TILED_FULL" dimension + organization type. + + """ return [ - _compute_plane_position_tiled_full_efficiently( - row_index=r, - column_index=c, - rows=dataset.Rows, - columns=dataset.Columns, - transformer=transformer_lut[s], - ) - for _, s, r, c in itertools.product( - range(num_optical_paths), - range(1, num_focal_planes + 1), - range(1, tiles_per_column + 1), # column direction, top to bottom - range(1, tiles_per_row + 1), # row direction, left to right + PlanePositionSequence( + coordinate_system=CoordinateSystemNames.SLIDE, + image_position=(x, y, z), + pixel_matrix_position=(c, r), ) + for _, _, c, r, x, y, z in iter_tiled_full_frame_data(dataset) ] @@ -320,3 +436,67 @@ def is_tiled_image(dataset: Dataset) -> bool: ): return True return False + + +def are_plane_positions_tiled_full( + plane_positions: Sequence[PlanePositionSequence], + rows: int, + columns: int, +) -> bool: + """Determine whether a list of plane positions matches "TILED_FULL". + + This takes a list of plane positions for each frame and determines whether + the plane positions satisfy the requirements of "TILED_FULL". Plane + positions match the TILED_FULL dimension organization type if they are + non-overlapping, and cover the entire image plane in the order specified in + the standard. + + The test implemented in this function is necessary and sufficient for the + use of TILED_FULL in a newly created tiled image (thus allowing the plane + positions to be omitted from the image and defined implicitly). + + Parameters + ---------- + plane_positions: Sequence[PlanePositionSequence] + Plane positions of each frame. + rows: int + Number of rows in each frame. + columns: int + Number of columns in each frame. 
+ + Returns + ------- + bool: + True if the supplied plane positions satisfy the requirements for + TILED_FULL. False otherwise. + + """ + max_r = -1 + max_c = -1 + for plane_position in plane_positions: + r = plane_position[0].RowPositionInTotalImagePixelMatrix + c = plane_position[0].ColumnPositionInTotalImagePixelMatrix + if r > max_r: + max_r = r + if c > max_c: + max_c = c + + expected_positions = [ + (r, c) for (r, c) in itertools.product( + range(1, max_r + 1, rows), + range(1, max_c + 1, columns), + ) + ] + if len(expected_positions) != len(plane_positions): + return False + + for (r_exp, c_exp), plane_position in zip( + expected_positions, + plane_positions + ): + r = plane_position[0].RowPositionInTotalImagePixelMatrix + c = plane_position[0].ColumnPositionInTotalImagePixelMatrix + if r != r_exp or c != c_exp: + return False + + return True diff --git a/src/highdicom/version.py b/src/highdicom/version.py index 8c306aa6..81edede8 100644 --- a/src/highdicom/version.py +++ b/src/highdicom/version.py @@ -1 +1 @@ -__version__ = '0.21.1' +__version__ = '0.22.0' diff --git a/tests/test_ann.py b/tests/test_ann.py index 72cdfc91..71475a82 100644 --- a/tests/test_ann.py +++ b/tests/test_ann.py @@ -15,7 +15,7 @@ AnnotationGroupGenerationTypeValues, GraphicTypeValues, ) -from highdicom.ann.sop import MicroscopyBulkSimpleAnnotations +from highdicom.ann.sop import MicroscopyBulkSimpleAnnotations, annread from highdicom.content import AlgorithmIdentificationSequence from highdicom.sr.coding import CodedConcept from highdicom.uid import UID @@ -498,7 +498,7 @@ def test_construction(self): version='1.0' ) - annotation_coordinate_type = '3D' + annotation_coordinate_type = AnnotationCoordinateTypeValues.SCOORD3D first_property_type = Code('4421005', 'SCT', 'Cell') first_label = 'cells' first_uid = UID() @@ -573,6 +573,20 @@ def test_construction(self): annotations = MicroscopyBulkSimpleAnnotations.from_dataset(dataset) + with BytesIO() as fp: + annotations.save_as(fp) + 
fp.seek(0) + annotations = annread(fp) + + assert isinstance( + annotations.annotation_coordinate_type, + AnnotationCoordinateTypeValues + ) + assert ( + annotations.annotation_coordinate_type == + annotation_coordinate_type + ) + retrieved_groups = annotations.get_annotation_groups() assert len(retrieved_groups) == 2 diff --git a/tests/test_content.py b/tests/test_content.py index f28568f9..5511ac42 100644 --- a/tests/test_content.py +++ b/tests/test_content.py @@ -1,15 +1,19 @@ +import datetime from unittest import TestCase import pytest from pydicom import dcmread from pydicom.dataset import Dataset from pydicom.sr.codedict import codes +from pydicom.sr.coding import Code from pydicom.data import get_testdata_file, get_testdata_files import numpy as np +from highdicom.enum import UniversalEntityIDTypeValues from highdicom.sr import CodedConcept from highdicom import ( + IssuerOfIdentifier, PaletteColorLUT, ContentCreatorIdentificationCodeSequence, ModalityLUT, @@ -31,7 +35,11 @@ VOILUTTransformation, VOILUTFunctionValues, ) -from highdicom.sr.value_types import CodeContentItem, TextContentItem +from highdicom.sr.value_types import ( + CodeContentItem, + TextContentItem, + DateTimeContentItem +) from .utils import write_and_read_dataset @@ -413,7 +421,6 @@ def test_construction_missing_substances(self): class TestSpecimenPreparationStep(TestCase): - def test_construction_collection(self): specimen_id = 'specimen id' processing_type = codes.SCT.SpecimenCollection @@ -625,6 +632,288 @@ def test_construction_staining(self): assert staining_item.value == substance assert staining_item.relationship_type is None + def test_construction_staining_from_dataset(self): + specimen_id = 'specimen id' + processing_type = codes.SCT.Staining + substance = codes.SCT.HematoxylinStain + dataset = Dataset() + dataset.SpecimenPreparationStepContentItemSequence = [ + TextContentItem( + name=codes.DCM.SpecimenIdentifier, + value=specimen_id + ), + CodeContentItem( + 
name=codes.DCM.ProcessingType, + value=processing_type + ), + CodeContentItem( + name=codes.SCT.UsingSubstance, + value=substance + ), + ] + dataset_reread = write_and_read_dataset(dataset) + instance = SpecimenPreparationStep.from_dataset(dataset_reread) + assert isinstance(instance, SpecimenPreparationStep) + assert instance.specimen_id == specimen_id + assert instance.processing_type == processing_type + assert instance.fixative is None + assert instance.embedding_medium is None + processing_procedure = instance.processing_procedure + assert isinstance(processing_procedure, SpecimenStaining) + assert processing_procedure.substances == [substance] + + def test_construction_processing(self): + specimen_id = 'specimen id' + processing_type = codes.SCT.SpecimenProcessing + description = codes.SCT.SpecimenFreezing + instance = SpecimenPreparationStep( + specimen_id=specimen_id, + processing_procedure=SpecimenProcessing(description=description), + ) + seq = instance.SpecimenPreparationStepContentItemSequence + assert len(seq) == 3 + assert not seq.is_root + assert not seq.is_sr + + assert instance.specimen_id == specimen_id + assert instance.processing_type == processing_type + assert instance.processing_datetime is None + assert instance.issuer_of_specimen_id is None + assert instance.fixative is None + assert instance.embedding_medium is None + + specimen_id_item = seq[0] + assert specimen_id_item.name == codes.DCM.SpecimenIdentifier + assert specimen_id_item.value == specimen_id + assert specimen_id_item.relationship_type is None + + processing_type_item = seq[1] + assert processing_type_item.name == codes.DCM.ProcessingType + assert processing_type_item.value == processing_type + assert processing_type_item.relationship_type is None + + processing_step_description_item = seq[2] + assert ( + processing_step_description_item.name == + codes.DCM.ProcessingStepDescription + ) + assert processing_step_description_item.value == description + assert 
processing_step_description_item.relationship_type is None + + def test_construction_processing_optionals(self): + specimen_id = 'specimen id' + processing_type = codes.SCT.SpecimenCollection + procedure = codes.SCT.Excision + processing_procedure = SpecimenCollection(procedure=procedure) + processing_datetime = datetime.datetime(2023, 6, 17, 21, 38, 14) + processing_description = 'processing description' + issuer_of_specimen_id = IssuerOfIdentifier("issuer id") + fixative = CodedConcept("fixative", "test", "test fixative") + embedding_medium = CodedConcept("embedding", "test", "test embedding") + specimen_container = CodedConcept( + "specimen container", + "test", + "test specimen container" + ) + specimen_type = CodedConcept( + "specimen type", + "test", + "test specimen type" + ) + + instance = SpecimenPreparationStep( + specimen_id=specimen_id, + processing_procedure=processing_procedure, + processing_description=processing_description, + processing_datetime=processing_datetime, + issuer_of_specimen_id=issuer_of_specimen_id, + fixative=fixative, + embedding_medium=embedding_medium, + specimen_container=specimen_container, + specimen_type=specimen_type + ) + + seq = instance.SpecimenPreparationStepContentItemSequence + assert len(seq) == 10 + + specimen_id_item = seq[0] + assert specimen_id_item.name == codes.DCM.SpecimenIdentifier + assert specimen_id_item.value == specimen_id + assert specimen_id_item.relationship_type is None + + issuer_of_specimen_id_item = seq[1] + assert ( + issuer_of_specimen_id_item.name == + codes.DCM.IssuerOfSpecimenIdentifier + ) + assert ( + issuer_of_specimen_id_item.value == + issuer_of_specimen_id.LocalNamespaceEntityID + ) + assert issuer_of_specimen_id_item.relationship_type is None + + processing_type_item = seq[2] + assert ( + processing_type_item.name == codes.DCM.ProcessingType + ) + assert processing_type_item.value == processing_type + assert processing_type_item.relationship_type is None + + processing_datetime_item = 
seq[3] + assert ( + processing_datetime_item.name == codes.DCM.DatetimeOfProcessing + ) + assert processing_datetime_item.value == processing_datetime + assert processing_datetime_item.relationship_type is None + + processing_description_item = seq[4] + assert ( + processing_description_item.name == + codes.DCM.ProcessingStepDescription + ) + assert processing_description_item.value == processing_description + assert processing_description_item.relationship_type is None + + collection_step_item = seq[5] + assert ( + collection_step_item.name == codes.SCT.SpecimenCollection + ) + assert collection_step_item.value == procedure + assert collection_step_item.relationship_type is None + + fixative_item = seq[6] + assert fixative_item.name == codes.SCT.TissueFixative + assert fixative_item.value == fixative + assert fixative_item.relationship_type is None + + embedding_item = seq[7] + assert embedding_item.name == codes.SCT.TissueEmbeddingMedium + assert embedding_item.value == embedding_medium + assert embedding_item.relationship_type is None + + specimen_container_item = seq[8] + assert specimen_container_item.name == codes.SCT.SpecimenContainer + assert specimen_container_item.value == specimen_container + assert specimen_container_item.relationship_type is None + + specimen_type_item = seq[9] + assert specimen_type_item.name == codes.SCT.SpecimenType + assert specimen_type_item.value == specimen_type + assert specimen_type_item.relationship_type is None + + def test_construction_processing_from_dataset(self): + specimen_id = 'specimen id' + processing_type = codes.SCT.SpecimenProcessing + description = codes.SCT.SpecimenFreezing + dataset = Dataset() + dataset.SpecimenPreparationStepContentItemSequence = [ + TextContentItem( + name=codes.DCM.SpecimenIdentifier, + value=specimen_id + ), + CodeContentItem( + name=codes.DCM.ProcessingType, + value=processing_type + ), + CodeContentItem( + name=codes.DCM.ProcessingStepDescription, + value=description + ), + ] + 
dataset_reread = write_and_read_dataset(dataset) + instance = SpecimenPreparationStep.from_dataset(dataset_reread) + assert isinstance(instance, SpecimenPreparationStep) + assert instance.specimen_id == specimen_id + assert instance.processing_type == processing_type + assert instance.fixative is None + assert instance.embedding_medium is None + processing_procedure = instance.processing_procedure + assert isinstance(processing_procedure, SpecimenProcessing) + assert processing_procedure.description == description + + def test_construction_processing_from_dataset_optionals(self): + specimen_id = 'specimen id' + processing_type = codes.SCT.SpecimenCollection + procedure = codes.SCT.Excision + processing_procedure = SpecimenCollection(procedure=procedure) + processing_description = "processing description" + processing_datetime = datetime.datetime(2023, 6, 17, 21, 38, 14) + issuer_of_specimen_id = IssuerOfIdentifier("issuer id") + fixative = CodedConcept("fixative", "test", "test fixative") + embedding_medium = CodedConcept("embedding", "test", "test embedding") + specimen_container = CodedConcept( + "specimen container", + "test", + "test specimen container" + ) + specimen_type = CodedConcept( + "specimen type", + "test", + "test specimen type" + ) + dataset = Dataset() + dataset.SpecimenPreparationStepContentItemSequence = [ + TextContentItem( + name=codes.DCM.SpecimenIdentifier, + value=specimen_id + ), + TextContentItem( + name=codes.DCM.IssuerOfSpecimenIdentifier, + value=issuer_of_specimen_id.LocalNamespaceEntityID + ), + DateTimeContentItem( + name=codes.DCM.DatetimeOfProcessing, + value=processing_datetime + ), + CodeContentItem( + name=codes.DCM.ProcessingType, + value=processing_type + ), + TextContentItem( + name=codes.DCM.ProcessingStepDescription, + value=processing_description + ), + CodeContentItem( + name=codes.SCT.SpecimenCollection, + value=procedure + ), + CodeContentItem( + name=codes.SCT.TissueFixative, + value=fixative + ), + CodeContentItem( + 
name=codes.SCT.TissueEmbeddingMedium, + value=embedding_medium + ), + CodeContentItem( + name=codes.SCT.SpecimenContainer, + value=specimen_container + ), + CodeContentItem( + name=codes.SCT.SpecimenType, + value=specimen_type + ) + ] + dataset_reread = write_and_read_dataset(dataset) + instance = SpecimenPreparationStep.from_dataset(dataset_reread) + assert isinstance(instance, SpecimenPreparationStep) + assert instance.specimen_id == specimen_id + assert instance.processing_type == processing_type + assert instance.fixative == fixative + assert instance.embedding_medium == embedding_medium + assert instance.processing_description == processing_description + processing_procedure = instance.processing_procedure + assert isinstance(processing_procedure, SpecimenCollection) + assert processing_procedure.procedure == procedure + assert instance.processing_datetime == processing_datetime + assert isinstance(instance.issuer_of_specimen_id, str) + assert ( + instance.issuer_of_specimen_id == + issuer_of_specimen_id.LocalNamespaceEntityID + ) + assert instance.specimen_container == specimen_container + assert instance.specimen_type == specimen_type + class TestVOILUTTransformation(TestCase): @@ -1032,112 +1321,105 @@ def test_construction_different_first_values(self): blue_lut=b_lut, ) - def test_construction_staining_from_dataset(self): - specimen_id = 'specimen id' - processing_type = codes.SCT.Staining - substance = codes.SCT.HematoxylinStain - dataset = Dataset() - dataset.SpecimenPreparationStepContentItemSequence = [ - TextContentItem( - name=codes.DCM.SpecimenIdentifier, - value=specimen_id - ), - CodeContentItem( - name=codes.DCM.ProcessingType, - value=processing_type - ), - CodeContentItem( - name=codes.SCT.UsingSubstance, - value=substance - ), - ] - dataset_reread = write_and_read_dataset(dataset) - instance = SpecimenPreparationStep.from_dataset(dataset_reread) - assert isinstance(instance, SpecimenPreparationStep) - assert instance.specimen_id == specimen_id 
- assert instance.processing_type == processing_type - assert instance.fixative is None - assert instance.embedding_medium is None - processing_procedure = instance.processing_procedure - assert isinstance(processing_procedure, SpecimenStaining) - assert processing_procedure.substances == [substance] - def test_construction_processing(self): - specimen_id = 'specimen id' - processing_type = codes.SCT.SpecimenProcessing - description = codes.SCT.SpecimenFreezing - instance = SpecimenPreparationStep( +class TestSpecimenDescription(TestCase): + def test_construction(self): + specimen_id = 'specimen 1' + specimen_uid = UID() + instance = SpecimenDescription( specimen_id=specimen_id, - processing_procedure=SpecimenProcessing(description=description) + specimen_uid=specimen_uid ) - seq = instance.SpecimenPreparationStepContentItemSequence - assert len(seq) == 3 - assert not seq.is_root - assert not seq.is_sr - assert instance.specimen_id == specimen_id - assert instance.processing_type == processing_type - assert instance.fixative is None - assert instance.embedding_medium is None - - specimen_id_item = seq[0] - assert specimen_id_item.name == codes.DCM.SpecimenIdentifier - assert specimen_id_item.value == specimen_id - assert specimen_id_item.relationship_type is None - - processing_type_item = seq[1] - assert processing_type_item.name == codes.DCM.ProcessingType - assert processing_type_item.value == processing_type - assert processing_type_item.relationship_type is None - - staining_item = seq[2] - assert staining_item.name == codes.DCM.ProcessingStepDescription - assert staining_item.value == description - assert staining_item.relationship_type is None + assert instance.specimen_uid == specimen_uid + assert instance.specimen_location is None + assert len(instance.specimen_preparation_steps) == 0 + assert instance.specimen_type is None + assert instance.specimen_short_description is None + assert instance.specimen_detailed_description is None + assert 
instance.issuer_of_specimen_id is None + assert instance.primary_anatomic_structures is None - def test_construction_processing_from_dataset(self): - specimen_id = 'specimen id' - processing_type = codes.SCT.SpecimenProcessing - description = codes.SCT.SpecimenFreezing - dataset = Dataset() - dataset.SpecimenPreparationStepContentItemSequence = [ - TextContentItem( - name=codes.DCM.SpecimenIdentifier, - value=specimen_id - ), - CodeContentItem( - name=codes.DCM.ProcessingType, - value=processing_type - ), - CodeContentItem( - name=codes.DCM.ProcessingStepDescription, - value=description - ), + def test_construction_optionals(self): + specimen_id = 'specimen 1' + specimen_uid = UID() + specimen_location = "specimen location" + specimen_type = CodedConcept( + "specimen type", + "test", + "test specimen type" + ) + specimen_short_description = "specimen short description" + specimen_detailed_description = "specimen detailed description" + issuer_of_specimen_id = IssuerOfIdentifier("issuer id") + primary_anatomic_structures = [ + CodedConcept( + "anatomic structure", + "test", + "test anatomic structure" + ) ] - dataset_reread = write_and_read_dataset(dataset) - instance = SpecimenPreparationStep.from_dataset(dataset_reread) - assert isinstance(instance, SpecimenPreparationStep) - assert instance.specimen_id == specimen_id - assert instance.processing_type == processing_type - assert instance.fixative is None - assert instance.embedding_medium is None - processing_procedure = instance.processing_procedure - assert isinstance(processing_procedure, SpecimenProcessing) - assert processing_procedure.description == description + instance = SpecimenDescription( + specimen_id=specimen_id, + specimen_uid=specimen_uid, + specimen_location=specimen_location, + issuer_of_specimen_id=issuer_of_specimen_id, + primary_anatomic_structures=primary_anatomic_structures, + specimen_type=specimen_type, + specimen_short_description=specimen_short_description, + 
specimen_detailed_description=specimen_detailed_description, + ) + assert instance.specimen_location == specimen_location + assert instance.specimen_type == specimen_type + assert instance.specimen_short_description == specimen_short_description + assert ( + instance.specimen_detailed_description == + specimen_detailed_description + ) + assert ( + instance.issuer_of_specimen_id == issuer_of_specimen_id + ) + assert ( + instance.primary_anatomic_structures == primary_anatomic_structures + ) + def test_construction_with_to_long_short_description(self): + specimen_id = 'specimen 1' + specimen_uid = UID() + specimen_short_description = "x" * 65 + with pytest.raises(ValueError): + SpecimenDescription( + specimen_id=specimen_id, + specimen_uid=specimen_uid, + specimen_short_description=specimen_short_description + ) -class TestSpecimenDescription(TestCase): + def test_construction_with_backslash_in_short_description(self): + specimen_id = 'specimen 1' + specimen_uid = UID() + specimen_short_description = 'short_description_with_backslash\\' + with pytest.raises(ValueError): + SpecimenDescription( + specimen_id=specimen_id, + specimen_uid=specimen_uid, + specimen_short_description=specimen_short_description + ) - def test_construction(self): + def test_construction_with_code_specimen_type(self): specimen_id = 'specimen 1' specimen_uid = UID() + specimen_type = Code( + "specimen type", + "test", + "test specimen type" + ) instance = SpecimenDescription( specimen_id=specimen_id, - specimen_uid=specimen_uid + specimen_uid=specimen_uid, + specimen_type=specimen_type, ) - assert instance.specimen_id == specimen_id - assert instance.specimen_uid == specimen_uid - assert len(instance.specimen_preparation_steps) == 0 + assert isinstance(instance.specimen_type, CodedConcept) + assert instance.specimen_type == specimen_type def test_construction_with_preparation_steps(self): parent_specimen_id = 'surgical specimen' @@ -1190,3 +1472,99 @@ def test_construction_from_dataset(self): 
assert instance.specimen_id == specimen_id assert instance.specimen_uid == specimen_uid assert len(instance.specimen_preparation_steps) == 0 + + def test_construction_from_dataset_with_optionals(self): + specimen_id = 'specimen 1' + specimen_uid = UID() + specimen_location = "specimen location" + specimen_preparation_steps = [ + SpecimenPreparationStep( + specimen_id, + SpecimenCollection(procedure=codes.SCT.Biopsy) + ) + ] + specimen_type = CodedConcept( + "specimen type", + "test", + "test specimen type" + ) + specimen_short_description = "specimen short description" + specimen_detailed_description = "specimen detailed description" + issuer_of_specimen_id = IssuerOfIdentifier("issuer id") + primary_anatomic_structures = [ + CodedConcept( + "anatomic structure", + "test", + "test anatomic structure" + ) + ] + dataset = Dataset() + dataset.SpecimenIdentifier = specimen_id + dataset.SpecimenUID = str(specimen_uid) + dataset.SpecimenLocalizationContentItemSequence = [ + TextContentItem( + name=codes.DCM.LocationOfSpecimen, + value=specimen_location + ) + ] + dataset.SpecimenTypeCodeSequence = [specimen_type] + dataset.SpecimenPreparationSequence = specimen_preparation_steps + dataset.SpecimenShortDescription = specimen_short_description + dataset.SpecimenDetailedDescription = specimen_detailed_description + dataset.IssuerOfTheSpecimenIdentifierSequence = [issuer_of_specimen_id] + dataset.PrimaryAnatomicStructureSequence = primary_anatomic_structures + dataset_reread = write_and_read_dataset(dataset) + instance = SpecimenDescription.from_dataset(dataset_reread) + assert instance.specimen_location == specimen_location + assert instance.specimen_preparation_steps == specimen_preparation_steps + assert instance.specimen_type == specimen_type + assert instance.specimen_short_description == specimen_short_description + assert ( + instance.specimen_detailed_description == + specimen_detailed_description + ) + assert isinstance(instance.issuer_of_specimen_id, 
IssuerOfIdentifier) + assert instance.issuer_of_specimen_id == issuer_of_specimen_id + assert ( + instance.primary_anatomic_structures == primary_anatomic_structures + ) + + +class TestIssuerOfIdentifier(TestCase): + def test_construction(self): + issuer_of_identifier = "issuer of identifier" + instance = IssuerOfIdentifier(issuer_of_identifier) + assert instance.issuer_of_identifier == issuer_of_identifier + assert instance.issuer_of_identifier_type is None + + def test_construction_with_optionals(self): + issuer_of_identifier = "issuer of identifier id" + issuer_of_identifier_type = UniversalEntityIDTypeValues.DNS + instance = IssuerOfIdentifier( + issuer_of_identifier, + issuer_of_identifier_type + ) + assert instance.issuer_of_identifier == issuer_of_identifier + assert instance.issuer_of_identifier_type == issuer_of_identifier_type + + def test_construction_from_dataset(self): + issuer_of_identifier = "issuer of identifier" + dataset = Dataset() + dataset.LocalNamespaceEntityID = issuer_of_identifier + dataset_reread = write_and_read_dataset(dataset) + instance = IssuerOfIdentifier.from_dataset(dataset_reread) + assert isinstance(instance, IssuerOfIdentifier) + assert instance.issuer_of_identifier == issuer_of_identifier + assert instance.issuer_of_identifier_type is None + + def test_construction_from_dataset_with_optionals(self): + issuer_of_identifier = "issuer of identifier" + issuer_of_identifier_type = UniversalEntityIDTypeValues.DNS + dataset = Dataset() + dataset.UniversalEntityID = issuer_of_identifier + dataset.UniversalEntityIDType = issuer_of_identifier_type.value + dataset_reread = write_and_read_dataset(dataset) + instance = IssuerOfIdentifier.from_dataset(dataset_reread) + assert isinstance(instance, IssuerOfIdentifier) + assert instance.issuer_of_identifier == issuer_of_identifier + assert instance.issuer_of_identifier_type == issuer_of_identifier_type diff --git a/tests/test_frame.py b/tests/test_frame.py index 2e4d424d..164aea86 100644 --- 
a/tests/test_frame.py +++ b/tests/test_frame.py @@ -260,7 +260,7 @@ def test_jpegls_monochrome(self): assert compressed_frame.endswith(b'\xFF\xD9') decoded_frame = decode_frame( value=compressed_frame, - transfer_syntax_uid=JPEG2000Lossless, + transfer_syntax_uid=JPEGLSLossless, rows=frame.shape[0], columns=frame.shape[1], samples_per_pixel=1, diff --git a/tests/test_seg.py b/tests/test_seg.py index 5da1cf8d..4a572eaf 100644 --- a/tests/test_seg.py +++ b/tests/test_seg.py @@ -1,9 +1,14 @@ from collections import defaultdict +from concurrent.futures import ProcessPoolExecutor +from copy import deepcopy +import itertools import unittest from pathlib import Path +import warnings import numpy as np import pytest +from PIL import Image from pydicom.data import get_testdata_file, get_testdata_files from pydicom.datadict import tag_for_keyword @@ -23,8 +28,12 @@ PixelMeasuresSequence, PlaneOrientationSequence, ) -from highdicom.enum import CoordinateSystemNames +from highdicom.enum import ( + CoordinateSystemNames, + DimensionOrganizationTypeValues, +) from highdicom.seg import ( + create_segmentation_pyramid, segread, DimensionIndexSequence, SegmentationTypeValues, @@ -542,9 +551,9 @@ def test_construction_2(self): assert len(seq) == 6 assert seq[0].DimensionIndexPointer == 0x0062000B assert seq[0].FunctionalGroupPointer == 0x0062000A - assert seq[1].DimensionIndexPointer == 0x0048021E + assert seq[1].DimensionIndexPointer == 0x0048021F assert seq[1].FunctionalGroupPointer == 0x0048021A - assert seq[2].DimensionIndexPointer == 0x0048021F + assert seq[2].DimensionIndexPointer == 0x0048021E assert seq[2].FunctionalGroupPointer == 0x0048021A assert seq[3].DimensionIndexPointer == 0x0040072A assert seq[3].FunctionalGroupPointer == 0x0048021A @@ -656,6 +665,27 @@ def setUp(self): ) self._sm_pixel_array[2:3, 1:5, 7:9] = True + # Total pixel matrix segmentation array for tests + self._sm_total_pixel_array = np.zeros( + ( + self._sm_image.TotalPixelMatrixRows, + 
self._sm_image.TotalPixelMatrixColumns + ), + dtype=bool + ) + self._sm_total_pixel_array[38:43, 5:41] = True + self._sm_total_pixel_array[4:24, 25:29] = True + + self._sm_total_pixel_array_multiclass = np.zeros( + ( + self._sm_image.TotalPixelMatrixRows, + self._sm_image.TotalPixelMatrixColumns + ), + dtype=np.uint8, + ) + self._sm_total_pixel_array_multiclass[38:43, 5:41] = 1 + self._sm_total_pixel_array_multiclass[4:24, 25:29] = 2 + # A series of single frame CT images ct_series = [ dcmread(f) @@ -786,8 +816,8 @@ def check_dimension_index_vals(seg): else: # Build up the mapping from index to value for dim_kw, dim_ind in zip([ + 'RowPositionInTotalImagePixelMatrix', 'ColumnPositionInTotalImagePixelMatrix', - 'RowPositionInTotalImagePixelMatrix' ], [1, 2]): index_mapping = defaultdict(list) for f in seg.PerFrameFunctionalGroupsSequence: @@ -1406,6 +1436,251 @@ def test_construction_7(self): assert SegmentsOverlapValues[instance.SegmentsOverlap] == \ SegmentsOverlapValues.NO + def test_construction_workers(self): + # Create a segmentation with multiple workers + Segmentation( + self._ct_series, + self._ct_series_mask_array, + SegmentationTypeValues.FRACTIONAL.value, + self._segment_descriptions, + self._series_instance_uid, + self._series_number, + self._sop_instance_uid, + self._instance_number, + self._manufacturer, + self._manufacturer_model_name, + self._software_versions, + self._device_serial_number, + content_label=self._content_label, + transfer_syntax_uid=RLELossless, + workers=2, + ) + + def test_construction_workers_manual(self): + # Create a segmentation with multiple workers created manually + with ProcessPoolExecutor(2) as pool: + Segmentation( + self._ct_series, + self._ct_series_mask_array, + SegmentationTypeValues.FRACTIONAL.value, + self._segment_descriptions, + self._series_instance_uid, + self._series_number, + self._sop_instance_uid, + self._instance_number, + self._manufacturer, + self._manufacturer_model_name, + self._software_versions, + 
self._device_serial_number, + content_label=self._content_label, + transfer_syntax_uid=RLELossless, + workers=pool, + ) + + def test_construction_tiled_full(self): + instance = Segmentation( + [self._sm_image], + pixel_array=self._sm_pixel_array, + segmentation_type=SegmentationTypeValues.FRACTIONAL.value, + segment_descriptions=self._segment_descriptions, + series_instance_uid=self._series_instance_uid, + series_number=self._series_number, + sop_instance_uid=self._sop_instance_uid, + instance_number=self._instance_number, + manufacturer=self._manufacturer, + manufacturer_model_name=self._manufacturer_model_name, + software_versions=self._software_versions, + device_serial_number=self._device_serial_number, + dimension_organization_type="TILED_FULL", + omit_empty_frames=False, + ) + assert instance.DimensionOrganizationType == "TILED_FULL" + assert not hasattr(instance, "PerFrameFunctionalGroupsSequence") + + @staticmethod + @pytest.fixture( + params=[ + DimensionOrganizationTypeValues.TILED_FULL, + DimensionOrganizationTypeValues.TILED_SPARSE, + ]) + def dimension_organization_type(request): + return request.param + + @staticmethod + @pytest.fixture( + params=[ + SegmentationTypeValues.FRACTIONAL, + SegmentationTypeValues.BINARY, + ]) + def segmentation_type(request): + return request.param + + @staticmethod + @pytest.fixture( + params=[ + None, + (10, 10), + (10, 25), + (25, 25), + (30, 30), + ]) + def tile_size(request): + return request.param + + @staticmethod + @pytest.fixture(params=[False, True]) + def locations_preserved(request): + return request.param + + @staticmethod + @pytest.fixture(params=[1, 2]) + def num_segments(request): + return request.param + + def test_construction_autotile( + self, + tile_size, + dimension_organization_type, + segmentation_type, + locations_preserved, + num_segments, + ): + if num_segments == 1: + pixel_array = self._sm_total_pixel_array + segment_descriptions = self._segment_descriptions + else: + pixel_array = 
self._sm_total_pixel_array_multiclass + segment_descriptions = self._both_segment_descriptions + + if locations_preserved: + pixel_measures = None + plane_orientation = None + plane_positions = None + else: + pixel_measures = PixelMeasuresSequence( + pixel_spacing=(0.0001, 0.0001), + slice_thickness=0.001, + ) + plane_orientation = PlaneOrientationSequence( + coordinate_system='SLIDE', + image_orientation=[0.0, -1.0, 0.0, 1.0, 0.0, 0.0] + ) + plane_positions = [ + PlanePositionSequence( + coordinate_system='SLIDE', + image_position=[1.1234, -5.4323214, 0.0], + pixel_matrix_position=(1, 1), + ) + ] + + if dimension_organization_type.value == 'TILED_FULL': + # Cannot omit empty frames with TILED_FULL + omit_empty_frames_values = [False] + else: + omit_empty_frames_values = [False, True] + + transfer_syntax_uids = [ExplicitVRLittleEndian] + if segmentation_type.value == 'FRACTIONAL': + try: + import libjpeg # noqa: F401 + except ModuleNotFoundError: + pass + else: + transfer_syntax_uids += [ + JPEG2000Lossless, + JPEGLSLossless, + ] + + for omit_empty_frames, transfer_syntax_uid in itertools.product( + omit_empty_frames_values, + transfer_syntax_uids, + ): + instance = Segmentation( + [self._sm_image], + pixel_array=pixel_array, + segmentation_type=segmentation_type, + segment_descriptions=segment_descriptions, + series_instance_uid=self._series_instance_uid, + series_number=self._series_number, + sop_instance_uid=self._sop_instance_uid, + instance_number=self._instance_number, + manufacturer=self._manufacturer, + manufacturer_model_name=self._manufacturer_model_name, + software_versions=self._software_versions, + device_serial_number=self._device_serial_number, + dimension_organization_type=dimension_organization_type, + omit_empty_frames=omit_empty_frames, + plane_orientation=plane_orientation, + plane_positions=plane_positions, + pixel_measures=pixel_measures, + tile_pixel_array=True, + tile_size=tile_size, + max_fractional_value=1, + 
transfer_syntax_uid=transfer_syntax_uid, + ) + assert ( + instance.DimensionOrganizationType == + dimension_organization_type.value + ) + if tile_size is not None: + assert instance.Rows == tile_size[0] + assert instance.Columns == tile_size[1] + + # pydicom raises warnings if it has to pad or truncate pixel data + with warnings.catch_warnings(record=True) as w: + self.get_array_after_writing(instance) + assert len(w) == 0 + + # Check that full reconstructed array matches the input + reconstructed_array = instance.get_total_pixel_matrix( + combine_segments=True, + ) + assert reconstructed_array.shape == ( + self._sm_image.TotalPixelMatrixRows, + self._sm_image.TotalPixelMatrixColumns + ) + assert np.array_equal( + reconstructed_array, + pixel_array, + ) + + def to_numpy(c): + # Move from our 1-based convention to numpy zero based + if c is None: + # None is handled the same + return None + elif c > 0: + # Positive indices are 1-based + return c - 1 + else: + # Negative indices are the same + return c + + # Check that subregions defined in different ways match the input + for rs, re, cs, ce in [ + (34, 48, 3, None), + (-13, None, -34, -23), + ]: + reconstructed_array = instance.get_total_pixel_matrix( + combine_segments=True, + row_start=rs, + row_end=re, + column_start=cs, + column_end=ce, + ) + + rs_np = to_numpy(rs) + re_np = to_numpy(re) + cs_np = to_numpy(cs) + ce_np = to_numpy(ce) + expected_array = pixel_array[ + (slice(rs_np, re_np), slice(cs_np, ce_np)) + ] + assert np.array_equal( + reconstructed_array, + expected_array, + ) + def test_pixel_types_fractional( self, fractional_transfer_syntax_uid, @@ -1490,16 +1765,10 @@ def test_pixel_types_fractional( ) # Ensure the recovered pixel array matches what is expected - if pix_type in (np.bool_, np.float_): - assert np.array_equal( - self.get_array_after_writing(instance), - expected_encoding * max_fractional_value - ), f'{sources[0].Modality} {fractional_transfer_syntax_uid}' - else: - assert np.array_equal( - 
self.get_array_after_writing(instance), - expected_encoding - ), f'{sources[0].Modality} {fractional_transfer_syntax_uid}' + assert np.array_equal( + self.get_array_after_writing(instance), + expected_encoding * max_fractional_value + ), f'{sources[0].Modality} {fractional_transfer_syntax_uid}' self.check_dimension_index_vals(instance) # Multi-segment (exclusive) @@ -1516,7 +1785,7 @@ def test_pixel_types_fractional( self._manufacturer_model_name, self._software_versions, self._device_serial_number, - max_fractional_value=1, + max_fractional_value=max_fractional_value, transfer_syntax_uid=fractional_transfer_syntax_uid ) if pix_type == np.float_: @@ -1532,7 +1801,7 @@ def test_pixel_types_fractional( assert np.array_equal( self.get_array_after_writing(instance), - expected_enc_exc + expected_enc_exc * max_fractional_value ), f'{sources[0].Modality} {fractional_transfer_syntax_uid}' self.check_dimension_index_vals(instance) @@ -1550,7 +1819,7 @@ def test_pixel_types_fractional( self._manufacturer_model_name, self._software_versions, self._device_serial_number, - max_fractional_value=1, + max_fractional_value=max_fractional_value, transfer_syntax_uid=fractional_transfer_syntax_uid ) if pix_type == np.float_: @@ -1566,7 +1835,7 @@ def test_pixel_types_fractional( assert np.array_equal( self.get_array_after_writing(instance), - expected_enc_overlap + expected_enc_overlap * max_fractional_value ), f'{sources[0].Modality} {fractional_transfer_syntax_uid}' self.check_dimension_index_vals(instance) @@ -1871,7 +2140,7 @@ def test_construction_empty_source_image(self): ) def test_construction_empty_source_seg_sparse(self): - # Can encoding an empty segmentation with omit_empty_frames=True issues + # Encoding an empty segmentation with omit_empty_frames=True issues # a warning and encodes the full segmentation empty_pixel_array = np.zeros_like(self._ct_pixel_array) seg = Segmentation( @@ -1921,9 +2190,7 @@ def test_construction_invalid_content_label(self): 
source_images=[self._ct_image], pixel_array=self._ct_pixel_array, segmentation_type=SegmentationTypeValues.FRACTIONAL.value, - segment_descriptions=( - self._segment_descriptions - ), + segment_descriptions=self._segment_descriptions, series_instance_uid=self._series_instance_uid, series_number=self._series_number, sop_instance_uid=self._sop_instance_uid, @@ -1941,9 +2208,35 @@ def test_construction_mixed_source_series(self): source_images=self._ct_series + [self._ct_image], pixel_array=self._ct_pixel_array, segmentation_type=SegmentationTypeValues.FRACTIONAL.value, - segment_descriptions=( - self._additional_segment_descriptions # seg num 2 - ), + segment_descriptions=self._segment_descriptions, + series_instance_uid=self._series_instance_uid, + series_number=self._series_number, + sop_instance_uid=self._sop_instance_uid, + instance_number=self._instance_number, + manufacturer=self._manufacturer, + manufacturer_model_name=self._manufacturer_model_name, + software_versions=self._software_versions, + device_serial_number=self._device_serial_number + ) + + def test_construction_nonunqiue_plane_positions(self): + # It should not be possible to construct a segmentation with input + # images with the same plane location, even if they are otherwise + # distinct + ct_image_2 = deepcopy(self._ct_image) + ct_image_2.SOPInstanceUID = UID() + ct_image_2.InstanceNumber = 2 + pixel_array = np.zeros( + (2, *self._ct_image.pixel_array.shape), + dtype=bool + ) + pixel_array[0, 1:5, 10:15] = True + with pytest.raises(ValueError): + Segmentation( + source_images=[self._ct_image, ct_image_2], + pixel_array=pixel_array, + segmentation_type=SegmentationTypeValues.BINARY, + segment_descriptions=self._segment_descriptions, series_instance_uid=self._series_instance_uid, series_number=self._series_number, sop_instance_uid=self._sop_instance_uid, @@ -2634,6 +2927,8 @@ def test_segread(self): assert isinstance(seg, Segmentation) seg = segread('data/test_files/seg_image_sm_numbers.dcm') 
assert isinstance(seg, Segmentation) + seg = segread('data/test_files/seg_image_sm_dots_tiled_full.dcm') + assert isinstance(seg, Segmentation) def test_properties(self): # SM segs @@ -3446,3 +3741,210 @@ def test_iter_segments_ct_single_frame_2_segments(self): seg_id_item_2 = item_segment_2[1][0].SegmentIdentificationSequence[0] assert seg_id_item_2.ReferencedSegmentNumber == 2 assert item_segment_2[2].SegmentNumber == 2 + + +class TestPyramid(unittest.TestCase): + + def setUp(self): + file_path = Path(__file__) + data_dir = file_path.parent.parent.joinpath('data') + self._sm_image = dcmread( + str(data_dir.joinpath('test_files', 'sm_image.dcm')) + ) + tpm_size = ( + self._sm_image.TotalPixelMatrixRows, + self._sm_image.TotalPixelMatrixColumns + ) + self._seg_pix = np.zeros( + tpm_size, + dtype=np.uint8, + ) + self._seg_pix[5:15, 3:8] = 1 + + self._n_downsamples = 3 + self._downsampled_pix_arrays = [self._seg_pix] + seg_pil = Image.fromarray(self._seg_pix) + pyramid_uid = UID() + self._source_pyramid = [deepcopy(self._sm_image)] + self._source_pyramid[0].PyramidUID = pyramid_uid + for i in range(1, self._n_downsamples): + f = 2 ** i + out_size = ( + self._sm_image.TotalPixelMatrixRows // f, + self._sm_image.TotalPixelMatrixColumns // f + ) + + # Resize the segmentation arrays + resized = np.array( + seg_pil.resize(out_size, Image.Resampling.NEAREST) + ) + self._downsampled_pix_arrays.append(resized) + + # Mock lower-resolution source images. No need to have their pixel + # data correctly set as it isn't used. 
Just update the relevant + # metadata + src_pixel_spacing = ( + self._sm_image + .SharedFunctionalGroupsSequence[0] + .PixelMeasuresSequence[0] + .PixelSpacing + ) + pixel_spacing = [src_pixel_spacing[0] * f, src_pixel_spacing[1] * f] + downsampled_source_im = deepcopy(self._sm_image) + delattr(downsampled_source_im, 'PixelData') + downsampled_source_im.TotalPixelMatrixRows = out_size[0] + downsampled_source_im.TotalPixelMatrixColumns = out_size[1] + ( + downsampled_source_im + .SharedFunctionalGroupsSequence[0] + .PixelMeasuresSequence[0] + .PixelSpacing + ) = pixel_spacing + downsampled_source_im.PyramidUID = pyramid_uid + self._source_pyramid.append(downsampled_source_im) + + self._segmented_property_category = \ + codes.SCT.MorphologicallyAbnormalStructure + self._segmented_property_type = codes.SCT.Neoplasm + self._segment_descriptions = [ + SegmentDescription( + segment_number=1, + segment_label='Segment #1', + segmented_property_category=self._segmented_property_category, + segmented_property_type=self._segmented_property_type, + algorithm_type=SegmentAlgorithmTypeValues.AUTOMATIC.value, + algorithm_identification=AlgorithmIdentificationSequence( + name='bla', + family=codes.DCM.ArtificialIntelligence, + version='v1' + ) + ), + ] + + def test_pyramid_factors(self): + downsample_factors = [2.0, 5.0] + segs = create_segmentation_pyramid( + source_images=[self._sm_image], + pixel_arrays=[self._seg_pix], + segmentation_type=SegmentationTypeValues.BINARY, + segment_descriptions=self._segment_descriptions, + series_instance_uid=UID(), + series_number=1, + manufacturer='Foo', + manufacturer_model_name='Bar', + software_versions='1', + device_serial_number='123', + downsample_factors=downsample_factors, + ) + + assert len(segs) == len(downsample_factors) + 1 + tol = 0.01 + for f, seg in zip([1.0, *downsample_factors], segs): + assert hasattr(seg, 'PyramidUID') + assert abs( + seg.TotalPixelMatrixRows - int(self._seg_pix.shape[0] / f) + ) < tol + assert abs( + 
seg.TotalPixelMatrixColumns - int(self._seg_pix.shape[1] / f) + ) < tol + + def test_pyramid_downsample_factors(self): + # Test construction when given a single source image, single + # segmentation mask, and specified downsample factors + downsample_factors = [2.0, 5.0] + segs = create_segmentation_pyramid( + source_images=[self._sm_image], + pixel_arrays=[self._seg_pix], + segmentation_type=SegmentationTypeValues.BINARY, + segment_descriptions=self._segment_descriptions, + series_instance_uid=UID(), + series_number=1, + manufacturer='Foo', + manufacturer_model_name='Bar', + software_versions='1', + device_serial_number='123', + downsample_factors=downsample_factors, + ) + + assert len(segs) == len(downsample_factors) + 1 + tol = 0.01 + for f, seg in zip([1.0, *downsample_factors], segs): + assert hasattr(seg, 'PyramidUID') + assert abs( + seg.TotalPixelMatrixRows - int(self._seg_pix.shape[0] / f) + ) < tol + assert abs( + seg.TotalPixelMatrixColumns - int(self._seg_pix.shape[1] / f) + ) < tol + + def test_single_source_multiple_pixel_arrays(self): + # Test construction when given a single source image and multiple + # segmentation images + segs = create_segmentation_pyramid( + source_images=[self._sm_image], + pixel_arrays=self._downsampled_pix_arrays, + segmentation_type=SegmentationTypeValues.BINARY, + segment_descriptions=self._segment_descriptions, + series_instance_uid=UID(), + series_number=1, + manufacturer='Foo', + manufacturer_model_name='Bar', + software_versions='1', + device_serial_number='123', + ) + + assert len(segs) == len(self._downsampled_pix_arrays) + for pix, seg in zip(self._downsampled_pix_arrays, segs): + assert hasattr(seg, 'PyramidUID') + assert np.array_equal( + seg.get_total_pixel_matrix(combine_segments=True), + pix + ) + + def test_multiple_source_single_pixel_array(self): + # Test construction when given multiple source images and a single + # segmentation image + segs = create_segmentation_pyramid( + 
source_images=self._source_pyramid, + pixel_arrays=[self._seg_pix], + segmentation_type=SegmentationTypeValues.BINARY, + segment_descriptions=self._segment_descriptions, + series_instance_uid=UID(), + series_number=1, + manufacturer='Foo', + manufacturer_model_name='Bar', + software_versions='1', + device_serial_number='123', + ) + + assert len(segs) == len(self._source_pyramid) + for pix, seg in zip(self._downsampled_pix_arrays, segs): + assert hasattr(seg, 'PyramidUID') + assert np.array_equal( + seg.get_total_pixel_matrix(combine_segments=True), + pix + ) + + def test_multiple_source_multiple_pixel_arrays(self): + # Test construction when given multiple source images and multiple + # segmentation images + segs = create_segmentation_pyramid( + source_images=self._source_pyramid, + pixel_arrays=self._downsampled_pix_arrays, + segmentation_type=SegmentationTypeValues.BINARY, + segment_descriptions=self._segment_descriptions, + series_instance_uid=UID(), + series_number=1, + manufacturer='Foo', + manufacturer_model_name='Bar', + software_versions='1', + device_serial_number='123', + ) + + assert len(segs) == len(self._source_pyramid) + for pix, seg in zip(self._downsampled_pix_arrays, segs): + assert hasattr(seg, 'PyramidUID') + assert np.array_equal( + seg.get_total_pixel_matrix(combine_segments=True), + pix + ) diff --git a/tests/test_utils.py b/tests/test_utils.py index 054f5ef3..377f6027 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -4,12 +4,19 @@ from pydicom import dcmread from pydicom.dataset import Dataset +from pydicom.uid import VLWholeSlideMicroscopyImageStorage import pytest from highdicom import PlanePositionSequence +from highdicom.sr import CodedConcept from highdicom.enum import CoordinateSystemNames -from highdicom.utils import compute_plane_position_tiled_full, is_tiled_image +from highdicom.utils import ( + compute_plane_position_tiled_full, + compute_plane_position_slide_per_frame, + is_tiled_image, + 
are_plane_positions_tiled_full, +) params_plane_positions = [ @@ -167,10 +174,11 @@ def test_is_tiled_image(filepath, expected_output): assert is_tiled_image(dcm) == expected_output -def compute_plane_position_slide_per_frame(): +def test_compute_plane_position_slide_per_frame(): iterator = itertools.product(range(1, 4), range(1, 3)) for num_optical_paths, num_focal_planes in iterator: image = Dataset() + image.SOPClassUID = VLWholeSlideMicroscopyImageStorage image.Rows = 4 image.Columns = 4 image.TotalPixelMatrixRows = 16 @@ -185,14 +193,66 @@ def compute_plane_position_slide_per_frame(): pixel_measures_item.SpacingBetweenSlices = 1.0 shared_fg_item.PixelMeasuresSequence = [pixel_measures_item] image.SharedFunctionalGroupsSequence = [shared_fg_item] + origin_item = Dataset() + origin_item.XOffsetInSlideCoordinateSystem = 0.0 + origin_item.YOffsetInSlideCoordinateSystem = 0.0 + image.TotalPixelMatrixOriginSequence = [origin_item] + image.DimensionOrganizationType = "TILED_FULL" + optical_path_item = Dataset() + optical_path_item.OpticalPathIdentifier = '1' + optical_path_item.IlluminationTypeCodeSequence = [ + CodedConcept( + value="111744", + meaning="Brightfield illumination", + scheme_designator="DCM", + ) + ] + image.OpticalPathSequence = [optical_path_item] - plane_positions = compute_plane_position_tiled_full(image) + plane_positions = compute_plane_position_slide_per_frame(image) tiles_per_column = math.ceil(image.TotalPixelMatrixRows / image.Rows) tiles_per_row = math.ceil(image.TotalPixelMatrixColumns / image.Columns) - assert len(plane_positions) == math.prod([ - num_optical_paths, - num_focal_planes, - tiles_per_row, + assert len(plane_positions) == ( + num_optical_paths * + num_focal_planes * + tiles_per_row * tiles_per_column - ]) + ) + + +def test_are_plane_positions_tiled_full(): + + sm_path = Path(__file__).parents[1].joinpath( + 'data/test_files/sm_image.dcm' + ) + sm_image = dcmread(sm_path) + + # The plane positions from a TILED_FULL image should 
satsify the + # requirements + plane_positions = compute_plane_position_slide_per_frame(sm_image) + assert are_plane_positions_tiled_full( + plane_positions, + sm_image.Rows, + sm_image.Columns, + ) + + # If a plane is missing, it should not satisfy the requirements + plane_positions_missing = plane_positions[:5] + plane_positions[6:] + assert not are_plane_positions_tiled_full( + plane_positions_missing, + sm_image.Rows, + sm_image.Columns, + ) + + # If a plane is misordered, it should not satisfy the requirements + plane_positions_misordered = [ + plane_positions[1], + plane_positions[0], + *plane_positions[2:] + ] + assert not are_plane_positions_tiled_full( + plane_positions_misordered, + sm_image.Rows, + sm_image.Columns, + ) diff --git a/tests/test_valuetypes.py b/tests/test_valuetypes.py new file mode 100644 index 00000000..2a268b36 --- /dev/null +++ b/tests/test_valuetypes.py @@ -0,0 +1,159 @@ +import datetime + +import pytest +from pydicom import Dataset +from pydicom.sr.codedict import codes +from pydicom.sr.coding import Code +from pydicom.valuerep import DT, DA, TM +from pydicom import config + +from highdicom.sr.coding import CodedConcept +from highdicom.sr.enum import ValueTypeValues +from highdicom.sr.value_types import ( + DateContentItem, + DateTimeContentItem, + TimeContentItem +) +from tests.utils import write_and_read_dataset + + +class TestDateTimeContentItem: + test_datetime_values = [ + DT("2023"), + DT("202306"), + DT("20230623"), + DT("2023062311"), + DT("202306231112"), + DT("20230623111247"), + DT("20230623111247.123456"), + ] + + @pytest.mark.parametrize("datetime_value", test_datetime_values) + def test_construct_from_datetime(self, datetime_value: DT): + name = codes.DCM.DatetimeOfProcessing + assert isinstance(name, Code) + value_type = ValueTypeValues.DATETIME + item = DateTimeContentItem( + name=name, + value=datetime_value + ) + + assert item.name == name + assert item.value == datetime_value + assert item.value_type == value_type 
+ assert isinstance(item.value, datetime.datetime) + assert item.value.isoformat() == datetime_value.isoformat() + + @pytest.mark.parametrize("datetime_value", test_datetime_values) + @pytest.mark.parametrize("datetime_conversion", [True, False]) + def test_from_dataset( + self, + datetime_value: DT, + datetime_conversion: bool + ): + config.datetime_conversion = datetime_conversion + name = codes.DCM.DatetimeOfProcessing + assert isinstance(name, Code) + value_type = ValueTypeValues.DATETIME + dataset = Dataset() + dataset.ValueType = value_type.value + dataset.ConceptNameCodeSequence = [CodedConcept.from_code(name)] + dataset.DateTime = datetime_value + + dataset_reread = write_and_read_dataset(dataset) + item = DateTimeContentItem.from_dataset(dataset_reread) + + assert item.name == name + assert item.value == datetime_value + assert item.value_type == value_type + assert isinstance(item.value, datetime.datetime) + assert item.value.isoformat() == datetime_value.isoformat() + + +class TestDateContentItem: + def test_construct_from_date(self): + date_value = DA("20230623") + name = codes.DCM.AcquisitionDate + assert isinstance(name, Code) + value_type = ValueTypeValues.DATE + item = DateContentItem( + name=name, + value=date_value + ) + + assert item.name == name + assert item.value == date_value + assert item.value_type == value_type + assert isinstance(item.value, datetime.date) + assert item.value.isoformat() == date_value.isoformat() + + @pytest.mark.parametrize("datetime_conversion", [True, False]) + def test_from_dataset(self, datetime_conversion: bool): + config.datetime_conversion = datetime_conversion + date_value = DA("20230623") + name = codes.DCM.AcquisitionDate + assert isinstance(name, Code) + value_type = ValueTypeValues.DATE + dataset = Dataset() + dataset.ValueType = value_type.value + dataset.ConceptNameCodeSequence = [CodedConcept.from_code(name)] + dataset.Date = date_value + + dataset_reread = write_and_read_dataset(dataset) + item = 
DateContentItem.from_dataset(dataset_reread) + + assert item.name == name + assert item.value == date_value + assert item.value_type == value_type + assert isinstance(item.value, datetime.date) + assert item.value.isoformat() == date_value.isoformat() + + +class TestTimeContentItem: + test_time_values = [ + TM("11"), + TM("1112"), + TM("111247"), + TM("111247.123456"), + ] + + @pytest.mark.parametrize("time_value", test_time_values) + def test_construct_from_time(self, time_value: TM): + name = codes.DCM.AcquisitionTime + assert isinstance(name, Code) + value_type = ValueTypeValues.TIME + item = TimeContentItem( + name=name, + value=time_value + ) + + assert item.name == name + assert item.value == time_value + assert item.value_type == value_type + assert isinstance(item.value, datetime.time) + assert item.value.isoformat() == time_value.isoformat() + + @pytest.mark.parametrize("time_value", test_time_values) + @pytest.mark.parametrize("datetime_conversion", [True, False]) + def test_from_dataset( + self, + time_value: TM, + datetime_conversion: bool + ): + config.datetime_conversion = datetime_conversion + name = codes.DCM.AcquisitionDate + assert isinstance(name, Code) + value_type = ValueTypeValues.TIME + dataset = Dataset() + dataset.ValueType = value_type.value + dataset.ConceptNameCodeSequence = [CodedConcept.from_code(name)] + dataset.Time = time_value + + dataset_reread = write_and_read_dataset(dataset) + item = TimeContentItem.from_dataset(dataset_reread) + + assert item.name == name + assert item.value == time_value + assert item.value_type == value_type + assert isinstance(item.value, datetime.time) + assert item.value.isoformat() == time_value.isoformat()