diff --git a/.codespellrc b/.codespellrc
new file mode 100644
index 00000000..d808b203
--- /dev/null
+++ b/.codespellrc
@@ -0,0 +1,4 @@
+[codespell]
+skip = .git,*.pdf,*.svg,*.ipynb
+# te,fo - abbreviations used as variable names
+ignore-words-list = te,fo
diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
new file mode 100644
index 00000000..7373affc
--- /dev/null
+++ b/.github/workflows/codespell.yml
@@ -0,0 +1,22 @@
+---
+name: Codespell
+
+on:
+  push:
+    branches: [master]
+  pull_request:
+    branches: [master]
+
+permissions:
+  contents: read
+
+jobs:
+  codespell:
+    name: Check for spelling errors
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+      - name: Codespell
+        uses: codespell-project/actions-codespell@v2
diff --git a/data/test_files/seg_image_sm_control.dcm b/data/test_files/seg_image_sm_control.dcm
index 0bcc183c..3ab6a22a 100644
Binary files a/data/test_files/seg_image_sm_control.dcm and b/data/test_files/seg_image_sm_control.dcm differ
diff --git a/data/test_files/seg_image_sm_dots.dcm b/data/test_files/seg_image_sm_dots.dcm
index 97a299ee..21572272 100644
Binary files a/data/test_files/seg_image_sm_dots.dcm and b/data/test_files/seg_image_sm_dots.dcm differ
diff --git a/data/test_files/seg_image_sm_numbers.dcm b/data/test_files/seg_image_sm_numbers.dcm
index d7a199b2..374672b3 100644
Binary files a/data/test_files/seg_image_sm_numbers.dcm and b/data/test_files/seg_image_sm_numbers.dcm differ
diff --git a/data/test_files/sm_annotations.dcm b/data/test_files/sm_annotations.dcm
new file mode 100644
index 00000000..20734e21
Binary files /dev/null and b/data/test_files/sm_annotations.dcm differ
diff --git a/data/test_files/sr_document_with_multiple_groups.dcm b/data/test_files/sr_document_with_multiple_groups.dcm
new file mode 100644
index 00000000..2150d162
Binary files /dev/null and b/data/test_files/sr_document_with_multiple_groups.dcm differ
diff --git a/docs/ann.rst b/docs/ann.rst
new file mode 100644
index 00000000..d98f8199
--- /dev/null
+++ b/docs/ann.rst
@@ -0,0 +1,344 @@
+.. _ann:
+
+Microscopy Bulk Simple Annotation (ANN) Objects
+===============================================
+
+The Microscopy Bulk Simple Annotation IOD is designed specifically to store
+large numbers of similar annotations and measurements from microscopy images.
+Annotations of microscopy images typically refer to very large numbers of cells
+or cellular structures. Storing these in a Structured Report Document, with its
+highly nested structure, would be very inefficient in storage space and
+unnecessarily complex and slow to parse. Microscopy Bulk Simple Annotation
+objects ("bulk annotations") solve this problem by allowing you to store large
+numbers of similar annotations or measurements in efficient arrays without
+duplication of the descriptive metadata.
+
+Each bulk annotation object contains one or more Annotation Groups, each of
+which contains a set of graphical annotations, and optionally one or more
+numerical measurements relating to those graphical annotations.
+
+Constructing Annotation Groups
+------------------------------
+
+An Annotation Group is a set of multiple similar annotations from a microscopy
+image. For example, a single annotation group may contain all annotations of
+cell nuclei, lymphocytes, or regions of necrosis in the image. In *highdicom*,
+an annotation group is represented by a :class:`highdicom.ann.AnnotationGroup`. 
+
+Each annotation group contains some required metadata that describes the
+contents of the group, as well as further optional metadata that may provide
+additional details about the group or the derivation of the annotations it
+contains. The required metadata elements include:
+
+* A ``number`` (``int``), an integer that identifies the group.
+* A ``label`` (``str``) giving a human-readable label for the group.
+* A ``uid`` (``str`` or :class:`highdicom.UID`) uniquely identifying the group.
+  Usually, you will want to generate a new UID for this.
+* An ``annotated_property_category`` and ``annotated_property_type``
+  (:class:`highdicom.sr.CodedConcept`): coded values (see :ref:`coding`)
+  describing the category and the specific structure that has been annotated.
+* A ``graphic_type`` (:class:`highdicom.ann.GraphicTypeValues`) indicating the
+  "form" of the annotations. Permissible values are ``"ELLIPSE"``, ``"POINT"``,
+  ``"POLYGON"``, ``"RECTANGLE"``, and ``"POLYLINE"``.
+* The ``algorithm_type``
+  (:class:`highdicom.ann.AnnotationGroupGenerationTypeValues`), the type of the
+  algorithm used to generate the annotations (``"MANUAL"``,
+  ``"SEMIAUTOMATIC"``, or ``"AUTOMATIC"``).
+
+Further optional metadata may be provided; see the API documentation
+for more information.
+
+The actual annotation data is passed to the group as a list of
+``numpy.ndarray`` objects, each of shape (*N* x *D*). *N* is the number of
+coordinates required for each individual annotation and is determined by the
+graphic type (see :class:`highdicom.ann.GraphicTypeValues`). *D* is either 2 --
+meaning that the coordinates are expressed as a (Column,Row) pair in image
+coordinates -- or 3 -- meaning that the coordinates are expressed as an (X,Y,Z)
+triple in 3D frame of reference coordinates.
+
+When considering which type of coordinate to use, bear in mind that the 2D image
+coordinates refer only to a single image in an image pyramid, whereas 3D frame of
+reference coordinates are more easily used with any image in the pyramid.
+Also note that although you can include multiple annotation groups in a single
+bulk annotation object, they must all use the same coordinate type.
+
+Here is a simple example of constructing an annotation group:
+
+.. code-block:: python
+
+    from pydicom.sr.codedict import codes
+    from pydicom.sr.coding import Code
+    import highdicom as hd
+    import numpy as np
+
+    # Graphic data containing two nuclei, each represented by a single point
+    # expressed in 2D image coordinates
+    graphic_data = [
+        np.array([[34.6, 18.4]]),
+        np.array([[28.7, 34.9]]),
+    ]
+
+    # Nuclei annotations produced by a manual algorithm
+    nuclei_group = hd.ann.AnnotationGroup(
+        number=1,
+        uid=hd.UID(),
+        label='nuclei',
+        annotated_property_category=codes.SCT.AnatomicalStructure,
+        annotated_property_type=Code('84640000', 'SCT', 'Nucleus'),
+        algorithm_type=hd.ann.AnnotationGroupGenerationTypeValues.MANUAL,
+        graphic_type=hd.ann.GraphicTypeValues.POINT,
+        graphic_data=graphic_data,
+    )
+
+Note that including only two nuclei would be very unusual in practice:
+annotations often number in the thousands or even millions within a large whole
+slide image.
+
+Including Measurements
+----------------------
+
+In addition to the coordinates of the annotations themselves, it is also
+possible to attach one or more continuous-valued numeric *measurements*
+corresponding to those annotations. 
The measurements are passed as a
+:class:`highdicom.ann.Measurements` object, which contains the *name* of the
+measurement (as a coded value), the *unit* of the measurement (also as a coded
+value), and an array of the measurements themselves (as a ``numpy.ndarray``).
+
+The length of the measurement array for any measurements attached to an
+annotation group must exactly match the number of annotations in the group.
+Value *i* in the array therefore represents the measurement of annotation *i*.
+
+Here is the above example with an area measurement included:
+
+.. code-block:: python
+
+    from pydicom.sr.codedict import codes
+    from pydicom.sr.coding import Code
+    import highdicom as hd
+    import numpy as np
+
+    # Graphic data containing two nuclei, each represented by a single point
+    # expressed in 2D image coordinates
+    graphic_data = [
+        np.array([[34.6, 18.4]]),
+        np.array([[28.7, 34.9]]),
+    ]
+
+    # Measurement object representing the areas of each of the two nuclei
+    area_measurement = hd.ann.Measurements(
+        name=codes.SCT.Area,
+        unit=codes.UCUM.SquareMicrometer,
+        values=np.array([20.4, 43.8]),
+    )
+
+    # Nuclei annotations produced by a manual algorithm
+    nuclei_group = hd.ann.AnnotationGroup(
+        number=1,
+        uid=hd.UID(),
+        label='nuclei',
+        annotated_property_category=codes.SCT.AnatomicalStructure,
+        annotated_property_type=Code('84640000', 'SCT', 'Nucleus'),
+        algorithm_type=hd.ann.AnnotationGroupGenerationTypeValues.MANUAL,
+        graphic_type=hd.ann.GraphicTypeValues.POINT,
+        graphic_data=graphic_data,
+        measurements=[area_measurement],
+    )
+
+Constructing MicroscopyBulkSimpleAnnotation Objects
+---------------------------------------------------
+
+When you have constructed the annotation groups, you can include them in
+a bulk annotation object, along with some additional metadata, using the
+:class:`highdicom.ann.MicroscopyBulkSimpleAnnotations` constructor. You also
+need to pass the image from which the annotations were derived so that
+`highdicom` can copy all the patient, study and slide-level metadata:
+
+.. code-block:: python
+
+    from pydicom import dcmread
+    import highdicom as hd
+
+    # Load a slide microscopy image from the highdicom test data (if you have
+    # cloned the highdicom git repo)
+    sm_image = dcmread('data/test_files/sm_image.dcm')
+
+    bulk_annotations = hd.ann.MicroscopyBulkSimpleAnnotations(
+        source_images=[sm_image],
+        annotation_coordinate_type=hd.ann.AnnotationCoordinateTypeValues.SCOORD,
+        annotation_groups=[nuclei_group],
+        series_instance_uid=hd.UID(),
+        series_number=10,
+        sop_instance_uid=hd.UID(),
+        instance_number=1,
+        manufacturer='MGH Pathology',
+        manufacturer_model_name='MGH Pathology Manual Annotations',
+        software_versions='0.0.1',
+        device_serial_number='1234',
+        content_description='Nuclei Annotations',
+    )
+
+    bulk_annotations.save_as('nuclei_annotations.dcm')
+
+The result is a complete DICOM object that can be written out as a DICOM file,
+transmitted over a network, etc.
+
+Reading Existing Bulk Annotation Objects
+----------------------------------------
+
+You can read an existing bulk annotation object using `pydicom` and then convert
+it to the `highdicom` object like this:
+
+.. 
code-block:: python
+
+    from pydicom import dcmread
+    import highdicom as hd
+
+    ann_dcm = dcmread('data/test_files/sm_annotations.dcm')
+
+    ann = hd.ann.MicroscopyBulkSimpleAnnotations.from_dataset(ann_dcm)
+
+    assert isinstance(ann, hd.ann.MicroscopyBulkSimpleAnnotations)
+
+Note that this example (and the following examples) uses an example file that
+you can access from the test data in the `highdicom` repository. It was created
+using exactly the code in the construction example above.
+
+Accessing Annotation Groups
+---------------------------
+
+Usually the next step when working with bulk annotation objects is to find the
+relevant annotation groups. There are two ways to do this.
+
+If you know either the number or the UID of the group, you can access the group
+directly (since either of these should uniquely identify a group). The
+:meth:`highdicom.ann.MicroscopyBulkSimpleAnnotations.get_annotation_group()`
+method is used for this purpose:
+
+.. code-block:: python
+
+    # Access a group by its number
+    group = ann.get_annotation_group(number=1)
+    assert isinstance(group, hd.ann.AnnotationGroup)
+
+    # Access a group by its UID
+    group = ann.get_annotation_group(
+        uid='1.2.826.0.1.3680043.10.511.3.40670836327971302375623613533993686'
+    )
+    assert isinstance(group, hd.ann.AnnotationGroup)
+
+Alternatively, you can search for groups that match certain filters such as
+the annotated property type or category, label, or graphic type. The
+:meth:`highdicom.ann.MicroscopyBulkSimpleAnnotations.get_annotation_groups()`
+method (note ``groups`` instead of ``group``) is used for this. It returns a
+list of matching groups, since the filters may match multiple groups.
+
+.. code-block:: python
+
+    from pydicom.sr.coding import Code
+
+    # Search for groups by annotated property type
+    groups = ann.get_annotation_groups(
+        annotated_property_type=Code('84640000', 'SCT', 'Nucleus'),
+    )
+    assert len(groups) == 1 and isinstance(groups[0], hd.ann.AnnotationGroup)
+
+    # If there are no matches, an empty list is returned
+    groups = ann.get_annotation_groups(
+        annotated_property_type=Code('53982002', "SCT", "Cell membrane"),
+    )
+    assert len(groups) == 0
+
+    # Search for groups by label
+    groups = ann.get_annotation_groups(label='nuclei')
+    assert len(groups) == 1 and isinstance(groups[0], hd.ann.AnnotationGroup)
+
+    # Search for groups by label and graphic type together (results must match
+    # *all* provided filters)
+    groups = ann.get_annotation_groups(
+        label='nuclei',
+        graphic_type=hd.ann.GraphicTypeValues.POINT,
+    )
+    assert len(groups) == 1 and isinstance(groups[0], hd.ann.AnnotationGroup)
+
+
+Extracting Information From Annotation Groups
+---------------------------------------------
+
+When you have found a relevant group, you can use the Python properties on
+the object to conveniently access metadata and the graphic data of the
+annotations. For example (see :class:`highdicom.ann.AnnotationGroup` for a full
+list):
+
+.. 
code-block:: python
+
+    # Access the label
+    assert group.label == 'nuclei'
+
+    # Access the number
+    assert group.number == 1
+
+    # Access the UID
+    assert group.uid == '1.2.826.0.1.3680043.10.511.3.40670836327971302375623613533993686'
+
+    # Access the annotated property type (returns a CodedConcept)
+    assert group.annotated_property_type == Code('84640000', 'SCT', 'Nucleus')
+
+    # Access the graphic type, describing the "form" of each annotation
+    assert group.graphic_type == hd.ann.GraphicTypeValues.POINT
+
+
+You can access the entire array of annotations at once using
+:meth:`highdicom.ann.AnnotationGroup.get_graphic_data()`. You need to pass the
+annotation coordinate type from the parent bulk annotation object to the group
+so that it knows how to interpret the coordinate data. This method returns a
+list of 2D numpy arrays of shape (*N* x *D*), mirroring how you would have
+passed the data in to create the annotation with `highdicom`.
+
+.. code-block:: python
+
+    import numpy as np
+
+    graphic_data = group.get_graphic_data(
+        coordinate_type=ann.AnnotationCoordinateType,
+    )
+    assert len(graphic_data) == 2 and isinstance(graphic_data[0], np.ndarray)
+
+Alternatively, you can access the coordinate array for a specific annotation
+using its (one-based) index in the annotation list:
+
+.. code-block:: python
+
+    # Get the number of annotations
+    assert group.number_of_annotations == 2
+
+    # Access an annotation using 1-based index
+    first_annotation = group.get_coordinates(
+        annotation_number=1,
+        coordinate_type=ann.AnnotationCoordinateType,
+    )
+    assert np.array_equal(first_annotation, np.array([[34.6, 18.4]]))
+
+Extracting Measurements From Annotation Groups
+----------------------------------------------
+
+You can use the :meth:`highdicom.ann.AnnotationGroup.get_measurements()` method
+to access any measurements included in the group. By default, this will return
+all measurements in the group, but you can also filter for measurements matching
+a certain name.
+
+Measurements are returned as a tuple of ``(names, values, units)``, where
+``names`` is a list of names as :class:`highdicom.sr.CodedConcept` objects,
+``values`` is a ``numpy.ndarray`` of values of shape (*N* x *M*), where *N* is
+the number of annotations and *M* is the number of measurements, and ``units``
+is a list of units, also as :class:`highdicom.sr.CodedConcept` objects.
+This return format is intended to facilitate the loading of measurements into
+tables or dataframes for further analysis.
+
+
+.. code-block:: python
+
+    from pydicom.sr.codedict import codes
+
+    names, values, units = group.get_measurements()
+    assert names[0] == codes.SCT.Area
+    assert units[0] == codes.UCUM.SquareMicrometer
+    assert values.shape == (2, 1)
diff --git a/docs/coding.rst b/docs/coding.rst
new file mode 100644
index 00000000..9d293b6d
--- /dev/null
+++ b/docs/coding.rst
@@ -0,0 +1,148 @@
+.. _coding:
+
+Coding
+======
+
+"Coding" is a key concept used throughout `highdicom`. By "coding", we are
+referring to the use of standardized nomenclatures or terminologies to describe
+medical (or related) concepts. For example, instead of using the English word
+"liver" (or a word in another human language) to describe the liver, we instead
+use a code such as '10200004' from the SNOMED-CT nomenclature to describe the
+liver in a standardized way. Use of coding is vital to ensure that these concepts
+are expressed unambiguously within DICOM files. 
Coding is especially
+fundamental within structured reporting, but is also found in other places
+in the DICOM standard and, in turn, `highdicom`.
+
+To communicate a concept in DICOM using a coding scheme, three elements are
+necessary:
+
+- A **coding scheme**: an identifier of the pre-defined terminology used to
+  define the concept.
+- A code **value**: the code value conveys a unique identifier for the specific
+  concept. It is often a number or alphanumeric string that may not have any
+  inherent meaning outside of the terminology.
+- A code **meaning**: the code meaning conveys the concept in a way that is
+  understandable to humans.
+
+Any coding scheme that operates in this way may be used within DICOM objects,
+including ones that you create yourself. However, it is highly recommended to
+use a well-known and widely accepted standard terminology to ensure that your
+DICOM objects will be as widely understood and as interoperable as possible.
+Examples of widely used medical terminologies include:
+
+- The `DCM `_
+  terminology. This terminology is defined within the DICOM standard itself and
+  is used to refer to DICOM concepts, as well as other concepts
+  within the radiology workflow.
+- `SNOMED-CT `_. This terminology contains codes to
+  describe medical concepts including anatomy, diseases, and procedures.
+- `RadLex `_. A standardized terminology for concepts
+  in radiology.
+- `UCUM `_. A terminology designed specifically to describe
+  units of measurement.
+
+See
+`this page `_
+for a list of terminologies used within DICOM.
+
+`Highdicom` defines the :class:`highdicom.sr.CodedConcept` class to encapsulate
+a coded concept. To create a :class:`highdicom.sr.CodedConcept`, you pass
+values for the coding scheme, code value, and code meaning. For example, to
+describe a tumor using the SNOMED-CT terminology, you could do this:
+
+.. code-block:: python
+
+    import highdicom as hd
+
+    tumor_code = hd.sr.CodedConcept(
+        value="108369006",
+        scheme_designator="SCT",
+        meaning="Tumor"
+    )
+
+Codes within Pydicom
+--------------------
+
+The `pydicom` library, upon which `highdicom` is built, has its own class
+``pydicom.sr.coding.Code`` that captures coded concepts in the same way that
+:class:`highdicom.sr.CodedConcept` does. The reason for the difference is that
+the `highdicom` class is a sub-class of ``pydicom.Dataset`` with the relevant
+attributes such that it can be included directly in a DICOM object. `pydicom`
+also includes values for a large number of coded concepts within
+the DCM, SNOMED-CT, and UCUM terminologies. For example, instead of manually
+creating the "tumor" concept above, we could have just used the pre-defined
+value in `pydicom`:
+
+.. code-block:: python
+
+    from pydicom.sr.codedict import codes
+
+    tumor_code = codes.SCT.Tumor
+    print(tumor_code.value)
+    # '108369006'
+    print(tumor_code.scheme_designator)
+    # 'SCT'
+    print(tumor_code.meaning)
+    # 'Tumor'
+
+Here are some other examples of codes within `pydicom`:
+
+.. 
code-block:: python
+
+    from pydicom.sr.codedict import codes
+
+    # A patient, as described by the DCM terminology
+    patient_code = codes.DCM.Patient
+    print(patient_code)
+    # Code(value='121025', scheme_designator='DCM', meaning='Patient', scheme_version=None)
+
+    # A centimeter, as described by the UCUM coding scheme
+    cm_code = codes.UCUM.cm
+    print(cm_code)
+    # Code(value='cm', scheme_designator='UCUM', meaning='cm', scheme_version=None)
+
+
+The two classes can be used interchangeably throughout highdicom: anywhere in the
+`highdicom` API that you can pass a :class:`highdicom.sr.CodedConcept`, you
+can pass a ``pydicom.sr.coding.Code`` instead and it will be converted behind
+the scenes for you. Furthermore, equality is defined between the two classes
+such that it evaluates to true if they represent the same concept, and they
+hash to the same value if you use them within sets or as keys in dictionaries.
+
+.. code-block:: python
+
+    import highdicom as hd
+    from pydicom.sr.codedict import codes
+
+    tumor_code_hd = hd.sr.CodedConcept(
+        value="108369006",
+        scheme_designator="SCT",
+        meaning="Tumor"
+    )
+    tumor_code = codes.SCT.Tumor
+
+    assert tumor_code_hd == tumor_code
+    assert len({tumor_code_hd, tumor_code}) == 1
+
+For equality and hashing, two codes are considered equivalent if they have the
+same coding scheme and value, regardless of how their meaning is represented.
+
+Finding Suitable Codes
+----------------------
+
+The `pydicom` code dictionary allows searching for concepts via simple string
+matching. However, for more advanced searching it is generally advisable to
+search the documentation for the coding scheme itself.
+
+.. code-block:: python
+
+    from pydicom.sr.codedict import codes
+
+    print(codes.SCT.dir('liver'))
+    # ['DeliveredRadiationDose',
+    #  'HistoryOfPrematureDelivery',
+    #  'Liver',
+    #  'LiverStructure']
+
+    print(codes.SCT.Liver)
+    # Code(value='10200004', scheme_designator='SCT', meaning='Liver', scheme_version=None)
diff --git a/docs/conf.py b/docs/conf.py
index 9484fc72..3544dc58 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -113,7 +113,7 @@
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = []
 
 # Custom sidebar templates, must be a dictionary that maps document names
 # to template names.
diff --git a/docs/general.rst b/docs/general.rst
new file mode 100644
index 00000000..36fcafcb
--- /dev/null
+++ b/docs/general.rst
@@ -0,0 +1,13 @@
+.. _general-concepts:
+
+General Concepts
+================
+
+This section covers topics that are generally applicable across various
+parts of the library.
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents:
+
+   coding
diff --git a/docs/generalsr.rst b/docs/generalsr.rst
new file mode 100644
index 00000000..c5de763c
--- /dev/null
+++ b/docs/generalsr.rst
@@ -0,0 +1,326 @@
+.. _generalsr:
+
+Structured Report (SR) Overview
+===============================
+
+Structured report documents are DICOM files that contain information derived
+from a medical image in a structured and computer-readable way. `Highdicom`
+supports structured reports through the :mod:`highdicom.sr` sub-package.
+
+SRs are highly complex, and this page attempts to give a basic introduction
+while also describing the implementation within `highdicom`. 
A more thorough
+explanation may be found in:
+
+* *DICOM Structured Reporting*. David Clunie. PixelMed Publishing, 2000.
+  Digital copy available
+  `here `_.
+
+Content Items
+-------------
+
+At their core, structured reports are collections of "content items". Each
+content item is a collection of DICOM attributes (a DICOM dataset) that are
+intended to convey a single piece of information. Each content item consists of
+a "name", which is always a `coded concept `_ describing what
+information is being conveyed, and a "value", which actually contains the
+information of interest. In a loose analogy, you can think of this as similar
+to other sorts of key-value mappings such as Python dictionaries and JSON
+documents. There are multiple different types of values (known as "value
+types"), and accordingly, there are a number of different types of content
+item. The classes representing these content items in `highdicom` are:
+
+- :class:`highdicom.sr.CodeContentItem`: The value is a coded concept.
+- :class:`highdicom.sr.CompositeContentItem`: The value is a reference to
+  another (composite) DICOM object (for example an image or segmentation
+  image).
+- :class:`highdicom.sr.ContainerContentItem`: The value is a template container
+  containing other content items (more on this later).
+- :class:`highdicom.sr.DateContentItem`: The value is a date.
+- :class:`highdicom.sr.DateTimeContentItem`: The value is a date and time.
+- :class:`highdicom.sr.NumContentItem`: The value is a decimal number.
+- :class:`highdicom.sr.PnameContentItem`: The value is a person's name.
+- :class:`highdicom.sr.ScoordContentItem`: The value is a (2D) spatial
+  coordinate in the image coordinate system.
+- :class:`highdicom.sr.Scoord3DContentItem`: The value is a 3D spatial
+  coordinate in the frame of reference coordinate system.
+- :class:`highdicom.sr.TcoordContentItem`: The value is a temporal coordinate
+  defined relative to some start point.
+- :class:`highdicom.sr.TextContentItem`: The value is a general string.
+- :class:`highdicom.sr.TimeContentItem`: The value is a time.
+- :class:`highdicom.sr.WaveformContentItem`: The value is a reference to a
+  waveform stored within another DICOM object.
+- :class:`highdicom.sr.UIDRefContentItem`: The value is a UID (unique
+  identifier).
+
+These classes are all subclasses of ``pydicom.Dataset`` and you can view and
+interact with their attributes as you can with any pydicom dataset.
+
+You can look at the API for each class to see how to construct content items of
+each type. Here are some simple examples for the more common types:
+
+.. 
code-block:: python
+
+    import highdicom as hd
+    import numpy as np
+    from pydicom.sr.codedict import codes
+
+    # A code content item expressing that the severity is mild
+    mild_item = hd.sr.CodeContentItem(
+        name=codes.SCT.Severity,
+        value=codes.SCT.Mild,
+    )
+
+    # A num content item expressing that the depth is 3.4cm
+    depth_item = hd.sr.NumContentItem(
+        name=codes.DCM.Depth,
+        value=3.4,
+        unit=codes.UCUM.cm,
+    )
+
+    # A scoord content item expressing a point in 3D space of a particular
+    # frame of reference
+    region_item = hd.sr.Scoord3DContentItem(
+        name=codes.DCM.ImageRegion,
+        graphic_type=hd.sr.GraphicTypeValues3D.POINT,
+        graphic_data=np.array([[10.6, 2.3, -9.6]]),
+        frame_of_reference_uid="1.2.826.0.1.3680043.10.511.3.88131829333631241913772141475338566",
+    )
+
+    # A composite content item referencing another image as the source for a
+    # segmentation
+    source_item = hd.sr.CompositeContentItem(
+        name=codes.DCM.SourceImageForSegmentation,
+        referenced_sop_class_uid="1.2.840.10008.5.1.4.1.1.2",
+        referenced_sop_instance_uid="1.2.826.0.1.3680043.10.511.3.21429265101044966075687084803549517",
+    )
+
+Graphic Data Content Items (SCOORD and SCOORD3D)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Two types of Content Item that are worth discussing in greater detail are the
+:class:`highdicom.sr.ScoordContentItem` and
+:class:`highdicom.sr.Scoord3DContentItem`. These two types both encode "graphic
+data" in the form of points/lines/polygons to allow describing locations within
+an image in the report.
+
+Scoord (spatial coordinate) Content Items describe locations in 2D image
+coordinates. Image coordinates are decimal numbers with sub-pixel accuracy that
+are defined in a coordinate system from (0.0, 0.0) at the top left corner of
+the top left pixel of the image and (Columns, Rows) at the bottom right corner
+of the bottom right pixel of the image. I.e. the center of the top left pixel
+is at location (0.5, 0.5).
+
+Scoord3D (3D spatial coordinate) Content Items describe locations in the 3D
+frame of reference that the corresponding image (or images) are defined within.
+The points are expressed in millimeters relative to the origin of the
+coordinate system (which is not generally the same as the origin of any
+particular image, which is given by the "ImagePositionPatient" or
+"ImagePositionSlide" attribute of the image). Points expressed in this way
+do not change if the underlying image is resampled.
+
+See the :mod:`highdicom.spatial` module for useful utilities for moving
+between these two coordinate systems.
+
+Each of these has a distinct but similar list of graphical objects that can be
+represented, defined by the enumerations
+:class:`highdicom.sr.GraphicTypeValues` (for Scoord Content Items) and
+:class:`highdicom.sr.GraphicTypeValues3D` (for Scoord3D Content Items). These
+types are:
+
+
+Graphic Type Values (Scoord):
+
+- ``CIRCLE``
+- ``ELLIPSE``
+- ``MULTIPOINT``
+- ``POINT``
+- ``POLYLINE``
+
+Graphic Type 3D Values (Scoord3D):
+
+- ``ELLIPSE``
+- ``ELLIPSOID``
+- ``MULTIPOINT``
+- ``POINT``
+- ``POLYLINE``
+- ``POLYGON``
+
+`highdicom` uses NumPy arrays (``numpy.ndarray``) to pass data into the
+constructors of the content items. These arrays should have dimensions (*N*, 2)
+for Scoord Content Items and (*N*, 3) for Scoord3D Content Items, where *N* is
+the number of points. The permissible number of points depends upon the graphic
+type. 
For +example, a ``POINT`` is described by exactly one point, a ``CIRCLE`` is +described by exactly 2 points (the center and a point on the circumference), +and a ``POLYLINE`` may contain 2 or more points. See the documentation of the +relevant enumeration class (:class:`highdicom.sr.GraphicTypeValues` or +:class:`highdicom.sr.GraphicTypeValues3D`) for specific details on all graphic +types. + +Furthermore, `highdicom` will reconstruct the graphic data stored into a +content item into a NumPy array of the correct shape if you use the +`value` property of the content item. + +Here are some examples of creating Scoord and Scoord3D Content Items and +accessing their graphic data: + +.. code-block:: python + + import highdicom as hd + import numpy as np + from pydicom.sr.codedict import codes + + circle_data = np.array( + [ + [10.0, 10.0], + [11.0, 11.0], + ] + ) + circle_item = hd.sr.ScoordContentItem( + name=codes.DCM.ImageRegion, + graphic_type=hd.sr.GraphicTypeValues.CIRCLE, + graphic_data=circle_data, + ) + assert np.array_equal(circle_data, circle_item.value) + + multipoint_data = np.array( + [ + [100.0, 110.0, -90.0], + [130.0, 70.0, -80.0], + [-10.0, 400.0, 80.0], + ] + ) + multipoint_item = hd.sr.Scoord3DContentItem( + name=codes.DCM.ImageRegion, + graphic_type=hd.sr.GraphicTypeValues3D.MULTIPOINT, + graphic_data=multipoint_data, + frame_of_reference_uid="1.2.826.0.1.3680043.10.511.3.88131829333631241913772141475338566", + ) + assert np.array_equal(multipoint_data, multipoint_item.value) + +Nesting of Content Items and Sequences +-------------------------------------- + +Each content item in an SR document may additionally have an attribute named +"ContentSequence", which is a sequence of other Content Items that are the +children of that Content Item. `Highdicom` has the class +:class:`highdicom.sr.ContentSequence` to encapsulate this behavior. + +Using Content Sequences containing further Content Items, whose sequences may in +turn contain further items, and so on, it is possible to build highly nested +structures of content items in a "tree" structure. + +When this is done, it is necessary to include a "relationship type" attribute +in each child content item (i.e. all Content Items except the one at the root +of the tree) that encodes the relationship that the child item has with the +parent (the Content Item whose Content Sequence the parent belongs to). + +The possible relationship types are defined with the enumeration +:class:`highdicom.sr.RelationshipTypeValues` (see the documentation of that +class for more detail): + +- ``CONTAINS`` +- ``HAS_ACQ_CONTEXT`` +- ``HAS_CONCEPT_MOD`` +- ``HAS_OBS_CONTEXT`` +- ``HAS_PROPERTIES`` +- ``INFERRED_FROM`` +- ``SELECTED_FROM`` + +If you construct Content Items with the relationship type, you can nest +Content Items like this: + +.. 
code-block:: python
+
+    import highdicom as hd
+    from pydicom.sr.codedict import codes
+
+    # A measurement derived from an image
+    depth_item = hd.sr.NumContentItem(
+        name=codes.DCM.Depth,
+        value=3.4,
+        unit=codes.UCUM.cm,
+    )
+
+    # The source image from which the measurement was inferred
+    source_item = hd.sr.CompositeContentItem(
+        name=codes.DCM.SourceImage,
+        referenced_sop_class_uid="1.2.840.10008.5.1.4.1.1.2",
+        referenced_sop_instance_uid="1.3.6.1.4.1.5962.1.1.1.1.1.20040119072730.12322",
+        relationship_type=hd.sr.RelationshipTypeValues.INFERRED_FROM,
+    )
+
+    # A tracking identifier identifying the measurement
+    tracking_item = hd.sr.UIDRefContentItem(
+        name=codes.DCM.TrackingIdentifier,
+        value=hd.UID(),  # a newly generated UID
+        relationship_type=hd.sr.RelationshipTypeValues.HAS_OBS_CONTEXT,
+    )
+
+    # Nest the source and tracking items below the depth item
+    depth_item.ContentSequence = [source_item, tracking_item]
+
+Structured Reporting IODs
+-------------------------
+
+By nesting Content Items and Content Sequences in this way, you can create a
+Structured Report DICOM object. There are many IODs (Information Object
+Definitions) for Structured Reports, and `highdicom` currently implements three
+of them:
+
+- :class:`highdicom.sr.EnhancedSR` -- Does not support Scoord 3D Content Items.
+- :class:`highdicom.sr.ComprehensiveSR` -- Does not support Scoord 3D Content
+  Items. In terms of functionality currently supported by `highdicom`, this is
+  equivalent to the EnhancedSR.
+- :class:`highdicom.sr.Comprehensive3DSR` -- This is the most general form of
+  SR, but is relatively new and may not be supported by all systems. It does
+  support Scoord 3D Content Items.
+
+The constructors for these classes take a number of parameters specifying the
+content of the structured report, the evidence from which it was derived in the
+form of a list of ``pydicom.Dataset`` objects, as well as various metadata
+associated with the report.
+
+The content is provided as the ``content`` parameter, which should be a single
+content item representing the "root" of the (potentially) nested structure
+containing all Content Items in the report.
+
+Using the depth item constructed above as the root Content Item, we can
+create a Structured Report like this (here we use an example dataset from
+the highdicom test data):
+
+.. code-block:: python
+
+    import pydicom
+
+    # Path to single-frame CT image instance stored as PS3.10 file
+    image_dataset = pydicom.dcmread("data/test_files/ct_image.dcm")
+
+    # Create the Structured Report instance
+    sr_dataset = hd.sr.Comprehensive3DSR(
+        evidence=[image_dataset],
+        content=depth_item,
+        series_number=1,
+        series_instance_uid=hd.UID(),
+        sop_instance_uid=hd.UID(),
+        instance_number=1,
+        manufacturer='Manufacturer'
+    )
+
+Note that this is just a toy example and we do **not** recommend producing SRs
+like this in practice. Instead of this arbitrary structure of Content Items, it
+is far better to follow an existing **template** that encapsulates a
+standardized structure of Content Items.
+
+Structured Reporting Templates
+------------------------------
+
+The DICOM standard defines a large number of Structured Reporting
+`templates `_,
+which are essentially sets of constraints on the pattern of Content Items
+within a report. Each template is intended for a particular purpose.
+
+*Highdicom* currently implements only the TID1500 "Measurement Report" template
+and its many sub-templates. 
This template is highly flexible and provides a
+standardized way to store general measurements and evaluations from one or more
+images or image regions (expressed in image or frame of reference coordinates).
+
+The following page gives a detailed overview of how to use the Measurement
+Report template within *highdicom*.
diff --git a/docs/images/tid1500_overview.svg b/docs/images/tid1500_overview.svg
new file mode 100644
index 00000000..8a940d72
--- /dev/null
+++ b/docs/images/tid1500_overview.svg
@@ -0,0 +1,808 @@
+[SVG markup not reproduced: a diagram giving an overview of the TID1500 Measurement Report template. A Structured Report Document contains one Measurement Report (with Observation Context), which contains 0-n each of "Measurements and Qualitative Evaluations", "Planar ROI Measurements and Qualitative Evaluations", and "Volumetric ROI Measurements and Qualitative Evaluations" groups. Each group records source images or an ROI/referenced segment, a tracking identifier, finding category and type, algorithm ID, and finding sites, and contains 0-n measurements (numerical measurement, unit, qualifier, tracking identifier, algorithm ID, derivation, method, finding sites) and qualitative evaluations (coded, categorical).]
diff --git a/docs/index.rst b/docs/index.rst
index 66be1127..7b1eb1bd 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -2,7 +2,7 @@ Documentation of the highdicom package
 ======================================
 
 .. toctree::
-   :maxdepth: 2
+   :maxdepth: 3
    :caption: Contents:
 
    introduction
@@ -12,8 +12,8 @@ Documentation of the highdicom package
    conformance
    citation
    license
-   package
    release_notes
+   package
diff --git a/docs/iods.rst b/docs/iods.rst
new file mode 100644
index 00000000..709e16f1
--- /dev/null
+++ b/docs/iods.rst
@@ -0,0 +1,22 @@
+.. _iods:
+
+Information Object Definitions (IODs)
+=====================================
+
+An Information Object Definition defines a single "type" of DICOM file, such
+as a Segmentation, Presentation State or Structured Report. The following
+sections give in-depth explanations of the various IODs implemented within
+*highdicom*.
+
+.. toctree::
+   :maxdepth: 3
+   :caption: Contents:
+
+   seg
+   sr
+   kos
+   ann
+   pm
+   pr
+   sc
+   legacy
diff --git a/docs/kos.rst b/docs/kos.rst
new file mode 100644
index 00000000..e6bee912
--- /dev/null
+++ b/docs/kos.rst
@@ -0,0 +1,6 @@
+.. _kos:
+
+Key Object Selection (KOS) Documents
+====================================
+
+This page is under construction, and more detail will be added soon.
diff --git a/docs/legacy.rst b/docs/legacy.rst
new file mode 100644
index 00000000..56e6a685
--- /dev/null
+++ b/docs/legacy.rst
@@ -0,0 +1,6 @@
+.. _legacy:
+
+Legacy Converted Enhanced Images
+================================
+
+This page is under construction, and more detail will be added soon.
diff --git a/docs/package.rst b/docs/package.rst
index ef3818ec..8a6b787c 100644
--- a/docs/package.rst
+++ b/docs/package.rst
@@ -10,6 +10,7 @@ highdicom package
 
 .. 
automodule:: highdicom :members: + :inherited-members: pydicom.dataset.Dataset,pydicom.sequence.Sequence,Dataset,Sequence,list,str,DataElementSequence,enum.Enum,Enum, :special-members: __call__ :undoc-members: :show-inheritance: @@ -19,6 +20,7 @@ highdicom.color module .. automodule:: highdicom.color :members: + :inherited-members: pydicom.dataset.Dataset,pydicom.sequence.Sequence,Dataset,Sequence,list,str,DataElementSequence,enum.Enum,Enum, :special-members: __call__ :undoc-members: :show-inheritance: @@ -28,6 +30,7 @@ highdicom.frame module .. automodule:: highdicom.frame :members: + :inherited-members: pydicom.dataset.Dataset,pydicom.sequence.Sequence,Dataset,Sequence,list,str,DataElementSequence,enum.Enum,Enum, :special-members: __call__ :undoc-members: :show-inheritance: @@ -37,6 +40,7 @@ highdicom.io module .. automodule:: highdicom.io :members: + :inherited-members: pydicom.dataset.Dataset,pydicom.sequence.Sequence,Dataset,Sequence,list,str,DataElementSequence,enum.Enum,Enum, :special-members: __call__ :undoc-members: :show-inheritance: @@ -46,6 +50,7 @@ highdicom.spatial module .. automodule:: highdicom.spatial :members: + :inherited-members: pydicom.dataset.Dataset,pydicom.sequence.Sequence,Dataset,Sequence,list,str,DataElementSequence,enum.Enum,Enum, :special-members: __call__ :undoc-members: :show-inheritance: @@ -55,6 +60,7 @@ highdicom.valuerep module .. automodule:: highdicom.valuerep :members: + :inherited-members: pydicom.dataset.Dataset,pydicom.sequence.Sequence,Dataset,Sequence,list,str,DataElementSequence,enum.Enum,Enum, :special-members: __call__ :undoc-members: :show-inheritance: @@ -64,6 +70,7 @@ highdicom.utils module .. automodule:: highdicom.utils :members: + :inherited-members: pydicom.dataset.Dataset,pydicom.sequence.Sequence,Dataset,Sequence,list,str,DataElementSequence,enum.Enum,Enum, :special-members: __call__ :undoc-members: :show-inheritance: @@ -76,6 +83,7 @@ highdicom.legacy package .. automodule:: highdicom.legacy :members: + :inherited-members: pydicom.dataset.Dataset,pydicom.sequence.Sequence,Dataset,Sequence,list,str,DataElementSequence,enum.Enum,Enum, :special-members: __call__ :undoc-members: :show-inheritance: @@ -87,6 +95,7 @@ highdicom.ann package .. automodule:: highdicom.ann :members: + :inherited-members: pydicom.dataset.Dataset,pydicom.sequence.Sequence,Dataset,Sequence,list,str,DataElementSequence,enum.Enum,Enum, :special-members: __call__ :undoc-members: :show-inheritance: @@ -98,6 +107,7 @@ highdicom.ko package .. automodule:: highdicom.ko :members: + :inherited-members: pydicom.dataset.Dataset,pydicom.sequence.Sequence,Dataset,Sequence,list,str,DataElementSequence,enum.Enum,Enum, :special-members: __call__ :undoc-members: :show-inheritance: @@ -109,6 +119,7 @@ highdicom.pm package .. automodule:: highdicom.pm :members: + :inherited-members: pydicom.dataset.Dataset,pydicom.sequence.Sequence,Dataset,Sequence,list,str,DataElementSequence,enum.Enum,Enum, :special-members: __call__ :undoc-members: :show-inheritance: @@ -120,6 +131,7 @@ highdicom.pr package .. automodule:: highdicom.pr :members: + :inherited-members: pydicom.dataset.Dataset,pydicom.sequence.Sequence,Dataset,Sequence,list,str,DataElementSequence,enum.Enum,Enum, :special-members: __call__ :undoc-members: :show-inheritance: @@ -131,6 +143,7 @@ highdicom.seg package .. 
automodule:: highdicom.seg :members: + :inherited-members: pydicom.dataset.Dataset,pydicom.sequence.Sequence,Dataset,Sequence,list,str,DataElementSequence,enum.Enum,Enum, :special-members: __call__ :undoc-members: :show-inheritance: @@ -140,6 +153,7 @@ highdicom.seg.utils module .. automodule:: highdicom.seg.utils :members: + :inherited-members: pydicom.dataset.Dataset,pydicom.sequence.Sequence,Dataset,Sequence,list,str,DataElementSequence,enum.Enum,Enum, :special-members: __call__ :undoc-members: :show-inheritance: @@ -151,6 +165,7 @@ highdicom.sr package .. automodule:: highdicom.sr :members: + :inherited-members: pydicom.dataset.Dataset,pydicom.sequence.Sequence,Dataset,Sequence,list,str,DataElementSequence,enum.Enum,Enum, :special-members: __call__ :undoc-members: :show-inheritance: @@ -160,6 +175,7 @@ highdicom.sr.utils module .. automodule:: highdicom.sr.utils :members: + :inherited-members: pydicom.dataset.Dataset,pydicom.sequence.Sequence,Dataset,Sequence,list,str,DataElementSequence,enum.Enum,Enum, :special-members: __call__ :undoc-members: :show-inheritance: @@ -171,6 +187,7 @@ highdicom.sc package .. automodule:: highdicom.sc :members: + :inherited-members: pydicom.dataset.Dataset,pydicom.sequence.Sequence,Dataset,Sequence,list,str,DataElementSequence,enum.Enum,Enum, :special-members: __call__ :undoc-members: :show-inheritance: diff --git a/docs/pm.rst b/docs/pm.rst new file mode 100644 index 00000000..f2c7033f --- /dev/null +++ b/docs/pm.rst @@ -0,0 +1,6 @@ +.. _pm: + +Parametric Maps +=============== + +This page is under construction, and more detail will be added soon. diff --git a/docs/pr.rst b/docs/pr.rst new file mode 100644 index 00000000..f8923bb8 --- /dev/null +++ b/docs/pr.rst @@ -0,0 +1,6 @@ +.. _pr: + +Presentation States +=================== + +This page is under construction, and more detail will be added soon. diff --git a/docs/quickstart.rst b/docs/quickstart.rst new file mode 100644 index 00000000..ca4732a9 --- /dev/null +++ b/docs/quickstart.rst @@ -0,0 +1,915 @@ +.. _quick-start: + +Quick Start +=========== + +This section gives simple examples of how to create various types of DICOM +object with *highdicom*. See :doc:`iods` for more detail on the +options available within each. + +.. _creating-seg: + +Creating Segmentation (SEG) images +---------------------------------- + +Derive a Segmentation image from a series of single-frame Computed Tomography +(CT) images: + +.. 
code-block:: python
+
+    from pathlib import Path
+
+    import highdicom as hd
+    import numpy as np
+    from pydicom.sr.codedict import codes
+    from pydicom.filereader import dcmread
+
+    # Path to directory containing single-frame legacy CT Image instances
+    # stored as PS3.10 files
+    series_dir = Path('path/to/series/directory')
+    image_files = series_dir.glob('*.dcm')
+
+    # Read CT Image data sets from PS3.10 files on disk
+    image_datasets = [dcmread(str(f)) for f in image_files]
+
+    # Create a binary segmentation mask
+    mask = np.zeros(
+        shape=(
+            len(image_datasets),
+            image_datasets[0].Rows,
+            image_datasets[0].Columns
+        ),
+        dtype=bool
+    )
+    mask[1:-1, 10:-10, 100:-100] = True
+
+    # Describe the algorithm that created the segmentation
+    algorithm_identification = hd.AlgorithmIdentificationSequence(
+        name='test',
+        version='v1.0',
+        family=codes.cid7162.ArtificialIntelligence
+    )
+
+    # Describe the segment
+    description_segment_1 = hd.seg.SegmentDescription(
+        segment_number=1,
+        segment_label='first segment',
+        segmented_property_category=codes.cid7150.Tissue,
+        segmented_property_type=codes.cid7166.ConnectiveTissue,
+        algorithm_type=hd.seg.SegmentAlgorithmTypeValues.AUTOMATIC,
+        algorithm_identification=algorithm_identification,
+        tracking_uid=hd.UID(),
+        tracking_id='test segmentation of computed tomography image'
+    )
+
+    # Create the Segmentation instance
+    seg_dataset = hd.seg.Segmentation(
+        source_images=image_datasets,
+        pixel_array=mask,
+        segmentation_type=hd.seg.SegmentationTypeValues.BINARY,
+        segment_descriptions=[description_segment_1],
+        series_instance_uid=hd.UID(),
+        series_number=2,
+        sop_instance_uid=hd.UID(),
+        instance_number=1,
+        manufacturer='Manufacturer',
+        manufacturer_model_name='Model',
+        software_versions='v1',
+        device_serial_number='Device XYZ',
+    )
+
+    print(seg_dataset)
+
+    seg_dataset.save_as("seg.dcm")
+
+
+Derive a Segmentation image from a multi-frame Slide Microscopy (SM) image:
+
+.. 
code-block:: python
+
+    from pathlib import Path
+
+    import highdicom as hd
+    import numpy as np
+    from pydicom.sr.codedict import codes
+    from pydicom.filereader import dcmread
+
+    # Path to multi-frame SM image instance stored as PS3.10 file
+    image_file = Path('/path/to/image/file')
+
+    # Read SM Image data set from PS3.10 file on disk
+    image_dataset = dcmread(str(image_file))
+
+    # Create a binary segmentation mask
+    mask = np.max(image_dataset.pixel_array, axis=3) > 1
+
+    # Describe the algorithm that created the segmentation
+    algorithm_identification = hd.AlgorithmIdentificationSequence(
+        name='test',
+        version='v1.0',
+        family=codes.cid7162.ArtificialIntelligence
+    )
+
+    # Describe the segment
+    description_segment_1 = hd.seg.SegmentDescription(
+        segment_number=1,
+        segment_label='first segment',
+        segmented_property_category=codes.cid7150.Tissue,
+        segmented_property_type=codes.cid7166.ConnectiveTissue,
+        algorithm_type=hd.seg.SegmentAlgorithmTypeValues.AUTOMATIC,
+        algorithm_identification=algorithm_identification,
+        tracking_uid=hd.UID(),
+        tracking_id='test segmentation of slide microscopy image'
+    )
+
+    # Create the Segmentation instance
+    seg_dataset = hd.seg.Segmentation(
+        source_images=[image_dataset],
+        pixel_array=mask,
+        segmentation_type=hd.seg.SegmentationTypeValues.BINARY,
+        segment_descriptions=[description_segment_1],
+        series_instance_uid=hd.UID(),
+        series_number=2,
+        sop_instance_uid=hd.UID(),
+        instance_number=1,
+        manufacturer='Manufacturer',
+        manufacturer_model_name='Model',
+        software_versions='v1',
+        device_serial_number='Device XYZ'
+    )
+
+    print(seg_dataset)
+
+For more information see :doc:`seg`.
+
+.. _parsing-seg:
+
+Parsing Segmentation (SEG) images
+---------------------------------
+
+Finding relevant segments in a segmentation image instance and retrieving masks
+for them:
+
+.. code-block:: python
+
+    import highdicom as hd
+    import numpy as np
+    from pydicom.sr.codedict import codes
+
+    # Read SEG Image data set from a PS3.10 file on disk into a Segmentation
+    # object
+    # This example is a test file in the highdicom git repository
+    seg = hd.seg.segread('data/test_files/seg_image_ct_binary_overlap.dcm')
+
+    # Check the number of segments
+    assert seg.number_of_segments == 2
+
+    # Find segments (identified by their segment number) that have segmented
+    # property type "Bone"
+    bone_segment_numbers = seg.get_segment_numbers(
+        segmented_property_type=codes.SCT.Bone
+    )
+    assert bone_segment_numbers == [1]
+
+    # List SOP Instance UIDs of the images from which the segmentation was
+    # derived
+    for study_uid, series_uid, sop_uid in seg.get_source_image_uids():
+        print(study_uid, series_uid, sop_uid)
+    # '1.3.6.1.4.1.5962.1.1.0.0.0.1196530851.28319.0.1, 1.3.6.1.4.1.5962.1.1.0.0.0.1196530851.28319.0.2, 1.3.6.1.4.1.5962.1.1.0.0.0.1196530851.28319.0.93'
+    # ... 
+
+    # Here is a list of known SOP Instance UIDs that are a subset of those
+    # from which the segmentation was derived
+    source_image_uids = [
+        '1.3.6.1.4.1.5962.1.1.0.0.0.1196530851.28319.0.93',
+        '1.3.6.1.4.1.5962.1.1.0.0.0.1196530851.28319.0.94',
+    ]
+
+    # Retrieve a binary segmentation mask for these images for the bone segment
+    mask = seg.get_pixels_by_source_instance(
+        source_sop_instance_uids=source_image_uids,
+        segment_numbers=bone_segment_numbers,
+    )
+    # Output is a numpy array of shape (instances x rows x columns x segments)
+    assert mask.shape == (2, 16, 16, 1)
+    assert np.unique(mask).tolist() == [0, 1]
+
+    # Alternatively, retrieve the segmentation mask for the full list of segments
+    # (2 in this case), and combine the resulting array into a "label mask", where
+    # each pixel value represents the segment number
+    mask = seg.get_pixels_by_source_instance(
+        source_sop_instance_uids=source_image_uids,
+        combine_segments=True,
+        skip_overlap_checks=True,  # the segments in this image overlap
+    )
+    # Output is a numpy array of shape (instances x rows x columns)
+    assert mask.shape == (2, 16, 16)
+    assert np.unique(mask).tolist() == [0, 1, 2]
+
+For more information see :doc:`seg`.
+
+.. _creating-sr:
+
+Creating Structured Report (SR) documents
+-----------------------------------------
+
+Create a Structured Report document that contains a numeric area measurement for
+a planar region of interest (ROI) in a single-frame computed tomography (CT)
+image:
+
+.. code-block:: python
+
+    from pathlib import Path
+
+    import highdicom as hd
+    import numpy as np
+    from pydicom.filereader import dcmread
+    from pydicom.sr.codedict import codes
+    from pydicom.uid import generate_uid
+    from highdicom.sr.content import FindingSite
+    from highdicom.sr.templates import Measurement, TrackingIdentifier
+
+    # Path to single-frame CT image instance stored as PS3.10 file
+    image_file = Path('/path/to/image/file')
+
+    # Read CT Image data set from PS3.10 file on disk
+    image_dataset = dcmread(str(image_file))
+
+    # Describe the context of reported observations: the person that reported
+    # the observations and the device that was used to make the observations
+    observer_person_context = hd.sr.ObserverContext(
+        observer_type=codes.DCM.Person,
+        observer_identifying_attributes=hd.sr.PersonObserverIdentifyingAttributes(
+            name='Foo'
+        )
+    )
+    observer_device_context = hd.sr.ObserverContext(
+        observer_type=codes.DCM.Device,
+        observer_identifying_attributes=hd.sr.DeviceObserverIdentifyingAttributes(
+            uid=hd.UID()
+        )
+    )
+    observation_context = hd.sr.ObservationContext(
+        observer_person_context=observer_person_context,
+        observer_device_context=observer_device_context,
+    )
+
+    # Describe the image region for which observations were made
+    # (in physical space based on the frame of reference)
+    referenced_region = hd.sr.ImageRegion3D(
+        graphic_type=hd.sr.GraphicTypeValues3D.POLYGON,
+        graphic_data=np.array([
+            (165.0, 200.0, 134.0),
+            (170.0, 200.0, 134.0),
+            (170.0, 220.0, 134.0),
+            (165.0, 220.0, 134.0),
+            (165.0, 200.0, 134.0),
+        ]),
+        frame_of_reference_uid=image_dataset.FrameOfReferenceUID
+    )
+
+    # Describe the anatomic site at which observations were made
+    finding_sites = [
+        FindingSite(
+            anatomic_location=codes.SCT.CervicoThoracicSpine,
+            topographical_modifier=codes.SCT.VertebralForamen
+        ),
+    ]
+
+    # Describe the imaging measurements for the image region defined above
+    measurements = [
+        Measurement(
+            name=codes.SCT.AreaOfDefinedRegion,
+            
tracking_identifier=hd.sr.TrackingIdentifier(uid=generate_uid()), + value=1.7, + unit=codes.UCUM.SquareMillimeter, + properties=hd.sr.MeasurementProperties( + normality=hd.sr.CodedConcept( + value="17621005", + meaning="Normal", + scheme_designator="SCT" + ), + level_of_significance=codes.SCT.NotSignificant + ) + ) + ] + imaging_measurements = [ + hd.sr.PlanarROIMeasurementsAndQualitativeEvaluations( + tracking_identifier=TrackingIdentifier( + uid=hd.UID(), + identifier='Planar ROI Measurements' + ), + referenced_region=referenced_region, + finding_type=codes.SCT.SpinalCord, + measurements=measurements, + finding_sites=finding_sites + ) + ] + + # Create the report content + measurement_report = hd.sr.MeasurementReport( + observation_context=observation_context, + procedure_reported=codes.LN.CTUnspecifiedBodyRegion, + imaging_measurements=imaging_measurements + ) + + # Create the Structured Report instance + sr_dataset = hd.sr.Comprehensive3DSR( + evidence=[image_dataset], + content=measurement_report, + series_number=1, + series_instance_uid=hd.UID(), + sop_instance_uid=hd.UID(), + instance_number=1, + manufacturer='Manufacturer' + ) + + print(sr_dataset) + +For more information see :doc:`generalsr` and :doc:`tid1500`. + +.. _parsing-sr: + +Parsing Structured Report (SR) documents +---------------------------------------- + +Highdicom has special support for parsing structured reports conforming to the +TID1500 "Measurement Report" template using specialized Python classes for +templates. + +.. code-block:: python + + import numpy as np + import highdicom as hd + from pydicom.sr.codedict import codes + + # This example is in the highdicom test data files in the repository + sr = hd.sr.srread("data/test_files/sr_document_with_multiple_groups.dcm") + + # First we explore finding measurement groups. 
There are three types of
+    # measurement groups (image measurement groups, planar ROI measurement
+    # groups, and volumetric ROI measurement groups)
+
+    # Get a list of all image measurement groups referencing an image with a
+    # particular SOP Instance UID
+    groups = sr.content.get_image_measurement_groups(
+        referenced_sop_instance_uid="1.3.6.1.4.1.5962.1.1.1.1.1.20040119072730.12322",
+    )
+    assert len(groups) == 1
+
+    # Get a list of all image measurement groups with a particular tracking UID
+    groups = sr.content.get_image_measurement_groups(
+        tracking_uid="1.2.826.0.1.3680043.10.511.3.77718622501224431322963356892468048",
+    )
+    assert len(groups) == 1
+
+    # Get a list of all planar ROI measurement groups with finding type "Nodule"
+    # AND finding site "Lung"
+    groups = sr.content.get_planar_roi_measurement_groups(
+        finding_type=codes.SCT.Nodule,
+        finding_site=codes.SCT.Lung,
+    )
+    assert len(groups) == 1
+
+    # Get a list of all volumetric ROI measurement groups (with no filters)
+    groups = sr.content.get_volumetric_roi_measurement_groups()
+    assert len(groups) == 1
+
+    # Get a list of all planar ROI measurement groups with graphic type CIRCLE
+    groups = sr.content.get_planar_roi_measurement_groups(
+        graphic_type=hd.sr.GraphicTypeValues.CIRCLE,
+    )
+    assert len(groups) == 1
+
+    # Get a list of all planar ROI measurement groups stored as regions
+    groups = sr.content.get_planar_roi_measurement_groups(
+        reference_type=codes.DCM.ImageRegion,
+    )
+    assert len(groups) == 2
+
+    # Get a list of all volumetric ROI measurement groups stored as volume
+    # surfaces
+    groups = sr.content.get_volumetric_roi_measurement_groups(
+        reference_type=codes.DCM.VolumeSurface,
+    )
+    assert len(groups) == 1
+
+    # Next, we explore the properties of measurement groups that can
+    # be conveniently accessed with Python properties
+
+    # Use the first (only) image measurement group as an example
+    group = sr.content.get_image_measurement_groups()[0]
+
+    # tracking_identifier returns a Python str
+    assert group.tracking_identifier == "Image0001"
+
+    # tracking_uid returns a hd.UID, a subclass of str
+    assert group.tracking_uid == "1.2.826.0.1.3680043.10.511.3.77718622501224431322963356892468048"
+
+    # source_images returns a list of hd.sr.SourceImageForMeasurementGroup,
+    # which in turn have some properties to access data
+    assert isinstance(group.source_images[0], hd.sr.SourceImageForMeasurementGroup)
+    ref_sop_uid = group.source_images[0].referenced_sop_instance_uid
+    assert ref_sop_uid == "1.3.6.1.4.1.5962.1.1.1.1.1.20040119072730.12322"
+
+    # for the various optional pieces of information in a measurement group,
+    # accessing the relevant property returns None if the information is not
+    # present
+    assert group.finding_type is None
+
+    # Now use the first planar ROI group as a second example
+    group = sr.content.get_planar_roi_measurement_groups()[0]
+
+    # finding_type returns a CodedConcept
+    assert group.finding_type == codes.SCT.Nodule
+
+    # finding_sites returns a list of hd.sr.FindingSite objects
+    assert isinstance(group.finding_sites[0], hd.sr.FindingSite)
+    # the value of a finding site is a CodedConcept
+    assert group.finding_sites[0].value == codes.SCT.Lung
+
+    # reference_type returns a CodedConcept (the same values used above for
+    # filtering)
+    assert group.reference_type == codes.DCM.ImageRegion
+
+    # since this has reference type ImageRegion, we can access the referenced
+    # region using 'roi', which will return an hd.sr.ImageRegion object
+    assert isinstance(group.roi, hd.sr.ImageRegion)
+
+    # the graphic type 
and actual ROI coordinates (as a numpy array) can be + # accessed with the graphic_type and value properties of the roi + assert group.roi.graphic_type == hd.sr.GraphicTypeValues.CIRCLE + assert isinstance(group.roi.value, np.ndarray) + assert group.roi.value.shape == (2, 2) + + # Next, we explore getting individual measurements out of measurement + # groups + + # Use the first planar measurement group as an example + group = sr.content.get_planar_roi_measurement_groups()[0] + + # Get a list of all measurements + measurements = group.get_measurements() + + # Get the first measurements for diameter + measurement = group.get_measurements(name=codes.SCT.Diameter)[0] + + # Access the measurement's name + assert measurement.name == codes.SCT.Diameter + + # Access the measurement's value + assert measurement.value == 10.0 + + # Access the measurement's unit + assert measurement.unit == codes.UCUM.mm + + # Get the diameter measurement in this group + evaluation = group.get_qualitative_evaluations( + name=codes.DCM.LevelOfSignificance + )[0] + + # Access the measurement's name + assert evaluation.name == codes.DCM.LevelOfSignificance + + # Access the measurement's value + assert evaluation.value == codes.SCT.NotSignificant + + +For more information see :doc:`tid1500parsing`. + +Additionally, there are low-level utilities that you can use to find content +items in the content tree of any structured report documents: + +.. code-block:: python + + from pathlib import Path + + import highdicom as hd + from pydicom.filereader import dcmread + from pydicom.sr.codedict import codes + + # Path to SR document instance stored as PS3.10 file + document_file = Path('/path/to/document/file') + + # Load document from file on disk + sr_dataset = dcmread(str(document_file)) + + # Find all content items that may contain other content items. + containers = hd.sr.utils.find_content_items( + dataset=sr_dataset, + relationship_type=RelationshipTypeValues.CONTAINS + ) + print(containers) + + # Query content of SR document, where content is structured according + # to TID 1500 "Measurement Report" + if sr_dataset.ContentTemplateSequence[0].TemplateIdentifier == 'TID1500': + # Determine who made the observations reported in the document + observers = hd.sr.utils.find_content_items( + dataset=sr_dataset, + name=codes.DCM.PersonObserverName + ) + print(observers) + + # Find all imaging measurements reported in the document + measurements = hd.sr.utils.find_content_items( + dataset=sr_dataset, + name=codes.DCM.ImagingMeasurements, + recursive=True + ) + print(measurements) + + # Find all findings reported in the document + findings = hd.sr.utils.find_content_items( + dataset=sr_dataset, + name=codes.DCM.Finding, + recursive=True + ) + print(findings) + + # Find regions of interest (ROI) described in the document + # in form of spatial coordinates (SCOORD) + regions = hd.sr.utils.find_content_items( + dataset=sr_dataset, + value_type=ValueTypeValues.SCOORD, + recursive=True + ) + print(regions) + + +.. _creating-ann: + +Creating Microscopy Bulk Simple Annotation (ANN) objects +-------------------------------------------------------- + +Microscopy Bulk Simple Annotations store large numbers of annotations of +objects in microscopy images in a space-efficient way. + + +.. 
code-block:: python + + from pydicom import dcmread + from pydicom.sr.codedict import codes + from pydicom.sr.coding import Code + import highdicom as hd + import numpy as np + + # Load a slide microscopy image from the highdicom test data (if you have + # cloned the highdicom git repo) + sm_image = dcmread('data/test_files/sm_image.dcm') + + # Graphic data containing two nuclei, each represented by a single point + # expressed in 2D image coordinates + graphic_data = [ + np.array([[34.6, 18.4]]), + np.array([[28.7, 34.9]]), + ] + + # You may optionally include measurements corresponding to each annotation + # This is a measurement object representing the areas of each of the two + # nuclei + area_measurement = hd.ann.Measurements( + name=codes.SCT.Area, + unit=codes.UCUM.SquareMicrometer, + values=np.array([20.4, 43.8]), + ) + + # An annotation group represents a single set of annotations of the same + # type. Multiple such groups may be included in a bulk annotations object + # This group represents nuclei annotations produced by a manual "algorithm" + nuclei_group = hd.ann.AnnotationGroup( + number=1, + uid=hd.UID(), + label='nuclei', + annotated_property_category=codes.SCT.AnatomicalStructure, + annotated_property_type=Code('84640000', 'SCT', 'Nucleus'), + algorithm_type=hd.ann.AnnotationGroupGenerationTypeValues.MANUAL, + graphic_type=hd.ann.GraphicTypeValues.POINT, + graphic_data=graphic_data, + measurements=[area_measurement], + ) + + # Include the annotation group in a bulk annotation object + bulk_annotations = hd.ann.MicroscopyBulkSimpleAnnotations( + source_images=[sm_image], + annotation_coordinate_type=hd.ann.AnnotationCoordinateTypeValues.SCOORD, + annotation_groups=[nuclei_group], + series_instance_uid=hd.UID(), + series_number=10, + sop_instance_uid=hd.UID(), + instance_number=1, + manufacturer='MGH Pathology', + manufacturer_model_name='MGH Pathology Manual Annotations', + software_versions='0.0.1', + device_serial_number='1234', + content_description='Nuclei Annotations', + ) + + bulk_annotations.save_as('nuclei_annotations.dcm') + +For more information see :ref:`ann`. + +.. _parsing-ann: + +Parsing Microscopy Bulk Simple Annotation (ANN) objects +------------------------------------------------------- + +The following example demonstrates loading in a small bulk microscopy +annotations file, finding an annotation group representing annotation of +nuclei, and extracting the graphic data for the annotation as well as the area +measurements corresponding to those annotations. + +.. 
code-block:: python
+
+    from pydicom import dcmread
+    from pydicom.sr.codedict import codes
+    from pydicom.sr.coding import Code
+    import highdicom as hd
+
+    # Load a bulk annotation file and convert to highdicom object
+    ann_dataset = dcmread('data/test_files/sm_annotations.dcm')
+    ann = hd.ann.MicroscopyBulkSimpleAnnotations.from_dataset(ann_dataset)
+
+    # Search for annotation groups by filtering for annotated property type of
+    # 'nucleus', and take the first such group
+    group = ann.get_annotation_groups(
+        annotated_property_type=Code('84640000', 'SCT', 'Nucleus'),
+    )[0]
+
+    # Determine the graphic type and the number of annotations
+    assert group.number_of_annotations == 2
+    assert group.graphic_type == hd.ann.GraphicTypeValues.POINT
+
+    # Get the graphic data as a list of numpy arrays. We have to pass the
+    # coordinate type from the parent object here
+    graphic_data = group.get_graphic_data(
+        coordinate_type=ann.AnnotationCoordinateType
+    )
+
+    # For annotations of graphic type "POINT" and coordinate type "SCOORD" (2D
+    # image coordinates), each annotation is a (1 x 2) NumPy array
+    assert graphic_data[0].shape == (1, 2)
+
+    # Annotations may also optionally contain measurements
+    names, values, units = group.get_measurements(name=codes.SCT.Area)
+
+    # The names and units are returned as lists of CodedConcepts, and the
+    # values are returned as a numpy array of shape (number of annotations x
+    # number of measurements)
+    assert names[0] == codes.SCT.Area
+    assert units[0] == codes.UCUM.SquareMicrometer
+    assert values.shape == (group.number_of_annotations, 1)
+
+
+For more information see :ref:`ann`.
+
+.. _creating-sc:
+
+Creating Secondary Capture (SC) images
+--------------------------------------
+
+Secondary captures are a way to store images that were not created directly by
+an imaging modality within a DICOM file. They are often used to store
+screenshots or overlays, and are widely supported by viewers. However, other
+methods of representing image-derived information, such as segmentation images
+and structured reports, should be preferred if they are supported, because they
+can capture more detail about how the derived information was obtained and
+what it represents.
+
+In this example, we use a secondary capture to store an image containing a
+labeled bounding box region drawn over a CT image.
+
+.. 
code-block:: python + + import highdicom as hd + import numpy as np + from pydicom import dcmread + from pydicom.uid import RLELossless + from PIL import Image, ImageDraw + + # Read in the source CT image + image_dataset = dcmread('/path/to/image.dcm') + + # Create an image for display by windowing the original image and drawing a + # bounding box over it using Pillow's ImageDraw module + slope = getattr(image_dataset, 'RescaleSlope', 1) + intercept = getattr(image_dataset, 'RescaleIntercept', 0) + original_image = image_dataset.pixel_array * slope + intercept + + # Window the image to a soft tissue window (center 40, width 400) + # and rescale to the range 0 to 255 + lower = -160 + upper = 240 + windowed_image = np.clip(original_image, lower, upper) + windowed_image = (windowed_image - lower) * 255 / (upper - lower) + windowed_image = windowed_image.astype(np.uint8) + + # Create RGB channels + windowed_image = np.tile(windowed_image[:, :, np.newaxis], [1, 1, 3]) + + # Cast to a PIL image for easy drawing of boxes and text + pil_image = Image.fromarray(windowed_image) + + # Draw a red bounding box over part of the image + x0 = 10 + y0 = 10 + x1 = 60 + y1 = 60 + draw_obj = ImageDraw.Draw(pil_image) + draw_obj.rectangle( + [x0, y0, x1, y1], + outline='red', + fill=None, + width=3 + ) + + # Add some text + draw_obj.text(xy=[10, 70], text='Region of Interest', fill='red') + + # Convert to numpy array + pixel_array = np.array(pil_image) + + # The patient orientation defines the directions of the rows and columns of the + # image, relative to the anatomy of the patient. In a standard CT axial image, + # the rows are oriented leftwards and the columns are oriented posteriorly, so + # the patient orientation is ['L', 'P'] + patient_orientation=['L', 'P'] + + # Create the secondary capture image. By using the `from_ref_dataset` + # constructor, all the patient and study information will be copied from the + # original image dataset + sc_image = hd.sc.SCImage.from_ref_dataset( + ref_dataset=image_dataset, + pixel_array=pixel_array, + photometric_interpretation=hd.PhotometricInterpretationValues.RGB, + bits_allocated=8, + coordinate_system=hd.CoordinateSystemNames.PATIENT, + series_instance_uid=hd.UID(), + sop_instance_uid=hd.UID(), + series_number=100, + instance_number=1, + manufacturer='Manufacturer', + pixel_spacing=image_dataset.PixelSpacing, + patient_orientation=patient_orientation, + transfer_syntax_uid=RLELossless + ) + + # Save the file + sc_image.save_as('sc_output.dcm') + + +To save a 3D image as a series of output slices, simply loop over the 2D +slices and ensure that the individual output instances share a common series +instance UID. Here is an example for a CT scan that is in a NumPy array called +"ct_to_save" where we do not have the original DICOM files on hand. We want to +overlay a segmentation that is stored in a NumPy array called "seg_out". + +.. 
code-block:: python + + import highdicom as hd + import numpy as np + import os + + pixel_spacing = [1.0, 1.0] + sz = ct_to_save.shape[2] + series_instance_uid = hd.UID() + study_instance_uid = hd.UID() + + for iz in range(sz): + this_slice = ct_to_save[:, :, iz] + + # Window the image to a soft tissue window (center 40, width 400) + # and rescale to the range 0 to 255 + lower = -160 + upper = 240 + windowed_image = np.clip(this_slice, lower, upper) + windowed_image = (windowed_image - lower) * 255 / (upper - lower) + + # Create RGB channels + pixel_array = np.tile(windowed_image[:, :, np.newaxis], [1, 1, 3]) + + # transparency level + alpha = 0.1 + + pixel_array[:, :, 0] = 255 * (1 - alpha) * seg_out[:, :, iz] + alpha * pixel_array[:, :, 0] + pixel_array[:, :, 1] = alpha * pixel_array[:, :, 1] + pixel_array[:, :, 2] = alpha * pixel_array[:, :, 2] + + patient_orientation = ['L', 'P'] + + # Create the secondary capture image + sc_image = hd.sc.SCImage( + pixel_array=pixel_array.astype(np.uint8), + photometric_interpretation=hd.PhotometricInterpretationValues.RGB, + bits_allocated=8, + coordinate_system=hd.CoordinateSystemNames.PATIENT, + study_instance_uid=study_instance_uid, + series_instance_uid=series_instance_uid, + sop_instance_uid=hd.UID(), + series_number=100, + instance_number=iz + 1, + manufacturer='Manufacturer', + pixel_spacing=pixel_spacing, + patient_orientation=patient_orientation, + ) + + sc_image.save_as(os.path.join("output", 'sc_output_' + str(iz) + '.dcm')) + + +Creating Grayscale Softcopy Presentation State (GSPS) Objects +------------------------------------------------------------- + +A presentation state contains information about how another image should be +rendered, and may include "annotations" in the form of basic shapes, polylines, +and text overlays. Note that a GSPS is not recommended for storing annotations +for any purpose except visualization. A structured report would usually be +preferred for storing annotations for clinical or research purposes. + +.. 
code-block:: python + + import highdicom as hd + + import numpy as np + from pydicom import dcmread + from pydicom.valuerep import PersonName + + + # Read in an example CT image + image_dataset = dcmread('path/to/image.dcm') + + # Create an annotation containing a polyline + polyline = hd.pr.GraphicObject( + graphic_type=hd.pr.GraphicTypeValues.POLYLINE, + graphic_data=np.array([ + [10.0, 10.0], + [20.0, 10.0], + [20.0, 20.0], + [10.0, 20.0]] + ), # coordinates of polyline vertices + units=hd.pr.AnnotationUnitsValues.PIXEL, # units for graphic data + tracking_id='Finding1', # site-specific ID + tracking_uid=hd.UID() # highdicom will generate a unique ID + ) + + # Create a text object annotation + text = hd.pr.TextObject( + text_value='Important Finding!', + bounding_box=np.array( + [30.0, 30.0, 40.0, 40.0] # left, top, right, bottom + ), + units=hd.pr.AnnotationUnitsValues.PIXEL, # units for bounding box + tracking_id='Finding1Text', # site-specific ID + tracking_uid=hd.UID() # highdicom will generate a unique ID + ) + + # Create a single layer that will contain both graphics + # There may be multiple layers, and each GraphicAnnotation object + # belongs to a single layer + layer = hd.pr.GraphicLayer( + layer_name='LAYER1', + order=1, # order in which layers are displayed (lower first) + description='Simple Annotation Layer', + ) + + # A GraphicAnnotation may contain multiple text and/or graphic objects + # and is rendered over all referenced images + annotation = hd.pr.GraphicAnnotation( + referenced_images=[image_dataset], + graphic_layer=layer, + graphic_objects=[polyline], + text_objects=[text] + ) + + # Assemble the components into a GSPS object + gsps = hd.pr.GrayscaleSoftcopyPresentationState( + referenced_images=[image_dataset], + series_instance_uid=hd.UID(), + series_number=123, + sop_instance_uid=hd.UID(), + instance_number=1, + manufacturer='Manufacturer', + manufacturer_model_name='Model', + software_versions='v1', + device_serial_number='Device XYZ', + content_label='ANNOTATIONS', + graphic_layers=[layer], + graphic_annotations=[annotation], + institution_name='MGH', + institutional_department_name='Radiology', + content_creator_name=PersonName.from_named_components( + family_name='Doe', + given_name='John' + ), + ) + + # Save the GSPS file + gsps.save_as('gsps.dcm') + + +.. .. _creation-legacy: + +.. Creating Legacy Converted Enhanced Images +.. ----------------------------------------- + +.. .. code-block:: python + +.. from highdicom.legacy.sop import LegacyConvertedEnhancedCTImage diff --git a/docs/sc.rst b/docs/sc.rst new file mode 100644 index 00000000..66db43af --- /dev/null +++ b/docs/sc.rst @@ -0,0 +1,6 @@ +.. _sc: + +Secondary Capture (SC) Images +============================= + +This page is under construction, and more detail will be added soon. diff --git a/docs/seg.rst b/docs/seg.rst new file mode 100644 index 00000000..ebe73871 --- /dev/null +++ b/docs/seg.rst @@ -0,0 +1,1117 @@ +.. _seg: + +Segmentation (SEG) Images +========================= + +DICOM Segmentation Images (often abbreviated DICOM SEG) are one of the primary +IODs (information objects definitions) implemented in the *highdicom* library. +SEG images store `segmentations +`_ of other DICOM images +(which we will refer to as *source images*) of other modalities, such as +magnetic resonance (MR), computed tomography (CT), slide microscopy (SM) and +many others. A segmentation is a partitioning of the source image into +different regions. 
In medical imaging, these regions commonly represent
+different organs or tissue types, or regions of abnormality (e.g. tumor or
+infarct) identified within an image.
+
+The crucial difference between SEGs and other IODs that allow for storing image
+regions is that SEGs store the segmented regions in *raster* format as pixel
+arrays as opposed to the *vector* descriptions of the region's boundary used by
+structured reports (SRs), presentation states, and RT structures. This makes
+them a more natural choice for many automatic image processing algorithms such
+as convolutional neural networks.
+
+The DICOM standard provides a highly flexible object definition for Segmentation
+images that is able to cover a large variety of possible use cases.
+Unfortunately, this flexibility comes with complexity that may make Segmentation
+images difficult to understand and work with at first.
+
+Segments
+--------
+
+A SEG image encodes one or more distinct regions of an image, which are known
+as *segments*. A single segment could represent, for example, a particular
+organ or structure (liver, lung, kidney, cell nucleus), tissue (fat, muscle,
+bone), or abnormality (tumor, infarct). Elsewhere the same concept is known by
+other names such as *class* or *label*.
+
+Each segment in a DICOM SEG image is represented by a separate 2D *frame* (or
+set of *frames*) within the Segmentation image. One important ramification of
+this is that segments need not be *mutually exclusive*, i.e. a given pixel or
+spatial location within the source image can belong to multiple segments. In
+other words, the segments within a SEG image may *overlap*. There is an
+optional attribute called "Segments Overlap" (0062, 0013) that, if present,
+will indicate whether the segments overlap in a given SEG image.
+
+Segment Descriptions
+--------------------
+
+Within a DICOM SEG image, segments are identified by a Segment Number. Segments
+are numbered with consecutive segment numbers starting at 1 (i.e., 1, 2, 3,
+...). Additionally, each segment present is accompanied by information
+describing what the segment represents. This information is placed in the
+"SegmentSequence" (0062, 0002) attribute of the segmentation file. In
+*highdicom*, we use the :class:`highdicom.seg.SegmentDescription` class to hold
+this information. When you construct a DICOM SEG image using *highdicom*, you
+must construct a single :class:`highdicom.seg.SegmentDescription` object for
+each segment. The segment description includes the following information:
+
+- **Segment Label**: A human-readable name for the segment (e.g. ``"Left
+  Kidney"``). This can be any string.
+- **Segmented Property Category**: A coded value describing the
+  category of the segmented region. For example, this could specify that the
+  segment represents an anatomical structure, a tissue type, or an abnormality.
+  This is passed as either a
+  :class:`highdicom.sr.CodedConcept`, or a :class:`pydicom.sr.coding.Code`
+  object.
+- **Segmented Property Type**: Another coded value that more specifically
+  describes the segmented region, such as a kidney or tumor. This is
+  passed as either a :class:`highdicom.sr.CodedConcept`, or a
+  :class:`pydicom.sr.coding.Code` object.
+- **Algorithm Type**: Whether the segment was produced by an automatic,
+  semi-automatic, or manual algorithm. The valid values are contained within the
+  enum :class:`highdicom.seg.SegmentAlgorithmTypeValues`.
+- **Anatomic Regions**: (Optional) A coded value describing the anatomic region + in which the segment is found. For example, if the segmented property type is + "tumor", this can be used to convey that the tumor is found in the kidney. + This is passed as a sequence of coded values as either + :class:`highdicom.sr.CodedConcept`, or :class:`pydicom.sr.coding.Code` + objects. +- **Tracking ID and UID**: (Optional) These allow you to provide, respectively, + a human readable ID and unique ID to a specific segment. This can be used, + for example, to uniquely identify particular lesions over multiple imaging + studies. These are passed as strings. + +Notice that the segment description makes use of coded concepts to ensure that +the way a particular anatomical structure is described is standardized and +unambiguous (if standard nomenclatures are used). See :ref:`coding` for more +information. + +Here is an example of constructing a simple segment description for a segment +representing a liver that has been manually segmented. + +.. code-block:: python + + from pydicom.sr.codedict import codes + + import highdicom as hd + + + # Liver segment produced by a manual algorithm + liver_description = hd.seg.SegmentDescription( + segment_number=1, + segment_label='liver', + segmented_property_category=codes.SCT.Organ, + segmented_property_type=codes.SCT.Liver, + algorithm_type=hd.seg.SegmentAlgorithmTypeValues.MANUAL, + ) + +In this second example, we describe a segment representing a tumor that has +been automatically segmented by an artificial intelligence algorithm. For this, +we must first provide more information about the algorithm used in an +:class:`highdicom.AlgorithmIdentificationSequence`. + +.. code-block:: python + + # For the next segment, we will describe the specific algorithm used to + # create it + algorithm_identification = hd.AlgorithmIdentificationSequence( + name='Auto-Tumor', + version='v1.0', + family=codes.cid7162.ArtificialIntelligence + ) + + # Kidney tumor segment produced by the above algorithm + tumor_description = hd.seg.SegmentDescription( + segment_number=2, + segment_label='kidney tumor', + segmented_property_category=codes.SCT.MorphologicallyAbnormalStructure, + segmented_property_type=codes.SCT.Tumor, + algorithm_type=hd.seg.SegmentAlgorithmTypeValues.AUTOMATIC, + algorithm_identification=algorithm_identification, + anatomic_regions=[codes.SCT.Kidney] + ) + +Binary and Fractional SEGs +-------------------------- + +One particularly important characteristic of a segmentation image is its +"Segmentation Type" (0062,0001), which may take the value of either +``"BINARY"`` or ``"FRACTIONAL"`` and describes the values that pixels within the +segmentation may take. Pixels in a ``"BINARY"`` segmentation image may only +take values 0 or 1, i.e. each pixel either belongs to the segment or does not. + +By contrast, pixels in a ``"FRACTIONAL"`` segmentation image lie in the range 0 +to 1. A second attribute, "Segmentation Fractional Type" (0062,0010) specifies +how these values should be interpreted. There are two options, represented by +the enumerated type :class:`highdicom.seg.SegmentationFractionalTypeValues`: + +- ``"PROBABILITY"``, i.e. the number between 0 and 1 represents a probability + that a pixel belongs to the segment +- ``"OCCUPANCY"`` i.e. 
the number represents the fraction of the pixel's area (or the
+  voxel's volume) that belongs to the segment
+
+A potential source of confusion is that having a Segmentation Type of
+``"BINARY"`` only limits the range of values *within a given segment*. It is
+perfectly valid for a ``"BINARY"`` segmentation to have multiple segments. It
+is therefore not the same sense of the word *binary* that distinguishes
+*binary* from *multiclass* segmentations.
+
+*Highdicom* provides the Python enumerations
+:class:`highdicom.seg.SegmentationTypeValues` and
+:class:`highdicom.seg.SegmentationFractionalTypeValues` for the valid values of
+the "Segmentation Type" and "Segmentation Fractional Type" attributes,
+respectively.
+
+Constructing Basic Binary SEG Images
+------------------------------------
+
+We have now covered enough to construct a basic binary segmentation image. We
+use the :class:`highdicom.seg.Segmentation` class and provide a description of
+each segment, a pixel array of the segmentation mask, the source images as a
+list of ``pydicom.Dataset`` objects, and some other basic information. The
+segmentation pixel array is provided as a numpy array with a boolean or
+unsigned integer data type containing only the values 0 and 1.
+
+.. code-block:: python
+
+    import numpy as np
+
+    from pydicom import dcmread
+    from pydicom.sr.codedict import codes
+    from pydicom.data import get_testdata_file
+
+    import highdicom as hd
+
+    # Load a CT image
+    source_image = dcmread(get_testdata_file('CT_small.dcm'))
+
+    # Description of liver segment produced by a manual algorithm
+    liver_description = hd.seg.SegmentDescription(
+        segment_number=1,
+        segment_label='liver',
+        segmented_property_category=codes.SCT.Organ,
+        segmented_property_type=codes.SCT.Liver,
+        algorithm_type=hd.seg.SegmentAlgorithmTypeValues.MANUAL,
+    )
+
+    # Pixel array is an unsigned integer array with 0 and 1 values
+    mask = np.zeros((128, 128), dtype=np.uint8)
+    mask[10:20, 10:20] = 1
+
+    # Construct the Segmentation Image
+    seg = hd.seg.Segmentation(
+        source_images=[source_image],
+        pixel_array=mask,
+        segmentation_type=hd.seg.SegmentationTypeValues.BINARY,
+        segment_descriptions=[liver_description],
+        series_instance_uid=hd.UID(),
+        series_number=1,
+        sop_instance_uid=hd.UID(),
+        instance_number=1,
+        manufacturer='Foo Corp.',
+        manufacturer_model_name='Liver Segmentation Algorithm',
+        software_versions='0.0.1',
+        device_serial_number='1234567890',
+    )
+
+Constructing Binary SEG Images with Multiple Frames
+---------------------------------------------------
+
+DICOM SEGs are multiframe objects, which means that they may contain more than
+one frame within the same object. For example, a single SEG image may contain
+the segmentations for an entire series of CT images. In this case you can pass
+a 3D numpy array as the ``pixel_array`` parameter of the constructor. The
+segmentation masks of each of the input images are stacked down axis 0 of the
+numpy array. The order of segmentation masks is assumed to match the order of
+the frames within the ``source_images`` parameter, i.e. ``pixel_array[i, ...]``
+is the segmentation of ``source_images[i]``. Note that highdicom makes no
+attempt to sort the input source images in any way. It is the responsibility of
+the user to ensure that they pass the source images in a meaningful order, and
+that the source images and segmentation frames at the same index correspond.
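+
+Note that sorting by spatial position is generally more robust than relying on
+attributes such as Instance Number. The following is a minimal sketch of one
+way to do this (the helper ``sort_datasets_by_position`` is not part of
+*highdicom*), assuming single-frame datasets that share a common orientation:
+
+.. code-block:: python
+
+    import numpy as np
+
+    def sort_datasets_by_position(datasets):
+        """Sort single-frame datasets along the slice normal."""
+        orientation = np.array(datasets[0].ImageOrientationPatient)
+        # The slice normal is the cross product of the row and column
+        # direction cosines
+        normal = np.cross(orientation[:3], orientation[3:])
+        # Order datasets by the projection of their position onto the normal
+        return sorted(
+            datasets,
+            key=lambda ds: float(np.dot(normal, ds.ImagePositionPatient)),
+        )
+
+The full example below sorts by Instance Number instead, for simplicity:
+
+
+.. 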
code-block:: python + + import numpy as np + + from pydicom import dcmread + from pydicom.sr.codedict import codes + from pydicom.data import get_testdata_files + + import highdicom as hd + + # Load a series of CT images as a list of pydicom.Datasets + source_images = [ + dcmread(f) for f in get_testdata_files('dicomdirtests/77654033/CT2/*') + ] + + # Sort source frames by instance number (note that this is illustrative + # only, sorting by instance number is not generally recommended as this + # attribute is not guaranteed to be present in all types of source image) + source_images = sorted(source_images, key=lambda x: x.InstanceNumber) + + # Create a segmentation by thresholding the CT image at 1000 HU + thresholded = [ + im.pixel_array * im.RescaleSlope + im.RescaleIntercept > 1000 + for im in source_images + ] + + # Stack segmentations of each frame down axis zero. Now we have an array + # with shape (frames x height x width) + mask = np.stack(thresholded, axis=0) + + # Description of liver segment produced by a manual algorithm + # Note that now there are multiple frames but still only a single segment + liver_description = hd.seg.SegmentDescription( + segment_number=1, + segment_label='liver', + segmented_property_category=codes.SCT.Organ, + segmented_property_type=codes.SCT.Liver, + algorithm_type=hd.seg.SegmentAlgorithmTypeValues.MANUAL, + ) + + # Construct the Segmentation Image + seg = hd.seg.Segmentation( + source_images=source_images, + pixel_array=mask, + segmentation_type=hd.seg.SegmentationTypeValues.BINARY, + segment_descriptions=[liver_description], + series_instance_uid=hd.UID(), + series_number=1, + sop_instance_uid=hd.UID(), + instance_number=1, + manufacturer='Foo Corp.', + manufacturer_model_name='Liver Segmentation Algorithm', + software_versions='0.0.1', + device_serial_number='1234567890', + ) + +Note that the example of the previous section with a 2D pixel array is simply +a convenient shorthand for the special case where there is only a single source +frame and a single segment. It is equivalent in every way to passing a 3D array +with a single frame down axis 0. + +Constructing Binary SEG Images of Multiframe Source Images +---------------------------------------------------------- + +Alternatively, we could create a segmentation of a source image that is itself +a multiframe image (such as an Enhanced CT, Enhanced MR image, or a Whole Slide +Microscopy image). In this case, we just pass the single source image object, +and the ``pixel_array`` input with one segmentation frame in axis 0 for each +frame of the source file, listed in ascending order by frame number. I.e. +``pixel_array[i, ...]`` is the segmentation of frame ``i + 1`` of the single +source image (the offset of +1 is because numpy indexing starts at 0 whereas +DICOM frame indices start at 1). + +.. 
code-block:: python + + import numpy as np + + from pydicom import dcmread + from pydicom.sr.codedict import codes + from pydicom.data import get_testdata_file + + import highdicom as hd + + # Load an enhanced (multiframe) CT image + source_dcm = dcmread(get_testdata_file('eCT_Supplemental.dcm')) + + # Apply some basic processing to correctly scale the source images + pixel_xform_seq = source_dcm.SharedFunctionalGroupsSequence[0]\ + .PixelValueTransformationSequence[0] + slope = pixel_xform_seq.RescaleSlope + intercept = pixel_xform_seq.RescaleIntercept + image_array = source_dcm.pixel_array * slope + intercept + + # Create a segmentation by thresholding the CT image at 0 HU + mask = image_array > 0 + + # Description of liver segment produced by a manual algorithm + # Note that now there are multiple frames but still only a single segment + liver_description = hd.seg.SegmentDescription( + segment_number=1, + segment_label='liver', + segmented_property_category=codes.SCT.Organ, + segmented_property_type=codes.SCT.Liver, + algorithm_type=hd.seg.SegmentAlgorithmTypeValues.MANUAL, + ) + + # Construct the Segmentation Image + seg = hd.seg.Segmentation( + source_images=[source_dcm], + pixel_array=mask, + segmentation_type=hd.seg.SegmentationTypeValues.BINARY, + segment_descriptions=[liver_description], + series_instance_uid=hd.UID(), + series_number=1, + sop_instance_uid=hd.UID(), + instance_number=1, + manufacturer='Foo Corp.', + manufacturer_model_name='Liver Segmentation Algorithm', + software_versions='0.0.1', + device_serial_number='1234567890', + ) + +Constructing Binary SEG Images with Multiple Segments +----------------------------------------------------- + +To further generalize our initial example, we can include multiple segments +representing, for example, multiple organs. The first change is to include +the descriptions of all segments in the ``segment_descriptions`` parameter. +Note that the ``segment_descriptions`` list must contain segment descriptions +ordered consecutively by their ``segment_number``, starting with +``segment_number=1``. + +The second change is to include the segmentation mask of each segment within +the ``pixel_array`` passed to the constructor. There are two methods of doing +this. The first is to stack the masks for the multiple segments down axis 3 +(the fourth axis) of the ``pixel_array``. The shape of the resulting +``pixel_array`` with *F* source frames of height *H* and width *W*, with *S* +segments, is then (*F* x *H* x *W* x *S*). The segmentation mask for the segment +with ``segment_number=i`` should be found at ``pixel_array[:, :, :, i - 1]`` +(the offset of -1 is because segments are numbered starting at 1 but numpy +array indexing starts at 0). + +Note that when multiple segments are used, the first dimension (*F*) must +always be present even if there is a single source frame. + +.. 
code-block:: python
+
+    # Load a series of CT images as a list of pydicom.Datasets
+    source_images = [
+        dcmread(f) for f in get_testdata_files('dicomdirtests/77654033/CT2/*')
+    ]
+
+    # Sort source frames by instance number
+    source_images = sorted(source_images, key=lambda x: x.InstanceNumber)
+    image_array = np.stack([
+        im.pixel_array * im.RescaleSlope + im.RescaleIntercept
+        for im in source_images
+    ], axis=0)
+
+    # Create a segmentation by thresholding the CT image at 1000 HU
+    thresholded_0 = image_array > 1000
+
+    # ...and a second below 500 HU
+    thresholded_1 = image_array < 500
+
+    # Stack the two segments down axis 3
+    mask = np.stack([thresholded_0, thresholded_1], axis=3)
+
+    # Description of bone segment produced by a manual algorithm
+    bone_description = hd.seg.SegmentDescription(
+        segment_number=1,
+        segment_label='bone',
+        segmented_property_category=codes.SCT.Tissue,
+        segmented_property_type=codes.SCT.Bone,
+        algorithm_type=hd.seg.SegmentAlgorithmTypeValues.MANUAL,
+    )
+    # Description of liver segment produced by a manual algorithm
+    liver_description = hd.seg.SegmentDescription(
+        segment_number=2,
+        segment_label='liver',
+        segmented_property_category=codes.SCT.Organ,
+        segmented_property_type=codes.SCT.Liver,
+        algorithm_type=hd.seg.SegmentAlgorithmTypeValues.MANUAL,
+    )
+    segment_descriptions = [bone_description, liver_description]
+
+    # Construct the Segmentation Image
+    seg = hd.seg.Segmentation(
+        source_images=source_images,
+        pixel_array=mask,
+        segmentation_type=hd.seg.SegmentationTypeValues.BINARY,
+        segment_descriptions=segment_descriptions,
+        series_instance_uid=hd.UID(),
+        series_number=1,
+        sop_instance_uid=hd.UID(),
+        instance_number=1,
+        manufacturer='Foo Corp.',
+        manufacturer_model_name='Multi-Organ Segmentation Algorithm',
+        software_versions='0.0.1',
+        device_serial_number='1234567890',
+    )
+
+The second way to pass segmentation masks for multiple labels is as a "label
+map". A label map is a 3D array (or 2D in the case of a single frame) in which
+each pixel's value determines which segment it belongs to, i.e. a pixel with
+value 1 belongs to segment 1 (which is the first item in the
+``segment_descriptions``). A pixel with value 0 belongs to no segments. The
+label map form is more convenient to work with in many applications; however,
+it is limited to representing segmentations that do not overlap (i.e. those in
+which a single pixel can belong to at most one segment). The more general form
+does not have this limitation: a given pixel may belong to any number of
+segments. Note that passing a "label map" is purely a convenience provided by
+`highdicom`; it makes no difference to how the segmentation is actually stored
+(`highdicom` splits the label map into multiple single-segment frames and
+stores these, as required by the standard).
+
+Therefore, the following snippet produces an equivalent SEG image to the
+previous snippet, but passes the mask as a label map rather than as a stack of
+segments.
+
+.. code-block:: python
+
+    # Load a series of CT images as a list of pydicom.Datasets
+    source_images = [
+        dcmread(f) for f in get_testdata_files('dicomdirtests/77654033/CT2/*')
+    ]
+
+    # Sort source frames by instance number
+    source_images = sorted(source_images, key=lambda x: x.InstanceNumber)
+    image_array = np.stack([
+        im.pixel_array * im.RescaleSlope + im.RescaleIntercept
+        for im in source_images
+    ], axis=0)
+
+    # Create the same two segments as above as a label map
+    mask = np.zeros_like(image_array, np.uint8)
+    mask[image_array > 1000] = 1
+    mask[image_array < 500] = 2
+
+    # Construct the Segmentation Image
+    seg = hd.seg.Segmentation(
+        source_images=source_images,
+        pixel_array=mask,
+        segmentation_type=hd.seg.SegmentationTypeValues.BINARY,
+        segment_descriptions=segment_descriptions,
+        series_instance_uid=hd.UID(),
+        series_number=1,
+        sop_instance_uid=hd.UID(),
+        instance_number=1,
+        manufacturer='Foo Corp.',
+        manufacturer_model_name='Multi-Organ Segmentation Algorithm',
+        software_versions='0.0.1',
+        device_serial_number='1234567890',
+    )
+
+
+Representation of Fractional SEGs
+---------------------------------
+
+Although the pixel values of ``"FRACTIONAL"`` segmentation images can be
+considered to lie within a continuous range between 0 and 1, they are in fact
+not stored this way. Instead they are quantized and scaled so that they may be
+stored as unsigned 8-bit integers between 0 and the value of the "Maximum
+Fractional Value" (0062,000E) attribute. Thus, assuming a "Maximum Fractional
+Value" of 255, a pixel value of *x* should be interpreted as a probability or
+occupancy value of *x*/255. You can control the "Maximum Fractional Value" by
+passing the ``max_fractional_value`` parameter. 255 is used as the default.
+
+When constructing ``"FRACTIONAL"`` segmentation images, you pass a
+floating-point valued pixel array and *highdicom* handles this quantization for
+you. If you wish, you may reduce the "Maximum Fractional Value" from its
+default of 255 (which gives the maximum possible level of precision), but note
+that doing so entails a loss of precision.
+
+Similarly, *highdicom* will rescale stored values back down to the range 0-1 by
+default in its methods for retrieving pixel arrays (more on this below).
+
+Otherwise, constructing ``"FRACTIONAL"`` segs is identical to constructing
+``"BINARY"`` ones, with the limitation that fractional SEGs may not use the
+"label map" method to pass multiple segments but must instead stack them along
+axis 3.
+
+The example below shows the construction of a simple fractional seg
+representing a probabilistic segmentation of the liver.
+
+.. code-block:: python
+
+    import numpy as np
+
+    from pydicom import dcmread
+    from pydicom.sr.codedict import codes
+    from pydicom.data import get_testdata_file
+
+    import highdicom as hd
+
+    # Load a CT image
+    source_image = dcmread(get_testdata_file('CT_small.dcm'))
+
+    # Description of liver segment produced by a manual algorithm
+    liver_description = hd.seg.SegmentDescription(
+        segment_number=1,
+        segment_label='liver',
+        segmented_property_category=codes.SCT.Organ,
+        segmented_property_type=codes.SCT.Liver,
+        algorithm_type=hd.seg.SegmentAlgorithmTypeValues.MANUAL,
+    )
+
+    # Pixel array is a float array with values between 0 and 1
+    mask = np.zeros((128, 128), dtype=float)
+    mask[10:20, 10:20] = 0.5
+    mask[30:40, 30:40] = 0.75
+
+    # Construct the Segmentation Image
+    seg = hd.seg.Segmentation(
+        source_images=[source_image],
+        pixel_array=mask,
+        segmentation_type=hd.seg.SegmentationTypeValues.FRACTIONAL,
+        fractional_type=hd.seg.SegmentationFractionalTypeValues.PROBABILITY,
+        segment_descriptions=[liver_description],
+        series_instance_uid=hd.UID(),
+        series_number=1,
+        sop_instance_uid=hd.UID(),
+        instance_number=1,
+        manufacturer='Foo Corp.',
+        manufacturer_model_name='Liver Segmentation Algorithm',
+        software_versions='0.0.1',
+        device_serial_number='1234567890',
+    )
+
+Implicit Conversion to Fractional
+---------------------------------
+
+Note that any segmentation pixel array that `highdicom` allows you to store as a
+``"BINARY"`` SEG (i.e. a binary segmentation with segments stacked down axis 3,
+or a label-map style segmentation) may also be stored as a ``"FRACTIONAL"``
+SEG. You just pass the integer array, specify the ``segmentation_type`` as
+``"FRACTIONAL"``, and `highdicom` does the conversion for you. Input pixels
+with value 1 will be automatically stored with value ``max_fractional_value``.
+We recommend that if you do this, you specify ``max_fractional_value=1`` to
+clearly communicate that the segmentation is inherently binary in nature.
+
+Why would you want to make this seemingly rather strange choice? Well,
+``"FRACTIONAL"`` SEGs tend to compress *much* better than ``"BINARY"`` ones
+(see next section). Note, however, that this is arguably a misuse of the intent
+of the standard, so *caveat emptor*.
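+
+As a concrete illustration, the following minimal sketch (reusing the
+``source_image`` and ``liver_description`` variables from the basic binary
+example above) stores an inherently binary mask using the ``"FRACTIONAL"``
+segmentation type with ``max_fractional_value=1``:
+
+.. code-block:: python
+
+    # Binary mask passed as an unsigned integer array with values 0 and 1
+    mask = np.zeros((128, 128), dtype=np.uint8)
+    mask[10:20, 10:20] = 1
+
+    seg = hd.seg.Segmentation(
+        source_images=[source_image],
+        pixel_array=mask,
+        # Stored as FRACTIONAL, despite the binary input...
+        segmentation_type=hd.seg.SegmentationTypeValues.FRACTIONAL,
+        # ...with a maximum fractional value of 1 to communicate that the
+        # segmentation is inherently binary
+        max_fractional_value=1,
+        segment_descriptions=[liver_description],
+        series_instance_uid=hd.UID(),
+        series_number=1,
+        sop_instance_uid=hd.UID(),
+        instance_number=1,
+        manufacturer='Foo Corp.',
+        manufacturer_model_name='Liver Segmentation Algorithm',
+        software_versions='0.0.1',
+        device_serial_number='1234567890',
+    )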
only taking values 0 +and 1): + +- If the segmentation is very simple or sparse, the lossless compression methods + available in ``"FRACTIONAL"`` images may be more effective than the + "bit-packing" method required by ``"BINARY"`` segmentations. +- The clear frame boundaries make retrieving individual frames from + ``"FRACTIONAL"`` image files possible. + +Geometry of SEG Images +---------------------- + +In the simple cases we have seen so far, the geometry of the segmentation +``pixel_array`` has matched that of the source images, i.e. there is a spatial +correspondence between a given pixel in the ``pixel_array`` and the +corresponding pixel in the relevant source frame. While this covers most use +cases, DICOM SEGs actually allow for more general segmentations in which there +is a more complicated geometrical relationship between the source frames and +the segmentation masks. This could arise when a source image is resampled or +transformed before the segmentation method is applied, such that there is no +longer a simple correspondence between pixels in the segmentation mask and +pixels in the original source DICOM image. + +`Highdicom` supports this case by allowing you to manually specify the plane +positions of the each frame in the segmentation mask, and further the +orientations and pixel spacings of these planes if they do not match that in the +source images. In this case, the correspondence between the items of the +``source_images`` list and axis 0 of the segmentation ``pixel_array`` is broken +and the number of frames in each may differ. + +.. code-block:: python + + import numpy as np + + from pydicom import dcmread + from pydicom.sr.codedict import codes + from pydicom.data import get_testdata_files + + import highdicom as hd + + # Load a CT image + source_images = [ + dcmread(f) for f in get_testdata_files('dicomdirtests/77654033/CT2/*') + ] + + # Sort source frames by instance number + source_images = sorted(source_images, key=lambda x: x.InstanceNumber) + + # Now the shape and size of the mask does not have to match the source + # images + mask = np.zeros((2, 100, 100), np.uint8) + mask[0, 50:60, 50:60] = 1 + + # Define custom positions for each frame + positions = [ + hd.PlanePositionSequence( + hd.CoordinateSystemNames.PATIENT, + [100.0, 50.0, -50.0] + ), + hd.PlanePositionSequence( + hd.CoordinateSystemNames.PATIENT, + [100.0, 50.0, -48.0] + ), + ] + + # Define a custom orientation and spacing for the segmentation mask + orientation = hd.PlaneOrientationSequence( + hd.CoordinateSystemNames.PATIENT, + [0.0, 1.0, 0.0, -1.0, 0.0, 0.0] + ) + spacings = hd.PixelMeasuresSequence( + slice_thickness=2.0, + pixel_spacing=[2.0, 2.0] + ) + + # Description of liver segment produced by a manual algorithm + # Note that now there are multiple frames but still only a single segment + liver_description = hd.seg.SegmentDescription( + segment_number=1, + segment_label='liver', + segmented_property_category=codes.SCT.Organ, + segmented_property_type=codes.SCT.Liver, + algorithm_type=hd.seg.SegmentAlgorithmTypeValues.MANUAL, + ) + + # Construct the Segmentation Image + seg = hd.seg.Segmentation( + source_images=source_images, + pixel_array=mask, + plane_positions=positions, + plane_orientation=orientation, + pixel_measures=spacings, + segmentation_type=hd.seg.SegmentationTypeValues.BINARY, + segment_descriptions=[liver_description], + series_instance_uid=hd.UID(), + series_number=1, + sop_instance_uid=hd.UID(), + instance_number=1, + manufacturer='Foo Corp.', + 
+        manufacturer_model_name='Liver Segmentation Algorithm',
+        software_versions='0.0.1',
+        device_serial_number='1234567890',
+    )
+
+Organization of Frames in SEGs
+------------------------------
+
+After construction, there may be many 2D frames within an SEG image, each
+referring to the segmentation of a certain 2D source image or frame (or a
+resampled plane defined by its plane position and orientation) for a certain
+segment. Note that this may mean that there are multiple frames of the SEG
+image that are derived from each frame of the input image or series. These
+frames are stored within the SEG as an array indexed by a frame number
+(consecutive integers starting at 1). The DICOM standard gives the creator of a
+SEG a lot of freedom in how to organize the resulting frames within this 1D
+list. To complicate matters further, frames in the segmentation image that
+would otherwise be "empty" (contain only 0s) may be omitted from the SEG image
+entirely (this is `highdicom`'s default behavior but can be turned off if you
+prefer by specifying ``omit_empty_frames=False`` in the constructor).
+
+Every ``pydicom.Dataset`` has the ``.pixel_array`` property, which, in the case
+of a multiframe image, returns the full list of frames in the image as an array
+of shape (frames x rows x columns), with frames organized in whatever manner
+the creator of the object chose. A
+:class:`highdicom.seg.Segmentation` is a sub-class of ``pydicom.Dataset``, and
+therefore also has the ``.pixel_array`` property. However, given the
+complexities outlined above, *it is not recommended* to use the
+``.pixel_array`` property with SEG images since the meaning of the resulting
+array is unclear without referring to other metadata within the object in all
+but the most trivial cases (single segment and/or single source frame with no
+empty frames). This may be particularly confusing and perhaps off-putting to
+those working with SEG images for the first time.
+
+The order in which the creator of a SEG image has chosen to organize the frames
+of the SEG image is described by the `"DimensionIndexSequence"
+`_
+attribute (0020, 9222) of the SEG object. Referring to this, and the
+information held about a given frame within the item of the
+`"PerFrameFunctionalGroupsSequence"
+`_
+attribute (5200, 9230) with the matching frame number, it is possible to
+determine the meaning of a certain segmentation frame. We will not describe the
+full details of this mechanism here.
+
+Instead, `highdicom` provides a family of methods to help users reconstruct
+segmentation masks from SEG objects in a predictable and more intuitive way. We
+recommend using these methods over the basic ``.pixel_array`` in nearly all
+circumstances.
+
+Reading Existing Segmentation Images
+------------------------------------
+
+Since a segmentation is a DICOM object just like any other image, you can read
+it in from a file using ``pydicom`` to give you a ``pydicom.Dataset``. However,
+if you read the file in using the :func:`highdicom.seg.segread` function, the
+segmentation will have type :class:`highdicom.seg.Segmentation`. This adds
+several extra methods that make it easier to work with the segmentation.
+
+.. code-block:: python
+
+    import highdicom as hd
+
+    seg = hd.seg.segread('data/test_files/seg_image_ct_binary.dcm')
+    assert isinstance(seg, hd.seg.Segmentation)
+
+Alternatively, you can convert an existing ``pydicom.Dataset`` into a
+:class:`highdicom.seg.Segmentation` using the
+:meth:`highdicom.seg.Segmentation.from_dataset()` method. This is useful if
+you receive the object over the network rather than reading it from a file.
+
+.. code-block:: python
+
+    import highdicom as hd
+    import pydicom
+
+    dcm = pydicom.dcmread('data/test_files/seg_image_ct_binary.dcm')
+
+    # Convert to highdicom Segmentation object
+    seg = hd.seg.Segmentation.from_dataset(dcm)
+
+    assert isinstance(seg, hd.seg.Segmentation)
+
+By default this operation copies the underlying dataset, which may be slow for
+large objects. You can use ``copy=False`` to change the type of the object
+without copying the data.
+
+Since :class:`highdicom.seg.Segmentation` is a subclass of ``pydicom.Dataset``,
+you can still perform `pydicom` operations on it, such as accessing DICOM
+attributes by their keyword, in the usual way.
+
+.. code-block:: python
+
+    import highdicom as hd
+    import pydicom
+
+    seg = hd.seg.segread('data/test_files/seg_image_ct_binary.dcm')
+    assert isinstance(seg, pydicom.Dataset)
+
+    # Accessing DICOM attributes as usual in pydicom
+    seg.PatientName
+    # 'Doe^Archibald'
+
+Searching For Segments
+----------------------
+
+When working with existing SEG images you can use the method
+:meth:`highdicom.seg.Segmentation.get_segment_numbers()` to search for segments
+whose descriptions meet certain criteria. For example:
+
+.. code-block:: python
+
+    from pydicom.sr.codedict import codes
+
+    import highdicom as hd
+
+
+    # This is a test file in the highdicom git repository
+    seg = hd.seg.segread('data/test_files/seg_image_ct_binary_overlap.dcm')
+
+    # Check the number of segments
+    assert seg.number_of_segments == 2
+
+    # Check the range of segment numbers
+    assert seg.segment_numbers == range(1, 3)
+
+    # Search for segments by label (returns segment numbers of all matching
+    # segments)
+    assert seg.get_segment_numbers(segment_label='first segment') == [1]
+    assert seg.get_segment_numbers(segment_label='second segment') == [2]
+
+    # Search for segments by segmented property type (returns segment numbers
+    # of all matching segments)
+    assert seg.get_segment_numbers(segmented_property_type=codes.SCT.Bone) == [1]
+    assert seg.get_segment_numbers(segmented_property_type=codes.SCT.Spine) == [2]
+
+    # Search for segments by tracking UID (returns segment numbers of all
+    # matching segments)
+    assert seg.get_segment_numbers(tracking_uid='1.2.826.0.1.3680043.10.511.3.83271046815894549094043330632275067') == [1]
+    assert seg.get_segment_numbers(tracking_uid='1.2.826.0.1.3680043.10.511.3.10042414969629429693880339016394772') == [2]
+
+    # You can also get the full description for a given segment, and access
+    # the information in it via properties
+    segment_1_description = seg.get_segment_description(1)
+    assert segment_1_description.segment_label == 'first segment'
+    assert segment_1_description.tracking_uid == '1.2.826.0.1.3680043.10.511.3.83271046815894549094043330632275067'
+
+
+Reconstructing Segmentation Masks From DICOM SEGs
+-------------------------------------------------
+
+`Highdicom` provides the
+:meth:`highdicom.seg.Segmentation.get_pixels_by_source_instance()` and
+:meth:`highdicom.seg.Segmentation.get_pixels_by_source_frame()` methods to
+handle reconstruction of segmentation masks from SEG objects in which 
each +frame in the SEG object is derived from a single source frame. The only +difference between the two methods is that the +:meth:`highdicom.seg.Segmentation.get_pixels_by_source_instance()` is used when +the segmentation is derived from a source series consisting of multiple +single-frame instances, while +:meth:`highdicom.seg.Segmentation.get_pixels_by_source_frame()` is used when +the segmentation is derived from a single multiframe source instance. + +When reconstructing a segmentation mask using +:meth:`highdicom.seg.Segmentation.get_pixels_by_source_instance()`, the user must +provide a list of SOP Instance UIDs of the source images for which the +segmentation mask should be constructed. Whatever order is chosen here will be +used to order the frames of the output segmentation mask, so it is up to the +user to sort them according to their needs. The default behavior is that the +output pixel array is of shape (*F* x *H* x *W* x *S*), where *F* is the number +of source instance UIDs, *H* and *W* are the height and width of the frames, +and *S* is the number of segments included in the segmentation. In this way, +the output of this method matches the input `pixel_array` to the constructor +that would create the SEG object if it were created with `highdicom`. + +The following example (and those in later sections) use DICOM files from the +`highdicom` test data, which may be found in the +`highdicom repository `_ +on GitHub. + +.. code-block:: python + + import numpy as np + import highdicom as hd + + seg = hd.seg.segread('data/test_files/seg_image_ct_binary.dcm') + + # List the source images for this segmentation: + for study_uid, series_uid, sop_uid in seg.get_source_image_uids(): + print(sop_uid) + # 1.3.6.1.4.1.5962.1.1.0.0.0.1196530851.28319.0.93 + # 1.3.6.1.4.1.5962.1.1.0.0.0.1196530851.28319.0.94 + # 1.3.6.1.4.1.5962.1.1.0.0.0.1196530851.28319.0.95 + # 1.3.6.1.4.1.5962.1.1.0.0.0.1196530851.28319.0.96 + + # Get the segmentation array for a subset of these images: + pixels = seg.get_pixels_by_source_instance( + source_sop_instance_uids=[ + '1.3.6.1.4.1.5962.1.1.0.0.0.1196530851.28319.0.93', + '1.3.6.1.4.1.5962.1.1.0.0.0.1196530851.28319.0.94' + ] + ) + assert pixels.shape == (2, 16, 16, 1) + assert np.unique(pixels).tolist() == [0, 1] + +This second example demonstrates reconstructing segmentation masks from a +segmentation derived from a multiframe image, in this case a whole slide +microscopy image, and also demonstrates an example with multiple, in +this case 20, segments: + +.. code-block:: python + + import highdicom as hd + + # Read in the segmentation using highdicom + seg = hd.seg.segread('data/test_files/seg_image_sm_numbers.dcm') + + assert seg.number_of_segments == 20 + + # SOP Instance UID of the single multiframe image from which the + # segmentation was derived + _, _, source_sop_instance_uid = seg.get_source_image_uids()[0] + + # Get the segmentation array for a subset of these images: + pixels = seg.get_pixels_by_source_frame( + source_sop_instance_uid=source_sop_instance_uid, + source_frame_numbers=range(1, 26), + ) + + # Source frames are stacked down the first dimension, segments are stacked + # down the fourth dimension + assert pixels.shape == (25, 10, 10, 20) + + # Each segment is still binary + assert np.unique(pixels).tolist() == [0, 1] + +Note that these two methods may only be used when the segmentation's metadata +indicates that each segmentation frame is derived from exactly one source +instance or frame of a source instance. 
If this is not the case, a
+``RuntimeError`` is raised.
+
+In the general case, the
+:meth:`highdicom.seg.Segmentation.get_pixels_by_dimension_index_values()` method
+is available to query directly by the underlying dimension index values. We
+will not cover this advanced topic here.
+
+Reconstructing Specific Segments
+--------------------------------
+
+A further optional parameter, ``segment_numbers``, allows the user to request
+only a subset of the segments available within the SEG object by providing a
+list of segment numbers. In this case, the fourth dimension of the output array
+will have size equal to the number of segments requested, with the segments
+stacked in the order they were requested (which may not be ascending by segment
+number).
+
+.. code-block:: python
+
+    import highdicom as hd
+
+    # Read in the segmentation using highdicom
+    seg = hd.seg.segread('data/test_files/seg_image_sm_numbers.dcm')
+
+    assert seg.number_of_segments == 20
+
+    # SOP Instance UID of the single multiframe image from which the
+    # segmentation was derived
+    _, _, source_sop_instance_uid = seg.get_source_image_uids()[0]
+
+    # Get the segmentation array for a subset of these images:
+    pixels = seg.get_pixels_by_source_frame(
+        source_sop_instance_uid=source_sop_instance_uid,
+        source_frame_numbers=range(1, 26),
+        assert_missing_frames_are_empty=True,
+        segment_numbers=[10, 9, 8]
+    )
+
+    # Source frames are stacked down the first dimension, segments are stacked
+    # down the fourth dimension
+    assert pixels.shape == (25, 10, 10, 3)
+
+After this, the array ``pixels[:, :, :, 0]`` contains the pixels for segment
+number 10, ``pixels[:, :, :, 1]`` contains the pixels for segment number 9, and
+``pixels[:, :, :, 2]`` contains the pixels for segment number 8.
+
+Reconstructing Segmentation Masks as "Label Maps"
+-------------------------------------------------
+
+If the segments do not overlap, it is possible to combine the multiple segments
+into a simple "label map" style mask, as described above. This can be achieved
+by specifying the ``combine_segments`` parameter as ``True``. In this case, the
+output will have shape (*F* x *H* x *W*), and a pixel value of *i* > 0
+indicates that the pixel belongs to segment *i*, while a pixel value of 0
+indicates that the pixel belongs to none of the requested segments. Again,
+this mirrors the way you would have passed this segmentation mask to the
+constructor to create the object if you had used a label map. If the segments
+overlap, `highdicom` will raise a ``RuntimeError``. Alternatively, if you
+specify the ``skip_overlap_checks`` parameter as ``True``, no error will be
+raised and each pixel will be given the value of the highest segment number of
+those present in the pixel (or the highest segment value after relabelling has
+been applied if you pass ``relabel=True``, see below). Note that combining
+segments is only possible when the segmentation type is ``"BINARY"``, or when
+the segmentation type is ``"FRACTIONAL"`` but only two distinct values are
+actually present in the image.
+
+Here, we repeat the above example but request the output as a label map:
+
+.. 
Here, we repeat the ``seg_image_sm_numbers.dcm`` example from above, but
request the output as a label map:

.. code-block:: python

    import numpy as np
    import highdicom as hd

    # Read in the segmentation using highdicom
    seg = hd.seg.segread('data/test_files/seg_image_sm_numbers.dcm')

    # SOP Instance UID of the single multiframe image from which the
    # segmentation was derived
    _, _, source_sop_instance_uid = seg.get_source_image_uids()[0]

    # Get the segmentation array for a subset of the source frames:
    pixels = seg.get_pixels_by_source_frame(
        source_sop_instance_uid=source_sop_instance_uid,
        source_frame_numbers=range(1, 26),
        assert_missing_frames_are_empty=True,
        segment_numbers=[10, 9, 8],
        combine_segments=True,
    )

    # Source frames are stacked down the first dimension, now there is no
    # fourth dimension
    assert pixels.shape == (25, 10, 10)

    assert np.unique(pixels).tolist() == [0, 8, 9, 10]

By default, the pixel values of the output label map correspond to the
original segment numbers to which those pixels belong. Therefore, we see that
the output array contains values 8, 9, and 10, corresponding to the three
segments that we requested (in addition to 0, meaning no segment). However,
when you are specifying a subset of segments, you may wish to "relabel" these
segments such that in the output array the first segment you specify (10 in the
above example) is indicated by pixel value 1, the second segment (9 in the
example) is indicated by pixel value 2, and so on. This is achieved using
the ``relabel`` parameter.

.. code-block:: python

    import numpy as np
    import highdicom as hd

    # Read in the segmentation using highdicom
    seg = hd.seg.segread('data/test_files/seg_image_sm_numbers.dcm')

    # SOP Instance UID of the single multiframe image from which the
    # segmentation was derived
    _, _, source_sop_instance_uid = seg.get_source_image_uids()[0]

    # Get the segmentation array for a subset of the source frames:
    pixels = seg.get_pixels_by_source_frame(
        source_sop_instance_uid=source_sop_instance_uid,
        source_frame_numbers=range(1, 26),
        assert_missing_frames_are_empty=True,
        segment_numbers=[10, 9, 8],
        combine_segments=True,
        relabel=True,
    )

    # Source frames are stacked down the first dimension, now there is no
    # fourth dimension
    assert pixels.shape == (25, 10, 10)

    # Now the output segments have been relabelled to 1, 2, 3
    assert np.unique(pixels).tolist() == [0, 1, 2, 3]

Reconstructing Fractional Segmentations
---------------------------------------

For ``"FRACTIONAL"`` SEG objects, `highdicom` rescales the stored integer pixel
values to floating-point values in the range ``0.0`` to ``1.0``, using the
"MaximumFractionalValue" attribute as the scaling factor. If desired, this
behavior can be disabled by specifying ``rescale_fractional=False``, in which
case the raw integer array as stored in the SEG will be returned.

.. code-block:: python

    import numpy as np
    import highdicom as hd

    # Read in the segmentation using highdicom
    seg = hd.seg.segread('data/test_files/seg_image_ct_true_fractional.dcm')

    assert seg.segmentation_type == hd.seg.SegmentationTypeValues.FRACTIONAL

    # List the source images for this segmentation:
    sop_uids = [uids[2] for uids in seg.get_source_image_uids()]

    # Get the segmentation array for these images:
    pixels = seg.get_pixels_by_source_instance(
        source_sop_instance_uids=sop_uids,
    )

    # Segment values are now floating point
    assert pixels.dtype == np.float32

    print(np.unique(pixels))
    # [0.        0.2509804 0.5019608]
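Continuing from the example above, here is a sketch of the same query with
rescaling disabled. The raw integer values shown in the comment are what we
would expect assuming a "MaximumFractionalValue" of 255 (since 64 / 255 is
approximately 0.2509804 and 128 / 255 is approximately 0.5019608):

.. code-block:: python

    # Request the raw integer array as stored in the SEG
    raw_pixels = seg.get_pixels_by_source_instance(
        source_sop_instance_uids=sop_uids,
        rescale_fractional=False,
    )

    # Values are the stored integers rather than rescaled fractions
    print(np.unique(raw_pixels))
    # Expected, assuming MaximumFractionalValue is 255: [  0  64 128]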
Viewing DICOM SEG Images
------------------------

Unfortunately, DICOM SEG images are not widely supported by DICOM
viewers. Viewers that do support SEG include:

- The `OHIF Viewer <https://ohif.org/>`_, an open-source
  web-based viewer.
- `3D Slicer <https://www.slicer.org/>`_, an open-source desktop application
  for 3D medical image computing. It supports both display and creation of
  DICOM SEG files via the "Quantitative Reporting" plugin.

Note that these viewers may not support all features of segmentation images
that `highdicom` is able to encode.

diff --git a/docs/sr.rst b/docs/sr.rst
new file mode 100644
index 00000000..7333899d
--- /dev/null
+++ b/docs/sr.rst
@@ -0,0 +1,17 @@

.. _sr:

Structured Report Documents (SRs)
=================================

Structured report documents are DICOM files that contain information derived
from a medical image in a structured and computer-readable way. `Highdicom`
supports structured reports through the :mod:`highdicom.sr` sub-package.

Since SRs are a complex topic, this section is sub-divided as follows:

.. toctree::
   :maxdepth: 1

   generalsr
   tid1500
   tid1500parsing

diff --git a/docs/tid1500.rst b/docs/tid1500.rst
new file mode 100644
index 00000000..b7bb255f
--- /dev/null
+++ b/docs/tid1500.rst
@@ -0,0 +1,745 @@

.. _tid1500:

The TID1500 Measurement Report Template
=======================================

The `TID1500 "Measurement Report" `_ template is a general-purpose
template for communicating measurements and qualitative evaluations derived
from one or more images or regions of images. It is recommended to read the
previous page on :ref:`generalsr` before this page.

*Highdicom* represents the various sub-templates of the TID1500 template as
Python classes. Using these classes will guide you through the process of
creating TID 1500 SRs in a modular and structured way, and will perform various
checks on the inputs you provide.

Overview of TID1500 Content
---------------------------

A diagram of the structure of TID1500 content is shown here:

.. figure:: images/tid1500_overview.svg
   :scale: 100 %
   :alt: TID1500 diagram

   Simplified diagram of the structure of the TID1500 template and major
   subtemplates. Note that this is intended to give a quick overview; please
   refer to the standard itself for full details.

At the top level, the Measurement Report template
(:class:`highdicom.sr.MeasurementReport`) represents a report containing
various measurements, along with metadata about the process through which they
were created.

A measurement report contains one or more "Measurement Groups", where each
group contains measurements and/or qualitative evaluations about a particular
image or image region. There are three types of Measurement Group, each of
which refers to a different type of region:

- :class:`highdicom.sr.MeasurementsAndQualitativeEvaluations`
  (`TID1501 `_): Refers to one or more entire images or image
  frames.
- :class:`highdicom.sr.PlanarROIMeasurementsAndQualitativeEvaluations`
  (`TID1410 `_): Refers to a 2D region within a single image.
- :class:`highdicom.sr.VolumetricROIMeasurementsAndQualitativeEvaluations`
  (`TID1411 `_): Refers to a 3D region within an image or image
  series.

A single Measurement Report may contain a mixture of Measurement Groups of
these different types in any combination (as long as there is at least one
group).
Each Measurement Group contains a number of Measurements
(`TID300 `_) - numerical values derived from an image, such as a
length or volume - and/or Qualitative Evaluations - categorical values derived
from an image, such as a classification of tumor morphology.

When constructing the content, it is necessary to start at the bottom of the
content tree with the Measurements and Evaluations and work up: adding them
into Measurement Groups, adding these groups to a Measurement Report, and then
creating the document that contains the report. However, here we will describe
the structure from the top down, as that makes the big picture clearer.

Measurement Report (`TID1500 `_)
---------------------------------------------

Every TID1500 Structured Report contains exactly one Measurement Report
at the root of its content tree. This is represented by the class
:class:`highdicom.sr.MeasurementReport`.

The first ingredient in the Measurement Report is the "Observation Context",
which contains metadata describing the way the observations that led to the
report were made. This includes information such as the person or device that
made the observations, and the subject about which the observations were made:


.. code-block:: python

    from pydicom.sr.codedict import codes
    import highdicom as hd

    observer_person_context = hd.sr.ObserverContext(
        observer_type=codes.DCM.Person,
        observer_identifying_attributes=hd.sr.PersonObserverIdentifyingAttributes(
            name='Doe^John'
        )
    )
    observer_device_context = hd.sr.ObserverContext(
        observer_type=codes.DCM.Device,
        observer_identifying_attributes=hd.sr.DeviceObserverIdentifyingAttributes(
            uid=hd.UID()
        )
    )
    observation_context = hd.sr.ObservationContext(
        observer_person_context=observer_person_context,
        observer_device_context=observer_device_context,
    )

The second required ingredient is a procedure code describing the procedure
that was performed to produce the observations. Finally, we have the image
measurement groups that the report contains (described below). There are some
further optional parameters, such as a title for the report. Combining these,
we can construct the Measurement Report and use it to construct the SR
document:

.. code-block:: python

    from pydicom.sr.codedict import codes
    import highdicom as hd

    measurement_report = hd.sr.MeasurementReport(
        observation_context=observation_context,  # from above
        procedure_reported=codes.LN.CTUnspecifiedBodyRegion,
        imaging_measurements=[...],  # list of measurement groups, see below
        title=codes.DCM.ImagingMeasurementReport,
    )

    # Create the Structured Report instance
    sr_dataset = hd.sr.Comprehensive3DSR(
        evidence=[...],  # all datasets referenced in the report
        content=measurement_report,
        series_number=1,
        series_instance_uid=hd.UID(),
        sop_instance_uid=hd.UID(),
        instance_number=1,
        manufacturer='Manufacturer'
    )

Measurement Groups
------------------

A Measurement Report contains one or more Measurement Groups. There are three
types of Measurement Groups, corresponding to entire images, 2D regions of
interest, and 3D regions of interest. The three types may be mixed and matched
within a single Measurement Report in any combination.
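Before looking at each type in detail, here is a minimal sketch (using only
classes described on this page, and the ``observation_context`` constructed
above) of the bottom-up assembly order: a measurement is created first, then
wrapped in a measurement group, and the groups are finally passed to the
report:

.. code-block:: python

    import highdicom as hd
    from pydicom.sr.codedict import codes

    # Bottom of the tree: a single volume measurement
    measurement = hd.sr.Measurement(
        name=codes.SCT.Volume,
        value=1983.123,
        unit=codes.UCUM.CubicMillimeter,
    )

    # Middle of the tree: wrap the measurement in a measurement group that
    # applies to one or more entire images
    group = hd.sr.MeasurementsAndQualitativeEvaluations(
        tracking_identifier=hd.sr.TrackingIdentifier(
            identifier='Image0001',
            uid=hd.UID(),
        ),
        measurements=[measurement],
    )

    # Top of the tree: the group is passed to the Measurement Report
    measurement_report = hd.sr.MeasurementReport(
        observation_context=observation_context,  # from above
        procedure_reported=codes.LN.CTUnspecifiedBodyRegion,
        imaging_measurements=[group],
    )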
Measurements And Qualitative Evaluations Group (`TID1501 `_)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The first, and simplest, type of Measurement Group applies to one or more
entire images (or alternatively one or more entire frames in the case of
multiframe source images). This is implemented using
:class:`highdicom.sr.MeasurementsAndQualitativeEvaluations`.

This class accepts a parameter ``source_images``, which is a sequence of
:class:`highdicom.sr.SourceImageForMeasurementGroup` items specifying the
images (or frames) to which the measurement group applies. If this is omitted,
the measurement group is assumed to include all images referenced in the SR
document (as passed in the ``evidence`` parameter of the relevant Structured
Report object's ``__init__`` method).

The following is a simple example:

.. code-block:: python

    import highdicom as hd
    from pydicom import dcmread

    im = dcmread('/path/to/file.dcm')

    # A tracking identifier for this measurement group
    tracking_id = hd.sr.TrackingIdentifier(
        identifier='Image0001',
        uid=hd.UID(),
    )

    # An object describing the source image for the measurements
    source_image = hd.sr.SourceImageForMeasurementGroup.from_source_image(im)

    # Construct the measurement group
    group = hd.sr.MeasurementsAndQualitativeEvaluations(
        source_images=[source_image],
        tracking_identifier=tracking_id,
        measurements=[...],
        qualitative_evaluations=[...],
    )

Planar ROI Image Measurements Group (`TID1410 `_)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This type of Measurement Group applies to a specific planar sub-region of the
source image or images. This is implemented in the class
:class:`highdicom.sr.PlanarROIMeasurementsAndQualitativeEvaluations`.

This class takes a parameter specifying the region. There are two distinct
options here:

- ``referenced_region``: The image region is specified directly in the SR
  using a :class:`highdicom.sr.ImageRegion` or
  :class:`highdicom.sr.ImageRegion3D` passed as the ``referenced_region``
  parameter. In this case, the coordinates defining the region are stored
  within the measurement group itself. The choice between
  :class:`highdicom.sr.ImageRegion` and :class:`highdicom.sr.ImageRegion3D`
  determines whether the image region is defined in 2D image coordinates or 3D
  frame-of-reference coordinates. Either way, the region must be planar.
- ``referenced_segment``: The region is specified indirectly as a reference to
  a single slice of a single segment stored in a separate DICOM Segmentation
  Image object, specified by passing a
  :class:`highdicom.sr.ReferencedSegmentationFrame` to the
  ``referenced_segment`` parameter, which contains UIDs to identify the
  Segmentation Image along with the segment number of the specific segment and
  the frames within which it is stored.

Note that exactly one of ``referenced_region`` and ``referenced_segment``
must be passed; providing both, or neither, is an error.

The following example uses an :class:`highdicom.sr.ImageRegion` as the
``referenced_region``:
.. code-block:: python

    import highdicom as hd
    import numpy as np
    from pydicom import dcmread

    im = dcmread('/path/to/file.dcm')

    # A tracking identifier for this measurement group
    tracking_id = hd.sr.TrackingIdentifier(
        identifier='Region0001',
        uid=hd.UID(),
    )

    # Define the image region (a circle) using image coordinates: the first
    # point is the center and the second is a point on the circumference
    region = hd.sr.ImageRegion(
        graphic_type=hd.sr.GraphicTypeValues.CIRCLE,
        graphic_data=np.array([[45.0, 55.0], [45.0, 65.0]]),
        source_image=hd.sr.SourceImageForRegion.from_source_image(im),
    )

    # Construct the measurement group
    group = hd.sr.PlanarROIMeasurementsAndQualitativeEvaluations(
        referenced_region=region,
        tracking_identifier=tracking_id,
        measurements=[...],
        qualitative_evaluations=[...],
    )

This example uses an :class:`highdicom.sr.ImageRegion3D` as the
``referenced_region``:

.. code-block:: python

    import highdicom as hd
    import numpy as np
    from pydicom import dcmread

    im = dcmread('/path/to/file.dcm')

    # A tracking identifier for this measurement group
    tracking_id = hd.sr.TrackingIdentifier(
        identifier='Region3D0001',
        uid=hd.UID(),
    )

    # Define the image region (a point) using frame-of-reference coordinates
    region = hd.sr.ImageRegion3D(
        graphic_type=hd.sr.GraphicTypeValues3D.POINT,
        graphic_data=np.array([[123.5, 234.1, -23.7]]),
        frame_of_reference_uid=im.FrameOfReferenceUID,
    )

    # Construct the measurement group
    group = hd.sr.PlanarROIMeasurementsAndQualitativeEvaluations(
        referenced_region=region,
        tracking_identifier=tracking_id,
        measurements=[...],
        qualitative_evaluations=[...],
    )

The final example uses a :class:`highdicom.sr.ReferencedSegmentationFrame` as
the ``referenced_segment``:

.. code-block:: python

    import highdicom as hd
    import numpy as np
    from pydicom import dcmread

    # The image dataset referenced
    im = dcmread('/path/to/file.dcm')

    # A segmentation dataset, assumed to contain a segmentation of the source
    # image above
    seg = dcmread('/path/to/seg.dcm')

    # A tracking identifier for this measurement group
    tracking_id = hd.sr.TrackingIdentifier(
        identifier='Region0002',
        uid=hd.UID(),
    )

    # Define the image region using a specific segment from the segmentation
    ref_segment = hd.sr.ReferencedSegmentationFrame.from_segmentation(
        segmentation=seg,
        segment_number=1,
    )

    # Construct the measurement group
    group = hd.sr.PlanarROIMeasurementsAndQualitativeEvaluations(
        referenced_segment=ref_segment,
        tracking_identifier=tracking_id,
        measurements=[...],
        qualitative_evaluations=[...],
    )

Volumetric ROI Image Measurements Group (`TID1411 `_)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

This type of Measurement Group applies to a specific volumetric sub-region of
the source image or images. This is implemented in the class
:class:`highdicom.sr.VolumetricROIMeasurementsAndQualitativeEvaluations`.

Like the similar Planar ROI class, this class takes a parameter
specifying the region. In this case, there are three options:

- ``referenced_regions``: The image region is specified directly in the SR in
  image coordinates using one or more objects of type
  :class:`highdicom.sr.ImageRegion` passed as the ``referenced_regions``
  parameter, representing the volumetric region as a set of 2D regions across
  multiple images or frames.
- ``referenced_volume_surface``: The region is specified directly in the SR as
  a single volumetric region defined in frame of reference coordinates using a
  single :class:`highdicom.sr.VolumeSurface` object passed to the
  ``referenced_volume_surface`` parameter.
- ``referenced_segment``: The region is specified indirectly as a reference to
  an entire segment (which may spread across multiple images or frames) of a
  Segmentation Image object, specified by passing a
  :class:`highdicom.sr.ReferencedSegment` to the ``referenced_segment``
  parameter, which contains UIDs to identify the Segmentation Image along with
  the segment number of the specific segment within it.

Note that exactly one of ``referenced_regions``, ``referenced_volume_surface``,
or ``referenced_segment`` should be passed.

The following example uses a list of :class:`highdicom.sr.ImageRegion` objects
as the ``referenced_regions``:

.. code-block:: python

    import highdicom as hd
    import numpy as np
    from pydicom import dcmread

    im1 = dcmread('/path/to/file1.dcm')
    im2 = dcmread('/path/to/file2.dcm')

    # A tracking identifier for this measurement group
    tracking_id = hd.sr.TrackingIdentifier(
        identifier='Region0001',
        uid=hd.UID(),
    )

    # Define the image regions (a circle in two images) using image coordinates
    region1 = hd.sr.ImageRegion(
        graphic_type=hd.sr.GraphicTypeValues.CIRCLE,
        graphic_data=np.array([[45.0, 55.0], [45.0, 65.0]]),
        source_image=hd.sr.SourceImageForRegion.from_source_image(im1),
    )
    region2 = hd.sr.ImageRegion(
        graphic_type=hd.sr.GraphicTypeValues.CIRCLE,
        graphic_data=np.array([[40.0, 50.0], [40.0, 60.0]]),
        source_image=hd.sr.SourceImageForRegion.from_source_image(im2),
    )

    # Construct the measurement group
    group = hd.sr.VolumetricROIMeasurementsAndQualitativeEvaluations(
        referenced_regions=[region1, region2],
        tracking_identifier=tracking_id,
        measurements=[...],
        qualitative_evaluations=[...],
    )

This example uses a :class:`highdicom.sr.VolumeSurface` object as the
``referenced_volume_surface``:

.. code-block:: python

    import highdicom as hd
    import numpy as np
    from pydicom import dcmread

    im = dcmread('/path/to/file.dcm')

    # A tracking identifier for this measurement group
    tracking_id = hd.sr.TrackingIdentifier(
        identifier='Region0001',
        uid=hd.UID(),
    )

    # Define the volume surface (a point) using frame-of-reference coordinates
    # (note the use of the 3D graphic type enum here)
    volume_surface = hd.sr.VolumeSurface(
        graphic_type=hd.sr.GraphicTypeValues3D.POINT,
        graphic_data=np.array([[123.5, 234.1, -23.7]]),
        source_images=[hd.sr.SourceImageForSegmentation.from_source_image(im)],
        frame_of_reference_uid=im.FrameOfReferenceUID,
    )

    # Construct the measurement group
    group = hd.sr.VolumetricROIMeasurementsAndQualitativeEvaluations(
        referenced_volume_surface=volume_surface,
        tracking_identifier=tracking_id,
        measurements=[...],
        qualitative_evaluations=[...],
    )

The final example uses a :class:`highdicom.sr.ReferencedSegment` as the
``referenced_segment``:
.. code-block:: python

    import highdicom as hd
    import numpy as np
    from pydicom import dcmread

    # The image dataset referenced
    im = dcmread('/path/to/file.dcm')

    # A segmentation dataset, assumed to contain a segmentation of the source
    # image above
    seg = dcmread('/path/to/seg.dcm')

    # A tracking identifier for this measurement group
    tracking_id = hd.sr.TrackingIdentifier(
        identifier='Region3D0001',
        uid=hd.UID(),
    )

    # Define the region using a specific segment from the segmentation
    ref_segment = hd.sr.ReferencedSegment.from_segmentation(
        segmentation=seg,
        segment_number=1,
    )

    # Construct the measurement group
    group = hd.sr.VolumetricROIMeasurementsAndQualitativeEvaluations(
        referenced_segment=ref_segment,
        tracking_identifier=tracking_id,
        measurements=[...],
        qualitative_evaluations=[...],
    )

Further Parameters for Measurement Groups
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The three types of measurement group are more alike than different. The
following parameters may be used for all Measurement Groups, regardless of
type (some have been omitted for brevity):

- ``tracking_identifier`` (:class:`highdicom.sr.TrackingIdentifier`):
  Identifier for tracking measurement groups. This allows the group to
  be referred to unambiguously in future objects.
- ``finding_type`` (:class:`highdicom.sr.CodedConcept`, optional):
  Type of observed finding.
- ``algorithm_id`` (:class:`highdicom.sr.AlgorithmIdentification`, optional):
  Identification of the algorithm used for making measurements.
- ``finding_sites`` (Sequence of :class:`highdicom.sr.FindingSite`, optional):
  Coded description of one or more anatomic locations at which the
  finding was observed.
- ``measurements`` (Sequence of :class:`highdicom.sr.Measurement`, optional):
  Numeric measurements.
- ``qualitative_evaluations`` (Sequence of :class:`highdicom.sr.CodedConcept`, optional):
  Coded name-value pairs that describe qualitative evaluations.
- ``finding_category`` (:class:`highdicom.sr.CodedConcept`, optional):
  Category of observed finding, e.g., anatomic structure or
  morphologically abnormal structure.


Measurements and Qualitative Evaluations
----------------------------------------

Finally, we get down to the bottom of the content tree, and the measurements
and qualitative evaluations themselves. Information derived from the images or
image regions represented by the measurement group may be stored as either
measurements, qualitative evaluations, or a mixture of the two. These two
concepts play a similar role in the SR, but measurements have numerical values
and qualitative evaluations have categorical values.

Qualitative Evaluations
~~~~~~~~~~~~~~~~~~~~~~~

A Qualitative Evaluation is essentially a categorical value inferred from an
image. For example, this could represent a diagnosis derived from the
referenced region or a severity grading. These are represented in *highdicom*
using the class :class:`highdicom.sr.QualitativeEvaluation`, which is
essentially a single :class:`highdicom.sr.CodeContentItem` within a special
template.

To create a Qualitative Evaluation, just pass the ``name`` and ``value``
parameters as coded values:
.. code-block:: python

    import highdicom as hd
    from pydicom.sr.codedict import codes

    # An evaluation of disease severity as "mild"
    severity_item = hd.sr.QualitativeEvaluation(
        name=codes.SCT.Severity,
        value=codes.SCT.Mild,
    )

    # An evaluation of tumor morphology as adenocarcinoma
    morphology_item = hd.sr.QualitativeEvaluation(
        name=codes.SCT.AssociatedMorphology,
        value=codes.SCT.Adenocarcinoma,
    )

Measurements (`TID300 `_)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

A Measurement is essentially a numerical (decimal) value derived from the image
or image region. In *highdicom*, a measurement is represented by the class
:class:`highdicom.sr.Measurement`. It is a small template that contains at its
core a :class:`highdicom.sr.NumContentItem` containing the value, a
:class:`highdicom.sr.CodeContentItem` specifying the unit of the measurement,
and optionally several more content items describing further context or
qualifications for the measurement.

Here is a basic example:

.. code-block:: python

    import highdicom as hd
    from pydicom.sr.codedict import codes

    # A volume measurement
    measurement = hd.sr.Measurement(
        name=codes.SCT.Volume,
        value=1983.123,
        unit=codes.UCUM.CubicMillimeter,
    )

In addition, the following optional parameters are available (see the API
reference for more information):

- **Qualifier:** Qualification of the measurement.
- **Tracking Identifier:** Identifier for uniquely identifying and tracking
  measurements.
- **Algorithm:** Identification of the algorithm used for making measurements.
- **Derivation:** How the value was computed.
- **Finding Sites:** Coded description of one or more anatomic locations
  corresponding to the image region from which the measurement was taken.
- **Method:** Measurement method.
- **Properties:** Measurement properties, including qualitative evaluations of
  its normality and/or significance, its relationship to a reference
  population, and an indication of its selection from a set of measurements.
- **Referenced Images:** Referenced images which were used as sources for the
  measurement.
- **Referenced Real World Value Map:**
  Referenced real world value map for referenced source images used to
  generate the measurement.

.. _tid1500_full_example:

Putting It All Together
-----------------------

The snippet below is a full example of creating an SR document using the
TID1500 template. You can find the file created by this snippet in the
highdicom test data within the highdicom repository at
``data/test_files/sr_document_with_multiple_groups.dcm``.
.. code-block:: python

    import numpy as np
    from pydicom.sr.codedict import codes
    import pydicom
    import highdicom as hd

    im = pydicom.dcmread("data/test_files/ct_image.dcm")

    # Information about the observer
    observer_person_context = hd.sr.ObserverContext(
        observer_type=codes.DCM.Person,
        observer_identifying_attributes=hd.sr.PersonObserverIdentifyingAttributes(
            name='Doe^John'
        )
    )
    observer_device_context = hd.sr.ObserverContext(
        observer_type=codes.DCM.Device,
        observer_identifying_attributes=hd.sr.DeviceObserverIdentifyingAttributes(
            uid=hd.UID()
        )
    )
    observation_context = hd.sr.ObservationContext(
        observer_person_context=observer_person_context,
        observer_device_context=observer_device_context,
    )

    # An object describing the source image for the measurements
    source_image = hd.sr.SourceImageForMeasurementGroup.from_source_image(im)

    # First, we define an image measurement group for the CT image describing
    # the intensity histogram at a certain vertebral level

    # A tracking identifier for this measurement group
    im_tracking_id = hd.sr.TrackingIdentifier(
        identifier='Image0001',
        uid=hd.UID(),
    )

    # A measurement using an IBSI code (not in pydicom)
    histogram_intensity_code = hd.sr.CodedConcept(
        value="X6K6",
        meaning="Intensity Histogram Mean",
        scheme_designator="IBSI",
    )
    hist_measurement = hd.sr.Measurement(
        name=histogram_intensity_code,
        value=-119.0738525390625,
        unit=codes.UCUM.HounsfieldUnit,
    )
    im_evaluation = hd.sr.QualitativeEvaluation(
        name=codes.SCT.AnatomicalPosition,
        value=codes.SCT.LevelOfT4T5IntervertebralDisc,
    )

    # Construct the measurement group
    im_group = hd.sr.MeasurementsAndQualitativeEvaluations(
        source_images=[source_image],
        tracking_identifier=im_tracking_id,
        measurements=[hist_measurement],
        qualitative_evaluations=[im_evaluation],
    )

    # Next, we define a planar ROI measurement group describing a lung nodule

    # A tracking identifier for this measurement group
    lung_nodule_roi_tracking_id = hd.sr.TrackingIdentifier(
        identifier='LungNodule0001',
        uid=hd.UID(),
    )

    # Define the image region (a circle) using image coordinates
    region = hd.sr.ImageRegion(
        graphic_type=hd.sr.GraphicTypeValues.CIRCLE,
        graphic_data=np.array([[45.0, 55.0], [45.0, 65.0]]),
        source_image=hd.sr.SourceImageForRegion.from_source_image(im),
    )
    nodule_measurement = hd.sr.Measurement(
        name=codes.SCT.Diameter,
        value=10.0,
        unit=codes.UCUM.mm,
    )
    nodule_evaluation = hd.sr.QualitativeEvaluation(
        name=codes.DCM.LevelOfSignificance,
        value=codes.SCT.NotSignificant,
    )

    # Construct the measurement group
    planar_group_1 = hd.sr.PlanarROIMeasurementsAndQualitativeEvaluations(
        referenced_region=region,
        tracking_identifier=lung_nodule_roi_tracking_id,
        finding_type=codes.SCT.Nodule,
        finding_category=codes.SCT.MorphologicallyAbnormalStructure,
        finding_sites=[hd.sr.FindingSite(codes.SCT.Lung)],
        measurements=[nodule_measurement],
        qualitative_evaluations=[nodule_evaluation],
    )

    # Next, we define a second planar ROI measurement group describing the
    # aorta

    # A tracking identifier for this measurement group
    aorta_roi_tracking_id = hd.sr.TrackingIdentifier(
        identifier='Aorta0001',
        uid=hd.UID(),
    )

    # Define the image region (a polyline) using image coordinates
    region = hd.sr.ImageRegion(
        graphic_type=hd.sr.GraphicTypeValues.POLYLINE,
        graphic_data=np.array([[25.0, 45.0], [45.0, 45.0], [45.0, 65.0], [25.0, 65.0]]),
        source_image=hd.sr.SourceImageForRegion.from_source_image(im),
    )
    aorta_measurement = hd.sr.Measurement(
        name=codes.SCT.Diameter,
        value=20.0,
        unit=codes.UCUM.mm,
    )

    # Category code used for findings that are anatomical structures
    # (an illustrative choice of category code)
    structure_code = codes.SCT.AnatomicalStructure

    # Construct the measurement group
    planar_group_2 = hd.sr.PlanarROIMeasurementsAndQualitativeEvaluations(
        referenced_region=region,
        tracking_identifier=aorta_roi_tracking_id,
        finding_type=codes.SCT.Aorta,
        finding_category=structure_code,
        measurements=[aorta_measurement],
    )

    # Finally, we define a volumetric ROI measurement group describing a
    # vertebral body

    # A tracking identifier for this measurement group
    volumetric_roi_tracking_id = hd.sr.TrackingIdentifier(
        identifier='Vertebra0001',
        uid=hd.UID(),
    )

    # Define the region (a point) using frame of reference coordinates
    volume_surface = hd.sr.VolumeSurface(
        graphic_type=hd.sr.GraphicTypeValues3D.POINT,
        graphic_data=np.array([[123.5, 234.1, -23.7]]),
        source_images=[hd.sr.SourceImageForSegmentation.from_source_image(im)],
        frame_of_reference_uid=im.FrameOfReferenceUID,
    )
    vol_measurement = hd.sr.Measurement(
        name=codes.SCT.Volume,
        value=200.0,
        unit=codes.UCUM.CubicMillimeter,
    )

    # Construct the measurement group
    vol_group = hd.sr.VolumetricROIMeasurementsAndQualitativeEvaluations(
        referenced_volume_surface=volume_surface,
        tracking_identifier=volumetric_roi_tracking_id,
        finding_category=structure_code,
        finding_type=codes.SCT.Vertebra,
        measurements=[vol_measurement],
    )

    measurement_report = hd.sr.MeasurementReport(
        observation_context=observation_context,  # from above
        procedure_reported=codes.LN.CTUnspecifiedBodyRegion,
        imaging_measurements=[im_group, planar_group_1, planar_group_2, vol_group],
        title=codes.DCM.ImagingMeasurementReport,
    )

    # Create the Structured Report instance
    sr_dataset = hd.sr.Comprehensive3DSR(
        evidence=[im],  # all datasets referenced in the report
        content=measurement_report,
        series_number=1,
        series_instance_uid=hd.UID(),
        sop_instance_uid=hd.UID(),
        instance_number=1,
        manufacturer='Manufacturer'
    )
    sr_dataset.save_as("sr_document_with_multiple_groups.dcm")

.. _tid300def: http://dicom.nema.org/medical/dicom/current/output/chtml/part16/chapter_A.html#sect_TID_300
.. _tid1500def: http://dicom.nema.org/medical/dicom/current/output/chtml/part16/chapter_A.html#sect_TID_1500
.. _tid1501def: http://dicom.nema.org/medical/dicom/current/output/chtml/part16/chapter_A.html#sect_TID_1501
.. _tid1410def: http://dicom.nema.org/medical/dicom/current/output/chtml/part16/chapter_A.html#sect_TID_1410
.. _tid1411def: http://dicom.nema.org/medical/dicom/current/output/chtml/part16/chapter_A.html#sect_TID_1411

diff --git a/docs/tid1500parsing.rst b/docs/tid1500parsing.rst
new file mode 100644
index 00000000..2273c770
--- /dev/null
+++ b/docs/tid1500parsing.rst
@@ -0,0 +1,345 @@

Parsing Measurement Reports
===========================

In addition to the ability to create TID 1500 Structured Reports, *highdicom*
also includes functionality to help you find and extract information from
existing SR documents in this format.

First, you must load the SR dataset as an object of the relevant `highdicom`
class. You can do this using the :func:`highdicom.sr.srread()` function:
.. code-block:: python

    import highdicom as hd

    # This example is in the highdicom test data files in the repository
    sr = hd.sr.srread("data/test_files/sr_document.dcm")

Alternatively, if you already have a ``pydicom.Dataset`` in memory, you can use
the relevant ``from_dataset`` method like this:

.. code-block:: python

    import pydicom
    import highdicom as hd

    sr_dataset = pydicom.dcmread("data/test_files/sr_document.dcm")

    # Use the appropriate class depending on the specific IOD, here it is a
    # Comprehensive3DSR
    sr = hd.sr.Comprehensive3DSR.from_dataset(sr_dataset)

If the Structured Report conforms to the TID 1500 measurement report template,
accessing the ``content`` property returns a
:class:`highdicom.sr.MeasurementReport` object. Otherwise, a general
:class:`highdicom.sr.ContentSequence` object is returned.

The resulting :class:`highdicom.sr.MeasurementReport` object has methods that
allow you to find and access the content of the report conveniently.

Searching For Measurement Groups
--------------------------------

To search for measurement groups, the :class:`highdicom.sr.MeasurementReport`
class has
:meth:`highdicom.sr.MeasurementReport.get_image_measurement_groups`,
:meth:`highdicom.sr.MeasurementReport.get_planar_roi_measurement_groups`, and
:meth:`highdicom.sr.MeasurementReport.get_volumetric_roi_measurement_groups`
methods, each of which returns a list of the measurement groups of the
corresponding type found in the report. You can additionally provide filters
to return only those measurement groups that meet certain criteria.

The available search criteria include: tracking UID, finding type, finding
site, referenced SOP instance UID, and referenced SOP class UID. If you provide
multiple criteria, the methods return those groups that meet *all* the
specified criteria.

The returned objects are of type
:class:`highdicom.sr.MeasurementsAndQualitativeEvaluations`,
:class:`highdicom.sr.PlanarROIMeasurementsAndQualitativeEvaluations`, or
:class:`highdicom.sr.VolumetricROIMeasurementsAndQualitativeEvaluations`,
respectively, representing the entire sub-template in the SR content tree.

Here are some examples of using these methods to find measurement groups of
interest within a measurement report. As an example SR document, we use the SR
document created on the previous page (see :ref:`tid1500_full_example` for the
relevant snippet).
.. code-block:: python

    import highdicom as hd
    from pydicom.sr.codedict import codes

    # This example is in the highdicom test data files in the repository
    sr = hd.sr.srread("data/test_files/sr_document_with_multiple_groups.dcm")

    # Get a list of all image measurement groups referencing an image with a
    # particular SOP Instance UID
    groups = sr.content.get_image_measurement_groups(
        referenced_sop_instance_uid="1.3.6.1.4.1.5962.1.1.1.1.1.20040119072730.12322",
    )
    assert len(groups) == 1

    # Get a list of all image measurement groups with a particular tracking UID
    groups = sr.content.get_image_measurement_groups(
        tracking_uid="1.2.826.0.1.3680043.10.511.3.77718622501224431322963356892468048",
    )
    assert len(groups) == 1

    # Get a list of all planar ROI measurement groups with finding type "Nodule"
    # AND finding site "Lung"
    groups = sr.content.get_planar_roi_measurement_groups(
        finding_type=codes.SCT.Nodule,
        finding_site=codes.SCT.Lung,
    )
    assert len(groups) == 1

    # Get a list of all volumetric ROI measurement groups (with no filters)
    groups = sr.content.get_volumetric_roi_measurement_groups()
    assert len(groups) == 1

Additionally, for
:meth:`highdicom.sr.MeasurementReport.get_planar_roi_measurement_groups` and
:meth:`highdicom.sr.MeasurementReport.get_volumetric_roi_measurement_groups`,
it is possible to filter by graphic type and by reference type (how the ROI is
specified in the measurement group).

To search by graphic type, pass an instance of either the
:class:`highdicom.sr.GraphicTypeValues` or
:class:`highdicom.sr.GraphicTypeValues3D` enums:

.. code-block:: python

    import highdicom as hd
    from pydicom.sr.codedict import codes

    # This example is in the highdicom test data files in the repository
    sr = hd.sr.srread("data/test_files/sr_document_with_multiple_groups.dcm")

    # Get a list of all planar ROI measurement groups with graphic type CIRCLE
    groups = sr.content.get_planar_roi_measurement_groups(
        graphic_type=hd.sr.GraphicTypeValues.CIRCLE,
    )
    assert len(groups) == 1

For reference type, you should provide one of the following values (which
reflect how the SR document stores the information internally):

- ``CodedConcept(value="111030", meaning="Image Region", scheme_designator="DCM")``,
  aka ``pydicom.sr.codedict.codes.DCM.ImageRegion``, for ROIs defined in the SR
  as image regions (vector coordinates for planar regions defined within the
  SR document).
- ``CodedConcept(value="121231", meaning="Volume Surface", scheme_designator="DCM")``,
  aka ``pydicom.sr.codedict.codes.DCM.VolumeSurface``, for ROIs defined in the
  SR as a volume surface (vector coordinates for a volumetric region defined
  within the SR document).
- ``CodedConcept(value="121191", meaning="Referenced Segment", scheme_designator="DCM")``,
  aka ``pydicom.sr.codedict.codes.DCM.ReferencedSegment``, for ROIs defined in the
  SR indirectly by referencing a segment stored in a DICOM Segmentation Image.
- ``CodedConcept(value="121191", meaning="Region In Space", scheme_designator="DCM")``,
  for ROIs defined in the SR indirectly by referencing a region stored in a
  DICOM RT Struct object (this is not currently supported by the `highdicom`
  constructor, but is an option in the standard). Unfortunately, this code is
  not included in ``pydicom.sr.codedict.codes`` at this time.
.. code-block:: python

    import highdicom as hd
    from pydicom.sr.codedict import codes

    # This example is in the highdicom test data files in the repository
    sr = hd.sr.srread("data/test_files/sr_document_with_multiple_groups.dcm")

    # Get a list of all planar ROI measurement groups stored as regions
    groups = sr.content.get_planar_roi_measurement_groups(
        reference_type=codes.DCM.ImageRegion,
    )
    assert len(groups) == 2

    # Get a list of all volumetric ROI measurement groups stored as volume
    # surfaces
    groups = sr.content.get_volumetric_roi_measurement_groups(
        reference_type=codes.DCM.VolumeSurface,
    )
    assert len(groups) == 1


Accessing Data in Measurement Groups
------------------------------------

Once you have found measurement groups, there are various properties on the
returned object that allow you to access the information that you may need.
These may be in the form of basic Python data types extracted from the
measurement group's content items, or `highdicom` classes representing full
sub-templates that in turn have methods and properties defined on them. These
classes are the same classes that you use to construct the objects.

The following snippet demonstrates some of these properties; see the API
documentation of the relevant class for a full list.

.. code-block:: python

    import highdicom as hd
    import numpy as np
    from pydicom.sr.codedict import codes

    # This example is in the highdicom test data files in the repository
    sr = hd.sr.srread("data/test_files/sr_document_with_multiple_groups.dcm")

    # Use the first (only) image measurement group as an example
    group = sr.content.get_image_measurement_groups()[0]

    # tracking_identifier returns a Python str
    assert group.tracking_identifier == "Image0001"

    # tracking_uid returns a hd.UID, a subclass of str
    assert group.tracking_uid == "1.2.826.0.1.3680043.10.511.3.77718622501224431322963356892468048"

    # source_images returns a list of hd.sr.SourceImageForMeasurementGroup, which
    # in turn have some properties to access data
    assert isinstance(group.source_images[0], hd.sr.SourceImageForMeasurementGroup)
    assert group.source_images[0].referenced_sop_instance_uid == "1.3.6.1.4.1.5962.1.1.1.1.1.20040119072730.12322"

    # for the various optional pieces of information in a measurement group,
    # accessing the relevant property returns None if the information is not
    # present
    assert group.finding_type is None

    # Now use the first planar ROI group as a second example
    group = sr.content.get_planar_roi_measurement_groups()[0]

    # finding_type returns a CodedConcept
    assert group.finding_type == codes.SCT.Nodule

    # finding_sites returns a list of hd.sr.FindingSite objects (a sub-template)
    assert isinstance(group.finding_sites[0], hd.sr.FindingSite)
    # the value of a finding site is a CodedConcept
    assert group.finding_sites[0].value == codes.SCT.Lung

    # reference_type returns a CodedConcept (the same values used above for
    # filtering)
    assert group.reference_type == codes.DCM.ImageRegion

    # since this has reference type ImageRegion, we can access the referenced roi
    # using 'roi', which will return an hd.sr.ImageRegion object
    assert isinstance(group.roi, hd.sr.ImageRegion)

    # the graphic type and actual ROI coordinates (as a numpy array) can be
    # accessed with the graphic_type and value properties of the roi
    assert group.roi.graphic_type == hd.sr.GraphicTypeValues.CIRCLE
    assert isinstance(group.roi.value, np.ndarray)
    assert group.roi.value.shape == (2, 2)
For a volumetric group, the ROI is returned as a
:class:`highdicom.sr.VolumeSurface` or a list of
:class:`highdicom.sr.ImageRegion` objects, depending on the reference type. If
instead a planar or volumetric measurement group uses the ``ReferencedSegment``
reference type, the referenced segment can be accessed via the
``group.referenced_segmentation_frame`` property (for planar groups) or the
``group.referenced_segment`` property (for volumetric groups), which return
objects of type :class:`highdicom.sr.ReferencedSegmentationFrame` and
:class:`highdicom.sr.ReferencedSegment` respectively.

Searching for Measurements
--------------------------

Each measurement group may optionally contain any number of "measurements",
represented by the TID300 "Measurement" template and the
:class:`highdicom.sr.Measurement` class that implements it in *highdicom*.
A measurement contains a numerical measurement derived from the image, along
with the physical unit of the measurement and various other optional
descriptive metadata.

You can search for measurements within a measurement group using the
``get_measurements()`` method on the relevant measurement group class. You can
optionally provide a ``name`` parameter, which should be a coded value that
allows you to find measurements with a particular name.

.. code-block:: python

    import highdicom as hd
    from pydicom.sr.codedict import codes

    # Use the same example file in the highdicom test data
    sr = hd.sr.srread("data/test_files/sr_document_with_multiple_groups.dcm")

    # Use the first planar measurement group as an example
    group = sr.content.get_planar_roi_measurement_groups()[0]

    # Get a list of all measurements
    measurements = group.get_measurements()

    # Get a list of measurements for diameter
    measurements = group.get_measurements(name=codes.SCT.Diameter)


Note that although there will usually be only a single measurement with a given
name within a measurement group, multiple measurements with the same name are
not disallowed by the standard. Consequently, the ``get_measurements()`` method
returns a list containing 0 or more measurements.

Accessing Data in Measurements
------------------------------

You can access the name of a measurement with the ``name`` property (returns a
:class:`highdicom.sr.CodedConcept`), its numerical value with the ``value``
property (returns a ``float``), and the unit with the ``unit`` property.

.. code-block:: python

    import highdicom as hd
    from pydicom.sr.codedict import codes

    # Use the same example file in the highdicom test data
    sr = hd.sr.srread("data/test_files/sr_document_with_multiple_groups.dcm")

    # Use the first planar measurement group as an example
    group = sr.content.get_planar_roi_measurement_groups()[0]

    # Get the diameter measurement in this group
    measurement = group.get_measurements(name=codes.SCT.Diameter)[0]

    # Access the measurement's name
    assert measurement.name == codes.SCT.Diameter

    # Access the measurement's value
    assert measurement.value == 10.0

    # Access the measurement's unit
    assert measurement.unit == codes.UCUM.mm

Additionally, the properties ``method``, ``finding_sites``, ``qualifier``,
``referenced_images``, and ``derivation`` allow you to access further optional
metadata that may be present in the stored measurement.

Searching for Evaluations
-------------------------

In addition to numerical measurements, measurement groups may also contain
"Qualitative Evaluations".
These contain an evaluation of the image represented +using a coded concept. + +Similar to measurements, you can search for evaluations with the +``get_qualitative_evaluations()`` method. You can optionally filter by name +with the ``name`` parameter. You can access the name and value of the returned +evaluations with the ``name`` and ``value`` properties. + +.. code-block:: python + + import highdicom as hd + from pydicom.sr.codedict import codes + + # Use the same example file in the highdicom test data + sr = hd.sr.srread("data/test_files/sr_document_with_multiple_groups.dcm") + + # Use the first planar measurement group as an example + group = sr.content.get_planar_roi_measurement_groups()[0] + + # Get the level of significance evaluation in this group + evaluation = group.get_qualitative_evaluations( + name=codes.DCM.LevelOfSignificance + )[0] + + # Access the evaluation's name + assert evaluation.name == codes.DCM.LevelOfSignificance + + # Access the evaluation's value + assert evaluation.value == codes.SCT.NotSignificant diff --git a/docs/usage.rst b/docs/usage.rst index a6f50b58..6740183d 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -3,637 +3,10 @@ User guide ========== -Creating and parsing DICOM objects using the :mod:`highdicom` package. +.. toctree:: + :maxdepth: 2 + :caption: Contents: -.. _creating-seg: - -Creating Segmentation (SEG) images ----------------------------------- - -Derive a Segmentation image from a series of single-frame Computed Tomography -(CT) images: - -.. code-block:: python - - from pathlib import Path - - import highdicom as hd - import numpy as np - from pydicom.sr.codedict import codes - from pydicom.filereader import dcmread - - # Path to directory containing single-frame legacy CT Image instances - # stored as PS3.10 files - series_dir = Path('path/to/series/directory') - image_files = series_dir.glob('*.dcm') - - # Read CT Image data sets from PS3.10 files on disk - image_datasets = [dcmread(str(f)) for f in image_files] - - # Create a binary segmentation mask - mask = np.zeros( - shape=( - len(image_datasets), - image_datasets[0].Rows, - image_datasets[0].Columns - ), - dtype=np.bool - ) - mask[1:-1, 10:-10, 100:-100] = True - - # Describe the algorithm that created the segmentation - algorithm_identification = hd.AlgorithmIdentificationSequence( - name='test', - version='v1.0', - family=codes.cid7162.ArtificialIntelligence - ) - - # Describe the segment - description_segment_1 = hd.seg.SegmentDescription( - segment_number=1, - segment_label='first segment', - segmented_property_category=codes.cid7150.Tissue, - segmented_property_type=codes.cid7166.ConnectiveTissue, - algorithm_type=hd.seg.SegmentAlgorithmTypeValues.AUTOMATIC, - algorithm_identification=algorithm_identification, - tracking_uid=hd.UID(), - tracking_id='test segmentation of computed tomography image' - ) - - # Create the Segmentation instance - seg_dataset = hd.seg.Segmentation( - source_images=image_datasets, - pixel_array=mask, - segmentation_type=hd.seg.SegmentationTypeValues.BINARY, - segment_descriptions=[description_segment_1], - series_instance_uid=hd.UID(), - series_number=2, - sop_instance_uid=hd.UID(), - instance_number=1, - manufacturer='Manufacturer', - manufacturer_model_name='Model', - software_versions='v1', - device_serial_number='Device XYZ', - ) - - print(seg_dataset) - - seg_dataset.save_as("seg.dcm") - - -Derive a Segmentation image from a multi-frame Slide Microscopy (SM) image: - -.. 
code-block:: python - - from pathlib import Path - - import highdicom as hd - import numpy as np - from pydicom.sr.codedict import codes - from pydicom.filereader import dcmread - - # Path to multi-frame SM image instance stored as PS3.10 file - image_file = Path('/path/to/image/file') - - # Read SM Image data set from PS3.10 files on disk - image_dataset = dcmread(str(image_file)) - - # Create a binary segmentation mask - mask = np.max(image_dataset.pixel_array, axis=3) > 1 - - # Describe the algorithm that created the segmentation - algorithm_identification = hd.AlgorithmIdentificationSequence( - name='test', - version='v1.0', - family=codes.cid7162.ArtificialIntelligence - ) - - # Describe the segment - description_segment_1 = hd.seg.SegmentDescription( - segment_number=1, - segment_label='first segment', - segmented_property_category=codes.cid7150.Tissue, - segmented_property_type=codes.cid7166.ConnectiveTissue, - algorithm_type=hd.seg.SegmentAlgorithmTypeValues.AUTOMATIC, - algorithm_identification=algorithm_identification, - tracking_uid=hd.UID(), - tracking_id='test segmentation of slide microscopy image' - ) - - # Create the Segmentation instance - seg_dataset = hd.seg.Segmentation( - source_images=[image_dataset], - pixel_array=mask, - segmentation_type=hd.seg.SegmentationTypeValues.BINARY, - segment_descriptions=[description_segment_1], - series_instance_uid=hd.UID(), - series_number=2, - sop_instance_uid=hd.UID(), - instance_number=1, - manufacturer='Manufacturer', - manufacturer_model_name='Model', - software_versions='v1', - device_serial_number='Device XYZ' - ) - - print(seg_dataset) - -.. _parsing-seg: - -Parsing Segmentation (SEG) images ---------------------------------- - -Finding relevant segments in a segmentation image instance and retrieving masks -for them: - -.. code-block:: python - - import highdicom as hd - import numpy as np - from pydicom.sr.codedict import codes - - # Read SEG Image data set from PS3.10 files on disk into a Segmentation - # object - # This example is a test file in the highdicom git repository - seg = hd.seg.segread('data/test_files/seg_image_ct_binary_overlap.dcm') - - # Check the number of segments - assert seg.number_of_segments == 2 - - # Find segments (identified by their segment number) that have segmented - # property type "Bone" - bone_segment_numbers = seg.get_segment_numbers( - segmented_property_type=codes.SCT.Bone - ) - assert bone_segment_numbers == [1] - - # List SOP Instance UIDs of the images from which the segmentation was - # derived - for study_uid, series_uid, sop_uid in seg.get_source_image_uids(): - print(study_uid, series_uid, sop_uid) - # '1.3.6.1.4.1.5962.1.1.0.0.0.1196530851.28319.0.1, 1.3.6.1.4.1.5962.1.1.0.0.0.1196530851.28319.0.2, 1.3.6.1.4.1.5962.1.1.0.0.0.1196530851.28319.0.93' - # ... 
- - # Here is a list of known SOP Instance UIDs that are a subset of those - # from which the segmentation was derived - source_image_uids = [ - '1.3.6.1.4.1.5962.1.1.0.0.0.1196530851.28319.0.93', - '1.3.6.1.4.1.5962.1.1.0.0.0.1196530851.28319.0.94', - ] - - # Retrieve a binary segmentation mask for these images for the bone segment - mask = seg.get_pixels_by_source_instance( - source_sop_instance_uids=source_image_uids, - segment_numbers=bone_segment_numbers, - ) - # Output is a numpy array of shape (instances x rows x columns x segments) - assert mask.shape == (2, 16, 16, 1) - assert np.unique(mask).tolist() == [0, 1] - - # Alternatively, retrieve the segmentation mask for the full list of segments - # (2 in this case), and combine the resulting array into a "label mask", where - # pixel value represents segment number - mask = seg.get_pixels_by_source_instance( - source_sop_instance_uids=source_image_uids, - combine_segments=True, - skip_overlap_checks=True, # the segments in this image overlap - ) - # Output is a numpy array of shape (instances x rows x columns) - assert mask.shape == (2, 16, 16) - assert np.unique(mask).tolist() == [0, 1, 2] - - -.. _creating-sr: - -Creating Structured Report (SR) documents ------------------------------------------ - -Create a Structured Report document that contains a numeric area measurement for -a planar region of interest (ROI) in a single-frame computed tomography (CT) -image: - -.. code-block:: python - - from pathlib import Path - - import highdicom as hd - import numpy as np - from pydicom.filereader import dcmread - from pydicom.sr.codedict import codes - from pydicom.uid import generate_uid - from highdicom.sr.content import FindingSite - from highdicom.sr.templates import Measurement, TrackingIdentifier - - # Path to single-frame CT image instance stored as PS3.10 file - image_file = Path('/path/to/image/file') - - # Read CT Image data set from PS3.10 files on disk - image_dataset = dcmread(str(image_file)) - - # Describe the context of reported observations: the person that reported - # the observations and the device that was used to make the observations - observer_person_context = hd.sr.ObserverContext( - observer_type=codes.DCM.Person, - observer_identifying_attributes=hd.sr.PersonObserverIdentifyingAttributes( - name='Foo' - ) - ) - observer_device_context = hd.sr.ObserverContext( - observer_type=codes.DCM.Device, - observer_identifying_attributes=hd.sr.DeviceObserverIdentifyingAttributes( - uid=hd.UID() - ) - ) - observation_context = hd.sr.ObservationContext( - observer_person_context=observer_person_context, - observer_device_context=observer_device_context, - ) - - # Describe the image region for which observations were made - # (in physical space based on the frame of reference) - referenced_region = hd.sr.ImageRegion3D( - graphic_type=hd.sr.GraphicTypeValues3D.POLYGON, - graphic_data=np.array([ - (165.0, 200.0, 134.0), - (170.0, 200.0, 134.0), - (170.0, 220.0, 134.0), - (165.0, 220.0, 134.0), - (165.0, 200.0, 134.0), - ]), - frame_of_reference_uid=image_dataset.FrameOfReferenceUID - ) - - # Describe the anatomic site at which observations were made - finding_sites = [ - FindingSite( - anatomic_location=codes.SCT.CervicoThoracicSpine, - topographical_modifier=codes.SCT.VertebralForamen - ), - ] - - # Describe the imaging measurements for the image region defined above - measurements = [ - Measurement( - name=codes.SCT.AreaOfDefinedRegion, - tracking_identifier=hd.sr.TrackingIdentifier(uid=generate_uid()), - value=1.7, - 
unit=codes.UCUM.SquareMillimeter, - properties=hd.sr.MeasurementProperties( - normality=hd.sr.CodedConcept( - value="17621005", - meaning="Normal", - scheme_designator="SCT" - ), - level_of_significance=codes.SCT.NotSignificant - ) - ) - ] - imaging_measurements = [ - hd.sr.PlanarROIMeasurementsAndQualitativeEvaluations( - tracking_identifier=TrackingIdentifier( - uid=hd.UID(), - identifier='Planar ROI Measurements' - ), - referenced_region=referenced_region, - finding_type=codes.SCT.SpinalCord, - measurements=measurements, - finding_sites=finding_sites - ) - ] - - # Create the report content - measurement_report = hd.sr.MeasurementReport( - observation_context=observation_context, - procedure_reported=codes.LN.CTUnspecifiedBodyRegion, - imaging_measurements=imaging_measurements - ) - - # Create the Structured Report instance - sr_dataset = hd.sr.Comprehensive3DSR( - evidence=[image_dataset], - content=measurement_report, - series_number=1, - series_instance_uid=hd.UID(), - sop_instance_uid=hd.UID(), - instance_number=1, - manufacturer='Manufacturer' - ) - - print(sr_dataset) - - -.. _parsing-sr: - -Parsing Structured Report (SR) documents ----------------------------------------- - -Finding relevant content in the nested SR content tree: - -.. code-block:: python - - from pathlib import Path - - import highdicom as hd - from pydicom.filereader import dcmread - from pydicom.sr.codedict import codes - - # Path to SR document instance stored as PS3.10 file - document_file = Path('/path/to/document/file') - - # Load document from file on disk - sr_dataset = dcmread(str(document_file)) - - # Find all content items that may contain other content items. - containers = hd.sr.utils.find_content_items( - dataset=sr_dataset, - relationship_type=RelationshipTypeValues.CONTAINS - ) - print(containers) - - # Query content of SR document, where content is structured according - # to TID 1500 "Measurment Report" - if sr_dataset.ContentTemplateSequence[0].TemplateIdentifier == 'TID1500': - # Determine who made the observations reported in the document - observers = hd.sr.utils.find_content_items( - dataset=sr_dataset, - name=codes.DCM.PersonObserverName - ) - print(observers) - - # Find all imaging measurements reported in the document - measurements = hd.sr.utils.find_content_items( - dataset=sr_dataset, - name=codes.DCM.ImagingMeasurements, - recursive=True - ) - print(measurements) - - # Find all findings reported in the document - findings = hd.sr.utils.find_content_items( - dataset=sr_dataset, - name=codes.DCM.Finding, - recursive=True - ) - print(findings) - - # Find regions of interest (ROI) described in the document - # in form of spatial coordinates (SCOORD) - regions = hd.sr.utils.find_content_items( - dataset=sr_dataset, - value_type=ValueTypeValues.SCOORD, - recursive=True - ) - print(regions) - - -.. _creating-sc: - -Creating Secondary Capture (SC) images --------------------------------------- - -Secondary captures are a way to store images that were not created directly -by an imaging modality within a DICOM file. They are often used to store -screenshots or overlays, and are widely supported by viewers. However other -methods of displaying image derived information, such as segmentation images -and structured reports should be preferred if they are supported because they -can capture more detail about how the derived information was obtained and -what it represents. 
-
-In this example, we use a secondary capture to store an image containing a
-labeled bounding box region drawn over a CT image.
-
-.. code-block:: python
-
-    import highdicom as hd
-    import numpy as np
-    from pydicom import dcmread
-    from pydicom.uid import RLELossless
-    from PIL import Image, ImageDraw
-
-    # Read in the source CT image
-    image_dataset = dcmread('/path/to/image.dcm')
-
-    # Create an image for display by windowing the original image and drawing a
-    # bounding box over it using Pillow's ImageDraw module
-    slope = getattr(image_dataset, 'RescaleSlope', 1)
-    intercept = getattr(image_dataset, 'RescaleIntercept', 0)
-    original_image = image_dataset.pixel_array * slope + intercept
-
-    # Window the image to a soft tissue window (center 40, width 400)
-    # and rescale to the range 0 to 255
-    lower = -160
-    upper = 240
-    windowed_image = np.clip(original_image, lower, upper)
-    windowed_image = (windowed_image - lower) * 255 / (upper - lower)
-    windowed_image = windowed_image.astype(np.uint8)
-
-    # Create RGB channels
-    windowed_image = np.tile(windowed_image[:, :, np.newaxis], [1, 1, 3])
-
-    # Cast to a PIL image for easy drawing of boxes and text
-    pil_image = Image.fromarray(windowed_image)
-
-    # Draw a red bounding box over part of the image
-    x0 = 10
-    y0 = 10
-    x1 = 60
-    y1 = 60
-    draw_obj = ImageDraw.Draw(pil_image)
-    draw_obj.rectangle(
-        [x0, y0, x1, y1],
-        outline='red',
-        fill=None,
-        width=3
-    )
-
-    # Add some text
-    draw_obj.text(xy=[10, 70], text='Region of Interest', fill='red')
-
-    # Convert to numpy array
-    pixel_array = np.array(pil_image)
-
-    # The patient orientation defines the directions of the rows and columns
-    # of the image, relative to the anatomy of the patient. In a standard CT
-    # axial image, the rows are oriented leftwards and the columns are
-    # oriented posteriorly, so the patient orientation is ['L', 'P']
-    patient_orientation = ['L', 'P']
-
-    # Create the secondary capture image. By using the `from_ref_dataset`
-    # constructor, all the patient and study information will be copied from
-    # the original image dataset
-    sc_image = hd.sc.SCImage.from_ref_dataset(
-        ref_dataset=image_dataset,
-        pixel_array=pixel_array,
-        photometric_interpretation=hd.PhotometricInterpretationValues.RGB,
-        bits_allocated=8,
-        coordinate_system=hd.CoordinateSystemNames.PATIENT,
-        series_instance_uid=hd.UID(),
-        sop_instance_uid=hd.UID(),
-        series_number=100,
-        instance_number=1,
-        manufacturer='Manufacturer',
-        pixel_spacing=image_dataset.PixelSpacing,
-        patient_orientation=patient_orientation,
-        transfer_syntax_uid=RLELossless
-    )
-
-    # Save the file
-    sc_image.save_as('sc_output.dcm')
-
-
-To save a 3D image as a series of output slices, simply loop over the 2D
-slices and ensure that the individual output instances share a common series
-instance UID. Here is an example for a CT scan stored in a NumPy array called
-"ct_to_save", where we do not have the original DICOM files on hand. We want
-to overlay a segmentation that is stored in a NumPy array called "seg_out".
-
-.. code-block:: python
-
-    import highdicom as hd
-    import numpy as np
-    import os
-
-    pixel_spacing = [1.0, 1.0]
-    sz = ct_to_save.shape[2]
-    series_instance_uid = hd.UID()
-    study_instance_uid = hd.UID()
-
-    for iz in range(sz):
-        this_slice = ct_to_save[:, :, iz]
-
-        # Window the image to a soft tissue window (center 40, width 400)
-        # and rescale to the range 0 to 255
-        lower = -160
-        upper = 240
-        windowed_image = np.clip(this_slice, lower, upper)
-        windowed_image = (windowed_image - lower) * 255 / (upper - lower)
-
-        # Create RGB channels
-        pixel_array = np.tile(windowed_image[:, :, np.newaxis], [1, 1, 3])
-
-        # Blend in the segmentation as a red overlay: the red channel is
-        # dominated by the segmentation mask, while the green and blue
-        # channels are dimmed to the transparency level
-        alpha = 0.1  # transparency level
-        pixel_array[:, :, 0] = 255 * (1 - alpha) * seg_out[:, :, iz] + alpha * pixel_array[:, :, 0]
-        pixel_array[:, :, 1] = alpha * pixel_array[:, :, 1]
-        pixel_array[:, :, 2] = alpha * pixel_array[:, :, 2]
-
-        patient_orientation = ['L', 'P']
-
-        # Create the secondary capture image
-        sc_image = hd.sc.SCImage(
-            pixel_array=pixel_array.astype(np.uint8),
-            photometric_interpretation=hd.PhotometricInterpretationValues.RGB,
-            bits_allocated=8,
-            coordinate_system=hd.CoordinateSystemNames.PATIENT,
-            study_instance_uid=study_instance_uid,
-            series_instance_uid=series_instance_uid,
-            sop_instance_uid=hd.UID(),
-            series_number=100,
-            instance_number=iz + 1,
-            manufacturer='Manufacturer',
-            pixel_spacing=pixel_spacing,
-            patient_orientation=patient_orientation,
-        )
-
-        sc_image.save_as(os.path.join('output', f'sc_output_{iz}.dcm'))
-
-
-Creating Grayscale Softcopy Presentation State (GSPS) Objects
--------------------------------------------------------------
-
-A presentation state contains information about how another image should be
-rendered, and may include "annotations" in the form of basic shapes, polylines,
-and text overlays. Note that a GSPS is not recommended for storing annotations
-for any purpose except visualization. A structured report would usually be
-preferred for storing annotations for clinical or research purposes.
-
-.. code-block:: python
-
-    import highdicom as hd
-    import numpy as np
-    from pydicom import dcmread
-    from pydicom.valuerep import PersonName
-
-    # Read in an example CT image
-    image_dataset = dcmread('path/to/image.dcm')
-
-    # Create an annotation containing a polyline
-    polyline = hd.pr.GraphicObject(
-        graphic_type=hd.pr.GraphicTypeValues.POLYLINE,
-        graphic_data=np.array([
-            [10.0, 10.0],
-            [20.0, 10.0],
-            [20.0, 20.0],
-            [10.0, 20.0],
-        ]),  # coordinates of polyline vertices
-        units=hd.pr.AnnotationUnitsValues.PIXEL,  # units for graphic data
-        tracking_id='Finding1',  # site-specific ID
-        tracking_uid=hd.UID()  # a new unique UID generated by highdicom
-    )
-
-    # Create a text object annotation
-    text = hd.pr.TextObject(
-        text_value='Important Finding!',
-        bounding_box=np.array(
-            [30.0, 30.0, 40.0, 40.0]  # left, top, right, bottom
-        ),
-        units=hd.pr.AnnotationUnitsValues.PIXEL,  # units for bounding box
-        tracking_id='Finding1Text',  # site-specific ID
-        tracking_uid=hd.UID()  # a new unique UID generated by highdicom
-    )
-
-    # Create a single layer that will contain both graphics.
-    # There may be multiple layers, and each GraphicAnnotation object
-    # belongs to a single layer
-    layer = hd.pr.GraphicLayer(
-        layer_name='LAYER1',
-        order=1,  # order in which layers are displayed (lower first)
-        description='Simple Annotation Layer',
-    )
-
-    # A GraphicAnnotation may contain multiple text and/or graphic objects
-    # and is rendered over all referenced images
-    annotation = hd.pr.GraphicAnnotation(
-        referenced_images=[image_dataset],
-        graphic_layer=layer,
-        graphic_objects=[polyline],
-        text_objects=[text]
-    )
-
-    # Assemble the components into a GSPS object
-    gsps = hd.pr.GrayscaleSoftcopyPresentationState(
-        referenced_images=[image_dataset],
-        series_instance_uid=hd.UID(),
-        series_number=123,
-        sop_instance_uid=hd.UID(),
-        instance_number=1,
-        manufacturer='Manufacturer',
-        manufacturer_model_name='Model',
-        software_versions='v1',
-        device_serial_number='Device XYZ',
-        content_label='ANNOTATIONS',
-        graphic_layers=[layer],
-        graphic_annotations=[annotation],
-        institution_name='MGH',
-        institutional_department_name='Radiology',
-        content_creator_name=PersonName.from_named_components(
-            family_name='Doe',
-            given_name='John'
-        ),
-    )
-
-    # Save the GSPS file
-    gsps.save_as('gsps.dcm')
-
-
-.. .. _creation-legacy:
-
-.. Creating Legacy Converted Enhanced Images
-.. -----------------------------------------
-
-.. .. code-block:: python
-
-..     from highdicom.legacy.sop import LegacyConvertedEnhancedCTImage
+   quickstart
+   general
+   iods
diff --git a/setup.py b/setup.py
index 857d77da..a1034d4c 100644
--- a/setup.py
+++ b/setup.py
@@ -51,7 +51,7 @@ def get_version():
     package_dir={'': 'src'},
     python_requires='>=3.6',
     install_requires=[
-        'pydicom>=2.3.0',
+        'pydicom>=2.3.0,!=2.4.0',
         'numpy>=1.19',
         'pillow>=8.3',
         'pillow-jpls>=1.0',
diff --git a/src/highdicom/ann/enum.py b/src/highdicom/ann/enum.py
index b5008277..8a53d783 100644
--- a/src/highdicom/ann/enum.py
+++ b/src/highdicom/ann/enum.py
@@ -57,7 +57,7 @@ class GraphicTypeValues(Enum):
     """
 
     POINT = 'POINT'
-    """An individual piont defined by a single coordinate."""
+    """An individual point defined by a single coordinate."""
 
     POLYLINE = 'POLYLINE'
     """Connected line segments defined by two or more ordered coordinates.
diff --git a/src/highdicom/ann/sop.py b/src/highdicom/ann/sop.py index 7ae86e40..8f07561d 100644 --- a/src/highdicom/ann/sop.py +++ b/src/highdicom/ann/sop.py @@ -78,7 +78,7 @@ def __init__( UID that should be assigned to the instance instance_number: int Number that should be assigned to the instance - manufacturer: Union[str, None], optional + manufacturer: Union[str, None] Name of the manufacturer (developer) of the device (software) that creates the instance manufacturer_model_name: str diff --git a/src/highdicom/frame.py b/src/highdicom/frame.py index 6055d38d..0f55cda2 100644 --- a/src/highdicom/frame.py +++ b/src/highdicom/frame.py @@ -63,7 +63,7 @@ def encode_frame( ------- bytes Encoded pixel data (potentially compressed in case of encapsulated - format encoding, depending on the transfer snytax) + format encoding, depending on the transfer syntax) Raises ------ diff --git a/src/highdicom/ko/sop.py b/src/highdicom/ko/sop.py index 947d9e2d..454abc66 100644 --- a/src/highdicom/ko/sop.py +++ b/src/highdicom/ko/sop.py @@ -68,7 +68,7 @@ def __init__( Name of the department of the person or device that creates the document instance requested_procedures: Union[Sequence[pydicom.dataset.Dataset], None], optional - Requested procedures that are being fullfilled by creation of the + Requested procedures that are being fulfilled by creation of the document transfer_syntax_uid: str, optional UID of transfer syntax that should be used for encoding of diff --git a/src/highdicom/legacy/sop.py b/src/highdicom/legacy/sop.py index d7c3df4f..9c6e872a 100644 --- a/src/highdicom/legacy/sop.py +++ b/src/highdicom/legacy/sop.py @@ -328,7 +328,7 @@ def _convert_legacy_to_enhanced( # All remaining unassigned attributes will be collected in either the # UnassignedSharedConvertedAttributesSequence or the # UnassignedPerFrameConvertedAttributesSequence, depending on whether - # values vary accross frames (original single-frame image instances). + # values vary across frames (original single-frame image instances). unassigned_shared_ca_item = Dataset() unassigned_perframe_ca_items = [ Dataset() diff --git a/src/highdicom/pm/sop.py b/src/highdicom/pm/sop.py index ef29b73a..2bc8a645 100644 --- a/src/highdicom/pm/sop.py +++ b/src/highdicom/pm/sop.py @@ -222,7 +222,7 @@ def __init__( Identifying information for the person who created the content of this parametric map. palette_color_lut_transformation: Union[highdicom.PaletteColorLUTTransformation, None], optional - Description of the Palette Color LUT Transformation for tranforming + Description of the Palette Color LUT Transformation for transforming grayscale into RGB color pixel values **kwargs: Any, optional Additional keyword arguments that will be passed to the constructor @@ -737,9 +737,11 @@ def __init__( # Frame Content frame_content_item = Dataset() frame_content_item.DimensionIndexValues = [ - np.where( - (dimension_position_values[idx] == pos) - )[0][0] + 1 + int( + np.where( + (dimension_position_values[idx] == pos) + )[0][0] + 1 + ) for idx, pos in enumerate(plane_position_values[i]) ] diff --git a/src/highdicom/pr/content.py b/src/highdicom/pr/content.py index 48809dd1..e69166e4 100644 --- a/src/highdicom/pr/content.py +++ b/src/highdicom/pr/content.py @@ -136,7 +136,7 @@ def __init__( label: str Name used to identify the Graphic Group (maximum 64 characters). description: Union[str, None], optional - Description of the group (maxiumum 10240 characters). + Description of the group (maximum 10240 characters). 
""" super().__init__() @@ -431,7 +431,7 @@ def __init__( raise ValueError( 'All coordinates in the bounding box must be non-negative.' ) - self.AnchorPoint = anchor_point + self.AnchorPoint = list(anchor_point) self.AnchorPointAnnotationUnits = units.value self.AnchorPointVisibility = 'Y' if anchor_point_visible else 'N' if units == AnnotationUnitsValues.DISPLAY: diff --git a/src/highdicom/pr/sop.py b/src/highdicom/pr/sop.py index 36e4ec5e..d3b7afca 100644 --- a/src/highdicom/pr/sop.py +++ b/src/highdicom/pr/sop.py @@ -389,7 +389,7 @@ def __init__( device_serial_number: Union[str, None] Manufacturer's serial number of the device palette_color_lut_transformation: highdicom.PaletteColorLUTTransformation - Description of the Palette Color LUT Transformation for tranforming + Description of the Palette Color LUT Transformation for transforming grayscale into RGB color pixel values content_label: str A label used to describe the content of this presentation state. @@ -419,10 +419,10 @@ def __init__( Identifying information for the person who created the content of this presentation state. modality_lut_transformation: Union[highdicom.ModalityLUTTransformation, None], optional - Description of the Modality LUT Transformation for tranforming modality + Description of the Modality LUT Transformation for transforming modality dependent into modality independent pixel values voi_lut_transformations: Union[Sequence[highdicom.pr.SoftcopyVOILUTTransformation], None], optional - Description of the VOI LUT Transformation for tranforming + Description of the VOI LUT Transformation for transforming modality pixel values into pixel values that are of interest to a user or an application icc_profile: Union[bytes, None], optional diff --git a/src/highdicom/seg/content.py b/src/highdicom/seg/content.py index 72208aa2..94b94435 100644 --- a/src/highdicom/seg/content.py +++ b/src/highdicom/seg/content.py @@ -481,8 +481,9 @@ def get_plane_positions_of_image( ) elif self._coordinate_system == CoordinateSystemNames.SLIDE: if hasattr(image, 'PerFrameFunctionalGroupsSequence'): - plane_positions = [ + plane_positions = [PlanePositionSequence.from_sequence( item.PlanePositionSlideSequence + ) for item in image.PerFrameFunctionalGroupsSequence ] else: @@ -491,7 +492,7 @@ def get_plane_positions_of_image( plane_positions = compute_plane_position_slide_per_frame(image) else: plane_positions = [ - item.PlanePositionSequence + PlanePositionSequence.from_sequence(item.PlanePositionSequence) for item in image.PerFrameFunctionalGroupsSequence ] diff --git a/src/highdicom/seg/sop.py b/src/highdicom/seg/sop.py index 02905dd2..1789692c 100644 --- a/src/highdicom/seg/sop.py +++ b/src/highdicom/seg/sop.py @@ -589,7 +589,7 @@ def iterate_indices_by_source_instance( relabel: bool, optional If True and ``combine_segments`` is ``True``, the output segment numbers are relabelled into the range ``0`` to - ``len(segment_numbers)`` (inclusive) accoring to the position of + ``len(segment_numbers)`` (inclusive) according to the position of the original segment numbers in ``segment_numbers`` parameter. If ``combine_segments`` is ``False``, this has no effect. @@ -690,7 +690,7 @@ def iterate_indices_by_source_frame( relabel: bool, optional If True and ``combine_segments`` is ``True``, the output segment numbers are relabelled into the range ``0`` to - ``len(segment_numbers)`` (inclusive) accoring to the position of + ``len(segment_numbers)`` (inclusive) according to the position of the original segment numbers in ``segment_numbers`` parameter. 
             If ``combine_segments`` is ``False``, this has no effect.
@@ -793,7 +793,7 @@ def iterate_indices_by_dimension_index_values(
         relabel: bool, optional
             If True and ``combine_segments`` is ``True``, the output segment
             numbers are relabelled into the range ``0`` to
-            ``len(segment_numbers)`` (inclusive) accoring to the position of
+            ``len(segment_numbers)`` (inclusive) according to the position of
             the original segment numbers in ``segment_numbers`` parameter.
             If ``combine_segments`` is ``False``, this has no effect.
@@ -1009,8 +1009,9 @@ def __init__(
             data elements. The following lossless compressed transfer syntaxes
             are supported for encapsulated format encoding in case of
             FRACTIONAL segmentation type:
-            RLE Lossless (``"1.2.840.10008.1.2.5"``) and
-            JPEG 2000 Lossless (``"1.2.840.10008.1.2.4.90"``).
+            RLE Lossless (``"1.2.840.10008.1.2.5"``),
+            JPEG 2000 Lossless (``"1.2.840.10008.1.2.4.90"``), and
+            JPEG-LS Lossless (``"1.2.840.10008.1.2.4.80"``).
         pixel_measures: Union[highdicom.PixelMeasures, None], optional
             Physical spacing of image pixels in `pixel_array`.
             If ``None``, it will be assumed that the segmentation image has the
@@ -1354,7 +1355,10 @@ def __init__(
         if pixel_measures is not None:
             sffg_item.PixelMeasuresSequence = pixel_measures
 
-        if plane_orientation is not None:
+        if (
+            self._coordinate_system is not None and
+            self._coordinate_system == CoordinateSystemNames.PATIENT
+        ):
             sffg_item.PlaneOrientationSequence = plane_orientation
 
         self.SharedFunctionalGroupsSequence = [sffg_item]
@@ -1612,9 +1616,11 @@ def __init__(
                         CoordinateSystemNames.SLIDE
                     ):
                         index_values = [
-                            np.where(
-                                (dimension_position_values[idx] == pos)
-                            )[0][0] + 1
+                            int(
+                                np.where(
+                                    (dimension_position_values[idx] == pos)
+                                )[0][0] + 1
+                            )
                             for idx, pos in enumerate(
                                 plane_position_values[j]
                             )
@@ -1625,11 +1631,12 @@
                         # Sequence points to (Image Position Patient) has a
                        # value multiplicity greater than one.
                        index_values = [
-                            np.where(
-                                (dimension_position_values[idx] == pos).all(
-                                    axis=1
-                                )
-                            )[0][0] + 1
+                            int(
+                                np.where(
+                                    (dimension_position_values[idx] == pos)
+                                    .all(axis=1)
+                                )[0][0] + 1
+                            )
                             for idx, pos in enumerate(
                                 plane_position_values[j]
                             )
@@ -1641,7 +1648,7 @@ def __init__(
                             'dimension index values: {}'.format(j, error)
                         )
                     frame_content_item.DimensionIndexValues = (
-                        [segment_number] + index_values
+                        [int(segment_number)] + index_values
                     )
                     pffp_item.FrameContentSequence = [frame_content_item]
                     if has_ref_frame_uid:
@@ -1696,7 +1703,7 @@ def __init__(
                         logger.warning('spatial locations not preserved')
 
                     identification = Dataset()
-                    identification.ReferencedSegmentNumber = segment_number
+                    identification.ReferencedSegmentNumber = int(segment_number)
                     pffp_item.SegmentIdentificationSequence = [
                         identification,
                     ]
@@ -2647,7 +2654,7 @@ def _get_pixels_by_seg_frame(
         relabel: bool
            If True and ``combine_segments`` is ``True``, the pixel values in
            the output array are relabelled into the range ``0`` to
-            ``len(segment_numbers)`` (inclusive) accoring to the position of
+            ``len(segment_numbers)`` (inclusive) according to the position of
            the original segment numbers in ``segment_numbers`` parameter.
            If ``combine_segments`` is ``False``, this has no effect.
rescale_fractional: bool @@ -3019,7 +3026,7 @@ def get_pixels_by_source_instance( relabel: bool, optional If True and ``combine_segments`` is ``True``, the pixel values in the output array are relabelled into the range ``0`` to - ``len(segment_numbers)`` (inclusive) accoring to the position of + ``len(segment_numbers)`` (inclusive) according to the position of the original segment numbers in ``segment_numbers`` parameter. If ``combine_segments`` is ``False``, this has no effect. ignore_spatial_locations: bool, optional @@ -3238,7 +3245,7 @@ def get_pixels_by_source_frame( relabel: bool, optional If True and ``combine_segments`` is ``True``, the pixel values in the output array are relabelled into the range ``0`` to - ``len(segment_numbers)`` (inclusive) accoring to the position of + ``len(segment_numbers)`` (inclusive) according to the position of the original segment numbers in ``segment_numbers`` parameter. If ``combine_segments`` is ``False``, this has no effect. ignore_spatial_locations: bool, optional @@ -3503,7 +3510,7 @@ def get_pixels_by_dimension_index_values( relabel: bool, optional If True and ``combine_segments`` is ``True``, the pixel values in the output array are relabelled into the range ``0`` to - ``len(segment_numbers)`` (inclusive) accoring to the position of + ``len(segment_numbers)`` (inclusive) according to the position of the original segment numbers in ``segment_numbers`` parameter. If ``combine_segments`` is ``False``, this has no effect. assert_missing_frames_are_empty: bool, optional diff --git a/src/highdicom/sr/__init__.py b/src/highdicom/sr/__init__.py index 69008253..e116f319 100644 --- a/src/highdicom/sr/__init__.py +++ b/src/highdicom/sr/__init__.py @@ -1,4 +1,4 @@ -"""Package for creationg of Structured Report (SR) instances.""" +"""Package for creation of Structured Report (SR) instances.""" from highdicom.sr.coding import CodedConcept from highdicom.sr.content import ( FindingSite, @@ -72,6 +72,7 @@ TextContentItem, TimeContentItem, UIDRefContentItem, + WaveformContentItem, ) SOP_CLASS_UIDS = { diff --git a/src/highdicom/sr/sop.py b/src/highdicom/sr/sop.py index c4a4aaf9..44ed97a2 100644 --- a/src/highdicom/sr/sop.py +++ b/src/highdicom/sr/sop.py @@ -127,7 +127,7 @@ def __init__( performed_procedure_codes: Union[List[highdicom.sr.CodedConcept], None], optional Codes of the performed procedures that resulted in the SR document requested_procedures: Union[List[pydicom.dataset.Dataset], None], optional - Requested procedures that are being fullfilled by creation of the + Requested procedures that are being fulfilled by creation of the SR document previous_versions: Union[List[pydicom.dataset.Dataset], None], optional Instances representing previous versions of the SR document @@ -356,7 +356,7 @@ class EnhancedSR(_SR): """SOP class for an Enhanced Structured Report (SR) document, whose content may include textual and a minimal amount of coded information, - numeric measurement values, references to SOP Instances (retricted to the + numeric measurement values, references to SOP Instances (restricted to the leaves of the tree), as well as 2D spatial or temporal regions of interest within such SOP Instances. 
""" @@ -432,7 +432,7 @@ def __init__( performed_procedure_codes: Union[List[highdicom.sr.CodedConcept], None], optional Codes of the performed procedures that resulted in the SR document requested_procedures: Union[List[pydicom.dataset.Dataset], None], optional - Requested procedures that are being fullfilled by creation of the + Requested procedures that are being fulfilled by creation of the SR document previous_versions: Union[List[pydicom.dataset.Dataset], None], optional Instances representing previous versions of the SR document @@ -563,7 +563,7 @@ def __init__( performed_procedure_codes: Union[List[highdicom.sr.CodedConcept], None], optional Codes of the performed procedures that resulted in the SR document requested_procedures: Union[List[pydicom.dataset.Dataset], None], optional - Requested procedures that are being fullfilled by creation of the + Requested procedures that are being fulfilled by creation of the SR document previous_versions: Union[List[pydicom.dataset.Dataset], None], optional Instances representing previous versions of the SR document @@ -725,7 +725,7 @@ def __init__( performed_procedure_codes: Union[List[highdicom.sr.CodedConcept], None], optional Codes of the performed procedures that resulted in the SR document requested_procedures: Union[List[pydicom.dataset.Dataset], None], optional - Requested procedures that are being fullfilled by creation of the + Requested procedures that are being fulfilled by creation of the SR document previous_versions: Union[List[pydicom.dataset.Dataset], None], optional Instances representing previous versions of the SR document diff --git a/src/highdicom/sr/templates.py b/src/highdicom/sr/templates.py index 484c2556..42681bf2 100644 --- a/src/highdicom/sr/templates.py +++ b/src/highdicom/sr/templates.py @@ -2350,7 +2350,7 @@ def __init__( "General Region of Interest Measurement Modifiers" for options) finding_sites: Union[Sequence[highdicom.sr.FindingSite], None], optional - Coded description of one or more anatomic locations corresonding + Coded description of one or more anatomic locations corresponding to the image region from which measurement was taken method: Union[highdicom.sr.CodedConcept, pydicom.sr.coding.Code, None], optional Measurement method (see @@ -3274,7 +3274,7 @@ def __init__( algorithm_id: Union[highdicom.sr.AlgorithmIdentification, None], optional Identification of algorithm used for making measurements finding_sites: Union[Sequence[highdicom.sr.FindingSite], None], optional - Coded description of one or more anatomic locations corresonding + Coded description of one or more anatomic locations corresponding to the image region from which measurement was taken session: Union[str, None], optional Description of the session diff --git a/src/highdicom/sr/value_types.py b/src/highdicom/sr/value_types.py index dfa07487..16a1958c 100644 --- a/src/highdicom/sr/value_types.py +++ b/src/highdicom/sr/value_types.py @@ -1458,7 +1458,7 @@ def __init__( Number of frame(s) to which the reference applies in case of a multi-frame image referenced_segment_numbers: Union[int, Sequence[int], None], optional - Number of segment(s) to which the refernce applies in case of a + Number of segment(s) to which the reference applies in case of a segmentation image relationship_type: Union[highdicom.sr.RelationshipTypeValues, str, None], optional Type of relationship with parent content item @@ -1702,7 +1702,7 @@ class Scoord3DContentItem(ContentItem): Note ---- Spatial coordinates are defined in the patient or specimen-based coordinate 
-    system and have milimeter unit.
+    system and have millimeter unit.
 
    """
@@ -1955,7 +1955,7 @@ def __init__(
        referenced_waveform_channels: Union[Sequence[Tuple[int, int]], None], optional
            Pairs of waveform number (number of item in the Waveform
            Sequence) and channel definition number (number of item in the Channel
-            Defition Sequence) to which the reference applies in case of a
+            Definition Sequence) to which the reference applies in case of a
            multi-channel waveform
        relationship_type: Union[highdicom.sr.RelationshipTypeValues, str, None], optional
            Type of relationship with parent content item
diff --git a/src/highdicom/utils.py b/src/highdicom/utils.py
index 85521e87..07c9a15f 100644
--- a/src/highdicom/utils.py
+++ b/src/highdicom/utils.py
@@ -106,7 +106,7 @@ def compute_plane_position_tiled_full(
    Returns
    -------
    highdicom.PlanePositionSequence
-        Positon of the plane in the slide coordinate system
+        Position of the plane in the slide coordinate system
 
    Raises
    ------
@@ -262,7 +262,7 @@ def _compute_plane_position_tiled_full_efficiently(
    Returns
    -------
    highdicom.PlanePositionSequence
-        Positon of the plane in the slide coordinate system
+        Position of the plane in the slide coordinate system
 
    """
    row_offset_frame = ((row_index - 1) * rows)
diff --git a/tests/test_ko.py b/tests/test_ko.py
index d4ab94db..c88fdf4b 100644
--- a/tests/test_ko.py
+++ b/tests/test_ko.py
@@ -89,8 +89,8 @@ def test_construction(self):
        container = content[0]
        assert isinstance(container, ContainerContentItem)
        assert container.ContentTemplateSequence[0].TemplateIdentifier == '2010'
-        # Oberver Context (Person): 2
-        # Oberver Context (Device): 3
+        # Observer Context (Person): 2
+        # Observer Context (Device): 3
        # Description: 1
        # Referenced Objects: 4
        assert len(container.ContentSequence) == 10
diff --git a/tests/test_seg.py b/tests/test_seg.py
index 85b363d2..5da1cf8d 100644
--- a/tests/test_seg.py
+++ b/tests/test_seg.py
@@ -953,7 +953,7 @@ def test_construction_2(self):
        src_pm_item = src_shared_item.PixelMeasuresSequence[0]
        assert pm_item.PixelSpacing == src_pm_item.PixelSpacing
        assert pm_item.SliceThickness == src_pm_item.SliceThickness
-        assert len(shared_item.PlaneOrientationSequence) == 1
+        assert not hasattr(shared_item, "PlaneOrientationSequence")
        assert instance.ImageOrientationSlide == \
            self._sm_image.ImageOrientationSlide
        assert instance.TotalPixelMatrixOriginSequence == \
@@ -2507,11 +2507,33 @@ def test_spatial_positions_not_preserved(self):
        pm_item = shared_item.PixelMeasuresSequence[0]
        assert pm_item.PixelSpacing == list(pixel_spacing)
        assert pm_item.SliceThickness == slice_thickness
-        assert len(shared_item.PlaneOrientationSequence) == 1
-        po_item = shared_item.PlaneOrientationSequence[0]
-        assert po_item.ImageOrientationSlide == list(image_orientation)
+        assert not hasattr(shared_item, 'PlaneOrientationSequence')
        self.check_dimension_index_vals(instance)
+
+    def test_get_plane_positions_of_image_patient(self):
+        seq = DimensionIndexSequence(
+            coordinate_system='PATIENT'
+        )
+        plane_positions = seq.get_plane_positions_of_image(self._ct_multiframe)
+        for position in plane_positions:
+            assert isinstance(position, PlanePositionSequence)
+
+    def test_get_plane_positions_of_image_slide(self):
+        seq = DimensionIndexSequence(
+            coordinate_system='SLIDE'
+        )
+        plane_positions = seq.get_plane_positions_of_image(self._sm_image)
+        for position in plane_positions:
+            assert isinstance(position, PlanePositionSequence)
+
+    def test_get_plane_positions_of_series(self):
+        seq = DimensionIndexSequence(
+            coordinate_system='PATIENT'
+        )
+        plane_positions = seq.get_plane_positions_of_series(self._ct_series)
+        for position in plane_positions:
+            assert isinstance(position, PlanePositionSequence)
+
 class TestSegmentationParsing():