RCAL-965 Provide conversion from TVAC/FPS models to ScienceRawModel (#455)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
stscieisenhamer and pre-commit-ci[bot] authored Feb 13, 2025
1 parent a601ee7 commit 1734f6b
Showing 4 changed files with 229 additions and 30 deletions.
1 change: 1 addition & 0 deletions changes/455.feature.rst
@@ -0,0 +1 @@
Provide conversion from TVAC/FPS models to ScienceRawModel
80 changes: 55 additions & 25 deletions src/roman_datamodels/datamodels/_datamodels.py
@@ -6,15 +6,13 @@
from the schema manifest defined by RAD.
"""

from collections.abc import Mapping

import asdf
import numpy as np
from astropy.table import QTable

from roman_datamodels import stnode

from ._core import DataModel
from ._utils import _node_update

__all__ = []

@@ -130,6 +128,49 @@ class ImageModel(_RomanDataModel):
class ScienceRawModel(_RomanDataModel):
    _node_type = stnode.WfiScienceRaw

    @classmethod
    def from_tvac_raw(cls, model):
        """Convert TVAC/FPS into ScienceRawModel

        romancal supports processing a selection of files which use an outdated
        schema. It supports these with a bespoke method that converts the files
        to the new format when they are read in dq_init. This conversion does
        not do a detailed mapping between all of the new and old metadata, but
        instead opportunistically looks for fields with common names and
        assigns them. Other metadata with non-matching names is simply copied
        in place. This allows processing to proceed and preserves the original
        metadata, but the resulting files have duplicates of many entries.

        Parameters
        ----------
        model : ScienceRawModel, TvacModel, FpsModel
            Model to convert from.

        Returns
        -------
        science_raw_model : ScienceRawModel
            The ScienceRawModel built from the input model.
            If the input was a ScienceRawModel, that model is simply returned.
        """
        ALLOWED_MODELS = (FpsModel, ScienceRawModel, TvacModel)

        if isinstance(model, cls):
            return model
        if not isinstance(model, ALLOWED_MODELS):
            raise ValueError(f"Input must be one of {ALLOWED_MODELS}")

        # Create base raw node with dummy values (for validation)
        from roman_datamodels.maker_utils import mk_level1_science_raw

        raw = mk_level1_science_raw(shape=model.shape)

        _node_update(raw, model, extras=("meta.statistics",), extras_key="tvac")

        # Create model from node
        raw_model = ScienceRawModel(raw)
        return raw_model


class MsosStackModel(_RomanDataModel):
    _node_type = stnode.MsosStack

@@ -140,22 +181,29 @@ class RampModel(_RomanDataModel):

    @classmethod
    def from_science_raw(cls, model):
        """
        Attempt to construct a RampModel from a DataModel
        """Attempt to construct a RampModel from a DataModel

        If the model has a resultantdq attribute, this is copied into
        the RampModel.groupdq attribute.

        Otherwise, this conversion does not do a detailed mapping between all
        of the new and old metadata, but instead opportunistically looks for
        fields with common names and assigns them. Other metadata with
        non-matching names is simply copied in place. This allows processing to
        proceed and preserves the original metadata, but the resulting files
        have duplicates of many entries.

        Parameters
        ----------
        model : ScienceRawModel, TvacModel
        model : FpsModel, RampModel, ScienceRawModel, TvacModel
            The input data model (a RampModel will also work).

        Returns
        -------
        ramp_model : RampModel
            The RampModel built from the input model. If the input is already
            a RampModel, it is simply returned.
        """
        ALLOWED_MODELS = (FpsModel, RampModel, ScienceRawModel, TvacModel)

@@ -173,25 +221,7 @@ def from_science_raw(cls, model):
        if hasattr(model, "resultantdq"):
            ramp.groupdq = model.resultantdq.copy()

        # Define how to recursively copy all attributes.
        def node_update(ramp, other):
            """Implement update to directly access each value"""
            for key in other.keys():
                if key == "resultantdq":
                    continue
                if key in ramp:
                    if isinstance(ramp[key], Mapping):
                        node_update(getattr(ramp, key), getattr(other, key))
                    elif isinstance(ramp[key], list):
                        setattr(ramp, key, getattr(other, key).data)
                    elif isinstance(ramp[key], np.ndarray):
                        setattr(ramp, key, getattr(other, key).astype(ramp[key].dtype))
                    else:
                        setattr(ramp, key, getattr(other, key))
                else:
                    ramp[key] = other[key]

        node_update(ramp, model)
        _node_update(ramp, model, ignore=("resultantdq",))

        # Create model from node
        ramp_model = RampModel(ramp)
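Likewise, a sketch of the updated RampModel entry point (the filename is a placeholder). Per the docstring above, a resultantdq array on TVAC/FPS inputs is copied into groupdq rather than carried along, and a RampModel input is returned unchanged.

    import roman_datamodels.datamodels as rdm

    # Build a RampModel from any accepted input (FpsModel, RampModel,
    # ScienceRawModel, TvacModel).
    with rdm.open("r0000101_uncal.asdf") as raw:
        ramp = rdm.RampModel.from_science_raw(raw)
        assert not hasattr(ramp, "resultantdq")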
95 changes: 95 additions & 0 deletions src/roman_datamodels/datamodels/_utils.py
@@ -8,6 +8,7 @@
from pathlib import Path

import asdf
import numpy as np

from roman_datamodels import validate

@@ -23,6 +24,100 @@ class FilenameMismatchWarning(UserWarning):
"""


def _node_update(to_node, from_node, extras=None, extras_key=None, ignore=None):
    """Copy node contents from an existing node to another existing node

    How the copy occurs depends on the existence of keys in `to_node`.

    If a key exists in `to_node`, contents are converted from the `from_node` stnode type to
    the stnode type expected, in order to preserve validation of the node.

    If a key only exists in `from_node`, the contents are copied as-is.

    If a key appears in the list `extras`, the contents are placed in the dict `["extras"]`.
    If `extras_key` is given, then the sub-dictionary `["extras"][extras_key]` is used.
    Extra keys are used to avoid collisions between node trees where the underlying structures are
    completely different.

    Keys in `ignore` are not considered.

    Keys are also matched as dotted paths, e.g. `meta.statistics` refers to the
    `statistics` entry under `meta`.

    Parameters
    ----------
    to_node : stnode
        Node to receive the contents.
    from_node : stnode, DataModel
        Node to copy from.
    extras : [str[,...]]
        Keys that may create collisions between the two node trees. All such keys are placed
        in the `extras` key. If `extras_key` is defined, the contents are placed in a subdict
        of that name.
    extras_key : str or None
        See parameter `extras`.
    ignore : list-like or None
        Keys that should be completely ignored.
    """

    # Define utility functions
    def _descend(attributes, key):
        next_attributes = list()
        for item in attributes:
            level, _, name = item.partition(".")
            if level == key and name:
                next_attributes.append(name)
        return next_attributes

    def _traverse(to_node, from_node, extras=None, ignore=None):
        if extras is None:
            extras = tuple()
        new_extras = dict()
        if ignore is None:
            ignore = tuple()

        for key in from_node.keys():
            if key in ignore:
                continue
            if key in extras:
                new_extras[key] = from_node[key]
                continue
            if key in to_node:
                if isinstance(to_node[key], Mapping):
                    next_extras = _descend(extras, key)
                    next_ignores = _descend(ignore, key)
                    returned_extras = _traverse(
                        getattr(to_node, key), getattr(from_node, key), extras=next_extras, ignore=next_ignores
                    )
                    if returned_extras:
                        new_extras[key] = returned_extras
                else:
                    if isinstance(to_node[key], list):
                        value = getattr(from_node, key).data
                    elif isinstance(to_node[key], np.ndarray):
                        value = getattr(from_node, key).astype(to_node[key].dtype)
                        value = getattr(value, "value", value)
                    else:
                        value = getattr(from_node, key)
                    setattr(to_node, key, value)
            else:
                to_node[key] = from_node[key]
        return new_extras

    # Now do the copy.
    new_extras = _traverse(to_node, from_node, extras=extras, ignore=ignore)
    if new_extras:
        extras_node = to_node.get("extras", dict())
        if extras_key:
            extras_node[extras_key] = new_extras
        else:
            extras_node.update(new_extras)
        to_node["extras"] = extras_node

def _open_asdf(init, lazy_tree=True, **kwargs):
"""
Open init with `asdf.open`.
Expand Down
83 changes: 78 additions & 5 deletions tests/test_models.py
@@ -938,9 +938,17 @@ def test_model_only_init_with_correct_node(node, correct, model):
model(img)


def test_ramp_from_science_raw():
    raw = datamodels.ScienceRawModel(utils.mk_level1_science_raw(shape=(2, 8, 8)))

@pytest.mark.parametrize(
    "mk_raw",
    [
        lambda: datamodels.ScienceRawModel(utils.mk_level1_science_raw(shape=(2, 8, 8), dq=True)),
        lambda: datamodels.TvacModel(utils.mk_tvac(shape=(2, 8, 8))),
        lambda: datamodels.FpsModel(utils.mk_fps(shape=(2, 8, 8))),
        lambda: datamodels.RampModel(utils.mk_ramp(shape=(2, 8, 8))),
    ],
)
def test_ramp_from_science_raw(mk_raw):
    raw = mk_raw()
    ramp = datamodels.RampModel.from_science_raw(raw)
    for key in ramp:
        if not hasattr(raw, key):
@@ -952,21 +960,86 @@ def test_ramp_from_science_raw():
            assert_array_equal(ramp_value, raw_value.astype(ramp_value.dtype))

        elif key == "meta":
            for meta_key in ramp_value:
            ramp_meta = ramp_value.to_flat_dict(include_arrays=False, recursive=True)
            raw_meta = raw_value.to_flat_dict(include_arrays=False, recursive=True)
            for meta_key in ramp_meta:
                if meta_key == "model_type":
                    ramp_value[meta_key] = ramp.__class__.__name__
                    raw_value[meta_key] = raw.__class__.__name__
                    continue
                elif meta_key == "cal_step":
                    continue
                assert_node_equal(ramp_value[meta_key], raw_value[meta_key])
                if meta_key in raw_meta:
                    assert ramp_meta[meta_key] == raw_meta[meta_key]

        elif isinstance(ramp_value, stnode.DNode):
            assert_node_equal(ramp_value, raw_value)

        else:
            raise ValueError(f"Unexpected type {type(ramp_value)}, {key}")  # pragma: no cover

    # Check that resultantdq gets copied to groupdq
    if hasattr(raw, "resultantdq"):
        assert hasattr(ramp, "groupdq")
        assert not hasattr(ramp, "resultantdq")


def test_science_raw_from_tvac_raw_invalid_input():
    """Test for invalid input"""
    model = datamodels.RampModel(utils.mk_ramp())
    with pytest.raises(ValueError):
        _ = datamodels.ScienceRawModel.from_tvac_raw(model)

@pytest.mark.parametrize(
    "mk_tvac",
    [
        lambda: datamodels.ScienceRawModel(utils.mk_level1_science_raw(shape=(2, 8, 8))),
        lambda: datamodels.TvacModel(utils.mk_tvac(shape=(2, 8, 8))),
        lambda: datamodels.FpsModel(utils.mk_fps(shape=(2, 8, 8))),
    ],
)
def test_science_raw_from_tvac_raw(mk_tvac):
    """Test conversion from expected inputs"""
    tvac = mk_tvac()

    raw = datamodels.ScienceRawModel.from_tvac_raw(tvac)
    for key in raw:
        if not hasattr(tvac, key):
            continue

        raw_value = getattr(raw, key)
        tvac_value = getattr(tvac, key)
        if isinstance(raw_value, np.ndarray):
            assert_array_equal(raw_value, tvac_value.astype(raw_value.dtype))

        elif key == "meta":
            raw_meta = raw_value.to_flat_dict(include_arrays=False, recursive=True)
            tvac_meta = tvac_value.to_flat_dict(include_arrays=False, recursive=True)
            for meta_key in raw_meta:
                if meta_key == "model_type":
                    raw_value[meta_key] = raw.__class__.__name__
                    tvac_value[meta_key] = tvac.__class__.__name__
                    continue
                elif meta_key == "cal_step":
                    continue
                if meta_key in tvac_meta:
                    assert raw_meta[meta_key] == tvac_meta[meta_key]

        elif isinstance(raw_value, stnode.DNode):
            assert_node_equal(raw_value, tvac_value)

        else:
            raise ValueError(f"Unexpected type {type(raw_value)}, {key}")  # pragma: no cover

    # If tvac/fps, check that statistics are handled properly
    if isinstance(tvac, datamodels.TvacModel | datamodels.FpsModel):
        assert hasattr(raw, "extras")
        assert hasattr(raw.extras, "tvac")
        assert hasattr(raw.extras.tvac, "meta")
        assert hasattr(raw.extras.tvac.meta, "statistics")
        assert raw.extras.tvac.meta.statistics == tvac.meta.statistics


@pytest.mark.parametrize("model", datamodels.MODEL_REGISTRY.values())
@pytest.mark.filterwarnings("ignore:This function assumes shape is 2D")
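To run just the conversion tests touched here, a pytest keyword selection along these lines should work:

    pytest tests/test_models.py -k "from_science_raw or from_tvac_raw"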
