RCAL-965 Provide conversion from TVAC/FPS models to ScienceRawModel (#455)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
stscieisenhamer and pre-commit-ci[bot] authored Feb 13, 2025
1 parent a601ee7 commit 1734f6b
Showing 4 changed files with 229 additions and 30 deletions.
1 change: 1 addition & 0 deletions changes/455.feature.rst
@@ -0,0 +1 @@
Provide conversion from TVAC/FPS models to ScienceRawModel
80 changes: 55 additions & 25 deletions src/roman_datamodels/datamodels/_datamodels.py
@@ -6,15 +6,13 @@
from the schema manifest defined by RAD.
"""

from collections.abc import Mapping

import asdf
import numpy as np
from astropy.table import QTable

from roman_datamodels import stnode

from ._core import DataModel
from ._utils import _node_update

__all__ = []

@@ -130,6 +128,49 @@ class ImageModel(_RomanDataModel):
class ScienceRawModel(_RomanDataModel):
    _node_type = stnode.WfiScienceRaw

    @classmethod
    def from_tvac_raw(cls, model):
        """Convert TVAC/FPS into ScienceRawModel

        romancal supports processing a selection of files which use an outdated
        schema. It supports these with a bespoke method that converts the files
        to the new format when they are read in dq_init. This conversion does
        not do a detailed mapping between all of the new and old metadata, but
        instead opportunistically looks for fields with common names and
        assigns them. Other metadata with non-matching names is simply copied
        in place. This allows processing to proceed and preserves the original
        metadata, but the resulting files have duplicates of many entries.

        Parameters
        ----------
        model : ScienceRawModel, TvacModel, FpsModel
            Model to convert from.

        Returns
        -------
        science_raw_model : ScienceRawModel
            The ScienceRawModel built from the input model.
            If the input was a ScienceRawModel, that model is simply returned.
        """
        ALLOWED_MODELS = (FpsModel, ScienceRawModel, TvacModel)

        if isinstance(model, cls):
            return model
        if not isinstance(model, ALLOWED_MODELS):
            raise ValueError(f"Input must be one of {ALLOWED_MODELS}")

        # Create base raw node with dummy values (for validation)
        from roman_datamodels.maker_utils import mk_level1_science_raw

        raw = mk_level1_science_raw(shape=model.shape)

        _node_update(raw, model, extras=("meta.statistics",), extras_key="tvac")

        # Create model from node
        raw_model = ScienceRawModel(raw)
        return raw_model


class MsosStackModel(_RomanDataModel):
    _node_type = stnode.MsosStack

@@ -140,22 +181,29 @@ class RampModel(_RomanDataModel):

    @classmethod
    def from_science_raw(cls, model):
        """
        Attempt to construct a RampModel from a DataModel
        """Attempt to construct a RampModel from a DataModel

        If the model has a resultantdq attribute, this is copied into
        the RampModel.groupdq attribute.

        Otherwise, this conversion does not do a detailed mapping between all
        of the new and old metadata, but instead opportunistically looks for
        fields with common names and assigns them. Other metadata with
        non-matching names is simply copied in place. This allows processing to
        proceed and preserves the original metadata, but the resulting files
        have duplicates of many entries.

        Parameters
        ----------
        model : ScienceRawModel, TvacModel
        model : FpsModel, RampModel, ScienceRawModel, TvacModel
            The input data model (a RampModel will also work).

        Returns
        -------
        ramp_model : RampModel
            The RampModel built from the input model. If the input is already
            a RampModel, it is simply returned.
        """
        ALLOWED_MODELS = (FpsModel, RampModel, ScienceRawModel, TvacModel)

@@ -173,25 +221,7 @@ def from_science_raw(cls, model):
        if hasattr(model, "resultantdq"):
            ramp.groupdq = model.resultantdq.copy()

        # Define how to recursively copy all attributes.
        def node_update(ramp, other):
            """Implement update to directly access each value"""
            for key in other.keys():
                if key == "resultantdq":
                    continue
                if key in ramp:
                    if isinstance(ramp[key], Mapping):
                        node_update(getattr(ramp, key), getattr(other, key))
                    elif isinstance(ramp[key], list):
                        setattr(ramp, key, getattr(other, key).data)
                    elif isinstance(ramp[key], np.ndarray):
                        setattr(ramp, key, getattr(other, key).astype(ramp[key].dtype))
                    else:
                        setattr(ramp, key, getattr(other, key))
                else:
                    ramp[key] = other[key]

        node_update(ramp, model)
        _node_update(ramp, model, ignore=("resultantdq",))

        # Create model from node
        ramp_model = RampModel(ramp)
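Likewise, a sketch of the updated RampModel entry point (the filename is a placeholder). Per the docstring above, a resultantdq array on TVAC/FPS inputs is copied into groupdq rather than carried along, and a RampModel input is returned unchanged.

    import roman_datamodels.datamodels as rdm

    # Build a RampModel from any accepted input (FpsModel, RampModel,
    # ScienceRawModel, TvacModel).
    with rdm.open("r0000101_uncal.asdf") as raw:
        ramp = rdm.RampModel.from_science_raw(raw)
        assert not hasattr(ramp, "resultantdq")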
95 changes: 95 additions & 0 deletions src/roman_datamodels/datamodels/_utils.py
@@ -8,6 +8,7 @@
from pathlib import Path

import asdf
import numpy as np

from roman_datamodels import validate

@@ -23,6 +24,100 @@ class FilenameMismatchWarning(UserWarning):
"""


def _node_update(to_node, from_node, extras=None, extras_key=None, ignore=None):
    """Copy node contents from an existing node to another existing node

    How the copy occurs depends on the existence of keys in `to_node`.

    If a key exists in `to_node`, contents are converted from the `from_node` stnode type to
    the stnode type expected, in order to preserve validation of the node.

    If a key only exists in `from_node`, the contents are copied as-is.

    If a key appears in the list `extras`, the contents are placed in the dict `["extras"]`.
    If `extras_key` is given, then the sub-dictionary `["extras"][extras_key]` is used.
    Extra keys are used to avoid collisions between node trees where the underlying structures are
    completely different.

    Keys in `ignore` are not considered.

    Keys are also matched as dotted paths, e.g. `meta.statistics` refers to the
    `statistics` entry under `meta`.

    Parameters
    ----------
    to_node : stnode
        Node to receive the contents.
    from_node : stnode, DataModel
        Node to copy from.
    extras : [str[,...]]
        Keys that may create collisions between the two node trees. All such keys are placed
        in the `extras` key. If `extras_key` is defined, the contents are placed in a subdict
        of that name.
    extras_key : str or None
        See parameter `extras`.
    ignore : list-like or None
        Keys that should be completely ignored.
    """

    # Define utility functions
    def _descend(attributes, key):
        next_attributes = list()
        for item in attributes:
            level, _, name = item.partition(".")
            if level == key and name:
                next_attributes.append(name)
        return next_attributes

    def _traverse(to_node, from_node, extras=None, ignore=None):
        if extras is None:
            extras = tuple()
        new_extras = dict()
        if ignore is None:
            ignore = tuple()

        for key in from_node.keys():
            if key in ignore:
                continue
            if key in extras:
                new_extras[key] = from_node[key]
                continue
            if key in to_node:
                if isinstance(to_node[key], Mapping):
                    next_extras = _descend(extras, key)
                    next_ignores = _descend(ignore, key)
                    returned_extras = _traverse(
                        getattr(to_node, key), getattr(from_node, key), extras=next_extras, ignore=next_ignores
                    )
                    if returned_extras:
                        new_extras[key] = returned_extras
                else:
                    if isinstance(to_node[key], list):
                        value = getattr(from_node, key).data
                    elif isinstance(to_node[key], np.ndarray):
                        value = getattr(from_node, key).astype(to_node[key].dtype)
                        value = getattr(value, "value", value)
                    else:
                        value = getattr(from_node, key)
                    setattr(to_node, key, value)
            else:
                to_node[key] = from_node[key]
        return new_extras

    # Now do the copy.
    new_extras = _traverse(to_node, from_node, extras=extras, ignore=ignore)
    if new_extras:
        extras_node = to_node.get("extras", dict())
        if extras_key:
            extras_node[extras_key] = new_extras
        else:
            extras_node.update(new_extras)
        to_node["extras"] = extras_node

def _open_asdf(init, lazy_tree=True, **kwargs):
"""
Open init with `asdf.open`.
Expand Down
83 changes: 78 additions & 5 deletions tests/test_models.py
@@ -938,9 +938,17 @@ def test_model_only_init_with_correct_node(node, correct, model):
model(img)


def test_ramp_from_science_raw():
    raw = datamodels.ScienceRawModel(utils.mk_level1_science_raw(shape=(2, 8, 8)))

@pytest.mark.parametrize(
    "mk_raw",
    [
        lambda: datamodels.ScienceRawModel(utils.mk_level1_science_raw(shape=(2, 8, 8), dq=True)),
        lambda: datamodels.TvacModel(utils.mk_tvac(shape=(2, 8, 8))),
        lambda: datamodels.FpsModel(utils.mk_fps(shape=(2, 8, 8))),
        lambda: datamodels.RampModel(utils.mk_ramp(shape=(2, 8, 8))),
    ],
)
def test_ramp_from_science_raw(mk_raw):
    raw = mk_raw()
    ramp = datamodels.RampModel.from_science_raw(raw)
    for key in ramp:
        if not hasattr(raw, key):
@@ -952,21 +960,86 @@ def test_ramp_from_science_raw():
            assert_array_equal(ramp_value, raw_value.astype(ramp_value.dtype))

        elif key == "meta":
            for meta_key in ramp_value:
            ramp_meta = ramp_value.to_flat_dict(include_arrays=False, recursive=True)
            raw_meta = raw_value.to_flat_dict(include_arrays=False, recursive=True)
            for meta_key in ramp_meta:
                if meta_key == "model_type":
                    ramp_value[meta_key] = ramp.__class__.__name__
                    raw_value[meta_key] = raw.__class__.__name__
                    continue
                elif meta_key == "cal_step":
                    continue
                assert_node_equal(ramp_value[meta_key], raw_value[meta_key])
                if meta_key in raw_meta:
                    assert ramp_meta[meta_key] == raw_meta[meta_key]

        elif isinstance(ramp_value, stnode.DNode):
            assert_node_equal(ramp_value, raw_value)

        else:
            raise ValueError(f"Unexpected type {type(ramp_value)}, {key}")  # pragma: no cover

    # Check that resultantdq gets copied to groupdq
    if hasattr(raw, "resultantdq"):
        assert hasattr(ramp, "groupdq")
        assert not hasattr(ramp, "resultantdq")


def test_science_raw_from_tvac_raw_invalid_input():
    """Test for invalid input"""
    model = datamodels.RampModel(utils.mk_ramp())
    with pytest.raises(ValueError):
        _ = datamodels.ScienceRawModel.from_tvac_raw(model)

@pytest.mark.parametrize(
    "mk_tvac",
    [
        lambda: datamodels.ScienceRawModel(utils.mk_level1_science_raw(shape=(2, 8, 8))),
        lambda: datamodels.TvacModel(utils.mk_tvac(shape=(2, 8, 8))),
        lambda: datamodels.FpsModel(utils.mk_fps(shape=(2, 8, 8))),
    ],
)
def test_science_raw_from_tvac_raw(mk_tvac):
    """Test conversion from expected inputs"""
    tvac = mk_tvac()

    raw = datamodels.ScienceRawModel.from_tvac_raw(tvac)
    for key in raw:
        if not hasattr(tvac, key):
            continue

        raw_value = getattr(raw, key)
        tvac_value = getattr(tvac, key)
        if isinstance(raw_value, np.ndarray):
            assert_array_equal(raw_value, tvac_value.astype(raw_value.dtype))

        elif key == "meta":
            raw_meta = raw_value.to_flat_dict(include_arrays=False, recursive=True)
            tvac_meta = tvac_value.to_flat_dict(include_arrays=False, recursive=True)
            for meta_key in raw_meta:
                if meta_key == "model_type":
                    raw_value[meta_key] = raw.__class__.__name__
                    tvac_value[meta_key] = tvac.__class__.__name__
                    continue
                elif meta_key == "cal_step":
                    continue
                if meta_key in tvac_meta:
                    assert raw_meta[meta_key] == tvac_meta[meta_key]

        elif isinstance(raw_value, stnode.DNode):
            assert_node_equal(raw_value, tvac_value)

        else:
            raise ValueError(f"Unexpected type {type(raw_value)}, {key}")  # pragma: no cover

    # If tvac/fps, check that statistics are handled properly
    if isinstance(tvac, datamodels.TvacModel | datamodels.FpsModel):
        assert hasattr(raw, "extras")
        assert hasattr(raw.extras, "tvac")
        assert hasattr(raw.extras.tvac, "meta")
        assert hasattr(raw.extras.tvac.meta, "statistics")
        assert raw.extras.tvac.meta.statistics == tvac.meta.statistics


@pytest.mark.parametrize("model", datamodels.MODEL_REGISTRY.values())
@pytest.mark.filterwarnings("ignore:This function assumes shape is 2D")
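To run just the conversion tests touched here, a pytest keyword selection along these lines should work:

    pytest tests/test_models.py -k "from_science_raw or from_tvac_raw"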
