From 763b6d88c730f8eafcecc3ed6cc3738b146982a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Therese=20Natter=C3=B8y?= <61694854+tnatt@users.noreply.github.com> Date: Mon, 18 Mar 2024 23:08:17 +0100 Subject: [PATCH] CLN: Use pydantic for internal MetaData --- src/fmu/dataio/_metadata.py | 344 +++++------------- src/fmu/dataio/_utils.py | 4 +- src/fmu/dataio/case.py | 11 +- src/fmu/dataio/dataio.py | 47 ++- .../datastructure/_internal/internal.py | 48 ++- src/fmu/dataio/providers/_objectdata.py | 8 +- src/fmu/dataio/providers/_objectdata_base.py | 8 +- src/fmu/dataio/providers/_objectdata_xtgeo.py | 2 +- tests/test_units/test_dataio.py | 13 +- tests/test_units/test_metadata_class.py | 207 +++++------ .../test_prerealization_surfaces.py | 4 +- 11 files changed, 303 insertions(+), 393 deletions(-) diff --git a/src/fmu/dataio/_metadata.py b/src/fmu/dataio/_metadata.py index 942891c04..cbb46648d 100644 --- a/src/fmu/dataio/_metadata.py +++ b/src/fmu/dataio/_metadata.py @@ -28,7 +28,6 @@ read_metadata_from_file, ) from .datastructure._internal import internal -from .datastructure.configuration import global_configuration from .datastructure.meta import meta from .providers._filedata import FileDataProvider from .providers._fmu import FmuProvider @@ -42,21 +41,7 @@ logger: Final = null_logger(__name__) -# Generic, being resused several places: - - -def default_meta_dollars() -> dict[str, str]: - return internal.JsonSchemaMetadata( - schema_=TypeAdapter(AnyHttpUrl).validate_strings(SCHEMA), # type: ignore[call-arg] - version=VERSION, - source=SOURCE, - ).model_dump( - mode="json", - by_alias=True, - ) - - -def generate_meta_tracklog() -> list[meta.TracklogEvent]: +def generate_meta_tracklog() -> meta.TracklogEvent: """Initialize the tracklog with the 'created' event only.""" return [ meta.TracklogEvent.model_construct( @@ -80,6 +65,56 @@ def generate_meta_tracklog() -> list[meta.TracklogEvent]: ] +def _get_objectdata_provider( + object: types.Inferrable, + dataio: ExportData, + 
meta_existing: dict | None = None, +) -> ObjectDataProvider: + """Analyze the actual object together with input settings. + + This will provide input to the ``data`` block of the metas but has also + valuable settings which are needed when providing filedata etc. + + Hence this must be ran early or first. + """ + objdata = objectdata_provider_factory(object, dataio, meta_existing) + objdata.derive_metadata() + return objdata + + +def _get_filedata_provider( + dataio: ExportData, objdata: ObjectDataProvider, fmudata: FmuProvider | None +) -> FileDataProvider: + filedata = FileDataProvider( + dataio=dataio, + objdata=objdata, + rootpath=dataio._rootpath, # has been updated to case_path if fmurun + itername=fmudata.get_iter_name() if fmudata else "", + realname=fmudata.get_real_name() if fmudata else "", + ) + filedata.derive_filedata() + return filedata + + +def _compute_md5( + dataio: ExportData, objdata: ObjectDataProvider, object: types.Inferrable +) -> str: + """Return the file block in the metadata.""" + + if not objdata.extension.startswith("."): + raise ValueError("An extension must start with '.'") + with NamedTemporaryFile( + buffering=0, + suffix=objdata.extension, + ) as tf: + logger.info("Compute MD5 sum for tmp file...: %s", tf.name) + return export_file_compute_checksum_md5( + obj=object, + filename=Path(tf.name), + flag=dataio._usefmtflag, + ) + + @dataclass class MetaData: """Class for sampling, process and holding all metadata in an ExportData instance. 
@@ -111,245 +146,68 @@ class MetaData: # input variables obj: types.Inferrable dataio: ExportData - compute_md5: bool = True - - # storage state variables - objdata: ObjectDataProvider | None = field(default=None, init=False) - fmudata: FmuProvider | None = field(default=None, init=False) - iter_name: str = field(default="", init=False) - real_name: str = field(default="", init=False) - - meta_class: str = field(default="", init=False) - meta_masterdata: dict = field(default_factory=dict, init=False) - meta_objectdata: dict = field(default_factory=dict, init=False) - meta_dollars: dict = field(default_factory=default_meta_dollars, init=False) - meta_access: dict = field(default_factory=dict, init=False) - meta_file: dict = field(default_factory=dict, init=False) - meta_tracklog: list = field(default_factory=list, init=False) - meta_fmu: dict = field(default_factory=dict, init=False) - # temporary storage for preprocessed data: - meta_xpreprocessed: dict = field(default_factory=dict, init=False) - - # relevant when ERT* fmu_context; same as rootpath in the ExportData class!: - rootpath: str = field(default="", init=False) - - # if re-using existing metadata - meta_existing: dict = field(default_factory=dict, init=False) - - def __post_init__(self) -> None: - logger.info("Initialize _MetaData instance.") - - # one special case is that obj is a file path, and dataio.reuse_metadata_rule is - # active. In this case we read the existing metadata here and reuse parts - # according to rule described in string self.reuse_metadata_rule! - if isinstance(self.obj, (str, Path)) and self.dataio.reuse_metadata_rule: - logger.info("Partially reuse existing metadata from %s", self.obj) - self.meta_existing = read_metadata_from_file(self.obj) - - self.rootpath = str(self.dataio._rootpath.absolute()) - - def _populate_meta_objectdata(self) -> None: - """Analyze the actual object together with input settings. 
- - This will provide input to the ``data`` block of the metas but has also - valuable settings which are needed when providing filedata etc. - - Hence this must be ran early or first. - """ - self.objdata = objectdata_provider_factory( - self.obj, self.dataio, self.meta_existing - ) - self.objdata.derive_metadata() - self.meta_objectdata = self.objdata.metadata - - def _populate_meta_fmu(self) -> None: - """Populate the fmu block in the metadata. - - This block may be missing in case the client is not within a FMU run, e.g. - it runs from RMS interactive - - The _FmuDataProvider is ran to provide this information - """ - fmudata = FmuProvider( - model=self.dataio.config.get("model", None), - fmu_context=FmuContext.get(self.dataio.fmu_context), - casepath_proposed=self.dataio.casepath or "", - include_ertjobs=self.dataio.include_ertjobs, - forced_realization=self.dataio.realization, - workflow=self.dataio.workflow, - ) - logger.info("FMU provider is %s", fmudata.get_provider()) - - self.meta_fmu = fmudata.get_metadata() - self.rootpath = fmudata.get_casepath() - self.iter_name = fmudata.get_iter_name() - self.real_name = fmudata.get_real_name() - - logger.debug("Rootpath is now %s", self.rootpath) - - def _populate_meta_file(self) -> None: - """Populate the file block in the metadata. - - The file block also contains all needed info for doing the actual file export. 
- - It requires that the _ObjectDataProvider is ran first -> self.objdata + fmudata: FmuProvider | None = field(default=None) + compute_md5: bool = field(default=True) - - relative_path, seen from rootpath - - absolute_path, as above but full path - - checksum_md5, if required (a bit special treatment of this) - - In additional _optional_ symlink adresses - - relative_path_symlink, seen from rootpath - - absolute_path_symlink, as above but full path - """ - - assert self.objdata is not None - - fdata = FileDataProvider( - self.dataio, - self.objdata, - Path(self.rootpath), - self.iter_name, - self.real_name, - ) - fdata.derive_filedata() - - if self.compute_md5: - if not self.objdata.extension.startswith("."): - raise ValueError("A extension must start with '.'") - with NamedTemporaryFile( - buffering=0, - suffix=self.objdata.extension, - ) as tf: - logger.info("Compute MD5 sum for tmp file...: %s", tf.name) - checksum_md5 = export_file_compute_checksum_md5( - self.obj, - Path(tf.name), - flag=self.dataio._usefmtflag, - ) - else: - logger.info("Do not compute MD5 sum at this stage!") - checksum_md5 = None - - self.meta_file = meta.File( - absolute_path=fdata.absolute_path, - relative_path=fdata.relative_path, - checksum_md5=checksum_md5, - relative_path_symlink=fdata.relative_path_symlink, - absolute_path_symlink=fdata.absolute_path_symlink, - ).model_dump( - mode="json", - exclude_none=True, - by_alias=True, - ) - - def _populate_meta_class(self) -> None: - """Get the general class which is a simple string.""" - assert self.objdata is not None - self.meta_class = self.objdata.classname - - def _populate_meta_tracklog(self) -> None: - """Create the tracklog metadata, which here assumes 'created' only.""" - self.meta_tracklog = [ - x.model_dump(mode="json", exclude_none=True, by_alias=True) - for x in generate_meta_tracklog() - ] - - def _populate_meta_masterdata(self) -> None: - """Populate metadata from masterdata section in config.""" - self.meta_masterdata = 
self.dataio.config.get("masterdata", {}) - - def _populate_meta_access(self) -> None: - """Populate metadata overall from access section in config + allowed keys. - - Access should be possible to change per object, based on user input. - This is done through the access_ssdl input argument. - - The "asset" field shall come from the config. This is static information. - - The "ssdl" field can come from the config, or be explicitly given through - the "access_ssdl" input argument. If the access_ssdl input argument is present, - its contents shall take presedence. - - """ - self.meta_access = ( - global_configuration.Access.model_validate( - self.dataio.config["access"] - ).model_dump(mode="json", exclude_none=True) - if self.dataio._config_is_valid - else {} - ) - - def _populate_meta_display(self) -> None: - """Populate the display block.""" - - # display.name - if self.dataio.display_name is not None: - display_name = self.dataio.display_name - else: - assert self.objdata is not None - display_name = self.objdata.name - - self.meta_display = {"name": display_name} - - def _populate_meta_xpreprocessed(self) -> None: - """Populate a few necessary 'tmp' metadata needed for preprocessed data.""" - if self.dataio.fmu_context == FmuContext.PREPROCESSED: - self.meta_xpreprocessed["name"] = self.dataio.name - self.meta_xpreprocessed["tagname"] = self.dataio.tagname - self.meta_xpreprocessed["subfolder"] = self.dataio.subfolder - - def _reuse_existing_metadata(self, meta: dict) -> dict: + def _reuse_existing_metadata(self, meta: dict, meta_existing: dict) -> dict: """Perform a merge procedure if the key `reuse_metadata_rule` is active.""" - if self.dataio and self.dataio.reuse_metadata_rule: - oldmeta = self.meta_existing - newmeta = meta.copy() - if self.dataio.reuse_metadata_rule == "preprocessed": - return glue_metadata_preprocessed(oldmeta, newmeta) - raise ValueError( - f"The reuse_metadata_rule {self.dataio.reuse_metadata_rule} is not " - "supported." 
- ) - return meta + if self.dataio.reuse_metadata_rule == "preprocessed": + return glue_metadata_preprocessed(meta_existing, meta.copy()) + raise ValueError( + f"The reuse_metadata_rule {self.dataio.reuse_metadata_rule} is not " + "supported." + ) def generate_export_metadata( self, skip_null: bool = True ) -> dict: # TODO! -> skip_null? """Main function to generate the full metadata""" - # populate order matters, in particular objectdata provides input to class/file - self._populate_meta_masterdata() - self._populate_meta_access() - - if self.dataio._fmurun: - self._populate_meta_fmu() - - self._populate_meta_tracklog() - self._populate_meta_objectdata() - self._populate_meta_class() - self._populate_meta_file() - self._populate_meta_display() - self._populate_meta_xpreprocessed() - - # glue together metadata, order is as legacy code (but will be screwed if reuse - # of existing metadata...) - meta = self.meta_dollars.copy() - meta["tracklog"] = self.meta_tracklog - meta["class"] = self.meta_class + meta_existing = {} + if isinstance(self.obj, (str, Path)) and self.dataio.reuse_metadata_rule: + logger.info("Partially reuse existing metadata from %s", self.obj) + meta_existing = read_metadata_from_file(self.obj) - meta["fmu"] = self.meta_fmu - meta["file"] = self.meta_file + objdata = _get_objectdata_provider(self.obj, self.dataio, meta_existing) + filedata = _get_filedata_provider(self.dataio, objdata, self.fmudata) - meta["data"] = self.meta_objectdata - meta["display"] = self.meta_display + checksum_md5 = ( + _compute_md5(self.dataio, objdata, self.obj) if self.compute_md5 else None + ) - meta["access"] = self.meta_access - meta["masterdata"] = self.meta_masterdata + datameta = internal.DataMetaSchema( + schema_=TypeAdapter(AnyHttpUrl).validate_strings(SCHEMA), # type: ignore[call-arg] + version=VERSION, + source=SOURCE, + class_=objdata.classname, + masterdata=self.dataio.config.get("masterdata"), + fmu=self.fmudata.get_metadata() if self.fmudata else None, + 
access=self.dataio.config.get("access"), + data=objdata.metadata, + file=meta.File( + absolute_path=filedata.absolute_path, + relative_path=filedata.relative_path, + checksum_md5=checksum_md5, + relative_path_symlink=filedata.relative_path_symlink, + absolute_path_symlink=filedata.absolute_path_symlink, + ), + tracklog=generate_meta_tracklog(), + display=meta.Display(name=self.dataio.display_name or objdata.name), + preprocessed=internal.PreprocessedInfo( + name=self.dataio.name, + tagname=self.dataio.tagname, + subfolder=self.dataio.subfolder, + ) + if self.dataio.fmu_context == FmuContext.PREPROCESSED + else None, + ) - if self.dataio.fmu_context == FmuContext.PREPROCESSED: - meta["_preprocessed"] = self.meta_xpreprocessed + meta_ = datameta.model_dump(mode="json", exclude_none=True, by_alias=True) if skip_null: - meta = drop_nones(meta) + meta_ = drop_nones(meta_) + + if self.dataio.reuse_metadata_rule: + return self._reuse_existing_metadata(meta_, meta_existing) - return self._reuse_existing_metadata(meta) + return meta_ diff --git a/src/fmu/dataio/_utils.py b/src/fmu/dataio/_utils.py index 240a82ed2..be85eee33 100644 --- a/src/fmu/dataio/_utils.py +++ b/src/fmu/dataio/_utils.py @@ -431,8 +431,8 @@ def glue_metadata_preprocessed( meta = oldmeta.copy() - if "_preprocessed" in meta: - del meta["_preprocessed"] + if "preprocessed" in meta: + del meta["preprocessed"] meta["fmu"] = newmeta["fmu"] meta["file"] = newmeta["file"] diff --git a/src/fmu/dataio/case.py b/src/fmu/dataio/case.py index d1d3d2082..c621fb73b 100644 --- a/src/fmu/dataio/case.py +++ b/src/fmu/dataio/case.py @@ -13,6 +13,7 @@ from ._logging import null_logger from .datastructure._internal import internal from .datastructure.configuration import global_configuration +from .datastructure.meta import meta logger: Final = null_logger(__name__) @@ -116,9 +117,9 @@ def generate_metadata(self) -> dict: warnings.warn(exists_warning, UserWarning) return {} - meta = internal.CaseSchema( - 
masterdata=internal.Masterdata.model_validate(self.config["masterdata"]), - access=internal.Access.model_validate(self.config["access"]), + case_meta = internal.CaseSchema( + masterdata=meta.Masterdata.model_validate(self.config["masterdata"]), + access=meta.Access.model_validate(self.config["access"]), fmu=internal.FMUModel( model=global_configuration.Model.model_validate( self.config["model"], @@ -126,7 +127,7 @@ def generate_metadata(self) -> dict: case=internal.CaseMetadata( name=self.casename, uuid=str(self._case_uuid()), - user=internal.User(id=self.caseuser), + user=meta.User(id=self.caseuser), ), ), tracklog=_metadata.generate_meta_tracklog(), @@ -137,7 +138,7 @@ def generate_metadata(self) -> dict: by_alias=True, ) - self._metadata = _utils.drop_nones(meta) + self._metadata = _utils.drop_nones(case_meta) logger.info("The case metadata are now ready!") return copy.deepcopy(self._metadata) diff --git a/src/fmu/dataio/dataio.py b/src/fmu/dataio/dataio.py index da30d5139..3f4f2f8b4 100644 --- a/src/fmu/dataio/dataio.py +++ b/src/fmu/dataio/dataio.py @@ -37,7 +37,7 @@ AllowedContent, ) from .datastructure.configuration import global_configuration -from .providers._fmu import FmuEnv +from .providers._fmu import FmuEnv, FmuProvider # DATAIO_EXAMPLES: Final = dataio_examples() INSIDE_RMS: Final = detect_inside_rms() @@ -635,24 +635,34 @@ def _check_obj_if_file(self, obj: types.Inferrable) -> types.Inferrable: self.reuse_metadata_rule = "preprocessed" currentmeta = read_metadata(obj) - if "_preprocessed" not in currentmeta: + if "preprocessed" not in currentmeta: raise ValidationError( - "The special entry for preprocessed data <_preprocessed> is" + "The special entry for preprocessed data is" "missing in the metadata. A possible solution is to rerun the" "preprocessed export." 
+ ) + preprocessed = currentmeta["preprocessed"] - if not self.name and currentmeta["_preprocessed"].get("name", ""): - self.name = currentmeta["_preprocessed"]["name"] - - if not self.tagname and currentmeta["_preprocessed"].get("tagname", ""): - self.tagname = currentmeta["_preprocessed"]["tagname"] - - if not self.subfolder and currentmeta["_preprocessed"].get("subfolder", ""): - self.subfolder = currentmeta["_preprocessed"]["subfolder"] + self.name = self.name or preprocessed.get("name", "") + self.tagname = self.tagname or preprocessed.get("tagname", "") + self.subfolder = self.subfolder or preprocessed.get("subfolder", "") return obj + def _get_fmu_provider(self) -> FmuProvider | None: + return ( + FmuProvider( + model=self.config.get("model", None), + fmu_context=FmuContext.get(self.fmu_context), + casepath_proposed=self.casepath or "", + include_ertjobs=self.include_ertjobs, + forced_realization=self.realization, + workflow=self.workflow, + ) + if self._fmurun + else None + ) + # ================================================================================== # Public methods: # ================================================================================== @@ -704,10 +714,19 @@ def generate_metadata( self._validate_content_key() self._update_fmt_flag() - metaobj = _metadata.MetaData(obj, self, compute_md5=compute_md5) - self._metadata = metaobj.generate_export_metadata() + fmudata = self._get_fmu_provider() + + # update rootpath based on fmurun or not + # TODO: This rootpath check and update should only belong to the ExportData + # init. However, since we allow users to update settings on the export, + # keep it here for now. 
+ self._rootpath = Path( + fmudata.get_casepath() if fmudata else str(self._rootpath.absolute()) + ) + logger.debug("Rootpath is now %s", self._rootpath) - self._rootpath = Path(metaobj.rootpath) + metaobj = _metadata.MetaData(obj, self, fmudata, compute_md5=compute_md5) + self._metadata = metaobj.generate_export_metadata() logger.info("The metadata are now ready!") diff --git a/src/fmu/dataio/datastructure/_internal/internal.py b/src/fmu/dataio/datastructure/_internal/internal.py index 7c4d823a3..061f5ff6a 100644 --- a/src/fmu/dataio/datastructure/_internal/internal.py +++ b/src/fmu/dataio/datastructure/_internal/internal.py @@ -16,8 +16,9 @@ from fmu.dataio.datastructure.configuration.global_configuration import ( Model as GlobalConfigurationModel, ) -from fmu.dataio.datastructure.meta.meta import Access, Masterdata, TracklogEvent, User +from fmu.dataio.datastructure.meta import content, meta from pydantic import AnyHttpUrl, BaseModel, Field, TypeAdapter, model_validator +from typing_extensions import Annotated def seismic_warn() -> None: @@ -154,7 +155,7 @@ class JsonSchemaMetadata(BaseModel, populate_by_name=True): class CaseMetadata(BaseModel): name: str uuid: str - user: User + user: meta.User class FMUModel(BaseModel): @@ -162,10 +163,47 @@ class FMUModel(BaseModel): case: CaseMetadata +class PreprocessedInfo(BaseModel): + name: str + tagname: str + subfolder: str + + +class UnsetContent(content.Content): + content: Literal["unset"] + + +class Context(BaseModel): + stage: Literal[ + "realization", + "case", + "case_symlink_realization", + "preprocessed", + "non_fmu", + ] + + +class FMUClassMetaData(meta.FMUClassMetaData): + # This class is identical to the one used in the schema, + # except that more fmu context values are allowed internally + context: Context + + +class DataMetaSchema(JsonSchemaMetadata, meta.FMUDataClassMeta): + fmu: Optional[FMUClassMetaData] + masterdata: Optional[meta.Masterdata] + access: Optional[meta.SsdlAccess] + data: Annotated[ + 
Union[meta.content.AnyContent, UnsetContent], Field(discriminator="content") + ] + display: meta.Display + preprocessed: Optional[PreprocessedInfo] + + class CaseSchema(JsonSchemaMetadata): class_: Literal["case"] = Field(alias="class", default="case") - masterdata: Masterdata - access: Access + masterdata: meta.Masterdata + access: meta.Access fmu: FMUModel description: Optional[List[str]] - tracklog: List[TracklogEvent] + tracklog: List[meta.TracklogEvent] diff --git a/src/fmu/dataio/providers/_objectdata.py b/src/fmu/dataio/providers/_objectdata.py index b4e038fc1..7431b42db 100644 --- a/src/fmu/dataio/providers/_objectdata.py +++ b/src/fmu/dataio/providers/_objectdata.py @@ -238,7 +238,7 @@ def get_objectdata(self) -> DerivedObjectDescriptor: fmt=(fmt := self.dataio.table_fformat), extension=self._validate_get_ext(fmt, "DataFrame", ValidFormats().table), spec=self.get_spec(), - bbox=self.get_bbox(), + bbox=self.get_bbox() or None, table_index=self._derive_index(), ) @@ -266,8 +266,8 @@ def get_objectdata(self) -> DerivedObjectDescriptor: efolder="dictionaries", fmt=(fmt := self.dataio.dict_fformat), extension=self._validate_get_ext(fmt, "JSON", ValidFormats().dictionary), - spec=self.get_spec(), - bbox=self.get_bbox(), + spec=self.get_spec() or None, + bbox=self.get_bbox() or None, table_index=None, ) @@ -339,7 +339,7 @@ def get_objectdata(self) -> DerivedObjectDescriptor: fmt=(fmt := self.dataio.arrow_fformat), extension=self._validate_get_ext(fmt, "ArrowTable", ValidFormats().table), spec=self.get_spec(), - bbox=self.get_bbox(), + bbox=self.get_bbox() or None, ) diff --git a/src/fmu/dataio/providers/_objectdata_base.py b/src/fmu/dataio/providers/_objectdata_base.py index 0057392d4..a26cbbea2 100644 --- a/src/fmu/dataio/providers/_objectdata_base.py +++ b/src/fmu/dataio/providers/_objectdata_base.py @@ -62,8 +62,8 @@ class DerivedObjectDescriptor: ) fmt: str extension: str - spec: Dict[str, Any] - bbox: Dict[str, Any] + spec: Dict[str, Any] | None + bbox: 
Dict[str, Any] | None table_index: Optional[list[str]] @@ -99,7 +99,7 @@ class DerivedNamedStratigraphy: stratigraphic: bool stratigraphic_alias: list[str] - offset: int | None + offset: int base: str | None top: str | None @@ -189,7 +189,7 @@ def _derive_name_stratigraphy(self) -> DerivedNamedStratigraphy: stratigraphic_alias=[] if no_start_or_missing_name else strat[name].get("stratigraphic_alias"), - offset=None if no_start_or_missing_name else strat[name].get("offset"), + offset=0.0 if no_start_or_missing_name else strat[name].get("offset", 0.0), top=None if no_start_or_missing_name else strat[name].get("top"), base=None if no_start_or_missing_name else strat[name].get("base"), ) diff --git a/src/fmu/dataio/providers/_objectdata_xtgeo.py b/src/fmu/dataio/providers/_objectdata_xtgeo.py index 8d156e993..a06c57ba3 100644 --- a/src/fmu/dataio/providers/_objectdata_xtgeo.py +++ b/src/fmu/dataio/providers/_objectdata_xtgeo.py @@ -353,6 +353,6 @@ def get_objectdata(self) -> DerivedObjectDescriptor: fmt, "CPGridProperty", ValidFormats().grid ), spec=self.get_spec(), - bbox=self.get_bbox(), + bbox=self.get_bbox() or None, table_index=None, ) diff --git a/tests/test_units/test_dataio.py b/tests/test_units/test_dataio.py index 6d199b882..1fc21421f 100644 --- a/tests/test_units/test_dataio.py +++ b/tests/test_units/test_dataio.py @@ -6,6 +6,7 @@ import sys from copy import deepcopy +import pydantic import pytest import yaml from fmu.dataio._definitions import FmuContext @@ -213,10 +214,16 @@ def test_content_invalid_dict(globalconfig1): def test_content_valid_string(regsurf, globalconfig2): - eobj = ExportData(config=globalconfig2, name="TopVolantis", content="seismic") + eobj = ExportData(config=globalconfig2, name="TopVolantis", content="depth") mymeta = eobj.generate_metadata(regsurf) - assert mymeta["data"]["content"] == "seismic" - assert "seismic" not in mymeta["data"] + assert mymeta["data"]["content"] == "depth" + assert "depth" not in mymeta["data"] + + +def 
test_seismic_content_require_seismic_data(regsurf, globalconfig2): + eobj = ExportData(config=globalconfig2, content="seismic") + with pytest.raises(pydantic.ValidationError, match="Field required "): + eobj.generate_metadata(regsurf) def test_content_valid_dict(regsurf, globalconfig2): diff --git a/tests/test_units/test_metadata_class.py b/tests/test_units/test_metadata_class.py index e8b460cbc..3b6258c16 100644 --- a/tests/test_units/test_metadata_class.py +++ b/tests/test_units/test_metadata_class.py @@ -5,12 +5,19 @@ import fmu.dataio as dio import pytest -from fmu.dataio._metadata import SCHEMA, SOURCE, VERSION, MetaData +from fmu.dataio._metadata import ( + SCHEMA, + SOURCE, + VERSION, + MetaData, + _get_objectdata_provider, +) from fmu.dataio._utils import prettyprint_dict from fmu.dataio.datastructure.meta.meta import ( SystemInformationOperatingSystem, TracklogEvent, ) +from pydantic import ValidationError # pylint: disable=no-member @@ -21,14 +28,14 @@ # -------------------------------------------------------------------------------------- -def test_metadata_dollars(edataobj1): +def test_metadata_dollars(edataobj1, regsurf): """Testing the dollars part which is hard set.""" - mymeta = MetaData("dummy", edataobj1) + mymeta = edataobj1.generate_metadata(obj=regsurf) - assert mymeta.meta_dollars["version"] == VERSION - assert mymeta.meta_dollars["$schema"] == SCHEMA - assert mymeta.meta_dollars["source"] == SOURCE + assert mymeta["version"] == VERSION + assert mymeta["$schema"] == SCHEMA + assert mymeta["source"] == SOURCE # -------------------------------------------------------------------------------------- @@ -36,10 +43,9 @@ def test_metadata_dollars(edataobj1): # -------------------------------------------------------------------------------------- -def test_generate_meta_tracklog_fmu_dataio_version(edataobj1): - mymeta = MetaData("dummy", edataobj1) - mymeta._populate_meta_tracklog() - tracklog = mymeta.meta_tracklog +def 
test_generate_meta_tracklog_fmu_dataio_version(regsurf, edataobj1): + mymeta = MetaData(regsurf, edataobj1).generate_export_metadata() + tracklog = mymeta["tracklog"] assert isinstance(tracklog, list) assert len(tracklog) == 1 # assume "created" @@ -57,13 +63,12 @@ def test_generate_meta_tracklog_fmu_dataio_version(edataobj1): assert parsed.sysinfo.fmu_dataio.version is not None -def test_generate_meta_tracklog_komodo_version(edataobj1, monkeypatch): +def test_generate_meta_tracklog_komodo_version(edataobj1, regsurf, monkeypatch): fake_komodo_release = "" monkeypatch.setenv("KOMODO_RELEASE", fake_komodo_release) - mymeta = MetaData("dummy", edataobj1) - mymeta._populate_meta_tracklog() - tracklog = mymeta.meta_tracklog + mymeta = MetaData(regsurf, edataobj1).generate_export_metadata() + tracklog = mymeta["tracklog"] assert isinstance(tracklog, list) assert len(tracklog) == 1 # assume "created" @@ -81,10 +86,9 @@ def test_generate_meta_tracklog_komodo_version(edataobj1, monkeypatch): assert parsed.sysinfo.komodo.version == fake_komodo_release -def test_generate_meta_tracklog_operating_system(edataobj1): - mymeta = MetaData("dummy", edataobj1) - mymeta._populate_meta_tracklog() - tracklog = mymeta.meta_tracklog +def test_generate_meta_tracklog_operating_system(edataobj1, regsurf): + mymeta = MetaData(regsurf, edataobj1).generate_export_metadata() + tracklog = mymeta["tracklog"] assert isinstance(tracklog, list) assert len(tracklog) == 1 # assume "created" @@ -102,9 +106,12 @@ def test_generate_meta_tracklog_operating_system(edataobj1): def test_populate_meta_objectdata(regsurf, edataobj2): - mymeta = MetaData(regsurf, edataobj2) - mymeta._populate_meta_objectdata() - assert mymeta.objdata.name == "VOLANTIS GP. Top" + mymeta = MetaData(regsurf, edataobj2).generate_export_metadata() + objdata = _get_objectdata_provider(regsurf, edataobj2) + + assert objdata.name == "VOLANTIS GP. 
Top" + assert mymeta["display"]["name"] == objdata.name + assert edataobj2.name == "TopVolantis" def test_populate_meta_undef_is_zero(regsurf, globalconfig2): @@ -140,30 +147,27 @@ def test_populate_meta_undef_is_zero(regsurf, globalconfig2): # -------------------------------------------------------------------------------------- -def test_metadata_populate_masterdata_is_empty(globalconfig1): +def test_metadata_populate_masterdata_is_empty(globalconfig1, regsurf): """Testing the masterdata part, first with no settings.""" config = deepcopy(globalconfig1) del config["masterdata"] # to force missing masterdata some = dio.ExportData(config=config, content="depth") - assert not some._config_is_valid - mymeta = MetaData("dummy", some) + assert not some._config_is_valid - mymeta._populate_meta_masterdata() - assert not mymeta.meta_masterdata + mymeta = MetaData(regsurf, some).generate_export_metadata() + assert "masterdata" not in mymeta -def test_metadata_populate_masterdata_is_present_ok(edataobj1, edataobj2): +def test_metadata_populate_masterdata_is_present_ok(edataobj1, edataobj2, regsurf): """Testing the masterdata part with OK metdata.""" - mymeta = MetaData("dummy", edataobj1) - mymeta._populate_meta_masterdata() - assert mymeta.meta_masterdata == edataobj1.config["masterdata"] + mymeta = MetaData(regsurf, edataobj1).generate_export_metadata() + assert mymeta["masterdata"] == edataobj1.config["masterdata"] - mymeta = MetaData("dummy", edataobj2) - mymeta._populate_meta_masterdata() - assert mymeta.meta_masterdata == edataobj2.config["masterdata"] + mymeta = MetaData(regsurf, edataobj2).generate_export_metadata() + assert mymeta["masterdata"] == edataobj2.config["masterdata"] # -------------------------------------------------------------------------------------- @@ -171,35 +175,31 @@ def test_metadata_populate_masterdata_is_present_ok(edataobj1, edataobj2): # -------------------------------------------------------------------------------------- -def 
test_metadata_populate_access_miss_config_access(globalconfig1): +def test_metadata_populate_access_miss_config_access(globalconfig1, regsurf): """Testing the access part, now with config missing access.""" cfg1_edited = deepcopy(globalconfig1) del cfg1_edited["access"] edata = dio.ExportData(config=cfg1_edited, content="depth") - assert not edata._config_is_valid - mymeta = MetaData("dummy", edata) - mymeta._populate_meta_access() - assert not mymeta.meta_access + mymeta = MetaData(regsurf, edata).generate_export_metadata() + assert "access" not in mymeta -def test_metadata_populate_access_ok_config(edataobj2): +def test_metadata_populate_access_ok_config(edataobj2, regsurf): """Testing the access part, now with config ok access.""" - mymeta = MetaData("dummy", edataobj2) - - mymeta._populate_meta_access() - assert mymeta.meta_access == { + mymeta = MetaData(regsurf, edataobj2).generate_export_metadata() + assert mymeta["access"] == { "asset": {"name": "Drogon"}, "ssdl": {"access_level": "internal", "rep_include": True}, "classification": "internal", } -def test_metadata_populate_from_argument(globalconfig1): +def test_metadata_populate_from_argument(globalconfig1, regsurf): """Testing the access part, now with ok config and a change in access.""" # test assumptions @@ -210,17 +210,16 @@ def test_metadata_populate_from_argument(globalconfig1): access_ssdl={"access_level": "restricted", "rep_include": True}, content="depth", ) - mymeta = MetaData("dummy", edata) + mymeta = MetaData(regsurf, edata).generate_export_metadata() - mymeta._populate_meta_access() - assert mymeta.meta_access == { + assert mymeta["access"] == { "asset": {"name": "Test"}, "ssdl": {"access_level": "restricted", "rep_include": True}, "classification": "restricted", # mirroring ssdl.access_level } -def test_metadata_populate_partial_access_ssdl(globalconfig1): +def test_metadata_populate_partial_access_ssdl(globalconfig1, regsurf): """Test what happens if ssdl_access argument is partial.""" # 
test assumptions @@ -232,11 +231,10 @@ def test_metadata_populate_partial_access_ssdl(globalconfig1): config=globalconfig1, access_ssdl={"rep_include": True}, content="depth" ) - mymeta = MetaData("dummy", edata) - mymeta._populate_meta_access() - assert mymeta.meta_access["ssdl"]["rep_include"] is True - assert mymeta.meta_access["ssdl"]["access_level"] == "internal" # default - assert mymeta.meta_access["classification"] == "internal" # default + mymeta = MetaData(regsurf, edata).generate_export_metadata() + assert mymeta["access"]["ssdl"]["rep_include"] is True + assert mymeta["access"]["ssdl"]["access_level"] == "internal" # default + assert mymeta["access"]["classification"] == "internal" # default # access_level only, but in config edata = dio.ExportData( @@ -244,14 +242,13 @@ def test_metadata_populate_partial_access_ssdl(globalconfig1): access_ssdl={"access_level": "restricted"}, content="depth", ) - mymeta = MetaData("dummy", edata) - mymeta._populate_meta_access() - assert mymeta.meta_access["ssdl"]["rep_include"] is False # default - assert mymeta.meta_access["ssdl"]["access_level"] == "restricted" - assert mymeta.meta_access["classification"] == "restricted" + mymeta = MetaData(regsurf, edata).generate_export_metadata() + assert mymeta["access"]["ssdl"]["rep_include"] is False # default + assert mymeta["access"]["ssdl"]["access_level"] == "restricted" + assert mymeta["access"]["classification"] == "restricted" -def test_metadata_populate_wrong_config(globalconfig1): +def test_metadata_populate_wrong_config(globalconfig1, regsurf): """Test error in access_ssdl in config.""" # test assumptions @@ -263,12 +260,11 @@ def test_metadata_populate_wrong_config(globalconfig1): assert not edata._config_is_valid - mymeta = MetaData("dummy", edata) - mymeta._populate_meta_access() - assert not mymeta.meta_access + with pytest.raises(ValidationError, match="access.ssdl.access_level"): + MetaData(regsurf, edata).generate_export_metadata() -def 
test_metadata_populate_wrong_argument(globalconfig1): +def test_metadata_populate_wrong_argument(globalconfig1, regsurf): """Test error in access_ssdl in arguments.""" with pytest.warns(UserWarning): @@ -279,12 +275,11 @@ def test_metadata_populate_wrong_argument(globalconfig1): ) assert not edata._config_is_valid - mymeta = MetaData("dummy", edata) - mymeta._populate_meta_access() - assert not mymeta.meta_access + with pytest.raises(ValidationError, match="access.ssdl.access_level"): + MetaData(regsurf, edata).generate_export_metadata() -def test_metadata_access_correct_input(globalconfig1): +def test_metadata_access_correct_input(globalconfig1, regsurf): """Test giving correct input.""" # Input is "restricted" and False - correct use, shall work edata = dio.ExportData( @@ -292,11 +287,10 @@ def test_metadata_access_correct_input(globalconfig1): content="depth", access_ssdl={"access_level": "restricted", "rep_include": False}, ) - mymeta = MetaData("dummy", edata) - mymeta._populate_meta_access() - assert mymeta.meta_access["ssdl"]["rep_include"] is False - assert mymeta.meta_access["ssdl"]["access_level"] == "restricted" - assert mymeta.meta_access["classification"] == "restricted" + mymeta = MetaData(regsurf, edata).generate_export_metadata() + assert mymeta["access"]["ssdl"]["rep_include"] is False + assert mymeta["access"]["ssdl"]["access_level"] == "restricted" + assert mymeta["access"]["classification"] == "restricted" # Input is "internal" and True - correct use, shall work edata = dio.ExportData( @@ -304,14 +298,13 @@ def test_metadata_access_correct_input(globalconfig1): content="depth", access_ssdl={"access_level": "internal", "rep_include": True}, ) - mymeta = MetaData("dummy", edata) - mymeta._populate_meta_access() - assert mymeta.meta_access["ssdl"]["rep_include"] is True - assert mymeta.meta_access["ssdl"]["access_level"] == "internal" - assert mymeta.meta_access["classification"] == "internal" + mymeta = MetaData(regsurf, 
edata).generate_export_metadata() + assert mymeta["access"]["ssdl"]["rep_include"] is True + assert mymeta["access"]["ssdl"]["access_level"] == "internal" + assert mymeta["access"]["classification"] == "internal" -def test_metadata_access_deprecated_input(globalconfig1): +def test_metadata_access_deprecated_input(globalconfig1, regsurf): """Test giving deprecated input.""" # Input is "asset". Is deprecated, shall work with warning. # Output shall be "restricted". @@ -326,13 +319,12 @@ def test_metadata_access_deprecated_input(globalconfig1): ) assert edata._config_is_valid - mymeta = MetaData("dummy", edata) - mymeta._populate_meta_access() - assert mymeta.meta_access["ssdl"]["access_level"] == "restricted" - assert mymeta.meta_access["classification"] == "restricted" + mymeta = MetaData(regsurf, edata).generate_export_metadata() + assert mymeta["access"]["ssdl"]["access_level"] == "restricted" + assert mymeta["access"]["classification"] == "restricted" -def test_metadata_access_illegal_input(globalconfig1): +def test_metadata_access_illegal_input(globalconfig1, regsurf): """Test giving illegal input, should provide empty access field""" # Input is "secret" @@ -344,9 +336,8 @@ def test_metadata_access_illegal_input(globalconfig1): ) assert not edata._config_is_valid - mymeta = MetaData("dummy", edata) - mymeta._populate_meta_access() - assert not mymeta.meta_access + with pytest.raises(ValidationError, match="access.ssdl.access_level"): + MetaData(regsurf, edata).generate_export_metadata() # Input is "open". Not allowed, shall fail. 
with pytest.warns(UserWarning): @@ -356,12 +347,11 @@ def test_metadata_access_illegal_input(globalconfig1): content="depth", ) assert not edata._config_is_valid - mymeta = MetaData("dummy", edata) - mymeta._populate_meta_access() - assert not mymeta.meta_access + with pytest.raises(ValidationError, match="access.ssdl.access_level"): + MetaData(regsurf, edata).generate_export_metadata() -def test_metadata_access_no_input(globalconfig1): +def test_metadata_access_no_input(globalconfig1, regsurf): """Test not giving any input arguments.""" # No input, revert to config @@ -369,22 +359,20 @@ def test_metadata_access_no_input(globalconfig1): configcopy["access"]["ssdl"]["access_level"] = "restricted" configcopy["access"]["ssdl"]["rep_include"] = True edata = dio.ExportData(config=configcopy, content="depth") - mymeta = MetaData("dummy", edata) - mymeta._populate_meta_access() - assert mymeta.meta_access["ssdl"]["rep_include"] is True - assert mymeta.meta_access["ssdl"]["access_level"] == "restricted" - assert mymeta.meta_access["classification"] == "restricted" # mirrored + mymeta = MetaData(regsurf, edata).generate_export_metadata() + assert mymeta["access"]["ssdl"]["rep_include"] is True + assert mymeta["access"]["ssdl"]["access_level"] == "restricted" + assert mymeta["access"]["classification"] == "restricted" # mirrored # No input, no config, shall default to "internal" and False configcopy = deepcopy(globalconfig1) del configcopy["access"]["ssdl"]["access_level"] del configcopy["access"]["ssdl"]["rep_include"] edata = dio.ExportData(config=globalconfig1, content="depth") - mymeta = MetaData("dummy", edata) - mymeta._populate_meta_access() - assert mymeta.meta_access["ssdl"]["rep_include"] is False # default - assert mymeta.meta_access["ssdl"]["access_level"] == "internal" # default - assert mymeta.meta_access["classification"] == "internal" # mirrored + mymeta = MetaData(regsurf, edata).generate_export_metadata() + assert mymeta["access"]["ssdl"]["rep_include"] is 
False # default + assert mymeta["access"]["ssdl"]["access_level"] == "internal" # default + assert mymeta["access"]["classification"] == "internal" # mirrored # -------------------------------------------------------------------------------------- @@ -395,24 +383,23 @@ def test_metadata_access_no_input(globalconfig1): def test_metadata_display_name_not_given(regsurf, edataobj2): """Test that display.name == data.name when not explicitly provided.""" - mymeta = MetaData(regsurf, edataobj2) - mymeta._populate_meta_objectdata() - mymeta._populate_meta_display() + mymeta = MetaData(regsurf, edataobj2).generate_export_metadata() + objdata = _get_objectdata_provider(regsurf, edataobj2) - assert "name" in mymeta.meta_display - assert mymeta.meta_display["name"] == mymeta.objdata.name + assert "name" in mymeta["display"] + assert mymeta["display"]["name"] == objdata.name def test_metadata_display_name_given(regsurf, edataobj2): """Test that display.name is set when explicitly given.""" - mymeta = MetaData(regsurf, edataobj2) edataobj2.display_name = "My Display Name" - mymeta._populate_meta_objectdata() - mymeta._populate_meta_display() - assert mymeta.meta_display["name"] == "My Display Name" - assert mymeta.objdata.name == mymeta.meta_objectdata["name"] == "VOLANTIS GP. Top" + mymeta = MetaData(regsurf, edataobj2).generate_export_metadata() + objdata = _get_objectdata_provider(regsurf, edataobj2) + + assert mymeta["display"]["name"] == "My Display Name" + assert objdata.name == "VOLANTIS GP. 
Top" # -------------------------------------------------------------------------------------- diff --git a/tests/test_units/test_prerealization_surfaces.py b/tests/test_units/test_prerealization_surfaces.py index 48793ce3b..aca88b186 100644 --- a/tests/test_units/test_prerealization_surfaces.py +++ b/tests/test_units/test_prerealization_surfaces.py @@ -108,7 +108,7 @@ def _export_data_from_rms(rmssetup, rmsglobalconfig, regsurf): == "share/preprocessed/maps/topvolantis--20240802_20200909.gri" ) assert metadata["data"]["name"] == "VOLANTIS GP. Top" - assert "_preprocessed" in metadata + assert "preprocessed" in metadata return edata.export(regsurf) @@ -147,7 +147,7 @@ def _run_case_fmu(fmurun_w_casemetadata, rmsglobalconfig, surfacepath): assert "merged" in metadata["tracklog"][-1]["event"] assert metadata["data"]["name"] == "VOLANTIS GP. Top" assert "TopVolantis" in metadata["data"]["alias"] - assert "_preprocessed" not in metadata + assert "preprocessed" not in metadata # do the actual export (which will copy data to case/share/observations/...) edata.export(