Skip to content

Commit

Permalink
CLN: Switch to pydantic for access field in metadata class (#539)
Browse files Browse the repository at this point in the history
  • Loading branch information
tnatt authored Mar 19, 2024
1 parent f77f77a commit e2fd923
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 148 deletions.
134 changes: 12 additions & 122 deletions src/fmu/dataio/_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,11 @@
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Final
from warnings import warn

from pydantic import AnyHttpUrl, TypeAdapter

from fmu import dataio
from fmu.dataio._definitions import SCHEMA, SOURCE, VERSION, ConfigurationError
from fmu.dataio._definitions import SCHEMA, SOURCE, VERSION
from fmu.dataio._filedata_provider import FileDataProvider
from fmu.dataio._fmu_provider import FmuProvider
from fmu.dataio._objectdata_provider import ObjectDataProvider
Expand All @@ -31,6 +30,7 @@
read_metadata_from_file,
)
from fmu.dataio.datastructure._internal import internal
from fmu.dataio.datastructure.configuration import global_configuration
from fmu.dataio.datastructure.meta import meta

from . import types
Expand Down Expand Up @@ -80,120 +80,6 @@ def generate_meta_tracklog() -> list[meta.TracklogEvent]:
]


def generate_meta_masterdata(config: dict) -> dict | None:
"""Populate metadata from masterdata section in config."""

if not config:
# this may be a temporary solution for a while, which will be told to the user
# in related checks in dataio.py.
warn(
"The global config is empty, hence the 'masterdata' section "
"in the metadata will be omitted.",
UserWarning,
)
return None

if "masterdata" not in config:
raise ValueError("A config exists, but 'masterdata' are not present.")

return config["masterdata"]


def generate_meta_access(config: dict) -> dict | None:
"""Populate metadata overall from access section in config + allowed keys.
Access should be possible to change per object, based on user input.
This is done through the access_ssdl input argument.
The "asset" field shall come from the config. This is static information.
The "ssdl" field can come from the config, or be explicitly given through
the "access_ssdl" input argument. If the access_ssdl input argument is present,
its contents shall take presedence. If no input, and no config, revert to the
following defaults:
access.ssdl.access_level: "internal" (we explicitly elevate to "restricted)
access.ssdl.rep_include: False (we explicitly flag to be included in REP)
The access.ssdl.access_level field shall be "internal" or "restricted". We still
allow for the legacy input argument "asset", however we issue warning and change it
to "restricted".
The access.classification will in the future be the only information classification
field. For now, we simply mirror it from ssdl.access_level to avoid API change.
"""

if not config:
warn("The config is empty or missing", UserWarning)
return None

if config and "access" not in config:
raise ConfigurationError("The config misses the 'access' section")

a_cfg = config["access"] # shortform

if "asset" not in a_cfg:
# asset shall be present if config is used
raise ConfigurationError("The 'access.asset' field not found in the config")

# initialize and populate with defaults from config
a_meta = {} # shortform

# if there is a config, the 'asset' tag shall be present
a_meta["asset"] = a_cfg["asset"]

# ------------------------------------
# classification & ssdl.access_level and ssdl.rep_include
# ------------------------------------

# The information from the input argument "ssdl_access" has previously
# been inserted into the config. Meaning: The fact that it sits in the config
# at this stage, does not necessarily mean that the user actually has it in his
# config on the FMU side. It may come from user arguments.
# See dataio._update_globalconfig_from_settings

# First set defaults
a_meta["ssdl"] = {"access_level": "internal", "rep_include": False}

# Then overwrite from config (which may also actually come from user arguments)
if "ssdl" in a_cfg and "access_level" in a_cfg["ssdl"]:
a_meta["ssdl"]["access_level"] = a_cfg["ssdl"]["access_level"]

if "ssdl" in a_cfg and "rep_include" in a_cfg["ssdl"]:
a_meta["ssdl"]["rep_include"] = a_cfg["ssdl"]["rep_include"]

# check validity
_valid_ssdl_access_levels = ["internal", "restricted", "asset"]
_ssdl_access_level = a_meta["ssdl"]["access_level"]
if _ssdl_access_level not in _valid_ssdl_access_levels:
raise ConfigurationError(
f"Illegal value for access.ssdl.access_level: {_ssdl_access_level} "
f"Valid values are: {_valid_ssdl_access_levels}"
)

_ssdl_rep_include = a_meta["ssdl"]["rep_include"]
if not isinstance(_ssdl_rep_include, bool):
raise ConfigurationError(
f"Illegal value for access.ssdl.rep_include: {_ssdl_rep_include}"
"access.ssdl.rep_include must be a boolean (True/False)."
)

# if "asset", change to "restricted" and give warning
if a_meta["ssdl"]["access_level"] == "asset":
warn(
"The value 'asset' for access.ssdl.access_level is deprecated. "
"Please use 'restricted' in input arguments or global variables to silence "
" this warning.",
UserWarning,
)
a_meta["ssdl"]["access_level"] = "restricted"

# mirror access.ssdl.access_level to access.classification
a_meta["classification"] = a_meta["ssdl"]["access_level"] # mirror

return a_meta


@dataclass
class MetaData:
"""Class for sampling, process and holding all metadata in an ExportData instance.
Expand Down Expand Up @@ -373,7 +259,7 @@ def _populate_meta_tracklog(self) -> None:

def _populate_meta_masterdata(self) -> None:
"""Populate metadata from masterdata section in config."""
self.meta_masterdata = generate_meta_masterdata(self.dataio.config) or {}
self.meta_masterdata = self.dataio.config.get("masterdata", {})

def _populate_meta_access(self) -> None:
"""Populate metadata overall from access section in config + allowed keys.
Expand All @@ -388,8 +274,13 @@ def _populate_meta_access(self) -> None:
its contents shall take precedence.
"""
if self.dataio:
self.meta_access = generate_meta_access(self.dataio.config) or {}
self.meta_access = (
global_configuration.Access.model_validate(
self.dataio.config["access"]
).model_dump(mode="json", exclude_none=True)
if self.dataio._config_is_valid
else {}
)

def _populate_meta_display(self) -> None:
"""Populate the display block."""
Expand Down Expand Up @@ -429,9 +320,8 @@ def generate_export_metadata(
"""Main function to generate the full metadata"""

# populate order matters, in particular objectdata provides input to class/file
if self.dataio._config_is_valid:
self._populate_meta_masterdata()
self._populate_meta_access()
self._populate_meta_masterdata()
self._populate_meta_access()

if self.dataio._fmurun:
self._populate_meta_fmu()
Expand Down
4 changes: 1 addition & 3 deletions src/fmu/dataio/case.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,7 @@ def generate_metadata(self) -> dict:

meta = internal.CaseSchema(
masterdata=internal.Masterdata.model_validate(self.config["masterdata"]),
access=internal.Access.model_validate(
_metadata.generate_meta_access(self.config)
),
access=internal.Access.model_validate(self.config["access"]),
fmu=internal.FMUModel(
model=global_configuration.Model.model_validate(
self.config["model"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,5 @@ def roundtrip(obj: Dict) -> Dict:
"""
return GlobalConfiguration.model_validate(obj).model_dump(
mode="json",
exclude_defaults=True,
exclude_none=True,
exclude_unset=True,
)
55 changes: 34 additions & 21 deletions tests/test_units/test_metadata_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import fmu.dataio as dio
import pytest
from fmu.dataio._metadata import SCHEMA, SOURCE, VERSION, ConfigurationError, MetaData
from fmu.dataio._metadata import SCHEMA, SOURCE, VERSION, MetaData
from fmu.dataio._utils import prettyprint_dict
from fmu.dataio.datastructure.meta.meta import (
SystemInformationOperatingSystem,
Expand Down Expand Up @@ -142,14 +142,16 @@ def test_populate_meta_undef_is_zero(regsurf, globalconfig2):

def test_metadata_populate_masterdata_is_empty(globalconfig1):
"""Testing the masterdata part, first with no settings."""
config = deepcopy(globalconfig1)
del config["masterdata"] # to force missing masterdata

some = dio.ExportData(config=globalconfig1, content="depth")
del some.config["masterdata"] # to force missing masterdata
some = dio.ExportData(config=config, content="depth")
assert not some._config_is_valid

mymeta = MetaData("dummy", some)

with pytest.raises(ValueError, match="A config exists, but 'masterdata' are not"):
mymeta._populate_meta_masterdata()
mymeta._populate_meta_masterdata()
assert not mymeta.meta_masterdata


def test_metadata_populate_masterdata_is_present_ok(edataobj1, edataobj2):
Expand All @@ -169,16 +171,19 @@ def test_metadata_populate_masterdata_is_present_ok(edataobj1, edataobj2):
# --------------------------------------------------------------------------------------


def test_metadata_populate_access_miss_config_access(edataobj1):
def test_metadata_populate_access_miss_config_access(globalconfig1):
"""Testing the access part, now with config missing access."""

cfg1_edited = deepcopy(edataobj1)
del cfg1_edited.config["access"]
cfg1_edited = deepcopy(globalconfig1)
del cfg1_edited["access"]

edata = dio.ExportData(config=cfg1_edited, content="depth")

mymeta = MetaData("dummy", cfg1_edited)
assert not edata._config_is_valid

with pytest.raises(ConfigurationError):
mymeta._populate_meta_access()
mymeta = MetaData("dummy", edata)
mymeta._populate_meta_access()
assert not mymeta.meta_access


def test_metadata_populate_access_ok_config(edataobj2):
Expand Down Expand Up @@ -226,6 +231,7 @@ def test_metadata_populate_partial_access_ssdl(globalconfig1):
edata = dio.ExportData(
config=globalconfig1, access_ssdl={"rep_include": True}, content="depth"
)

mymeta = MetaData("dummy", edata)
mymeta._populate_meta_access()
assert mymeta.meta_access["ssdl"]["rep_include"] is True
Expand Down Expand Up @@ -255,9 +261,11 @@ def test_metadata_populate_wrong_config(globalconfig1):
with pytest.warns(UserWarning):
edata = dio.ExportData(config=_config, content="depth")

assert not edata._config_is_valid

mymeta = MetaData("dummy", edata)
with pytest.raises(ConfigurationError, match="Illegal value for access"):
mymeta._populate_meta_access()
mymeta._populate_meta_access()
assert not mymeta.meta_access


def test_metadata_populate_wrong_argument(globalconfig1):
Expand All @@ -269,9 +277,11 @@ def test_metadata_populate_wrong_argument(globalconfig1):
access_ssdl={"access_level": "wrong"},
content="depth",
)
assert not edata._config_is_valid

mymeta = MetaData("dummy", edata)
with pytest.raises(ConfigurationError, match="Illegal value for access"):
mymeta._populate_meta_access()
mymeta._populate_meta_access()
assert not mymeta.meta_access


def test_metadata_access_correct_input(globalconfig1):
Expand Down Expand Up @@ -314,6 +324,7 @@ def test_metadata_access_deprecated_input(globalconfig1):
access_ssdl={"access_level": "asset"},
content="depth",
)
assert edata._config_is_valid

mymeta = MetaData("dummy", edata)
mymeta._populate_meta_access()
Expand All @@ -322,19 +333,20 @@ def test_metadata_access_deprecated_input(globalconfig1):


def test_metadata_access_illegal_input(globalconfig1):
"""Test giving illegal input."""
"""Test giving illegal input, should provide empty access field"""

# Input is "secret". Not allowed, shall fail.
# Input is "secret"
with pytest.warns(UserWarning):
edata = dio.ExportData(
config=globalconfig1,
access_ssdl={"access_level": "secret"},
content="depth",
)
assert not edata._config_is_valid

mymeta = MetaData("dummy", edata)
with pytest.raises(ConfigurationError, match="Illegal value for access"):
mymeta._populate_meta_access()
mymeta._populate_meta_access()
assert not mymeta.meta_access

# Input is "open". Not allowed, shall give an empty access field.
with pytest.warns(UserWarning):
Expand All @@ -343,9 +355,10 @@ def test_metadata_access_illegal_input(globalconfig1):
access_ssdl={"access_level": "open"},
content="depth",
)
assert not edata._config_is_valid
mymeta = MetaData("dummy", edata)
with pytest.raises(ConfigurationError, match="Illegal value for access"):
mymeta._populate_meta_access()
mymeta._populate_meta_access()
assert not mymeta.meta_access


def test_metadata_access_no_input(globalconfig1):
Expand Down

0 comments on commit e2fd923

Please sign in to comment.