Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Deprecating access_ssdl argument #575

Closed
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
eb3bef8
WIP: Deprecating access_ssdl argument
perolavsvendsen Apr 4, 2024
7b22e5f
Merge branch 'main' into 540-classification-arg
perolavsvendsen Apr 5, 2024
3021cbe
WIP: Synch access defs across meta and configuration models.
perolavsvendsen Apr 5, 2024
9b1af09
WIP: Take classification argument.
perolavsvendsen Apr 5, 2024
f61ab11
Merge branch 'main' into 540-classification-arg
perolavsvendsen Apr 5, 2024
4ef028a
WIP: Stop updating config from arguments
perolavsvendsen Apr 7, 2024
9b2bff0
WIP: Stop check config, stop warn, outside class init.
perolavsvendsen Apr 7, 2024
586eaea
WIP: Try to make tests pass, and some linting.
perolavsvendsen Apr 7, 2024
b587a1d
WIP: Parse and validate config with function.
perolavsvendsen Apr 7, 2024
1e3da6d
WIP: Parse configuration outside dataio.py.
perolavsvendsen Apr 7, 2024
669adfb
WIP: Linting.
perolavsvendsen Apr 7, 2024
45fe0a0
WIP: Update schema with correct pydantic version.
perolavsvendsen Apr 7, 2024
4bda6cc
WIP: More tests and some cleanup.
perolavsvendsen Apr 8, 2024
ced8aab
WIP: Linting.
perolavsvendsen Apr 8, 2024
3485aa9
Merge branch 'main' into 540-classification-arg
perolavsvendsen Apr 9, 2024
c7faf87
Merge branch 'main' into 540-classification-arg
perolavsvendsen Apr 9, 2024
7ed3114
Merge branch 'main' into 540-classification-arg
perolavsvendsen Apr 10, 2024
db9b524
Merge branch 'main' into 540-classification-arg
perolavsvendsen Apr 10, 2024
7c069b6
Merge branch 'main' into 540-classification-arg
perolavsvendsen Apr 11, 2024
3a60db0
WIP: Linting
perolavsvendsen Apr 11, 2024
cc2e0f4
WIP: Synch with recent changes
perolavsvendsen Apr 11, 2024
780caff
Merge branch 'main' into 540-classification-arg
perolavsvendsen Apr 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 82 additions & 11 deletions src/fmu/dataio/_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from datetime import timezone
from pathlib import Path
from typing import TYPE_CHECKING, Final
from copy import deepcopy

from pydantic import AnyHttpUrl, TypeAdapter

Expand Down Expand Up @@ -46,9 +47,11 @@ def generate_meta_tracklog() -> list[meta.TracklogEvent]:
user=meta.User.model_construct(id=getpass.getuser()),
sysinfo=meta.SystemInformation.model_construct(
fmu_dataio=meta.VersionInformation.model_construct(version=__version__),
komodo=meta.VersionInformation.model_construct(version=kr)
if (kr := os.environ.get("KOMODO_RELEASE"))
else None,
komodo=(
meta.VersionInformation.model_construct(version=kr)
if (kr := os.environ.get("KOMODO_RELEASE"))
else None
),
operating_system=meta.SystemInformationOperatingSystem.model_construct(
hostname=platform.node(),
operating_system=platform.platform(),
Expand Down Expand Up @@ -99,8 +102,74 @@ def _get_meta_objectdata(
)


def _get_meta_access(access: dict) -> meta.SsdlAccess:
return meta.SsdlAccess.model_validate(access)
def _get_meta_access(dataio) -> dict | None:
    """Create the full access block from a combination of arguments and config.

    Args:
        dataio: Object exposing a ``config`` dict, plus the attributes read by
            ``_meta_access_classification`` and ``_meta_access_rep_include``.

    Returns:
        A dict with the keys ``asset``, ``classification`` and ``ssdl`` (the
        latter holding ``access_level`` and ``rep_include``), or None when the
        config has no ``access`` entry.

    Raises:
        Any error raised by ``meta.SsdlAccess.model_validate`` for an invalid
        ``access`` config propagates to the caller.
    """

    # TMP: Try to carve out the logic here first, then move it to Pydantic

    # If access isn't in the config, there is nothing to build.
    access_config = dataio.config.get("access")
    if access_config is None:
        return None

    # Validate the input from config. The validated model is intentionally
    # discarded; the call is made only for its validation side effects.
    # We have tests that are expecting a UserWarning if we try to create metadata
    # using config that has errors.
    meta.SsdlAccess.model_validate(access_config)

    # Now build the access block element by element.
    classification = _meta_access_classification(dataio)

    return {
        "asset": access_config.get("asset"),  # always from config
        "classification": classification,
        "ssdl": {
            "access_level": classification,  # legacy, mirrors classification
            "rep_include": _meta_access_rep_include(dataio),
        },
    }


def _meta_access_classification(dataio) -> str | None:
    """Resolve the security classification to put in the access block.

    Ideally the user provides the ``classification`` argument; otherwise the
    config is consulted. The first value that is not None wins:

    1. the (optional) ``classification`` argument on ``dataio``
    2. ``access.classification`` from the config
    3. (legacy) ``access.ssdl.access_level`` from the config

    Args:
        dataio: Object exposing a ``classification`` attribute and a
            ``config`` dict.

    Returns:
        The resolved classification, or None if none of the sources has one.
    """
    # 1. Use the (optional) argument
    if dataio.classification is not None:
        return dataio.classification

    # `or {}` guards against keys that are present but explicitly set to None,
    # where a plain `.get(key, {})` would hand back None and break the chain.
    access_config = dataio.config.get("access") or {}

    # 2. Fall back to the default from config
    classification = access_config.get("classification")
    if classification is not None:
        return classification

    # 3. Fall back to (legacy) access.ssdl.access_level from config
    return (access_config.get("ssdl") or {}).get("access_level")


def _meta_access_rep_include(dataio) -> bool | None:
    """Resolve the ``rep_include`` flag to put in the access block.

    The first value that is not None wins:

    1. the (optional) ``rep_include`` argument on ``dataio``
    2. ``access.ssdl.rep_include`` from the config

    Args:
        dataio: Object exposing a ``rep_include`` attribute and a ``config``
            dict.

    Returns:
        The resolved flag, or None if neither source has one.
    """
    # 1. Check the (optional) argument. Compare against None explicitly, since
    # False is a legitimate value that must not trigger the fallback.
    if dataio.rep_include is not None:
        return dataio.rep_include

    # 2. Check the config. `or {}` guards against keys that are present but
    # explicitly set to None.
    access_config = dataio.config.get("access") or {}
    return (access_config.get("ssdl") or {}).get("rep_include")


def _get_meta_masterdata(masterdata: dict) -> meta.Masterdata:
Expand Down Expand Up @@ -151,7 +220,7 @@ def generate_export_metadata(
* meta_fmu: nested dict of model, case, etc (complex)
* meta_file: dict of paths and checksums
* meta_masterdata: dict of (currently) smda masterdata
* meta_access: dict with name of field + access rules
* meta_access: dict with name of asset + security classification
* meta_objectdata: the data block, may be complex
* meta_display: dict of default display settings (experimental)

Expand All @@ -166,7 +235,7 @@ def generate_export_metadata(
filedata = _get_filedata_provider(dataio, obj, objdata, fmudata, compute_md5)

masterdata = dataio.config.get("masterdata")
access = dataio.config.get("access")
access = _get_meta_access(dataio)

metadata = internal.DataClassMeta(
schema_=TypeAdapter(AnyHttpUrl).validate_strings(SCHEMA), # type: ignore[call-arg]
Expand All @@ -175,14 +244,16 @@ def generate_export_metadata(
class_=objdata.classname,
fmu=_get_meta_fmu(fmudata) if fmudata else None,
masterdata=_get_meta_masterdata(masterdata) if masterdata else None,
access=_get_meta_access(access) if access else None,
access=access if access else None,
data=_get_meta_objectdata(objdata),
file=filedata.get_metadata(),
tracklog=generate_meta_tracklog(),
display=_get_meta_display(dataio, objdata),
preprocessed=_get_meta_preprocessed_info(dataio)
if dataio.fmu_context == FmuContext.PREPROCESSED
else None,
preprocessed=(
_get_meta_preprocessed_info(dataio)
if dataio.fmu_context == FmuContext.PREPROCESSED
else None
),
).model_dump(mode="json", exclude_none=True, by_alias=True)

if skip_null:
Expand Down
75 changes: 72 additions & 3 deletions src/fmu/dataio/dataio.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@
from .datastructure.configuration import global_configuration
from .providers._fmu import FmuEnv, FmuProvider

# always show PendingDeprecationWarnings
warnings.simplefilter("always", PendingDeprecationWarning)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Had to include this to make warnings actually show, not sure if this is specific for me 🤷‍♂️

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Later learned that we should use other types of warnings to make sure they are actually seen by end users.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will remove this, but keeping it for now as a TODO: We need to make sure warnings are actually seen by end users/modellers. Possibly separate PR.


# DATAIO_EXAMPLES: Final = dataio_examples()
INSIDE_RMS: Final = detect_inside_rms()

Expand Down Expand Up @@ -246,7 +249,7 @@ class ExportData:


Args:
access_ssdl: Optional. A dictionary that will overwrite or append
access_ssdl: DEPRECATED. Optional. A dictionary that will overwrite or append
to the default ssdl settings read from the config. Example:
``{"access_level": "restricted", "rep_include": False}``

Expand All @@ -255,6 +258,9 @@ class ExportData:
the file structure or by other means. See also fmu_context, where "case"
may need an explicit casepath!

classification: Optional. The security classification of this data object, used
to override the default from the config. Valid values are: ["restricted", "internal"].

config: Required in order to produce valid metadata, either as key (here) or
through an environment variable. A dictionary with static settings.
In the standard case this is read from FMU global variables
Expand Down Expand Up @@ -323,6 +329,8 @@ class ExportData:
detected automatically from the FMU run. Can be used to override in rare
cases. If so, numbers must be >= 0

rep_include: Optional. Boolean flag for REP to display this data object.

runpath: TODO! Optional and deprecated. The relative location of the current run
root. Optional and will in most cases be auto-detected, assuming that FMU
folder conventions are followed. For an ERT run e.g.
Expand Down Expand Up @@ -403,9 +411,10 @@ class ExportData:
_inside_rms: ClassVar[bool] = False # developer only! if True pretend inside RMS

# input keys (alphabetic)
perolavsvendsen marked this conversation as resolved.
Show resolved Hide resolved
access_ssdl: dict = field(default_factory=dict)
access_ssdl: dict = field(default_factory=dict) # deprecated
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Deprecating access_ssdl, adding classification and rep_include

aggregation: bool = False
casepath: Optional[Union[str, Path]] = None
classification: Optional[str] = None
config: dict = field(default_factory=dict)
content: Optional[Union[dict, str]] = None
depth_reference: str = "msl"
Expand All @@ -422,6 +431,7 @@ class ExportData:
undef_is_zero: bool = False
parent: str = ""
realization: int = -999
rep_include: Optional[bool] = None
reuse_metadata_rule: Optional[str] = None # deprecated
runpath: Optional[Union[str, Path]] = None
subfolder: str = ""
Expand Down Expand Up @@ -489,6 +499,8 @@ def __post_init__(self) -> None:

self._validate_content_key()
self._validate_fmucontext_key()
if self.access_ssdl is not None:
self._validate_access_ssdl()
self._update_globalconfig_from_settings()

# check state of global config
Expand All @@ -502,6 +514,27 @@ def __post_init__(self) -> None:
logger.info("FMU context is %s", self.fmu_context)
logger.info("Ran __post_init__")

def _validate_access_ssdl(self) -> None:
    """Reject mixing the deprecated 'access_ssdl' with its replacements.

    The 'access_ssdl' argument is deprecated, replaced by the 'rep_include'
    and 'classification' arguments. While it is still supported, old and new
    must not be mixed: when someone starts using any of the new arguments, we
    expect them to move away from 'access_ssdl' completely - in arguments AND
    in config.

    Raises:
        ValueError: If 'classification' is given while 'access_ssdl' contains
            'access_level', or 'rep_include' is given while 'access_ssdl'
            contains 'rep_include'.
    """
    # Treat a missing (None) access_ssdl like an empty one, so the membership
    # tests below cannot fail with a TypeError.
    legacy_ssdl = self.access_ssdl or {}

    if self.classification is not None and "access_level" in legacy_ssdl:
        raise ValueError(
            "Conflicting arguments: When using 'classification', the (legacy) "
            "'access_ssdl' is not supported."
        )

    if self.rep_include is not None and "rep_include" in legacy_ssdl:
        raise ValueError(
            "Conflicting arguments: When using 'rep_include', the (legacy) "
            "'access_ssdl' is not supported."
        )

def _show_deprecations_or_notimplemented(self) -> None:
"""Warn on deprecated keys or on stuff not implemented yet."""

Expand All @@ -519,6 +552,13 @@ def _show_deprecations_or_notimplemented(self) -> None:
PendingDeprecationWarning,
)

if self.access_ssdl:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the deprecation warning intended for users who still use the access_ssdl argument

warn(
"The 'access_ssdl' key is deprecated, and replaced by arguments "
"'classification' and 'rep_include'. Please update your code.",
PendingDeprecationWarning,
)

def _validate_content_key(self) -> None:
"""Validate the given 'content' input."""
self._usecontent, self._content_specific = _check_content(self.content)
Expand Down Expand Up @@ -569,10 +609,36 @@ def _update_check_settings(self, newsettings: dict) -> None:

def _update_globalconfig_from_settings(self) -> None:
"""A few user settings may update/append the global config directly."""

# TODO Not sure where else to put this
# While deprecating the 'ssdl.access_level' from all over, if a config has
# both 'ssdl.access_level' AND classification defined, issue warning, and use
# the classification value further.

_conf_ssdl_access_level = (
self.config.get("access", {}).get("ssdl", {}).get("access_level")
)
_conf_classification = self.config.get("access", {}).get("classification")

if _conf_ssdl_access_level and _conf_classification:
# warning triggers only when both are present, i.e. the user has actively
# started using access.classification, but has not removed ssdl.access_level
warn(
"The config contains both 'access.ssdl.access_level (deprecated) and "
"access.classification. The value from access.classification will be "
"used. Remove 'access.ssdl.access_level' to silence this warning."
)

self.config["access"]["ssdl"]["access_level"] = self.config["access"][
"classification"
]

newglobals = deepcopy(self.config)

if self.access_ssdl:
if "ssdl" not in self.config["access"]:
if "access" not in self.config:
newglobals["access"] = {}
if "ssdl" not in newglobals["access"]:
newglobals["access"]["ssdl"] = {}

newglobals["access"]["ssdl"] = deepcopy(self.access_ssdl)
Expand Down Expand Up @@ -715,6 +781,9 @@ def generate_metadata(
# TODO: This needs refinement: _config_is_valid should be removed
self.config = global_configuration.roundtrip(self.config)

if self.access_ssdl is not None:
self._validate_access_ssdl()

self._check_process_object(obj) # obj --> self._object

self._establish_pwd_rootpath()
Expand Down
36 changes: 33 additions & 3 deletions src/fmu/dataio/datastructure/meta/meta.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

import warnings

from collections import ChainMap
from pathlib import Path
from typing import Dict, List, Literal, Optional, TypeVar, Union
Expand Down Expand Up @@ -31,19 +33,47 @@ class Ssdl(BaseModel):
Sub-Surface Data Lake
"""

access_level: enums.AccessLevel
rep_include: bool
access_level: enums.AccessLevel = Field(
default=enums.AccessLevel.internal,
)
rep_include: bool = Field(
default=False,
)

@model_validator(mode="after")
def _migrate_asset_to_restricted(self) -> Ssdl:
    """Replace the deprecated access level 'asset' with 'restricted'.

    Emits a FutureWarning when the replacement happens; any other access
    level is left untouched.
    """
    # Guard clause: nothing to migrate unless the deprecated value is in use.
    if self.access_level != enums.AccessLevel.asset:
        return self

    deprecation_message = (
        "The value 'asset' for access.ssdl.access_level is deprecated. "
        "Please use 'restricted' in input arguments or global variables "
        "to silence this warning."
    )
    warnings.warn(deprecation_message, FutureWarning)
    self.access_level = enums.AccessLevel.restricted
    return self


class Access(BaseModel):
    """Access block holding the asset, the ssdl settings and a classification.

    After validation, ``classification`` always equals ``ssdl.access_level``;
    see the validator below.
    """

    asset: Asset
    ssdl: Ssdl
    # Optional on input; overwritten by the "after" validator below.
    classification: Optional[enums.AccessLevel] = Field(default=None)

    @model_validator(mode="after")
    def _classification_mirrors_accesslevel(self) -> Access:
        """Copy ``ssdl.access_level`` into ``classification``."""
        # NOTE: The copy is unconditional, so a classification given by the
        # user is currently overwritten.
        # Ideally we want to only copy if the user has NOT
        # set the classification.
        # See: https://github.com/equinor/fmu-dataio/issues/540
        self.classification = self.ssdl.access_level
        return self


class SsdlAccess(Access):
class SsdlAccess(Access):  # inherits from the base class
    ssdl: Ssdl


# Is it OK to have only one class for "access"?


class File(BaseModel):
"""
Block describing the file as the data appear in FMU context
Expand Down
1 change: 1 addition & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ def fixture_globalconfig1():
access_level=global_configuration.enums.AccessLevel.internal,
rep_include=False,
),
classification=global_configuration.enums.AccessLevel.internal,
),
model=global_configuration.Model(
name="Test",
Expand Down
Loading
Loading