From 0fb9b5464872399ea98b04b37ac3a31ac1afc243 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Therese=20Natter=C3=B8y?= <61694854+tnatt@users.noreply.github.com> Date: Wed, 17 Apr 2024 19:30:00 +0200 Subject: [PATCH] CLN: Simplify export path logic --- src/fmu/dataio/_metadata.py | 4 +- src/fmu/dataio/datastructure/meta/meta.py | 9 + src/fmu/dataio/providers/_filedata.py | 160 ++++++++---------- src/fmu/dataio/providers/_fmu.py | 4 + .../test_units/test_filedataprovider_class.py | 63 +++---- 5 files changed, 115 insertions(+), 125 deletions(-) diff --git a/src/fmu/dataio/_metadata.py b/src/fmu/dataio/_metadata.py index 81cec7b1b..c1bdcb3e9 100644 --- a/src/fmu/dataio/_metadata.py +++ b/src/fmu/dataio/_metadata.py @@ -81,9 +81,7 @@ def _get_filedata_provider( return FileDataProvider( dataio=dataio, objdata=objdata, - rootpath=dataio._rootpath, # has been updated to case_path if fmurun - itername=fmudata.get_iter_name() if fmudata else "", - realname=fmudata.get_real_name() if fmudata else "", + runpath=fmudata.get_runpath() if fmudata else None, obj=obj, compute_md5=compute_md5, ) diff --git a/src/fmu/dataio/datastructure/meta/meta.py b/src/fmu/dataio/datastructure/meta/meta.py index 66fc83276..a3b482f3e 100644 --- a/src/fmu/dataio/datastructure/meta/meta.py +++ b/src/fmu/dataio/datastructure/meta/meta.py @@ -70,6 +70,15 @@ class File(BaseModel): relative_path_symlink: Optional[Path] = Field(default=None) absolute_path_symlink: Optional[Path] = Field(default=None) + @model_validator(mode="before") + @classmethod + def _check_for_non_ascii_in_path(cls, values: Dict) -> Dict: + if (path := values.get("absolute_path")) and not str(path).isascii(): + raise ValueError( + f"Path has non-ascii elements which is not supported: {path}" + ) + return values + class Parameters(RootModel): root: Dict[str, Union[Parameters, int, float, str]] diff --git a/src/fmu/dataio/providers/_filedata.py b/src/fmu/dataio/providers/_filedata.py index b46faa335..3210272d4 100644 --- a/src/fmu/dataio/providers/_filedata.py +++ b/src/fmu/dataio/providers/_filedata.py @@ -6,8 +6,8 @@ from __future__ import annotations -from copy import deepcopy -from dataclasses import dataclass, field +from dataclasses import dataclass +from enum import Enum from pathlib import Path from typing import TYPE_CHECKING, Final, Optional from warnings import warn @@ -27,6 +27,29 @@ from .objectdata._provider import ObjectDataProvider +class ShareFolder(str, Enum): + PREPROCESSED = "share/preprocessed" + OBSERVATIONS = "share/observations" + RESULTS = "share/results" + + +def get_share_folders(dataio: ExportData, objdata: ObjectDataProvider) -> Path: + """Get the export share folders.""" + if dataio.fmu_context == FmuContext.PREPROCESSED: + sharefolder = Path(ShareFolder.PREPROCESSED.value) + elif dataio.is_observation: + sharefolder = Path(ShareFolder.OBSERVATIONS.value) + else: + sharefolder = Path(ShareFolder.RESULTS.value) + + sharefolder = sharefolder / objdata.efolder + if dataio.subfolder: + sharefolder = sharefolder / dataio.subfolder + + logger.info("Export share folders are %s", sharefolder) + return sharefolder + + @dataclass class FileDataProvider: """Class for providing metadata for the 'files' block in fmu-dataio. @@ -41,22 +64,36 @@ class FileDataProvider: # input dataio: ExportData objdata: ObjectDataProvider - rootpath: Path = field(default_factory=Path) - itername: str = "" - realname: str = "" - obj: Optional[types.Inferrable] = field(default=None) + runpath: Path | None = None + obj: Optional[types.Inferrable] = None compute_md5: bool = False - # storing results in these variables - forcefolder_is_absolute: bool = field(default=False, init=False) - @property def name(self) -> str: return self.dataio.name or self.objdata.name def get_metadata(self) -> meta.File: - relpath = self._get_path() - relative_path, absolute_path = self._derive_filedata_generic(relpath) + if self.dataio.forcefolder and ( + forcefolder := self._get_forcefolder_if_absolute() + ): + absolute_path = self._add_filename_to_path(forcefolder) + relative_path = self._try_to_resolve_forcefolder(absolute_path) + + else: + rootpath = ( + self.runpath + if self.runpath and self.dataio.fmu_context == FmuContext.REALIZATION + else self.dataio._rootpath + ) + share_folders = get_share_folders(self.dataio, self.objdata) + export_folder = rootpath / share_folders + + absolute_path = self._add_filename_to_path(export_folder) + relative_path = absolute_path.relative_to(self.dataio._rootpath) + # TODO: move this resolve before creating relative paths + # need to change a bit of our test set-up to make it work + absolute_path = absolute_path.resolve() + logger.info("Returning metadata pydantic model meta.File") return meta.File( absolute_path=absolute_path, @@ -64,44 +101,6 @@ def get_metadata(self) -> meta.File: checksum_md5=self._compute_md5() if self.compute_md5 else None, ) - def _derive_filedata_generic(self, inrelpath: Path) -> tuple[Path, Path]: - """This works with both normal data and symlinks.""" - stem = self._get_filestem() - - path = Path(inrelpath) / stem.lower() - path = path.with_suffix(path.suffix + self.objdata.extension) - - # resolve() will fix ".." e.g. change '/some/path/../other' to '/some/other' - abspath = path.resolve() - - try: - str(abspath).encode("ascii") - except UnicodeEncodeError: - print(f"!! Path has non-ascii elements which is not supported: {abspath}") - raise - - if self.forcefolder_is_absolute: - # may become meaningsless as forcefolder can be something else, but will try - try: - relpath = path.relative_to(self.rootpath) - except ValueError as verr: - if ("does not start with" in str(verr)) or ( - "not in the subpath of" in str(verr) - ): - relpath = abspath - logger.info( - "Relative path equal to absolute path due to forcefolder " - "with absolute path deviating for rootpath %s", - self.rootpath, - ) - else: - raise - else: - relpath = path.relative_to(self.rootpath) - - logger.info("Derived filedata") - return relpath, abspath - def _compute_md5(self) -> str: """Compute an MD5 sum using a temporary file.""" if self.obj is None: @@ -110,6 +109,10 @@ def _compute_md5(self) -> str: self.obj, self.objdata.extension, self.dataio._usefmtflag ) + def _add_filename_to_path(self, path: Path) -> Path: + stem = self._get_filestem() + return (path / stem).with_suffix(path.suffix + self.objdata.extension) + def _get_filestem(self) -> str: """Construct the file""" @@ -150,49 +153,32 @@ def _get_filestem(self) -> str: # BUG(?): What about germen letter like "Ü"? stem = stem.replace("æ", "ae") stem = stem.replace("ø", "oe") - return stem.replace("å", "aa") - - def _get_path(self) -> Path: - """Construct and get the folder path(s).""" - mode = self.dataio.fmu_context - outroot = deepcopy(self.rootpath) - - logger.info("FMU context is %s", mode) - if mode == FmuContext.REALIZATION: - if self.realname: - outroot = outroot / self.realname # TODO: if missing self.realname? - - if self.itername: - outroot = outroot / self.itername - - outroot = outroot / "share" - - if mode == FmuContext.PREPROCESSED: - outroot = outroot / "preprocessed" - if self.dataio.forcefolder and self.dataio.forcefolder.startswith("/"): - raise ValueError( - "Cannot use absolute path to 'forcefolder' with preprocessed data" - ) - - if mode != FmuContext.PREPROCESSED: - if self.dataio.is_observation: - outroot = outroot / "observations" - else: - outroot = outroot / "results" - - dest = outroot / self.objdata.efolder # e.g. "maps" + stem = stem.replace("å", "aa") + return stem.lower() - if self.dataio.forcefolder and self.dataio.forcefolder.startswith("/"): + def _get_forcefolder_if_absolute(self) -> Path | None: + if self.dataio.forcefolder.startswith("/"): if not self.dataio.allow_forcefolder_absolute: raise ValueError( - "The forcefolder includes an absolute path, i.e. " + "Cannot use absolute path to 'forcefolder', i.e. " "starting with '/'. This is strongly discouraged and is only " "allowed if classvariable allow_forcefolder_absolute is set to True" ) warn("Using absolute paths in forcefolder is not recommended!") + return Path(self.dataio.forcefolder).absolute() + return None - # absolute if starts with "/", otherwise relative to outroot - dest = Path(self.dataio.forcefolder).absolute() - self.forcefolder_is_absolute = True - - return dest if not self.dataio.subfolder else dest / self.dataio.subfolder + def _try_to_resolve_forcefolder(self, forcefolder: Path) -> Path: + try: + return forcefolder.relative_to(self.dataio._rootpath) + except ValueError as verr: + if ("does not start with" in str(verr)) or ( + "not in the subpath of" in str(verr) + ): + logger.info( + "Relative path equal to absolute path due to forcefolder " + "with absolute path deviating for rootpath %s", + self.dataio._rootpath, + ) + return forcefolder.resolve() + raise diff --git a/src/fmu/dataio/providers/_fmu.py b/src/fmu/dataio/providers/_fmu.py index 9692ec140..3ff697bb8 100644 --- a/src/fmu/dataio/providers/_fmu.py +++ b/src/fmu/dataio/providers/_fmu.py @@ -159,6 +159,10 @@ def get_casepath(self) -> Path | None: """Return updated casepath in a FMU run, will be updated if initially blank.""" return self._casepath + def get_runpath(self) -> Path | None: + """Return runpath for a FMU run.""" + return self._runpath + def get_metadata(self) -> internal.FMUClassMetaData | None: """Construct the metadata FMU block for an ERT forward job.""" logger.debug("Generate ERT metadata...") diff --git a/tests/test_units/test_filedataprovider_class.py b/tests/test_units/test_filedataprovider_class.py index 4859841f1..131575e2f 100644 --- a/tests/test_units/test_filedataprovider_class.py +++ b/tests/test_units/test_filedataprovider_class.py @@ -1,12 +1,13 @@ """Test the _MetaData class from the _metadata.py module""" import os +from copy import deepcopy from pathlib import Path import pytest from fmu.dataio import ExportData from fmu.dataio.datastructure.meta import meta -from fmu.dataio.providers._filedata import FileDataProvider +from fmu.dataio.providers._filedata import FileDataProvider, get_share_folders from fmu.dataio.providers.objectdata._base import derive_name from fmu.dataio.providers.objectdata._provider import objectdata_provider_factory from xtgeo.cube import Cube @@ -157,6 +158,7 @@ def test_get_filestem_shall_fail( objdata.time0 = time0 objdata.time1 = time1 + edataobj1 = deepcopy(edataobj1) edataobj1.tagname = tagname edataobj1.parent = parentname edataobj1.name = "" @@ -168,53 +170,51 @@ def test_get_filestem_shall_fail( assert message in str(msg) -def test_get_paths_path_exists_already(regsurf, edataobj1, tmp_path): - """Testing the private _get_path method.""" +def test_get_share_folders(regsurf): + """Testing the get_share_folders method.""" - os.chdir(tmp_path) - newpath = tmp_path / "share" / "results" / "efolder" - newpath.mkdir(parents=True, exist_ok=True) - - edataobj1.name = "some" + edataobj1 = ExportData(name="some") objdata = objectdata_provider_factory(regsurf, edataobj1) objdata.name = "some" objdata.efolder = "efolder" - fdata = FileDataProvider(edataobj1, objdata) + share_folders = get_share_folders(edataobj1, objdata) + assert isinstance(share_folders, Path) + assert share_folders == Path("share/results/efolder") - path = fdata._get_path() - assert str(path) == "share/results/efolder" + # check that the path present in the metadata matches the share folders + fdata = FileDataProvider(edataobj1, objdata) + fmeta = fdata.get_metadata() + assert str(fmeta.absolute_path.parent).endswith("share/results/efolder") -def test_get_paths_not_exists_so_create(regsurf, edataobj1, tmp_path): +def test_get_share_folders_with_subfolder(regsurf): """Testing the private _get_path method, creating the path.""" - os.chdir(tmp_path) + edataobj1 = ExportData(name="some", subfolder="sub") objdata = objectdata_provider_factory(regsurf, edataobj1) objdata.name = "some" objdata.efolder = "efolder" - cfg = edataobj1 - - cfg._rootpath = Path(".") - fdata = FileDataProvider(cfg, objdata) + share_folders = get_share_folders(edataobj1, objdata) + assert share_folders == Path("share/results/efolder/sub") - path = fdata._get_path() - assert str(path) == "share/results/efolder" + # check that the path present in the metadata matches the share folders + fdata = FileDataProvider(edataobj1, objdata) + fmeta = fdata.get_metadata() + assert str(fmeta.absolute_path.parent).endswith("share/results/efolder/sub") -def test_filedata_provider(regsurf, edataobj1, tmp_path): +def test_filedata_provider(regsurf, tmp_path): """Testing the derive_filedata function.""" os.chdir(tmp_path) - cfg = edataobj1 - cfg._rootpath = Path(".") - cfg.name = "" + cfg = ExportData(name="", parent="parent", tagname="tag") - objdata = objectdata_provider_factory(regsurf, edataobj1) + objdata = objectdata_provider_factory(regsurf, cfg) objdata.name = "name" objdata.efolder = "efolder" objdata.extension = ".ext" @@ -233,24 +233,17 @@ def test_filedata_provider(regsurf, edataobj1, tmp_path): assert filemeta.absolute_path == absdata -def test_filedata_has_nonascii_letters(regsurf, edataobj1, tmp_path): +def test_filedata_has_nonascii_letters(regsurf, tmp_path): """Testing the get_metadata function.""" os.chdir(tmp_path) - - cfg = edataobj1 - cfg._rootpath = Path(".") - cfg.name = "mynõme" + edataobj1 = ExportData(name="mynõme") objdata = objectdata_provider_factory(regsurf, edataobj1) objdata.name = "anynõme" - objdata.efolder = "efolder" - objdata.extension = ".ext" - objdata.time0 = "t1" - objdata.time1 = "t2" - fdata = FileDataProvider(cfg, objdata) - with pytest.raises(UnicodeEncodeError, match=r"codec can't encode character"): + fdata = FileDataProvider(edataobj1, objdata) + with pytest.raises(ValueError, match="Path has non-ascii elements"): fdata.get_metadata()