Skip to content

Commit

Permalink
CLN: Simplify export path logic
Browse files Browse the repository at this point in the history
  • Loading branch information
tnatt committed Apr 18, 2024
1 parent 0d371c1 commit 0fb9b54
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 125 deletions.
4 changes: 1 addition & 3 deletions src/fmu/dataio/_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,7 @@ def _get_filedata_provider(
return FileDataProvider(
dataio=dataio,
objdata=objdata,
rootpath=dataio._rootpath, # has been updated to case_path if fmurun
itername=fmudata.get_iter_name() if fmudata else "",
realname=fmudata.get_real_name() if fmudata else "",
runpath=fmudata.get_runpath() if fmudata else None,
obj=obj,
compute_md5=compute_md5,
)
Expand Down
9 changes: 9 additions & 0 deletions src/fmu/dataio/datastructure/meta/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,15 @@ class File(BaseModel):
relative_path_symlink: Optional[Path] = Field(default=None)
absolute_path_symlink: Optional[Path] = Field(default=None)

@model_validator(mode="before")
@classmethod
def _check_for_non_ascii_in_path(cls, values: Dict) -> Dict:
if (path := values.get("absolute_path")) and not str(path).isascii():
raise ValueError(
f"Path has non-ascii elements which is not supported: {path}"
)
return values


class Parameters(RootModel):
root: Dict[str, Union[Parameters, int, float, str]]
Expand Down
160 changes: 73 additions & 87 deletions src/fmu/dataio/providers/_filedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@

from __future__ import annotations

from copy import deepcopy
from dataclasses import dataclass, field
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import TYPE_CHECKING, Final, Optional
from warnings import warn
Expand All @@ -27,6 +27,29 @@
from .objectdata._provider import ObjectDataProvider


class ShareFolder(str, Enum):
PREPROCESSED = "share/preprocessed"
OBSERVATIONS = "share/observations"
RESULTS = "share/results"


def get_share_folders(dataio: ExportData, objdata: ObjectDataProvider) -> Path:
"""Get the export share folders."""
if dataio.fmu_context == FmuContext.PREPROCESSED:
sharefolder = Path(ShareFolder.PREPROCESSED.value)
elif dataio.is_observation:
sharefolder = Path(ShareFolder.OBSERVATIONS.value)
else:
sharefolder = Path(ShareFolder.RESULTS.value)

sharefolder = sharefolder / objdata.efolder
if dataio.subfolder:
sharefolder = sharefolder / dataio.subfolder

logger.info("Export share folders are %s", sharefolder)
return sharefolder


@dataclass
class FileDataProvider:
"""Class for providing metadata for the 'files' block in fmu-dataio.
Expand All @@ -41,67 +64,43 @@ class FileDataProvider:
# input
dataio: ExportData
objdata: ObjectDataProvider
rootpath: Path = field(default_factory=Path)
itername: str = ""
realname: str = ""
obj: Optional[types.Inferrable] = field(default=None)
runpath: Path | None = None
obj: Optional[types.Inferrable] = None
compute_md5: bool = False

# storing results in these variables
forcefolder_is_absolute: bool = field(default=False, init=False)

@property
def name(self) -> str:
return self.dataio.name or self.objdata.name

def get_metadata(self) -> meta.File:
relpath = self._get_path()
relative_path, absolute_path = self._derive_filedata_generic(relpath)
if self.dataio.forcefolder and (
forcefolder := self._get_forcefolder_if_absolute()
):
absolute_path = self._add_filename_to_path(forcefolder)
relative_path = self._try_to_resolve_forcefolder(absolute_path)

else:
rootpath = (
self.runpath
if self.runpath and self.dataio.fmu_context == FmuContext.REALIZATION
else self.dataio._rootpath
)
share_folders = get_share_folders(self.dataio, self.objdata)
export_folder = rootpath / share_folders

absolute_path = self._add_filename_to_path(export_folder)
relative_path = absolute_path.relative_to(self.dataio._rootpath)
# TODO: move this resolve before creating relative paths
# need to change a bit of our test set-up to make it work
absolute_path = absolute_path.resolve()

logger.info("Returning metadata pydantic model meta.File")
return meta.File(
absolute_path=absolute_path,
relative_path=relative_path,
checksum_md5=self._compute_md5() if self.compute_md5 else None,
)

def _derive_filedata_generic(self, inrelpath: Path) -> tuple[Path, Path]:
"""This works with both normal data and symlinks."""
stem = self._get_filestem()

path = Path(inrelpath) / stem.lower()
path = path.with_suffix(path.suffix + self.objdata.extension)

# resolve() will fix ".." e.g. change '/some/path/../other' to '/some/other'
abspath = path.resolve()

try:
str(abspath).encode("ascii")
except UnicodeEncodeError:
print(f"!! Path has non-ascii elements which is not supported: {abspath}")
raise

if self.forcefolder_is_absolute:
# may become meaningsless as forcefolder can be something else, but will try
try:
relpath = path.relative_to(self.rootpath)
except ValueError as verr:
if ("does not start with" in str(verr)) or (
"not in the subpath of" in str(verr)
):
relpath = abspath
logger.info(
"Relative path equal to absolute path due to forcefolder "
"with absolute path deviating for rootpath %s",
self.rootpath,
)
else:
raise
else:
relpath = path.relative_to(self.rootpath)

logger.info("Derived filedata")
return relpath, abspath

def _compute_md5(self) -> str:
"""Compute an MD5 sum using a temporary file."""
if self.obj is None:
Expand All @@ -110,6 +109,10 @@ def _compute_md5(self) -> str:
self.obj, self.objdata.extension, self.dataio._usefmtflag
)

def _add_filename_to_path(self, path: Path) -> Path:
stem = self._get_filestem()
return (path / stem).with_suffix(path.suffix + self.objdata.extension)

def _get_filestem(self) -> str:
"""Construct the file"""

Expand Down Expand Up @@ -150,49 +153,32 @@ def _get_filestem(self) -> str:
# BUG(?): What about germen letter like "Ü"?
stem = stem.replace("æ", "ae")
stem = stem.replace("ø", "oe")
return stem.replace("å", "aa")

def _get_path(self) -> Path:
"""Construct and get the folder path(s)."""
mode = self.dataio.fmu_context
outroot = deepcopy(self.rootpath)

logger.info("FMU context is %s", mode)
if mode == FmuContext.REALIZATION:
if self.realname:
outroot = outroot / self.realname # TODO: if missing self.realname?

if self.itername:
outroot = outroot / self.itername

outroot = outroot / "share"

if mode == FmuContext.PREPROCESSED:
outroot = outroot / "preprocessed"
if self.dataio.forcefolder and self.dataio.forcefolder.startswith("/"):
raise ValueError(
"Cannot use absolute path to 'forcefolder' with preprocessed data"
)

if mode != FmuContext.PREPROCESSED:
if self.dataio.is_observation:
outroot = outroot / "observations"
else:
outroot = outroot / "results"

dest = outroot / self.objdata.efolder # e.g. "maps"
stem = stem.replace("å", "aa")
return stem.lower()

if self.dataio.forcefolder and self.dataio.forcefolder.startswith("/"):
def _get_forcefolder_if_absolute(self) -> Path | None:
if self.dataio.forcefolder.startswith("/"):
if not self.dataio.allow_forcefolder_absolute:
raise ValueError(
"The forcefolder includes an absolute path, i.e. "
"Cannot use absolute path to 'forcefolder', i.e. "
"starting with '/'. This is strongly discouraged and is only "
"allowed if classvariable allow_forcefolder_absolute is set to True"
)
warn("Using absolute paths in forcefolder is not recommended!")
return Path(self.dataio.forcefolder).absolute()
return None

# absolute if starts with "/", otherwise relative to outroot
dest = Path(self.dataio.forcefolder).absolute()
self.forcefolder_is_absolute = True

return dest if not self.dataio.subfolder else dest / self.dataio.subfolder
def _try_to_resolve_forcefolder(self, forcefolder: Path) -> Path:
try:
return forcefolder.relative_to(self.dataio._rootpath)
except ValueError as verr:
if ("does not start with" in str(verr)) or (
"not in the subpath of" in str(verr)
):
logger.info(
"Relative path equal to absolute path due to forcefolder "
"with absolute path deviating for rootpath %s",
self.dataio._rootpath,
)
return forcefolder.resolve()
raise
4 changes: 4 additions & 0 deletions src/fmu/dataio/providers/_fmu.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,10 @@ def get_casepath(self) -> Path | None:
"""Return updated casepath in a FMU run, will be updated if initially blank."""
return self._casepath

def get_runpath(self) -> Path | None:
"""Return runpath for a FMU run."""
return self._runpath

def get_metadata(self) -> internal.FMUClassMetaData | None:
"""Construct the metadata FMU block for an ERT forward job."""
logger.debug("Generate ERT metadata...")
Expand Down
63 changes: 28 additions & 35 deletions tests/test_units/test_filedataprovider_class.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
"""Test the _MetaData class from the _metadata.py module"""

import os
from copy import deepcopy
from pathlib import Path

import pytest
from fmu.dataio import ExportData
from fmu.dataio.datastructure.meta import meta
from fmu.dataio.providers._filedata import FileDataProvider
from fmu.dataio.providers._filedata import FileDataProvider, get_share_folders
from fmu.dataio.providers.objectdata._base import derive_name
from fmu.dataio.providers.objectdata._provider import objectdata_provider_factory
from xtgeo.cube import Cube
Expand Down Expand Up @@ -157,6 +158,7 @@ def test_get_filestem_shall_fail(
objdata.time0 = time0
objdata.time1 = time1

edataobj1 = deepcopy(edataobj1)
edataobj1.tagname = tagname
edataobj1.parent = parentname
edataobj1.name = ""
Expand All @@ -168,53 +170,51 @@ def test_get_filestem_shall_fail(
assert message in str(msg)


def test_get_paths_path_exists_already(regsurf, edataobj1, tmp_path):
"""Testing the private _get_path method."""
def test_get_share_folders(regsurf):
"""Testing the get_share_folders method."""

os.chdir(tmp_path)
newpath = tmp_path / "share" / "results" / "efolder"
newpath.mkdir(parents=True, exist_ok=True)

edataobj1.name = "some"
edataobj1 = ExportData(name="some")

objdata = objectdata_provider_factory(regsurf, edataobj1)
objdata.name = "some"
objdata.efolder = "efolder"

fdata = FileDataProvider(edataobj1, objdata)
share_folders = get_share_folders(edataobj1, objdata)
assert isinstance(share_folders, Path)
assert share_folders == Path("share/results/efolder")

path = fdata._get_path()
assert str(path) == "share/results/efolder"
# check that the path present in the metadata matches the share folders
fdata = FileDataProvider(edataobj1, objdata)
fmeta = fdata.get_metadata()
assert str(fmeta.absolute_path.parent).endswith("share/results/efolder")


def test_get_paths_not_exists_so_create(regsurf, edataobj1, tmp_path):
def test_get_share_folders_with_subfolder(regsurf):
"""Testing the private _get_path method, creating the path."""

os.chdir(tmp_path)
edataobj1 = ExportData(name="some", subfolder="sub")

objdata = objectdata_provider_factory(regsurf, edataobj1)
objdata.name = "some"
objdata.efolder = "efolder"
cfg = edataobj1

cfg._rootpath = Path(".")

fdata = FileDataProvider(cfg, objdata)
share_folders = get_share_folders(edataobj1, objdata)
assert share_folders == Path("share/results/efolder/sub")

path = fdata._get_path()
assert str(path) == "share/results/efolder"
# check that the path present in the metadata matches the share folders
fdata = FileDataProvider(edataobj1, objdata)
fmeta = fdata.get_metadata()
assert str(fmeta.absolute_path.parent).endswith("share/results/efolder/sub")


def test_filedata_provider(regsurf, edataobj1, tmp_path):
def test_filedata_provider(regsurf, tmp_path):
"""Testing the derive_filedata function."""

os.chdir(tmp_path)

cfg = edataobj1
cfg._rootpath = Path(".")
cfg.name = ""
cfg = ExportData(name="", parent="parent", tagname="tag")

objdata = objectdata_provider_factory(regsurf, edataobj1)
objdata = objectdata_provider_factory(regsurf, cfg)
objdata.name = "name"
objdata.efolder = "efolder"
objdata.extension = ".ext"
Expand All @@ -233,24 +233,17 @@ def test_filedata_provider(regsurf, edataobj1, tmp_path):
assert filemeta.absolute_path == absdata


def test_filedata_has_nonascii_letters(regsurf, edataobj1, tmp_path):
def test_filedata_has_nonascii_letters(regsurf, tmp_path):
"""Testing the get_metadata function."""

os.chdir(tmp_path)

cfg = edataobj1
cfg._rootpath = Path(".")
cfg.name = "mynõme"
edataobj1 = ExportData(name="mynõme")

objdata = objectdata_provider_factory(regsurf, edataobj1)
objdata.name = "anynõme"
objdata.efolder = "efolder"
objdata.extension = ".ext"
objdata.time0 = "t1"
objdata.time1 = "t2"

fdata = FileDataProvider(cfg, objdata)
with pytest.raises(UnicodeEncodeError, match=r"codec can't encode character"):
fdata = FileDataProvider(edataobj1, objdata)
with pytest.raises(ValueError, match="Path has non-ascii elements"):
fdata.get_metadata()


Expand Down

0 comments on commit 0fb9b54

Please sign in to comment.