Skip to content

Commit

Permalink
BUG: Handle content which requires extra info in AggregatedData
Browse files Browse the repository at this point in the history
  • Loading branch information
tnatt committed Apr 15, 2024
1 parent 6ba4b49 commit 68ffbb0
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 37 deletions.
15 changes: 15 additions & 0 deletions src/fmu/dataio/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import uuid
from copy import deepcopy
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Any, Final, Literal

import numpy as np
Expand Down Expand Up @@ -188,6 +189,20 @@ def export_file_compute_checksum_md5(
return md5sum(filename)


def compute_md5_using_temp_file(
    obj: types.Inferrable, extension: str, flag: str = ""
) -> str:
    """Compute an MD5 checksum by exporting *obj* to a temporary file.

    The extension selects the export format and must include the leading
    dot (e.g. ".gri"); *flag* is forwarded to the exporter unchanged.
    """
    if not extension.startswith("."):
        raise ValueError("An extension must start with '.'")

    with NamedTemporaryFile(buffering=0, suffix=extension) as tmp:
        logger.info("Compute MD5 sum for tmp file...: %s", tmp.name)
        return export_file_compute_checksum_md5(
            obj=obj,
            filename=Path(tmp.name),
            flag=flag,
        )


def create_symlink(source: str, target: str) -> None:
"""Create a symlinked file with some checks."""

Expand Down
34 changes: 16 additions & 18 deletions src/fmu/dataio/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

from . import _utils, dataio, types
from ._logging import null_logger
from ._metadata import generate_meta_tracklog
from .providers.objectdata._provider import objectdata_provider_factory

logger: Final = null_logger(__name__)

Expand Down Expand Up @@ -230,32 +232,28 @@ def _generate_aggrd_metadata(
template["fmu"]["context"]["stage"] = "iteration"

# next, the new object will trigger update of: 'file', 'data' (some fields) and
# 'tracklog'. The trick is to create an ExportData() instance and just retrieve
# the metadata from that, and then blend the needed metadata from here into the
# template -> final metadata

fakeconfig = {
"access": self.source_metadata[0]["access"],
"masterdata": self.source_metadata[0]["masterdata"],
"model": self.source_metadata[0]["fmu"]["model"],
}

# 'tracklog'.
content = template["data"]["content"]
etemp = dataio.ExportData(config=fakeconfig, name=self.name, content=content)
etempmeta = etemp.generate_metadata(obj, compute_md5=compute_md5)
etemp = dataio.ExportData(name=self.name, content=content)
objdata = objectdata_provider_factory(obj=obj, dataio=etemp).get_objectdata()

template["tracklog"] = etempmeta["tracklog"]
template["file"] = etempmeta["file"] # actually only use the checksum_md5
template["file"]["relative_path"] = str(relpath)
template["file"]["absolute_path"] = str(abspath) if abspath else None
template["tracklog"] = generate_meta_tracklog()[0].model_dump(mode="json")
template["file"] = {
"relative_path": str(relpath),
"absolute_path": str(abspath) if abspath else None,
}
if compute_md5:
template["file"]["checksum_md5"] = _utils.compute_md5_using_temp_file(
obj, objdata.extension
)

# data section
if self.name:
template["data"]["name"] = self.name
if self.tagname:
template["data"]["tagname"] = self.tagname
if etempmeta["data"].get("bbox"):
template["data"]["bbox"] = etempmeta["data"]["bbox"]
if objdata.bbox:
template["data"]["bbox"] = objdata.bbox

self._metadata = template

Expand Down
16 changes: 6 additions & 10 deletions src/fmu/dataio/providers/_filedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@
from copy import deepcopy
from dataclasses import dataclass, field
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import TYPE_CHECKING, Final, Optional
from warnings import warn

from fmu.dataio._definitions import FmuContext
from fmu.dataio._logging import null_logger
from fmu.dataio._utils import export_file_compute_checksum_md5
from fmu.dataio._utils import (
compute_md5_using_temp_file,
)
from fmu.dataio.datastructure.meta import meta

logger: Final = null_logger(__name__)
Expand Down Expand Up @@ -110,14 +111,9 @@ def _compute_md5(self) -> str:
"""Compute an MD5 sum using a temporary file."""
if self.obj is None:
raise ValueError("Can't compute MD5 sum without an object.")
if not self.objdata.extension.startswith("."):
raise ValueError("An extension must start with '.'")

with NamedTemporaryFile(buffering=0, suffix=self.objdata.extension) as tf:
logger.info("Compute MD5 sum for tmp file...: %s", tf.name)
return export_file_compute_checksum_md5(
obj=self.obj, filename=Path(tf.name), flag=self.dataio._usefmtflag
)
return compute_md5_using_temp_file(
self.obj, self.objdata.extension, self.dataio._usefmtflag
)

def _get_filestem(self) -> str:
"""Construct the file"""
Expand Down
48 changes: 39 additions & 9 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,17 +474,11 @@ def fixture_arrowtable():
return None


@pytest.fixture(name="aggr_surfs_mean", scope="function")
def fixture_aggr_surfs_mean(fmurun_w_casemetadata, rmsglobalconfig, regsurf):
"""Create aggregated surfaces, and return aggr. mean surface + lists of metadata"""
logger.debug("Ran %s", _current_function_name())

origfolder = os.getcwd()
os.chdir(fmurun_w_casemetadata)

# helper function for the two fixtures below
def _create_aggregated_surface_dataset(rmsglobalconfig, regsurf, content):
edata = dio.ExportData(
config=rmsglobalconfig, # read from global config
content="depth",
content=content,
)

aggs = []
Expand All @@ -505,6 +499,42 @@ def fixture_aggr_surfs_mean(fmurun_w_casemetadata, rmsglobalconfig, regsurf):

metas.append(meta)
surfs.append([surf])
return surfs, metas


@pytest.fixture(name="aggr_sesimic_surfs_mean", scope="function")
def fixture_aggr_seismic_surfs_mean(fmurun_w_casemetadata, rmsglobalconfig, regsurf):
    """Create aggregated seismic surfaces; return aggr. mean surface + metadata list."""
    logger.debug("Ran %s", _current_function_name())

    prev_cwd = os.getcwd()
    os.chdir(fmurun_w_casemetadata)

    surfs, metas = _create_aggregated_surface_dataset(
        rmsglobalconfig, regsurf, content={"seismic": {"attribute": "amplitude"}}
    )

    stats = surfs.statistics()
    # mean of the aggregated "mean" surface; shall be 1238.5
    logger.debug("Aggr. mean is %s", stats["mean"].values.mean())

    os.chdir(prev_cwd)

    return (stats["mean"], metas)


@pytest.fixture(name="aggr_surfs_mean", scope="function")
def fixture_aggr_surfs_mean(fmurun_w_casemetadata, rmsglobalconfig, regsurf):
"""Create aggregated surfaces, and return aggr. mean surface + lists of metadata"""
logger.debug("Ran %s", _current_function_name())

origfolder = os.getcwd()
os.chdir(fmurun_w_casemetadata)

surfs, metas = _create_aggregated_surface_dataset(
rmsglobalconfig, regsurf, content="depth"
)

aggregated = surfs.statistics()
logger.debug(
Expand Down
26 changes: 26 additions & 0 deletions tests/test_units/test_aggregated_surfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,32 @@ def test_regsurf_aggregated(fmurun_w_casemetadata, aggr_surfs_mean):
assert newmeta["fmu"]["context"]["stage"] == "iteration"


def test_regsurf_aggregated_content_seismic(
    fmurun_w_casemetadata, aggr_sesimic_surfs_mean
):
    """
    Test generating aggregated metadata for a surface whose content is seismic,
    which requires extra info (the seismic attribute).
    """
    logger.info("Active folder is %s", fmurun_w_casemetadata)

    os.chdir(fmurun_w_casemetadata)

    # xtgeo object plus the list of per-realization metadata dicts
    aggr_mean, metas = aggr_sesimic_surfs_mean
    logger.info("Aggr. mean is %s", aggr_mean.values.mean())

    aggdata = dataio.AggregatedData(
        name="myaggrd",
        operation="mean",
        aggregation_id="1234",
        source_metadata=metas,
    )
    newmeta = aggdata.generate_metadata(aggr_mean)
    logger.debug("New metadata:\n%s", utils.prettyprint_dict(newmeta))

    assert newmeta["fmu"]["aggregation"]["id"] == "1234"
    assert newmeta["fmu"]["context"]["stage"] == "iteration"


def test_regsurf_aggregated_export(fmurun_w_casemetadata, aggr_surfs_mean):
"""Test generating aggragated metadata, now with export method.
Expand Down

0 comments on commit 68ffbb0

Please sign in to comment.