From 14abe9b17efbb2bae6944e5500431cad7e187159 Mon Sep 17 00:00:00 2001 From: JB Lovland Date: Mon, 8 Jan 2024 14:49:01 +0100 Subject: [PATCH] Package fully typed --- .github/workflows/ci-fmudataio.yml | 4 +- .github/workflows/fmudataio-documention.yml | 6 +- .github/workflows/fmudataio-publish-pypi.yml | 4 +- .github/workflows/linting.yml | 11 +- .github/workflows/mypy.yml | 22 +++ .gitignore | 3 + mypy.ini | 11 +- src/fmu/dataio/_definitions.py | 59 +++++--- src/fmu/dataio/_design_kw.py | 64 ++++---- src/fmu/dataio/_filedata_provider.py | 27 ++-- src/fmu/dataio/_fmu_provider.py | 27 ++-- src/fmu/dataio/_metadata.py | 52 ++++--- src/fmu/dataio/_objectdata_provider.py | 142 +++++++++++------- src/fmu/dataio/_oyaml.py | 1 + src/fmu/dataio/_utils.py | 124 +++++++-------- src/fmu/dataio/dataio.py | 134 +++++++++++------ src/fmu/dataio/hook_implementations/jobs.py | 4 +- .../dataio/scripts/create_case_metadata.py | 39 +++-- tests/test_units/test_aggregated_surfaces.py | 13 +- 19 files changed, 452 insertions(+), 295 deletions(-) create mode 100644 .github/workflows/mypy.yml diff --git a/.github/workflows/ci-fmudataio.yml b/.github/workflows/ci-fmudataio.yml index 9f70338f8..9a92e73ea 100644 --- a/.github/workflows/ci-fmudataio.yml +++ b/.github/workflows/ci-fmudataio.yml @@ -17,7 +17,9 @@ jobs: os: [ubuntu-latest] steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Set up Python uses: actions/setup-python@v2 diff --git a/.github/workflows/fmudataio-documention.yml b/.github/workflows/fmudataio-documention.yml index cd0362eef..168da15da 100644 --- a/.github/workflows/fmudataio-documention.yml +++ b/.github/workflows/fmudataio-documention.yml @@ -15,11 +15,13 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ["3.8"] + python-version: ["3.10"] os: [ubuntu-latest] steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Set up Python uses: actions/setup-python@v4 diff --git a/.github/workflows/fmudataio-publish-pypi.yml b/.github/workflows/fmudataio-publish-pypi.yml index 39e9123bb..76fdcb9f6 100644 --- a/.github/workflows/fmudataio-publish-pypi.yml +++ b/.github/workflows/fmudataio-publish-pypi.yml @@ -9,7 +9,9 @@ jobs: name: Build and publish Python 🐍 distributions 📦 to PyPI runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Set up Python 3.10 uses: actions/setup-python@v4 with: diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 013c25922..d36d213d7 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -9,12 +9,15 @@ jobs: matrix: python-version: ["3.10"] steps: - - uses: actions/checkout@v2 - + - uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Set up python uses: actions/setup-python@v4 - - name: Check black style and linting - run: pip install ruff + - name: Install dev-env. + run: | + pip install -U pip + pip install ".[dev]" - name: Ruff check if: ${{ always() }} run: ruff check . diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml new file mode 100644 index 000000000..dae12f38b --- /dev/null +++ b/.github/workflows/mypy.yml @@ -0,0 +1,22 @@ +name: Mypy + +on: [push, pull_request] + +jobs: + mypy: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10"] + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Set up python + uses: actions/setup-python@v4 + - name: Install dev-env. 
+ run: | + pip install -U pip + pip install ".[dev]" + - name: Mypy + run: mypy . diff --git a/.gitignore b/.gitignore index 60e773de2..de0775d63 100644 --- a/.gitignore +++ b/.gitignore @@ -96,3 +96,6 @@ venv.bak/ # setuptools_scm version src/fmu/dataio/version.py + +# mypy +.dmypy.json diff --git a/mypy.ini b/mypy.ini index 467a0d254..e0a4b13b7 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,5 +1,10 @@ [mypy] - -[mypy-numpy.*] -# Applies to Python 3.6: +disallow_untyped_defs = True +exclude = ^((tests|docs|examples|bin)/|conftest.py?) +extra_checks = True ignore_missing_imports = True +python_version = 3.8 +strict_equality = True +warn_redundant_casts = True +warn_unused_configs = True +warn_unused_ignores = True diff --git a/src/fmu/dataio/_definitions.py b/src/fmu/dataio/_definitions.py index 9ff6785e0..4de75b3cc 100644 --- a/src/fmu/dataio/_definitions.py +++ b/src/fmu/dataio/_definitions.py @@ -1,41 +1,62 @@ """Various definitions and hard settings used in fmu-dataio.""" +from __future__ import annotations + from dataclasses import dataclass, field +from typing import Final -SCHEMA = ( +SCHEMA: Final = ( "https://main-fmu-schemas-prod.radix.equinor.com/schemas/0.8.0/fmu_results.json" ) -VERSION = "0.8.0" -SOURCE = "fmu" +VERSION: Final = "0.8.0" +SOURCE: Final = "fmu" @dataclass class _ValidFormats: - surface: dict = field(default_factory=dict) - grid: dict = field(default_factory=dict) - cube: dict = field(default_factory=dict) - table: dict = field(default_factory=dict) - polygons: dict = field(default_factory=dict) - points: dict = field(default_factory=dict) - dictionary: dict = field(default_factory=dict) - - def __post_init__(self): - self.surface = {"irap_binary": ".gri"} - self.grid = {"hdf": ".hdf", "roff": ".roff"} - self.cube = {"segy": ".segy"} - self.table = {"hdf": ".hdf", "csv": ".csv", "arrow": ".arrow"} - self.polygons = { + surface: dict = field( + default_factory=lambda: { + "irap_binary": ".gri", + } + ) + grid: dict = field( + default_factory=lambda: { + "hdf": ".hdf", + "roff": ".roff", + } + ) + cube: dict = field( + default_factory=lambda: { + "segy": ".segy", + } + ) + table: dict = field( + default_factory=lambda: { + "hdf": ".hdf", + "csv": ".csv", + "arrow": ".arrow", + } + ) + polygons: dict = field( + default_factory=lambda: { "hdf": ".hdf", "csv": ".csv", # columns will be X Y Z, ID "csv|xtgeo": ".csv", # use default xtgeo columns: X_UTME, ... 
POLY_ID "irap_ascii": ".pol", } - self.points = { + ) + points: dict = field( + default_factory=lambda: { "hdf": ".hdf", "csv": ".csv", # columns will be X Y Z "csv|xtgeo": ".csv", # use default xtgeo columns: X_UTME, Y_UTMN, Z_TVDSS "irap_ascii": ".poi", } - self.dictionary = {"json": ".json"} + ) + dictionary: dict = field( + default_factory=lambda: { + "json": ".json", + } + ) ALLOWED_CONTENTS = { diff --git a/src/fmu/dataio/_design_kw.py b/src/fmu/dataio/_design_kw.py index 0b83df652..ae5754dea 100644 --- a/src/fmu/dataio/_design_kw.py +++ b/src/fmu/dataio/_design_kw.py @@ -5,22 +5,24 @@ It is copied here instead of pip-installed in order to avoid dragging along all dependencies of semeio""" -# pylint: disable=logging-fstring-interpolation +from __future__ import annotations + import logging import re import shlex +from typing import Any, Final, Iterable -_STATUS_FILE_NAME = "DESIGN_KW.OK" +_STATUS_FILE_NAME: Final = "DESIGN_KW.OK" -_logger = logging.getLogger(__name__) +_logger: Final = logging.getLogger(__name__) def run( - template_file_name, - result_file_name, - log_level, - parameters_file_name="parameters.txt", -): + template_file_name: str, + result_file_name: str, + log_level: str, + parameters_file_name: str = "parameters.txt", +) -> None: # Get all key, value pairs # If FWL key is having multiple entries in the parameters file # KeyError is raised. This will be logged, and no OK @@ -28,8 +30,6 @@ def run( _logger.setLevel(log_level) - valid = True - with open(parameters_file_name) as parameters_file: parameters = parameters_file.readlines() @@ -40,24 +40,22 @@ def run( with open(template_file_name) as template_file: template = template_file.readlines() - if valid: - with open(result_file_name, "w") as result_file: - for line in template: - if not is_comment(line): - for key, value in key_vals.items(): - line = line.replace(f"<{key}>", str(value)) + with open(result_file_name, "w") as result_file: + for line in template: + if not is_comment(line): + for key, value in key_vals.items(): + line = line.replace(f"<{key}>", str(value)) - if not all_matched(line, template_file_name, template): - valid = False + if not all_matched(line, template_file_name, template): + pass - result_file.write(line) + result_file.write(line) - if valid: - with open(_STATUS_FILE_NAME, "w") as status_file: - status_file.write("DESIGN_KW OK\n") + with open(_STATUS_FILE_NAME, "w") as status_file: + status_file.write("DESIGN_KW OK\n") -def all_matched(line, template_file_name, template): +def all_matched(line: str, template_file_name: str, template: list[str]) -> bool: valid = True for unmatched in unmatched_templates(line): if is_perl(template_file_name, template): @@ -73,24 +71,24 @@ def all_matched(line, template_file_name, template): return valid -def is_perl(file_name, template): - return file_name.endswith(".pl") or template[0].find("perl") != -1 +def is_perl(file_name: str, template: list[str]) -> bool: + return bool(file_name.endswith(".pl") or template[0].find("perl") != -1) -def unmatched_templates(line): +def unmatched_templates(line: str) -> list[str]: bracketpattern = re.compile("<.+?>") if bracketpattern.search(line): return bracketpattern.findall(line) return [] -def is_comment(line): +def is_comment(line: str) -> bool: ecl_comment_pattern = re.compile("^--") std_comment_pattern = re.compile("^#") - return ecl_comment_pattern.search(line) or std_comment_pattern.search(line) + return bool(ecl_comment_pattern.search(line) or std_comment_pattern.search(line)) -def 
extract_key_value(parameters): +def extract_key_value(parameters: Iterable[str]) -> dict[str, str]: """Parses a list of strings, looking for key-value pairs pr. line separated by whitespace, into a dictionary. @@ -128,7 +126,10 @@ def extract_key_value(parameters): return res -def rm_genkw_prefix(paramsdict, ignoreprefixes="LOG10_"): +def rm_genkw_prefix( + paramsdict: dict[str, Any], + ignoreprefixes: str | list[str] | None = "LOG10_", +) -> dict[str, Any]: """Strip prefixes from keys in a dictionary. Prefix is any string before a colon. No colon means no prefix. @@ -152,7 +153,8 @@ def rm_genkw_prefix(paramsdict, ignoreprefixes="LOG10_"): ignoreprefixes = [] if isinstance(ignoreprefixes, str): ignoreprefixes = [ignoreprefixes] - ignoreprefixes = filter(None, ignoreprefixes) + + ignoreprefixes = list(filter(None, ignoreprefixes)) for ignore_str in ignoreprefixes: paramsdict = { diff --git a/src/fmu/dataio/_filedata_provider.py b/src/fmu/dataio/_filedata_provider.py index 33c8d81e0..b7aa29d54 100644 --- a/src/fmu/dataio/_filedata_provider.py +++ b/src/fmu/dataio/_filedata_provider.py @@ -3,15 +3,16 @@ Populate and verify stuff in the 'file' block in fmu (partial excpetion is checksum_md5 as this is convinient to populate later, on demand) """ +from __future__ import annotations import logging from copy import deepcopy from dataclasses import dataclass, field from pathlib import Path -from typing import Any, Optional +from typing import Any, Final, Optional from warnings import warn -logger = logging.getLogger(__name__) +logger: Final = logging.getLogger(__name__) @dataclass @@ -41,7 +42,7 @@ class _FileDataProvider: absolute_path_symlink: Optional[str] = field(default="", init=False) checksum_md5: Optional[str] = field(default="", init=False) - def __post_init__(self): + def __post_init__(self) -> None: logger.setLevel(level=self.verbosity) if self.dataio.name: @@ -63,10 +64,11 @@ def __post_init__(self): self.fmu_context = self.dataio._usecontext # may be None! - logger.info("Initialize %s", __class__) + logger.info("Initialize %s", self.__class__) - def derive_filedata(self): + def derive_filedata(self) -> None: relpath, symrelpath = self._get_path() + assert relpath is not None relative, absolute = self._derive_filedata_generic(relpath) self.relative_path = relative self.absolute_path = absolute @@ -78,7 +80,7 @@ def derive_filedata(self): logger.info("Derived filedata") - def _derive_filedata_generic(self, inrelpath): + def _derive_filedata_generic(self, inrelpath: Path) -> tuple[str, str]: """This works with both normal data and symlinks.""" stem = self._get_filestem() @@ -116,7 +118,7 @@ def _derive_filedata_generic(self, inrelpath): logger.info("Derived filedata") return str(relpath), str(abspath) - def _get_filestem(self): + def _get_filestem(self) -> str: """Construct the file""" if not self.name: @@ -153,13 +155,13 @@ def _get_filestem(self): stem = stem.replace("__", "_") # treat norwegian special letters + # BUG(?): What about germen letter like "Ü"? 
stem = stem.replace("æ", "ae") stem = stem.replace("ø", "oe") return stem.replace("å", "aa") - def _get_path(self): + def _get_path(self) -> tuple[Path, Path | None]: """Construct and get the folder path(s).""" - dest = None linkdest = None dest = self._get_path_generic(mode=self.fmu_context, allow_forcefolder=True) @@ -171,7 +173,9 @@ def _get_path(self): return dest, linkdest - def _get_path_generic(self, mode="realization", allow_forcefolder=True, info=""): + def _get_path_generic( + self, mode: str = "realization", allow_forcefolder: bool = True, info: str = "" + ) -> Path: """Generically construct and get the folder path and verify.""" dest = None @@ -212,8 +216,7 @@ def _get_path_generic(self, mode="realization", allow_forcefolder=True, info="") warn("Using absolute paths in forcefolder is not recommended!") # absolute if starts with "/", otherwise relative to outroot - dest = Path(self.dataio.forcefolder) - dest = dest.absolute() + dest = Path(self.dataio.forcefolder).absolute() self.forcefolder_is_absolute = True if not allow_forcefolder: diff --git a/src/fmu/dataio/_fmu_provider.py b/src/fmu/dataio/_fmu_provider.py index f2fad2586..b2765578b 100644 --- a/src/fmu/dataio/_fmu_provider.py +++ b/src/fmu/dataio/_fmu_provider.py @@ -6,6 +6,8 @@ Note that FMU may potentially have different providers, e.g. ERT2 vs ERT3 or it can detect that no providers are present (e.g. just ran from RMS interactive) """ +from __future__ import annotations + import json import logging import pathlib @@ -14,7 +16,7 @@ from dataclasses import dataclass, field from os import environ from pathlib import Path -from typing import Any, Optional +from typing import Any, Final, Optional from warnings import warn from fmu.config import utilities as ut @@ -22,10 +24,10 @@ from . import _utils # case metadata relative to rootpath -ERT2_RELATIVE_CASE_METADATA_FILE = "share/metadata/fmu_case.yml" -RESTART_PATH_ENVNAME = "RESTART_FROM_PATH" +ERT2_RELATIVE_CASE_METADATA_FILE: Final = "share/metadata/fmu_case.yml" +RESTART_PATH_ENVNAME: Final = "RESTART_FROM_PATH" -logger = logging.getLogger(__name__) +logger: Final = logging.getLogger(__name__) def _get_folderlist(current: Path) -> list: @@ -65,16 +67,16 @@ class _FmuProvider: metadata: dict = field(default_factory=dict, init=False) rootpath: Optional[Path] = field(default=None, init=False) - def __post_init__(self): + def __post_init__(self) -> None: logger.setLevel(level=self.verbosity) self.rootpath = Path(self.dataio._rootpath.absolute()) self.rootpath_initial = self.rootpath - logger.info("Initialize %s", __class__) + logger.info("Initialize %s", self.__class__) - def detect_provider(self): + def detect_provider(self) -> None: """First order method to detect provider, ans also check fmu_context.""" if self._detect_ert2provider() or self._detect_ert2provider_case_only(): self.provider = "ERT2" @@ -188,7 +190,7 @@ def _detect_ert2provider_case_only(self) -> bool: return True return False - def get_ert2_information(self): + def get_ert2_information(self) -> None: """Retrieve information from an ERT2 run.""" if not self.iter_path: return @@ -197,7 +199,9 @@ def get_ert2_information(self): parameters_file = self.iter_path / "parameters.txt" if parameters_file.is_file(): params = _utils.read_parameters_txt(parameters_file) - nested_params = _utils.nested_parameters_dict(params) + # BUG(?): value can contain Nones, loop in fn. below + # does contains check, will fail. 
+ nested_params = _utils.nested_parameters_dict(params) # type: ignore self.ert2["params"] = nested_params logger.debug("parameters.txt parsed.") else: @@ -245,13 +249,14 @@ def get_ert2_information(self): logger.debug("ERT files has been parsed.") - def get_ert2_case_metadata(self): + def get_ert2_case_metadata(self) -> None: """Check if metadatafile file for CASE exists, and if so parse metadata. If file does not exist, still give a proposed file path, but the self.case_metadata will be {} (empty) and the physical file will not be made. """ + assert self.rootpath is not None self.case_metafile = self.rootpath / ERT2_RELATIVE_CASE_METADATA_FILE self.case_metafile = self.case_metafile.resolve() if self.case_metafile.exists(): @@ -263,7 +268,7 @@ def get_ert2_case_metadata(self): "Case metadata file does not exists as %s", str(self.case_metafile) ) - def generate_ert2_metadata(self): + def generate_ert2_metadata(self) -> None: """Construct the metadata FMU block for an ERT2 forward job.""" logger.info("Generate ERT2 metadata...") diff --git a/src/fmu/dataio/_metadata.py b/src/fmu/dataio/_metadata.py index 988b87839..39857ca84 100644 --- a/src/fmu/dataio/_metadata.py +++ b/src/fmu/dataio/_metadata.py @@ -4,13 +4,15 @@ """ # https://realpython.com/python-data-classes/#basic-data-classes +from __future__ import annotations + import datetime import getpass import logging from dataclasses import dataclass, field from datetime import timezone from pathlib import Path -from typing import Any, Optional +from typing import Any, Final from warnings import warn from fmu.dataio._definitions import SCHEMA, SOURCE, VERSION @@ -24,7 +26,7 @@ read_metadata, ) -logger = logging.getLogger(__name__) +logger: Final = logging.getLogger(__name__) class ConfigurationError(ValueError): @@ -42,7 +44,7 @@ def default_meta_dollars() -> dict: return dollars -def generate_meta_tracklog() -> list: +def generate_meta_tracklog() -> list[dict]: """Create the tracklog metadata, which here assumes 'created' only.""" meta = [] @@ -52,8 +54,9 @@ def generate_meta_tracklog() -> list: return meta -def generate_meta_masterdata(config: dict) -> Optional[dict]: +def generate_meta_masterdata(config: dict) -> dict | None: """Populate metadata from masterdata section in config.""" + if not config: # this may be a temporary solution for a while, which will be told to the user # in related checks in dataio.py. @@ -70,7 +73,7 @@ def generate_meta_masterdata(config: dict) -> Optional[dict]: return config["masterdata"] -def generate_meta_access(config: dict) -> Optional[dict]: +def generate_meta_access(config: dict) -> dict | None: """Populate metadata overall from access section in config + allowed keys. Access should be possible to change per object, based on user input. @@ -219,7 +222,7 @@ class _MetaData: # if re-using existing metadata meta_existing: dict = field(default_factory=dict, init=False) - def __post_init__(self): + def __post_init__(self) -> None: logger.setLevel(level=self.verbosity) logger.info("Initialize _MetaData instance.") @@ -230,7 +233,7 @@ def __post_init__(self): logger.info("Partially reuse existing metadata from %s", self.obj) self.meta_existing = read_metadata(self.obj) - def _populate_meta_objectdata(self): + def _populate_meta_objectdata(self) -> None: """Analyze the actual object together with input settings. 
This will provide input to the ``data`` block of the metas but has also @@ -242,7 +245,7 @@ def _populate_meta_objectdata(self): self.objdata.derive_metadata() self.meta_objectdata = self.objdata.metadata - def _get_case_metadata(self): + def _get_case_metadata(self) -> object: """Detect existing fmu CASE block in the metadata. This block may be missing in case the client is not within a FMU run, e.g. @@ -255,7 +258,7 @@ def _get_case_metadata(self): logger.info("FMU provider is %s", self.fmudata.provider) return self.fmudata.case_metadata - def _populate_meta_fmu(self): + def _populate_meta_fmu(self) -> None: """Populate the fmu block in the metadata. This block may be missing in case the client is not within a FMU run, e.g. @@ -267,9 +270,9 @@ def _populate_meta_fmu(self): self.fmudata.detect_provider() logger.info("FMU provider is %s", self.fmudata.provider) self.meta_fmu = self.fmudata.metadata - self.rootpath = self.fmudata.rootpath + self.rootpath = str(self.fmudata.rootpath if self.fmudata.rootpath else "") - def _populate_meta_file(self): + def _populate_meta_file(self) -> None: """Populate the file block in the metadata. The file block also contains all needed info for doing the actual file export. @@ -288,7 +291,7 @@ def _populate_meta_file(self): fdata = _FileDataProvider( self.dataio, self.objdata, - self.rootpath, + Path(self.rootpath), self.fmudata.iter_name, self.fmudata.real_name, self.verbosity, @@ -305,7 +308,8 @@ def _populate_meta_file(self): logger.info("Compute MD5 sum for tmp file...") _, self.meta_file["checksum_md5"] = export_file_compute_checksum_md5( self.obj, - "tmp", + "tmp", # type: ignore + # tmp = true given, this arg is not needed. self.objdata.extension, tmp=True, flag=self.dataio._usefmtflag, @@ -314,19 +318,19 @@ def _populate_meta_file(self): logger.info("Do not compute MD5 sum at this stage!") self.meta_file["checksum_md5"] = None - def _populate_meta_class(self): + def _populate_meta_class(self) -> None: """Get the general class which is a simple string.""" self.meta_class = self.objdata.classname - def _populate_meta_tracklog(self): + def _populate_meta_tracklog(self) -> None: """Create the tracklog metadata, which here assumes 'created' only.""" self.meta_tracklog = generate_meta_tracklog() - def _populate_meta_masterdata(self): + def _populate_meta_masterdata(self) -> None: """Populate metadata from masterdata section in config.""" - self.meta_masterdata = generate_meta_masterdata(self.dataio.config) + self.meta_masterdata = generate_meta_masterdata(self.dataio.config) or {} - def _populate_meta_access(self): + def _populate_meta_access(self) -> None: """Populate metadata overall from access section in config + allowed keys. Access should be possible to change per object, based on user input. 
@@ -340,9 +344,9 @@ def _populate_meta_access(self): """ if self.dataio: - self.meta_access = generate_meta_access(self.dataio.config) + self.meta_access = generate_meta_access(self.dataio.config) or {} - def _populate_meta_display(self): + def _populate_meta_display(self) -> None: """Populate the display block.""" # display.name @@ -353,14 +357,14 @@ def _populate_meta_display(self): self.meta_display = {"name": display_name} - def _populate_meta_xpreprocessed(self): + def _populate_meta_xpreprocessed(self) -> None: """Populate a few necessary 'tmp' metadata needed for preprocessed data.""" if self.dataio.fmu_context == "preprocessed": self.meta_xpreprocessed["name"] = self.dataio.name self.meta_xpreprocessed["tagname"] = self.dataio.tagname self.meta_xpreprocessed["subfolder"] = self.dataio.subfolder - def _reuse_existing_metadata(self, meta): + def _reuse_existing_metadata(self, meta: dict) -> dict: """Perform a merge procedure if the key `reuse_metadata_rule` is active.""" if self.dataio and self.dataio.reuse_metadata_rule: oldmeta = self.meta_existing @@ -373,7 +377,9 @@ def _reuse_existing_metadata(self, meta): ) return meta - def generate_export_metadata(self, skip_null=True) -> dict: # TODO! -> skip_null? + def generate_export_metadata( + self, skip_null: bool = True + ) -> dict: # TODO! -> skip_null? """Main function to generate the full metadata""" # populate order matters, in particular objectdata provides input to class/file diff --git a/src/fmu/dataio/_objectdata_provider.py b/src/fmu/dataio/_objectdata_provider.py index e40405996..c3c36cc6c 100644 --- a/src/fmu/dataio/_objectdata_provider.py +++ b/src/fmu/dataio/_objectdata_provider.py @@ -83,28 +83,30 @@ - Depth surfaces extracted from the structural model """ +from __future__ import annotations + import logging from dataclasses import dataclass, field from datetime import datetime as dt from pathlib import Path -from typing import Any, Optional, Tuple +from typing import Any, Dict, Final, Optional from warnings import warn import numpy as np -import pandas as pd # type: ignore -import xtgeo # type: ignore +import pandas as pd +import xtgeo from ._definitions import ALLOWED_CONTENTS, STANDARD_TABLE_INDEX_COLUMNS, _ValidFormats from ._utils import generate_description, parse_timedata try: - import pyarrow as pa # type: ignore + import pyarrow as pa except ImportError: HAS_PYARROW = False else: HAS_PYARROW = True -logger = logging.getLogger(__name__) +logger: Final = logging.getLogger(__name__) class ConfigurationError(ValueError): @@ -143,7 +145,7 @@ class _ObjectDataProvider: time0: str = field(default="", init=False) time1: str = field(default="", init=False) - def __post_init__(self): + def __post_init__(self) -> None: logger.info("Ran __post_init__") def _derive_name_stratigraphy(self) -> dict: @@ -157,7 +159,7 @@ def _derive_name_stratigraphy(self) -> dict: """ logger.info("Evaluate data:name attribute and stratigraphy") - result = {} # shorter form + result: Dict[str, Any] = {} name = self.dataio.name @@ -179,7 +181,7 @@ def _derive_name_stratigraphy(self) -> dict: result["name"] = strat[name].get("name", name) result["alias"] = strat[name].get("alias", []) if result["name"] != "name": - result["alias"].append(name) # type: ignore + result["alias"].append(name) result["stratigraphic"] = strat[name].get("stratigraphic", False) result["stratigraphic_alias"] = strat[name].get("stratigraphic_alias", None) result["offset"] = strat[name].get("offset", None) @@ -190,7 +192,11 @@ def _derive_name_stratigraphy(self) -> dict: 
return result @staticmethod - def _validate_get_ext(fmt, subtype, validator): + def _validate_get_ext( + fmt: str, + subtype: str, + validator: dict[str, Any], + ) -> object | None: """Validate that fmt (file format) matches data and return legal extension.""" if fmt not in validator: raise ConfigurationError( @@ -200,10 +206,10 @@ def _validate_get_ext(fmt, subtype, validator): return validator.get(fmt, None) - def _derive_objectdata(self): + def _derive_objectdata(self) -> dict: """Derive object spesific data.""" logger.info("Evaluate data settings for object") - result = {} + result: Dict[str, Any] = {} if isinstance(self.obj, xtgeo.RegularSurface): result["subtype"] = "RegularSurface" @@ -212,7 +218,9 @@ def _derive_objectdata(self): result["efolder"] = "maps" result["fmt"] = self.dataio.surface_fformat result["extension"] = self._validate_get_ext( - result["fmt"], result["subtype"], _ValidFormats().surface + result["fmt"], + result["subtype"], + _ValidFormats().surface, ) result["spec"], result["bbox"] = self._derive_spec_bbox_regularsurface() @@ -223,7 +231,9 @@ def _derive_objectdata(self): result["efolder"] = "polygons" result["fmt"] = self.dataio.polygons_fformat result["extension"] = self._validate_get_ext( - result["fmt"], result["subtype"], _ValidFormats().polygons + result["fmt"], + result["subtype"], + _ValidFormats().polygons, ) result["spec"], result["bbox"] = self._derive_spec_bbox_polygons() @@ -234,7 +244,9 @@ def _derive_objectdata(self): result["efolder"] = "points" result["fmt"] = self.dataio.points_fformat result["extension"] = self._validate_get_ext( - result["fmt"], result["subtype"], _ValidFormats().points + result["fmt"], + result["subtype"], + _ValidFormats().points, ) result["spec"], result["bbox"] = self._derive_spec_bbox_points() @@ -245,7 +257,9 @@ def _derive_objectdata(self): result["efolder"] = "cubes" result["fmt"] = self.dataio.cube_fformat result["extension"] = self._validate_get_ext( - result["fmt"], result["subtype"], _ValidFormats().cube + result["fmt"], + result["subtype"], + _ValidFormats().cube, ) result["spec"], result["bbox"] = self._derive_spec_bbox_cube() @@ -256,7 +270,9 @@ def _derive_objectdata(self): result["efolder"] = "grids" result["fmt"] = self.dataio.grid_fformat result["extension"] = self._validate_get_ext( - result["fmt"], result["subtype"], _ValidFormats().grid + result["fmt"], + result["subtype"], + _ValidFormats().grid, ) result["spec"], result["bbox"] = self._derive_spec_bbox_cpgrid() @@ -267,7 +283,9 @@ def _derive_objectdata(self): result["efolder"] = "grids" result["fmt"] = self.dataio.grid_fformat result["extension"] = self._validate_get_ext( - result["fmt"], result["subtype"], _ValidFormats().grid + result["fmt"], + result["subtype"], + _ValidFormats().grid, ) result["spec"], result["bbox"] = self._derive_spec_bbox_cpgridproperty() @@ -280,7 +298,9 @@ def _derive_objectdata(self): result["efolder"] = "tables" result["fmt"] = self.dataio.table_fformat result["extension"] = self._validate_get_ext( - result["fmt"], result["subtype"], _ValidFormats().table + result["fmt"], + result["subtype"], + _ValidFormats().table, ) result["spec"], result["bbox"] = self._derive_spec_bbox_dataframe() @@ -293,7 +313,9 @@ def _derive_objectdata(self): result["efolder"] = "tables" result["fmt"] = self.dataio.arrow_fformat result["extension"] = self._validate_get_ext( - result["fmt"], result["subtype"], _ValidFormats().table + result["fmt"], + result["subtype"], + _ValidFormats().table, ) result["spec"], result["bbox"] = 
self._derive_spec_bbox_arrowtable() @@ -304,7 +326,9 @@ def _derive_objectdata(self): result["efolder"] = "dictionaries" result["fmt"] = self.dataio.dict_fformat result["extension"] = self._validate_get_ext( - result["fmt"], result["subtype"], _ValidFormats().dictionary + result["fmt"], + result["subtype"], + _ValidFormats().dictionary, ) result["spec"], result["bbox"] = self._derive_spec_bbox_dict() @@ -326,7 +350,7 @@ def _derive_objectdata(self): return result - def _derive_spec_bbox_regularsurface(self): + def _derive_spec_bbox_regularsurface(self) -> tuple[dict, dict]: """Process/collect the data.spec and data.bbox for RegularSurface""" logger.info("Derive bbox and specs for RegularSurface") regsurf = self.obj @@ -350,7 +374,7 @@ def _derive_spec_bbox_regularsurface(self): return specs, bbox - def _derive_spec_bbox_polygons(self): + def _derive_spec_bbox_polygons(self) -> tuple[dict, dict]: """Process/collect the data.spec and data.bbox for Polygons""" logger.info("Derive bbox and specs for Polygons") poly = self.obj @@ -369,13 +393,14 @@ def _derive_spec_bbox_polygons(self): bbox["zmax"] = float(zmax) return specs, bbox - def _derive_spec_bbox_points(self): + def _derive_spec_bbox_points(self) -> tuple[Dict[str, Any], Dict[str, Any]]: """Process/collect the data.spec and data.bbox for Points""" logger.info("Derive bbox and specs for Points") pnts = self.obj - specs = {} - bbox = {} + specs: Dict[str, Any] = {} + + bbox: Dict[str, Any] = {} if len(pnts.dataframe.columns) > 3: attrnames = pnts.dataframe.columns[3:] @@ -391,7 +416,7 @@ def _derive_spec_bbox_points(self): return specs, bbox - def _derive_spec_bbox_cube(self): + def _derive_spec_bbox_cube(self) -> tuple[dict, dict]: """Process/collect the data.spec and data.bbox Cube""" logger.info("Derive bbox and specs for Cube") cube = self.obj @@ -428,7 +453,7 @@ def _derive_spec_bbox_cube(self): return specs, bbox - def _derive_spec_bbox_cpgrid(self): + def _derive_spec_bbox_cpgrid(self) -> tuple[dict, dict]: """Process/collect the data.spec and data.bbox CornerPoint Grid geometry""" logger.info("Derive bbox and specs for Gride (geometry)") grid = self.obj @@ -452,55 +477,61 @@ def _derive_spec_bbox_cpgrid(self): bbox["zmax"] = round(float(geox["zmax"]), 4) return specs, bbox - def _derive_spec_bbox_cpgridproperty(self): + def _derive_spec_bbox_cpgridproperty(self) -> tuple[dict, dict]: """Process/collect the data.spec and data.bbox GridProperty""" logger.info("Derive bbox and specs for GridProperty") gridprop = self.obj - specs = {} - bbox = {} + specs: Dict[str, Any] = {} + bbox: Dict[str, Any] = {} specs["ncol"] = gridprop.ncol specs["nrow"] = gridprop.nrow specs["nlay"] = gridprop.nlay return specs, bbox - def _derive_spec_bbox_dataframe(self): + def _derive_spec_bbox_dataframe( + self, + ) -> tuple[ + Dict[str, Any], + Dict[str, Any], + ]: """Process/collect the data items for DataFrame.""" logger.info("Process data metadata for DataFrame (tables)") dfr = self.obj - specs = {} - bbox = {} + specs: Dict[str, Any] = {} + bbox: Dict[str, Any] = {} specs["columns"] = list(dfr.columns) specs["size"] = int(dfr.size) return specs, bbox - def _derive_spec_bbox_arrowtable(self): + def _derive_spec_bbox_arrowtable( + self, + ) -> tuple[ + Dict[str, Any], + Dict[str, Any], + ]: """Process/collect the data items for Arrow table.""" logger.info("Process data metadata for arrow (tables)") table = self.obj - specs = {} - bbox = {} + specs: Dict[str, Any] = {} + bbox: Dict[str, Any] = {} specs["columns"] = list(table.column_names) specs["size"] 
= table.num_columns * table.num_rows return specs, bbox - def _derive_spec_bbox_dict(self): + def _derive_spec_bbox_dict(self) -> tuple[Dict[str, Any], Dict[str, Any]]: """Process/collect the data items for dictionary.""" logger.info("Process data metadata for dictionary") + return {}, {} - specs = {} - bbox = {} - - return specs, bbox - - def _get_columns(self): + def _get_columns(self) -> list[str]: """Get the columns from table""" if isinstance(self.obj, pd.DataFrame): logger.debug("pandas") @@ -511,7 +542,7 @@ def _get_columns(self): logger.debug("Available columns in table %s ", columns) return columns - def _derive_index(self): + def _derive_index(self) -> list[str]: """Derive table index""" # This could in the future also return context columns = self._get_columns() @@ -534,7 +565,7 @@ def _derive_index(self): self._check_index(index) return index - def _check_index(self, index): + def _check_index(self, index: list[str]) -> None: """Check the table index. Args: index (list): list of column names @@ -547,7 +578,7 @@ def _check_index(self, index): for not_found in not_founds: raise KeyError(f"{not_found} is not in table") - def _derive_timedata(self): + def _derive_timedata(self) -> dict: """Format input timedata to metadata.""" tdata = self.dataio.timedata @@ -560,11 +591,12 @@ def _derive_timedata(self): timedata = self._derive_timedata_newformat() return timedata - def _derive_timedata_legacy(self): + def _derive_timedata_legacy(self) -> Dict[str, Any]: """Format input timedata to metadata. legacy version.""" + # TODO(JB): Covnert tresult to TypedDict or Dataclass. tdata = self.dataio.timedata - tresult = {} + tresult: Dict[str, Any] = {} tresult["time"] = [] if len(tdata) == 1: elem = tdata[0] @@ -598,7 +630,7 @@ def _derive_timedata_legacy(self): logger.info("Timedata: time0 is %s while time1 is %s", self.time0, self.time1) return tresult - def _derive_timedata_newformat(self): + def _derive_timedata_newformat(self) -> dict[str, Any]: """Format input timedata to metadata, new format. When using two dates, input convention is [[newestdate, "monitor"], [oldestdate, @@ -608,7 +640,7 @@ def _derive_timedata_newformat(self): set for those who wants it turned around). """ tdata = self.dataio.timedata - tresult = {} + tresult: Dict[str, Any] = {} if len(tdata) == 1: elem = tdata[0] @@ -642,11 +674,12 @@ def _derive_timedata_newformat(self): logger.info("Timedata: time0 is %s while time1 is %s", self.time0, self.time1) return tresult - def _derive_from_existing(self): + def _derive_from_existing(self) -> None: """Derive from existing metadata.""" # do not change any items in 'data' block, as it may ruin e.g. stratigrapical # setting (i.e. changing data.name is not allowed) + assert self.meta_existing is not None self.metadata = self.meta_existing["data"] self.name = self.meta_existing["data"]["name"] @@ -661,9 +694,10 @@ def _derive_from_existing(self): self.extension = relpath.suffix self.fmt = self.meta_existing["data"]["format"] - self.time0, self.time1 = parse_timedata(self.meta_existing["data"]) + # TODO: Clean up types below. 
+ self.time0, self.time1 = parse_timedata(self.meta_existing["data"]) # type: ignore - def _process_content(self) -> Tuple[str, Optional[dict]]: + def _process_content(self) -> tuple[str, dict | None]: """Work with the `content` metadata""" # content == "unset" is not wanted, but in case metadata has been produced while @@ -693,7 +727,7 @@ def _process_content(self) -> Tuple[str, Optional[dict]]: return content, content_spesific - def derive_metadata(self): + def derive_metadata(self) -> None: """Main function here, will populate the metadata block for 'data'.""" logger.info("Derive all metadata for data object...") diff --git a/src/fmu/dataio/_oyaml.py b/src/fmu/dataio/_oyaml.py index 90b64c130..554576d0a 100644 --- a/src/fmu/dataio/_oyaml.py +++ b/src/fmu/dataio/_oyaml.py @@ -1,4 +1,5 @@ # flake8: noqa +# type: ignore # Borrowed from OYAML 0.5 on the internet... import sys from collections import OrderedDict diff --git a/src/fmu/dataio/_utils.py b/src/fmu/dataio/_utils.py index 7faabb430..82ec5c05a 100644 --- a/src/fmu/dataio/_utils.py +++ b/src/fmu/dataio/_utils.py @@ -1,4 +1,7 @@ """Module for private utilities/helpers for DataIO class.""" +from __future__ import annotations + +import contextlib import hashlib import json import logging @@ -10,39 +13,26 @@ from copy import deepcopy from datetime import datetime from pathlib import Path -from typing import Dict, List, Optional, Union +from typing import Any, Final -import pandas as pd # type: ignore +import pandas as pd import yaml from fmu.config import utilities as ut try: - import pyarrow as pa # type: ignore + import pyarrow as pa except ImportError: HAS_PYARROW = False else: HAS_PYARROW = True from pyarrow import feather -import contextlib - -import xtgeo # type: ignore +import xtgeo from . 
import _design_kw, _oyaml as oyaml -logger = logging.getLogger(__name__) - - -def inherit_docstring(inherit_from): - """Local decorator to inherit a docstring""" - - def decorator_set_docstring(func): - if func.__doc__ is None and inherit_from.__doc__ is not None: - func.__doc__ = inherit_from.__doc__ - return func - - return decorator_set_docstring +logger: Final = logging.getLogger(__name__) def detect_inside_rms() -> bool: @@ -54,7 +44,7 @@ def detect_inside_rms() -> bool: """ inside_rms = False try: - import roxar # type: ignore + import roxar inside_rms = True logger.info("Roxar version is %s", roxar.__version__) @@ -92,14 +82,16 @@ def drop_nones(dinput: dict) -> dict: return dd -def export_metadata_file(yfile, metadata, savefmt="yaml", verbosity="WARNING") -> None: +def export_metadata_file( + yfile: Path, metadata: dict, savefmt: str = "yaml", verbosity: str = "WARNING" +) -> None: """Export genericly and ordered to the complementary metadata file.""" logger.setLevel(level=verbosity) if metadata: xdata = drop_nones(metadata) if savefmt == "yaml": - yamlblock = oyaml.safe_dump(xdata, allow_unicode=True) + yamlblock = oyaml.safe_dump(xdata, allow_unicode=True) # type: ignore with open(yfile, "w", encoding="utf8") as stream: stream.write(yamlblock) else: @@ -115,7 +107,12 @@ def export_metadata_file(yfile, metadata, savefmt="yaml", verbosity="WARNING") - logger.info("Yaml file on: %s", yfile) -def export_file(obj, filename, extension, flag=None): +def export_file( + obj: object, + filename: Path, + extension: str, + flag: str | None = None, +) -> str: """Export a valid object to file""" if isinstance(obj, Path): @@ -125,6 +122,7 @@ def export_file(obj, filename, extension, flag=None): obj.to_file(filename, fformat="irap_binary") elif extension == ".csv" and isinstance(obj, (xtgeo.Polygons, xtgeo.Points)): out = obj.copy() # to not modify incoming instance! 
+ assert flag is not None if "xtgeo" not in flag: out.xname = "X" out.yname = "Y" @@ -160,7 +158,7 @@ def export_file(obj, filename, extension, flag=None): return str(filename) -def md5sum(fname): +def md5sum(fname: Path) -> str: hash_md5 = hashlib.md5() with open(fname, "rb") as fil: for chunk in iter(lambda: fil.read(4096), b""): @@ -168,14 +166,21 @@ def md5sum(fname): return hash_md5.hexdigest() -def export_file_compute_checksum_md5(obj, filename, extension, flag=None, tmp=False): +def export_file_compute_checksum_md5( + obj: object, + filename: Path, + extension: str, + flag: str | None = None, + tmp: bool = False, +) -> tuple[Path | None, str]: """Export and compute checksum, with possibility to use a tmp file.""" - usefile = filename + usefile: Path | None = filename if tmp: tmpdir = tempfile.TemporaryDirectory() usefile = Path(tmpdir.name) / "tmpfile" + assert usefile is not None export_file(obj, usefile, extension, flag=flag) checksum = md5sum(usefile) if tmp: @@ -184,7 +189,7 @@ def export_file_compute_checksum_md5(obj, filename, extension, flag=None, tmp=Fa return usefile, checksum -def create_symlink(source, target): +def create_symlink(source: str, target: str) -> None: """Create a symlinked file with some checks.""" thesource = Path(source) @@ -202,16 +207,16 @@ def create_symlink(source, target): raise OSError(f"Target file {thesource} does not exist or is not a symlink.") -def size(fname): +def size(fname: str) -> int: return Path(fname).stat().st_size -def uuid_from_string(string): +def uuid_from_string(string: str) -> str: """Produce valid and repeteable UUID4 as a hash of given string""" return str(uuid.UUID(hashlib.md5(string.encode("utf-8")).hexdigest())) -def read_parameters_txt(pfile: Union[Path, str]) -> Dict[str, Union[str, float, int]]: +def read_parameters_txt(pfile: Path | str) -> dict[str, str | float | int | None]: """Read the parameters.txt file and convert to a dict. The parameters.txt file has this structure:: SENSNAME rms_seed @@ -255,8 +260,8 @@ def read_parameters_txt(pfile: Union[Path, str]) -> Dict[str, Union[str, float, def nested_parameters_dict( - paramdict: Dict[str, Union[str, int, float]], -) -> Dict[str, Union[str, int, float, Dict[str, Union[str, int, float]]]]: + paramdict: dict[str, str | int | float], +) -> dict[str, str | int | float | dict[str, str | int | float]]: """Interpret a flat parameters dictionary into a nested dictionary, based on presence of colons in keys. @@ -265,10 +270,8 @@ def nested_parameters_dict( In design_kw (semeio) this namespace identifier is actively ignored, meaning that the keys without the namespace must be unique. 
""" - nested_dict: Dict[ - str, Union[str, int, float, Dict[str, Union[str, int, float]]] - ] = {} - unique_keys: List[str] = [] + nested_dict: dict[str, str | int | float | dict[str, str | int | float]] = {} + unique_keys: list[str] = [] for key, value in paramdict.items(): if ":" in key: subdict, newkey = key.split(":", 1) @@ -285,26 +288,22 @@ def nested_parameters_dict( return nested_dict -def check_if_number(value): +def check_if_number(value: str | None) -> int | float | str | None: """Check if value (str) looks like a number and return the converted value.""" if value is None: return None - res = None - try: - res = int(value) - except ValueError: - with contextlib.suppress(ValueError): - res = float(value) + with contextlib.suppress(ValueError): + return int(value) - if res is not None: - return res + with contextlib.suppress(ValueError): + return float(value) return value -def get_object_name(obj): +def get_object_name(obj: Path) -> str | None: """Get the name of the object. If not possible, return None. @@ -342,7 +341,7 @@ def prettyprint_dict(inp: dict) -> str: return str(json.dumps(inp, indent=2, default=str, ensure_ascii=False)) -def some_config_from_env(envvar="FMU_GLOBAL_CONFIG") -> dict: +def some_config_from_env(envvar: str = "FMU_GLOBAL_CONFIG") -> dict | None: """Get the config from environment variable. This function is only called if config SHALL be fetched from the environment @@ -354,14 +353,12 @@ def some_config_from_env(envvar="FMU_GLOBAL_CONFIG") -> dict: cfg_path = os.environ[envvar] else: warnings.warn( - ( - "No config was received. " - "The config should be given explicitly as an input argument, or " - f"the environment variable {envvar} must point to a valid yaml file. " - "A missing config will still export a file, but without a metadata " - "file. Such exports may be disabled in a future version of fmu.dataio", - UserWarning, - ) + "No config was received. " + "The config should be given explicitly as an input argument, or " + f"the environment variable {envvar} must point to a valid yaml file. " + "A missing config will still export a file, but without a metadata " + "file. Such exports may be disabled in a future version of fmu.dataio", + UserWarning, ) return None @@ -394,7 +391,7 @@ def filter_validate_metadata(metadata_in: dict) -> dict: return metadata -def generate_description(desc: Optional[Union[str, list]] = None) -> Union[list, None]: +def generate_description(desc: str | list | None = None) -> list | None: """Parse desciption input (generic).""" if not desc: return None @@ -407,7 +404,7 @@ def generate_description(desc: Optional[Union[str, list]] = None) -> Union[list, raise ValueError("Description of wrong type, must be list of strings or string") -def read_metadata(filename: Union[str, Path]) -> dict: +def read_metadata(filename: str | Path) -> dict: """Read the metadata as a dictionary given a filename. If the filename is e.g. 
/some/path/mymap.gri, the assosiated metafile @@ -431,7 +428,9 @@ def read_metadata(filename: Union[str, Path]) -> dict: return yaml.safe_load(stream) -def glue_metadata_preprocessed(oldmeta, newmeta): +def glue_metadata_preprocessed( + oldmeta: dict[str, Any], newmeta: dict[str, Any] +) -> dict[str, Any]: """Glue (combine) to metadata dicts according to rule 'preprocessed'.""" meta = oldmeta.copy() @@ -448,7 +447,10 @@ def glue_metadata_preprocessed(oldmeta, newmeta): return meta -def parse_timedata(datablock: dict, isoformat=True): +def parse_timedata( + datablock: dict, + isoformat: bool = True, +) -> tuple[str | None, str | None]: """The time section under datablock has variants to parse. Formats:: @@ -492,7 +494,7 @@ def parse_timedata(datablock: dict, isoformat=True): if isinstance(datablock["time"], list): date0 = datablock["time"][0]["value"] - if len(datablock["time"] == 2): + if len(datablock["time"]) == 2: date1 = datablock["time"][1]["value"] elif isinstance(datablock["time"], dict): @@ -503,10 +505,10 @@ def parse_timedata(datablock: dict, isoformat=True): if not isoformat: if date0: tdate0 = datetime.strptime(date0, "%Y-%m-%dT%H:%M:%S") - date0 = tdate0.datetime.strftime("%Y%m%d") + date0 = tdate0.strftime("%Y%m%d") if date1: tdate1 = datetime.strptime(date1, "%Y-%m-%dT%H:%M:%S") - date1 = tdate1.datetime.strftime("%Y%m%d") + date1 = tdate1.strftime("%Y%m%d") return (date0, date1) diff --git a/src/fmu/dataio/dataio.py b/src/fmu/dataio/dataio.py index aa426add3..5e03f5ca3 100644 --- a/src/fmu/dataio/dataio.py +++ b/src/fmu/dataio/dataio.py @@ -2,6 +2,8 @@ The metadata spec is documented as a JSON schema, stored under schema/. """ +from __future__ import annotations + import logging import os import uuid @@ -9,10 +11,10 @@ from copy import deepcopy from dataclasses import dataclass, field from pathlib import Path -from typing import Any, ClassVar, List, Optional, Tuple, Union +from typing import Any, ClassVar, Final, List, Optional, Union from warnings import warn -import pandas as pd # type: ignore +import pandas as pd from . import _metadata from ._definitions import ( @@ -35,13 +37,13 @@ uuid_from_string, ) -INSIDE_RMS = detect_inside_rms() +INSIDE_RMS: Final = detect_inside_rms() -GLOBAL_ENVNAME = "FMU_GLOBAL_CONFIG" -SETTINGS_ENVNAME = "FMU_DATAIO_CONFIG" # input settings from a spesific file! +GLOBAL_ENVNAME: Final = "FMU_GLOBAL_CONFIG" +SETTINGS_ENVNAME: Final = "FMU_DATAIO_CONFIG" # input settings from a spesific file! -logger = logging.getLogger(__name__) +logger: Final = logging.getLogger(__name__) logging.captureWarnings(True) @@ -54,13 +56,17 @@ class ValidationError(ValueError, KeyError): # ====================================================================================== -def _validate_variable(key, value, legals) -> bool: +def _validate_variable(key: str, value: type, legals: dict[str, str | type]) -> bool: """Use data from __annotions__ to validate that overriden var. is of legal type.""" if key not in legals: logger.warning("Unsupported key, raise an error") raise ValidationError(f"The input key '{key}' is not supported") - valid_type = eval(legals[key]) if isinstance(legals[key], str) else legals[key] + legal_key = legals[key] + # Potential issue: Eval will use the modules namespace. If given + # "from typing import ClassVar" or simular + # is missing from the namespace, eval(...) will fail. 
+ valid_type = eval(legal_key) if isinstance(legal_key, str) else legal_key try: validcheck = valid_type.__args__ @@ -68,6 +74,7 @@ def _validate_variable(key, value, legals) -> bool: validcheck = valid_type if "typing." not in str(validcheck): + print(f"{value=}, {validcheck=}, {type(value)=}, {type(validcheck)=}") if not isinstance(value, validcheck): logger.warning("Wrong type of value, raise an error") raise ValidationError( @@ -173,14 +180,13 @@ def _check_global_config( "be missing is a temporary solution that may change in future versions!" ) warnings.warn(msg, PendingDeprecationWarning) - return False return True # the two next content key related function may require refactoring/simplification -def _check_content(proposed: Union[str, dict]) -> Any: +def _check_content(proposed: str | dict | None) -> Any: """Check content and return a validated version.""" logger.info("Evaluate content") @@ -230,7 +236,7 @@ def _check_content(proposed: Union[str, dict]) -> Any: return usecontent, content_specific -def _content_validate(name, fields): +def _content_validate(name: str, fields: dict[str, type]) -> None: logger.debug("starting staticmethod _data_process_content_validate") valid = ALLOWED_CONTENTS.get(name, None) if valid is None: @@ -292,7 +298,7 @@ def _content_validate(name, fields): # ====================================================================================== -def read_metadata(filename: Union[str, Path]) -> dict: +def read_metadata(filename: str | Path) -> dict: """Read the metadata as a dictionary given a filename. If the filename is e.g. /some/path/mymap.gri, the assosiated metafile @@ -596,7 +602,7 @@ class ExportData: # << NB! storing ACTUAL casepath: _rootpath: Path = field(default_factory=Path, init=False) - def __post_init__(self): + def __post_init__(self) -> None: logger.setLevel(level=self.verbosity) logger.info("Running __post_init__ ExportData") logger.debug("Global config is %s", prettyprint_dict(self.config)) @@ -610,7 +616,7 @@ def __post_init__(self): if external_input: # derive legal input from dataclass signature - annots = getattr(self, "__annotations__", None) + annots = getattr(self, "__annotations__", {}) legals = { key: val for key, val in annots.items() if not key.startswith("_") } @@ -628,6 +634,7 @@ def __post_init__(self): # global config which may be given as env variable -> a file; will override if GLOBAL_ENVNAME in os.environ: theconfig = some_config_from_env(GLOBAL_ENVNAME) + assert theconfig is not None self._config_is_valid = _check_global_config( theconfig, strict=True, action="warn" ) @@ -649,7 +656,7 @@ def __post_init__(self): logger.info("FMU context is %s", self.fmu_context) logger.info("Ran __post_init__") - def _show_deprecations_or_notimplemented(self): + def _show_deprecations_or_notimplemented(self) -> None: """Warn on deprecated keys or on stuff not implemented yet.""" if self.runpath: @@ -666,12 +673,11 @@ def _show_deprecations_or_notimplemented(self): PendingDeprecationWarning, ) - def _validate_content_key(self): + def _validate_content_key(self) -> None: """Validate the given 'content' input.""" - self._usecontent, self._content_specific = _check_content(self.content) - def _validate_fmucontext_key(self): + def _validate_fmucontext_key(self) -> None: """Validate the given 'fmu_context' input.""" if self.fmu_context not in ALLOWED_FMU_CONTEXTS: msg = "" @@ -713,7 +719,7 @@ def _update_check_settings(self, newsettings: dict) -> None: self._validate_fmucontext_key() logger.info("Validate FMU context which is now %s", 
self.fmu_context) - def _update_globalconfig_from_settings(self): + def _update_globalconfig_from_settings(self) -> None: """A few user settings may update/append the global config directly.""" newglobals = deepcopy(self.config) @@ -729,7 +735,7 @@ def _update_globalconfig_from_settings(self): self.config = newglobals - def _establish_pwd_rootpath(self): + def _establish_pwd_rootpath(self) -> None: """Establish state variables pwd and the (initial) rootpath. The self._pwd stores the process working directory, i.e. the folder @@ -761,7 +767,7 @@ def _establish_pwd_rootpath(self): if self._inside_rms or INSIDE_RMS or "RUN_DATAIO_EXAMPLES" in os.environ: self._rootpath = (self._pwd / "../../.").absolute().resolve() logger.info("Run from inside RMS (or pretend)") - self._inside_rms = True + self._inside_rms = True # BUG(?): Should be ExportData._inside_rms? # make some extra keys in settings: self._usecontext = self.fmu_context # may change later! @@ -806,7 +812,9 @@ def _check_obj_if_file(self, obj: Any) -> Any: # Public methods: # ================================================================================== - def generate_metadata(self, obj: Any, compute_md5: bool = True, **kwargs) -> dict: + def generate_metadata( + self, obj: object, compute_md5: bool = True, **kwargs: object + ) -> dict: """Generate and return the complete metadata for a provided object. An object may be a map, 3D grid, cube, table, etc which is of a known and @@ -858,7 +866,12 @@ def generate_metadata(self, obj: Any, compute_md5: bool = True, **kwargs) -> dic return deepcopy(self._metadata) - def export(self, obj, return_symlink=False, **kwargs) -> str: + def export( + self, + obj: object, + return_symlink: bool = False, + **kwargs: Any, + ) -> str: """Export data objects of 'known' type to FMU storage solution with metadata. This function will also collect the data spesific class metadata. For "classic" @@ -886,17 +899,23 @@ def export(self, obj, return_symlink=False, **kwargs) -> str: outfile = Path(metadata["file"]["absolute_path"]) metafile = outfile.parent / ("." + str(outfile.name) + ".yml") - useflag: Union[bool, str] - if isinstance(obj, pd.DataFrame): - useflag = self.table_include_index - else: - useflag = self._usefmtflag + useflag = ( + self.table_include_index + if isinstance(obj, pd.DataFrame) + else self._usefmtflag + ) obj = self._check_obj_if_file(obj) logger.info("Export to file and compute MD5 sum, using flag: <%s>", useflag) - outfile, md5 = export_file_compute_checksum_md5( - obj, outfile, outfile.suffix, flag=useflag + toutfile, md5 = export_file_compute_checksum_md5( + obj, + outfile, + outfile.suffix, + flag=useflag, # type: ignore + # BUG(?): Looks buggy, if flag is bool export_file will blow up. ) + assert toutfile is not None + outfile = toutfile # inject md5 checksum in metadata metadata["file"]["checksum_md5"] = md5 logger.info("Actual file is: %s", outfile) @@ -912,9 +931,9 @@ def export(self, obj, return_symlink=False, **kwargs) -> str: if metadata["file"].get("absolute_path_symlink"): outfile_target = Path(metadata["file"]["absolute_path_symlink"]) outfile_source = Path(metadata["file"]["absolute_path"]) - create_symlink(outfile_source, outfile_target) + create_symlink(str(outfile_source), str(outfile_target)) metafile_target = outfile_target.parent / ("." 
+ str(outfile.name) + ".yml") - create_symlink(metafile, metafile_target) + create_symlink(str(metafile), str(metafile_target)) self._metadata = metadata @@ -973,11 +992,13 @@ class InitializeCase: # pylint: disable=too-few-public-methods _pwd: Path = field(default_factory=Path, init=False) _casepath: Path = field(default_factory=Path, init=False) - def __post_init__(self): + def __post_init__(self) -> None: logger.setLevel(level=self.verbosity) if not self.config or GLOBAL_ENVNAME in os.environ: - self.config = some_config_from_env(GLOBAL_ENVNAME) + cnf = some_config_from_env(GLOBAL_ENVNAME) + assert cnf is not None + self.config = cnf # For this class, the global config must be valid; hence error if not _check_global_config(self.config, strict=True, action="error") @@ -1006,7 +1027,7 @@ def _update_settings(self, newsettings: dict) -> None: logger.setLevel(level=self.verbosity) logger.info("New setting OK for %s", setting) - def _establish_pwd_casepath(self): + def _establish_pwd_casepath(self) -> None: """Establish state variables pwd and casepath. See ExportData's method but this is much simpler (e.g. no RMS context) @@ -1027,7 +1048,7 @@ def _establish_pwd_casepath(self): logger.info("Set PWD (case): %s", str(self._pwd)) logger.info("Set rootpath (case): %s", str(self._casepath)) - def _check_already_metadata_or_create_folder(self, force=False) -> bool: + def _check_already_metadata_or_create_folder(self, force: bool = False) -> bool: if not self._casepath.exists(): self._casepath.mkdir(parents=True, exist_ok=True) logger.info("Created rootpath (case) %s", self._casepath) @@ -1050,8 +1071,11 @@ def _check_already_metadata_or_create_folder(self, force=False) -> bool: # ================================================================================== def generate_metadata( - self, force: bool = False, skip_null=True, **kwargs - ) -> Union[dict, None]: + self, + force: bool = False, + skip_null: bool = True, + **kwargs: object, + ) -> dict | None: """Generate case metadata. Args: @@ -1087,6 +1111,7 @@ def generate_metadata( # only asset, not ssdl access = _metadata.generate_meta_access(self.config) + assert access is not None meta["access"] = {} meta["access"]["asset"] = access["asset"] @@ -1114,7 +1139,12 @@ def generate_metadata( # alias generate_case_metadata = generate_metadata - def export(self, force: bool = False, skip_null=True, **kwargs) -> Union[str, None]: + def export( + self, + force: bool = False, + skip_null: bool = True, + **kwargs: dict[str, Any], + ) -> str | None: """Export case metadata to file. Args: @@ -1193,7 +1223,7 @@ class AggregatedData: _metadata: dict = field(default_factory=dict, init=False) _metafile: Path = field(default_factory=Path, init=False) - def __post_init__(self): + def __post_init__(self) -> None: logger.setLevel(level=self.verbosity) @staticmethod @@ -1221,7 +1251,7 @@ def _update_settings(self, newsettings: dict) -> None: logger.setLevel(level=self.verbosity) logger.info("New setting OK for %s", setting) - def _construct_filename(self, template: dict) -> Tuple[Path, Path]: + def _construct_filename(self, template: dict) -> tuple[Path, Path | None]: """Construct the paths/filenames for aggregated data. 
 
         These filenames are constructed a bit different than in a forward job, since we
@@ -1323,8 +1353,12 @@ def _construct_filename(self, template: dict) -> Tuple[Path, Path]:
         return relname, absname
 
     def _generate_aggrd_metadata(
-        self, obj: Any, real_ids: List[int], uuids: List[str], compute_md5: bool = True
-    ):
+        self,
+        obj: object,
+        real_ids: list[int],
+        uuids: list[str],
+        compute_md5: bool = True,
+    ) -> None:
         logger.info(
             "self.aggregation is %s (%s)",
             self.aggregation_id,
@@ -1392,10 +1426,10 @@ def _generate_aggrd_metadata(
 
     def generate_metadata(
         self,
-        obj: Any,
+        obj: object,
         compute_md5: bool = True,
         skip_null: bool = True,
-        **kwargs,
+        **kwargs: object,
     ) -> dict:
         """Generate metadata for the aggregated data.
 
@@ -1441,17 +1475,17 @@ def generate_metadata(
     # alias method
     def generate_aggregation_metadata(
         self,
-        obj: Any,
+        obj: object,
         compute_md5: bool = True,
         skip_null: bool = True,
-        **kwargs,
+        **kwargs: object,
     ) -> dict:
         """Alias method name, see ``generate_metadata``"""
         return self.generate_metadata(
             obj, compute_md5=compute_md5, skip_null=skip_null, **kwargs
         )
 
-    def export(self, obj, **kwargs) -> str:
+    def export(self, obj: object, **kwargs: object) -> str:
         """Export aggregated file with metadata to file.
 
         Args:
@@ -1478,7 +1512,9 @@ def export(self, obj, **kwargs) -> str:
         metafile = outfile.parent / ("." + str(outfile.name) + ".yml")
 
         logger.info("Export to file and compute MD5 sum")
-        outfile, md5 = export_file_compute_checksum_md5(obj, outfile, outfile.suffix)
+        toutfile, md5 = export_file_compute_checksum_md5(obj, outfile, outfile.suffix)
+        assert toutfile is not None
+        outfile = Path(toutfile)
 
         # inject the computed md5 checksum in metadata
         metadata["file"]["checksum_md5"] = md5
diff --git a/src/fmu/dataio/hook_implementations/jobs.py b/src/fmu/dataio/hook_implementations/jobs.py
index 9c06e79fb..19098eab6 100644
--- a/src/fmu/dataio/hook_implementations/jobs.py
+++ b/src/fmu/dataio/hook_implementations/jobs.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 try:
     from ert.shared.plugins.plugin_manager import hook_implementation
     from ert.shared.plugins.plugin_response import plugin_response
@@ -8,5 +10,5 @@
 
 @hook_implementation
 @plugin_response(plugin_name="fmu_dataio")
-def installable_workflow_jobs():
+def installable_workflow_jobs() -> dict:
     return {}
diff --git a/src/fmu/dataio/scripts/create_case_metadata.py b/src/fmu/dataio/scripts/create_case_metadata.py
index 38cce4bab..0d6492daa 100644
--- a/src/fmu/dataio/scripts/create_case_metadata.py
+++ b/src/fmu/dataio/scripts/create_case_metadata.py
@@ -8,25 +8,28 @@
 pointed towards the produced global_variables, fmu-config should run before this
 script to make sure global_variables is updated."""
 
+from __future__ import annotations
+
 import argparse
 import logging
 from pathlib import Path
+from typing import Final
 
 import yaml
 
 try:
-    from ert.shared.plugins.plugin_manager import hook_implementation  # type: ignore
+    from ert.shared.plugins.plugin_manager import hook_implementation
 except ModuleNotFoundError:
-    from ert_shared.plugins.plugin_manager import hook_implementation  # type: ignore
+    from ert_shared.plugins.plugin_manager import hook_implementation
 
 try:
-    from ert import ErtScript  # type: ignore
+    from ert import ErtScript
 except ImportError:
-    from res.job_queue import ErtScript  # type: ignore
+    from res.job_queue import ErtScript
 
 from fmu.dataio import InitializeCase
 
-logger = logging.getLogger(__name__)
+logger: Final = logging.getLogger(__name__)
 logger.setLevel(logging.CRITICAL)
 
 # This documentation is for ERT workflow
@@ -74,7 +77,7 @@ class WfCreateCaseMetadata(ErtScript):  # name in fmu-dataio
 
     # pylint: disable=too-few-public-methods
 
-    def run(self, *args) -> None:
+    def run(self, *args: str) -> None:
         # pylint: disable=no-self-use
         """Parse arguments and call _create_case_metadata_main()"""
         parser = get_parser()
@@ -82,18 +85,19 @@ def run(self, *args) -> None:
         create_case_metadata_main(workflow_args)
 
 
-def create_case_metadata_main(args) -> None:
+def create_case_metadata_main(args: argparse.Namespace) -> None:
     """Create the case metadata and register case on Sumo."""
     logger.setLevel(level=args.verbosity)
 
     check_arguments(args)
     case_metadata_path = create_metadata(args)
+    assert case_metadata_path is not None
     register_on_sumo(args, case_metadata_path)
 
     logger.debug("create_case_metadata.py has finished.")
 
 
-def create_metadata(args) -> str:
+def create_metadata(args: argparse.Namespace) -> str | None:
     """Create the case metadata and print them to the disk"""
     _global_variables_path = Path(args.ert_config_path, args.global_variables_path)
     global_variables = _parse_yaml(_global_variables_path)
@@ -105,7 +109,8 @@ def create_metadata(args) -> str:
         rootfolder=args.ert_caseroot,
         casename=args.ert_casename,
         caseuser=args.ert_username,
-        description=None,
+        description=None,  # type: ignore
+        # BUG(JB): description must be str according to dataclass
     )
 
     logger.info("Case metadata has been made: %s", case_metadata_path)
@@ -113,7 +118,10 @@ def create_metadata(args) -> str:
     return case_metadata_path
 
 
-def register_on_sumo(args, case_metadata_path) -> str:
+def register_on_sumo(
+    args: argparse.Namespace,
+    case_metadata_path: str,
+) -> str | None:
     """Register the case on Sumo by sending the case metadata"""
 
     env = args.sumo_env
@@ -142,14 +150,14 @@ def register_on_sumo(args, case_metadata_path) -> str:
     return sumo_id
 
 
-def _parse_yaml(path):
+def _parse_yaml(path: Path) -> dict:
     """Parse the global variables, return as dict"""
 
     with open(path) as stream:
         return yaml.safe_load(stream)
 
 
-def check_arguments(args):
+def check_arguments(args: argparse.Namespace) -> None:
     """Do basic sanity checks of input"""
 
     logger.debug("Checking input arguments")
@@ -188,9 +196,12 @@ def get_parser() -> argparse.ArgumentParser:
 
 
 @hook_implementation
-def legacy_ertscript_workflow(config) -> None:
+def legacy_ertscript_workflow(config: object) -> None:
     """Hook the WfCreateCaseMetadata class with documentation into ERT."""
-    workflow = config.add_workflow(WfCreateCaseMetadata, "WF_CREATE_CASE_METADATA")
+    workflow = config.add_workflow(  # type: ignore
+        WfCreateCaseMetadata,
+        "WF_CREATE_CASE_METADATA",
+    )
     workflow.parser = get_parser
     workflow.description = DESCRIPTION
     workflow.examples = EXAMPLES
diff --git a/tests/test_units/test_aggregated_surfaces.py b/tests/test_units/test_aggregated_surfaces.py
index 421f6401d..d854ef7a2 100644
--- a/tests/test_units/test_aggregated_surfaces.py
+++ b/tests/test_units/test_aggregated_surfaces.py
@@ -74,21 +74,17 @@ def test_regsurf_aggregated_alt_keys(fmurun_w_casemetadata, aggr_surfs_mean):
     aggr_mean, metas = aggr_surfs_mean  # xtgeo_object, list-of-metadata-dicts
     logger.info("Aggr. mean is %s", aggr_mean.values.mean())
 
-    aggdata1 = dataio.AggregatedData(
+    meta1 = dataio.AggregatedData(
         source_metadata=metas,
         operation="mean",
         name="myaggrd",
         tagname="mean",
         verbosity="INFO",
         aggregation_id="1234",
-    )
-
-    meta1 = aggdata1.generate_metadata(aggr_mean)
+    ).generate_metadata(aggr_mean)
 
     # alternative
-    aggdata2 = dataio.AggregatedData()
-
-    meta2 = aggdata2.generate_metadata(
+    meta2 = dataio.AggregatedData().generate_metadata(
         aggr_mean,
         source_metadata=metas,
         operation="mean",
@@ -100,8 +96,7 @@ def test_regsurf_aggregated_alt_keys(fmurun_w_casemetadata, aggr_surfs_mean):
 
     # alternative with export
     aggdata3 = dataio.AggregatedData()
-
-    _ = aggdata3.export(
+    aggdata3.export(
         aggr_mean,
         source_metadata=metas,
         operation="mean",