Commit: Package fully typed

JB Lovland committed Jan 8, 2024
1 parent 49011ad · commit 90f92d7

Showing 15 changed files with 425 additions and 293 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/fmudataio-documention.yml
@@ -15,7 +15,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: ["3.8"]
+        python-version: ["3.10"]
         os: [ubuntu-latest]
 
     steps:
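
The version bump from 3.8 to 3.10 presumably lines up with the PEP 604 union syntax (str | None) used throughout the newly typed modules below: with "from __future__ import annotations" such unions are fine inside annotations on older interpreters, but anywhere else they are evaluated at runtime and need Python 3.10. A short illustrative sketch (not code from this repository):

    from __future__ import annotations


    def lookup(key: str) -> str | None:  # fine on 3.8+: the future import defers evaluation
        return {"a": "1"}.get(key)


    # Outside annotations the union is evaluated eagerly, so this line
    # needs Python 3.10+ (PEP 604):
    StrOrNone = str | None
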
3 changes: 3 additions & 0 deletions .gitignore
@@ -96,3 +96,6 @@ venv.bak/

 # setuptools_scm version
 src/fmu/dataio/version.py
+
+# mypy
+.dmypy.json
2 changes: 0 additions & 2 deletions examples/s/d/nn/_project/aggregate_surfaces.py
@@ -54,8 +54,6 @@ def main():
     # This is the ID we assign to this set of aggregations
     aggregation_id = "something_very_unique"  # IRL this will usually be a uuid
 
-    # We aggregate these source surfaces and collect results in list of dictionaries
-
     # Initialize an AggregatedData object for this set of aggregations
     exp = fmu.dataio.AggregatedData(
         source_metadata=source_metadata,
8 changes: 8 additions & 0 deletions mypy.ini
@@ -1,4 +1,12 @@
 [mypy]
+disallow_untyped_defs = True
+extra_checks = True
+ignore_missing_imports = True
+strict_equality = True
+warn_redundant_casts = True
+warn_unused_configs = True
+warn_unused_ignores = True
+exclude = ^((tests|docs|examples|bin)/|conftest.py?)
 
 [mypy-numpy.*]
 # Applies to Python 3.6:
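
A hypothetical two-function snippet (not from this repository) showing the kind of code the new flags reject:

    def add(a, b):  # rejected by disallow_untyped_defs: the function is untyped
        return a + b


    def check(flag: str) -> bool:
        # rejected by strict_equality: str and int can never compare equal
        return flag == 1
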
52 changes: 36 additions & 16 deletions src/fmu/dataio/_definitions.py
@@ -1,4 +1,6 @@
"""Various definitions and hard settings used in fmu-dataio."""
from __future__ import annotations

from dataclasses import dataclass, field

SCHEMA = (
@@ -10,32 +12,50 @@

 @dataclass
 class _ValidFormats:
-    surface: dict = field(default_factory=dict)
-    grid: dict = field(default_factory=dict)
-    cube: dict = field(default_factory=dict)
-    table: dict = field(default_factory=dict)
-    polygons: dict = field(default_factory=dict)
-    points: dict = field(default_factory=dict)
-    dictionary: dict = field(default_factory=dict)
-
-    def __post_init__(self):
-        self.surface = {"irap_binary": ".gri"}
-        self.grid = {"hdf": ".hdf", "roff": ".roff"}
-        self.cube = {"segy": ".segy"}
-        self.table = {"hdf": ".hdf", "csv": ".csv", "arrow": ".arrow"}
-        self.polygons = {
+    surface: dict = field(
+        default_factory=lambda: {
+            "irap_binary": ".gri",
+        }
+    )
+    grid: dict = field(
+        default_factory=lambda: {
+            "hdf": ".hdf",
+            "roff": ".roff",
+        }
+    )
+    cube: dict = field(
+        default_factory=lambda: {
+            "segy": ".segy",
+        }
+    )
+    table: dict = field(
+        default_factory=lambda: {
+            "hdf": ".hdf",
+            "csv": ".csv",
+            "arrow": ".arrow",
+        }
+    )
+    polygons: dict = field(
+        default_factory=lambda: {
             "hdf": ".hdf",
             "csv": ".csv",  # columns will be X Y Z, ID
             "csv|xtgeo": ".csv",  # use default xtgeo columns: X_UTME, ... POLY_ID
             "irap_ascii": ".pol",
         }
-        self.points = {
+    )
+    points: dict = field(
+        default_factory=lambda: {
             "hdf": ".hdf",
             "csv": ".csv",  # columns will be X Y Z
             "csv|xtgeo": ".csv",  # use default xtgeo columns: X_UTME, Y_UTMN, Z_TVDSS
             "irap_ascii": ".poi",
         }
-        self.dictionary = {"json": ".json"}
+    )
+    dictionary: dict = field(
+        default_factory=lambda: {
+            "json": ".json",
+        }
+    )
 
 
 ALLOWED_CONTENTS = {
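
The _ValidFormats rewrite inlines each dict into a default_factory lambda, so the untyped __post_init__ can be dropped while every instance still gets its own fresh dict. A small sketch, using a hypothetical Formats class, of why dataclasses require the factory for mutable defaults:

    from dataclasses import dataclass, field


    @dataclass
    class Formats:
        # A bare dict-literal default raises ValueError when the class is
        # created; mutable defaults must be constructed per instance.
        surface: dict = field(default_factory=lambda: {"irap_binary": ".gri"})


    a, b = Formats(), Formats()
    a.surface["segy"] = ".segy"
    assert "segy" not in b.surface  # the lambda runs once per instance
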
60 changes: 31 additions & 29 deletions src/fmu/dataio/_design_kw.py
@@ -5,31 +5,31 @@
 It is copied here instead of pip-installed in order to avoid dragging
 along all dependencies of semeio"""
 
-# pylint: disable=logging-fstring-interpolation
+from __future__ import annotations
 
 import logging
 import re
 import shlex
+from typing import Any, Iterable
 
 _STATUS_FILE_NAME = "DESIGN_KW.OK"
 
 _logger = logging.getLogger(__name__)
 
 
 def run(
-    template_file_name,
-    result_file_name,
-    log_level,
-    parameters_file_name="parameters.txt",
-):
+    template_file_name: str,
+    result_file_name: str,
+    log_level: str,
+    parameters_file_name: str = "parameters.txt",
+) -> None:
     # Get all key, value pairs
     # If FWL key is having multiple entries in the parameters file
     # KeyError is raised. This will be logged, and no OK
     # file is written
 
     _logger.setLevel(log_level)
 
-    valid = True
 
     with open(parameters_file_name) as parameters_file:
         parameters = parameters_file.readlines()
 
@@ -40,24 +40,22 @@ def run(
     with open(template_file_name) as template_file:
         template = template_file.readlines()
 
-    if valid:
-        with open(result_file_name, "w") as result_file:
-            for line in template:
-                if not is_comment(line):
-                    for key, value in key_vals.items():
-                        line = line.replace(f"<{key}>", str(value))
+    with open(result_file_name, "w") as result_file:
+        for line in template:
+            if not is_comment(line):
+                for key, value in key_vals.items():
+                    line = line.replace(f"<{key}>", str(value))
 
-                    if not all_matched(line, template_file_name, template):
-                        valid = False
+                if not all_matched(line, template_file_name, template):
+                    pass
 
-                result_file.write(line)
+            result_file.write(line)
 
-    if valid:
-        with open(_STATUS_FILE_NAME, "w") as status_file:
-            status_file.write("DESIGN_KW OK\n")
+    with open(_STATUS_FILE_NAME, "w") as status_file:
+        status_file.write("DESIGN_KW OK\n")
 
 
-def all_matched(line, template_file_name, template):
+def all_matched(line: str, template_file_name: str, template: list[str]) -> bool:
     valid = True
     for unmatched in unmatched_templates(line):
         if is_perl(template_file_name, template):
@@ -73,24 +71,24 @@ def all_matched(line, template_file_name, template):
     return valid
 
 
-def is_perl(file_name, template):
-    return file_name.endswith(".pl") or template[0].find("perl") != -1
+def is_perl(file_name: str, template: list[str]) -> bool:
+    return bool(file_name.endswith(".pl") or template[0].find("perl") != -1)
 
 
-def unmatched_templates(line):
+def unmatched_templates(line: str) -> list[str]:
     bracketpattern = re.compile("<.+?>")
     if bracketpattern.search(line):
         return bracketpattern.findall(line)
     return []
 
 
-def is_comment(line):
+def is_comment(line: str) -> bool:
     ecl_comment_pattern = re.compile("^--")
     std_comment_pattern = re.compile("^#")
-    return ecl_comment_pattern.search(line) or std_comment_pattern.search(line)
+    return bool(ecl_comment_pattern.search(line) or std_comment_pattern.search(line))
 
 
-def extract_key_value(parameters):
+def extract_key_value(parameters: Iterable[str]) -> dict[str, str]:
     """Parses a list of strings, looking for key-value pairs pr. line
     separated by whitespace, into a dictionary.
@@ -128,7 +126,10 @@ def extract_key_value(parameters):
     return res
 
 
-def rm_genkw_prefix(paramsdict, ignoreprefixes="LOG10_"):
+def rm_genkw_prefix(
+    paramsdict: dict[str, Any],
+    ignoreprefixes: str | list[str] | None = "LOG10_",
+) -> dict[str, Any]:
     """Strip prefixes from keys in a dictionary.
 
     Prefix is any string before a colon. No colon means no prefix.
@@ -152,7 +153,8 @@ def rm_genkw_prefix(paramsdict, ignoreprefixes="LOG10_"):
         ignoreprefixes = []
     if isinstance(ignoreprefixes, str):
         ignoreprefixes = [ignoreprefixes]
-    ignoreprefixes = filter(None, ignoreprefixes)
+
+    ignoreprefixes = list(filter(None, ignoreprefixes))
 
     for ignore_str in ignoreprefixes:
         paramsdict = {
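
An easy-to-miss change above: filter(None, ignoreprefixes) is now wrapped in list(...). A filter object is a one-shot iterator, so it is exhausted after a single pass, and it also does not match the list-shaped value the annotated rm_genkw_prefix works with. A standalone sketch (not from this repository):

    # A filter object is consumed by the first full iteration:
    lazy = filter(None, ["LOG10_", "", "LN_"])
    assert list(lazy) == ["LOG10_", "LN_"]
    assert list(lazy) == []  # the second pass sees an exhausted iterator

    # Materializing keeps the value re-iterable and gives mypy a concrete
    # list[str] to check against:
    prefixes = list(filter(None, ["LOG10_", "", "LN_"]))
    assert list(prefixes) == ["LOG10_", "LN_"]
    assert list(prefixes) == ["LOG10_", "LN_"]  # stable across repeat passes
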
35 changes: 19 additions & 16 deletions src/fmu/dataio/_filedata_provider.py
@@ -3,12 +3,13 @@
 Populate and verify stuff in the 'file' block in fmu (partial excpetion is checksum_md5
 as this is convinient to populate later, on demand)
 """
+from __future__ import annotations
 
 import logging
 from copy import deepcopy
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any, Optional
+from typing import Any
 from warnings import warn
 
 logger = logging.getLogger(__name__)
@@ -35,13 +36,13 @@ class _FileDataProvider:
verbosity: str = "CRITICAL"

# storing results in these variables
relative_path: Optional[str] = field(default="", init=False)
relative_path_symlink: Optional[str] = field(default="", init=False)
absolute_path: Optional[str] = field(default="", init=False)
absolute_path_symlink: Optional[str] = field(default="", init=False)
checksum_md5: Optional[str] = field(default="", init=False)
relative_path: str | None = field(default="", init=False)
relative_path_symlink: str | None = field(default="", init=False)
absolute_path: str | None = field(default="", init=False)
absolute_path_symlink: str | None = field(default="", init=False)
checksum_md5: str | None = field(default="", init=False)

def __post_init__(self):
def __post_init__(self) -> None:
logger.setLevel(level=self.verbosity)

if self.dataio.name:
@@ -63,10 +64,11 @@ def __post_init__(self):

         self.fmu_context = self.dataio._usecontext  # may be None!
 
-        logger.info("Initialize %s", __class__)
+        logger.info("Initialize %s", self.__class__)
 
-    def derive_filedata(self):
+    def derive_filedata(self) -> None:
         relpath, symrelpath = self._get_path()
+        assert relpath is not None
         relative, absolute = self._derive_filedata_generic(relpath)
         self.relative_path = relative
         self.absolute_path = absolute
@@ -78,7 +80,7 @@ def derive_filedata(self):

logger.info("Derived filedata")

def _derive_filedata_generic(self, inrelpath):
def _derive_filedata_generic(self, inrelpath: Path) -> tuple[str, str]:
"""This works with both normal data and symlinks."""
stem = self._get_filestem()

@@ -116,7 +118,7 @@ def _derive_filedata_generic(self, inrelpath):
logger.info("Derived filedata")
return str(relpath), str(abspath)

def _get_filestem(self):
def _get_filestem(self) -> str:
"""Construct the file"""

if not self.name:
@@ -153,13 +155,13 @@ def _get_filestem(self):
stem = stem.replace("__", "_")

# treat norwegian special letters
# BUG(?): What about germen letter like "Ü"?
stem = stem.replace("æ", "ae")
stem = stem.replace("ø", "oe")
return stem.replace("å", "aa")

def _get_path(self):
def _get_path(self) -> tuple[Path, Path | None]:
"""Construct and get the folder path(s)."""
dest = None
linkdest = None

dest = self._get_path_generic(mode=self.fmu_context, allow_forcefolder=True)
@@ -171,7 +173,9 @@ def _get_path(self):

         return dest, linkdest
 
-    def _get_path_generic(self, mode="realization", allow_forcefolder=True, info=""):
+    def _get_path_generic(
+        self, mode: str = "realization", allow_forcefolder: bool = True, info: str = ""
+    ) -> Path:
         """Generically construct and get the folder path and verify."""
         dest = None

@@ -212,8 +216,7 @@ def _get_path_generic(self, mode="realization", allow_forcefolder=True, info="")
warn("Using absolute paths in forcefolder is not recommended!")

# absolute if starts with "/", otherwise relative to outroot
dest = Path(self.dataio.forcefolder)
dest = dest.absolute()
dest = Path(self.dataio.forcefolder).absolute()
self.forcefolder_is_absolute = True

if not allow_forcefolder:
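
The assert added to derive_filedata is a standard mypy narrowing idiom: after "assert x is not None", a value typed as X | None is treated as X for the rest of the scope. A minimal, hypothetical sketch of the idiom (names are illustrative, not this module's API):

    from pathlib import Path


    def get_path() -> Path | None:
        # stand-in for a lookup that may legitimately return None
        return Path("share/results")


    def derive() -> str:
        relpath = get_path()
        assert relpath is not None  # narrows Path | None to Path for mypy
        return str(relpath / "surface.gri")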