diff --git a/mypy.ini b/mypy.ini
index 0fcd3fff5..b8fc97559 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -1,6 +1,7 @@
 [mypy]
+plugins = pydantic.mypy
 disallow_untyped_defs = True
-exclude = ^((tests|docs|examples|build)/|conftest.py?)
+exclude = ^((tests|docs|examples|build|tools)/|conftest.py?)
 extra_checks = True
 ignore_missing_imports = True
 python_version = 3.8
diff --git a/pyproject.toml b/pyproject.toml
index 7972c0faf..e944f2b98 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,6 +35,7 @@ dependencies = [
     "numpy",
     "pandas",
     "pyarrow",
+    "pydantic>=2.5.0",
    "PyYAML",
     "xtgeo>=2.16",
 ]
@@ -81,10 +82,6 @@ write_to = "src/fmu/dataio/version.py"
 
 [tool.pytest.ini_options]
 minversion = "6.0"
-addopts = "--verbose"
-log_cli = "False"
-log_cli_format = "%(levelname)8s (%(relativeCreated)6.0fms) %(filename)44s [%(funcName)40s()] %(lineno)4d >> %(message)s"
-log_cli_level = "INFO"
 testpaths = "tests"
 markers = ["integration: marks a test as an integration test"]
 xfail_strict = true
diff --git a/radixconfig.yaml b/radixconfig.yaml
index 9d7a56876..3fbaa7ac9 100644
--- a/radixconfig.yaml
+++ b/radixconfig.yaml
@@ -1,3 +1,5 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/equinor/radix-operator/release/json-schema/radixapplication.json
+# Only works if the Red Hat YAML extension in VS Code is installed.
 apiVersion: radix.equinor.com/v1
 kind: RadixApplication
 metadata:
diff --git a/src/fmu/dataio/datastructure/meta/__init__.py b/src/fmu/dataio/datastructure/meta/__init__.py
new file mode 100644
index 000000000..1e35c2d3a
--- /dev/null
+++ b/src/fmu/dataio/datastructure/meta/__init__.py
@@ -0,0 +1,6 @@
+from .meta import Root, dump
+
+__all__ = [
+    "dump",
+    "Root",
+]
diff --git a/src/fmu/dataio/datastructure/meta/__main__.py b/src/fmu/dataio/datastructure/meta/__main__.py
new file mode 100644
index 000000000..990d73f19
--- /dev/null
+++ b/src/fmu/dataio/datastructure/meta/__main__.py
@@ -0,0 +1,8 @@
+from __future__ import annotations
+
+import json
+
+from . import dump
+
+if __name__ == "__main__":
+    print(json.dumps(dump(), indent=2))
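The __main__ entry point above makes the schema dump scriptable: `python -m fmu.dataio.datastructure.meta` prints the generated schema to stdout. A minimal sketch of consuming it from Python instead (the output filename is illustrative, not part of the patch):

import json
from pathlib import Path

from fmu.dataio.datastructure.meta import dump

# Write the pydantic-generated schema to disk so it can be diffed against the
# hand-maintained schema/definitions/0.8.0/schema/fmu_results.json.
Path("fmu_results_generated.json").write_text(json.dumps(dump(), indent=2))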
+ """ + + label: Optional[str] = Field( + default=None, + examples=["base", "monitor", "mylabel"], + ) + value: Optional[str] = Field( + default=None, + examples=["2020-10-28T14:28:02"], + ) + + +class Time(BaseModel): + t0: Optional[FMUTimeObject] = None + t1: Optional[FMUTimeObject] = None + + +class Seismic(BaseModel): + """ + Conditional field + """ + + attribute: Optional[str] = Field( + default=None, + examples=["amplitude_timeshifted"], + ) + calculation: Optional[str] = Field( + default=None, + examples=["mean"], + ) + filter_size: Optional[float] = Field( + allow_inf_nan=False, + default=None, + ) + scaling_factor: Optional[float] = Field( + allow_inf_nan=False, + default=None, + ) + stacking_offset: Optional[str] = Field( + default=None, + examples=["0-15"], + ) + zrange: Optional[float] = Field( + allow_inf_nan=False, + default=None, + ) + + +class FluidContact(BaseModel): + """ + Conditional field + """ + + contact: Literal["owc", "fwl", "goc", "fgl"] = Field( + examples=["owc", "fwl"], + ) + truncated: bool = Field(default=False) + + +class FieldOutline(BaseModel): + """ + Conditional field + """ + + contact: str + + +class FieldRegion(BaseModel): + """ + Conditional field + """ + + id: int = Field( + description="The ID of the region", + ) + + +class GridModel(BaseModel): + name: str = Field(examples=["MyGrid"]) + + +class Layer(BaseModel): + name: str = Field( + description=( + "Name of the data object. If stratigraphic, " + "match the entry in the stratigraphic column" + ), + examples=["VIKING GP. Top"], + ) + offset: float = Field( + allow_inf_nan=False, + default=0, + ) + stratigraphic: bool = Field( + default=False, + description=( + "True if data object represents an entity in the stratigraphic colum" + ), + ) + + +class BoundingBox(BaseModel): + xmin: float = Field( + description="Minimum x-coordinate", + allow_inf_nan=False, + ) + xmax: float = Field( + description="Maximum x-coordinate", + allow_inf_nan=False, + ) + ymin: float = Field( + description="Minimum y-coordinate", + allow_inf_nan=False, + ) + ymax: float = Field( + description="Maximum y-coordinate", + allow_inf_nan=False, + ) + zmin: float = Field( + description="Minimum z-coordinate", + allow_inf_nan=False, + ) + zmax: float = Field( + description="Maximum z-coordinate", + allow_inf_nan=False, + ) + + +class Content(BaseModel): + content: enums.ContentEnum = Field(description="The contents of this data object") + + alias: Optional[List[str]] = Field(default=None) + + # Only valid for cooridate based meta. + bbox: Optional[BoundingBox] = Field(default=None) + + description: Optional[List[str]] = Field( + default=None, + ) + format: str = Field( + examples=["irap_binary"], + ) + + grid_model: Optional[GridModel] = Field(default=None) + is_observation: bool = Field( + title="Is observation flag", + ) + is_prediction: bool = Field( + title="Is prediction flag", + ) + layout: Optional[str] = Field( + default=None, + examples=["regular"], + ) + name: str = Field( + description=( + "Name of the data object. If stratigraphic, " + "match the entry in the stratigraphic column" + ), + examples=["VIKING GP. 
Top"], + ) + offset: float = Field( + default=0.0, + allow_inf_nan=False, + ) + spec: Optional[specification.AnySpecification] = Field(default=None) + stratigraphic_alias: Optional[List[str]] = Field(default=None) + stratigraphic: bool = Field( + description=( + "True if data object represents an entity in the stratigraphic column" + ), + ) + tagname: Optional[str] = Field( + default=None, + description="A semi-human readable tag for internal usage and uniqueness", + examples=["ds_extract_geogrid", "ds_post_strucmod"], + ) + time: Optional[Time] = Field(default=None) + + undef_is_zero: Optional[bool] = Field( + default=None, + description="Flag if undefined values are to be interpreted as zero", + ) + unit: str = Field( + default="", + examples=["m"], + ) + vertical_domain: Optional[Literal["depth", "time"]] = Field( + default=None, + examples=["depth"], + ) + + # Both must be set, or none. + base: Optional[Layer] = None + top: Optional[Layer] = None + + +class DepthContent(Content): + content: Literal[enums.ContentEnum.depth] + depth_reference: Literal["msl", "sb", "rkb"] + + +class FaultLinesContent(Content): + content: Literal[enums.ContentEnum.fault_lines] + + +class FieldOutlineContent(Content): + content: Literal[enums.ContentEnum.field_outline] + field_outline: FieldOutline = Field( + description="Conditional field", + ) + + +class FieldRegionContent(Content): + content: Literal[enums.ContentEnum.field_region] + field_region: FieldRegion = Field( + description="Conditional field", + ) + + +class FluidContactContent(Content): + content: Literal[enums.ContentEnum.fluid_contact] + fluid_contact: FluidContact = Field( + description="Conditional field", + ) + + +class InplaceVolumesContent(Content): + content: Literal[enums.ContentEnum.inplace_volumes] + + +class KPProductContent(Content): + content: Literal[enums.ContentEnum.khproduct] + + +class LiftCurvesContent(Content): + content: Literal[enums.ContentEnum.lift_curves] + + +class ParametersContent(Content): + content: Literal[enums.ContentEnum.parameters] + + +class PinchoutContent(Content): + content: Literal[enums.ContentEnum.pinchout] + + +class PropertyContent(Content): + content: Literal[enums.ContentEnum.property] + + +class PVTContent(Content): + content: Literal[enums.ContentEnum.pvt] + + +class RegionsContent(Content): + content: Literal[enums.ContentEnum.regions] + + +class RelpermContent(Content): + content: Literal[enums.ContentEnum.relperm] + + +class RFTContent(Content): + content: Literal[enums.ContentEnum.rft] + + +class SeismicContent(Content): + content: Literal[enums.ContentEnum.seismic] + seismic: Seismic = Field( + description="Conditional field", + ) + + +class SubcropContent(Content): + content: Literal[enums.ContentEnum.subcrop] + + +class ThicknessContent(Content): + content: Literal[enums.ContentEnum.thickness] + + +class TimeContent(Content): + content: Literal[enums.ContentEnum.time] + + +class TimeSeriesContent(Content): + content: Literal[enums.ContentEnum.timeseries] + + +class TransmissibilitiesContent(Content): + content: Literal[enums.ContentEnum.transmissibilities] + + +class VelocityContent(Content): + content: Literal[enums.ContentEnum.velocity] + + +class VolumesContent(Content): + content: Literal[enums.ContentEnum.volumes] + + +class VolumetricsContent(Content): + content: Literal[enums.ContentEnum.volumetrics] + + +class WellPicksContent(Content): + content: Literal[enums.ContentEnum.wellpicks] + + +class AnyContent(RootModel): + root: Annotated[ + Union[ + DepthContent, + FaultLinesContent, 
+            FieldOutlineContent,
+            FieldRegionContent,
+            FluidContactContent,
+            InplaceVolumesContent,
+            KPProductContent,
+            LiftCurvesContent,
+            ParametersContent,
+            PinchoutContent,
+            PropertyContent,
+            PVTContent,
+            RegionsContent,
+            RelpermContent,
+            RFTContent,
+            SeismicContent,
+            SubcropContent,
+            ThicknessContent,
+            TimeContent,
+            TimeSeriesContent,
+            VelocityContent,
+            VolumesContent,
+            VolumetricsContent,
+            WellPicksContent,
+        ],
+        Field(discriminator="content"),
+    ]
+
+    @model_validator(mode="before")
+    @classmethod
+    def _top_and_base_(cls, values: Dict) -> Dict:
+        top, base = values.get("top"), values.get("base")
+        if top is None and base is None:
+            return values
+        if top is not None and base is not None:
+            return values
+        raise ValueError("Both 'top' and 'base' must be set together or both be unset")
+
+    @classmethod
+    def __get_pydantic_json_schema__(
+        cls,
+        core_schema: CoreSchema,
+        handler: GetJsonSchemaHandler,
+    ) -> Dict[str, Any]:
+        json_schema = super().__get_pydantic_json_schema__(core_schema, handler)
+        json_schema = handler.resolve_ref_schema(json_schema)
+        json_schema.update(
+            {
+                "dependencies": {
+                    "top": {"required": ["base"]},
+                    "base": {"required": ["top"]},
+                }
+            }
+        )
+        return json_schema
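To illustrate the discriminated union above: the value of `content` selects the concrete model, and the `_top_and_base_` validator enforces that `top` and `base` travel together. A sketch with invented field values (not part of the patch):

from fmu.dataio.datastructure.meta.content import AnyContent

payload = {
    "content": "depth",  # discriminator -> DepthContent
    "depth_reference": "msl",
    "format": "irap_binary",
    "name": "VIKING GP. Top",
    "stratigraphic": True,
    "is_observation": False,
    "is_prediction": True,
}
parsed = AnyContent.model_validate(payload)
assert parsed.root.content == "depth"

# Setting 'top' without 'base' trips the model validator
# (pydantic's ValidationError subclasses ValueError).
try:
    AnyContent.model_validate({**payload, "top": {"name": "TopVolantis"}})
except ValueError:
    pass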
diff --git a/src/fmu/dataio/datastructure/meta/enums.py b/src/fmu/dataio/datastructure/meta/enums.py
new file mode 100644
index 000000000..d930bf967
--- /dev/null
+++ b/src/fmu/dataio/datastructure/meta/enums.py
@@ -0,0 +1,55 @@
+from __future__ import annotations
+
+from enum import Enum, IntEnum
+
+
+class ContentEnum(str, Enum):
+    depth = "depth"
+    fault_lines = "fault_lines"
+    field_outline = "field_outline"
+    field_region = "field_region"
+    fluid_contact = "fluid_contact"
+    inplace_volumes = "inplace_volumes"
+    khproduct = "khproduct"
+    lift_curves = "lift_curves"
+    parameters = "parameters"
+    pinchout = "pinchout"
+    property = "property"
+    pvt = "pvt"
+    regions = "regions"
+    relperm = "relperm"
+    rft = "rft"
+    seismic = "seismic"
+    subcrop = "subcrop"
+    thickness = "thickness"
+    time = "time"
+    timeseries = "timeseries"
+    transmissibilities = "transmissibilities"
+    velocity = "velocity"
+    volumes = "volumes"
+    volumetrics = "volumetrics"
+    wellpicks = "wellpicks"
+
+
+class FMUClassEnum(str, Enum):
+    case = "case"
+    surface = "surface"
+    table = "table"
+    cpgrid = "cpgrid"
+    cpgrid_property = "cpgrid_property"
+    polygons = "polygons"
+    cube = "cube"
+    well = "well"
+    points = "points"
+    dictionary = "dictionary"
+
+
+class AccessLevel(str, Enum):
+    asset = "asset"
+    internal = "internal"
+    restricted = "restricted"
+
+
+class AxisOrientation(IntEnum):
+    normal = 1
+    flipped = -1
diff --git a/src/fmu/dataio/datastructure/meta/meta.py b/src/fmu/dataio/datastructure/meta/meta.py
new file mode 100644
index 000000000..72614968c
--- /dev/null
+++ b/src/fmu/dataio/datastructure/meta/meta.py
@@ -0,0 +1,443 @@
+from __future__ import annotations
+
+from collections import ChainMap
+from pathlib import Path
+from typing import Dict, List, Literal, Optional, Union
+from uuid import UUID
+
+from pydantic import (
+    AwareDatetime,
+    BaseModel,
+    Field,
+    GetJsonSchemaHandler,
+    NaiveDatetime,
+    RootModel,
+    model_validator,
+)
+from pydantic_core import CoreSchema
+from typing_extensions import Annotated
+
+from . import content, enums
+
+
+class Asset(BaseModel):
+    name: str = Field(examples=["Drogon"])
+
+
+class Ssdl(BaseModel):
+    """
+    Sub-Surface Data Lake
+    """
+
+    access_level: enums.AccessLevel
+    rep_include: bool
+
+
+class Access(BaseModel):
+    asset: Asset
+    classification: Optional[enums.AccessLevel] = Field(default=None)
+
+
+class SsdlAccess(Access):
+    ssdl: Ssdl
+
+
+class File(BaseModel):
+    """
+    Block describing the file as the data appear in FMU context
+    """
+
+    absolute_path: Optional[Path] = Field(
+        default=None,
+        description="The absolute file path",
+        examples=["/abs/path/share/results/maps/volantis_gp_base--depth.gri"],
+    )
+    relative_path: Path = Field(
+        description="The file path relative to RUNPATH",
+        examples=["share/results/maps/volantis_gp_base--depth.gri"],
+    )
+    checksum_md5: str = Field(
+        description="md5 checksum of the file or bytestring",
+        examples=["kjhsdfvsdlfk23knerknvk23"],
+    )
+    size_bytes: Optional[int] = Field(
+        default=None,
+        description="Size of file object in bytes",
+    )
+
+
+class Parameters(RootModel):
+    root: Dict[str, Union[Parameters, int, float, str]]
+
+
+class Aggregation(BaseModel):
+    id: UUID = Field(
+        description="The ID of this aggregation",
+        examples=["15ce3b84-766f-4c93-9050-b154861f9100"],
+    )
+    operation: str = Field(
+        description="The aggregation performed",
+    )
+    realization_ids: List[int] = Field(
+        description="Array of realization ids included in this aggregation"
+    )
+    parameters: Optional[Parameters] = Field(
+        default=None,
+        description="Parameters for this realization",
+    )
+
+
+class Workflow(BaseModel):
+    reference: str = Field(
+        description="Reference to the part of the FMU workflow that produced this"
+    )
+
+
+class User(BaseModel):
+    id: str = Field(
+        examples=["peesv", "jlov"],
+        title="User ID",
+    )
+
+
+class FMUCase(BaseModel):
+    name: str = Field(
+        description="The case name",
+        examples=["MyCaseName"],
+    )
+    user: User = Field(
+        description="The user name used in ERT",
+    )
+    uuid: UUID = Field(
+        examples=["15ce3b84-766f-4c93-9050-b154861f9100"],
+    )
+    description: Optional[List[str]] = Field(
+        default=None,
+    )
+
+
+class Iteration(BaseModel):
+    id: int = Field(
+        description=(
+            "The internal identification of this iteration, e.g. the iteration number"
+        ),
+    )
+    name: str = Field(
+        description="The conventional name of this iteration, e.g. iter-0 or pred",
iter-0 or pred", + examples=["iter-0"], + ) + uuid: UUID = Field( + examples=["15ce3b84-766f-4c93-9050-b154861f9100"], + ) + restart_from: Optional[UUID] = Field( + default=None, + description=( + "A uuid reference to another iteration that this " + "iteration was restarted from" + ), + examples=["15ce3b84-766f-4c93-9050-b154861f9100"], + ) + + +class FMUModel(BaseModel): + description: Optional[List[str]] = Field( + default=None, + description="This is a free text description of the model setup", + ) + name: Optional[str] = Field( + default=None, + examples=["Drogon"], + ) + revision: Optional[str] = Field( + default=None, + examples=["21.0.0.dev"], + ) + + +class RealizationJobListing(BaseModel): + arg_types: List[str] + argList: List[Path] + error_file: Optional[Path] + executable: Path + license_path: Optional[Path] + max_arg: int + max_running_minutes: Optional[int] + max_running: Optional[int] + min_arg: int + name: str + start_file: Optional[str] + stderr: Optional[str] + stdin: Optional[str] + stdout: Optional[str] + target_file: Optional[Path] + + +class RealizationJobs(BaseModel): + data_root: Path = Field(alias="DATA_ROOT") + ert_pid: str + global_environment: Dict[str, str] + global_update_path: dict + job_list: List[RealizationJobListing] = Field(alias="jobList") + run_id: str + umask: str + + +class Realization(BaseModel): + id: int = Field( + description="The unique number of this realization as used in FMU", + ) + name: str = Field( + description="The convential name of this iteration, e.g. iter-0 or pred", + examples=["iter-0"], + ) + parameters: Optional[Parameters] = Field( + default=None, + description="Parameters for this realization", + ) + jobs: Optional[RealizationJobs] = Field( + default=None, + description=( + "Content directly taken from the ERT jobs.json file for this realization" + ), + ) + uuid: UUID = Field(examples=["15ce3b84-766f-4c93-9050-b154861f9100"]) + + +class CountryItem(BaseModel): + identifier: str = Field( + examples=["Norway"], + ) + uuid: UUID = Field(examples=["15ce3b84-766f-4c93-9050-b154861f9100"]) + + +class DiscoveryItem(BaseModel): + short_identifier: str = Field( + examples=["SomeDiscovery"], + ) + uuid: UUID = Field(examples=["15ce3b84-766f-4c93-9050-b154861f9100"]) + + +class FieldItem(BaseModel): + identifier: str = Field( + examples=["OseFax"], + ) + uuid: UUID = Field(examples=["15ce3b84-766f-4c93-9050-b154861f9100"]) + + +class CoordinateSystem(BaseModel): + identifier: str = Field( + examples=["ST_WGS84_UTM37N_P32637"], + ) + uuid: UUID = Field(examples=["15ce3b84-766f-4c93-9050-b154861f9100"]) + + +class StratigraphicColumn(BaseModel): + identifier: str = Field( + examples=["DROGON_2020"], + ) + uuid: UUID = Field(examples=["15ce3b84-766f-4c93-9050-b154861f9100"]) + + +class Smda(BaseModel): + coordinate_system: CoordinateSystem + country: List[CountryItem] + discovery: List[DiscoveryItem] + field: List[FieldItem] + stratigraphic_column: StratigraphicColumn + + +class Masterdata(BaseModel): + smda: Smda + + +class TracklogEvent(BaseModel): + # TODO: Update ex. to inc. timezone + # update NaiveDatetime -> AwareDatetime + # On upload, sumo adds timezone if its lacking. + # For roundtripping i need an Union here. 
+    datetime: Union[NaiveDatetime, AwareDatetime] = Field(
+        examples=["2020-10-28T14:28:02"],
+    )
+    event: str = Field(
+        examples=["created", "updated"],
+    )
+    user: User
+
+
+class FMU(BaseModel):
+    """
+    The FMU block records properties that are specific to FMU
+    """
+
+    case: FMUCase
+    model: FMUModel
+    iteration: Optional[Iteration] = Field(default=None)
+    workflow: Optional[Workflow] = Field(default=None)
+    aggregation: Optional[Aggregation] = Field(default=None)
+    realization: Optional[Realization] = Field(default=None)
+
+    @model_validator(mode="before")
+    @classmethod
+    def _dependencies_aggregation_realization(cls, values: Dict) -> Dict:
+        aggregation, realization = values.get("aggregation"), values.get("realization")
+        if aggregation and realization:
+            raise ValueError(
+                "Both 'aggregation' and 'realization' cannot be set "
+                "at the same time. Please set only one."
+            )
+        return values
+
+    @classmethod
+    def __get_pydantic_json_schema__(
+        cls,
+        core_schema: CoreSchema,
+        handler: GetJsonSchemaHandler,
+    ) -> Dict[str, object]:
+        json_schema = super().__get_pydantic_json_schema__(core_schema, handler)
+        json_schema = handler.resolve_ref_schema(json_schema)
+        json_schema.update(
+            {
+                "dependencies": {
+                    "aggregation": {"not": {"required": ["realization"]}},
+                    "realization": {"not": {"required": ["aggregation"]}},
+                }
+            }
+        )
+        return json_schema
+
+
+class ClassMeta(BaseModel):
+    class_: enums.FMUClassEnum = Field(
+        alias="class",
+        title="Metadata class",
+    )
+    masterdata: Masterdata
+    tracklog: List[TracklogEvent]
+    source: Literal["fmu"] = Field(description="Data source (FMU)")
+    version: Literal["0.8.0"] = Field(title="FMU results metadata version")
+
+
+class FMUCaseClassMeta(ClassMeta):
+    class_: Literal[enums.FMUClassEnum.case] = Field(
+        alias="class",
+        title="Metadata class",
+    )
+    fmu: FMU
+    access: Access
+
+
+class FMUDataClassMeta(ClassMeta):
+    class_: Literal[
+        enums.FMUClassEnum.surface,
+        enums.FMUClassEnum.table,
+        enums.FMUClassEnum.cpgrid,
+        enums.FMUClassEnum.cpgrid_property,
+        enums.FMUClassEnum.polygons,
+        enums.FMUClassEnum.cube,
+        enums.FMUClassEnum.well,
+        enums.FMUClassEnum.points,
+        enums.FMUClassEnum.dictionary,
+    ] = Field(
+        alias="class",
+        title="Metadata class",
+    )
+
+    # The presence of the 'class' field controls what kind of
+    # FMU object this is. The discriminator inspects the object
+    # and returns a tag that tells pydantic which model to use.
+    fmu: FMU
+    access: SsdlAccess
+    data: content.AnyContent
+    file: File
+
+
+class Root(
+    RootModel[
+        Annotated[
+            Union[
+                FMUCaseClassMeta,
+                FMUDataClassMeta,
+            ],
+            Field(discriminator="class_"),
+        ]
+    ]
+):
+    @model_validator(mode="before")
+    @classmethod
+    def _check_class_data_spec(cls, values: Dict) -> Dict:
+        class_ = values.get("class")
+        data = values.get("data")
+
+        if class_ in ["table", "surface"] and (data is None or "spec" not in data):
+            raise ValueError(
+                "When 'class' is 'table' or 'surface', "
+                "'data' must contain the 'spec' field."
+            )
+        return values
+
+    @classmethod
+    def __get_pydantic_json_schema__(
+        cls,
+        core_schema: CoreSchema,
+        handler: GetJsonSchemaHandler,
+    ) -> Dict[str, object]:
+        json_schema = super().__get_pydantic_json_schema__(core_schema, handler)
+        json_schema = handler.resolve_ref_schema(json_schema)
+        json_schema.update(
+            {
+                "if": {"properties": {"class": {"enum": ["table", "surface"]}}},
+                "then": {"properties": {"data": {"required": ["spec"]}}},
+            }
+        )
+        return json_schema
+
+
+def dump() -> dict:
+    return dict(
+        ChainMap(
+            {
+                "$contractual": [
+                    "class",
+                    "source",
+                    "version",
+                    "tracklog",
+                    "data.format",
+                    "data.name",
+                    "data.stratigraphic",
+                    "data.alias",
+                    "data.stratigraphic_alias",
+                    "data.offset",
+                    "data.content",
+                    "data.tagname",
+                    "data.vertical_domain",
+                    "data.grid_model",
+                    "data.bbox",
+                    "data.time",
+                    "data.is_prediction",
+                    "data.is_observation",
+                    "data.seismic.attribute",
+                    "data.spec.columns",
+                    "access",
+                    "masterdata",
+                    "fmu.model",
+                    "fmu.workflow",
+                    "fmu.case",
+                    "fmu.iteration.name",
+                    "fmu.iteration.uuid",
+                    "fmu.realization.name",
+                    "fmu.realization.id",
+                    "fmu.realization.uuid",
+                    "fmu.aggregation.operation",
+                    "fmu.aggregation.realization_ids",
+                    "fmu.context.stage",
+                    "file.relative_path",
+                    "file.checksum_md5",
+                    "file.size_bytes",
+                ],
+                # $schema must be present for the "dependencies" key to work.
+                "$schema": "http://json-schema.org/draft-07/schema",
+            },
+            Root.model_json_schema(),
+        )
+    )
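Note that dump() layers the hand-written $contractual and $schema keys over Root.model_json_schema() via ChainMap, with the hand-written keys taking precedence. Declaring draft-07 matters because the "dependencies" and "if"/"then" keywords injected by the __get_pydantic_json_schema__ hooks rely on draft-07 semantics. A quick sanity check (a sketch, not part of the patch):

import jsonschema

from fmu.dataio.datastructure.meta import dump

schema = dump()
assert schema["$schema"] == "http://json-schema.org/draft-07/schema"
# Raises jsonschema.SchemaError if the merged document is not a valid schema.
jsonschema.Draft7Validator.check_schema(schema)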
diff --git a/src/fmu/dataio/datastructure/meta/specification.py b/src/fmu/dataio/datastructure/meta/specification.py
new file mode 100644
index 000000000..dbb681ee9
--- /dev/null
+++ b/src/fmu/dataio/datastructure/meta/specification.py
@@ -0,0 +1,155 @@
+from __future__ import annotations
+
+from typing import List, Union
+
+from pydantic import BaseModel, Field
+
+from . import enums
+
+
+class Shape(BaseModel):
+    nrow: int = Field(
+        description="The number of rows",
+    )
+    ncol: int = Field(
+        description="The number of columns",
+    )
+    nlay: int = Field(
+        description="The number of layers",
+    )
+
+
+class SurfaceSpecification(Shape):
+    rotation: float = Field(
+        description="Rotation angle",
+        allow_inf_nan=False,
+    )
+    undef: float = Field(
+        description="Value representing undefined data",
+        allow_inf_nan=False,
+    )
+    xinc: float = Field(
+        description="Increment along the x-axis",
+        allow_inf_nan=False,
+    )
+    xori: float = Field(
+        description="Origin along the x-axis",
+        allow_inf_nan=False,
+    )
+    yflip: enums.AxisOrientation = Field(
+        description="Flip along the y-axis, -1 or 1",
+    )
+    yori: float = Field(
+        description="Origin along the y-axis",
+        allow_inf_nan=False,
+    )
+
+
+class TableSpecification(BaseModel):
+    columns: List[str] = Field(
+        description="List of columns present in a table.",
+    )
+    size: int = Field(
+        description="Size of data object.",
+        examples=[1, 9999],
+    )
+
+
+class CPGridSpecification(Shape):
+    """Corner point grid"""
+
+    xshift: float = Field(
+        description="Shift along the x-axis",
+        allow_inf_nan=False,
+    )
+    yshift: float = Field(
+        description="Shift along the y-axis",
+        allow_inf_nan=False,
+    )
+    zshift: float = Field(
+        description="Shift along the z-axis",
+        allow_inf_nan=False,
+    )
+
+    xscale: float = Field(
+        description="Scaling factor for the x-axis",
+        allow_inf_nan=False,
+    )
+    yscale: float = Field(
+        description="Scaling factor for the y-axis",
+        allow_inf_nan=False,
+    )
+    zscale: float = Field(
+        description="Scaling factor for the z-axis",
+        allow_inf_nan=False,
+    )
+
+
+class CPGridPropertySpecification(Shape):
+    ...
+
+
+class PolygonsSpecification(BaseModel):
+    npolys: int = Field(
+        description="The number of individual polygons in the data object",
+    )
+
+
+class CubeSpecification(SurfaceSpecification):
+    # Increment
+    xinc: float = Field(
+        description="Increment along the x-axis",
+        allow_inf_nan=False,
+    )
+    yinc: float = Field(
+        description="Increment along the y-axis",
+        allow_inf_nan=False,
+    )
+    zinc: float = Field(
+        description="Increment along the z-axis",
+        allow_inf_nan=False,
+    )
+
+    # Origin
+    xori: float = Field(
+        description="Origin along the x-axis",
+        allow_inf_nan=False,
+    )
+    yori: float = Field(
+        description="Origin along the y-axis",
+        allow_inf_nan=False,
+    )
+    zori: float = Field(
+        description="Origin along the z-axis",
+        allow_inf_nan=False,
+    )
+
+    # Miscellaneous
+    yflip: enums.AxisOrientation = Field(
+        description="Flip along the y-axis, -1 or 1",
+    )
+    zflip: enums.AxisOrientation = Field(
+        description="Flip along the z-axis, -1 or 1",
+    )
+    rotation: float = Field(
+        description="Rotation angle",
+        allow_inf_nan=False,
+    )
+    undef: float = Field(
+        description="Value representing undefined data",
+    )
+
+
+class WellPointsDictionaryCaseSpecification(BaseModel):
+    ...
+
+
+AnySpecification = Union[
+    CPGridPropertySpecification,
+    CPGridSpecification,
+    CubeSpecification,
+    PolygonsSpecification,
+    SurfaceSpecification,
+    TableSpecification,
+    WellPointsDictionaryCaseSpecification,
+]
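As a sketch of how these specification models line up with xtgeo objects (attribute names follow xtgeo's RegularSurface; the pairing is an illustrative assumption, not part of the patch):

import xtgeo

from fmu.dataio.datastructure.meta.specification import SurfaceSpecification

surf = xtgeo.RegularSurface(ncol=12, nrow=10, xinc=20.0, yinc=20.0, values=0.0)
spec = SurfaceSpecification(
    nrow=surf.nrow,
    ncol=surf.ncol,
    nlay=1,  # Shape requires nlay, even for a 2D surface
    rotation=surf.rotation,
    undef=surf.undef,
    xinc=surf.xinc,
    xori=surf.xori,
    yflip=surf.yflip,
    yori=surf.yori,
)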
diff --git a/tests/conftest.py b/tests/conftest.py
index 2cfc20b5b..c478901d1 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -75,12 +75,12 @@ def wrapper(*args, **kwargs):
         return wrapper
 
 
-@pytest.fixture(name="testroot", scope="session", autouse=True)
+@pytest.fixture(name="testroot", scope="session")
 def fixture_testroot():
     return ROOTPWD
 
 
-@pytest.fixture(name="fmurun", scope="session", autouse=True)
+@pytest.fixture(name="fmurun", scope="session")
 def fixture_fmurun(tmp_path_factory):
     """Create a tmp folder structure for testing; here a new fmurun."""
     tmppath = tmp_path_factory.mktemp("data")
@@ -90,7 +90,7 @@ def fixture_fmurun(tmp_path_factory):
     return newpath
 
 
-@pytest.fixture(name="fmurun_w_casemetadata", scope="session", autouse=True)
+@pytest.fixture(name="fmurun_w_casemetadata", scope="session")
 def fixture_fmurun_w_casemetadata(tmp_path_factory):
     """Create a tmp folder structure for testing; here existing fmurun w/ case meta!"""
     tmppath = tmp_path_factory.mktemp("data3")
@@ -101,7 +101,7 @@ def fixture_fmurun_w_casemetadata(tmp_path_factory):
     return rootpath
 
 
-@pytest.fixture(name="fmurun_w_casemetadata_pred", scope="session", autouse=True)
+@pytest.fixture(name="fmurun_w_casemetadata_pred", scope="session")
 def fixture_fmurun_w_casemetadata_pred(tmp_path_factory):
     """Create a tmp folder structure for testing; here existing fmurun w/ case meta!"""
     tmppath = tmp_path_factory.mktemp("data3")
@@ -112,7 +112,7 @@ def fixture_fmurun_w_casemetadata_pred(tmp_path_factory):
     return rootpath
 
 
-@pytest.fixture(name="fmurun_pred", scope="session", autouse=True)
+@pytest.fixture(name="fmurun_pred", scope="session")
 def fixture_fmurun_pred(tmp_path_factory):
     """Create a tmp folder structure for testing; here a new fmurun for prediction."""
     tmppath = tmp_path_factory.mktemp("data_pred")
@@ -122,7 +122,7 @@ def fixture_fmurun_pred(tmp_path_factory):
     return newpath
 
 
-@pytest.fixture(name="rmsrun_fmu_w_casemetadata", scope="session", autouse=True)
+@pytest.fixture(name="rmsrun_fmu_w_casemetadata", scope="session")
 def fixture_rmsrun_fmu_w_casemetadata(tmp_path_factory):
     """Create a tmp folder structure for testing; here existing fmurun w/ case meta!
@@ -139,7 +139,7 @@ def fixture_rmsrun_fmu_w_casemetadata(tmp_path_factory):
     return rmspath
 
 
-@pytest.fixture(name="rmssetup", scope="module", autouse=True)
+@pytest.fixture(name="rmssetup", scope="module")
 def fixture_rmssetup(tmp_path_factory):
     """Create the folder structure to mimic RMS project."""
 
@@ -157,7 +157,7 @@ def fixture_rmssetup(tmp_path_factory):
     return rmspath
 
 
-@pytest.fixture(name="rmsglobalconfig", scope="module", autouse=True)
+@pytest.fixture(name="rmsglobalconfig", scope="module")
 def fixture_rmsglobalconfig(rmssetup):
     """Read global config."""
     # read the global config
@@ -171,7 +171,7 @@ def fixture_rmsglobalconfig(rmssetup):
     return global_cfg
 
 
-@pytest.fixture(name="globalvars_norw_letters", scope="module", autouse=True)
+@pytest.fixture(name="globalvars_norw_letters", scope="module")
 def fixture_globalvars_norw_letters(tmp_path_factory):
     """Read a global config with norwegian special letters w/ fmu.config utilities."""
 
@@ -193,7 +193,7 @@ def fixture_globalvars_norw_letters(tmp_path_factory):
     return (rmspath, cfg, gname)
 
 
-@pytest.fixture(name="casesetup", scope="module", autouse=True)
+@pytest.fixture(name="casesetup", scope="module")
 def fixture_casesetup(tmp_path_factory):
     """Create the folder structure to mimic a fmu run"""
 
@@ -206,7 +206,7 @@ def fixture_casesetup(tmp_path_factory):
     return tmppath
 
 
-@pytest.fixture(name="caseglobalconfig", scope="module", autouse=True)
+@pytest.fixture(name="caseglobalconfig", scope="module")
 def fixture_caseglobalconfig():
     """Create as global config for case testing."""
     gconfig = {}
@@ -343,15 +343,14 @@ def fixture_edataobj2(globalconfig2):
 # ======================================================================================
 
 
-@pytest.fixture(name="schema_080", scope="session", autouse=True)
+@pytest.fixture(name="schema_080", scope="session")
 def fixture_schema_080():
     """Return 0.8.0 version of schema as json."""
     return _parse_json(ROOTPWD / "schema/definitions/0.8.0/schema/fmu_results.json")
 
 
-@pytest.fixture(name="metadata_examples", scope="session", autouse=True)
-def fixture_metadata_examples():
+def metadata_examples():
     """Parse all metadata examples.
 
     Returns:
@@ -366,19 +365,30 @@ def fixture_metadata_examples():
     }
 
 
+@pytest.fixture(name="metadata_examples", scope="session")
+def fixture_metadata_examples():
+    """Parse all metadata examples.
+
+    Returns:
+        Dict: Dictionary with filename as key, file contents as value.
+ + """ + return metadata_examples() + + # ====================================================================================== # Various objects # ====================================================================================== -@pytest.fixture(name="regsurf", scope="module", autouse=True) +@pytest.fixture(name="regsurf", scope="module") def fixture_regsurf(): """Create an xtgeo surface.""" logger.info("Ran %s", inspect.currentframe().f_code.co_name) return xtgeo.RegularSurface(ncol=12, nrow=10, xinc=20, yinc=20, values=1234.0) -@pytest.fixture(name="polygons", scope="module", autouse=True) +@pytest.fixture(name="polygons", scope="module") def fixture_polygons(): """Create an xtgeo polygons.""" logger.info("Ran %s", inspect.currentframe().f_code.co_name) @@ -392,7 +402,7 @@ def fixture_polygons(): ) -@pytest.fixture(name="points", scope="module", autouse=True) +@pytest.fixture(name="points", scope="module") def fixture_points(): """Create an xtgeo points instance.""" logger.info("Ran %s", inspect.currentframe().f_code.co_name) @@ -407,35 +417,35 @@ def fixture_points(): ) -@pytest.fixture(name="cube", scope="module", autouse=True) +@pytest.fixture(name="cube", scope="module") def fixture_cube(): """Create an xtgeo cube instance.""" logger.info("Ran %s", inspect.currentframe().f_code.co_name) return xtgeo.Cube(ncol=3, nrow=4, nlay=5, xinc=12, yinc=12, zinc=4, rotation=30) -@pytest.fixture(name="grid", scope="module", autouse=True) +@pytest.fixture(name="grid", scope="module") def fixture_grid(): """Create an xtgeo grid instance.""" logger.info("Ran %s", inspect.currentframe().f_code.co_name) return xtgeo.create_box_grid((3, 4, 5)) -@pytest.fixture(name="gridproperty", scope="module", autouse=True) +@pytest.fixture(name="gridproperty", scope="module") def fixture_gridproperty(): """Create an xtgeo gridproperty instance.""" logger.info("Ran %s", inspect.currentframe().f_code.co_name) return xtgeo.GridProperty(ncol=3, nrow=7, nlay=3, values=123.0) -@pytest.fixture(name="dataframe", scope="module", autouse=True) +@pytest.fixture(name="dataframe", scope="module") def fixture_dataframe(): """Create an pandas dataframe instance.""" logger.info("Ran %s", inspect.currentframe().f_code.co_name) return pd.DataFrame({"COL1": [1, 2, 3, 4], "COL2": [99.0, 98.0, 97.0, 96.0]}) -@pytest.fixture(name="wellpicks", scope="module", autouse=True) +@pytest.fixture(name="wellpicks", scope="module") def fixture_wellpicks(): """Create a pandas dataframe containing wellpicks""" logger.info("Ran %s", inspect.currentframe().f_code.co_name) @@ -467,7 +477,7 @@ def fixture_wellpicks(): ) -@pytest.fixture(name="arrowtable", scope="module", autouse=True) +@pytest.fixture(name="arrowtable", scope="module") def fixture_arrowtable(): """Create an arrow table instance.""" try: @@ -485,7 +495,7 @@ def fixture_arrowtable(): return None -@pytest.fixture(name="aggr_surfs_mean", scope="module", autouse=True) +@pytest.fixture(name="aggr_surfs_mean", scope="module") def fixture_aggr_surfs_mean(fmurun_w_casemetadata, rmsglobalconfig, regsurf): """Create aggregated surfaces, and return aggr. 
     """Create aggregated surfaces, and return aggr. mean surface + lists of metadata"""
     logger.info("Ran %s", inspect.currentframe().f_code.co_name)
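The conftest refactor above splits example parsing into a plain metadata_examples() function plus a thin fixture wrapper. The reason: @pytest.mark.parametrize is evaluated at collection time, before any fixture can be requested, so the loader must be directly callable. A sketch of the resulting pattern (mirrors the new test module below):

import pytest
from conftest import metadata_examples

# Collection-time parametrization calls the plain function directly ...
@pytest.mark.parametrize("file, example", metadata_examples().items())
def test_filenames(file, example):
    assert file.endswith(".yml")

# ... while tests that need the whole dict still use the session-scoped fixture.
def test_specific(metadata_examples):
    assert "surface_depth.yml" in metadata_examples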
diff --git a/tests/test_schema/test_schema_logic_pydantic.py b/tests/test_schema/test_schema_logic_pydantic.py
new file mode 100644
index 000000000..b546d8a3c
--- /dev/null
+++ b/tests/test_schema/test_schema_logic_pydantic.py
@@ -0,0 +1,400 @@
+"""Test the schema"""
+import logging
+from copy import deepcopy
+
+import jsonschema
+import pytest
+from conftest import metadata_examples
+from fmu.dataio._definitions import ALLOWED_CONTENTS
+from fmu.dataio.datastructure.meta import Root, dump
+from fmu.dataio.datastructure.meta.enums import ContentEnum
+
+# pylint: disable=no-member
+
+logger = logging.getLogger(__name__)
+
+
+@pytest.fixture(scope="session")
+def pydantic_schema():
+    return dump()
+
+
+@pytest.mark.parametrize("file, example", metadata_examples().items())
+def test_schema_example_filenames(file, example):
+    """Assert that all examples are .yml, not .yaml"""
+    assert file.endswith(".yml")
+
+
+# ======================================================================================
+# 0.8.0
+# ======================================================================================
+
+
+@pytest.mark.parametrize("file, example", metadata_examples().items())
+def test_jsonschema_validate(pydantic_schema, file, example):
+    """Confirm that examples are valid against the schema"""
+    jsonschema.validate(instance=example, schema=pydantic_schema)
+
+
+@pytest.mark.parametrize("file, example", metadata_examples().items())
+def test_pydantic_model_validate(pydantic_schema, file, example):
+    """Confirm that examples are valid against the pydantic model"""
+    Root.model_validate(example)
+
+
+def test_pydantic_schema_file_block(pydantic_schema, metadata_examples):
+    """Test variations on the file block."""
+
+    # get a specific example
+    example = metadata_examples["surface_depth.yml"]
+
+    # Root.model_validate(example)
+    # shall validate as-is
+    jsonschema.validate(instance=example, schema=pydantic_schema)
+
+    # shall validate without absolute_path
+    _example = deepcopy(example)
+    del _example["file"]["absolute_path"]
+    jsonschema.validate(instance=_example, schema=pydantic_schema)
+
+    # md5 checksum shall be a string
+    _example["file"]["checksum_md5"] = 123.4
+    with pytest.raises(jsonschema.exceptions.ValidationError):
+        jsonschema.validate(instance=_example, schema=pydantic_schema)
+
+    # shall not validate without checksum_md5
+    del _example["file"]["checksum_md5"]
+    with pytest.raises(jsonschema.exceptions.ValidationError):
+        jsonschema.validate(instance=_example, schema=pydantic_schema)
+
+    # shall validate when checksum is put back in
+    _example["file"]["checksum_md5"] = "somechecksum"
+    jsonschema.validate(instance=_example, schema=pydantic_schema)
+
+    # shall not validate without relative_path
+    del _example["file"]["relative_path"]
+    with pytest.raises(jsonschema.exceptions.ValidationError):
+        jsonschema.validate(instance=_example, schema=pydantic_schema)
+
+
+def test_pydantic_schema_logic_case(pydantic_schema, metadata_examples):
+    """Asserting validation failure when illegal contents in case example"""
+
+    example = metadata_examples["case.yml"]
+
+    # assert validation with no changes
+    jsonschema.validate(instance=example, schema=pydantic_schema)
+
+    # assert validation error when "fmu" is missing
+    _example = deepcopy(example)
+    del _example["fmu"]
+
+    with pytest.raises(jsonschema.exceptions.ValidationError):
+        jsonschema.validate(instance=_example, schema=pydantic_schema)
+
+    # assert validation error when "fmu.model" is missing
+    _example = deepcopy(example)
+    del _example["fmu"]["model"]
+
+    with pytest.raises(jsonschema.exceptions.ValidationError):
+        jsonschema.validate(instance=_example, schema=pydantic_schema)
+
+
+def test_pydantic_schema_logic_fmu_block_aggr_real(pydantic_schema, metadata_examples):
+    """Test that fmu.realization and fmu.aggregation are not allowed at the same time"""
+
+    metadata = deepcopy(metadata_examples["surface_depth.yml"])
+    # check that the assumptions for the test are true
+    assert "realization" in metadata["fmu"]
+    assert "aggregation" not in metadata["fmu"]
+
+    # assert validation as-is
+    jsonschema.validate(instance=metadata, schema=pydantic_schema)
+
+    # add aggregation, shall fail. Get this from an actual example that validates.
+    _metadata_aggregation = metadata_examples["aggregated_surface_depth.yml"]
+    metadata["fmu"]["aggregation"] = _metadata_aggregation["fmu"]["aggregation"]
+
+    with pytest.raises(jsonschema.exceptions.ValidationError):
+        jsonschema.validate(instance=metadata, schema=pydantic_schema)
+
+
+def test_pydantic_schema_logic_data_top_base(pydantic_schema, metadata_examples):
+    """Test require data.top and data.base.
+
+    * Require both data.top and data.base, or none.
+    """
+
+    metadata = metadata_examples["surface_seismic_amplitude.yml"]
+
+    # check that the assumptions for the test are true
+    assert "top" in metadata["data"]
+    assert "base" in metadata["data"]
+
+    # assert validation as-is
+    jsonschema.validate(instance=metadata, schema=pydantic_schema)
+
+    # remove "top" - shall fail
+    _metadata = deepcopy(metadata)
+    del _metadata["data"]["top"]
+    with pytest.raises(jsonschema.exceptions.ValidationError):
+        jsonschema.validate(instance=_metadata, schema=pydantic_schema)
+
+    # remove "base" - shall fail
+    _metadata = deepcopy(metadata)
+    del _metadata["data"]["base"]
+    with pytest.raises(jsonschema.exceptions.ValidationError):
+        jsonschema.validate(instance=_metadata, schema=pydantic_schema)
+
+    # remove both - shall pass
+    del _metadata["data"]["top"]
+    assert "top" not in _metadata["data"]  # test assumption
+    assert "base" not in _metadata["data"]  # test assumption
+    jsonschema.validate(instance=_metadata, schema=pydantic_schema)
+
+
+def test_pydantic_schema_logic_field_outline(pydantic_schema, metadata_examples):
+    """Test content-specific rule.
+
+    When content == field_outline, require the field_outline field
+    """
+
+    metadata = metadata_examples["polygons_field_outline.yml"]
+
+    # check that the assumptions for the test are true
+    assert metadata["data"]["content"] == "field_outline"
+    assert "field_outline" in metadata["data"]
+
+    # assert validation as-is
+    jsonschema.validate(instance=metadata, schema=pydantic_schema)
+
+    # assert failure when content is field_outline and field_outline is missing
+    _metadata = deepcopy(metadata)
+    del _metadata["data"]["field_outline"]
+    with pytest.raises(jsonschema.exceptions.ValidationError):
+        jsonschema.validate(instance=_metadata, schema=pydantic_schema)
+
+
+def test_pydantic_schema_logic_field_region(pydantic_schema, metadata_examples):
+    """Test content-specific rule: field_region
+
+    When content == field_region, require the data.field_region field.
+ """ + + metadata = metadata_examples["polygons_field_region.yml"] + + # check assumptions + assert metadata["data"]["content"] == "field_region" + assert "field_region" in metadata["data"] + assert "id" in metadata["data"]["field_region"] + jsonschema.validate(instance=metadata, schema=pydantic_schema) + + # assert that data.field_region is required + _metadata = deepcopy(metadata) + del _metadata["data"]["field_region"] + with pytest.raises(jsonschema.exceptions.ValidationError): + jsonschema.validate(instance=_metadata, schema=pydantic_schema) + + # validation of data.field_region + _metadata = deepcopy(metadata) + del _metadata["data"]["field_region"]["id"] + with pytest.raises(jsonschema.exceptions.ValidationError): + jsonschema.validate(instance=_metadata, schema=pydantic_schema) + + _metadata = deepcopy(metadata) + _metadata["data"]["field_region"]["id"] = "NotANumber" + with pytest.raises(jsonschema.exceptions.ValidationError): + jsonschema.validate(instance=_metadata, schema=pydantic_schema) + + +def test_pydantic_schema_logic_fluid_contact(pydantic_schema, metadata_examples): + """Test content-specific rule. + + When content == fluid_contact, require the fluid_contact field + """ + + # parse the schema and polygons + metadata = metadata_examples["surface_fluid_contact.yml"] + + # check that assumptions for the test is true + assert metadata["data"]["content"] == "fluid_contact" + assert "fluid_contact" in metadata["data"] + + # assert failure when content is fluid_contact and fluid_contact block missing + _metadata = deepcopy(metadata) + del _metadata["data"]["fluid_contact"] + with pytest.raises(jsonschema.exceptions.ValidationError): + jsonschema.validate(instance=_metadata, schema=pydantic_schema) + + +def test_pydantic_schema_masterdata_smda(pydantic_schema, metadata_examples): + """Test schema logic for masterdata.smda.""" + + example = metadata_examples["case.yml"] + + # assert validation with no changes + jsonschema.validate(instance=example, schema=pydantic_schema) + + # assert validation error when masterdata block is missing + _example = deepcopy(example) + del _example["masterdata"] + with pytest.raises(jsonschema.exceptions.ValidationError): + jsonschema.validate(instance=_example, schema=pydantic_schema) + + # assert validation error when masterdata.smda is missing + # print(example["masterdata"]) + _example = deepcopy(example) + del _example["masterdata"]["smda"] + with pytest.raises(jsonschema.exceptions.ValidationError): + jsonschema.validate(instance=_example, schema=pydantic_schema) + + # assert validation error when missing attribute + for block in [ + "country", + "discovery", + "field", + "coordinate_system", + "stratigraphic_column", + ]: + _example = deepcopy(example) + del _example["masterdata"]["smda"][block] + with pytest.raises(jsonschema.exceptions.ValidationError): + jsonschema.validate(instance=_example, schema=pydantic_schema) + + # assert validation error if not correct type + for block, type_ in [ + ("country", list), + ("discovery", list), + ("coordinate_system", dict), + ("stratigraphic_column", dict), + ]: + _example = deepcopy(example) + assert isinstance(_example["masterdata"]["smda"][block], type_) + + _example["masterdata"]["smda"][block] = "somestring" + + with pytest.raises(jsonschema.exceptions.ValidationError): + jsonschema.validate(instance=_example, schema=pydantic_schema) + + +def test_pydantic_schema_data_time(pydantic_schema, metadata_examples): + """Test schema logic for data.time.""" + + # fetch one example that contains the 
+    example = metadata_examples["surface_seismic_amplitude.yml"]
+    assert "time" in example["data"]
+
+    # assert validation with no changes
+    jsonschema.validate(instance=example, schema=pydantic_schema)
+
+    # valid when data.time is missing
+    _example = deepcopy(example)
+    del _example["data"]["time"]
+    jsonschema.validate(instance=_example, schema=pydantic_schema)
+
+    # valid when only t0
+    _example = deepcopy(example)
+    del _example["data"]["time"]["t1"]
+    assert "t0" in _example["data"]["time"]  # test assumption
+    jsonschema.validate(instance=_example, schema=pydantic_schema)
+
+    # valid without labels
+    _example = deepcopy(example)
+    del _example["data"]["time"]["t0"]["label"]
+    jsonschema.validate(instance=_example, schema=pydantic_schema)
+
+    # NOT valid when other types
+    for testvalue in [
+        [{"t0": "2020-10-28T14:28:02", "label": "mylabel"}],
+        "2020-10-28T14:28:02",
+        123,
+        123.4,
+    ]:
+        _example = deepcopy(example)
+        _example["data"]["time"] = testvalue
+        with pytest.raises(jsonschema.exceptions.ValidationError):
+            jsonschema.validate(instance=_example, schema=pydantic_schema)
+
+
+def test_schema_logic_classification(pydantic_schema, metadata_examples):
+    """Test the classification of individual files."""
+
+    # fetch example
+    example = deepcopy(metadata_examples["surface_depth.yml"])
+
+    # assert validation with no changes
+    jsonschema.validate(instance=example, schema=pydantic_schema)
+
+    # assert "internal" and "restricted" validate
+    example["access"]["classification"] = "internal"
+    jsonschema.validate(instance=example, schema=pydantic_schema)
+
+    example["access"]["classification"] = "restricted"
+    jsonschema.validate(instance=example, schema=pydantic_schema)
+
+    # assert erroneous value does not validate
+    example["access"]["classification"] = "open"
+    with pytest.raises(jsonschema.exceptions.ValidationError):
+        jsonschema.validate(instance=example, schema=pydantic_schema)
+
+
+def test_schema_logic_data_spec(pydantic_schema, metadata_examples):
+    """Test schema logic for data.spec"""
+
+    # fetch surface example
+    example_surface = deepcopy(metadata_examples["surface_depth.yml"])
+
+    # assert validation with no changes
+    jsonschema.validate(instance=example_surface, schema=pydantic_schema)
+
+    # assert data.spec required when class == surface
+    del example_surface["data"]["spec"]
+    with pytest.raises(jsonschema.exceptions.ValidationError):
+        jsonschema.validate(instance=example_surface, schema=pydantic_schema)
+
+    # fetch table example
+    example_table = deepcopy(metadata_examples["table_inplace.yml"])
+
+    # assert validation with no changes
+    jsonschema.validate(instance=example_table, schema=pydantic_schema)
+
+    # assert data.spec required when class == table
+    del example_table["data"]["spec"]
+    with pytest.raises(jsonschema.exceptions.ValidationError):
+        jsonschema.validate(instance=example_table, schema=pydantic_schema)
+
+    # fetch dictionary example
+    example_dict = deepcopy(metadata_examples["dictionary_parameters.yml"])
+
+    # assert data.spec is not present
+    with pytest.raises(KeyError):
+        example_dict["data"]["spec"]
+
+    # assert data.spec not required when class == dictionary
+    jsonschema.validate(instance=example_dict, schema=pydantic_schema)
+
+
+def test_schema_logic_content_whitelist(pydantic_schema, metadata_examples):
+    """Test that validation fails when value of data.content is not in
+    the whitelist."""
+
+    # fetch surface example
+    example_surface = deepcopy(metadata_examples["surface_depth.yml"])
+
+    # assert validation with no changes
+    jsonschema.validate(instance=example_surface, schema=pydantic_schema)
+
+    # shall fail when content is not in whitelist
+    example_surface["data"]["content"] = "not_valid_content"
+    with pytest.raises(jsonschema.exceptions.ValidationError):
+        jsonschema.validate(instance=example_surface, schema=pydantic_schema)
+
+
+def test_schema_content_synch_with_code():
+    """Currently, the whitelist for content is maintained both in the schema
+    and in the code. This test asserts that the list used in the code is in synch
+    with the schema. Note! This is one-way, and will not fail if additional
+    elements are added to the schema only."""
+
+    for allowed_content in ALLOWED_CONTENTS:
+        assert allowed_content in {v.name for v in ContentEnum}
diff --git a/tools/gen.sh b/tools/gen.sh
new file mode 100755
index 000000000..757815e26
--- /dev/null
+++ b/tools/gen.sh
@@ -0,0 +1,22 @@
+#!/usr/bin/env bash
+
+datamodel-codegen \
+    --collapse-root-models \
+    --disable-timestamp \
+    --enable-version-header \
+    --enum-field-as-literal all \
+    --field-constraints \
+    --input $1 \
+    --input-file-type jsonschema \
+    --output src/fmu/dataio/models/meta.py \
+    --output-model-type pydantic_v2.BaseModel \
+    --snake-case-field \
+    --strict-nullable \
+    --strip-default-none \
+    --target-python-version 3.8 \
+    --use-default-kwarg \
+    --use-double-quotes \
+    --use-schema-description \
+    --use-standard-collections \
+    --use-subclass-enum \
+    --use-title-as-name
diff --git a/tools/schema-examples-validate.py b/tools/schema-examples-validate.py
new file mode 100644
index 000000000..b0d35619f
--- /dev/null
+++ b/tools/schema-examples-validate.py
@@ -0,0 +1,24 @@
+# type: ignore
+# Ex. usage: time (find schema -name "*.yml" | python3 tools/schema-examples-validate.py)
+
+import sys
+
+from fmu.dataio.models.meta import Root
+from orjson import dumps
+from yaml import safe_load
+
+
+def read(file):
+    with open(file) as f:
+        return f.read()
+
+
+for file in (f.strip() for f in sys.stdin.readlines()):
+    print(file)
+    try:
+        Root.model_validate_json(dumps(safe_load(read(file))))
+    except ValueError:
+        from pprint import pp
+
+        pp(safe_load(read(file)))
+        raise
diff --git a/tools/sumo-explorer-model-validate.py b/tools/sumo-explorer-model-validate.py
new file mode 100644
index 000000000..b914f4753
--- /dev/null
+++ b/tools/sumo-explorer-model-validate.py
@@ -0,0 +1,66 @@
+# type: ignore
+
+from __future__ import annotations
+
+from collections import Counter
+from contextlib import suppress
+from pprint import pp
+from random import sample
+
+from fmu.dataio.datastructure.meta import Root
+from fmu.sumo.explorer import Explorer
+from tqdm import tqdm
+
+
+def lazy_sampler(x, lenx, k=100):
+    if lenx <= 0:
+        return
+
+    sampled_idx = sample(range(lenx), k=k) if k < lenx else range(lenx)
+
+    for i in sampled_idx:
+        with suppress(IndexError):
+            yield x[i]
+
+
+def gen():
+    e = Explorer(env="dev")
+    for c in sample(tuple(e.cases), 25):
+        yield c.metadata
+
+        for cube in lazy_sampler(c.cubes, len(c.cubes)):
+            yield cube.metadata
+
+        for surf in lazy_sampler(c.surfaces, len(c.surfaces)):
+            yield surf.metadata
+
+        for poly in lazy_sampler(c.polygons, len(c.polygons)):
+            yield poly.metadata
+
+        for tab in lazy_sampler(c.tables, len(c.tables)):
+            yield tab.metadata
+
+        for dic in lazy_sampler(c.dictionaries, len(c.dictionaries)):
+            yield dic.metadata
+
+
+if __name__ == "__main__":
+    tally = Counter()
+    with tqdm(ascii=True, position=1) as pbar:
+        for m in gen():
+            pbar.update()
+            try:
+                parsed = Root.model_validate(m)
+                content = (
+                    parsed.root.data.root.content
+                    if hasattr(parsed.root, "data")
+                    else None
+                )
+                tally.update([(parsed.root.class_, content)])
+                if sum(tally.values()) % 25 == 0:
+                    pbar.write("-" * 100)
+                    pbar.write("\n".join(str(v) for v in tally.items()))
+            except Exception as e:
+                print(str(e))
+                pp(m)
+                raise
diff --git a/tools/validate-schema-example.py b/tools/validate-schema-example.py
new file mode 100644
index 000000000..cfcc00224
--- /dev/null
+++ b/tools/validate-schema-example.py
@@ -0,0 +1,24 @@
+# type: ignore
+# Ex. usage: time (find schema -name "*.yml" | python3 tools/validate-schema-example.py)
+
+import sys
+
+from fmu.dataio.models.meta import Root
+from orjson import dumps
+from yaml import safe_load
+
+
+def read(file):
+    with open(file) as f:
+        return f.read()
+
+
+for file in (f.strip() for f in sys.stdin.readlines()):
+    print(file)
+    try:
+        Root.model_validate_json(dumps(safe_load(read(file))))
+    except ValueError:
+        from pprint import pp
+
+        pp(safe_load(read(file)))
+        raise
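Taken together, the patch keeps two validation paths for the same metadata: the emitted draft-07 schema and the pydantic models. A compact smoke test over the bundled examples, combining both paths (a sketch mirroring the new test module, not part of the patch):

import jsonschema

from conftest import metadata_examples
from fmu.dataio.datastructure.meta import Root, dump

schema = dump()
for name, example in metadata_examples().items():
    jsonschema.validate(instance=example, schema=schema)  # schema path
    Root.model_validate(example)  # pydantic path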