diff --git a/pyproject.toml b/pyproject.toml index be3611f6..89a695a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,8 +49,8 @@ dynamic = [ "version", ] dependencies = [ - "anemoi-transform>=0.0.5", - "anemoi-utils[provenance]>=0.4.2", + "anemoi-transform>=0.1", + "anemoi-utils[provenance]>=0.4.9", "cfunits", "numpy", "pyyaml", diff --git a/src/anemoi/datasets/create/__init__.py b/src/anemoi/datasets/create/__init__.py index 5a1b0804..adf5b79f 100644 --- a/src/anemoi/datasets/create/__init__.py +++ b/src/anemoi/datasets/create/__init__.py @@ -19,7 +19,6 @@ import cftime import numpy as np import tqdm -from anemoi.utils.config import DotDict as DotDict from anemoi.utils.dates import as_datetime from anemoi.utils.dates import frequency_to_string from anemoi.utils.dates import frequency_to_timedelta @@ -327,7 +326,7 @@ def create_elements(self, config): self.output = build_output(config.output, parent=self) self.input = build_input_(main_config=config, output_config=self.output) - LOG.info(self.input) + LOG.info("%s", self.input) def build_input_(main_config, output_config): diff --git a/src/anemoi/datasets/create/functions/filters/rename.py b/src/anemoi/datasets/create/functions/filters/rename.py index 331a8c1f..05db8454 100644 --- a/src/anemoi/datasets/create/functions/filters/rename.py +++ b/src/anemoi/datasets/create/functions/filters/rename.py @@ -44,7 +44,6 @@ def __getattr__(self, name): def __repr__(self) -> str: return repr(self.field) - return f"{self.field} -> {self.what} -> {self.renaming}" class RenamedFieldFormat: diff --git a/src/anemoi/datasets/create/functions/sources/grib.py b/src/anemoi/datasets/create/functions/sources/grib.py index 9b8dd57b..40d6b9b4 100644 --- a/src/anemoi/datasets/create/functions/sources/grib.py +++ b/src/anemoi/datasets/create/functions/sources/grib.py @@ -9,11 +9,14 @@ import glob +import logging from earthkit.data import from_source from earthkit.data.indexing.fieldlist import FieldArray from earthkit.data.utils.patterns import Pattern +LOG = logging.getLogger(__name__) + def _load(context, name, record): ds = None @@ -141,4 +144,7 @@ def execute(context, dates, path, latitudes=None, longitudes=None, *args, **kwar if geography is not None: ds = FieldArray([AddGrid(_, geography) for _ in ds]) + if len(ds) == 0: + LOG.warning(f"No fields found for {dates} in {given_paths} (kwargs={kwargs})") + return ds diff --git a/src/anemoi/datasets/create/functions/sources/xarray/__init__.py b/src/anemoi/datasets/create/functions/sources/xarray/__init__.py index 7a0db461..fda14c1f 100644 --- a/src/anemoi/datasets/create/functions/sources/xarray/__init__.py +++ b/src/anemoi/datasets/create/functions/sources/xarray/__init__.py @@ -12,7 +12,6 @@ from earthkit.data.core.fieldlist import MultiFieldList from anemoi.datasets.data.stores import name_to_zarr_store -from anemoi.datasets.utils.fields import NewMetadataField as NewMetadataField from .. import iterate_patterns from .fieldlist import XarrayFieldList diff --git a/src/anemoi/datasets/create/functions/sources/xarray/fieldlist.py b/src/anemoi/datasets/create/functions/sources/xarray/fieldlist.py index 1a7a1a10..716f7b6b 100644 --- a/src/anemoi/datasets/create/functions/sources/xarray/fieldlist.py +++ b/src/anemoi/datasets/create/functions/sources/xarray/fieldlist.py @@ -14,10 +14,8 @@ import yaml from earthkit.data.core.fieldlist import FieldList -from .coordinates import is_scalar as is_scalar from .field import EmptyFieldList from .flavour import CoordinateGuesser -from .metadata import XArrayMetadata as XArrayMetadata from .time import Time from .variable import FilteredVariable from .variable import Variable diff --git a/src/anemoi/datasets/create/input/action.py b/src/anemoi/datasets/create/input/action.py index ae428523..87e06299 100644 --- a/src/anemoi/datasets/create/input/action.py +++ b/src/anemoi/datasets/create/input/action.py @@ -10,14 +10,8 @@ import logging from copy import deepcopy -from anemoi.utils.dates import as_datetime as as_datetime -from anemoi.utils.dates import frequency_to_timedelta as frequency_to_timedelta from earthkit.data.core.order import build_remapping -from anemoi.datasets.dates import DatesProvider as DatesProvider -from anemoi.datasets.fields import FieldArray as FieldArray -from anemoi.datasets.fields import NewValidDateTimeField as NewValidDateTimeField - from .context import Context from .misc import is_function diff --git a/src/anemoi/datasets/create/input/context.py b/src/anemoi/datasets/create/input/context.py index 5bd5cf51..4c852ed5 100644 --- a/src/anemoi/datasets/create/input/context.py +++ b/src/anemoi/datasets/create/input/context.py @@ -10,14 +10,8 @@ import logging import textwrap -from anemoi.utils.dates import as_datetime as as_datetime -from anemoi.utils.dates import frequency_to_timedelta as frequency_to_timedelta from anemoi.utils.humanize import plural -from anemoi.datasets.dates import DatesProvider as DatesProvider -from anemoi.datasets.fields import FieldArray as FieldArray -from anemoi.datasets.fields import NewValidDateTimeField as NewValidDateTimeField - from .trace import step from .trace import trace diff --git a/src/anemoi/datasets/create/input/data_sources.py b/src/anemoi/datasets/create/input/data_sources.py index 0c1832d2..1feee080 100644 --- a/src/anemoi/datasets/create/input/data_sources.py +++ b/src/anemoi/datasets/create/input/data_sources.py @@ -10,13 +10,6 @@ import logging from functools import cached_property -from anemoi.utils.dates import as_datetime as as_datetime -from anemoi.utils.dates import frequency_to_timedelta as frequency_to_timedelta - -from anemoi.datasets.dates import DatesProvider as DatesProvider -from anemoi.datasets.fields import FieldArray as FieldArray -from anemoi.datasets.fields import NewValidDateTimeField as NewValidDateTimeField - from .action import Action from .action import action_factory from .misc import _tidy diff --git a/src/anemoi/datasets/create/input/empty.py b/src/anemoi/datasets/create/input/empty.py index 9067ba2e..941e0262 100644 --- a/src/anemoi/datasets/create/input/empty.py +++ b/src/anemoi/datasets/create/input/empty.py @@ -10,13 +10,6 @@ import logging from functools import cached_property -from anemoi.utils.dates import as_datetime as as_datetime -from anemoi.utils.dates import frequency_to_timedelta as frequency_to_timedelta - -from anemoi.datasets.dates import DatesProvider as DatesProvider -from anemoi.datasets.fields import FieldArray as FieldArray -from anemoi.datasets.fields import NewValidDateTimeField as NewValidDateTimeField - from .misc import assert_fieldlist from .result import Result from .trace import trace_datasource diff --git a/src/anemoi/datasets/create/input/filter.py b/src/anemoi/datasets/create/input/filter.py index 41b240e3..6980ae97 100644 --- a/src/anemoi/datasets/create/input/filter.py +++ b/src/anemoi/datasets/create/input/filter.py @@ -10,13 +10,6 @@ import logging from functools import cached_property -from anemoi.utils.dates import as_datetime as as_datetime -from anemoi.utils.dates import frequency_to_timedelta as frequency_to_timedelta - -from anemoi.datasets.dates import DatesProvider as DatesProvider -from anemoi.datasets.fields import FieldArray as FieldArray -from anemoi.datasets.fields import NewValidDateTimeField as NewValidDateTimeField - from ..functions import import_function from .function import FunctionContext from .misc import _tidy diff --git a/src/anemoi/datasets/create/input/function.py b/src/anemoi/datasets/create/input/function.py index dd6221d1..dfcf4f2f 100644 --- a/src/anemoi/datasets/create/input/function.py +++ b/src/anemoi/datasets/create/input/function.py @@ -10,13 +10,6 @@ import logging from functools import cached_property -from anemoi.utils.dates import as_datetime as as_datetime -from anemoi.utils.dates import frequency_to_timedelta as frequency_to_timedelta - -from anemoi.datasets.dates import DatesProvider as DatesProvider -from anemoi.datasets.fields import FieldArray as FieldArray -from anemoi.datasets.fields import NewValidDateTimeField as NewValidDateTimeField - from ..functions import import_function from .action import Action from .misc import _tidy diff --git a/src/anemoi/datasets/create/input/misc.py b/src/anemoi/datasets/create/input/misc.py index 9f7552de..19cd1fbf 100644 --- a/src/anemoi/datasets/create/input/misc.py +++ b/src/anemoi/datasets/create/input/misc.py @@ -10,15 +10,9 @@ import logging from functools import wraps -from anemoi.utils.dates import as_datetime as as_datetime -from anemoi.utils.dates import frequency_to_timedelta as frequency_to_timedelta from earthkit.data.core.fieldlist import MultiFieldList from earthkit.data.indexing.fieldlist import FieldList -from anemoi.datasets.dates import DatesProvider as DatesProvider -from anemoi.datasets.fields import FieldArray as FieldArray -from anemoi.datasets.fields import NewValidDateTimeField as NewValidDateTimeField - from ..functions import import_function LOG = logging.getLogger(__name__) diff --git a/src/anemoi/datasets/create/input/pipe.py b/src/anemoi/datasets/create/input/pipe.py index a35f7c07..29ba6b99 100644 --- a/src/anemoi/datasets/create/input/pipe.py +++ b/src/anemoi/datasets/create/input/pipe.py @@ -7,6 +7,7 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. +import json import logging from .action import Action @@ -20,7 +21,11 @@ class PipeAction(Action): def __init__(self, context, action_path, *configs): super().__init__(context, action_path, *configs) - assert len(configs) > 1, configs + if len(configs) <= 1: + raise ValueError( + f"PipeAction requires at least two actions, got {len(configs)}\n{json.dumps(configs, indent=2)}" + ) + current = action_factory(configs[0], context, action_path + ["0"]) for i, c in enumerate(configs[1:]): current = step_factory(c, context, action_path + [str(i + 1)], previous_step=current) diff --git a/src/anemoi/datasets/create/input/repeated_dates.py b/src/anemoi/datasets/create/input/repeated_dates.py index ee094ff7..d85cd6c2 100644 --- a/src/anemoi/datasets/create/input/repeated_dates.py +++ b/src/anemoi/datasets/create/input/repeated_dates.py @@ -12,12 +12,11 @@ from collections import defaultdict import numpy as np +from anemoi.transform.fields import new_field_with_valid_datetime +from anemoi.transform.fields import new_fieldlist_from_list from anemoi.utils.dates import as_datetime from anemoi.utils.dates import frequency_to_timedelta -from anemoi.datasets.fields import FieldArray -from anemoi.datasets.fields import NewValidDateTimeField - from .action import Action from .action import action_factory from .join import JoinResult @@ -116,9 +115,10 @@ def transform(self, group_of_dates): class DateMapperClimatology(DateMapper): - def __init__(self, source, year, day): + def __init__(self, source, year, day, hour=None): self.year = year self.day = day + self.hour = hour def transform(self, group_of_dates): from anemoi.datasets.dates.groups import GroupOfDates @@ -130,6 +130,8 @@ def transform(self, group_of_dates): new_dates = defaultdict(list) for date in dates: new_date = date.replace(year=self.year, day=self.day) + if self.hour is not None: + new_date = new_date.replace(hour=self.hour, minute=0, second=0) new_dates[new_date].append(date) for date, dates in new_dates.items(): @@ -185,9 +187,12 @@ def datasource(self): for field in self.source_results.datasource: for date in self.original_group_of_dates: - result.append(NewValidDateTimeField(field, date)) + result.append(new_field_with_valid_datetime(field, date)) + + if not result: + raise ValueError("repeated_dates: no input data found") - return FieldArray(result) + return new_fieldlist_from_list(result) class RepeatedDatesAction(Action): diff --git a/src/anemoi/datasets/create/input/result.py b/src/anemoi/datasets/create/input/result.py index b03dcc8a..9ab2c8a8 100644 --- a/src/anemoi/datasets/create/input/result.py +++ b/src/anemoi/datasets/create/input/result.py @@ -15,16 +15,10 @@ from functools import cached_property import numpy as np -from anemoi.utils.dates import as_datetime as as_datetime -from anemoi.utils.dates import frequency_to_timedelta as frequency_to_timedelta from anemoi.utils.humanize import seconds_to_human from anemoi.utils.humanize import shorten_list from earthkit.data.core.order import build_remapping -from anemoi.datasets.dates import DatesProvider as DatesProvider -from anemoi.datasets.fields import FieldArray as FieldArray -from anemoi.datasets.fields import NewValidDateTimeField as NewValidDateTimeField - from .trace import trace from .trace import trace_datasource diff --git a/src/anemoi/datasets/create/input/step.py b/src/anemoi/datasets/create/input/step.py index 1215cb89..b7c8793d 100644 --- a/src/anemoi/datasets/create/input/step.py +++ b/src/anemoi/datasets/create/input/step.py @@ -10,13 +10,6 @@ import logging from copy import deepcopy -from anemoi.utils.dates import as_datetime as as_datetime -from anemoi.utils.dates import frequency_to_timedelta as frequency_to_timedelta - -from anemoi.datasets.dates import DatesProvider as DatesProvider -from anemoi.datasets.fields import FieldArray as FieldArray -from anemoi.datasets.fields import NewValidDateTimeField as NewValidDateTimeField - from .action import Action from .context import Context from .misc import is_function diff --git a/src/anemoi/datasets/dates/groups.py b/src/anemoi/datasets/dates/groups.py index 4fb28bd2..e9ad0cc6 100644 --- a/src/anemoi/datasets/dates/groups.py +++ b/src/anemoi/datasets/dates/groups.py @@ -28,7 +28,7 @@ def __init__(self, dates, provider, partial_ok=False): assert isinstance(provider, DatesProvider), type(provider) assert isinstance(dates, list) - self.dates = dates + self.dates = [as_datetime(_) for _ in dates] self.provider = provider self.partial_ok = partial_ok diff --git a/src/anemoi/datasets/fields.py b/src/anemoi/datasets/fields.py deleted file mode 100644 index 93c5c4d6..00000000 --- a/src/anemoi/datasets/fields.py +++ /dev/null @@ -1,68 +0,0 @@ -# (C) Copyright 2024 Anemoi contributors. -# -# This software is licensed under the terms of the Apache Licence Version 2.0 -# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. -# -# In applying this licence, ECMWF does not waive the privileges and immunities -# granted to it by virtue of its status as an intergovernmental organisation -# nor does it submit to any jurisdiction. - - -from earthkit.data.indexing.fieldlist import FieldArray - - -def list_to_fieldlist(fields): - return FieldArray(fields) - - -def empty_fieldlist(): - return FieldArray([]) - - -class WrappedField: - def __init__(self, field): - self._field = field - - def __getattr__(self, name): - return getattr(self._field, name) - - def __repr__(self) -> str: - return repr(self._field) - - -class NewDataField(WrappedField): - def __init__(self, field, data): - super().__init__(field) - self._data = data - self.shape = data.shape - - def to_numpy(self, flatten=False, dtype=None, index=None): - data = self._data - if dtype is not None: - data = data.astype(dtype) - if flatten: - data = data.flatten() - if index is not None: - data = data[index] - return data - - -class NewMetadataField(WrappedField): - def __init__(self, field, **kwargs): - super().__init__(field) - self._metadata = kwargs - - def metadata(self, *args, **kwargs): - if len(args) == 1 and args[0] in self._metadata: - return self._metadata[args[0]] - return self._field.metadata(*args, **kwargs) - - -class NewValidDateTimeField(NewMetadataField): - def __init__(self, field, valid_datetime): - date = valid_datetime.date().strftime("%Y%m%d") - time = valid_datetime.time().strftime("%H%M") - - self.valid_datetime = valid_datetime - - super().__init__(field, date=date, time=time, step=0, valid_datetime=valid_datetime.isoformat())