diff --git a/docs/api.rst b/docs/api.rst index 2d6e6dc..8c74bf6 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -31,13 +31,20 @@ pyaro.timeseries - User API pyaro.timeseries.filters - Filters ^^^^^^^^^^^^ +.. automodule:: pyaro.timeseries.Filter + :members: FilterFactory, FilterCollection + :undoc-members: + :imported-members: + :show-inheritance: + .. image:: pics/Filter.svg :alt: Filter UML diagram .. automodule:: pyaro.timeseries.Filter - :members: FilterFactory, FilterCollections, Filter, StationFilter, VariableNameFilter, CountryFilter, BoundingBoxFilter, DuplicateFilter, FlagFilter, TimeBoundsFilter, TimeResolutionFilter, TimeVariableStationFilter, AltitudeFilter, RelativeAltitudeFilter, ValleyFloorRelativeAltitudeFilter + :members: StationFilter, VariableNameFilter, CountryFilter, BoundingBoxFilter, DuplicateFilter, FlagFilter, TimeBoundsFilter, TimeResolutionFilter, TimeVariableStationFilter, AltitudeFilter, RelativeAltitudeFilter, ValleyFloorRelativeAltitudeFilter :undoc-members: :imported-members: + :show-inheritance: pyaro.timeseries - Dev API @@ -55,9 +62,10 @@ pyaro.timeseries - Dev API :private-members: .. automodule:: pyaro.timeseries.Filter - :members: DataIndexFilter + :members: Filter, DataIndexFilter :undoc-members: :imported-members: + :show-inheritance: csvreader for timeseries diff --git a/docs/how-to-add-new-reader.rst b/docs/how-to-add-new-reader.rst index c725f1a..12f831e 100644 --- a/docs/how-to-add-new-reader.rst +++ b/docs/how-to-add-new-reader.rst @@ -22,7 +22,7 @@ with :py:meth:`~pyaro.list_timeseries_readers()`. 
TimeseriesEngine/Reader subclassing +++++++++++++++++++++++++++++++++++ -It is strongly advised to use the use the helper classes :py:class:`~pyaro.timeseries.AutoFilterReaderEngine.AutoFilterEngine` +It is strongly advised to use the helper classes :py:class:`~pyaro.timeseries.AutoFilterReaderEngine.AutoFilterEngine` and :py:class:`~pyaro.timeseries.AutoFilterReaderEngine.AutoFilterReader` to implement a Engine and a Reader since Filters will automatically be handled then. @@ -42,7 +42,7 @@ The ``YourReader`` should extend :py:class:`~pyaro.timeseries.AutoFilterReaderEn - the ``__init__`` method of :py:class:`~pyaro.timeseries.Reader` with two fixed args (`self` and `filename_or_obj_or_url`) and several kwargs, one of them should be `filters` - - it must store the `filters` calling `self._set_filters(filters)` + - it must store the `filters` calling `self._set_filters(filters)` - the :py:meth:`~pyaro.timeseries.AutoFilterReaderEngine.AutoFilterReader._unfiltered_data` method - the :py:meth:`~pyaro.timeseries.AutoFilterReaderEngine.AutoFilterReader._unfiltered_stations` method - the :py:meth:`~pyaro.timeseries.AutoFilterReaderEngine.AutoFilterReader._unfiltered_variables` method diff --git a/docs/pics/Filter.png b/docs/pics/Filter.png new file mode 100644 index 0000000..d9bfc6d Binary files /dev/null and b/docs/pics/Filter.png differ diff --git a/docs/pics/Filter.svg b/docs/pics/Filter.svg index 21a43d9..5ae8ad0 100644 --- a/docs/pics/Filter.svg +++ b/docs/pics/Filter.svg @@ -1 +1 @@ -timeseriesFilterFactoryget(name): Filterregister(filter)list(): [str]Filter__init__() # unrestricted and kwargsname(): strinit_kwargs(): dictargs(): dict__rep__(): strfilter_data(data, stations, variables): Datafilter_stations(stations): dict[str, Station]filter_variables(variables): [str] VariableNameFilter__init__(reader_to_new, include, exclude): VariableNameChangingFiltername(): "variables"init_kwargs(): dictfilter_variables(variables): [str]filter_data(data, stations, 
variables): Datanew_varname(reader_varname): strreader_varname(new_varname): strhas_variable(new_varname): boolhas_reader_variable(reader_varname): boolDataIndexFilterfilter_data(data, stations, variables): Datafilter_data_idx(data, stations, variables): idxStationReductionFilterfilter_data_idx(data, stations, variables): Datafilter_stations(stations): dict[str, Station]StationFilter__init__(include=[station], exclude=[station]): StationFiltername(): "stations"init_kwargs(): dictfilter_stations(stations): dict[str, Station]has_station(station): boolCountryFilter__init__(include=[country], exclude=[country]): CountryFiltername(): "countries"init_kwargs(): dictfilter_stations(stations): dict[str, Station]has_country(iso2_str): boolBoundingBoxFilter__init__(include=[country], exclude=[country]): BoundingBoxFiltername(): "bounding_boxes"init_kwargs(): dictfilter_stations(stations): dict[str, Station]has_location(lat, lon): boolTimeBoundsFilter__init__(start_include=[flag], start_exclude=[flag],start_end_include, startend_exclude,end_include, end_exclude): TimeBoundsFiltername(): "time_bounds"init_kwargs(): dictfilter_data_idx(data, stations, variables): Datahas_envelope(): boolenvelope(): (start, end)contains(start_dt_array, end_dt_array): bool-arrayTimeResolutionFilter__init__(resolutions=[resolution_string]): TimeResolutionFiltername(): "time_resolutions"init_kwargs(): dictfilter_data_idx(data, stations, variables): DataDuplicateFilter__init__(duplicate_keys=["stations", "start_times", "end_times"]): DuplicateFiltername(): "duplicates"init_kwargs(): dictfilter_data_idx(data, stations, variables): DataFlagFilter__init__(include=[flag], exclude=[flag]): FlagFiltername(): "flags"init_kwargs(): dictfilter_data_idx(data, stations, variables): Datausable_flags(): set(flags)Singeltoninstance available aspyaro.timeseries.filters \ No newline at end of file +timeseriesFilterFactoryget(name): Filterregister(filter)list(): [str]Filter__init__() # unrestricted and kwargsname(): 
strinit_kwargs(): dictargs(): dict__rep__(): strfilter_data(data, stations, variables): Datafilter_stations(stations): dict[str, Station]filter_variables(variables): [str] VariableNameFilter__init__(reader_to_new, include, exclude)name(): "variables"init_kwargs(): dictfilter_variables(variables): [str]filter_data(data, stations, variables): Datanew_varname(reader_varname): strreader_varname(new_varname): strhas_variable(new_varname): boolhas_reader_variable(reader_varname): boolDataIndexFilterfilter_data(data, stations, variables): Datafilter_data_idx(data, stations, variables): idxStationReductionFilterfilter_data_idx(data, stations, variables): Datafilter_stations(stations): dict[str, Station]StationFilter__init__(include=[station], exclude=[station])name(): "stations"init_kwargs(): dictfilter_stations(stations): dict[str, Station]has_station(station): boolCountryFilter__init__(include=[country], exclude=[country])name(): "countries"init_kwargs(): dictfilter_stations(stations): dict[str, Station]has_country(iso2_str): boolBoundingBoxFilter__init__(include=[country], exclude=[country])name(): "bounding_boxes"init_kwargs(): dictfilter_stations(stations): dict[str, Station]has_location(lat, lon): boolTimeBoundsFilter__init__(start_include=[flag], start_exclude=[flag],start_end_include, startend_exclude,end_include, end_exclude)name(): "time_bounds"init_kwargs(): dictfilter_data_idx(data, stations, variables): Datahas_envelope(): boolenvelope(): (start, end)contains(start_dt_array, end_dt_array): bool-arrayTimeVariableStationFilter__init__(exclude_from_csvfile="",exclude=[(start, end, variable, station)])name(): "time_variable_station"init_kwargs(): dictfilter_data_idx(data, stations, variables): DataTimeResolutionFilter__init__(resolutions=[resolution_string])name(): "time_resolutions"init_kwargs(): dictfilter_data_idx(data, stations, variables): DataDuplicateFilter__init__(duplicate_keys=["stations","start_times", "end_times"])name(): "duplicates"init_kwargs(): 
dictfilter_data_idx(data, stations, variables): DataFlagFilter__init__(include=[flag], exclude=[flag])name(): "flags"init_kwargs(): dictfilter_data_idx(data, stations, variables): Datausable_flags(): set(flags)AltitudeFilter__init__(min_altitude, max_altitude)name(): "altitude"init_kwargs(): dictfilter_stations(stations): dict[str, Station]RelativeAltitudeFilter__init__(topo_file, topo_var, rdiff)name(): "relaltitude"init_kwargs(): dictfilter_stations(stations): dict[str, Station]ValleyFloorRelativeAltitudeFilter__init__(topo_file, radius,topo_var, lower, upper)name(): "valleyfloor_relaltitude"init_kwargs(): dictfilter_stations(stations): dict[str, Station]Singletoninstance available aspyaro.timeseries.filters \ No newline at end of file diff --git a/docs/puml/Filter.puml b/docs/puml/Filter.puml index ab46c48..1dc5edf 100644 --- a/docs/puml/Filter.puml +++ b/docs/puml/Filter.puml @@ -25,7 +25,7 @@ interface timeseries.Filter { } class timeseries.VariableNameFilter { - + __init__(reader_to_new, include, exclude): VariableNameChangingFilter + + __init__(reader_to_new, include, exclude) + name(): "variables" + init_kwargs(): dict + filter_variables(variables): [str] @@ -48,7 +48,7 @@ abstract class timeseries.StationReductionFilter { } class timeseries.StationFilter { - + __init__(include=[station], exclude=[station]): StationFilter + + __init__(include=[station], exclude=[station]) + name(): "stations" + init_kwargs(): dict + filter_stations(stations): dict[str, Station] @@ -56,7 +56,7 @@ class timeseries.StationFilter { } class timeseries.CountryFilter { - + __init__(include=[country], exclude=[country]): CountryFilter + + __init__(include=[country], exclude=[country]) + name(): "countries" + init_kwargs(): dict + filter_stations(stations): dict[str, Station] @@ -64,7 +64,7 @@ class timeseries.CountryFilter { } class timeseries.BoundingBoxFilter { - + __init__(include=[country], exclude=[country]): BoundingBoxFilter + + __init__(include=[country], exclude=[country]) 
+ name(): "bounding_boxes" + init_kwargs(): dict + filter_stations(stations): dict[str, Station] @@ -72,7 +72,7 @@ class timeseries.BoundingBoxFilter { } class timeseries.TimeBoundsFilter { - + __init__(start_include=[flag], start_exclude=[flag],\n start_end_include, startend_exclude,\n end_include, end_exclude): TimeBoundsFilter + + __init__(start_include=[flag], start_exclude=[flag],\n start_end_include, startend_exclude,\n end_include, end_exclude) + name(): "time_bounds" + init_kwargs(): dict + filter_data_idx(data, stations, variables): Data @@ -81,28 +81,57 @@ class timeseries.TimeBoundsFilter { contains(start_dt_array, end_dt_array): bool-array } +class timeseries.TimeVariableStationFilter { + + __init__(exclude_from_csvfile="",\nexclude=[(start, end, variable, station)]) + + name(): "time_variable_station" + + init_kwargs(): dict + + filter_data_idx(data, stations, variables): Data +} + class timeseries.TimeResolutionFilter { - + __init__(resolutions=[resolution_string]): TimeResolutionFilter + + __init__(resolutions=[resolution_string]) + name(): "time_resolutions" + init_kwargs(): dict + filter_data_idx(data, stations, variables): Data } class timeseries.DuplicateFilter { - + __init__(duplicate_keys=["stations", "start_times", "end_times"]): DuplicateFilter + + __init__(duplicate_keys=["stations",\n"start_times", "end_times"]) + name(): "duplicates" + init_kwargs(): dict + filter_data_idx(data, stations, variables): Data } class timeseries.FlagFilter { - + __init__(include=[flag], exclude=[flag]): FlagFilter + + __init__(include=[flag], exclude=[flag]) + name(): "flags" + init_kwargs(): dict + filter_data_idx(data, stations, variables): Data usable_flags(): set(flags) } +class timeseries.AltitudeFilter { + + __init__(min_altitude, max_altitude) + + name(): "altitude" + + init_kwargs(): dict + + filter_stations(stations): dict[str, Station] +} + +class timeseries.RelativeAltitudeFilter { + + __init__(topo_file, topo_var, rdiff) + + name(): "relaltitude" + 
+ init_kwargs(): dict + + filter_stations(stations): dict[str, Station] +} + +class timeseries.ValleyFloorRelativeAltitudeFilter { + + __init__(topo_file, radius,topo_var, lower, upper) + + name(): "valleyfloor_relaltitude" + + init_kwargs(): dict + + filter_stations(stations): dict[str, Station] +} + + timeseries.Filter <|.. timeseries.VariableNameFilter timeseries.Filter <|.. timeseries.DataIndexFilter @@ -110,8 +139,12 @@ timeseries.DataIndexFilter <|.. timeseries.StationReductionFilter timeseries.StationReductionFilter <|.. timeseries.StationFilter timeseries.StationReductionFilter <|.. timeseries.CountryFilter timeseries.StationReductionFilter <|.. timeseries.BoundingBoxFilter +timeseries.StationReductionFilter <|.. timeseries.AltitudeFilter +timeseries.StationReductionFilter <|.. timeseries.RelativeAltitudeFilter +timeseries.StationReductionFilter <|.. timeseries.ValleyFloorRelativeAltitudeFilter timeseries.DataIndexFilter <|.. timeseries.TimeBoundsFilter timeseries.DataIndexFilter <|.. timeseries.TimeResolutionFilter +timeseries.DataIndexFilter <|.. timeseries.TimeVariableStationFilter timeseries.DataIndexFilter <|.. timeseries.DuplicateFilter timeseries.DataIndexFilter <|.. timeseries.FlagFilter diff --git a/src/pyaro/plugins.py b/src/pyaro/plugins.py index 3a61609..3f75055 100644 --- a/src/pyaro/plugins.py +++ b/src/pyaro/plugins.py @@ -39,7 +39,7 @@ def list_timeseries_engines() -> dict[str, TimeseriesEngine]: Notes ----- - This function lives in the backends namespace (``engs=pyaro.list_timeseries_enginess()``). + This function lives in the backends namespace (``engs=pyaro.list_timeseries_engines()``). 
More information about each reader is available via the TimeseriesEngine obj.url() and obj.description() diff --git a/src/pyaro/timeseries/Filter.py b/src/pyaro/timeseries/Filter.py index 3c92226..a733739 100644 --- a/src/pyaro/timeseries/Filter.py +++ b/src/pyaro/timeseries/Filter.py @@ -551,20 +551,22 @@ class TimeBoundsFilter(DataIndexFilter): Examples: - end_include: [("2023-01-01 10:00:00", "2024-01-01 07:00:00")] + end_include: `[("2023-01-01 10:00:00", "2024-01-01 07:00:00")]` will only include observations where the end time of each observation is within the interval specified (i.e. "end" >= 2023-01-01 10:00:00 and "end" <= "2024-01-01 07:00:00") Including multiple bounds will act as an OR, allowing multiple selections. If we want every observation in January for 2021, 2022, 2023, and 2024 this - could be made as the following filter: - startend_include: [ - ("2021-01-01 00:00:00", "2021-02-01 00:00:00"), - ("2022-01-01 00:00:00", "2022-02-01 00:00:00"), - ("2023-01-01 00:00:00", "2023-02-01 00:00:00"), - ("2024-01-01 00:00:00", "2024-02-01 00:00:00"), - ] + could be made as the following filter:: + + startend_include: [ + ("2021-01-01 00:00:00", "2021-02-01 00:00:00"), + ("2022-01-01 00:00:00", "2022-02-01 00:00:00"), + ("2023-01-01 00:00:00", "2023-02-01 00:00:00"), + ("2024-01-01 00:00:00", "2024-02-01 00:00:00"), + ] + """ def __init__( @@ -711,9 +713,11 @@ class TimeVariableStationFilter(DataIndexFilter): :param exclude_from_csvfile: this is a helper option to enable a large list of excludes to be read from a "\t" separated file with columns start \t end \t variable \t station + where start and end are timestamps of format YYYY-MM-DD HH:MM:SS in UTC, e.g. the year 2020 is: 2020-01-01 00:00:00 \t 2020-12-31 23:59:59 \t ... 
+ """ def __init__(self, exclude=[], exclude_from_csvfile=""): @@ -955,9 +959,9 @@ class RelativeAltitudeFilter(StationFilter): Filter class which filters stations based on the relative difference between the station altitude, and the gridded topography altitude. - :param topo_file : A .nc file from which to read gridded topography data. - :param topo_var : Name of variable that stores altitude. - :param rdiff : Relative difference (in meters). + :param topo_file: A .nc file from which to read gridded topography data. + :param topo_var: Name of variable that stores altitude. + :param rdiff: Relative difference (in meters). Note: ----- @@ -1014,6 +1018,8 @@ def UNITS_METER(self): """internal representation of units, don't use :return: m-unit in internal representation + + :meta private: """ if self._UNITS_METER is None: self._UNITS_METER = Unit("m") @@ -1023,11 +1029,11 @@ def UNITS_METER(self): def topography(self): """Internal property, don't use. - :raises ModuleNotFoundError: _description_ - :raises ModuleNotFoundError: _description_ - :raises FilterException: _description_ - :raises FilterException: _description_ + :raises ModuleNotFoundError: if cf-units or xarray is not installed + :raises FilterException: if topography file is not provided :return: topography as internal representation + + :meta private: """ if "cf_units" not in sys.modules: raise ModuleNotFoundError( @@ -1086,8 +1092,8 @@ def _find_lat_lon_variables(self, topo_xr): These are assigned to self._lat, self._lon, respectively for later use. 
- :param topo_xr xr.Dataset of topography - :return lat, lon DataArrays + :param topo_xr: xr.Dataset of topography + :return: lat, lon DataArrays """ for var_name in self._topography.coords: unit_str = self._topography[var_name].attrs.get("units", None) @@ -1107,8 +1113,8 @@ def _find_lat_lon_variables(self, topo_xr): def _extract_bounding_box(self, lat, lon): """ Extract the bounding box of the grid, sets self._boundary_(north|east|south|west) - :param lat latitude (DataArray) - :param lon longitude (DataArray) + :param lat: latitude (DataArray) + :param lon: longitude (DataArray) """ self._boundary_west = float(lon.min()) self._boundary_east = float(lon.max())