Skip to content

Commit

Permalink
warn: preventing subset on stereographic data (#217)
Browse files Browse the repository at this point in the history
Add an error when trying to subset by latitude or longitude a dataset whose part is originalGrid (i.e. stereographic projection)
  • Loading branch information
uriii3 authored Nov 26, 2024
1 parent 635d19d commit 0c20b83
Show file tree
Hide file tree
Showing 12 changed files with 234 additions and 66 deletions.
4 changes: 2 additions & 2 deletions copernicusmarine/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@
ServiceNotSupported,
VariableDoesNotExistInTheDataset,
WrongDatetimeFormat,
GeospatialSubsetNotAvailableForNonLatLon,
)
from copernicusmarine.core_functions.models import (
DatasetCoordinatesExtent,
FileGet,
FileStatus,
GeographicalExtent,
Expand Down Expand Up @@ -78,7 +78,6 @@
"CoperniusMarineServiceShortNames",
"CouldNotConnectToAuthenticationSystem",
"CredentialsCannotBeNone",
"DatasetCoordinatesExtent",
"DatasetNotFound",
"DatasetVersionNotFound",
"DatasetVersionPartNotFound",
Expand All @@ -96,6 +95,7 @@
"ServiceNotAvailable",
"ServiceNotHandled",
"ServiceNotSupported",
"GeospatialSubsetNotAvailableForNonLatLon",
"StatusCode",
"FileStatus",
"StatusMessage",
Expand Down
71 changes: 50 additions & 21 deletions copernicusmarine/catalogue_parser/fields_query_builder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
from typing import Optional, Type, get_args, get_origin, get_type_hints
from typing import (
Literal,
Optional,
Type,
Union,
get_args,
get_origin,
get_type_hints,
)

from pydantic import BaseModel

Expand Down Expand Up @@ -55,29 +63,50 @@ def build_query(
) in get_type_hints(type_to_check).items():
if field_name in self.fields_to_include_or_exclude:
query[field_name] = True
elif get_origin(field_type) is list:
continue
all_base_models = self._get_base_models_in_type(field_type)
for base_model, in_an_iterable in (all_base_models or {}).items():
if field_name not in query:
query[field_name] = {"__all__": {}}
query[field_name] = (
{"__all__": {}} if in_an_iterable else {}
)
result = self.build_query(
get_args(field_type)[0],
query[field_name]["__all__"],
base_model,
(
query[field_name]["__all__"]
if in_an_iterable
else query[field_name]
),
)
if not result:
del query[field_name]
elif get_origin(field_type) is dict:
if field_name not in query:
query[field_name] = {"__all__": {}}
result = self.build_query(
get_args(field_type)[1],
query[field_name]["__all__"],
)
if not result:
del query[field_name]
elif check_type_is_base_model(field_type):
if field_name not in query:
query[field_name] = {}
result = self.build_query(field_type, query[field_name])
if not result:
del query[field_name]

return query

def _get_base_models_in_type(
    self,
    type_to_check: Type,
    in_an_iterable: bool = False,
) -> Optional[dict[Type, Literal["__all__", None]]]:
    """
    Collect the base-model types found inside a type annotation.

    The annotation is unwrapped recursively: the element type of a
    ``list``, the value type of a ``dict`` and every member of a ``Union``
    are inspected in turn.

    Parameters
    ----------
    type_to_check : Type
        The annotation to inspect.
    in_an_iterable : bool
        Whether the annotation was reached through a ``list`` or a ``dict``.

    Returns
    -------
    dict
        Maps each model type found to ``"__all__"`` when it was reached
        through a ``list`` or ``dict``, and to ``None`` otherwise. The
        mapping is empty when no model type is found.
    """
    # A model type is a leaf: report it with the marker for its context.
    if check_type_is_base_model(type_to_check):
        return {type_to_check: "__all__" if in_an_iterable else None}

    origin = get_origin(type_to_check)
    type_args = get_args(type_to_check)

    # Pair every inner type worth inspecting with the flag it inherits.
    if origin is list:
        candidates = [(type_args[0], True)]
    elif origin is dict:
        # Only the value type of a dict is inspected, not the key type.
        candidates = [(type_args[1], True)]
    elif origin is Union:
        # Union members keep the context of the union itself.
        candidates = [(member, in_an_iterable) for member in type_args]
    else:
        candidates = []

    found = {}
    for inner_type, inner_in_an_iterable in candidates:
        nested = self._get_base_models_in_type(
            inner_type, in_an_iterable=inner_in_an_iterable
        )
        if nested:
            found.update(nested)
    return found
18 changes: 18 additions & 0 deletions copernicusmarine/core_functions/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,24 @@ def __init__(self, message: str):
self.__setattr__("custom_exception_message", message)


class GeospatialSubsetNotAvailableForNonLatLon(Exception):
    """
    Exception raised when a geospatial subset is requested on data whose
    projection is not on the normalised latitude and longitude grid.

    The geospatial subset of such datasets is not yet available.
    Please check other parts of the dataset to subset it. The geospatial
    subset of the datasets with different gridding will be fully available
    soon.
    """

    def __init__(self):
        # The message is fixed: this exception takes no arguments.
        message = (
            "The geospatial subset of datasets in a projection that is not in "
            "latitude and longitude is not yet available. "
            "We are developing such feature and will be supported in future versions."
        )
        super().__init__(message)


class NetCDFCompressionNotAvailable(Exception):
"""
Exception raised when the NetCDF compression is not available.
Expand Down
31 changes: 9 additions & 22 deletions copernicusmarine/core_functions/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pathlib
from enum import Enum
from typing import Literal, Optional, get_args
from typing import Literal, Optional, Union, get_args

from pydantic import BaseModel, ConfigDict

Expand Down Expand Up @@ -153,32 +153,19 @@ def add_s3_file(self, s3_file: S3FileInfo):
class GeographicalExtent(BaseModel):
"""Interval for geographical coordinates."""

minimum: Optional[float]
maximum: Optional[float]
minimum: float
maximum: float
unit: Optional[str]
coordinate_id: str


class TimeExtent(BaseModel):
"""Interval for time coordinates."""

minimum: Optional[str]
maximum: Optional[str]
unit: Optional[str]


class DatasetCoordinatesExtent(BaseModel):
#: Longitude interval of the subsetted data.
longitude: Optional[GeographicalExtent]
#: Latitude interval of the subsetted data.
latitude: Optional[GeographicalExtent]
#: Time interval of the subsetted data in iso8601 string.
time: Optional[TimeExtent]
#: Depth interval of the subsetted data.
depth: Optional[GeographicalExtent] = None
#: Elevation interval of the subsetted data.
#: Is relevant if data are requested for elevation
#: instead of depth.
elevation: Optional[GeographicalExtent] = None
minimum: str
maximum: str
unit: str
coordinate_id: str


class ResponseSubset(BaseModel):
Expand All @@ -200,7 +187,7 @@ class ResponseSubset(BaseModel):
#: Variables of the subsetted dataset.
variables: list[str]
#: The bounds of the subsetted dataset.
coordinates_extent: DatasetCoordinatesExtent
coordinates_extent: list[Union[GeographicalExtent, TimeExtent]]
#: Status of the request.
status: StatusCode
#: Message explaining the status.
Expand Down
2 changes: 2 additions & 0 deletions copernicusmarine/core_functions/services_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,7 @@ class RetrievalService:
uri: str
dataset_valid_start_date: Optional[Union[str, int, float]]
service: CopernicusMarineService
is_original_grid: bool = False


def get_retrieval_service(
Expand Down Expand Up @@ -408,6 +409,7 @@ def _get_retrieval_service_from_dataset_version(
dataset_valid_start_date=dataset_start_date,
service_format=service.service_format,
service=service,
is_original_grid=dataset_part.name == "originalGrid",
)


Expand Down
1 change: 1 addition & 0 deletions copernicusmarine/core_functions/subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ def subset_function(
dataset_subset=subset_request.get_time_and_space_subset(),
coordinates_selection_method=subset_request.coordinates_selection_method,
dataset_valid_date=retrieval_service.dataset_valid_start_date,
is_original_grid=retrieval_service.is_original_grid,
)
logger.info(
"Downloading using service " f"{retrieval_service.service_name}..."
Expand Down
12 changes: 12 additions & 0 deletions copernicusmarine/download_functions/subset_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from copernicusmarine.core_functions import custom_open_zarr
from copernicusmarine.core_functions.exceptions import (
CoordinatesOutOfDatasetBounds,
GeospatialSubsetNotAvailableForNonLatLon,
MinimumLongitudeGreaterThanMaximumLongitude,
ServiceNotSupported,
VariableDoesNotExistInTheDataset,
Expand Down Expand Up @@ -584,6 +585,7 @@ def check_dataset_subset_bounds(
dataset_subset: DatasetTimeAndSpaceSubset,
coordinates_selection_method: CoordinatesSelectionMethod,
dataset_valid_date: Optional[Union[str, int, float]],
is_original_grid: bool,
) -> None:
if service_name in [
CopernicusMarineServiceNames.GEOSERIES,
Expand All @@ -597,6 +599,16 @@ def check_dataset_subset_bounds(
dataset_coordinates = dataset.coords
else:
raise ServiceNotSupported(service_name)
if is_original_grid:
logger.debug("Dataset part has the non lat lon projection.")
if (
dataset_subset.minimum_latitude is not None
or dataset_subset.maximum_latitude is not None
or dataset_subset.minimum_longitude is not None
or dataset_subset.maximum_longitude is not None
):
raise GeospatialSubsetNotAvailableForNonLatLon()

for coordinate_label in COORDINATES_LABEL["latitude"]:
if coordinate_label in dataset.sizes:
latitudes = dataset_coordinates[coordinate_label].values
Expand Down
20 changes: 8 additions & 12 deletions copernicusmarine/download_functions/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
)
from copernicusmarine.core_functions.models import (
DEFAULT_FILE_EXTENSIONS,
DatasetCoordinatesExtent,
FileFormat,
GeographicalExtent,
TimeExtent,
Expand Down Expand Up @@ -190,17 +189,12 @@ def _format_datetimes(

def get_dataset_coordinates_extent(
dataset: xarray.Dataset,
) -> DatasetCoordinatesExtent:
coordinates_extent = DatasetCoordinatesExtent(
longitude=_get_coordinate_extent(dataset, "longitude"), # type: ignore
latitude=_get_coordinate_extent(dataset, "latitude"), # type: ignore
time=_get_coordinate_extent(dataset, "time"), # type: ignore
)
depth_or_elevation_extent = _get_coordinate_extent(dataset, "depth")
if "depth" in dataset.sizes:
coordinates_extent.depth = depth_or_elevation_extent # type: ignore
elif "elevation" in dataset.sizes:
coordinates_extent.elevation = depth_or_elevation_extent # type: ignore
) -> list[Union[GeographicalExtent, TimeExtent]]:
coordinates_extent = []
for coord_label in ["longitude", "latitude", "time", "depth"]:
if coordinate_extent := _get_coordinate_extent(dataset, coord_label):
coordinates_extent.append(coordinate_extent)

return coordinates_extent


Expand All @@ -225,11 +219,13 @@ def _get_coordinate_extent(
minimum=minimum,
maximum=maximum,
unit=unit,
coordinate_id=coord_label,
)
return GeographicalExtent(
minimum=minimum,
maximum=maximum,
unit=unit,
coordinate_id=coord_label,
)
return None

Expand Down
1 change: 1 addition & 0 deletions copernicusmarine/python_interface/load_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def load_data_object_from_load_request(
dataset_subset=load_request.get_time_and_space_subset(),
coordinates_selection_method=load_request.coordinates_selection_method,
dataset_valid_date=retrieval_service.dataset_valid_start_date,
is_original_grid=retrieval_service.is_original_grid,
)
if retrieval_service.service_name in [
CopernicusMarineServiceNames.GEOSERIES,
Expand Down
7 changes: 0 additions & 7 deletions doc/response-types.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,6 @@ Subtypes
:exclude-members: model_computed_fields, model_config, model_fields
:member-order: bysource

.. autoclass:: copernicusmarine.DatasetCoordinatesExtent()
:members:
:undoc-members:
:exclude-members: model_computed_fields, model_config, model_fields
:member-order: bysource


.. autoclass:: copernicusmarine.GeographicalExtent()
:members:
:undoc-members:
Expand Down
8 changes: 6 additions & 2 deletions tests/test_describe_released_date.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ def then_I_dont_get_the_not_released_products_version_and_datasets(
assert 1 == len(describe_result.products)
assert (
describe_result.model_dump(
exclude_none=True, exclude_unset=True, exclude=exclude_query
exclude_none=True,
exclude_unset=True,
exclude=exclude_query,
)
== snapshot
)
Expand All @@ -55,7 +57,9 @@ def then_I_get_all_products_versions_and_datasets(
assert 2 == len(describe_result.products)
assert (
describe_result.model_dump(
exclude_none=True, exclude_unset=True, exclude=exclude_query
exclude_none=True,
exclude_unset=True,
exclude=exclude_query,
)
== snapshot
)
Loading

0 comments on commit 0c20b83

Please sign in to comment.