Skip to content

Commit

Permalink
chore: refactor to stop using the cache (#95)
Browse files Browse the repository at this point in the history
Not using any cache anymore
Always get the data on the fly

fix: use pendulum to parse dates

chore: use pendulum across the repo

fix: to pendulum datetime from boto3

fix: microsecond problem with boto3
  • Loading branch information
renaudjester committed Aug 29, 2024
1 parent 84d2dd3 commit 49229bd
Show file tree
Hide file tree
Showing 17 changed files with 270 additions and 120 deletions.
10 changes: 5 additions & 5 deletions copernicusmarine/catalogue_parser/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ class WebApi(Enum):


class ServiceNotHandled(Exception):
...
pass


# service formats
Expand Down Expand Up @@ -175,11 +175,11 @@ def _format_admp_valid_start_date(
if to_timestamp:
return int(
datetime_parser(
arco_data_metadata_producer_valid_start_date.split(".")[0]
arco_data_metadata_producer_valid_start_date
).timestamp()
* 1000
)
return arco_data_metadata_producer_valid_start_date.split(".")[0]
return arco_data_metadata_producer_valid_start_date

def _convert_elevation_to_depth(self):
self.coordinates_id = "depth"
Expand Down Expand Up @@ -507,11 +507,11 @@ def filter_only_official_versions_and_parts(self):

# Errors
class DatasetVersionPartNotFound(Exception):
...
pass


class DatasetVersionNotFound(Exception):
...
pass


def dataset_version_part_not_found_exception(
Expand Down
11 changes: 6 additions & 5 deletions copernicusmarine/catalogue_parser/request_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
import pathlib
import re
from dataclasses import dataclass, field
from datetime import datetime
from json import load
from typing import Any, Dict, List, Optional

from pendulum import DateTime

from copernicusmarine.core_functions.deprecated_options import (
DEPRECATED_OPTIONS,
)
Expand Down Expand Up @@ -41,8 +42,8 @@ class DatasetTimeAndSpaceSubset:
maximum_latitude: Optional[float] = None
minimum_depth: Optional[float] = None
maximum_depth: Optional[float] = None
start_datetime: Optional[datetime] = None
end_datetime: Optional[datetime] = None
start_datetime: Optional[DateTime] = None
end_datetime: Optional[DateTime] = None


@dataclass
Expand All @@ -59,8 +60,8 @@ class SubsetRequest:
minimum_depth: Optional[float] = None
maximum_depth: Optional[float] = None
vertical_dimension_as_originally_produced: bool = True
start_datetime: Optional[datetime] = None
end_datetime: Optional[datetime] = None
start_datetime: Optional[DateTime] = None
end_datetime: Optional[DateTime] = None
subset_method: SubsetMethod = DEFAULT_SUBSET_METHOD
output_filename: Optional[str] = None
file_format: FileFormat = DEFAULT_FILE_FORMAT
Expand Down
9 changes: 7 additions & 2 deletions copernicusmarine/command_line_interface/group_subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import List, Optional

import click
import pendulum

from copernicusmarine.command_line_interface.exception_handler import (
log_exception_and_exit,
Expand Down Expand Up @@ -400,8 +401,12 @@ def subset(
minimum_depth,
maximum_depth,
vertical_dimension_as_originally_produced,
start_datetime,
end_datetime,
(
start_datetime
if not start_datetime
else pendulum.instance(start_datetime)
),
end_datetime if not end_datetime else pendulum.instance(end_datetime),
subset_method,
output_filename,
file_format,
Expand Down
6 changes: 2 additions & 4 deletions copernicusmarine/core_functions/credentials_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,10 @@
# TODO: handle cache of the credentials without cachier


class CredentialCannotBeNone(Exception):
...
class CredentialCannotBeNone(Exception): ...


class InvalidUsernameOrPassword(Exception):
...
class InvalidUsernameOrPassword(Exception): ...


def _load_credential_from_copernicus_marine_configuration_file(
Expand Down
16 changes: 12 additions & 4 deletions copernicusmarine/core_functions/services_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,16 @@ def _get_best_arco_service_type(
time_size = get_size_of_coordinate_subset(
dataset,
"time",
dataset_subset.start_datetime,
dataset_subset.end_datetime,
(
dataset_subset.start_datetime.in_tz("UTC").naive()
if dataset_subset.start_datetime
else dataset_subset.start_datetime
),
(
dataset_subset.end_datetime.in_tz("UTC").naive()
if dataset_subset.end_datetime
else dataset_subset.end_datetime
),
)
dataset_coordinates = dataset.coords

Expand Down Expand Up @@ -444,7 +452,7 @@ def _get_dataset_start_date_from_service(


class ServiceNotAvailable(Exception):
...
pass


def _warning_dataset_will_be_deprecated(
Expand Down Expand Up @@ -498,7 +506,7 @@ def _service_not_available_error(


class NoServiceAvailable(Exception):
...
pass


def _no_service_available_for_command(
Expand Down
7 changes: 4 additions & 3 deletions copernicusmarine/core_functions/subset.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import json
import logging
import pathlib
from datetime import datetime
from typing import List, Optional

from pendulum import DateTime

from copernicusmarine.catalogue_parser.models import (
CopernicusMarineDatasetServiceType,
CopernicusMarineServiceFormat,
Expand Down Expand Up @@ -51,8 +52,8 @@ def subset_function(
minimum_depth: Optional[float],
maximum_depth: Optional[float],
vertical_dimension_as_originally_produced: bool,
start_datetime: Optional[datetime],
end_datetime: Optional[datetime],
start_datetime: Optional[DateTime],
end_datetime: Optional[DateTime],
subset_method: SubsetMethod,
output_filename: Optional[str],
file_format: FileFormat,
Expand Down
63 changes: 39 additions & 24 deletions copernicusmarine/core_functions/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import logging
import pathlib
import re
from datetime import datetime, timezone
from importlib.metadata import version
from typing import (
Any,
Expand All @@ -12,6 +11,7 @@
Iterable,
Iterator,
List,
Literal,
Optional,
Tuple,
TypeVar,
Expand All @@ -21,7 +21,10 @@
import cftime
import numpy
import pandas as pd
import pendulum
import pendulum.exceptions
import xarray
from pendulum import DateTime
from requests import PreparedRequest

from copernicusmarine import __version__ as copernicusmarine_version
Expand All @@ -48,11 +51,6 @@
"%Y-%m-%d %H:%M:%S.%f%Z",
]

DATETIME_NON_ISO_FORMATS = [
"%Y",
"%Y-%m-%dT%H:%M:%S.%fZ",
]


def get_unique_filename(
filepath: pathlib.Path, overwrite_option: bool
Expand Down Expand Up @@ -126,27 +124,44 @@ def construct_query_params_for_marine_data_store_monitoring(


class WrongDatetimeFormat(Exception):
...
pass


def datetime_parser(string: str) -> datetime:
if string == "now":
return datetime.now(tz=timezone.utc).replace(tzinfo=None)
def datetime_parser(date: Union[str, numpy.datetime64]) -> DateTime:
if date == "now":
return pendulum.now(tz="UTC")
try:
parsed_datetime = datetime.fromisoformat(string)
if parsed_datetime.tzinfo is None:
return parsed_datetime
else:
return parsed_datetime.astimezone(timezone.utc).replace(
tzinfo=None
)
except ValueError:
for datetime_format in DATETIME_NON_ISO_FORMATS:
try:
return datetime.strptime(string, datetime_format)
except ValueError:
pass
raise WrongDatetimeFormat(string)
if isinstance(date, numpy.datetime64):
date = str(date)
parsed_datetime = pendulum.parse(date)
# ignoring types because one needs to pass
# `exact=True` to `parse` method to get
# something else than `pendulum.DateTime`
return parsed_datetime # type: ignore
except pendulum.exceptions.ParserError:
pass
raise WrongDatetimeFormat(date)


def timestamp_parser(
    timestamp: Union[int, float], unit: Literal["s", "ms"] = "ms"
) -> DateTime:
    """
    Convert a Unix timestamp to a UTC pendulum DateTime object.

    The timestamp is interpreted as milliseconds by default. Pass
    ``unit="s"`` to interpret it as seconds instead.

    Parameters
    ----------
    timestamp : int or float
        Time elapsed since the Unix epoch, expressed in ``unit``.
    unit : {"s", "ms"}
        Unit of ``timestamp``. Any value other than "s" is treated
        as milliseconds.

    Returns
    -------
    DateTime
        The corresponding instant, created with ``tz="UTC"``.
    """
    # There are 1e3 milliseconds in a second. Note that the literal
    # ``10e3`` equals 10_000, which would shift every millisecond
    # timestamp by a factor of ten towards the epoch.
    conversion_factor = 1 if unit == "s" else 1e3
    return pendulum.from_timestamp(timestamp / conversion_factor, tz="UTC")


def timestamp_or_datestring_to_datetime(
    date: Union[str, int, numpy.datetime64]
) -> DateTime:
    """
    Turn either a timestamp or a date-like value into a DateTime.

    Values that are instances of ``int`` are handed to
    ``timestamp_parser`` with its default unit; every other value
    is handed to ``datetime_parser``.
    """
    if not isinstance(date, int):
        return datetime_parser(date)
    return timestamp_parser(date)


def convert_datetime64_to_netcdf_timestamp(
Expand Down
10 changes: 5 additions & 5 deletions copernicusmarine/download_functions/download_arco_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,12 @@
LongitudeParameters,
TemporalParameters,
)
from copernicusmarine.download_functions.subset_xarray import (
date_to_datetime,
subset,
)
from copernicusmarine.download_functions.subset_xarray import subset
from copernicusmarine.download_functions.utils import (
FileFormat,
get_filename,
get_formatted_dataset_size_estimation,
timestamp_or_datestring_to_datetime,
)

logger = logging.getLogger("copernicusmarine")
Expand Down Expand Up @@ -153,7 +151,9 @@ def download_zarr(
)
start_datetime = subset_request.start_datetime
if dataset_valid_start_date:
minimum_start_date = date_to_datetime(dataset_valid_start_date)
minimum_start_date = timestamp_or_datestring_to_datetime(
dataset_valid_start_date
)
if (
not subset_request.start_datetime
or subset_request.start_datetime < minimum_start_date
Expand Down
Loading

0 comments on commit 49229bd

Please sign in to comment.