Skip to content

Commit

Permalink
fix: use pendulum to parse dates (#123)
Browse files Browse the repository at this point in the history
Support more date formats: support timezones and use pendulum
  • Loading branch information
renaudjester committed Oct 28, 2024
1 parent 2feee4b commit 8d691c1
Show file tree
Hide file tree
Showing 17 changed files with 275 additions and 123 deletions.
4 changes: 2 additions & 2 deletions copernicusmarine/catalogue_parser/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,11 +175,11 @@ def _format_admp_valid_start_date(
if to_timestamp:
return int(
datetime_parser(
arco_data_metadata_producer_valid_start_date.split(".")[0]
arco_data_metadata_producer_valid_start_date
).timestamp()
* 1000
)
return arco_data_metadata_producer_valid_start_date.split(".")[0]
return arco_data_metadata_producer_valid_start_date

def _convert_elevation_to_depth(self):
self.coordinates_id = "depth"
Expand Down
11 changes: 6 additions & 5 deletions copernicusmarine/catalogue_parser/request_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
import pathlib
import re
from dataclasses import dataclass, field
from datetime import datetime
from json import load
from typing import Any, Dict, List, Optional

from pendulum import DateTime

from copernicusmarine.core_functions.deprecated_options import (
DEPRECATED_OPTIONS,
)
Expand Down Expand Up @@ -41,8 +42,8 @@ class DatasetTimeAndSpaceSubset:
maximum_latitude: Optional[float] = None
minimum_depth: Optional[float] = None
maximum_depth: Optional[float] = None
start_datetime: Optional[datetime] = None
end_datetime: Optional[datetime] = None
start_datetime: Optional[DateTime] = None
end_datetime: Optional[DateTime] = None


@dataclass
Expand All @@ -59,8 +60,8 @@ class SubsetRequest:
minimum_depth: Optional[float] = None
maximum_depth: Optional[float] = None
vertical_dimension_as_originally_produced: bool = True
start_datetime: Optional[datetime] = None
end_datetime: Optional[datetime] = None
start_datetime: Optional[DateTime] = None
end_datetime: Optional[DateTime] = None
subset_method: SubsetMethod = DEFAULT_SUBSET_METHOD
output_filename: Optional[str] = None
file_format: FileFormat = DEFAULT_FILE_FORMAT
Expand Down
23 changes: 13 additions & 10 deletions copernicusmarine/command_line_interface/group_subset.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import logging
import pathlib
from datetime import datetime
from typing import List, Optional

import click
Expand Down Expand Up @@ -32,10 +31,10 @@
subset_function,
)
from copernicusmarine.core_functions.utils import (
DATETIME_SUPPORTED_FORMATS,
OVERWRITE_LONG_OPTION,
OVERWRITE_OPTION_HELP_TEXT,
OVERWRITE_SHORT_OPTION,
datetime_parser,
)

logger = logging.getLogger("copernicusmarine")
Expand Down Expand Up @@ -192,17 +191,21 @@ def cli_subset() -> None:
@click.option(
"--start-datetime",
"-t",
type=click.DateTime(DATETIME_SUPPORTED_FORMATS),
type=str,
help="The start datetime of the temporal subset. "
"Caution: encapsulate date "
+ 'with " " to ensure valid expression for format "%Y-%m-%d %H:%M:%S".',
+ 'with " " to ensure valid expression for format "%Y-%m-%d %H:%M:%S". '
+ "Supports common format parsed by pendulum. "
+ "See https://pendulum.eustace.io/docs/#parsing",
)
@click.option(
"--end-datetime",
"-T",
type=click.DateTime(DATETIME_SUPPORTED_FORMATS),
type=str,
help="The end datetime of the temporal subset. Caution: encapsulate date "
+ 'with " " to ensure valid expression for format "%Y-%m-%d %H:%M:%S".',
+ 'with " " to ensure valid expression for format "%Y-%m-%d %H:%M:%S". '
+ "Supports common format parsed by pendulum. "
+ "See https://pendulum.eustace.io/docs/#parsing",
)
@click.option(
"--subset-method",
Expand Down Expand Up @@ -354,8 +357,8 @@ def subset(
minimum_depth: Optional[float],
maximum_depth: Optional[float],
vertical_dimension_as_originally_produced: bool,
start_datetime: Optional[datetime],
end_datetime: Optional[datetime],
start_datetime: Optional[str],
end_datetime: Optional[str],
subset_method: SubsetMethod,
output_filename: Optional[str],
file_format: FileFormat,
Expand Down Expand Up @@ -404,8 +407,8 @@ def subset(
minimum_depth,
maximum_depth,
vertical_dimension_as_originally_produced,
start_datetime,
end_datetime,
datetime_parser(start_datetime) if start_datetime else None,
datetime_parser(end_datetime) if end_datetime else None,
subset_method,
output_filename,
file_format,
Expand Down
12 changes: 10 additions & 2 deletions copernicusmarine/core_functions/services_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,16 @@ def _get_best_arco_service_type(
time_size = get_size_of_coordinate_subset(
dataset,
"time",
dataset_subset.start_datetime,
dataset_subset.end_datetime,
(
dataset_subset.start_datetime.in_tz("UTC").naive()
if dataset_subset.start_datetime
else dataset_subset.start_datetime
),
(
dataset_subset.end_datetime.in_tz("UTC").naive()
if dataset_subset.end_datetime
else dataset_subset.end_datetime
),
)
dataset_coordinates = dataset.coords

Expand Down
7 changes: 4 additions & 3 deletions copernicusmarine/core_functions/subset.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import json
import logging
import pathlib
from datetime import datetime
from typing import List, Optional

from pendulum import DateTime

from copernicusmarine.catalogue_parser.models import (
CopernicusMarineDatasetServiceType,
CopernicusMarineServiceFormat,
Expand Down Expand Up @@ -51,8 +52,8 @@ def subset_function(
minimum_depth: Optional[float],
maximum_depth: Optional[float],
vertical_dimension_as_originally_produced: bool,
start_datetime: Optional[datetime],
end_datetime: Optional[datetime],
start_datetime: Optional[DateTime],
end_datetime: Optional[DateTime],
subset_method: SubsetMethod,
output_filename: Optional[str],
file_format: FileFormat,
Expand Down
61 changes: 38 additions & 23 deletions copernicusmarine/core_functions/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import logging
import pathlib
import re
from datetime import datetime, timezone
from importlib.metadata import version
from typing import (
Any,
Expand All @@ -12,6 +11,7 @@
Iterable,
Iterator,
List,
Literal,
Optional,
Tuple,
TypeVar,
Expand All @@ -21,7 +21,10 @@
import cftime
import numpy
import pandas as pd
import pendulum
import pendulum.exceptions
import xarray
from pendulum import DateTime
from requests import PreparedRequest

from copernicusmarine import __version__ as copernicusmarine_version
Expand All @@ -48,11 +51,6 @@
"%Y-%m-%d %H:%M:%S.%f%Z",
]

DATETIME_NON_ISO_FORMATS = [
"%Y",
"%Y-%m-%dT%H:%M:%S.%fZ",
]


def get_unique_filename(
filepath: pathlib.Path, overwrite_option: bool
Expand Down Expand Up @@ -129,24 +127,41 @@ class WrongDatetimeFormat(Exception):
pass


def datetime_parser(string: str) -> datetime:
if string == "now":
return datetime.now(tz=timezone.utc).replace(tzinfo=None)
def datetime_parser(date: Union[str, numpy.datetime64]) -> DateTime:
if date == "now":
return pendulum.now(tz="UTC")
try:
parsed_datetime = datetime.fromisoformat(string)
if parsed_datetime.tzinfo is None:
return parsed_datetime
else:
return parsed_datetime.astimezone(timezone.utc).replace(
tzinfo=None
)
except ValueError:
for datetime_format in DATETIME_NON_ISO_FORMATS:
try:
return datetime.strptime(string, datetime_format)
except ValueError:
pass
raise WrongDatetimeFormat(string)
if isinstance(date, numpy.datetime64):
date = str(date)
parsed_datetime = pendulum.parse(date)
# ignoring types because one needs to pass
# `exact=True` to `parse` method to get
# something else than `pendulum.DateTime`
return parsed_datetime # type: ignore
except pendulum.exceptions.ParserError:
pass
raise WrongDatetimeFormat(date)


def timestamp_parser(
    timestamp: Union[int, float], unit: Literal["s", "ms"] = "ms"
) -> DateTime:
    """
    Convert a Unix timestamp to a pendulum DateTime object in UTC.

    The timestamp is interpreted as milliseconds by default. Pass "s" as
    the unit to interpret it as seconds instead.
    """
    # There are 1e3 milliseconds in a second. Note that the literal
    # `10e3` is 10_000, not 1_000, and must not be used here.
    conversion_factor = 1 if unit == "s" else 1e3
    return pendulum.from_timestamp(timestamp / conversion_factor, tz="UTC")


def timestamp_or_datestring_to_datetime(
    date: Union[str, int, float, numpy.datetime64]
) -> DateTime:
    """
    Convert either a numeric timestamp or a date-like value to a DateTime.

    Numeric values (int or float) are handed to `timestamp_parser` with its
    default unit; anything else is handed to `datetime_parser`.
    """
    # `timestamp_parser` accepts floats as well as ints, so route both
    # there instead of letting a float fall through to the string parser.
    if isinstance(date, (int, float)):
        return timestamp_parser(date)
    return datetime_parser(date)


def convert_datetime64_to_netcdf_timestamp(
Expand Down
10 changes: 5 additions & 5 deletions copernicusmarine/download_functions/download_arco_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,12 @@
LongitudeParameters,
TemporalParameters,
)
from copernicusmarine.download_functions.subset_xarray import (
date_to_datetime,
subset,
)
from copernicusmarine.download_functions.subset_xarray import subset
from copernicusmarine.download_functions.utils import (
FileFormat,
get_filename,
get_formatted_dataset_size_estimation,
timestamp_or_datestring_to_datetime,
)

logger = logging.getLogger("copernicusmarine")
Expand Down Expand Up @@ -153,7 +151,9 @@ def download_zarr(
)
start_datetime = subset_request.start_datetime
if dataset_valid_start_date:
minimum_start_date = date_to_datetime(dataset_valid_start_date)
minimum_start_date = timestamp_or_datestring_to_datetime(
dataset_valid_start_date
)
if (
not subset_request.start_datetime
or subset_request.start_datetime < minimum_start_date
Expand Down
Loading

0 comments on commit 8d691c1

Please sign in to comment.