Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: use pendulum to parse dates #123

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions copernicusmarine/catalogue_parser/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,11 +175,11 @@ def _format_admp_valid_start_date(
if to_timestamp:
return int(
datetime_parser(
arco_data_metadata_producer_valid_start_date.split(".")[0]
arco_data_metadata_producer_valid_start_date
).timestamp()
* 1000
)
return arco_data_metadata_producer_valid_start_date.split(".")[0]
return arco_data_metadata_producer_valid_start_date

def _convert_elevation_to_depth(self):
self.coordinates_id = "depth"
Expand Down
11 changes: 6 additions & 5 deletions copernicusmarine/catalogue_parser/request_structure.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
import pathlib
import re
from dataclasses import dataclass, field
from datetime import datetime
from json import load
from typing import Any, Dict, List, Optional

from pendulum import DateTime

from copernicusmarine.core_functions.deprecated_options import (
DEPRECATED_OPTIONS,
)
Expand Down Expand Up @@ -41,8 +42,8 @@ class DatasetTimeAndSpaceSubset:
maximum_latitude: Optional[float] = None
minimum_depth: Optional[float] = None
maximum_depth: Optional[float] = None
start_datetime: Optional[datetime] = None
end_datetime: Optional[datetime] = None
start_datetime: Optional[DateTime] = None
end_datetime: Optional[DateTime] = None


@dataclass
Expand All @@ -59,8 +60,8 @@ class SubsetRequest:
minimum_depth: Optional[float] = None
maximum_depth: Optional[float] = None
vertical_dimension_as_originally_produced: bool = True
start_datetime: Optional[datetime] = None
end_datetime: Optional[datetime] = None
start_datetime: Optional[DateTime] = None
end_datetime: Optional[DateTime] = None
subset_method: SubsetMethod = DEFAULT_SUBSET_METHOD
output_filename: Optional[str] = None
file_format: FileFormat = DEFAULT_FILE_FORMAT
Expand Down
23 changes: 13 additions & 10 deletions copernicusmarine/command_line_interface/group_subset.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import logging
import pathlib
from datetime import datetime
from typing import List, Optional

import click
Expand Down Expand Up @@ -32,10 +31,10 @@
subset_function,
)
from copernicusmarine.core_functions.utils import (
DATETIME_SUPPORTED_FORMATS,
OVERWRITE_LONG_OPTION,
OVERWRITE_OPTION_HELP_TEXT,
OVERWRITE_SHORT_OPTION,
datetime_parser,
)

logger = logging.getLogger("copernicusmarine")
Expand Down Expand Up @@ -192,17 +191,21 @@ def cli_subset() -> None:
@click.option(
"--start-datetime",
"-t",
type=click.DateTime(DATETIME_SUPPORTED_FORMATS),
type=str,
help="The start datetime of the temporal subset. "
"Caution: encapsulate date "
+ 'with " " to ensure valid expression for format "%Y-%m-%d %H:%M:%S".',
+ 'with " " to ensure valid expression for format "%Y-%m-%d %H:%M:%S". '
+ "Supports common format parsed by pendulum. "
+ "See https://pendulum.eustace.io/docs/#parsing",
)
@click.option(
"--end-datetime",
"-T",
type=click.DateTime(DATETIME_SUPPORTED_FORMATS),
type=str,
help="The end datetime of the temporal subset. Caution: encapsulate date "
+ 'with " " to ensure valid expression for format "%Y-%m-%d %H:%M:%S".',
+ 'with " " to ensure valid expression for format "%Y-%m-%d %H:%M:%S". '
+ "Supports common format parsed by pendulum. "
+ "See https://pendulum.eustace.io/docs/#parsing",
)
@click.option(
"--subset-method",
Expand Down Expand Up @@ -354,8 +357,8 @@ def subset(
minimum_depth: Optional[float],
maximum_depth: Optional[float],
vertical_dimension_as_originally_produced: bool,
start_datetime: Optional[datetime],
end_datetime: Optional[datetime],
start_datetime: Optional[str],
end_datetime: Optional[str],
subset_method: SubsetMethod,
output_filename: Optional[str],
file_format: FileFormat,
Expand Down Expand Up @@ -404,8 +407,8 @@ def subset(
minimum_depth,
maximum_depth,
vertical_dimension_as_originally_produced,
start_datetime,
end_datetime,
datetime_parser(start_datetime) if start_datetime else None,
datetime_parser(end_datetime) if end_datetime else None,
subset_method,
output_filename,
file_format,
Expand Down
12 changes: 10 additions & 2 deletions copernicusmarine/core_functions/services_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,16 @@ def _get_best_arco_service_type(
time_size = get_size_of_coordinate_subset(
dataset,
"time",
dataset_subset.start_datetime,
dataset_subset.end_datetime,
(
dataset_subset.start_datetime.in_tz("UTC").naive()
if dataset_subset.start_datetime
else dataset_subset.start_datetime
),
(
dataset_subset.end_datetime.in_tz("UTC").naive()
if dataset_subset.end_datetime
else dataset_subset.end_datetime
),
)
dataset_coordinates = dataset.coords

Expand Down
7 changes: 4 additions & 3 deletions copernicusmarine/core_functions/subset.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import json
import logging
import pathlib
from datetime import datetime
from typing import List, Optional

from pendulum import DateTime

from copernicusmarine.catalogue_parser.models import (
CopernicusMarineDatasetServiceType,
CopernicusMarineServiceFormat,
Expand Down Expand Up @@ -51,8 +52,8 @@ def subset_function(
minimum_depth: Optional[float],
maximum_depth: Optional[float],
vertical_dimension_as_originally_produced: bool,
start_datetime: Optional[datetime],
end_datetime: Optional[datetime],
start_datetime: Optional[DateTime],
end_datetime: Optional[DateTime],
subset_method: SubsetMethod,
output_filename: Optional[str],
file_format: FileFormat,
Expand Down
61 changes: 38 additions & 23 deletions copernicusmarine/core_functions/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import logging
import pathlib
import re
from datetime import datetime, timezone
from importlib.metadata import version
from typing import (
Any,
Expand All @@ -12,6 +11,7 @@
Iterable,
Iterator,
List,
Literal,
Optional,
Tuple,
TypeVar,
Expand All @@ -21,7 +21,10 @@
import cftime
import numpy
import pandas as pd
import pendulum
import pendulum.exceptions
import xarray
from pendulum import DateTime
from requests import PreparedRequest

from copernicusmarine import __version__ as copernicusmarine_version
Expand All @@ -48,11 +51,6 @@
"%Y-%m-%d %H:%M:%S.%f%Z",
]

DATETIME_NON_ISO_FORMATS = [
"%Y",
"%Y-%m-%dT%H:%M:%S.%fZ",
]


def get_unique_filename(
filepath: pathlib.Path, overwrite_option: bool
Expand Down Expand Up @@ -129,24 +127,41 @@ class WrongDatetimeFormat(Exception):
pass


def datetime_parser(string: str) -> datetime:
if string == "now":
return datetime.now(tz=timezone.utc).replace(tzinfo=None)
def datetime_parser(date: Union[str, numpy.datetime64]) -> DateTime:
if date == "now":
return pendulum.now(tz="UTC")
try:
parsed_datetime = datetime.fromisoformat(string)
if parsed_datetime.tzinfo is None:
return parsed_datetime
else:
return parsed_datetime.astimezone(timezone.utc).replace(
tzinfo=None
)
except ValueError:
for datetime_format in DATETIME_NON_ISO_FORMATS:
try:
return datetime.strptime(string, datetime_format)
except ValueError:
pass
raise WrongDatetimeFormat(string)
if isinstance(date, numpy.datetime64):
date = str(date)
parsed_datetime = pendulum.parse(date)
# ignoring types because one needs to pass
# `exact=True` to `parse` method to get
# something else than `pendulum.DateTime`
return parsed_datetime # type: ignore
except pendulum.exceptions.ParserError:
pass
raise WrongDatetimeFormat(date)


def timestamp_parser(
    timestamp: Union[int, float], unit: Literal["s", "ms"] = "ms"
) -> DateTime:
    """
    Convert a numeric epoch timestamp to a pendulum DateTime object in UTC.

    The timestamp is interpreted as milliseconds by default. The unit can
    be changed to seconds by passing "s" as the unit; any other value is
    treated as milliseconds.
    """
    # The value handed to pendulum.from_timestamp must be in seconds, so a
    # millisecond timestamp is divided by 1e3 (1_000). The float literal
    # `10e3` evaluates to 10_000 and would shrink every millisecond
    # timestamp by an extra factor of ten.
    conversion_factor = 1 if unit == "s" else 1e3
    return pendulum.from_timestamp(timestamp / conversion_factor, tz="UTC")


def timestamp_or_datestring_to_datetime(
    date: Union[str, int, numpy.datetime64]
) -> DateTime:
    """
    Turn either an integer timestamp or a date string / numpy datetime64
    into a DateTime by dispatching to the matching parser.
    """
    # Anything that is not an int is handled by the date parser.
    if not isinstance(date, int):
        return datetime_parser(date)
    # Integers go through timestamp_parser with its default unit.
    return timestamp_parser(date)


def convert_datetime64_to_netcdf_timestamp(
Expand Down
10 changes: 5 additions & 5 deletions copernicusmarine/download_functions/download_arco_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,12 @@
LongitudeParameters,
TemporalParameters,
)
from copernicusmarine.download_functions.subset_xarray import (
date_to_datetime,
subset,
)
from copernicusmarine.download_functions.subset_xarray import subset
from copernicusmarine.download_functions.utils import (
FileFormat,
get_filename,
get_formatted_dataset_size_estimation,
timestamp_or_datestring_to_datetime,
)

logger = logging.getLogger("copernicusmarine")
Expand Down Expand Up @@ -153,7 +151,9 @@ def download_zarr(
)
start_datetime = subset_request.start_datetime
if dataset_valid_start_date:
minimum_start_date = date_to_datetime(dataset_valid_start_date)
minimum_start_date = timestamp_or_datestring_to_datetime(
dataset_valid_start_date
)
if (
not subset_request.start_datetime
or subset_request.start_datetime < minimum_start_date
Expand Down
Loading