-
Notifications
You must be signed in to change notification settings - Fork 121
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Datetime(time_unit, time_zone)
and Duration(time_unit)
types
#960
Changes from 6 commits
121f6f8
4896df2
cd2ed40
eb1468e
e71f9c3
32385d0
3abeaf8
c5b7635
4415e3c
5309d4f
85fdd80
91bfb7a
20e36a1
ec1cb5e
2147ec6
0f69ec1
22836a0
a1f56bc
a84480d
80a574d
916eac5
e94b517
180b86e
da884e8
114be74
587d917
dd050a8
b4de1f7
458f2a2
34c27ef
0de71a6
d105911
a773d85
0149431
2249af0
942a77b
ad38667
38898a8
43da4c3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,10 @@ | ||
from __future__ import annotations | ||
|
||
import re | ||
from typing import TYPE_CHECKING | ||
from typing import Any | ||
from typing import Iterable | ||
from typing import Literal | ||
from typing import TypeVar | ||
|
||
from narwhals.dependencies import get_cudf | ||
|
@@ -221,6 +223,12 @@ def translate_dtype(column: Any) -> DType: | |
from narwhals import dtypes | ||
|
||
dtype = column.dtype | ||
|
||
pd_datetime_rgx = ( | ||
r"^datetime64\[(?P<time_unit>ms|us|ns)(?:, (?P<time_zone>[a-zA-Z\/]+))?\]$" | ||
) | ||
pa_datetime_rgx = r"^timestamp\[(?P<time_unit>ms|us|ns)(?:, tz=(?P<time_zone>[a-zA-Z\/]+))?\]\[pyarrow\]$" | ||
|
||
if str(dtype) in ("int64", "Int64", "Int64[pyarrow]", "int64[pyarrow]"): | ||
return dtypes.Int64() | ||
if str(dtype) in ("int32", "Int32", "Int32[pyarrow]", "int32[pyarrow]"): | ||
|
@@ -264,16 +272,15 @@ def translate_dtype(column: Any) -> DType: | |
return dtypes.Boolean() | ||
if str(dtype) in ("category",) or str(dtype).startswith("dictionary<"): | ||
return dtypes.Categorical() | ||
if str(dtype).startswith("datetime64"): | ||
# TODO(Unassigned): different time units and time zones | ||
return dtypes.Datetime() | ||
if (match_ := re.match(pd_datetime_rgx, str(dtype))) or ( | ||
match_ := re.match(pa_datetime_rgx, str(dtype)) | ||
): | ||
time_unit: Literal["us", "ns", "ms"] = match_.group("time_unit") # type: ignore[assignment] | ||
time_zone: str | None = match_.group("time_zone") | ||
return dtypes.Datetime(time_unit, time_zone) | ||
if str(dtype).startswith("timedelta64") or str(dtype).startswith("duration"): | ||
# TODO(Unassigned): different time units | ||
return dtypes.Duration() | ||
if str(dtype).startswith("timestamp["): | ||
# pyarrow-backed datetime | ||
# TODO(Unassigned): different time units and time zones | ||
return dtypes.Datetime() | ||
if str(dtype) == "date32[day][pyarrow]": | ||
return dtypes.Date() | ||
if str(dtype) == "object": | ||
|
@@ -321,7 +328,10 @@ def get_dtype_backend(dtype: Any, implementation: Implementation) -> str: | |
|
||
|
||
def narwhals_to_native_dtype( # noqa: PLR0915 | ||
dtype: DType | type[DType], starting_dtype: Any, implementation: Implementation | ||
dtype: DType | type[DType], | ||
starting_dtype: Any, | ||
implementation: Implementation, | ||
backend_version: tuple[int, ...], | ||
) -> Any: | ||
from narwhals import dtypes | ||
|
||
|
@@ -425,10 +435,25 @@ def narwhals_to_native_dtype( # noqa: PLR0915 | |
# convert to it? | ||
return "category" | ||
if isinstance_or_issubclass(dtype, dtypes.Datetime): | ||
# TODO(Unassigned): different time units and time zones | ||
time_unit = getattr(dtype, "time_unit", "us") | ||
time_zone = getattr(dtype, "time_zone", None) | ||
|
||
# Pandas does not support "ms" or "us" time units before version 1.5.0 | ||
# Let's overwrite with "ns" | ||
if implementation is Implementation.PANDAS and backend_version < ( | ||
1, | ||
5, | ||
0, | ||
): # pragma: no cover | ||
time_unit = "ns" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think we can do much else here |
||
|
||
if dtype_backend == "pyarrow-nullable": | ||
return "timestamp[ns][pyarrow]" | ||
return "datetime64[ns]" | ||
tz_part = f", tz={time_zone}" if time_zone else "" | ||
return f"timestamp[{time_unit}{tz_part}][pyarrow]" | ||
else: | ||
tz_part = f", {time_zone}" if time_zone else "" | ||
return f"datetime64[{time_unit}{tz_part}]" | ||
|
||
if isinstance_or_issubclass(dtype, dtypes.Duration): | ||
# TODO(Unassigned): different time units and time zones | ||
if dtype_backend == "pyarrow-nullable": | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
from __future__ import annotations | ||
|
||
from datetime import timezone | ||
from typing import Literal | ||
|
||
import pytest | ||
|
||
import narwhals.stable.v1 as nw | ||
|
||
|
||
@pytest.mark.parametrize("time_unit", ["us", "ns", "ms"]) | ||
@pytest.mark.parametrize("time_zone", ["Europe/Rome", timezone.utc, None]) | ||
def test_datetime_valid( | ||
time_unit: Literal["us", "ns", "ms"], time_zone: str | timezone | None | ||
) -> None: | ||
dtype = nw.Datetime(time_unit=time_unit, time_zone=time_zone) | ||
|
||
assert dtype.time_unit == time_unit | ||
assert isinstance(dtype.time_zone, str) or dtype.time_zone is None | ||
|
||
|
||
@pytest.mark.parametrize("time_unit", ["abc", "s"]) | ||
def test_datetime_invalid(time_unit: str) -> None: | ||
with pytest.raises(ValueError, match="invalid `time_unit`"): | ||
nw.Datetime(time_unit=time_unit) # type: ignore[arg-type] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,8 @@ | ||
from __future__ import annotations | ||
|
||
from datetime import datetime | ||
from datetime import timedelta | ||
from datetime import timezone | ||
from typing import Any | ||
|
||
import pandas as pd | ||
|
@@ -6,6 +11,7 @@ | |
|
||
import narwhals.stable.v1 as nw | ||
from narwhals.utils import parse_version | ||
from tests.utils import compare_dicts | ||
|
||
data = { | ||
"a": [1], | ||
|
@@ -179,3 +185,27 @@ class Banana: | |
|
||
with pytest.raises(AssertionError, match=r"Unknown dtype"): | ||
df.select(nw.col("a").cast(Banana)) | ||
|
||
|
||
def test_cast_datetime_tz_aware(constructor: Any, request: Any) -> None: | ||
if "dask" in str(constructor): | ||
request.applymarker(pytest.mark.xfail) | ||
|
||
data = { | ||
"date": [ | ||
datetime(2024, 1, 1, tzinfo=timezone.utc) + timedelta(days=i) | ||
for i in range(3) | ||
] | ||
} | ||
expected = { | ||
"date": ["2024-01-01 01:00:00", "2024-01-02 01:00:00", "2024-01-03 01:00:00"] | ||
} | ||
|
||
df = nw.from_native(constructor(data)) | ||
result = df.select( | ||
nw.col("date") | ||
.cast(nw.Datetime("ms", time_zone="Europe/Rome")) | ||
.cast(nw.String()) | ||
.str.slice(offset=0, length=19) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 19: number of characters of a `YYYY-MM-DD HH:MM:SS` timestamp string |
||
) | ||
compare_dicts(result, expected) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -75,13 +75,13 @@ def test_cast_date_datetime_pandas() -> None: | |
df = df.select(nw.col("a").cast(nw.Datetime)) | ||
result = nw.to_native(df) | ||
expected = pd.DataFrame({"a": [datetime(2020, 1, 1), datetime(2020, 1, 2)]}).astype( | ||
{"a": "timestamp[ns][pyarrow]"} | ||
{"a": "timestamp[us][pyarrow]"} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Changes the default to the polars one |
||
) | ||
pd.testing.assert_frame_equal(result, expected) | ||
|
||
# pandas: pyarrow datetime to date | ||
# # pandas: pyarrow datetime to date | ||
dfpd = pd.DataFrame({"a": [datetime(2020, 1, 1), datetime(2020, 1, 2)]}).astype( | ||
{"a": "timestamp[ns][pyarrow]"} | ||
{"a": "timestamp[us][pyarrow]"} | ||
) | ||
df = nw.from_native(dfpd) | ||
df = df.select(nw.col("a").cast(nw.Date)) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please try to break these.