From c24cf2714bd19ca5475f7975d146d9091e56ff76 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 8 Oct 2023 00:00:38 +0300 Subject: [PATCH] feat(rust, python): let "ambiguous" take "null" value too --- crates/polars-arrow/src/kernels/time.rs | 9 +++--- .../datetime/replace_time_zone.rs | 25 ++++++++++------ py-polars/polars/expr/datetime.py | 3 ++ py-polars/polars/expr/string.py | 2 ++ py-polars/polars/functions/as_datatype.py | 2 +- py-polars/polars/series/datetime.py | 3 ++ py-polars/polars/series/string.py | 2 ++ py-polars/polars/type_aliases.py | 2 +- .../tests/unit/datatypes/test_temporal.py | 29 +++++++++++++++++++ 9 files changed, 62 insertions(+), 15 deletions(-) diff --git a/crates/polars-arrow/src/kernels/time.rs b/crates/polars-arrow/src/kernels/time.rs index b901c9cb4bff..14260d70f203 100644 --- a/crates/polars-arrow/src/kernels/time.rs +++ b/crates/polars-arrow/src/kernels/time.rs @@ -7,13 +7,14 @@ pub fn convert_to_naive_local( to_tz: &Tz, ndt: NaiveDateTime, ambiguous: &str, -) -> Result<NaiveDateTime> { +) -> Result<Option<NaiveDateTime>> { let ndt = from_tz.from_utc_datetime(&ndt).naive_local(); match to_tz.from_local_datetime(&ndt) { - LocalResult::Single(dt) => Ok(dt.naive_utc()), + LocalResult::Single(dt) => Ok(Some(dt.naive_utc())), LocalResult::Ambiguous(dt_earliest, dt_latest) => match ambiguous { - "earliest" => Ok(dt_earliest.naive_utc()), - "latest" => Ok(dt_latest.naive_utc()), + "null" => Ok(None), + "earliest" => Ok(Some(dt_earliest.naive_utc())), + "latest" => Ok(Some(dt_latest.naive_utc())), "raise" => Err(ArrowError::InvalidArgumentError( format!("datetime '{}' is ambiguous in time zone '{}'. 
Please use `ambiguous` to tell how it should be localized.", ndt, to_tz) )), diff --git a/crates/polars-ops/src/chunked_array/datetime/replace_time_zone.rs b/crates/polars-ops/src/chunked_array/datetime/replace_time_zone.rs index cc3713d91562..f5a866eea295 100644 --- a/crates/polars-ops/src/chunked_array/datetime/replace_time_zone.rs +++ b/crates/polars-ops/src/chunked_array/datetime/replace_time_zone.rs @@ -43,21 +43,28 @@ pub fn replace_time_zone( }; let out = match ambiguous.len() { 1 => match unsafe { ambiguous.get_unchecked(0) } { - Some(ambiguous) => datetime.0.try_apply(|timestamp| { - let ndt = timestamp_to_datetime(timestamp); - Ok(datetime_to_timestamp(convert_to_naive_local( - &from_tz, &to_tz, ndt, ambiguous, - )?)) - }), + Some(ambiguous) => { + let iter = datetime.0.downcast_iter().map(|arr| { + let element_iter = arr.iter().map(|timestamp_opt| match timestamp_opt { + Some(timestamp) => { + let ndt = timestamp_to_datetime(*timestamp); + let res = convert_to_naive_local(&from_tz, &to_tz, ndt, ambiguous)?; + Ok::<_, PolarsError>(res.map(datetime_to_timestamp)) + }, + None => Ok(None), + }); + element_iter.try_collect_arr() + }); + ChunkedArray::try_from_chunk_iter(datetime.0.name(), iter) + }, _ => Ok(datetime.0.apply(|_| None)), }, _ => try_binary_elementwise(datetime, ambiguous, |timestamp_opt, ambiguous_opt| { match (timestamp_opt, ambiguous_opt) { (Some(timestamp), Some(ambiguous)) => { let ndt = timestamp_to_datetime(timestamp); - Ok(Some(datetime_to_timestamp(convert_to_naive_local( - &from_tz, &to_tz, ndt, ambiguous, - )?))) + let res = convert_to_naive_local(&from_tz, &to_tz, ndt, ambiguous)?; + Ok(res.map(datetime_to_timestamp)) }, _ => Ok(None), } diff --git a/py-polars/polars/expr/datetime.py b/py-polars/polars/expr/datetime.py index bfae50dc1056..9d97300f459c 100644 --- a/py-polars/polars/expr/datetime.py +++ b/py-polars/polars/expr/datetime.py @@ -61,6 +61,7 @@ def truncate( - ``'raise'`` (default): raise - ``'earliest'``: use the earliest 
datetime - ``'latest'``: use the latest datetime + - ``'null'``: set to null Notes ----- @@ -264,6 +265,7 @@ def round( - ``'raise'`` (default): raise - ``'earliest'``: use the earliest datetime - ``'latest'``: use the latest datetime + - ``'null'``: set to null Notes ----- @@ -1379,6 +1381,7 @@ def replace_time_zone( - ``'raise'`` (default): raise - ``'earliest'``: use the earliest datetime - ``'latest'``: use the latest datetime + - ``'null'``: set to null Examples -------- diff --git a/py-polars/polars/expr/string.py b/py-polars/polars/expr/string.py index 5ae5708f27bc..c63c24145c7f 100644 --- a/py-polars/polars/expr/string.py +++ b/py-polars/polars/expr/string.py @@ -137,6 +137,7 @@ def to_datetime( - ``'raise'`` (default): raise - ``'earliest'``: use the earliest datetime - ``'latest'``: use the latest datetime + - ``'null'``: set to null Examples -------- @@ -252,6 +253,7 @@ def strptime( - ``'raise'`` (default): raise - ``'earliest'``: use the earliest datetime - ``'latest'``: use the latest datetime + - ``'null'``: set to null Notes ----- diff --git a/py-polars/polars/functions/as_datatype.py b/py-polars/polars/functions/as_datatype.py index 14e4d19a23ac..e9792adb7b37 100644 --- a/py-polars/polars/functions/as_datatype.py +++ b/py-polars/polars/functions/as_datatype.py @@ -75,7 +75,7 @@ def datetime_( - ``'raise'`` (default): raise - ``'earliest'``: use the earliest datetime - ``'latest'``: use the latest datetime - + - ``'null'``: set to null Returns ------- diff --git a/py-polars/polars/series/datetime.py b/py-polars/polars/series/datetime.py index 37bf28a65ce4..03fdb565721d 100644 --- a/py-polars/polars/series/datetime.py +++ b/py-polars/polars/series/datetime.py @@ -1028,6 +1028,7 @@ def replace_time_zone( - ``'raise'`` (default): raise - ``'earliest'``: use the earliest datetime - ``'latest'``: use the latest datetime + - ``'null'``: set to null Examples -------- @@ -1468,6 +1469,7 @@ def truncate( - ``'raise'`` (default): raise - ``'earliest'``: use 
the earliest datetime - ``'latest'``: use the latest datetime + - ``'null'``: set to null Notes ----- @@ -1667,6 +1669,7 @@ def round( - ``'raise'`` (default): raise - ``'earliest'``: use the earliest datetime - ``'latest'``: use the latest datetime + - ``'null'``: set to null Returns ------- diff --git a/py-polars/polars/series/string.py b/py-polars/polars/series/string.py index 777daa8fd92e..aa59e25378a1 100644 --- a/py-polars/polars/series/string.py +++ b/py-polars/polars/series/string.py @@ -137,6 +137,7 @@ def to_datetime( - ``'raise'`` (default): raise - ``'earliest'``: use the earliest datetime - ``'latest'``: use the latest datetime + - ``'null'``: set to null Examples -------- @@ -235,6 +236,7 @@ def strptime( - ``'raise'`` (default): raise - ``'earliest'``: use the earliest datetime - ``'latest'``: use the latest datetime + - ``'null'``: set to null Notes ----- diff --git a/py-polars/polars/type_aliases.py b/py-polars/polars/type_aliases.py index 18809e330086..1e5a6f87b152 100644 --- a/py-polars/polars/type_aliases.py +++ b/py-polars/polars/type_aliases.py @@ -144,7 +144,7 @@ ] # ListToStructWidthStrategy # The following have no equivalent on the Rust side -Ambiguous: TypeAlias = Literal["earliest", "latest", "raise"] +Ambiguous: TypeAlias = Literal["earliest", "latest", "raise", "null"] ConcatMethod = Literal[ "vertical", "vertical_relaxed", "diagonal", "horizontal", "align" ] diff --git a/py-polars/tests/unit/datatypes/test_temporal.py b/py-polars/tests/unit/datatypes/test_temporal.py index 4ed84f2cd882..632f932d7213 100644 --- a/py-polars/tests/unit/datatypes/test_temporal.py +++ b/py-polars/tests/unit/datatypes/test_temporal.py @@ -2234,6 +2234,35 @@ def test_replace_time_zone_sortedness_expressions( assert result["ts"].flags["SORTED_ASC"] == expected_sortedness +def test_replace_time_zone_ambiguous_null() -> None: + df = pl.DataFrame( + { + "a": [datetime(2020, 10, 25, 1)] * 3, + "b": ["earliest", "latest", "null"], + } + ) + # expression containing 
'null' + result = df.select( + pl.col("a").dt.replace_time_zone("Europe/London", ambiguous=pl.col("b")) + )["a"] + expected = [ + datetime(2020, 10, 25, 1, fold=0, tzinfo=ZoneInfo("Europe/London")), + datetime(2020, 10, 25, 1, fold=1, tzinfo=ZoneInfo("Europe/London")), + None, + ] + assert result[0] == expected[0] + assert result[1] == expected[1] + assert result[2] == expected[2] + + # single 'null' value + result = df.select( + pl.col("a").dt.replace_time_zone("Europe/London", ambiguous="null") + )["a"] + assert result[0] is None + assert result[1] is None + assert result[2] is None + + def test_use_earliest_deprecation() -> None: # strptime with pytest.warns(