Skip to content

Commit

Permalink
fix: Sortedness was incorrectly being preserved in dt.offset_by when …
Browse files Browse the repository at this point in the history
…offsetting by non-constant durations in the timezone-naive case (#19616)
  • Loading branch information
MarcoGorelli authored Nov 4, 2024
1 parent e98fb41 commit 99b4a69
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 26 deletions.
44 changes: 22 additions & 22 deletions crates/polars-time/src/offset_by.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,20 +58,35 @@ fn apply_offsets_to_datetime(
}

pub fn impl_offset_by(ts: &Series, offsets: &Series) -> PolarsResult<Series> {
let preserve_sortedness: bool;
let offsets = offsets.str()?;
let out = match ts.dtype() {
let dtype = ts.dtype();

// Sortedness may not be preserved for non-constant durations,
// see https://github.com/pola-rs/polars/issues/19608 for a counterexample.
// Constant durations (e.g. 2 hours) always preserve sortedness.
let tz = match dtype {
DataType::Date => None,
DataType::Datetime(_, tz) => tz.clone(),
_ => polars_bail!(InvalidOperation: "expected Date or Datetime, got {}", dtype),
};
let preserve_sortedness = match offsets.len() {
1 => match offsets.get(0) {
Some(offset) => {
let offset = Duration::parse(offset);
offset.is_constant_duration(tz.as_deref())
},
None => false,
},
_ => false,
};

let out = match dtype {
DataType::Date => {
let ts = ts
.cast(&DataType::Datetime(TimeUnit::Milliseconds, None))
.unwrap();
let datetime = ts.datetime().unwrap();
let out = apply_offsets_to_datetime(datetime, offsets, None)?;
// sortedness is only guaranteed to be preserved if a constant offset is being added to every datetime
preserve_sortedness = match offsets.len() {
1 => offsets.get(0).is_some(),
_ => false,
};
out.cast(&DataType::Datetime(TimeUnit::Milliseconds, None))
.unwrap()
.cast(&DataType::Date)
Expand All @@ -86,21 +101,6 @@ pub fn impl_offset_by(ts: &Series, offsets: &Series) -> PolarsResult<Series> {
},
_ => apply_offsets_to_datetime(datetime, offsets, None)?,
};
// Sortedness may not be preserved when crossing daylight savings time boundaries
// for calendar-aware durations.
// Constant durations (e.g. 2 hours) always preserve sortedness.
preserve_sortedness = match offsets.len() {
1 => match offsets.get(0) {
Some(offset) => {
let offset = Duration::parse(offset);
tz.is_none()
|| tz.as_deref() == Some("UTC")
|| offset.is_constant_duration(tz.as_deref())
},
None => false,
},
_ => false,
};
out.cast(&DataType::Datetime(*tu, tz.clone()))
},
dt => polars_bail!(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,9 +178,9 @@ def test_local_time_before_epoch(time_unit: TimeUnit) -> None:
(None, "1d", True),
("Europe/London", "1d", False),
("UTC", "1d", True),
(None, "1mo", True),
("Europe/London", "1mo", False),
("UTC", "1mo", True),
(None, "1m", True),
("Europe/London", "1m", True),
("UTC", "1m", True),
(None, "1w", True),
("Europe/London", "1w", False),
("UTC", "1w", True),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pytest

import polars as pl
from polars.testing import assert_series_equal
from polars.testing import assert_frame_equal, assert_series_equal

if TYPE_CHECKING:
from polars._typing import TimeUnit
Expand Down Expand Up @@ -116,3 +116,30 @@ def test_datetime_offset_by(
time_zone
)
assert_series_equal(result, expected)


def test_offset_by_unique_29_feb_19608() -> None:
df20 = pl.select(
t=pl.datetime_range(
pl.datetime(2020, 2, 28),
pl.datetime(2020, 3, 1),
closed="left",
time_unit="ms",
interval="8h",
time_zone="UTC",
),
).with_columns(x=pl.int_range(pl.len()))
df19 = df20.with_columns(pl.col("t").dt.offset_by("-1y"))
result = df19.unique("t", keep="first").sort("t")
expected = pl.DataFrame(
{
"t": [
datetime(2019, 2, 28),
datetime(2019, 2, 28, 8),
datetime(2019, 2, 28, 16),
],
"x": [0, 1, 2],
},
schema_overrides={"t": pl.Datetime("ms", "UTC")},
)
assert_frame_equal(result, expected)

0 comments on commit 99b4a69

Please sign in to comment.