Skip to content

Commit 8c1df8d

Browse files
authored
BUG: Setting DTI/TDI freq affecting other indexes viewing the same data (#33552)
1 parent 77a0f19 commit 8c1df8d

16 files changed

+151
-53
lines changed

Diff for: pandas/core/indexes/datetimelike.py

+102-31
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""
22
Base and utility classes for tseries type pandas objects.
33
"""
4-
from datetime import datetime
4+
from datetime import datetime, timedelta
55
from typing import Any, List, Optional, Union, cast
66

77
import numpy as np
@@ -17,14 +17,18 @@
1717
ensure_int64,
1818
ensure_platform_int,
1919
is_bool_dtype,
20+
is_datetime64_any_dtype,
2021
is_dtype_equal,
2122
is_integer,
2223
is_list_like,
24+
is_object_dtype,
2325
is_period_dtype,
2426
is_scalar,
27+
is_timedelta64_dtype,
2528
)
2629
from pandas.core.dtypes.concat import concat_compat
2730
from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries
31+
from pandas.core.dtypes.missing import isna
2832

2933
from pandas.core import algorithms
3034
from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray
@@ -41,7 +45,8 @@
4145
from pandas.core.ops import get_op_result_name
4246
from pandas.core.tools.timedeltas import to_timedelta
4347

44-
from pandas.tseries.frequencies import DateOffset
48+
from pandas.tseries.frequencies import DateOffset, to_offset
49+
from pandas.tseries.offsets import Tick
4550

4651
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
4752

@@ -72,13 +77,33 @@ def wrapper(left, right):
7277
return wrapper
7378

7479

80+
def _make_wrapped_arith_op_with_freq(opname: str):
    """
    Dispatch the operation to the underlying ExtensionArray, and infer
    the appropriate frequency for the result.

    Parameters
    ----------
    opname : str
        Name of the arithmetic method to wrap, e.g. "__add__".

    Returns
    -------
    callable
        A method taking ``(self, other)`` whose ``__name__`` is `opname`.
    """
    meth = make_wrapped_arith_op(opname)

    def wrapped(self, other):
        result = meth(self, other)
        if result is NotImplemented:
            # Pass the sentinel through untouched; there is no result
            # object on which a freq could be set.
            return NotImplemented

        # Ask the index which freq the result is expected to carry and
        # pin it on the result.
        new_freq = self._get_addsub_freq(other)
        result._freq = new_freq
        return result

    wrapped.__name__ = opname
    return wrapped
98+
99+
75100
@inherit_names(
76101
["inferred_freq", "_isnan", "_resolution", "resolution"],
77102
DatetimeLikeArrayMixin,
78103
cache=True,
79104
)
80105
@inherit_names(
81-
["mean", "freq", "freqstr", "asi8", "_box_func"], DatetimeLikeArrayMixin,
106+
["mean", "asi8", "_box_func"], DatetimeLikeArrayMixin,
82107
)
83108
class DatetimeIndexOpsMixin(ExtensionIndex):
84109
"""
@@ -446,10 +471,45 @@ def get_indexer_non_unique(self, target):
446471
return ensure_platform_int(indexer), missing
447472

448473
# --------------------------------------------------------------------
474+
# Arithmetic Methods
475+
476+
def _get_addsub_freq(self, other) -> Optional[DateOffset]:
    """
    Find the freq we expect the result of an addition/subtraction operation
    to have.

    Parameters
    ----------
    other : object
        The other operand of the addition/subtraction.

    Returns
    -------
    DateOffset or None
        ``self.freq`` when it is expected to carry over to the result,
        otherwise None.

    Raises
    ------
    NotImplementedError
        If `other` matches none of the operand kinds handled below.
    """
    if is_period_dtype(self.dtype):
        # Only used for ops that stay PeriodDtype
        return self.freq
    if self.freq is None:
        return None
    if lib.is_scalar(other) and isna(other):
        # NA scalars are checked before the isinstance branches below.
        return None

    if isinstance(other, (Tick, timedelta, np.timedelta64)):
        # The freq is kept only when it is itself a Tick.
        return self.freq if isinstance(self.freq, Tick) else None
    if isinstance(other, DateOffset):
        # otherwise just DatetimeArray
        return None  # TODO: Should we infer if it matches self.freq * n?
    if isinstance(other, (datetime, np.datetime64)):
        return self.freq

    if is_timedelta64_dtype(other):
        return None  # TODO: shouldn't we be able to do self.freq + other.freq?
    if is_object_dtype(other):
        return None  # TODO: is this quite right? sometimes we unpack singletons
    if is_datetime64_any_dtype(other):
        return None  # TODO: shouldn't we be able to do self.freq + other.freq?

    raise NotImplementedError(
        f"Cannot determine the result freq for operand of type "
        f"{type(other).__name__}"
    )
449509

450-
__add__ = make_wrapped_arith_op("__add__")
510+
__add__ = _make_wrapped_arith_op_with_freq("__add__")
511+
__sub__ = _make_wrapped_arith_op_with_freq("__sub__")
451512
__radd__ = make_wrapped_arith_op("__radd__")
452-
__sub__ = make_wrapped_arith_op("__sub__")
453513
__rsub__ = make_wrapped_arith_op("__rsub__")
454514
__pow__ = make_wrapped_arith_op("__pow__")
455515
__rpow__ = make_wrapped_arith_op("__rpow__")
@@ -558,7 +618,9 @@ def shift(self, periods=1, freq=None):
558618
Index.shift : Shift values of Index.
559619
PeriodIndex.shift : Shift values of PeriodIndex.
560620
"""
561-
result = self._data._time_shift(periods, freq=freq)
621+
arr = self._data.view()
622+
arr._freq = self.freq
623+
result = arr._time_shift(periods, freq=freq)
562624
return type(self)(result, name=self.name)
563625

564626
# --------------------------------------------------------------------
@@ -610,21 +672,40 @@ class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin, Int64Index):
610672
_is_monotonic_increasing = Index.is_monotonic_increasing
611673
_is_monotonic_decreasing = Index.is_monotonic_decreasing
612674
_is_unique = Index.is_unique
675+
_freq = lib.no_default
613676

614-
def _set_freq(self, freq):
677+
@property
678+
def freq(self):
679+
"""
680+
In limited circumstances, our freq may differ from that of our _data.
615681
"""
616-
Set the _freq attribute on our underlying DatetimeArray.
682+
if self._freq is not lib.no_default:
683+
return self._freq
684+
return self._data.freq
617685

618-
Parameters
619-
----------
620-
freq : DateOffset, None, or "infer"
686+
@property
687+
def freqstr(self):
688+
"""
689+
Return the frequency object as a string if it is set, otherwise None.
621690
"""
622-
# GH#29843
623-
self._data._with_freq(freq)
691+
if self.freq is None:
692+
return None
693+
return self.freq.freqstr
624694

625695
def _with_freq(self, freq):
626696
index = self.copy(deep=False)
627-
index._set_freq(freq)
697+
if freq is None:
698+
# Even if we _can_ have a freq, we might want to set it to None
699+
index._freq = None
700+
elif len(self) == 0 and isinstance(freq, DateOffset):
701+
# Always valid. In the TimedeltaArray case, we assume this
702+
# is a Tick offset.
703+
index._freq = freq
704+
else:
705+
assert freq == "infer", freq
706+
freq = to_offset(self.inferred_freq)
707+
index._freq = freq
708+
628709
return index
629710

630711
def _shallow_copy(self, values=None, name: Label = lib.no_default):
@@ -647,8 +728,7 @@ def _shallow_copy(self, values=None, name: Label = lib.no_default):
647728

648729
@Appender(Index.difference.__doc__)
649730
def difference(self, other, sort=None):
650-
new_idx = super().difference(other, sort=sort)
651-
new_idx._set_freq(None)
731+
new_idx = super().difference(other, sort=sort)._with_freq(None)
652732
return new_idx
653733

654734
def intersection(self, other, sort=False):
@@ -693,7 +773,7 @@ def intersection(self, other, sort=False):
693773
result = Index.intersection(self, other, sort=sort)
694774
if isinstance(result, type(self)):
695775
if result.freq is None:
696-
result._set_freq("infer")
776+
result = result._with_freq("infer")
697777
return result
698778

699779
elif (
@@ -704,14 +784,7 @@ def intersection(self, other, sort=False):
704784
or (not self.is_monotonic or not other.is_monotonic)
705785
):
706786
result = Index.intersection(self, other, sort=sort)
707-
708-
# Invalidate the freq of `result`, which may not be correct at
709-
# this point, depending on the values.
710-
711-
result._set_freq(None)
712-
result = self._shallow_copy(result._data, name=result.name)
713-
if result.freq is None:
714-
result._set_freq("infer")
787+
result = result._with_freq("infer")
715788
return result
716789

717790
# to make our life easier, "sort" the two ranges
@@ -781,10 +854,9 @@ def _fast_union(self, other, sort=None):
781854
left_start = left[0]
782855
loc = right.searchsorted(left_start, side="left")
783856
right_chunk = right._values[:loc]
784-
dates = concat_compat([left._values, right_chunk])
785-
result = self._shallow_copy(dates)
786-
result._set_freq("infer")
857+
dates = concat_compat((left._values, right_chunk))
787858
# TODO: can we infer that it has self.freq?
859+
result = self._shallow_copy(dates)._with_freq("infer")
788860
return result
789861
else:
790862
left, right = other, self
@@ -797,9 +869,8 @@ def _fast_union(self, other, sort=None):
797869
loc = right.searchsorted(left_end, side="right")
798870
right_chunk = right._values[loc:]
799871
dates = concat_compat([left._values, right_chunk])
800-
result = self._shallow_copy(dates)
801-
result._set_freq("infer")
802872
# TODO: can we infer that it has self.freq?
873+
result = self._shallow_copy(dates)._with_freq("infer")
803874
return result
804875
else:
805876
return left
@@ -816,7 +887,7 @@ def _union(self, other, sort):
816887
if this._can_fast_union(other):
817888
result = this._fast_union(other, sort=sort)
818889
if result.freq is None:
819-
result._set_freq("infer")
890+
result = result._with_freq("infer")
820891
return result
821892
else:
822893
i8self = Int64Index._simple_new(self.asi8, name=self.name)

Diff for: pandas/core/indexes/period.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def _new_PeriodIndex(cls, **d):
7171
PeriodArray,
7272
wrap=True,
7373
)
74-
@inherit_names(["is_leap_year", "freq", "_format_native_types"], PeriodArray)
74+
@inherit_names(["is_leap_year", "freq", "freqstr", "_format_native_types"], PeriodArray)
7575
class PeriodIndex(DatetimeIndexOpsMixin, Int64Index):
7676
"""
7777
Immutable ndarray holding ordinal values indicating regular periods in time.

Diff for: pandas/core/resample.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1017,7 +1017,8 @@ def _downsample(self, how, **kwargs):
10171017
if not len(ax):
10181018
# reset to the new freq
10191019
obj = obj.copy()
1020-
obj.index._set_freq(self.freq)
1020+
obj.index = obj.index._with_freq(self.freq)
1021+
assert obj.index.freq == self.freq, (obj.index.freq, self.freq)
10211022
return obj
10221023

10231024
# do we have a regular frequency

Diff for: pandas/tests/arithmetic/test_datetime64.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -2052,7 +2052,7 @@ def test_dti_add_tdi(self, tz_naive_fixture):
20522052
dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10)
20532053
tdi = pd.timedelta_range("0 days", periods=10)
20542054
expected = pd.date_range("2017-01-01", periods=10, tz=tz)
2055-
expected._set_freq(None)
2055+
expected = expected._with_freq(None)
20562056

20572057
# add with TimedeltaIndex
20582058
result = dti + tdi
@@ -2074,7 +2074,7 @@ def test_dti_iadd_tdi(self, tz_naive_fixture):
20742074
dti = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10)
20752075
tdi = pd.timedelta_range("0 days", periods=10)
20762076
expected = pd.date_range("2017-01-01", periods=10, tz=tz)
2077-
expected._set_freq(None)
2077+
expected = expected._with_freq(None)
20782078

20792079
# iadd with TimedeltaIndex
20802080
result = DatetimeIndex([Timestamp("2017-01-01", tz=tz)] * 10)

Diff for: pandas/tests/arithmetic/test_timedelta64.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -544,7 +544,7 @@ def test_tda_add_sub_index(self):
544544
def test_tda_add_dt64_object_array(self, box_df_fail, tz_naive_fixture):
545545
# Result should be cast back to DatetimeArray
546546
dti = pd.date_range("2016-01-01", periods=3, tz=tz_naive_fixture)
547-
dti._set_freq(None)
547+
dti = dti._with_freq(None)
548548
tdi = dti - dti
549549

550550
obj = tm.box_expected(tdi, box_df_fail)

Diff for: pandas/tests/indexes/common.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ def test_ensure_copied_data(self, indices):
267267
if is_datetime64tz_dtype(indices.dtype):
268268
result = result.tz_localize("UTC").tz_convert(indices.tz)
269269
if isinstance(indices, (DatetimeIndex, TimedeltaIndex)):
270-
indices._set_freq(None)
270+
indices = indices._with_freq(None)
271271

272272
tm.assert_index_equal(indices, result)
273273

@@ -397,7 +397,7 @@ def test_where(self, klass):
397397
i = self.create_index()
398398
if isinstance(i, (pd.DatetimeIndex, pd.TimedeltaIndex)):
399399
# where does not preserve freq
400-
i._set_freq(None)
400+
i = i._with_freq(None)
401401

402402
cond = [True] * len(i)
403403
result = i.where(klass(cond))

Diff for: pandas/tests/indexes/datetimelike.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def test_map_dictlike(self, mapper):
8282

8383
# don't compare the freqs
8484
if isinstance(expected, (pd.DatetimeIndex, pd.TimedeltaIndex)):
85-
expected._set_freq(None)
85+
expected = expected._with_freq(None)
8686

8787
result = index.map(mapper(expected, index))
8888
tm.assert_index_equal(result, expected)

Diff for: pandas/tests/indexes/datetimes/test_constructors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ def test_construction_with_alt(self, kwargs, tz_aware_fixture):
131131
def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture):
132132
tz = tz_aware_fixture
133133
i = pd.date_range("20130101", periods=5, freq="H", tz=tz)
134-
i._set_freq(None)
134+
i = i._with_freq(None)
135135
kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()}
136136

137137
if "tz" in kwargs:

Diff for: pandas/tests/indexes/datetimes/test_datetime.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,5 @@ def test_split_non_utc(self):
463463
# GH 14042
464464
indices = pd.date_range("2016-01-01 00:00:00+0200", freq="S", periods=10)
465465
result = np.split(indices, indices_or_sections=[])[0]
466-
expected = indices.copy()
467-
expected._set_freq(None)
466+
expected = indices._with_freq(None)
468467
tm.assert_index_equal(result, expected)

Diff for: pandas/tests/indexes/datetimes/test_ops.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ def test_value_counts_unique(self, tz_naive_fixture):
134134

135135
exp_idx = pd.date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz)
136136
expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")
137-
expected.index._set_freq(None)
137+
expected.index = expected.index._with_freq(None)
138138

139139
for obj in [idx, Series(idx)]:
140140

@@ -406,6 +406,20 @@ def test_freq_setter_errors(self):
406406
with pytest.raises(ValueError, match="Invalid frequency"):
407407
idx._data.freq = "foo"
408408

409+
def test_freq_view_safe(self):
    """
    Check that clearing the freq on one DatetimeIndex leaves the freq of
    the index and array it was built from unchanged.
    """
    # Setting the freq for one DatetimeIndex shouldn't alter the freq
    # for another that views the same data

    dti = pd.date_range("2016-01-01", periods=5)
    dta = dti._data

    # Build a second index from the same array and drop its freq.
    dti2 = DatetimeIndex(dta)._with_freq(None)
    assert dti2.freq is None

    # Original was not altered
    assert dti.freq == "D"
    assert dta.freq == "D"
422+
409423

410424
class TestBusinessDatetimeIndex:
411425
def setup_method(self, method):

Diff for: pandas/tests/indexes/datetimes/test_setops.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -231,9 +231,7 @@ def test_intersection(self, tz, sort):
231231
]:
232232
result = base.intersection(rng)
233233
tm.assert_index_equal(result, expected)
234-
assert result.name == expected.name
235234
assert result.freq == expected.freq
236-
assert result.tz == expected.tz
237235

238236
# non-monotonic
239237
base = DatetimeIndex(
@@ -255,6 +253,7 @@ def test_intersection(self, tz, sort):
255253
# GH 7880
256254
rng4 = date_range("7/1/2000", "7/31/2000", freq="D", tz=tz, name="idx")
257255
expected4 = DatetimeIndex([], tz=tz, name="idx")
256+
assert expected4.freq is None
258257

259258
for (rng, expected) in [
260259
(rng2, expected2),
@@ -265,9 +264,7 @@ def test_intersection(self, tz, sort):
265264
if sort is None:
266265
expected = expected.sort_values()
267266
tm.assert_index_equal(result, expected)
268-
assert result.name == expected.name
269267
assert result.freq is None
270-
assert result.tz == expected.tz
271268

272269
# parametrize over both anchored and non-anchored freqs, as they
273270
# have different code paths

0 commit comments

Comments
 (0)