Skip to content

Commit

Permalink
Feat/improve timeseries (#2196)
Browse files Browse the repository at this point in the history
* found major performance boost for time series creation

* first boosted time series version

* improve slicing with integers

* improve slicing with time stamps

* improve slicing with time stamps

* update from_xarray

* improve from_group_dataframe()

* remove test time series

* remove old time series

* add option to drop group columns from from_group_dataframe

* update changelog

* apply suggestions from PR review
  • Loading branch information
dennisbader authored Feb 2, 2024
1 parent 0b4dcf0 commit 8cb04f6
Show file tree
Hide file tree
Showing 3 changed files with 330 additions and 203 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ but cannot always guarantee backwards compatibility. Changes that may **break co

### For users of the library:
**Improved**
- Improvements to `TimeSeries`: [#2196](https://github.com/unit8co/darts/pull/2196) by [Dennis Bader](https://github.com/dennisbader).
- 🚀🚀🚀 Significant performance boosts for several `TimeSeries` methods, resulting in increased efficiency across the entire `Darts` library. Up to 2x faster creation times for series indexed with "regular" frequencies (e.g. daily, hourly, ...), and >100x for series indexed with "special" frequencies (e.g. "W-MON", ...). Affects:
- All `TimeSeries` creation methods
- Additional boosts for slicing with integers and Timestamps
- Additional boosts for `from_group_dataframe()` by performing some of the heavy-duty computations on the entire DataFrame, rather than iteratively on the group level.
- Added option to exclude some `group_cols` from being added as static covariates when using `TimeSeries.from_group_dataframe()` with parameter `drop_group_cols`.

**Fixed**

Expand Down
87 changes: 76 additions & 11 deletions darts/tests/test_timeseries_static_covariates.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,27 +154,92 @@ def test_timeseries_from_longitudinal_df(self):
)
assert (ts.static_covariates_values(copy=False) == [[i, j, 1]]).all()

df = copy.deepcopy(self.df_long_multi)
df.loc[:, "non_static"] = np.arange(len(df))
# non static columns as static columns should raise an error
with pytest.raises(ValueError):
# drop group columns gives same time series with dropped static covariates
# drop first column
ts_groups4 = TimeSeries.from_group_dataframe(
df=self.df_long_multi,
group_cols=["st1", "st2"],
static_cols=["constant"],
time_col="times",
value_cols=value_cols,
drop_group_cols=["st1"],
)
assert len(ts_groups4) == self.n_groups * 2
for idx, ts in enumerate(ts_groups4):
j = idx % 2
assert ts.static_covariates.shape == (1, 2)
assert ts.static_covariates.columns.equals(pd.Index(["st2", "constant"]))
assert (ts.static_covariates_values(copy=False) == [[j, 1]]).all()

# drop last column
ts_groups5 = TimeSeries.from_group_dataframe(
df=self.df_long_multi,
group_cols=["st1", "st2"],
static_cols=["constant"],
time_col="times",
value_cols=value_cols,
drop_group_cols=["st2"],
)
assert len(ts_groups5) == self.n_groups * 2
for idx, ts in enumerate(ts_groups5):
i = idx // 2
assert ts.static_covariates.shape == (1, 2)
assert ts.static_covariates.columns.equals(pd.Index(["st1", "constant"]))
assert (ts.static_covariates_values(copy=False) == [[i, 1]]).all()

# drop all columns
ts_groups6 = TimeSeries.from_group_dataframe(
df=self.df_long_multi,
group_cols=["st1", "st2"],
static_cols=["constant"],
time_col="times",
value_cols=value_cols,
drop_group_cols=["st1", "st2"],
)
assert len(ts_groups6) == self.n_groups * 2
for ts in ts_groups6:
assert ts.static_covariates.shape == (1, 1)
assert ts.static_covariates.columns.equals(pd.Index(["constant"]))
assert (ts.static_covariates_values(copy=False) == [[1]]).all()

# drop all static covariates (no `static_cols`, all `group_cols` dropped)
ts_groups7 = TimeSeries.from_group_dataframe(
df=self.df_long_multi,
group_cols=["st1", "st2"],
time_col="times",
value_cols=value_cols,
drop_group_cols=["st1", "st2"],
)
assert len(ts_groups7) == self.n_groups * 2
for ts in ts_groups7:
assert ts.static_covariates is None

def test_from_group_dataframe_invalid_drop_cols(self):
# drop col is not part of `group_cols`
with pytest.raises(ValueError) as err:
_ = TimeSeries.from_group_dataframe(
df=df,
df=self.df_long_multi,
group_cols=["st1"],
static_cols=["non_static"],
time_col="times",
value_cols=value_cols,
value_cols="a",
drop_group_cols=["invalid"],
)
assert str(err.value).endswith("received: {'invalid'}.")

def test_from_group_dataframe_groups_too_short(self):
# groups that are too short for TimeSeries requirements should raise an error
with pytest.raises(ValueError):
df = copy.deepcopy(self.df_long_multi)
df.loc[:, "non_static"] = np.arange(len(df))
with pytest.raises(ValueError) as err:
_ = TimeSeries.from_group_dataframe(
df=df,
group_cols=["st1", "non_static"],
static_cols=None,
group_cols="non_static",
time_col="times",
value_cols=value_cols,
value_cols="a",
)
assert str(err.value).startswith(
"The time index of the provided DataArray is missing the freq attribute"
)

def test_with_static_covariates_univariate(self):
ts = linear_timeseries(length=10)
Expand Down
Loading

0 comments on commit 8cb04f6

Please sign in to comment.