Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FIX-2633: Use correct time indices when running historical forecasts with output_chunk_shift in regression models #2634

Merged
merged 5 commits into from
Dec 31, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co
- New model: `StatsForecastAutoTBATS`. This model offers the [AutoTBATS](https://nixtlaverse.nixtla.io/statsforecast/src/core/models.html#autotbats) model from Nixtla's `statsforecasts` library. [#2611](https://github.com/unit8co/darts/pull/2611) by [He Weilin](https://github.com/cnhwl).

**Fixed**
- Fixed a bug when performing optimized historical forecasts with `stride=1` using a `RegressionModel` with `output_chunk_shift>=1` and `output_chunk_length=1`, where the forecast time index was not properly shifted. [#2634](https://github.com/unit8co/darts/pull/2634) by [Mattias De Charleroy](https://github.com/MattiasDC).

**Dependencies**

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,20 @@
from darts.utils.likelihood_models import GaussianLikelihood, QuantileRegression

models = [LinearRegressionModel, NaiveDrift]
models_reg_no_cov_cls_kwargs = [(LinearRegressionModel, {"lags": 8}, {}, (8, 1))]
# Each entry is (model_cls, model_kwargs, extra_kwargs, bounds) where
# bounds = (input window, output window); tests derive the minimum train
# length as bounds[0] + bounds[1] + 1 (sklearn models need >= 2 samples).
# NOTE(review): the third element is always {} here — presumably extra
# model/fit kwargs; confirm against the CatBoost entry below.
models_reg_no_cov_cls_kwargs = [
    (LinearRegressionModel, {"lags": 8}, {}, (8, 1)),
    # output_chunk_length only
    (LinearRegressionModel, {"lags": 5, "output_chunk_length": 2}, {}, (5, 2)),
    # output_chunk_shift only
    (LinearRegressionModel, {"lags": 5, "output_chunk_shift": 1}, {}, (5, 2)),
    # output_chunk_shift + output_chunk_length only
    (
        LinearRegressionModel,
        {"lags": 5, "output_chunk_shift": 1, "output_chunk_length": 2},
        {},
        (5, 3),
    ),
]
if not isinstance(CatBoostModel, NotImportedModule):
models_reg_no_cov_cls_kwargs.append((
CatBoostModel,
Expand Down Expand Up @@ -650,13 +663,41 @@ def test_historical_forecasts_negative_rangeindex(self):

@pytest.mark.parametrize("config", models_reg_no_cov_cls_kwargs)
def test_historical_forecasts(self, config):
train_length = 10
forecast_horizon = 8
        # if no fit and retrain=false, should fit at first iteration
model_cls, kwargs, model_kwarg, bounds = config
model = model_cls(**kwargs, **model_kwarg)
# set train length to be the minimum required training length
# +1 as sklearn models require min 2 train samples
train_length = bounds[0] + bounds[1] + 1

# time index
if model.output_chunk_shift > 0:
with pytest.raises(ValueError) as err:
forecasts = model.historical_forecasts(
series=self.ts_pass_val,
forecast_horizon=forecast_horizon,
stride=1,
train_length=train_length,
retrain=True,
overlap_end=False,
)
assert str(err.value).startswith(
"Cannot perform auto-regression `(n > output_chunk_length)`"
)
# continue the test without auto-regression if we are using shifts
forecast_horizon = model.output_chunk_length

# time index without train length
forecasts_no_train_length = model.historical_forecasts(
series=self.ts_pass_val,
forecast_horizon=forecast_horizon,
stride=1,
train_length=None,
retrain=True,
overlap_end=False,
)

# time index with minimum train length
forecasts = model.historical_forecasts(
series=self.ts_pass_val,
forecast_horizon=forecast_horizon,
Expand All @@ -666,14 +707,11 @@ def test_historical_forecasts(self, config):
overlap_end=False,
)

assert len(forecasts_no_train_length) == len(forecasts)

theorical_forecast_length = (
self.ts_val_length
- max([
(
bounds[0] + bounds[1] + 1
), # +1 as sklearn models require min 2 train samples
train_length,
]) # because we train
- train_length # because we train
- forecast_horizon # because we have overlap_end = False
+ 1 # because we include the first element
)
Expand Down Expand Up @@ -1144,15 +1182,32 @@ def test_historical_forecasts_start_too_early(self, caplog, config):

@pytest.mark.parametrize("config", models_reg_no_cov_cls_kwargs)
def test_regression_auto_start_multiple_no_cov(self, config):
train_length = 15
        # minimum required train length (+1 since sklearn models require 2 samples)
forecast_horizon = 10
model_cls, kwargs, model_kwargs, bounds = config
train_length = bounds[0] + bounds[1] + 1
model = model_cls(
**kwargs,
**model_kwargs,
)
model.fit(self.ts_pass_train)

if model.output_chunk_shift > 0:
with pytest.raises(ValueError) as err:
forecasts = model.historical_forecasts(
series=[self.ts_pass_val, self.ts_pass_val],
forecast_horizon=forecast_horizon,
train_length=train_length,
stride=1,
retrain=True,
overlap_end=False,
)
assert str(err.value).startswith(
"Cannot perform auto-regression `(n > output_chunk_length)`"
)
            # continue the test without auto-regression if we are using shifts
forecast_horizon = model.output_chunk_length

forecasts = model.historical_forecasts(
series=[self.ts_pass_val, self.ts_pass_val],
forecast_horizon=forecast_horizon,
Expand All @@ -1168,12 +1223,7 @@ def test_regression_auto_start_multiple_no_cov(self, config):

theorical_forecast_length = (
self.ts_val_length
- max([
(
bounds[0] + bounds[1] + 1
), # +1 as sklearn models require min 2 train samples
train_length,
]) # because we train
- train_length
- forecast_horizon # because we have overlap_end = False
+ 1 # because we include the first element
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,19 +162,24 @@ def _optimized_historical_forecasts_last_points_only(
else:
forecast = forecast[:, 0]

if (
stride == 1
and model.output_chunk_length == 1
and model.output_chunk_shift == 0
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This if-check is extended with an additional condition, which fixes the issue.

):
times = times[0]
else:
times = generate_index(
start=hist_fct_start
+ (forecast_horizon + model.output_chunk_shift - 1) * freq,
length=forecast.shape[0],
freq=freq * stride,
name=series_.time_index.name,
)

forecasts_list.append(
TimeSeries.from_times_and_values(
times=(
times[0]
if stride == 1 and model.output_chunk_length == 1
else generate_index(
start=hist_fct_start
+ (forecast_horizon + model.output_chunk_shift - 1) * freq,
length=forecast.shape[0],
freq=freq * stride,
name=series_.time_index.name,
)
),
times=times,
values=forecast,
columns=forecast_components,
static_covariates=series_.static_covariates,
Expand Down
Loading