
Commit

Merge pull request #380 from jdb78/feature/simple_models
Add recurrent and mlp models
jdb78 authored Mar 7, 2021
2 parents 48179d2 + 66cf2e9 commit 884106c
Showing 13 changed files with 823 additions and 32 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -5,12 +5,15 @@
### Added

- Adding a filter functionality to the timeseries dataset (#329)
- Add simple models such as LSTM, GRU and an MLP on the decoder (#380)
- Allow usage of any torch optimizer such as SGD (#380)

### Fixed

- Moving predictions to CPU to avoid running out of memory (#329)
- Correct determination of `output_size` for multi-target forecasting with the TemporalFusionTransformer (#328)
- Tqdm autonotebook fix to work outside of Jupyter (#338)
- Fix issue with yaml serialization for TensorboardLogger (#379)

### Contributors

2 changes: 2 additions & 0 deletions README.md
@@ -49,6 +49,8 @@ documentation with detailed tutorials.
methods in the M4 competition. The M4 competition is arguably the most important benchmark for univariate time series forecasting.
- [DeepAR: Probabilistic forecasting with autoregressive recurrent networks](https://www.sciencedirect.com/science/article/pii/S0169207019301888)
which is one of the most popular forecasting algorithms and is often used as a baseline
- A baseline model that always predicts the latest known value
- Simple standard networks for baselining: LSTM and GRU networks as well as an MLP on the decoder

To implement new models, see the [How to implement new models tutorial](https://pytorch-forecasting.readthedocs.io/en/latest/tutorials/building.html).
It covers basic as well as advanced architectures.
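
A minimal usage sketch of the newly listed baselining networks. This is illustrative only: ``training`` is a placeholder name for an existing TimeSeriesDataSet, and the shown hyperparameters (including ``cell_type``) are assumptions rather than values taken from this diff.

from pytorch_forecasting import RecurrentNetwork

# "training" is a placeholder TimeSeriesDataSet defined elsewhere
rnn = RecurrentNetwork.from_dataset(
    training,
    cell_type="LSTM",  # assumed parameter; "GRU" would select the GRU variant
    hidden_size=32,
)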
2 changes: 2 additions & 0 deletions docs/source/models.rst
@@ -24,6 +24,8 @@ and you should take into account. Here is an overview over the pros and cons of
.. csv-table:: Model comparison
:header: "Name", "Covariates", "Multiple targets", "Regression", "Classification", "Probabilistic", "Uncertainty", "Interactions between series", "Flexible history length", "Cold-start", "Required computational resources (1-5, 5=most)"

:py:class:`~pytorch_forecasting.models.rnn.RecurrentNetwork`, "x", "x", "x", "", "", "", "", "x", "", 2
:py:class:`~pytorch_forecasting.models.mlp.DecoderMLP`, "x", "x", "x", "x", "", "x", "", "x", "x", 1
:py:class:`~pytorch_forecasting.models.nbeats.NBeats`, "", "", "x", "", "", "", "", "", "", 1
:py:class:`~pytorch_forecasting.models.deepar.DeepAR`, "x", "x", "x", "", "x", "x", "", "x", "", 3
:py:class:`~pytorch_forecasting.models.temporal_fusion_transformer.TemporalFusionTransformer`, "x", "x", "x", "x", "", "x", "", "x", "x", 4
4 changes: 4 additions & 0 deletions pytorch_forecasting/__init__.py
@@ -36,9 +36,11 @@
Baseline,
BaseModel,
BaseModelWithCovariates,
DecoderMLP,
DeepAR,
MultiEmbedding,
NBeats,
RecurrentNetwork,
TemporalFusionTransformer,
get_rnn,
)
@@ -85,6 +87,8 @@
"get_embedding_size",
"create_mask",
"to_list",
"RecurrentNetwork",
"DecoderMLP",
]

__version__ = "0.0.0"
4 changes: 4 additions & 0 deletions pytorch_forecasting/models/__init__.py
@@ -9,13 +9,16 @@
)
from pytorch_forecasting.models.baseline import Baseline
from pytorch_forecasting.models.deepar import DeepAR
from pytorch_forecasting.models.mlp import DecoderMLP
from pytorch_forecasting.models.nbeats import NBeats
from pytorch_forecasting.models.nn import GRU, LSTM, MultiEmbedding, get_rnn
from pytorch_forecasting.models.rnn import RecurrentNetwork
from pytorch_forecasting.models.temporal_fusion_transformer import TemporalFusionTransformer

__all__ = [
"NBeats",
"TemporalFusionTransformer",
"RecurrentNetwork",
"DeepAR",
"BaseModel",
"Baseline",
@@ -26,4 +29,5 @@
"LSTM",
"GRU",
"MultiEmbedding",
"DecoderMLP",
]
92 changes: 89 additions & 3 deletions pytorch_forecasting/models/base_model.py
@@ -20,7 +20,16 @@

from pytorch_forecasting.data import TimeSeriesDataSet
from pytorch_forecasting.data.encoders import EncoderNormalizer, GroupNormalizer, MultiNormalizer, NaNLabelEncoder
from pytorch_forecasting.metrics import MASE, SMAPE, DistributionLoss, Metric, MultiLoss
from pytorch_forecasting.metrics import (
MAE,
MASE,
SMAPE,
DistributionLoss,
Metric,
MultiHorizonMetric,
MultiLoss,
QuantileLoss,
)
from pytorch_forecasting.optim import Ranger
from pytorch_forecasting.utils import apply_to_list, create_mask, get_embedding_size, groupby_apply, to_list

@@ -154,6 +163,7 @@ def __init__(
reduce_on_plateau_patience: int = 1000,
reduce_on_plateau_min_lr: float = 1e-5,
weight_decay: float = 0.0,
optimizer_params: Dict[str, Any] = None,
monotone_constaints: Dict[str, int] = {},
output_transformer: Callable = None,
optimizer="ranger",
@@ -177,14 +187,16 @@
reduce_on_plateau_min_lr (float): minimum learning rate for reduce on plateau learning rate scheduler.
Defaults to 1e-5
weight_decay (float): weight decay. Defaults to 0.0.
optimizer_params (Dict[str, Any]): additional parameters for the optimizer. Defaults to {}.
monotone_constaints (Dict[str, int]): dictionary of monotonicity constraints for continuous decoder
variables mapping
position (e.g. ``"0"`` for first position) to constraint (``-1`` for negative and ``+1`` for positive,
larger numbers add more weight to the constraint vs. the loss but are usually not necessary).
This constraint significantly slows down training. Defaults to {}.
output_transformer (Callable): transformer that takes network output and transforms it to prediction space.
Defaults to None which is equivalent to ``lambda out: out["prediction"]``.
optimizer (str): Optimizer, "ranger", "adam" or "adamw". Defaults to "ranger".
optimizer (str): Optimizer, "ranger", "sgd", "adam", "adamw" or class name of optimizer in ``torch.optim``.
Defaults to "ranger".
"""
super().__init__()
# update hparams
@@ -203,6 +215,21 @@ def __init__(
if not hasattr(self, "output_transformer"):
self.output_transformer = output_transformer

@property
def n_targets(self) -> int:
"""
Number of targets to forecast.
Based on loss function.
Returns:
int: number of targets
"""
if isinstance(self.loss, MultiLoss):
return len(self.loss.metrics)
else:
return 1

def transform_output(self, out: Dict[str, torch.Tensor]) -> torch.Tensor:
"""
Extract prediction from network output and rescale it to real space / de-normalize it.
@@ -251,6 +278,52 @@ def transform_output(self, out: Dict[str, torch.Tensor]) -> torch.Tensor:
out = self.output_transformer(out)
return out

@staticmethod
def deduce_default_output_parameters(
dataset: TimeSeriesDataSet, kwargs: Dict[str, Any], default_loss: MultiHorizonMetric = None
) -> Dict[str, Any]:
"""
Deduce default parameters for output for `from_dataset()` method.
Determines ``output_size`` and ``loss`` parameters.
Args:
dataset (TimeSeriesDataSet): timeseries dataset
kwargs (Dict[str, Any]): current hyperparameters
default_loss (MultiHorizonMetric, optional): default loss function.
Defaults to :py:class:`~pytorch_forecasting.metrics.MAE`.
Returns:
Dict[str, Any]: dictionary with ``output_size`` and ``loss``.
"""
# infer output size
def get_output_size(normalizer, loss):
if isinstance(loss, QuantileLoss):
return len(loss.quantiles)
elif isinstance(normalizer, NaNLabelEncoder):
return len(normalizer.classes_)
else:
return 1

# handle multiple targets
new_kwargs = {}
n_targets = len(dataset.target_names)
if default_loss is None:
default_loss = MAE()
loss = kwargs.get("loss", default_loss)
if n_targets > 1: # try to infer number of output sizes
if not isinstance(loss, MultiLoss):
loss = MultiLoss([deepcopy(loss)] * n_targets)
new_kwargs["loss"] = loss
if isinstance(loss, MultiLoss) and "output_size" not in kwargs:
new_kwargs["output_size"] = [
get_output_size(normalizer, l)
for normalizer, l in zip(dataset.target_normalizer.normalizers, loss.metrics)
]
elif "output_size" not in kwargs:
new_kwargs["output_size"] = get_output_size(dataset.target_normalizer, loss)
return new_kwargs
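# Illustrative example (not part of this commit): called with
# default_loss=QuantileLoss() on a single-target regression dataset without an
# explicit "output_size", this returns {"output_size": 7} for the library's 7
# default quantiles; for two regression targets it additionally wraps the loss,
# returning {"loss": MultiLoss([QuantileLoss(), QuantileLoss()]), "output_size": [7, 7]}.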

def size(self) -> int:
"""
get number of parameters in model
@@ -673,6 +746,10 @@ def configure_optimizers(self):
Tuple[List]: first entry is list of optimizers and second is list of schedulers
"""
# either set a schedule of lrs or find it dynamically
if self.hparams.optimizer_params is None:
optimizer_params = {}
else:
optimizer_params = self.hparams.optimizer_params
if isinstance(self.hparams.learning_rate, (list, tuple)): # set schedule
lrs = self.hparams.learning_rate
if self.hparams.optimizer == "adam":
@@ -681,8 +758,17 @@
optimizer = torch.optim.AdamW(self.parameters(), lr=lrs[0])
elif self.hparams.optimizer == "ranger":
optimizer = Ranger(self.parameters(), lr=lrs[0], weight_decay=self.hparams.weight_decay)
elif self.hparams.optimizer == "sgd":
optimizer = torch.optim.SGD(
self.parameters(), lr=lrs[0], weight_decay=self.hparams.weight_decay, **optimizer_params
)
else:
raise ValueError(f"Optimizer of self.hparams.optimizer={self.hparams.optimizer} unknown")
try:
optimizer = getattr(torch.optim, self.hparams.optimizer)(
self.parameters(), lr=lrs[0], weight_decay=self.hparams.weight_decay, **optimizer_params
)
except AttributeError:
raise ValueError(f"Optimizer of self.hparams.optimizer={self.hparams.optimizer} unknown")
# normalize lrs
lrs = np.array(lrs) / lrs[0]
schedulers = [
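
To illustrate the optimizer handling added above, a hedged sketch: the model class, dataset name, and hyperparameter values are placeholders; only the ``optimizer``/``optimizer_params`` hyperparameters and the SGD/torch.optim fallback behaviour come from this diff.

from pytorch_forecasting import TemporalFusionTransformer

tft = TemporalFusionTransformer.from_dataset(
    training,                            # placeholder TimeSeriesDataSet
    learning_rate=0.03,
    weight_decay=1e-2,
    optimizer="sgd",                     # handled by the new SGD branch
    optimizer_params={"momentum": 0.9},  # forwarded to torch.optim.SGD
)

# Any class name from torch.optim is also accepted and resolved via getattr,
# e.g. optimizer="Adagrad"; unknown names still raise a ValueError.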
1 change: 0 additions & 1 deletion pytorch_forecasting/models/deepar/__init__.py
@@ -176,7 +176,6 @@ def from_dataset(
Returns:
DeepAR network
"""
# assert fixed encoder and decoder length for the moment
new_kwargs = {}
if dataset.multi_target:
new_kwargs.setdefault("loss", MultiLoss([NormalDistributionLoss()] * len(dataset.target_names)))
155 changes: 155 additions & 0 deletions pytorch_forecasting/models/mlp/__init__.py
@@ -0,0 +1,155 @@
"""
Simple models based on fully connected networks
"""


from typing import Dict, List, Tuple, Union

import numpy as np
import torch
from torch import nn

from pytorch_forecasting.data import TimeSeriesDataSet
from pytorch_forecasting.metrics import MAE, MAPE, MASE, RMSE, SMAPE, MultiHorizonMetric, QuantileLoss
from pytorch_forecasting.models.base_model import BaseModelWithCovariates
from pytorch_forecasting.models.mlp.submodules import FullyConnectedModule
from pytorch_forecasting.models.nn.embeddings import MultiEmbedding


class DecoderMLP(BaseModelWithCovariates):
"""
MLP on the decoder.
MLP that predicts output only based on information available in the decoder.
"""

def __init__(
self,
activation_class: str = "ReLU",
hidden_size: int = 300,
n_hidden_layers: int = 3,
dropout: float = 0.1,
norm: bool = True,
static_categoricals: List[str] = [],
static_reals: List[str] = [],
time_varying_categoricals_encoder: List[str] = [],
time_varying_categoricals_decoder: List[str] = [],
categorical_groups: Dict[str, List[str]] = {},
time_varying_reals_encoder: List[str] = [],
time_varying_reals_decoder: List[str] = [],
embedding_sizes: Dict[str, Tuple[int, int]] = {},
embedding_paddings: List[str] = [],
embedding_labels: Dict[str, np.ndarray] = {},
x_reals: List[str] = [],
x_categoricals: List[str] = [],
output_size: Union[int, List[int]] = 1,
target: Union[str, List[str]] = None,
loss: MultiHorizonMetric = None,
logging_metrics: nn.ModuleList = None,
**kwargs,
):
"""
Args:
activation_class (str, optional): PyTorch activation class. Defaults to "ReLU".
hidden_size (int, optional): hidden size of the MLP - the most important hyperparameter along with
``n_hidden_layers``. Defaults to 300.
n_hidden_layers (int, optional): Number of hidden layers - important hyperparameter. Defaults to 3.
dropout (float, optional): Dropout. Defaults to 0.1.
norm (bool, optional): if to use normalization in the MLP. Defaults to True.
static_categoricals: integer of positions of static categorical variables
static_reals: integer of positions of static continuous variables
time_varying_categoricals_encoder: integer of positions of categorical variables for encoder
time_varying_categoricals_decoder: integer of positions of categorical variables for decoder
time_varying_reals_encoder: integer of positions of continuous variables for encoder
time_varying_reals_decoder: integer of positions of continuous variables for decoder
categorical_groups: dictionary where values
are list of categorical variables that are forming together a new categorical
variable which is the key in the dictionary
x_reals: order of continuous variables in tensor passed to forward function
x_categoricals: order of categorical variables in tensor passed to forward function
embedding_sizes: dictionary mapping (string) indices to tuple of number of categorical classes and
embedding size
embedding_paddings: list of indices for embeddings which transform the zero's embedding to a zero vector
embedding_labels: dictionary mapping (string) indices to list of categorical labels
output_size (Union[int, List[int]], optional): number of outputs (e.g. number of quantiles for
QuantileLoss and one target or list of output sizes).
target (str, optional): Target variable or list of target variables. Defaults to None.
loss (MultiHorizonMetric, optional): loss function taking prediction and targets.
Defaults to QuantileLoss.
logging_metrics (nn.ModuleList, optional): Metrics to log during training.
Defaults to nn.ModuleList([SMAPE(), MAE(), RMSE(), MAPE(), MASE()]).
"""
if loss is None:
loss = QuantileLoss()
if logging_metrics is None:
logging_metrics = nn.ModuleList([SMAPE(), MAE(), RMSE(), MAPE(), MASE()])
self.save_hyperparameters()
# store loss function separately as it is a module
super().__init__(loss=loss, logging_metrics=logging_metrics, **kwargs)

self.input_embeddings = MultiEmbedding(
embedding_sizes={
name: val
for name, val in embedding_sizes.items()
if name in self.decoder_variables + self.static_variables
},
embedding_paddings=embedding_paddings,
categorical_groups=categorical_groups,
x_categoricals=x_categoricals,
)
# define network
if isinstance(self.hparams.output_size, int):
mlp_output_size = self.hparams.output_size
else:
mlp_output_size = sum(self.hparams.output_size)

cont_size = len(self.decoder_reals_positions)
cat_size = sum([emb.embedding_dim for emb in self.input_embeddings.values()])
input_size = cont_size + cat_size

self.mlp = FullyConnectedModule(
dropout=dropout,
norm=self.hparams.norm,
activation_class=getattr(nn, self.hparams.activation_class),
input_size=input_size,
output_size=mlp_output_size,
hidden_size=self.hparams.hidden_size,
n_hidden_layers=self.hparams.n_hidden_layers,
)

@property
def decoder_reals_positions(self) -> List[int]:
return [
self.hparams.x_reals.index(name)
for name in self.reals
if name in self.decoder_variables + self.static_variables
]

def forward(self, x: Dict[str, torch.Tensor], n_samples: int = None) -> Dict[str, torch.Tensor]:
"""
Forward network
"""
# x is a batch generated based on the TimeSeriesDataset
batch_size = x["decoder_lengths"].size(0)
embeddings = self.input_embeddings(x["decoder_cat"]) # returns dictionary with embedding tensors
network_input = torch.cat(
[x["decoder_cont"][..., self.decoder_reals_positions]] + list(embeddings.values()),
dim=-1,
)
prediction = self.mlp(network_input.view(-1, self.mlp.input_size)).view(
batch_size, network_input.size(1), self.mlp.output_size
)

# cut prediction into pieces for multiple targets
if self.n_targets > 1:
prediction = torch.split(prediction, self.hparams.output_size, dim=-1)

# We need to return a dictionary that at least contains the prediction and the target_scale.
# The parameter can be directly forwarded from the input.
return dict(prediction=prediction, target_scale=x["target_scale"])

@classmethod
def from_dataset(cls, dataset: TimeSeriesDataSet, **kwargs):
new_kwargs = cls.deduce_default_output_parameters(dataset, kwargs, QuantileLoss())
kwargs.update(new_kwargs)
return super().from_dataset(dataset, **kwargs)
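
A hedged usage sketch for the new DecoderMLP: ``training`` is a placeholder TimeSeriesDataSet and the hyperparameter values are illustrative; the output_size behaviour noted in the comments follows ``deduce_default_output_parameters`` in base_model.py above.

from pytorch_forecasting import DecoderMLP

model = DecoderMLP.from_dataset(
    training,          # placeholder TimeSeriesDataSet
    hidden_size=64,
    n_hidden_layers=2,
    dropout=0.1,
)
# With the default QuantileLoss, output_size is deduced as len(loss.quantiles)
# (7 for the default quantiles); for multi-target datasets it becomes a list
# with one entry per target, and forward() splits the prediction accordingly.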