
[MNT] Remove mutable objects from defaults (#1699)
### Description

This PR fixes #1668. It removes mutable default arguments (such as `{}` and `[]`) and replaces them with `None` defaults that are converted to freshly initialized mutable objects inside the constructor.
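
As background (an editorial sketch, not part of the commit): a mutable default such as `{}` is evaluated once, when the function is defined, and is then shared across every call, so mutations leak between instances. The fix applied throughout this PR is to default to `None` and build a fresh object inside the constructor. The `Broken` and `Fixed` classes below are hypothetical, minimal stand-ins for the repository's classes:

```python
# Hypothetical illustration of the bug this PR removes; not repository code.
from typing import Any, Dict, Optional


class Broken:
    def __init__(self, method_kwargs: Dict[str, Any] = {}):  # noqa: B006
        # The default {} is created once, at def time, and shared by all instances.
        self.method_kwargs = method_kwargs


class Fixed:
    def __init__(self, method_kwargs: Optional[Dict[str, Any]] = None):
        # A fresh dict is created per instance when no argument is passed.
        self.method_kwargs = {} if method_kwargs is None else method_kwargs


a, b = Broken(), Broken()
a.method_kwargs["upper"] = 0.9
assert b.method_kwargs == {"upper": 0.9}  # state leaked across instances

c, d = Fixed(), Fixed()
c.method_kwargs["upper"] = 0.9
assert d.method_kwargs == {}  # instances stay independent
```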
eugenio-mercuriali authored Nov 9, 2024
1 parent a884c4d commit 193f539
Showing 17 changed files with 511 additions and 303 deletions.
52 changes: 26 additions & 26 deletions docs/source/tutorials/building.ipynb
@@ -875,13 +875,13 @@
" assert dataset.max_prediction_length == dataset.min_prediction_length, \"Decoder only supports a fixed length\"\n",
" assert dataset.min_encoder_length == dataset.max_encoder_length, \"Encoder only supports a fixed length\"\n",
" assert (\n",
" len(dataset.time_varying_known_categoricals) == 0\n",
" and len(dataset.time_varying_known_reals) == 0\n",
" and len(dataset.time_varying_unknown_categoricals) == 0\n",
" and len(dataset.static_categoricals) == 0\n",
" and len(dataset.static_reals) == 0\n",
" and len(dataset.time_varying_unknown_reals) == 1\n",
" and dataset.time_varying_unknown_reals[0] == dataset.target\n",
" len(dataset._time_varying_known_categoricals) == 0\n",
" and len(dataset._time_varying_known_reals) == 0\n",
" and len(dataset._time_varying_unknown_categoricals) == 0\n",
" and len(dataset._static_categoricals) == 0\n",
" and len(dataset._static_reals) == 0\n",
" and len(dataset._time_varying_unknown_reals) == 1\n",
" and dataset._time_varying_unknown_reals[0] == dataset.target\n",
" ), \"Only covariate should be the target in 'time_varying_unknown_reals'\"\n",
"\n",
" return super().from_dataset(dataset, **new_kwargs)"
@@ -1587,12 +1587,12 @@
" assert dataset.max_prediction_length == dataset.min_prediction_length, \"Decoder only supports a fixed length\"\n",
" assert dataset.min_encoder_length == dataset.max_encoder_length, \"Encoder only supports a fixed length\"\n",
" assert (\n",
" len(dataset.time_varying_known_categoricals) == 0\n",
" and len(dataset.time_varying_known_reals) == 0\n",
" and len(dataset.time_varying_unknown_categoricals) == 0\n",
" and len(dataset.static_categoricals) == 0\n",
" and len(dataset.static_reals) == 0\n",
" and len(dataset.time_varying_unknown_reals) == 1\n",
" len(dataset._time_varying_known_categoricals) == 0\n",
" and len(dataset._time_varying_known_reals) == 0\n",
" and len(dataset._time_varying_unknown_categoricals) == 0\n",
" and len(dataset._static_categoricals) == 0\n",
" and len(dataset._static_reals) == 0\n",
" and len(dataset._time_varying_unknown_reals) == 1\n",
" ), \"Only covariate should be in 'time_varying_unknown_reals'\"\n",
"\n",
" return super().from_dataset(dataset, **new_kwargs)\n",
@@ -2136,12 +2136,12 @@
" assert dataset.max_prediction_length == dataset.min_prediction_length, \"Decoder only supports a fixed length\"\n",
" assert dataset.min_encoder_length == dataset.max_encoder_length, \"Encoder only supports a fixed length\"\n",
" assert (\n",
" len(dataset.time_varying_known_categoricals) == 0\n",
" and len(dataset.time_varying_known_reals) == 0\n",
" and len(dataset.time_varying_unknown_categoricals) == 0\n",
" and len(dataset.static_categoricals) == 0\n",
" and len(dataset.static_reals) == 0\n",
" and len(dataset.time_varying_unknown_reals)\n",
" len(dataset._time_varying_known_categoricals) == 0\n",
" and len(dataset._time_varying_known_reals) == 0\n",
" and len(dataset._time_varying_unknown_categoricals) == 0\n",
" and len(dataset._static_categoricals) == 0\n",
" and len(dataset._static_reals) == 0\n",
" and len(dataset._time_varying_unknown_reals)\n",
" == len(dataset.target_names) # Expect as as many unknown reals as targets\n",
" ), \"Only covariate should be in 'time_varying_unknown_reals'\"\n",
"\n",
@@ -3414,13 +3414,13 @@
" assert dataset.max_prediction_length == dataset.min_prediction_length, \"Decoder only supports a fixed length\"\n",
" assert dataset.min_encoder_length == dataset.max_encoder_length, \"Encoder only supports a fixed length\"\n",
" assert (\n",
" len(dataset.time_varying_known_categoricals) == 0\n",
" and len(dataset.time_varying_known_reals) == 0\n",
" and len(dataset.time_varying_unknown_categoricals) == 0\n",
" and len(dataset.static_categoricals) == 0\n",
" and len(dataset.static_reals) == 0\n",
" and len(dataset.time_varying_unknown_reals) == 1\n",
" and dataset.time_varying_unknown_reals[0] == dataset.target\n",
" len(dataset._time_varying_known_categoricals) == 0\n",
" and len(dataset._time_varying_known_reals) == 0\n",
" and len(dataset._time_varying_unknown_categoricals) == 0\n",
" and len(dataset._static_categoricals) == 0\n",
" and len(dataset._static_reals) == 0\n",
" and len(dataset._time_varying_unknown_reals) == 1\n",
" and dataset._time_varying_unknown_reals[0] == dataset.target\n",
" ), \"Only covariate should be the target in 'time_varying_unknown_reals'\"\n",
"\n",
" return super().from_dataset(dataset, **new_kwargs)\n",
95 changes: 50 additions & 45 deletions pytorch_forecasting/data/encoders.py
@@ -2,11 +2,12 @@
Encoders for encoding categorical variables and scaling continuous data.
"""

from typing import Any, Callable, Dict, Iterable, List, Tuple, Union
from typing import Any, Callable, Dict, Iterable, List, Tuple, Union, Optional
import warnings

import numpy as np
import pandas as pd
from copy import deepcopy
from sklearn.base import BaseEstimator, TransformerMixin
import torch
from torch.distributions import constraints
@@ -396,7 +397,7 @@ def __init__(
method: str = "standard",
center: bool = True,
transformation: Union[str, Tuple[Callable, Callable]] = None,
method_kwargs: Dict[str, Any] = {},
method_kwargs: Optional[Dict[str, Any]] = None,
):
"""
Args:
@@ -428,6 +429,7 @@ def __init__(
self.center = center
self.transformation = transformation
self.method_kwargs = method_kwargs
self._method_kwargs = deepcopy(method_kwargs) if method_kwargs is not None else {}

def get_parameters(self, *args, **kwargs) -> torch.Tensor:
"""
@@ -496,17 +498,17 @@ def _set_parameters(

elif self.method == "robust":
if isinstance(y_center, torch.Tensor):
self.center_ = y_center.quantile(self.method_kwargs.get("center", 0.5), dim=-1)
q_75 = y_scale.quantile(self.method_kwargs.get("upper", 0.75), dim=-1)
q_25 = y_scale.quantile(self.method_kwargs.get("lower", 0.25), dim=-1)
self.center_ = y_center.quantile(self._method_kwargs.get("center", 0.5), dim=-1)
q_75 = y_scale.quantile(self._method_kwargs.get("upper", 0.75), dim=-1)
q_25 = y_scale.quantile(self._method_kwargs.get("lower", 0.25), dim=-1)
elif isinstance(y_center, np.ndarray):
self.center_ = np.percentile(y_center, self.method_kwargs.get("center", 0.5) * 100, axis=-1)
q_75 = np.percentile(y_scale, self.method_kwargs.get("upper", 0.75) * 100, axis=-1)
q_25 = np.percentile(y_scale, self.method_kwargs.get("lower", 0.25) * 100, axis=-1)
self.center_ = np.percentile(y_center, self._method_kwargs.get("center", 0.5) * 100, axis=-1)
q_75 = np.percentile(y_scale, self._method_kwargs.get("upper", 0.75) * 100, axis=-1)
q_25 = np.percentile(y_scale, self._method_kwargs.get("lower", 0.25) * 100, axis=-1)
else:
self.center_ = np.percentile(y_center, self.method_kwargs.get("center", 0.5) * 100, axis=-1)
q_75 = np.percentile(y_scale, self.method_kwargs.get("upper", 0.75) * 100)
q_25 = np.percentile(y_scale, self.method_kwargs.get("lower", 0.25) * 100)
self.center_ = np.percentile(y_center, self._method_kwargs.get("center", 0.5) * 100, axis=-1)
q_75 = np.percentile(y_scale, self._method_kwargs.get("upper", 0.75) * 100)
q_25 = np.percentile(y_scale, self._method_kwargs.get("lower", 0.25) * 100)
self.scale_ = (q_75 - q_25) / 2.0 + eps
if not self.center and self.method != "identity":
self.scale_ = self.center_
@@ -623,7 +625,7 @@ def __init__(
center: bool = True,
max_length: Union[int, List[int]] = None,
transformation: Union[str, Tuple[Callable, Callable]] = None,
method_kwargs: Dict[str, Any] = {},
method_kwargs: Dict[str, Any] = None,
):
"""
Initialize
@@ -655,6 +657,7 @@
should be defined if ``reverse`` is not the inverse of the forward transformation. ``inverse_torch``
can be defined to provide a torch distribution transform for inverse transformations.
"""
method_kwargs = deepcopy(method_kwargs) if method_kwargs is not None else {}
super().__init__(method=method, center=center, transformation=transformation, method_kwargs=method_kwargs)
self.max_length = max_length

@@ -726,11 +729,11 @@ class GroupNormalizer(TorchNormalizer):
def __init__(
self,
method: str = "standard",
groups: List[str] = [],
groups: Optional[List[str]] = None,
center: bool = True,
scale_by_group: bool = False,
transformation: Union[str, Tuple[Callable, Callable]] = None,
method_kwargs: Dict[str, Any] = {},
transformation: Optional[Union[str, Tuple[Callable, Callable]]] = None,
method_kwargs: Optional[Dict[str, Any]] = None,
):
"""
Group normalizer to normalize a given entry by groups. Can be used as target normalizer.
@@ -765,7 +768,9 @@ def __init__(
"""
self.groups = groups
self._groups = list(groups) if groups is not None else []
self.scale_by_group = scale_by_group
method_kwargs = deepcopy(method_kwargs) if method_kwargs is not None else {}
super().__init__(method=method, center=center, transformation=transformation, method_kwargs=method_kwargs)

def fit(self, y: pd.Series, X: pd.DataFrame):
@@ -781,17 +786,17 @@ def fit(self, y: pd.Series, X: pd.DataFrame):
"""
y = self.preprocess(y)
eps = np.finfo(np.float16).eps
if len(self.groups) == 0:
if len(self._groups) == 0:
assert not self.scale_by_group, "No groups are defined, i.e. `scale_by_group=[]`"
if self.method == "standard":
self.norm_ = {"center": np.mean(y), "scale": np.std(y) + eps} # center and scale
else:
quantiles = np.quantile(
y,
[
self.method_kwargs.get("lower", 0.25),
self.method_kwargs.get("center", 0.5),
self.method_kwargs.get("upper", 0.75),
self._method_kwargs.get("lower", 0.25),
self._method_kwargs.get("center", 0.5),
self._method_kwargs.get("upper", 0.75),
],
)
self.norm_ = {
@@ -810,7 +815,7 @@ def fit(self, y: pd.Series, X: pd.DataFrame):
.groupby(g, observed=True)
.agg(center=("y", "mean"), scale=("y", "std"))
.assign(center=lambda x: x["center"], scale=lambda x: x.scale + eps)
for g in self.groups
for g in self._groups
}
else:
self.norm_ = {
@@ -819,21 +824,21 @@ def fit(self, y: pd.Series, X: pd.DataFrame):
.groupby(g, observed=True)
.y.quantile(
[
self.method_kwargs.get("lower", 0.25),
self.method_kwargs.get("center", 0.5),
self.method_kwargs.get("upper", 0.75),
self._method_kwargs.get("lower", 0.25),
self._method_kwargs.get("center", 0.5),
self._method_kwargs.get("upper", 0.75),
]
)
.unstack(-1)
.assign(
center=lambda x: x[self.method_kwargs.get("center", 0.5)],
center=lambda x: x[self._method_kwargs.get("center", 0.5)],
scale=lambda x: (
x[self.method_kwargs.get("upper", 0.75)] - x[self.method_kwargs.get("lower", 0.25)]
x[self._method_kwargs.get("upper", 0.75)] - x[self._method_kwargs.get("lower", 0.25)]
)
/ 2.0
+ eps,
)[["center", "scale"]]
for g in self.groups
for g in self._groups
}
# calculate missings
if not self.center: # swap center and scale
@@ -849,29 +854,29 @@ def swap_parameters(norm):
else:
if self.method == "standard":
self.norm_ = (
X[self.groups]
X[self._groups]
.assign(y=y)
.groupby(self.groups, observed=True)
.groupby(self._groups, observed=True)
.agg(center=("y", "mean"), scale=("y", "std"))
.assign(center=lambda x: x["center"], scale=lambda x: x.scale + eps)
)
else:
self.norm_ = (
X[self.groups]
X[self._groups]
.assign(y=y)
.groupby(self.groups, observed=True)
.groupby(self._groups, observed=True)
.y.quantile(
[
self.method_kwargs.get("lower", 0.25),
self.method_kwargs.get("center", 0.5),
self.method_kwargs.get("upper", 0.75),
self._method_kwargs.get("lower", 0.25),
self._method_kwargs.get("center", 0.5),
self._method_kwargs.get("upper", 0.75),
]
)
.unstack(-1)
.assign(
center=lambda x: x[self.method_kwargs.get("center", 0.5)],
center=lambda x: x[self._method_kwargs.get("center", 0.5)],
scale=lambda x: (
x[self.method_kwargs.get("upper", 0.75)] - x[self.method_kwargs.get("lower", 0.25)]
x[self._method_kwargs.get("upper", 0.75)] - x[self._method_kwargs.get("lower", 0.25)]
)
/ 2.0
+ eps,
Expand All @@ -883,7 +888,7 @@ def swap_parameters(norm):
self.missing_ = self.norm_.median().to_dict()

if (
(self.scale_by_group and any((self.norm_[group]["scale"] < 1e-7).any() for group in self.groups))
(self.scale_by_group and any((self.norm_[group]["scale"] < 1e-7).any() for group in self._groups))
or (not self.scale_by_group and isinstance(self.norm_["scale"], float) and self.norm_["scale"] < 1e-7)
or (
not self.scale_by_group
@@ -973,13 +978,13 @@ def get_parameters(self, groups: Union[torch.Tensor, list, tuple], group_names:
if isinstance(groups, list):
groups = tuple(groups)
if group_names is None:
group_names = self.groups
group_names = self._groups
else:
# filter group names
group_names = [name for name in group_names if name in self.groups]
assert len(group_names) == len(self.groups), "Passed groups and fitted do not match"
group_names = [name for name in group_names if name in self._groups]
assert len(group_names) == len(self._groups), "Passed groups and fitted do not match"

if len(self.groups) == 0:
if len(self._groups) == 0:
params = np.array([self.norm_["center"], self.norm_["scale"]])
elif self.scale_by_group:
norm = np.array([1.0, 1.0])
Expand All @@ -988,7 +993,7 @@ def get_parameters(self, groups: Union[torch.Tensor, list, tuple], group_names:
norm = norm * self.norm_[group_name].loc[group].to_numpy()
except KeyError:
norm = norm * np.asarray([self.missing_[group_name][name] for name in self.names])
norm = np.power(norm, 1.0 / len(self.groups))
norm = np.power(norm, 1.0 / len(self._groups))
params = norm
else:
try:
Expand All @@ -1007,7 +1012,7 @@ def get_norm(self, X: pd.DataFrame) -> pd.DataFrame:
Returns:
pd.DataFrame: dataframe with scaling parameters, where each row corresponds to the input dataframe
"""
if len(self.groups) == 0:
if len(self._groups) == 0:
norm = np.asarray([self.norm_["center"], self.norm_["scale"]]).reshape(1, -1)
elif self.scale_by_group:
norm = [
Expand All @@ -1017,15 +1022,15 @@ def get_norm(self, X: pd.DataFrame) -> pd.DataFrame:
.map(self.norm_[group_name][name])
.fillna(self.missing_[group_name][name])
.to_numpy()
for group_name in self.groups
for group_name in self._groups
],
axis=0,
)
for name in self.names
]
norm = np.power(np.stack(norm, axis=1), 1.0 / len(self.groups))
norm = np.power(np.stack(norm, axis=1), 1.0 / len(self._groups))
else:
norm = X[self.groups].set_index(self.groups).join(self.norm_).fillna(self.missing_).to_numpy()
norm = X[self._groups].set_index(self._groups).join(self.norm_).fillna(self.missing_).to_numpy()
return norm
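
An editorial note on the pattern the diff introduces: the raw argument is kept on `self.method_kwargs` (likewise `self.groups`), while the sanitized copy lives on the private `self._method_kwargs` (likewise `self._groups`). This presumably preserves scikit-learn's estimator contract, under which `__init__` must store constructor arguments unmodified so that `get_params()` and `clone()` round-trip. A minimal sketch of that convention, using a hypothetical `ToyNormalizer` rather than repository code:

```python
# Hypothetical sketch of the store-raw / sanitize-privately pattern; not repository code.
from copy import deepcopy
from typing import Any, Dict, Optional

from sklearn.base import BaseEstimator


class ToyNormalizer(BaseEstimator):
    def __init__(self, method_kwargs: Optional[Dict[str, Any]] = None):
        # sklearn convention: store the argument exactly as passed so that
        # get_params() / clone() can reconstruct an identical estimator.
        self.method_kwargs = method_kwargs
        # Private sanitized copy: never None, never aliased to caller state.
        self._method_kwargs = deepcopy(method_kwargs) if method_kwargs is not None else {}


est = ToyNormalizer()
assert est.get_params() == {"method_kwargs": None}  # raw value round-trips
assert est._method_kwargs == {}  # internal code can rely on a dict
```

Because `clone()` rebuilds an estimator by calling `__init__` with the stored raw parameters, the private attribute is regenerated per instance, so no mutable state is shared between an estimator and its clones.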

