Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Refactor] Made CrossValTypes, HoldoutValTypes to have split functions directly #108

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Open
Prev Previous commit
Next Next commit
[fix] Fix mypy issues and modify the test accordingly
Since the previous code defaulted to shuffle = True and shuffled the indices
before splitting, the test cases for CV and Holdout did not match.
More specifically, when I brought back the following, I could reproduce
the original outputs:
1. Bring back _get_indices in BaseDataset
2. Make the default value of self.shuffle in BaseDataset True
3. Input shuffle = True in KFold instead of using ShuffleSplit
These changes reproduce the original outputs.
Note that KFold(shuffle=True) and ShuffleSplit() are not identical,
so even when we pass the same random_state, the results are not reproduced.
nabenabe0928 committed May 19, 2021

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
commit eee3b1c49a823f6242b9c624e7e87fcab1f2dba0
6 changes: 3 additions & 3 deletions autoPyTorch/datasets/base_dataset.py
Original file line number Diff line number Diff line change
@@ -233,7 +233,7 @@ def get_splits_from_resampling_strategy(self) -> List[Tuple[List[int], List[int]
labels_to_stratify = self.train_tensors[-1] if self.is_stratify else None

if isinstance(self.resampling_strategy, HoldoutValTypes):
val_share = self.resampling_strategy_args['val_share']
val_share = self.resampling_strategy_args.get('val_share', None)

return self.resampling_strategy(
random_state=self.random_state,
@@ -243,11 +243,11 @@ def get_splits_from_resampling_strategy(self) -> List[Tuple[List[int], List[int]
labels_to_stratify=labels_to_stratify
)
elif isinstance(self.resampling_strategy, CrossValTypes):
num_splits = self.resampling_strategy_args['num_splits']
num_splits = self.resampling_strategy_args.get('num_splits', None)

return self.resampling_strategy(
random_state=self.random_state,
num_splits=int(num_splits),
num_splits=num_splits,
shuffle=self.shuffle,
indices=self._get_indices(),
labels_to_stratify=labels_to_stratify
47 changes: 24 additions & 23 deletions autoPyTorch/datasets/resampling_strategy.py
Original file line number Diff line number Diff line change
@@ -26,19 +26,19 @@ class _ResamplingStrategyArgs(NamedTuple):
class HoldoutFuncs():
@staticmethod
def holdout_validation(
random_state: np.random.RandomState,
val_share: float,
indices: np.ndarray,
random_state: Optional[np.random.RandomState] = None,
val_share: Optional[float] = None,
shuffle: bool = False,
labels_to_stratify: Optional[Union[Tuple[np.ndarray, np.ndarray], Dataset]] = None
) -> List[Tuple[np.ndarray, np.ndarray]]:

train, val = train_test_split(
indices, test_size=val_share, shuffle=shuffle,
random_state=random_state if shuffle else None,
indices, test_size=val_share,
shuffle=shuffle, random_state=random_state,
stratify=labels_to_stratify
)
return [train, val]
return [(train, val)]


class CrossValFuncs():
@@ -52,9 +52,9 @@ class CrossValFuncs():

@staticmethod
def k_fold_cross_validation(
random_state: np.random.RandomState,
num_splits: int,
indices: np.ndarray,
random_state: Optional[np.random.RandomState] = None,
num_splits: Optional[int] = None,
shuffle: bool = False,
labels_to_stratify: Optional[Union[Tuple[np.ndarray, np.ndarray], Dataset]] = None
) -> List[Tuple[np.ndarray, np.ndarray]]:
@@ -70,22 +70,15 @@ def k_fold_cross_validation(

@staticmethod
def time_series(
random_state: np.random.RandomState,
num_splits: int,
indices: np.ndarray,
random_state: Optional[np.random.RandomState] = None,
num_splits: Optional[int] = None,
shuffle: bool = False,
labels_to_stratify: Optional[Union[Tuple[np.ndarray, np.ndarray], Dataset]] = None
) -> List[Tuple[np.ndarray, np.ndarray]]:
"""
Returns train and validation indices respecting the temporal ordering of the data.

Args:
indices (np.ndarray): array of indices to be split
num_splits (int): number of cross validation splits

Returns:
splits (List[Tuple[List, List]]): list of tuples of training and validation indices

Examples:
>>> indices = np.array([0, 1, 2, 3])
>>> CrossValFuncs.time_series_cross_validation(3, indices)
@@ -94,7 +87,7 @@ def time_series(
([0, 1, 2], [3])]

"""
cv = TimeSeriesSplit(n_splits=num_splits, random_state=random_state)
cv = TimeSeriesSplit(n_splits=num_splits)
splits = list(cv.split(indices))
return splits

@@ -122,9 +115,9 @@ class CrossValTypes(Enum):

def __call__(
self,
random_state: np.random.RandomState,
indices: np.ndarray,
num_splits: int = 5,
random_state: Optional[np.random.RandomState] = None,
num_splits: Optional[int] = None,
shuffle: bool = False,
labels_to_stratify: Optional[Union[Tuple[np.ndarray, np.ndarray], Dataset]] = None
) -> List[Tuple[np.ndarray, np.ndarray]]:
@@ -144,8 +137,12 @@ def __call__(
splits[a split identifier][0: train, 1: val][a data point identifier]

"""

default_num_splits = _ResamplingStrategyArgs().num_splits
num_splits = num_splits if num_splits is not None else default_num_splits

return self.value(
random_state=random_state,
random_state=random_state if shuffle else None,
num_splits=num_splits,
indices=indices,
shuffle=shuffle,
@@ -181,9 +178,9 @@ class HoldoutValTypes(Enum):

def __call__(
self,
random_state: np.random.RandomState,
indices: np.ndarray,
val_share: float = 0.33,
random_state: Optional[np.random.RandomState] = None,
val_share: Optional[float] = None,
shuffle: bool = False,
labels_to_stratify: Optional[Union[Tuple[np.ndarray, np.ndarray], Dataset]] = None
) -> List[Tuple[np.ndarray, np.ndarray]]:
@@ -203,8 +200,12 @@ def __call__(
splits[a split identifier][0: train, 1: val][a data point identifier]

"""

default_val_share = _ResamplingStrategyArgs().val_share
val_share = val_share if val_share is not None else default_val_share

return self.value(
random_state=random_state,
random_state=random_state if shuffle else None,
val_share=val_share,
indices=indices,
shuffle=shuffle,
4 changes: 2 additions & 2 deletions test/test_evaluation/test_train_evaluator.py
Original file line number Diff line number Diff line change
@@ -112,7 +112,7 @@ def test_holdout(self, pipeline_mock):
self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1)

self.assertEqual(evaluator.file_output.call_count, 1)
self.assertEqual(result, 0.5652173913043479)
self.assertEqual(result, 0.30434782608695654)
self.assertEqual(pipeline_mock.fit.call_count, 1)
# 3 calls because of train, holdout and test set
self.assertEqual(pipeline_mock.predict_proba.call_count, 3)
@@ -150,7 +150,7 @@ def test_cv(self, pipeline_mock):
self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1)

self.assertEqual(evaluator.file_output.call_count, 1)
self.assertEqual(result, 0.46235467431119603)
self.assertEqual(result, 0.4651019270584489)
self.assertEqual(pipeline_mock.fit.call_count, 5)
# 9 calls because of the training, holdout and
# test set (3 sets x 5 folds = 15)