Sklearn api x (#405)

* changed signature of automl.predict and automl.predict_proba to X
* XGBoostEstimator
* changed signature of Prophet predict to X
* changed signature of ARIMA predict to X
* changed signature of TS_SKLearn_Regressor predict to X
microsoft · Jan 16, 2022 · 1c911da · 1c911da
1 parent a6d70ef
commit 1c911da
Show file tree

Hide file tree

Showing 2 changed files with 52 additions and 56 deletions.
diff --git a/flaml/automl.py b/flaml/automl.py
@@ -714,13 +714,11 @@ def time_to_find_best_model(self) -> float:
         """Time taken to find best model in seconds."""
         return self.__dict__.get("_time_taken_best_iter")
 
-    def predict(
-        self, X_test: Union[np.array, pd.DataFrame, List[str], List[List[str]]]
-    ):
+    def predict(self, X: Union[np.array, pd.DataFrame, List[str], List[List[str]]]):
         """Predict label from features.
 
         Args:
-            X_test: A numpy array of featurized instances, shape n * m,
+            X: A numpy array of featurized instances, shape n * m,
                 or for 'ts_forecast' task:
                     a pandas dataframe with the first column containing
                     timestamp values (datetime type) or an integer n for
@@ -748,8 +746,8 @@ def predict(
                 "No estimator is trained. Please run fit with enough budget."
             )
             return None
-        X_test = self._preprocess(X_test)
-        y_pred = estimator.predict(X_test)
+        X = self._preprocess(X)
+        y_pred = estimator.predict(X)
         if (
             isinstance(y_pred, np.ndarray)
             and y_pred.ndim > 1
@@ -763,12 +761,12 @@ def predict(
         else:
             return y_pred
 
-    def predict_proba(self, X_test):
+    def predict_proba(self, X):
         """Predict the probability of each class from features, only works for
         classification problems.
 
         Args:
-            X_test: A numpy array of featurized instances, shape n * m.
+            X: A numpy array of featurized instances, shape n * m.
 
         Returns:
             A numpy array of shape n * c. c is the  # classes. Each element at
@@ -780,8 +778,8 @@ def predict_proba(self, X_test):
                 "No estimator is trained. Please run fit with enough budget."
             )
             return None
-        X_test = self._preprocess(X_test)
-        proba = self._trained_estimator.predict_proba(X_test)
+        X = self._preprocess(X)
+        proba = self._trained_estimator.predict_proba(X)
         return proba
 
     def _preprocess(self, X):

diff --git a/flaml/model.py b/flaml/model.py
@@ -197,32 +197,32 @@ def fit(self, X_train, y_train, budget=None, **kwargs):
             train_time = self._fit(X_train, y_train, **kwargs)
         return train_time
 
-    def predict(self, X_test):
+    def predict(self, X):
         """Predict label from features.
 
         Args:
-            X_test: A numpy array or a dataframe of featurized instances, shape n*m.
+            X: A numpy array or a dataframe of featurized instances, shape n*m.
 
         Returns:
             A numpy array of shape n*1.
             Each element is the label for a instance.
         """
         if self._model is not None:
-            X_test = self._preprocess(X_test)
-            return self._model.predict(X_test)
+            X = self._preprocess(X)
+            return self._model.predict(X)
         else:
             logger.warning(
                 "Estimator is not fit yet. Please run fit() before predict()."
             )
-            return np.ones(X_test.shape[0])
+            return np.ones(X.shape[0])
 
-    def predict_proba(self, X_test):
+    def predict_proba(self, X):
         """Predict the probability of each class from features.
 
         Only works for classification problems
 
         Args:
-            X_test: A numpy array of featurized instances, shape n*m.
+            X: A numpy array of featurized instances, shape n*m.
 
         Returns:
             A numpy array of shape n*c. c is the # classes.
@@ -231,8 +231,8 @@ class j.
         """
         assert self._task in CLASSIFICATION, "predict_proba() only for classification."
 
-        X_test = self._preprocess(X_test)
-        return self._model.predict_proba(X_test)
+        X = self._preprocess(X)
+        return self._model.predict_proba(X)
 
     def cleanup(self):
         del self._model
@@ -708,18 +708,18 @@ def _init_model_for_predict(self, X_test):
         )
         return test_dataset, training_args
 
-    def predict_proba(self, X_test):
+    def predict_proba(self, X):
         assert (
             self._task in CLASSIFICATION
         ), "predict_proba() only for classification tasks."
 
-        test_dataset, _ = self._init_model_for_predict(X_test)
+        test_dataset, _ = self._init_model_for_predict(X)
         predictions = self._trainer.predict(test_dataset)
         self._trainer = None
         return predictions.predictions
 
-    def predict(self, X_test):
-        test_dataset, training_args = self._init_model_for_predict(X_test)
+    def predict(self, X):
+        test_dataset, training_args = self._init_model_for_predict(X)
         if self._task not in NLG_TASKS:
             predictions = self._trainer.predict(test_dataset)
         else:
@@ -1108,12 +1108,12 @@ def fit(self, X_train, y_train, budget=None, **kwargs):
         train_time = time.time() - start_time
         return train_time
 
-    def predict(self, X_test):
+    def predict(self, X):
         import xgboost as xgb
 
-        if not issparse(X_test):
-            X_test = self._preprocess(X_test)
-        dtest = xgb.DMatrix(X_test)
+        if not issparse(X):
+            X = self._preprocess(X)
+        dtest = xgb.DMatrix(X)
         return super().predict(dtest)
 
     @classmethod
@@ -1598,22 +1598,22 @@ def fit(self, X_train, y_train, budget=None, **kwargs):
         self._model = model
         return train_time
 
-    def predict(self, X_test):
-        if isinstance(X_test, int):
+    def predict(self, X):
+        if isinstance(X, int):
             raise ValueError(
                 "predict() with steps is only supported for arima/sarimax."
                 " For Prophet, pass a dataframe with the first column containing"
                 " the timestamp values."
             )
         if self._model is not None:
-            X_test = self._preprocess(X_test)
-            forecast = self._model.predict(X_test)
+            X = self._preprocess(X)
+            forecast = self._model.predict(X)
             return forecast["yhat"]
         else:
             logger.warning(
                 "Estimator is not fit yet. Please run fit() before predict()."
             )
-            return np.ones(X_test.shape[0])
+            return np.ones(X.shape[0])
 
 
 class ARIMA(Prophet):
@@ -1678,30 +1678,30 @@ def fit(self, X_train, y_train, budget=None, **kwargs):
         self._model = model
         return train_time
 
-    def predict(self, X_test):
+    def predict(self, X):
         if self._model is not None:
-            if isinstance(X_test, int):
-                forecast = self._model.forecast(steps=X_test)
-            elif isinstance(X_test, DataFrame):
-                start = X_test[TS_TIMESTAMP_COL].iloc[0]
-                end = X_test[TS_TIMESTAMP_COL].iloc[-1]
-                if len(X_test.columns) > 1:
-                    X_test = self._preprocess(X_test.drop(columns=TS_TIMESTAMP_COL))
-                    regressors = list(X_test)
-                    print(start, end, X_test.shape)
+            if isinstance(X, int):
+                forecast = self._model.forecast(steps=X)
+            elif isinstance(X, DataFrame):
+                start = X[TS_TIMESTAMP_COL].iloc[0]
+                end = X[TS_TIMESTAMP_COL].iloc[-1]
+                if len(X.columns) > 1:
+                    X = self._preprocess(X.drop(columns=TS_TIMESTAMP_COL))
+                    regressors = list(X)
+                    print(start, end, X.shape)
                     forecast = self._model.predict(
-                        start=start, end=end, exog=X_test[regressors]
+                        start=start, end=end, exog=X[regressors]
                     )
                 else:
                     forecast = self._model.predict(start=start, end=end)
             else:
                 raise ValueError(
-                    "X_test needs to be either a pandas Dataframe with dates as the first column"
+                    "X needs to be either a pandas Dataframe with dates as the first column"
                     " or an int number of periods for predict()."
                 )
             return forecast
         else:
-            return np.ones(X_test if isinstance(X_test, int) else X_test.shape[0])
+            return np.ones(X if isinstance(X, int) else X.shape[0])
 
 
 class SARIMAX(ARIMA):
@@ -1873,42 +1873,40 @@ def fit(self, X_train, y_train, budget=None, **kwargs):
         train_time = time.time() - current_time
         return train_time
 
-    def predict(self, X_test):
+    def predict(self, X):
         if self._model is not None:
-            X_test = self.transform_X(X_test)
-            X_test = self._preprocess(X_test)
+            X = self.transform_X(X)
+            X = self._preprocess(X)
             if isinstance(self._model, list):
                 assert len(self._model) == len(
-                    X_test
-                ), "Model is optimized for horizon, length of X_test must be equal to `period`."
+                    X
+                ), "Model is optimized for horizon, length of X must be equal to `period`."
                 preds = []
                 for i in range(1, len(self._model) + 1):
                     (
                         X_pred,
                         _,
                     ) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(
-                        X_test.iloc[:i, :]
+                        X.iloc[:i, :]
                     )
                     preds.append(self._model[i - 1].predict(X_pred)[-1])
                 forecast = DataFrame(
                     data=np.asarray(preds).reshape(-1, 1),
                     columns=[self.hcrystaball_model.name],
-                    index=X_test.index,
+                    index=X.index,
                 )
             else:
                 (
                     X_pred,
                     _,
-                ) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(
-                    X_test
-                )
+                ) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X)
                 forecast = self._model.predict(X_pred)
             return forecast
         else:
             logger.warning(
                 "Estimator is not fit yet. Please run fit() before predict()."
             )
-            return np.ones(X_test.shape[0])
+            return np.ones(X.shape[0])
 
 
 class LGBM_TS_Regressor(TS_SKLearn_Regressor):