From c325b65b6eb91fce55a2ab46438d326426b6efcb Mon Sep 17 00:00:00 2001
From: Axel Fahy <axel.fahy@heig-vd.ch>
Date: Mon, 2 Mar 2020 15:08:41 +0100
Subject: [PATCH] feat(_check_sklearn_support): add function to check if
 sklearn is installed

To avoid having ``scikit-learn`` in the dependencies of the project,
check if sklearn is installed when using it instead of installing it by
default.
---
 bff/__init__.py |  1 +
 bff/fancy.py    | 28 ++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/bff/__init__.py b/bff/__init__.py
index f3a85ef..1c2f802 100644
--- a/bff/__init__.py
+++ b/bff/__init__.py
@@ -47,6 +47,7 @@
     'value_2_list',
 ]
 
+
 # Logging configuration.
 FORMAT = '%(asctime)s [%(levelname)-7s] %(name)s: %(message)s'
 logging.basicConfig(format=FORMAT, level=logging.INFO)
diff --git a/bff/fancy.py b/bff/fancy.py
index a7aefc1..f82c574 100644
--- a/bff/fancy.py
+++ b/bff/fancy.py
@@ -105,6 +105,30 @@ def cast_to_category_pd(df: pd.DataFrame, deep: bool = True) -> pd.DataFrame:
             )
 
 
+def _check_sklearn_support(caller_name: str):
+    """
+    Raise ImportError with detailed error message if sklearn is not installed.
+
+    Used by functions that fall back to a default sklearn Transformer, so sklearn stays optional.
+
+    Parameters
+    ----------
+    caller_name : str
+        The name of the caller that requires sklearn.
+
+    Raises
+    ------
+    ImportError
+        If sklearn is not installed.
+    """
+    try:
+        import sklearn  # noqa
+    except ImportError as e:
+        raise ImportError(
+            f'{caller_name} requires sklearn. You can install scikit-learn with '
+            '`pip install scikit-learn`') from e
+
+
 def concat_with_categories(df_left: pd.DataFrame, df_right: pd.DataFrame,
                            **kwargs) -> pd.DataFrame:
     """
@@ -511,6 +535,10 @@ def normalization_pd(df: pd.DataFrame, scaler=None,
     3   45   34.0  0.375000  0.009434
     4   67   90.0  0.833333  0.273585
     """
+    if scaler is None:
+        _check_sklearn_support('normalization_pd')
+        from sklearn.preprocessing import MinMaxScaler
+        scaler = MinMaxScaler
     # If columns are not provided, select all the numerical columns of the DataFrame.
     # If provided, select only the numerical ones.
     cols_to_norm = ([col for col in value_2_list(columns) if np.issubdtype(df[col], np.number)]