From 98457b0ce690a9d819b09cbba1abc18bcacd04fa Mon Sep 17 00:00:00 2001
From: Doug <douglazenby@gmail.com>
Date: Mon, 5 Aug 2024 17:38:31 +0100
Subject: [PATCH 01/15] Add checks for threshold to be in range of variables.
 Write fit method.

---
 feature_engine/discretisation/binarizer.py    | 183 ++++++++++++++++++
 .../tests_binarizer/test_binarizer.py         |  11 ++
 2 files changed, 194 insertions(+)
 create mode 100644 feature_engine/discretisation/binarizer.py
 create mode 100644 feature_engine/discretisation/tests_binarizer/test_binarizer.py

diff --git a/feature_engine/discretisation/binarizer.py b/feature_engine/discretisation/binarizer.py
new file mode 100644
index 000000000..d9d70dd0a
--- /dev/null
+++ b/feature_engine/discretisation/binarizer.py
@@ -0,0 +1,183 @@
+from typing import List, Optional, Union
+
+import pandas as pd
+
+from feature_engine._check_init_parameters.check_variables import (
+    _check_variables_input_value,
+)
+from feature_engine._docstrings.fit_attributes import (
+    _binner_dict_docstring,
+    _feature_names_in_docstring,
+    _n_features_in_docstring,
+    _variables_attribute_docstring,
+)
+from feature_engine._docstrings.init_parameters.all_trasnformers import (
+    _variables_numerical_docstring,
+)
+from feature_engine._docstrings.init_parameters.discretisers import (
+    _precision_docstring,
+    _return_boundaries_docstring,
+    _return_object_docstring,
+)
+from feature_engine._docstrings.methods import (
+    _fit_discretiser_docstring,
+    _fit_transform_docstring,
+    _transform_discretiser_docstring,
+)
+from feature_engine._docstrings.substitute import Substitution
+from feature_engine.discretisation.base_discretiser import BaseDiscretiser
+
+
+@Substitution(
+    return_object=_return_object_docstring,
+    return_boundaries=_return_boundaries_docstring,
+    precision=_precision_docstring,
+    binner_dict_=_binner_dict_docstring,
+    fit=_fit_discretiser_docstring,
+    transform=_transform_discretiser_docstring,
+    variables=_variables_numerical_docstring,
+    variables_=_variables_attribute_docstring,
+    feature_names_in_=_feature_names_in_docstring,
+    n_features_in_=_n_features_in_docstring,
+    fit_transform=_fit_transform_docstring,
+)
+class Binarizer(BaseDiscretiser):
+    """
+    TODO: FIX THE DOCSTRING. SEE BELOW FOR EXAMPLE
+    The EqualWidthDiscretiser() divides continuous numerical variables into
+    intervals of the same width, that is, equidistant intervals. Note that the
+    proportion of observations per interval may vary.
+
+    The size of the interval is calculated as:
+
+    .. math::
+
+        ( max(X) - min(X) ) / bins
+
+    where bins, which is the number of intervals, is determined by the user.
+
+    The EqualWidthDiscretiser() works only with numerical variables.
+    A list of variables can be passed as argument. Alternatively, the discretiser
+    will automatically select all numerical variables.
+
+    The EqualWidthDiscretiser() first finds the boundaries for the intervals for
+    each variable. Then, it transforms the variables, that is, sorts the values into
+    the intervals.
+
+    More details in the :ref:`User Guide <equal_width_discretiser>`.
+
+    Parameters
+    ----------
+    {variables}
+
+    bins: int, default=10
+        Desired number of equal width intervals / bins.
+
+    {return_object}
+
+    {return_boundaries}
+
+    {precision}
+
+    Attributes
+    ----------
+    {binner_dict_}
+
+    {variables_}
+
+    {feature_names_in_}
+
+    {n_features_in_}
+
+    Methods
+    -------
+    {fit}
+
+    {fit_transform}
+
+    {transform}
+
+    See Also
+    --------
+    pandas.cut
+    sklearn.preprocessing.KBinsDiscretizer
+
+    References
+    ----------
+    .. [1] Kotsiantis and Pintelas, "Data preprocessing for supervised leaning,"
+        International Journal of Computer Science,  vol. 1, pp. 111 117, 2006.
+
+    .. [2] Dong. "Beating Kaggle the easy way". Master Thesis.
+        https://www.ke.tu-darmstadt.de/lehre/arbeiten/studien/2015/Dong_Ying.pdf
+
+    Examples
+    --------
+
+    >>> import pandas as pd
+    >>> import numpy as np
+    >>> from feature_engine.discretisation import EqualWidthDiscretiser
+    >>> np.random.seed(42)
+    >>> X = pd.DataFrame(dict(x = np.random.randint(1,100, 100)))
+    >>> ewd = EqualWidthDiscretiser()
+    >>> ewd.fit(X)
+    >>> ewd.transform(X)["x"].value_counts()
+    9    15
+    6    15
+    0    13
+    5    11
+    8     9
+    7     8
+    2     8
+    1     7
+    3     7
+    4     7
+    Name: x, dtype: int64
+    """
+
+    def __init__(
+        self,
+        variables: Union[None, int, str, List[Union[str, int]]] = None,
+        threshold = None,
+        return_object: bool = False,
+        return_boundaries: bool = False,
+        precision: int = 3,
+    ) -> None:
+        
+        if not threshold:
+            raise ValueError(f"threshold not supplied. Please provide a threshold of type float or int.")
+        
+        if not isinstance(threshold, (int, float)):
+            raise TypeError(f"threshold must be an integer or a float. Got type {type(threshold)} instead.")
+
+        super().__init__(return_object, return_boundaries, precision)
+
+        self.variables = _check_variables_input_value(variables)
+        self.threshold = threshold
+
+    def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
+        """
+        Learn the boundaries of the equal width intervals / bins for each
+        variable.
+
+        Parameters
+        ----------
+        X: pandas dataframe of shape = [n_samples, n_features]
+            The training dataset. Can be the entire dataframe, not just the variables
+            to be transformed.
+        y: None
+            y is not needed in this encoder. You can pass y or None.
+        """
+        
+        # check input dataframe
+        X = super().fit(X)
+        
+        # Check threshold is in between max and min of all features in self.variables.
+        thresh_checks = all([self.threshold > min(X[col]) and self.threshold < max(X[col]) for col in self.variables])
+        
+        if not thresh_checks:
+            print(f"threshold outside of range for one or more variables {self.variables}. Features {self.variables} will not be transformed.")
+            
+        return self
+    
+    def transform():
+        pass
\ No newline at end of file
diff --git a/feature_engine/discretisation/tests_binarizer/test_binarizer.py b/feature_engine/discretisation/tests_binarizer/test_binarizer.py
new file mode 100644
index 000000000..e1f09bb5a
--- /dev/null
+++ b/feature_engine/discretisation/tests_binarizer/test_binarizer.py
@@ -0,0 +1,11 @@
+import numpy as np
+import pandas as pd
+
+from feature_engine.discretisation.binarizer import Binarizer
+
+np.random.seed(42)
+X = pd.DataFrame(dict(x = np.random.randint(1, 100, 100)))
+
+b = Binarizer(threshold=200, variables=['x'])
+
+b.fit(X)

From 6cdf958d110cd7be6c0065594a63fe1215659c1a Mon Sep 17 00:00:00 2001
From: Doug <douglazenby@gmail.com>
Date: Thu, 15 Aug 2024 16:10:10 +0100
Subject: [PATCH 02/15] Removing files with -z spelling

---
 feature_engine/discretisation/binarizer.py    | 183 ------------------
 .../tests_binarizer/test_binarizer.py         |  11 --
 2 files changed, 194 deletions(-)
 delete mode 100644 feature_engine/discretisation/binarizer.py
 delete mode 100644 feature_engine/discretisation/tests_binarizer/test_binarizer.py

diff --git a/feature_engine/discretisation/binarizer.py b/feature_engine/discretisation/binarizer.py
deleted file mode 100644
index d9d70dd0a..000000000
--- a/feature_engine/discretisation/binarizer.py
+++ /dev/null
@@ -1,183 +0,0 @@
-from typing import List, Optional, Union
-
-import pandas as pd
-
-from feature_engine._check_init_parameters.check_variables import (
-    _check_variables_input_value,
-)
-from feature_engine._docstrings.fit_attributes import (
-    _binner_dict_docstring,
-    _feature_names_in_docstring,
-    _n_features_in_docstring,
-    _variables_attribute_docstring,
-)
-from feature_engine._docstrings.init_parameters.all_trasnformers import (
-    _variables_numerical_docstring,
-)
-from feature_engine._docstrings.init_parameters.discretisers import (
-    _precision_docstring,
-    _return_boundaries_docstring,
-    _return_object_docstring,
-)
-from feature_engine._docstrings.methods import (
-    _fit_discretiser_docstring,
-    _fit_transform_docstring,
-    _transform_discretiser_docstring,
-)
-from feature_engine._docstrings.substitute import Substitution
-from feature_engine.discretisation.base_discretiser import BaseDiscretiser
-
-
-@Substitution(
-    return_object=_return_object_docstring,
-    return_boundaries=_return_boundaries_docstring,
-    precision=_precision_docstring,
-    binner_dict_=_binner_dict_docstring,
-    fit=_fit_discretiser_docstring,
-    transform=_transform_discretiser_docstring,
-    variables=_variables_numerical_docstring,
-    variables_=_variables_attribute_docstring,
-    feature_names_in_=_feature_names_in_docstring,
-    n_features_in_=_n_features_in_docstring,
-    fit_transform=_fit_transform_docstring,
-)
-class Binarizer(BaseDiscretiser):
-    """
-    TODO: FIX THE DOCSTRING. SEE BELOW FOR EXAMPLE
-    The EqualWidthDiscretiser() divides continuous numerical variables into
-    intervals of the same width, that is, equidistant intervals. Note that the
-    proportion of observations per interval may vary.
-
-    The size of the interval is calculated as:
-
-    .. math::
-
-        ( max(X) - min(X) ) / bins
-
-    where bins, which is the number of intervals, is determined by the user.
-
-    The EqualWidthDiscretiser() works only with numerical variables.
-    A list of variables can be passed as argument. Alternatively, the discretiser
-    will automatically select all numerical variables.
-
-    The EqualWidthDiscretiser() first finds the boundaries for the intervals for
-    each variable. Then, it transforms the variables, that is, sorts the values into
-    the intervals.
-
-    More details in the :ref:`User Guide <equal_width_discretiser>`.
-
-    Parameters
-    ----------
-    {variables}
-
-    bins: int, default=10
-        Desired number of equal width intervals / bins.
-
-    {return_object}
-
-    {return_boundaries}
-
-    {precision}
-
-    Attributes
-    ----------
-    {binner_dict_}
-
-    {variables_}
-
-    {feature_names_in_}
-
-    {n_features_in_}
-
-    Methods
-    -------
-    {fit}
-
-    {fit_transform}
-
-    {transform}
-
-    See Also
-    --------
-    pandas.cut
-    sklearn.preprocessing.KBinsDiscretizer
-
-    References
-    ----------
-    .. [1] Kotsiantis and Pintelas, "Data preprocessing for supervised leaning,"
-        International Journal of Computer Science,  vol. 1, pp. 111 117, 2006.
-
-    .. [2] Dong. "Beating Kaggle the easy way". Master Thesis.
-        https://www.ke.tu-darmstadt.de/lehre/arbeiten/studien/2015/Dong_Ying.pdf
-
-    Examples
-    --------
-
-    >>> import pandas as pd
-    >>> import numpy as np
-    >>> from feature_engine.discretisation import EqualWidthDiscretiser
-    >>> np.random.seed(42)
-    >>> X = pd.DataFrame(dict(x = np.random.randint(1,100, 100)))
-    >>> ewd = EqualWidthDiscretiser()
-    >>> ewd.fit(X)
-    >>> ewd.transform(X)["x"].value_counts()
-    9    15
-    6    15
-    0    13
-    5    11
-    8     9
-    7     8
-    2     8
-    1     7
-    3     7
-    4     7
-    Name: x, dtype: int64
-    """
-
-    def __init__(
-        self,
-        variables: Union[None, int, str, List[Union[str, int]]] = None,
-        threshold = None,
-        return_object: bool = False,
-        return_boundaries: bool = False,
-        precision: int = 3,
-    ) -> None:
-        
-        if not threshold:
-            raise ValueError(f"threshold not supplied. Please provide a threshold of type float or int.")
-        
-        if not isinstance(threshold, (int, float)):
-            raise TypeError(f"threshold must be an integer or a float. Got type {type(threshold)} instead.")
-
-        super().__init__(return_object, return_boundaries, precision)
-
-        self.variables = _check_variables_input_value(variables)
-        self.threshold = threshold
-
-    def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
-        """
-        Learn the boundaries of the equal width intervals / bins for each
-        variable.
-
-        Parameters
-        ----------
-        X: pandas dataframe of shape = [n_samples, n_features]
-            The training dataset. Can be the entire dataframe, not just the variables
-            to be transformed.
-        y: None
-            y is not needed in this encoder. You can pass y or None.
-        """
-        
-        # check input dataframe
-        X = super().fit(X)
-        
-        # Check threshold is in between max and min of all features in self.variables.
-        thresh_checks = all([self.threshold > min(X[col]) and self.threshold < max(X[col]) for col in self.variables])
-        
-        if not thresh_checks:
-            print(f"threshold outside of range for one or more variables {self.variables}. Features {self.variables} will not be transformed.")
-            
-        return self
-    
-    def transform():
-        pass
\ No newline at end of file
diff --git a/feature_engine/discretisation/tests_binarizer/test_binarizer.py b/feature_engine/discretisation/tests_binarizer/test_binarizer.py
deleted file mode 100644
index e1f09bb5a..000000000
--- a/feature_engine/discretisation/tests_binarizer/test_binarizer.py
+++ /dev/null
@@ -1,11 +0,0 @@
-import numpy as np
-import pandas as pd
-
-from feature_engine.discretisation.binarizer import Binarizer
-
-np.random.seed(42)
-X = pd.DataFrame(dict(x = np.random.randint(1, 100, 100)))
-
-b = Binarizer(threshold=200, variables=['x'])
-
-b.fit(X)

From 5b1927d3b829ff5a38b90a74451cdc3d5cd09cc4 Mon Sep 17 00:00:00 2001
From: Doug <douglazenby@gmail.com>
Date: Thu, 15 Aug 2024 16:10:50 +0100
Subject: [PATCH 03/15] Initial commit of Binariser class

---
 feature_engine/discretisation/binariser.py | 233 +++++++++++++++++++++
 1 file changed, 233 insertions(+)
 create mode 100644 feature_engine/discretisation/binariser.py

diff --git a/feature_engine/discretisation/binariser.py b/feature_engine/discretisation/binariser.py
new file mode 100644
index 000000000..cf8f98373
--- /dev/null
+++ b/feature_engine/discretisation/binariser.py
@@ -0,0 +1,233 @@
+from typing import List, Optional, Union
+
+import numpy as np
+import pandas as pd
+
+from feature_engine._check_init_parameters.check_variables import (
+    _check_variables_input_value,
+)
+from feature_engine._docstrings.fit_attributes import (
+    _binner_dict_docstring,
+    _feature_names_in_docstring,
+    _n_features_in_docstring,
+    _variables_attribute_docstring,
+)
+from feature_engine._docstrings.init_parameters.all_trasnformers import (
+    _variables_numerical_docstring,
+)
+from feature_engine._docstrings.init_parameters.discretisers import (
+    _precision_docstring,
+    _return_boundaries_docstring,
+    _return_object_docstring,
+)
+from feature_engine._docstrings.methods import (
+    _fit_discretiser_docstring,
+    _fit_transform_docstring,
+    _transform_discretiser_docstring,
+)
+from feature_engine._docstrings.substitute import Substitution
+from feature_engine.discretisation.base_discretiser import BaseDiscretiser
+
+
+@Substitution(
+    return_object=_return_object_docstring,
+    return_boundaries=_return_boundaries_docstring,
+    precision=_precision_docstring,
+    binner_dict_=_binner_dict_docstring,
+    fit=_fit_discretiser_docstring,
+    transform=_transform_discretiser_docstring,
+    variables=_variables_numerical_docstring,
+    variables_=_variables_attribute_docstring,
+    feature_names_in_=_feature_names_in_docstring,
+    n_features_in_=_n_features_in_docstring,
+    fit_transform=_fit_transform_docstring,
+)
+class Binariser(BaseDiscretiser):
+    """
+    The Binariser() divides continuous numerical variables into two intervals, where
+    the value `threshold`, the point at which the interval is  divided, is determined
+    by the user.
+
+    The Binariser() works only with numerical variables.
+    A list of variables can be passed as argument. Alternatively, the discretiser
+    will automatically select all numerical variables.
+
+    The Binariser() first finds the boundaries for the intervals for
+    each variable. Then, it transforms the variables, that is, sorts the values into
+    the intervals.
+
+    More details in the :ref:`User Guide <equal_width_discretiser>`.
+
+    Parameters
+    ----------
+    {variables}
+
+    threshold: int, float, default=None
+        Desired value at which to divide the interval.
+
+    {return_object}
+
+    {return_boundaries}
+
+    {precision}
+
+    Attributes
+    ----------
+    {binner_dict_}
+
+    {variables_}
+
+    {feature_names_in_}
+
+    {n_features_in_}
+
+    Methods
+    -------
+    {fit}
+
+    {fit_transform}
+
+    {transform}
+
+    See Also
+    --------
+    pandas.cut
+    sklearn.preprocessing.KBinsDiscretizer
+
+    References
+    ----------
+    .. [1] Kotsiantis and Pintelas, "Data preprocessing for supervised leaning,"
+        International Journal of Computer Science,  vol. 1, pp. 111 117, 2006.
+
+    .. [2] Dong. "Beating Kaggle the easy way". Master Thesis.
+        https://www.ke.tu-darmstadt.de/lehre/arbeiten/studien/2015/Dong_Ying.pdf
+
+    Examples
+    --------
+
+    >>> import pandas as pd
+    >>> import numpy as np
+    >>> from feature_engine.discretisation import EqualWidthDiscretiser
+    >>> np.random.seed(42)
+    >>> X = pd.DataFrame(dict(x = np.random.randint(1,100, 100)))
+    >>> transformer = Binariser(threshold=50)
+    >>> transformer.fit(X)
+    >>> transformer.transform(X)['x'].value_counts()
+        x
+        1    56
+        0    44
+        Name: count, dtype: int64
+    """
+
+    def __init__(
+        self,
+        threshold: Union[None, int, float] = None,
+        variables: Union[None, int, str, List[Union[str, int]]] = None,
+        return_object: bool = False,
+        return_boundaries: bool = False,
+        precision: int = 3,
+    ) -> None:
+
+        if threshold is None:
+            raise TypeError(
+                "threshold not supplied."
+                " Please provide a threshold of type float or int."
+            )
+
+        if not isinstance(threshold, (int, float)):
+            raise TypeError(
+                "threshold must be an integer or a float."
+                f" Got type '{type(threshold).__name__}' instead."
+            )
+
+        super().__init__(return_object, return_boundaries, precision)
+
+        self.variables = _check_variables_input_value(variables)
+        self.threshold = threshold
+
+    def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
+        """
+        Learn the boundaries of the bins for each
+        variable.
+
+        Parameters
+        ----------
+        X: pandas dataframe of shape = [n_samples, n_features]
+            The training dataset. Can be the entire dataframe, not just the variables
+            to be transformed.
+        y: None
+            y is not needed in this encoder. You can pass y or None.
+        """
+
+        # check input dataframe
+        X = super().fit(X)
+
+        failed_threshold_check = []
+        self.binner_dict_ = {}
+        for var in self.variables_:
+            # Check that threshold is within range
+            if (self.threshold < min(X[var])) or (self.threshold > max(X[var])):
+                # Omit these features from transformation step
+                failed_threshold_check.append(var)
+            else:
+                self.binner_dict_[var] = [
+                    float("-inf"),
+                    np.float64(self.threshold),
+                    float("inf"),
+                ]
+
+        if failed_threshold_check:
+            print(
+                "threshold outside of range for one or more variables."
+                f" Features {failed_threshold_check} have not been transformed."
+            )
+
+        # A list of features that satisfy threshold check and will be transformed
+        self.variables_trans_ = [
+            var for var in self.variables_ if var not in failed_threshold_check
+        ]
+
+        return self
+
+    def transform(self, X: pd.DataFrame) -> pd.DataFrame:
+        """Sort the variable values into the intervals.
+
+        Parameters
+        ----------
+        X: pandas dataframe of shape = [n_samples, n_features]
+            The data to transform.
+
+        Returns
+        -------
+        X_new: pandas dataframe of shape = [n_samples, n_features]
+            The transformed data with the discrete variables.
+        """
+
+        # check input dataframe and if class was fitted
+        X = self._check_transform_input_and_state(X)
+
+        # transform variables
+        if self.return_boundaries is True:
+            for feature in self.variables_trans_:
+                X[feature] = pd.cut(
+                    X[feature],
+                    self.binner_dict_[feature],
+                    precision=self.precision,
+                    include_lowest=True,
+                )
+            X[self.variables_trans_] = X[self.variables_trans_].astype(str)
+
+        else:
+            for feature in self.variables_trans_:
+                X[feature] = pd.cut(
+                    X[feature],
+                    self.binner_dict_[feature],
+                    labels=False,
+                    include_lowest=True,
+                )
+
+            # return object
+            if self.return_object:
+                X[self.variables_trans_] = X[self.variables_trans_].astype("O")
+
+        return X

From c2b138aa597088fd6a44382d4c489d8c29af7790 Mon Sep 17 00:00:00 2001
From: Doug <douglazenby@gmail.com>
Date: Thu, 15 Aug 2024 16:11:43 +0100
Subject: [PATCH 04/15] Committing tests

---
 tests/test_discretisation/test_binariser.py | 84 +++++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100644 tests/test_discretisation/test_binariser.py

diff --git a/tests/test_discretisation/test_binariser.py b/tests/test_discretisation/test_binariser.py
new file mode 100644
index 000000000..b84c862ad
--- /dev/null
+++ b/tests/test_discretisation/test_binariser.py
@@ -0,0 +1,84 @@
+import numpy as np
+import pytest
+from sklearn.exceptions import NotFittedError
+
+from feature_engine.discretisation.binariser import Binariser
+
+
+def test_automatically_find_variables_and_return_as_numeric(df_normal_dist):
+    # test case 1: automatically select variables, return_object=False
+    transformer = Binariser(threshold=0, variables=None, return_object=False)
+    X = transformer.fit_transform(df_normal_dist)
+
+    # transform input
+    Xt = np.where(df_normal_dist["var"] > 0, 1, 0)
+    bins = [float("-inf"), np.float64(0), float("inf")]
+
+    # init params
+    assert transformer.threshold == 0
+    assert transformer.variables is None
+    assert transformer.return_object is False
+    # fit params
+    assert transformer.variables_ == ["var"]
+    assert transformer.n_features_in_ == 1
+    assert transformer.binner_dict_["var"] == bins
+    # check transformed output against Xt
+    assert all(x == y for x, y in zip(X["var"].values, Xt))
+
+
+def test_automatically_find_variables_and_return_as_object(df_normal_dist):
+    transformer = Binariser(threshold=0, variables=None, return_object=True)
+    X = transformer.fit_transform(df_normal_dist)
+    assert X["var"].dtypes == "O"
+
+
+def test_error_when_threshold_not_int_or_float():
+    with pytest.raises(TypeError):
+        Binariser(threshold="other")
+
+
+def test_error_when_threshold_not_supplied():
+    with pytest.raises(TypeError):
+        Binariser()
+
+
+def test_error_if_return_object_not_bool():
+    with pytest.raises(ValueError):
+        Binariser(threshold=0, return_object="other")
+
+
+def test_error_if_input_df_contains_na_in_fit(df_na):
+    # test case 3: when dataset contains na, fit method
+    with pytest.raises(ValueError):
+        transformer = Binariser(threshold=0)
+        transformer.fit(df_na)
+
+
+def test_error_if_input_df_contains_na_in_transform(df_vartypes, df_na):
+    # test case 4: when dataset contains na, transform method
+    with pytest.raises(ValueError):
+        transformer = Binariser(threshold=0)
+        transformer.fit(df_vartypes)
+        transformer.transform(df_na[["Name", "City", "Age", "Marks", "dob"]])
+
+
+def test_non_fitted_error(df_vartypes):
+    with pytest.raises(NotFittedError):
+        transformer = Binariser(threshold=0)
+        transformer.transform(df_vartypes)
+
+
+def test_stout_threshold_out_of_range(df_vartypes, capsys):
+    transformer = Binariser(threshold=20, variables=None, return_object=False)
+    _ = transformer.fit_transform(df_vartypes[["Age", "Marks"]])
+    captured = capsys.readouterr()
+    assert (
+        captured.out
+        == "threshold outside of range for one or more variables. Features ['Marks'] have not been transformed.\n"
+    )
+
+
+def test_return_boundaries(df_normal_dist):
+    transformer = Binariser(threshold=0, return_boundaries=True)
+    Xt = transformer.fit_transform(df_normal_dist)
+    assert all(x for x in df_normal_dist["var"].unique() if x not in Xt)

From 008d495f4599806690fdea6cf8798675382db04c Mon Sep 17 00:00:00 2001
From: Doug <douglazenby@gmail.com>
Date: Thu, 15 Aug 2024 16:13:13 +0100
Subject: [PATCH 05/15] typo: fixing typo in ArbitraryDiscretiser

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8f01bfd1c..afbceb101 100644
--- a/README.md
+++ b/README.md
@@ -94,7 +94,7 @@ Please share your story by answering 1 quick question
 * EqualWidthDiscretiser
 * GeometricWidthDiscretiser
 * DecisionTreeDiscretiser
-* ArbitraryDiscreriser
+* ArbitraryDiscretiser
 
 ### Outlier Handling methods
 * Winsorizer

From 994f119036c2abb05f7078b4c62a00906702f8b2 Mon Sep 17 00:00:00 2001
From: Doug <douglazenby@gmail.com>
Date: Thu, 15 Aug 2024 16:14:00 +0100
Subject: [PATCH 06/15] Adding Binariser to list of transformers

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index afbceb101..52d448bf1 100644
--- a/README.md
+++ b/README.md
@@ -95,6 +95,7 @@ Please share your story by answering 1 quick question
 * GeometricWidthDiscretiser
 * DecisionTreeDiscretiser
 * ArbitraryDiscretiser
+* Binariser
 
 ### Outlier Handling methods
 * Winsorizer

From 3a85d33702d95311b717264b3dfc8e64cbed8151 Mon Sep 17 00:00:00 2001
From: Doug <douglazenby@gmail.com>
Date: Thu, 15 Aug 2024 16:18:21 +0100
Subject: [PATCH 07/15] Adding Binariser to the index

---
 docs/index.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/index.rst b/docs/index.rst
index e30eb7eb0..29962c2a6 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -183,6 +183,7 @@ discretization with decision trees:
 - :doc:`api_doc/discretisation/EqualWidthDiscretiser`: sorts variable into equal width intervals
 - :doc:`api_doc/discretisation/DecisionTreeDiscretiser`: uses decision trees to create finite variables
 - :doc:`api_doc/discretisation/GeometricWidthDiscretiser`: sorts variable into geometrical intervals
+- :doc:`api_doc/discretisation/Binariser`: two intervals determined by a threshold
 
 Outlier Capping or Removal
 ~~~~~~~~~~~~~~~~~~~~~~~~~~

From 40bd686f73ed331e108f6a21e54861b3b13d761f Mon Sep 17 00:00:00 2001
From: Doug <douglazenby@gmail.com>
Date: Thu, 15 Aug 2024 16:22:57 +0100
Subject: [PATCH 08/15] Adding Binariser API documentation page

---
 docs/api_doc/discretisation/Binariser.rst | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 docs/api_doc/discretisation/Binariser.rst

diff --git a/docs/api_doc/discretisation/Binariser.rst b/docs/api_doc/discretisation/Binariser.rst
new file mode 100644
index 000000000..e2445c04b
--- /dev/null
+++ b/docs/api_doc/discretisation/Binariser.rst
@@ -0,0 +1,5 @@
+Binariser
+=========
+
+.. autoclass:: feature_engine.discretisation.Binariser
+    :members:

From 76750585bb963ddaf380ea24a7b67905dfff16fc Mon Sep 17 00:00:00 2001
From: Doug <douglazenby@gmail.com>
Date: Thu, 15 Aug 2024 16:58:49 +0100
Subject: [PATCH 09/15] Renaming Binariser to BinaryDiscretiser to avoid naming
 conflicts and for consistency with other discretisers

---
 feature_engine/discretisation/__init__.py   |  2 ++
 feature_engine/discretisation/binariser.py  | 10 +++++-----
 tests/test_discretisation/test_binariser.py | 22 ++++++++++-----------
 3 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/feature_engine/discretisation/__init__.py b/feature_engine/discretisation/__init__.py
index 5016d1aa9..9d1e602e6 100644
--- a/feature_engine/discretisation/__init__.py
+++ b/feature_engine/discretisation/__init__.py
@@ -8,6 +8,7 @@
 from .equal_frequency import EqualFrequencyDiscretiser
 from .equal_width import EqualWidthDiscretiser
 from .geometric_width import GeometricWidthDiscretiser
+from .binariser import BinaryDiscretiser
 
 __all__ = [
     "DecisionTreeDiscretiser",
@@ -15,4 +16,5 @@
     "EqualWidthDiscretiser",
     "ArbitraryDiscretiser",
     "GeometricWidthDiscretiser",
+    "BinaryDiscretiser",
 ]
diff --git a/feature_engine/discretisation/binariser.py b/feature_engine/discretisation/binariser.py
index cf8f98373..a78223ddd 100644
--- a/feature_engine/discretisation/binariser.py
+++ b/feature_engine/discretisation/binariser.py
@@ -42,17 +42,17 @@
     n_features_in_=_n_features_in_docstring,
     fit_transform=_fit_transform_docstring,
 )
-class Binariser(BaseDiscretiser):
+class BinaryDiscretiser(BaseDiscretiser):
     """
-    The Binariser() divides continuous numerical variables into two intervals, where
+    The BinaryDiscretiser() divides continuous numerical variables into two intervals, where
     the value `threshold`, the point at which the interval is  divided, is determined
     by the user.
 
-    The Binariser() works only with numerical variables.
+    The BinaryDiscretiser() works only with numerical variables.
     A list of variables can be passed as argument. Alternatively, the discretiser
     will automatically select all numerical variables.
 
-    The Binariser() first finds the boundaries for the intervals for
+    The BinaryDiscretiser() first finds the boundaries for the intervals for
     each variable. Then, it transforms the variables, that is, sorts the values into
     the intervals.
 
@@ -110,7 +110,7 @@ class Binariser(BaseDiscretiser):
     >>> from feature_engine.discretisation import EqualWidthDiscretiser
     >>> np.random.seed(42)
     >>> X = pd.DataFrame(dict(x = np.random.randint(1,100, 100)))
-    >>> transformer = Binariser(threshold=50)
+    >>> transformer = BinaryDiscretiser(threshold=50)
     >>> transformer.fit(X)
     >>> transformer.transform(X)['x'].value_counts()
         x
diff --git a/tests/test_discretisation/test_binariser.py b/tests/test_discretisation/test_binariser.py
index b84c862ad..bc0dda892 100644
--- a/tests/test_discretisation/test_binariser.py
+++ b/tests/test_discretisation/test_binariser.py
@@ -2,12 +2,12 @@
 import pytest
 from sklearn.exceptions import NotFittedError
 
-from feature_engine.discretisation.binariser import Binariser
+from feature_engine.discretisation import BinaryDiscretiser
 
 
 def test_automatically_find_variables_and_return_as_numeric(df_normal_dist):
     # test case 1: automatically select variables, return_object=False
-    transformer = Binariser(threshold=0, variables=None, return_object=False)
+    transformer = BinaryDiscretiser(threshold=0, variables=None, return_object=False)
     X = transformer.fit_transform(df_normal_dist)
 
     # transform input
@@ -27,49 +27,49 @@ def test_automatically_find_variables_and_return_as_numeric(df_normal_dist):
 
 
 def test_automatically_find_variables_and_return_as_object(df_normal_dist):
-    transformer = Binariser(threshold=0, variables=None, return_object=True)
+    transformer = BinaryDiscretiser(threshold=0, variables=None, return_object=True)
     X = transformer.fit_transform(df_normal_dist)
     assert X["var"].dtypes == "O"
 
 
 def test_error_when_threshold_not_int_or_float():
     with pytest.raises(TypeError):
-        Binariser(threshold="other")
+        BinaryDiscretiser(threshold="other")
 
 
 def test_error_when_threshold_not_supplied():
     with pytest.raises(TypeError):
-        Binariser()
+        BinaryDiscretiser()
 
 
 def test_error_if_return_object_not_bool():
     with pytest.raises(ValueError):
-        Binariser(threshold=0, return_object="other")
+        BinaryDiscretiser(threshold=0, return_object="other")
 
 
 def test_error_if_input_df_contains_na_in_fit(df_na):
     # test case 3: when dataset contains na, fit method
     with pytest.raises(ValueError):
-        transformer = Binariser(threshold=0)
+        transformer = BinaryDiscretiser(threshold=0)
         transformer.fit(df_na)
 
 
 def test_error_if_input_df_contains_na_in_transform(df_vartypes, df_na):
     # test case 4: when dataset contains na, transform method
     with pytest.raises(ValueError):
-        transformer = Binariser(threshold=0)
+        transformer = BinaryDiscretiser(threshold=0)
         transformer.fit(df_vartypes)
         transformer.transform(df_na[["Name", "City", "Age", "Marks", "dob"]])
 
 
 def test_non_fitted_error(df_vartypes):
     with pytest.raises(NotFittedError):
-        transformer = Binariser(threshold=0)
+        transformer = BinaryDiscretiser(threshold=0)
         transformer.transform(df_vartypes)
 
 
 def test_stout_threshold_out_of_range(df_vartypes, capsys):
-    transformer = Binariser(threshold=20, variables=None, return_object=False)
+    transformer = BinaryDiscretiser(threshold=20, variables=None, return_object=False)
     _ = transformer.fit_transform(df_vartypes[["Age", "Marks"]])
     captured = capsys.readouterr()
     assert (
@@ -79,6 +79,6 @@ def test_stout_threshold_out_of_range(df_vartypes, capsys):
 
 
 def test_return_boundaries(df_normal_dist):
-    transformer = Binariser(threshold=0, return_boundaries=True)
+    transformer = BinaryDiscretiser(threshold=0, return_boundaries=True)
     Xt = transformer.fit_transform(df_normal_dist)
     assert all(x for x in df_normal_dist["var"].unique() if x not in Xt)

From 9d3efab428a00e6660a229a9b46a49193303d067 Mon Sep 17 00:00:00 2001
From: Doug <douglazenby@gmail.com>
Date: Thu, 15 Aug 2024 17:00:58 +0100
Subject: [PATCH 10/15] Updating to BinaryDiscretiser

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 52d448bf1..594566859 100644
--- a/README.md
+++ b/README.md
@@ -95,7 +95,7 @@ Please share your story by answering 1 quick question
 * GeometricWidthDiscretiser
 * DecisionTreeDiscretiser
 * ArbitraryDiscretiser
-* Binariser
+* BinaryDiscretiser
 
 ### Outlier Handling methods
 * Winsorizer

From 85c7831d0cb57b87fc33a33d6613324f461cf6be Mon Sep 17 00:00:00 2001
From: Doug <douglazenby@gmail.com>
Date: Thu, 15 Aug 2024 17:02:28 +0100
Subject: [PATCH 11/15] Updating to BinaryDiscretiser

---
 docs/index.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/index.rst b/docs/index.rst
index 29962c2a6..fa8e15668 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -183,7 +183,7 @@ discretization with decision trees:
 - :doc:`api_doc/discretisation/EqualWidthDiscretiser`: sorts variable into equal width intervals
 - :doc:`api_doc/discretisation/DecisionTreeDiscretiser`: uses decision trees to create finite variables
 - :doc:`api_doc/discretisation/GeometricWidthDiscretiser`: sorts variable into geometrical intervals
-- :doc:`api_doc/discretisation/Binariser`: two intervals determined by a threshold
+- :doc:`api_doc/discretisation/BinaryDiscretiser`: sorts variable into two intervals determined by a threshold
 
 Outlier Capping or Removal
 ~~~~~~~~~~~~~~~~~~~~~~~~~~

From 0436a69e3b38304529b4c8ebe7040b7ef569cad2 Mon Sep 17 00:00:00 2001
From: Doug <douglazenby@gmail.com>
Date: Thu, 15 Aug 2024 17:04:37 +0100
Subject: [PATCH 12/15] Removing renamed file

---
 docs/api_doc/discretisation/Binariser.rst | 5 -----
 1 file changed, 5 deletions(-)
 delete mode 100644 docs/api_doc/discretisation/Binariser.rst

diff --git a/docs/api_doc/discretisation/Binariser.rst b/docs/api_doc/discretisation/Binariser.rst
deleted file mode 100644
index e2445c04b..000000000
--- a/docs/api_doc/discretisation/Binariser.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-Binariser
-=========
-
-.. autoclass:: feature_engine.discretisation.Binariser
-    :members:

From bf85e5510cd7ea0b92b491e393a1426193f37729 Mon Sep 17 00:00:00 2001
From: Doug <douglazenby@gmail.com>
Date: Thu, 15 Aug 2024 17:05:05 +0100
Subject: [PATCH 13/15] Adding BinaryDiscretiser to api docs

---
 docs/api_doc/discretisation/BinaryDiscretiser.rst | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 docs/api_doc/discretisation/BinaryDiscretiser.rst

diff --git a/docs/api_doc/discretisation/BinaryDiscretiser.rst b/docs/api_doc/discretisation/BinaryDiscretiser.rst
new file mode 100644
index 000000000..53d0d14e8
--- /dev/null
+++ b/docs/api_doc/discretisation/BinaryDiscretiser.rst
@@ -0,0 +1,5 @@
+BinaryDiscretiser
+=================
+
+.. autoclass:: feature_engine.discretisation.BinaryDiscretiser
+    :members:

From 2ee9f2265abe39604d96fa9bbfb2f949517f9062 Mon Sep 17 00:00:00 2001
From: Doug <douglazenby@gmail.com>
Date: Thu, 15 Aug 2024 17:08:02 +0100
Subject: [PATCH 14/15] Adding BinaryDiscretiser

---
 docs/api_doc/discretisation/index.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/api_doc/discretisation/index.rst b/docs/api_doc/discretisation/index.rst
index 75c484e9d..9fad0bd14 100644
--- a/docs/api_doc/discretisation/index.rst
+++ b/docs/api_doc/discretisation/index.rst
@@ -18,6 +18,7 @@ into continuous intervals.
 :class:`ArbitraryDiscretiser()`          Sorts values into intervals predefined by the user.
 :class:`DecisionTreeDiscretiser()`       Replaces values by predictions of a decision tree, which are discrete.
 :class:`GeometricWidthDiscretiser()`     Sorts variable into geometrical intervals.
+:class:`BinaryDiscretiser()`             Sorts variable into two intervals determined by a threshold.
 =====================================  ========================================================================
 
 

From ba71711cd4a253f8a8c5baafe213a72cf640de21 Mon Sep 17 00:00:00 2001
From: Doug <douglazenby@gmail.com>
Date: Thu, 15 Aug 2024 17:26:36 +0100
Subject: [PATCH 15/15] typo: Fixing typo.

---
 feature_engine/discretisation/binariser.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/feature_engine/discretisation/binariser.py b/feature_engine/discretisation/binariser.py
index a78223ddd..d6e41570f 100644
--- a/feature_engine/discretisation/binariser.py
+++ b/feature_engine/discretisation/binariser.py
@@ -44,9 +44,9 @@
 )
 class BinaryDiscretiser(BaseDiscretiser):
     """
-    The BinaryDiscretiser() divides continuous numerical variables into two intervals, where
-    the value `threshold`, the point at which the interval is  divided, is determined
-    by the user.
+    The BinaryDiscretiser() divides continuous numerical variables into two intervals,
+    where the value `threshold`, the point at which the interval is divided, is
+    determined by the user.
 
     The BinaryDiscretiser() works only with numerical variables.
     A list of variables can be passed as argument. Alternatively, the discretiser
@@ -56,8 +56,6 @@ class BinaryDiscretiser(BaseDiscretiser):
     each variable. Then, it transforms the variables, that is, sorts the values into
     the intervals.
 
-    More details in the :ref:`User Guide <equal_width_discretiser>`.
-
     Parameters
     ----------
     {variables}