From ab8eff8b2d60206de851b3df33ac8f3472220d1f Mon Sep 17 00:00:00 2001 From: mdymczyk Date: Mon, 5 Aug 2019 17:38:50 +0900 Subject: [PATCH 1/3] Initial debiasing recipe using LFR. --- transformers/mli/debiasing_lfr.py | 63 +++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 transformers/mli/debiasing_lfr.py diff --git a/transformers/mli/debiasing_lfr.py b/transformers/mli/debiasing_lfr.py new file mode 100644 index 00000000..40529cce --- /dev/null +++ b/transformers/mli/debiasing_lfr.py @@ -0,0 +1,63 @@ +from h2oaicore.transformer_utils import CustomTransformer +from h2oaicore.systemutils import config + +import datatable as dt +import numpy as np + +from aif360.datasets import BinaryLabelDataset +from aif360.algorithms.preprocessing.lfr import LFR + + +class LfrDebiasingTransformer(CustomTransformer): + _regression = False + _multiclass = False + + _numeric_output = False + + _modules_needed_by_name = ['aif360'] + + _display_name = "LrfDebiasingTransformer" + + @staticmethod + def get_default_properties(): + return dict( + col_type="all", + min_cols="all", + max_cols="all", + relative_importance=1, + num_default_instances=1, + ) + + def fit(self, X: dt.Frame, y: np.array = None): + # TODO Do I have here access to config? + privileged_groups = config.privileged_groups + unprivileged_groups = config.unprivileged_groups + favorable_label = config.favorable_label + unfaborable_label = config.unfavorable_label + protected_attribute_names = config.protected_attribute_names + + label_names = np.unique(y) + + self.lfr = LFR( + unprivileged_groups=unprivileged_groups, + privileged_groups=privileged_groups, + verbose=0, + ) + + self.lfr.fit( + BinaryLabelDataset( + favorable_label=favorable_label, + unfavorable_label=unfaborable_label, + df=X.to_pandas(), + label_names=label_names, + protected_attribute_names=protected_attribute_names, + ) + ) + + def fit_transform(self, X: dt.Frame, y: np.array = None): + self.fit(X, y) + return self.transform(X) + + def transform(self, X: dt.Frame): + transformed_X: BinaryLabelDataset = self.lfr.transform(X.to_pandas()) + return transformed_X.features \ No newline at end of file From 034ab17da695f644c63f1025cdf44b467172edfc Mon Sep 17 00:00:00 2001 From: mdymczyk Date: Mon, 5 Aug 2019 20:58:31 +0900 Subject: [PATCH 2/3] Move aif360 import inside the method. --- transformers/mli/debiasing_lfr.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/transformers/mli/debiasing_lfr.py b/transformers/mli/debiasing_lfr.py index 40529cce..5270c99f 100644 --- a/transformers/mli/debiasing_lfr.py +++ b/transformers/mli/debiasing_lfr.py @@ -4,9 +4,6 @@ import datatable as dt import numpy as np -from aif360.datasets import BinaryLabelDataset -from aif360.algorithms.preprocessing.lfr import LFR - class LfrDebiasingTransformer(CustomTransformer): _regression = False @@ -29,7 +26,10 @@ def get_default_properties(): ) def fit(self, X: dt.Frame, y: np.array = None): - # TODO Do I have here access to config? + from aif360.datasets import BinaryLabelDataset + from aif360.algorithms.preprocessing.lfr import LFR + + self._validate_input() privileged_groups = config.privileged_groups unprivileged_groups = config.unprivileged_groups favorable_label = config.favorable_label @@ -59,5 +59,18 @@ def fit_transform(self, X: dt.Frame, y: np.array = None): return self.transform(X) def transform(self, X: dt.Frame): + from aif360.datasets import BinaryLabelDataset transformed_X: BinaryLabelDataset = self.lfr.transform(X.to_pandas()) - return transformed_X.features \ No newline at end of file + return transformed_X.features + + def _validate_input(self): + if "privileged_groups" not in config: + raise ValueError("Privileged groups missing from config!") + if "unprivileged_groups" not in config: + raise ValueError("Unprivileged groups missing from config!") + if "favorable_label" not in config: + raise ValueError("Favorable label missing from config!") + if "unfavorable_label" not in config: + raise ValueError("Unfavorable label missing from config!") + if "protected_attribute_names" not in config: + raise ValueError("Protected attribute names missing from config!") From d4144076f081df2e4cecb336e36fb459980357b5 Mon Sep 17 00:00:00 2001 From: mdymczyk Date: Wed, 7 Aug 2019 19:43:58 +0900 Subject: [PATCH 3/3] LFR transformer --- .../how_to_debug_transformer.cpython-36.pyc | Bin 0 -> 2631 bytes transformers/mli/debiasing_lfr.py | 104 +++++++++++------- 2 files changed, 64 insertions(+), 40 deletions(-) create mode 100644 transformers/__pycache__/how_to_debug_transformer.cpython-36.pyc diff --git a/transformers/__pycache__/how_to_debug_transformer.cpython-36.pyc b/transformers/__pycache__/how_to_debug_transformer.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..97a6a24d4c3bb63791a8c43dcc7df81edc7c1aac GIT binary patch literal 2631 zcmZ`*OOG2x5bo}IcV|V4*ye)hN+0nQrfncOKsE zcCxm{7rY2KaNwRlz+d2}%#~CALM~9%&U)<-+UlN~eoR$;Usbg~SzT?cfBo4L-y`I2 zve0ahZ$np)Vc;YnoN|`ZfSIQi*yia3F1NUSOoG~1gge|lCfuE}z=O9M_u$PF-qZ@} zyv`f2t07kS$^nzRqA?|uaCV=x{PE7+qbM7sqPbr@Z|b7S#na(lGiquX<*HZ6Ovq+2 z)GFbkS@fFQGI=IsDwJy8`KEcJ80e(PqqOxBEG69-M{L&Xz6DQp0|rg{l+!7lfZho$ z!7EOFW+m;h%VfezQle~uMOJrwwcYOUNJpLLBGUUphFPLilJB)+^(^r3MM?gk$O;J4}kCZR4ugGsos(jJVe#%vZ1t)rJfA|bRvZMsG!&dkBU1qS^#baep+ zK>`Z28aja(i=*Rqb#`m0bdfzcMLwpm8`}w<_*C04Xfh>_tqGm5-^eed2iS6U?3J`n zx5*Un4Oz+h)|7x`Yo{cAHl>sp$wrA2gY{ei+J|9x9u-`IVGoWzSp%x*ujIgzc1izW zhxYv$rUqC9zkRc!M=dHHIAJCw!tkoGktRyVMK;)c2S12kcgeW@9|zlEnB<8L!}dm6 z#8Ij?K{z?uat)4^lSi3Q)oKsIVJJm7d-d{Ro*WLv$H7WI%)&U0l!9H(Kqk3{Sg@5y zdlG$;E)EzACzhg-m3N}s_}qXgLVdap6gKGi%Kw|%1*+gIS`mg-BMh^G4^x!=FgzSa z>1;*fXJ?q8>Qi(isyYkZz~V9Uz7#C;{Qnkqodk=p49CE3V1sNjA7qF7!tK|D*RON; z1v8oNK(50aufL$Y!B>v$V1+xt_iDBJ{b(_~;Pdl_p|%BGp>|GTJL~SeX}|Hrpw~!L zDDvoOD)_s23MnRRd}&ER`*hQigGh+cwFZJV<&86R>34{*meUPfpfrwjWkqTF5-mf_ z<01|9Xdo&tOY#uj6iCt0Ogb+`8fhp_VUi6BsiQm=En3+;iS?Ua7zfs&N>O`m}C_8cH`m!@`KhSNl(haykc4VK8i>Hj1Y9EW3?9b#sBE05Xs=>VM=ejD z!-aWM(ggNrwDIJIV;)AAxLT!n7#GLQPU@2sCQvoXw?hM9{P8GIgq@cJSmcok_nB@+u{ z7%p@=WfL1>6Uvr}F&m=#mUe!ku@SuJo-C6*^1k{=qY9XS8`~p-d z9|pV?T;|gYtU)Dmh1EZUe<5KAiBJneUc^xR>-bMVV?msu?+&7zN2+224i*oy!N@Q% q^9ZkinOpe58LMw$r1Caqm&+z5pu&8yA*SKyc;1=D+BI**qyGZaqGW6U literal 0 HcmV?d00001 diff --git a/transformers/mli/debiasing_lfr.py b/transformers/mli/debiasing_lfr.py index 5270c99f..7f4defc1 100644 --- a/transformers/mli/debiasing_lfr.py +++ b/transformers/mli/debiasing_lfr.py @@ -1,5 +1,4 @@ from h2oaicore.transformer_utils import CustomTransformer -from h2oaicore.systemutils import config import datatable as dt import numpy as np @@ -9,8 +8,6 @@ class LfrDebiasingTransformer(CustomTransformer): _regression = False _multiclass = False - _numeric_output = False - _modules_needed_by_name = ['aif360'] _display_name = "LrfDebiasingTransformer" @@ -22,55 +19,82 @@ def get_default_properties(): min_cols="all", max_cols="all", relative_importance=1, - num_default_instances=1, ) + @staticmethod + def do_acceptance_test(): + return False + def fit(self, X: dt.Frame, y: np.array = None): + from h2oaicore.systemutils import config from aif360.datasets import BinaryLabelDataset from aif360.algorithms.preprocessing.lfr import LFR - self._validate_input() - privileged_groups = config.privileged_groups - unprivileged_groups = config.unprivileged_groups - favorable_label = config.favorable_label - unfaborable_label = config.unfavorable_label - protected_attribute_names = config.protected_attribute_names + if y is not None: + if 'recipe_dict' in config: + config = config['recipe_dict'] - label_names = np.unique(y) + # LFR supports only numerical columns + # But categoricals which are numeric are ok so setting col_type="all" + if any(unsupported in str(X.ltypes) for unsupported in ['str', 'obj']): + return - self.lfr = LFR( - unprivileged_groups=unprivileged_groups, - privileged_groups=privileged_groups, - verbose=0, - ) + X_pd = X.to_pandas() + X = dt.Frame(X_pd.fillna(X_pd.mean())) - self.lfr.fit( - BinaryLabelDataset( - favorable_label=favorable_label, - unfavorable_label=unfaborable_label, - df=X.to_pandas(), - label_names=label_names, - protected_attribute_names=protected_attribute_names, + frame = dt.cbind(X, dt.Frame(y)) + self.label_names = [frame.names[-1]] + + self.privileged_groups = config['privileged_groups'] + self.unprivileged_groups = config['unprivileged_groups'] + self.favorable_label = float(config['favorable_label']) + self.unfavorable_label = float(config['unfavorable_label']) + self.protected_attribute_names = config['protected_attribute_names'] + + self.lfr = LFR( + unprivileged_groups=self.unprivileged_groups, + privileged_groups=self.privileged_groups, + verbose=0, ) - ) + + self.lfr.fit( + BinaryLabelDataset( + df=frame.to_pandas(), + favorable_label=self.favorable_label, + unfavorable_label=self.unfavorable_label, + label_names=self.label_names, + protected_attribute_names=self.protected_attribute_names, + ) + ) + self.fitted = True def fit_transform(self, X: dt.Frame, y: np.array = None): self.fit(X, y) - return self.transform(X) + return self.transform(X, y) - def transform(self, X: dt.Frame): + def transform(self, X: dt.Frame, y: np.array = None): from aif360.datasets import BinaryLabelDataset - transformed_X: BinaryLabelDataset = self.lfr.transform(X.to_pandas()) - return transformed_X.features - - def _validate_input(self): - if "privileged_groups" not in config: - raise ValueError("Privileged groups missing from config!") - if "unprivileged_groups" not in config: - raise ValueError("Unprivileged groups missing from config!") - if "favorable_label" not in config: - raise ValueError("Favorable label missing from config!") - if "unfavorable_label" not in config: - raise ValueError("Unfavorable label missing from config!") - if "protected_attribute_names" not in config: - raise ValueError("Protected attribute names missing from config!") + # Transformation should only occur during training when y is present + if self.fitted and (self.label_names in X.names or y is not None): + if self.label_names not in X.names: + X = dt.cbind(X, dt.Frame(y)) + + X_pd = X.to_pandas() + X = dt.Frame(X_pd.fillna(X_pd.mean())) + transformed_X: BinaryLabelDataset = self.lfr.transform( + BinaryLabelDataset( + df=X.to_pandas(), + favorable_label=self.favorable_label, + unfavorable_label=self.unfavorable_label, + label_names=self.label_names, + protected_attribute_names=self.protected_attribute_names, + ) + ) + + return dt.Frame( + transformed_X.features, + names=[name+"_lfr" for name in transformed_X.feature_names], + ) + # For predictions no transformation is required + else: + return X