Various documentation and small bug fixes (#170)
* remove unused image

* fix slack invite link

* better handling of missing imports

* remove explicit mpl backend

* docs fixes

* fix base rate computation and added test
hoffmansc authored May 20, 2020
1 parent bc55353 commit 92235db
Showing 9 changed files with 133 additions and 79 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -29,7 +29,7 @@ in development. We encourage the contribution of your metrics, explainers, and
debiasing algorithms.

Get in touch with us on [Slack](https://aif360.slack.com) (invitation
[here](https://join.slack.com/t/aif360/shared_invite/enQtNDI5Nzg2NTk0MTMyLTU4N2UwODVmMTYxZWMwZmEzZmZkODdjMTk5NWUwZDNhNDhlMzNkZDNhOTYwZDNlODc1MTdjYzY5OTU2OWQ1ZmY))!
[here](https://join.slack.com/t/aif360/shared_invite/zt-5hfvuafo-X0~g6tgJQ~7tIAT~S294TQ))!


## Supported bias mitigation algorithms
@@ -232,6 +232,6 @@ paper.

* Introductory [video](https://www.youtube.com/watch?v=X1NsrcaRQTE) to AI
Fairness 360 by Kush Varshney, September 20, 2018 (32 mins)

## Contributing
The development fork for Rich Subgroup Fairness (`inprocessing/gerryfair_classifier.py`) is [here](https://github.com/sethneel/aif360). Contributions are welcome, and a list of potential contributions from the authors can be found [here](https://trello.com/b/0OwPcbVr/gerryfair-development).
14 changes: 8 additions & 6 deletions aif360/algorithms/inprocessing/adversarial_debiasing.py
@@ -3,7 +3,9 @@
try:
import tensorflow as tf
except ImportError as error:
print("Import error: %s" % (error))
from logging import warning
warning("{}: AdversarialDebiasing will be unavailable. To install, run:\n"
"pip install 'aif360[AdversarialDebiasing]'".format(error))

from aif360.algorithms import Transformer

@@ -226,10 +228,10 @@ def predict(self, dataset):
Returns:
dataset (BinaryLabelDataset): Transformed dataset.
"""

if self.seed is not None:
np.random.seed(self.seed)

num_test_samples, _ = np.shape(dataset.features)

samples_covered = 0
@@ -252,13 +254,13 @@ def predict(self, dataset):

pred_labels += self.sess.run(self.pred_labels, feed_dict=batch_feed_dict)[:,0].tolist()
samples_covered += len(batch_features)

# Mutated, fairer dataset with new labels
dataset_new = dataset.copy(deepcopy = True)
dataset_new.scores = np.array(pred_labels, dtype=np.float64).reshape(-1, 1)
dataset_new.labels = (np.array(pred_labels)>0.5).astype(np.float64).reshape(-1,1)


# Map the dataset labels back to their original values.
temp_labels = dataset_new.labels.copy()

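The import hunk at the top of this file replaces a bare `print` on a failed TensorFlow import with a logged warning that also points to the matching optional extra; the same guarded-import idiom is applied to the LFR helpers later in this commit. A minimal, self-contained sketch of the idiom is below; the module name `some_optional_dependency` and the extra name `SomeFeature` are placeholders, not part of this commit.

```python
# Sketch of the guarded-import idiom used in this commit (placeholder names).
# The package still imports cleanly when the optional dependency is missing;
# the user sees a logged warning with an install hint instead of a crash.
from logging import warning

try:
    import some_optional_dependency  # placeholder for an optional dependency
except ImportError as error:
    some_optional_dependency = None
    warning("{}: SomeFeature will be unavailable. To install, run:\n"
            "pip install 'aif360[SomeFeature]'".format(error))
```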
4 changes: 0 additions & 4 deletions aif360/algorithms/inprocessing/gerryfair/heatmap.py
@@ -17,10 +17,6 @@
"""

import matplotlib
try:
matplotlib.use('TkAgg')
except:
print("Matplotlib Error, comment out matplotlib.use('TkAgg')")
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
134 changes: 70 additions & 64 deletions aif360/algorithms/inprocessing/gerryfair_classifier.py
@@ -30,42 +30,41 @@


class GerryFairClassifier(Transformer):
"""Model is an algorithm for learning classifiers that are fair with respect to rich subgroups.
Rich subgroups are defined by [linear] functions over the sensitive attributes, and fairness notions are statistical: false
positive, false negative, and statistical parity rates. This implementation uses a max of two regressions
as a cost-sensitive classification oracle, and supports linear regression, support vector machines, decision trees,
and kernel regression. For details see:
References:
.. [1] "Preventing Fairness Gerrymandering: Auditing and Learning for Subgroup Fairness." Michale Kearns,
Seth Neel, Aaron Roth, Steven Wu. ICML 18'.
.. [2] "An Empirical Study of Rich Subgroup Fairness for Machine Learning". Michael Kearns,
Seth Neel, Aaron Roth, Steven Wu. FAT '19.
"""Model is an algorithm for learning classifiers that are fair with respect
to rich subgroups.
Rich subgroups are defined by (linear) functions over the sensitive
attributes, and fairness notions are statistical: false positive, false
negative, and statistical parity rates. This implementation uses a max of
two regressions as a cost-sensitive classification oracle, and supports
linear regression, support vector machines, decision trees, and kernel
regression. For details see:
References:
.. [1] "Preventing Fairness Gerrymandering: Auditing and Learning for
Subgroup Fairness." Michale Kearns, Seth Neel, Aaron Roth, Steven Wu.
ICML '18.
.. [2] "An Empirical Study of Rich Subgroup Fairness for Machine
Learning". Michael Kearns, Seth Neel, Aaron Roth, Steven Wu. FAT '19.
"""
def __init__(self,
C=10,
printflag=False,
heatmapflag=False,
heatmap_iter=10,
heatmap_path='.',
max_iters=10,
gamma=0.01,
fairness_def='FP',
predictor=linear_model.LinearRegression()):
def __init__(self, C=10, printflag=False, heatmapflag=False,
heatmap_iter=10, heatmap_path='.', max_iters=10, gamma=0.01,
fairness_def='FP', predictor=linear_model.LinearRegression()):
"""Initialize Model Object and set hyperparameters.
Args:
:param C: Maximum L1 Norm for the Dual Variables (hyperparameter)
:param printflag: Print Output Flag
:param heatmapflag: Save Heatmaps every heatmap_iter Flag
:param heatmap_iter: Save Heatmaps every heatmap_iter
:param heatmap_path: Save Heatmaps path
:param max_iters: Time Horizon for the fictitious play dynamic.
:param gamma: Fairness Approximation Paramater
:param fairness_def: Fairness notion, FP, FN, SP.
:param errors: see fit()
:param fairness_violations: see fit()
:param predictor: Hypothesis class for the Learner. Supports LR, SVM, KR, Trees.
C: Maximum L1 Norm for the Dual Variables (hyperparameter)
printflag: Print Output Flag
heatmapflag: Save Heatmaps every heatmap_iter Flag
heatmap_iter: Save Heatmaps every heatmap_iter
heatmap_path: Save Heatmaps path
max_iters: Time Horizon for the fictitious play dynamic.
gamma: Fairness Approximation Parameter
fairness_def: Fairness notion, FP, FN, SP.
errors: see fit()
fairness_violations: see fit()
predictor: Hypothesis class for the Learner. Supports LR, SVM, KR,
Trees.
"""

super(GerryFairClassifier, self).__init__()
@@ -90,11 +89,12 @@ def fit(self, dataset, early_termination=True):
"""Run Fictitious play to compute the approximately fair classifier.
Args:
dataset: dataset object with its own class definition in datasets folder inherits
from class StandardDataset.
early_termination: Terminate Early if Auditor can't find fairness violation of more than gamma.
dataset: dataset object with its own class definition in the datasets
folder; inherits from class StandardDataset.
early_termination: Terminate Early if Auditor can't find fairness
violation of more than gamma.
Returns:
A list (errors, fairness violations)
Self
"""

# defining variables and data structures for algorithm
@@ -151,15 +151,17 @@
return self

def predict(self, dataset, threshold=.5):
"""Return dataset object where labels are the predictions returned by the fitted model.
"""Return dataset object where labels are the predictions returned by
the fitted model.
Args:
:param dataset: dataset object with its own class definition in datasets folder inherits
from class StandardDataset.
:param threshold: The positive prediction cutoff for the soft-classifier.
dataset: dataset object with its own class definition in the datasets
folder; inherits from class StandardDataset.
threshold: The positive prediction cutoff for the soft-classifier.
Returns:
dataset_new: modified dataset object where the labels attribute are the predictions returned by the self model
dataset_new: modified dataset object whose labels attribute holds
the predictions returned by the fitted model
"""

# Generates predictions.
@@ -184,11 +186,11 @@ def predict(self, dataset, threshold=.5):

def print_outputs(self, iteration, error, group):
"""Helper function to print outputs at each iteration of fit.
Args:
:param iteration: current iter
:param error: most recent error
:param group: most recent group found by the auditor
:return: n/a
iteration: current iter
error: most recent error
group: most recent group found by the auditor
"""

if self.printflag:
@@ -201,12 +203,15 @@ def save_heatmap(self, iteration, dataset, predictions, vmin, vmax):
"""Helper Function to save the heatmap.
Args:
:param iteration: current iteration
:param dataset: dataset object with its own class definition in datasets folder inherits
from class StandardDataset.
:param predictions: predictions of the model self on dataset.
:param vmin: see documentation of heatmap.py heat_map function
:param vmax: see documentation of heatmap.py heat_map function
iteration: current iteration
dataset: dataset object with its own class definition in the datasets
folder; inherits from class StandardDataset.
predictions: predictions of the fitted model on the dataset.
vmin: see documentation of heatmap.py heat_map function
vmax: see documentation of heatmap.py heat_map function
Returns:
(vmin, vmax)
"""

X, X_prime, y = clean.extract_df_from_ds(dataset)
@@ -234,12 +239,12 @@ def generate_heatmap(self,
"""Helper Function to generate the heatmap at the current time.
Args:
:param iteration:current iteration
:param dataset: dataset object with its own class definition in datasets folder inherits
from class StandardDataset.
:param predictions: predictions of the model self on dataset.
:param vmin: see documentation of heatmap.py heat_map function
:param vmax: see documentation of heatmap.py heat_map function
iteration: current iteration
dataset: dataset object with its own class definition in the datasets
folder; inherits from class StandardDataset.
predictions: predictions of the fitted model on the dataset.
vmin: see documentation of heatmap.py heat_map function
vmax: see documentation of heatmap.py heat_map function
"""

X, X_prime, y = clean.extract_df_from_ds(dataset)
@@ -249,16 +254,17 @@
self.heatmap_path, vmin, vmax)

def pareto(self, dataset, gamma_list):
"""Assumes Model has FP specified for metric. Trains for each value of gamma,
returns error, FP (via training), and FN (via auditing) values.
"""Assumes Model has FP specified for metric. Trains for each value of
gamma, returns error, FP (via training), and FN (via auditing) values.
Args:
:param dataset: dataset object with its own class definition in datasets folder inherits
from class StandardDataset.
:param gamma_list: the list of gamma values to generate the pareto curve
dataset: dataset object with its own class definition in the datasets
folder; inherits from class StandardDataset.
gamma_list: the list of gamma values used to generate the Pareto curve
Returns:
:return: list of errors, list of fp violations of those models, list of fn violations of those models
list of errors, list of fp violations of those models, list of fn
violations of those models
"""

C = self.C
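The reworked docstrings above describe a scikit-learn-style interface: `fit` now returns `self`, and `predict` returns a new dataset whose labels hold the model's predictions. A minimal usage sketch under those signatures follows; the choice of `AdultDataset` and the hyperparameter values are illustrative assumptions, not part of the commit.

```python
# Illustrative only: the dataset and hyperparameter values are assumptions.
from sklearn import linear_model

from aif360.datasets import AdultDataset
from aif360.algorithms.inprocessing import GerryFairClassifier

dataset = AdultDataset()
clf = GerryFairClassifier(C=10, max_iters=10, gamma=0.01, fairness_def='FP',
                          predictor=linear_model.LinearRegression())
clf.fit(dataset, early_termination=True)            # fit() now returns self
dataset_pred = clf.predict(dataset, threshold=0.5)  # copy whose labels are the predictions

# Per the pareto() docstring, this should return the error list plus the FP and
# FN violation lists, one entry per gamma value:
# errors, fp_viol, fn_viol = clf.pareto(dataset, [0.005, 0.01, 0.02])
```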
4 changes: 2 additions & 2 deletions aif360/algorithms/postprocessing/eq_odds_postprocessing.py
@@ -85,8 +85,8 @@ def fit(self, dataset_true, dataset_pred):
privileged_groups=self.privileged_groups)

# compute basic statistics
sbr = metric.num_instances(privileged=True) / metric.num_instances()
obr = metric.num_instances(privileged=False) / metric.num_instances()
sbr = metric.base_rate(privileged=True)
obr = metric.base_rate(privileged=False)

fpr0 = metric.false_positive_rate(privileged=True)
fpr1 = metric.false_positive_rate(privileged=False)
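This is the base-rate fix from the commit message: the two expressions measure different quantities. `num_instances(privileged=True) / num_instances()` is the share of all instances that belong to the privileged group, while `base_rate(privileged=True)` is the favorable-outcome rate within that group. A small worked example with made-up counts:

```python
# Hypothetical counts, for illustration only.
n_total = 1000           # all instances
n_priv = 700             # instances in the privileged group
n_priv_favorable = 350   # privileged instances with the favorable label

old_sbr = n_priv / n_total             # 0.70 -- group share (what the old code computed)
fixed_sbr = n_priv_favorable / n_priv  # 0.50 -- base rate within the privileged group
```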
7 changes: 6 additions & 1 deletion aif360/algorithms/preprocessing/lfr.py
@@ -2,7 +2,12 @@
import scipy.optimize as optim

from aif360.algorithms import Transformer
from aif360.algorithms.preprocessing.lfr_helpers import helpers as lfr_helpers
try:
from aif360.algorithms.preprocessing.lfr_helpers import helpers as lfr_helpers
except ImportError as error:
from logging import warning
warning("{}: LFR will be unavailable. To install, run:\n"
"pip install 'aif360[LFR]'".format(error))


class LFR(Transformer):
1 change: 1 addition & 0 deletions docs/source/modules/algorithms.rst
@@ -35,6 +35,7 @@ Algorithms

algorithms.inprocessing.AdversarialDebiasing
algorithms.inprocessing.ARTClassifier
algorithms.inprocessing.GerryFairClassifier
algorithms.inprocessing.MetaFairClassifier
algorithms.inprocessing.PrejudiceRemover

Binary file removed examples/images/cnn_arch.png
44 changes: 44 additions & 0 deletions tests/test_eq_odds_postprocessing.py
@@ -0,0 +1,44 @@
from sklearn.linear_model import LogisticRegression

from aif360.datasets import AdultDataset
from aif360.algorithms.postprocessing import EqOddsPostprocessing
from aif360.algorithms.postprocessing import CalibratedEqOddsPostprocessing
from aif360.metrics import ClassificationMetric

train, val, test = AdultDataset().split([0.4, 0.7])
lr = LogisticRegression(solver='lbfgs').fit(train.features, train.labels)

val_pred = val.copy()
val_pred.labels = lr.predict(val.features).reshape((-1, 1))
val_pred.scores = lr.predict_proba(val.features)[:, 1]

pred = test.copy()
pred.labels = lr.predict(test.features).reshape((-1, 1))
pred.scores = lr.predict_proba(test.features)[:, 1]

cm_lr = ClassificationMetric(test, pred, unprivileged_groups=[{'sex': 0}],
privileged_groups=[{'sex': 1}])

def test_eqodds():
eqo = EqOddsPostprocessing(unprivileged_groups=[{'sex': 0}],
privileged_groups=[{'sex': 1}], seed=1234567)
pred_eqo = eqo.fit(val, val_pred).predict(pred)
cm_eqo = ClassificationMetric(test, pred_eqo,
unprivileged_groups=[{'sex': 0}], privileged_groups=[{'sex': 1}])
# accuracy drop should be less than 10% (arbitrary)
assert (cm_lr.accuracy() - cm_eqo.accuracy()) / cm_lr.accuracy() < 0.1
# approximately equal odds
assert cm_eqo.average_abs_odds_difference() < 0.1

def test_caleq():
ceo = CalibratedEqOddsPostprocessing(cost_constraint='fnr',
unprivileged_groups=[{'sex': 0}],
privileged_groups=[{'sex': 1}], seed=1234567)
pred_ceo = ceo.fit(val, val_pred).predict(pred)

cm_ceo = ClassificationMetric(test, pred_ceo,
unprivileged_groups=[{'sex': 0}], privileged_groups=[{'sex': 1}])
# accuracy drop should be less than 10% (arbitrary)
assert (cm_lr.accuracy() - cm_ceo.accuracy()) / cm_lr.accuracy() < 0.1
# approximate GFNR parity
assert abs(cm_ceo.difference(cm_ceo.generalized_false_negative_rate)) < 0.1
