diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6e40b1f8..252a8c6b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -43,6 +43,7 @@ jobs: - name: Install dependencies run: | + sudo apt install libglpk-dev libxml2-dev python -m pip install --upgrade pip setuptools wheel pip install -e '.[all]' pip install flake8 diff --git a/aif360/datasets/law_school_gpa_dataset.py b/aif360/datasets/law_school_gpa_dataset.py index eac3656c..0d5c992a 100644 --- a/aif360/datasets/law_school_gpa_dataset.py +++ b/aif360/datasets/law_school_gpa_dataset.py @@ -1,36 +1,24 @@ -import os import pandas as pd +from sklearn.model_selection import train_test_split from aif360.datasets import RegressionDataset -try: - import tempeh.configurations as tc -except ImportError as error: - from logging import warning - warning("{}: LawSchoolGPADataset will be unavailable. To install, run:\n" - "pip install 'aif360[LawSchoolGPA]'".format(error)) +from aif360.sklearn.datasets.lawschool_dataset import LSAC_URL class LawSchoolGPADataset(RegressionDataset): - """Law School GPA dataset. - - See https://github.com/microsoft/tempeh for details. 
- """ + """Law School GPA dataset.""" def __init__(self, dep_var_name='zfygpa', - protected_attribute_names=['race'], - privileged_classes=[['white']], + protected_attribute_names=['race', 'gender'], + privileged_classes=[['white'], ['male']], instance_weights_name=None, categorical_features=[], na_values=[], custom_preprocessing=None, metadata=None): """See :obj:`RegressionDataset` for a description of the arguments.""" - dataset = tc.datasets["lawschool_gpa"]() - X_train,X_test = dataset.get_X(format=pd.DataFrame) - y_train, y_test = dataset.get_y(format=pd.Series) - A_train, A_test = dataset.get_sensitive_features(name='race', - format=pd.Series) - all_train = pd.concat([X_train, y_train, A_train], axis=1) - all_test = pd.concat([X_test, y_test, A_test], axis=1) - - df = pd.concat([all_train, all_test], axis=0) + df = pd.read_sas(LSAC_URL, encoding="utf-8") + df.race = df.race1.where(df.race1.isin(['black', 'white'])) + df.gender = df.gender.fillna("female") + df = df[["race", "gender", "lsat", "ugpa", "zfygpa"]].dropna() + df = pd.concat(train_test_split(df, test_size=0.33, random_state=123)) super(LawSchoolGPADataset, self).__init__(df=df, dep_var_name=dep_var_name, diff --git a/aif360/metrics/ot_metric.py b/aif360/metrics/ot_metric.py index 5d07ed30..696b68f9 100644 --- a/aif360/metrics/ot_metric.py +++ b/aif360/metrics/ot_metric.py @@ -1,7 +1,12 @@ from typing import Union import pandas as pd import numpy as np -import ot +try: + import ot +except ImportError as error: + from logging import warning + warning("{}: ot_distance will be unavailable. 
To install, run:\n" + "pip install 'aif360[OptimalTransport]'".format(error)) from sklearn.preprocessing import LabelEncoder def _normalize(distribution1, distribution2): @@ -17,7 +22,7 @@ def _normalize(distribution1, distribution2): extra = -np.minimum(np.min(distribution1), np.min(distribution2)) distribution1 += extra distribution2 += extra - + total_of_distribution1 = np.sum(distribution1) if total_of_distribution1 != 0: distribution1 /= total_of_distribution1 @@ -75,10 +80,10 @@ def _evaluate( if prot_attr is None: initial_distribution, required_distribution, matrix_distance = _transform(ground_truth, classifier, cost_matrix) return ot.emd2(a=initial_distribution, b=required_distribution, M=matrix_distance, numItermax=num_iters) - + if not ground_truth.nunique() == 2: raise ValueError(f"Expected to have exactly 2 target values, got {ground_truth.nunique()}.") - + # Calculate EMD between ground truth distribution and distribution of each group emds = {} for sa_val in sorted(prot_attr.unique()): @@ -137,7 +142,7 @@ def ot_distance( # Assert correct mode passed if mode not in ['binary', 'continuous', 'nominal', 'ordinal']: raise ValueError(f"Expected one of {['binary', 'continuous', 'nominal', 'ordinal']}, got {mode}.") - + # Assert correct types passed to ground_truth, classifier and prot_attr if not isinstance(ground_truth, (pd.Series, str)): raise TypeError(f"ground_truth: expected pd.Series or str, got {type(ground_truth)}") @@ -148,17 +153,17 @@ def ot_distance( raise TypeError(f"classifier: expected pd.DataFrame for {mode} mode, got {type(classifier)}") if prot_attr is not None and not isinstance(prot_attr, (pd.Series, str)): raise TypeError(f"prot_attr: expected pd.Series or str, got {type(prot_attr)}") - + # Assert correct type passed to cost_matrix if cost_matrix is not None and not isinstance(cost_matrix, np.ndarray): raise TypeError(f"cost_matrix: expected numpy.ndarray, got {type(cost_matrix)}") - + # Assert scoring is "Wasserstein1" if not scoring 
== "Wasserstein1": raise ValueError(f"Scoring mode can only be \"Wasserstein1\", got {scoring}") - + grt = ground_truth.copy() - + if classifier is not None: cls = classifier.copy() if prot_attr is not None: @@ -171,7 +176,7 @@ def ot_distance( sat.index = grt.index else: sat = None - + uniques = list(grt.unique()) if mode == "binary": if len(uniques) > 2: diff --git a/aif360/sklearn/datasets/__init__.py b/aif360/sklearn/datasets/__init__.py index 525b1431..8c0f1049 100644 --- a/aif360/sklearn/datasets/__init__.py +++ b/aif360/sklearn/datasets/__init__.py @@ -12,4 +12,4 @@ from aif360.sklearn.datasets.openml_datasets import fetch_adult, fetch_german, fetch_bank from aif360.sklearn.datasets.compas_dataset import fetch_compas from aif360.sklearn.datasets.meps_datasets import fetch_meps -from aif360.sklearn.datasets.tempeh_datasets import fetch_lawschool_gpa +from aif360.sklearn.datasets.lawschool_dataset import fetch_lawschool_gpa diff --git a/aif360/sklearn/datasets/lawschool_dataset.py b/aif360/sklearn/datasets/lawschool_dataset.py new file mode 100644 index 00000000..66a11bfa --- /dev/null +++ b/aif360/sklearn/datasets/lawschool_dataset.py @@ -0,0 +1,89 @@ +from io import BytesIO +import os +import urllib + +import pandas as pd +from sklearn.model_selection import train_test_split + +from aif360.sklearn.datasets.utils import standardize_dataset, Dataset + + +# cache location +DATA_HOME_DEFAULT = os.path.join(os.path.dirname(os.path.abspath(__file__)), + '..', 'data', 'raw') +LSAC_URL = "https://github.com/jkomiyama/fairregresion/raw/master/dataset/law/lsac.sas7bdat" + +def fetch_lawschool_gpa(subset="all", *, data_home=None, cache=True, + binary_race=True, fillna_gender="female", + usecols=["race", "gender", "lsat", "ugpa"], + dropcols=None, numeric_only=False, dropna=True): + """Load the Law School GPA dataset. + + Optionally binarizes 'race' to 'white' (privileged) or 'black' (unprivileged). 
+ The other protected attribute is gender ('male' is privileged and 'female' + is unprivileged). The outcome variable is standardized first year GPA + ('zfygpa'). Note: this is a continuous variable, i.e., a regression task. + + Args: + subset ({'train', 'test', or 'all'}, optional): Select the dataset to + load: 'train' for the training set, 'test' for the test set, 'all' + for both. + data_home (string, optional): Specify another download and cache folder + for the datasets. By default all AIF360 datasets are stored in + 'aif360/sklearn/data/raw' subfolders. + cache (bool): Whether to cache downloaded datasets. + binary_race (bool, optional): Filter only white and black students. + fillna_gender (str or None, optional): Fill NA values for gender with + this value. If `None`, leave as NA. Note: this is used for backward- + compatibility with tempeh and may be dropped in later versions. + usecols (single label or list-like, optional): Feature column(s) to + keep. All others are dropped. + dropcols (single label or list-like, optional): Feature column(s) to + drop. + numeric_only (bool): Drop all non-numeric feature columns. + dropna (bool): Drop rows with NAs. + + Returns: + namedtuple: Tuple containing X and y for the Law School GPA dataset, + accessible by index or name. 
+ """ + if subset not in {'train', 'test', 'all'}: + raise ValueError("subset must be either 'train', 'test', or 'all'; " + "cannot be {}".format(subset)) + + cache_path = os.path.join(data_home or DATA_HOME_DEFAULT, + os.path.basename(LSAC_URL)) + if cache and os.path.isfile(cache_path): + df = pd.read_sas(cache_path, encoding="utf-8") + else: + data = urllib.request.urlopen(LSAC_URL).read() + if cache: + os.makedirs(os.path.dirname (cache_path), exist_ok=True) + with open(cache_path, 'wb') as f: + f.write(data) + df = pd.read_sas(BytesIO(data), format="sas7bdat", encoding="utf-8") + + df.race = df.race1.astype('category') + if binary_race: + df.race = df.race.cat.set_categories(['black', 'white'], ordered=True) + + # for backwards-compatibility with tempeh + if fillna_gender is not None: + df.gender = df.gender.fillna(fillna_gender) + df.gender = df.gender.astype('category').cat.set_categories( + ['female', 'male'], ordered=True) + + ds = standardize_dataset(df, prot_attr=['race', 'gender'], target='zfygpa', + usecols=usecols, dropcols=dropcols, + numeric_only=numeric_only, dropna=dropna) + + # for backwards-compatibility with tempeh + train_X, test_X, train_y, test_y = train_test_split(*ds, test_size=0.33, random_state=123) + if subset == "train": + return Dataset(train_X, train_y) + elif subset == "test": + return Dataset(test_X, test_y) + else: + X = pd.concat([train_X, test_X], axis=0) + y = pd.concat([train_y, test_y], axis=0) + return Dataset(X, y) diff --git a/aif360/sklearn/datasets/openml_datasets.py b/aif360/sklearn/datasets/openml_datasets.py index 4525087d..a6a92c2e 100644 --- a/aif360/sklearn/datasets/openml_datasets.py +++ b/aif360/sklearn/datasets/openml_datasets.py @@ -168,19 +168,20 @@ def fetch_german(*, data_home=None, cache=True, binary_age=True, usecols=None, dropcols=dropcols, numeric_only=numeric_only, dropna=dropna) -def fetch_bank(*, data_home=None, cache=True, binary_age=True, percent10=False, +def fetch_bank(*, data_home=None, 
cache=True, binary_age=True, percent10=False, usecols=None, dropcols=['duration'], numeric_only=False, dropna=False): """Load the Bank Marketing Dataset. - The protected attribute is 'age' (binarized by default as suggested by [#lequy22]: - age >= 25 and age <60 is considered privileged and age< 25 or age >= 60 unprivileged; - see the binary_age flag to keep this continuous). The outcome variable is 'deposit': + The protected attribute is 'age' (binarized by default as suggested by [#lequy22]_: + age >= 25 and age <60 is considered privileged and age< 25 or age >= 60 unprivileged; + see the binary_age flag to keep this continuous). The outcome variable is 'deposit': 'yes' or 'no'. - References: - .. [#lequy22] Le Quy, Tai, et al. "A survey on datasets for fairness‐aware machine - learning." Wiley Interdisciplinary Reviews: Data Mining and Knowledge - Discovery 12.3 (2022): e1452. + References: + .. [#lequy22] `Le Quy, Tai, et al. "A survey on datasets for fairness- + aware machine learning." Wiley Interdisciplinary Reviews: Data Mining + and Knowledge Discovery 12.3 (2022): e1452. + <https://doi.org/10.1002/widm.1452>`_ Note: By default, the data is downloaded from OpenML. See the `bank-marketing
See the `bank-marketing @@ -235,15 +236,15 @@ def fetch_bank(*, data_home=None, cache=True, binary_age=True, percent10=False, df[col] = df[col].cat.remove_categories('unknown') df.education = df.education.astype('category').cat.reorder_categories( ['primary', 'secondary', 'tertiary'], ordered=True) - + # binarize protected attribute (but not corresponding feature) age = (pd.cut(df.age, [0, 24, 60, 100], ordered=False, - labels=[0, 1, 0] if numeric_only + labels=[0, 1, 0] if numeric_only else ['<25 or >=60', '25-60', '<25 or >=60']) if binary_age else 'age') - age = age.cat.reorder_categories([0, 1] if numeric_only + age = age.cat.reorder_categories([0, 1] if numeric_only else ['<25 or >=60', '25-60']) - + return standardize_dataset(df, prot_attr=[age], target='deposit', usecols=usecols, dropcols=dropcols, numeric_only=numeric_only, dropna=dropna) diff --git a/aif360/sklearn/datasets/tempeh_datasets.py b/aif360/sklearn/datasets/tempeh_datasets.py deleted file mode 100644 index 7adae048..00000000 --- a/aif360/sklearn/datasets/tempeh_datasets.py +++ /dev/null @@ -1,59 +0,0 @@ -import pandas as pd -try: - import tempeh.configurations as tc -except ImportError as error: - from logging import warning - warning("{}: fetch_lawschool_gpa will be unavailable. To install, run:\n" - "pip install 'aif360[LawSchoolGPA]'".format(error)) - -from aif360.sklearn.datasets.utils import standardize_dataset - - -def fetch_lawschool_gpa(subset="all", *, usecols=None, dropcols=None, - numeric_only=False, dropna=True): - """Load the Law School GPA dataset - - Note: - By default, the data is downloaded from tempeh. See - https://github.com/microsoft/tempeh for details. - - Args: - subset ({'train', 'test', or 'all'}, optional): Select the dataset to - load: 'train' for the training set, 'test' for the test set, 'all' - for both. - usecols (single label or list-like, optional): Feature column(s) to - keep. All others are dropped. 
- dropcols (single label or list-like, optional): Feature column(s) to - drop. - numeric_only (bool): Drop all non-numeric feature columns. - dropna (bool): Drop rows with NAs. FIXME: NAs already dropped by tempeh - - Returns: - namedtuple: Tuple containing X, y, and sample_weights for the Law School - GPA dataset accessible by index or name. - """ - if subset not in {'train', 'test', 'all'}: - raise ValueError("subset must be either 'train', 'test', or 'all'; " - "cannot be {}".format(subset)) - - dataset = tc.datasets["lawschool_gpa"]() - X_train, X_test = dataset.get_X(format=pd.DataFrame) - y_train, y_test = dataset.get_y(format=pd.Series) - A_train, A_test = dataset.get_sensitive_features(name='race', - format=pd.Series) - all_train = pd.concat([X_train, y_train, A_train], axis=1) - all_test = pd.concat([X_test, y_test, A_test], axis=1) - - if subset == "train": - df = all_train - elif subset == "test": - df = all_test - else: - df = pd.concat([all_train, all_test], axis=0) - - df.race = df.race.astype('category').cat.set_categories( - ['black', 'white'], ordered=True) - - return standardize_dataset(df, prot_attr='race', target='zfygpa', - usecols=usecols, dropcols=dropcols, - numeric_only=numeric_only, dropna=dropna) diff --git a/aif360/sklearn/inprocessing/infairness.py b/aif360/sklearn/inprocessing/infairness.py index 04963aa4..99d32eec 100644 --- a/aif360/sklearn/inprocessing/infairness.py +++ b/aif360/sklearn/inprocessing/infairness.py @@ -1,10 +1,16 @@ -from inFairness import fairalgo +try: + from inFairness import fairalgo + from skorch import NeuralNet + from skorch.dataset import unpack_data, Dataset as Dataset_ + from skorch.utils import is_pandas_ndframe +except ImportError as error: + from logging import warning + warning("{}: SenSeI and SenSR will be unavailable. 
To install, run:\n" + "pip install 'aif360[inFairness]'".format(error)) + Dataset_ = NeuralNet = object from sklearn.preprocessing import LabelBinarizer from sklearn.utils.multiclass import type_of_target from sklearn.exceptions import NotFittedError -from skorch import NeuralNet -from skorch.dataset import unpack_data, Dataset as Dataset_ -from skorch.utils import is_pandas_ndframe class Dataset(Dataset_): diff --git a/aif360/sklearn/metrics/metrics.py b/aif360/sklearn/metrics/metrics.py index bcbf9c16..80322b17 100644 --- a/aif360/sklearn/metrics/metrics.py +++ b/aif360/sklearn/metrics/metrics.py @@ -17,30 +17,6 @@ from aif360.detectors.mdss.MDSS import MDSS -__all__ = [ - # meta-metrics - 'difference', 'ratio', 'intersection', 'one_vs_rest', - # scorer factory - 'make_scorer', - # helpers - 'num_samples', 'num_pos_neg', - 'specificity_score', 'base_rate', 'selection_rate', 'smoothed_base_rate', - 'smoothed_selection_rate', 'generalized_fpr', 'generalized_fnr', - # group fairness - 'ot_distance', 'statistical_parity_difference', 'disparate_impact_ratio', - 'equal_opportunity_difference', 'average_odds_difference', 'average_predictive_value_difference', - 'average_odds_error', 'class_imbalance', 'kl_divergence', - 'conditional_demographic_disparity', 'smoothed_edf', - 'df_bias_amplification', 'mdss_bias_score', - # individual fairness - 'generalized_entropy_index', 'generalized_entropy_error', - 'between_group_generalized_entropy_error', 'theil_index', - 'coefficient_of_variation', 'consistency_score', - # aliases - 'sensitivity_score', 'mean_difference', 'false_negative_rate_error', - 'false_positive_rate_error' -] - # ============================= META-METRICS =================================== def difference(func, y_true, y_pred=None, prot_attr=None, priv_group=1, sample_weight=None, **kwargs): diff --git a/aif360/sklearn/postprocessing/__init__.py b/aif360/sklearn/postprocessing/__init__.py index af2c1286..b80719ea 100644 --- 
a/aif360/sklearn/postprocessing/__init__.py +++ b/aif360/sklearn/postprocessing/__init__.py @@ -7,7 +7,7 @@ import pandas as pd from sklearn.base import BaseEstimator, MetaEstimatorMixin, clone from sklearn.model_selection import train_test_split -from sklearn.utils.metaestimators import if_delegate_has_method +from sklearn.utils.metaestimators import available_if from aif360.sklearn.postprocessing.calibrated_equalized_odds import CalibratedEqualizedOdds from aif360.sklearn.postprocessing.reject_option_classification import RejectOptionClassifier, RejectOptionClassifierCV @@ -132,7 +132,7 @@ def fit(self, X, y, sample_weight=None, **fit_params): **fit_params) return self - @if_delegate_has_method('postprocessor_') + @available_if(lambda self: hasattr(self.postprocessor_, "predict")) def predict(self, X): """Predict class labels for the given samples. @@ -151,7 +151,7 @@ def predict(self, X): y_score = pd.DataFrame(y_score, index=X.index).squeeze('columns') return self.postprocessor_.predict(y_score) - @if_delegate_has_method('postprocessor_') + @available_if(lambda self: hasattr(self.postprocessor_, "predict_proba")) def predict_proba(self, X): """Probability estimates. @@ -175,7 +175,7 @@ def predict_proba(self, X): y_score = pd.DataFrame(y_score, index=X.index).squeeze('columns') return self.postprocessor_.predict_proba(y_score) - @if_delegate_has_method('postprocessor_') + @available_if(lambda self: hasattr(self.postprocessor_, "predict_log_proba")) def predict_log_proba(self, X): """Log of probability estimates. @@ -199,7 +199,7 @@ def predict_log_proba(self, X): y_score = pd.DataFrame(y_score, index=X.index).squeeze('columns') return self.postprocessor_.predict_log_proba(y_score) - @if_delegate_has_method('postprocessor_') + @available_if(lambda self: hasattr(self.postprocessor_, "score")) def score(self, X, y, sample_weight=None): """Returns the output of the post-processor's score function on the given test data and labels. 
diff --git a/aif360/sklearn/postprocessing/reject_option_classification.py b/aif360/sklearn/postprocessing/reject_option_classification.py index 331f63e0..84d4a082 100644 --- a/aif360/sklearn/postprocessing/reject_option_classification.py +++ b/aif360/sklearn/postprocessing/reject_option_classification.py @@ -85,8 +85,6 @@ def __init__(self, prot_attr=None, threshold=0.5, margin=0.1): margin (scalar): Half width of the critical region. Estimates within the critical region are "rejected" and assigned according to their group. Must be between 0 and min(threshold, 1-threshold). - metric ('statistical_parity', 'average_odds', 'equal_opportunity', - or callable): """ self.prot_attr = prot_attr self.threshold = threshold diff --git a/aif360/sklearn/preprocessing/learning_fair_representations.py b/aif360/sklearn/preprocessing/learning_fair_representations.py index 52f62873..a32be1d0 100644 --- a/aif360/sklearn/preprocessing/learning_fair_representations.py +++ b/aif360/sklearn/preprocessing/learning_fair_representations.py @@ -149,7 +149,7 @@ def LFR_optim_objective(x, X, y, priv): x0 = rng.random(w_size + self.n_prototypes*n_feat) bounds = [(0, 1)]*w_size + [(None, None)]*self.n_prototypes*n_feat res = optim.minimize(LFR_optim_objective, x0=x0, method='L-BFGS-B', - args=(torch.tensor(X.to_numpy()), torch.as_tensor(y), priv), + args=(torch.tensor(X.to_numpy(dtype=x0.dtype)), torch.as_tensor(y), priv), jac=True, bounds=bounds, options={'gtol': self.tol, 'maxiter': self.max_iter}) diff --git a/aif360/sklearn/preprocessing/reweighing.py b/aif360/sklearn/preprocessing/reweighing.py index 3af4d7ff..069ff904 100644 --- a/aif360/sklearn/preprocessing/reweighing.py +++ b/aif360/sklearn/preprocessing/reweighing.py @@ -1,6 +1,6 @@ import numpy as np from sklearn.base import BaseEstimator, MetaEstimatorMixin, clone -from sklearn.utils.metaestimators import if_delegate_has_method +from sklearn.utils.metaestimators import available_if from sklearn.utils.validation import 
has_fit_parameter from aif360.sklearn.utils import check_inputs, check_groups @@ -73,7 +73,7 @@ def fit_transform(self, X, y, sample_weight=None): """ X, y, sample_weight = check_inputs(X, y, sample_weight) - sample_weight_t = np.empty_like(sample_weight) + sample_weight_t = np.empty_like(sample_weight, dtype=float) groups, self.prot_attr_ = check_groups(X, self.prot_attr) # TODO: maintain categorical ordering self.groups_ = np.unique(groups) @@ -88,7 +88,7 @@ def N_(i): return sample_weight[i].sum() for j, c in enumerate(self.classes_): g_and_c = (groups == g) & (y == c) if np.any(g_and_c): - W_gc = N_(groups == g) * N_(y == c) / (N * N_(g_and_c)) + W_gc = N_(groups == g) / N * N_(y == c) / N_(g_and_c) sample_weight_t[g_and_c] = W_gc * sample_weight[g_and_c] self.reweigh_factors_[i, j] = W_gc return X, sample_weight_t @@ -153,7 +153,7 @@ def fit(self, X, y, sample_weight=None): self.estimator_.fit(X, y, sample_weight=sample_weight) return self - @if_delegate_has_method('estimator_') + @available_if(lambda self: hasattr(self.estimator_, "predict")) def predict(self, X): """Predict class labels for the given samples using ``self.estimator_``. @@ -165,7 +165,7 @@ def predict(self, X): """ return self.estimator_.predict(X) - @if_delegate_has_method('estimator_') + @available_if(lambda self: hasattr(self.estimator_, "predict_proba")) def predict_proba(self, X): """Probability estimates from ``self.estimator_``. @@ -181,7 +181,7 @@ def predict_proba(self, X): """ return self.estimator_.predict_proba(X) - @if_delegate_has_method('estimator_') + @available_if(lambda self: hasattr(self.estimator_, "predict_log_proba")) def predict_log_proba(self, X): """Log of probability estimates from ``self.estimator_``. 
@@ -198,7 +198,7 @@ def predict_log_proba(self, X): """ return self.estimator_.predict_log_proba(X) - @if_delegate_has_method('estimator_') + @available_if(lambda self: hasattr(self.estimator_, "score")) def score(self, X, y, sample_weight=None): """Returns the output of the estimator's score function on the given test data and labels. diff --git a/docs/Makefile b/docs/Makefile index f417938a..765e0cff 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -3,7 +3,7 @@ # You can set these variables from the command line. SPHINXOPTS = -SPHINXBUILD = python -msphinx +SPHINXBUILD ?= sphinx-build SPHINXPROJ = aif360 SOURCEDIR = source BUILDDIR = build diff --git a/docs/requirements.txt b/docs/requirements.txt index da6e730f..72c936a2 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -5,6 +5,6 @@ fairlearn>=0.7.0 pytest>=3.5.0 # docs -sphinx==1.8.6 +sphinx==7.2.6 jinja2==3.1.3 -sphinx_rtd_theme==0.4.3 +sphinx_rtd_theme==2.0.0 diff --git a/docs/source/conf.py b/docs/source/conf.py index d834cf4b..c4714a2a 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -41,12 +41,16 @@ 'sphinx.ext.intersphinx', 'sphinx.ext.mathjax'] +flv = tuple(map(int, fairlearn.__version__.split('.'))) +if flv > (0, 7, 0) and flv[-1] == 0: + flv = flv[:-1] +flv = '.'.join(map(str, flv)) intersphinx_mapping = { - 'numpy': ('https://docs.scipy.org/doc/numpy/', None), - 'scipy': ('https://docs.scipy.org/doc/scipy/reference/', None), + 'numpy': ('https://numpy.org/doc/stable/', None), + 'scipy': ('https://docs.scipy.org/doc/scipy/', None), 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None), 'sklearn': ('https://scikit-learn.org/stable/', None), - 'fairlearn': (f'https://fairlearn.github.io/v{fairlearn.__version__}/', None), + 'fairlearn': (f'https://fairlearn.org/v{flv}/', None), 'python': ('https://docs.python.org/{}.{}'.format(*sys.version_info), None), 'inFairness': ('https://ibm.github.io/inFairness/', None), 'skorch': 
('https://skorch.readthedocs.io/en/stable/', None), @@ -97,7 +101,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = 'en' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. @@ -117,6 +121,9 @@ # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False +# If true, parameters with trailing underscores will be properly escaped. +strip_signature_backslash = True + # -- Options for HTML output ---------------------------------------------- diff --git a/docs/source/modules/sklearn.rst b/docs/source/modules/sklearn.rst index f92127d7..7fe8c8df 100644 --- a/docs/source/modules/sklearn.rst +++ b/docs/source/modules/sklearn.rst @@ -119,7 +119,6 @@ Group fairness metrics metrics.smoothed_edf metrics.df_bias_amplification metrics.between_group_generalized_entropy_error - metrics.mdss_bias_scan metrics.mdss_bias_score Individual fairness metrics diff --git a/docs/source/templates/class.rst b/docs/source/templates/class.rst index 9ce9f9e6..59cd39a1 100644 --- a/docs/source/templates/class.rst +++ b/docs/source/templates/class.rst @@ -4,6 +4,7 @@ .. currentmodule:: {{ module }} .. 
autoclass:: {{ objname }} + :members: {% block methods %} {% if methods %} diff --git a/examples/sklearn/demo_exponentiated_gradient_reduction_sklearn.ipynb b/examples/sklearn/demo_exponentiated_gradient_reduction_sklearn.ipynb index a3aa01c7..a7afcc62 100644 --- a/examples/sklearn/demo_exponentiated_gradient_reduction_sklearn.ipynb +++ b/examples/sklearn/demo_exponentiated_gradient_reduction_sklearn.ipynb @@ -593,7 +593,7 @@ ], "source": [ "ohe = make_column_transformer(\n", - " (OneHotEncoder(sparse=False), X_train.dtypes == 'category'),\n", + " (OneHotEncoder(sparse_output=False), X_train.dtypes == 'category'),\n", " remainder='passthrough', verbose_feature_names_out=False)\n", "X_train = pd.DataFrame(ohe.fit_transform(X_train), columns=ohe.get_feature_names_out(), index=X_train.index)\n", "X_test = pd.DataFrame(ohe.transform(X_test), columns=ohe.get_feature_names_out(), index=X_test.index)\n", @@ -784,8 +784,8 @@ ], "source": [ "np.random.seed(0) #for reproducibility\n", - "exp_grad_red = ExponentiatedGradientReduction(prot_attr=prot_attr_cols, \n", - " estimator=estimator, \n", + "exp_grad_red = ExponentiatedGradientReduction(prot_attr=prot_attr_cols,\n", + " estimator=estimator,\n", " constraints=\"EqualizedOdds\",\n", " drop_prot_attr=False)\n", "exp_grad_red.fit(X_train, y_train)\n", @@ -916,11 +916,11 @@ } ], "source": [ - "import fairlearn.reductions as red \n", + "import fairlearn.reductions as red\n", "\n", "np.random.seed(0) #need for reproducibility\n", - "exp_grad_red2 = ExponentiatedGradientReduction(prot_attr=prot_attr_cols, \n", - " estimator=estimator, \n", + "exp_grad_red2 = ExponentiatedGradientReduction(prot_attr=prot_attr_cols,\n", + " estimator=estimator,\n", " constraints=red.EqualizedOdds(),\n", " drop_prot_attr=False)\n", "exp_grad_red2.fit(X_train, y_train)\n", diff --git a/examples/sklearn/demo_grid_search_reduction_regression_sklearn.ipynb b/examples/sklearn/demo_grid_search_reduction_regression_sklearn.ipynb index d9fec458..ea47f6d5 
100644 --- a/examples/sklearn/demo_grid_search_reduction_regression_sklearn.ipynb +++ b/examples/sklearn/demo_grid_search_reduction_regression_sklearn.ipynb @@ -2,12 +2,12 @@ "cells": [ { "cell_type": "markdown", - "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://github.com/Trusted-AI/AIF360/blob/master/examples/sklearn/demo_grid_search_reduction_regression_sklearn.ipynb)" - ], "metadata": { "id": "fgfyb_2c9WL4" - } + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://github.com/Trusted-AI/AIF360/blob/master/examples/sklearn/demo_grid_search_reduction_regression_sklearn.ipynb)" + ] }, { "cell_type": "markdown", @@ -23,73 +23,24 @@ }, { "cell_type": "code", - "source": [ - "#Install aif360\n", - "#Install Reductions from Fairlearn\n", - "#Install Low School GPA dataset\n", - "!pip install aif360[Reductions,LawSchoolGPA]" - ], + "execution_count": null, "metadata": { - "id": "4Ad7nC129i8f", - "outputId": "0ce37c29-d5a5-4682-968e-bb33cea9de19", "colab": { "base_uri": "https://localhost:8080/" - } + }, + "id": "4Ad7nC129i8f", + "outputId": "0ce37c29-d5a5-4682-968e-bb33cea9de19" }, - "execution_count": 10, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Requirement already satisfied: aif360[LawSchoolGPA,Reductions] in /usr/local/lib/python3.7/dist-packages (0.5.0)\n", - "Requirement already satisfied: scipy>=1.2.0 in /usr/local/lib/python3.7/dist-packages (from aif360[LawSchoolGPA,Reductions]) (1.7.3)\n", - "Requirement already satisfied: scikit-learn>=1.0 in /usr/local/lib/python3.7/dist-packages (from aif360[LawSchoolGPA,Reductions]) (1.0.2)\n", - "Requirement already satisfied: numpy>=1.16 in /usr/local/lib/python3.7/dist-packages (from aif360[LawSchoolGPA,Reductions]) (1.21.6)\n", - "Requirement already satisfied: 
pandas>=0.24.0 in /usr/local/lib/python3.7/dist-packages (from aif360[LawSchoolGPA,Reductions]) (1.3.5)\n", - "Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from aif360[LawSchoolGPA,Reductions]) (3.2.2)\n", - "Requirement already satisfied: fairlearn~=0.7 in /usr/local/lib/python3.7/dist-packages (from aif360[LawSchoolGPA,Reductions]) (0.7.0)\n", - "Requirement already satisfied: tempeh in /usr/local/lib/python3.7/dist-packages (from aif360[LawSchoolGPA,Reductions]) (0.1.12)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.24.0->aif360[LawSchoolGPA,Reductions]) (2022.2.1)\n", - "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.24.0->aif360[LawSchoolGPA,Reductions]) (2.8.2)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas>=0.24.0->aif360[LawSchoolGPA,Reductions]) (1.15.0)\n", - "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=1.0->aif360[LawSchoolGPA,Reductions]) (1.1.0)\n", - "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=1.0->aif360[LawSchoolGPA,Reductions]) (3.1.0)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->aif360[LawSchoolGPA,Reductions]) (1.4.4)\n", - "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->aif360[LawSchoolGPA,Reductions]) (3.0.9)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->aif360[LawSchoolGPA,Reductions]) (0.11.0)\n", - "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from 
kiwisolver>=1.0.1->matplotlib->aif360[LawSchoolGPA,Reductions]) (4.1.1)\n", - "Requirement already satisfied: pytest in /usr/local/lib/python3.7/dist-packages (from tempeh->aif360[LawSchoolGPA,Reductions]) (3.6.4)\n", - "Requirement already satisfied: memory-profiler in /usr/local/lib/python3.7/dist-packages (from tempeh->aif360[LawSchoolGPA,Reductions]) (0.60.0)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from tempeh->aif360[LawSchoolGPA,Reductions]) (2.23.0)\n", - "Requirement already satisfied: shap in /usr/local/lib/python3.7/dist-packages (from tempeh->aif360[LawSchoolGPA,Reductions]) (0.41.0)\n", - "Requirement already satisfied: psutil in /usr/local/lib/python3.7/dist-packages (from memory-profiler->tempeh->aif360[LawSchoolGPA,Reductions]) (5.4.8)\n", - "Requirement already satisfied: py>=1.5.0 in /usr/local/lib/python3.7/dist-packages (from pytest->tempeh->aif360[LawSchoolGPA,Reductions]) (1.11.0)\n", - "Requirement already satisfied: more-itertools>=4.0.0 in /usr/local/lib/python3.7/dist-packages (from pytest->tempeh->aif360[LawSchoolGPA,Reductions]) (8.14.0)\n", - "Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from pytest->tempeh->aif360[LawSchoolGPA,Reductions]) (57.4.0)\n", - "Requirement already satisfied: attrs>=17.4.0 in /usr/local/lib/python3.7/dist-packages (from pytest->tempeh->aif360[LawSchoolGPA,Reductions]) (22.1.0)\n", - "Requirement already satisfied: pluggy<0.8,>=0.5 in /usr/local/lib/python3.7/dist-packages (from pytest->tempeh->aif360[LawSchoolGPA,Reductions]) (0.7.1)\n", - "Requirement already satisfied: atomicwrites>=1.0 in /usr/local/lib/python3.7/dist-packages (from pytest->tempeh->aif360[LawSchoolGPA,Reductions]) (1.4.1)\n", - "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->tempeh->aif360[LawSchoolGPA,Reductions]) (1.24.3)\n", - "Requirement already satisfied: 
chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->tempeh->aif360[LawSchoolGPA,Reductions]) (3.0.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->tempeh->aif360[LawSchoolGPA,Reductions]) (2022.6.15)\n", - "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->tempeh->aif360[LawSchoolGPA,Reductions]) (2.10)\n", - "Requirement already satisfied: numba in /usr/local/lib/python3.7/dist-packages (from shap->tempeh->aif360[LawSchoolGPA,Reductions]) (0.56.2)\n", - "Requirement already satisfied: cloudpickle in /usr/local/lib/python3.7/dist-packages (from shap->tempeh->aif360[LawSchoolGPA,Reductions]) (1.5.0)\n", - "Requirement already satisfied: slicer==0.0.7 in /usr/local/lib/python3.7/dist-packages (from shap->tempeh->aif360[LawSchoolGPA,Reductions]) (0.0.7)\n", - "Requirement already satisfied: tqdm>4.25.0 in /usr/local/lib/python3.7/dist-packages (from shap->tempeh->aif360[LawSchoolGPA,Reductions]) (4.64.1)\n", - "Requirement already satisfied: packaging>20.9 in /usr/local/lib/python3.7/dist-packages (from shap->tempeh->aif360[LawSchoolGPA,Reductions]) (21.3)\n", - "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.7/dist-packages (from numba->shap->tempeh->aif360[LawSchoolGPA,Reductions]) (0.39.1)\n", - "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from numba->shap->tempeh->aif360[LawSchoolGPA,Reductions]) (4.12.0)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->numba->shap->tempeh->aif360[LawSchoolGPA,Reductions]) (3.8.1)\n" - ] - } + "outputs": [], + "source": [ + "#Install aif360\n", + "#Install Reductions from Fairlearn\n", + "!pip install aif360[Reductions]" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "id": "sivw3vma71DE" }, @@ 
-130,33 +81,20 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": { - "id": "7Xqg6Yn771DG", - "outputId": "00fda049-0c6a-4c86-9ee8-f49b9e2d5161", "colab": { "base_uri": "https://localhost:8080/", "height": 238 - } + }, + "id": "7Xqg6Yn771DG", + "outputId": "00fda049-0c6a-4c86-9ee8-f49b9e2d5161" }, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " lsat ugpa race\n", - "race \n", - "0 38.0 3.3 0\n", - "1 34.0 4.0 1\n", - "1 34.0 3.9 1\n", - "1 45.0 3.3 1\n", - "1 39.0 2.5 1" - ], "text/html": [ - "\n", - "
\n", - "
\n", - "
\n", + "
\n", "\n", - "\n", - " \n", - "
\n", - "
\n", - " " + "
" + ], + "text/plain": [ + " race lsat ugpa\n", + "race gender \n", + "0.0 1 0.0 38.0 3.3\n", + "1.0 0 1.0 34.0 4.0\n", + " 0 1.0 34.0 3.9\n", + " 0 1.0 45.0 3.3\n", + " 1 1.0 39.0 2.5" ] }, + "execution_count": 2, "metadata": {}, - "execution_count": 3 + "output_type": "execute_result" } ], "source": [ - "X_train, y_train = fetch_lawschool_gpa(\"train\", numeric_only=True)\n", - "X_test, y_test = fetch_lawschool_gpa(\"test\", numeric_only=True)\n", + "X_train, y_train = fetch_lawschool_gpa(\"train\", numeric_only=True, dropcols=\"gender\")\n", + "X_test, y_test = fetch_lawschool_gpa(\"test\", numeric_only=True, dropcols=\"gender\")\n", "X_train.head()" ] }, @@ -318,33 +194,20 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "metadata": { - "id": "7CxY3K8i71DI", - "outputId": "c14dec8a-8a45-4992-af71-fd6cc5761cc0", "colab": { "base_uri": "https://localhost:8080/", "height": 238 - } + }, + "id": "7CxY3K8i71DI", + "outputId": "c14dec8a-8a45-4992-af71-fd6cc5761cc0" }, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " lsat ugpa race\n", - "race \n", - "0 0.729730 0.825 0.0\n", - "1 0.621622 1.000 1.0\n", - "1 0.621622 0.975 1.0\n", - "1 0.918919 0.825 1.0\n", - "1 0.756757 0.625 1.0" - ], "text/html": [ - "\n", - "
\n", - "
\n", - "
\n", + "
\n", "\n", - "\n", - " \n", - "
\n", - "
\n", - " " + "
" + ], + "text/plain": [ + " race lsat ugpa\n", + "race gender \n", + "0.0 1 0.0 0.729730 0.825\n", + "1.0 0 1.0 0.621622 1.000\n", + " 0 1.0 0.621622 0.975\n", + " 0 1.0 0.918919 0.825\n", + " 1 1.0 0.756757 0.625" ] }, + "execution_count": 3, "metadata": {}, - "execution_count": 4 + "output_type": "execute_result" } ], "source": [ @@ -518,24 +319,24 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": { - "id": "y3mrAFw471DL", - "outputId": "d7bd400b-2fc1-426b-baab-93139fb48739", "colab": { "base_uri": "https://localhost:8080/" - } + }, + "id": "y3mrAFw471DL", + "outputId": "d7bd400b-2fc1-426b-baab-93139fb48739" }, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ "0.7400826321650612" ] }, + "execution_count": 4, "metadata": {}, - "execution_count": 5 + "output_type": "execute_result" } ], "source": [ @@ -557,28 +358,28 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": { - "id": "F97LRCdf71DM", - "outputId": "fe6f95a7-fdb9-4a47-bd39-284ef726f983", "colab": { "base_uri": "https://localhost:8080/" - } + }, + "id": "F97LRCdf71DM", + "outputId": "fe6f95a7-fdb9-4a47-bd39-284ef726f983" }, "outputs": [ { - "output_type": "execute_result", "data": { "text/plain": [ - "0.2039259052574467" + "0.20392590525744636" ] }, + "execution_count": 5, "metadata": {}, - "execution_count": 6 + "output_type": "execute_result" } ], "source": [ - "lr_mae_diff = difference(mean_absolute_error, y_test, y_pred)\n", + "lr_mae_diff = difference(mean_absolute_error, y_test, y_pred, prot_attr=\"race\")\n", "lr_mae_diff" ] }, @@ -602,7 +403,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": { "id": "TxejOHFK71DN" }, @@ -622,27 +423,27 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": { - "id": "B63yaCCf71DN", - "outputId": "e3e2c851-b3f0-4296-e868-a46b78899364", "colab": { "base_uri": "https://localhost:8080/" - } + }, 
+ "id": "B63yaCCf71DN", + "outputId": "e3e2c851-b3f0-4296-e868-a46b78899364" }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ - "0.7622719376746615\n" + "0.7622719376746614\n" ] } ], "source": [ "np.random.seed(0) #need for reproducibility\n", - "grid_search_red = GridSearchReduction(prot_attr=\"race\", \n", - " estimator=estimator, \n", + "grid_search_red = GridSearchReduction(prot_attr=\"race\",\n", + " estimator=estimator,\n", " constraints=\"BoundedGroupLoss\",\n", " loss=\"Absolute\",\n", " min_val=y_train.min(),\n", @@ -660,25 +461,25 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": { - "id": "WPd4i-Zj71DO", - "outputId": "d40bcb93-fcb1-405b-ffc3-a72ef29157e1", "colab": { "base_uri": "https://localhost:8080/" - } + }, + "id": "WPd4i-Zj71DO", + "outputId": "d40bcb93-fcb1-405b-ffc3-a72ef29157e1" }, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ - "0.06122151904963524\n" + "0.06122151904963535\n" ] } ], "source": [ - "gs_mae_diff = difference(mean_absolute_error, y_test, gs_pred)\n", + "gs_mae_diff = difference(mean_absolute_error, y_test, gs_pred, prot_attr=\"race\")\n", "print(gs_mae_diff)\n", "\n", "#Check if difference decreased\n", @@ -687,6 +488,9 @@ } ], "metadata": { + "colab": { + "provenance": [] + }, "kernelspec": { "display_name": "Python 3.9.7 ('aif360')", "language": "python", @@ -702,17 +506,14 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.11.4" }, "vscode": { "interpreter": { "hash": "d0c5ced7753e77a483fec8ff7063075635521cce6e0bd54998c8f174742209dd" } - }, - "colab": { - "provenance": [] } }, "nbformat": 4, "nbformat_minor": 0 -} \ No newline at end of file +} diff --git a/requirements.txt b/requirements.txt index 54445aff..33241d32 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,6 @@ tensorflow>=1.13.1 torch cvxpy>=1.0 
fairlearn>=0.7.0 -tempeh rpy2==3.4.5 skorch==0.11.0 inFairness>=0.2.2 @@ -25,6 +24,6 @@ pytest>=3.5.0 pytest-cov>=2.8.1 # docs -sphinx==1.8.6 +sphinx==7.2.6 jinja2==3.1.3 -sphinx_rtd_theme==0.4.3 +sphinx_rtd_theme==2.0.0 diff --git a/setup.py b/setup.py index 06d53aaf..f4f7a7de 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ from setuptools import setup, find_packages -from functools import reduce +from itertools import chain long_description = """The AI Fairness 360 toolkit is an open-source library to help detect and mitigate bias in machine learning models. The AI Fairness 360 Python package includes a comprehensive set of metrics for datasets and models to @@ -23,12 +23,11 @@ 'FairAdapt': ['rpy2'], 'inFairness': ['skorch', 'inFairness>=0.2.2'], 'notebooks': ['jupyter', 'tqdm', 'igraph[plotting]', 'lightgbm', 'seaborn', 'ipympl'], - 'LawSchoolGPA': ['tempeh'], 'OptimalTransport': ['pot'], } -extras['tests'] = reduce(lambda l1, l2: l1+l2, extras.values(), ['pytest>=3.5', 'pytest-cov>=2.8.1']) -extras['docs'] = ['sphinx<2', 'jinja2<3.2.0', 'sphinx_rtd_theme'] -extras['all'] = list(reduce(lambda s, l: s.union(l), extras.values(), set())) +extras['tests'] = list(chain(*extras.values(), ['pytest>=3.5', 'pytest-cov>=2.8.1'])) +extras['docs'] = ['sphinx', 'jinja2>3.1.0', 'sphinx_rtd_theme'] +extras['all'] = list(set(chain(*extras.values()))) setup(name='aif360', version=version, diff --git a/tests/sklearn/test_datasets.py b/tests/sklearn/test_datasets.py index 0fefc835..97c08279 100644 --- a/tests/sklearn/test_datasets.py +++ b/tests/sklearn/test_datasets.py @@ -177,7 +177,7 @@ def test_german_matches_old(): old = old.apply(lambda c: c.factorize()[0] if not is_numeric_dtype(c) else c) assert_frame_equal(X.reset_index(drop=True), old.reset_index(drop=True), - check_like=True) + check_like=True, check_dtype=False) def test_fetch_bank(): """Tests Bank Marketing dataset shapes with various options.""" @@ -214,9 +214,9 @@ def test_fetch_lawschool_gpa(): """Tests Law 
School GPA dataset shapes with various options.""" gpa = fetch_lawschool_gpa() assert len(gpa) == 2 - assert gpa.X.shape == (22342, 3) + assert gpa.X.shape == (22342, 4) assert gpa.y.nunique() > 2 # regression - assert fetch_lawschool_gpa(numeric_only=True, dropna=False).X.shape == (22342, 3) + assert fetch_lawschool_gpa(numeric_only=True, dropna=False).X.shape == (27478, 4) def test_lawschool_matches_old(): """Tests Law School GPA dataset matches original version."""