Skip to content

Commit

Permalink
[GSK-1711] Added check of push output (#1394)
Browse files Browse the repository at this point in the history
* Added check of push output

* Fixing seed for model used in fixtures

* Switch hash to hashlib.sha1 to get stable output

---------

Co-authored-by: Bazire <[email protected]>
  • Loading branch information
rabah-khalek and Hartorn authored Sep 29, 2023
1 parent 18a9e05 commit 22c6678
Show file tree
Hide file tree
Showing 9 changed files with 87 additions and 138 deletions.
18 changes: 13 additions & 5 deletions python-client/giskard/push/perturbation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@
"""
from typing import Optional

import hashlib

import numpy as np
import pandas as pd
import sys

from giskard.core.core import SupportedModelTypes
from giskard.datasets.base import Dataset
Expand All @@ -31,6 +32,7 @@
TextTypoTransformation,
TextUppercase,
)

from ..push import PerturbationPush

text_transformation_list = [
Expand Down Expand Up @@ -173,10 +175,16 @@ def _text(
# TextTypoTransformation generates a random typo for text features. In order to have the same typo per
# sample with the push feature in the debugger, we need to generate a unique seed per sample (hashed_seed)
# to guarantee the same perturbation per sample.
hashed_seed = hash(f"{', '.join(map(lambda x: repr(x), ds_slice_copy.df.values))}".encode("utf-8"))
# hash could give negative ints, and np.random.seed accepts only positive ints
positive_hashed_seed = hashed_seed % ((sys.maxsize + 1) * 2)
kwargs = {"rng_seed": positive_hashed_seed}
# SHA-1 is used only to derive a deterministic seed, not for security, so hash collisions are acceptable here
hashed_seed = int.from_bytes(
hashlib.sha1(
(f"{', '.join(map(lambda x: repr(x), ds_slice_copy.df.values))}".encode("utf-8"))
).digest(),
byteorder="big",
signed=False,
)
# The seed is non-negative because the digest is decoded as an unsigned integer (signed=False)
kwargs = {"rng_seed": hashed_seed}

t = text_transformation(column=feature, **kwargs)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def drug_classification_model(drug_classification_data) -> SKLearnModel:
steps=[
("one_hot_encoder", OneHotEncoder()),
("resampler", SMOTE()),
("classifier", SVC(kernel="linear", max_iter=250, probability=True)),
("classifier", SVC(random_state=30, kernel="linear", max_iter=250, probability=True)),
]
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def enron_model(enron_data) -> SKLearnModel:
("text_Mail", text_transformer, "Content"),
]
)
clf = Pipeline(steps=[("preprocessor", preprocessor), ("classifier", LogisticRegression(max_iter=100))])
clf = Pipeline(steps=[("preprocessor", preprocessor), ("classifier", LogisticRegression(max_iter=100, random_state=30))])

Y = enron_data.df["Target"]
X = enron_data.df.drop(columns="Target")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def fraud_detection_model(fraud_detection_train_data: Dataset) -> Model:
x = fraud_detection_train_data.df.drop(TARGET_COLUMN, axis=1)
y = fraud_detection_train_data.df[TARGET_COLUMN]

estimator = LGBMClassifier()
estimator = LGBMClassifier(random_state=30)
estimator.fit(x, y)

wrapped_model = Model(
Expand Down
4 changes: 2 additions & 2 deletions python-client/tests/fixtures/german_credit_scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def german_credit_catboost_raw_model(german_credit_data):
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
X, Y, test_size=0.20, random_state=30, stratify=Y
)
cb = CatBoostClassifier(iterations=2, learning_rate=1, depth=2)
cb = CatBoostClassifier(iterations=2, learning_rate=1, depth=2, random_seed=0)
cb.fit(X_train, Y_train, columns_to_encode)

model_score = cb.score(X_test, Y_test)
Expand Down Expand Up @@ -115,7 +115,7 @@ def german_credit_raw_model(german_credit_data):
("cat", categorical_transformer, columns_to_encode),
]
)
clf = Pipeline(steps=[("preprocessor", preprocessor), ("classifier", LogisticRegression(max_iter=100))])
clf = Pipeline(steps=[("preprocessor", preprocessor), ("classifier", LogisticRegression(max_iter=100, random_state=30))])

Y = german_credit_data.df["default"]
X = german_credit_data.df[german_credit_data.columns].drop(columns="default")
Expand Down
2 changes: 1 addition & 1 deletion python-client/tests/fixtures/hotel_text__regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def hotel_text_model(hotel_text_data) -> SKLearnModel:
steps=[
("vectorizer_adapter", FunctionTransformer(adapt_vectorizer_input)),
("vectorizer", TfidfVectorizer(max_features=10000)),
("regressor", GradientBoostingRegressor(n_estimators=5)),
("regressor", GradientBoostingRegressor(random_state=30, n_estimators=5)),
]
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def medical_transcript_model(medical_transcript_data: Dataset) -> SKLearnModel:
("text_preprocessor", FunctionTransformer(preprocess_text)),
("vectorizer_input_adapter", FunctionTransformer(adapt_vectorizer_input)),
("vectorizer", CountVectorizer(ngram_range=(1, 1))),
("estimator", RandomForestClassifier(n_estimators=1, max_depth=3)),
("estimator", RandomForestClassifier(n_estimators=10, max_depth=3, random_state=30)),
]
)

Expand Down
2 changes: 1 addition & 1 deletion python-client/tests/fixtures/xboost_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def breast_cancer_model(breast_cancer_data: Dataset) -> Model:
breast_cancer_data.df[TARGET_COLUMN_NAME],
random_state=RANDOM_SEED,
)
xgb = XGBClassifier(objective="binary:logistic")
xgb = XGBClassifier(objective="binary:logistic", random_state=30)
xgb.fit(X_train, y_train)
return Model(
model=xgb,
Expand Down
191 changes: 66 additions & 125 deletions python-client/tests/test_push.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import sys

import numpy as np
from giskard.ml_worker.testing.functions.transformation import mad_transformation
import pandas as pd
import pytest

import giskard.push
from giskard.ml_worker.testing.functions.transformation import mad_transformation
from giskard.ml_worker.testing.registry.giskard_test import GiskardTest
from giskard.ml_worker.testing.registry.slicing_function import slicing_function
from giskard.push import Push
Expand All @@ -14,130 +19,68 @@
slice_bounds_quartile,
)
from giskard.slicing.slice import QueryBasedSliceFunction
import pandas as pd


# Classification
def test_instance_if_not_none(german_credit_model, german_credit_data):
for i in range(50):
push_list = [
create_contribution_push(german_credit_model, german_credit_data, german_credit_data.df.iloc[[i]]),
create_perturbation_push(german_credit_model, german_credit_data, german_credit_data.df.iloc[[i]]),
create_overconfidence_push(german_credit_model, german_credit_data, german_credit_data.df.iloc[[i]]),
create_borderline_push(german_credit_model, german_credit_data, german_credit_data.df.iloc[[i]]),
]
for push in push_list:
if push is not None:
assert isinstance(push, Push)


def test_slicing_function(german_credit_model, german_credit_data):
for i in range(50):
push = create_contribution_push(german_credit_model, german_credit_data, german_credit_data.df.iloc[[i]])
if push is not None:
assert isinstance(push.slicing_function, QueryBasedSliceFunction)


def test_test_function(german_credit_model, german_credit_data):
for i in range(50):
push_list = [
create_contribution_push(german_credit_model, german_credit_data, german_credit_data.df.iloc[[i]]),
create_perturbation_push(german_credit_model, german_credit_data, german_credit_data.df.iloc[[i]]),
create_overconfidence_push(german_credit_model, german_credit_data, german_credit_data.df.iloc[[i]]),
create_borderline_push(german_credit_model, german_credit_data, german_credit_data.df.iloc[[i]]),
]
for push in push_list:
if push is not None:
for test in push.tests:
assert isinstance(test(), GiskardTest)


# Regression
def test_instance_if_not_none_reg(linear_regression_diabetes, diabetes_dataset_with_target):
for i in range(50):
push_list = [
create_contribution_push(
linear_regression_diabetes, diabetes_dataset_with_target, diabetes_dataset_with_target.df.iloc[[i]]
),
create_perturbation_push(
linear_regression_diabetes, diabetes_dataset_with_target, diabetes_dataset_with_target.df.iloc[[i]]
),
create_overconfidence_push(
linear_regression_diabetes, diabetes_dataset_with_target, diabetes_dataset_with_target.df.iloc[[i]]
),
create_borderline_push(
linear_regression_diabetes, diabetes_dataset_with_target, diabetes_dataset_with_target.df.iloc[[i]]
),
]
for push in push_list:
if push is not None:
assert isinstance(push, Push)


def test_slicing_function_reg(linear_regression_diabetes, diabetes_dataset_with_target):
for i in range(50):
push = create_contribution_push(
linear_regression_diabetes, diabetes_dataset_with_target, diabetes_dataset_with_target.df.iloc[[i]]
)
# Parametrization table for the datasets under test.
# Each entry is a tuple (model fixture name, dataset fixture name, number of rows to check);
# the fixture names are resolved at test time through `request.getfixturevalue`.
DATASETS = [
pytest.param(("german_credit_model", "german_credit_data", 50), id="German Credit"),
pytest.param(("enron_model", "enron_data", 50), id="Enron"),
pytest.param(("linear_regression_diabetes", "diabetes_dataset_with_target", 50), id="Diabetes"),
]

# Parametrization table for the push kinds under test.
# Each entry is a tuple (push type name, expected push class, function that creates the push);
# the push type name is also the key used to look up EXPECTED_COUNTS.
PUSH_TYPES = [
pytest.param(("contribution", giskard.push.ContributionPush, create_contribution_push), id="Contribution"),
pytest.param(("perturbation", giskard.push.PerturbationPush, create_perturbation_push), id="Perturbation"),
pytest.param(("overconfidence", giskard.push.OverconfidencePush, create_overconfidence_push), id="Overconfidence"),
pytest.param(("borderline", giskard.push.BorderlinePush, create_borderline_push), id="Borderline"),
]
# Expected results, keyed by model fixture name and then by push type name.
# Each list holds one value per checked row: the number of tests attached to the push
# created for that row (`len(push.tests)`), or 0 when no push is created for the row.
# fmt: off
EXPECTED_COUNTS = {
"german_credit_model" : {
"contribution" :[0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1],
"perturbation": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
"overconfidence": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
"borderline": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
},
"linear_regression_diabetes": {
"contribution" :[0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0],
"perturbation" :[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
"overconfidence" :[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
"borderline" :[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
},
"enron_model": {
"contribution" :[0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
"perturbation" :[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
"overconfidence" :[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
"borderline" :[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
}
}
# fmt: on


@pytest.mark.parametrize("dataset", DATASETS)
@pytest.mark.parametrize("push_type", PUSH_TYPES)
def test_test_function(request, dataset, push_type):
model_name, data_name, nb_line = dataset
model = request.getfixturevalue(model_name)
data = request.getfixturevalue(data_name)

push_type_name, push_type_class, push_func = push_type
if model_name == "enron_model" and push_type_name == "perturbation" and sys.platform == "win32":
pytest.skip("This test give different results on windows")

push_list = []
for i in range(nb_line):
push = push_func(model, data, data.df.iloc[[i]])
if push is not None:
assert isinstance(push.slicing_function, QueryBasedSliceFunction)


def test_test_function_reg(linear_regression_diabetes, diabetes_dataset_with_target):
for i in range(50):
push_list = [
create_contribution_push(
linear_regression_diabetes, diabetes_dataset_with_target, diabetes_dataset_with_target.df.iloc[[i]]
),
create_perturbation_push(
linear_regression_diabetes, diabetes_dataset_with_target, diabetes_dataset_with_target.df.iloc[[i]]
),
create_overconfidence_push(
linear_regression_diabetes, diabetes_dataset_with_target, diabetes_dataset_with_target.df.iloc[[i]]
),
create_borderline_push(
linear_regression_diabetes, diabetes_dataset_with_target, diabetes_dataset_with_target.df.iloc[[i]]
),
]
for push in push_list:
if push is not None:
for test in push.tests:
assert isinstance(test(), GiskardTest)


# Multiclass Classification
def test_instance_if_not_none_multi(enron_model, enron_data):
for i in range(50):
push_list = [
create_contribution_push(enron_model, enron_data, enron_data.df.iloc[[i]]),
create_perturbation_push(enron_model, enron_data, enron_data.df.iloc[[i]]),
create_overconfidence_push(enron_model, enron_data, enron_data.df.iloc[[i]]),
create_borderline_push(enron_model, enron_data, enron_data.df.iloc[[i]]),
]
for push in push_list:
if push is not None:
assert isinstance(push, Push)


def test_slicing_function_multi(enron_model, enron_data):
for i in range(50):
push = create_contribution_push(enron_model, enron_data, enron_data.df.iloc[[i]])
if push is not None:
assert isinstance(push.slicing_function, QueryBasedSliceFunction)


def test_test_function_multi(enron_model, enron_data):
for i in range(50):
push_list = [
create_contribution_push(enron_model, enron_data, enron_data.df.iloc[[i]]),
create_perturbation_push(enron_model, enron_data, enron_data.df.iloc[[i]]),
create_overconfidence_push(enron_model, enron_data, enron_data.df.iloc[[i]]),
create_borderline_push(enron_model, enron_data, enron_data.df.iloc[[i]]),
]
for push in push_list:
if push is not None:
for test in push.tests:
assert isinstance(test(), GiskardTest)
assert isinstance(push, Push)
assert isinstance(push, push_type_class)
push_list.append(len(push.tests))
assert all([isinstance(test(), GiskardTest) for test in push.tests])
if hasattr(push, "slicing_function"):
assert isinstance(push.slicing_function, QueryBasedSliceFunction)
else:
push_list.append(0)
print(push_list)
assert push_list == EXPECTED_COUNTS[model_name][push_type_name]


def test_mad_transformation_mad_precomputed(enron_data):
Expand Down Expand Up @@ -214,9 +157,7 @@ def test_coltype_to_supported_perturbation_type():


def test_text_explain_in_push(medical_transcript_model, medical_transcript_data):
    """Check that a contribution push is produced, with a value, for dataset row 3."""
    # Row 3 is the entry the test singles out as problematic; it is selected as a
    # one-row DataFrame via the double-bracket `iloc` form.
    flagged_row = medical_transcript_data.df.iloc[[3]]
    push = create_contribution_push(medical_transcript_model, medical_transcript_data, flagged_row)

    assert push is not None
    assert push.value is not None

0 comments on commit 22c6678

Please sign in to comment.